-rw-r--r--  CMakeLists.txt | 1
-rw-r--r--  LICENSE.TXT | 2
-rw-r--r--  autoconf/configure.ac | 2
-rw-r--r--  cmake/modules/LLVMLibDeps.cmake | 25
-rwxr-xr-x  configure | 2
-rw-r--r--  docs/CodeGenerator.html | 7
-rw-r--r--  docs/LangRef.html | 110
-rw-r--r--  docs/ProgrammersManual.html | 24
-rw-r--r--  docs/ReleaseNotes.html | 3
-rw-r--r--  docs/SourceLevelDebugging.html | 22
-rw-r--r--  docs/TableGenFundamentals.html | 6
-rw-r--r--  include/llvm-c/Analysis.h | 6
-rw-r--r--  include/llvm-c/BitReader.h | 26
-rw-r--r--  include/llvm-c/Core.h | 74
-rw-r--r--  include/llvm-c/ExecutionEngine.h | 34
-rw-r--r--  include/llvm-c/Target.h | 7
-rw-r--r--  include/llvm/ADT/BitVector.h | 18
-rw-r--r--  include/llvm/ADT/SmallBitVector.h | 373
-rw-r--r--  include/llvm/ADT/StringExtras.h | 84
-rw-r--r--  include/llvm/ADT/StringRef.h | 28
-rw-r--r--  include/llvm/ADT/Twine.h | 34
-rw-r--r--  include/llvm/Analysis/AliasAnalysis.h | 4
-rw-r--r--  include/llvm/Analysis/DebugInfo.h | 31
-rw-r--r--  include/llvm/Analysis/DominatorInternals.h | 11
-rw-r--r--  include/llvm/Analysis/Dominators.h | 73
-rw-r--r--  include/llvm/Analysis/LoopInfo.h | 4
-rw-r--r--  include/llvm/Analysis/PostDominators.h | 16
-rw-r--r--  include/llvm/Attributes.h | 77
-rw-r--r--  include/llvm/Bitcode/LLVMBitCodes.h | 9
-rw-r--r--  include/llvm/CodeGen/DAGISelHeader.h | 2
-rw-r--r--  include/llvm/CodeGen/FastISel.h | 24
-rw-r--r--  include/llvm/CodeGen/MachineFunction.h | 2
-rw-r--r--  include/llvm/CodeGen/MachineInstr.h | 7
-rw-r--r--  include/llvm/CodeGen/MachineInstrBuilder.h | 6
-rw-r--r--  include/llvm/CodeGen/MachineLoopInfo.h | 2
-rw-r--r--  include/llvm/CodeGen/MachineOperand.h | 19
-rw-r--r--  include/llvm/CodeGen/Passes.h | 4
-rw-r--r--  include/llvm/CodeGen/SelectionDAGISel.h | 11
-rw-r--r--  include/llvm/CodeGen/ValueTypes.h | 18
-rw-r--r--  include/llvm/IntrinsicInst.h | 127
-rw-r--r--  include/llvm/Intrinsics.td | 8
-rw-r--r--  include/llvm/LinkAllPasses.h | 4
-rw-r--r--  include/llvm/MC/MCAsmLexer.h | 3
-rw-r--r--  include/llvm/MC/MCParsedAsmOperand.h | 33
-rw-r--r--  include/llvm/MC/MCSymbol.h | 5
-rw-r--r--  include/llvm/Metadata.h | 55
-rw-r--r--  include/llvm/Module.h | 35
-rw-r--r--  include/llvm/Support/CFG.h | 68
-rw-r--r--  include/llvm/Support/FormattedStream.h | 4
-rw-r--r--  include/llvm/Support/Mangler.h | 30
-rw-r--r--  include/llvm/Support/MathExtras.h | 4
-rw-r--r--  include/llvm/Support/PatternMatch.h | 23
-rw-r--r--  include/llvm/Target/Target.td | 7
-rw-r--r--  include/llvm/Target/TargetAsmParser.h | 20
-rw-r--r--  include/llvm/Target/TargetInstrInfo.h | 27
-rw-r--r--  include/llvm/Target/TargetLowering.h | 6
-rw-r--r--  include/llvm/Target/TargetLoweringObjectFile.h | 2
-rw-r--r--  include/llvm/Target/TargetRegisterInfo.h | 2
-rw-r--r--  include/llvm/Target/TargetSelectionDAG.td | 9
-rw-r--r--  include/llvm/Transforms/Instrumentation.h | 10
-rw-r--r--  include/llvm/Transforms/Utils/BasicBlockUtils.h | 10
-rw-r--r--  include/llvm/Transforms/Utils/Local.h | 19
-rw-r--r--  include/llvm/Type.h | 3
-rw-r--r--  include/llvm/ValueSymbolTable.h | 86
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp | 11
-rw-r--r--  lib/Analysis/Analysis.cpp | 8
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 23
-rw-r--r--  lib/Analysis/DbgInfoPrinter.cpp | 59
-rw-r--r--  lib/Analysis/DebugInfo.cpp | 164
-rw-r--r--  lib/Analysis/IVUsers.cpp | 5
-rw-r--r--  lib/Analysis/InlineCost.cpp | 40
-rw-r--r--  lib/Analysis/LoopInfo.cpp | 5
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 22
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 28
-rw-r--r--  lib/AsmParser/LLParser.cpp | 211
-rw-r--r--  lib/AsmParser/LLParser.h | 20
-rw-r--r--  lib/Bitcode/Reader/BitReader.cpp | 27
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 36
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 47
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.cpp | 62
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.h | 4
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp | 159
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.h | 28
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 328
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 178
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 3
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp | 30
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.h | 8
-rw-r--r--  lib/CodeGen/DeadMachineInstructionElim.cpp | 2
-rw-r--r--  lib/CodeGen/ELFCodeEmitter.cpp | 2
-rw-r--r--  lib/CodeGen/ELFWriter.cpp | 2
-rw-r--r--  lib/CodeGen/ExactHazardRecognizer.cpp | 14
-rw-r--r--  lib/CodeGen/GCMetadata.cpp | 3
-rw-r--r--  lib/CodeGen/GCStrategy.cpp | 3
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 28
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp | 10
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 21
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 7
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 94
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 22
-rw-r--r--  lib/CodeGen/LowerSubregs.cpp | 26
-rw-r--r--  lib/CodeGen/MachOWriter.cpp | 5
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 3
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 9
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 20
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 24
-rw-r--r--  lib/CodeGen/MachineLoopInfo.cpp | 5
-rw-r--r--  lib/CodeGen/MachineSSAUpdater.cpp | 4
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 8
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 112
-rw-r--r--  lib/CodeGen/OptimizeExts.cpp | 185
-rw-r--r--  lib/CodeGen/PBQP/AnnotatedGraph.h | 2
-rw-r--r--  lib/CodeGen/PBQP/ExhaustiveSolver.h | 2
-rw-r--r--  lib/CodeGen/PBQP/GraphBase.h | 2
-rw-r--r--  lib/CodeGen/PBQP/HeuristicSolver.h | 2
-rw-r--r--  lib/CodeGen/PBQP/Heuristics/Briggs.h | 2
-rw-r--r--  lib/CodeGen/PBQP/PBQPMath.h | 2
-rw-r--r--  lib/CodeGen/PBQP/SimpleGraph.h | 2
-rw-r--r--  lib/CodeGen/PBQP/Solution.h | 2
-rw-r--r--  lib/CodeGen/PBQP/Solver.h | 2
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 6
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 26
-rw-r--r--  lib/CodeGen/PreAllocSplitting.cpp | 67
-rw-r--r--  lib/CodeGen/ProcessImplicitDefs.cpp | 2
-rw-r--r--  lib/CodeGen/RegAllocLinearScan.cpp | 64
-rw-r--r--  lib/CodeGen/RegAllocLocal.cpp | 34
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 14
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 80
-rw-r--r--  lib/CodeGen/SelectionDAG/CallingConvLower.cpp | 13
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 300
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 34
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 15
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 28
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 24
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 42
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 5
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 117
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 16
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp | 12
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 40
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 8
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 62
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 50
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 12
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 225
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 74
-rw-r--r--  lib/CodeGen/ShrinkWrapping.cpp | 74
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.cpp | 127
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 9
-rw-r--r--  lib/CodeGen/SlotIndexes.cpp | 12
-rw-r--r--  lib/CodeGen/Spiller.cpp | 16
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 2
-rw-r--r--  lib/CodeGen/StackSlotColoring.cpp | 26
-rw-r--r--  lib/CodeGen/StrongPHIElimination.cpp | 12
-rw-r--r--  lib/CodeGen/TailDuplication.cpp | 39
-rw-r--r--  lib/CodeGen/TargetInstrInfoImpl.cpp | 7
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 24
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 2
-rw-r--r--  lib/CodeGen/VirtRegRewriter.cpp | 144
-rw-r--r--  lib/ExecutionEngine/ExecutionEngine.cpp | 44
-rw-r--r--  lib/ExecutionEngine/ExecutionEngineBindings.cpp | 34
-rw-r--r--  lib/ExecutionEngine/Interpreter/Execution.cpp | 78
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.cpp | 11
-rw-r--r--  lib/ExecutionEngine/JIT/JITEmitter.cpp | 60
-rw-r--r--  lib/ExecutionEngine/JIT/JITMemoryManager.cpp | 2
-rw-r--r--  lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp | 16
-rw-r--r--  lib/Linker/LinkModules.cpp | 34
-rw-r--r--  lib/MC/MCExpr.cpp | 5
-rw-r--r--  lib/MC/MCInst.cpp | 9
-rw-r--r--  lib/MC/MCSectionELF.cpp | 7
-rw-r--r--  lib/MC/MCSymbol.cpp | 14
-rw-r--r--  lib/MC/MCValue.cpp | 3
-rw-r--r--  lib/Support/APInt.cpp | 68
-rw-r--r--  lib/Support/CommandLine.cpp | 8
-rw-r--r--  lib/Support/ConstantRange.cpp | 3
-rw-r--r--  lib/Support/ErrorHandling.cpp | 9
-rw-r--r--  lib/Support/FormattedStream.cpp | 8
-rw-r--r--  lib/Support/Statistic.cpp | 3
-rw-r--r--  lib/Support/StringExtras.cpp | 59
-rw-r--r--  lib/Support/StringRef.cpp | 25
-rw-r--r--  lib/Support/Timer.cpp | 3
-rw-r--r--  lib/Support/Twine.cpp | 15
-rw-r--r--  lib/System/Win32/DynamicLibrary.inc | 42
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 67
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 2
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 15
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 321
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 23
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 4
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 33
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 2
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 12
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 13
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 39
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 27
-rw-r--r--  lib/Target/Alpha/AlphaISelDAGToDAG.cpp | 26
-rw-r--r--  lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp | 9
-rw-r--r--  lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp | 11
-rw-r--r--  lib/Target/CBackend/CBackend.cpp | 38
-rw-r--r--  lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 171
-rw-r--r--  lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 67
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 13
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.td | 40
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 75
-rw-r--r--  lib/Target/PIC16/PIC16ISelDAGToDAG.cpp | 4
-rw-r--r--  lib/Target/PIC16/PIC16ISelDAGToDAG.h | 4
-rw-r--r--  lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp | 122
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 36
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 8
-rw-r--r--  lib/Target/PowerPC/PPCJITInfo.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCMCAsmInfo.cpp | 1
-rw-r--r--  lib/Target/PowerPC/README.txt | 33
-rw-r--r--  lib/Target/README.txt | 48
-rw-r--r--  lib/Target/Sparc/SparcISelDAGToDAG.cpp | 19
-rw-r--r--  lib/Target/SubtargetFeature.cpp | 3
-rw-r--r--  lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 115
-rw-r--r--  lib/Target/Target.cpp | 2
-rw-r--r--  lib/Target/TargetLoweringObjectFile.cpp | 34
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 36
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 66
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 28
-rw-r--r--  lib/Target/X86/README-SSE.txt | 20
-rw-r--r--  lib/Target/X86/README.txt | 83
-rw-r--r--  lib/Target/X86/X86.td | 4
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp | 20
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 16
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 16
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 157
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 241
-rw-r--r--  lib/Target/X86/X86Instr64bit.td | 94
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 73
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 10
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 75
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 10
-rw-r--r--  lib/Target/X86/X86JITInfo.cpp | 2
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 9
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 3
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 7
-rw-r--r--  lib/Target/XCore/XCoreISelDAGToDAG.cpp | 31
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp | 10
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp | 18
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp | 66
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 93
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp | 22
-rw-r--r--  lib/Transforms/IPO/Internalize.cpp | 6
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp | 8
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp | 2
-rw-r--r--  lib/Transforms/IPO/StructRetPromotion.cpp | 11
-rw-r--r--  lib/Transforms/InstCombine/CMakeLists.txt | 17
-rw-r--r--  lib/Transforms/InstCombine/InstCombine.h | 349
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAddSub.cpp | 740
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 1990
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp | 1142
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp | 1301
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 2475
-rw-r--r--  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 613
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 695
-rw-r--r--  lib/Transforms/InstCombine/InstCombinePHI.cpp | 841
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp | 703
-rw-r--r--  lib/Transforms/InstCombine/InstCombineShifts.cpp | 427
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 1106
-rw-r--r--  lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 560
-rw-r--r--  lib/Transforms/InstCombine/InstCombineWorklist.h | 105
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp | 1274
-rw-r--r--  lib/Transforms/InstCombine/Makefile | 15
-rw-r--r--  lib/Transforms/Instrumentation/BlockProfiling.cpp | 128
-rw-r--r--  lib/Transforms/Instrumentation/CMakeLists.txt | 2
-rw-r--r--  lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp | 4
-rw-r--r--  lib/Transforms/Instrumentation/ProfilingUtils.cpp | 2
-rw-r--r--  lib/Transforms/Instrumentation/RSProfiling.cpp | 662
-rw-r--r--  lib/Transforms/Instrumentation/RSProfiling.h | 31
-rw-r--r--  lib/Transforms/Makefile | 2
-rw-r--r--  lib/Transforms/Scalar/ABCD.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/ADCE.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp | 12
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp | 84
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 44
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 10
-rw-r--r--  lib/Transforms/Scalar/InstructionCombining.cpp | 13736
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 255
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp | 10
-rw-r--r--  lib/Transforms/Scalar/LoopIndexSplit.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp | 12
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp | 26
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp | 13
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp | 48
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp | 42
-rw-r--r--  lib/Transforms/Scalar/SCCVN.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp | 18
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp | 7
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 85
-rw-r--r--  lib/Transforms/Scalar/TailDuplication.cpp | 6
-rw-r--r--  lib/Transforms/Utils/AddrModeMatcher.cpp | 5
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp | 22
-rw-r--r--  lib/Transforms/Utils/BasicInliner.cpp | 10
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 16
-rw-r--r--  lib/Transforms/Utils/CloneLoop.cpp | 4
-rw-r--r--  lib/Transforms/Utils/CodeExtractor.cpp | 41
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp | 45
-rw-r--r--  lib/Transforms/Utils/InstructionNamer.cpp | 4
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 56
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp | 22
-rw-r--r--  lib/Transforms/Utils/LowerInvoke.cpp | 2
-rw-r--r--  lib/Transforms/Utils/LowerSwitch.cpp | 10
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 4
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp | 4
-rw-r--r--  lib/Transforms/Utils/SSI.cpp | 2
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 37
-rw-r--r--  lib/Transforms/Utils/UnifyFunctionExitNodes.cpp | 2
-rw-r--r--  lib/VMCore/AsmWriter.cpp | 34
-rw-r--r--  lib/VMCore/Attributes.cpp | 7
-rw-r--r--  lib/VMCore/AutoUpgrade.cpp | 59
-rw-r--r--  lib/VMCore/ConstantFold.cpp | 12
-rw-r--r--  lib/VMCore/Constants.cpp | 28
-rw-r--r--  lib/VMCore/ConstantsContext.h | 2
-rw-r--r--  lib/VMCore/Core.cpp | 68
-rw-r--r--  lib/VMCore/Function.cpp | 2
-rw-r--r--  lib/VMCore/InlineAsm.cpp | 2
-rw-r--r--  lib/VMCore/Instruction.cpp | 35
-rw-r--r--  lib/VMCore/Instructions.cpp | 29
-rw-r--r--  lib/VMCore/IntrinsicInst.cpp | 26
-rw-r--r--  lib/VMCore/Mangler.cpp | 200
-rw-r--r--  lib/VMCore/Metadata.cpp | 212
-rw-r--r--  lib/VMCore/Module.cpp | 24
-rw-r--r--  lib/VMCore/Pass.cpp | 5
-rw-r--r--  lib/VMCore/PassManager.cpp | 67
-rw-r--r--  lib/VMCore/PrintModulePass.cpp | 5
-rw-r--r--  lib/VMCore/Type.cpp | 31
-rw-r--r--  lib/VMCore/TypeSymbolTable.cpp | 21
-rw-r--r--  lib/VMCore/TypesContext.h | 6
-rw-r--r--  lib/VMCore/Value.cpp | 32
-rw-r--r--  lib/VMCore/ValueSymbolTable.cpp | 20
-rw-r--r--  lib/VMCore/Verifier.cpp | 18
-rw-r--r--  runtime/libprofile/exported_symbols.lst | 2
-rw-r--r--  test/Analysis/BasicAA/2007-11-05-SizeCrash.ll | 4
-rw-r--r--  test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/avoid-smax-0.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/max-trip-count.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/nsw-offset.ll | 16
-rw-r--r--  test/Analysis/ScalarEvolution/nsw.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/sext-inreg.ll | 4
-rw-r--r--  test/Analysis/ScalarEvolution/sext-iv-0.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/sext-iv-1.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/sext-iv-2.ll | 4
-rw-r--r--  test/Analysis/ScalarEvolution/trip-count3.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/trip-count7.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/trip-count8.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/zext-wrap.ll | 2
-rw-r--r--  test/Assembler/functionlocal-metadata.ll | 35
-rw-r--r--  test/Assembler/vector-cmp.ll | 6
-rw-r--r--  test/CodeGen/ARM/indirectbr.ll | 6
-rw-r--r--  test/CodeGen/ARM/private.ll | 2
-rw-r--r--  test/CodeGen/ARM/tail-opts.ll | 2
-rw-r--r--  test/CodeGen/Alpha/private.ll | 2
-rw-r--r--  test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll | 1
-rw-r--r--  test/CodeGen/Blackfin/ct32.ll | 6
-rw-r--r--  test/CodeGen/Blackfin/ct64.ll | 6
-rw-r--r--  test/CodeGen/Blackfin/ctlz16.ll | 6
-rw-r--r--  test/CodeGen/Blackfin/ctpop16.ll | 6
-rw-r--r--  test/CodeGen/Blackfin/cttz16.ll | 6
-rw-r--r--  test/CodeGen/Blackfin/promote-logic.ll | 1
-rw-r--r--  test/CodeGen/CellSPU/dp_farith.ll | 2
-rw-r--r--  test/CodeGen/CellSPU/mul_ops.ll | 1
-rw-r--r--  test/CodeGen/CellSPU/private.ll | 2
-rw-r--r--  test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll | 2
-rw-r--r--  test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll | 16
-rw-r--r--  test/CodeGen/MSP430/bit.ll | 9
-rw-r--r--  test/CodeGen/MSP430/setcc.ll | 1
-rw-r--r--  test/CodeGen/MSP430/shifts.ll | 51
-rw-r--r--  test/CodeGen/Mips/private.ll | 2
-rw-r--r--  test/CodeGen/PowerPC/indirectbr.ll | 8
-rw-r--r--  test/CodeGen/PowerPC/private.ll | 2
-rw-r--r--  test/CodeGen/SPARC/private.ll | 2
-rw-r--r--  test/CodeGen/SystemZ/2009-06-02-Rotate.ll | 4
-rw-r--r--  test/CodeGen/SystemZ/2010-01-04-DivMem.ll | 50
-rw-r--r--  test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll | 89
-rw-r--r--  test/CodeGen/Thumb2/thumb2-add.ll | 16
-rw-r--r--  test/CodeGen/X86/2006-05-11-InstrSched.ll | 14
-rw-r--r--  test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll (renamed from test/Transforms/IndVarSimplify/2007-01-08-X86-64-Pointer.ll) | 0
-rw-r--r--  test/CodeGen/X86/2007-02-04-OrAddrMode.ll | 4
-rw-r--r--  test/CodeGen/X86/2009-02-26-MachineLICMBug.ll | 4
-rw-r--r--  test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll | 8
-rw-r--r--  test/CodeGen/X86/2009-11-16-MachineLICM.ll | 2
-rw-r--r--  test/CodeGen/X86/2010-01-05-ZExt-Shl.ll | 15
-rw-r--r--  test/CodeGen/X86/2010-01-07-ISelBug.ll | 27
-rw-r--r--  test/CodeGen/X86/2010-01-07-UAMemFeature.ll | 11
-rw-r--r--  test/CodeGen/X86/2010-01-08-Atomic64Bug.ll | 29
-rw-r--r--  test/CodeGen/X86/2010-01-11-ExtraPHIArg.ll | 97
-rw-r--r--  test/CodeGen/X86/2010-01-13-OptExtBug.ll | 46
-rw-r--r--  test/CodeGen/X86/3addr-or.ll | 27
-rw-r--r--  test/CodeGen/X86/addr-label-difference.ll | 22
-rw-r--r--  test/CodeGen/X86/and-su.ll | 43
-rw-r--r--  test/CodeGen/X86/anyext-uses.ll | 47
-rw-r--r--  test/CodeGen/X86/br-fold.ll | 20
-rw-r--r--  test/CodeGen/X86/brcond.ll | 69
-rw-r--r--  test/CodeGen/X86/darwin-bzero.ll | 2
-rw-r--r--  test/CodeGen/X86/extractelement-shuffle.ll | 10
-rw-r--r--  test/CodeGen/X86/fast-isel.ll | 2
-rw-r--r--  test/CodeGen/X86/fold-load.ll | 26
-rw-r--r--  test/CodeGen/X86/lsr-sort.ll | 5
-rw-r--r--  test/CodeGen/X86/mul-legalize.ll | 2
-rw-r--r--  test/CodeGen/X86/private.ll | 2
-rw-r--r--  test/CodeGen/X86/remat-mov-0.ll | 13
-rw-r--r--  test/CodeGen/X86/sext-subreg.ll | 17
-rw-r--r--  test/CodeGen/X86/stack-color-with-reg.ll | 2
-rw-r--r--  test/CodeGen/X86/stride-nine-with-base-reg.ll | 3
-rw-r--r--  test/CodeGen/X86/tail-opts.ll | 2
-rw-r--r--  test/CodeGen/X86/tailcall-largecode.ll | 71
-rw-r--r--  test/CodeGen/X86/test-nofold.ll | 35
-rw-r--r--  test/CodeGen/X86/twoaddr-lea.ll | 24
-rw-r--r--  test/CodeGen/X86/use-add-flags.ll | 56
-rw-r--r--  test/CodeGen/X86/vec_cast.ll | 48
-rw-r--r--  test/CodeGen/X86/vec_ext_inreg.ll | 1
-rw-r--r--  test/CodeGen/X86/vec_shuffle-22.ll | 2
-rw-r--r--  test/CodeGen/X86/vec_shuffle-25.ll | 18
-rw-r--r--  test/CodeGen/X86/vec_shuffle-26.ll | 6
-rw-r--r--  test/CodeGen/X86/widen_select-1.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_shuffle-1.ll | 2
-rw-r--r--  test/CodeGen/X86/widen_shuffle-2.ll | 2
-rw-r--r--  test/CodeGen/X86/x86-64-and-mask.ll | 43
-rw-r--r--  test/CodeGen/X86/x86-64-jumps.ll | 29
-rw-r--r--  test/CodeGen/X86/xor-icmp.ll (renamed from test/CodeGen/X86/brcond-srl.ll) | 21
-rw-r--r--  test/CodeGen/XCore/2009-03-27-v2f64-param.ll | 2
-rw-r--r--  test/CodeGen/XCore/private.ll | 2
-rw-r--r--  test/DebugInfo/2009-10-16-Scope.ll | 5
-rw-r--r--  test/DebugInfo/2009-12-01-CurrentFn.ll | 17
-rw-r--r--  test/DebugInfo/2010-01-05-DbgScope.ll | 18
-rw-r--r--  test/DebugInfo/printdbginfo2.ll | 13
-rw-r--r--  test/ExecutionEngine/2010-01-15-UndefValue.ll | 9
-rw-r--r--  test/Feature/NamedMDNode.ll | 5
-rw-r--r--  test/FrontendC/2010-01-05-LinkageName.c | 15
-rw-r--r--  test/FrontendC/2010-01-13-MemBarrier.c | 11
-rw-r--r--  test/FrontendC/2010-01-14-FnType-DebugInfo.c | 4
-rw-r--r--  test/FrontendC/2010-01-14-StaticVariable.c | 12
-rw-r--r--  test/FrontendC/cstring-align.c | 2
-rw-r--r--  test/Integer/BitPacked.ll | 4
-rw-r--r--  test/Integer/packed_bt.ll | 4
-rw-r--r--  test/Integer/testvarargs_bt.ll | 2
-rw-r--r--  test/Other/2007-06-28-PassManager.ll | 2
-rw-r--r--  test/Other/2008-02-14-PassManager.ll | 2
-rw-r--r--  test/Other/2008-08-14-PassManager.ll | 2
-rw-r--r--  test/Other/2009-06-05-no-implicit-float.ll | 4
-rw-r--r--  test/TableGen/eq.td | 13
-rw-r--r--  test/Transforms/ConstProp/loads.ll | 10
-rw-r--r--  test/Transforms/DeadArgElim/canon.ll | 4
-rw-r--r--  test/Transforms/DeadStoreElimination/const-pointers.ll | 2
-rw-r--r--  test/Transforms/DeadStoreElimination/no-targetdata.ll | 4
-rw-r--r--  test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll | 56
-rw-r--r--  test/Transforms/GVN/null-aliases-nothing.ll | 2
-rw-r--r--  test/Transforms/GlobalOpt/crash.ll | 16
-rw-r--r--  test/Transforms/GlobalOpt/ctor-list-opt-dbg.ll | 2
-rw-r--r--  test/Transforms/IPConstantProp/return-argument.ll | 4
-rw-r--r--  test/Transforms/IndVarSimplify/shrunk-constant.ll | 2
-rw-r--r--  test/Transforms/Inline/2007-12-19-InlineNoUnwind.ll | 2
-rw-r--r--  test/Transforms/InstCombine/apint-shift.ll | 7
-rw-r--r--  test/Transforms/InstCombine/bswap-fold.ll | 31
-rw-r--r--  test/Transforms/InstCombine/bswap.ll | 2
-rw-r--r--  test/Transforms/InstCombine/cast-and-cast.ll | 17
-rw-r--r--  test/Transforms/InstCombine/cast-cast-to-and.ll | 9
-rw-r--r--  test/Transforms/InstCombine/cast-load-gep.ll | 21
-rw-r--r--  test/Transforms/InstCombine/cast-propagate.ll | 11
-rw-r--r--  test/Transforms/InstCombine/cast-sext-zext.ll | 12
-rw-r--r--  test/Transforms/InstCombine/cast.ll | 280
-rw-r--r--  test/Transforms/InstCombine/cast2.ll | 37
-rw-r--r--  test/Transforms/InstCombine/cast3.ll | 35
-rw-r--r--  test/Transforms/InstCombine/cast_ld_addr_space.ll | 19
-rw-r--r--  test/Transforms/InstCombine/cast_ptr.ll | 41
-rw-r--r--  test/Transforms/InstCombine/fsub-fadd.ll | 39
-rw-r--r--  test/Transforms/InstCombine/intrinsics.ll | 47
-rw-r--r--  test/Transforms/InstCombine/load-cmp.ll | 112
-rw-r--r--  test/Transforms/InstCombine/load.ll | 9
-rw-r--r--  test/Transforms/InstCombine/loadstore-alignment.ll | 4
-rw-r--r--  test/Transforms/InstCombine/or.ll | 63
-rw-r--r--  test/Transforms/InstCombine/setcc-cast-cast.ll | 46
-rw-r--r--  test/Transforms/InstCombine/shift-sra.ll | 45
-rw-r--r--  test/Transforms/InstCombine/sub.ll | 29
-rw-r--r--  test/Transforms/JumpThreading/basic.ll | 18
-rw-r--r--  test/Transforms/LICM/licm_preserve_dbginfo.ll | 55
-rw-r--r--  test/Transforms/LoopIndexSplit/SplitValue-2007-08-24-dbg.ll | 2
-rw-r--r--  test/Transforms/LoopRotate/PhiRename-1.ll | 2
-rw-r--r--  test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll | 10
-rw-r--r--  test/Transforms/LoopStrengthReduce/dont_reverse.ll | 4
-rw-r--r--  test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll | 2
-rw-r--r--  test/Transforms/PruneEH/simplenoreturntest.ll | 2
-rw-r--r--  test/Transforms/Reassociate/crash.ll | 33
-rw-r--r--  test/Transforms/ScalarRepl/2009-01-09-scalarrepl-empty.ll | 2
-rw-r--r--  test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll | 2
-rw-r--r--  test/Transforms/SimplifyCFG/2009-03-05-Speculative-Hoist-Dbg.ll | 2
-rw-r--r--  test/Transforms/SimplifyCFG/branch_fold_dbg.ll | 2
-rw-r--r--  test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll | 2
-rw-r--r--  test/Transforms/SimplifyCFG/switch_formation.dbg.ll | 2
-rw-r--r--  test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll | 2
-rw-r--r--  test/Transforms/TailCallElim/dont_reorder_load.ll | 6
-rw-r--r--  test/Verifier/2006-10-15-AddrLabel.ll | 2
-rw-r--r--  tools/llc/llc.cpp | 5
-rw-r--r--  tools/llvm-mc/AsmLexer.cpp | 2
-rw-r--r--  tools/llvm-mc/AsmParser.cpp | 28
-rw-r--r--  tools/llvmc/plugins/Base/Base.td.in | 3
-rw-r--r--  tools/lto/LTOCodeGenerator.cpp | 6
-rw-r--r--  tools/opt/opt.cpp | 4
-rw-r--r--  unittests/ADT/BitVectorTest.cpp | 140
-rw-r--r--  unittests/ADT/SmallBitVectorTest.cpp | 140
-rw-r--r--  unittests/VMCore/MetadataTest.cpp | 2
-rw-r--r--  utils/TableGen/AsmMatcherEmitter.cpp | 22
-rw-r--r--  utils/TableGen/CodeEmitterGen.cpp | 9
-rw-r--r--  utils/TableGen/CodeGenInstruction.cpp | 4
-rw-r--r--  utils/TableGen/CodeGenTarget.cpp | 21
-rw-r--r--  utils/TableGen/CodeGenTarget.h | 5
-rw-r--r--  utils/TableGen/DAGISelEmitter.cpp | 196
-rw-r--r--  utils/TableGen/FastISelEmitter.cpp | 2
-rw-r--r--  utils/TableGen/InstrInfoEmitter.cpp | 3
-rw-r--r--  utils/TableGen/IntrinsicEmitter.cpp | 2
-rw-r--r--  utils/TableGen/OptParserEmitter.cpp | 24
-rw-r--r--  utils/TableGen/Record.cpp | 10
-rw-r--r--  utils/TableGen/Record.h | 2
-rw-r--r--  utils/TableGen/SubtargetEmitter.cpp | 4
-rw-r--r--  utils/TableGen/TGLexer.cpp | 1
-rw-r--r--  utils/TableGen/TGLexer.h | 2
-rw-r--r--  utils/TableGen/TGParser.cpp | 7
-rw-r--r--  utils/buildit/GNUmakefile | 10
-rw-r--r--  utils/vim/llvm.vim | 4
-rw-r--r--  utils/vim/vimrc | 2
532 files changed, 23242 insertions, 19955 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9bce039..7063640 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -296,6 +296,7 @@ add_subdirectory(lib/Bitcode/Reader)
add_subdirectory(lib/Bitcode/Writer)
add_subdirectory(lib/Transforms/Utils)
add_subdirectory(lib/Transforms/Instrumentation)
+add_subdirectory(lib/Transforms/InstCombine)
add_subdirectory(lib/Transforms/Scalar)
add_subdirectory(lib/Transforms/IPO)
add_subdirectory(lib/Transforms/Hello)
diff --git a/LICENSE.TXT b/LICENSE.TXT
index fd49172..b8d2c74 100644
--- a/LICENSE.TXT
+++ b/LICENSE.TXT
@@ -4,7 +4,7 @@ LLVM Release License
University of Illinois/NCSA
Open Source License
-Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign.
+Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
All rights reserved.
Developed by:
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 9ebaadc..7915593 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -512,7 +512,7 @@ case "$enableval" in
PIC16) TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
- SystemZ) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
+ s390x) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
Blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;;
*) AC_MSG_ERROR([Can not set target to build]) ;;
esac ;;
diff --git a/cmake/modules/LLVMLibDeps.cmake b/cmake/modules/LLVMLibDeps.cmake
index 97d07bd..bc2f45b 100644
--- a/cmake/modules/LLVMLibDeps.cmake
+++ b/cmake/modules/LLVMLibDeps.cmake
@@ -7,9 +7,9 @@ set(MSVC_LIB_DEPS_LLVMAlphaCodeGen LLVMAlphaInfo LLVMCodeGen LLVMCore LLVMMC LLV
set(MSVC_LIB_DEPS_LLVMAlphaInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMSystem)
-set(MSVC_LIB_DEPS_LLVMAsmParser LLVMCore LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMAsmParser LLVMCore LLVMSupport)
set(MSVC_LIB_DEPS_LLVMAsmPrinter LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMBitReader LLVMCore LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMBitReader LLVMCore LLVMSupport)
set(MSVC_LIB_DEPS_LLVMBitWriter LLVMCore LLVMSupport LLVMSystem)
set(MSVC_LIB_DEPS_LLVMBlackfinAsmPrinter LLVMAsmPrinter LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
@@ -21,15 +21,16 @@ set(MSVC_LIB_DEPS_LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC
set(MSVC_LIB_DEPS_LLVMCellSPUInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMCodeGen LLVMAnalysis LLVMCore LLVMMC LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
set(MSVC_LIB_DEPS_LLVMCore LLVMSupport LLVMSystem)
-set(MSVC_LIB_DEPS_LLVMCppBackend LLVMCore LLVMCppBackendInfo LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMCppBackend LLVMCore LLVMCppBackendInfo LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMCppBackendInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMExecutionEngine LLVMCore LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMInstrumentation LLVMAnalysis LLVMCore LLVMScalarOpts LLVMSupport LLVMSystem LLVMTransformUtils)
+set(MSVC_LIB_DEPS_LLVMInstCombine LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
+set(MSVC_LIB_DEPS_LLVMInstrumentation LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTransformUtils)
set(MSVC_LIB_DEPS_LLVMInterpreter LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMJIT LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMMC LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMLinker LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMSystem)
set(MSVC_LIB_DEPS_LLVMMC LLVMSupport LLVMSystem)
-set(MSVC_LIB_DEPS_LLVMMSIL LLVMAnalysis LLVMCodeGen LLVMCore LLVMMSILInfo LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa)
+set(MSVC_LIB_DEPS_LLVMMSIL LLVMAnalysis LLVMCodeGen LLVMCore LLVMMSILInfo LLVMScalarOpts LLVMSupport LLVMTarget LLVMTransformUtils LLVMipa)
set(MSVC_LIB_DEPS_LLVMMSILInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMMSP430CodeGen LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
@@ -37,13 +38,13 @@ set(MSVC_LIB_DEPS_LLVMMSP430Info LLVMSupport)
set(MSVC_LIB_DEPS_LLVMMipsAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMipsCodeGen LLVMMipsInfo LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMMipsCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMMipsInfo LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMMipsInfo LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMPIC16 LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMPIC16Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMPIC16AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPIC16 LLVMPIC16Info LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMPIC16 LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMPIC16Info LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMPIC16AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPIC16 LLVMPIC16Info LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMPIC16Info LLVMSupport)
set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMPowerPCInfo LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
+set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMInstCombine LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMSparcAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget)
@@ -53,14 +54,14 @@ set(MSVC_LIB_DEPS_LLVMSystem )
set(MSVC_LIB_DEPS_LLVMSystemZAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMSystemZInfo LLVMTarget)
set(MSVC_LIB_DEPS_LLVMSystemZCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystemZInfo LLVMTarget)
set(MSVC_LIB_DEPS_LLVMSystemZInfo LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMTarget LLVMCore LLVMMC LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMTarget LLVMCore LLVMMC LLVMSupport)
set(MSVC_LIB_DEPS_LLVMTransformUtils LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMipa)
set(MSVC_LIB_DEPS_LLVMX86AsmParser LLVMMC LLVMX86Info)
set(MSVC_LIB_DEPS_LLVMX86AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget LLVMX86CodeGen LLVMX86Info)
-set(MSVC_LIB_DEPS_LLVMX86CodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget LLVMX86Disassembler LLVMX86Info)
-set(MSVC_LIB_DEPS_LLVMX86Disassembler )
+set(MSVC_LIB_DEPS_LLVMX86CodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget LLVMX86Info)
+set(MSVC_LIB_DEPS_LLVMX86Disassembler LLVMMC LLVMSupport LLVMX86Info)
set(MSVC_LIB_DEPS_LLVMX86Info LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMXCore LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget LLVMXCoreInfo)
+set(MSVC_LIB_DEPS_LLVMXCore LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget LLVMXCoreInfo)
set(MSVC_LIB_DEPS_LLVMXCoreAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget LLVMXCoreInfo)
set(MSVC_LIB_DEPS_LLVMXCoreInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMipa LLVMAnalysis LLVMCore LLVMSupport LLVMSystem)
diff --git a/configure b/configure
index 3e0ca0a..fc30999 100755
--- a/configure
+++ b/configure
@@ -5080,7 +5080,7 @@ case "$enableval" in
PIC16) TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
- SystemZ) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
+ s390x) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
Blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;;
*) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
echo "$as_me: error: Can not set target to build" >&2;}
diff --git a/docs/CodeGenerator.html b/docs/CodeGenerator.html
index d39de19..2eb7abc 100644
--- a/docs/CodeGenerator.html
+++ b/docs/CodeGenerator.html
@@ -1731,11 +1731,6 @@ define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
(because one or more of above constraints are not met) to be followed by a
readjustment of the stack. So performance might be worse in such cases.</p>
-<p>On x86 and x86-64 one register is reserved for indirect tail calls (e.g via a
- function pointer). So there is one less register for integer argument
- passing. For x86 this means 2 registers (if <tt>inreg</tt> parameter
- attribute is used) and for x86-64 this means 5 register are used.</p>
-
</div>
<!-- ======================================================================= -->
<div class="doc_subsection">
@@ -2121,7 +2116,7 @@ MOVSX32rm16 -&gt; movsx, 32-bit register, 16-bit memory
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-10-10 23:30:55 +0200 (Sat, 10 Oct 2009) $
+ Last modified: $Date: 2010-01-11 19:53:47 +0100 (Mon, 11 Jan 2010) $
</address>
</body>
diff --git a/docs/LangRef.html b/docs/LangRef.html
index ba09f4c..6ea0ead 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -43,6 +43,7 @@
<li><a href="#globalvars">Global Variables</a></li>
<li><a href="#functionstructure">Functions</a></li>
<li><a href="#aliasstructure">Aliases</a></li>
+ <li><a href="#namedmetadatastructure">Named Metadata</a></li>
<li><a href="#paramattrs">Parameter Attributes</a></li>
<li><a href="#fnattrs">Function Attributes</a></li>
<li><a href="#gc">Garbage Collector Names</a></li>
@@ -85,12 +86,12 @@
<li><a href="#undefvalues">Undefined Values</a></li>
<li><a href="#blockaddress">Addresses of Basic Blocks</a></li>
<li><a href="#constantexprs">Constant Expressions</a></li>
- <li><a href="#metadata">Embedded Metadata</a></li>
</ol>
</li>
<li><a href="#othervalues">Other Values</a>
<ol>
<li><a href="#inlineasm">Inline Assembler Expressions</a></li>
+ <li><a href="#metadata">Metadata Nodes and Metadata Strings</a></li>
</ol>
</li>
<li><a href="#intrinsic_globals">Intrinsic Global Variables</a>
@@ -498,14 +499,19 @@ define i32 @main() { <i>; i32()* </i>
<i>; Call puts function to write out the string to stdout.</i>
<a href="#i_call">call</a> i32 @puts(i8 * %cast210) <i>; i32</i>
- <a href="#i_ret">ret</a> i32 0<br>}<br>
+ <a href="#i_ret">ret</a> i32 0<br>}
+
+<i>; Named metadata</i>
+!1 = metadata !{i32 41}
+!foo = !{!1, null}
</pre>
</div>
<p>This example is made up of a <a href="#globalvars">global variable</a> named
- "<tt>.LC0</tt>", an external declaration of the "<tt>puts</tt>" function, and
+ "<tt>.LC0</tt>", an external declaration of the "<tt>puts</tt>" function,
a <a href="#functionstructure">function definition</a> for
- "<tt>main</tt>".</p>
+ "<tt>main</tt>" and <a href="#namedmetadatastructure">named metadata</a>
+   "<tt>foo</tt>".</p>
<p>In general, a module is made up of a list of global values, where both
functions and global variables are global values. Global values are
@@ -558,10 +564,17 @@ define i32 @main() { <i>; i32()* </i>
<dt><tt><b><a name="linkage_linkonce">linkonce</a></b></tt></dt>
<dd>Globals with "<tt>linkonce</tt>" linkage are merged with other globals of
- the same name when linkage occurs. This is typically used to implement
- inline functions, templates, or other code which must be generated in each
- translation unit that uses it. Unreferenced <tt>linkonce</tt> globals are
- allowed to be discarded.</dd>
+ the same name when linkage occurs. This can be used to implement
+ some forms of inline functions, templates, or other code which must be
+ generated in each translation unit that uses it, but where the body may
+ be overridden with a more definitive definition later. Unreferenced
+ <tt>linkonce</tt> globals are allowed to be discarded. Note that
+ <tt>linkonce</tt> linkage does not actually allow the optimizer to
+ inline the body of this function into callers because it doesn't know if
+ this definition of the function is the definitive definition within the
+ program or whether it will be overridden by a stronger definition.
+ To enable inlining and other optimizations, use "<tt>linkonce_odr</tt>"
+ linkage.</dd>
<dt><tt><b><a name="linkage_weak">weak</a></b></tt></dt>
<dd>"<tt>weak</tt>" linkage has the same merging semantics as
@@ -671,9 +684,9 @@ define i32 @main() { <i>; i32()* </i>
(e.g. by passing things in registers). This calling convention allows the
target to use whatever tricks it wants to produce fast code for the
target, without having to conform to an externally specified ABI
- (Application Binary Interface). Implementations of this convention should
- allow arbitrary <a href="CodeGenerator.html#tailcallopt">tail call
- optimization</a> to be supported. This calling convention does not
+ (Application Binary Interface).
+ <a href="CodeGenerator.html#tailcallopt">Tail calls can only be optimized
+ when this convention is used.</a> This calling convention does not
support varargs and requires the prototype of all callees to exactly match
the prototype of the function definition.</dd>
@@ -905,6 +918,27 @@ define [<a href="#linkage">linkage</a>] [<a href="#visibility">visibility</a>]
</div>
<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="namedmetadatastructure">Named Metadata</a>
+</div>
+
+<div class="doc_text">
+
+<p>Named metadata is a collection of metadata. <a href="#metadata">Metadata</a>
+   nodes and null are the only valid named metadata operands;
+   metadata strings are not allowed as named metadata operands.</p>
+
+<h5>Syntax:</h5>
+<div class="doc_code">
+<pre>
+!1 = metadata !{metadata !"one"}
+!name = !{null, !1}
+</pre>
+</div>
+
+</div>
+
+<!-- ======================================================================= -->
<div class="doc_subsection"><a name="paramattrs">Parameter Attributes</a></div>
<div class="doc_text">
@@ -1649,10 +1683,12 @@ Classifications</a> </div>
underlying processor. The elements of a structure may be any type that has a
size.</p>
-<p>Structures are accessed using '<tt><a href="#i_load">load</a></tt> and
- '<tt><a href="#i_store">store</a></tt>' by getting a pointer to a field with
- the '<tt><a href="#i_getelementptr">getelementptr</a></tt>' instruction.</p>
-
+<p>Structures in memory are accessed using '<tt><a href="#i_load">load</a></tt>'
+ and '<tt><a href="#i_store">store</a></tt>' by getting a pointer to a field
+ with the '<tt><a href="#i_getelementptr">getelementptr</a></tt>' instruction.
+ Structures in registers are accessed using the
+ '<tt><a href="#i_extractvalue">extractvalue</a></tt>' and
+ '<tt><a href="#i_insertvalue">insertvalue</a></tt>' instructions.</p>
<h5>Syntax:</h5>
<pre>
{ &lt;type list&gt; }
@@ -2305,12 +2341,12 @@ has undefined behavior.</p>
</div>
<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="metadata">Embedded Metadata</a>
+<div class="doc_subsection"><a name="metadata">Metadata Nodes and Metadata Strings</a>
</div>
<div class="doc_text">
-<p>Embedded metadata provides a way to attach arbitrary data to the instruction
+<p>Metadata provides a way to attach arbitrary data to the instruction
stream without affecting the behaviour of the program. There are two
metadata primitives, strings and nodes. All metadata has the
<tt>metadata</tt> type and is identified in syntax by a preceding exclamation
@@ -2329,6 +2365,9 @@ has undefined behavior.</p>
event that a value is deleted, it will be replaced with a typeless
"<tt>null</tt>", such as "<tt>metadata !{null, i32 10}</tt>".</p>
+<p>A <a href="#namedmetadatastructure">named metadata</a> is a collection of
+   metadata nodes. For example: "<tt>!foo = metadata !{!4, !3}</tt>".</p>
+
<p>Optimizations may rely on metadata to provide additional information about
the program that isn't available in the instructions, or that isn't easily
computable. Similarly, the code generator may expect a certain metadata
@@ -3848,7 +3887,7 @@ Instruction</a> </div>
<h5>Syntax:</h5>
<pre>
- &lt;result&gt; = insertvalue &lt;aggregate type&gt; &lt;val&gt;, &lt;ty&gt; &lt;val&gt;, &lt;idx&gt; <i>; yields &lt;n x &lt;ty&gt;&gt;</i>
+ &lt;result&gt; = insertvalue &lt;aggregate type&gt; &lt;val&gt;, &lt;ty&gt; &lt;elt&gt;, &lt;idx&gt; <i>; yields &lt;aggregate type&gt;</i>
</pre>
<h5>Overview:</h5>
@@ -3873,7 +3912,8 @@ Instruction</a> </div>
<h5>Example:</h5>
<pre>
- &lt;result&gt; = insertvalue {i32, float} %agg, i32 1, 0 <i>; yields {i32, float}</i>
+ %agg1 = insertvalue {i32, float} undef, i32 1, 0 <i>; yields {i32 1, float undef}</i>
+ %agg2 = insertvalue {i32, float} %agg1, float %val, 1 <i>; yields {i32 1, float %val}</i>
</pre>
</div>
@@ -4983,15 +5023,31 @@ Loop: ; Infinite loop that counts from 0 on up...
<p>This instruction requires several arguments:</p>
<ol>
- <li>The optional "tail" marker indicates whether the callee function accesses
- any allocas or varargs in the caller. If the "tail" marker is present,
- the function call is eligible for tail call optimization. Note that calls
- may be marked "tail" even if they do not occur before
- a <a href="#i_ret"><tt>ret</tt></a> instruction.</li>
+ <li>The optional "tail" marker indicates that the callee function does not
+ access any allocas or varargs in the caller. Note that calls may be
+ marked "tail" even if they do not occur before
+ a <a href="#i_ret"><tt>ret</tt></a> instruction. If the "tail" marker is
+ present, the function call is eligible for tail call optimization,
+ but <a href="CodeGenerator.html#tailcallopt">might not in fact be
+ optimized into a jump</a>. As of this writing, the extra requirements for
+ a call to actually be optimized are:
+ <ul>
+ <li>Caller and callee both have the calling
+ convention <tt>fastcc</tt>.</li>
+ <li>The call is in tail position (ret immediately follows call and ret
+ uses value of call or is void).</li>
+ <li>Option <tt>-tailcallopt</tt> is enabled,
+ or <code>llvm::PerformTailCallOpt</code> is <code>true</code>.</li>
+ <li><a href="CodeGenerator.html#tailcallopt">Platform specific
+ constraints are met.</a></li>
+ </ul>
+ </li>
<li>The optional "cconv" marker indicates which <a href="#callingconv">calling
convention</a> the call should use. If none is specified, the call
- defaults to using C calling conventions.</li>
+ defaults to using C calling conventions. The calling convention of the
+ call must match the calling convention of the target function, or else the
+ behavior is undefined.</li>
<li>The optional <a href="#paramattrs">Parameter Attributes</a> list for
return values. Only '<tt>zeroext</tt>', '<tt>signext</tt>', and
@@ -7263,7 +7319,7 @@ LLVM</a>.</p>
<h5>Overview:</h5>
<p>The <tt>llvm.objectsize</tt> intrinsic is designed to provide information
- to the optimizers to either discover at compile time either a) when an
+ to the optimizers to discover at compile time either a) when an
operation like memcpy will either overflow a buffer that corresponds to
an object, or b) to determine that a runtime check for overflow isn't
necessary. An object in this context means an allocation of a
@@ -7294,7 +7350,7 @@ LLVM</a>.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-12-23 01:29:49 +0100 (Wed, 23 Dec 2009) $
+ Last modified: $Date: 2010-01-11 20:35:55 +0100 (Mon, 11 Jan 2010) $
</address>
</body>
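A minimal C++ sketch of the tail-call requirements documented above, written against the C API from include/llvm-c/Core.h; the Caller, Callee, Builder, and Args values are assumed to have been built elsewhere, and the -tailcallopt option must still be enabled at codegen time for the call to actually become a jump.

  #include "llvm-c/Core.h"

  // Sketch only: make a call eligible for tail-call optimization.
  // Both caller and callee use fastcc, the call is flagged "tail",
  // and a ret immediately consumes the call's value.
  void emitTailCall(LLVMValueRef Caller, LLVMValueRef Callee,
                    LLVMBuilderRef Builder, LLVMValueRef *Args, unsigned N) {
    LLVMSetFunctionCallConv(Caller, LLVMFastCallConv);
    LLVMSetFunctionCallConv(Callee, LLVMFastCallConv);
    LLVMValueRef Call = LLVMBuildCall(Builder, Callee, Args, N, "");
    LLVMSetInstructionCallConv(Call, LLVMFastCallConv); // must match the callee
    LLVMSetTailCall(Call, 1);                           // the "tail" marker
    LLVMBuildRet(Builder, Call);                        // ret in tail position
  }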
diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html
index c5451b6..80a5db0 100644
--- a/docs/ProgrammersManual.html
+++ b/docs/ProgrammersManual.html
@@ -94,6 +94,7 @@ option</a></li>
<li><a href="#ds_bit">BitVector-like containers</a>
<ul>
<li><a href="#dss_bitvector">A dense bitvector</a></li>
+ <li><a href="#dss_smallbitvector">A "small" dense bitvector</a></li>
<li><a href="#dss_sparsebitvector">A sparse bitvector</a></li>
</ul></li>
</ul>
@@ -1584,7 +1585,7 @@ please don't use it.</p>
</div>
<div class="doc_text">
-<p> The BitVector container provides a fixed size set of bits for manipulation.
+<p> The BitVector container provides a dynamically sized set of bits for manipulation.
It supports individual bit setting/testing, as well as set operations. The set
operations take time O(size of bitvector), but operations are performed one word
at a time, instead of one bit at a time. This makes the BitVector very fast for
@@ -1595,6 +1596,25 @@ the number of set bits to be high (IE a dense set).
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
+ <a name="dss_smallbitvector">SmallBitVector</a>
+</div>
+
+<div class="doc_text">
+<p> The SmallBitVector container provides the same interface as BitVector, but
+it is optimized for the case where only a small number of bits, less than
+25 or so, are needed. It also transparently supports larger bit counts, but
+slightly less efficiently than a plain BitVector, so SmallBitVector should
+only be used when larger counts are rare.
+</p>
+
+<p>
+At this time, SmallBitVector does not support set operations (and, or, xor),
+and its operator[] does not provide an assignable lvalue.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
<a name="dss_sparsebitvector">SparseBitVector</a>
</div>
@@ -3872,7 +3892,7 @@ arguments. An argument has a pointer to the parent Function.</p>
<a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a> and
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-10-23 00:11:22 +0200 (Fri, 23 Oct 2009) $
+ Last modified: $Date: 2010-01-05 19:24:00 +0100 (Tue, 05 Jan 2010) $
</address>
</body>
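An illustrative C++ sketch of the SmallBitVector usage described above; the method names mirror BitVector's interface and are an assumption of this example rather than part of the patch.

  #include "llvm/ADT/SmallBitVector.h"

  // Sketch: small bit counts live inline in a single word; larger counts
  // transparently fall back to BitVector-style heap storage.
  void demo() {
    llvm::SmallBitVector Live(16); // 16 bits, expected to stay inline
    Live.set(3);
    Live.set(7);
    if (Live.test(3) && Live.any())
      Live.reset(7);
    Live.resize(128);              // beyond the inline capacity
  }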
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index 5a0936f..aff0836 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -58,6 +58,7 @@ Almost dead code.
include/llvm/Analysis/LiveValues.h => Dan
lib/Transforms/IPO/MergeFunctions.cpp => consider for 2.8.
llvm/Analysis/PointerTracking.h => Edwin wants this, consider for 2.8.
+ ABCD, SCCVN, GEPSplitterPass
-->
@@ -1348,7 +1349,7 @@ lists</a>.</p>
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-11-03 22:50:09 +0100 (Tue, 03 Nov 2009) $
+ Last modified: $Date: 2010-01-09 23:30:40 +0100 (Sat, 09 Jan 2010) $
</address>
</body>
diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html
index 4aca52c..83c979b 100644
--- a/docs/SourceLevelDebugging.html
+++ b/docs/SourceLevelDebugging.html
@@ -38,6 +38,7 @@
<li><a href="#format_common_intrinsics">Debugger intrinsic functions</a>
<ul>
<li><a href="#format_common_declare">llvm.dbg.declare</a></li>
+ <li><a href="#format_common_value">llvm.dbg.value</a></li>
</ul></li>
</ol></li>
<li><a href="#format_common_lifetime">Object lifetimes and scoping</a></li>
@@ -775,6 +776,25 @@ DW_TAG_return_variable = 258
</div>
<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="format_common_value">llvm.dbg.value</a>
+</div>
+
+<div class="doc_text">
+<pre>
+ void %<a href="#format_common_value">llvm.dbg.value</a>( metadata, i64, metadata )
+</pre>
+
+<p>This intrinsic provides information when a user source variable is set to a
+ new value. The first argument is the new value (wrapped as metadata). The
+ second argument is the offset in the user source variable where the new value
+ is written. The third argument is
+ the <tt>%<a href="#format_variables">llvm.dbg.variable</a></tt> containing
+ the description of the user source variable. </p>
+
+</div>
+
+<!-- ======================================================================= -->
<div class="doc_subsection">
<a name="format_common_lifetime">Object lifetimes and scoping</a>
</div>
@@ -1718,7 +1738,7 @@ enum Trees {
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-12-01 01:59:58 +0100 (Tue, 01 Dec 2009) $
+ Last modified: $Date: 2010-01-11 23:53:48 +0100 (Mon, 11 Jan 2010) $
</address>
</body>
diff --git a/docs/TableGenFundamentals.html b/docs/TableGenFundamentals.html
index 4ae6718..b96aeab 100644
--- a/docs/TableGenFundamentals.html
+++ b/docs/TableGenFundamentals.html
@@ -423,6 +423,10 @@ class. This operation is analogous to $(foreach) in GNU make.</dd>
<dd>An integer {0,1} indicating whether list 'a' is empty.</dd>
<dt><tt>!if(a,b,c)</tt></dt>
<dd>'b' if the result of integer operator 'a' is nonzero, 'c' otherwise.</dd>
+<dt><tt>!eq(a,b)</tt></dt>
+ <dd>Integer one if string a is equal to string b, zero otherwise. This
+  only operates on string objects. Use !cast&lt;string&gt; to compare other
+ types of objects.</dd>
</dl>
<p>Note that all of the values have rules specifying how they convert to values
@@ -794,7 +798,7 @@ This should highlight the APIs in <tt>TableGen/Record.h</tt>.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-10-29 19:10:34 +0100 (Thu, 29 Oct 2009) $
+ Last modified: $Date: 2010-01-05 20:11:42 +0100 (Tue, 05 Jan 2010) $
</address>
</body>
diff --git a/include/llvm-c/Analysis.h b/include/llvm-c/Analysis.h
index 68d8e65..e1e4487 100644
--- a/include/llvm-c/Analysis.h
+++ b/include/llvm-c/Analysis.h
@@ -36,12 +36,12 @@ typedef enum {
/* Verifies that a module is valid, taking the specified action if not.
Optionally returns a human-readable description of any invalid constructs.
OutMessage must be disposed with LLVMDisposeMessage. */
-int LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
- char **OutMessage);
+LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
+ char **OutMessage);
/* Verifies that a single function is valid, taking the specified action. Useful
for debugging. */
-int LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action);
+LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action);
/* Open up a ghostview window that displays the CFG of the current function.
Useful for debugging. */
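A hedged sketch of a call site for the retyped verifier entry point; Mod is assumed to be an LLVMModuleRef built elsewhere, and a nonzero LLVMBool result signals an invalid module.

  #include "llvm-c/Analysis.h"
  #include "llvm-c/Core.h"
  #include <cstdio>

  // Sketch: report verifier failures instead of aborting.
  void checkModule(LLVMModuleRef Mod) {
    char *Err = 0;
    if (LLVMVerifyModule(Mod, LLVMReturnStatusAction, &Err))
      std::fprintf(stderr, "verifier: %s\n", Err ? Err : "unknown error");
    if (Err)
      LLVMDisposeMessage(Err);
  }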
diff --git a/include/llvm-c/BitReader.h b/include/llvm-c/BitReader.h
index a184f60..59269ce 100644
--- a/include/llvm-c/BitReader.h
+++ b/include/llvm-c/BitReader.h
@@ -29,24 +29,24 @@ extern "C" {
/* Builds a module from the bitcode in the specified memory buffer, returning a
reference to the module via the OutModule parameter. Returns 0 on success.
Optionally returns a human-readable error message via OutMessage. */
-int LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
- LLVMModuleRef *OutModule, char **OutMessage);
+LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutModule, char **OutMessage);
-int LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
- LLVMMemoryBufferRef MemBuf,
- LLVMModuleRef *OutModule, char **OutMessage);
+LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutModule, char **OutMessage);
/* Reads a module from the specified path, returning via the OutMP parameter
a module provider which performs lazy deserialization. Returns 0 on success.
Optionally returns a human-readable error message via OutMessage. */
-int LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
- LLVMModuleProviderRef *OutMP,
- char **OutMessage);
-
-int LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef,
- LLVMMemoryBufferRef MemBuf,
- LLVMModuleProviderRef *OutMP,
- char **OutMessage);
+LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
+ LLVMModuleProviderRef *OutMP,
+ char **OutMessage);
+
+LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleProviderRef *OutMP,
+ char **OutMessage);
#ifdef __cplusplus
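Similarly, a minimal sketch of the LLVMBool-returning bitcode reader; Buf is assumed to hold bitcode (e.g. from LLVMCreateMemoryBufferWithContentsOfFile), and both functions return 0 on success, per the comments above.

  #include "llvm-c/BitReader.h"
  #include "llvm-c/Core.h"
  #include <cstdio>

  // Sketch: parse a bitcode buffer, returning null on failure.
  LLVMModuleRef parseOrNull(LLVMMemoryBufferRef Buf) {
    LLVMModuleRef Mod = 0;
    char *Err = 0;
    if (LLVMParseBitcode(Buf, &Mod, &Err)) { // nonzero LLVMBool == failure
      std::fprintf(stderr, "bitcode error: %s\n", Err ? Err : "unknown error");
      if (Err)
        LLVMDisposeMessage(Err);
      return 0;
    }
    return Mod;
  }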
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index c741d1c..6879205 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -46,6 +46,8 @@ extern "C" {
#endif
+typedef int LLVMBool;
+
/* Opaque types. */
/**
@@ -292,7 +294,7 @@ const char *LLVMGetTarget(LLVMModuleRef M);
void LLVMSetTarget(LLVMModuleRef M, const char *Triple);
/** See Module::addTypeName. */
-int LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty);
+LLVMBool LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty);
void LLVMDeleteTypeName(LLVMModuleRef M, const char *Name);
LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name);
@@ -355,20 +357,20 @@ LLVMTypeRef LLVMPPCFP128Type(void);
/* Operations on function types */
LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType,
LLVMTypeRef *ParamTypes, unsigned ParamCount,
- int IsVarArg);
-int LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy);
+ LLVMBool IsVarArg);
+LLVMBool LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy);
LLVMTypeRef LLVMGetReturnType(LLVMTypeRef FunctionTy);
unsigned LLVMCountParamTypes(LLVMTypeRef FunctionTy);
void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest);
/* Operations on struct types */
LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
- unsigned ElementCount, int Packed);
+ unsigned ElementCount, LLVMBool Packed);
LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes, unsigned ElementCount,
- int Packed);
+ LLVMBool Packed);
unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy);
void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest);
-int LLVMIsPackedStruct(LLVMTypeRef StructTy);
+LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy);
/* Operations on array, pointer, and vector types (sequence types) */
LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount);
@@ -427,10 +429,6 @@ void LLVMDisposeTypeHandle(LLVMTypeHandleRef TypeHandle);
macro(IntrinsicInst) \
macro(DbgInfoIntrinsic) \
macro(DbgDeclareInst) \
- macro(DbgFuncStartInst) \
- macro(DbgRegionEndInst) \
- macro(DbgRegionStartInst) \
- macro(DbgStopPointInst) \
macro(EHSelectorInst) \
macro(MemIntrinsic) \
macro(MemCpyInst) \
@@ -499,14 +497,14 @@ LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index);
LLVMValueRef LLVMConstNull(LLVMTypeRef Ty); /* all zeroes */
LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty); /* only for int/vector */
LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty);
-int LLVMIsConstant(LLVMValueRef Val);
-int LLVMIsNull(LLVMValueRef Val);
-int LLVMIsUndef(LLVMValueRef Val);
+LLVMBool LLVMIsConstant(LLVMValueRef Val);
+LLVMBool LLVMIsNull(LLVMValueRef Val);
+LLVMBool LLVMIsUndef(LLVMValueRef Val);
LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty);
/* Operations on scalar constants */
LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
- int SignExtend);
+ LLVMBool SignExtend);
LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char *Text,
uint8_t Radix);
LLVMValueRef LLVMConstIntOfStringAndSize(LLVMTypeRef IntTy, const char *Text,
@@ -521,17 +519,17 @@ long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal);
/* Operations on composite constants */
LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
- unsigned Length, int DontNullTerminate);
+ unsigned Length, LLVMBool DontNullTerminate);
LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
LLVMValueRef *ConstantVals,
- unsigned Count, int Packed);
+ unsigned Count, LLVMBool Packed);
LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
- int DontNullTerminate);
+ LLVMBool DontNullTerminate);
LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
LLVMValueRef *ConstantVals, unsigned Length);
LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
- int Packed);
+ LLVMBool Packed);
LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size);
/* Constant expressions */
@@ -591,7 +589,7 @@ LLVMValueRef LLVMConstTruncOrBitCast(LLVMValueRef ConstantVal,
LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal,
LLVMTypeRef ToType);
LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType,
- unsigned isSigned);
+ LLVMBool isSigned);
LLVMValueRef LLVMConstFPCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType);
LLVMValueRef LLVMConstSelect(LLVMValueRef ConstantCondition,
LLVMValueRef ConstantIfTrue,
@@ -609,13 +607,13 @@ LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList,
LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
LLVMValueRef ElementValueConstant,
unsigned *IdxList, unsigned NumIdx);
-LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty,
+LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty,
const char *AsmString, const char *Constraints,
- int HasSideEffects);
+ LLVMBool HasSideEffects, LLVMBool IsAlignStack);
/* Operations on global variables, functions, and aliases (globals) */
LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global);
-int LLVMIsDeclaration(LLVMValueRef Global);
+LLVMBool LLVMIsDeclaration(LLVMValueRef Global);
LLVMLinkage LLVMGetLinkage(LLVMValueRef Global);
void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage);
const char *LLVMGetSection(LLVMValueRef Global);
@@ -635,10 +633,10 @@ LLVMValueRef LLVMGetPreviousGlobal(LLVMValueRef GlobalVar);
void LLVMDeleteGlobal(LLVMValueRef GlobalVar);
LLVMValueRef LLVMGetInitializer(LLVMValueRef GlobalVar);
void LLVMSetInitializer(LLVMValueRef GlobalVar, LLVMValueRef ConstantVal);
-int LLVMIsThreadLocal(LLVMValueRef GlobalVar);
-void LLVMSetThreadLocal(LLVMValueRef GlobalVar, int IsThreadLocal);
-int LLVMIsGlobalConstant(LLVMValueRef GlobalVar);
-void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, int IsConstant);
+LLVMBool LLVMIsThreadLocal(LLVMValueRef GlobalVar);
+void LLVMSetThreadLocal(LLVMValueRef GlobalVar, LLVMBool IsThreadLocal);
+LLVMBool LLVMIsGlobalConstant(LLVMValueRef GlobalVar);
+void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant);
/* Operations on aliases */
LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
@@ -678,7 +676,7 @@ void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align);
/* Operations on basic blocks */
LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB);
-int LLVMValueIsBasicBlock(LLVMValueRef Val);
+LLVMBool LLVMValueIsBasicBlock(LLVMValueRef Val);
LLVMBasicBlockRef LLVMValueAsBasicBlock(LLVMValueRef Val);
LLVMValueRef LLVMGetBasicBlockParent(LLVMBasicBlockRef BB);
unsigned LLVMCountBasicBlocks(LLVMValueRef Fn);
@@ -718,8 +716,8 @@ void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
unsigned align);
/* Operations on call instructions (only) */
-int LLVMIsTailCall(LLVMValueRef CallInst);
-void LLVMSetTailCall(LLVMValueRef CallInst, int IsTailCall);
+LLVMBool LLVMIsTailCall(LLVMValueRef CallInst);
+void LLVMSetTailCall(LLVMValueRef CallInst, LLVMBool IsTailCall);
/* Operations on phi nodes */
void LLVMAddIncoming(LLVMValueRef PhiNode, LLVMValueRef *IncomingValues,
@@ -932,11 +930,11 @@ void LLVMDisposeModuleProvider(LLVMModuleProviderRef MP);
/*===-- Memory buffers ----------------------------------------------------===*/
-int LLVMCreateMemoryBufferWithContentsOfFile(const char *Path,
- LLVMMemoryBufferRef *OutMemBuf,
- char **OutMessage);
-int LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
- char **OutMessage);
+LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(const char *Path,
+ LLVMMemoryBufferRef *OutMemBuf,
+ char **OutMessage);
+LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
+ char **OutMessage);
void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf);
@@ -956,23 +954,23 @@ LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef MP);
/** Initializes, executes on the provided module, and finalizes all of the
passes scheduled in the pass manager. Returns 1 if any of the passes
modified the module, 0 otherwise. See llvm::PassManager::run(Module&). */
-int LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M);
+LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M);
/** Initializes all of the function passes scheduled in the function pass
manager. Returns 1 if any of the passes modified the module, 0 otherwise.
See llvm::FunctionPassManager::doInitialization. */
-int LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM);
+LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM);
/** Executes all of the function passes scheduled in the function pass manager
on the provided function. Returns 1 if any of the passes modified the
function, 0 otherwise.
See llvm::FunctionPassManager::run(Function&). */
-int LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F);
+LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F);
/** Finalizes all of the function passes scheduled in the function pass
manager. Returns 1 if any of the passes modified the module, 0 otherwise.
See llvm::FunctionPassManager::doFinalization. */
-int LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM);
+LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM);
/** Frees the memory of a pass pipeline. For function pipelines, does not free
the module provider.
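
As a usage sketch (assuming an existing LLVMModuleRef Mod; populating the pipeline with passes is omitted), the LLVMBool results of the pass-manager entry points compose naturally into a single "changed" flag:

    #include <llvm-c/Core.h>

    /* Returns nonzero if any pass changed anything. */
    LLVMBool runFunctionPasses(LLVMModuleRef Mod) {
      LLVMModuleProviderRef MP = LLVMCreateModuleProviderForExistingModule(Mod);
      LLVMPassManagerRef FPM = LLVMCreateFunctionPassManager(MP);
      LLVMBool Changed = LLVMInitializeFunctionPassManager(FPM);
      LLVMValueRef F;
      for (F = LLVMGetFirstFunction(Mod); F; F = LLVMGetNextFunction(F))
        Changed |= LLVMRunFunctionPassManager(FPM, F);
      Changed |= LLVMFinalizeFunctionPassManager(FPM);
      LLVMDisposePassManager(FPM);
      return Changed;
    }
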
diff --git a/include/llvm-c/ExecutionEngine.h b/include/llvm-c/ExecutionEngine.h
index 05f2a89..151c935 100644
--- a/include/llvm-c/ExecutionEngine.h
+++ b/include/llvm-c/ExecutionEngine.h
@@ -36,7 +36,7 @@ typedef struct LLVMOpaqueExecutionEngine *LLVMExecutionEngineRef;
LLVMGenericValueRef LLVMCreateGenericValueOfInt(LLVMTypeRef Ty,
unsigned long long N,
- int IsSigned);
+ LLVMBool IsSigned);
LLVMGenericValueRef LLVMCreateGenericValueOfPointer(void *P);
@@ -45,7 +45,7 @@ LLVMGenericValueRef LLVMCreateGenericValueOfFloat(LLVMTypeRef Ty, double N);
unsigned LLVMGenericValueIntWidth(LLVMGenericValueRef GenValRef);
unsigned long long LLVMGenericValueToInt(LLVMGenericValueRef GenVal,
- int IsSigned);
+ LLVMBool IsSigned);
void *LLVMGenericValueToPointer(LLVMGenericValueRef GenVal);
@@ -55,18 +55,18 @@ void LLVMDisposeGenericValue(LLVMGenericValueRef GenVal);
/*===-- Operations on execution engines -----------------------------------===*/
-int LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
- LLVMModuleProviderRef MP,
- char **OutError);
+LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
+ LLVMModuleProviderRef MP,
+ char **OutError);
-int LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp,
- LLVMModuleProviderRef MP,
- char **OutError);
+LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp,
+ LLVMModuleProviderRef MP,
+ char **OutError);
-int LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT,
- LLVMModuleProviderRef MP,
- unsigned OptLevel,
- char **OutError);
+LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT,
+ LLVMModuleProviderRef MP,
+ unsigned OptLevel,
+ char **OutError);
void LLVMDisposeExecutionEngine(LLVMExecutionEngineRef EE);
@@ -86,12 +86,12 @@ void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F);
void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP);
-int LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE,
- LLVMModuleProviderRef MP,
- LLVMModuleRef *OutMod, char **OutError);
+LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE,
+ LLVMModuleProviderRef MP,
+ LLVMModuleRef *OutMod, char **OutError);
-int LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
- LLVMValueRef *OutFn);
+LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
+ LLVMValueRef *OutFn);
LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE);
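
A sketch of the JIT-creation path under the new signatures (the function name "main" and opt level are illustrative; LLVMFindFunction returns 0 when the function is found):

    #include <llvm-c/ExecutionEngine.h>
    #include <stdio.h>

    int runModule(LLVMModuleProviderRef MP) {
      LLVMExecutionEngineRef EE;
      LLVMValueRef Fn;
      char *Err = 0;
      /* A nonzero LLVMBool means failure; the reason is returned in Err. */
      if (LLVMCreateJITCompiler(&EE, MP, 2 /* OptLevel */, &Err)) {
        fprintf(stderr, "cannot create JIT: %s\n", Err);
        LLVMDisposeMessage(Err);
        return 1;
      }
      if (!LLVMFindFunction(EE, "main", &Fn)) /* 0 means found */
        LLVMRunFunction(EE, Fn, 0, 0);
      LLVMDisposeExecutionEngine(EE);
      return 0;
    }
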
diff --git a/include/llvm-c/Target.h b/include/llvm-c/Target.h
index 0057182..e705a99 100644
--- a/include/llvm-c/Target.h
+++ b/include/llvm-c/Target.h
@@ -26,8 +26,7 @@
extern "C" {
#endif
-enum { LLVMBigEndian, LLVMLittleEndian };
-typedef int LLVMByteOrdering;
+enum LLVMByteOrdering { LLVMBigEndian, LLVMLittleEndian };
typedef struct LLVMOpaqueTargetData *LLVMTargetDataRef;
typedef struct LLVMStructLayout *LLVMStructLayoutRef;
@@ -62,7 +61,7 @@ static inline void LLVMInitializeAllTargets() {
/** LLVMInitializeNativeTarget - The main program should call this function to
initialize the native target corresponding to the host. This is useful
for JIT applications to ensure that the target gets linked in correctly. */
-static inline int LLVMInitializeNativeTarget() {
+static inline LLVMBool LLVMInitializeNativeTarget() {
/* If we have a native target, initialize it to ensure it is linked in. */
#ifdef LLVM_NATIVE_ARCH
#define DoInit2(TARG) \
@@ -97,7 +96,7 @@ char *LLVMCopyStringRepOfTargetData(LLVMTargetDataRef);
/** Returns the byte order of a target, either LLVMBigEndian or
LLVMLittleEndian.
See the method llvm::TargetData::isLittleEndian. */
-LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef);
+enum LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef);
/** Returns the pointer size in bytes for a target.
See the method llvm::TargetData::getPointerSize. */
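
With LLVMByteOrdering now a real enum rather than a typedef'd int, comparisons against LLVMBigEndian/LLVMLittleEndian are type-checked. A small sketch (the target-data string is an arbitrary little-endian example):

    #include <llvm-c/Target.h>
    #include <stdio.h>

    void printLayout(void) {
      LLVMTargetDataRef TD = LLVMCreateTargetData("e-p:32:32");
      enum LLVMByteOrdering BO = LLVMByteOrder(TD);
      printf("%s-endian, %u-byte pointers\n",
             BO == LLVMLittleEndian ? "little" : "big",
             LLVMPointerSize(TD));
      LLVMDisposeTargetData(TD);
    }
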
diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h
index 9c046ef..45108c8 100644
--- a/include/llvm/ADT/BitVector.h
+++ b/include/llvm/ADT/BitVector.h
@@ -95,6 +95,9 @@ public:
delete[] Bits;
}
+ /// empty - Tests whether there are no bits in this bitvector.
+ bool empty() const { return Size == 0; }
+
/// size - Returns the number of bits in this bitvector.
unsigned size() const { return Size; }
@@ -341,6 +344,12 @@ public:
return *this;
}
+ void swap(BitVector &RHS) {
+ std::swap(Bits, RHS.Bits);
+ std::swap(Size, RHS.Size);
+ std::swap(Capacity, RHS.Capacity);
+ }
+
private:
unsigned NumBitWords(unsigned S) const {
return (S + BITWORD_SIZE-1) / BITWORD_SIZE;
@@ -406,4 +415,13 @@ inline BitVector operator^(const BitVector &LHS, const BitVector &RHS) {
}
} // End llvm namespace
+
+namespace std {
+ /// Implement std::swap in terms of BitVector swap.
+ inline void
+ swap(llvm::BitVector &LHS, llvm::BitVector &RHS) {
+ LHS.swap(RHS);
+ }
+}
+
#endif
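
A quick sketch of the two additions, empty() and the O(1) swap now reachable through std::swap:

    #include "llvm/ADT/BitVector.h"
    #include <cassert>
    using namespace llvm;

    void example() {
      BitVector A(100), B;       // A has 100 bits, B is empty
      A.set(42);
      assert(!A.empty() && B.empty());
      std::swap(A, B);           // O(1): exchanges Bits, Size, and Capacity
      assert(A.empty() && B.test(42));
    }
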
diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h
new file mode 100644
index 0000000..346fb1c
--- /dev/null
+++ b/include/llvm/ADT/SmallBitVector.h
@@ -0,0 +1,373 @@
+//===- llvm/ADT/SmallBitVector.h - 'Normally small' bit vectors -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SmallBitVector class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_SMALLBITVECTOR_H
+#define LLVM_ADT_SMALLBITVECTOR_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <climits> // CHAR_BIT
+
+namespace llvm {
+
+/// SmallBitVector - This is a 'bitvector' (really, a variable-sized bit array),
+/// optimized for the case when the array is small. It contains one
+/// pointer-sized field, which is directly used as a plain collection of bits
+/// when possible, or as a pointer to a larger heap-allocated array when
+/// necessary. This allows normal "small" cases to be fast without losing
+/// generality for large inputs.
+///
+class SmallBitVector {
+ // TODO: In "large" mode, a pointer to a BitVector is used, leading to an
+ // unnecessary level of indirection. It would be more efficient to use a
+ // pointer to memory containing size, allocation size, and the array of bits.
+ PointerIntPair<BitVector *, 1, uintptr_t> X;
+
+ // The number of bits in this class.
+ static const size_t NumBaseBits = sizeof(uintptr_t) * CHAR_BIT;
+
+ // One bit is used to discriminate between small and large mode. The
+ // remaining bits are used for the small-mode representation.
+ static const size_t SmallNumRawBits = NumBaseBits - 1;
+
+ // A few more bits are used to store the size of the bit set in small mode.
+ // Theoretically this is a ceil-log2. These bits are encoded in the most
+ // significant bits of the raw bits.
+ static const size_t SmallNumSizeBits = (NumBaseBits == 32 ? 5 :
+ NumBaseBits == 64 ? 6 :
+ SmallNumRawBits);
+
+ // The remaining bits are used to store the actual set in small mode.
+ static const size_t SmallNumDataBits = SmallNumRawBits - SmallNumSizeBits;
+
+ bool isSmall() const {
+ return X.getInt();
+ }
+
+ void switchToSmall(uintptr_t NewSmallBits, size_t NewSize) {
+ X.setInt(true);
+ setSmallSize(NewSize);
+ setSmallBits(NewSmallBits);
+ }
+
+ void switchToLarge(BitVector *BV) {
+ X.setInt(false);
+ X.setPointer(BV);
+ }
+
+ // Return all the bits used for the "small" representation; this includes
+ // bits for the size as well as the element bits.
+ uintptr_t getSmallRawBits() const {
+ return reinterpret_cast<uintptr_t>(X.getPointer()) >> 1;
+ }
+
+ void setSmallRawBits(uintptr_t NewRawBits) {
+ return X.setPointer(reinterpret_cast<BitVector *>(NewRawBits << 1));
+ }
+
+ // Return the size.
+ size_t getSmallSize() const {
+ return getSmallRawBits() >> SmallNumDataBits;
+ }
+
+ void setSmallSize(size_t Size) {
+ setSmallRawBits(getSmallBits() | (Size << SmallNumDataBits));
+ }
+
+ // Return the element bits.
+ uintptr_t getSmallBits() const {
+ return getSmallRawBits() & ~(~uintptr_t(0) << SmallNumDataBits);
+ }
+
+ void setSmallBits(uintptr_t NewBits) {
+ setSmallRawBits((getSmallRawBits() & (~uintptr_t(0) << SmallNumDataBits)) |
+ (NewBits & ~(~uintptr_t(0) << getSmallSize())));
+ }
+
+public:
+ /// SmallBitVector default ctor - Creates an empty bitvector.
+ SmallBitVector() : X(0, 1) {}
+
+ /// SmallBitVector ctor - Creates a bitvector of the specified number of bits.
+ /// All bits are initialized to the specified value.
+ /// bits are initialized to the specified value.
+ explicit SmallBitVector(unsigned s, bool t = false) : X(0, 1) {
+ if (s <= SmallNumDataBits)
+ switchToSmall(t ? ~uintptr_t(0) : 0, s);
+ else
+ switchToLarge(new BitVector(s, t));
+ }
+
+ /// SmallBitVector copy ctor.
+ SmallBitVector(const SmallBitVector &RHS) {
+ if (RHS.isSmall())
+ X = RHS.X;
+ else
+ switchToLarge(new BitVector(*RHS.X.getPointer()));
+ }
+
+ ~SmallBitVector() {
+ if (!isSmall())
+ delete X.getPointer();
+ }
+
+ /// empty - Tests whether there are no bits in this bitvector.
+ bool empty() const {
+ return isSmall() ? getSmallSize() == 0 : X.getPointer()->empty();
+ }
+
+ /// size - Returns the number of bits in this bitvector.
+ size_t size() const {
+ return isSmall() ? getSmallSize() : X.getPointer()->size();
+ }
+
+ /// count - Returns the number of bits which are set.
+ unsigned count() const {
+ if (isSmall()) {
+ uintptr_t Bits = getSmallBits();
+ if (sizeof(uintptr_t) * CHAR_BIT == 32)
+ return CountPopulation_32(Bits);
+ if (sizeof(uintptr_t) * CHAR_BIT == 64)
+ return CountPopulation_64(Bits);
+ assert(0 && "Unsupported!");
+ }
+ return X.getPointer()->count();
+ }
+
+ /// any - Returns true if any bit is set.
+ bool any() const {
+ if (isSmall())
+ return getSmallBits() != 0;
+ return X.getPointer()->any();
+ }
+
+ /// none - Returns true if none of the bits are set.
+ bool none() const {
+ if (isSmall())
+ return getSmallBits() == 0;
+ return X.getPointer()->none();
+ }
+
+ /// find_first - Returns the index of the first set bit, -1 if none
+ /// of the bits are set.
+ int find_first() const {
+ if (isSmall()) {
+ uintptr_t Bits = getSmallBits();
+ if (Bits == 0)
+ return -1;
+ if (sizeof(uintptr_t) * CHAR_BIT == 32)
+ return CountTrailingZeros_32(Bits);
+ if (sizeof(uintptr_t) * CHAR_BIT == 64)
+ return CountTrailingZeros_64(Bits);
+ assert(0 && "Unsupported!");
+ }
+ return X.getPointer()->find_first();
+ }
+
+ /// find_next - Returns the index of the next set bit following the
+ /// "Prev" bit. Returns -1 if the next set bit is not found.
+ int find_next(unsigned Prev) const {
+ if (isSmall()) {
+ uintptr_t Bits = getSmallBits();
+ if (Prev + 1 >= getSmallSize())
+ return -1;
+ // Mask off Prev and all earlier bits; only strictly later bits remain.
+ Bits &= ~uintptr_t(0) << (Prev + 1);
+ if (Bits == 0)
+ return -1;
+ if (sizeof(uintptr_t) * CHAR_BIT == 32)
+ return CountTrailingZeros_32(Bits);
+ if (sizeof(uintptr_t) * CHAR_BIT == 64)
+ return CountTrailingZeros_64(Bits);
+ assert(0 && "Unsupported!");
+ }
+ return X.getPointer()->find_next(Prev);
+ }
+
+ /// clear - Clear all bits.
+ void clear() {
+ if (!isSmall())
+ delete X.getPointer();
+ switchToSmall(0, 0);
+ }
+
+ /// resize - Grow or shrink the bitvector.
+ void resize(unsigned N, bool t = false) {
+ if (!isSmall()) {
+ X.getPointer()->resize(N, t);
+ } else if (getSmallSize() >= N) {
+ setSmallSize(N);
+ setSmallBits(getSmallBits());
+ } else {
+ BitVector *BV = new BitVector(N, t);
+ uintptr_t OldBits = getSmallBits();
+ for (size_t i = 0, e = getSmallSize(); i != e; ++i)
+ (*BV)[i] = (OldBits >> i) & 1;
+ switchToLarge(BV);
+ }
+ }
+
+ void reserve(unsigned N) {
+ if (isSmall()) {
+ if (N > SmallNumDataBits) {
+ uintptr_t OldBits = getSmallRawBits();
+ size_t SmallSize = getSmallSize();
+ BitVector *BV = new BitVector(SmallSize);
+ for (size_t i = 0; i < SmallSize; ++i)
+ if ((OldBits >> i) & 1)
+ BV->set(i);
+ BV->reserve(N);
+ switchToLarge(BV);
+ }
+ } else {
+ X.getPointer()->reserve(N);
+ }
+ }
+
+ // Set, reset, flip
+ SmallBitVector &set() {
+ if (isSmall())
+ setSmallBits(~uintptr_t(0));
+ else
+ X.getPointer()->set();
+ return *this;
+ }
+
+ SmallBitVector &set(unsigned Idx) {
+ if (isSmall())
+ setSmallBits(getSmallBits() | (uintptr_t(1) << Idx));
+ else
+ X.getPointer()->set(Idx);
+ return *this;
+ }
+
+ SmallBitVector &reset() {
+ if (isSmall())
+ setSmallBits(0);
+ else
+ X.getPointer()->reset();
+ return *this;
+ }
+
+ SmallBitVector &reset(unsigned Idx) {
+ if (isSmall())
+ setSmallBits(getSmallBits() & ~(uintptr_t(1) << Idx));
+ else
+ X.getPointer()->reset(Idx);
+ return *this;
+ }
+
+ SmallBitVector &flip() {
+ if (isSmall())
+ setSmallBits(~getSmallBits());
+ else
+ X.getPointer()->flip();
+ return *this;
+ }
+
+ SmallBitVector &flip(unsigned Idx) {
+ if (isSmall())
+ setSmallBits(getSmallBits() ^ (uintptr_t(1) << Idx));
+ else
+ X.getPointer()->flip(Idx);
+ return *this;
+ }
+
+ // No argument flip.
+ SmallBitVector operator~() const {
+ return SmallBitVector(*this).flip();
+ }
+
+ // Indexing.
+ // TODO: Add an index operator which returns a "reference" (proxy class).
+ bool operator[](unsigned Idx) const {
+ assert(Idx < size() && "Out-of-bounds Bit access.");
+ if (isSmall())
+ return ((getSmallBits() >> Idx) & 1) != 0;
+ return X.getPointer()->operator[](Idx);
+ }
+
+ bool test(unsigned Idx) const {
+ return (*this)[Idx];
+ }
+
+ // Comparison operators.
+ bool operator==(const SmallBitVector &RHS) const {
+ if (size() != RHS.size())
+ return false;
+ if (isSmall())
+ return getSmallBits() == RHS.getSmallBits();
+ else
+ return *X.getPointer() == *RHS.X.getPointer();
+ }
+
+ bool operator!=(const SmallBitVector &RHS) const {
+ return !(*this == RHS);
+ }
+
+ // Intersection, union, disjoint union.
+ BitVector &operator&=(const SmallBitVector &RHS); // TODO: implement
+
+ BitVector &operator|=(const SmallBitVector &RHS); // TODO: implement
+
+ BitVector &operator^=(const SmallBitVector &RHS); // TODO: implement
+
+ // Assignment operator.
+ const SmallBitVector &operator=(const SmallBitVector &RHS) {
+ if (isSmall()) {
+ if (RHS.isSmall())
+ X = RHS.X;
+ else
+ switchToLarge(new BitVector(*RHS.X.getPointer()));
+ } else {
+ if (!RHS.isSmall())
+ *X.getPointer() = *RHS.X.getPointer();
+ else {
+ delete X.getPointer();
+ X = RHS.X;
+ }
+ }
+ return *this;
+ }
+
+ void swap(SmallBitVector &RHS) {
+ std::swap(X, RHS.X);
+ }
+};
+
+inline SmallBitVector
+operator&(const SmallBitVector &LHS, const SmallBitVector &RHS) {
+ SmallBitVector Result(LHS);
+ Result &= RHS;
+ return Result;
+}
+
+inline SmallBitVector
+operator|(const SmallBitVector &LHS, const SmallBitVector &RHS) {
+ SmallBitVector Result(LHS);
+ Result |= RHS;
+ return Result;
+}
+
+inline SmallBitVector
+operator^(const SmallBitVector &LHS, const SmallBitVector &RHS) {
+ SmallBitVector Result(LHS);
+ Result ^= RHS;
+ return Result;
+}
+
+} // End llvm namespace
+
+namespace std {
+ /// Implement std::swap in terms of SmallBitVector swap.
+ inline void
+ swap(llvm::SmallBitVector &LHS, llvm::SmallBitVector &RHS) {
+ LHS.swap(RHS);
+ }
+}
+
+#endif
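
A usage sketch of the small/large transition (note that in this revision operator&=, |= and ^= are declared but left as TODOs, so the free &, | and ^ operators above would not link yet; the sketch avoids them):

    #include "llvm/ADT/SmallBitVector.h"
    #include <cassert>
    using namespace llvm;

    void example() {
      SmallBitVector BV(16);   // fits in the pointer-sized word: no heap allocation
      BV.set(3);
      assert(BV.test(3) && BV.count() == 1);

      BV.resize(1000);         // exceeds small mode: bits spill into a heap BitVector
      BV.set(512);
      assert(BV.count() == 2 && BV.find_first() == 3);
    }
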
diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h
index 85936c0..1ea546f 100644
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@@ -23,6 +23,7 @@
#include <vector>
namespace llvm {
+template<typename T> class SmallVectorImpl;
/// hexdigit - Return the (uppercase) hexadecimal character for the
/// given number \arg X (which should be less than 16).
@@ -136,86 +137,25 @@ static inline std::string UppercaseString(const std::string &S) {
return result;
}
-/// StringsEqualNoCase - Return true if the two strings are equal, ignoring
-/// case.
-static inline bool StringsEqualNoCase(const std::string &LHS,
- const std::string &RHS) {
- if (LHS.size() != RHS.size()) return false;
- for (unsigned i = 0, e = static_cast<unsigned>(LHS.size()); i != e; ++i)
- if (tolower(LHS[i]) != tolower(RHS[i])) return false;
- return true;
-}
-
-/// StringsEqualNoCase - Return true if the two strings are equal, ignoring
-/// case.
-static inline bool StringsEqualNoCase(const std::string &LHS,
- const char *RHS) {
- for (unsigned i = 0, e = static_cast<unsigned>(LHS.size()); i != e; ++i) {
- if (RHS[i] == 0) return false; // RHS too short.
- if (tolower(LHS[i]) != tolower(RHS[i])) return false;
- }
- return RHS[LHS.size()] == 0; // Not too long?
-}
-
-/// StringsEqualNoCase - Return true if the two null-terminated C strings are
-/// equal, ignoring
-
-static inline bool StringsEqualNoCase(const char *LHS, const char *RHS,
- unsigned len) {
-
- for (unsigned i = 0; i < len; ++i) {
- if (tolower(LHS[i]) != tolower(RHS[i]))
- return false;
-
- // If RHS[i] == 0 then LHS[i] == 0 or otherwise we would have returned
- // at the previous branch as tolower('\0') == '\0'.
- if (RHS[i] == 0)
- return true;
- }
-
- return true;
-}
-
-/// CStrInCStrNoCase - Portable version of strcasestr. Locates the first
-/// occurance of c-string 's2' in string 's1', ignoring case. Returns
-/// NULL if 's2' cannot be found.
-static inline const char* CStrInCStrNoCase(const char *s1, const char *s2) {
-
- // Are either strings NULL or empty?
- if (!s1 || !s2 || s1[0] == '\0' || s2[0] == '\0')
- return 0;
-
- if (s1 == s2)
- return s1;
-
- const char *I1=s1, *I2=s2;
-
- while (*I1 != '\0' && *I2 != '\0' )
- if (tolower(*I1) != tolower(*I2)) { // No match. Start over.
- ++s1; I1 = s1; I2 = s2;
- }
- else { // Character match. Advance to the next character.
- ++I1; ++I2;
- }
-
- // If we exhausted all of the characters in 's2', then 's2' appears in 's1'.
- return *I2 == '\0' ? s1 : 0;
-}
+/// StrInStrNoCase - Portable version of strcasestr. Locates the first
+/// occurrence of string 's2' in string 's1', ignoring case. Returns
+/// the offset of s2 in s1 or npos if s2 cannot be found.
+StringRef::size_type StrInStrNoCase(StringRef s1, StringRef s2);
/// getToken - This function extracts one token from source, ignoring any
/// leading characters that appear in the Delimiters string, and ending the
/// token at any of the characters that appear in the Delimiters string. If
/// there are no tokens in the source string, an empty string is returned.
-/// The Source source string is updated in place to remove the returned string
-/// and any delimiter prefix from it.
-std::string getToken(std::string &Source,
- const char *Delimiters = " \t\n\v\f\r");
+/// The function returns a pair containing the extracted token and the
+/// remaining tail string.
+std::pair<StringRef, StringRef> getToken(StringRef Source,
+ StringRef Delimiters = " \t\n\v\f\r");
/// SplitString - Split up the specified string according to the specified
/// delimiters, appending the result fragments to the output list.
-void SplitString(const std::string &Source,
- std::vector<std::string> &OutFragments,
- const char *Delimiters = " \t\n\v\f\r");
+void SplitString(StringRef Source,
+ SmallVectorImpl<StringRef> &OutFragments,
+ StringRef Delimiters = " \t\n\v\f\r");
/// HashString - Hash function for strings.
///
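
The StringRef-based replacements are value-oriented: getToken returns a (token, rest) pair instead of mutating its argument, and SplitString fills a SmallVector of StringRefs. A sketch:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringExtras.h"
    #include "llvm/ADT/StringRef.h"
    #include <cassert>
    using namespace llvm;

    void example() {
      // getToken now returns (token, rest) instead of mutating its argument.
      std::pair<StringRef, StringRef> P = getToken("  alpha beta");
      assert(P.first == "alpha" && P.second == " beta");

      SmallVector<StringRef, 4> Parts;
      SplitString("one two three", Parts);
      assert(Parts.size() == 3 && Parts[1] == "two");
    }
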
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h
index 1c73836..3064af3 100644
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -29,6 +29,7 @@ namespace llvm {
class StringRef {
public:
typedef const char *iterator;
+ typedef const char *const_iterator;
static const size_t npos = ~size_t(0);
typedef size_t size_type;
@@ -42,15 +43,8 @@ namespace llvm {
// Workaround PR5482: nearly all gcc 4.x miscompile StringRef and std::min()
// Changing the arg of min to be an integer, instead of a reference to an
// integer works around this bug.
- size_t min(size_t a, size_t b) const
- {
- return a < b ? a : b;
- }
-
- size_t max(size_t a, size_t b) const
- {
- return a > b ? a : b;
- }
+ size_t min(size_t a, size_t b) const { return a < b ? a : b; }
+ size_t max(size_t a, size_t b) const { return a > b ? a : b; }
public:
/// @name Constructors
@@ -191,7 +185,7 @@ namespace llvm {
/// find - Search for the first character \arg C in the string.
///
- /// \return - The index of the first occurence of \arg C, or npos if not
+ /// \return - The index of the first occurrence of \arg C, or npos if not
/// found.
size_t find(char C, size_t From = 0) const {
for (size_t i = min(From, Length), e = Length; i != e; ++i)
@@ -202,13 +196,13 @@ namespace llvm {
/// find - Search for the first string \arg Str in the string.
///
- /// \return - The index of the first occurence of \arg Str, or npos if not
+ /// \return - The index of the first occurrence of \arg Str, or npos if not
/// found.
size_t find(StringRef Str, size_t From = 0) const;
/// rfind - Search for the last character \arg C in the string.
///
- /// \return - The index of the last occurence of \arg C, or npos if not
+ /// \return - The index of the last occurrence of \arg C, or npos if not
/// found.
size_t rfind(char C, size_t From = npos) const {
From = min(From, Length);
@@ -223,7 +217,7 @@ namespace llvm {
/// rfind - Search for the last string \arg Str in the string.
///
- /// \return - The index of the last occurence of \arg Str, or npos if not
+ /// \return - The index of the last occurrence of \arg Str, or npos if not
/// found.
size_t rfind(StringRef Str) const;
@@ -313,7 +307,7 @@ namespace llvm {
return StringRef(Data + Start, End - Start);
}
- /// split - Split into two substrings around the first occurence of a
+ /// split - Split into two substrings around the first occurrence of a
/// separator character.
///
/// If \arg Separator is in the string, then the result is a pair (LHS, RHS)
@@ -330,7 +324,7 @@ namespace llvm {
return std::make_pair(slice(0, Idx), slice(Idx+1, npos));
}
- /// split - Split into two substrings around the first occurence of a
+ /// split - Split into two substrings around the first occurrence of a
/// separator string.
///
/// If \arg Separator is in the string, then the result is a pair (LHS, RHS)
@@ -347,7 +341,7 @@ namespace llvm {
return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
}
- /// split - Split into substrings around the occurences of a separator
+ /// split - Split into substrings around the occurrences of a separator
/// string.
///
/// Each substring is stored in \arg A. If \arg MaxSplit is >= 0, at most
@@ -366,7 +360,7 @@ namespace llvm {
StringRef Separator, int MaxSplit = -1,
bool KeepEmpty = true) const;
- /// rsplit - Split into two substrings around the last occurence of a
+ /// rsplit - Split into two substrings around the last occurrence of a
/// separator character.
///
/// If \arg Separator is in the string, then the result is a pair (LHS, RHS)
diff --git a/include/llvm/ADT/Twine.h b/include/llvm/ADT/Twine.h
index ca0be53..97e9df4 100644
--- a/include/llvm/ADT/Twine.h
+++ b/include/llvm/ADT/Twine.h
@@ -329,6 +329,22 @@ namespace llvm {
bool isTriviallyEmpty() const {
return isNullary();
}
+
+ /// isSingleStringRef - Return true if this twine can be dynamically
+ /// accessed as a single StringRef value with getSingleStringRef().
+ bool isSingleStringRef() const {
+ if (getRHSKind() != EmptyKind) return false;
+
+ switch (getLHSKind()) {
+ case EmptyKind:
+ case CStringKind:
+ case StdStringKind:
+ case StringRefKind:
+ return true;
+ default:
+ return false;
+ }
+ }
/// @}
/// @name String Operations
@@ -347,6 +363,24 @@ namespace llvm {
/// SmallVector.
void toVector(SmallVectorImpl<char> &Out) const;
+ /// getSingleStringRef - This returns the twine as a single StringRef. This
+ /// method is only valid if isSingleStringRef() is true.
+ StringRef getSingleStringRef() const {
+ assert(isSingleStringRef() && "Not a single StringRef!");
+ switch (getLHSKind()) {
+ default: assert(0 && "Out of sync with isSingleStringRef");
+ case EmptyKind: return StringRef();
+ case CStringKind: return StringRef((const char*)LHS);
+ case StdStringKind: return StringRef(*(const std::string*)LHS);
+ case StringRefKind: return *(const StringRef*)LHS;
+ }
+ }
+
+ /// toStringRef - This returns the twine as a single StringRef if it can be
+ /// represented as such. Otherwise the twine is written into the given
+ /// SmallVector and a StringRef to the SmallVector's data is returned.
+ StringRef toStringRef(SmallVectorImpl<char> &Out) const;
+
/// print - Write the concatenated string represented by this twine to the
/// stream \arg OS.
void print(raw_ostream &OS) const;
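
A sketch of the new toStringRef: a single-string twine is returned directly with no copy, while a concatenated twine is first rendered into the caller-provided buffer. For example, render(Twine("foo") + ".tmp", Buf) writes into Buf, whereas render("foo", Buf) leaves it untouched:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/ADT/Twine.h"
    using namespace llvm;

    StringRef render(const Twine &Name, SmallString<64> &Buf) {
      // No copy for single-string twines; concatenations land in Buf.
      return Name.toStringRef(Buf);
    }
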
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index 2d43bdd..9f41135 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -197,6 +197,10 @@ public:
virtual ModRefBehavior getModRefBehavior(Function *F,
std::vector<PointerAccessInfo> *Info = 0);
+ /// getModRefBehavior - Return the modref behavior of the intrinsic with the
+ /// given id.
+ static ModRefBehavior getModRefBehavior(unsigned iid);
+
/// doesNotAccessMemory - If the specified call is known to never read or
/// write memory, return true. If the call only reads from known-constant
/// memory, it is also legal to return true. Calls that unwind the stack
diff --git a/include/llvm/Analysis/DebugInfo.h b/include/llvm/Analysis/DebugInfo.h
index fdbd9c1..cc9514c 100644
--- a/include/llvm/Analysis/DebugInfo.h
+++ b/include/llvm/Analysis/DebugInfo.h
@@ -30,11 +30,7 @@ namespace llvm {
class Module;
class Type;
class Value;
- struct DbgStopPointInst;
- struct DbgDeclareInst;
- struct DbgFuncStartInst;
- struct DbgRegionStartInst;
- struct DbgRegionEndInst;
+ class DbgDeclareInst;
class DebugLoc;
struct DebugLocTracker;
class Instruction;
@@ -495,7 +491,6 @@ namespace llvm {
Module &M;
LLVMContext& VMContext;
- const Type *EmptyStructPtr; // "{}*".
Function *DeclareFn; // llvm.dbg.declare
Function *ValueFn; // llvm.dbg.value
@@ -651,27 +646,19 @@ namespace llvm {
Instruction *InsertBefore);
/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
- Instruction *InsertDbgValueIntrinsic(llvm::Value *V, llvm::Value *Offset,
+ Instruction *InsertDbgValueIntrinsic(llvm::Value *V, uint64_t Offset,
DIVariable D, BasicBlock *InsertAtEnd);
/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
- Instruction *InsertDbgValueIntrinsic(llvm::Value *V, llvm::Value *Offset,
+ Instruction *InsertDbgValueIntrinsic(llvm::Value *V, uint64_t Offset,
DIVariable D, Instruction *InsertBefore);
private:
Constant *GetTagConstant(unsigned TAG);
};
- /// Finds the stoppoint coressponding to this instruction, that is the
- /// stoppoint that dominates this instruction
- const DbgStopPointInst *findStopPoint(const Instruction *Inst);
-
- /// Finds the stoppoint corresponding to first real (non-debug intrinsic)
- /// instruction in this Basic Block, and returns the stoppoint for it.
- const DbgStopPointInst *findBBStopPoint(const BasicBlock *BB);
-
/// Finds the dbg.declare intrinsic corresponding to this value if any.
/// It looks through pointer casts too.
- const DbgDeclareInst *findDbgDeclare(const Value *V, bool stripCasts = true);
+ const DbgDeclareInst *findDbgDeclare(const Value *V);
/// Find the debug info descriptor corresponding to this global variable.
Value *findDbgGlobalDeclare(GlobalVariable *V);
@@ -681,20 +668,10 @@ namespace llvm {
std::string &Dir);
/// ExtractDebugLocation - Extract debug location information
- /// from llvm.dbg.stoppoint intrinsic.
- DebugLoc ExtractDebugLocation(DbgStopPointInst &SPI,
- DebugLocTracker &DebugLocInfo);
-
- /// ExtractDebugLocation - Extract debug location information
/// from DILocation.
DebugLoc ExtractDebugLocation(DILocation &Loc,
DebugLocTracker &DebugLocInfo);
- /// ExtractDebugLocation - Extract debug location information
- /// from llvm.dbg.func_start intrinsic.
- DebugLoc ExtractDebugLocation(DbgFuncStartInst &FSI,
- DebugLocTracker &DebugLocInfo);
-
/// getDISubprogram - Find subprogram that is enclosing this scope.
DISubprogram getDISubprogram(MDNode *Scope);
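
Given the signature change, a caller now passes the offset as a plain integer; a sketch assuming the surrounding DIFactory class and illustrative parameter values:

    #include "llvm/Analysis/DebugInfo.h"
    using namespace llvm;

    Instruction *emitDbgValue(Module &M, Value *V, DIVariable Var,
                              Instruction *InsertBefore) {
      DIFactory DIF(M);
      // The offset is now a plain uint64_t rather than an llvm::Value*.
      return DIF.InsertDbgValueIntrinsic(V, /*Offset=*/0, Var, InsertBefore);
    }
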
diff --git a/include/llvm/Analysis/DominatorInternals.h b/include/llvm/Analysis/DominatorInternals.h
index cca0d50..5ecb348 100644
--- a/include/llvm/Analysis/DominatorInternals.h
+++ b/include/llvm/Analysis/DominatorInternals.h
@@ -347,15 +347,8 @@ void Calculate(DominatorTreeBase<typename GraphTraits<NodeT>::NodeType>& DT,
DT.IDoms.clear();
DT.Info.clear();
std::vector<typename GraphT::NodeType*>().swap(DT.Vertex);
-
- // FIXME: This does not work on PostDomTrees. It seems likely that this is
- // due to an error in the algorithm for post-dominators. This really should
- // be investigated and fixed at some point.
- // DT.updateDFSNumbers();
-
- // Start out with the DFS numbers being invalid. Let them be computed if
- // demanded.
- DT.DFSInfoValid = false;
+
+ DT.updateDFSNumbers();
}
}
diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h
index 2e149d5..31c19c4 100644
--- a/include/llvm/Analysis/Dominators.h
+++ b/include/llvm/Analysis/Dominators.h
@@ -390,6 +390,13 @@ public:
if (A == 0 || B == 0)
return false;
+ // Compare the result of the tree walk and the dfs numbers, if expensive
+ // checks are enabled.
+#ifdef XDEBUG
+ assert(!DFSInfoValid
+ || (dominatedBySlowTreeWalk(A, B) == B->DominatedBy(A)));
+#endif
+
if (DFSInfoValid)
return B->DominatedBy(A);
@@ -585,29 +592,35 @@ protected:
SmallVector<std::pair<DomTreeNodeBase<NodeT>*,
typename DomTreeNodeBase<NodeT>::iterator>, 32> WorkStack;
- for (unsigned i = 0, e = (unsigned)this->Roots.size(); i != e; ++i) {
- DomTreeNodeBase<NodeT> *ThisRoot = getNode(this->Roots[i]);
- WorkStack.push_back(std::make_pair(ThisRoot, ThisRoot->begin()));
- ThisRoot->DFSNumIn = DFSNum++;
-
- while (!WorkStack.empty()) {
- DomTreeNodeBase<NodeT> *Node = WorkStack.back().first;
- typename DomTreeNodeBase<NodeT>::iterator ChildIt =
- WorkStack.back().second;
-
- // If we visited all of the children of this node, "recurse" back up the
- // stack setting the DFOutNum.
- if (ChildIt == Node->end()) {
- Node->DFSNumOut = DFSNum++;
- WorkStack.pop_back();
- } else {
- // Otherwise, recursively visit this child.
- DomTreeNodeBase<NodeT> *Child = *ChildIt;
- ++WorkStack.back().second;
-
- WorkStack.push_back(std::make_pair(Child, Child->begin()));
- Child->DFSNumIn = DFSNum++;
- }
+ DomTreeNodeBase<NodeT> *ThisRoot = getRootNode();
+
+ if (!ThisRoot)
+ return;
+
+ // Even in the case of multiple exits that form the post dominator root
+ // nodes, do not iterate over all exits, but start from the virtual root
+ // node. Otherwise, blocks that are not post-dominated by any exit, only
+ // by the virtual root node, would never be assigned a DFS number.
+ WorkStack.push_back(std::make_pair(ThisRoot, ThisRoot->begin()));
+ ThisRoot->DFSNumIn = DFSNum++;
+
+ while (!WorkStack.empty()) {
+ DomTreeNodeBase<NodeT> *Node = WorkStack.back().first;
+ typename DomTreeNodeBase<NodeT>::iterator ChildIt =
+ WorkStack.back().second;
+
+ // If we visited all of the children of this node, "recurse" back up the
+ // stack setting the DFOutNum.
+ if (ChildIt == Node->end()) {
+ Node->DFSNumOut = DFSNum++;
+ WorkStack.pop_back();
+ } else {
+ // Otherwise, recursively visit this child.
+ DomTreeNodeBase<NodeT> *Child = *ChildIt;
+ ++WorkStack.back().second;
+
+ WorkStack.push_back(std::make_pair(Child, Child->begin()));
+ Child->DFSNumIn = DFSNum++;
}
}
@@ -646,21 +659,17 @@ public:
/// recalculate - compute a dominator tree for the given function
template<class FT>
void recalculate(FT& F) {
- if (!this->IsPostDominators) {
- reset();
+ reset();
+ this->Vertex.push_back(0);
- // Initialize roots
+ if (!this->IsPostDominators) {
+ // Initialize root
this->Roots.push_back(&F.front());
this->IDoms[&F.front()] = 0;
this->DomTreeNodes[&F.front()] = 0;
- this->Vertex.push_back(0);
Calculate<FT, NodeT*>(*this, F);
-
- updateDFSNumbers();
} else {
- reset(); // Reset from the last time we were run...
-
// Initialize the roots list
for (typename FT::iterator I = F.begin(), E = F.end(); I != E; ++I) {
if (std::distance(GraphTraits<FT*>::child_begin(I),
@@ -672,8 +681,6 @@ public:
this->DomTreeNodes[I] = 0;
}
- this->Vertex.push_back(0);
-
Calculate<FT, Inverse<NodeT*> >(*this, F);
}
}
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 060286f..33bf0b0 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -478,7 +478,7 @@ public:
for (iterator I = begin(), E = end(); I != E; ++I)
(*I)->print(OS, Depth+2);
}
-
+
protected:
friend class LoopInfoBase<BlockT, LoopT>;
explicit LoopBase(BlockT *BB) : ParentLoop(0) {
@@ -588,6 +588,8 @@ public:
/// block, return that block. Otherwise return null.
BasicBlock *getUniqueExitBlock() const;
+ void dump() const;
+
private:
friend class LoopInfoBase<BasicBlock, Loop>;
explicit Loop(BasicBlock *BB) : LoopBase<BasicBlock, Loop>(BB) {}
diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h
index ea14b2d..3681cc01 100644
--- a/include/llvm/Analysis/PostDominators.h
+++ b/include/llvm/Analysis/PostDominators.h
@@ -36,19 +36,23 @@ struct PostDominatorTree : public FunctionPass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
-
+
inline const std::vector<BasicBlock*> &getRoots() const {
return DT->getRoots();
}
-
+
inline DomTreeNode *getRootNode() const {
return DT->getRootNode();
}
-
+
inline DomTreeNode *operator[](BasicBlock *BB) const {
return DT->getNode(BB);
}
-
+
+ inline DomTreeNode *getNode(BasicBlock *BB) const {
+ return DT->getNode(BB);
+ }
+
inline bool dominates(DomTreeNode* A, DomTreeNode* B) const {
return DT->dominates(A, B);
}
@@ -60,7 +64,7 @@ struct PostDominatorTree : public FunctionPass {
inline bool properlyDominates(const DomTreeNode* A, DomTreeNode* B) const {
return DT->properlyDominates(A, B);
}
-
+
inline bool properlyDominates(BasicBlock* A, BasicBlock* B) const {
return DT->properlyDominates(A, B);
}
@@ -97,7 +101,7 @@ template <> struct GraphTraits<PostDominatorTree*>
///
struct PostDominanceFrontier : public DominanceFrontierBase {
static char ID;
- PostDominanceFrontier()
+ PostDominanceFrontier()
: DominanceFrontierBase(&ID, true) {}
virtual bool runOnFunction(Function &) {
diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h
index 0bbdc34..068f81f 100644
--- a/include/llvm/Attributes.h
+++ b/include/llvm/Attributes.h
@@ -1,4 +1,4 @@
-//===-- llvm/Attributes.h - Container for Attributes ---*---------- C++ -*-===//
+//===-- llvm/Attributes.h - Container for Attributes ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -27,9 +27,9 @@ typedef unsigned Attributes;
namespace Attribute {
-/// Function parameters and results can have attributes to indicate how they
-/// should be treated by optimizations and code generation. This enumeration
-/// lists the attributes that can be associated with parameters, function
+/// Function parameters and results can have attributes to indicate how they
+/// should be treated by optimizations and code generation. This enumeration
+/// lists the attributes that can be associated with parameters, function
/// results or the function itself.
/// @brief Function attributes.
@@ -45,7 +45,7 @@ const Attributes ByVal = 1<<7; ///< Pass structure by value
const Attributes Nest = 1<<8; ///< Nested function static chain
const Attributes ReadNone = 1<<9; ///< Function does not access memory
const Attributes ReadOnly = 1<<10; ///< Function only reads from memory
-const Attributes NoInline = 1<<11; ///< inline=never
+const Attributes NoInline = 1<<11; ///< inline=never
const Attributes AlwaysInline = 1<<12; ///< inline=always
const Attributes OptimizeForSize = 1<<13; ///< opt_size
const Attributes StackProtect = 1<<14; ///< Stack protection.
@@ -58,14 +58,15 @@ const Attributes NoRedZone = 1<<22; /// disable redzone
const Attributes NoImplicitFloat = 1<<23; /// disable implicit floating point
/// instructions.
const Attributes Naked = 1<<24; ///< Naked function
-const Attributes InlineHint = 1<<25; ///< source said inlining was desirable
+const Attributes InlineHint = 1<<25; ///< source said inlining was
+ ///< desirable
/// @brief Attributes that only apply to function parameters.
const Attributes ParameterOnly = ByVal | Nest | StructRet | NoCapture;
/// @brief Attributes that may be applied to the function itself. These cannot
/// be used on return values or function parameters.
-const Attributes FunctionOnly = NoReturn | NoUnwind | ReadNone | ReadOnly |
+const Attributes FunctionOnly = NoReturn | NoUnwind | ReadNone | ReadOnly |
NoInline | AlwaysInline | OptimizeForSize | StackProtect | StackProtectReq |
NoRedZone | NoImplicitFloat | Naked | InlineHint;
@@ -100,26 +101,26 @@ inline unsigned getAlignmentFromAttrs(Attributes A) {
Attributes Align = A & Attribute::Alignment;
if (Align == 0)
return 0;
-
+
return 1U << ((Align >> 16) - 1);
}
-
-
+
+
/// The set of Attributes set in Attributes is converted to a
/// string of equivalent mnemonics. This is, presumably, for writing out
-/// the mnemonics for the assembly writer.
+/// the mnemonics for the assembly writer.
/// @brief Convert attribute bits to text
std::string getAsString(Attributes Attrs);
} // end namespace Attribute
/// This is just a pair of values to associate a set of attributes
-/// with an index.
+/// with an index.
struct AttributeWithIndex {
Attributes Attrs; ///< The attributes that are set, or'd together.
unsigned Index; ///< Index of the parameter for which the attributes apply.
///< Index 0 is used for return value attributes.
///< Index ~0U is used for function attributes.
-
+
static AttributeWithIndex get(unsigned Idx, Attributes Attrs) {
AttributeWithIndex P;
P.Index = Idx;
@@ -127,14 +128,14 @@ struct AttributeWithIndex {
return P;
}
};
-
+
//===----------------------------------------------------------------------===//
// AttrListPtr Smart Pointer
//===----------------------------------------------------------------------===//
class AttributeListImpl;
-
-/// AttrListPtr - This class manages the ref count for the opaque
+
+/// AttrListPtr - This class manages the ref count for the opaque
/// AttributeListImpl object and provides accessors for it.
class AttrListPtr {
/// AttrList - The attributes that we are managing. This can be null
@@ -145,14 +146,14 @@ public:
AttrListPtr(const AttrListPtr &P);
const AttrListPtr &operator=(const AttrListPtr &RHS);
~AttrListPtr();
-
+
//===--------------------------------------------------------------------===//
// Attribute List Construction and Mutation
//===--------------------------------------------------------------------===//
-
+
/// get - Return a Attributes list with the specified parameter in it.
static AttrListPtr get(const AttributeWithIndex *Attr, unsigned NumAttrs);
-
+
/// get - Return a Attribute list with the parameters specified by the
/// consecutive random access iterator range.
template <typename Iter>
@@ -165,24 +166,24 @@ public:
/// attribute list. Since attribute lists are immutable, this
/// returns the new list.
AttrListPtr addAttr(unsigned Idx, Attributes Attrs) const;
-
+
/// removeAttr - Remove the specified attribute at the specified index from
/// this attribute list. Since attribute lists are immutable, this
/// returns the new list.
AttrListPtr removeAttr(unsigned Idx, Attributes Attrs) const;
-
+
//===--------------------------------------------------------------------===//
// Attribute List Accessors
//===--------------------------------------------------------------------===//
/// getParamAttributes - The attributes for the specified index are
- /// returned.
+ /// returned.
Attributes getParamAttributes(unsigned Idx) const {
assert (Idx && Idx != ~0U && "Invalid parameter index!");
return getAttributes(Idx);
}
/// getRetAttributes - The attributes for the ret value are
- /// returned.
+ /// returned.
Attributes getRetAttributes() const {
return getAttributes(0);
}
@@ -191,58 +192,60 @@ public:
Attributes getFnAttributes() const {
return getAttributes(~0U);
}
-
+
/// paramHasAttr - Return true if the specified parameter index has the
/// specified attribute set.
bool paramHasAttr(unsigned Idx, Attributes Attr) const {
return getAttributes(Idx) & Attr;
}
-
+
/// getParamAlignment - Return the alignment for the specified function
/// parameter.
unsigned getParamAlignment(unsigned Idx) const {
return Attribute::getAlignmentFromAttrs(getAttributes(Idx));
}
-
+
/// hasAttrSomewhere - Return true if the specified attribute is set for at
/// least one parameter or for the return value.
bool hasAttrSomewhere(Attributes Attr) const;
/// operator==/!= - Provide equality predicates.
- bool operator==(const AttrListPtr &RHS) const { return AttrList == RHS.AttrList; }
- bool operator!=(const AttrListPtr &RHS) const { return AttrList != RHS.AttrList; }
-
+ bool operator==(const AttrListPtr &RHS) const
+ { return AttrList == RHS.AttrList; }
+ bool operator!=(const AttrListPtr &RHS) const
+ { return AttrList != RHS.AttrList; }
+
void dump() const;
//===--------------------------------------------------------------------===//
// Attribute List Introspection
//===--------------------------------------------------------------------===//
-
+
/// getRawPointer - Return a raw pointer that uniquely identifies this
- /// attribute list.
+ /// attribute list.
void *getRawPointer() const {
return AttrList;
}
-
+
// Attributes are stored as a dense set of slots, where there is one
// slot for each argument that has an attribute. This allows walking over the
// dense set instead of walking the sparse list of attributes.
-
+
/// isEmpty - Return true if there are no attributes.
///
bool isEmpty() const {
return AttrList == 0;
}
-
- /// getNumSlots - Return the number of slots used in this attribute list.
+
+ /// getNumSlots - Return the number of slots used in this attribute list.
/// This is the number of arguments that have an attribute set on them
/// (including the function itself).
unsigned getNumSlots() const;
-
+
/// getSlot - Return the AttributeWithIndex at the specified slot. This
/// holds a index number plus a set of attributes.
const AttributeWithIndex &getSlot(unsigned Slot) const;
-
+
private:
explicit AttrListPtr(AttributeListImpl *L);
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index c037399..9bb50d4 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -111,10 +111,11 @@ namespace bitc {
enum MetadataCodes {
METADATA_STRING = 1, // MDSTRING: [values]
METADATA_NODE = 2, // MDNODE: [n x (type num, value num)]
- METADATA_NAME = 3, // STRING: [values]
- METADATA_NAMED_NODE = 4, // NAMEDMDNODE: [n x mdnodes]
- METADATA_KIND = 5, // [n x [id, name]]
- METADATA_ATTACHMENT = 6 // [m x [value, [n x [id, mdnode]]]
+ METADATA_FN_NODE = 3, // FN_MDNODE: [n x (type num, value num)]
+ METADATA_NAME = 4, // STRING: [values]
+ METADATA_NAMED_NODE = 5, // NAMEDMDNODE: [n x mdnodes]
+ METADATA_KIND = 6, // [n x [id, name]]
+ METADATA_ATTACHMENT = 7 // [m x [value, [n x [id, mdnode]]]
};
// The constants block (CONSTANTS_BLOCK_ID) describes emission for each
// constant and maintains an implicit current type value.
diff --git a/include/llvm/CodeGen/DAGISelHeader.h b/include/llvm/CodeGen/DAGISelHeader.h
index 7233f3f..4d50879 100644
--- a/include/llvm/CodeGen/DAGISelHeader.h
+++ b/include/llvm/CodeGen/DAGISelHeader.h
@@ -109,7 +109,7 @@ void SelectRoot(SelectionDAG &DAG) {
#if 0
DAG.setSubgraphColor(Node, "red");
#endif
- SDNode *ResNode = Select(SDValue(Node, 0));
+ SDNode *ResNode = Select(Node);
// If node should not be replaced, continue with the next one.
if (ResNode == Node)
continue;
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 806952a..9d0f0d9 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -139,7 +139,7 @@ protected:
/// be emitted.
virtual unsigned FastEmit_(MVT VT,
MVT RetVT,
- ISD::NodeType Opcode);
+ unsigned Opcode);
/// FastEmit_r - This method is called by target-independent code
/// to request that an instruction with the given type, opcode, and
@@ -147,7 +147,7 @@ protected:
///
virtual unsigned FastEmit_r(MVT VT,
MVT RetVT,
- ISD::NodeType Opcode, unsigned Op0);
+ unsigned Opcode, unsigned Op0);
/// FastEmit_rr - This method is called by target-independent code
/// to request that an instruction with the given type, opcode, and
@@ -155,7 +155,7 @@ protected:
///
virtual unsigned FastEmit_rr(MVT VT,
MVT RetVT,
- ISD::NodeType Opcode,
+ unsigned Opcode,
unsigned Op0, unsigned Op1);
/// FastEmit_ri - This method is called by target-independent code
@@ -164,7 +164,7 @@ protected:
///
virtual unsigned FastEmit_ri(MVT VT,
MVT RetVT,
- ISD::NodeType Opcode,
+ unsigned Opcode,
unsigned Op0, uint64_t Imm);
/// FastEmit_rf - This method is called by target-independent code
@@ -173,7 +173,7 @@ protected:
///
virtual unsigned FastEmit_rf(MVT VT,
MVT RetVT,
- ISD::NodeType Opcode,
+ unsigned Opcode,
unsigned Op0, ConstantFP *FPImm);
/// FastEmit_rri - This method is called by target-independent code
@@ -182,7 +182,7 @@ protected:
///
virtual unsigned FastEmit_rri(MVT VT,
MVT RetVT,
- ISD::NodeType Opcode,
+ unsigned Opcode,
unsigned Op0, unsigned Op1, uint64_t Imm);
/// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries
@@ -190,7 +190,7 @@ protected:
/// If that fails, it materializes the immediate into a register and try
/// FastEmit_rr instead.
unsigned FastEmit_ri_(MVT VT,
- ISD::NodeType Opcode,
+ unsigned Opcode,
unsigned Op0, uint64_t Imm,
MVT ImmType);
@@ -199,7 +199,7 @@ protected:
/// If that fails, it materializes the immediate into a register and try
/// FastEmit_rr instead.
unsigned FastEmit_rf_(MVT VT,
- ISD::NodeType Opcode,
+ unsigned Opcode,
unsigned Op0, ConstantFP *FPImm,
MVT ImmType);
@@ -208,7 +208,7 @@ protected:
/// immediate operand be emitted.
virtual unsigned FastEmit_i(MVT VT,
MVT RetVT,
- ISD::NodeType Opcode,
+ unsigned Opcode,
uint64_t Imm);
/// FastEmit_f - This method is called by target-independent code
@@ -216,7 +216,7 @@ protected:
/// floating-point immediate operand be emitted.
virtual unsigned FastEmit_f(MVT VT,
MVT RetVT,
- ISD::NodeType Opcode,
+ unsigned Opcode,
ConstantFP *FPImm);
/// FastEmitInst_ - Emit a MachineInstr with no operands and a
@@ -298,7 +298,7 @@ protected:
}
private:
- bool SelectBinaryOp(User *I, ISD::NodeType ISDOpcode);
+ bool SelectBinaryOp(User *I, unsigned ISDOpcode);
bool SelectFNeg(User *I);
@@ -308,7 +308,7 @@ private:
bool SelectBitCast(User *I);
- bool SelectCast(User *I, ISD::NodeType Opcode);
+ bool SelectCast(User *I, unsigned Opcode);
};
}
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index f1bfa01..a12a55a 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -315,6 +315,8 @@ public:
/// 'Orig' instruction, identical in all ways except that the instruction
/// has no parent, prev, or next.
///
+ /// See also TargetInstrInfo::duplicate() for target-specific fixes to cloned
+ /// instructions.
MachineInstr *CloneMachineInstr(const MachineInstr *Orig);
/// DeleteMachineInstr - Delete the given MachineInstr.
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index 87b67d6..c2a0578 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -288,7 +288,7 @@ public:
bool addRegisterKilled(unsigned IncomingReg,
const TargetRegisterInfo *RegInfo,
bool AddIfNotFound = false);
-
+
/// addRegisterDead - We have determined MI defined a register without a use.
/// Look for the operand that defines it and mark it as IsDead. If
/// AddIfNotFound is true, add an implicit operand if it's not found. Returns
@@ -296,6 +296,11 @@ public:
bool addRegisterDead(unsigned IncomingReg, const TargetRegisterInfo *RegInfo,
bool AddIfNotFound = false);
+ /// addRegisterDefined - We have determined MI defines a register. Make sure
+ /// there is an operand defining Reg.
+ void addRegisterDefined(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo);
+
/// isSafeToMove - Return true if it is safe to move this instruction. If
/// SawStore is set to true, it means that there is a store (or call) between
/// the instruction's location and its intended destination.
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index 6ca63f0..8eb0add 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -22,6 +22,7 @@
namespace llvm {
class TargetInstrDesc;
+class MDNode;
namespace RegState {
enum {
@@ -123,6 +124,11 @@ public:
MI->addOperand(MO);
return *this;
}
+
+ const MachineInstrBuilder &addMetadata(MDNode *MD) const {
+ MI->addOperand(MachineOperand::CreateMetadata(MD));
+ return *this;
+ }
};
/// BuildMI - Builder interface. Specify how to create the initial instruction
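
A sketch of the new addMetadata helper on the builder (TID and Variable are assumed context: an instruction description that accepts a metadata operand, and the MDNode describing, e.g., a variable):

    #include "llvm/CodeGen/MachineInstrBuilder.h"
    using namespace llvm;

    void emit(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
              DebugLoc DL, const TargetInstrDesc &TID, MDNode *Variable) {
      // Appends a MachineOperand::CreateMetadata(Variable) operand.
      BuildMI(MBB, I, DL, TID).addMetadata(Variable);
    }
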
diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h
index d3df805..8459a8d 100644
--- a/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/include/llvm/CodeGen/MachineLoopInfo.h
@@ -49,6 +49,8 @@ public:
/// contiguous with the part the contains the header.
MachineBasicBlock *getBottomBlock();
+ void dump() const;
+
private:
friend class LoopInfoBase<MachineBasicBlock, MachineLoop>;
explicit MachineLoop(MachineBasicBlock *MBB)
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index 8748afc..907c25a 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -26,6 +26,7 @@ class GlobalValue;
class MachineInstr;
class TargetMachine;
class MachineRegisterInfo;
+class MDNode;
class raw_ostream;
/// MachineOperand class - Representation of each machine instruction operand.
@@ -42,7 +43,8 @@ public:
MO_JumpTableIndex, ///< Address of indexed Jump Table for switch
MO_ExternalSymbol, ///< Name of external global symbol
MO_GlobalAddress, ///< Address of a global value
- MO_BlockAddress ///< Address of a basic block
+ MO_BlockAddress, ///< Address of a basic block
+ MO_Metadata ///< Metadata reference (for debug info)
};
private:
@@ -94,6 +96,7 @@ private:
MachineBasicBlock *MBB; // For MO_MachineBasicBlock.
const ConstantFP *CFP; // For MO_FPImmediate.
int64_t ImmVal; // For MO_Immediate.
+ MDNode *MD; // For MO_Metadata.
struct { // For MO_Register.
unsigned RegNo;
@@ -158,6 +161,8 @@ public:
bool isSymbol() const { return OpKind == MO_ExternalSymbol; }
/// isBlockAddress - Tests if this is a MO_BlockAddress operand.
bool isBlockAddress() const { return OpKind == MO_BlockAddress; }
+ /// isMetadata - Tests if this is a MO_Metadata operand.
+ bool isMetadata() const { return OpKind == MO_Metadata; }
//===--------------------------------------------------------------------===//
// Accessors for Register Operands
@@ -311,6 +316,11 @@ public:
assert(isSymbol() && "Wrong MachineOperand accessor");
return Contents.OffsetedInfo.Val.SymbolName;
}
+
+ const MDNode *getMetadata() const {
+ assert(isMetadata() && "Wrong MachineOperand accessor");
+ return Contents.MD;
+ }
//===--------------------------------------------------------------------===//
// Mutators for various operand types.
@@ -443,6 +453,13 @@ public:
Op.setTargetFlags(TargetFlags);
return Op;
}
+ static MachineOperand CreateMetadata(MDNode *Meta,
+ unsigned char TargetFlags = 0) {
+ MachineOperand Op(MachineOperand::MO_Metadata);
+ Op.Contents.MD = Meta;
+ Op.setTargetFlags(TargetFlags);
+ return Op;
+ }
friend class MachineInstr;
friend class MachineRegisterInfo;
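
Annotation (sketch): constructing and querying the new operand kind, assuming MD is some MDNode* in scope.

    MachineOperand MO = MachineOperand::CreateMetadata(MD);
    assert(MO.isMetadata() && "expected an MO_Metadata operand");
    const MDNode *Node = MO.getMetadata(); // typed accessor, asserts on misuse
    (void)Node;
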
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 99f8c34..2203f8c 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -170,6 +170,10 @@ namespace llvm {
/// instructions.
FunctionPass *createMachineSinkingPass();
+ /// createOptimizeExtsPass - This pass performs sign / zero extension
+ /// optimization by increasing uses of extended values.
+ FunctionPass *createOptimizeExtsPass();
+
/// createStackSlotColoringPass - This pass performs stack slot coloring.
FunctionPass *createStackSlotColoringPass(bool);
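
Annotation (sketch, not part of the patch): the new pass would be scheduled like any other machine-function cleanup, assuming PM is a PassManagerBase being populated by a target machine.

    PM.add(llvm::createOptimizeExtsPass()); // reuse sign/zero-extended values
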
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index bfd3492..b33b21d 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -111,11 +111,11 @@ protected:
int64_t DesiredMaskS) const;
// Calls to these functions are generated by tblgen.
- SDNode *Select_INLINEASM(SDValue N);
- SDNode *Select_UNDEF(const SDValue &N);
- SDNode *Select_EH_LABEL(const SDValue &N);
- void CannotYetSelect(SDValue N);
- void CannotYetSelectIntrinsic(SDValue N);
+ SDNode *Select_INLINEASM(SDNode *N);
+ SDNode *Select_UNDEF(SDNode *N);
+ SDNode *Select_EH_LABEL(SDNode *N);
+ void CannotYetSelect(SDNode *N);
+ void CannotYetSelectIntrinsic(SDNode *N);
private:
void SelectAllBasicBlocks(Function &Fn, MachineFunction &MF,
@@ -131,6 +131,7 @@ private:
void CodeGenAndEmitDAG();
void LowerArguments(BasicBlock *BB);
+ void ShrinkDemandedOps();
void ComputeLiveOutVRegInfo();
void HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB);
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index 9dc4c7b..0125190 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -149,7 +149,7 @@ namespace llvm {
SimpleTy <= MVT::LAST_VECTOR_VALUETYPE);
}
- /// isPow2VectorType - Retuns true if the given vector is a power of 2.
+ /// isPow2VectorType - Returns true if the given vector is a power of 2.
bool isPow2VectorType() const {
unsigned NElts = getVectorNumElements();
return !(NElts & (NElts - 1));
@@ -437,25 +437,17 @@ namespace llvm {
/// isFloatingPoint - Return true if this is a FP, or a vector FP type.
bool isFloatingPoint() const {
- return isSimple() ?
- ((V >= MVT::f32 && V <= MVT::ppcf128) ||
- (V >= MVT::v2f32 && V <= MVT::v4f64)) : isExtendedFloatingPoint();
+ return isSimple() ? V.isFloatingPoint() : isExtendedFloatingPoint();
}
/// isInteger - Return true if this is an integer, or a vector integer type.
bool isInteger() const {
- return isSimple() ?
- ((V >= MVT::FIRST_INTEGER_VALUETYPE &&
- V <= MVT::LAST_INTEGER_VALUETYPE) ||
- (V >= MVT::v2i8 && V <= MVT::v4i64)) : isExtendedInteger();
+ return isSimple() ? V.isInteger() : isExtendedInteger();
}
/// isVector - Return true if this is a vector value type.
bool isVector() const {
- return isSimple() ?
- (V >= MVT::FIRST_VECTOR_VALUETYPE && V <=
- MVT::LAST_VECTOR_VALUETYPE) :
- isExtendedVector();
+ return isSimple() ? V.isVector() : isExtendedVector();
}
/// is64BitVector - Return true if this is a 64-bit vector type.
@@ -641,7 +633,7 @@ namespace llvm {
static EVT getEVT(const Type *Ty, bool HandleUnknown = false);
intptr_t getRawBits() {
- if (V.SimpleTy <= MVT::LastSimpleValueType)
+ if (isSimple())
return V.SimpleTy;
else
return (intptr_t)(LLVMTy);
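
Annotation (sketch): the refactor delegates the simple-type predicates to MVT without changing behavior. This assumes the usual implicit SimpleValueType-to-EVT conversion of this era.

    EVT VT = MVT::v2f32;
    assert(VT.isSimple() && VT.isVector() && VT.isFloatingPoint());
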
diff --git a/include/llvm/IntrinsicInst.h b/include/llvm/IntrinsicInst.h
index 3c18de1..f40e8cc 100644
--- a/include/llvm/IntrinsicInst.h
+++ b/include/llvm/IntrinsicInst.h
@@ -25,6 +25,7 @@
#define LLVM_INTRINSICINST_H
#include "llvm/Constants.h"
+#include "llvm/Metadata.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
@@ -58,16 +59,13 @@ namespace llvm {
/// DbgInfoIntrinsic - This is the common base class for debug info intrinsics
///
- struct DbgInfoIntrinsic : public IntrinsicInst {
+ class DbgInfoIntrinsic : public IntrinsicInst {
+ public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const DbgInfoIntrinsic *) { return true; }
static inline bool classof(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {
- case Intrinsic::dbg_stoppoint:
- case Intrinsic::dbg_func_start:
- case Intrinsic::dbg_region_start:
- case Intrinsic::dbg_region_end:
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
return true;
@@ -81,84 +79,16 @@ namespace llvm {
static Value *StripCast(Value *C);
};
- /// DbgStopPointInst - This represents the llvm.dbg.stoppoint instruction.
- ///
- struct DbgStopPointInst : public DbgInfoIntrinsic {
- Value *getLineValue() const { return const_cast<Value*>(getOperand(1)); }
- Value *getColumnValue() const { return const_cast<Value*>(getOperand(2)); }
- MDNode *getContext() const {
- return cast<MDNode>(getOperand(3));
- }
-
- unsigned getLine() const {
- return unsigned(cast<ConstantInt>(getOperand(1))->getZExtValue());
- }
- unsigned getColumn() const {
- return unsigned(cast<ConstantInt>(getOperand(2))->getZExtValue());
- }
-
- Value *getFileName() const;
- Value *getDirectory() const;
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const DbgStopPointInst *) { return true; }
- static inline bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::dbg_stoppoint;
- }
- static inline bool classof(const Value *V) {
- return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
- }
- };
-
- /// DbgFuncStartInst - This represents the llvm.dbg.func.start instruction.
- ///
- struct DbgFuncStartInst : public DbgInfoIntrinsic {
- MDNode *getSubprogram() const { return cast<MDNode>(getOperand(1)); }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const DbgFuncStartInst *) { return true; }
- static inline bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::dbg_func_start;
- }
- static inline bool classof(const Value *V) {
- return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
- }
- };
-
- /// DbgRegionStartInst - This represents the llvm.dbg.region.start
- /// instruction.
- struct DbgRegionStartInst : public DbgInfoIntrinsic {
- MDNode *getContext() const { return cast<MDNode>(getOperand(1)); }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const DbgRegionStartInst *) { return true; }
- static inline bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::dbg_region_start;
- }
- static inline bool classof(const Value *V) {
- return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
- }
- };
-
- /// DbgRegionEndInst - This represents the llvm.dbg.region.end instruction.
- ///
- struct DbgRegionEndInst : public DbgInfoIntrinsic {
- MDNode *getContext() const { return cast<MDNode>(getOperand(1)); }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const DbgRegionEndInst *) { return true; }
- static inline bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::dbg_region_end;
- }
- static inline bool classof(const Value *V) {
- return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
- }
- };
-
/// DbgDeclareInst - This represents the llvm.dbg.declare instruction.
///
- struct DbgDeclareInst : public DbgInfoIntrinsic {
- Value *getAddress() const { return getOperand(1); }
+ class DbgDeclareInst : public DbgInfoIntrinsic {
+ public:
+ Value *getAddress() const {
+ if (MDNode* MD = dyn_cast<MDNode>(getOperand(1)))
+ return MD->getOperand(0);
+ else
+ return NULL;
+ }
MDNode *getVariable() const { return cast<MDNode>(getOperand(2)); }
// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -173,10 +103,16 @@ namespace llvm {
/// DbgValueInst - This represents the llvm.dbg.value instruction.
///
- struct DbgValueInst : public DbgInfoIntrinsic {
- Value *getValue() const;
- Value *getOffset() const { return getOperand(2); }
- MDNode *getVariable() const { return cast<MDNode>(getOperand(3)); }
+ class DbgValueInst : public DbgInfoIntrinsic {
+ public:
+ const Value *getValue() const;
+ Value *getValue();
+ uint64_t getOffset() const {
+ return cast<ConstantInt>(
+ const_cast<Value*>(getOperand(2)))->getZExtValue();
+ }
+ const MDNode *getVariable() const { return cast<MDNode>(getOperand(3)); }
+ MDNode *getVariable() { return cast<MDNode>(getOperand(3)); }
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const DbgValueInst *) { return true; }
@@ -190,7 +126,8 @@ namespace llvm {
/// MemIntrinsic - This is the common base class for memset/memcpy/memmove.
///
- struct MemIntrinsic : public IntrinsicInst {
+ class MemIntrinsic : public IntrinsicInst {
+ public:
Value *getRawDest() const { return const_cast<Value*>(getOperand(1)); }
Value *getLength() const { return const_cast<Value*>(getOperand(3)); }
@@ -247,7 +184,8 @@ namespace llvm {
/// MemSetInst - This class wraps the llvm.memset intrinsic.
///
- struct MemSetInst : public MemIntrinsic {
+ class MemSetInst : public MemIntrinsic {
+ public:
/// get* - Return the arguments to the instruction.
///
Value *getValue() const { return const_cast<Value*>(getOperand(2)); }
@@ -270,7 +208,8 @@ namespace llvm {
/// MemTransferInst - This class wraps the llvm.memcpy/memmove intrinsics.
///
- struct MemTransferInst : public MemIntrinsic {
+ class MemTransferInst : public MemIntrinsic {
+ public:
/// get* - Return the arguments to the instruction.
///
Value *getRawSource() const { return const_cast<Value*>(getOperand(2)); }
@@ -300,7 +239,8 @@ namespace llvm {
/// MemCpyInst - This class wraps the llvm.memcpy intrinsic.
///
- struct MemCpyInst : public MemTransferInst {
+ class MemCpyInst : public MemTransferInst {
+ public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const MemCpyInst *) { return true; }
static inline bool classof(const IntrinsicInst *I) {
@@ -313,7 +253,8 @@ namespace llvm {
/// MemMoveInst - This class wraps the llvm.memmove intrinsic.
///
- struct MemMoveInst : public MemTransferInst {
+ class MemMoveInst : public MemTransferInst {
+ public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const MemMoveInst *) { return true; }
static inline bool classof(const IntrinsicInst *I) {
@@ -326,7 +267,8 @@ namespace llvm {
/// EHSelectorInst - This represents the llvm.eh.selector instruction.
///
- struct EHSelectorInst : public IntrinsicInst {
+ class EHSelectorInst : public IntrinsicInst {
+ public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const EHSelectorInst *) { return true; }
static inline bool classof(const IntrinsicInst *I) {
@@ -340,7 +282,8 @@ namespace llvm {
/// MemoryUseIntrinsic - This is the common base class for the memory use
/// marker intrinsics.
///
- struct MemoryUseIntrinsic : public IntrinsicInst {
+ class MemoryUseIntrinsic : public IntrinsicInst {
+ public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const MemoryUseIntrinsic *) { return true; }
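
Annotation (illustrative sketch): after this change only dbg.declare and dbg.value remain, so a consumer walks them roughly as below; the function name is hypothetical.

    #include "llvm/IntrinsicInst.h"
    using namespace llvm;

    static void visitDebugIntrinsics(BasicBlock &BB) {
      for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
        if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
          Value *Addr = DDI->getAddress(); // null unless wrapped in an MDNode
          MDNode *Var = DDI->getVariable();
          (void)Addr; (void)Var;
        } else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
          uint64_t Off = DVI->getOffset(); // now a plain integer, not a Value*
          (void)Off;
        }
      }
    }
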
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index c472f2b..684f872 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -282,14 +282,8 @@ let Properties = [IntrNoMem] in {
// optimizers can change them aggressively. Special handling needed in a few
// places.
let Properties = [IntrNoMem] in {
- def int_dbg_stoppoint : Intrinsic<[llvm_void_ty],
- [llvm_i32_ty, llvm_i32_ty,
- llvm_metadata_ty]>;
- def int_dbg_region_start : Intrinsic<[llvm_void_ty], [llvm_metadata_ty]>;
- def int_dbg_region_end : Intrinsic<[llvm_void_ty], [llvm_metadata_ty]>;
- def int_dbg_func_start : Intrinsic<[llvm_void_ty], [llvm_metadata_ty]>;
def int_dbg_declare : Intrinsic<[llvm_void_ty],
- [llvm_descriptor_ty, llvm_metadata_ty]>;
+ [llvm_metadata_ty, llvm_metadata_ty]>;
def int_dbg_value : Intrinsic<[llvm_void_ty],
[llvm_metadata_ty, llvm_i64_ty,
llvm_metadata_ty]>;
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 4aba210..a7e2e05 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -53,7 +53,6 @@ namespace {
(void) llvm::createLibCallAliasAnalysisPass(0);
(void) llvm::createScalarEvolutionAliasAnalysisPass();
(void) llvm::createBlockPlacementPass();
- (void) llvm::createBlockProfilerPass();
(void) llvm::createBreakCriticalEdgesPass();
(void) llvm::createCFGSimplificationPass();
(void) llvm::createConstantMergePass();
@@ -71,7 +70,6 @@ namespace {
(void) llvm::createOptimalEdgeProfilerPass();
(void) llvm::createFunctionInliningPass();
(void) llvm::createAlwaysInlinerPass();
- (void) llvm::createFunctionProfilerPass();
(void) llvm::createGlobalDCEPass();
(void) llvm::createGlobalOptimizerPass();
(void) llvm::createGlobalsModRefPass();
@@ -120,8 +118,6 @@ namespace {
(void) llvm::createTailDuplicationPass();
(void) llvm::createJumpThreadingPass();
(void) llvm::createUnifyFunctionExitNodesPass();
- (void) llvm::createNullProfilerRSPass();
- (void) llvm::createRSProfilingPass();
(void) llvm::createInstCountPass();
(void) llvm::createCodeGenPreparePass();
(void) llvm::createGVNPass();
diff --git a/include/llvm/MC/MCAsmLexer.h b/include/llvm/MC/MCAsmLexer.h
index da471d2..e9a6e3f 100644
--- a/include/llvm/MC/MCAsmLexer.h
+++ b/include/llvm/MC/MCAsmLexer.h
@@ -20,7 +20,8 @@ class SMLoc;
class Target;
/// AsmToken - Target independent representation for an assembler token.
-struct AsmToken {
+class AsmToken {
+public:
enum TokenKind {
// Markers
Eof, Error,
diff --git a/include/llvm/MC/MCParsedAsmOperand.h b/include/llvm/MC/MCParsedAsmOperand.h
new file mode 100644
index 0000000..7c2f5be
--- /dev/null
+++ b/include/llvm/MC/MCParsedAsmOperand.h
@@ -0,0 +1,33 @@
+//===-- llvm/MC/MCParsedAsmOperand.h - Asm Parser Operand -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCASMOPERAND_H
+#define LLVM_MC_MCASMOPERAND_H
+
+namespace llvm {
+class SMLoc;
+
+/// MCParsedAsmOperand - This abstract class represents a source-level assembly
+/// instruction operand. It should be subclassed by target-specific code. This
+/// base class is used by target-independent clients and is the interface
+/// between parsing an asm instruction and recognizing it.
+class MCParsedAsmOperand {
+public:
+ MCParsedAsmOperand() {}
+ virtual ~MCParsedAsmOperand() {}
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ virtual SMLoc getStartLoc() const;
+ /// getEndLoc - Get the location of the last token of this operand.
+ virtual SMLoc getEndLoc() const;
+};
+
+} // end namespace llvm.
+
+#endif
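
Annotation (sketch): a target parser subclasses this interface. The "Foo" target is hypothetical, and SMLoc is assumed to live in Support/SourceMgr.h in this tree.

    #include "llvm/MC/MCParsedAsmOperand.h"
    #include "llvm/Support/SourceMgr.h"

    namespace {
    // Hypothetical target operand carrying only its source locations.
    class FooOperand : public llvm::MCParsedAsmOperand {
      llvm::SMLoc Start, End;
    public:
      FooOperand(llvm::SMLoc S, llvm::SMLoc E) : Start(S), End(E) {}
      virtual llvm::SMLoc getStartLoc() const { return Start; }
      virtual llvm::SMLoc getEndLoc() const { return End; }
    };
    }
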
diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h
index cfe04d8..eb59453 100644
--- a/include/llvm/MC/MCSymbol.h
+++ b/include/llvm/MC/MCSymbol.h
@@ -136,6 +136,11 @@ namespace llvm {
/// dump - Print the value to stderr.
void dump() const;
+
+ /// printMangledName - Print the specified string in mangled form if it uses
+ /// any unusual characters.
+ static void printMangledName(StringRef Str, raw_ostream &OS,
+ const MCAsmInfo *MAI);
};
} // end namespace llvm
diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h
index ec6ba1b..179010b 100644
--- a/include/llvm/Metadata.h
+++ b/include/llvm/Metadata.h
@@ -31,7 +31,7 @@ template<typename ValueSubClass, typename ItemParentClass>
//===----------------------------------------------------------------------===//
-// MetadataBase - A base class for MDNode, MDString and NamedMDNode.
+// MetadataBase - A base class for MDNode and MDString.
class MetadataBase : public Value {
protected:
MetadataBase(const Type *Ty, unsigned scid)
@@ -42,8 +42,7 @@ public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const MetadataBase *) { return true; }
static bool classof(const Value *V) {
- return V->getValueID() == MDStringVal || V->getValueID() == MDNodeVal
- || V->getValueID() == NamedMDNodeVal;
+ return V->getValueID() == MDStringVal || V->getValueID() == MDNodeVal;
}
};
@@ -113,6 +112,13 @@ class MDNode : public MetadataBase, public FoldingSetNode {
DestroyFlag = 1 << 2
};
+ // FunctionLocal enums.
+ enum FunctionLocalness {
+ FL_Unknown = -1,
+ FL_No = 0,
+ FL_Yes = 1
+ };
+
// Replace each instance of F from the operand list of this node with T.
void replaceOperand(MDNodeOperand *Op, Value *NewVal);
~MDNode();
@@ -120,10 +126,17 @@ class MDNode : public MetadataBase, public FoldingSetNode {
protected:
explicit MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals,
bool isFunctionLocal);
+
+ static MDNode *getMDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals,
+ FunctionLocalness FL);
public:
// Constructors and destructors.
- static MDNode *get(LLVMContext &Context, Value *const *Vals, unsigned NumVals,
- bool isFunctionLocal = false);
+ static MDNode *get(LLVMContext &Context, Value *const *Vals,
+ unsigned NumVals);
+ // getWhenValsUnresolved - Construct MDNode determining function-localness
+ // from isFunctionLocal argument, not by analyzing Vals.
+ static MDNode *getWhenValsUnresolved(LLVMContext &Context, Value *const *Vals,
+ unsigned NumVals, bool isFunctionLocal);
/// getOperand - Return specified operand.
Value *getOperand(unsigned i) const;
@@ -138,6 +151,11 @@ public:
bool isFunctionLocal() const {
return (getSubclassDataFromValue() & FunctionLocalBit) != 0;
}
+
+ // getFunction - If this metadata is function-local and recursively has a
+ // function-local operand, return the first such operand's parent function.
+ // Otherwise, return null.
+ Function *getFunction() const;
// destroy - Delete this node. Only when there are no uses.
void destroy();
@@ -167,24 +185,25 @@ private:
};
//===----------------------------------------------------------------------===//
-/// NamedMDNode - a tuple of other metadata.
+/// NamedMDNode - a tuple of MDNodes.
/// A NamedMDNode is always named, and all of its operands are MDNodes.
-class NamedMDNode : public MetadataBase, public ilist_node<NamedMDNode> {
+class NamedMDNode : public Value, public ilist_node<NamedMDNode> {
friend class SymbolTableListTraits<NamedMDNode, Module>;
+ friend struct ilist_traits<NamedMDNode>;
friend class LLVMContextImpl;
-
NamedMDNode(const NamedMDNode &); // DO NOT IMPLEMENT
+ std::string Name;
Module *Parent;
- void *Operands; // SmallVector<TrackingVH<MetadataBase>, 4>
+ void *Operands; // SmallVector<WeakVH<MDNode>, 4>
void setParent(Module *M) { Parent = M; }
protected:
- explicit NamedMDNode(LLVMContext &C, const Twine &N, MetadataBase*const *Vals,
+ explicit NamedMDNode(LLVMContext &C, const Twine &N, MDNode*const *Vals,
unsigned NumVals, Module *M = 0);
public:
- static NamedMDNode *Create(LLVMContext &C, const Twine &N,
- MetadataBase *const *MDs,
+ static NamedMDNode *Create(LLVMContext &C, const Twine &N,
+ MDNode *const *MDs,
unsigned NumMDs, Module *M = 0) {
return new NamedMDNode(C, N, MDs, NumMDs, M);
}
@@ -206,14 +225,20 @@ public:
inline const Module *getParent() const { return Parent; }
/// getOperand - Return specified operand.
- MetadataBase *getOperand(unsigned i) const;
+ MDNode *getOperand(unsigned i) const;
/// getNumOperands - Return the number of NamedMDNode operands.
unsigned getNumOperands() const;
/// addOperand - Add metadata operand.
- void addOperand(MetadataBase *M);
-
+ void addOperand(MDNode *M);
+
+ /// setName - Set the name of this named metadata.
+ void setName(const Twine &NewName);
+
+ /// getName - Return a constant reference to this named metadata's name.
+ StringRef getName() const;
+
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const NamedMDNode *) { return true; }
static bool classof(const Value *V) {
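
Annotation (sketch): the reworked API in one place, assuming Context, M, and V are in scope; the "llvm.example" names are hypothetical.

    Value *Elts[] = { V };
    MDNode *N = MDNode::get(Context, Elts, 1);       // locality now inferred
    MDNode *Nodes[] = { N };
    NamedMDNode *NMD =
        NamedMDNode::Create(Context, "llvm.example", Nodes, 1, &M);
    NMD->setName("llvm.example.renamed");            // names are now mutable
    StringRef Nm = NMD->getName();
    (void)Nm;
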
diff --git a/include/llvm/Module.h b/include/llvm/Module.h
index 9a8b53a..3c8055d 100644
--- a/include/llvm/Module.h
+++ b/include/llvm/Module.h
@@ -26,6 +26,7 @@ namespace llvm {
class FunctionType;
class LLVMContext;
+class MDSymbolTable;
template<> struct ilist_traits<Function>
: public SymbolTableListTraits<Function, Module> {
@@ -56,6 +57,7 @@ template<> struct ilist_traits<GlobalAlias>
static GlobalAlias *createSentinel();
static void destroySentinel(GlobalAlias *GA) { delete GA; }
};
+
template<> struct ilist_traits<NamedMDNode>
: public SymbolTableListTraits<NamedMDNode, Module> {
// createSentinel is used to get hold of a node that marks the end of
@@ -68,6 +70,8 @@ template<> struct ilist_traits<NamedMDNode>
NamedMDNode *provideInitialHead() const { return createSentinel(); }
NamedMDNode *ensureHead(NamedMDNode*) const { return createSentinel(); }
static void noteHead(NamedMDNode*, NamedMDNode*) {}
+ void addNodeToList(NamedMDNode *N);
+ void removeNodeFromList(NamedMDNode *N);
private:
mutable ilist_node<NamedMDNode> Sentinel;
};
@@ -131,19 +135,20 @@ public:
/// @name Member Variables
/// @{
private:
- LLVMContext &Context; ///< The LLVMContext from which types and
- ///< constants are allocated.
- GlobalListType GlobalList; ///< The Global Variables in the module
- FunctionListType FunctionList; ///< The Functions in the module
- AliasListType AliasList; ///< The Aliases in the module
- LibraryListType LibraryList; ///< The Libraries needed by the module
- NamedMDListType NamedMDList; ///< The named metadata in the module
- std::string GlobalScopeAsm; ///< Inline Asm at global scope.
- ValueSymbolTable *ValSymTab; ///< Symbol table for values
- TypeSymbolTable *TypeSymTab; ///< Symbol table for types
- std::string ModuleID; ///< Human readable identifier for the module
- std::string TargetTriple; ///< Platform target triple Module compiled on
- std::string DataLayout; ///< Target data description
+ LLVMContext &Context; ///< The LLVMContext from which types and
+ ///< constants are allocated.
+ GlobalListType GlobalList; ///< The Global Variables in the module
+ FunctionListType FunctionList; ///< The Functions in the module
+ AliasListType AliasList; ///< The Aliases in the module
+ LibraryListType LibraryList; ///< The Libraries needed by the module
+ NamedMDListType NamedMDList; ///< The named metadata in the module
+ std::string GlobalScopeAsm; ///< Inline Asm at global scope.
+ ValueSymbolTable *ValSymTab; ///< Symbol table for values
+ TypeSymbolTable *TypeSymTab; ///< Symbol table for types
+ std::string ModuleID; ///< Human readable identifier for the module
+ std::string TargetTriple; ///< Platform target triple Module compiled on
+ std::string DataLayout; ///< Target data description
+ MDSymbolTable *NamedMDSymTab; ///< NamedMDNode names.
friend class Constant;
@@ -379,6 +384,10 @@ public:
const TypeSymbolTable &getTypeSymbolTable() const { return *TypeSymTab; }
/// Get the Module's symbol table of types
TypeSymbolTable &getTypeSymbolTable() { return *TypeSymTab; }
+ /// Get the symbol table of named metadata
+ const MDSymbolTable &getMDSymbolTable() const { return *NamedMDSymTab; }
+ /// Get the Module's symbol table of named metadata
+ MDSymbolTable &getMDSymbolTable() { return *NamedMDSymTab; }
/// @}
/// @name Global Variable Iteration
diff --git a/include/llvm/Support/CFG.h b/include/llvm/Support/CFG.h
index 3a20696..90b95bf 100644
--- a/include/llvm/Support/CFG.h
+++ b/include/llvm/Support/CFG.h
@@ -93,7 +93,7 @@ class SuccIterator : public std::iterator<std::bidirectional_iterator_tag,
public:
typedef SuccIterator<Term_, BB_> _Self;
typedef typename super::pointer pointer;
- // TODO: This can be random access iterator, need operator+ and stuff tho
+  // TODO: This can be a random access iterator; only operator[] is missing.
inline SuccIterator(Term_ T) : Term(T), idx(0) { // begin iterator
assert(T && "getTerminator returned null!");
@@ -109,6 +109,10 @@ public:
return *this;
}
+  inline bool index_is_valid(int idx) {
+ return idx >= 0 && (unsigned) idx < Term->getNumSuccessors();
+ }
+
/// getSuccessorIndex - This is used to interface between code that wants to
/// operate on terminator instructions directly.
unsigned getSuccessorIndex() const { return idx; }
@@ -120,6 +124,7 @@ public:
inline pointer operator->() const { return operator*(); }
inline _Self& operator++() { ++idx; return *this; } // Preincrement
+
inline _Self operator++(int) { // Postincrement
_Self tmp = *this; ++*this; return tmp;
}
@@ -128,6 +133,67 @@ public:
inline _Self operator--(int) { // Postdecrement
_Self tmp = *this; --*this; return tmp;
}
+
+ inline bool operator<(const _Self& x) const {
+ assert(Term == x.Term && "Cannot compare iterators of different blocks!");
+ return idx < x.idx;
+ }
+
+ inline bool operator<=(const _Self& x) const {
+ assert(Term == x.Term && "Cannot compare iterators of different blocks!");
+ return idx <= x.idx;
+ }
+ inline bool operator>=(const _Self& x) const {
+ assert(Term == x.Term && "Cannot compare iterators of different blocks!");
+ return idx >= x.idx;
+ }
+
+  inline bool operator>(const _Self& x) const {
+    assert(Term == x.Term && "Cannot compare iterators of different blocks!");
+    return idx > x.idx;
+  }
+
+ inline _Self& operator+=(int Right) {
+ unsigned new_idx = idx + Right;
+ assert(index_is_valid(new_idx) && "Iterator index out of bound");
+ idx = new_idx;
+ return *this;
+ }
+
+ inline _Self operator+(int Right) {
+ _Self tmp = *this;
+ tmp += Right;
+ return tmp;
+ }
+
+ inline _Self& operator-=(int Right) {
+ return operator+=(-Right);
+ }
+
+ inline _Self operator-(int Right) {
+ return operator+(-Right);
+ }
+
+ inline int operator-(const _Self& x) {
+ assert(Term == x.Term && "Cannot work on iterators of different blocks!");
+ int distance = idx - x.idx;
+ return distance;
+ }
+
+ // This works for read access, however write access is difficult as changes
+ // to Term are only possible with Term->setSuccessor(idx). Pointers that can
+ // be modified are not available.
+ //
+ // inline pointer operator[](int offset) {
+ // _Self tmp = *this;
+ // tmp += offset;
+ // return tmp.operator*();
+ // }
+
+ /// Get the source BB of this iterator.
+ inline BB_ *getSource() {
+ return Term->getParent();
+ }
};
typedef SuccIterator<TerminatorInst*, BasicBlock> succ_iterator;
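
Annotation (sketch): with these operators, successor iterators support most random-access idioms. BB is assumed to be a BasicBlock*; the distance check keeps the bounds assertion in operator+ happy.

    succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
    if (SE - SI >= 2) {
      BasicBlock *Second = *(SI + 1);   // bounds-asserted by index_is_valid()
      BasicBlock *Source = SI.getSource();
      (void)Second; (void)Source;
    }
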
diff --git a/include/llvm/Support/FormattedStream.h b/include/llvm/Support/FormattedStream.h
index 09ab17c..af546f0 100644
--- a/include/llvm/Support/FormattedStream.h
+++ b/include/llvm/Support/FormattedStream.h
@@ -144,6 +144,10 @@ formatted_raw_ostream &fouts();
/// standard error. Use it like: ferrs() << "foo" << "bar";
formatted_raw_ostream &ferrs();
+/// fdbgs() - This returns a reference to a formatted_raw_ostream for
+/// debug output. Use it like: fdbgs() << "foo" << "bar";
+formatted_raw_ostream &fdbgs();
+
} // end llvm namespace
diff --git a/include/llvm/Support/Mangler.h b/include/llvm/Support/Mangler.h
index 03c5648..aa230d4 100644
--- a/include/llvm/Support/Mangler.h
+++ b/include/llvm/Support/Mangler.h
@@ -19,6 +19,7 @@
#include <string>
namespace llvm {
+class Twine;
class Type;
class Module;
class Value;
@@ -101,9 +102,25 @@ public:
/// specified suffix. If 'ForcePrivate' is specified, the label is specified
/// to have a private label prefix.
///
+ /// FIXME: This is deprecated, new code should use getNameWithPrefix and use
+ /// MCSymbol printing to handle quotes or not etc.
+ ///
std::string getMangledName(const GlobalValue *V, const char *Suffix = "",
bool ForcePrivate = false);
+ /// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
+ /// and the specified global variable's name. If the global variable doesn't
+ /// have a name, this fills in a unique name for the global.
+ void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV,
+ bool isImplicitlyPrivate);
+
+ /// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
+ /// and the specified name as the global variable name. GVName must not be
+ /// empty.
+ void getNameWithPrefix(SmallVectorImpl<char> &OutName, const Twine &GVName,
+ ManglerPrefixTy PrefixTy = Mangler::Default);
+
+private:
/// makeNameProper - We don't want identifier names with ., space, or
/// - in them, so we mangle these characters into the strings "d_",
/// "s_", and "D_", respectively. This is a very simple mangling that
@@ -111,14 +128,13 @@ public:
/// does this for you, so there's no point calling it on the result
/// from getValueName.
///
- std::string makeNameProper(const std::string &x,
- ManglerPrefixTy PrefixTy = Mangler::Default);
+ /// FIXME: This is deprecated, new code should use getNameWithPrefix and use
+ /// MCSymbol printing to handle quotes or not etc.
+ ///
+ void makeNameProper(SmallVectorImpl<char> &OutName,
+ const Twine &Name,
+ ManglerPrefixTy PrefixTy = Mangler::Default);
- /// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
- /// and the specified global variable's name. If the global variable doesn't
- /// have a name, this fills in a unique name for the global.
- void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV,
- bool isImplicitlyPrivate);
};
} // End llvm namespace
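
Annotation (sketch): callers migrate from the returned std::string to filling a SmallString; Mang is assumed to be a Mangler and GV a GlobalValue*.

    #include "llvm/ADT/SmallString.h"

    SmallString<64> Name;
    Mang.getNameWithPrefix(Name, GV, /*isImplicitlyPrivate=*/false);
    // Name.str() yields a StringRef over the mangled name; short names
    // incur no heap allocation.
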
diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h
index 438b021e..fa12416 100644
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -160,7 +160,7 @@ inline unsigned CountLeadingZeros_32(uint32_t Value) {
#else
if (!Value) return 32;
Count = 0;
- // bisecton method for count leading zeros
+ // bisection method for count leading zeros
for (unsigned Shift = 32 >> 1; Shift; Shift >>= 1) {
uint32_t Tmp = Value >> Shift;
if (Tmp) {
@@ -197,7 +197,7 @@ inline unsigned CountLeadingZeros_64(uint64_t Value) {
if (sizeof(long) == sizeof(int64_t)) {
if (!Value) return 64;
Count = 0;
- // bisecton method for count leading zeros
+ // bisection method for count leading zeros
for (unsigned Shift = 64 >> 1; Shift; Shift >>= 1) {
uint64_t Tmp = Value >> Shift;
if (Tmp) {
diff --git a/include/llvm/Support/PatternMatch.h b/include/llvm/Support/PatternMatch.h
index c0b6a6b..23daad9 100644
--- a/include/llvm/Support/PatternMatch.h
+++ b/include/llvm/Support/PatternMatch.h
@@ -437,7 +437,7 @@ m_SelectCst(const Cond &C) {
// Matchers for CastInst classes
//
-template<typename Op_t, typename Class>
+template<typename Op_t, unsigned Opcode>
struct CastClass_match {
Op_t Op;
@@ -445,17 +445,28 @@ struct CastClass_match {
template<typename OpTy>
bool match(OpTy *V) {
- if (Class *I = dyn_cast<Class>(V))
- return Op.match(I->getOperand(0));
+ if (CastInst *I = dyn_cast<CastInst>(V))
+ return I->getOpcode() == Opcode && Op.match(I->getOperand(0));
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ return CE->getOpcode() == Opcode && Op.match(CE->getOperand(0));
return false;
}
};
-template<typename Class, typename OpTy>
-inline CastClass_match<OpTy, Class> m_Cast(const OpTy &Op) {
- return CastClass_match<OpTy, Class>(Op);
+/// m_PtrToInt
+template<typename OpTy>
+inline CastClass_match<OpTy, Instruction::PtrToInt>
+m_PtrToInt(const OpTy &Op) {
+ return CastClass_match<OpTy, Instruction::PtrToInt>(Op);
}
+/// m_Trunc
+template<typename OpTy>
+inline CastClass_match<OpTy, Instruction::Trunc>
+m_Trunc(const OpTy &Op) {
+ return CastClass_match<OpTy, Instruction::Trunc>(Op);
+}
+
//===----------------------------------------------------------------------===//
// Matchers for unary operators
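
Annotation (sketch): the opcode-parameterized matcher now fires on constant expressions as well as cast instructions; V is assumed to be a Value* in scope.

    #include "llvm/Support/PatternMatch.h"
    using namespace llvm::PatternMatch;

    Value *X;
    if (match(V, m_Trunc(m_Value(X)))) {
      // V is 'trunc X', either a TruncInst or a trunc ConstantExpr.
    }
    if (match(V, m_PtrToInt(m_Value(X)))) {
      // Likewise for ptrtoint.
    }
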
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index 6f1e066..206e42e 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -477,6 +477,13 @@ def COPY_TO_REGCLASS : Instruction {
let neverHasSideEffects = 1;
let isAsCheapAsAMove = 1;
}
+def DEBUG_VALUE : Instruction {
+ let OutOperandList = (ops);
+ let InOperandList = (ops unknown:$value, i64imm:$offset, unknown:$meta);
+ let AsmString = "DEBUG_VALUE";
+ let Namespace = "TargetInstrInfo";
+ let isAsCheapAsAMove = 1;
+}
}
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Target/TargetAsmParser.h b/include/llvm/Target/TargetAsmParser.h
index ef1fc49..1d3da8b 100644
--- a/include/llvm/Target/TargetAsmParser.h
+++ b/include/llvm/Target/TargetAsmParser.h
@@ -10,13 +10,15 @@
#ifndef LLVM_TARGET_TARGETPARSER_H
#define LLVM_TARGET_TARGETPARSER_H
-#include "llvm/MC/MCAsmLexer.h"
-
namespace llvm {
class MCAsmParser;
class MCInst;
class StringRef;
class Target;
+class SMLoc;
+class AsmToken;
+class MCParsedAsmOperand;
+template <typename T> class SmallVectorImpl;
/// TargetAsmParser - Generic interface to target specific assembly parsers.
class TargetAsmParser {
@@ -43,9 +45,11 @@ public:
//
/// \param AP - The current parser object.
/// \param Name - The instruction name.
- /// \param Inst [out] - On success, the parsed instruction.
+  /// \param Operands [out] - The list of parsed operands; this returns
+ /// ownership of them to the caller.
/// \return True on failure.
- virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst) = 0;
+ virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) = 0;
/// ParseDirective - Parse a target specific assembler directive
///
@@ -58,6 +62,14 @@ public:
///
/// \param ID - the identifier token of the directive.
virtual bool ParseDirective(AsmToken DirectiveID) = 0;
+
+ /// MatchInstruction - Recognize a series of operands of a parsed instruction
+ /// as an actual MCInst. This returns false and fills in Inst on success and
+ /// returns true on failure to match.
+ virtual bool
+ MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCInst &Inst) = 0;
+
};
} // End llvm namespace
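
Annotation (sketch of the intended two-phase flow): TAP, Name, and NameLoc are assumed in scope, and the Error helper is hypothetical.

    SmallVector<MCParsedAsmOperand*, 8> Operands;
    if (TAP->ParseInstruction(Name, NameLoc, Operands))
      return true;                        // parse failed, diagnostics emitted
    MCInst Inst;
    if (TAP->MatchInstruction(Operands, Inst))
      return Error(NameLoc, "unrecognized instruction"); // hypothetical helper
    // ... emit Inst; the caller owns Operands and must delete them.
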
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 1bcd6fd..8e2157e 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -88,7 +88,10 @@ public:
/// only needed in cases where the register classes implied by the
/// instructions are insufficient. The actual MachineInstrs to perform
/// the copy are emitted with the TargetInstrInfo::copyRegToReg hook.
- COPY_TO_REGCLASS = 10
+ COPY_TO_REGCLASS = 10,
+
+ // DEBUG_VALUE - a mapping of the llvm.dbg.value intrinsic
+ DEBUG_VALUE = 11
};
unsigned getNumOpcodes() const { return NumOpcodes; }
@@ -143,6 +146,18 @@ public:
return false;
}
+ /// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
+ /// extension instruction. That is, it's like a copy where it's legal for the
+ /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
+ /// true, then it's expected the pre-extension value is available as a subreg
+ /// of the result register. This also returns the sub-register index in
+ /// SubIdx.
+ virtual bool isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+ return false;
+ }
+
/// isIdentityCopy - Return true if the instruction is a copy (or
/// extract_subreg, insert_subreg, subreg_to_reg) where the source and
/// destination registers are the same.
@@ -232,6 +247,14 @@ public:
const MachineInstr *Orig,
const TargetRegisterInfo *TRI) const = 0;
+ /// duplicate - Create a duplicate of the Orig instruction in MF. This is like
+ /// MachineFunction::CloneMachineInstr(), but the target may update operands
+ /// that are required to be unique.
+ ///
+ /// The instruction must be duplicable as indicated by isNotDuplicable().
+ virtual MachineInstr *duplicate(MachineInstr *Orig,
+ MachineFunction &MF) const = 0;
+
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
/// may be able to convert a two-address instruction into one or more true
@@ -560,6 +583,8 @@ public:
unsigned DestReg, unsigned SubReg,
const MachineInstr *Orig,
const TargetRegisterInfo *TRI) const;
+ virtual MachineInstr *duplicate(MachineInstr *Orig,
+ MachineFunction &MF) const;
virtual bool isIdentical(const MachineInstr *MI,
const MachineInstr *Other,
const MachineRegisterInfo *MRI) const;
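
Annotation (sketch): a coalescer might query the new hook like so, assuming TII and MI are in scope.

    unsigned SrcReg, DstReg, SubIdx;
    if (TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) {
      // The pre-extension value lives in DstReg:SubIdx, so the copy-like
      // extension can be coalesced away.
    }
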
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index dd28a87..15da845 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -774,10 +774,12 @@ public:
/// that want to combine
struct TargetLoweringOpt {
SelectionDAG &DAG;
+ bool ShrinkOps;
SDValue Old;
SDValue New;
- explicit TargetLoweringOpt(SelectionDAG &InDAG) : DAG(InDAG) {}
+ explicit TargetLoweringOpt(SelectionDAG &InDAG, bool Shrink = false) :
+ DAG(InDAG), ShrinkOps(Shrink) {}
bool CombineTo(SDValue O, SDValue N) {
Old = O;
@@ -1478,7 +1480,7 @@ public:
}
/// isZExtFree - Return true if any actual instruction that defines a
- /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
+ /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
/// register. This does not necessarily include registers defined in
/// unknown ways, such as incoming arguments, or copies from unknown
/// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h
index 9a64191..3dd7471 100644
--- a/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/include/llvm/Target/TargetLoweringObjectFile.h
@@ -352,7 +352,7 @@ public:
/// getCOFFSection - Return the MCSection for the specified COFF section.
/// FIXME: Switch this to a semantic view eventually.
- const MCSection *getCOFFSection(const char *Name, bool isDirective,
+ const MCSection *getCOFFSection(StringRef Name, bool isDirective,
SectionKind K) const;
};
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index dec0b1d..f93eadb 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -664,7 +664,7 @@ public:
/// frame indices from instructions which may use them. The instruction
/// referenced by the iterator contains an MO_FrameIndex operand which must be
/// eliminated by this method. This method may modify or replace the
- /// specified instruction, as long as it keeps the iterator pointing the the
+ /// specified instruction, as long as it keeps the iterator pointing at the
/// finished product. SPAdj is the SP adjustment due to call frame setup
/// instruction.
///
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index 7f54f81..4b72f81 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -843,11 +843,6 @@ class Pat<dag pattern, dag result> : Pattern<pattern, [result]>;
// Complex pattern definitions.
//
-class CPAttribute;
-// Pass the parent Operand as root to CP function rather
-// than the root of the sub-DAG
-def CPAttrParentAsRoot : CPAttribute;
-
// Complex patterns, e.g. X86 addressing mode, requires pattern matching code
// in C++. NumOperands is the number of operands returned by the select function;
// SelectFunc is the name of the function used to pattern match the max. pattern;
@@ -855,12 +850,10 @@ def CPAttrParentAsRoot : CPAttribute;
// e.g. X86 addressing mode - def addr : ComplexPattern<4, "SelectAddr", [add]>;
//
class ComplexPattern<ValueType ty, int numops, string fn,
- list<SDNode> roots = [], list<SDNodeProperty> props = [],
- list<CPAttribute> attrs = []> {
+ list<SDNode> roots = [], list<SDNodeProperty> props = []> {
ValueType Ty = ty;
int NumOperands = numops;
string SelectFunc = fn;
list<SDNode> RootNodes = roots;
list<SDNodeProperty> Properties = props;
- list<CPAttribute> Attributes = attrs;
}
diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h
index 9794ffd..9c579ac 100644
--- a/include/llvm/Transforms/Instrumentation.h
+++ b/include/llvm/Transforms/Instrumentation.h
@@ -19,22 +19,12 @@ namespace llvm {
class ModulePass;
class FunctionPass;
-// Insert function profiling instrumentation
-ModulePass *createFunctionProfilerPass();
-
-// Insert block profiling instrumentation
-ModulePass *createBlockProfilerPass();
-
// Insert edge profiling instrumentation
ModulePass *createEdgeProfilerPass();
// Insert optimal edge profiling instrumentation
ModulePass *createOptimalEdgeProfilerPass();
-// Random Sampling Profiling Framework
-ModulePass* createNullProfilerRSPass();
-FunctionPass* createRSProfilingPass();
-
} // End llvm namespace
#endif
diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 8172114..3f4571e 100644
--- a/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -40,8 +40,9 @@ void FoldSingleEntryPHINodes(BasicBlock *BB);
/// DeleteDeadPHIs - Examine each PHI in the given block and delete it if it
/// is dead. Also recursively delete any operands that become dead as
/// a result. This includes tracing the def-use list from the PHI to see if
-/// it is ultimately unused or if it reaches an unused cycle.
-void DeleteDeadPHIs(BasicBlock *BB);
+/// it is ultimately unused or if it reaches an unused cycle. Return true
+/// if any PHIs were deleted.
+bool DeleteDeadPHIs(BasicBlock *BB);
/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
/// if possible. The return value indicates success or failure.
@@ -65,11 +66,6 @@ void ReplaceInstWithInst(BasicBlock::InstListType &BIL,
//
void ReplaceInstWithInst(Instruction *From, Instruction *To);
-/// CopyPrecedingStopPoint - If I is immediately preceded by a StopPoint,
-/// make a copy of the stoppoint before InsertPos (presumably before copying
-/// or moving I).
-void CopyPrecedingStopPoint(Instruction *I, BasicBlock::iterator InsertPos);
-
/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the
/// instruction before ScanFrom) checking to see if we have the value at the
/// memory address *Ptr locally available within a small number of instructions.
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index 2cdd31f..0b8147e 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -27,7 +27,7 @@ class PHINode;
class AllocaInst;
class ConstantExpr;
class TargetData;
-struct DbgInfoIntrinsic;
+class DbgInfoIntrinsic;
template<typename T> class SmallVectorImpl;
@@ -63,16 +63,25 @@ bool isInstructionTriviallyDead(Instruction *I);
/// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a
/// trivially dead instruction, delete it. If that makes any of its operands
-/// trivially dead, delete them too, recursively.
-void RecursivelyDeleteTriviallyDeadInstructions(Value *V);
+/// trivially dead, delete them too, recursively. Return true if any
+/// instructions were deleted.
+bool RecursivelyDeleteTriviallyDeadInstructions(Value *V);
/// RecursivelyDeleteDeadPHINode - If the specified value is an effectively
/// dead PHI node, due to being a def-use chain of single-use nodes that
/// either forms a cycle or is terminated by a trivially dead instruction,
/// delete it. If that makes any of its operands trivially dead, delete them
-/// too, recursively.
-void RecursivelyDeleteDeadPHINode(PHINode *PN);
+/// too, recursively. Return true if the PHI node is actually deleted.
+bool RecursivelyDeleteDeadPHINode(PHINode *PN);
+
+/// SimplifyInstructionsInBlock - Scan the specified basic block and try to
+/// simplify any instructions in it and recursively delete dead instructions.
+///
+/// This returns true if it changed the code; note that it can delete
+/// instructions in other blocks as well as in this block.
+bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD = 0);
+
//===----------------------------------------------------------------------===//
// Control Flow Graph Restructuring.
//
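
Annotation (sketch): because the utilities now report changes, a pass can thread them straight into its own changed flag; F and TD are assumed in scope.

    bool Changed = false;
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
      Changed |= SimplifyInstructionsInBlock(BB, TD);
    return Changed;
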
diff --git a/include/llvm/Type.h b/include/llvm/Type.h
index e516982..2c37a68 100644
--- a/include/llvm/Type.h
+++ b/include/llvm/Type.h
@@ -217,6 +217,9 @@ public:
///
bool isInteger() const { return ID == IntegerTyID; }
+ /// isInteger - Return true if this is an IntegerType of the specified width.
+ bool isInteger(unsigned Bitwidth) const;
+
/// isIntOrIntVector - Return true if this is an integer type or a vector of
/// integer types.
///
diff --git a/include/llvm/ValueSymbolTable.h b/include/llvm/ValueSymbolTable.h
index e05fdbd..53815ba 100644
--- a/include/llvm/ValueSymbolTable.h
+++ b/include/llvm/ValueSymbolTable.h
@@ -17,6 +17,7 @@
#include "llvm/Value.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/System/DataTypes.h"
+#include "llvm/ADT/ilist_node.h"
namespace llvm {
template<typename ValueSubClass, typename ItemParentClass>
@@ -26,7 +27,7 @@ namespace llvm {
class NamedMDNode;
class Module;
class StringRef;
-
+
/// This class provides a symbol table of name/value pairs. It is essentially
/// a std::map<std::string,Value*> but has a controlled interface provided by
/// LLVM as well as ensuring uniqueness of names.
@@ -39,7 +40,6 @@ class ValueSymbolTable {
friend class SymbolTableListTraits<Function, Module>;
friend class SymbolTableListTraits<GlobalVariable, Module>;
friend class SymbolTableListTraits<GlobalAlias, Module>;
- friend class SymbolTableListTraits<NamedMDNode, Module>;
/// @name Types
/// @{
public:
@@ -129,6 +129,88 @@ private:
/// @}
};
+/// This class provides a symbol table of name/NamedMDNode pairs. It is
+/// essentially a StringMap wrapper.
+
+class MDSymbolTable {
+ friend class SymbolTableListTraits<NamedMDNode, Module>;
+/// @name Types
+/// @{
+private:
+ /// @brief A mapping of names to metadata
+ typedef StringMap<NamedMDNode*> MDMap;
+
+public:
+  /// @brief An iterator over the underlying MDMap.
+ typedef MDMap::iterator iterator;
+
+  /// @brief A const_iterator over the underlying MDMap.
+ typedef MDMap::const_iterator const_iterator;
+
+/// @}
+/// @name Constructors
+/// @{
+public:
+
+  MDSymbolTable(const MDSymbolTable &); // DO NOT IMPLEMENT
+ void operator=(const MDSymbolTable &); // DO NOT IMPLEMENT
+ MDSymbolTable() : mmap(0) {}
+ ~MDSymbolTable();
+
+/// @}
+/// @name Accessors
+/// @{
+public:
+
+ /// This method finds the value with the given \p Name in the
+  /// symbol table.
+ /// @returns the NamedMDNode associated with the \p Name
+  /// @brief Lookup a NamedMDNode by name.
+ NamedMDNode *lookup(StringRef Name) const { return mmap.lookup(Name); }
+
+ /// @returns true iff the symbol table is empty
+ /// @brief Determine if the symbol table is empty
+ inline bool empty() const { return mmap.empty(); }
+
+  /// @brief The number of name/node pairs is returned.
+ inline unsigned size() const { return unsigned(mmap.size()); }
+
+/// @}
+/// @name Iteration
+/// @{
+public:
+  /// @brief Get an iterator to the beginning of the symbol table.
+ inline iterator begin() { return mmap.begin(); }
+
+  /// @brief Get a const_iterator to the beginning of the symbol table.
+ inline const_iterator begin() const { return mmap.begin(); }
+
+ /// @brief Get an iterator to the end of the symbol table.
+ inline iterator end() { return mmap.end(); }
+
+ /// @brief Get a const_iterator to the end of the symbol table.
+ inline const_iterator end() const { return mmap.end(); }
+
+/// @}
+/// @name Mutators
+/// @{
+public:
+ /// insert - The method inserts a new entry into the stringmap.
+ void insert(StringRef Name, NamedMDNode *Node) {
+ (void) mmap.GetOrCreateValue(Name, Node);
+ }
+
+ /// This method removes a NamedMDNode from the symbol table.
+ void remove(StringRef Name) { mmap.erase(Name); }
+
+/// @}
+/// @name Internal Data
+/// @{
+private:
+ MDMap mmap; ///< The map that holds the symbol table.
+/// @}
+};
+
} // End llvm namespace
#endif
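
Annotation (sketch): typical traffic through the new table, assuming M is a Module and NMD a NamedMDNode*; the "llvm.example" key is hypothetical.

    MDSymbolTable &Tab = M.getMDSymbolTable();
    Tab.insert("llvm.example", NMD);
    if (NamedMDNode *Found = Tab.lookup("llvm.example"))
      (void)Found;                       // present until Tab.remove(...)
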
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index dee9b53..371dcaf 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -116,13 +116,16 @@ AliasAnalysis::getModRefBehavior(Function *F,
return DoesNotAccessMemory;
if (F->onlyReadsMemory())
return OnlyReadsMemory;
- if (unsigned id = F->getIntrinsicID()) {
+ if (unsigned id = F->getIntrinsicID())
+ return getModRefBehavior(id);
+ }
+ return UnknownModRefBehavior;
+}
+
+AliasAnalysis::ModRefBehavior AliasAnalysis::getModRefBehavior(unsigned iid) {
#define GET_INTRINSIC_MODREF_BEHAVIOR
#include "llvm/Intrinsics.gen"
#undef GET_INTRINSIC_MODREF_BEHAVIOR
- }
- }
- return UnknownModRefBehavior;
}
AliasAnalysis::ModRefResult
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index f8cb323..398dec7 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -13,11 +13,11 @@
using namespace llvm;
-int LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
- char **OutMessages) {
+LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
+ char **OutMessages) {
std::string Messages;
- int Result = verifyModule(*unwrap(M),
+ LLVMBool Result = verifyModule(*unwrap(M),
static_cast<VerifierFailureAction>(Action),
OutMessages? &Messages : 0);
@@ -27,7 +27,7 @@ int LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
return Result;
}
-int LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) {
+LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) {
return verifyFunction(*unwrap<Function>(Fn),
static_cast<VerifierFailureAction>(Action));
}
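
Annotation (sketch): on the C API side the verifier entry points now read as below, assuming Mod is an LLVMModuleRef.

    #include "llvm-c/Analysis.h"
    #include "llvm-c/Core.h"
    #include <stdio.h>

    char *Msg = 0;
    LLVMBool Broken = LLVMVerifyModule(Mod, LLVMReturnStatusAction, &Msg);
    if (Broken)
      fprintf(stderr, "verifier: %s\n", Msg);
    LLVMDisposeMessage(Msg);
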
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index eaf90d0..4ae8859 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -398,8 +398,8 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
BytesLoaded, TD))
return 0;
- APInt ResultVal(IntType->getBitWidth(), 0);
- for (unsigned i = 0; i != BytesLoaded; ++i) {
+ APInt ResultVal = APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1]);
+ for (unsigned i = 1; i != BytesLoaded; ++i) {
ResultVal <<= 8;
ResultVal |= APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1-i]);
}
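
Annotation: a quick equivalence check of the reseeded loop. For BytesLoaded == 2 with RawBytes == {0x01, 0x02} (a little-endian memory image), ResultVal is seeded with RawBytes[1] == 0x02 and the single remaining iteration computes (0x02 << 8) | 0x01 == 0x0201, exactly what the old zero-seeded loop produced in two iterations.
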
@@ -718,14 +718,13 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
switch (Opcode) {
default: return 0;
+ case Instruction::ICmp:
+ case Instruction::FCmp: assert(0 && "Invalid for compares");
case Instruction::Call:
if (Function *F = dyn_cast<Function>(Ops[0]))
if (canConstantFoldCallTo(F))
return ConstantFoldCall(F, Ops+1, NumOps-1);
return 0;
- case Instruction::ICmp:
- case Instruction::FCmp:
- llvm_unreachable("This function is invalid for compares: no predicate specified");
case Instruction::PtrToInt:
// If the input is a inttoptr, eliminate the pair. This requires knowing
// the width of a pointer, so it can't be done in ConstantExpr::getCast.
@@ -877,6 +876,20 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
CE1->getOperand(0), TD);
}
}
+
+ // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0)
+ // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0)
+ if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
+ CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
+ Constant *LHS =
+ ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,TD);
+ Constant *RHS =
+ ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,TD);
+ unsigned OpC =
+ Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
+ Constant *Ops[] = { LHS, RHS };
+ return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, 2, TD);
+ }
}
return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp
index 7d72b38..3532b05 100644
--- a/lib/Analysis/DbgInfoPrinter.cpp
+++ b/lib/Analysis/DbgInfoPrinter.cpp
@@ -37,8 +37,6 @@ PrintDirectory("print-fullpath",
namespace {
class PrintDbgInfo : public FunctionPass {
raw_ostream &Out;
- void printStopPoint(const DbgStopPointInst *DSI);
- void printFuncStart(const DbgFuncStartInst *FS);
void printVariableDeclaration(const Value *V);
public:
static char ID; // Pass identification
@@ -74,27 +72,6 @@ void PrintDbgInfo::printVariableDeclaration(const Value *V) {
Out << File << ":" << LineNo << "\n";
}
-void PrintDbgInfo::printStopPoint(const DbgStopPointInst *DSI) {
- if (PrintDirectory)
- if (MDString *Str = dyn_cast<MDString>(DSI->getDirectory()))
- Out << Str->getString() << '/';
-
- if (MDString *Str = dyn_cast<MDString>(DSI->getFileName()))
- Out << Str->getString();
- Out << ':' << DSI->getLine();
-
- if (unsigned Col = DSI->getColumn())
- Out << ':' << Col;
-}
-
-void PrintDbgInfo::printFuncStart(const DbgFuncStartInst *FS) {
- DISubprogram Subprogram(FS->getSubprogram());
- Out << "; fully qualified function name: " << Subprogram.getDisplayName()
- << " return type: " << Subprogram.getReturnTypeName()
- << " at line " << Subprogram.getLineNumber()
- << "\n\n";
-}
-
bool PrintDbgInfo::runOnFunction(Function &F) {
if (F.isDeclaration())
return false;
@@ -108,57 +85,21 @@ bool PrintDbgInfo::runOnFunction(Function &F) {
// Skip dead blocks.
continue;
- const DbgStopPointInst *DSI = findBBStopPoint(BB);
Out << BB->getName();
Out << ":";
- if (DSI) {
- Out << "; (";
- printStopPoint(DSI);
- Out << ")";
- }
-
Out << "\n";
- // A dbgstoppoint's information is valid until we encounter a new one.
- const DbgStopPointInst *LastDSP = DSI;
- bool Printed = DSI != 0;
for (BasicBlock::const_iterator i = BB->begin(), e = BB->end();
i != e; ++i) {
- if (isa<DbgInfoIntrinsic>(i)) {
- if ((DSI = dyn_cast<DbgStopPointInst>(i))) {
- if (DSI->getContext() == LastDSP->getContext() &&
- DSI->getLineValue() == LastDSP->getLineValue() &&
- DSI->getColumnValue() == LastDSP->getColumnValue())
- // Don't print same location twice.
- continue;
-
- LastDSP = cast<DbgStopPointInst>(i);
-
- // Don't print consecutive stoppoints, use a flag to know which one we
- // printed.
- Printed = false;
- } else if (const DbgFuncStartInst *FS = dyn_cast<DbgFuncStartInst>(i)) {
- printFuncStart(FS);
- }
- } else {
- if (!Printed && LastDSP) {
- Out << "; ";
- printStopPoint(LastDSP);
- Out << "\n";
- Printed = true;
- }
- Out << *i << '\n';
printVariableDeclaration(i);
if (const User *U = dyn_cast<User>(i)) {
for(unsigned i=0;i<U->getNumOperands();i++)
printVariableDeclaration(U->getOperand(i));
}
- }
}
}
-
return false;
}
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index de2d839..59ba807 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -599,9 +599,7 @@ void DIVariable::dump() const {
//===----------------------------------------------------------------------===//
DIFactory::DIFactory(Module &m)
- : M(m), VMContext(M.getContext()), DeclareFn(0) {
- EmptyStructPtr = PointerType::getUnqual(StructType::get(VMContext));
-}
+ : M(m), VMContext(M.getContext()), DeclareFn(0) {}
Constant *DIFactory::GetTagConstant(unsigned TAG) {
assert((TAG & LLVMDebugVersionMask) == 0 &&
@@ -1033,58 +1031,52 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
- Instruction *InsertBefore) {
- // Cast the storage to a {}* for the call to llvm.dbg.declare.
- Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertBefore);
-
+ Instruction *InsertBefore) {
if (!DeclareFn)
DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
- Value *Args[] = { Storage, D.getNode() };
+ Value *Elts[] = { Storage };
+ Value *Args[] = { MDNode::get(Storage->getContext(), Elts, 1), D.getNode() };
return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
}
/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
- BasicBlock *InsertAtEnd) {
- // Cast the storage to a {}* for the call to llvm.dbg.declare.
- Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertAtEnd);
-
+ BasicBlock *InsertAtEnd) {
if (!DeclareFn)
DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
- Value *Args[] = { Storage, D.getNode() };
+ Value *Elts[] = { Storage };
+ Value *Args[] = { MDNode::get(Storage->getContext(), Elts, 1), D.getNode() };
return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
}
/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
-Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, Value *Offset,
+Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
DIVariable D,
Instruction *InsertBefore) {
assert(V && "no value passed to dbg.value");
- assert(Offset->getType() == Type::getInt64Ty(V->getContext()) &&
- "offset must be i64");
if (!ValueFn)
ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
Value *Elts[] = { V };
- Value *Args[] = { MDNode::get(V->getContext(), Elts, 1), Offset,
+ Value *Args[] = { MDNode::get(V->getContext(), Elts, 1),
+ ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
D.getNode() };
return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
}
/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
-Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, Value *Offset,
+Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
DIVariable D,
BasicBlock *InsertAtEnd) {
assert(V && "no value passed to dbg.value");
- assert(Offset->getType() == Type::getInt64Ty(V->getContext()) &&
- "offset must be i64");
if (!ValueFn)
ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
Value *Elts[] = { V };
- Value *Args[] = { MDNode::get(V->getContext(), Elts, 1), Offset,
+ Value *Args[] = { MDNode::get(V->getContext(), Elts, 1),
+ ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
D.getNode() };
return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
}
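
The Offset parameter changes from a Value* to a plain uint64_t, so callers no longer build (or assert the type of) the i64 constant themselves; DIFactory materializes it internally. A caller-side sketch, with all value names hypothetical:

    DIFactory DIF(M);
    // Previously: ConstantInt::get(Type::getInt64Ty(Ctx), 0) went here.
    DIF.InsertDbgValueIntrinsic(StoredVal, /*Offset=*/0, Var, InsertPt);
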
@@ -1242,52 +1234,6 @@ bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
return true;
}
-/// findStopPoint - Find the stoppoint corresponding to this instruction, that
-/// is the stoppoint that dominates this instruction.
-const DbgStopPointInst *llvm::findStopPoint(const Instruction *Inst) {
- if (const DbgStopPointInst *DSI = dyn_cast<DbgStopPointInst>(Inst))
- return DSI;
-
- const BasicBlock *BB = Inst->getParent();
- BasicBlock::const_iterator I = Inst, B;
- while (BB) {
- B = BB->begin();
-
- // A BB consisting only of a terminator can't have a stoppoint.
- while (I != B) {
- --I;
- if (const DbgStopPointInst *DSI = dyn_cast<DbgStopPointInst>(I))
- return DSI;
- }
-
- // This BB didn't have a stoppoint: if there is only one predecessor, look
- // for a stoppoint there. We could use getIDom(), but that would require
- // dominator info.
- BB = I->getParent()->getUniquePredecessor();
- if (BB)
- I = BB->getTerminator();
- }
-
- return 0;
-}
-
-/// findBBStopPoint - Find the stoppoint corresponding to the first real
-/// (non-debug intrinsic) instruction in this Basic Block, and return the
-/// stoppoint for it.
-const DbgStopPointInst *llvm::findBBStopPoint(const BasicBlock *BB) {
- for(BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (const DbgStopPointInst *DSI = dyn_cast<DbgStopPointInst>(I))
- return DSI;
-
- // Fallback to looking for stoppoint of unique predecessor. Useful if this
- // BB contains no stoppoints, but unique predecessor does.
- BB = BB->getUniquePredecessor();
- if (BB)
- return findStopPoint(BB->getTerminator());
-
- return 0;
-}
-
Value *llvm::findDbgGlobalDeclare(GlobalVariable *V) {
const Module *M = V->getParent();
NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv");
@@ -1306,25 +1252,24 @@ Value *llvm::findDbgGlobalDeclare(GlobalVariable *V) {
/// Finds the llvm.dbg.declare intrinsic corresponding to this value if any.
/// It looks through pointer casts too.
-const DbgDeclareInst *llvm::findDbgDeclare(const Value *V, bool stripCasts) {
- if (stripCasts) {
- V = V->stripPointerCasts();
-
- // Look for the bitcast.
- for (Value::use_const_iterator I = V->use_begin(), E =V->use_end();
- I != E; ++I)
- if (isa<BitCastInst>(I)) {
- const DbgDeclareInst *DDI = findDbgDeclare(*I, false);
- if (DDI) return DDI;
- }
+const DbgDeclareInst *llvm::findDbgDeclare(const Value *V) {
+ V = V->stripPointerCasts();
+
+ if (!isa<Instruction>(V) && !isa<Argument>(V))
return 0;
- }
-
- // Find llvm.dbg.declare among uses of the instruction.
- for (Value::use_const_iterator I = V->use_begin(), E =V->use_end();
- I != E; ++I)
- if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I))
- return DDI;
+
+ const Function *F = NULL;
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ F = I->getParent()->getParent();
+ else if (const Argument *A = dyn_cast<Argument>(V))
+ F = A->getParent();
+
+ for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
+ for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end();
+ BI != BE; ++BI)
+ if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+ if (DDI->getAddress() == V)
+ return DDI;
return 0;
}
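
findDbgDeclare loses its stripCasts flag: instead of chasing bitcast users, it strips pointer casts from V and scans the enclosing function for the dbg.declare whose address operand matches, which also finds declares whose intervening bitcasts have been optimized away. A usage sketch, with accessor names assumed from this era's headers:

    // Recover the source variable behind an alloca, if debug info names one.
    if (const DbgDeclareInst *DDI = findDbgDeclare(AI)) {
      DIVariable Var(cast<MDNode>(DDI->getVariable()));
      StringRef Name = Var.getName();  // e.g. "counter" (hypothetical)
    }
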
@@ -1372,29 +1317,6 @@ bool llvm::getLocationInfo(const Value *V, std::string &DisplayName,
}
/// ExtractDebugLocation - Extract debug location information
-/// from llvm.dbg.stoppoint intrinsic.
-DebugLoc llvm::ExtractDebugLocation(DbgStopPointInst &SPI,
- DebugLocTracker &DebugLocInfo) {
- DebugLoc DL;
- Value *Context = SPI.getContext();
-
- // If this location is already tracked then use it.
- DebugLocTuple Tuple(cast<MDNode>(Context), NULL, SPI.getLine(),
- SPI.getColumn());
- DenseMap<DebugLocTuple, unsigned>::iterator II
- = DebugLocInfo.DebugIdMap.find(Tuple);
- if (II != DebugLocInfo.DebugIdMap.end())
- return DebugLoc::get(II->second);
-
- // Add a new location entry.
- unsigned Id = DebugLocInfo.DebugLocations.size();
- DebugLocInfo.DebugLocations.push_back(Tuple);
- DebugLocInfo.DebugIdMap[Tuple] = Id;
-
- return DebugLoc::get(Id);
-}
-
-/// ExtractDebugLocation - Extract debug location information
/// from DILocation.
DebugLoc llvm::ExtractDebugLocation(DILocation &Loc,
DebugLocTracker &DebugLocInfo) {
@@ -1419,32 +1341,6 @@ DebugLoc llvm::ExtractDebugLocation(DILocation &Loc,
return DebugLoc::get(Id);
}
-/// ExtractDebugLocation - Extract debug location information
-/// from llvm.dbg.func_start intrinsic.
-DebugLoc llvm::ExtractDebugLocation(DbgFuncStartInst &FSI,
- DebugLocTracker &DebugLocInfo) {
- DebugLoc DL;
- Value *SP = FSI.getSubprogram();
-
- DISubprogram Subprogram(cast<MDNode>(SP));
- unsigned Line = Subprogram.getLineNumber();
- DICompileUnit CU(Subprogram.getCompileUnit());
-
- // If this location is already tracked then use it.
- DebugLocTuple Tuple(CU.getNode(), NULL, Line, /* Column */ 0);
- DenseMap<DebugLocTuple, unsigned>::iterator II
- = DebugLocInfo.DebugIdMap.find(Tuple);
- if (II != DebugLocInfo.DebugIdMap.end())
- return DebugLoc::get(II->second);
-
- // Add a new location entry.
- unsigned Id = DebugLocInfo.DebugLocations.size();
- DebugLocInfo.DebugLocations.push_back(Tuple);
- DebugLocInfo.DebugIdMap[Tuple] = Id;
-
- return DebugLoc::get(Id);
-}
-
/// getDISubprogram - Find subprogram that is enclosing this scope.
DISubprogram llvm::getDISubprogram(MDNode *Scope) {
DIDescriptor D(Scope);
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index df9e31c..26c0c9e 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -128,8 +128,9 @@ static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop,
if (!AddRecStride->properlyDominates(Header, DT))
return false;
- DEBUG(dbgs() << "[" << L->getHeader()->getName()
- << "] Variable stride: " << *AddRec << "\n");
+ DEBUG(dbgs() << "[";
+ WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
+ dbgs() << "] Variable stride: " << *AddRec << "\n");
}
Stride = AddRecStride;
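
The switch from getName() to WriteAsOperand matters for unnamed blocks: getName() yields an empty string for them, while WriteAsOperand prints the autogenerated slot, so the debug output always identifies the header. A sketch of the difference (printed forms illustrative):

    // named header:    getName() -> "loop.hdr"   WriteAsOperand -> %loop.hdr
    // unnamed header:  getName() -> ""           WriteAsOperand -> %3
    WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
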
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index bd9377b..651c918 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -102,6 +102,37 @@ unsigned InlineCostAnalyzer::FunctionInfo::
return Reduction;
}
+// callIsSmall - If a call is likely to lower to a single target instruction, or
+// is otherwise deemed small, return true.
+// TODO: Perhaps calls like memcpy, strcpy, etc?
+static bool callIsSmall(const Function *F) {
+ if (!F) return false;
+
+ if (F->hasLocalLinkage()) return false;
+
+ if (!F->hasName()) return false;
+
+ StringRef Name = F->getName();
+
+ // These will all likely lower to a single selection DAG node.
+ if (Name == "copysign" || Name == "copysignf" ||
+ Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
+ Name == "sin" || Name == "sinf" || Name == "sinl" ||
+ Name == "cos" || Name == "cosf" || Name == "cosl" ||
+ Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" )
+ return true;
+
+ // These are all likely to be optimized into something smaller.
+ if (Name == "pow" || Name == "powf" || Name == "powl" ||
+ Name == "exp2" || Name == "exp2l" || Name == "exp2f" ||
+ Name == "floor" || Name == "floorf" || Name == "ceil" ||
+ Name == "round" || Name == "ffs" || Name == "ffsl" ||
+ Name == "abs" || Name == "labs" || Name == "llabs")
+ return true;
+
+ return false;
+}
+
/// analyzeBasicBlock - Fill in the current structure with information gleaned
/// from the specified block.
void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
@@ -129,7 +160,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
// Calls often compile into many machine instructions. Bump up their
// cost to reflect this.
- if (!isa<IntrinsicInst>(II))
+ if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction()))
NumInsts += InlineConstants::CallPenalty;
}
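
The practical effect on inlining, as a sketch (example function, not from the patch): each recognized libm call previously added InlineConstants::CallPenalty to the caller's measured size, and now counts as an ordinary instruction.

    #include <math.h>
    // Both calls are now costed as single instructions, so f is much more
    // likely to fall under the inlining threshold at its call sites.
    static float f(float x) { return cosf(x) + sinf(x); }
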
@@ -141,11 +172,16 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
if (isa<ExtractElementInst>(II) || isa<VectorType>(II->getType()))
++NumVectorInsts;
- // Noop casts, including ptr <-> int, don't count.
if (const CastInst *CI = dyn_cast<CastInst>(II)) {
+ // Noop casts, including ptr <-> int, don't count.
if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
isa<PtrToIntInst>(CI))
continue;
+ // The result of a cmp instruction is often extended (to be used by other
+ // cmp instructions, logical operations, or return instructions). Such
+ // extensions are usually a nop on most sane targets.
+ if (isa<CmpInst>(CI->getOperand(0)))
+ continue;
} else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){
// If a GEP has all constant indices, it will probably be folded with
// a load/store.
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 5d31c11..453af5a 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include <algorithm>
@@ -385,6 +386,10 @@ BasicBlock *Loop::getUniqueExitBlock() const {
return 0;
}
+void Loop::dump() const {
+ print(dbgs());
+}
+
//===----------------------------------------------------------------------===//
// LoopInfo implementation
//
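
Loop::dump() follows the usual dump() convention: an out-of-line method writing to dbgs(), so it stays callable from a debugger even when the surrounding print machinery would otherwise be inlined away. Typical use, as a sketch:

    L->dump();                 // drop-in while debugging a pass
    // or interactively:  (gdb) call L->dump()
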
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 17dc686..4d85ce4 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -316,7 +316,9 @@ void SCEVAddRecExpr::print(raw_ostream &OS) const {
OS << "{" << *Operands[0];
for (unsigned i = 1, e = Operands.size(); i != e; ++i)
OS << ",+," << *Operands[i];
- OS << "}<" << L->getHeader()->getName() + ">";
+ OS << "}<";
+ WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+ OS << ">";
}
void SCEVFieldOffsetExpr::print(raw_ostream &OS) const {
@@ -5193,7 +5195,9 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
PrintLoopInfo(OS, SE, *I);
- OS << "Loop " << L->getHeader()->getName() << ": ";
+ OS << "Loop ";
+ WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+ OS << ": ";
SmallVector<BasicBlock *, 8> ExitBlocks;
L->getExitBlocks(ExitBlocks);
@@ -5206,8 +5210,10 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
OS << "Unpredictable backedge-taken count. ";
}
- OS << "\n";
- OS << "Loop " << L->getHeader()->getName() << ": ";
+ OS << "\n"
+ "Loop ";
+ WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+ OS << ": ";
if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
@@ -5227,7 +5233,9 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
// const isn't dangerous.
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
- OS << "Classifying expressions for: " << F->getName() << "\n";
+ OS << "Classifying expressions for: ";
+ WriteAsOperand(OS, F, /*PrintType=*/false);
+ OS << "\n";
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
if (isSCEVable(I->getType())) {
OS << *I << '\n';
@@ -5256,7 +5264,9 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
OS << "\n";
}
- OS << "Determining loop execution counts for: " << F->getName() << "\n";
+ OS << "Determining loop execution counts for: ";
+ WriteAsOperand(OS, F, /*PrintType=*/false);
+ OS << "\n";
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
PrintLoopInfo(OS, &SE, *I);
}
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index acd3119..91e5bc3 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -726,8 +726,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
if (Tmp2 == 1) return 1;
- return std::min(Tmp, Tmp2)-1;
- break;
+ return std::min(Tmp, Tmp2)-1;
case Instruction::Sub:
Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
@@ -757,8 +756,24 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
// is, at worst, one more bit than the inputs.
Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
if (Tmp == 1) return 1; // Early out.
- return std::min(Tmp, Tmp2)-1;
- break;
+ return std::min(Tmp, Tmp2)-1;
+
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(U);
+ // Don't analyze large in-degree PHIs.
+ if (PN->getNumIncomingValues() > 4) break;
+
+ // Take the minimum of all incoming values. This can't infinitely loop
+ // because of our depth threshold.
+ Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1);
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (Tmp == 1) return Tmp;
+ Tmp = std::min(Tmp,
+ ComputeNumSignBits(PN->getIncomingValue(i), TD, Depth+1));
+ }
+ return Tmp;
+ }
+
case Instruction::Trunc:
// FIXME: it's tricky to do anything useful for this, but it is an important
// case for targets like X86.
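
The PHI rule is a simple meet over the incoming values: the node can only guarantee as many sign bits as its weakest input. A worked instance with hypothetical numbers:

    unsigned A = 20, B = 12;            // known sign bits of the two inputs
    unsigned PHIBits = std::min(A, B);  // == 12, all the PHI can promise
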
@@ -1348,7 +1363,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
// Make sure the index-ee is a pointer to array of i8.
const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
- if (AT == 0 || AT->getElementType() != Type::getInt8Ty(V->getContext()))
+ if (AT == 0 || !AT->getElementType()->isInteger(8))
return false;
// Check to make sure that the first operand of the GEP is an integer and
@@ -1387,8 +1402,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
// Must be a Constant Array
ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
- if (Array == 0 ||
- Array->getType()->getElementType() != Type::getInt8Ty(V->getContext()))
+ if (Array == 0 || !Array->getType()->getElementType()->isInteger(8))
return false;
// Get the number of elements in the array
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 15a9832..e4039ab 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -510,12 +510,17 @@ bool LLParser::ParseNamedMetadata() {
ParseToken(lltok::lbrace, "Expected '{' here"))
return true;
- SmallVector<MetadataBase *, 8> Elts;
+ SmallVector<MDNode *, 8> Elts;
do {
+ // Null is a special case since it is typeless.
+ if (EatIfPresent(lltok::kw_null)) {
+ Elts.push_back(0);
+ continue;
+ }
+
if (ParseToken(lltok::exclaim, "Expected '!' here"))
return true;
- // FIXME: This rejects MDStrings. Are they legal in an named MDNode or not?
MDNode *N = 0;
if (ParseMDNodeID(N)) return true;
Elts.push_back(N);
@@ -543,7 +548,7 @@ bool LLParser::ParseStandaloneMetadata() {
ParseType(Ty, TyLoc) ||
ParseToken(lltok::exclaim, "Expected '!' here") ||
ParseToken(lltok::lbrace, "Expected '{' here") ||
- ParseMDNodeVector(Elts) ||
+ ParseMDNodeVector(Elts, NULL) ||
ParseToken(lltok::rbrace, "expected end of metadata node"))
return true;
@@ -1715,8 +1720,7 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
}
// Don't make placeholders with invalid type.
- if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) &&
- Ty != Type::getLabelTy(F.getContext())) {
+ if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) && !Ty->isLabelTy()) {
P.Error(Loc, "invalid use of a non-first-class type");
return 0;
}
@@ -1757,8 +1761,7 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty,
return 0;
}
- if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) &&
- Ty != Type::getLabelTy(F.getContext())) {
+ if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) && !Ty->isLabelTy()) {
P.Error(Loc, "invalid use of a non-first-class type");
return 0;
}
@@ -1881,8 +1884,10 @@ BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name,
/// ParseValID - Parse an abstract value that doesn't necessarily have a
/// type implied. For example, if we parse "4" we don't know what integer type
/// it has. The value will later be combined with its type and checked for
-/// sanity.
-bool LLParser::ParseValID(ValID &ID) {
+/// sanity. PFS is used to convert function-local operands of metadata (since
+/// metadata operands are not just parsed here but also converted to values).
+/// PFS can be null when we are not parsing metadata values inside a function.
+bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
ID.Loc = Lex.getLoc();
switch (Lex.getKind()) {
default: return TokError("expected value token");
@@ -1908,7 +1913,7 @@ bool LLParser::ParseValID(ValID &ID) {
if (EatIfPresent(lltok::lbrace)) {
SmallVector<Value*, 16> Elts;
- if (ParseMDNodeVector(Elts) ||
+ if (ParseMDNodeVector(Elts, PFS) ||
ParseToken(lltok::rbrace, "expected end of metadata node"))
return true;
@@ -2353,30 +2358,85 @@ bool LLParser::ParseValID(ValID &ID) {
}
/// ParseGlobalValue - Parse a global value with the specified type.
-bool LLParser::ParseGlobalValue(const Type *Ty, Constant *&V) {
- V = 0;
+bool LLParser::ParseGlobalValue(const Type *Ty, Constant *&C) {
+ C = 0;
ValID ID;
- return ParseValID(ID) ||
- ConvertGlobalValIDToValue(Ty, ID, V);
+ Value *V = NULL;
+ bool Parsed = ParseValID(ID) ||
+ ConvertValIDToValue(Ty, ID, V, NULL);
+ if (V && !(C = dyn_cast<Constant>(V)))
+ return Error(ID.Loc, "global values must be constants");
+ return Parsed;
+}
+
+bool LLParser::ParseGlobalTypeAndValue(Constant *&V) {
+ PATypeHolder Type(Type::getVoidTy(Context));
+ return ParseType(Type) ||
+ ParseGlobalValue(Type, V);
+}
+
+/// ParseGlobalValueVector
+/// ::= /*empty*/
+/// ::= TypeAndValue (',' TypeAndValue)*
+bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) {
+ // Empty list.
+ if (Lex.getKind() == lltok::rbrace ||
+ Lex.getKind() == lltok::rsquare ||
+ Lex.getKind() == lltok::greater ||
+ Lex.getKind() == lltok::rparen)
+ return false;
+
+ Constant *C;
+ if (ParseGlobalTypeAndValue(C)) return true;
+ Elts.push_back(C);
+
+ while (EatIfPresent(lltok::comma)) {
+ if (ParseGlobalTypeAndValue(C)) return true;
+ Elts.push_back(C);
+ }
+
+ return false;
}
-/// ConvertGlobalValIDToValue - Apply a type to a ValID to get a fully resolved
-/// constant.
-bool LLParser::ConvertGlobalValIDToValue(const Type *Ty, ValID &ID,
- Constant *&V) {
+
+//===----------------------------------------------------------------------===//
+// Function Parsing.
+//===----------------------------------------------------------------------===//
+
+bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
+ PerFunctionState *PFS) {
if (isa<FunctionType>(Ty))
return Error(ID.Loc, "functions are not values, refer to them as pointers");
switch (ID.Kind) {
default: llvm_unreachable("Unknown ValID!");
- case ValID::t_MDNode:
- case ValID::t_MDString:
- return Error(ID.Loc, "invalid use of metadata");
case ValID::t_LocalID:
+ if (!PFS) return Error(ID.Loc, "invalid use of function-local name");
+ V = PFS->GetVal(ID.UIntVal, Ty, ID.Loc);
+ return (V == 0);
case ValID::t_LocalName:
- return Error(ID.Loc, "invalid use of function-local name");
- case ValID::t_InlineAsm:
- return Error(ID.Loc, "inline asm can only be an operand of call/invoke");
+ if (!PFS) return Error(ID.Loc, "invalid use of function-local name");
+ V = PFS->GetVal(ID.StrVal, Ty, ID.Loc);
+ return (V == 0);
+ case ValID::t_InlineAsm: {
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ const FunctionType *FTy =
+ PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
+ if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2))
+ return Error(ID.Loc, "invalid type for inline asm constraint string");
+ V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, ID.UIntVal>>1);
+ return false;
+ }
+ case ValID::t_MDNode:
+ if (!Ty->isMetadataTy())
+ return Error(ID.Loc, "metadata value must have metadata type");
+ V = ID.MDNodeVal;
+ return false;
+ case ValID::t_MDString:
+ if (!Ty->isMetadataTy())
+ return Error(ID.Loc, "metadata value must have metadata type");
+ V = ID.MDStringVal;
+ return false;
case ValID::t_GlobalName:
V = GetGlobalVal(ID.StrVal, Ty, ID.Loc);
return V == 0;
@@ -2440,90 +2500,11 @@ bool LLParser::ConvertGlobalValIDToValue(const Type *Ty, ValID &ID,
}
}
-/// ConvertGlobalOrMetadataValIDToValue - Apply a type to a ValID to get a fully
-/// resolved constant or metadata value.
-bool LLParser::ConvertGlobalOrMetadataValIDToValue(const Type *Ty, ValID &ID,
- Value *&V) {
- switch (ID.Kind) {
- case ValID::t_MDNode:
- if (!Ty->isMetadataTy())
- return Error(ID.Loc, "metadata value must have metadata type");
- V = ID.MDNodeVal;
- return false;
- case ValID::t_MDString:
- if (!Ty->isMetadataTy())
- return Error(ID.Loc, "metadata value must have metadata type");
- V = ID.MDStringVal;
- return false;
- default:
- Constant *C;
- if (ConvertGlobalValIDToValue(Ty, ID, C)) return true;
- V = C;
- return false;
- }
-}
-
-
-bool LLParser::ParseGlobalTypeAndValue(Constant *&V) {
- PATypeHolder Type(Type::getVoidTy(Context));
- return ParseType(Type) ||
- ParseGlobalValue(Type, V);
-}
-
-/// ParseGlobalValueVector
-/// ::= /*empty*/
-/// ::= TypeAndValue (',' TypeAndValue)*
-bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) {
- // Empty list.
- if (Lex.getKind() == lltok::rbrace ||
- Lex.getKind() == lltok::rsquare ||
- Lex.getKind() == lltok::greater ||
- Lex.getKind() == lltok::rparen)
- return false;
-
- Constant *C;
- if (ParseGlobalTypeAndValue(C)) return true;
- Elts.push_back(C);
-
- while (EatIfPresent(lltok::comma)) {
- if (ParseGlobalTypeAndValue(C)) return true;
- Elts.push_back(C);
- }
-
- return false;
-}
-
-
-//===----------------------------------------------------------------------===//
-// Function Parsing.
-//===----------------------------------------------------------------------===//
-
-bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
- PerFunctionState &PFS) {
- switch (ID.Kind) {
- case ValID::t_LocalID: V = PFS.GetVal(ID.UIntVal, Ty, ID.Loc); break;
- case ValID::t_LocalName: V = PFS.GetVal(ID.StrVal, Ty, ID.Loc); break;
- case ValID::t_InlineAsm: {
- const PointerType *PTy = dyn_cast<PointerType>(Ty);
- const FunctionType *FTy =
- PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
- if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2))
- return Error(ID.Loc, "invalid type for inline asm constraint string");
- V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, ID.UIntVal>>1);
- return false;
- }
- default:
- return ConvertGlobalOrMetadataValIDToValue(Ty, ID, V);
- }
-
- return V == 0;
-}
-
bool LLParser::ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS) {
V = 0;
ValID ID;
- return ParseValID(ID) ||
- ConvertValIDToValue(Ty, ID, V, PFS);
+ return ParseValID(ID, &PFS) ||
+ ConvertValIDToValue(Ty, ID, V, &PFS);
}
bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
@@ -2663,8 +2644,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
- if (PAL.paramHasAttr(1, Attribute::StructRet) &&
- RetType != Type::getVoidTy(Context))
+ if (PAL.paramHasAttr(1, Attribute::StructRet) && !RetType->isVoidTy())
return Error(RetTypeLoc, "functions with 'sret' argument must return void");
const FunctionType *FT =
@@ -2766,6 +2746,10 @@ bool LLParser::ParseFunctionBody(Function &Fn) {
PerFunctionState PFS(*this, Fn, FunctionNumber);
+ // We need at least one basic block.
+ if (Lex.getKind() == lltok::rbrace || Lex.getKind() == lltok::kw_end)
+ return TokError("function body requires at least one basic block");
+
while (Lex.getKind() != lltok::rbrace && Lex.getKind() != lltok::kw_end)
if (ParseBasicBlock(PFS)) return true;
@@ -3232,7 +3216,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
// Look up the callee.
Value *Callee;
- if (ConvertValIDToValue(PFTy, CalleeID, Callee, PFS)) return true;
+ if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
// FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional
// function attributes.
@@ -3578,7 +3562,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
// Look up the callee.
Value *Callee;
- if (ConvertValIDToValue(PFTy, CalleeID, Callee, PFS)) return true;
+ if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
// FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional
// function attributes.
@@ -3660,7 +3644,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS,
}
}
- if (Size && Size->getType() != Type::getInt32Ty(Context))
+ if (Size && !Size->getType()->isInteger(32))
return Error(SizeLoc, "element count must be i32");
if (isAlloca) {
@@ -3840,7 +3824,8 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
/// ::= Element (',' Element)*
/// Element
/// ::= 'null' | TypeAndValue
-bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts) {
+bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts,
+ PerFunctionState *PFS) {
do {
// Null is a special case since it is typeless.
if (EatIfPresent(lltok::kw_null)) {
@@ -3851,8 +3836,8 @@ bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts) {
Value *V = 0;
PATypeHolder Ty(Type::getVoidTy(Context));
ValID ID;
- if (ParseType(Ty) || ParseValID(ID) ||
- ConvertGlobalOrMetadataValIDToValue(Ty, ID, V))
+ if (ParseType(Ty) || ParseValID(ID, PFS) ||
+ ConvertValIDToValue(Ty, ID, V, PFS))
return true;
Elts.push_back(V);
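
The net restructuring in this file: ConvertGlobalValIDToValue and ConvertGlobalOrMetadataValIDToValue collapse into a single ConvertValIDToValue taking an optional PerFunctionState, and ParseValID threads the same pointer through, so metadata parsed inside a function body can resolve function-local values. Call-shape sketch, grounded in the signatures above:

    Value *V = 0; ValID ID;
    // At global scope: no function state, so %locals are rejected.
    if (ParseValID(ID) || ConvertValIDToValue(Ty, ID, V, NULL)) return true;
    // Inside a function body: locals resolve through PFS.
    if (ParseValID(ID, &PFS) || ConvertValIDToValue(Ty, ID, V, &PFS)) return true;
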
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 803832f..bea0593 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -216,17 +216,6 @@ namespace llvm {
bool ParseFunctionType(PATypeHolder &Result);
PATypeHolder HandleUpRefs(const Type *Ty);
- // Constants.
- bool ParseValID(ValID &ID);
- bool ConvertGlobalValIDToValue(const Type *Ty, ValID &ID, Constant *&V);
- bool ConvertGlobalOrMetadataValIDToValue(const Type *Ty, ValID &ID,
- Value *&V);
- bool ParseGlobalValue(const Type *Ty, Constant *&V);
- bool ParseGlobalTypeAndValue(Constant *&V);
- bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
- bool ParseMDNodeVector(SmallVectorImpl<Value*> &);
-
-
// Function Semantic Analysis.
class PerFunctionState {
LLParser &P;
@@ -270,7 +259,7 @@ namespace llvm {
};
bool ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
- PerFunctionState &PFS);
+ PerFunctionState *PFS);
bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS);
bool ParseValue(const Type *Ty, Value *&V, LocTy &Loc,
@@ -301,6 +290,13 @@ namespace llvm {
bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
PerFunctionState &PFS);
+ // Constant Parsing.
+ bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL);
+ bool ParseGlobalValue(const Type *Ty, Constant *&V);
+ bool ParseGlobalTypeAndValue(Constant *&V);
+ bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
+ bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS);
+
// Function Parsing.
struct ArgInfo {
LocTy Loc;
diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp
index f513d41..32b97e8 100644
--- a/lib/Bitcode/Reader/BitReader.cpp
+++ b/lib/Bitcode/Reader/BitReader.cpp
@@ -18,9 +18,9 @@ using namespace llvm;
/* Builds a module from the bitcode in the specified memory buffer, returning a
reference to the module via the OutModule parameter. Returns 0 on success.
- Optionally returns a human-readable error message via OutMessage. */
-int LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
- LLVMModuleRef *OutModule, char **OutMessage) {
+ Optionally returns a human-readable error message via OutMessage. */
+LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutModule, char **OutMessage) {
std::string Message;
*OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), getGlobalContext(),
@@ -34,9 +34,10 @@ int LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
return 0;
}
-int LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
- LLVMMemoryBufferRef MemBuf,
- LLVMModuleRef *OutModule, char **OutMessage) {
+LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutModule,
+ char **OutMessage) {
std::string Message;
*OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), *unwrap(ContextRef),
@@ -53,9 +54,9 @@ int LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
/* Reads a module from the specified path, returning via the OutModule parameter
a module provider which performs lazy deserialization. Returns 0 on success.
Optionally returns a human-readable error message via OutMessage. */
-int LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
- LLVMModuleProviderRef *OutMP,
- char **OutMessage) {
+LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
+ LLVMModuleProviderRef *OutMP,
+ char **OutMessage) {
std::string Message;
*OutMP = wrap(getBitcodeModuleProvider(unwrap(MemBuf), getGlobalContext(),
@@ -70,10 +71,10 @@ int LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
return 0;
}
-int LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef,
- LLVMMemoryBufferRef MemBuf,
- LLVMModuleProviderRef *OutMP,
- char **OutMessage) {
+LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleProviderRef *OutMP,
+ char **OutMessage) {
std::string Message;
*OutMP = wrap(getBitcodeModuleProvider(unwrap(MemBuf), *unwrap(ContextRef),
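
All four entry points move from int to LLVMBool. LLVMBool is a typedef of int in the C headers, so existing callers keep compiling; the new name just documents the zero-on-success convention. Caller sketch (includes and buffer setup omitted):

    LLVMModuleRef Mod;
    char *Err = NULL;
    if (LLVMParseBitcode(MemBuf, &Mod, &Err)) {  // nonzero means failure
      fprintf(stderr, "bitcode parse failed: %s\n", Err);
      LLVMDisposeMessage(Err);
    }
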
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 7dffafa..aabbc90 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -737,7 +737,7 @@ bool BitcodeReader::ParseValueSymbolTable() {
}
bool BitcodeReader::ParseMetadata() {
- unsigned NextValueNo = MDValueList.size();
+ unsigned NextMDValueNo = MDValueList.size();
if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
return Error("Malformed block record");
@@ -766,6 +766,7 @@ bool BitcodeReader::ParseMetadata() {
continue;
}
+ bool IsFunctionLocal = false;
// Read a record.
Record.clear();
switch (Stream.ReadRecord(Code, Record)) {
@@ -787,17 +788,25 @@ bool BitcodeReader::ParseMetadata() {
// Read named metadata elements.
unsigned Size = Record.size();
- SmallVector<MetadataBase*, 8> Elts;
+ SmallVector<MDNode *, 8> Elts;
for (unsigned i = 0; i != Size; ++i) {
- Value *MD = MDValueList.getValueFwdRef(Record[i]);
- if (MetadataBase *B = dyn_cast<MetadataBase>(MD))
- Elts.push_back(B);
+ if (Record[i] == ~0U) {
+ Elts.push_back(NULL);
+ continue;
+ }
+ MDNode *MD = dyn_cast<MDNode>(MDValueList.getValueFwdRef(Record[i]));
+ if (MD == 0)
+ return Error("Malformed metadata record");
+ Elts.push_back(MD);
}
Value *V = NamedMDNode::Create(Context, Name.str(), Elts.data(),
Elts.size(), TheModule);
- MDValueList.AssignValue(V, NextValueNo++);
+ MDValueList.AssignValue(V, NextMDValueNo++);
break;
}
+ case bitc::METADATA_FN_NODE:
+ IsFunctionLocal = true;
+ // fall-through
case bitc::METADATA_NODE: {
if (Record.empty() || Record.size() % 2 == 1)
return Error("Invalid METADATA_NODE record");
@@ -808,13 +817,15 @@ bool BitcodeReader::ParseMetadata() {
const Type *Ty = getTypeByID(Record[i], false);
if (Ty->isMetadataTy())
Elts.push_back(MDValueList.getValueFwdRef(Record[i+1]));
- else if (Ty != Type::getVoidTy(Context))
+ else if (!Ty->isVoidTy())
Elts.push_back(ValueList.getValueFwdRef(Record[i+1], Ty));
else
Elts.push_back(NULL);
}
- Value *V = MDNode::get(Context, &Elts[0], Elts.size());
- MDValueList.AssignValue(V, NextValueNo++);
+ Value *V = MDNode::getWhenValsUnresolved(Context, &Elts[0], Elts.size(),
+ IsFunctionLocal);
+ IsFunctionLocal = false;
+ MDValueList.AssignValue(V, NextMDValueNo++);
break;
}
case bitc::METADATA_STRING: {
@@ -825,7 +836,7 @@ bool BitcodeReader::ParseMetadata() {
String[i] = Record[i];
Value *V = MDString::get(Context,
StringRef(String.data(), String.size()));
- MDValueList.AssignValue(V, NextValueNo++);
+ MDValueList.AssignValue(V, NextMDValueNo++);
break;
}
case bitc::METADATA_KIND: {
@@ -1646,6 +1657,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::METADATA_ATTACHMENT_ID:
if (ParseMetadataAttachment()) return true;
break;
+ case bitc::METADATA_BLOCK_ID:
+ if (ParseMetadata()) return true;
+ break;
}
continue;
}
@@ -2238,7 +2252,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
}
// Non-void values get registered in the value table for future use.
- if (I && I->getType() != Type::getVoidTy(Context))
+ if (I && !I->getType()->isVoidTy())
ValueList.AssignValue(I, NextValueNo++);
}
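
Two encoding conventions arrive here: ~0U in a METADATA_NAMED_NODE record is the sentinel for a null operand (the writer below emits it), and METADATA_FN_NODE is METADATA_NODE plus an implied IsFunctionLocal flag, handled by the fall-through. The reader side of the sentinel, as in the hunk above:

    if (Record[i] == ~0U) {  // writer's encoding of a null named-MD operand
      Elts.push_back(NULL);
      continue;
    }
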
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index c78a30e..5a4a1b2 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -484,7 +484,9 @@ static void WriteMDNode(const MDNode *N,
Record.push_back(0);
}
}
- Stream.EmitRecord(bitc::METADATA_NODE, Record, 0);
+ unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE :
+ bitc::METADATA_NODE;
+ Stream.EmitRecord(MDCode, Record, 0);
Record.clear();
}
@@ -497,11 +499,13 @@ static void WriteModuleMetadata(const ValueEnumerator &VE,
for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
if (const MDNode *N = dyn_cast<MDNode>(Vals[i].first)) {
- if (!StartedMetadataBlock) {
- Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
- StartedMetadataBlock = true;
+ if (!N->isFunctionLocal()) {
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+ StartedMetadataBlock = true;
+ }
+ WriteMDNode(N, VE, Stream, Record);
}
- WriteMDNode(N, VE, Stream, Record);
} else if (const MDString *MDS = dyn_cast<MDString>(Vals[i].first)) {
if (!StartedMetadataBlock) {
Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
@@ -528,10 +532,9 @@ static void WriteModuleMetadata(const ValueEnumerator &VE,
}
// Write name.
- std::string Str = NMD->getNameStr();
- const char *StrBegin = Str.c_str();
- for (unsigned i = 0, e = Str.length(); i != e; ++i)
- Record.push_back(StrBegin[i]);
+ StringRef Str = NMD->getName();
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ Record.push_back(Str[i]);
Stream.EmitRecord(bitc::METADATA_NAME, Record, 0/*TODO*/);
Record.clear();
@@ -540,7 +543,7 @@ static void WriteModuleMetadata(const ValueEnumerator &VE,
if (NMD->getOperand(i))
Record.push_back(VE.getValueID(NMD->getOperand(i)));
else
- Record.push_back(0);
+ Record.push_back(~0U);
}
Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0);
Record.clear();
@@ -551,6 +554,27 @@ static void WriteModuleMetadata(const ValueEnumerator &VE,
Stream.ExitBlock();
}
+static void WriteFunctionLocalMetadata(const Function &F,
+ const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ bool StartedMetadataBlock = false;
+ SmallVector<uint64_t, 64> Record;
+ const ValueEnumerator::ValueList &Vals = VE.getMDValues();
+
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i)
+ if (const MDNode *N = dyn_cast<MDNode>(Vals[i].first))
+ if (N->getFunction() == &F) {
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+ StartedMetadataBlock = true;
+ }
+ WriteMDNode(N, VE, Stream, Record);
+ }
+
+ if (StartedMetadataBlock)
+ Stream.ExitBlock();
+}
+
static void WriteMetadataAttachment(const Function &F,
const ValueEnumerator &VE,
BitstreamWriter &Stream) {
@@ -1194,6 +1218,9 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
VE.getFunctionConstantRange(CstStart, CstEnd);
WriteConstants(CstStart, CstEnd, VE, Stream, false);
+ // If there is function-local metadata, emit it now.
+ WriteFunctionLocalMetadata(F, VE, Stream);
+
// Keep a running idea of what the instruction ID is.
unsigned InstID = CstEnd;
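
Function-local metadata is emitted inside the function block, after the function's constants and before its instructions, so every metadata operand is defined by the time an instruction record referring to it is read back. The resulting order in WriteFunction:

    WriteConstants(CstStart, CstEnd, VE, Stream, false);  // 1. local constants
    WriteFunctionLocalMetadata(F, VE, Stream);            // 2. local metadata
    unsigned InstID = CstEnd;                             // 3. instructions follow
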
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index d8128db..cb139e5 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -74,9 +74,10 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
// Enumerate types used by the type symbol table.
EnumerateTypeSymbolTable(M->getTypeSymbolTable());
- // Insert constants that are named at module level into the slot pool so that
- // the module symbol table can refer to them...
+ // Insert constants and metadata that are named at module level into the slot
+ // pool so that the module symbol table can refer to them...
EnumerateValueSymbolTable(M->getValueSymbolTable());
+ EnumerateMDSymbolTable(M->getMDSymbolTable());
SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;
@@ -90,8 +91,13 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I){
for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
- OI != E; ++OI)
+ OI != E; ++OI) {
+ if (MDNode *MD = dyn_cast<MDNode>(*OI))
+ if (MD->isFunctionLocal())
+ // These will get enumerated during function-incorporation.
+ continue;
EnumerateOperandType(*OI);
+ }
EnumerateType(I->getType());
if (const CallInst *CI = dyn_cast<CallInst>(I))
EnumerateAttributes(CI->getAttributes());
@@ -196,6 +202,33 @@ void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
EnumerateValue(VI->getValue());
}
+/// EnumerateMDSymbolTable - Insert all of the values in the specified metadata
+/// table.
+void ValueEnumerator::EnumerateMDSymbolTable(const MDSymbolTable &MST) {
+ for (MDSymbolTable::const_iterator MI = MST.begin(), ME = MST.end();
+ MI != ME; ++MI)
+ EnumerateValue(MI->getValue());
+}
+
+void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) {
+ // Check to see if it's already in!
+ unsigned &MDValueID = MDValueMap[MD];
+ if (MDValueID) {
+ // Increment use count.
+ MDValues[MDValueID-1].second++;
+ return;
+ }
+
+ // Enumerate the type of this value.
+ EnumerateType(MD->getType());
+
+ for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
+ if (MDNode *E = MD->getOperand(i))
+ EnumerateValue(E);
+ MDValues.push_back(std::make_pair(MD, 1U));
+ MDValueMap[MD] = MDValues.size();
+}
+
void ValueEnumerator::EnumerateMetadata(const MetadataBase *MD) {
// Check to see if it's already in!
unsigned &MDValueID = MDValueMap[MD];
@@ -212,7 +245,7 @@ void ValueEnumerator::EnumerateMetadata(const MetadataBase *MD) {
MDValues.push_back(std::make_pair(MD, 1U));
MDValueMap[MD] = MDValues.size();
MDValueID = MDValues.size();
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
if (Value *V = N->getOperand(i))
EnumerateValue(V);
else
@@ -221,14 +254,6 @@ void ValueEnumerator::EnumerateMetadata(const MetadataBase *MD) {
return;
}
- if (const NamedMDNode *N = dyn_cast<NamedMDNode>(MD)) {
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- EnumerateValue(N->getOperand(i));
- MDValues.push_back(std::make_pair(MD, 1U));
- MDValueMap[MD] = Values.size();
- return;
- }
-
// Add the value.
assert(isa<MDString>(MD) && "Unknown metadata kind");
MDValues.push_back(std::make_pair(MD, 1U));
@@ -239,6 +264,8 @@ void ValueEnumerator::EnumerateValue(const Value *V) {
assert(!V->getType()->isVoidTy() && "Can't insert void values!");
if (const MetadataBase *MB = dyn_cast<MetadataBase>(V))
return EnumerateMetadata(MB);
+ else if (const NamedMDNode *NMD = dyn_cast<NamedMDNode>(V))
+ return EnumerateNamedMDNode(NMD);
// Check to see if it's already in!
unsigned &ValueID = ValueMap[V];
@@ -309,6 +336,7 @@ void ValueEnumerator::EnumerateType(const Type *Ty) {
// walk through it, enumerating the types of the constant.
void ValueEnumerator::EnumerateOperandType(const Value *V) {
EnumerateType(V->getType());
+
if (const Constant *C = dyn_cast<Constant>(V)) {
// If this constant is already enumerated, ignore it, we know its type must
// be enumerated.
@@ -382,7 +410,15 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
// Add all of the instructions.
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
- if (I->getType() != Type::getVoidTy(F.getContext()))
+ for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
+ OI != E; ++OI) {
+ if (MDNode *MD = dyn_cast<MDNode>(*OI))
+ if (!MD->isFunctionLocal())
+ // These were already enumerated during ValueEnumerator creation.
+ continue;
+ EnumerateOperandType(*OI);
+ }
+ if (!I->getType()->isVoidTy())
EnumerateValue(I);
}
}
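
Enumeration is now split by locality: the module-level walk skips operands of function-local MDNodes, and incorporateFunction() enumerates exactly those, so function-local nodes get their IDs inside the owning function. The filter both walks apply, restated as an illustrative helper (not part of the patch):

    // true -> defer to incorporateFunction(); false -> enumerate at module level.
    static bool isFunctionLocalMD(const Value *Op) {
      if (const MDNode *MD = dyn_cast<MDNode>(Op))
        return MD->isFunctionLocal();
      return false;
    }
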
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index 3c83e35..c50fe9c 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -27,9 +27,11 @@ class BasicBlock;
class Function;
class Module;
class MetadataBase;
+class NamedMDNode;
class AttrListPtr;
class TypeSymbolTable;
class ValueSymbolTable;
+class MDSymbolTable;
class ValueEnumerator {
public:
@@ -126,6 +128,7 @@ private:
void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
void EnumerateMetadata(const MetadataBase *MD);
+ void EnumerateNamedMDNode(const NamedMDNode *NMD);
void EnumerateValue(const Value *V);
void EnumerateType(const Type *T);
void EnumerateOperandType(const Value *V);
@@ -133,6 +136,7 @@ private:
void EnumerateTypeSymbolTable(const TypeSymbolTable &ST);
void EnumerateValueSymbolTable(const ValueSymbolTable &ST);
+ void EnumerateMDSymbolTable(const MDSymbolTable &ST);
};
} // End llvm namespace
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 761fbc6..ca1f4a3 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -1,4 +1,4 @@
-//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker -------- ---------===//
+//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -77,18 +77,18 @@ unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2)
{
assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!");
assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!");
-
+
// find group for each register
unsigned Group1 = GetGroup(Reg1);
unsigned Group2 = GetGroup(Reg2);
-
+
// if either group is 0, then that must become the parent
unsigned Parent = (Group1 == 0) ? Group1 : Group2;
unsigned Other = (Parent == Group1) ? Group2 : Group1;
GroupNodes.at(Other) = Parent;
return Parent;
}
-
+
unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg)
{
// Create a new GroupNode for Reg. Reg's existing GroupNode must
@@ -111,7 +111,7 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg)
AggressiveAntiDepBreaker::
AggressiveAntiDepBreaker(MachineFunction& MFi,
- TargetSubtarget::RegClassVector& CriticalPathRCs) :
+ TargetSubtarget::RegClassVector& CriticalPathRCs) :
AntiDepBreaker(), MF(MFi),
MRI(MF.getRegInfo()),
TRI(MF.getTarget().getRegisterInfo()),
@@ -126,9 +126,9 @@ AggressiveAntiDepBreaker(MachineFunction& MFi,
else
CriticalPathSet |= CPSet;
}
-
+
DEBUG(dbgs() << "AntiDep Critical-Path Registers:");
- DEBUG(for (int r = CriticalPathSet.find_first(); r != -1;
+ DEBUG(for (int r = CriticalPathSet.find_first(); r != -1;
r = CriticalPathSet.find_next(r))
dbgs() << " " << TRI->getName(r));
DEBUG(dbgs() << '\n');
@@ -232,10 +232,11 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
// schedule region).
if (State->IsLive(Reg)) {
DEBUG(if (State->GetGroup(Reg) != 0)
- dbgs() << " " << TRI->getName(Reg) << "=g" <<
+ dbgs() << " " << TRI->getName(Reg) << "=g" <<
State->GetGroup(Reg) << "->g0(region live-out)");
State->UnionGroups(Reg, 0);
- } else if ((DefIndices[Reg] < InsertPosIndex) && (DefIndices[Reg] >= Count)) {
+ } else if ((DefIndices[Reg] < InsertPosIndex)
+ && (DefIndices[Reg] >= Count)) {
DefIndices[Reg] = Count;
}
}
@@ -266,7 +267,7 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
- if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) ||
+ if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) ||
IsImplicitDefUse(MI, MO)) {
const unsigned Reg = MO.getReg();
PassthruRegs.insert(Reg);
@@ -320,11 +321,12 @@ static SUnit *CriticalPathStep(SUnit *SU) {
}
void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
- const char *tag, const char *header,
+ const char *tag,
+ const char *header,
const char *footer) {
unsigned *KillIndices = State->GetKillIndices();
unsigned *DefIndices = State->GetDefIndices();
- std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
if (!State->IsLive(Reg)) {
@@ -355,10 +357,12 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
DEBUG(if ((header == NULL) && (footer != NULL)) dbgs() << footer);
}
-void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Count,
- std::set<unsigned>& PassthruRegs) {
+void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
+ unsigned Count,
+ std::set<unsigned>& PassthruRegs)
+{
unsigned *DefIndices = State->GetDefIndices();
- std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
// Handle dead defs by simulating a last-use of the register just
@@ -371,7 +375,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou
if (!MO.isReg() || !MO.isDef()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
-
+
HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n");
}
@@ -382,7 +386,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
- DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg));
+ DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg));
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
@@ -398,11 +402,11 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou
unsigned AliasReg = *Alias;
if (State->IsLive(AliasReg)) {
State->UnionGroups(Reg, AliasReg);
- DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " <<
+ DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " <<
TRI->getName(AliasReg) << ")");
}
}
-
+
// Note register reference...
const TargetRegisterClass *RC = NULL;
if (i < MI->getDesc().getNumOperands())
@@ -438,7 +442,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou
void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
unsigned Count) {
DEBUG(dbgs() << "\tUse Groups:");
- std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
// Scan the register uses for this instruction and update
@@ -448,9 +452,9 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
if (!MO.isReg() || !MO.isUse()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
-
- DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" <<
- State->GetGroup(Reg));
+
+ DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" <<
+ State->GetGroup(Reg));
// It wasn't previously live but now it is, this is a kill. Forget
// the previous live-range information and start a new live-range
@@ -472,7 +476,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
RegRefs.insert(std::make_pair(Reg, RR));
}
-
+
DEBUG(dbgs() << '\n');
// Form a group of all defs and uses of a KILL instruction to ensure
@@ -486,7 +490,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
-
+
if (FirstReg != 0) {
DEBUG(dbgs() << "=" << TRI->getName(Reg));
State->UnionGroups(FirstReg, Reg);
@@ -495,7 +499,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
FirstReg = Reg;
}
}
-
+
DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n');
}
}
@@ -507,13 +511,14 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
// Check all references that need rewriting for Reg. For each, use
// the corresponding register class to narrow the set of registers
// that are appropriate for renaming.
- std::pair<std::multimap<unsigned,
+ std::pair<std::multimap<unsigned,
AggressiveAntiDepState::RegisterReference>::iterator,
std::multimap<unsigned,
AggressiveAntiDepState::RegisterReference>::iterator>
Range = State->GetRegRefs().equal_range(Reg);
- for (std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>::iterator
- Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+ for (std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first,
+ QE = Range.second; Q != QE; ++Q) {
const TargetRegisterClass *RC = Q->second.RC;
if (RC == NULL) continue;
@@ -527,9 +532,9 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
DEBUG(dbgs() << " " << RC->getName());
}
-
+
return BV;
-}
+}
bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
unsigned AntiDepGroupIndex,
@@ -537,7 +542,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
std::map<unsigned, unsigned> &RenameMap) {
unsigned *KillIndices = State->GetKillIndices();
unsigned *DefIndices = State->GetDefIndices();
- std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
// Collect all referenced registers in the same group as
@@ -552,7 +557,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
// Find the "superest" register in the group. At the same time,
// collect the BitVector of registers that can be used to rename
// each register.
- DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex << ":\n");
+ DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex
+ << ":\n");
std::map<unsigned, BitVector> RenameRegisterMap;
unsigned SuperReg = 0;
for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
@@ -563,7 +569,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
// If Reg has any references, then collect possible rename regs
if (RegRefs.count(Reg) > 0) {
DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":");
-
+
BitVector BV = GetRenameRegisters(Reg);
RenameRegisterMap.insert(std::pair<unsigned, BitVector>(Reg, BV));
@@ -590,7 +596,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
static int renamecnt = 0;
if (renamecnt++ % DebugDiv != DebugMod)
return false;
-
+
dbgs() << "*** Performing rename " << TRI->getName(SuperReg) <<
" for debug ***\n";
}
@@ -600,9 +606,9 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
// order. If that register is available, and the corresponding
// registers are available for the other group subregisters, then we
// can use those registers to rename.
- const TargetRegisterClass *SuperRC =
+ const TargetRegisterClass *SuperRC =
TRI->getPhysicalRegisterRegClass(SuperReg, MVT::Other);
-
+
const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF);
const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF);
if (RB == RE) {
@@ -624,7 +630,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
const unsigned NewSuperReg = *R;
// Don't replace a register with itself.
if (NewSuperReg == SuperReg) continue;
-
+
DEBUG(dbgs() << " [" << TRI->getName(NewSuperReg) << ':');
RenameMap.clear();
@@ -643,7 +649,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
}
DEBUG(dbgs() << " " << TRI->getName(NewReg));
-
+
// Check if Reg can be renamed to NewReg.
BitVector BV = RenameRegisterMap[Reg];
if (!BV.test(NewReg)) {
@@ -663,7 +669,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
for (const unsigned *Alias = TRI->getAliasSet(NewReg);
*Alias; ++Alias) {
unsigned AliasReg = *Alias;
- if (State->IsLive(AliasReg) || (KillIndices[Reg] > DefIndices[AliasReg])) {
+ if (State->IsLive(AliasReg) ||
+ (KillIndices[Reg] > DefIndices[AliasReg])) {
DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)");
found = true;
break;
@@ -672,11 +679,11 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
if (found)
goto next_super_reg;
}
-
+
// Record that 'Reg' can be renamed to 'NewReg'.
RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg));
}
-
+
// If we fall-out here, then every register in the group can be
// renamed, as recorded in RenameMap.
RenameOrder.erase(SuperRC);
@@ -704,13 +711,13 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
unsigned InsertPosIndex) {
unsigned *KillIndices = State->GetKillIndices();
unsigned *DefIndices = State->GetDefIndices();
- std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
// The code below assumes that there is at least one instruction,
// so just duck out immediately if the block is empty.
if (SUnits.empty()) return 0;
-
+
// For each regclass the next register to use for renaming.
RenameOrderType RenameOrder;
@@ -729,17 +736,17 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
if (CriticalPathSet.any()) {
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
SUnit *SU = &SUnits[i];
- if (!CriticalPathSU ||
- ((SU->getDepth() + SU->Latency) >
+ if (!CriticalPathSU ||
+ ((SU->getDepth() + SU->Latency) >
(CriticalPathSU->getDepth() + CriticalPathSU->Latency))) {
CriticalPathSU = SU;
}
}
-
+
CriticalPathMI = CriticalPathSU->getInstr();
}
-#ifndef NDEBUG
+#ifndef NDEBUG
DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n");
DEBUG(dbgs() << "Available regs:");
for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
@@ -766,7 +773,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// Process the defs in MI...
PrescanInstruction(MI, Count, PassthruRegs);
-
+
// The dependence edges that represent anti- and output-
// dependencies that are candidates for breaking.
std::vector<SDep*> Edges;
@@ -779,7 +786,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
if (MI == CriticalPathMI) {
CriticalPathSU = CriticalPathStep(CriticalPathSU);
CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0;
- } else {
+ } else {
ExcludeRegs = &CriticalPathSet;
}
@@ -790,14 +797,14 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
for (unsigned i = 0, e = Edges.size(); i != e; ++i) {
SDep *Edge = Edges[i];
SUnit *NextSU = Edge->getSUnit();
-
+
if ((Edge->getKind() != SDep::Anti) &&
(Edge->getKind() != SDep::Output)) continue;
-
+
unsigned AntiDepReg = Edge->getReg();
DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
-
+
if (!AllocatableSet.test(AntiDepReg)) {
// Don't break anti-dependencies on non-allocatable registers.
DEBUG(dbgs() << " (non-allocatable)\n");
@@ -816,12 +823,13 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
} else {
// No anti-dep breaking for implicit deps
MachineOperand *AntiDepOp = MI->findRegisterDefOperand(AntiDepReg);
- assert(AntiDepOp != NULL && "Can't find index for defined register operand");
+ assert(AntiDepOp != NULL &&
+ "Can't find index for defined register operand");
if ((AntiDepOp == NULL) || AntiDepOp->isImplicit()) {
DEBUG(dbgs() << " (implicit)\n");
continue;
}
-
+
// If the SUnit has other dependencies on the SUnit that
// it anti-depends on, don't bother breaking the
// anti-dependency since those edges would prevent such
@@ -847,58 +855,59 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
DEBUG(dbgs() << " (real dependency)\n");
AntiDepReg = 0;
break;
- } else if ((P->getSUnit() != NextSU) &&
- (P->getKind() == SDep::Data) &&
+ } else if ((P->getSUnit() != NextSU) &&
+ (P->getKind() == SDep::Data) &&
(P->getReg() == AntiDepReg)) {
DEBUG(dbgs() << " (other dependency)\n");
AntiDepReg = 0;
break;
}
}
-
+
if (AntiDepReg == 0) continue;
}
-
+
assert(AntiDepReg != 0);
if (AntiDepReg == 0) continue;
-
+
// Determine AntiDepReg's register group.
const unsigned GroupIndex = State->GetGroup(AntiDepReg);
if (GroupIndex == 0) {
DEBUG(dbgs() << " (zero group)\n");
continue;
}
-
+
DEBUG(dbgs() << '\n');
-
+
// Look for a suitable register to use to break the anti-dependence.
std::map<unsigned, unsigned> RenameMap;
if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) {
DEBUG(dbgs() << "\tBreaking anti-dependence edge on "
<< TRI->getName(AntiDepReg) << ":");
-
+
// Handle each group register...
for (std::map<unsigned, unsigned>::iterator
S = RenameMap.begin(), E = RenameMap.end(); S != E; ++S) {
unsigned CurrReg = S->first;
unsigned NewReg = S->second;
-
- DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" <<
- TRI->getName(NewReg) << "(" <<
+
+ DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" <<
+ TRI->getName(NewReg) << "(" <<
RegRefs.count(CurrReg) << " refs)");
-
+
// Update the references to the old register CurrReg to
// refer to the new register NewReg.
- std::pair<std::multimap<unsigned,
- AggressiveAntiDepState::RegisterReference>::iterator,
+ std::pair<std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator,
std::multimap<unsigned,
- AggressiveAntiDepState::RegisterReference>::iterator>
+ AggressiveAntiDepState::RegisterReference>::iterator>
Range = RegRefs.equal_range(CurrReg);
- for (std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>::iterator
+ for (std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference>::iterator
Q = Range.first, QE = Range.second; Q != QE; ++Q) {
Q->second.Operand->setReg(NewReg);
}
-
+
// We just went back in time and modified history; the
// liveness information for CurrReg is now inconsistent. Set
// the state as if it were dead.
@@ -906,7 +915,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
RegRefs.erase(NewReg);
DefIndices[NewReg] = DefIndices[CurrReg];
KillIndices[NewReg] = KillIndices[CurrReg];
-
+
State->UnionGroups(CurrReg, 0);
RegRefs.erase(CurrReg);
DefIndices[CurrReg] = KillIndices[CurrReg];
@@ -915,7 +924,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
(DefIndices[CurrReg] == ~0u)) &&
"Kill and Def maps aren't consistent for AntiDepReg!");
}
-
+
++Broken;
DEBUG(dbgs() << '\n');
}
@@ -924,6 +933,6 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
ScanInstruction(MI, Count);
}
-
+
return Broken;
}
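
The equal_range walk above is the workhorse of the rename step: every
reference recorded for the old register is retargeted in place. Below is a
minimal standalone sketch of the same pattern; RegisterReference here is an
illustrative stand-in, not the pass's actual type.

    #include <cstdio>
    #include <map>

    struct RegisterReference { unsigned *Operand; };

    int main() {
      unsigned Ops[3] = { 5, 5, 5 };
      std::multimap<unsigned, RegisterReference> RegRefs;
      for (int i = 0; i < 3; ++i) {
        RegisterReference R = { &Ops[i] };
        RegRefs.insert(std::make_pair(5u, R));
      }

      // Rewrite every recorded reference of register 5 to register 7,
      // mirroring the equal_range loop in BreakAntiDependencies.
      typedef std::multimap<unsigned, RegisterReference>::iterator Iter;
      std::pair<Iter, Iter> Range = RegRefs.equal_range(5u);
      for (Iter Q = Range.first; Q != Range.second; ++Q)
        *Q->second.Operand = 7;

      for (int i = 0; i < 3; ++i)
        std::printf("%u\n", Ops[i]);  // prints 7 three times
      return 0;
    }
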
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index d385a21..a62d68c 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -30,7 +30,7 @@
#include <map>
namespace llvm {
- /// Class AggressiveAntiDepState
+ /// Class AggressiveAntiDepState
/// Contains all the state necessary for anti-dep breaking.
class AggressiveAntiDepState {
public:
@@ -54,27 +54,27 @@ namespace llvm {
/// is the parent of a group, or point to another node to indicate
/// that it is a member of the same group as that node.
std::vector<unsigned> GroupNodes;
-
+
/// GroupNodeIndices - For each register, the index of the GroupNode
/// currently representing the group that the register belongs to.
/// Register 0 is always represented by the 0 group, a group
/// composed of registers that are not eligible for anti-aliasing.
unsigned GroupNodeIndices[TargetRegisterInfo::FirstVirtualRegister];
-
+
/// RegRefs - Map registers to all their references within a live range.
std::multimap<unsigned, RegisterReference> RegRefs;
-
+
    /// KillIndices - The index of the most recent kill (proceeding bottom-up),
/// or ~0u if the register is not live.
unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
-
+
    /// DefIndices - The index of the most recent complete def (proceeding bottom
/// up), or ~0u if the register is live.
unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
public:
AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
-
+
/// GetKillIndices - Return the kill indices.
unsigned *GetKillIndices() { return KillIndices; }
@@ -87,13 +87,14 @@ namespace llvm {
// GetGroup - Get the group for a register. The returned value is
// the index of the GroupNode representing the group.
unsigned GetGroup(unsigned Reg);
-
+
// GetGroupRegs - Return a vector of the registers belonging to a
    // group. If RegRefs is non-NULL then only include referenced registers.
void GetGroupRegs(
unsigned Group,
std::vector<unsigned> &Regs,
- std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs);
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference> *RegRefs);
// UnionGroups - Union Reg1's and Reg2's groups to form a new
// group. Return the index of the GroupNode representing the
@@ -110,7 +111,7 @@ namespace llvm {
};
- /// Class AggressiveAntiDepBreaker
+ /// Class AggressiveAntiDepBreaker
class AggressiveAntiDepBreaker : public AntiDepBreaker {
MachineFunction& MF;
MachineRegisterInfo &MRI;
@@ -130,14 +131,15 @@ namespace llvm {
AggressiveAntiDepState *State;
public:
- AggressiveAntiDepBreaker(MachineFunction& MFi,
+ AggressiveAntiDepBreaker(MachineFunction& MFi,
TargetSubtarget::RegClassVector& CriticalPathRCs);
~AggressiveAntiDepBreaker();
-
+
/// Start - Initialize anti-dep breaking for a new basic block.
void StartBlock(MachineBasicBlock *BB);
- /// BreakAntiDependencies - Identifiy anti-dependencies along the critical path
+    /// BreakAntiDependencies - Identify anti-dependencies along the critical
+ /// path
/// of the ScheduleDAG and break them by renaming registers.
///
unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
@@ -160,7 +162,7 @@ namespace llvm {
/// IsImplicitDefUse - Return true if MO represents a register
/// that is both implicitly used and defined in MI
bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO);
-
+
/// GetPassthruRegs - If MI implicitly def/uses a register, then
/// return that register and all subregisters.
void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs);
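
The GroupNodes/GroupNodeIndices members declared above form a small
union-find structure: a node either represents itself as the parent of its
group or points at another node in the same group. A sketch of just those
mechanics (the real class adds per-register index bookkeeping on top):

    #include <vector>

    struct Groups {
      std::vector<unsigned> Parent;               // GroupNodes analogue

      explicit Groups(unsigned N) : Parent(N) {
        for (unsigned i = 0; i != N; ++i)
          Parent[i] = i;                          // everyone starts alone
      }
      unsigned find(unsigned Node) {              // GetGroup analogue
        while (Parent[Node] != Node)
          Node = Parent[Node];                    // chase parent pointers
        return Node;
      }
      unsigned unite(unsigned A, unsigned B) {    // UnionGroups analogue
        unsigned RA = find(A), RB = find(B);
        Parent[RB] = RA;                          // B's group joins A's
        return RA;
      }
    };
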
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 6b24e24..876f628 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -807,124 +807,145 @@ void AsmPrinter::EmitZeros(uint64_t NumZeros, unsigned AddrSpace) const {
// Print out the specified constant, without a storage class. Only the
// constants valid in constant expressions can occur here.
void AsmPrinter::EmitConstantValueOnly(const Constant *CV) {
- if (CV->isNullValue() || isa<UndefValue>(CV))
+ if (CV->isNullValue() || isa<UndefValue>(CV)) {
O << '0';
- else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ return;
+ }
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
O << CI->getZExtValue();
- } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+ return;
+ }
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
// This is a constant address for a global variable or function. Use the
// name of the variable or function as the address value.
O << Mang->getMangledName(GV);
- } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ return;
+ }
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
+ GetBlockAddressSymbol(BA)->print(O, MAI);
+ return;
+ }
+
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+ if (CE == 0) {
+ llvm_unreachable("Unknown constant value!");
+ O << '0';
+ return;
+ }
+
+ switch (CE->getOpcode()) {
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ default:
+ llvm_unreachable("FIXME: Don't support this constant cast expr");
+ case Instruction::GetElementPtr: {
+ // generate a symbolic expression for the byte address
const TargetData *TD = TM.getTargetData();
- unsigned Opcode = CE->getOpcode();
- switch (Opcode) {
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- llvm_unreachable("FIXME: Don't support this constant cast expr");
- case Instruction::GetElementPtr: {
- // generate a symbolic expression for the byte address
- const Constant *ptrVal = CE->getOperand(0);
- SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
- if (int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0],
- idxVec.size())) {
- // Truncate/sext the offset to the pointer size.
- if (TD->getPointerSizeInBits() != 64) {
- int SExtAmount = 64-TD->getPointerSizeInBits();
- Offset = (Offset << SExtAmount) >> SExtAmount;
- }
-
- if (Offset)
- O << '(';
- EmitConstantValueOnly(ptrVal);
- if (Offset > 0)
- O << ") + " << Offset;
- else if (Offset < 0)
- O << ") - " << -Offset;
- } else {
- EmitConstantValueOnly(ptrVal);
- }
- break;
+ const Constant *ptrVal = CE->getOperand(0);
+ SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
+ int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0],
+ idxVec.size());
+ if (Offset == 0)
+ return EmitConstantValueOnly(ptrVal);
+
+ // Truncate/sext the offset to the pointer size.
+ if (TD->getPointerSizeInBits() != 64) {
+ int SExtAmount = 64-TD->getPointerSizeInBits();
+ Offset = (Offset << SExtAmount) >> SExtAmount;
}
- case Instruction::BitCast:
- return EmitConstantValueOnly(CE->getOperand(0));
-
- case Instruction::IntToPtr: {
- // Handle casts to pointers by changing them into casts to the appropriate
- // integer type. This promotes constant folding and simplifies this code.
- Constant *Op = CE->getOperand(0);
- Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()),
- false/*ZExt*/);
+
+ if (Offset)
+ O << '(';
+ EmitConstantValueOnly(ptrVal);
+ if (Offset > 0)
+ O << ") + " << Offset;
+ else
+ O << ") - " << -Offset;
+ return;
+ }
+ case Instruction::BitCast:
+ return EmitConstantValueOnly(CE->getOperand(0));
+
+ case Instruction::IntToPtr: {
+ // Handle casts to pointers by changing them into casts to the appropriate
+ // integer type. This promotes constant folding and simplifies this code.
+ const TargetData *TD = TM.getTargetData();
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()),
+ false/*ZExt*/);
+ return EmitConstantValueOnly(Op);
+ }
+
+ case Instruction::PtrToInt: {
+ // Support only foldable casts to/from pointers that can be eliminated by
+ // changing the pointer to the appropriately sized integer type.
+ Constant *Op = CE->getOperand(0);
+ const Type *Ty = CE->getType();
+ const TargetData *TD = TM.getTargetData();
+
+ // We can emit the pointer value into this slot if the slot is an
+    // integer slot greater than or equal to the size of the pointer.
+ if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType()))
return EmitConstantValueOnly(Op);
- }
-
+
+ O << "((";
+ EmitConstantValueOnly(Op);
+ APInt ptrMask =
+ APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Op->getType()));
+
+ SmallString<40> S;
+ ptrMask.toStringUnsigned(S);
+ O << ") & " << S.str() << ')';
+ return;
+ }
- case Instruction::PtrToInt: {
- // Support only foldable casts to/from pointers that can be eliminated by
- // changing the pointer to the appropriately sized integer type.
- Constant *Op = CE->getOperand(0);
- const Type *Ty = CE->getType();
-
- // We can emit the pointer value into this slot if the slot is an
- // integer slot greater or equal to the size of the pointer.
- if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType()))
- return EmitConstantValueOnly(Op);
-
- O << "((";
- EmitConstantValueOnly(Op);
- APInt ptrMask =
- APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Op->getType()));
+ case Instruction::Trunc:
+ // We emit the value and depend on the assembler to truncate the generated
+ // expression properly. This is important for differences between
+ // blockaddress labels. Since the two labels are in the same function, it
+ // is reasonable to treat their delta as a 32-bit value.
+ return EmitConstantValueOnly(CE->getOperand(0));
- SmallString<40> S;
- ptrMask.toStringUnsigned(S);
- O << ") & " << S.str() << ')';
- break;
- }
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ O << '(';
+ EmitConstantValueOnly(CE->getOperand(0));
+ O << ')';
+ switch (CE->getOpcode()) {
case Instruction::Add:
+ O << " + ";
+ break;
case Instruction::Sub:
+ O << " - ";
+ break;
case Instruction::And:
+ O << " & ";
+ break;
case Instruction::Or:
+ O << " | ";
+ break;
case Instruction::Xor:
- O << '(';
- EmitConstantValueOnly(CE->getOperand(0));
- O << ')';
- switch (Opcode) {
- case Instruction::Add:
- O << " + ";
- break;
- case Instruction::Sub:
- O << " - ";
- break;
- case Instruction::And:
- O << " & ";
- break;
- case Instruction::Or:
- O << " | ";
- break;
- case Instruction::Xor:
- O << " ^ ";
- break;
- default:
- break;
- }
- O << '(';
- EmitConstantValueOnly(CE->getOperand(1));
- O << ')';
- break;
+ O << " ^ ";
+ break;
default:
- llvm_unreachable("Unsupported operator!");
+ break;
}
- } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
- GetBlockAddressSymbol(BA)->print(O, MAI);
- } else {
- llvm_unreachable("Unknown constant value!");
+ O << '(';
+ EmitConstantValueOnly(CE->getOperand(1));
+ O << ')';
+ break;
}
}
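
The shift pair in the GetElementPtr case above truncates and sign-extends a
64-bit byte offset to the target's pointer width. A self-contained sketch of
that trick; like the code above, it relies on arithmetic right shift of
signed values, which mainstream compilers provide:

    #include <cassert>
    #include <stdint.h>

    int64_t signExtendToPointer(int64_t Offset, unsigned PtrBits) {
      assert(PtrBits > 0 && PtrBits <= 64);
      if (PtrBits == 64) return Offset;   // nothing to do at full width
      int SExtAmount = 64 - int(PtrBits);
      return (Offset << SExtAmount) >> SExtAmount;
    }
    // signExtendToPointer(0xFFFFFFFFLL, 32) == -1
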
@@ -1225,8 +1246,7 @@ void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
unsigned AddrSpace) {
const TargetData *TD = TM.getTargetData();
unsigned BitWidth = CI->getBitWidth();
- assert(isPowerOf2_32(BitWidth) &&
- "Non-power-of-2-sized integers not handled!");
+ assert((BitWidth & 63) == 0 && "only support multiples of 64-bits");
// We don't expect assemblers to support integer data directives
// for more than 64 bits, so we emit the data in at most 64-bit
@@ -1239,39 +1259,34 @@ void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
else
Val = RawData[i];
- if (MAI->getData64bitsDirective(AddrSpace))
+ if (MAI->getData64bitsDirective(AddrSpace)) {
O << MAI->getData64bitsDirective(AddrSpace) << Val << '\n';
- else if (TD->isBigEndian()) {
- O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32);
- if (VerboseAsm) {
- O.PadToColumn(MAI->getCommentColumn());
- O << MAI->getCommentString()
- << " most significant half of i64 " << Val;
- }
- O << '\n';
- O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val);
- if (VerboseAsm) {
- O.PadToColumn(MAI->getCommentColumn());
- O << MAI->getCommentString()
- << " least significant half of i64 " << Val;
- }
- O << '\n';
- } else {
- O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val);
- if (VerboseAsm) {
- O.PadToColumn(MAI->getCommentColumn());
- O << MAI->getCommentString()
- << " least significant half of i64 " << Val;
- }
- O << '\n';
- O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32);
- if (VerboseAsm) {
- O.PadToColumn(MAI->getCommentColumn());
- O << MAI->getCommentString()
- << " most significant half of i64 " << Val;
- }
- O << '\n';
+ continue;
}
+
+    // Emit two 32-bit chunks; their order depends on endianness.
+ unsigned FirstChunk = unsigned(Val), SecondChunk = unsigned(Val >> 32);
+ const char *FirstName = " least", *SecondName = " most";
+ if (TD->isBigEndian()) {
+ std::swap(FirstChunk, SecondChunk);
+ std::swap(FirstName, SecondName);
+ }
+
+ O << MAI->getData32bitsDirective(AddrSpace) << FirstChunk;
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << FirstName << " significant half of i64 " << Val;
+ }
+ O << '\n';
+
+ O << MAI->getData32bitsDirective(AddrSpace) << SecondChunk;
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString()
+ << SecondName << " significant half of i64 " << Val;
+ }
+ O << '\n';
}
}
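
The rewritten loop above emits each 64-bit word as two 32-bit directives,
least-significant half first on little-endian targets and most-significant
half first on big-endian ones. A compact sketch of the same chunking, with
printf standing in for the assembler stream:

    #include <algorithm>
    #include <cstdio>
    #include <stdint.h>

    void emitAsTwo32BitChunks(uint64_t Val, bool IsBigEndian) {
      unsigned First = unsigned(Val), Second = unsigned(Val >> 32);
      const char *FirstName = "least", *SecondName = "most";
      if (IsBigEndian) {
        std::swap(First, Second);         // big-endian: high half first
        std::swap(FirstName, SecondName);
      }
      std::printf(".long %u\t# %s significant half\n", First, FirstName);
      std::printf(".long %u\t# %s significant half\n", Second, SecondName);
    }
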
@@ -1284,22 +1299,39 @@ void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
if (CV->isNullValue() || isa<UndefValue>(CV)) {
EmitZeros(Size, AddrSpace);
return;
- } else if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
+ }
+
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
EmitGlobalConstantArray(CVA , AddrSpace);
return;
- } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
+ }
+
+ if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
EmitGlobalConstantStruct(CVS, AddrSpace);
return;
- } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ }
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
EmitGlobalConstantFP(CFP, AddrSpace);
return;
- } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ }
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ // If we can directly emit an 8-byte constant, do it.
+ if (Size == 8)
+ if (const char *Data64Dir = MAI->getData64bitsDirective(AddrSpace)) {
+ O << Data64Dir << CI->getZExtValue() << '\n';
+ return;
+ }
+
// Small integers are handled below; large integers are handled here.
if (Size > 4) {
EmitGlobalConstantLargeInt(CI, AddrSpace);
return;
}
- } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ }
+
+ if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
EmitGlobalConstantVector(CP);
return;
}
@@ -1617,7 +1649,7 @@ void AsmPrinter::printLabel(unsigned Id) const {
/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
/// instruction, using the specified assembler variant. Targets should
-/// overried this to format as appropriate.
+/// override this to format as appropriate.
bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode) {
// Target doesn't support this yet!
@@ -1645,15 +1677,17 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F,
// This code must use the function name itself, and not the function number,
// since it must be possible to generate the label name from within other
// functions.
- std::string FuncName = Mang->getMangledName(F);
+ SmallString<60> FnName;
+ Mang->getNameWithPrefix(FnName, F, false);
- SmallString<60> Name;
- raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "BA"
- << FuncName.size() << '_' << FuncName << '_'
- << Mang->makeNameProper(BB->getName())
- << Suffix;
+ // FIXME: THIS IS BROKEN IF THE LLVM BASIC BLOCK DOESN'T HAVE A NAME!
+ SmallString<60> NameResult;
+ Mang->getNameWithPrefix(NameResult,
+ StringRef("BA") + Twine((unsigned)FnName.size()) +
+ "_" + FnName.str() + "_" + BB->getName() + Suffix,
+ Mangler::Private);
- return OutContext.GetOrCreateSymbol(Name.str());
+ return OutContext.GetOrCreateSymbol(NameResult.str());
}
MCSymbol *AsmPrinter::GetMBBSymbol(unsigned MBBID) const {
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 8a3ceb6..15f37ae 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -212,19 +212,30 @@ public:
///
void addVariable(DbgVariable *V) { Variables.push_back(V); }
- void fixInstructionMarkers() {
+ void fixInstructionMarkers(DenseMap<const MachineInstr *,
+ unsigned> &MIIndexMap) {
assert (getFirstInsn() && "First instruction is missing!");
- if (getLastInsn())
- return;
-
- // If a scope does not have an instruction to mark an end then use
- // the end of last child scope.
+
+    // Use the end of the child scope that ends last as the end of this scope.
SmallVector<DbgScope *, 4> &Scopes = getScopes();
- assert (!Scopes.empty() && "Inner most scope does not have last insn!");
- DbgScope *L = Scopes.back();
- if (!L->getLastInsn())
- L->fixInstructionMarkers();
- setLastInsn(L->getLastInsn());
+ const MachineInstr *LastInsn = getFirstInsn();
+ unsigned LIndex = 0;
+ if (Scopes.empty()) {
+ assert (getLastInsn() && "Inner most scope does not have last insn!");
+ return;
+ }
+ for (SmallVector<DbgScope *, 4>::iterator SI = Scopes.begin(),
+ SE = Scopes.end(); SI != SE; ++SI) {
+ DbgScope *DS = *SI;
+ DS->fixInstructionMarkers(MIIndexMap);
+ const MachineInstr *DSLastInsn = DS->getLastInsn();
+ unsigned DSI = MIIndexMap[DSLastInsn];
+ if (DSI > LIndex) {
+ LastInsn = DSLastInsn;
+ LIndex = DSI;
+ }
+ }
+ setLastInsn(LastInsn);
}
#ifndef NDEBUG
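
The reworked fixInstructionMarkers above selects, among all child scopes, the
last instruction whose index in the precomputed instruction-to-index map is
greatest. A sketch of that selection, with plain pointers standing in for
MachineInstr* and illustrative type names:

    #include <map>
    #include <vector>

    struct Scope {
      const void *FirstInsn, *LastInsn;
      std::vector<Scope*> Children;

      void fixMarkers(std::map<const void*, unsigned> &MIIndexMap) {
        if (Children.empty()) return;   // leaf scopes already carry LastInsn
        const void *Last = FirstInsn;
        unsigned LIndex = 0;
        for (size_t i = 0; i != Children.size(); ++i) {
          Children[i]->fixMarkers(MIIndexMap);
          unsigned CIndex = MIIndexMap[Children[i]->LastInsn];
          if (CIndex > LIndex) {
            Last = Children[i]->LastInsn; // this child ends later
            LIndex = CIndex;
          }
        }
        LastInsn = Last;
      }
    };
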
@@ -1021,6 +1032,16 @@ DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator *ETy) {
return Enumerator;
}
+/// getRealLinkageName - If the linkage name begins with the special LLVM
+/// prefix that tells the asm printer not to emit the usual symbol prefix,
+/// return the linkage name with that prefix skipped.
+static StringRef getRealLinkageName(StringRef LinkageName) {
+ char One = '\1';
+ if (LinkageName.startswith(StringRef(&One, 1)))
+ return LinkageName.substr(1);
+ return LinkageName;
+}
+
/// createGlobalVariableDIE - Create new DIE using GV.
DIE *DwarfDebug::createGlobalVariableDIE(const DIGlobalVariable &GV) {
  // If the global variable was optimized out then no need to create debug info
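
The getRealLinkageName helper added above strips the '\1' marker that tells
the asm printer to suppress the usual symbol prefix (Objective-C names and
GCC __asm__ renames use it). The same check, sketched against std::string
instead of StringRef:

    #include <string>

    std::string realLinkageName(const std::string &Name) {
      // A leading '\1' byte means "emit this name without the usual symbol
      // prefix"; debug info wants the name without that marker.
      if (!Name.empty() && Name[0] == '\1')
        return Name.substr(1);
      return Name;
    }
    // realLinkageName("\1_foo") == "_foo", realLinkageName("bar") == "bar"
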
@@ -1033,16 +1054,10 @@ DIE *DwarfDebug::createGlobalVariableDIE(const DIGlobalVariable &GV) {
GV.getDisplayName());
StringRef LinkageName = GV.getLinkageName();
- if (!LinkageName.empty()) {
- // Skip special LLVM prefix that is used to inform the asm printer to not
- // emit usual symbol prefix before the symbol name. This happens for
- // Objective-C symbol names and symbol whose name is replaced using GCC's
- // __asm__ attribute.
- if (LinkageName[0] == 1)
- LinkageName = LinkageName.substr(1);
+ if (!LinkageName.empty())
addString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
- LinkageName);
- }
+ getRealLinkageName(LinkageName));
+
addType(GVDie, GV.getType());
if (!GV.isLocalToUnit())
addUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
@@ -1074,10 +1089,9 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) {
addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
uint64_t Offset = DT.getOffsetInBits();
- uint64_t FieldOffset = Offset;
uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
uint64_t HiMark = (Offset + FieldSize) & AlignMask;
- FieldOffset = (HiMark - FieldSize);
+ uint64_t FieldOffset = (HiMark - FieldSize);
Offset -= FieldOffset;
// Maybe we need to work from the other end.
@@ -1119,16 +1133,10 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName());
StringRef LinkageName = SP.getLinkageName();
- if (!LinkageName.empty()) {
- // Skip special LLVM prefix that is used to inform the asm printer to not
- // emit usual symbol prefix before the symbol name. This happens for
- // Objective-C symbol names and symbol whose name is replaced using GCC's
- // __asm__ attribute.
- if (LinkageName[0] == 1)
- LinkageName = LinkageName.substr(1);
+ if (!LinkageName.empty())
addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
- LinkageName);
- }
+ getRealLinkageName(LinkageName));
+
addSourceLine(SPDie, &SP);
// Add prototyped tag, if C or ObjC.
@@ -1382,7 +1390,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
I->second.push_back(std::make_pair(StartID, ScopeDIE));
StringPool.insert(InlinedSP.getName());
- StringPool.insert(InlinedSP.getLinkageName());
+ StringPool.insert(getRealLinkageName(InlinedSP.getLinkageName()));
+
DILocation DL(Scope->getInlinedAt());
addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID());
addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
@@ -1644,8 +1653,11 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
ModuleCU->insertDIE(N, VariableDie);
// Add to context owner.
- if (DI_GV.isDefinition()
- && !DI_GV.getContext().isCompileUnit()) {
+ DIDescriptor GVContext = DI_GV.getContext();
+  // Do not create a specification DIE if the context is either a compile
+  // unit or a subprogram.
+ if (DI_GV.isDefinition() && !GVContext.isCompileUnit()
+ && !GVContext.isSubprogram()) {
// Create specification DIE.
DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
@@ -1663,7 +1675,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
Asm->Mang->getMangledName(DI_GV.getGlobal()));
addBlock(VariableDie, dwarf::DW_AT_location, 0, Block);
}
- addToContextOwner(VariableDie, DI_GV.getContext());
+ addToContextOwner(VariableDie, GVContext);
// Expose as global. FIXME - need to check external flag.
ModuleCU->addGlobal(DI_GV.getName(), VariableDie);
@@ -1804,7 +1816,8 @@ void DwarfDebug::endModule() {
DIE *NDie = ModuleCU->getDIE(N);
if (!NDie) continue;
addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
- addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
+ // FIXME - This is not the correct approach.
+ // addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
}
// Standard sections final addresses.
@@ -1976,12 +1989,15 @@ bool DwarfDebug::extractScopeInformation(MachineFunction *MF) {
if (!DbgScopeMap.empty())
return false;
+ DenseMap<const MachineInstr *, unsigned> MIIndexMap;
+ unsigned MIIndex = 0;
// Scan each instruction and create scopes. First build working set of scopes.
for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
I != E; ++I) {
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
const MachineInstr *MInsn = II;
+ MIIndexMap[MInsn] = MIIndex++;
DebugLoc DL = MInsn->getDebugLoc();
if (DL.isUnknown()) continue;
DebugLocTuple DLT = MF->getDebugLocTuple(DL);
@@ -2014,16 +2030,10 @@ bool DwarfDebug::extractScopeInformation(MachineFunction *MF) {
}
}
- // If a scope's last instruction is not set then use its child scope's
- // last instruction as this scope's last instrunction.
- for (ValueMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(),
- DE = DbgScopeMap.end(); DI != DE; ++DI) {
- if (DI->second->isAbstractScope())
- continue;
- assert (DI->second->getFirstInsn() && "Invalid first instruction!");
- DI->second->fixInstructionMarkers();
- assert (DI->second->getLastInsn() && "Invalid last instruction!");
- }
+ if (!CurrentFnDbgScope)
+ return false;
+
+ CurrentFnDbgScope->fixInstructionMarkers(MIIndexMap);
// Each scope has first instruction and last instruction to mark beginning
// and end of a scope respectively. Create an inverse map that list scopes
@@ -2105,38 +2115,41 @@ void DwarfDebug::endFunction(MachineFunction *MF) {
if (DbgScopeMap.empty())
return;
- // Define end label for subprogram.
- EmitLabel("func_end", SubprogramCount);
-
- // Get function line info.
- if (!Lines.empty()) {
- // Get section line info.
- unsigned ID = SectionMap.insert(Asm->getCurrentSection());
- if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID);
- std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1];
- // Append the function info to section info.
- SectionLineInfos.insert(SectionLineInfos.end(),
- Lines.begin(), Lines.end());
+ if (CurrentFnDbgScope) {
+ // Define end label for subprogram.
+ EmitLabel("func_end", SubprogramCount);
+
+ // Get function line info.
+ if (!Lines.empty()) {
+ // Get section line info.
+ unsigned ID = SectionMap.insert(Asm->getCurrentSection());
+ if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID);
+ std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1];
+ // Append the function info to section info.
+ SectionLineInfos.insert(SectionLineInfos.end(),
+ Lines.begin(), Lines.end());
+ }
+
+ // Construct abstract scopes.
+ for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
+ AE = AbstractScopesList.end(); AI != AE; ++AI)
+ constructScopeDIE(*AI);
+
+ constructScopeDIE(CurrentFnDbgScope);
+
+ DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount,
+ MMI->getFrameMoves()));
}
- // Construct abstract scopes.
- for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
- AE = AbstractScopesList.end(); AI != AE; ++AI)
- constructScopeDIE(*AI);
-
- constructScopeDIE(CurrentFnDbgScope);
-
- DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount,
- MMI->getFrameMoves()));
-
// Clear debug info
- CurrentFnDbgScope = NULL;
- DbgScopeMap.clear();
- DbgScopeBeginMap.clear();
- DbgScopeEndMap.clear();
- ConcreteScopes.clear();
- AbstractScopesList.clear();
-
+ if (CurrentFnDbgScope) {
+ CurrentFnDbgScope = NULL;
+ DbgScopeMap.clear();
+ DbgScopeBeginMap.clear();
+ DbgScopeEndMap.clear();
+ ConcreteScopes.clear();
+ AbstractScopesList.clear();
+ }
Lines.clear();
if (TimePassesIsEnabled)
@@ -2908,8 +2921,6 @@ void DwarfDebug::emitDebugInlineInfo() {
for (SmallVector<MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
E = InlinedSPNodes.end(); I != E; ++I) {
-// for (ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
- // I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) {
MDNode *Node = *I;
ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
= InlineInfo.find(Node);
@@ -2920,20 +2931,11 @@ void DwarfDebug::emitDebugInlineInfo() {
if (LName.empty())
Asm->EmitString(Name);
- else {
- // Skip special LLVM prefix that is used to inform the asm printer to not
- // emit usual symbol prefix before the symbol name. This happens for
- // Objective-C symbol names and symbol whose name is replaced using GCC's
- // __asm__ attribute.
- if (LName[0] == 1)
- LName = LName.substr(1);
-// Asm->EmitString(LName);
+ else
EmitSectionOffset("string", "section_str",
- StringPool.idFor(LName), false, true);
+ StringPool.idFor(getRealLinkageName(LName)), false, true);
- }
Asm->EOL("MIPS linkage name");
-// Asm->EmitString(Name);
EmitSectionOffset("string", "section_str",
StringPool.idFor(Name), false, true);
Asm->EOL("Function name");
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 7a969f0..6bc808c 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -35,12 +35,13 @@ add_llvm_library(LLVMCodeGen
MachineModuleInfoImpls.cpp
MachinePassRegistry.cpp
MachineRegisterInfo.cpp
- MachineSink.cpp
MachineSSAUpdater.cpp
+ MachineSink.cpp
MachineVerifier.cpp
MaxStackAlignment.cpp
ObjectCodeEmitter.cpp
OcamlGC.cpp
+ OptimizeExts.cpp
PHIElimination.cpp
Passes.cpp
PostRASchedulerList.cpp
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 3c7961c2..056e2d5 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -288,9 +288,11 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
}
unsigned
-CriticalAntiDepBreaker::findSuitableFreeRegister(unsigned AntiDepReg,
+CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI,
+ unsigned AntiDepReg,
unsigned LastNewReg,
- const TargetRegisterClass *RC) {
+ const TargetRegisterClass *RC)
+{
for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
RE = RC->allocation_order_end(MF); R != RE; ++R) {
unsigned NewReg = *R;
@@ -300,12 +302,16 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(unsigned AntiDepReg,
// an anti-dependence with this AntiDepReg, because that would
// re-introduce that anti-dependence.
if (NewReg == LastNewReg) continue;
+    // If the instruction already has a def of the NewReg, it's not suitable.
+    // For example, an instruction with multiple definitions can result in
+    // this condition.
+ if (MI->modifiesRegister(NewReg, TRI)) continue;
// If NewReg is dead and NewReg's most recent def is not before
// AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
- assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) &&
- "Kill and Def maps aren't consistent for AntiDepReg!");
- assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) &&
- "Kill and Def maps aren't consistent for NewReg!");
+ assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u))
+ && "Kill and Def maps aren't consistent for AntiDepReg!");
+ assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u))
+ && "Kill and Def maps aren't consistent for NewReg!");
if (KillIndices[NewReg] != ~0u ||
Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
KillIndices[AntiDepReg] > DefIndices[NewReg])
@@ -336,14 +342,14 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits,
#ifndef NDEBUG
{
- DEBUG(errs() << "Critical path has total latency "
+ DEBUG(dbgs() << "Critical path has total latency "
<< (Max->getDepth() + Max->Latency) << "\n");
- DEBUG(errs() << "Available regs:");
+ DEBUG(dbgs() << "Available regs:");
for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
if (KillIndices[Reg] == ~0u)
- DEBUG(errs() << " " << TRI->getName(Reg));
+ DEBUG(dbgs() << " " << TRI->getName(Reg));
}
- DEBUG(errs() << '\n');
+ DEBUG(dbgs() << '\n');
}
#endif
@@ -495,10 +501,10 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits,
// TODO: Instead of picking the first free register, consider which might
// be the best.
if (AntiDepReg != 0) {
- if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg,
+ if (unsigned NewReg = findSuitableFreeRegister(MI, AntiDepReg,
LastNewReg[AntiDepReg],
RC)) {
- DEBUG(errs() << "Breaking anti-dependence edge on "
+ DEBUG(dbgs() << "Breaking anti-dependence edge on "
<< TRI->getName(AntiDepReg)
<< " with " << RegRefs.count(AntiDepReg) << " references"
<< " using " << TRI->getName(NewReg) << "!\n");
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 496888d..9e8db02 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -64,11 +64,12 @@ namespace llvm {
public:
CriticalAntiDepBreaker(MachineFunction& MFi);
~CriticalAntiDepBreaker();
-
+
/// Start - Initialize anti-dep breaking for a new basic block.
void StartBlock(MachineBasicBlock *BB);
- /// BreakAntiDependencies - Identifiy anti-dependencies along the critical path
+    /// BreakAntiDependencies - Identify anti-dependencies along the critical
+ /// path
/// of the ScheduleDAG and break them by renaming registers.
///
unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
@@ -87,7 +88,8 @@ namespace llvm {
private:
void PrescanInstruction(MachineInstr *MI);
void ScanInstruction(MachineInstr *MI, unsigned Count);
- unsigned findSuitableFreeRegister(unsigned AntiDepReg,
+ unsigned findSuitableFreeRegister(MachineInstr *MI,
+ unsigned AntiDepReg,
unsigned LastNewReg,
const TargetRegisterClass *);
};
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 07a5d38..0982eab 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -109,7 +109,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
// If the instruction is dead, delete it!
if (isDead(MI)) {
- DEBUG(errs() << "DeadMachineInstructionElim: DELETING: " << *MI);
+ DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
AnyChanges = true;
MI->eraseFromParent();
MIE = MBB->rend();
diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp
index a6429f7..11a85a0 100644
--- a/lib/CodeGen/ELFCodeEmitter.cpp
+++ b/lib/CodeGen/ELFCodeEmitter.cpp
@@ -37,7 +37,7 @@ namespace llvm {
/// startFunction - This callback is invoked when a new machine function is
/// about to be emitted.
void ELFCodeEmitter::startFunction(MachineFunction &MF) {
- DEBUG(errs() << "processing function: "
+ DEBUG(dbgs() << "processing function: "
<< MF.getFunction()->getName() << "\n");
// Get the ELF Section that this function belongs in.
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index 3e1ee11..5e5f589 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -1076,7 +1076,7 @@ void ELFWriter::OutputSectionsAndSectionTable() {
// Emit all of sections to the file and build the section header table.
for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
ELFSection &S = *(*I);
- DEBUG(errs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName()
+ DEBUG(dbgs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName()
<< ", Size: " << S.Size << ", Offset: " << S.Offset
<< ", SectionData Size: " << S.size() << "\n");
diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp
index 36925b1..266c74c 100644
--- a/lib/CodeGen/ExactHazardRecognizer.cpp
+++ b/lib/CodeGen/ExactHazardRecognizer.cpp
@@ -48,7 +48,7 @@ ExactHazardRecognizer(const InstrItineraryData &LItinData) :
Scoreboard = new unsigned[ScoreboardDepth];
ScoreboardHead = 0;
- DEBUG(errs() << "Using exact hazard recognizer: ScoreboardDepth = "
+ DEBUG(dbgs() << "Using exact hazard recognizer: ScoreboardDepth = "
<< ScoreboardDepth << '\n');
}
@@ -66,7 +66,7 @@ unsigned ExactHazardRecognizer::getFutureIndex(unsigned offset) {
}
void ExactHazardRecognizer::dumpScoreboard() {
- errs() << "Scoreboard:\n";
+ dbgs() << "Scoreboard:\n";
unsigned last = ScoreboardDepth - 1;
while ((last > 0) && (Scoreboard[getFutureIndex(last)] == 0))
@@ -74,10 +74,10 @@ void ExactHazardRecognizer::dumpScoreboard() {
for (unsigned i = 0; i <= last; i++) {
unsigned FUs = Scoreboard[getFutureIndex(i)];
- errs() << "\t";
+ dbgs() << "\t";
for (int j = 31; j >= 0; j--)
- errs() << ((FUs & (1 << j)) ? '1' : '0');
- errs() << '\n';
+ dbgs() << ((FUs & (1 << j)) ? '1' : '0');
+ dbgs() << '\n';
}
}
@@ -102,8 +102,8 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU
unsigned index = getFutureIndex(cycle + i);
unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
if (!freeUnits) {
- DEBUG(errs() << "*** Hazard in cycle " << (cycle + i) << ", ");
- DEBUG(errs() << "SU(" << SU->NodeNum << "): ");
+ DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", ");
+ DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
DEBUG(SU->getInstr()->dump());
return Hazard;
}
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index 4d25dcc..055172b 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -17,6 +17,7 @@
#include "llvm/Pass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Function.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -92,7 +93,7 @@ GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M,
}
}
- errs() << "unsupported GC: " << Name << "\n";
+ dbgs() << "unsupported GC: " << Name << "\n";
llvm_unreachable(0);
}
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 6e0bde6..79b2986 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -27,6 +27,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -109,7 +110,7 @@ GCStrategy::~GCStrategy() {
bool GCStrategy::initializeCustomLowering(Module &M) { return false; }
bool GCStrategy::performCustomLowering(Function &F) {
- errs() << "gc " << getName() << " must override performCustomLowering.\n";
+ dbgs() << "gc " << getName() << " must override performCustomLowering.\n";
llvm_unreachable(0);
return 0;
}
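
Most of the remaining hunks in this commit mechanically retarget debug output
from errs() to the new dbgs() stream. A minimal usage sketch of the idiom,
assuming a pass that defines DEBUG_TYPE (the "ifcvt" tag here is illustrative):

    #define DEBUG_TYPE "ifcvt"            // must be defined before Debug.h
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void report(unsigned BlockNum) {
      // dbgs() is the dedicated debug stream; in -Asserts builds the DEBUG
      // macro compiles away, and at runtime the output is gated by -debug
      // or -debug-only=ifcvt.
      DEBUG(dbgs() << "processing block #" << BlockNum << '\n');
    }
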
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index c23d707..c61fd17 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -229,14 +229,14 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
if (!TII) return false;
- DEBUG(errs() << "\nIfcvt: function (" << ++FnNum << ") \'"
+ DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
<< MF.getFunction()->getName() << "\'");
if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
- DEBUG(errs() << " skipped\n");
+ DEBUG(dbgs() << " skipped\n");
return false;
}
- DEBUG(errs() << "\n");
+ DEBUG(dbgs() << "\n");
MF.RenumberBlocks();
BBAnalysis.resize(MF.getNumBlockIDs());
@@ -281,13 +281,13 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
case ICSimpleFalse: {
bool isFalse = Kind == ICSimpleFalse;
if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
- DEBUG(errs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"")
+ DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"")
<< "): BB#" << BBI.BB->getNumber() << " ("
<< ((Kind == ICSimpleFalse)
? BBI.FalseBB->getNumber()
: BBI.TrueBB->getNumber()) << ") ");
RetVal = IfConvertSimple(BBI, Kind);
- DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) {
if (isFalse) NumSimpleFalse++;
else NumSimple++;
@@ -304,16 +304,16 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
if (DisableTriangleR && !isFalse && isRev) break;
if (DisableTriangleF && isFalse && !isRev) break;
if (DisableTriangleFR && isFalse && isRev) break;
- DEBUG(errs() << "Ifcvt (Triangle");
+ DEBUG(dbgs() << "Ifcvt (Triangle");
if (isFalse)
- DEBUG(errs() << " false");
+ DEBUG(dbgs() << " false");
if (isRev)
- DEBUG(errs() << " rev");
- DEBUG(errs() << "): BB#" << BBI.BB->getNumber() << " (T:"
+ DEBUG(dbgs() << " rev");
+ DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:"
<< BBI.TrueBB->getNumber() << ",F:"
<< BBI.FalseBB->getNumber() << ") ");
RetVal = IfConvertTriangle(BBI, Kind);
- DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) {
if (isFalse) {
if (isRev) NumTriangleFRev++;
@@ -327,11 +327,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
}
case ICDiamond: {
if (DisableDiamond) break;
- DEBUG(errs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
+ DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
<< BBI.TrueBB->getNumber() << ",F:"
<< BBI.FalseBB->getNumber() << ") ");
RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);
- DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) NumDiamonds++;
break;
}
@@ -1141,7 +1141,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
continue;
if (!TII->PredicateInstruction(I, Cond)) {
#ifndef NDEBUG
- errs() << "Unable to predicate " << *I << "!\n";
+ dbgs() << "Unable to predicate " << *I << "!\n";
#endif
llvm_unreachable(0);
}
@@ -1177,7 +1177,7 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
if (!isPredicated)
if (!TII->PredicateInstruction(MI, Cond)) {
#ifndef NDEBUG
- errs() << "Unable to predicate " << *I << "!\n";
+ dbgs() << "Unable to predicate " << *I << "!\n";
#endif
llvm_unreachable(0);
}
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 8a3bd0b..9997a48 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -349,12 +349,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
case Intrinsic::setjmp: {
Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(),
Type::getInt32Ty(Context));
- if (CI->getType() != Type::getVoidTy(Context))
+ if (!CI->getType()->isVoidTy())
CI->replaceAllUsesWith(V);
break;
}
case Intrinsic::sigsetjmp:
- if (CI->getType() != Type::getVoidTy(Context))
+ if (!CI->getType()->isVoidTy())
CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
break;
@@ -427,10 +427,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
}
- case Intrinsic::dbg_stoppoint:
- case Intrinsic::dbg_region_start:
- case Intrinsic::dbg_region_end:
- case Intrinsic::dbg_func_start:
case Intrinsic::dbg_declare:
break; // Simply strip out debugging intrinsics
@@ -512,7 +508,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
}
case Intrinsic::flt_rounds:
// Lower to "round to the nearest"
- if (CI->getType() != Type::getVoidTy(Context))
+ if (!CI->getType()->isVoidTy())
CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
break;
case Intrinsic::invariant_start:
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index d5fd051..2b5fd2c 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -24,6 +24,7 @@
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
@@ -61,6 +62,7 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
cl::desc("Verify generated machine code"),
cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+
// Enable or disable FastISel. Both options are needed, because
// FastISel is enabled by default with -fast, and we wish to be
// able to enable or disable fast-isel independently from -O0.
@@ -246,7 +248,7 @@ static void printAndVerify(PassManagerBase &PM,
const char *Banner,
bool allowDoubleDefs = false) {
if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(errs(), Banner));
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
if (VerifyMachineCode)
PM.add(createMachineVerifierPass(allowDoubleDefs));
@@ -269,7 +271,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
if (OptLevel != CodeGenOpt::None && !DisableLSR) {
PM.add(createLoopStrengthReducePass(getTargetLowering()));
if (PrintLSR)
- PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &errs()));
+ PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
}
// Turn exception handling constructs into something the code generators can
@@ -278,8 +280,13 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
{
case ExceptionHandling::SjLj:
// SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both
- PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None));
+ // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
+ // catch info can get misplaced when a selector ends up more than one block
+ // removed from the parent invoke(s). This could happen when a landing
+ // pad is shared by multiple invokes and is also a target of a normal
+ // edge from elsewhere.
PM.add(createSjLjEHPass(getTargetLowering()));
+ PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None));
break;
case ExceptionHandling::Dwarf:
PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None));
@@ -302,7 +309,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
if (PrintISelInput)
PM.add(createPrintFunctionPass("\n\n"
"*** Final LLVM Code input to ISel ***\n",
- &errs()));
+ &dbgs()));
// Standard Lower-Level Passes.
@@ -323,6 +330,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
/* allowDoubleDefs= */ true);
if (OptLevel != CodeGenOpt::None) {
+ PM.add(createOptimizeExtsPass());
if (!DisableMachineLICM)
PM.add(createMachineLICMPass());
if (!DisableMachineSink)
@@ -335,7 +343,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
if (OptLevel != CodeGenOpt::None &&
!DisableTailDuplicate && PreAllocTailDup) {
PM.add(createTailDuplicatePass(true));
- printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
+ printAndVerify(PM, "After Pre-RegAlloc TailDuplicate",
+ /* allowDoubleDefs= */ true);
}
// Run pre-ra passes.
@@ -391,7 +400,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
PM.add(createGCMachineCodeAnalysisPass());
if (PrintGCInfo)
- PM.add(createGCInfoPrinter(errs()));
+ PM.add(createGCInfoPrinter(dbgs()));
if (OptLevel != CodeGenOpt::None && !DisableCodePlace) {
PM.add(createCodePlacementOptPass());
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index cc286aa..e207f60 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -10,7 +10,7 @@
// This file implements the LiveRange and LiveInterval classes. Given some
// numbering of each of the machine instructions, an interval [i, j) is said to be a
// live interval for register v if there is no instruction with number j' > j
-// such that v is live at j' abd there is no instruction with number i' < i such
+// such that v is live at j' and there is no instruction with number i' < i such
// that v is live at i'. In this implementation intervals can have holes,
// i.e. an interval might look like [1,20), [50,65), [1000,1001). Each
// individual range is represented as an instance of LiveRange, and the whole
@@ -24,6 +24,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
@@ -813,7 +814,7 @@ raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) {
}
void LiveRange::dump() const {
- errs() << *this << "\n";
+ dbgs() << *this << "\n";
}
void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
@@ -872,7 +873,7 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
}
void LiveInterval::dump() const {
- errs() << *this << "\n";
+ dbgs() << *this << "\n";
}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 452f872..e0e2ec8 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -146,7 +146,7 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const {
}
void LiveIntervals::dumpInstrs() const {
- printInstrs(errs());
+ printInstrs(dbgs());
}
bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
@@ -253,9 +253,9 @@ bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li,
#ifndef NDEBUG
static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) {
if (TargetRegisterInfo::isPhysicalRegister(reg))
- errs() << tri_->getName(reg);
+ dbgs() << tri_->getName(reg);
else
- errs() << "%reg" << reg;
+ dbgs() << "%reg" << reg;
}
#endif
@@ -266,7 +266,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
unsigned MOIdx,
LiveInterval &interval) {
DEBUG({
- errs() << "\t\tregister: ";
+ dbgs() << "\t\tregister: ";
printRegName(interval.reg, tri_);
});
@@ -314,7 +314,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
"Shouldn't be alive across any blocks!");
LiveRange LR(defIndex, killIdx, ValNo);
interval.addRange(LR);
- DEBUG(errs() << " +" << LR << "\n");
+ DEBUG(dbgs() << " +" << LR << "\n");
ValNo->addKill(killIdx);
return;
}
@@ -325,7 +325,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// live into some number of blocks, but gets killed. Start by adding a
// range that goes from this definition to the end of the defining block.
LiveRange NewLR(defIndex, getMBBEndIdx(mbb), ValNo);
- DEBUG(errs() << " +" << NewLR);
+ DEBUG(dbgs() << " +" << NewLR);
interval.addRange(NewLR);
// Iterate over all of the blocks that the variable is completely
@@ -336,7 +336,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I);
LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo);
interval.addRange(LR);
- DEBUG(errs() << " +" << LR);
+ DEBUG(dbgs() << " +" << LR);
}
// Finally, this virtual register is live from the start of any killing
@@ -348,7 +348,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
LiveRange LR(getMBBStartIdx(Kill->getParent()), killIdx, ValNo);
interval.addRange(LR);
ValNo->addKill(killIdx);
- DEBUG(errs() << " +" << LR);
+ DEBUG(dbgs() << " +" << LR);
}
} else {
@@ -393,7 +393,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// Add the new live interval which replaces the range for the input copy.
LiveRange LR(DefIndex, RedefIndex, ValNo);
- DEBUG(errs() << " replace range with " << LR);
+ DEBUG(dbgs() << " replace range with " << LR);
interval.addRange(LR);
ValNo->addKill(RedefIndex);
@@ -404,8 +404,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
OldValNo));
DEBUG({
- errs() << " RESULT: ";
- interval.print(errs(), tri_);
+ dbgs() << " RESULT: ";
+ interval.print(dbgs(), tri_);
});
} else {
// Otherwise, this must be because of phi elimination. If this is the
@@ -422,8 +422,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
SlotIndex Start = getMBBStartIdx(Killer->getParent());
SlotIndex End = getInstructionIndex(Killer).getDefIndex();
DEBUG({
- errs() << "\n\t\trenaming [" << Start << "," << End << "] in: ";
- interval.print(errs(), tri_);
+ dbgs() << "\n\t\trenaming [" << Start << "," << End << "] in: ";
+ interval.print(dbgs(), tri_);
});
interval.removeRange(Start, End);
@@ -442,8 +442,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
VNI->addKill(indexes_->getTerminatorGap(killMBB));
VNI->setHasPHIKill(true);
DEBUG({
- errs() << " RESULT: ";
- interval.print(errs(), tri_);
+ dbgs() << " RESULT: ";
+ interval.print(dbgs(), tri_);
});
}
@@ -469,11 +469,11 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
interval.addRange(LR);
ValNo->addKill(indexes_->getTerminatorGap(mbb));
ValNo->setHasPHIKill(true);
- DEBUG(errs() << " +" << LR);
+ DEBUG(dbgs() << " +" << LR);
}
}
- DEBUG(errs() << '\n');
+ DEBUG(dbgs() << '\n');
}
void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
@@ -485,7 +485,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
// A physical register cannot be live across basic block, so its
// lifetime must end somewhere in its defining basic block.
DEBUG({
- errs() << "\t\tregister: ";
+ dbgs() << "\t\tregister: ";
printRegName(interval.reg, tri_);
});
@@ -502,7 +502,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
// For earlyclobbers, the defSlot was pushed back one; the extra
// advance below compensates.
if (MO.isDead()) {
- DEBUG(errs() << " dead");
+ DEBUG(dbgs() << " dead");
end = start.getStoreIndex();
goto exit;
}
@@ -517,7 +517,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
baseIndex = indexes_->getNextNonNullIndex(baseIndex);
if (mi->killsRegister(interval.reg, tri_)) {
- DEBUG(errs() << " killed");
+ DEBUG(dbgs() << " killed");
end = baseIndex.getDefIndex();
goto exit;
} else {
@@ -531,7 +531,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
// Then the register is essentially dead at the instruction that defines
// it. Hence its interval is:
// [defSlot(def), defSlot(def)+1)
- DEBUG(errs() << " dead");
+ DEBUG(dbgs() << " dead");
end = start.getStoreIndex();
}
goto exit;
@@ -560,7 +560,7 @@ exit:
LiveRange LR(start, end, ValNo);
interval.addRange(LR);
LR.valno->addKill(end);
- DEBUG(errs() << " +" << LR << '\n');
+ DEBUG(dbgs() << " +" << LR << '\n');
}
void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
@@ -595,7 +595,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
SlotIndex MIIdx,
LiveInterval &interval, bool isAlias) {
DEBUG({
- errs() << "\t\tlivein register: ";
+ dbgs() << "\t\tlivein register: ";
printRegName(interval.reg, tri_);
});
@@ -612,7 +612,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
while (mi != MBB->end()) {
if (mi->killsRegister(interval.reg, tri_)) {
- DEBUG(errs() << " killed");
+ DEBUG(dbgs() << " killed");
end = baseIndex.getDefIndex();
SeenDefUse = true;
break;
@@ -621,7 +621,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
// Then the register is essentially dead at the instruction that defines
// it. Hence its interval is:
// [defSlot(def), defSlot(def)+1)
- DEBUG(errs() << " dead");
+ DEBUG(dbgs() << " dead");
end = start.getStoreIndex();
SeenDefUse = true;
break;
@@ -636,10 +636,10 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
// Live-in register might not be used at all.
if (!SeenDefUse) {
if (isAlias) {
- DEBUG(errs() << " dead");
+ DEBUG(dbgs() << " dead");
end = MIIdx.getStoreIndex();
} else {
- DEBUG(errs() << " live through");
+ DEBUG(dbgs() << " live through");
end = baseIndex;
}
}
@@ -652,7 +652,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
interval.addRange(LR);
LR.valno->addKill(end);
- DEBUG(errs() << " +" << LR << '\n');
+ DEBUG(dbgs() << " +" << LR << '\n');
}
/// computeIntervals - computes the live intervals for virtual
@@ -660,7 +660,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
/// live interval is an interval [i, j) where 1 <= i <= j < N for
/// which a variable is live
void LiveIntervals::computeIntervals() {
- DEBUG(errs() << "********** COMPUTING LIVE INTERVALS **********\n"
+ DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n"
<< "********** Function: "
<< ((Value*)mf_->getFunction())->getName() << '\n');
@@ -670,7 +670,7 @@ void LiveIntervals::computeIntervals() {
MachineBasicBlock *MBB = MBBI;
// Track the index of the current machine instr.
SlotIndex MIIndex = getMBBStartIdx(MBB);
- DEBUG(errs() << MBB->getName() << ":\n");
+ DEBUG(dbgs() << MBB->getName() << ":\n");
MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
@@ -690,7 +690,7 @@ void LiveIntervals::computeIntervals() {
MIIndex = indexes_->getNextNonNullIndex(MIIndex);
for (; MI != miEnd; ++MI) {
- DEBUG(errs() << MIIndex << "\t" << *MI);
+ DEBUG(dbgs() << MIIndex << "\t" << *MI);
// Handle defs.
for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
@@ -1055,7 +1055,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
// If this is the rematerializable definition MI itself and
// all of its uses are rematerialized, simply delete it.
if (MI == ReMatOrigDefMI && CanDelete) {
- DEBUG(errs() << "\t\t\t\tErasing re-materializable def: "
+ DEBUG(dbgs() << "\t\t\t\tErasing re-materializable def: "
<< MI << '\n');
RemoveMachineInstrFromMaps(MI);
vrm.RemoveMachineInstrFromMaps(MI);
@@ -1208,28 +1208,28 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
if (CreatedNewVReg) {
LiveRange LR(index.getLoadIndex(), index.getDefIndex(),
nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator));
- DEBUG(errs() << " +" << LR);
+ DEBUG(dbgs() << " +" << LR);
nI.addRange(LR);
} else {
// Extend the split live interval to this def / use.
SlotIndex End = index.getDefIndex();
LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End,
nI.getValNumInfo(nI.getNumValNums()-1));
- DEBUG(errs() << " +" << LR);
+ DEBUG(dbgs() << " +" << LR);
nI.addRange(LR);
}
}
if (HasDef) {
LiveRange LR(index.getDefIndex(), index.getStoreIndex(),
nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator));
- DEBUG(errs() << " +" << LR);
+ DEBUG(dbgs() << " +" << LR);
nI.addRange(LR);
}
DEBUG({
- errs() << "\t\t\t\tAdded new interval: ";
- nI.print(errs(), tri_);
- errs() << '\n';
+ dbgs() << "\t\t\t\tAdded new interval: ";
+ nI.print(dbgs(), tri_);
+ dbgs() << '\n';
});
}
return CanFold;
@@ -1557,9 +1557,9 @@ addIntervalsForSpillsFast(const LiveInterval &li,
"attempt to spill already spilled interval!");
DEBUG({
- errs() << "\t\t\t\tadding intervals for spills for interval: ";
+ dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
li.dump();
- errs() << '\n';
+ dbgs() << '\n';
});
const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
@@ -1610,7 +1610,7 @@ addIntervalsForSpillsFast(const LiveInterval &li,
LiveRange LR(index.getLoadIndex(), index.getUseIndex(),
nI.getNextValue(SlotIndex(), 0, false,
getVNInfoAllocator()));
- DEBUG(errs() << " +" << LR);
+ DEBUG(dbgs() << " +" << LR);
nI.addRange(LR);
vrm.addRestorePoint(NewVReg, MI);
}
@@ -1618,7 +1618,7 @@ addIntervalsForSpillsFast(const LiveInterval &li,
LiveRange LR(index.getDefIndex(), index.getStoreIndex(),
nI.getNextValue(SlotIndex(), 0, false,
getVNInfoAllocator()));
- DEBUG(errs() << " +" << LR);
+ DEBUG(dbgs() << " +" << LR);
nI.addRange(LR);
vrm.addSpillPoint(NewVReg, true, MI);
}
@@ -1626,9 +1626,9 @@ addIntervalsForSpillsFast(const LiveInterval &li,
added.push_back(&nI);
DEBUG({
- errs() << "\t\t\t\tadded new interval: ";
+ dbgs() << "\t\t\t\tadded new interval: ";
nI.dump();
- errs() << '\n';
+ dbgs() << '\n';
});
}
@@ -1651,9 +1651,9 @@ addIntervalsForSpills(const LiveInterval &li,
"attempt to spill already spilled interval!");
DEBUG({
- errs() << "\t\t\t\tadding intervals for spills for interval: ";
- li.print(errs(), tri_);
- errs() << '\n';
+ dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
+ li.print(dbgs(), tri_);
+ dbgs() << '\n';
});
// Each bit specifies whether a spill is required in the MBB.
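The pattern being adopted throughout these hunks is the DEBUG(dbgs() << ...) idiom from llvm/Support/Debug.h. A minimal sketch of how a pass uses it, assuming the Debug.h interface this patch series targets (the pass tag below is hypothetical):

    #define DEBUG_TYPE "liveintervals"   // hypothetical tag for -debug-only
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void traceRange(unsigned Start, unsigned End) {
      // Compiled away in release builds; in +Asserts builds the body runs
      // only under -debug or -debug-only=liveintervals. Unlike errs(),
      // dbgs() can be backed by a circular buffer, so a long run can be
      // made to print only the most recent debug output before a crash.
      DEBUG(dbgs() << "range [" << Start << ',' << End << ")\n");
    }

That redirectability is the point of the mechanical errs()-to-dbgs() switch in the hunks above.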
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 3c88e37..b44a220 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -59,17 +60,17 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
}
void LiveVariables::VarInfo::dump() const {
- errs() << " Alive in blocks: ";
+ dbgs() << " Alive in blocks: ";
for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
E = AliveBlocks.end(); I != E; ++I)
- errs() << *I << ", ";
- errs() << "\n Killed by:";
+ dbgs() << *I << ", ";
+ dbgs() << "\n Killed by:";
if (Kills.empty())
- errs() << " No instructions.\n";
+ dbgs() << " No instructions.\n";
else {
for (unsigned i = 0, e = Kills.size(); i != e; ++i)
- errs() << "\n #" << i << ": " << *Kills[i];
- errs() << "\n";
+ dbgs() << "\n #" << i << ": " << *Kills[i];
+ dbgs() << "\n";
}
}
@@ -289,7 +290,6 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
- MachineInstr *LastPartDef = 0;
unsigned LastPartDefDist = 0;
for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
@@ -298,13 +298,9 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
// There was a def of this sub-register in between. This is a partial
// def, keep track of the last one.
unsigned Dist = DistanceMap[Def];
- if (Dist > LastPartDefDist) {
+ if (Dist > LastPartDefDist)
LastPartDefDist = Dist;
- LastPartDef = Def;
- }
- continue;
- }
- if (MachineInstr *Use = PhysRegUse[SubReg]) {
+ } else if (MachineInstr *Use = PhysRegUse[SubReg]) {
unsigned Dist = DistanceMap[Use];
if (Dist > LastRefOrPartRefDist) {
LastRefOrPartRefDist = Dist;
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp
index 80eb6cd..1121d9b 100644
--- a/lib/CodeGen/LowerSubregs.cpp
+++ b/lib/CodeGen/LowerSubregs.cpp
@@ -122,7 +122,7 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
"Extract destination must be in a physical register");
assert(SrcReg && "invalid subregister index for register");
- DEBUG(errs() << "subreg: CONVERTING: " << *MI);
+ DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
if (SrcReg == DstReg) {
// No need to insert an identity copy instruction.
@@ -131,11 +131,11 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
// instruction with KILL.
MI->setDesc(TII->get(TargetInstrInfo::KILL));
MI->RemoveOperand(2); // SubIdx
- DEBUG(errs() << "subreg: replace by: " << *MI);
+ DEBUG(dbgs() << "subreg: replace by: " << *MI);
return true;
}
- DEBUG(errs() << "subreg: eliminated!");
+ DEBUG(dbgs() << "subreg: eliminated!");
} else {
// Insert copy
const TargetRegisterClass *TRCS = TRI->getPhysicalRegisterRegClass(DstReg);
@@ -150,11 +150,11 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
TransferKillFlag(MI, SuperReg, TRI, true);
DEBUG({
MachineBasicBlock::iterator dMI = MI;
- errs() << "subreg: " << *(--dMI);
+ dbgs() << "subreg: " << *(--dMI);
});
}
- DEBUG(errs() << '\n');
+ DEBUG(dbgs() << '\n');
MBB->erase(MI);
return true;
}
@@ -179,7 +179,7 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
"Inserted value must be in a physical register");
- DEBUG(errs() << "subreg: CONVERTING: " << *MI);
+ DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
if (DstSubReg == InsReg && InsSIdx == 0) {
// No need to insert an identity copy instruction.
@@ -188,7 +188,7 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
// %RAX<def> = SUBREG_TO_REG 0, %EAX:3<kill>, 3
// The first def is defining RAX, not EAX so the top bits were not
// zero extended.
- DEBUG(errs() << "subreg: eliminated!");
+ DEBUG(dbgs() << "subreg: eliminated!");
} else {
// Insert sub-register copy
const TargetRegisterClass *TRC0= TRI->getPhysicalRegisterRegClass(DstSubReg);
@@ -203,11 +203,11 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
TransferKillFlag(MI, InsReg, TRI);
DEBUG({
MachineBasicBlock::iterator dMI = MI;
- errs() << "subreg: " << *(--dMI);
+ dbgs() << "subreg: " << *(--dMI);
});
}
- DEBUG(errs() << '\n');
+ DEBUG(dbgs() << '\n');
MBB->erase(MI);
return true;
}
@@ -235,7 +235,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
"Inserted value must be in a physical register");
- DEBUG(errs() << "subreg: CONVERTING: " << *MI);
+ DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
if (DstSubReg == InsReg) {
// No need to insert an identity copy instruction. If the SrcReg was
@@ -248,7 +248,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
else
MIB.addReg(InsReg, RegState::Kill);
} else {
- DEBUG(errs() << "subreg: eliminated!\n");
+ DEBUG(dbgs() << "subreg: eliminated!\n");
MBB->erase(MI);
return true;
}
@@ -288,7 +288,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
DEBUG({
MachineBasicBlock::iterator dMI = MI;
- errs() << "subreg: " << *(--dMI) << "\n";
+ dbgs() << "subreg: " << *(--dMI) << "\n";
});
MBB->erase(MI);
@@ -299,7 +299,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
/// copies.
///
bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(errs() << "Machine Function\n"
+ DEBUG(dbgs() << "Machine Function\n"
<< "********** LOWERING SUBREG INSTRS **********\n"
<< "********** Function: "
<< MF.getFunction()->getName() << '\n');
diff --git a/lib/CodeGen/MachOWriter.cpp b/lib/CodeGen/MachOWriter.cpp
index 73b15ed..337eab1 100644
--- a/lib/CodeGen/MachOWriter.cpp
+++ b/lib/CodeGen/MachOWriter.cpp
@@ -33,6 +33,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetMachOWriterInfo.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Mangler.h"
#include "llvm/Support/OutputBuffer.h"
#include "llvm/Support/ErrorHandling.h"
@@ -634,7 +635,7 @@ void MachOWriter::InitMem(const Constant *C, uintptr_t Offset,
}
case Instruction::Add:
default:
- errs() << "ConstantExpr not handled as global var init: " << *CE <<"\n";
+ dbgs() << "ConstantExpr not handled as global var init: " << *CE <<"\n";
llvm_unreachable(0);
}
} else if (PC->getType()->isSingleValueType()) {
@@ -732,7 +733,7 @@ void MachOWriter::InitMem(const Constant *C, uintptr_t Offset,
WorkList.push_back(CPair(CPS->getOperand(i),
PA+SL->getElementOffset(i)));
} else {
- errs() << "Bad Type: " << *PC->getType() << "\n";
+ dbgs() << "Bad Type: " << *PC->getType() << "\n";
llvm_unreachable("Unknown constant type to initialize memory with!");
}
}
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 74a0d57..e2ce642 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -19,6 +19,7 @@
#include "llvm/Target/TargetInstrDesc.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Assembly/Writer.h"
@@ -158,7 +159,7 @@ bool MachineBasicBlock::isOnlyReachableByFallthrough() const {
}
void MachineBasicBlock::dump() const {
- print(errs());
+ print(dbgs());
}
static inline void OutputReg(raw_ostream &os, unsigned RegNo,
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index dd6fd7e..ae9451c 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -299,7 +300,7 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
}
void MachineFunction::dump() const {
- print(errs());
+ print(dbgs());
}
void MachineFunction::print(raw_ostream &OS) const {
@@ -519,7 +520,7 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
}
void MachineFrameInfo::dump(const MachineFunction &MF) const {
- print(MF, errs());
+ print(MF, dbgs());
}
//===----------------------------------------------------------------------===//
@@ -579,7 +580,7 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const {
OS << '\n';
}
-void MachineJumpTableInfo::dump() const { print(errs()); }
+void MachineJumpTableInfo::dump() const { print(dbgs()); }
//===----------------------------------------------------------------------===//
@@ -702,4 +703,4 @@ void MachineConstantPool::print(raw_ostream &OS) const {
}
}
-void MachineConstantPool::dump() const { print(errs()); }
+void MachineConstantPool::dump() const { print(dbgs()); }
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index a761c2d..cf3e3e1 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -28,11 +28,13 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Metadata.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -277,10 +279,15 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << '>';
break;
case MachineOperand::MO_BlockAddress:
- OS << "<";
+ OS << '<';
WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false);
OS << '>';
break;
+ case MachineOperand::MO_Metadata:
+ OS << '<';
+ WriteAsOperand(OS, getMetadata(), /*PrintType=*/false);
+ OS << '>';
+ break;
default:
llvm_unreachable("Unrecognized operand type");
}
@@ -1094,7 +1101,7 @@ unsigned MachineInstr::isConstantValuePHI() const {
}
void MachineInstr::dump() const {
- errs() << " " << *this;
+ dbgs() << " " << *this;
}
void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
@@ -1313,3 +1320,12 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg,
true /*IsDead*/));
return true;
}
+
+void MachineInstr::addRegisterDefined(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo) {
+ MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo);
+ if (!MO || MO->getSubReg())
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ true /*IsDef*/,
+ true /*IsImp*/));
+}
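The new addRegisterDefined helper rounds out the existing addRegisterKilled / addRegisterDead family: it is a no-op when the instruction already carries a full (non-subregister) def of the register, and otherwise appends an implicit def operand. A hedged usage sketch; the wrapper function and its context are hypothetical:

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    using namespace llvm;

    // After rewriting MI so that its explicit operands no longer cover all
    // of Reg, re-assert the full def so liveness analyses keep seeing it.
    static void keepFullDef(MachineInstr *MI, unsigned Reg,
                            const TargetRegisterInfo *TRI) {
      MI->addRegisterDefined(Reg, TRI);  // appends <imp-def> only if needed
    }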
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 0a57ea1..ffcc8ab 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -161,7 +161,7 @@ static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) {
/// loop.
///
bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(errs() << "******** Machine LICM ********\n");
+ DEBUG(dbgs() << "******** Machine LICM ********\n");
Changed = FirstInLoop = false;
MCP = MF.getConstantPool();
@@ -253,28 +253,28 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
}
DEBUG({
- errs() << "--- Checking if we can hoist " << I;
+ dbgs() << "--- Checking if we can hoist " << I;
if (I.getDesc().getImplicitUses()) {
- errs() << " * Instruction has implicit uses:\n";
+ dbgs() << " * Instruction has implicit uses:\n";
const TargetRegisterInfo *TRI = TM->getRegisterInfo();
for (const unsigned *ImpUses = I.getDesc().getImplicitUses();
*ImpUses; ++ImpUses)
- errs() << " -> " << TRI->getName(*ImpUses) << "\n";
+ dbgs() << " -> " << TRI->getName(*ImpUses) << "\n";
}
if (I.getDesc().getImplicitDefs()) {
- errs() << " * Instruction has implicit defines:\n";
+ dbgs() << " * Instruction has implicit defines:\n";
const TargetRegisterInfo *TRI = TM->getRegisterInfo();
for (const unsigned *ImpDefs = I.getDesc().getImplicitDefs();
*ImpDefs; ++ImpDefs)
- errs() << " -> " << TRI->getName(*ImpDefs) << "\n";
+ dbgs() << " -> " << TRI->getName(*ImpDefs) << "\n";
}
});
if (I.getDesc().getImplicitDefs() || I.getDesc().getImplicitUses()) {
- DEBUG(errs() << "Cannot hoist with implicit defines or uses\n");
+ DEBUG(dbgs() << "Cannot hoist with implicit defines or uses\n");
return false;
}
@@ -479,7 +479,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
return false;
if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
- DEBUG(errs() << "CSEing " << *MI << " with " << *Dup);
+ DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef())
@@ -506,14 +506,14 @@ void MachineLICM::Hoist(MachineInstr *MI) {
// Now move the instructions to the predecessor, inserting it before any
// terminator instructions.
DEBUG({
- errs() << "Hoisting " << *MI;
+ dbgs() << "Hoisting " << *MI;
if (CurPreheader->getBasicBlock())
- errs() << " to MachineBasicBlock "
+ dbgs() << " to MachineBasicBlock "
<< CurPreheader->getName();
if (MI->getParent()->getBasicBlock())
- errs() << " from MachineBasicBlock "
+ dbgs() << " from MachineBasicBlock "
<< MI->getParent()->getName();
- errs() << "\n";
+ dbgs() << "\n";
});
// If this is the first instruction being hoisted to the preheader,
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index d561a5b..269538b 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
namespace llvm {
@@ -73,3 +74,7 @@ MachineBasicBlock *MachineLoop::getBottomBlock() {
}
return BotMBB;
}
+
+void MachineLoop::dump() const {
+ print(dbgs());
+}
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 292096f..467ea5d 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -210,7 +210,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
// If the client wants to know about all new instructions, tell it.
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
- DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
return InsertedPHI->getOperand(0).getReg();
}
@@ -383,7 +383,7 @@ unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
InsertedPHI->eraseFromParent();
InsertedVal = ConstVal;
} else {
- DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
// If the client wants to know about all new instructions, tell it.
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index e040738..c177e3c 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -90,7 +90,7 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
}
bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(errs() << "******** Machine Sinking ********\n");
+ DEBUG(dbgs() << "******** Machine Sinking ********\n");
const TargetMachine &TM = MF.getTarget();
TII = TM.getInstrInfo();
@@ -255,15 +255,15 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
if (MI->getParent() == SuccToSinkTo)
return false;
- DEBUG(errs() << "Sink instr " << *MI);
- DEBUG(errs() << "to block " << *SuccToSinkTo);
+ DEBUG(dbgs() << "Sink instr " << *MI);
+ DEBUG(dbgs() << "to block " << *SuccToSinkTo);
// If the block has multiple predecessors, this would introduce computation on
// a path where it doesn't already exist. We could split the critical edge,
// but for now we just punt.
// FIXME: Split critical edges if not backedges.
if (SuccToSinkTo->pred_size() > 1) {
- DEBUG(errs() << " *** PUNTING: Critical edge found\n");
+ DEBUG(dbgs() << " *** PUNTING: Critical edge found\n");
return false;
}
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 0772319..584c21b 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -190,8 +190,7 @@ namespace {
void report(const char *msg, const MachineOperand *MO, unsigned MONum);
void markReachable(const MachineBasicBlock *MBB);
- void calcMaxRegsPassed();
- void calcMinRegsPassed();
+ void calcRegsPassed();
void checkPHIOps(const MachineBasicBlock *MBB);
void calcRegsRequired();
@@ -710,7 +709,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
// Calculate the largest possible vregsPassed sets. These are the registers that
// can pass through an MBB live, but may not be live every time. It is assumed
// that all vregsPassed sets are empty before the call.
-void MachineVerifier::calcMaxRegsPassed() {
+void MachineVerifier::calcRegsPassed() {
// First push live-out regs to successors' vregsPassed. Remember the MBBs that
// have any vregsPassed.
DenseSet<const MachineBasicBlock*> todo;
@@ -745,45 +744,9 @@ void MachineVerifier::calcMaxRegsPassed() {
}
}
-// Calculate the minimum vregsPassed set. These are the registers that always
-// pass live through an MBB. The calculation assumes that calcMaxRegsPassed has
-// been called earlier.
-void MachineVerifier::calcMinRegsPassed() {
- DenseSet<const MachineBasicBlock*> todo;
- for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
- MFI != MFE; ++MFI)
- todo.insert(MFI);
-
- while (!todo.empty()) {
- const MachineBasicBlock *MBB = *todo.begin();
- todo.erase(MBB);
- BBInfo &MInfo = MBBInfoMap[MBB];
-
- // Remove entries from vRegsPassed that are not live out from all
- // reachable predecessors.
- RegSet dead;
- for (RegSet::iterator I = MInfo.vregsPassed.begin(),
- E = MInfo.vregsPassed.end(); I != E; ++I) {
- for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
- PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
- BBInfo &PrInfo = MBBInfoMap[*PrI];
- if (PrInfo.reachable && !PrInfo.isLiveOut(*I)) {
- dead.insert(*I);
- break;
- }
- }
- }
- // If any regs removed, we need to recheck successors.
- if (!dead.empty()) {
- set_subtract(MInfo.vregsPassed, dead);
- todo.insert(MBB->succ_begin(), MBB->succ_end());
- }
- }
-}
-
// Calculate the set of virtual registers that must be passed through each basic
// block in order to satisfy the requirements of successor blocks. This is very
-// similar to calcMaxRegsPassed, only backwards.
+// similar to calcRegsPassed, only backwards.
void MachineVerifier::calcRegsRequired() {
// First push live-in regs to predecessors' vregsRequired.
DenseSet<const MachineBasicBlock*> todo;
@@ -817,7 +780,7 @@ void MachineVerifier::calcRegsRequired() {
}
// Check PHI instructions at the beginning of MBB. It is assumed that
-// calcMinRegsPassed has been run so BBInfo::isLiveOut is valid.
+// calcRegsPassed has been run so BBInfo::isLiveOut is valid.
void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) {
@@ -848,9 +811,8 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
}
void MachineVerifier::visitMachineFunctionAfter() {
- calcMaxRegsPassed();
+ calcRegsPassed();
- // With the maximal set of vregsPassed we can verify dead-in registers.
for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
MFI != MFE; ++MFI) {
BBInfo &MInfo = MBBInfoMap[MFI];
@@ -859,31 +821,16 @@ void MachineVerifier::visitMachineFunctionAfter() {
if (!MInfo.reachable)
continue;
- for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(),
- PrE = MFI->pred_end(); PrI != PrE; ++PrI) {
- BBInfo &PrInfo = MBBInfoMap[*PrI];
- if (!PrInfo.reachable)
- continue;
-
- // Verify physical live-ins. EH landing pads have magic live-ins so we
- // ignore them.
- if (!MFI->isLandingPad()) {
- for (MachineBasicBlock::const_livein_iterator I = MFI->livein_begin(),
- E = MFI->livein_end(); I != E; ++I) {
- if (TargetRegisterInfo::isPhysicalRegister(*I) &&
- !isReserved (*I) && !PrInfo.isLiveOut(*I)) {
- report("Live-in physical register is not live-out from predecessor",
- MFI);
- *OS << "Register " << TRI->getName(*I)
- << " is not live-out from BB#" << (*PrI)->getNumber()
- << ".\n";
- }
- }
- }
+ checkPHIOps(MFI);
+ // Verify dead-in virtual registers.
+ if (!allowVirtDoubleDefs) {
+ for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(),
+ PrE = MFI->pred_end(); PrI != PrE; ++PrI) {
+ BBInfo &PrInfo = MBBInfoMap[*PrI];
+ if (!PrInfo.reachable)
+ continue;
- // Verify dead-in virtual registers.
- if (!allowVirtDoubleDefs) {
for (RegMap::iterator I = MInfo.vregsDeadIn.begin(),
E = MInfo.vregsDeadIn.end(); I != E; ++I) {
// DeadIn register must be in neither regsLiveOut or vregsPassed of
@@ -899,39 +846,6 @@ void MachineVerifier::visitMachineFunctionAfter() {
}
}
- calcMinRegsPassed();
-
- // With the minimal set of vregsPassed we can verify live-in virtual
- // registers, including PHI instructions.
- for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
- MFI != MFE; ++MFI) {
- BBInfo &MInfo = MBBInfoMap[MFI];
-
- // Skip unreachable MBBs.
- if (!MInfo.reachable)
- continue;
-
- checkPHIOps(MFI);
-
- for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(),
- PrE = MFI->pred_end(); PrI != PrE; ++PrI) {
- BBInfo &PrInfo = MBBInfoMap[*PrI];
- if (!PrInfo.reachable)
- continue;
-
- for (RegMap::iterator I = MInfo.vregsLiveIn.begin(),
- E = MInfo.vregsLiveIn.end(); I != E; ++I) {
- if (!PrInfo.isLiveOut(I->first)) {
- report("Used virtual register is not live-in", I->second);
- *OS << "Register %reg" << I->first
- << " is not live-out from predecessor MBB #"
- << (*PrI)->getNumber()
- << ".\n";
- }
- }
- }
- }
-
// Now check LiveVariables info if available
if (LiveVars) {
calcRegsRequired();
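The verifier now computes a single vregsPassed fixed point instead of separate maximal and minimal sets. The underlying computation is a plain forward union-until-fixpoint over the CFG; a self-contained sketch of that idea, with hypothetical integer block IDs rather than the verifier's actual types:

    #include <cstddef>
    #include <map>
    #include <set>
    #include <vector>

    typedef std::set<unsigned> RegSet;

    // A register ends up in Passed[B] if it is live out of some block from
    // which B is reachable. Sets only grow, so the worklist terminates.
    static void calcRegsPassed(const std::map<int, std::vector<int> > &Succs,
                               const std::map<int, RegSet> &LiveOut,
                               std::map<int, RegSet> &Passed) {
      std::set<int> Todo;
      for (std::map<int, RegSet>::const_iterator I = LiveOut.begin(),
             E = LiveOut.end(); I != E; ++I)
        Todo.insert(I->first);
      while (!Todo.empty()) {
        int B = *Todo.begin();
        Todo.erase(Todo.begin());
        RegSet Out = Passed[B];            // regs passing through B live
        std::map<int, RegSet>::const_iterator LO = LiveOut.find(B);
        if (LO != LiveOut.end())
          Out.insert(LO->second.begin(), LO->second.end());
        std::map<int, std::vector<int> >::const_iterator SI = Succs.find(B);
        if (SI == Succs.end())
          continue;
        for (size_t i = 0, e = SI->second.size(); i != e; ++i) {
          RegSet &SP = Passed[SI->second[i]];
          size_t Before = SP.size();
          SP.insert(Out.begin(), Out.end());
          if (SP.size() != Before)         // grew: revisit this successor
            Todo.insert(SI->second[i]);
        }
      }
    }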
diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp
new file mode 100644
index 0000000..625ff89
--- /dev/null
+++ b/lib/CodeGen/OptimizeExts.cpp
@@ -0,0 +1,185 @@
+//===-- OptimizeExts.cpp - Optimize sign / zero extension instrs -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs optimization of sign / zero extension instructions. It
+// may be extended to handle other instructions with similar properties.
+//
+// On some targets, some instructions, e.g. X86 sign / zero extension, may
+// leave the source value in the lower part of the result. This pass will
+// replace (some) uses of the pre-extension value with uses of the sub-register
+// of the results.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ext-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden,
+ cl::desc("Aggressive extension optimization"));
+
+STATISTIC(NumReuse, "Number of extension results reused");
+
+namespace {
+ class OptimizeExts : public MachineFunctionPass {
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *DT; // Machine dominator tree
+
+ public:
+ static char ID; // Pass identification
+ OptimizeExts() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ if (Aggressive) {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ }
+
+ private:
+ bool OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+ };
+}
+
+char OptimizeExts::ID = 0;
+static RegisterPass<OptimizeExts>
+X("opt-exts", "Optimize sign / zero extensions");
+
+FunctionPass *llvm::createOptimizeExtsPass() { return new OptimizeExts(); }
+
+/// OptimizeInstr - If instruction is a copy-like instruction, i.e. it reads
+/// a single register and writes a single register and it does not modify
+/// the source, and if the source value is preserved as a sub-register of
+/// the result, then replace all reachable uses of the source with the subreg
+/// of the result.
+bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
+ bool Changed = false;
+ LocalMIs.insert(MI);
+
+ unsigned SrcReg, DstReg, SubIdx;
+ if (TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) {
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ return false;
+
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg);
+ if (++UI == MRI->use_end())
+ // No other uses.
+ return false;
+
+ // Ok, the source has other uses. See if we can replace the other uses
+ // with use of the result of the extension.
+ SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
+ UI = MRI->use_begin(DstReg);
+ for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
+ ++UI)
+ ReachedBBs.insert(UI->getParent());
+
+ bool ExtendLife = true;
+ // Uses that are in the same BB as uses of the result of the instruction.
+ SmallVector<MachineOperand*, 8> Uses;
+ // Uses that the result of the instruction can reach.
+ SmallVector<MachineOperand*, 8> ExtendedUses;
+
+ UI = MRI->use_begin(SrcReg);
+ for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
+ ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ if (UseMI == MI)
+ continue;
+ if (UseMI->getOpcode() == TargetInstrInfo::PHI) {
+ ExtendLife = false;
+ continue;
+ }
+
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (UseMBB == MBB) {
+ // Local uses that come after the extension.
+ if (!LocalMIs.count(UseMI))
+ Uses.push_back(&UseMO);
+ } else if (ReachedBBs.count(UseMBB))
+ // Non-local uses where the result of extension is used. Always
+ // replace these unless it's a PHI.
+ Uses.push_back(&UseMO);
+ else if (Aggressive && DT->dominates(MBB, UseMBB))
+ // We may want to extend live range of the extension result in order
+ // to replace these uses.
+ ExtendedUses.push_back(&UseMO);
+ else {
+ // Both will be live out of the def MBB anyway. Don't extend live
+ // range of the extension result.
+ ExtendLife = false;
+ break;
+ }
+ }
+
+ if (ExtendLife && !ExtendedUses.empty())
+ // Ok, we'll extend the liveness of the extension result.
+ std::copy(ExtendedUses.begin(), ExtendedUses.end(),
+ std::back_inserter(Uses));
+
+ // Now replace all uses.
+ if (!Uses.empty()) {
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+ MachineOperand *UseMO = Uses[i];
+ MachineInstr *UseMI = UseMO->getParent();
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+ TII->get(TargetInstrInfo::EXTRACT_SUBREG), NewVR)
+ .addReg(DstReg).addImm(SubIdx);
+ UseMO->setReg(NewVR);
+ ++NumReuse;
+ Changed = true;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+bool OptimizeExts::runOnMachineFunction(MachineFunction &MF) {
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ MRI = &MF.getRegInfo();
+ DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
+
+ bool Changed = false;
+
+ SmallPtrSet<MachineInstr*, 8> LocalMIs;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+ LocalMIs.clear();
+ for (MachineBasicBlock::iterator MII = I->begin(), ME = I->end(); MII != ME;
+ ++MII) {
+ MachineInstr *MI = &*MII;
+ Changed |= OptimizeInstr(MI, MBB, LocalMIs);
+ }
+ }
+
+ return Changed;
+}
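Concretely, the rewrite OptimizeInstr performs looks like this, in the same machine-IR notation the comments above use (virtual register numbers and the x86 low-8-bit sub-register index are hypothetical):

    // before: a later use still reads the narrow pre-extension value,
    // keeping %reg1024 live across the extension
    %reg1025<def> = MOVZX32rr8 %reg1024
    ...
    = use %reg1024

    // after: the use reads the low 8 bits of the extension result instead
    %reg1025<def> = MOVZX32rr8 %reg1024
    ...
    %reg1026<def> = EXTRACT_SUBREG %reg1025, 1
    = use %reg1026

Under -aggressive-ext-opt the pass will also extend the live range of %reg1025 to reach uses it dominates; otherwise it only rewrites uses in blocks the extension result already reaches.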
diff --git a/lib/CodeGen/PBQP/AnnotatedGraph.h b/lib/CodeGen/PBQP/AnnotatedGraph.h
index a47dce9..738dea0 100644
--- a/lib/CodeGen/PBQP/AnnotatedGraph.h
+++ b/lib/CodeGen/PBQP/AnnotatedGraph.h
@@ -1,4 +1,4 @@
-//===-- AnnotatedGraph.h - Annotated PBQP Graph ----------------*- C++ --*-===//
+//===-- AnnotatedGraph.h - Annotated PBQP Graph -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/CodeGen/PBQP/ExhaustiveSolver.h b/lib/CodeGen/PBQP/ExhaustiveSolver.h
index b2f2e6f..35ec4f1 100644
--- a/lib/CodeGen/PBQP/ExhaustiveSolver.h
+++ b/lib/CodeGen/PBQP/ExhaustiveSolver.h
@@ -1,4 +1,4 @@
-//===-- ExhaustiveSolver.h - Brute Force PBQP Solver -----------*- C++ --*-===//
+//===-- ExhaustiveSolver.h - Brute Force PBQP Solver ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/CodeGen/PBQP/GraphBase.h b/lib/CodeGen/PBQP/GraphBase.h
index 0c7493b..becd98a 100644
--- a/lib/CodeGen/PBQP/GraphBase.h
+++ b/lib/CodeGen/PBQP/GraphBase.h
@@ -1,4 +1,4 @@
-//===-- GraphBase.h - Abstract Base PBQP Graph -----------------*- C++ --*-===//
+//===-- GraphBase.h - Abstract Base PBQP Graph ------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h
index 1670877..f78a58a 100644
--- a/lib/CodeGen/PBQP/HeuristicSolver.h
+++ b/lib/CodeGen/PBQP/HeuristicSolver.h
@@ -1,4 +1,4 @@
-//===-- HeuristicSolver.h - Heuristic PBQP Solver --------------*- C++ --*-===//
+//===-- HeuristicSolver.h - Heuristic PBQP Solver ---------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h
index 3ac9e70..1228f65 100644
--- a/lib/CodeGen/PBQP/Heuristics/Briggs.h
+++ b/lib/CodeGen/PBQP/Heuristics/Briggs.h
@@ -1,4 +1,4 @@
-//===-- Briggs.h --- Briggs Heuristic for PBQP -----------------*- C++ --*-===//
+//===-- Briggs.h --- Briggs Heuristic for PBQP ------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/CodeGen/PBQP/PBQPMath.h b/lib/CodeGen/PBQP/PBQPMath.h
index 11f4b4b..20737a2 100644
--- a/lib/CodeGen/PBQP/PBQPMath.h
+++ b/lib/CodeGen/PBQP/PBQPMath.h
@@ -1,4 +1,4 @@
-//===-- PBQPMath.h - PBQP Vector and Matrix classes ------------*- C++ --*-===//
+//===-- PBQPMath.h - PBQP Vector and Matrix classes -------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/CodeGen/PBQP/SimpleGraph.h b/lib/CodeGen/PBQP/SimpleGraph.h
index 1ca9cae..13e63ce 100644
--- a/lib/CodeGen/PBQP/SimpleGraph.h
+++ b/lib/CodeGen/PBQP/SimpleGraph.h
@@ -1,4 +1,4 @@
-//===-- SimpleGraph.h - Simple PBQP Graph ----------------------*- C++ --*-===//
+//===-- SimpleGraph.h - Simple PBQP Graph -----------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/CodeGen/PBQP/Solution.h b/lib/CodeGen/PBQP/Solution.h
index c91e2fa..aee684d 100644
--- a/lib/CodeGen/PBQP/Solution.h
+++ b/lib/CodeGen/PBQP/Solution.h
@@ -1,4 +1,4 @@
-//===-- Solution.h ------- PBQP Solution -----------------------*- C++ --*-===//
+//===-- Solution.h ------- PBQP Solution ------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/CodeGen/PBQP/Solver.h b/lib/CodeGen/PBQP/Solver.h
index a9c5f83..a445de8 100644
--- a/lib/CodeGen/PBQP/Solver.h
+++ b/lib/CodeGen/PBQP/Solver.h
@@ -1,4 +1,4 @@
-//===-- Solver.h ------- PBQP solver interface -----------------*- C++ --*-===//
+//===-- Solver.h ------- PBQP solver interface ------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 58c3dec..365df30 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -207,7 +207,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
IncomingReg = entry;
reusedIncoming = true;
++NumReused;
- DEBUG(errs() << "Reusing %reg" << IncomingReg << " for " << *MPhi);
+ DEBUG(dbgs() << "Reusing %reg" << IncomingReg << " for " << *MPhi);
} else {
entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
}
@@ -234,7 +234,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
// AfterPHIsIt, so it appears before the current PHICopy.
if (reusedIncoming)
if (MachineInstr *OldKill = VI.findKill(&MBB)) {
- DEBUG(errs() << "Remove old kill from " << *OldKill);
+ DEBUG(dbgs() << "Remove old kill from " << *OldKill);
LV->removeVirtualRegisterKilled(IncomingReg, OldKill);
DEBUG(MBB.dump());
}
@@ -421,7 +421,7 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
MF->insert(llvm::next(MachineFunction::iterator(A)), NMBB);
- DEBUG(errs() << "PHIElimination splitting critical edge:"
+ DEBUG(dbgs() << "PHIElimination splitting critical edge:"
" BB#" << A->getNumber()
<< " -- BB#" << NMBB->getNumber()
<< " -- BB#" << B->getNumber() << '\n');
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 79be295..f43395f 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -233,7 +233,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
TargetSubtarget::ANTIDEP_NONE;
}
- DEBUG(errs() << "PostRAScheduler\n");
+ DEBUG(dbgs() << "PostRAScheduler\n");
const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
@@ -258,7 +258,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
static int bbcnt = 0;
if (bbcnt++ % DebugDiv != DebugMod)
continue;
- errs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
+ dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
":BB#" << MBB->getNumber() << " ***\n";
}
#endif
@@ -342,7 +342,7 @@ void SchedulePostRATDList::Schedule() {
}
}
- DEBUG(errs() << "********** List Scheduling **********\n");
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this));
@@ -448,7 +448,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
/// incorrect by instruction reordering.
///
void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
- DEBUG(errs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
+ DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
std::set<unsigned> killedRegs;
BitVector ReservedRegs = TRI->getReservedRegs(MF);
@@ -511,7 +511,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
}
if (MO.isKill() != kill) {
- DEBUG(errs() << "Fixing " << MO << " in ");
+ DEBUG(dbgs() << "Fixing " << MO << " in ");
// Warning: ToggleKillFlag may invalidate MO.
ToggleKillFlag(MI, MO);
DEBUG(MI->dump());
@@ -549,9 +549,9 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
- errs() << "*** Scheduling failed! ***\n";
+ dbgs() << "*** Scheduling failed! ***\n";
SuccSU->dump(this);
- errs() << " has been released too many times!\n";
+ dbgs() << " has been released too many times!\n";
llvm_unreachable(0);
}
#endif
@@ -580,7 +580,7 @@ void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
- DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
Sequence.push_back(SU);
@@ -640,11 +640,11 @@ void SchedulePostRATDList::ListScheduleTopDown() {
MinDepth = PendingQueue[i]->getDepth();
}
- DEBUG(errs() << "\n*** Examining Available\n";
+ DEBUG(dbgs() << "\n*** Examining Available\n";
LatencyPriorityQueue q = AvailableQueue;
while (!q.empty()) {
SUnit *su = q.pop();
- errs() << "Height " << su->getHeight() << ": ";
+ dbgs() << "Height " << su->getHeight() << ": ";
su->dump(this);
});
@@ -689,19 +689,19 @@ void SchedulePostRATDList::ListScheduleTopDown() {
}
} else {
if (CycleHasInsts) {
- DEBUG(errs() << "*** Finished cycle " << CurCycle << '\n');
+ DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
HazardRec->AdvanceCycle();
} else if (!HasNoopHazards) {
// Otherwise, we have a pipeline stall, but no other problem,
// just advance the current cycle and try again.
- DEBUG(errs() << "*** Stall in cycle " << CurCycle << '\n');
+ DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');
HazardRec->AdvanceCycle();
++NumStalls;
} else {
// Otherwise, we have no instructions to issue and we have instructions
// that will fault if we don't do this right. This is the case for
// processors without pipeline interlocks and other cases.
- DEBUG(errs() << "*** Emitting noop in cycle " << CurCycle << '\n');
+ DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
HazardRec->EmitNoop();
Sequence.push_back(0); // NULL here means noop
++NumNoops;
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index 1c5222c..8cbc8c2 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -481,32 +481,21 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
// Search for the use in this block that precedes the instruction we care
// about, going to the fallback case if we don't find it.
- if (UseI == MBB->begin())
- return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
- Uses, NewVNs, LiveOut, Phis,
- IsTopLevel, IsIntraBlock);
-
MachineBasicBlock::iterator Walker = UseI;
- --Walker;
bool found = false;
while (Walker != MBB->begin()) {
+ --Walker;
if (BlockUses.count(Walker)) {
found = true;
break;
}
- --Walker;
- }
-
- // Must check begin() too.
- if (!found) {
- if (BlockUses.count(Walker))
- found = true;
- else
- return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
- Uses, NewVNs, LiveOut, Phis,
- IsTopLevel, IsIntraBlock);
}
+ if (!found)
+ return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+ Uses, NewVNs, LiveOut, Phis,
+ IsTopLevel, IsIntraBlock);
+
SlotIndex UseIndex = LIs->getInstructionIndex(Walker);
UseIndex = UseIndex.getUseIndex();
SlotIndex EndIndex;
@@ -533,17 +522,11 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
// This case is basically a merging of the two preceding cases, with the
// special note that checking for defs must take precedence over checking
// for uses, because of two-address instructions.
-
- if (UseI == MBB->begin())
- return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses,
- NewVNs, LiveOut, Phis,
- IsTopLevel, IsIntraBlock);
-
MachineBasicBlock::iterator Walker = UseI;
- --Walker;
bool foundDef = false;
bool foundUse = false;
while (Walker != MBB->begin()) {
+ --Walker;
if (BlockDefs.count(Walker)) {
foundDef = true;
break;
@@ -551,21 +534,13 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
foundUse = true;
break;
}
- --Walker;
- }
-
- // Must check begin() too.
- if (!foundDef && !foundUse) {
- if (BlockDefs.count(Walker))
- foundDef = true;
- else if (BlockUses.count(Walker))
- foundUse = true;
- else
- return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
- Uses, NewVNs, LiveOut, Phis,
- IsTopLevel, IsIntraBlock);
}
+ if (!foundDef && !foundUse)
+ return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+ Uses, NewVNs, LiveOut, Phis,
+ IsTopLevel, IsIntraBlock);
+
SlotIndex StartIndex = LIs->getInstructionIndex(Walker);
StartIndex = foundDef ? StartIndex.getDefIndex() : StartIndex.getUseIndex();
SlotIndex EndIndex;
@@ -1022,7 +997,7 @@ MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg,
/// so it would not cross the barrier that's being processed. Shrink wrap
/// (minimize) the live interval to the last uses.
bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
- DEBUG(errs() << "Pre-alloc splitting " << LI->reg << " for " << *Barrier
+ DEBUG(dbgs() << "Pre-alloc splitting " << LI->reg << " for " << *Barrier
<< " result: ");
CurrLI = LI;
@@ -1039,7 +1014,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
// If this would create a new join point, do not split.
if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent())) {
- DEBUG(errs() << "FAILED (would create a new join point).\n");
+ DEBUG(dbgs() << "FAILED (would create a new join point).\n");
return false;
}
@@ -1056,13 +1031,13 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
MachineBasicBlock::iterator RestorePt =
findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB);
if (RestorePt == BarrierMBB->end()) {
- DEBUG(errs() << "FAILED (could not find a suitable restore point).\n");
+ DEBUG(dbgs() << "FAILED (could not find a suitable restore point).\n");
return false;
}
if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI))
if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, RefsInMBB)) {
- DEBUG(errs() << "success (remat).\n");
+ DEBUG(dbgs() << "success (remat).\n");
return true;
}
@@ -1081,7 +1056,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
MachineBasicBlock::iterator SpillPt =
findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
if (SpillPt == BarrierMBB->begin()) {
- DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
+ DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n");
return false; // No gap to insert spill.
}
// Add spill.
@@ -1096,7 +1071,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
// If it's already split, just restore the value. There is no need to spill
// the def again.
if (!DefMI) {
- DEBUG(errs() << "FAILED (def is dead).\n");
+ DEBUG(dbgs() << "FAILED (def is dead).\n");
return false; // Def is dead. Do nothing.
}
@@ -1111,13 +1086,13 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI,
RefsInMBB);
if (SpillPt == DefMBB->begin()) {
- DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
+ DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n");
return false; // No gap to insert spill.
}
} else {
SpillPt = llvm::next(MachineBasicBlock::iterator(DefMI));
if (SpillPt == DefMBB->end()) {
- DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
+ DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n");
return false; // No gap to insert spill.
}
}
@@ -1160,7 +1135,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
}
++NumSplits;
- DEBUG(errs() << "success.\n");
+ DEBUG(dbgs() << "success.\n");
return true;
}
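The loop restructuring in PerformPHIConstruction above is worth noting: decrementing at the top of the loop lets the scan test begin() itself, so both the pre-loop begin() check and the post-loop "must check begin() too" block disappear. The idiom in isolation, as a sketch over a generic container rather than a MachineBasicBlock:

    #include <list>

    // Scan strictly backwards from From, testing every element before it,
    // including *begin(). When From == begin() nothing is visited, which
    // subsumes the old explicit UseI == MBB->begin() fallback case.
    template <class Pred>
    static std::list<int>::iterator
    findLastBefore(std::list<int> &L, std::list<int>::iterator From, Pred P) {
      std::list<int>::iterator Walker = From;
      while (Walker != L.begin()) {
        --Walker;              // step first, then test
        if (P(*Walker))
          return Walker;
      }
      return L.end();          // nothing matched before From
    }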
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index c9a33d8..a00f450 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -61,7 +61,7 @@ bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
/// implicit_def defs and their uses.
bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
- DEBUG(errs() << "********** PROCESS IMPLICIT DEFS **********\n"
+ DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
<< "********** Function: "
<< ((Value*)fn.getFunction())->getName() << '\n');
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 9e97d89..8e44a57 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -277,7 +277,7 @@ namespace {
bool Error = false;
for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) {
if (regUse_[i] != 0) {
- errs() << tri_->getName(i) << " is still in use!\n";
+ dbgs() << tri_->getName(i) << " is still in use!\n";
Error = true;
}
}
@@ -344,16 +344,16 @@ namespace {
void printIntervals(const char* const str, ItTy i, ItTy e) const {
DEBUG({
if (str)
- errs() << str << " intervals:\n";
+ dbgs() << str << " intervals:\n";
for (; i != e; ++i) {
- errs() << "\t" << *i->first << " -> ";
+ dbgs() << "\t" << *i->first << " -> ";
unsigned reg = i->first->reg;
if (TargetRegisterInfo::isVirtualRegister(reg))
reg = vrm_->getPhys(reg);
- errs() << tri_->getName(reg) << '\n';
+ dbgs() << tri_->getName(reg) << '\n';
}
});
}
@@ -455,7 +455,7 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
return Reg;
// Try to coalesce.
- DEBUG(errs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg)
+ DEBUG(dbgs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg)
<< '\n');
vrm_->clearVirt(cur.reg);
vrm_->assignVirt2Phys(cur.reg, CandReg);
@@ -544,7 +544,7 @@ void RALinScan::initIntervalSets()
void RALinScan::linearScan() {
// linear scan algorithm
DEBUG({
- errs() << "********** LINEAR SCAN **********\n"
+ dbgs() << "********** LINEAR SCAN **********\n"
<< "********** Function: "
<< mf_->getFunction()->getName() << '\n';
printIntervals("fixed", fixed_.begin(), fixed_.end());
@@ -555,7 +555,7 @@ void RALinScan::linearScan() {
LiveInterval* cur = unhandled_.top();
unhandled_.pop();
++NumIters;
- DEBUG(errs() << "\n*** CURRENT ***: " << *cur << '\n');
+ DEBUG(dbgs() << "\n*** CURRENT ***: " << *cur << '\n');
assert(!cur->empty() && "Empty interval in unhandled set.");
@@ -580,7 +580,7 @@ void RALinScan::linearScan() {
while (!active_.empty()) {
IntervalPtr &IP = active_.back();
unsigned reg = IP.first->reg;
- DEBUG(errs() << "\tinterval " << *IP.first << " expired\n");
+ DEBUG(dbgs() << "\tinterval " << *IP.first << " expired\n");
assert(TargetRegisterInfo::isVirtualRegister(reg) &&
"Can only allocate virtual registers!");
reg = vrm_->getPhys(reg);
@@ -592,7 +592,7 @@ void RALinScan::linearScan() {
DEBUG({
for (IntervalPtrs::reverse_iterator
i = inactive_.rbegin(); i != inactive_.rend(); ++i)
- errs() << "\tinterval " << *i->first << " expired\n";
+ dbgs() << "\tinterval " << *i->first << " expired\n";
});
inactive_.clear();
@@ -628,7 +628,7 @@ void RALinScan::linearScan() {
}
}
- DEBUG(errs() << *vrm_);
+ DEBUG(dbgs() << *vrm_);
// Look for physical registers that end up not being allocated even though
// register allocator had to spill other registers in its register class.
@@ -642,7 +642,7 @@ void RALinScan::linearScan() {
/// to the inactive list.
void RALinScan::processActiveIntervals(SlotIndex CurPoint)
{
- DEBUG(errs() << "\tprocessing active intervals:\n");
+ DEBUG(dbgs() << "\tprocessing active intervals:\n");
for (unsigned i = 0, e = active_.size(); i != e; ++i) {
LiveInterval *Interval = active_[i].first;
@@ -652,7 +652,7 @@ void RALinScan::processActiveIntervals(SlotIndex CurPoint)
IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
if (IntervalPos == Interval->end()) { // Remove expired intervals.
- DEBUG(errs() << "\t\tinterval " << *Interval << " expired\n");
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
assert(TargetRegisterInfo::isVirtualRegister(reg) &&
"Can only allocate virtual registers!");
reg = vrm_->getPhys(reg);
@@ -665,7 +665,7 @@ void RALinScan::processActiveIntervals(SlotIndex CurPoint)
} else if (IntervalPos->start > CurPoint) {
// Move inactive intervals to inactive list.
- DEBUG(errs() << "\t\tinterval " << *Interval << " inactive\n");
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " inactive\n");
assert(TargetRegisterInfo::isVirtualRegister(reg) &&
"Can only allocate virtual registers!");
reg = vrm_->getPhys(reg);
@@ -688,7 +688,7 @@ void RALinScan::processActiveIntervals(SlotIndex CurPoint)
/// ones to the active list.
void RALinScan::processInactiveIntervals(SlotIndex CurPoint)
{
- DEBUG(errs() << "\tprocessing inactive intervals:\n");
+ DEBUG(dbgs() << "\tprocessing inactive intervals:\n");
for (unsigned i = 0, e = inactive_.size(); i != e; ++i) {
LiveInterval *Interval = inactive_[i].first;
@@ -698,7 +698,7 @@ void RALinScan::processInactiveIntervals(SlotIndex CurPoint)
IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
if (IntervalPos == Interval->end()) { // remove expired intervals.
- DEBUG(errs() << "\t\tinterval " << *Interval << " expired\n");
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
// Pop off the end of the list.
inactive_[i] = inactive_.back();
@@ -706,7 +706,7 @@ void RALinScan::processInactiveIntervals(SlotIndex CurPoint)
--i; --e;
} else if (IntervalPos->start <= CurPoint) {
// move re-activated intervals in active list
- DEBUG(errs() << "\t\tinterval " << *Interval << " active\n");
+ DEBUG(dbgs() << "\t\tinterval " << *Interval << " active\n");
assert(TargetRegisterInfo::isVirtualRegister(reg) &&
"Can only allocate virtual registers!");
reg = vrm_->getPhys(reg);
@@ -834,10 +834,10 @@ void RALinScan::findIntervalsToSpill(LiveInterval *cur,
SmallVector<LiveInterval*, 8> SLIs[3];
DEBUG({
- errs() << "\tConsidering " << NumCands << " candidates: ";
+ dbgs() << "\tConsidering " << NumCands << " candidates: ";
for (unsigned i = 0; i != NumCands; ++i)
- errs() << tri_->getName(Candidates[i].first) << " ";
- errs() << "\n";
+ dbgs() << tri_->getName(Candidates[i].first) << " ";
+ dbgs() << "\n";
});
// Calculate the number of conflicts of each candidate.
@@ -950,7 +950,7 @@ namespace {
/// assignRegOrStackSlotAtInterval - assign a register if one is available, or
/// spill.
void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
- DEBUG(errs() << "\tallocating current interval: ");
+ DEBUG(dbgs() << "\tallocating current interval: ");
// This is an implicitly defined live interval, just assign any register.
const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
@@ -958,7 +958,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
unsigned physReg = vrm_->getRegAllocPref(cur->reg);
if (!physReg)
physReg = *RC->allocation_order_begin(*mf_);
- DEBUG(errs() << tri_->getName(physReg) << '\n');
+ DEBUG(dbgs() << tri_->getName(physReg) << '\n');
// Note the register is not really in use.
vrm_->assignVirt2Phys(cur->reg, physReg);
return;
@@ -1092,7 +1092,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
// the free physical register and add this interval to the active
// list.
if (physReg) {
- DEBUG(errs() << tri_->getName(physReg) << '\n');
+ DEBUG(dbgs() << tri_->getName(physReg) << '\n');
vrm_->assignVirt2Phys(cur->reg, physReg);
addRegUse(physReg);
active_.push_back(std::make_pair(cur, cur->begin()));
@@ -1108,7 +1108,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
}
return;
}
- DEBUG(errs() << "no free registers\n");
+ DEBUG(dbgs() << "no free registers\n");
// Compile the spill weights into an array that is better for scanning.
std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f);
@@ -1126,7 +1126,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
updateSpillWeights(SpillWeights, reg, i->first->weight, RC);
}
- DEBUG(errs() << "\tassigning stack slot at interval "<< *cur << ":\n");
+ DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n");
// Find a register to spill.
float minWeight = HUGE_VALF;
@@ -1196,10 +1196,10 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
}
DEBUG({
- errs() << "\t\tregister(s) with min weight(s): ";
+ dbgs() << "\t\tregister(s) with min weight(s): ";
for (unsigned i = 0; i != LastCandidate; ++i)
- errs() << tri_->getName(RegsWeights[i].first)
+ dbgs() << tri_->getName(RegsWeights[i].first)
<< " (" << RegsWeights[i].second << ")\n";
});
@@ -1207,7 +1207,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
// add any added intervals back to unhandled, and restart
// linearscan.
if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
- DEBUG(errs() << "\t\t\tspilling(c): " << *cur << '\n');
+ DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n');
SmallVector<LiveInterval*, 8> spillIs;
std::vector<LiveInterval*> added;
@@ -1285,7 +1285,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
while (!spillIs.empty()) {
LiveInterval *sli = spillIs.back();
spillIs.pop_back();
- DEBUG(errs() << "\t\t\tspilling(a): " << *sli << '\n');
+ DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n');
if (sli->beginIndex() < earliestStart)
earliestStart = sli->beginIndex();
@@ -1296,7 +1296,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
spilled.insert(sli->reg);
}
- DEBUG(errs() << "\t\trolling back to: " << earliestStart << '\n');
+ DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n');
// Scan handled in reverse order up to the earliest start of a
// spilled live interval and undo each one, restoring the state of
@@ -1306,7 +1306,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
// If this interval starts before t we are done.
if (!i->empty() && i->beginIndex() < earliestStart)
break;
- DEBUG(errs() << "\t\t\tundo changes for: " << *i << '\n');
+ DEBUG(dbgs() << "\t\t\tundo changes for: " << *i << '\n');
handled_.pop_back();
// When undoing a live interval allocation we must know if it is active or
@@ -1356,7 +1356,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
LiveInterval *HI = handled_[i];
if (!HI->expiredAt(earliestStart) &&
HI->expiredAt(cur->beginIndex())) {
- DEBUG(errs() << "\t\t\tundo changes for: " << *HI << '\n');
+ DEBUG(dbgs() << "\t\t\tundo changes for: " << *HI << '\n');
active_.push_back(std::make_pair(HI, HI->begin()));
assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg));
addRegUse(vrm_->getPhys(HI->reg));
@@ -1492,7 +1492,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
// available first.
unsigned Preference = vrm_->getRegAllocPref(cur->reg);
if (Preference) {
- DEBUG(errs() << "(preferred: " << tri_->getName(Preference) << ") ");
+ DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") ");
if (isRegAvail(Preference) &&
RC->contains(Preference))
return Preference;
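
The replacements in this file (and throughout the patch) are mechanical: every
errs() inside a DEBUG(...) body becomes dbgs(). The distinction is that errs()
is the raw standard-error stream, while dbgs() is the dedicated debug stream
declared in llvm/Support/Debug.h, which can be buffered or redirected in one
place instead of hard-wiring stderr at every call site. A minimal sketch of
the idiom (the helper function is hypothetical), assuming an assertions-enabled
build, since DEBUG bodies compile away under NDEBUG:

#define DEBUG_TYPE "regalloc"         // unit name used by -debug-only=regalloc
#include "llvm/Support/Debug.h"       // DEBUG(...) and dbgs()
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void noteSpill(unsigned VirtReg) {
  // Printed only under -debug (or -debug-only=regalloc); routing all debug
  // output through dbgs() keeps the stream centrally controllable.
  DEBUG(dbgs() << "spilling %reg" << VirtReg << '\n');
}
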
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
index aea5cff..cbb5826 100644
--- a/lib/CodeGen/RegAllocLocal.cpp
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -296,11 +296,11 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB,
assert(VirtReg && "Spilling a physical register is illegal!"
" Must not have appropriate kill for the register or use exists beyond"
" the intended one.");
- DEBUG(errs() << " Spilling register " << TRI->getName(PhysReg)
+ DEBUG(dbgs() << " Spilling register " << TRI->getName(PhysReg)
<< " containing %reg" << VirtReg);
if (!isVirtRegModified(VirtReg)) {
- DEBUG(errs() << " which has not been modified, so no store necessary!");
+ DEBUG(dbgs() << " which has not been modified, so no store necessary!");
std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg);
if (LastUse.first)
LastUse.first->getOperand(LastUse.second).setIsKill();
@@ -310,7 +310,7 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB,
// modified.
const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
int FrameIndex = getStackSpaceFor(VirtReg, RC);
- DEBUG(errs() << " to stack slot #" << FrameIndex);
+ DEBUG(dbgs() << " to stack slot #" << FrameIndex);
// If the instruction reads the register that's spilled (e.g. this can
// happen if it is a move to a physical register), then the spill
// instruction is not a kill.
@@ -321,7 +321,7 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB,
getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available
- DEBUG(errs() << '\n');
+ DEBUG(dbgs() << '\n');
removePhysReg(PhysReg);
}
@@ -516,7 +516,7 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded
- DEBUG(errs() << " Reloading %reg" << VirtReg << " into "
+ DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into "
<< TRI->getName(PhysReg) << "\n");
// Add move instruction(s)
@@ -725,7 +725,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
DEBUG({
const BasicBlock *LBB = MBB.getBasicBlock();
if (LBB)
- errs() << "\nStarting RegAlloc of BB: " << LBB->getName();
+ dbgs() << "\nStarting RegAlloc of BB: " << LBB->getName();
});
// Add live-in registers as active.
@@ -752,13 +752,13 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
MachineInstr *MI = MII++;
const TargetInstrDesc &TID = MI->getDesc();
DEBUG({
- errs() << "\nStarting RegAlloc of: " << *MI;
- errs() << " Regs have values: ";
+ dbgs() << "\nStarting RegAlloc of: " << *MI;
+ dbgs() << " Regs have values: ";
for (unsigned i = 0; i != TRI->getNumRegs(); ++i)
if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
- errs() << "[" << TRI->getName(i)
+ dbgs() << "[" << TRI->getName(i)
<< ",%reg" << PhysRegsUsed[i] << "] ";
- errs() << '\n';
+ dbgs() << '\n';
});
// Determine whether this is a copy instruction. The cases where the
@@ -809,7 +809,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
markVirtRegModified(DestVirtReg);
getVirtRegLastUse(DestVirtReg) =
std::make_pair((MachineInstr*)0, 0);
- DEBUG(errs() << " Assigning " << TRI->getName(DestPhysReg)
+ DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg)
<< " to %reg" << DestVirtReg << "\n");
MO.setReg(DestPhysReg); // Assign the earlyclobber register
} else {
@@ -876,13 +876,13 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
}
if (PhysReg) {
- DEBUG(errs() << " Last use of " << TRI->getName(PhysReg)
+ DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg)
<< "[%reg" << VirtReg <<"], removing it from live set\n");
removePhysReg(PhysReg);
for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
*SubRegs; ++SubRegs) {
if (PhysRegsUsed[*SubRegs] != -2) {
- DEBUG(errs() << " Last use of "
+ DEBUG(dbgs() << " Last use of "
<< TRI->getName(*SubRegs) << "[%reg" << VirtReg
<<"], removing it from live set\n");
removePhysReg(*SubRegs);
@@ -978,7 +978,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
MF->getRegInfo().setPhysRegUsed(DestPhysReg);
markVirtRegModified(DestVirtReg);
getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
- DEBUG(errs() << " Assigning " << TRI->getName(DestPhysReg)
+ DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg)
<< " to %reg" << DestVirtReg << "\n");
MO.setReg(DestPhysReg); // Assign the output register
}
@@ -1001,14 +1001,14 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
}
if (PhysReg) {
- DEBUG(errs() << " Register " << TRI->getName(PhysReg)
+ DEBUG(dbgs() << " Register " << TRI->getName(PhysReg)
<< " [%reg" << VirtReg
<< "] is never used, removing it from live set\n");
removePhysReg(PhysReg);
for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
*AliasSet; ++AliasSet) {
if (PhysRegsUsed[*AliasSet] != -2) {
- DEBUG(errs() << " Register " << TRI->getName(*AliasSet)
+ DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet)
<< " [%reg" << *AliasSet
<< "] is never used, removing it from live set\n");
removePhysReg(*AliasSet);
@@ -1058,7 +1058,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
/// runOnMachineFunction - Register allocate the whole function
///
bool RALocal::runOnMachineFunction(MachineFunction &Fn) {
- DEBUG(errs() << "Machine Function\n");
+ DEBUG(dbgs() << "Machine Function\n");
MF = &Fn;
TM = &Fn.getTarget();
TRI = TM->getRegisterInfo();
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index c2014a7..fc59653 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -717,7 +717,7 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
// Get the physical reg, subtracting 1 to account for the spill option.
unsigned physReg = allowedSets[node][allocSelection - 1];
- DEBUG(errs() << "VREG " << virtReg << " -> "
+ DEBUG(dbgs() << "VREG " << virtReg << " -> "
<< tri->getName(physReg) << "\n");
assert(physReg != 0);
@@ -741,7 +741,7 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
addStackInterval(spillInterval, mri);
(void) oldSpillWeight;
- DEBUG(errs() << "VREG " << virtReg << " -> SPILLED (Cost: "
+ DEBUG(dbgs() << "VREG " << virtReg << " -> SPILLED (Cost: "
<< oldSpillWeight << ", New vregs: ");
// Copy any newly inserted live intervals into the list of regs to
@@ -752,12 +752,12 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
assert(!(*itr)->empty() && "Empty spill range.");
- DEBUG(errs() << (*itr)->reg << " ");
+ DEBUG(dbgs() << (*itr)->reg << " ");
vregIntervalsToAlloc.insert(*itr);
}
- DEBUG(errs() << ")\n");
+ DEBUG(dbgs() << ")\n");
// We need another round if spill intervals were added.
anotherRoundNeeded |= !newSpills.empty();
@@ -849,7 +849,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
vrm = &getAnalysis<VirtRegMap>();
- DEBUG(errs() << "PBQP2 Register Allocating for " << mf->getFunction()->getName() << "\n");
+ DEBUG(dbgs() << "PBQP2 Register Allocating for " << mf->getFunction()->getName() << "\n");
// Allocator main loop:
//
@@ -874,7 +874,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
unsigned round = 0;
while (!pbqpAllocComplete) {
- DEBUG(errs() << " PBQP Regalloc round " << round << ":\n");
+ DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n");
PBQP::SimpleGraph problem = constructPBQPProblem();
PBQP::HeuristicSolver<PBQP::Heuristics::Briggs> solver;
@@ -896,7 +896,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
node2LI.clear();
allowedSets.clear();
- DEBUG(errs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
+ DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
// Run rewriter
std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter());
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 71693d2..1f3e295 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -41,7 +41,7 @@ void ScheduleDAG::dumpSchedule() const {
if (SUnit *SU = Sequence[i])
SU->dump(this);
else
- errs() << "**** NOOP ****\n";
+ dbgs() << "**** NOOP ****\n";
}
}
@@ -61,9 +61,9 @@ void ScheduleDAG::Run(MachineBasicBlock *bb,
Schedule();
DEBUG({
- errs() << "*** Final schedule ***\n";
+ dbgs() << "*** Final schedule ***\n";
dumpSchedule();
- errs() << '\n';
+ dbgs() << '\n';
});
}
@@ -271,58 +271,58 @@ void SUnit::ComputeHeight() {
/// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
/// a group of nodes flagged together.
void SUnit::dump(const ScheduleDAG *G) const {
- errs() << "SU(" << NodeNum << "): ";
+ dbgs() << "SU(" << NodeNum << "): ";
G->dumpNode(this);
}
void SUnit::dumpAll(const ScheduleDAG *G) const {
dump(G);
- errs() << " # preds left : " << NumPredsLeft << "\n";
- errs() << " # succs left : " << NumSuccsLeft << "\n";
- errs() << " Latency : " << Latency << "\n";
- errs() << " Depth : " << Depth << "\n";
- errs() << " Height : " << Height << "\n";
+ dbgs() << " # preds left : " << NumPredsLeft << "\n";
+ dbgs() << " # succs left : " << NumSuccsLeft << "\n";
+ dbgs() << " Latency : " << Latency << "\n";
+ dbgs() << " Depth : " << Depth << "\n";
+ dbgs() << " Height : " << Height << "\n";
if (Preds.size() != 0) {
- errs() << " Predecessors:\n";
+ dbgs() << " Predecessors:\n";
for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end();
I != E; ++I) {
- errs() << " ";
+ dbgs() << " ";
switch (I->getKind()) {
- case SDep::Data: errs() << "val "; break;
- case SDep::Anti: errs() << "anti"; break;
- case SDep::Output: errs() << "out "; break;
- case SDep::Order: errs() << "ch "; break;
+ case SDep::Data: dbgs() << "val "; break;
+ case SDep::Anti: dbgs() << "anti"; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ch "; break;
}
- errs() << "#";
- errs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ dbgs() << "#";
+ dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
if (I->isArtificial())
- errs() << " *";
- errs() << ": Latency=" << I->getLatency();
- errs() << "\n";
+ dbgs() << " *";
+ dbgs() << ": Latency=" << I->getLatency();
+ dbgs() << "\n";
}
}
if (Succs.size() != 0) {
- errs() << " Successors:\n";
+ dbgs() << " Successors:\n";
for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
I != E; ++I) {
- errs() << " ";
+ dbgs() << " ";
switch (I->getKind()) {
- case SDep::Data: errs() << "val "; break;
- case SDep::Anti: errs() << "anti"; break;
- case SDep::Output: errs() << "out "; break;
- case SDep::Order: errs() << "ch "; break;
+ case SDep::Data: dbgs() << "val "; break;
+ case SDep::Anti: dbgs() << "anti"; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ch "; break;
}
- errs() << "#";
- errs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ dbgs() << "#";
+ dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
if (I->isArtificial())
- errs() << " *";
- errs() << ": Latency=" << I->getLatency();
- errs() << "\n";
+ dbgs() << " *";
+ dbgs() << ": Latency=" << I->getLatency();
+ dbgs() << "\n";
}
}
- errs() << "\n";
+ dbgs() << "\n";
}
#ifndef NDEBUG
@@ -340,35 +340,35 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) {
continue;
}
if (!AnyNotSched)
- errs() << "*** Scheduling failed! ***\n";
+ dbgs() << "*** Scheduling failed! ***\n";
SUnits[i].dump(this);
- errs() << "has not been scheduled!\n";
+ dbgs() << "has not been scheduled!\n";
AnyNotSched = true;
}
if (SUnits[i].isScheduled &&
(isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) >
unsigned(INT_MAX)) {
if (!AnyNotSched)
- errs() << "*** Scheduling failed! ***\n";
+ dbgs() << "*** Scheduling failed! ***\n";
SUnits[i].dump(this);
- errs() << "has an unexpected "
+ dbgs() << "has an unexpected "
<< (isBottomUp ? "Height" : "Depth") << " value!\n";
AnyNotSched = true;
}
if (isBottomUp) {
if (SUnits[i].NumSuccsLeft != 0) {
if (!AnyNotSched)
- errs() << "*** Scheduling failed! ***\n";
+ dbgs() << "*** Scheduling failed! ***\n";
SUnits[i].dump(this);
- errs() << "has successors left!\n";
+ dbgs() << "has successors left!\n";
AnyNotSched = true;
}
} else {
if (SUnits[i].NumPredsLeft != 0) {
if (!AnyNotSched)
- errs() << "*** Scheduling failed! ***\n";
+ dbgs() << "*** Scheduling failed! ***\n";
SUnits[i].dump(this);
- errs() << "has predecessors left!\n";
+ dbgs() << "has predecessors left!\n";
AnyNotSched = true;
}
}
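
VerifySchedule's diagnostics above ("has successors left!", "has predecessors
left!") check the dependence counters that drive list scheduling: a unit
becomes ready only when its NumSuccsLeft (bottom-up) or NumPredsLeft
(top-down) reaches zero, so a nonzero count after scheduling means a unit was
never released. A self-contained sketch of the bottom-up bookkeeping, with a
hypothetical MiniSUnit standing in for SUnit:

#include <cassert>
#include <cstddef>
#include <queue>
#include <vector>

// Hypothetical stand-in for SUnit: Preds lists predecessor indices, and
// NumSuccsLeft counts this unit's still-unscheduled successors.
struct MiniSUnit {
  std::vector<int> Preds;
  unsigned NumSuccsLeft;
  bool Scheduled;
  MiniSUnit() : NumSuccsLeft(0), Scheduled(false) {}
};

// Bottom-up list scheduling: a unit is ready once every successor is placed,
// and scheduling it releases one successor edge of each predecessor. Any
// unit left with NumSuccsLeft != 0 indicates a cycle -- the failure the
// "has successors left!" check reports.
void scheduleBottomUp(std::vector<MiniSUnit> &Units) {
  for (size_t i = 0; i != Units.size(); ++i)      // derive successor counts
    for (size_t p = 0; p != Units[i].Preds.size(); ++p)
      ++Units[Units[i].Preds[p]].NumSuccsLeft;

  std::queue<int> Ready;
  for (size_t i = 0; i != Units.size(); ++i)
    if (Units[i].NumSuccsLeft == 0)
      Ready.push((int)i);

  while (!Ready.empty()) {
    int U = Ready.front(); Ready.pop();
    Units[U].Scheduled = true;
    for (size_t p = 0; p != Units[U].Preds.size(); ++p) {
      int P = Units[U].Preds[p];
      if (--Units[P].NumSuccsLeft == 0)
        Ready.push(P);
    }
  }
  for (size_t i = 0; i != Units.size(); ++i)
    assert(Units[i].Scheduled && "has successors left -- cycle in the DAG");
}
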
diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
index 38839c4..4e6c1fc 100644
--- a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
+++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -69,7 +70,7 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
- errs() << "Formal argument #" << i << " has unhandled type "
+ dbgs() << "Formal argument #" << i << " has unhandled type "
<< ArgVT.getEVTString();
#endif
llvm_unreachable(0);
@@ -102,7 +103,7 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
- errs() << "Return operand #" << i << " has unhandled type "
+ dbgs() << "Return operand #" << i << " has unhandled type "
<< VT.getEVTString();
#endif
llvm_unreachable(0);
@@ -121,7 +122,7 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
- errs() << "Call operand #" << i << " has unhandled type "
+ dbgs() << "Call operand #" << i << " has unhandled type "
<< ArgVT.getEVTString();
#endif
llvm_unreachable(0);
@@ -140,7 +141,7 @@ void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
ISD::ArgFlagsTy ArgFlags = Flags[i];
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
- errs() << "Call operand #" << i << " has unhandled type "
+ dbgs() << "Call operand #" << i << " has unhandled type "
<< ArgVT.getEVTString();
#endif
llvm_unreachable(0);
@@ -157,7 +158,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
ISD::ArgFlagsTy Flags = Ins[i].Flags;
if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
#ifndef NDEBUG
- errs() << "Call result #" << i << " has unhandled type "
+ dbgs() << "Call result #" << i << " has unhandled type "
<< VT.getEVTString();
#endif
llvm_unreachable(0);
@@ -170,7 +171,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) {
if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
#ifndef NDEBUG
- errs() << "Call result has unhandled type "
+ dbgs() << "Call result has unhandled type "
<< VT.getEVTString();
#endif
llvm_unreachable(0);
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e6aa14c..549527c 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -541,11 +541,11 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo) {
assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
++NodesCombined;
- DEBUG(errs() << "\nReplacing.1 ";
+ DEBUG(dbgs() << "\nReplacing.1 ";
N->dump(&DAG);
- errs() << "\nWith: ";
+ dbgs() << "\nWith: ";
To[0].getNode()->dump(&DAG);
- errs() << " and " << NumTo-1 << " other values\n";
+ dbgs() << " and " << NumTo-1 << " other values\n";
for (unsigned i = 0, e = NumTo; i != e; ++i)
assert((!To[i].getNode() ||
N->getValueType(i) == To[i].getValueType()) &&
@@ -619,11 +619,11 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
// Replace the old value with the new one.
++NodesCombined;
- DEBUG(errs() << "\nReplacing.2 ";
+ DEBUG(dbgs() << "\nReplacing.2 ";
TLO.Old.getNode()->dump(&DAG);
- errs() << "\nWith: ";
+ dbgs() << "\nWith: ";
TLO.New.getNode()->dump(&DAG);
- errs() << '\n');
+ dbgs() << '\n');
CommitTargetLoweringOpt(TLO);
return true;
@@ -689,11 +689,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
- DEBUG(errs() << "\nReplacing.3 ";
+ DEBUG(dbgs() << "\nReplacing.3 ";
N->dump(&DAG);
- errs() << "\nWith: ";
+ dbgs() << "\nWith: ";
RV.getNode()->dump(&DAG);
- errs() << '\n');
+ dbgs() << '\n');
WorkListRemover DeadNodes(*this);
if (N->getNumValues() == RV.getNode()->getNumValues())
DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
@@ -1684,22 +1684,25 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
EVT VT = N0.getValueType();
assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
+ // Bail early if none of these transforms apply.
+ if (N0.getNode()->getNumOperands() == 0) return SDValue();
+
// For each of OP in AND/OR/XOR:
// fold (OP (zext x), (zext y)) -> (zext (OP x, y))
// fold (OP (sext x), (sext y)) -> (sext (OP x, y))
// fold (OP (aext x), (aext y)) -> (aext (OP x, y))
- // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
+ // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
//
// do not sink logical op inside of a vector extend, since it may combine
// into a vsetcc.
- if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
+ EVT Op0VT = N0.getOperand(0).getValueType();
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND ||
- (N0.getOpcode() == ISD::TRUNCATE &&
- !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) &&
+ (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
!VT.isVector() &&
- N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
- (!LegalOperations ||
- TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) {
+ Op0VT == N1.getOperand(0).getValueType() &&
+ (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
N0.getOperand(0).getValueType(),
N0.getOperand(0), N1.getOperand(0));
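
The refactored condition above preserves the underlying identity: a bitwise
operation commutes with extending (or, under the new legality guard,
truncating) both operands. A standalone check for the zext/AND case,
independent of the DAG machinery:

#include <cassert>
#include <cstdint>

// (and (zext x), (zext y)) == (zext (and x, y)) over all i8 inputs -- the
// identity behind the SimplifyBinOpWithSameOpcodeHands fold. The same holds
// for or/xor and for sign extension, since extension only fills high bits.
int main() {
  for (unsigned x = 0; x != 256; ++x)
    for (unsigned y = 0; y != 256; ++y) {
      uint32_t opAfterExt = uint32_t(uint8_t(x)) & uint32_t(uint8_t(y));
      uint32_t extAfterOp = uint32_t(uint8_t(x & y));
      assert(opAfterExt == extAfterOp);
    }
  return 0;
}
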
@@ -1839,6 +1842,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (!VT.isVector() &&
SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
+
// fold (zext_inreg (extload x)) -> (zextload x)
if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -1885,48 +1889,69 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and (load x), 255) -> (zextload x, i8)
// fold (and (extload x, i16), 255) -> (zextload x, i8)
- if (N1C && N0.getOpcode() == ISD::LOAD) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
+ if (N1C && (N0.getOpcode() == ISD::LOAD ||
+ (N0.getOpcode() == ISD::ANY_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+ bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
+ LoadSDNode *LN0 = HasAnyExt
+ ? cast<LoadSDNode>(N0.getOperand(0))
+ : cast<LoadSDNode>(N0);
if (LN0->getExtensionType() != ISD::SEXTLOAD &&
- LN0->isUnindexed() && N0.hasOneUse() &&
- // Do not change the width of a volatile load.
- !LN0->isVolatile()) {
- EVT ExtVT = MVT::Other;
+ LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) {
uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
- if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue()))
- ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
-
- EVT LoadedVT = LN0->getMemoryVT();
-
- // Do not generate loads of non-round integer types since these can
- // be expensive (and would be wrong if the type is not byte sized).
- if (ExtVT != MVT::Other && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
- (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
- EVT PtrType = N0.getOperand(1).getValueType();
-
- // For big endian targets, we need to add an offset to the pointer to
- // load the correct bytes. For little endian systems, we merely need to
- // read fewer bytes from the same pointer.
- unsigned LVTStoreBytes = LoadedVT.getStoreSize();
- unsigned EVTStoreBytes = ExtVT.getStoreSize();
- unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
- unsigned Alignment = LN0->getAlignment();
- SDValue NewPtr = LN0->getBasePtr();
-
- if (TLI.isBigEndian()) {
- NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
- NewPtr, DAG.getConstant(PtrOff, PtrType));
- Alignment = MinAlign(Alignment, PtrOff);
+ if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ EVT LoadedVT = LN0->getMemoryVT();
+
+ if (ExtVT == LoadedVT &&
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+
+ SDValue NewLoad =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ ExtVT, LN0->isVolatile(), LN0->getAlignment());
+ AddToWorkList(N);
+ CombineTo(LN0, NewLoad, NewLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+
+ // Do not change the width of a volatile load.
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
+ (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+ EVT PtrType = LN0->getOperand(1).getValueType();
+
+ unsigned Alignment = LN0->getAlignment();
+ SDValue NewPtr = LN0->getBasePtr();
+
+ // For big endian targets, we need to add an offset to the pointer
+ // to load the correct bytes. For little endian systems, we merely
+ // need to read fewer bytes from the same pointer.
+ if (TLI.isBigEndian()) {
+ unsigned LVTStoreBytes = LoadedVT.getStoreSize();
+ unsigned EVTStoreBytes = ExtVT.getStoreSize();
+ unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
+ NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
+ NewPtr, DAG.getConstant(PtrOff, PtrType));
+ Alignment = MinAlign(Alignment, PtrOff);
+ }
- AddToWorkList(NewPtr.getNode());
- SDValue Load =
- DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(),
- NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(),
- ExtVT, LN0->isVolatile(), Alignment);
- AddToWorkList(N);
- CombineTo(N0.getNode(), Load, Load.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ AddToWorkList(NewPtr.getNode());
+
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+ SDValue Load =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+ LN0->getChain(), NewPtr,
+ LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ ExtVT, LN0->isVolatile(), Alignment);
+ AddToWorkList(N);
+ CombineTo(LN0, Load, Load.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
}
}
}
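
The restructured block keeps the original pointer math: when a wide load is
narrowed on a big-endian target, the low-order bytes live at the end of the
value's storage, so the base pointer must be advanced by the difference in
store sizes; little-endian targets simply read fewer bytes at the same
address. A small check of that offset computation:

#include <cassert>

// PtrOff for a narrowed extending load, mirroring the big-endian branch
// above: skip the (LoadedBytes - ExtBytes) leading bytes that hold the
// high-order part of a big-endian value.
unsigned narrowLoadOffset(unsigned LoadedBytes, unsigned ExtBytes,
                          bool BigEndian) {
  return BigEndian ? LoadedBytes - ExtBytes : 0;
}

int main() {
  assert(narrowLoadOffset(4, 1, true)  == 3); // i32 -> i8: read byte 3 (BE)
  assert(narrowLoadOffset(4, 1, false) == 0); // i32 -> i8: read byte 0 (LE)
  assert(narrowLoadOffset(8, 2, true)  == 6); // i64 -> i16 on big-endian
  return 0;
}
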
@@ -2555,10 +2580,14 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// sext_inreg.
if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
- EVT EVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
- if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT)))
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
+ if (VT.isVector())
+ ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ ExtVT, VT.getVectorNumElements());
+ if ((!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
- N0.getOperand(0), DAG.getValueType(EVT));
+ N0.getOperand(0), DAG.getValueType(ExtVT));
}
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
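
For reference, the fold this hunk generalizes to vectors rewrites
(sra (shl x, c), c) as a sign_extend_inreg of the low (bits - c) bits; the new
EVT::getVectorVT call just builds that narrow type per element. The scalar
identity:

#include <cassert>
#include <cstdint>

// (sra (shl x, 24), 24) sign-extends the low 8 bits of a 32-bit value, i.e.
// sign_extend_inreg with LowBits = 32 - 24 = 8. The cast through uint32_t
// keeps the left shift well-defined; >> on int32_t is arithmetic on
// mainstream compilers.
int32_t sextInReg8(int32_t x) {
  return (int32_t)((uint32_t)x << 24) >> 24;
}

int main() {
  assert(sextInReg8(0x7F)  == 127);  // bit 7 clear: value unchanged
  assert(sextInReg8(0x80)  == -128); // bit 7 set: negative after extension
  assert(sextInReg8(0x1FF) == -1);   // bits above bit 7 are discarded
  return 0;
}
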
@@ -2778,9 +2807,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// However when after the source operand of SRL is optimized into AND, the SRL
// itself may not be optimized further. Look for it and add the BRCOND into
// the worklist.
- if (N->hasOneUse() &&
- N->use_begin()->getOpcode() == ISD::BRCOND)
- AddToWorkList(*N->use_begin());
+ if (N->hasOneUse()) {
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorkList(Use);
+ else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+ // Also look past the truncate.
+ Use = *Use->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorkList(Use);
+ }
+ }
return SDValue();
}
@@ -3034,9 +3071,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// See if the value being truncated is already sign extended. If so, just
// eliminate the trunc/sext pair.
SDValue Op = N0.getOperand(0);
- unsigned OpBits = Op.getValueType().getSizeInBits();
- unsigned MidBits = N0.getValueType().getSizeInBits();
- unsigned DestBits = VT.getSizeInBits();
+ unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits();
+ unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits();
+ unsigned DestBits = VT.getScalarType().getSizeInBits();
unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
if (OpBits == DestBits) {
@@ -3059,12 +3096,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// fold (sext (truncate x)) -> (sextinreg x).
if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
N0.getValueType())) {
- if (Op.getValueType().bitsLT(VT))
+ if (OpBits < DestBits)
Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
- else if (Op.getValueType().bitsGT(VT))
+ else if (OpBits > DestBits)
Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
- DAG.getValueType(N0.getValueType().getScalarType()));
+ DAG.getValueType(N0.getValueType()));
}
}
@@ -3198,7 +3235,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate x)) -> (and x, mask)
if (N0.getOpcode() == ISD::TRUNCATE &&
- (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+ (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) &&
+ (!TLI.isTruncateFree(N0.getOperand(0).getValueType(),
+ N0.getValueType()) ||
+ !TLI.isZExtFree(N0.getValueType(), VT))) {
SDValue Op = N0.getOperand(0);
if (Op.getValueType().bitsLT(VT)) {
Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
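
The tightened guard above only skips cases where the target considers both
the truncate and the zero-extension free; the fold itself relies on
zext(trunc x) reducing to a single mask. For the i32 -> i8 -> i32 case:

#include <cassert>
#include <cstdint>

// (zext (trunc x)) == (and x, 255) for i32 -> i8 -> i32: truncation keeps
// the low 8 bits and zero-extension clears the rest, which is exactly what
// a single AND with 2^8 - 1 computes.
int main() {
  for (uint32_t x = 0; x != 1000000u; x += 97) {
    uint32_t viaTruncZext = (uint32_t)(uint8_t)x;
    uint32_t viaMask      = x & 0xFFu;
    assert(viaTruncZext == viaMask);
  }
  return 0;
}
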
@@ -3322,7 +3362,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
return DAG.getNode(N0.getOpcode(), dl, VT,
DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)),
- DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(1)));
+ DAG.getNode(ISD::ZERO_EXTEND, dl,
+ N0.getOperand(1).getValueType(),
+ N0.getOperand(1)));
}
return SDValue();
@@ -3512,7 +3554,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (VT.isVector())
return SDValue();
- // Special case: SIGN_EXTEND_INREG is basically truncating to EVT then
+ // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
// extended to VT.
if (Opc == ISD::SIGN_EXTEND_INREG) {
ExtType = ISD::SEXTLOAD;
@@ -3586,7 +3628,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
EVT VT = N->getValueType(0);
EVT EVT = cast<VTSDNode>(N1)->getVT();
unsigned VTBits = VT.getScalarType().getSizeInBits();
- unsigned EVTBits = EVT.getSizeInBits();
+ unsigned EVTBits = EVT.getScalarType().getSizeInBits();
// fold (sext_in_reg c1) -> c1
if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
@@ -3702,7 +3744,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
else
// if the source and dest are the same type, we can drop both the extend
- // and the truncate
+ // and the truncate.
return N0.getOperand(0);
}
@@ -4513,6 +4555,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
N1.getOperand(0), N1.getOperand(1), N2);
}
+ SDNode *Trunc = 0;
+ if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
+ // Look past the truncate.
+ Trunc = N1.getNode();
+ N1 = N1.getOperand(0);
+ }
+
if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
// Match this pattern so that we can generate simpler code:
//
@@ -4524,7 +4573,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
// into
//
// %a = ...
- // %b = and %a, 2
+ // %b = and i32 %a, 2
// %c = setcc eq %b, 0
// brcond %c ...
//
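
The rewrite documented in this comment trades a shift-then-test for a test
against a shifted constant, which most targets can fold into a single
test-and-branch. The two predicates are equivalent:

#include <cassert>
#include <cstdint>

// ((x >> c) & 1) == 0  <=>  (x & (1 << c)) == 0 -- the bit-test rewrite
// described above: instead of shifting the value right by c and testing
// bit 0, test bit c of the original value directly.
int main() {
  for (uint32_t x = 0; x != 4096; ++x)
    for (unsigned c = 0; c != 12; ++c)
      assert((((x >> c) & 1u) == 0) == ((x & (1u << c)) == 0));
  return 0;
}
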
@@ -4535,7 +4584,6 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
SDValue Op1 = N1.getOperand(1);
if (Op0.getOpcode() == ISD::AND &&
- Op0.hasOneUse() &&
Op1.getOpcode() == ISD::Constant) {
SDValue AndOp1 = Op0.getOperand(1);
@@ -4550,12 +4598,21 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
Op0, DAG.getConstant(0, Op0.getValueType()),
ISD::SETNE);
+ SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, SetCC, N2);
+ // Don't add the new BRCond into the worklist or else SimplifySelectCC
+ // will convert it back to (X & C1) >> C2.
+ CombineTo(N, NewBRCond, false);
+ // Truncate is dead.
+ if (Trunc) {
+ removeFromWorkList(Trunc);
+ DAG.DeleteNode(Trunc);
+ }
// Replace the uses of SRL with SETCC
DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
removeFromWorkList(N1.getNode());
DAG.DeleteNode(N1.getNode());
- return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
- MVT::Other, Chain, SetCC, N2);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
}
@@ -4692,11 +4749,11 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
BasePtr, Offset, AM);
++PreIndexedNodes;
++NodesCombined;
- DEBUG(errs() << "\nReplacing.4 ";
+ DEBUG(dbgs() << "\nReplacing.4 ";
N->dump(&DAG);
- errs() << "\nWith: ";
+ dbgs() << "\nWith: ";
Result.getNode()->dump(&DAG);
- errs() << '\n');
+ dbgs() << '\n');
WorkListRemover DeadNodes(*this);
if (isLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
@@ -4826,11 +4883,11 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
BasePtr, Offset, AM);
++PostIndexedNodes;
++NodesCombined;
- DEBUG(errs() << "\nReplacing.5 ";
+ DEBUG(dbgs() << "\nReplacing.5 ";
N->dump(&DAG);
- errs() << "\nWith: ";
+ dbgs() << "\nWith: ";
Result.getNode()->dump(&DAG);
- errs() << '\n');
+ dbgs() << '\n');
WorkListRemover DeadNodes(*this);
if (isLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
@@ -4889,11 +4946,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// v3 = add v2, c
// Now we replace use of chain2 with chain1. This makes the second load
// isomorphic to the one we are deleting, and thus makes this load live.
- DEBUG(errs() << "\nReplacing.6 ";
+ DEBUG(dbgs() << "\nReplacing.6 ";
N->dump(&DAG);
- errs() << "\nWith chain: ";
+ dbgs() << "\nWith chain: ";
Chain.getNode()->dump(&DAG);
- errs() << "\n");
+ dbgs() << "\n");
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);
@@ -4909,11 +4966,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
- DEBUG(errs() << "\nReplacing.6 ";
+ DEBUG(dbgs() << "\nReplacing.6 ";
N->dump(&DAG);
- errs() << "\nWith: ";
+ dbgs() << "\nWith: ";
Undef.getNode()->dump(&DAG);
- errs() << " and 2 other values\n");
+ dbgs() << " and 2 other values\n");
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
@@ -5738,35 +5795,48 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
// FIXME: this discards src value information. This is
// over-conservative. It would be beneficial to be able to remember
- // both potential memory locations.
+ // both potential memory locations. Since we are discarding
+ // src value info, don't do the transformation if the memory
+ // locations are not in the default address space.
+ unsigned LLDAddrSpace = 0, RLDAddrSpace = 0;
+ if (const Value *LLDVal = LLD->getMemOperand()->getValue()) {
+ if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType()))
+ LLDAddrSpace = PT->getAddressSpace();
+ }
+ if (const Value *RLDVal = RLD->getMemOperand()->getValue()) {
+ if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType()))
+ RLDAddrSpace = PT->getAddressSpace();
+ }
SDValue Addr;
- if (TheSelect->getOpcode() == ISD::SELECT) {
- // Check that the condition doesn't reach either load. If so, folding
- // this will induce a cycle into the DAG.
- if ((!LLD->hasAnyUseOfValue(1) ||
- !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
- (!RLD->hasAnyUseOfValue(1) ||
- !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
- Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
- LLD->getBasePtr().getValueType(),
- TheSelect->getOperand(0), LLD->getBasePtr(),
- RLD->getBasePtr());
- }
- } else {
- // Check that the condition doesn't reach either load. If so, folding
- // this will induce a cycle into the DAG.
- if ((!LLD->hasAnyUseOfValue(1) ||
- (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
- !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
- (!RLD->hasAnyUseOfValue(1) ||
- (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
- !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
- Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
- LLD->getBasePtr().getValueType(),
- TheSelect->getOperand(0),
- TheSelect->getOperand(1),
- LLD->getBasePtr(), RLD->getBasePtr(),
- TheSelect->getOperand(4));
+ if (LLDAddrSpace == 0 && RLDAddrSpace == 0) {
+ if (TheSelect->getOpcode() == ISD::SELECT) {
+ // Check that the condition doesn't reach either load. If so, folding
+ // this will induce a cycle into the DAG.
+ if ((!LLD->hasAnyUseOfValue(1) ||
+ !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
+ (!RLD->hasAnyUseOfValue(1) ||
+ !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
+ Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0), LLD->getBasePtr(),
+ RLD->getBasePtr());
+ }
+ } else {
+ // Check that the condition doesn't reach either load. If so, folding
+ // this will induce a cycle into the DAG.
+ if ((!LLD->hasAnyUseOfValue(1) ||
+ (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+ !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
+ (!RLD->hasAnyUseOfValue(1) ||
+ (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+ !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
+ Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0),
+ TheSelect->getOperand(1),
+ LLD->getBasePtr(), RLD->getBasePtr(),
+ TheSelect->getOperand(4));
+ }
}
}
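
The new address-space guard in SimplifySelectOps reads the address space off
the pointer type of each load's underlying IR value; since the fold merges
two memory operands and drops source-value information, it now bails unless
both loads target the default address space (0). A sketch of the extraction,
assuming the 2.x-era headers this patch targets:

#include "llvm/DerivedTypes.h"     // PointerType
#include "llvm/Value.h"
#include "llvm/Support/Casting.h"  // dyn_cast

using namespace llvm;

// Mirror of the guard above: report the address space of the memory an IR
// value points into, defaulting to 0 (the generic address space) when no
// value or no pointer type is attached -- the same convention the
// transformation uses before comparing both sides against 0.
static unsigned addrSpaceOf(const Value *MemVal) {
  if (MemVal)
    if (const PointerType *PT = dyn_cast<PointerType>(MemVal->getType()))
      return PT->getAddressSpace();
  return 0;
}
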
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 33694f2..09fd657 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -78,7 +78,7 @@ unsigned FastISel::getRegForValue(Value *V) {
// Look up the value to see if we already have a register for it. We
// cache values defined by Instructions across blocks, and other values
// only locally. This is because Instructions already have the SSA
- // def-dominatess-use requirement enforced.
+ // def-dominates-use requirement enforced.
if (ValueMap.count(V))
return ValueMap[V];
unsigned Reg = LocalValueMap[V];
@@ -188,7 +188,7 @@ unsigned FastISel::getRegForGEPIndex(Value *Idx) {
/// SelectBinaryOp - Select and emit code for a binary operator instruction,
/// which has an opcode that directly corresponds to the given ISD opcode.
///
-bool FastISel::SelectBinaryOp(User *I, ISD::NodeType ISDOpcode) {
+bool FastISel::SelectBinaryOp(User *I, unsigned ISDOpcode) {
EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
if (VT == MVT::Other || !VT.isSimple())
// Unhandled type. Halt "fast" selection and bail.
@@ -325,12 +325,6 @@ bool FastISel::SelectCall(User *I) {
unsigned IID = F->getIntrinsicID();
switch (IID) {
default: break;
- case Intrinsic::dbg_stoppoint:
- case Intrinsic::dbg_region_start:
- case Intrinsic::dbg_region_end:
- case Intrinsic::dbg_func_start:
- // FIXME - Remove this instructions once the dust settles.
- return true;
case Intrinsic::dbg_declare: {
DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
if (!DIDescriptor::ValidDebugInfo(DI->getVariable(), CodeGenOpt::None)||!DW
@@ -338,8 +332,6 @@ bool FastISel::SelectCall(User *I) {
return true;
Value *Address = DI->getAddress();
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
- Address = BCI->getOperand(0);
AllocaInst *AI = dyn_cast<AllocaInst>(Address);
// Don't handle byval struct arguments or VLAs, for example.
if (!AI) break;
@@ -424,7 +416,7 @@ bool FastISel::SelectCall(User *I) {
return false;
}
-bool FastISel::SelectCast(User *I, ISD::NodeType Opcode) {
+bool FastISel::SelectCast(User *I, unsigned Opcode) {
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(I->getType());
@@ -742,44 +734,44 @@ FastISel::FastISel(MachineFunction &mf,
FastISel::~FastISel() {}
unsigned FastISel::FastEmit_(MVT, MVT,
- ISD::NodeType) {
+ unsigned) {
return 0;
}
unsigned FastISel::FastEmit_r(MVT, MVT,
- ISD::NodeType, unsigned /*Op0*/) {
+ unsigned, unsigned /*Op0*/) {
return 0;
}
unsigned FastISel::FastEmit_rr(MVT, MVT,
- ISD::NodeType, unsigned /*Op0*/,
+ unsigned, unsigned /*Op0*/,
unsigned /*Op0*/) {
return 0;
}
-unsigned FastISel::FastEmit_i(MVT, MVT, ISD::NodeType, uint64_t /*Imm*/) {
+unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
return 0;
}
unsigned FastISel::FastEmit_f(MVT, MVT,
- ISD::NodeType, ConstantFP * /*FPImm*/) {
+ unsigned, ConstantFP * /*FPImm*/) {
return 0;
}
unsigned FastISel::FastEmit_ri(MVT, MVT,
- ISD::NodeType, unsigned /*Op0*/,
+ unsigned, unsigned /*Op0*/,
uint64_t /*Imm*/) {
return 0;
}
unsigned FastISel::FastEmit_rf(MVT, MVT,
- ISD::NodeType, unsigned /*Op0*/,
+ unsigned, unsigned /*Op0*/,
ConstantFP * /*FPImm*/) {
return 0;
}
unsigned FastISel::FastEmit_rri(MVT, MVT,
- ISD::NodeType,
+ unsigned,
unsigned /*Op0*/, unsigned /*Op1*/,
uint64_t /*Imm*/) {
return 0;
@@ -789,7 +781,7 @@ unsigned FastISel::FastEmit_rri(MVT, MVT,
/// to emit an instruction with an immediate operand using FastEmit_ri.
/// If that fails, it materializes the immediate into a register and tries
/// FastEmit_rr instead.
-unsigned FastISel::FastEmit_ri_(MVT VT, ISD::NodeType Opcode,
+unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
unsigned Op0, uint64_t Imm,
MVT ImmType) {
// First check if immediate type is legal. If not, we can't use the ri form.
@@ -806,7 +798,7 @@ unsigned FastISel::FastEmit_ri_(MVT VT, ISD::NodeType Opcode,
/// to emit an instruction with a floating-point immediate operand using
/// FastEmit_rf. If that fails, it materializes the immediate into a register
/// and tries FastEmit_rr instead.
-unsigned FastISel::FastEmit_rf_(MVT VT, ISD::NodeType Opcode,
+unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode,
unsigned Op0, ConstantFP *FPImm,
MVT ImmType) {
// First check if immediate type is legal. If not, we can't use the rf form.
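
The signature change running through this file -- ISD::NodeType parameters
becoming plain unsigned -- is presumably so callers can pass opcode values
that are not members of the ISD enum (target-specific opcodes start at
ISD::BUILTIN_OP_END) without casting; treat that motivation as an inference,
since the patch itself is silent. A toy illustration with hypothetical names:

// The enum names only the target-independent opcodes, while targets define
// extra values starting at BUILTIN_OP_END, which no enumerator covers.
enum NodeType { ADD, SUB, BUILTIN_OP_END };

// Taking unsigned instead of NodeType accepts both ranges without casts.
unsigned fastEmitStub(unsigned Opcode, unsigned Op0) {
  return Opcode == ADD ? Op0 : 0; // placeholder selection logic
}

int main() {
  const unsigned TargetOpc = BUILTIN_OP_END + 42; // target-defined opcode
  fastEmitStub(ADD, 1);       // enum value: fine either way
  fastEmitStub(TargetOpc, 1); // out-of-enum value: needs unsigned
  return 0;
}
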
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index e3b25c2..4868c9e 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -113,7 +113,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
return;
}
// Interpret void as zero return values.
- if (Ty == Type::getVoidTy(Ty->getContext()))
+ if (Ty->isVoidTy())
return;
// Base case: we can get an EVT for this LLVM IR type.
ValueVTs.push_back(TLI.getValueType(Ty));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 474d833..5e3f58a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -32,6 +32,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -950,9 +951,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
switch (Node->getOpcode()) {
default:
#ifndef NDEBUG
- errs() << "NODE: ";
- Node->dump(&DAG);
- errs() << "\n";
+ dbgs() << "NODE: ";
+ Node->dump(&DAG);
+ dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to legalize this operator!");
@@ -2292,12 +2293,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
EVT VT = Node->getValueType(0);
EVT ShiftAmountTy = TLI.getShiftAmountTy();
- if (VT.isVector()) {
+ if (VT.isVector())
ShiftAmountTy = VT;
- VT = VT.getVectorElementType();
- }
- unsigned BitsDiff = VT.getSizeInBits() -
- ExtraVT.getSizeInBits();
+ unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+ ExtraVT.getScalarType().getSizeInBits();
SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy);
Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
Node->getOperand(0), ShiftCst);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 2831617..4f0fce7 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -43,15 +43,15 @@ static RTLIB::Libcall GetFPLibCall(EVT VT,
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
- DEBUG(errs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
- errs() << "\n");
+ DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue R = SDValue();
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- errs() << "SoftenFloatResult #" << ResNo << ": ";
- N->dump(&DAG); errs() << "\n";
+ dbgs() << "SoftenFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to soften the result of this operator!");
@@ -531,15 +531,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
- DEBUG(errs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
- errs() << "\n");
+ DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue Res = SDValue();
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- errs() << "SoftenFloatOperand Op #" << OpNo << ": ";
- N->dump(&DAG); errs() << "\n";
+ dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to soften this operator's operand!");
@@ -768,7 +768,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
/// have invalid operands or may have other results that need promotion, we just
/// know that (at least) one result needs expansion.
void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
- DEBUG(errs() << "Expand float result: "; N->dump(&DAG); errs() << "\n");
+ DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
SDValue Lo, Hi;
Lo = Hi = SDValue();
@@ -779,8 +779,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- errs() << "ExpandFloatResult #" << ResNo << ": ";
- N->dump(&DAG); errs() << "\n";
+ dbgs() << "ExpandFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to expand the result of this operator!");
@@ -1167,7 +1167,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
/// types of the node are known to be legal, but other operands of the node may
/// need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
- DEBUG(errs() << "Expand float operand: "; N->dump(&DAG); errs() << "\n");
+ DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
SDValue Res = SDValue();
if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType())
@@ -1178,8 +1178,8 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- errs() << "ExpandFloatOperand Op #" << OpNo << ": ";
- N->dump(&DAG); errs() << "\n";
+ dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to expand this operator's operand!");
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index bd3b97a..9932cf4 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -33,7 +33,7 @@ using namespace llvm;
/// may also have invalid operands or may have other results that need
/// expansion, we just know that (at least) one result needs promotion.
void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
- DEBUG(errs() << "Promote integer result: "; N->dump(&DAG); errs() << "\n");
+ DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n");
SDValue Res = SDValue();
// See if the target wants to custom expand this node.
@@ -43,8 +43,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- errs() << "PromoteIntegerResult #" << ResNo << ": ";
- N->dump(&DAG); errs() << "\n";
+ dbgs() << "PromoteIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to promote this operator!");
case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
@@ -599,7 +599,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
/// result types of the node are known to be legal, but other operands of the
/// node may need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
- DEBUG(errs() << "Promote integer operand: "; N->dump(&DAG); errs() << "\n");
+ DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n");
SDValue Res = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
@@ -608,8 +608,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- errs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
- N->dump(&DAG); errs() << "\n";
+ dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to promote this operator's operand!");
@@ -910,7 +910,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
/// have invalid operands or may have other results that need promotion, we just
/// know that (at least) one result needs expansion.
void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
- DEBUG(errs() << "Expand integer result: "; N->dump(&DAG); errs() << "\n");
+ DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n");
SDValue Lo, Hi;
Lo = Hi = SDValue();
@@ -921,8 +921,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- errs() << "ExpandIntegerResult #" << ResNo << ": ";
- N->dump(&DAG); errs() << "\n";
+ dbgs() << "ExpandIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to expand the result of this operator!");
@@ -1965,7 +1965,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
/// result types of the node are known to be legal, but other operands of the
/// node may need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
- DEBUG(errs() << "Expand integer operand: "; N->dump(&DAG); errs() << "\n");
+ DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n");
SDValue Res = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
@@ -1974,8 +1974,8 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- errs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
- N->dump(&DAG); errs() << "\n";
+ dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to expand this operator's operand!");
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index d9efd4f..37f36a3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -123,42 +123,42 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
// another node that has not been seen by the LegalizeTypes machinery.
if ((I->getNodeId() == NewNode && Mapped > 1) ||
(I->getNodeId() != NewNode && Mapped != 0)) {
- errs() << "Unprocessed value in a map!";
+ dbgs() << "Unprocessed value in a map!";
Failed = true;
}
} else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
if (Mapped > 1) {
- errs() << "Value with legal type was transformed!";
+ dbgs() << "Value with legal type was transformed!";
Failed = true;
}
} else {
if (Mapped == 0) {
- errs() << "Processed value not in any map!";
+ dbgs() << "Processed value not in any map!";
Failed = true;
} else if (Mapped & (Mapped - 1)) {
- errs() << "Value in multiple maps!";
+ dbgs() << "Value in multiple maps!";
Failed = true;
}
}
if (Failed) {
if (Mapped & 1)
- errs() << " ReplacedValues";
+ dbgs() << " ReplacedValues";
if (Mapped & 2)
- errs() << " PromotedIntegers";
+ dbgs() << " PromotedIntegers";
if (Mapped & 4)
- errs() << " SoftenedFloats";
+ dbgs() << " SoftenedFloats";
if (Mapped & 8)
- errs() << " ScalarizedVectors";
+ dbgs() << " ScalarizedVectors";
if (Mapped & 16)
- errs() << " ExpandedIntegers";
+ dbgs() << " ExpandedIntegers";
if (Mapped & 32)
- errs() << " ExpandedFloats";
+ dbgs() << " ExpandedFloats";
if (Mapped & 64)
- errs() << " SplitVectors";
+ dbgs() << " SplitVectors";
if (Mapped & 128)
- errs() << " WidenedVectors";
- errs() << "\n";
+ dbgs() << " WidenedVectors";
+ dbgs() << "\n";
llvm_unreachable(0);
}
}
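
The unchanged test Mapped & (Mapped - 1) above decides the "Value in multiple
maps!" diagnostic: clearing the lowest set bit leaves zero exactly when at
most one of the per-map bits was set. A quick check:

#include <cassert>

// Mapped & (Mapped - 1) clears the lowest set bit, so a nonzero result means
// at least two per-map bits are set -- the "Value in multiple maps!"
// condition above. Bit values follow the printout: 4 = SoftenedFloats,
// 16 = ExpandedIntegers, and so on.
inline bool inMultipleMaps(unsigned Mapped) {
  return (Mapped & (Mapped - 1)) != 0;
}

int main() {
  assert(!inMultipleMaps(0));      // in no map at all
  assert(!inMultipleMaps(4));      // exactly one map: fine
  assert(inMultipleMaps(4 | 16));  // two maps at once: error case
  return 0;
}
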
@@ -342,7 +342,7 @@ ScanOperands:
}
if (i == NumOperands) {
- DEBUG(errs() << "Legally typed node: "; N->dump(&DAG); errs() << "\n");
+ DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n");
}
}
NodeDone:
@@ -411,7 +411,7 @@ NodeDone:
if (!IgnoreNodeResults(I))
for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i)
if (!isTypeLegal(I->getValueType(i))) {
- errs() << "Result type " << i << " illegal!\n";
+ dbgs() << "Result type " << i << " illegal!\n";
Failed = true;
}
@@ -419,24 +419,24 @@ NodeDone:
for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i)
if (!IgnoreNodeResults(I->getOperand(i).getNode()) &&
!isTypeLegal(I->getOperand(i).getValueType())) {
- errs() << "Operand type " << i << " illegal!\n";
+ dbgs() << "Operand type " << i << " illegal!\n";
Failed = true;
}
if (I->getNodeId() != Processed) {
if (I->getNodeId() == NewNode)
- errs() << "New node not analyzed?\n";
+ dbgs() << "New node not analyzed?\n";
else if (I->getNodeId() == Unanalyzed)
- errs() << "Unanalyzed node not noticed?\n";
+ dbgs() << "Unanalyzed node not noticed?\n";
else if (I->getNodeId() > 0)
- errs() << "Operand not processed?\n";
+ dbgs() << "Operand not processed?\n";
else if (I->getNodeId() == ReadyToProcess)
- errs() << "Not added to worklist?\n";
+ dbgs() << "Not added to worklist?\n";
Failed = true;
}
if (Failed) {
- I->dump(&DAG); errs() << "\n";
+ I->dump(&DAG); dbgs() << "\n";
llvm_unreachable(0);
}
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index c35f7ad..b5dbd41 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -509,6 +509,7 @@ private:
void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecRes_BinOp(SDNode *N);
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
+ SDValue ScalarizeVecRes_InregOp(SDNode *N);
SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N);
SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
@@ -550,6 +551,7 @@ private:
void SplitVectorResult(SDNode *N, unsigned OpNo);
void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -615,6 +617,7 @@ private:
SDValue WidenVecRes_Convert(SDNode *N);
SDValue WidenVecRes_Shift(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
+ SDValue WidenVecRes_InregOp(SDNode *N);
// Widen Vector Operand.
bool WidenVectorOperand(SDNode *N, unsigned ResNo);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 2625245..b5f84c0 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -179,9 +179,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FRINT:
case ISD::FNEARBYINT:
case ISD::FFLOOR:
- case ISD::SIGN_EXTEND_INREG:
QueryType = Node->getValueType(0);
break;
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::FP_ROUND_INREG:
+ QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
QueryType = Node->getOperand(0).getValueType();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index cf67ab9..808bac7 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -32,17 +32,17 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
- DEBUG(errs() << "Scalarize node result " << ResNo << ": ";
+ DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
N->dump(&DAG);
- errs() << "\n");
+ dbgs() << "\n");
SDValue R = SDValue();
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- errs() << "ScalarizeVectorResult #" << ResNo << ": ";
+ dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
N->dump(&DAG);
- errs() << "\n";
+ dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to scalarize the result of this operator!");
@@ -50,11 +50,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BUILD_VECTOR: R = N->getOperand(0); break;
case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
- case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_SIGN_EXTEND_INREG(N); break;
+ case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
@@ -186,6 +187,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op);
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), EltVT,
+ LHS, DAG.getValueType(ExtVT));
+}
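
ScalarizeVecRes_InregOp replaces the old SIGN_EXTEND_INREG-only handler and also covers FP_ROUND_INREG: both opcodes carry their "from" type as a VTSDNode operand, and under this patch that operand is a vector type matching the result (see the tightened assertions in SelectionDAG.cpp further down), so scalarizing means dropping to the element type on both sides. A toy model of that bookkeeping, using invented struct names rather than the LLVM API:

    #include <cassert>

    // Toy model (not the LLVM API): both the result type and the type
    // carried by the VT operand drop from the one-element vector to its
    // element type.
    struct Ty { unsigned EltBits; unsigned NumElts; }; // NumElts == 0 => scalar

    static Ty elementType(Ty V) {
      assert(V.NumElts == 1 && "scalarization applies to 1-element vectors");
      Ty S = { V.EltBits, 0 };
      return S;
    }

    int main() {
      Ty Res = { 32, 1 };  // v1i32 result
      Ty Ext = { 16, 1 };  // v1i16 carried by the VT operand
      assert(elementType(Res).EltBits == 32 && elementType(Res).NumElts == 0);
      assert(elementType(Ext).EltBits == 16);
      return 0;
    }
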
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
// If the operand is wider than the vector element type then it is implicitly
// truncated. Make that explicit here.
@@ -196,13 +205,6 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
return InOp;
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N) {
- EVT EltVT = N->getValueType(0).getVectorElementType();
- SDValue LHS = GetScalarizedVector(N->getOperand(0));
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), EltVT,
- LHS, N->getOperand(1));
-}
-
SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
SDValue LHS = GetScalarizedVector(N->getOperand(1));
return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
@@ -278,18 +280,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
- DEBUG(errs() << "Scalarize node operand " << OpNo << ": ";
+ DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
N->dump(&DAG);
- errs() << "\n");
+ dbgs() << "\n");
SDValue Res = SDValue();
if (Res.getNode() == 0) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- errs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+ dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
N->dump(&DAG);
- errs() << "\n";
+ dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to scalarize this operator's operand!");
case ISD::BIT_CONVERT:
@@ -382,17 +384,17 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
/// legalization, we just know that (at least) one result needs vector
/// splitting.
void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
- DEBUG(errs() << "Split node result: ";
+ DEBUG(dbgs() << "Split node result: ";
N->dump(&DAG);
- errs() << "\n");
+ dbgs() << "\n");
SDValue Lo, Hi;
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- errs() << "SplitVectorResult #" << ResNo << ": ";
+ dbgs() << "SplitVectorResult #" << ResNo << ": ";
N->dump(&DAG);
- errs() << "\n";
+ dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to split the result of this operator!");
@@ -406,10 +408,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break;
case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+ case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
- case ISD::SIGN_EXTEND_INREG: SplitVecRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::LOAD:
SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
break;
@@ -654,6 +657,21 @@ void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
}
+void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT LoVT, HiVT;
+ GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT(), LoVT, HiVT);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
+ DAG.getValueType(LoVT));
+ Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
+ DAG.getValueType(HiVT));
+}
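
The splitting case differs from SplitVecRes_UnaryOp in one detail: the VT operand has to be split as well, via GetSplitDestVTs, so that each half's extension type keeps the element count of that half. A toy model, again with invented names rather than the LLVM API:

    #include <cassert>
    #include <utility>

    // Toy model of why the VT operand is split too: splitting a v8i32
    // value gives v4i32 halves, so an extension type of v8i16 must become
    // v4i16 per half to keep element counts in sync.
    struct Ty { unsigned EltBits; unsigned NumElts; };

    static std::pair<Ty, Ty> splitTy(Ty V) {
      Ty Half = { V.EltBits, V.NumElts / 2 };
      return std::make_pair(Half, Half);
    }

    int main() {
      Ty Ext = { 16, 8 }; // v8i16 carried by the VT operand
      std::pair<Ty, Ty> Halves = splitTy(Ext);
      assert(Halves.first.NumElts == 4 && Halves.second.EltBits == 16);
      return 0;
    }
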
+
void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Vec = N->getOperand(0);
@@ -709,18 +727,6 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
Hi = DAG.getUNDEF(HiVT);
}
-void DAGTypeLegalizer::SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
- SDValue LHSLo, LHSHi;
- GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
- DebugLoc dl = N->getDebugLoc();
-
- Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
- N->getOperand(1));
- Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
- N->getOperand(1));
-}
-
void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
SDValue &Hi) {
assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
@@ -945,18 +951,18 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
/// result types of the node are known to be legal, but other operands of the
/// node may need legalization as well as the specified one.
bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
- DEBUG(errs() << "Split node operand: ";
+ DEBUG(dbgs() << "Split node operand: ";
N->dump(&DAG);
- errs() << "\n");
+ dbgs() << "\n");
SDValue Res = SDValue();
if (Res.getNode() == 0) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- errs() << "SplitVectorOperand Op #" << OpNo << ": ";
+ dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
N->dump(&DAG);
- errs() << "\n";
+ dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to split this operator's operand!");
@@ -1136,9 +1142,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
- DEBUG(errs() << "Widen node result " << ResNo << ": ";
+ DEBUG(dbgs() << "Widen node result " << ResNo << ": ";
N->dump(&DAG);
- errs() << "\n");
+ dbgs() << "\n");
// See if the target wants to custom widen this node.
if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
@@ -1148,9 +1154,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- errs() << "WidenVectorResult #" << ResNo << ": ";
+ dbgs() << "WidenVectorResult #" << ResNo << ": ";
N->dump(&DAG);
- errs() << "\n";
+ dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to widen the result of this operator!");
@@ -1159,10 +1165,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
- case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_SIGN_EXTEND_INREG(N); break;
+ case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
@@ -1331,6 +1338,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp);
}
+SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ cast<VTSDNode>(N->getOperand(1))->getVT()
+ .getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ WidenVT, WidenLHS, DAG.getValueType(ExtVT));
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
SDValue InOp = N->getOperand(0);
EVT InVT = InOp.getValueType();
@@ -1713,13 +1731,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
WidenVT, N->getOperand(0));
}
-SDValue DAGTypeLegalizer::WidenVecRes_SIGN_EXTEND_INREG(SDNode *N) {
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(),
- WidenVT, WidenLHS, N->getOperand(1));
-}
-
SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
@@ -1806,17 +1817,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
// Widen Vector Operand
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
- DEBUG(errs() << "Widen node operand " << ResNo << ": ";
+ DEBUG(dbgs() << "Widen node operand " << ResNo << ": ";
N->dump(&DAG);
- errs() << "\n");
+ dbgs() << "\n");
SDValue Res = SDValue();
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- errs() << "WidenVectorOperand op #" << ResNo << ": ";
+ dbgs() << "WidenVectorOperand op #" << ResNo << ": ";
N->dump(&DAG);
- errs() << "\n";
+ dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to widen this operator's operand!");
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 4045a34..0c3c974c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -109,7 +109,7 @@ private:
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGFast::Schedule() {
- DEBUG(errs() << "********** List Scheduling **********\n");
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
NumLiveRegs = 0;
LiveRegDefs.resize(TRI->getNumRegs(), NULL);
@@ -136,9 +136,9 @@ void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
#ifndef NDEBUG
if (PredSU->NumSuccsLeft == 0) {
- errs() << "*** Scheduling failed! ***\n";
+ dbgs() << "*** Scheduling failed! ***\n";
PredSU->dump(this);
- errs() << " has been released too many times!\n";
+ dbgs() << " has been released too many times!\n";
llvm_unreachable(0);
}
#endif
@@ -175,7 +175,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
- DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
@@ -233,7 +233,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
return NULL;
- DEBUG(errs() << "Unfolding SU # " << SU->NodeNum << "\n");
+ DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
assert(NewNodes.size() == 2 && "Expected a load folding node!");
N = NewNodes[1];
@@ -343,7 +343,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
SU = NewSU;
}
- DEBUG(errs() << "Duplicating SU # " << SU->NodeNum << "\n");
+ DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
NewSU = Clone(SU);
// New SUnit has the exact same predecessors.
@@ -550,7 +550,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
// Issue copies, these can be expensive cross register class copies.
SmallVector<SUnit*, 2> Copies;
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
- DEBUG(errs() << "Adding an edge from SU # " << TrySU->NodeNum
+ DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
<< " to SU #" << Copies.front()->NodeNum << "\n");
AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
/*Reg=*/0, /*isNormalMemory=*/false,
@@ -558,7 +558,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
NewDef = Copies.back();
}
- DEBUG(errs() << "Adding an edge from SU # " << NewDef->NodeNum
+ DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
<< " to SU #" << TrySU->NodeNum << "\n");
LiveRegDefs[Reg] = NewDef;
AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
index faf21f7..b92a672 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -87,7 +87,7 @@ private:
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGList::Schedule() {
- DEBUG(errs() << "********** List Scheduling **********\n");
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
// Build the scheduling graph.
BuildSchedGraph(NULL);
@@ -110,9 +110,9 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
- errs() << "*** Scheduling failed! ***\n";
+ dbgs() << "*** Scheduling failed! ***\n";
SuccSU->dump(this);
- errs() << " has been released too many times!\n";
+ dbgs() << " has been released too many times!\n";
llvm_unreachable(0);
}
#endif
@@ -141,7 +141,7 @@ void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) {
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
- DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
Sequence.push_back(SU);
@@ -233,7 +233,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
} else if (!HasNoopHazards) {
// Otherwise, we have a pipeline stall, but no other problem, just advance
// the current cycle and try again.
- DEBUG(errs() << "*** Advancing cycle, no work to do\n");
+ DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");
HazardRec->AdvanceCycle();
++NumStalls;
++CurCycle;
@@ -241,7 +241,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
// Otherwise, we have no instructions to issue and we have instructions
// that will fault if we don't do this right. This is the case for
// processors without pipeline interlocks and other cases.
- DEBUG(errs() << "*** Emitting noop\n");
+ DEBUG(dbgs() << "*** Emitting noop\n");
HazardRec->EmitNoop();
Sequence.push_back(0); // NULL here means noop
++NumNoops;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 7e1015a..1ad7919 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -164,7 +164,7 @@ private:
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
- DEBUG(errs() << "********** List Scheduling **********\n");
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
NumLiveRegs = 0;
LiveRegDefs.resize(TRI->getNumRegs(), NULL);
@@ -199,9 +199,9 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
#ifndef NDEBUG
if (PredSU->NumSuccsLeft == 0) {
- errs() << "*** Scheduling failed! ***\n";
+ dbgs() << "*** Scheduling failed! ***\n";
PredSU->dump(this);
- errs() << " has been released too many times!\n";
+ dbgs() << " has been released too many times!\n";
llvm_unreachable(0);
}
#endif
@@ -238,7 +238,7 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
- DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
@@ -284,7 +284,7 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
/// its predecessor states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
- DEBUG(errs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
+ DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
DEBUG(SU->dump(this));
AvailableQueue->UnscheduledNode(SU);
@@ -371,7 +371,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
return NULL;
- DEBUG(errs() << "Unfolding SU # " << SU->NodeNum << "\n");
+ DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
assert(NewNodes.size() == 2 && "Expected a load folding node!");
N = NewNodes[1];
@@ -490,7 +490,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
SU = NewSU;
}
- DEBUG(errs() << "Duplicating SU # " << SU->NodeNum << "\n");
+ DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
NewSU = CreateClone(SU);
// New SUnit has the exact same predecessors.
@@ -771,7 +771,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
// Issue copies, these can be expensive cross register class copies.
SmallVector<SUnit*, 2> Copies;
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
- DEBUG(errs() << "Adding an edge from SU #" << TrySU->NodeNum
+ DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum
<< " to SU #" << Copies.front()->NodeNum << "\n");
AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
/*Reg=*/0, /*isNormalMemory=*/false,
@@ -780,7 +780,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
NewDef = Copies.back();
}
- DEBUG(errs() << "Adding an edge from SU #" << NewDef->NodeNum
+ DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum
<< " to SU #" << TrySU->NodeNum << "\n");
LiveRegDefs[Reg] = NewDef;
AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
@@ -827,9 +827,9 @@ void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
- errs() << "*** Scheduling failed! ***\n";
+ dbgs() << "*** Scheduling failed! ***\n";
SuccSU->dump(this);
- errs() << " has been released too many times!\n";
+ dbgs() << " has been released too many times!\n";
llvm_unreachable(0);
}
#endif
@@ -858,7 +858,7 @@ void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
- DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
@@ -1038,6 +1038,10 @@ namespace {
return 0;
return SethiUllmanNumbers[SU->NodeNum];
}
+
+ unsigned getNodeOrdering(const SUnit *SU) const {
+ return scheduleDAG->DAG->GetOrdering(SU->getNode());
+ }
unsigned size() const { return Queue.size(); }
@@ -1120,6 +1124,14 @@ static unsigned calcMaxScratches(const SUnit *SU) {
// Bottom up
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+  // Prefer the node with the lower non-zero order number; an order number
+  // of zero means the node has no recorded source order.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+
unsigned LPriority = SPQ->getNodePriority(left);
unsigned RPriority = SPQ->getNodePriority(right);
if (LPriority != RPriority)
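
The new clause is easy to misread, so here is a standalone restatement with its outcomes spelled out; the function name and the bool-out parameter are invented, but the decisive expression is copied verbatim:

    #include <cassert>

    // Returns true when the order numbers decide the comparison, placing
    // the comparator's answer in Result; otherwise the caller falls
    // through to the priority heuristics.
    static bool orderNumbersDecide(unsigned LOrder, unsigned ROrder,
                                   bool &Result) {
      if ((LOrder || ROrder) && LOrder != ROrder) {
        Result = LOrder != 0 && (LOrder < ROrder || ROrder == 0);
        return true;
      }
      return false;
    }

    int main() {
      bool R;
      assert(orderNumbersDecide(3, 5, R) && R);   // both non-zero: 3 < 5
      assert(orderNumbersDecide(5, 3, R) && !R);
      assert(orderNumbersDecide(3, 0, R) && R);   // only left is ordered
      assert(orderNumbersDecide(0, 3, R) && !R);
      assert(!orderNumbersDecide(0, 0, R));       // no order numbers at all
      assert(!orderNumbersDecide(4, 4, R));       // tie: not decisive
      return 0;
    }
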
@@ -1329,7 +1341,7 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
// Ok, the transformation is safe and the heuristics suggest it is
// profitable. Update the graph.
- DEBUG(errs() << "Prescheduling SU # " << SU->NodeNum
+ DEBUG(dbgs() << "Prescheduling SU # " << SU->NodeNum
<< " next to PredSU # " << PredSU->NodeNum
<< " to guide scheduling in the presence of multiple uses\n");
for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
@@ -1419,7 +1431,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
(hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
(!SU->isCommutable && SuccSU->isCommutable)) &&
!scheduleDAG->IsReachable(SuccSU, SU)) {
- DEBUG(errs() << "Adding a pseudo-two-addr edge from SU # "
+ DEBUG(dbgs() << "Adding a pseudo-two-addr edge from SU # "
<< SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
/*Reg=*/0, /*isNormalMemory=*/false,
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index d53de34..aaaa2b3 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -253,19 +253,19 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
if (!SU->getNode()) {
- errs() << "PHYS REG COPY\n";
+ dbgs() << "PHYS REG COPY\n";
return;
}
SU->getNode()->dump(DAG);
- errs() << "\n";
+ dbgs() << "\n";
SmallVector<SDNode *, 4> FlaggedNodes;
for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
FlaggedNodes.push_back(N);
while (!FlaggedNodes.empty()) {
- errs() << " ";
+ dbgs() << " ";
FlaggedNodes.back()->dump(DAG);
- errs() << "\n";
+ dbgs() << "\n";
FlaggedNodes.pop_back();
}
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 77301b0..cb1a0d6 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -36,6 +36,7 @@
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
@@ -644,7 +645,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag &&
!N->isMachineOpcode() && !doNotCSE(N)) {
N->dump(this);
- errs() << "\n";
+ dbgs() << "\n";
llvm_unreachable("Node is not in map!");
}
#endif
@@ -1740,7 +1741,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
return;
case ISD::SIGN_EXTEND_INREG: {
EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- unsigned EBits = EVT.getSizeInBits();
+ unsigned EBits = EVT.getScalarType().getSizeInBits();
// Sign extension. Compute the demanded bits in the result that are not
// present in the input.
@@ -1785,7 +1786,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (ISD::isZEXTLoad(Op.getNode())) {
LoadSDNode *LD = cast<LoadSDNode>(Op);
EVT VT = LD->getMemoryVT();
- unsigned MemBits = VT.getSizeInBits();
+ unsigned MemBits = VT.getScalarType().getSizeInBits();
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask;
}
return;
@@ -2024,7 +2025,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
case ISD::SIGN_EXTEND_INREG:
// Max of the input and what this extends.
- Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ Tmp =
+ cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarType().getSizeInBits();
Tmp = VTBits-Tmp+1;
Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
@@ -2168,10 +2170,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
switch (ExtType) {
default: break;
case ISD::SEXTLOAD: // '17' bits known
- Tmp = LD->getMemoryVT().getSizeInBits();
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
return VTBits-Tmp+1;
case ISD::ZEXTLOAD: // '16' bits known
- Tmp = LD->getMemoryVT().getSizeInBits();
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
return VTBits-Tmp;
}
}
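
Switching these queries to getScalarType() matters for vector loads: sign-bit reasoning is per element, while getSizeInBits() on a vector type returns the whole vector's width. A worked example with assumed, illustrative types, say a SEXTLOAD of v4i8 memory into v4i32:

    #include <cassert>

    int main() {
      unsigned VTBits = 32;        // bits per result element
      unsigned MemScalarBits = 8;  // getScalarType().getSizeInBits() on v4i8
      unsigned MemVectorBits = 32; // old getSizeInBits() on v4i8 (4 x 8)
      // Each lane sign-extended from 8 to 32 bits has 32 - 8 + 1 = 25
      // known sign bits; the old query would have claimed just one.
      assert(VTBits - MemScalarBits + 1 == 25);
      assert(VTBits - MemVectorBits + 1 == 1);
      return 0;
    }
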
@@ -2655,12 +2657,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
// size of the value, the shift/rotate count is guaranteed to be zero.
if (VT == MVT::i1)
return N1;
+ if (N2C && N2C->isNullValue())
+ return N1;
break;
case ISD::FP_ROUND_INREG: {
EVT EVT = cast<VTSDNode>(N2)->getVT();
assert(VT == N1.getValueType() && "Not an inreg round!");
assert(VT.isFloatingPoint() && EVT.isFloatingPoint() &&
"Cannot FP_ROUND_INREG integer types");
+ assert(EVT.isVector() == VT.isVector() &&
+ "FP_ROUND_INREG type should be vector iff the operand "
+ "type is vector!");
+ assert((!EVT.isVector() ||
+ EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+ "Vector element counts must match in FP_ROUND_INREG");
assert(EVT.bitsLE(VT) && "Not rounding down!");
if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding.
break;
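
The added assertions pin down the new operand convention for the two inreg nodes: the type carried by the VT operand must be a vector exactly when the result type is, with matching element counts. A toy statement of the invariant, with invented struct names rather than the LLVM API:

    #include <cassert>

    struct Ty { unsigned EltBits; unsigned NumElts; }; // NumElts == 0 => scalar

    static bool inregOperandOK(Ty VT, Ty EVT) {
      if ((VT.NumElts != 0) != (EVT.NumElts != 0))
        return false;                    // vector iff the result is a vector
      if (VT.NumElts != 0 && VT.NumElts != EVT.NumElts)
        return false;                    // element counts must match
      return EVT.EltBits <= VT.EltBits;  // rounding/extending downward only
    }

    int main() {
      Ty V4I32 = { 32, 4 }, V4I16 = { 16, 4 }, I16 = { 16, 0 };
      assert(inregOperandOK(V4I32, V4I16)); // new form: vector VT operand
      assert(!inregOperandOK(V4I32, I16));  // old scalar form now rejected
      return 0;
    }
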
@@ -2690,15 +2700,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
assert(VT == N1.getValueType() && "Not an inreg extend!");
assert(VT.isInteger() && EVT.isInteger() &&
"Cannot *_EXTEND_INREG FP types");
- assert(!EVT.isVector() &&
- "SIGN_EXTEND_INREG type should be the vector element type rather "
- "than the vector type!");
- assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!");
+ assert(EVT.isVector() == VT.isVector() &&
+ "SIGN_EXTEND_INREG type should be vector iff the operand "
+ "type is vector!");
+ assert((!EVT.isVector() ||
+ EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+ "Vector element counts must match in SIGN_EXTEND_INREG");
+ assert(EVT.bitsLE(VT) && "Not extending!");
if (EVT == VT) return N1; // Not actually extending
if (N1C) {
APInt Val = N1C->getAPIntValue();
- unsigned FromBits = EVT.getSizeInBits();
+ unsigned FromBits = EVT.getScalarType().getSizeInBits();
Val <<= Val.getBitWidth()-FromBits;
Val = Val.ashr(Val.getBitWidth()-FromBits);
return getConstant(Val, VT);
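
The constant folding above now measures FromBits on the scalar type, then applies the classic shift pair on the APInt: shift the kept field up to the sign position, then arithmetic-shift it back down. The same fold on a plain 32-bit integer, as a sketch that assumes the usual arithmetic right shift for signed values (true on mainstream compilers, though implementation-defined in C++):

    #include <cassert>
    #include <cstdint>

    // Sign-extend the low FromBits of Val within a 32-bit register.
    static int32_t signExtendInReg(int32_t Val, unsigned FromBits) {
      unsigned Shift = 32 - FromBits;
      return (int32_t)((uint32_t)Val << Shift) >> Shift;
    }

    int main() {
      assert(signExtendInReg(0xFF, 8) == -1);    // i8 0xFF is -1
      assert(signExtendInReg(0x7F, 8) == 0x7F);  // positive values unchanged
      assert(signExtendInReg(0x8000, 16) == -32768);
      return 0;
    }
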
@@ -4106,7 +4119,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
// If the and is only masking out bits that cannot affect the shift,
// eliminate the and.
- unsigned NumBits = VT.getSizeInBits()*2;
+ unsigned NumBits = VT.getScalarType().getSizeInBits()*2;
if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
}
@@ -5713,7 +5726,7 @@ std::string ISD::ArgFlagsTy::getArgFlagsString() {
void SDNode::dump() const { dump(0); }
void SDNode::dump(const SelectionDAG *G) const {
- print(errs(), G);
+ print(dbgs(), G);
}
void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
@@ -5885,12 +5898,12 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
if (N->getOperand(i).getNode()->hasOneUse())
DumpNodes(N->getOperand(i).getNode(), indent+2, G);
else
- errs() << "\n" << std::string(indent+2, ' ')
- << (void*)N->getOperand(i).getNode() << ": <multiple use>";
+ dbgs() << "\n" << std::string(indent+2, ' ')
+ << (void*)N->getOperand(i).getNode() << ": <multiple use>";
- errs() << "\n";
- errs().indent(indent);
+ dbgs() << "\n";
+ dbgs().indent(indent);
N->dump(G);
}
@@ -5943,6 +5956,13 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
getShiftAmountOperand(Operands[1])));
break;
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::FP_ROUND_INREG: {
+ EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType();
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
+ Operands[0],
+ getValueType(ExtVT)));
+ }
}
}
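
UnrollVectorOp gains the same pair of opcodes: each element becomes one scalar inreg op whose VT operand is reduced to the element type. In plain C++ (not the DAG API), unrolling a hypothetical v2i32 sign_extend_inreg with a v2i16 VT operand amounts to:

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t In[2] = { 0x0000FFFF, 0x00001234 };
      int32_t Out[2];
      for (unsigned i = 0; i != 2; ++i)
        Out[i] = (int32_t)((uint32_t)In[i] << 16) >> 16; // i16 per element
      assert(Out[0] == -1 && Out[1] == 0x1234);
      return 0;
    }
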
@@ -6048,7 +6068,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
}
void SelectionDAG::dump() const {
- errs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
+ dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
I != E; ++I) {
@@ -6059,7 +6079,7 @@ void SelectionDAG::dump() const {
if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
- errs() << "\n\n";
+ dbgs() << "\n\n";
}
void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
@@ -6106,12 +6126,12 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
void SDNode::dumpr() const {
VisitedSDNodeSet once;
- DumpNodesr(errs(), this, 0, 0, once);
+ DumpNodesr(dbgs(), this, 0, 0, once);
}
void SDNode::dumpr(const SelectionDAG *G) const {
VisitedSDNodeSet once;
- DumpNodesr(errs(), this, 0, G, once);
+ DumpNodesr(dbgs(), this, 0, G, once);
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 74d624f..5e3a3b5 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1195,6 +1195,18 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
return false;
}
+ // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+ // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+ if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+ Cases[0].CC == Cases[1].CC &&
+ isa<Constant>(Cases[0].CmpRHS) &&
+ cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
+ if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
+ return false;
+ if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
+ return false;
+ }
+
return true;
}
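
The new early-outs keep chained null comparisons against the same constant in one block so the DAG combiner can merge them into a single compare and branch. The underlying identity is purely bitwise: X and Y are both null exactly when their OR is. A trivial demonstration (names invented):

    #include <cassert>

    static bool bothNull(unsigned X, unsigned Y) { return (X | Y) == 0; }

    int main() {
      assert(bothNull(0, 0));
      assert(!bothNull(0, 4));
      assert(!bothNull(4, 0));
      return 0;
    }
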
@@ -1733,7 +1745,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
if (Density < 0.4)
return false;
- DEBUG(errs() << "Lowering jump table\n"
+ DEBUG(dbgs() << "Lowering jump table\n"
<< "First entry: " << First << ". Last entry: " << Last << '\n'
<< "Range: " << Range
<< "Size: " << TSize << ". Density: " << Density << "\n\n");
@@ -1837,7 +1849,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
APInt LSize = FrontCase.size();
APInt RSize = TSize-LSize;
- DEBUG(errs() << "Selecting best pivot: \n"
+ DEBUG(dbgs() << "Selecting best pivot: \n"
<< "First: " << First << ", Last: " << Last <<'\n'
<< "LSize: " << LSize << ", RSize: " << RSize << '\n');
for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
@@ -1853,7 +1865,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
(Last - RBegin + 1ULL).roundToDouble();
double Metric = Range.logBase2()*(LDensity+RDensity);
// Should always split in some non-trivial place
- DEBUG(errs() <<"=>Step\n"
+ DEBUG(dbgs() <<"=>Step\n"
<< "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
<< "LDensity: " << LDensity
<< ", RDensity: " << RDensity << '\n'
@@ -1861,7 +1873,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
if (FMetric < Metric) {
Pivot = J;
FMetric = Metric;
- DEBUG(errs() << "Current metric set to: " << FMetric << '\n');
+ DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
}
LSize += J->size();
@@ -1965,7 +1977,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
// Don't bother with the code below if there are too many unique destinations
return false;
}
- DEBUG(errs() << "Total number of unique destinations: "
+ DEBUG(dbgs() << "Total number of unique destinations: "
<< Dests.size() << '\n'
<< "Total number of comparisons: " << numCmps << '\n');
@@ -1974,7 +1986,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
APInt cmpRange = maxValue - minValue;
- DEBUG(errs() << "Compare range: " << cmpRange << '\n'
+ DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
<< "Low bound: " << minValue << '\n'
<< "High bound: " << maxValue << '\n');
@@ -1984,7 +1996,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
!(Dests.size() >= 3 && numCmps >= 6)))
return false;
- DEBUG(errs() << "Emitting bit tests\n");
+ DEBUG(dbgs() << "Emitting bit tests\n");
APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
// Optimize the case where all the case values fit in a
@@ -2034,9 +2046,9 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
- DEBUG(errs() << "Cases:\n");
+ DEBUG(dbgs() << "Cases:\n");
for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
- DEBUG(errs() << "Mask: " << CasesBits[i].Mask
+ DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
<< ", Bits: " << CasesBits[i].Bits
<< ", BB: " << CasesBits[i].BB << '\n');
@@ -2135,7 +2147,7 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) {
// create a binary search tree from them.
CaseVector Cases;
size_t numCmps = Clusterify(Cases, SI);
- DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size()
+ DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
<< ". Total compares: " << numCmps << '\n');
numCmps = 0;
@@ -3157,7 +3169,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I,
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
VTs, &Ops[0], Ops.size());
- } else if (I.getType() != Type::getVoidTy(*DAG.getContext())) {
+ } else if (!I.getType()->isVoidTy()) {
Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
VTs, &Ops[0], Ops.size());
} else {
@@ -3176,7 +3188,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I,
DAG.setRoot(Chain);
}
- if (I.getType() != Type::getVoidTy(*DAG.getContext())) {
+ if (!I.getType()->isVoidTy()) {
if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
EVT VT = TLI.getValueType(PTy);
Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
@@ -4406,12 +4418,6 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
return 0;
}
- case Intrinsic::dbg_stoppoint:
- case Intrinsic::dbg_region_start:
- case Intrinsic::dbg_region_end:
- case Intrinsic::dbg_func_start:
- // FIXME - Remove this instructions once the dust settles.
- return 0;
case Intrinsic::dbg_declare: {
if (OptLevel != CodeGenOpt::None)
// FIXME: Variable debug info is not supported here.
@@ -5931,7 +5937,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
// The return value of the call is this value. As such, there is no
// corresponding argument.
- assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) &&
+ assert(!CS.getType()->isVoidTy() &&
"Bad inline asm!");
if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
OpVT = TLI.getValueType(STy->getElementType(ResNo));
@@ -6056,7 +6062,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
std::vector<SDValue> AsmNodeOperands;
AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
AsmNodeOperands.push_back(
- DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other));
+ DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
+ TLI.getPointerTy()));
// Loop over all of the inputs, copying the operand values into the
@@ -6100,8 +6107,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
OpInfo.CallOperandVal));
} else {
// This is the result value of the call.
- assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) &&
- "Bad inline asm!");
+ assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
// Concatenate this output onto the outputs list.
RetValRegs.append(OpInfo.AssignedRegs);
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 88a2017..db656e3 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -163,7 +163,7 @@ private:
/// The comparison function for sorting the switch case values in the vector.
/// WARNING: Case ranges should be disjoint!
struct CaseCmp {
- bool operator () (const Case& C1, const Case& C2) {
+ bool operator()(const Case &C1, const Case &C2) {
assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
@@ -172,12 +172,12 @@ private:
};
struct CaseBitsCmp {
- bool operator () (const CaseBits& C1, const CaseBits& C2) {
+ bool operator()(const CaseBits &C1, const CaseBits &C2) {
return C1.Bits > C2.Bits;
}
};
- size_t Clusterify(CaseVector& Cases, const SwitchInst &SI);
+ size_t Clusterify(CaseVector &Cases, const SwitchInst &SI);
/// CaseBlock - This structure is used to communicate between
/// SelectionDAGBuilder and SDISel for the code generation of additional basic
@@ -215,7 +215,7 @@ private:
MachineBasicBlock *Default;
};
struct JumpTableHeader {
- JumpTableHeader(APInt F, APInt L, Value* SV, MachineBasicBlock* H,
+ JumpTableHeader(APInt F, APInt L, Value *SV, MachineBasicBlock *H,
bool E = false):
First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
APInt First;
@@ -230,8 +230,8 @@ private:
BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr):
Mask(M), ThisBB(T), TargetBB(Tr) { }
uint64_t Mask;
- MachineBasicBlock* ThisBB;
- MachineBasicBlock* TargetBB;
+ MachineBasicBlock *ThisBB;
+ MachineBasicBlock *TargetBB;
};
typedef SmallVector<BitTestCase, 3> BitTestInfo;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 05669c0..9ac8f83 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -162,7 +162,7 @@ MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB,
DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
#ifndef NDEBUG
- errs() << "If a target marks an instruction with "
+ dbgs() << "If a target marks an instruction with "
"'usesCustomInserter', it must implement "
"TargetLowering::EmitInstrWithCustomInserter!";
#endif
@@ -325,7 +325,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
else
GFI = 0;
RegInfo = &MF->getRegInfo();
- DEBUG(errs() << "\n\n\n=== " << Fn.getName() << "\n");
+ DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
@@ -438,6 +438,95 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
SDB->clear();
}
+namespace {
+/// SDOPsWorkListRemover - This class is a DAGUpdateListener that removes
+/// any deleted nodes from the worklist.
+class SDOPsWorkListRemover : public SelectionDAG::DAGUpdateListener {
+ SmallVector<SDNode*, 128> &Worklist;
+public:
+ SDOPsWorkListRemover(SmallVector<SDNode*, 128> &wl) : Worklist(wl) {}
+
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N),
+ Worklist.end());
+ }
+
+ virtual void NodeUpdated(SDNode *N) {
+ // Ignore updates.
+ }
+};
+}
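
SDOPsWorkListRemover exists because ReplaceAllUsesOfValueWith may delete nodes that are still sitting in the local worklist; the listener scrubs them out with the erase-remove idiom. That idiom in isolation, for readers who have not seen it:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main() {
      // std::remove compacts the surviving elements to the front and
      // returns the new logical end; erase then drops the tail, deleting
      // every occurrence of the value in one linear pass.
      std::vector<int> W;
      W.push_back(1); W.push_back(2); W.push_back(3);
      W.push_back(2); W.push_back(4);
      W.erase(std::remove(W.begin(), W.end(), 2), W.end());
      assert(W.size() == 3);
      return 0;
    }
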
+
+/// ShrinkDemandedOps - A late transformation pass that shrinks expressions
+/// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts
+/// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
+void SelectionDAGISel::ShrinkDemandedOps() {
+ SmallVector<SDNode*, 128> Worklist;
+
+ // Add all the dag nodes to the worklist.
+ Worklist.reserve(CurDAG->allnodes_size());
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ++I)
+ Worklist.push_back(I);
+
+ APInt Mask;
+ APInt KnownZero;
+ APInt KnownOne;
+
+ TargetLowering::TargetLoweringOpt TLO(*CurDAG, true);
+ while (!Worklist.empty()) {
+ SDNode *N = Worklist.pop_back_val();
+
+ if (N->use_empty() && N != CurDAG->getRoot().getNode()) {
+ CurDAG->DeleteNode(N);
+ continue;
+ }
+
+ // Run ShrinkDemandedOp on scalar binary operations.
+ if (N->getNumValues() == 1 &&
+ N->getValueType(0).isSimple() && N->getValueType(0).isInteger()) {
+ unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+ APInt Demanded = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero, KnownOne;
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded,
+ KnownZero, KnownOne, TLO)) {
+ // Revisit the node.
+ Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N),
+ Worklist.end());
+ Worklist.push_back(N);
+
+ // Replace the old value with the new one.
+ DEBUG(errs() << "\nReplacing ";
+ TLO.Old.getNode()->dump(CurDAG);
+ errs() << "\nWith: ";
+ TLO.New.getNode()->dump(CurDAG);
+ errs() << '\n');
+
+ Worklist.push_back(TLO.New.getNode());
+
+ SDOPsWorkListRemover DeadNodes(Worklist);
+ CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
+
+ if (TLO.Old.getNode()->use_empty()) {
+ for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands();
+ i != e; ++i) {
+ SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode();
+ if (OpNode->hasOneUse()) {
+ Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
+ OpNode), Worklist.end());
+ Worklist.push_back(OpNode);
+ }
+ }
+
+ Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
+ TLO.Old.getNode()), Worklist.end());
+ CurDAG->DeleteNode(TLO.Old.getNode());
+ }
+ }
+ }
+ }
+}
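
ShrinkDemandedOps drives SimplifyDemandedBits with a TargetLoweringOpt whose new second constructor argument is true, presumably the ShrinkOps flag that the TargetLowering.cpp hunks below now test before calling ShrinkDemandedOp. The payoff it is after: when only the low bits of a wide operation are demanded, do the arithmetic at the narrow width and extend afterwards. A self-contained sketch of why that is sound for addition:

    #include <cassert>
    #include <cstdint>

    int main() {
      // If only the low 16 bits of a 32-bit add are demanded, the add can
      // be performed at i16: the low bits of the sum never depend on the
      // discarded high bits of the operands.
      uint32_t x = 0x12345678, y = 0x9ABCDEF0;
      uint16_t narrow = (uint16_t)((uint16_t)x + (uint16_t)y);
      assert((uint16_t)(x + y) == narrow);
      return 0;
    }
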
+
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
SmallPtrSet<SDNode*, 128> VisitedNodes;
SmallVector<SDNode*, 128> Worklist;
@@ -448,9 +537,8 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
APInt KnownZero;
APInt KnownOne;
- while (!Worklist.empty()) {
- SDNode *N = Worklist.back();
- Worklist.pop_back();
+ do {
+ SDNode *N = Worklist.pop_back_val();
// If we've already seen this node, ignore it.
if (!VisitedNodes.insert(N))
@@ -490,7 +578,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
LOI.KnownOne = KnownOne;
LOI.KnownZero = KnownZero;
}
- }
+ } while (!Worklist.empty());
}
void SelectionDAGISel::CodeGenAndEmitDAG() {
@@ -504,7 +592,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
BlockName = MF->getFunction()->getNameStr() + ":" +
BB->getBasicBlock()->getNameStr();
- DEBUG(errs() << "Initial selection DAG:\n");
+ DEBUG(dbgs() << "Initial selection DAG:\n");
DEBUG(CurDAG->dump());
if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
@@ -517,7 +605,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(Unrestricted, *AA, OptLevel);
}
- DEBUG(errs() << "Optimized lowered selection DAG:\n");
+ DEBUG(dbgs() << "Optimized lowered selection DAG:\n");
DEBUG(CurDAG->dump());
// Second step, hack on the DAG until it only uses operations and types that
@@ -533,7 +621,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
Changed = CurDAG->LegalizeTypes();
}
- DEBUG(errs() << "Type-legalized selection DAG:\n");
+ DEBUG(dbgs() << "Type-legalized selection DAG:\n");
DEBUG(CurDAG->dump());
if (Changed) {
@@ -548,7 +636,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
}
- DEBUG(errs() << "Optimized type-legalized selection DAG:\n");
+ DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n");
DEBUG(CurDAG->dump());
}
@@ -578,7 +666,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
}
- DEBUG(errs() << "Optimized vector-legalized selection DAG:\n");
+ DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n");
DEBUG(CurDAG->dump());
}
@@ -591,7 +679,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Legalize(OptLevel);
}
- DEBUG(errs() << "Legalized selection DAG:\n");
+ DEBUG(dbgs() << "Legalized selection DAG:\n");
DEBUG(CurDAG->dump());
if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName);
@@ -604,13 +692,15 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
}
- DEBUG(errs() << "Optimized legalized selection DAG:\n");
+ DEBUG(dbgs() << "Optimized legalized selection DAG:\n");
DEBUG(CurDAG->dump());
if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
- if (OptLevel != CodeGenOpt::None)
+ if (OptLevel != CodeGenOpt::None) {
+ ShrinkDemandedOps();
ComputeLiveOutVRegInfo();
+ }
// Third, instruction select all of the operations to machine code, adding the
// code to the MachineBasicBlock.
@@ -621,7 +711,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
InstructionSelect();
}
- DEBUG(errs() << "Selected selection DAG:\n");
+ DEBUG(dbgs() << "Selected selection DAG:\n");
DEBUG(CurDAG->dump());
if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName);
@@ -654,7 +744,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
delete Scheduler;
}
- DEBUG(errs() << "Selected machine code:\n");
+ DEBUG(dbgs() << "Selected machine code:\n");
DEBUG(BB->dump());
}
@@ -699,7 +789,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
I != E; ++I, ++j)
if (Fn.paramHasAttr(j, Attribute::ByVal)) {
if (EnableFastISelVerbose || EnableFastISelAbort)
- errs() << "FastISel skips entry block due to byval argument\n";
+ dbgs() << "FastISel skips entry block due to byval argument\n";
SuppressFastISel = true;
break;
}
@@ -729,10 +819,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
// information is provided by an intrinsic (eh.selector) that can be moved
// to unexpected places by the optimizers: if the unwind edge is critical,
// then breaking it can result in the intrinsics being in the successor of
- // the landing pad, not the landing pad itself. This results in exceptions
- // not being caught because no typeids are associated with the invoke.
- // This may not be the only way things can go wrong, but it is the only way
- // we try to work around for the moment.
+ // the landing pad, not the landing pad itself. This results
+ // in exceptions not being caught because no typeids are associated with
+ // the invoke. This may not be the only way things can go wrong, but it
+ // is the only way we try to work around for the moment.
BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
if (Br && Br->isUnconditional()) { // Critical edge?
@@ -765,7 +855,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) {
ResetDebugLoc(SDB, FastIS);
if (EnableFastISelVerbose || EnableFastISelAbort) {
- errs() << "FastISel miss: ";
+ dbgs() << "FastISel miss: ";
BI->dump();
}
assert(!EnableFastISelAbort &&
@@ -775,7 +865,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
SetDebugLoc(MDDbgKind, BI, SDB, FastIS, &MF);
- // First try normal tablegen-generated "fast" selection.
+ // Try to select the instruction with FastISel.
if (FastIS->SelectInstruction(BI)) {
ResetDebugLoc(SDB, FastIS);
continue;
@@ -788,11 +878,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
// Then handle certain instructions as single-LLVM-Instruction blocks.
if (isa<CallInst>(BI)) {
if (EnableFastISelVerbose || EnableFastISelAbort) {
- errs() << "FastISel missed call: ";
+ dbgs() << "FastISel missed call: ";
BI->dump();
}
- if (BI->getType() != Type::getVoidTy(*CurDAG->getContext())) {
+ if (!BI->getType()->isVoidTy()) {
unsigned &R = FuncInfo->ValueMap[BI];
if (!R)
R = FuncInfo->CreateRegForValue(BI);
@@ -817,7 +907,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
// For now, be a little lenient about non-branch terminators.
if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) {
if (EnableFastISelVerbose || EnableFastISelAbort) {
- errs() << "FastISel miss: ";
+ dbgs() << "FastISel miss: ";
BI->dump();
}
if (EnableFastISelAbort)
@@ -846,13 +936,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
void
SelectionDAGISel::FinishBasicBlock() {
- DEBUG(errs() << "Target-post-processed machine code:\n");
+ DEBUG(dbgs() << "Target-post-processed machine code:\n");
DEBUG(BB->dump());
- DEBUG(errs() << "Total amount of phi nodes to update: "
+ DEBUG(dbgs() << "Total amount of phi nodes to update: "
<< SDB->PHINodesToUpdate.size() << "\n");
DEBUG(for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i)
- errs() << "Node " << i << " : ("
+ dbgs() << "Node " << i << " : ("
<< SDB->PHINodesToUpdate[i].first
<< ", " << SDB->PHINodesToUpdate[i].second << ")\n");
@@ -915,11 +1005,11 @@ SelectionDAGISel::FinishBasicBlock() {
// This is "default" BB. We have two jumps to it. From "header" BB and
// from last "case" BB.
if (PHIBB == SDB->BitTestCases[i].Default) {
- PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second,
- false));
+ PHI->addOperand(MachineOperand::
+ CreateReg(SDB->PHINodesToUpdate[pi].second, false));
PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent));
- PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second,
- false));
+ PHI->addOperand(MachineOperand::
+ CreateReg(SDB->PHINodesToUpdate[pi].second, false));
PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases.
back().ThisBB));
}
@@ -927,10 +1017,9 @@ SelectionDAGISel::FinishBasicBlock() {
for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size();
j != ej; ++j) {
MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB;
- if (cBB->succ_end() !=
- std::find(cBB->succ_begin(),cBB->succ_end(), PHIBB)) {
- PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second,
- false));
+ if (cBB->isSuccessor(PHIBB)) {
+ PHI->addOperand(MachineOperand::
+ CreateReg(SDB->PHINodesToUpdate[pi].second, false));
PHI->addOperand(MachineOperand::CreateMBB(cBB));
}
}
@@ -977,7 +1066,7 @@ SelectionDAGISel::FinishBasicBlock() {
(MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB));
}
// JT BB. Just iterate over successors here
- if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) {
+ if (BB->isSuccessor(PHIBB)) {
PHI->addOperand
(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false));
PHI->addOperand(MachineOperand::CreateMBB(BB));
@@ -1023,17 +1112,23 @@ SelectionDAGISel::FinishBasicBlock() {
SDB->EdgeMapping.find(BB);
if (EI != SDB->EdgeMapping.end())
ThisBB = EI->second;
- for (MachineBasicBlock::iterator Phi = BB->begin();
- Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){
- // This value for this PHI node is recorded in PHINodesToUpdate, get it.
- for (unsigned pn = 0; ; ++pn) {
- assert(pn != SDB->PHINodesToUpdate.size() &&
- "Didn't find PHI entry!");
- if (SDB->PHINodesToUpdate[pn].first == Phi) {
- Phi->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pn].
- second, false));
- Phi->addOperand(MachineOperand::CreateMBB(ThisBB));
- break;
+
+ // BB may have been removed from the CFG if a branch was constant folded.
+ if (ThisBB->isSuccessor(BB)) {
+ for (MachineBasicBlock::iterator Phi = BB->begin();
+ Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI;
+ ++Phi) {
+ // This value for this PHI node is recorded in PHINodesToUpdate.
+ for (unsigned pn = 0; ; ++pn) {
+ assert(pn != SDB->PHINodesToUpdate.size() &&
+ "Didn't find PHI entry!");
+ if (SDB->PHINodesToUpdate[pn].first == Phi) {
+ Phi->addOperand(MachineOperand::
+ CreateReg(SDB->PHINodesToUpdate[pn].second,
+ false));
+ Phi->addOperand(MachineOperand::CreateMBB(ThisBB));
+ break;
+ }
}
}
}
@@ -1302,45 +1397,47 @@ bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
return !isNonImmUse(Root, N, U);
}
-SDNode *SelectionDAGISel::Select_INLINEASM(SDValue N) {
- std::vector<SDValue> Ops(N.getNode()->op_begin(), N.getNode()->op_end());
+SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
+ std::vector<SDValue> Ops(N->op_begin(), N->op_end());
SelectInlineAsmMemoryOperands(Ops);
std::vector<EVT> VTs;
VTs.push_back(MVT::Other);
VTs.push_back(MVT::Flag);
- SDValue New = CurDAG->getNode(ISD::INLINEASM, N.getDebugLoc(),
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
VTs, &Ops[0], Ops.size());
return New.getNode();
}
-SDNode *SelectionDAGISel::Select_UNDEF(const SDValue &N) {
- return CurDAG->SelectNodeTo(N.getNode(), TargetInstrInfo::IMPLICIT_DEF,
- N.getValueType());
+SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
+ return CurDAG->SelectNodeTo(N, TargetInstrInfo::IMPLICIT_DEF,
+ N->getValueType(0));
}
-SDNode *SelectionDAGISel::Select_EH_LABEL(const SDValue &N) {
- SDValue Chain = N.getOperand(0);
+SDNode *SelectionDAGISel::Select_EH_LABEL(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
unsigned C = cast<LabelSDNode>(N)->getLabelID();
SDValue Tmp = CurDAG->getTargetConstant(C, MVT::i32);
- return CurDAG->SelectNodeTo(N.getNode(), TargetInstrInfo::EH_LABEL,
+ return CurDAG->SelectNodeTo(N, TargetInstrInfo::EH_LABEL,
MVT::Other, Tmp, Chain);
}
-void SelectionDAGISel::CannotYetSelect(SDValue N) {
+void SelectionDAGISel::CannotYetSelect(SDNode *N) {
std::string msg;
raw_string_ostream Msg(msg);
Msg << "Cannot yet select: ";
- N.getNode()->print(Msg, CurDAG);
+ N->print(Msg, CurDAG);
llvm_report_error(Msg.str());
}
-void SelectionDAGISel::CannotYetSelectIntrinsic(SDValue N) {
- errs() << "Cannot yet select: ";
+void SelectionDAGISel::CannotYetSelectIntrinsic(SDNode *N) {
+ dbgs() << "Cannot yet select: ";
unsigned iid =
- cast<ConstantSDNode>(N.getOperand(N.getOperand(0).getValueType() == MVT::Other))->getZExtValue();
+ cast<ConstantSDNode>(N->getOperand(N->getOperand(0).getValueType() ==
+ MVT::Other))->getZExtValue();
if (iid < Intrinsic::num_intrinsics)
- llvm_report_error("Cannot yet select: intrinsic %" + Intrinsic::getName((Intrinsic::ID)iid));
+ llvm_report_error("Cannot yet select: intrinsic %" +
+ Intrinsic::getName((Intrinsic::ID)iid));
else if (const TargetIntrinsicInfo *tii = TM.getIntrinsicInfo())
llvm_report_error(Twine("Cannot yet select: target intrinsic %") +
tii->getName(iid));
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 83fa5a8..3786bd1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -225,7 +225,7 @@ bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet
if (level >= 20) {
if (!printed) {
printed = true;
- DEBUG(errs() << "setSubgraphColor hit max level\n");
+ DEBUG(dbgs() << "setSubgraphColor hit max level\n");
}
return true;
}
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index d9a5a13..81c51c4 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -990,7 +990,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
return true;
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
// Output known-1 bits are only known if set in both the LHS & RHS.
@@ -1024,7 +1024,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (TLO.ShrinkDemandedConstant(Op, NewMask))
return true;
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
// Output known-0 bits are only known if clear in both the LHS & RHS.
@@ -1049,7 +1049,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if ((KnownZero2 & NewMask) == NewMask)
return TLO.CombineTo(Op, Op.getOperand(1));
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
// If all of the unknown bits are known to be zero on one side or the other
@@ -1272,19 +1272,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// Sign extension. Compute the demanded bits in the result that are not
// present in the input.
- APInt NewBits = APInt::getHighBitsSet(BitWidth,
- BitWidth - EVT.getSizeInBits()) &
- NewMask;
+ APInt NewBits =
+ APInt::getHighBitsSet(BitWidth,
+ BitWidth - EVT.getScalarType().getSizeInBits()) &
+ NewMask;
// If none of the extended bits are demanded, eliminate the sextinreg.
if (NewBits == 0)
return TLO.CombineTo(Op, Op.getOperand(0));
- APInt InSignBit = APInt::getSignBit(EVT.getSizeInBits());
+ APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits());
InSignBit.zext(BitWidth);
- APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth,
- EVT.getSizeInBits()) &
- NewMask;
+ APInt InputDemandedBits =
+ APInt::getLowBitsSet(BitWidth,
+ EVT.getScalarType().getSizeInBits()) &
+ NewMask;
// Since the sign extended bits are demanded, we know that the sign
// bit is demanded.
@@ -1313,7 +1315,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
}
case ISD::ZERO_EXTEND: {
- unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits();
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
APInt InMask = NewMask;
InMask.trunc(OperandBitWidth);
@@ -1336,7 +1339,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
case ISD::SIGN_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getSizeInBits();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
APInt NewBits = ~InMask & NewMask;
@@ -1376,7 +1379,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
}
case ISD::ANY_EXTEND: {
- unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits();
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
APInt InMask = NewMask;
InMask.trunc(OperandBitWidth);
if (SimplifyDemandedBits(Op.getOperand(0), InMask,
@@ -1480,7 +1484,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownOne2, TLO, Depth+1))
return true;
// See if the operation should be performed at a smaller bit width.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
}
// FALL THROUGH
@@ -1597,7 +1601,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
- unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ const APInt &ShAmt
+ = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
if ((C1 == 0) == (Cond == ISD::SETEQ)) {
@@ -1625,27 +1630,26 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getOperand(0).getNode()->hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
- uint64_t bestMask = 0;
+ APInt bestMask;
unsigned bestWidth = 0, bestOffset = 0;
- if (!Lod->isVolatile() && Lod->isUnindexed() &&
- // FIXME: This uses getZExtValue() below so it only works on i64 and
- // below.
- N0.getValueType().getSizeInBits() <= 64) {
+ if (!Lod->isVolatile() && Lod->isUnindexed()) {
unsigned origWidth = N0.getValueType().getSizeInBits();
+ unsigned maskWidth = origWidth;
      // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
// 8 bits, but have to be careful...
if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
origWidth = Lod->getMemoryVT().getSizeInBits();
- uint64_t Mask =cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ const APInt &Mask =
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
for (unsigned width = origWidth / 2; width>=8; width /= 2) {
- uint64_t newMask = (1ULL << width) - 1;
+ APInt newMask = APInt::getLowBitsSet(maskWidth, width);
for (unsigned offset=0; offset<origWidth/width; offset++) {
if ((newMask & Mask) == Mask) {
if (!TD->isLittleEndian())
bestOffset = (origWidth/width - offset - 1) * (width/8);
else
bestOffset = (uint64_t)offset * (width/8);
- bestMask = Mask >> (offset * (width/8) * 8);
+ bestMask = Mask.lshr(offset * (width/8) * 8);
bestWidth = width;
break;
}
@@ -1668,7 +1672,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
false, NewAlign);
return DAG.getSetCC(dl, VT,
DAG.getNode(ISD::AND, dl, newVT, NewLoad,
- DAG.getConstant(bestMask, newVT)),
+ DAG.getConstant(bestMask.trunc(bestWidth),
+ newVT)),
DAG.getConstant(0LL, newVT), Cond);
}
}
@@ -1760,7 +1765,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
if (N0.getOpcode() == ISD::SETCC) {
- bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getZExtValue() != 1);
+ bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
if (TrueWhenTrue)
return N0;
@@ -1876,24 +1881,27 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Fold bit comparisons when we can.
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
- VT == N0.getValueType() && N0.getOpcode() == ISD::AND)
+ (VT == N0.getValueType() ||
+ (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
+ N0.getOpcode() == ISD::AND)
if (ConstantSDNode *AndRHS =
dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
EVT ShiftTy = DCI.isBeforeLegalize() ?
getPointerTy() : getShiftAmountTy();
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
- if (isPowerOf2_64(AndRHS->getZExtValue())) {
- return DAG.getNode(ISD::SRL, dl, VT, N0,
- DAG.getConstant(Log2_64(AndRHS->getZExtValue()),
- ShiftTy));
+ if (AndRHS->getAPIntValue().isPowerOf2()) {
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+ DAG.getConstant(AndRHS->getAPIntValue().logBase2(), ShiftTy)));
}
- } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) {
+ } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
// (X & 8) == 8 --> (X & 8) >> 3
// Perform the xform if C1 is a single bit.
if (C1.isPowerOf2()) {
- return DAG.getNode(ISD::SRL, dl, VT, N0,
- DAG.getConstant(C1.logBase2(), ShiftTy));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+ DAG.getConstant(C1.logBase2(), ShiftTy)));
}
}
}
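
Beyond the stream rename, this file makes two systematic changes: bit widths are taken through EVT::getScalarType(), so vector operands report their per-element width rather than the whole vector's, and host uint64_t values give way to APInt (getAPIntValue() instead of getZExtValue()), which stays correct for constants wider than 64 bits. A small sketch of the APInt side, under those assumptions:

  #include "llvm/ADT/APInt.h"
  using namespace llvm;

  // Width-agnostic replacements for the old 64-bit helpers: these work for
  // i128 and wider, where getZExtValue()-based code would assert.
  bool isSingleBitMask(const APInt &Mask) { return Mask.isPowerOf2(); }
  unsigned maskShiftAmount(const APInt &Mask) { return Mask.logBase2(); }
  APInt lowBits(unsigned Width, unsigned N) {
    return APInt::getLowBitsSet(Width, N);   // replaces (1ULL << N) - 1
  }
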
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
index 8070570..aeaa38b 100644
--- a/lib/CodeGen/ShrinkWrapping.cpp
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -185,7 +185,7 @@ void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
initShrinkWrappingInfo();
DEBUG(if (ShrinkWrapThisFunction) {
- errs() << "Place CSR spills/restores for "
+ dbgs() << "Place CSR spills/restores for "
<< MF->getFunction()->getName() << "\n";
});
@@ -299,7 +299,7 @@ void PEI::calculateAnticAvail(MachineFunction &Fn) {
DEBUG({
if (ShrinkWrapDebugging >= Details) {
- errs()
+ dbgs()
<< "-----------------------------------------------------------\n"
<< " Antic/Avail Sets:\n"
<< "-----------------------------------------------------------\n"
@@ -314,7 +314,7 @@ void PEI::calculateAnticAvail(MachineFunction &Fn) {
dumpSets(MBB);
}
- errs()
+ dbgs()
<< "-----------------------------------------------------------\n";
}
});
@@ -363,7 +363,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
// If no CSRs used, we are done.
if (CSI.empty()) {
DEBUG(if (ShrinkWrapThisFunction)
- errs() << "DISABLED: " << Fn.getFunction()->getName()
+ dbgs() << "DISABLED: " << Fn.getFunction()->getName()
<< ": uses no callee-saved registers\n");
return false;
}
@@ -383,7 +383,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
// implementation to functions with <= 500 MBBs.
if (Fn.size() > 500) {
DEBUG(if (ShrinkWrapThisFunction)
- errs() << "DISABLED: " << Fn.getFunction()->getName()
+ dbgs() << "DISABLED: " << Fn.getFunction()->getName()
<< ": too large (" << Fn.size() << " MBBs)\n");
ShrinkWrapThisFunction = false;
}
@@ -465,7 +465,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
}
if (allCSRUsesInEntryBlock) {
- DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName()
+ DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
<< ": all CSRs used in EntryBlock\n");
ShrinkWrapThisFunction = false;
} else {
@@ -477,7 +477,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
allCSRsUsedInEntryFanout = false;
}
if (allCSRsUsedInEntryFanout) {
- DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName()
+ DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
<< ": all CSRs used in imm successors of EntryBlock\n");
ShrinkWrapThisFunction = false;
}
@@ -504,7 +504,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
if (dominatesExitNodes) {
CSRUsedInChokePoints |= CSRUsed[MBB];
if (CSRUsedInChokePoints == UsedCSRegs) {
- DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName()
+ DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
<< ": all CSRs used in choke point(s) at "
<< getBasicBlockName(MBB) << "\n");
ShrinkWrapThisFunction = false;
@@ -520,16 +520,16 @@ bool PEI::calculateSets(MachineFunction &Fn) {
return false;
DEBUG({
- errs() << "ENABLED: " << Fn.getFunction()->getName();
+ dbgs() << "ENABLED: " << Fn.getFunction()->getName();
if (HasFastExitPath)
- errs() << " (fast exit path)";
- errs() << "\n";
+ dbgs() << " (fast exit path)";
+ dbgs() << "\n";
if (ShrinkWrapDebugging >= BasicInfo) {
- errs() << "------------------------------"
+ dbgs() << "------------------------------"
<< "-----------------------------\n";
- errs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
+ dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
if (ShrinkWrapDebugging >= Details) {
- errs() << "------------------------------"
+ dbgs() << "------------------------------"
<< "-----------------------------\n";
dumpAllUsed();
}
@@ -602,7 +602,7 @@ bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
addedUses = true;
blks.push_back(SUCC);
DEBUG(if (ShrinkWrapDebugging >= Iterations)
- errs() << getBasicBlockName(MBB)
+ dbgs() << getBasicBlockName(MBB)
<< "(" << stringifyCSRegSet(prop) << ")->"
<< "successor " << getBasicBlockName(SUCC) << "\n");
}
@@ -618,7 +618,7 @@ bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
addedUses = true;
blks.push_back(PRED);
DEBUG(if (ShrinkWrapDebugging >= Iterations)
- errs() << getBasicBlockName(MBB)
+ dbgs() << getBasicBlockName(MBB)
<< "(" << stringifyCSRegSet(prop) << ")->"
<< "predecessor " << getBasicBlockName(PRED) << "\n");
}
@@ -656,7 +656,7 @@ bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) {
CSRUsed[EXB] |= loopSpills;
addedUses = true;
DEBUG(if (ShrinkWrapDebugging >= Iterations)
- errs() << "LOOP " << getBasicBlockName(MBB)
+ dbgs() << "LOOP " << getBasicBlockName(MBB)
<< "(" << stringifyCSRegSet(loopSpills) << ")->"
<< getBasicBlockName(EXB) << "\n");
if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
@@ -723,7 +723,7 @@ bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
blks.push_back(MBB);
DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
- errs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
<< stringifyCSRegSet(CSRSave[MBB]) << "\n");
return placedSpills;
@@ -784,7 +784,7 @@ bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
blks.push_back(MBB);
DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
- errs() << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+ dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = "
<< stringifyCSRegSet(CSRRestore[MBB]) << "\n");
return placedRestores;
@@ -808,7 +808,7 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
++iterations;
DEBUG(if (ShrinkWrapDebugging >= Iterations)
- errs() << "iter " << iterations
+ dbgs() << "iter " << iterations
<< " --------------------------------------------------\n");
// Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
@@ -858,15 +858,15 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
numSRReduced += numSRReducedThisFunc;
DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
- errs() << "-----------------------------------------------------------\n";
- errs() << "total iterations = " << iterations << " ( "
+ dbgs() << "-----------------------------------------------------------\n";
+ dbgs() << "total iterations = " << iterations << " ( "
<< Fn.getFunction()->getName()
<< " " << numSRReducedThisFunc
<< " " << Fn.size()
<< " )\n";
- errs() << "-----------------------------------------------------------\n";
+ dbgs() << "-----------------------------------------------------------\n";
dumpSRSets();
- errs() << "-----------------------------------------------------------\n";
+ dbgs() << "-----------------------------------------------------------\n";
if (numSRReducedThisFunc)
verifySpillRestorePlacement();
});
@@ -899,7 +899,7 @@ void PEI::findFastExitPath() {
// Check the immediate successors.
if (isReturnBlock(SUCC)) {
if (ShrinkWrapDebugging >= BasicInfo)
- errs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+ dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
<< "->" << getBasicBlockName(SUCC) << "\n";
break;
}
@@ -917,7 +917,7 @@ void PEI::findFastExitPath() {
}
if (HasFastExitPath) {
if (ShrinkWrapDebugging >= BasicInfo)
- errs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+ dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
<< "->" << exitPath << "\n";
break;
}
@@ -951,7 +951,7 @@ void PEI::verifySpillRestorePlacement() {
if (spilled.empty())
continue;
- DEBUG(errs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
<< stringifyCSRegSet(spilled)
<< " RESTORE[" << getBasicBlockName(MBB) << "] = "
<< stringifyCSRegSet(CSRRestore[MBB]) << "\n");
@@ -983,7 +983,7 @@ void PEI::verifySpillRestorePlacement() {
if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
if (restored != spilled) {
CSRegSet notRestored = (spilled - restored);
- DEBUG(errs() << MF->getFunction()->getName() << ": "
+ DEBUG(dbgs() << MF->getFunction()->getName() << ": "
<< stringifyCSRegSet(notRestored)
<< " spilled at " << getBasicBlockName(MBB)
<< " are never restored on path to return "
@@ -1004,7 +1004,7 @@ void PEI::verifySpillRestorePlacement() {
if (restored.empty())
continue;
- DEBUG(errs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
<< stringifyCSRegSet(CSRSave[MBB])
<< " RESTORE[" << getBasicBlockName(MBB) << "] = "
<< stringifyCSRegSet(restored) << "\n");
@@ -1031,7 +1031,7 @@ void PEI::verifySpillRestorePlacement() {
}
if (spilled != restored) {
CSRegSet notSpilled = (restored - spilled);
- DEBUG(errs() << MF->getFunction()->getName() << ": "
+ DEBUG(dbgs() << MF->getFunction()->getName() << ": "
<< stringifyCSRegSet(notSpilled)
<< " restored at " << getBasicBlockName(MBB)
<< " are never spilled\n");
@@ -1078,13 +1078,13 @@ std::string PEI::stringifyCSRegSet(const CSRegSet& s) {
}
void PEI::dumpSet(const CSRegSet& s) {
- DEBUG(errs() << stringifyCSRegSet(s) << "\n");
+ DEBUG(dbgs() << stringifyCSRegSet(s) << "\n");
}
void PEI::dumpUsed(MachineBasicBlock* MBB) {
DEBUG({
if (MBB)
- errs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
+ dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
<< stringifyCSRegSet(CSRUsed[MBB]) << "\n";
});
}
@@ -1100,7 +1100,7 @@ void PEI::dumpAllUsed() {
void PEI::dumpSets(MachineBasicBlock* MBB) {
DEBUG({
if (MBB)
- errs() << getBasicBlockName(MBB) << " | "
+ dbgs() << getBasicBlockName(MBB) << " | "
<< stringifyCSRegSet(CSRUsed[MBB]) << " | "
<< stringifyCSRegSet(AnticIn[MBB]) << " | "
<< stringifyCSRegSet(AnticOut[MBB]) << " | "
@@ -1112,7 +1112,7 @@ void PEI::dumpSets(MachineBasicBlock* MBB) {
void PEI::dumpSets1(MachineBasicBlock* MBB) {
DEBUG({
if (MBB)
- errs() << getBasicBlockName(MBB) << " | "
+ dbgs() << getBasicBlockName(MBB) << " | "
<< stringifyCSRegSet(CSRUsed[MBB]) << " | "
<< stringifyCSRegSet(AnticIn[MBB]) << " | "
<< stringifyCSRegSet(AnticOut[MBB]) << " | "
@@ -1136,14 +1136,14 @@ void PEI::dumpSRSets() {
for (MachineFunction::iterator MBB = MF->begin(), E = MF->end();
MBB != E; ++MBB) {
if (!CSRSave[MBB].empty()) {
- errs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
<< stringifyCSRegSet(CSRSave[MBB]);
if (CSRRestore[MBB].empty())
- errs() << '\n';
+ dbgs() << '\n';
}
if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty())
- errs() << " "
+ dbgs() << " "
<< "RESTORE[" << getBasicBlockName(MBB) << "] = "
<< stringifyCSRegSet(CSRRestore[MBB]) << "\n";
}
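
ShrinkWrapping mixes the single-expression DEBUG(...) form with the block form DEBUG({ ... }); the hunks above rewrite both to dbgs(). The block form is worth noting because everything inside it, including helper calls, vanishes from release builds. A brief sketch using names from this file:

  DEBUG({
    // The whole block compiles away under NDEBUG, so it may freely do
    // work that exists only for diagnostics.
    dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
    dumpAllUsed();
  });
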
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 6314331..27d429b 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -183,16 +183,16 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR)
if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) {
DEBUG({
- errs() << "Interfere with sub-register ";
- li_->getInterval(*SR).print(errs(), tri_);
+ dbgs() << "Interfere with sub-register ";
+ li_->getInterval(*SR).print(dbgs(), tri_);
});
return false;
}
}
DEBUG({
- errs() << "\nExtending: ";
- IntB.print(errs(), tri_);
+ dbgs() << "\nExtending: ";
+ IntB.print(dbgs(), tri_);
});
SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
@@ -224,9 +224,9 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
IntB.MergeValueNumberInto(BValNo, ValLR->valno);
}
DEBUG({
- errs() << " result = ";
- IntB.print(errs(), tri_);
- errs() << "\n";
+ dbgs() << " result = ";
+ IntB.print(dbgs(), tri_);
+ dbgs() << "\n";
});
// If the source instruction was killing the source register before the
@@ -467,8 +467,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
  // We need to insert a new live range [ALR.start, LastUse). It may be that
  // we can simply extend BLR if CopyMI doesn't end the range.
DEBUG({
- errs() << "\nExtending: ";
- IntB.print(errs(), tri_);
+ dbgs() << "\nExtending: ";
+ IntB.print(dbgs(), tri_);
});
// Remove val#'s defined by copies that will be coalesced away.
@@ -518,19 +518,19 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
ValNo->setHasPHIKill(BHasPHIKill);
DEBUG({
- errs() << " result = ";
- IntB.print(errs(), tri_);
- errs() << '\n';
- errs() << "\nShortening: ";
- IntA.print(errs(), tri_);
+ dbgs() << " result = ";
+ IntB.print(dbgs(), tri_);
+ dbgs() << '\n';
+ dbgs() << "\nShortening: ";
+ IntA.print(dbgs(), tri_);
});
IntA.removeValNo(AValNo);
DEBUG({
- errs() << " result = ";
- IntA.print(errs(), tri_);
- errs() << '\n';
+ dbgs() << " result = ";
+ IntA.print(dbgs(), tri_);
+ dbgs() << '\n';
});
++numCommutes;
@@ -1223,16 +1223,16 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg,
if (li_->hasInterval(RealDstReg) &&
RHS.overlaps(li_->getInterval(RealDstReg))) {
DEBUG({
- errs() << "Interfere with register ";
- li_->getInterval(RealDstReg).print(errs(), tri_);
+ dbgs() << "Interfere with register ";
+ li_->getInterval(RealDstReg).print(dbgs(), tri_);
});
return false; // Not coalescable
}
for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR)
if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
DEBUG({
- errs() << "Interfere with sub-register ";
- li_->getInterval(*SR).print(errs(), tri_);
+ dbgs() << "Interfere with sub-register ";
+ li_->getInterval(*SR).print(dbgs(), tri_);
});
return false; // Not coalescable
}
@@ -1254,16 +1254,16 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
if (li_->hasInterval(RealSrcReg) &&
RHS.overlaps(li_->getInterval(RealSrcReg))) {
DEBUG({
- errs() << "Interfere with register ";
- li_->getInterval(RealSrcReg).print(errs(), tri_);
+ dbgs() << "Interfere with register ";
+ li_->getInterval(RealSrcReg).print(dbgs(), tri_);
});
return false; // Not coalescable
}
for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR)
if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
DEBUG({
- errs() << "Interfere with sub-register ";
- li_->getInterval(*SR).print(errs(), tri_);
+ dbgs() << "Interfere with sub-register ";
+ li_->getInterval(*SR).print(dbgs(), tri_);
});
return false; // Not coalescable
}
@@ -1293,7 +1293,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI))
return false; // Already done.
- DEBUG(errs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
+ DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0;
bool isExtSubReg = CopyMI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG;
@@ -1313,7 +1313,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
if (SrcSubIdx && SrcSubIdx != DstSubIdx) {
    // r1025 = INSERT_SUBREG r1025, r1024<2>, 2. Then r1024 has already been
// coalesced to a larger register so the subreg indices cancel out.
- DEBUG(errs() << "\tSource of insert_subreg or subreg_to_reg is already "
+ DEBUG(dbgs() << "\tSource of insert_subreg or subreg_to_reg is already "
"coalesced to another register.\n");
return false; // Not coalescable.
}
@@ -1329,7 +1329,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// If they are already joined we continue.
if (SrcReg == DstReg) {
- DEBUG(errs() << "\tCopy already coalesced.\n");
+ DEBUG(dbgs() << "\tCopy already coalesced.\n");
return false; // Not coalescable.
}
@@ -1338,17 +1338,17 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// If they are both physical registers, we cannot join them.
if (SrcIsPhys && DstIsPhys) {
- DEBUG(errs() << "\tCan not coalesce physregs.\n");
+ DEBUG(dbgs() << "\tCan not coalesce physregs.\n");
return false; // Not coalescable.
}
// We only join virtual registers with allocatable physical registers.
if (SrcIsPhys && !allocatableRegs_[SrcReg]) {
- DEBUG(errs() << "\tSrc reg is unallocatable physreg.\n");
+ DEBUG(dbgs() << "\tSrc reg is unallocatable physreg.\n");
return false; // Not coalescable.
}
if (DstIsPhys && !allocatableRegs_[DstReg]) {
- DEBUG(errs() << "\tDst reg is unallocatable physreg.\n");
+ DEBUG(dbgs() << "\tDst reg is unallocatable physreg.\n");
return false; // Not coalescable.
}
@@ -1362,7 +1362,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
DstSubRC = DstRC->getSubRegisterRegClass(DstSubIdx);
assert(DstSubRC && "Illegal subregister index");
if (!DstSubRC->contains(SrcSubReg)) {
- DEBUG(errs() << "\tIncompatible destination regclass: "
+ DEBUG(dbgs() << "\tIncompatible destination regclass: "
<< tri_->getName(SrcSubReg) << " not in "
<< DstSubRC->getName() << ".\n");
return false; // Not coalescable.
@@ -1379,7 +1379,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
SrcSubRC = SrcRC->getSubRegisterRegClass(SrcSubIdx);
assert(SrcSubRC && "Illegal subregister index");
if (!SrcSubRC->contains(DstSubReg)) {
- DEBUG(errs() << "\tIncompatible source regclass: "
+ DEBUG(dbgs() << "\tIncompatible source regclass: "
<< tri_->getName(DstSubReg) << " not in "
<< SrcSubRC->getName() << ".\n");
(void)DstSubReg;
@@ -1405,7 +1405,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// r1024<2> = EXTRACT_SUBREG EAX, 2. Then r1024 has already been
// coalesced to a larger register so the subreg indices cancel out.
if (DstSubIdx != SubIdx) {
- DEBUG(errs() << "\t Sub-register indices mismatch.\n");
+ DEBUG(dbgs() << "\t Sub-register indices mismatch.\n");
return false; // Not coalescable.
}
} else
@@ -1418,7 +1418,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// EAX = INSERT_SUBREG EAX, r1024<2>, 2 Then r1024 has already been
// coalesced to a larger register so the subreg indices cancel out.
if (SrcSubIdx != SubIdx) {
- DEBUG(errs() << "\t Sub-register indices mismatch.\n");
+ DEBUG(dbgs() << "\t Sub-register indices mismatch.\n");
return false; // Not coalescable.
}
} else
@@ -1427,7 +1427,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
} else if ((DstIsPhys && isExtSubReg) ||
(SrcIsPhys && (isInsSubReg || isSubRegToReg))) {
if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) {
- DEBUG(errs() << "\tSrc of extract_subreg already coalesced with reg"
+ DEBUG(dbgs() << "\tSrc of extract_subreg already coalesced with reg"
<< " of a super-class.\n");
return false; // Not coalescable.
}
@@ -1451,7 +1451,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
      // class as the would-be resulting register.
SubIdx = 0;
else {
- DEBUG(errs() << "\t Sub-register indices mismatch.\n");
+ DEBUG(dbgs() << "\t Sub-register indices mismatch.\n");
return false; // Not coalescable.
}
}
@@ -1463,7 +1463,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
NewRC = tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx);
}
if (!NewRC) {
- DEBUG(errs() << "\t Conflicting sub-register indices.\n");
+ DEBUG(dbgs() << "\t Conflicting sub-register indices.\n");
return false; // Not coalescable
}
@@ -1535,7 +1535,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
} else if (!SrcIsPhys && !DstIsPhys) {
NewRC = getCommonSubClass(SrcRC, DstRC);
if (!NewRC) {
- DEBUG(errs() << "\tDisjoint regclasses: "
+ DEBUG(dbgs() << "\tDisjoint regclasses: "
<< SrcRC->getName() << ", "
<< DstRC->getName() << ".\n");
return false; // Not coalescable.
@@ -1551,7 +1551,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
(isExtSubReg || DstRC->isASubClass()) &&
!isWinToJoinCrossClass(LargeReg, SmallReg,
allocatableRCRegs_[NewRC].count())) {
- DEBUG(errs() << "\tSrc/Dest are different register classes.\n");
+ DEBUG(dbgs() << "\tSrc/Dest are different register classes.\n");
// Allow the coalescer to try again in case either side gets coalesced to
// a physical register that's compatible with the other side. e.g.
// r1024 = MOV32to32_ r1025
@@ -1573,9 +1573,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
"Register mapping is horribly broken!");
DEBUG({
- errs() << "\t\tInspecting "; SrcInt.print(errs(), tri_);
- errs() << " and "; DstInt.print(errs(), tri_);
- errs() << ": ";
+ dbgs() << "\t\tInspecting "; SrcInt.print(dbgs(), tri_);
+ dbgs() << " and "; DstInt.print(dbgs(), tri_);
+ dbgs() << ": ";
});
// Save a copy of the virtual register live interval. We'll manually
@@ -1606,7 +1606,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
mri_->setRegAllocationHint(DstInt.reg, 0, SrcReg);
++numAborts;
- DEBUG(errs() << "\tMay tie down a physical register, abort!\n");
+ DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
Again = true; // May be possible to coalesce later.
return false;
}
@@ -1614,7 +1614,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
mri_->setRegAllocationHint(SrcInt.reg, 0, DstReg);
++numAborts;
- DEBUG(errs() << "\tMay tie down a physical register, abort!\n");
+ DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
Again = true; // May be possible to coalesce later.
return false;
}
@@ -1635,7 +1635,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
mri_->use_end()) / Length) < Ratio)) {
mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg);
++numAborts;
- DEBUG(errs() << "\tMay tie down a physical register, abort!\n");
+ DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
Again = true; // May be possible to coalesce later.
return false;
}
@@ -1654,7 +1654,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// Only coalesce an empty interval (defined by implicit_def) with
// another interval which has a valno defined by the CopyMI and the CopyMI
// is a kill of the implicit def.
- DEBUG(errs() << "Not profitable!\n");
+ DEBUG(dbgs() << "Not profitable!\n");
return false;
}
@@ -1676,7 +1676,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
}
// Otherwise, we are unable to join the intervals.
- DEBUG(errs() << "Interference!\n");
+ DEBUG(dbgs() << "Interference!\n");
Again = true; // May be possible to coalesce later.
return false;
}
@@ -1779,9 +1779,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
}
DEBUG({
- errs() << "\n\t\tJoined. Result = ";
- ResDstInt->print(errs(), tri_);
- errs() << "\n";
+ dbgs() << "\n\t\tJoined. Result = ";
+ ResDstInt->print(dbgs(), tri_);
+ dbgs() << "\n";
});
++numJoins;
@@ -2134,8 +2134,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR)
if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
DEBUG({
- errs() << "Interfere with sub-register ";
- li_->getInterval(*SR).print(errs(), tri_);
+ dbgs() << "Interfere with sub-register ";
+ li_->getInterval(*SR).print(dbgs(), tri_);
});
return false;
}
@@ -2151,8 +2151,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR)
if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
DEBUG({
- errs() << "Interfere with sub-register ";
- li_->getInterval(*SR).print(errs(), tri_);
+ dbgs() << "Interfere with sub-register ";
+ li_->getInterval(*SR).print(dbgs(), tri_);
});
return false;
}
@@ -2413,7 +2413,7 @@ namespace {
void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
std::vector<CopyRec> &TryAgain) {
- DEBUG(errs() << MBB->getName() << ":\n");
+ DEBUG(dbgs() << MBB->getName() << ":\n");
std::vector<CopyRec> VirtCopies;
std::vector<CopyRec> PhysCopies;
@@ -2478,7 +2478,7 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
}
void SimpleRegisterCoalescing::joinIntervals() {
- DEBUG(errs() << "********** JOINING INTERVALS ***********\n");
+ DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
std::vector<CopyRec> TryAgainList;
if (loopInfo->empty()) {
@@ -2610,12 +2610,11 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
return NULL;
}
-
void SimpleRegisterCoalescing::printRegName(unsigned reg) const {
if (TargetRegisterInfo::isPhysicalRegister(reg))
- errs() << tri_->getName(reg);
+ dbgs() << tri_->getName(reg);
else
- errs() << "%reg" << reg;
+ dbgs() << "%reg" << reg;
}
void SimpleRegisterCoalescing::releaseMemory() {
@@ -2634,7 +2633,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
AA = &getAnalysis<AliasAnalysis>();
loopInfo = &getAnalysis<MachineLoopInfo>();
- DEBUG(errs() << "********** SIMPLE REGISTER COALESCING **********\n"
+ DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
<< "********** Function: "
<< ((Value*)mf_->getFunction())->getName() << '\n');
@@ -2648,11 +2647,11 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
if (EnableJoining) {
joinIntervals();
DEBUG({
- errs() << "********** INTERVALS POST JOINING **********\n";
+ dbgs() << "********** INTERVALS POST JOINING **********\n";
for (LiveIntervals::iterator I = li_->begin(), E = li_->end();
I != E; ++I){
- I->second->print(errs(), tri_);
- errs() << "\n";
+ I->second->print(dbgs(), tri_);
+ dbgs() << "\n";
}
});
}
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 6de03e1..9558933 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -381,9 +381,6 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
I->eraseFromParent();
}
-
-
-
// The entry block changes to have the eh.sjlj.setjmp, with a conditional
// branch to a dispatch block for non-zero returns. If we return normally,
// we're not handling an exception and just register the function context
@@ -397,13 +394,15 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
// Insert a load in the Catch block, and a switch on its value. By default,
// we go to a block that just does an unwind (which is the correct action
// for a standard call).
- BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwindbb", &F);
+ BasicBlock *UnwindBlock =
+ BasicBlock::Create(F.getContext(), "unwindbb", &F);
Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock));
Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
DispatchBlock);
SwitchInst *DispatchSwitch =
- SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(), DispatchBlock);
+ SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(),
+ DispatchBlock);
// Split the entry block to insert the conditional branch for the setjmp.
BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
"eh.sjlj.setjmp.cont");
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index 782af12..b8f529b 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -192,18 +192,18 @@ void SlotIndexes::renumberIndexes() {
void SlotIndexes::dump() const {
for (const IndexListEntry *itr = front(); itr != getTail();
itr = itr->getNext()) {
- errs() << itr->getIndex() << " ";
+ dbgs() << itr->getIndex() << " ";
if (itr->getInstr() != 0) {
- errs() << *itr->getInstr();
+ dbgs() << *itr->getInstr();
} else {
- errs() << "\n";
+ dbgs() << "\n";
}
}
for (MBB2IdxMap::const_iterator itr = mbb2IdxMap.begin();
itr != mbb2IdxMap.end(); ++itr) {
- errs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - ["
+ dbgs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - ["
<< itr->second.first << ", " << itr->second.second << "]\n";
}
}
@@ -217,7 +217,7 @@ void SlotIndex::print(raw_ostream &os) const {
// Dump a SlotIndex to stderr.
void SlotIndex::dump() const {
- print(errs());
- errs() << "\n";
+ print(dbgs());
+ dbgs() << "\n";
}
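
SlotIndex follows the common LLVM split between print(raw_ostream&), which writes to any stream, and dump(), a no-argument wrapper meant for debugger use; the patch repoints the wrapper at dbgs(). A hedged sketch of the convention for a hypothetical type (Interval, Lo, Hi are illustrative):

  #include "llvm/Support/Debug.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  struct Interval {
    unsigned Lo, Hi;
    void print(raw_ostream &OS) const { OS << '[' << Lo << ',' << Hi << ')'; }
    // dump() exists for interactive use; the stream choice lives here only,
    // so everything else takes an explicit raw_ostream and stays testable.
    void dump() const { print(dbgs()); dbgs() << '\n'; }
  };
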
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index bec9294..7ba4403 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -67,7 +67,7 @@ protected:
/// immediately before each use, and stores after each def. No folding or
/// remat is attempted.
std::vector<LiveInterval*> trivialSpillEverywhere(LiveInterval *li) {
- DEBUG(errs() << "Spilling everywhere " << *li << "\n");
+ DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
assert(li->weight != HUGE_VALF &&
"Attempting to spill already spilled value.");
@@ -75,7 +75,7 @@ protected:
assert(!li->isStackSlot() &&
"Trying to spill a stack slot.");
- DEBUG(errs() << "Trivial spill everywhere of reg" << li->reg << "\n");
+ DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n");
std::vector<LiveInterval*> added;
@@ -89,7 +89,7 @@ protected:
// Grab the use/def instr.
MachineInstr *mi = &*regItr;
- DEBUG(errs() << " Processing " << *mi);
+ DEBUG(dbgs() << " Processing " << *mi);
// Step regItr to the next use/def instr.
do {
@@ -242,7 +242,7 @@ private:
std::vector<LiveInterval*> tryVNISplit(LiveInterval *li,
SlotIndex *earliestStart) {
- DEBUG(errs() << "Trying VNI split of %reg" << *li << "\n");
+ DEBUG(dbgs() << "Trying VNI split of %reg" << *li << "\n");
std::vector<LiveInterval*> added;
SmallVector<VNInfo*, 4> vnis;
@@ -257,11 +257,11 @@ private:
if (vni->isUnused() || vni->kills.empty())
continue;
- DEBUG(errs() << " Extracted Val #" << vni->id << " as ");
+ DEBUG(dbgs() << " Extracted Val #" << vni->id << " as ");
LiveInterval *splitInterval = extractVNI(li, vni);
if (splitInterval != 0) {
- DEBUG(errs() << *splitInterval << "\n");
+ DEBUG(dbgs() << *splitInterval << "\n");
added.push_back(splitInterval);
alreadySplit.insert(splitInterval);
if (earliestStart != 0) {
@@ -269,11 +269,11 @@ private:
*earliestStart = splitInterval->beginIndex();
}
} else {
- DEBUG(errs() << "0\n");
+ DEBUG(dbgs() << "0\n");
}
}
- DEBUG(errs() << "Original LI: " << *li << "\n");
+ DEBUG(dbgs() << "Original LI: " << *li << "\n");
  // If the original interval still contains some live ranges,
  // add it to added and alreadySplit.
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index e8ee822..48bb5af 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -113,7 +113,7 @@ bool StackProtector::RequiresStackProtector() const {
if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) {
// We apparently only care about character arrays.
- if (AT->getElementType() != Type::getInt8Ty(AT->getContext()))
+ if (!AT->getElementType()->isInteger(8))
continue;
// If an array has more than SSPBufferSize bytes of allocated space,
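
The StackProtector change replaces a pointer comparison against Type::getInt8Ty(Context) with the Type::isInteger(width) predicate, asking the type about itself instead of materializing the canonical i8 from the LLVMContext. A hedged sketch of the check as it reads after the patch, with AI an AllocaInst as in the surrounding code:

  if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType()))
    // True exactly for [N x i8]; no LLVMContext lookup needed.
    if (AT->getElementType()->isInteger(8)) {
      // Treat as a character buffer for stack-protector purposes.
    }
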
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index fd25a37..2170703 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -200,7 +200,7 @@ void StackSlotColoring::InitializeSlots() {
Assignments.resize(LastFI);
// Gather all spill slots into a list.
- DEBUG(errs() << "Spill slot intervals:\n");
+ DEBUG(dbgs() << "Spill slot intervals:\n");
for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
LiveInterval &li = i->second;
DEBUG(li.dump());
@@ -212,7 +212,7 @@ void StackSlotColoring::InitializeSlots() {
OrigSizes[FI] = MFI->getObjectSize(FI);
AllColors.set(FI);
}
- DEBUG(errs() << '\n');
+ DEBUG(dbgs() << '\n');
// Sort them by weight.
std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
@@ -244,7 +244,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
return false;
bool Changed = false;
- DEBUG(errs() << "Assigning unused registers to spill slots:\n");
+ DEBUG(dbgs() << "Assigning unused registers to spill slots:\n");
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
int SS = li->getStackSlotIndex();
@@ -274,7 +274,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
AllColored = false;
continue;
} else {
- DEBUG(errs() << "Assigning fi#" << RSS << " to "
+ DEBUG(dbgs() << "Assigning fi#" << RSS << " to "
<< TRI->getName(Reg) << '\n');
ColoredRegs.push_back(Reg);
SlotMapping[RSS] = Reg;
@@ -302,7 +302,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
++NumEliminated;
}
}
- DEBUG(errs() << '\n');
+ DEBUG(dbgs() << '\n');
return Changed;
}
@@ -337,7 +337,7 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) {
// Record the assignment.
Assignments[Color].push_back(li);
int FI = li->getStackSlotIndex();
- DEBUG(errs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
+ DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
// Change size and alignment of the allocated slot. If there are multiple
// objects sharing the same slot, then make sure the size and alignment
@@ -361,7 +361,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
BitVector SlotIsReg(NumObjs);
BitVector UsedColors(NumObjs);
- DEBUG(errs() << "Color spill slot intervals:\n");
+ DEBUG(dbgs() << "Color spill slot intervals:\n");
bool Changed = false;
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
@@ -375,7 +375,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
Changed |= (SS != NewSS);
}
- DEBUG(errs() << "\nSpill slots after coloring:\n");
+ DEBUG(dbgs() << "\nSpill slots after coloring:\n");
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
int SS = li->getStackSlotIndex();
@@ -387,7 +387,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
#ifndef NDEBUG
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
DEBUG(SSIntervals[i]->dump());
- DEBUG(errs() << '\n');
+ DEBUG(dbgs() << '\n');
#endif
// Can we "color" a stack slot with a unused register?
@@ -419,7 +419,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
// Delete unused stack slots.
while (NextColor != -1) {
- DEBUG(errs() << "Removing unused stack object fi#" << NextColor << "\n");
+ DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n");
MFI->RemoveStackObject(NextColor);
NextColor = AllColors.find_next(NextColor);
}
@@ -605,7 +605,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
MachineBasicBlock *MBB = MI->getParent();
if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) {
if (PropagateForward(MI, MBB, DstReg, Reg)) {
- DEBUG(errs() << "Eliminated load: ");
+ DEBUG(dbgs() << "Eliminated load: ");
DEBUG(MI->dump());
++NumLoadElim;
} else {
@@ -621,7 +621,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
}
} else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) {
if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) {
- DEBUG(errs() << "Eliminated store: ");
+ DEBUG(dbgs() << "Eliminated store: ");
DEBUG(MI->dump());
++NumStoreElim;
} else {
@@ -699,7 +699,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(errs() << "********** Stack Slot Coloring **********\n");
+ DEBUG(dbgs() << "********** Stack Slot Coloring **********\n");
MFI = MF.getFrameInfo();
MRI = &MF.getRegInfo();
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index 3c13906..bd7cb75 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -555,7 +555,7 @@ void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) {
// Add the renaming set for this PHI node to our overall renaming information
for (std::map<unsigned, MachineBasicBlock*>::iterator QI = PHIUnion.begin(),
QE = PHIUnion.end(); QI != QE; ++QI) {
- DEBUG(errs() << "Adding Renaming: " << QI->first << " -> "
+ DEBUG(dbgs() << "Adding Renaming: " << QI->first << " -> "
<< P->getOperand(0).getReg() << "\n");
}
@@ -698,7 +698,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
TII->copyRegToReg(*PI->getParent(), PI, t,
curr.second, RC, RC);
- DEBUG(errs() << "Inserted copy from " << curr.second << " to " << t
+ DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t
<< "\n");
// Push temporary on Stacks
@@ -715,7 +715,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second,
map[curr.first], RC, RC);
map[curr.first] = curr.second;
- DEBUG(errs() << "Inserted copy from " << curr.first << " to "
+ DEBUG(dbgs() << "Inserted copy from " << curr.first << " to "
<< curr.second << "\n");
// Push this copy onto InsertedPHICopies so we can
@@ -928,7 +928,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
unsigned reg = OI->first;
++OI;
I->second.erase(reg);
- DEBUG(errs() << "Removing Renaming: " << reg << " -> " << I->first
+ DEBUG(dbgs() << "Removing Renaming: " << reg << " -> " << I->first
<< "\n");
}
}
@@ -946,7 +946,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
while (I->second.size()) {
std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin();
- DEBUG(errs() << "Renaming: " << SI->first << " -> " << I->first << "\n");
+ DEBUG(dbgs() << "Renaming: " << SI->first << " -> " << I->first << "\n");
if (SI->first != I->first) {
if (mergeLiveIntervals(I->first, SI->first)) {
@@ -978,7 +978,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
R.valno->setCopy(--SI->second->getFirstTerminator());
R.valno->def = instrIdx.getDefIndex();
- DEBUG(errs() << "Renaming failed: " << SI->first << " -> "
+ DEBUG(dbgs() << "Renaming failed: " << SI->first << " -> "
<< I->first << "\n");
}
}
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index bf58902..f51f74d 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -139,8 +139,8 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
}
}
if (!Found) {
- errs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
- errs() << " missing input from predecessor BB#"
+ dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+ dbgs() << " missing input from predecessor BB#"
<< PredBB->getNumber() << '\n';
llvm_unreachable(0);
}
@@ -150,14 +150,14 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
if (CheckExtra && !Preds.count(PHIBB)) {
// This is not a hard error.
- errs() << "Warning: malformed PHI in BB#" << MBB->getNumber()
+ dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber()
<< ": " << *MI;
- errs() << " extra input from predecessor BB#"
+ dbgs() << " extra input from predecessor BB#"
<< PHIBB->getNumber() << '\n';
}
if (PHIBB->getNumber() < 0) {
- errs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
- errs() << " non-existing BB#" << PHIBB->getNumber() << '\n';
+ dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+ dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n';
llvm_unreachable(0);
}
}
@@ -173,7 +173,7 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
bool MadeChange = false;
if (PreRegAlloc && TailDupVerify) {
- DEBUG(errs() << "\n*** Before tail-duplicating\n");
+ DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
VerifyPHIs(MF, true);
}
@@ -253,7 +253,7 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
SSAUpdateVals.clear();
}
- // Eliminate some of the copies inserted tail duplication to maintain
+ // Eliminate some of the copies inserted by tail duplication to maintain
// SSA form.
for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
MachineInstr *Copy = Copies[i];
@@ -346,7 +346,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
MachineBasicBlock *PredBB,
MachineFunction &MF,
DenseMap<unsigned, unsigned> &LocalVRMap) {
- MachineInstr *NewMI = MF.CloneMachineInstr(MI);
+ MachineInstr *NewMI = TII->duplicate(MI, MF);
for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = NewMI->getOperand(i);
if (!MO.isReg())
@@ -437,8 +437,11 @@ bool
TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
SmallVector<MachineBasicBlock*, 8> &TDBBs,
SmallVector<MachineInstr*, 16> &Copies) {
- // Don't try to tail-duplicate single-block loops.
- if (TailBB->isSuccessor(TailBB))
+ // Pre-regalloc tail duplication hurts compile time and doesn't help
+ // much except for indirect branches.
+ bool hasIndirectBranch = (!TailBB->empty() &&
+ TailBB->back().getDesc().isIndirectBranch());
+ if (PreRegAlloc && !hasIndirectBranch)
return false;
// Set the limit on the number of instructions to duplicate, with a default
@@ -446,7 +449,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
unsigned MaxDuplicateCount;
- if (!TailBB->empty() && TailBB->back().getDesc().isIndirectBranch())
+ if (hasIndirectBranch)
// If the target has hardware branch prediction that can handle indirect
// branches, duplicating them can often make them predictable when there
// are common paths through the code. The limit needs to be high enough
@@ -457,6 +460,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
else
MaxDuplicateCount = TailDuplicateSize;
+ // Don't try to tail-duplicate single-block loops.
+ if (TailBB->isSuccessor(TailBB))
+ return false;
+
// Check the instructions in the block to determine whether tail-duplication
// is invalid or unlikely to be profitable.
unsigned InstrCount = 0;
@@ -481,7 +488,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
if (InstrCount > 1 && HasCall)
return false;
- DEBUG(errs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
+ DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
// Iterate through all the unique predecessors and tail-duplicate this
// block into them, if possible. Copying the list ahead of time also
@@ -510,7 +517,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
continue;
- DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB
+ DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
<< "From Succ: " << *TailBB);
TDBBs.push_back(PredBB);
@@ -570,7 +577,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB &&
TailBB->pred_size() == 1 && PrevBB->succ_size() == 1 &&
!TailBB->hasAddressTaken()) {
- DEBUG(errs() << "\nMerging into block: " << *PrevBB
+ DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
<< "From MBB: " << *TailBB);
if (PreRegAlloc) {
DenseMap<unsigned, unsigned> LocalVRMap;
@@ -620,7 +627,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
/// function, updating the CFG.
void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) {
assert(MBB->pred_empty() && "MBB must be dead!");
- DEBUG(errs() << "\nRemoving MBB: " << *MBB);
+ DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
// Remove all successors.
while (!MBB->succ_empty())
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 393e315..a0fccab 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -150,6 +150,13 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
MBB.insert(I, MI);
}
+MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
+ MachineFunction &MF) const {
+ assert(!Orig->getDesc().isNotDuplicable() &&
+ "Instruction cannot be duplicated");
+ return MF.CloneMachineInstr(Orig);
+}
+
bool
TargetInstrInfoImpl::isIdentical(const MachineInstr *MI,
const MachineInstr *Other,
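
The new TargetInstrInfoImpl::duplicate hook gives TailDuplication (above) a checked path for cloning: the default asserts the instruction is duplicable before deferring to MachineFunction::CloneMachineInstr, and a target can override it to perform target-specific fixup first. A hedged sketch of such an override (FooInstrInfo is an illustrative target class):

  MachineInstr *FooInstrInfo::duplicate(MachineInstr *Orig,
                                        MachineFunction &MF) const {
    // A target could rewrite instruction-specific state here before
    // cloning; delegating to the base implementation keeps the
    // isNotDuplicable() assertion.
    return TargetInstrInfoImpl::duplicate(Orig, MF);
  }
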
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 98b95ac..a3f6364 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -573,15 +573,15 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
MachineFunction::iterator &mbbi,
unsigned RegB, unsigned RegC, unsigned Dist) {
MachineInstr *MI = mi;
- DEBUG(errs() << "2addr: COMMUTING : " << *MI);
+ DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
MachineInstr *NewMI = TII->commuteInstruction(MI);
if (NewMI == 0) {
- DEBUG(errs() << "2addr: COMMUTING FAILED!\n");
+ DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n");
return false;
}
- DEBUG(errs() << "2addr: COMMUTED TO: " << *NewMI);
+ DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI);
// If the instruction changed to commute it, update livevar.
if (NewMI != MI) {
if (LV)
@@ -628,8 +628,8 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
unsigned RegB, unsigned Dist) {
MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV);
if (NewMI) {
- DEBUG(errs() << "2addr: CONVERTING 2-ADDR: " << *mi);
- DEBUG(errs() << "2addr: TO 3-ADDR: " << *NewMI);
+ DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
+ DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
bool Sunk = false;
if (NewMI->findRegisterUseOperand(RegB, false, TRI))
@@ -891,7 +891,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
/// runOnMachineFunction - Reduce two-address instructions to two operands.
///
bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(errs() << "Machine Function\n");
+ DEBUG(dbgs() << "Machine Function\n");
const TargetMachine &TM = MF.getTarget();
MRI = &MF.getRegInfo();
TII = TM.getInstrInfo();
@@ -901,8 +901,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
bool MadeChange = false;
- DEBUG(errs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
- DEBUG(errs() << "********** Function: "
+ DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
+ DEBUG(dbgs() << "********** Function: "
<< MF.getFunction()->getName() << '\n');
// ReMatRegs - Keep track of the registers whose def's are remat'ed.
@@ -943,7 +943,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
if (FirstTied) {
FirstTied = false;
++NumTwoAddressInstrs;
- DEBUG(errs() << '\t' << *mi);
+ DEBUG(dbgs() << '\t' << *mi);
}
assert(mi->getOperand(SrcIdx).isReg() &&
@@ -1024,7 +1024,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
DefMI->getDesc().isAsCheapAsAMove() &&
DefMI->isSafeToReMat(TII, regB, AA) &&
isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
- DEBUG(errs() << "2addr: REMATTING : " << *DefMI << "\n");
+ DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI);
ReMatRegs.set(regB);
@@ -1040,7 +1040,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
DistanceMap.insert(std::make_pair(prevMI, Dist));
DistanceMap[mi] = ++Dist;
- DEBUG(errs() << "\t\tprepend:\t" << *prevMI);
+ DEBUG(dbgs() << "\t\tprepend:\t" << *prevMI);
MachineOperand &MO = mi->getOperand(SrcIdx);
assert(MO.isReg() && MO.getReg() == regB && MO.isUse() &&
@@ -1085,7 +1085,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
MadeChange = true;
- DEBUG(errs() << "\t\trewrite to:\t" << *mi);
+ DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
}
// Clear TiedOperands here instead of at the top of the loop
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index c8c5d86..d4fb2e4 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -278,5 +278,5 @@ void VirtRegMap::print(raw_ostream &OS, const Module* M) const {
}
void VirtRegMap::dump() const {
- print(errs());
+ print(dbgs());
}
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 054c3b6..df2b8d2 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -60,6 +60,33 @@ ScheduleSpills("schedule-spills",
VirtRegRewriter::~VirtRegRewriter() {}
+/// substitutePhysReg - Replace virtual register in MachineOperand with a
+/// physical register. Do the right thing with the sub-register index.
+static void substitutePhysReg(MachineOperand &MO, unsigned Reg,
+ const TargetRegisterInfo &TRI) {
+ if (unsigned SubIdx = MO.getSubReg()) {
+ // Insert the physical subreg and reset the subreg field.
+ MO.setReg(TRI.getSubReg(Reg, SubIdx));
+ MO.setSubReg(0);
+
+ // Any def, dead, and kill flags apply to the full virtual register, so they
+ // also apply to the full physical register. Add imp-def/dead and imp-kill
+ // as needed.
+ MachineInstr &MI = *MO.getParent();
+ if (MO.isDef())
+ if (MO.isDead())
+ MI.addRegisterDead(Reg, &TRI, /*AddIfNotFound=*/ true);
+ else
+ MI.addRegisterDefined(Reg, &TRI);
+ else if (!MO.isUndef() &&
+ (MO.isKill() ||
+ MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0))))
+ MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true);
+ } else {
+ MO.setReg(Reg);
+ }
+}
+
namespace {
/// This class is intended for use with the new spilling framework only. It
@@ -69,10 +96,10 @@ struct TrivialRewriter : public VirtRegRewriter {
bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
LiveIntervals* LIs) {
- DEBUG(errs() << "********** REWRITE MACHINE CODE **********\n");
- DEBUG(errs() << "********** Function: "
+ DEBUG(dbgs() << "********** REWRITE MACHINE CODE **********\n");
+ DEBUG(dbgs() << "********** Function: "
<< MF.getFunction()->getName() << '\n');
- DEBUG(errs() << "**** Machine Instrs"
+ DEBUG(dbgs() << "**** Machine Instrs"
<< "(NOTE! Does not include spills and reloads!) ****\n");
DEBUG(MF.dump());
@@ -101,16 +128,13 @@ struct TrivialRewriter : public VirtRegRewriter {
MachineOperand &mop = regItr.getOperand();
assert(mop.isReg() && mop.getReg() == reg && "reg_iterator broken?");
++regItr;
- unsigned subRegIdx = mop.getSubReg();
- unsigned pRegOp = subRegIdx ? tri->getSubReg(pReg, subRegIdx) : pReg;
- mop.setReg(pRegOp);
- mop.setSubReg(0);
+ substitutePhysReg(mop, pReg, *tri);
changed = true;
}
}
}
- DEBUG(errs() << "**** Post Machine Instrs ****\n");
+ DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
DEBUG(MF.dump());
return changed;
@@ -191,11 +215,11 @@ public:
(unsigned)CanClobber;
if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(errs() << "Remembering RM#"
+ DEBUG(dbgs() << "Remembering RM#"
<< SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1);
else
- DEBUG(errs() << "Remembering SS#" << SlotOrReMat);
- DEBUG(errs() << " in physreg " << TRI->getName(Reg) << "\n");
+ DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat);
+ DEBUG(dbgs() << " in physreg " << TRI->getName(Reg) << "\n");
}
/// canClobberPhysRegForSS - Return true if the spiller is allowed to change
@@ -647,12 +671,9 @@ static void ReMaterialize(MachineBasicBlock &MBB,
if (TargetRegisterInfo::isPhysicalRegister(VirtReg))
continue;
assert(MO.isUse());
- unsigned SubIdx = MO.getSubReg();
unsigned Phys = VRM.getPhys(VirtReg);
assert(Phys && "Virtual register is not assigned a register?");
- unsigned RReg = SubIdx ? TRI->getSubReg(Phys, SubIdx) : Phys;
- MO.setReg(RReg);
- MO.setSubReg(0);
+ substitutePhysReg(MO, Phys, *TRI);
}
++NumReMats;
}
@@ -686,7 +707,7 @@ void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) {
assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
"Bidirectional map mismatch!");
SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1;
- DEBUG(errs() << "PhysReg " << TRI->getName(PhysReg)
+ DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
<< " copied, it is available for use but can no longer be modified\n");
}
}
@@ -711,12 +732,12 @@ void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) {
assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
"Bidirectional map mismatch!");
SpillSlotsOrReMatsAvailable.erase(SlotOrReMat);
- DEBUG(errs() << "PhysReg " << TRI->getName(PhysReg)
+ DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
<< " clobbered, invalidating ");
if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(errs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n");
+ DEBUG(dbgs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n");
else
- DEBUG(errs() << "SS#" << SlotOrReMat << "\n");
+ DEBUG(dbgs() << "SS#" << SlotOrReMat << "\n");
}
}
@@ -895,9 +916,9 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg);
UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
- DEBUG(errs() << '\t' << *prior(InsertLoc));
+ DEBUG(dbgs() << '\t' << *prior(InsertLoc));
- DEBUG(errs() << "Reuse undone!\n");
+ DEBUG(dbgs() << "Reuse undone!\n");
--NumReused;
// Finally, PhysReg is now available, go ahead and use it.
@@ -1004,11 +1025,12 @@ static unsigned FindFreeRegister(MachineBasicBlock::iterator MII,
}
static
-void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg) {
+void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg,
+ const TargetRegisterInfo &TRI) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.getReg() == VirtReg)
- MO.setReg(PhysReg);
+ substitutePhysReg(MO, PhysReg, TRI);
}
}
@@ -1041,9 +1063,9 @@ public:
TRI = MF.getTarget().getRegisterInfo();
TII = MF.getTarget().getInstrInfo();
AllocatableRegs = TRI->getAllocatableSet(MF);
- DEBUG(errs() << "\n**** Local spiller rewriting function '"
+ DEBUG(dbgs() << "\n**** Local spiller rewriting function '"
<< MF.getFunction()->getName() << "':\n");
- DEBUG(errs() << "**** Machine Instrs (NOTE! Does not include spills and"
+ DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and"
" reloads!) ****\n");
DEBUG(MF.dump());
@@ -1095,7 +1117,7 @@ public:
Spills.clear();
}
- DEBUG(errs() << "**** Post Machine Instrs ****\n");
+ DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
DEBUG(MF.dump());
// Mark unused spill slots.
@@ -1175,7 +1197,7 @@ private:
if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
llvm_unreachable("Unable unfold the load / store folding instruction!");
assert(NewMIs.size() == 1);
- AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg);
+ AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
VRM.transferRestorePts(&MI, NewMIs[0]);
MII = MBB.insert(MII, NewMIs[0]);
InvalidateKills(MI, TRI, RegKills, KillOps);
@@ -1191,7 +1213,7 @@ private:
if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
llvm_unreachable("Unable unfold the load / store folding instruction!");
assert(NewMIs.size() == 1);
- AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg);
+ AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
VRM.transferRestorePts(&NextMI, NewMIs[0]);
MBB.insert(NextMII, NewMIs[0]);
InvalidateKills(NextMI, TRI, RegKills, KillOps);
@@ -1467,11 +1489,11 @@ private:
TII->storeRegToStackSlot(MBB, llvm::next(MII), PhysReg, true, StackSlot, RC);
MachineInstr *StoreMI = prior(oldNextMII);
VRM.addSpillSlotUse(StackSlot, StoreMI);
- DEBUG(errs() << "Store:\t" << *StoreMI);
+ DEBUG(dbgs() << "Store:\t" << *StoreMI);
// If there is a dead store to this stack slot, nuke it now.
if (LastStore) {
- DEBUG(errs() << "Removed dead store:\t" << *LastStore);
+ DEBUG(dbgs() << "Removed dead store:\t" << *LastStore);
++NumDSE;
SmallVector<unsigned, 2> KillRegs;
InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs);
@@ -1599,7 +1621,7 @@ private:
AvailableSpills &Spills, BitVector &RegKills,
std::vector<MachineOperand*> &KillOps) {
- DEBUG(errs() << "\n**** Local spiller rewriting MBB '"
+ DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '"
<< MBB.getName() << "':\n");
MachineFunction &MF = *MBB.getParent();
@@ -1699,11 +1721,11 @@ private:
// If the value is already available in the expected register, save
// a reload / remat.
if (SSorRMId)
- DEBUG(errs() << "Reusing RM#"
+ DEBUG(dbgs() << "Reusing RM#"
<< SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
else
- DEBUG(errs() << "Reusing SS#" << SSorRMId);
- DEBUG(errs() << " from physreg "
+ DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
+ DEBUG(dbgs() << " from physreg "
<< TRI->getName(InReg) << " for vreg"
<< VirtReg <<" instead of reloading into physreg "
<< TRI->getName(Phys) << '\n');
@@ -1711,11 +1733,11 @@ private:
continue;
} else if (InReg && InReg != Phys) {
if (SSorRMId)
- DEBUG(errs() << "Reusing RM#"
+ DEBUG(dbgs() << "Reusing RM#"
<< SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
else
- DEBUG(errs() << "Reusing SS#" << SSorRMId);
- DEBUG(errs() << " from physreg "
+ DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
+ DEBUG(dbgs() << " from physreg "
<< TRI->getName(InReg) << " for vreg"
<< VirtReg <<" by copying it into physreg "
<< TRI->getName(Phys) << '\n');
@@ -1742,7 +1764,7 @@ private:
KillOpnd->setIsKill();
UpdateKills(*CopyMI, TRI, RegKills, KillOps);
- DEBUG(errs() << '\t' << *CopyMI);
+ DEBUG(dbgs() << '\t' << *CopyMI);
++NumCopified;
continue;
}
@@ -1769,7 +1791,7 @@ private:
Spills.addAvailable(SSorRMId, Phys);
UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
- DEBUG(errs() << '\t' << *prior(MII));
+ DEBUG(dbgs() << '\t' << *prior(MII));
}
}
@@ -1789,7 +1811,7 @@ private:
TII->storeRegToStackSlot(MBB, llvm::next(MII), Phys, isKill, StackSlot, RC);
MachineInstr *StoreMI = prior(oldNextMII);
VRM.addSpillSlotUse(StackSlot, StoreMI);
- DEBUG(errs() << "Store:\t" << *StoreMI);
+ DEBUG(dbgs() << "Store:\t" << *StoreMI);
VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
}
NextMII = llvm::next(MII);
@@ -1840,16 +1862,14 @@ private:
RegInfo->setPhysRegUsed(Phys);
if (MO.isDef())
ReusedOperands.markClobbered(Phys);
- unsigned RReg = SubIdx ? TRI->getSubReg(Phys, SubIdx) : Phys;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
+ substitutePhysReg(MO, Phys, *TRI);
if (VRM.isImplicitlyDefined(VirtReg))
// FIXME: Is this needed?
BuildMI(MBB, &MI, MI.getDebugLoc(),
- TII->get(TargetInstrInfo::IMPLICIT_DEF), RReg);
+ TII->get(TargetInstrInfo::IMPLICIT_DEF), Phys);
continue;
}
-
+
// This virtual register is now known to be a spilled value.
if (!MO.isUse())
continue; // Handle defs in the loop below (handle use&def here though)
@@ -1908,11 +1928,11 @@ private:
if (CanReuse) {
// If this stack slot value is already available, reuse it!
if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(errs() << "Reusing RM#"
+ DEBUG(dbgs() << "Reusing RM#"
<< ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
else
- DEBUG(errs() << "Reusing SS#" << ReuseSlot);
- DEBUG(errs() << " from physreg "
+ DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+ DEBUG(dbgs() << " from physreg "
<< TRI->getName(PhysReg) << " for vreg"
<< VirtReg <<" instead of reloading into physreg "
<< TRI->getName(VRM.getPhys(VirtReg)) << '\n');
@@ -1991,11 +2011,11 @@ private:
if (DesignatedReg == PhysReg) {
// If this stack slot value is already available, reuse it!
if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(errs() << "Reusing RM#"
+ DEBUG(dbgs() << "Reusing RM#"
<< ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
else
- DEBUG(errs() << "Reusing SS#" << ReuseSlot);
- DEBUG(errs() << " from physreg " << TRI->getName(PhysReg)
+ DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+ DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
<< " for vreg" << VirtReg
<< " instead of reloading into same physreg.\n");
unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
@@ -2029,7 +2049,7 @@ private:
SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
MI.getOperand(i).setReg(RReg);
MI.getOperand(i).setSubReg(0);
- DEBUG(errs() << '\t' << *prior(MII));
+ DEBUG(dbgs() << '\t' << *prior(MII));
++NumReused;
continue;
} // if (PhysReg)
@@ -2082,7 +2102,7 @@ private:
}
UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
- DEBUG(errs() << '\t' << *prior(InsertLoc));
+ DEBUG(dbgs() << '\t' << *prior(InsertLoc));
}
unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
MI.getOperand(i).setReg(RReg);
@@ -2096,7 +2116,7 @@ private:
int PDSSlot = PotentialDeadStoreSlots[j];
MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
if (DeadStore) {
- DEBUG(errs() << "Removed dead store:\t" << *DeadStore);
+ DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
VRM.RemoveMachineInstrFromMaps(DeadStore);
MBB.erase(DeadStore);
@@ -2106,7 +2126,7 @@ private:
}
- DEBUG(errs() << '\t' << MI);
+ DEBUG(dbgs() << '\t' << MI);
// If we have folded references to memory operands, make sure we clear all
@@ -2116,7 +2136,7 @@ private:
for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) {
unsigned VirtReg = I->second.first;
VirtRegMap::ModRef MR = I->second.second;
- DEBUG(errs() << "Folded vreg: " << VirtReg << " MR: " << MR);
+ DEBUG(dbgs() << "Folded vreg: " << VirtReg << " MR: " << MR);
// MI2VirtMap can be updated, which invalidates the iterator.
// Increment the iterator first.
@@ -2125,7 +2145,7 @@ private:
if (SS == VirtRegMap::NO_STACK_SLOT)
continue;
FoldedSS.insert(SS);
- DEBUG(errs() << " - StackSlot: " << SS << "\n");
+ DEBUG(dbgs() << " - StackSlot: " << SS << "\n");
// If this folded instruction is just a use, check to see if it's a
// straight load from the virt reg slot.
@@ -2136,7 +2156,7 @@ private:
// If this spill slot is available, turn it into a copy (or nothing)
// instead of leaving it as a load!
if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
- DEBUG(errs() << "Promoted Load To Copy: " << MI);
+ DEBUG(dbgs() << "Promoted Load To Copy: " << MI);
if (DestReg != InReg) {
const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
TII->copyRegToReg(MBB, &MI, DestReg, InReg, RC, RC);
@@ -2160,7 +2180,7 @@ private:
BackTracked = true;
} else {
- DEBUG(errs() << "Removing now-noop copy: " << MI);
+ DEBUG(dbgs() << "Removing now-noop copy: " << MI);
// Unset last kill since it's being reused.
InvalidateKill(InReg, TRI, RegKills, KillOps);
Spills.disallowClobberPhysReg(InReg);
@@ -2230,7 +2250,7 @@ private:
if (isDead) { // Previous store is dead.
// If we get here, the store is dead, nuke it now.
- DEBUG(errs() << "Removed dead store:\t" << *DeadStore);
+ DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
VRM.RemoveMachineInstrFromMaps(DeadStore);
MBB.erase(DeadStore);
@@ -2301,7 +2321,7 @@ private:
if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst &&
!MI.findRegisterUseOperand(Src)->isUndef()) {
++NumDCE;
- DEBUG(errs() << "Removing now-noop copy: " << MI);
+ DEBUG(dbgs() << "Removing now-noop copy: " << MI);
SmallVector<unsigned, 2> KillRegs;
InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
if (MO.isDead() && !KillRegs.empty()) {
@@ -2389,7 +2409,7 @@ private:
unsigned Src, Dst, SrcSR, DstSR;
if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) {
++NumDCE;
- DEBUG(errs() << "Removing now-noop copy: " << MI);
+ DEBUG(dbgs() << "Removing now-noop copy: " << MI);
InvalidateKills(MI, TRI, RegKills, KillOps);
VRM.RemoveMachineInstrFromMaps(&MI);
MBB.erase(&MI);
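For illustration, a sketch of what the new substitutePhysReg helper does to a sub-register operand, written as if the file-static helper were visible here; the operand index, register, and sub-register index are hypothetical:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Operand OpNo of MI reads virtual register %v0 with sub-register
// index 1, and the allocator assigned %v0 the physical register R.
static void rewriteOperand(MachineInstr &MI, unsigned OpNo, unsigned R,
                           const TargetRegisterInfo &TRI) {
  MachineOperand &MO = MI.getOperand(OpNo);
  substitutePhysReg(MO, R, TRI);
  // MO now reads TRI.getSubReg(R, 1) with a sub-register index of 0;
  // if the old operand carried a kill flag, an implicit kill of the
  // full register R was added so liveness for R stays accurate.
}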
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index cb30748..89c4290 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -138,7 +138,7 @@ void *ExecutionEngineState::RemoveMapping(
void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) {
MutexGuard locked(lock);
- DEBUG(errs() << "JIT: Map \'" << GV->getName()
+ DEBUG(dbgs() << "JIT: Map \'" << GV->getName()
<< "\' to [" << Addr << "]\n";);
void *&CurVal = EEState.getGlobalAddressMap(locked)[GV];
assert((CurVal == 0 || Addr == 0) && "GlobalMapping already established!");
@@ -246,13 +246,13 @@ static void *CreateArgv(LLVMContext &C, ExecutionEngine *EE,
unsigned PtrSize = EE->getTargetData()->getPointerSize();
char *Result = new char[(InputArgv.size()+1)*PtrSize];
- DEBUG(errs() << "JIT: ARGV = " << (void*)Result << "\n");
+ DEBUG(dbgs() << "JIT: ARGV = " << (void*)Result << "\n");
const Type *SBytePtr = Type::getInt8PtrTy(C);
for (unsigned i = 0; i != InputArgv.size(); ++i) {
unsigned Size = InputArgv[i].size()+1;
char *Dest = new char[Size];
- DEBUG(errs() << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n");
+ DEBUG(dbgs() << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n");
std::copy(InputArgv[i].begin(), InputArgv[i].end(), Dest);
Dest[Size-1] = 0;
@@ -343,9 +343,7 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn,
// Check main() type
unsigned NumArgs = Fn->getFunctionType()->getNumParams();
const FunctionType *FTy = Fn->getFunctionType();
- const Type* PPInt8Ty =
- PointerType::getUnqual(PointerType::getUnqual(
- Type::getInt8Ty(Fn->getContext())));
+ const Type* PPInt8Ty = Type::getInt8PtrTy(Fn->getContext())->getPointerTo();
switch (NumArgs) {
case 3:
if (FTy->getParamType(2) != PPInt8Ty) {
@@ -358,13 +356,13 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn,
}
// FALLS THROUGH
case 1:
- if (FTy->getParamType(0) != Type::getInt32Ty(Fn->getContext())) {
+ if (!FTy->getParamType(0)->isInteger(32)) {
llvm_report_error("Invalid type for first argument of main() supplied");
}
// FALLS THROUGH
case 0:
if (!isa<IntegerType>(FTy->getReturnType()) &&
- FTy->getReturnType() != Type::getVoidTy(FTy->getContext())) {
+ !FTy->getReturnType()->isVoidTy()) {
llvm_report_error("Invalid return type of main() supplied");
}
break;
@@ -493,8 +491,22 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
/// @brief Get a GenericValue for a Constant*
GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
// If it's undefined, return garbage.
- if (isa<UndefValue>(C))
- return GenericValue();
+ if (isa<UndefValue>(C)) {
+ GenericValue Result;
+ switch (C->getType()->getTypeID()) {
+ case Type::IntegerTyID:
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ // Although the value is undefined, we still have to construct an APInt
+ // with the correct bit width.
+ Result.IntVal = APInt(C->getType()->getPrimitiveSizeInBits(), 0);
+ break;
+ default:
+ break;
+ }
+ return Result;
+ }
// If the value is a ConstantExpr
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
@@ -620,13 +632,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
GV.DoubleVal = GV.IntVal.bitsToDouble();
break;
case Type::FloatTyID:
- assert(DestTy == Type::getInt32Ty(DestTy->getContext()) &&
- "Invalid bitcast");
+ assert(DestTy->isInteger(32) && "Invalid bitcast");
GV.IntVal.floatToBits(GV.FloatVal);
break;
case Type::DoubleTyID:
- assert(DestTy == Type::getInt64Ty(DestTy->getContext()) &&
- "Invalid bitcast");
+ assert(DestTy->isInteger(64) && "Invalid bitcast");
GV.IntVal.doubleToBits(GV.DoubleVal);
break;
case Type::PointerTyID:
@@ -832,7 +842,7 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
*((PointerTy*)Ptr) = Val.PointerVal;
break;
default:
- errs() << "Cannot store value of type " << *Ty << "!\n";
+ dbgs() << "Cannot store value of type " << *Ty << "!\n";
}
if (sys::isLittleEndianHost() != getTargetData()->isLittleEndian())
@@ -908,7 +918,7 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
// specified memory location...
//
void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
- DEBUG(errs() << "JIT: Initializing " << Addr << " ");
+ DEBUG(dbgs() << "JIT: Initializing " << Addr << " ");
DEBUG(Init->dump());
if (isa<UndefValue>(Init)) {
return;
@@ -939,7 +949,7 @@ void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
return;
}
- errs() << "Bad Type: " << *Init->getType() << "\n";
+ dbgs() << "Bad Type: " << *Init->getType() << "\n";
llvm_unreachable("Unknown constant type to initialize memory with!");
}
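The undef handling added to getConstantValue matters because GenericValue::IntVal is an APInt, and APInt arithmetic asserts when bit widths differ. A sketch of the situation the change avoids; the i16 width is an arbitrary example:

#include "llvm/ADT/APInt.h"
#include "llvm/ExecutionEngine/GenericValue.h"
using namespace llvm;

static void widthDemo() {
  GenericValue U;                  // as if from an undef i16: IntVal is
                                   // default-constructed, not 16 bits wide
  GenericValue X;
  X.IntVal = APInt(16, 42);
  U.IntVal = APInt(16, 0);         // what the new code now does for undef
  APInt Sum = X.IntVal + U.IntVal; // widths match, no assertion
  (void)Sum;
}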
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index 5901cd7..412b493 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
LLVMGenericValueRef LLVMCreateGenericValueOfInt(LLVMTypeRef Ty,
unsigned long long N,
- int IsSigned) {
+ LLVMBool IsSigned) {
GenericValue *GenVal = new GenericValue();
GenVal->IntVal = APInt(unwrap<IntegerType>(Ty)->getBitWidth(), N, IsSigned);
return wrap(GenVal);
@@ -56,7 +56,7 @@ unsigned LLVMGenericValueIntWidth(LLVMGenericValueRef GenValRef) {
}
unsigned long long LLVMGenericValueToInt(LLVMGenericValueRef GenValRef,
- int IsSigned) {
+ LLVMBool IsSigned) {
GenericValue *GenVal = unwrap(GenValRef);
if (IsSigned)
return GenVal->IntVal.getSExtValue();
@@ -87,9 +87,9 @@ void LLVMDisposeGenericValue(LLVMGenericValueRef GenVal) {
/*===-- Operations on execution engines -----------------------------------===*/
-int LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
- LLVMModuleProviderRef MP,
- char **OutError) {
+LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
+ LLVMModuleProviderRef MP,
+ char **OutError) {
std::string Error;
EngineBuilder builder(unwrap(MP));
builder.setEngineKind(EngineKind::Either)
@@ -102,9 +102,9 @@ int LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
return 1;
}
-int LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp,
- LLVMModuleProviderRef MP,
- char **OutError) {
+LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp,
+ LLVMModuleProviderRef MP,
+ char **OutError) {
std::string Error;
EngineBuilder builder(unwrap(MP));
builder.setEngineKind(EngineKind::Interpreter)
@@ -117,10 +117,10 @@ int LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp,
return 1;
}
-int LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT,
- LLVMModuleProviderRef MP,
- unsigned OptLevel,
- char **OutError) {
+LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT,
+ LLVMModuleProviderRef MP,
+ unsigned OptLevel,
+ char **OutError) {
std::string Error;
EngineBuilder builder(unwrap(MP));
builder.setEngineKind(EngineKind::JIT)
@@ -177,9 +177,9 @@ void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){
unwrap(EE)->addModuleProvider(unwrap(MP));
}
-int LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE,
- LLVMModuleProviderRef MP,
- LLVMModuleRef *OutMod, char **OutError) {
+LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE,
+ LLVMModuleProviderRef MP,
+ LLVMModuleRef *OutMod, char **OutError) {
std::string Error;
if (Module *Gone = unwrap(EE)->removeModuleProvider(unwrap(MP), &Error)) {
*OutMod = wrap(Gone);
@@ -190,8 +190,8 @@ int LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE,
return 1;
}
-int LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
- LLVMValueRef *OutFn) {
+LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
+ LLVMValueRef *OutFn) {
if (Function *F = unwrap(EE)->FindFunctionNamed(Name)) {
*OutFn = wrap(F);
return 0;
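The bindings above replace bare int with the LLVMBool typedef. A hedged caller-side sketch; note that these creation functions return nonzero on failure, and the module provider is assumed to exist already:

#include "llvm-c/Core.h"
#include "llvm-c/ExecutionEngine.h"
#include <stdio.h>

static int makeEngine(LLVMModuleProviderRef MP) {
  LLVMExecutionEngineRef EE;
  char *Err = 0;
  if (LLVMCreateExecutionEngine(&EE, MP, &Err)) { /* LLVMBool: nonzero == failure */
    fprintf(stderr, "failed to create engine: %s\n", Err);
    LLVMDisposeMessage(Err);
    return 1;
  }
  return 0;
}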
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index b59cfd1..73f5558 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -56,7 +56,7 @@ static void executeFAddInst(GenericValue &Dest, GenericValue Src1,
IMPLEMENT_BINARY_OPERATOR(+, Float);
IMPLEMENT_BINARY_OPERATOR(+, Double);
default:
- errs() << "Unhandled type for FAdd instruction: " << *Ty << "\n";
+ dbgs() << "Unhandled type for FAdd instruction: " << *Ty << "\n";
llvm_unreachable(0);
}
}
@@ -67,7 +67,7 @@ static void executeFSubInst(GenericValue &Dest, GenericValue Src1,
IMPLEMENT_BINARY_OPERATOR(-, Float);
IMPLEMENT_BINARY_OPERATOR(-, Double);
default:
- errs() << "Unhandled type for FSub instruction: " << *Ty << "\n";
+ dbgs() << "Unhandled type for FSub instruction: " << *Ty << "\n";
llvm_unreachable(0);
}
}
@@ -78,7 +78,7 @@ static void executeFMulInst(GenericValue &Dest, GenericValue Src1,
IMPLEMENT_BINARY_OPERATOR(*, Float);
IMPLEMENT_BINARY_OPERATOR(*, Double);
default:
- errs() << "Unhandled type for FMul instruction: " << *Ty << "\n";
+ dbgs() << "Unhandled type for FMul instruction: " << *Ty << "\n";
llvm_unreachable(0);
}
}
@@ -89,7 +89,7 @@ static void executeFDivInst(GenericValue &Dest, GenericValue Src1,
IMPLEMENT_BINARY_OPERATOR(/, Float);
IMPLEMENT_BINARY_OPERATOR(/, Double);
default:
- errs() << "Unhandled type for FDiv instruction: " << *Ty << "\n";
+ dbgs() << "Unhandled type for FDiv instruction: " << *Ty << "\n";
llvm_unreachable(0);
}
}
@@ -104,7 +104,7 @@ static void executeFRemInst(GenericValue &Dest, GenericValue Src1,
Dest.DoubleVal = fmod(Src1.DoubleVal, Src2.DoubleVal);
break;
default:
- errs() << "Unhandled type for Rem instruction: " << *Ty << "\n";
+ dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n";
llvm_unreachable(0);
}
}
@@ -131,7 +131,7 @@ static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(eq,Ty);
IMPLEMENT_POINTER_ICMP(==);
default:
- errs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n";
+ dbgs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n";
llvm_unreachable(0);
}
return Dest;
@@ -144,7 +144,7 @@ static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(ne,Ty);
IMPLEMENT_POINTER_ICMP(!=);
default:
- errs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n";
+ dbgs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n";
llvm_unreachable(0);
}
return Dest;
@@ -157,7 +157,7 @@ static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(ult,Ty);
IMPLEMENT_POINTER_ICMP(<);
default:
- errs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n";
+ dbgs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n";
llvm_unreachable(0);
}
return Dest;
@@ -170,7 +170,7 @@ static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(slt,Ty);
IMPLEMENT_POINTER_ICMP(<);
default:
- errs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n";
+ dbgs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n";
llvm_unreachable(0);
}
return Dest;
@@ -183,7 +183,7 @@ static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(ugt,Ty);
IMPLEMENT_POINTER_ICMP(>);
default:
- errs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n";
+ dbgs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n";
llvm_unreachable(0);
}
return Dest;
@@ -196,7 +196,7 @@ static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(sgt,Ty);
IMPLEMENT_POINTER_ICMP(>);
default:
- errs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n";
+ dbgs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n";
llvm_unreachable(0);
}
return Dest;
@@ -209,7 +209,7 @@ static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(ule,Ty);
IMPLEMENT_POINTER_ICMP(<=);
default:
- errs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n";
+ dbgs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n";
llvm_unreachable(0);
}
return Dest;
@@ -222,7 +222,7 @@ static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(sle,Ty);
IMPLEMENT_POINTER_ICMP(<=);
default:
- errs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n";
+ dbgs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n";
llvm_unreachable(0);
}
return Dest;
@@ -235,7 +235,7 @@ static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(uge,Ty);
IMPLEMENT_POINTER_ICMP(>=);
default:
- errs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n";
+ dbgs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n";
llvm_unreachable(0);
}
return Dest;
@@ -248,7 +248,7 @@ static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_INTEGER_ICMP(sge,Ty);
IMPLEMENT_POINTER_ICMP(>=);
default:
- errs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n";
+ dbgs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n";
llvm_unreachable(0);
}
return Dest;
@@ -273,7 +273,7 @@ void Interpreter::visitICmpInst(ICmpInst &I) {
case ICmpInst::ICMP_UGE: R = executeICMP_UGE(Src1, Src2, Ty); break;
case ICmpInst::ICMP_SGE: R = executeICMP_SGE(Src1, Src2, Ty); break;
default:
- errs() << "Don't know how to handle this ICmp predicate!\n-->" << I;
+ dbgs() << "Don't know how to handle this ICmp predicate!\n-->" << I;
llvm_unreachable(0);
}
@@ -292,7 +292,7 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2,
IMPLEMENT_FCMP(==, Float);
IMPLEMENT_FCMP(==, Double);
default:
- errs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n";
+ dbgs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n";
llvm_unreachable(0);
}
return Dest;
@@ -306,7 +306,7 @@ static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_FCMP(!=, Double);
default:
- errs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n";
+ dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n";
llvm_unreachable(0);
}
return Dest;
@@ -319,7 +319,7 @@ static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_FCMP(<=, Float);
IMPLEMENT_FCMP(<=, Double);
default:
- errs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n";
+ dbgs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n";
llvm_unreachable(0);
}
return Dest;
@@ -332,7 +332,7 @@ static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_FCMP(>=, Float);
IMPLEMENT_FCMP(>=, Double);
default:
- errs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n";
+ dbgs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n";
llvm_unreachable(0);
}
return Dest;
@@ -345,7 +345,7 @@ static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2,
IMPLEMENT_FCMP(<, Float);
IMPLEMENT_FCMP(<, Double);
default:
- errs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n";
+ dbgs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n";
llvm_unreachable(0);
}
return Dest;
@@ -358,7 +358,7 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2,
IMPLEMENT_FCMP(>, Float);
IMPLEMENT_FCMP(>, Double);
default:
- errs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n";
+ dbgs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n";
llvm_unreachable(0);
}
return Dest;
@@ -467,7 +467,7 @@ void Interpreter::visitFCmpInst(FCmpInst &I) {
case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); break;
case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, Ty); break;
default:
- errs() << "Don't know how to handle this FCmp predicate!\n-->" << I;
+ dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I;
llvm_unreachable(0);
}
@@ -513,7 +513,7 @@ static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1,
return Result;
}
default:
- errs() << "Unhandled Cmp predicate\n";
+ dbgs() << "Unhandled Cmp predicate\n";
llvm_unreachable(0);
}
}
@@ -542,7 +542,7 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break;
case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break;
default:
- errs() << "Don't know how to handle this binary operator!\n-->" << I;
+ dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
llvm_unreachable(0);
}
@@ -602,7 +602,7 @@ void Interpreter::popStackAndReturnValueToCaller(const Type *RetTy,
ExecutionContext &CallingSF = ECStack.back();
if (Instruction *I = CallingSF.Caller.getInstruction()) {
// Save result...
- if (CallingSF.Caller.getType() != Type::getVoidTy(RetTy->getContext()))
+ if (!CallingSF.Caller.getType()->isVoidTy())
SetValue(I, Result, CallingSF);
if (InvokeInst *II = dyn_cast<InvokeInst> (I))
SwitchToNewBasicBlock (II->getNormalDest (), CallingSF);
@@ -744,7 +744,7 @@ void Interpreter::visitAllocaInst(AllocaInst &I) {
// Allocate enough memory to hold the type...
void *Memory = malloc(MemToAlloc);
- DEBUG(errs() << "Allocated Type: " << *Ty << " (" << TypeSize << " bytes) x "
+ DEBUG(dbgs() << "Allocated Type: " << *Ty << " (" << TypeSize << " bytes) x "
<< NumElements << " (Total: " << MemToAlloc << ") at "
<< uintptr_t(Memory) << '\n');
@@ -794,7 +794,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
GenericValue Result;
Result.PointerVal = ((char*)getOperandValue(Ptr, SF).PointerVal) + Total;
- DEBUG(errs() << "GEP Index " << Total << " bytes.\n");
+ DEBUG(dbgs() << "GEP Index " << Total << " bytes.\n");
return Result;
}
@@ -812,7 +812,7 @@ void Interpreter::visitLoadInst(LoadInst &I) {
LoadValueFromMemory(Result, Ptr, I.getType());
SetValue(&I, Result, SF);
if (I.isVolatile() && PrintVolatile)
- errs() << "Volatile load " << I;
+ dbgs() << "Volatile load " << I;
}
void Interpreter::visitStoreInst(StoreInst &I) {
@@ -822,7 +822,7 @@ void Interpreter::visitStoreInst(StoreInst &I) {
StoreValueToMemory(Val, (GenericValue *)GVTOP(SRC),
I.getOperand(0)->getType());
if (I.isVolatile() && PrintVolatile)
- errs() << "Volatile store: " << I;
+ dbgs() << "Volatile store: " << I;
}
//===----------------------------------------------------------------------===//
@@ -1164,7 +1164,7 @@ void Interpreter::visitVAArgInst(VAArgInst &I) {
IMPLEMENT_VAARG(Float);
IMPLEMENT_VAARG(Double);
default:
- errs() << "Unhandled dest type for vaarg instruction: " << *Ty << "\n";
+ dbgs() << "Unhandled dest type for vaarg instruction: " << *Ty << "\n";
llvm_unreachable(0);
}
@@ -1251,7 +1251,7 @@ GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
Dest.IntVal = Op0.IntVal.ashr(Op1.IntVal.getZExtValue());
break;
default:
- errs() << "Unhandled ConstantExpr: " << *CE << "\n";
+ dbgs() << "Unhandled ConstantExpr: " << *CE << "\n";
llvm_unreachable(0);
return GenericValue();
}
@@ -1324,24 +1324,24 @@ void Interpreter::run() {
// Track the number of dynamic instructions executed.
++NumDynamicInsts;
- DEBUG(errs() << "About to interpret: " << I);
+ DEBUG(dbgs() << "About to interpret: " << I);
visit(I); // Dispatch to one of the visit* methods...
#if 0
// This is not safe, as visiting the instruction could lower it and free I.
DEBUG(
if (!isa<CallInst>(I) && !isa<InvokeInst>(I) &&
I.getType() != Type::VoidTy) {
- errs() << " --> ";
+ dbgs() << " --> ";
const GenericValue &Val = SF.Values[&I];
switch (I.getType()->getTypeID()) {
default: llvm_unreachable("Invalid GenericValue Type");
- case Type::VoidTyID: errs() << "void"; break;
- case Type::FloatTyID: errs() << "float " << Val.FloatVal; break;
- case Type::DoubleTyID: errs() << "double " << Val.DoubleVal; break;
- case Type::PointerTyID: errs() << "void* " << intptr_t(Val.PointerVal);
+ case Type::VoidTyID: dbgs() << "void"; break;
+ case Type::FloatTyID: dbgs() << "float " << Val.FloatVal; break;
+ case Type::DoubleTyID: dbgs() << "double " << Val.DoubleVal; break;
+ case Type::PointerTyID: dbgs() << "void* " << intptr_t(Val.PointerVal);
break;
case Type::IntegerTyID:
- errs() << "i" << Val.IntVal.getBitWidth() << " "
+ dbgs() << "i" << Val.IntVal.getBitWidth() << " "
<< Val.IntVal.toStringUnsigned(10)
<< " (0x" << Val.IntVal.toStringUnsigned(16) << ")\n";
break;
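The interpreter changes also pick up the Type predicates used throughout this commit, which avoid threading an LLVMContext through every comparison. A small sketch:

#include "llvm/DerivedTypes.h"
using namespace llvm;

static bool returnsVoidOrInt32(const FunctionType *FTy) {
  // Before: FTy->getReturnType() == Type::getVoidTy(FTy->getContext())
  // and     FTy->getReturnType() == Type::getInt32Ty(...).
  return FTy->getReturnType()->isVoidTy() ||
         FTy->getReturnType()->isInteger(32);
}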
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index ebc2567..faf724f 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -411,11 +411,10 @@ GenericValue JIT::runFunction(Function *F,
// Handle some common cases first. These cases correspond to common `main'
// prototypes.
- if (RetTy == Type::getInt32Ty(F->getContext()) ||
- RetTy == Type::getVoidTy(F->getContext())) {
+ if (RetTy->isInteger(32) || RetTy->isVoidTy()) {
switch (ArgValues.size()) {
case 3:
- if (FTy->getParamType(0) == Type::getInt32Ty(F->getContext()) &&
+ if (FTy->getParamType(0)->isInteger(32) &&
isa<PointerType>(FTy->getParamType(1)) &&
isa<PointerType>(FTy->getParamType(2))) {
int (*PF)(int, char **, const char **) =
@@ -430,7 +429,7 @@ GenericValue JIT::runFunction(Function *F,
}
break;
case 2:
- if (FTy->getParamType(0) == Type::getInt32Ty(F->getContext()) &&
+ if (FTy->getParamType(0)->isInteger(32) &&
isa<PointerType>(FTy->getParamType(1))) {
int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr;
@@ -443,7 +442,7 @@ GenericValue JIT::runFunction(Function *F,
break;
case 1:
if (FTy->getNumParams() == 1 &&
- FTy->getParamType(0) == Type::getInt32Ty(F->getContext())) {
+ FTy->getParamType(0)->isInteger(32)) {
GenericValue rv;
int (*PF)(int) = (int(*)(int))(intptr_t)FPtr;
rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue()));
@@ -548,7 +547,7 @@ GenericValue JIT::runFunction(Function *F,
"", StubBB);
TheCall->setCallingConv(F->getCallingConv());
TheCall->setTailCall();
- if (TheCall->getType() != Type::getVoidTy(F->getContext()))
+ if (!TheCall->getType()->isVoidTy())
// Return result of the call.
ReturnInst::Create(F->getContext(), TheCall, StubBB);
else
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index ef323b5..0f604ac 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -377,7 +377,7 @@ namespace {
MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
if (jit.getJITInfo().needsGOT()) {
MemMgr->AllocateGOT();
- DEBUG(errs() << "JIT is managing a GOT\n");
+ DEBUG(dbgs() << "JIT is managing a GOT\n");
}
if (DwarfExceptionHandling || JITEmitDebugInfo) {
@@ -431,7 +431,7 @@ namespace {
if (MBBLocations.size() <= (unsigned)MBB->getNumber())
MBBLocations.resize((MBB->getNumber()+1)*2);
MBBLocations[MBB->getNumber()] = getCurrentPCValue();
- DEBUG(errs() << "JIT: Emitting BB" << MBB->getNumber() << " at ["
+ DEBUG(dbgs() << "JIT: Emitting BB" << MBB->getNumber() << " at ["
<< (void*) getCurrentPCValue() << "]\n");
}
@@ -547,7 +547,7 @@ void *JITResolver::getLazyFunctionStub(Function *F) {
TheJIT->updateGlobalMapping(F, Stub);
}
- DEBUG(errs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '"
+ DEBUG(dbgs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '"
<< F->getName() << "'\n");
// Finally, keep track of the stub-to-Function mapping so that the
@@ -577,7 +577,7 @@ void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) {
IndirectSym = TheJIT->getJITInfo().emitGlobalValueIndirectSym(GV, GVAddress,
JE);
- DEBUG(errs() << "JIT: Indirect symbol emitted at [" << IndirectSym
+ DEBUG(dbgs() << "JIT: Indirect symbol emitted at [" << IndirectSym
<< "] for GV '" << GV->getName() << "'\n");
return IndirectSym;
@@ -595,7 +595,7 @@ void *JITResolver::getExternalFunctionStub(void *FnAddr) {
Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr, JE);
JE.finishGVStub();
- DEBUG(errs() << "JIT: Stub emitted at [" << Stub
+ DEBUG(dbgs() << "JIT: Stub emitted at [" << Stub
<< "] for external function at '" << FnAddr << "'\n");
return Stub;
}
@@ -605,7 +605,7 @@ unsigned JITResolver::getGOTIndexForAddr(void* addr) {
if (!idx) {
idx = ++nextGOTIndex;
revGOTMap[addr] = idx;
- DEBUG(errs() << "JIT: Adding GOT entry " << idx << " for addr ["
+ DEBUG(dbgs() << "JIT: Adding GOT entry " << idx << " for addr ["
<< addr << "]\n");
}
return idx;
@@ -701,7 +701,7 @@ void *JITResolver::JITCompilerFn(void *Stub) {
+ F->getName() + "' when lazy compiles are disabled!");
}
- DEBUG(errs() << "JIT: Lazily resolving function '" << F->getName()
+ DEBUG(dbgs() << "JIT: Lazily resolving function '" << F->getName()
<< "' In stub ptr = " << Stub << " actual ptr = "
<< ActualPtr << "\n");
@@ -864,7 +864,7 @@ unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) {
size_t GVSize = (size_t)TheJIT->getTargetData()->getTypeAllocSize(ElTy);
size_t GVAlign =
(size_t)TheJIT->getTargetData()->getPreferredAlignment(GV);
- DEBUG(errs() << "JIT: Adding in size " << GVSize << " alignment " << GVAlign);
+ DEBUG(dbgs() << "JIT: Adding in size " << GVSize << " alignment " << GVAlign);
DEBUG(GV->dump());
// Assume code section ends with worst possible alignment, so first
// variable needs maximal padding.
@@ -992,7 +992,7 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
}
}
}
- DEBUG(errs() << "JIT: About to look through initializers\n");
+ DEBUG(dbgs() << "JIT: About to look through initializers\n");
// Look for more globals that are referenced only from initializers.
// GVSet.end is computed each time because the set can grow as we go.
for (SmallPtrSet<const GlobalVariable *, 8>::iterator I = GVSet.begin();
@@ -1006,14 +1006,14 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
}
void JITEmitter::startFunction(MachineFunction &F) {
- DEBUG(errs() << "JIT: Starting CodeGen of Function "
+ DEBUG(dbgs() << "JIT: Starting CodeGen of Function "
<< F.getFunction()->getName() << "\n");
uintptr_t ActualSize = 0;
// Set the memory writable, if it's not already
MemMgr->setMemoryWritable();
if (MemMgr->NeedsExactSize()) {
- DEBUG(errs() << "JIT: ExactSize\n");
+ DEBUG(dbgs() << "JIT: ExactSize\n");
const TargetInstrInfo* TII = F.getTarget().getInstrInfo();
MachineJumpTableInfo *MJTI = F.getJumpTableInfo();
MachineConstantPool *MCP = F.getConstantPool();
@@ -1040,12 +1040,12 @@ void JITEmitter::startFunction(MachineFunction &F) {
// Add the function size
ActualSize += TII->GetFunctionSizeInBytes(F);
- DEBUG(errs() << "JIT: ActualSize before globals " << ActualSize << "\n");
+ DEBUG(dbgs() << "JIT: ActualSize before globals " << ActualSize << "\n");
// Add the size of the globals that will be allocated after this function.
// These are all the ones referenced from this function that were not
// previously allocated.
ActualSize += GetSizeOfGlobalsInBytes(F);
- DEBUG(errs() << "JIT: ActualSize after globals " << ActualSize << "\n");
+ DEBUG(dbgs() << "JIT: ActualSize after globals " << ActualSize << "\n");
} else if (SizeEstimate > 0) {
// SizeEstimate will be non-zero on reallocation attempts.
ActualSize = SizeEstimate;
@@ -1104,7 +1104,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
if (MR.isExternalSymbol()) {
ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(),
false);
- DEBUG(errs() << "JIT: Map \'" << MR.getExternalSymbol() << "\' to ["
+ DEBUG(dbgs() << "JIT: Map \'" << MR.getExternalSymbol() << "\' to ["
<< ResultPtr << "]\n");
// If the target REALLY wants a stub for this function, emit it now.
@@ -1136,7 +1136,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
unsigned idx = Resolver.getGOTIndexForAddr(ResultPtr);
MR.setGOTIndex(idx);
if (((void**)MemMgr->getGOTBase())[idx] != ResultPtr) {
- DEBUG(errs() << "JIT: GOT was out of date for " << ResultPtr
+ DEBUG(dbgs() << "JIT: GOT was out of date for " << ResultPtr
<< " pointing at " << ((void**)MemMgr->getGOTBase())[idx]
<< "\n");
((void**)MemMgr->getGOTBase())[idx] = ResultPtr;
@@ -1153,7 +1153,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
if (MemMgr->isManagingGOT()) {
unsigned idx = Resolver.getGOTIndexForAddr((void*)BufferBegin);
if (((void**)MemMgr->getGOTBase())[idx] != (void*)BufferBegin) {
- DEBUG(errs() << "JIT: GOT was out of date for " << (void*)BufferBegin
+ DEBUG(dbgs() << "JIT: GOT was out of date for " << (void*)BufferBegin
<< " pointing at " << ((void**)MemMgr->getGOTBase())[idx]
<< "\n");
((void**)MemMgr->getGOTBase())[idx] = (void*)BufferBegin;
@@ -1182,7 +1182,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
TheJIT->NotifyFunctionEmitted(*F.getFunction(), FnStart, FnEnd-FnStart,
EmissionDetails);
- DEBUG(errs() << "JIT: Finished CodeGen of [" << (void*)FnStart
+ DEBUG(dbgs() << "JIT: Finished CodeGen of [" << (void*)FnStart
<< "] Function: " << F.getFunction()->getName()
<< ": " << (FnEnd-FnStart) << " bytes of text, "
<< Relocations.size() << " relocations\n");
@@ -1195,31 +1195,31 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
DEBUG(
if (sys::hasDisassembler()) {
- errs() << "JIT: Disassembled code:\n";
- errs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart,
+ dbgs() << "JIT: Disassembled code:\n";
+ dbgs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart,
(uintptr_t)FnStart);
} else {
- errs() << "JIT: Binary code:\n";
+ dbgs() << "JIT: Binary code:\n";
uint8_t* q = FnStart;
for (int i = 0; q < FnEnd; q += 4, ++i) {
if (i == 4)
i = 0;
if (i == 0)
- errs() << "JIT: " << (long)(q - FnStart) << ": ";
+ dbgs() << "JIT: " << (long)(q - FnStart) << ": ";
bool Done = false;
for (int j = 3; j >= 0; --j) {
if (q + j >= FnEnd)
Done = true;
else
- errs() << (unsigned short)q[j];
+ dbgs() << (unsigned short)q[j];
}
if (Done)
break;
- errs() << ' ';
+ dbgs() << ' ';
if (i == 3)
- errs() << '\n';
+ dbgs() << '\n';
}
- errs()<< '\n';
+ dbgs()<< '\n';
}
);
@@ -1268,7 +1268,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
}
void JITEmitter::retryWithMoreMemory(MachineFunction &F) {
- DEBUG(errs() << "JIT: Ran out of space for native code. Reattempting.\n");
+ DEBUG(dbgs() << "JIT: Ran out of space for native code. Reattempting.\n");
Relocations.clear(); // Clear the old relocations or we'll reapply them.
ConstPoolAddresses.clear();
++NumRetries;
@@ -1319,7 +1319,7 @@ void JITEmitter::deallocateMemForFunction(const Function *F) {
// in the JITResolver. Were there a memory manager deallocateStub routine,
// we could call that at this point too.
if (FnRefs.empty()) {
- DEBUG(errs() << "\nJIT: Invalidated Stub at [" << Stub << "]\n");
+ DEBUG(dbgs() << "\nJIT: Invalidated Stub at [" << Stub << "]\n");
StubFnRefs.erase(Stub);
// Invalidate the stub. If it is a GV stub, update the JIT's global
@@ -1365,7 +1365,7 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) {
if (ConstantPoolBase == 0) return; // Buffer overflow.
- DEBUG(errs() << "JIT: Emitted constant pool at [" << ConstantPoolBase
+ DEBUG(dbgs() << "JIT: Emitted constant pool at [" << ConstantPoolBase
<< "] (size: " << Size << ", alignment: " << Align << ")\n");
// Initialize the memory for all of the constant pool entries.
@@ -1383,8 +1383,8 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) {
"entry has not been implemented!");
}
TheJIT->InitializeMemory(CPE.Val.ConstVal, (void*)CAddr);
- DEBUG(errs() << "JIT: CP" << i << " at [0x";
- errs().write_hex(CAddr) << "]\n");
+ DEBUG(dbgs() << "JIT: CP" << i << " at [0x";
+ dbgs().write_hex(CAddr) << "]\n");
const Type *Ty = CPE.Val.ConstVal->getType();
Offset += TheJIT->getTargetData()->getTypeAllocSize(Ty);
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index 80cb999..a17caa1 100644
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -352,7 +352,7 @@ namespace {
// another block of memory and add it to the free list.
if (largest < ActualSize ||
largest <= FreeRangeHeader::getMinBlockSize()) {
- DEBUG(errs() << "JIT: Allocating another slab of memory for function.");
+ DEBUG(dbgs() << "JIT: Allocating another slab of memory for function.");
candidateBlock = allocateNewCodeSlab((size_t)ActualSize);
}
diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
index 52a8f71..d01c4b2 100644
--- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
@@ -50,9 +50,9 @@ OProfileJITEventListener::OProfileJITEventListener()
: Agent(op_open_agent()) {
if (Agent == NULL) {
const std::string err_str = sys::StrError();
- DEBUG(errs() << "Failed to connect to OProfile agent: " << err_str << "\n");
+ DEBUG(dbgs() << "Failed to connect to OProfile agent: " << err_str << "\n");
} else {
- DEBUG(errs() << "Connected to OProfile agent.\n");
+ DEBUG(dbgs() << "Connected to OProfile agent.\n");
}
}
@@ -60,10 +60,10 @@ OProfileJITEventListener::~OProfileJITEventListener() {
if (Agent != NULL) {
if (op_close_agent(Agent) == -1) {
const std::string err_str = sys::StrError();
- DEBUG(errs() << "Failed to disconnect from OProfile agent: "
+ DEBUG(dbgs() << "Failed to disconnect from OProfile agent: "
<< err_str << "\n");
} else {
- DEBUG(errs() << "Disconnected from OProfile agent.\n");
+ DEBUG(dbgs() << "Disconnected from OProfile agent.\n");
}
}
}
@@ -92,7 +92,7 @@ static debug_line_info LineStartToOProfileFormat(
const DebugLocTuple &tuple = MF.getDebugLocTuple(Loc);
Result.lineno = tuple.Line;
Result.filename = Filenames.getFilename(tuple.Scope);
- DEBUG(errs() << "Mapping " << reinterpret_cast<void*>(Result.vma) << " to "
+ DEBUG(dbgs() << "Mapping " << reinterpret_cast<void*>(Result.vma) << " to "
<< Result.filename << ":" << Result.lineno << "\n");
return Result;
}
@@ -105,7 +105,7 @@ void OProfileJITEventListener::NotifyFunctionEmitted(
if (op_write_native_code(Agent, F.getName().data(),
reinterpret_cast<uint64_t>(FnStart),
FnStart, FnSize) == -1) {
- DEBUG(errs() << "Failed to tell OProfile about native function "
+ DEBUG(dbgs() << "Failed to tell OProfile about native function "
<< F.getName() << " at ["
<< FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
return;
@@ -133,7 +133,7 @@ void OProfileJITEventListener::NotifyFunctionEmitted(
if (!LineInfo.empty()) {
if (op_write_debug_line_info(Agent, FnStart,
LineInfo.size(), &*LineInfo.begin()) == -1) {
- DEBUG(errs()
+ DEBUG(dbgs()
<< "Failed to tell OProfile about line numbers for native function "
<< F.getName() << " at ["
<< FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
@@ -145,7 +145,7 @@ void OProfileJITEventListener::NotifyFunctionEmitted(
void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
assert(FnStart && "Invalid function pointer");
if (op_unload_native_code(Agent, reinterpret_cast<uint64_t>(FnStart)) == -1) {
- DEBUG(errs()
+ DEBUG(dbgs()
<< "Failed to tell OProfile about unload of native function at "
<< FnStart << "\n");
}
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 104cbe9..dcd696c 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -25,6 +25,7 @@
#include "llvm/ValueSymbolTable.h"
#include "llvm/Instructions.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Path.h"
@@ -144,7 +145,7 @@ protected:
// for debugging...
virtual void dump() const {
- errs() << "AbstractTypeSet!\n";
+ dbgs() << "AbstractTypeSet!\n";
}
};
}
@@ -337,11 +338,11 @@ static bool LinkTypes(Module *Dest, const Module *Src, std::string *Err) {
static void PrintMap(const std::map<const Value*, Value*> &M) {
for (std::map<const Value*, Value*>::const_iterator I = M.begin(), E =M.end();
I != E; ++I) {
- errs() << " Fr: " << (void*)I->first << " ";
+ dbgs() << " Fr: " << (void*)I->first << " ";
I->first->dump();
- errs() << " To: " << (void*)I->second << " ";
+ dbgs() << " To: " << (void*)I->second << " ";
I->second->dump();
- errs() << "\n";
+ dbgs() << "\n";
}
}
#endif
@@ -404,10 +405,10 @@ static Value *RemapOperand(const Value *In,
}
#ifndef NDEBUG
- errs() << "LinkModules ValueMap: \n";
+ dbgs() << "LinkModules ValueMap: \n";
PrintMap(ValueMap);
- errs() << "Couldn't remap value: " << (void*)In << " " << *In << "\n";
+ dbgs() << "Couldn't remap value: " << (void*)In << " " << *In << "\n";
llvm_unreachable("Couldn't remap value!");
#endif
return 0;
@@ -854,9 +855,14 @@ static bool LinkAlias(Module *Dest, const Module *Src,
} else {
// No linking to be performed, simply create an identical version of the
// alias over in the dest module...
-
+ Constant *Aliasee = DAliasee;
+ // Fixup aliases to bitcasts. Note that aliases to GEPs are still broken
+ // by this, but aliases to GEPs are broken by a lot of other things as
+ // well, so it's less important.
+ if (SGA->getType() != DAliasee->getType())
+ Aliasee = ConstantExpr::getBitCast(DAliasee, SGA->getType());
NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(),
- SGA->getName(), DAliasee, Dest);
+ SGA->getName(), Aliasee, Dest);
CopyGVAttributes(NewGA, SGA);
// Proceed to 'common' steps
@@ -1222,9 +1228,15 @@ static bool LinkAppendingVars(Module *M,
static bool ResolveAliases(Module *Dest) {
for (Module::alias_iterator I = Dest->alias_begin(), E = Dest->alias_end();
I != E; ++I)
- if (const GlobalValue *GV = I->resolveAliasedGlobal())
- if (GV != I && !GV->isDeclaration())
- I->replaceAllUsesWith(const_cast<GlobalValue*>(GV));
+ // We can't use resolveAliasedGlobal here because we need to preserve
+ // bitcasts and GEPs.
+ if (const Constant *C = I->getAliasee()) {
+ while (const GlobalAlias *GA = dyn_cast<GlobalAlias>(C))
+ C = GA->getAliasee();
+ const GlobalValue *GV = dyn_cast<GlobalValue>(C);
+ if (C != I && !(GV && GV->isDeclaration()))
+ I->replaceAllUsesWith(const_cast<Constant*>(C));
+ }
return false;
}
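A standalone sketch of the chain walk the new ResolveAliases performs, assuming GA is any alias in the destination module; unlike resolveAliasedGlobal, it leaves bitcast/GEP ConstantExprs around the target intact:

#include "llvm/GlobalAlias.h"
using namespace llvm;

static const Constant *followAliasChain(const GlobalAlias *GA) {
  const Constant *C = GA->getAliasee();
  while (const GlobalAlias *Inner = dyn_cast<GlobalAlias>(C))
    C = Inner->getAliasee();   // skip alias-of-alias links only
  return C;
}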
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index a5a2256..a19ec19 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -11,6 +11,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -108,8 +109,8 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
}
void MCExpr::dump() const {
- print(errs(), 0);
- errs() << '\n';
+ print(dbgs(), 0);
+ dbgs() << '\n';
}
/* *** */
diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp
index d050318..7c7a644 100644
--- a/lib/MC/MCInst.cpp
+++ b/lib/MC/MCInst.cpp
@@ -9,6 +9,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -31,8 +32,8 @@ void MCOperand::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
}
void MCOperand::dump() const {
- print(errs(), 0);
- errs() << "\n";
+ print(dbgs(), 0);
+ dbgs() << "\n";
}
void MCInst::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
@@ -45,6 +46,6 @@ void MCInst::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
}
void MCInst::dump() const {
- print(errs(), 0);
- errs() << "\n";
+ print(dbgs(), 0);
+ dbgs() << "\n";
}
diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp
index c6812ed..4d520ec 100644
--- a/lib/MC/MCSectionELF.cpp
+++ b/lib/MC/MCSectionELF.cpp
@@ -8,10 +8,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/MC/MCAsmInfo.h"
-
using namespace llvm;
MCSectionELF *MCSectionELF::
@@ -23,7 +23,7 @@ Create(StringRef Section, unsigned Type, unsigned Flags,
// ShouldOmitSectionDirective - Decides whether a '.section' directive
// should be printed before the section name
bool MCSectionELF::ShouldOmitSectionDirective(const char *Name,
- const MCAsmInfo &MAI) const {
+ const MCAsmInfo &MAI) const {
// FIXME: Does .section .bss/.data/.text work everywhere??
if (strcmp(Name, ".text") == 0 ||
@@ -37,7 +37,6 @@ bool MCSectionELF::ShouldOmitSectionDirective(const char *Name,
// ShouldPrintSectionType - Only prints the section type if supported
bool MCSectionELF::ShouldPrintSectionType(unsigned Ty) const {
-
if (IsExplicit && !(Ty == SHT_NOBITS || Ty == SHT_PROGBITS))
return false;
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index b145d07..265d06c 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -9,6 +9,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -51,11 +52,14 @@ static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) {
return false;
}
-static void PrintMangledName(raw_ostream &OS, StringRef Str,
- const MCAsmInfo &MAI) {
+/// printMangledName - Print the specified string in mangled form if it uses
+/// any unusual characters.
+void MCSymbol::printMangledName(StringRef Str, raw_ostream &OS,
+ const MCAsmInfo *MAI) {
// The first character is not allowed to be a number unless the target
// explicitly allows it.
- if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9') {
+ if ((MAI == 0 || !MAI->doesAllowNameToStartWithDigit()) &&
+ Str[0] >= '0' && Str[0] <= '9') {
MangleLetter(OS, Str[0]);
Str = Str.substr(1);
}
@@ -94,7 +98,7 @@ void MCSymbol::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
// On systems that do not allow quoted names, print with mangling.
if (!MAI->doesAllowQuotesInName())
- return PrintMangledName(OS, getName(), *MAI);
+ return printMangledName(getName(), OS, MAI);
// If the string contains a double quote or newline, we still have to mangle
// it.
@@ -106,5 +110,5 @@ void MCSymbol::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
}
void MCSymbol::dump() const {
- print(errs(), 0);
+ print(dbgs(), 0);
}
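printMangledName is now callable without an MCAsmInfo. A sketch, assuming it is declared as a static member (the MCSymbol:: qualification above suggests so); the symbol name is made up:

#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
using namespace llvm;

static std::string mangleDemo() {
  std::string S;
  raw_string_ostream OS(S);
  // With MAI == 0 the conservative rule applies: a leading digit is not
  // allowed, so '9' gets escaped via MangleLetter.
  MCSymbol::printMangledName("9lives", OS, /*MAI=*/0);
  return OS.str();
}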
diff --git a/lib/MC/MCValue.cpp b/lib/MC/MCValue.cpp
index 69bd10c..c1222ec 100644
--- a/lib/MC/MCValue.cpp
+++ b/lib/MC/MCValue.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -30,5 +31,5 @@ void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
}
void MCValue::dump() const {
- print(errs(), 0);
+ print(dbgs(), 0);
}
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 9532e1e..9d14684 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -1580,12 +1580,12 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
uint64_t b = uint64_t(1) << 32;
#if 0
- DEBUG(errs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
- DEBUG(errs() << "KnuthDiv: original:");
- DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
- DEBUG(errs() << " by");
- DEBUG(for (int i = n; i >0; i--) errs() << " " << v[i-1]);
- DEBUG(errs() << '\n');
+ DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
+ DEBUG(dbgs() << "KnuthDiv: original:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
+ DEBUG(dbgs() << " by");
+ DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
+ DEBUG(dbgs() << '\n');
#endif
// D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
// u and v by d. Note that we have taken Knuth's advice here to use a power
@@ -1612,17 +1612,17 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
}
u[m+n] = u_carry;
#if 0
- DEBUG(errs() << "KnuthDiv: normal:");
- DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
- DEBUG(errs() << " by");
- DEBUG(for (int i = n; i >0; i--) errs() << " " << v[i-1]);
- DEBUG(errs() << '\n');
+ DEBUG(dbgs() << "KnuthDiv: normal:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
+ DEBUG(dbgs() << " by");
+ DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
+ DEBUG(dbgs() << '\n');
#endif
// D2. [Initialize j.] Set j to m. This is the loop counter over the places.
int j = m;
do {
- DEBUG(errs() << "KnuthDiv: quotient digit #" << j << '\n');
+ DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
// D3. [Calculate q'.].
// Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
// Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
@@ -1632,7 +1632,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
// value qp is one too large, and it eliminates all cases where qp is two
// too large.
uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
- DEBUG(errs() << "KnuthDiv: dividend == " << dividend << '\n');
+ DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
uint64_t qp = dividend / v[n-1];
uint64_t rp = dividend % v[n-1];
if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
@@ -1641,7 +1641,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
qp--;
}
- DEBUG(errs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
+ DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
// D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
// (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
@@ -1652,7 +1652,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
bool borrow = subtrahend > u_tmp;
- DEBUG(errs() << "KnuthDiv: u_tmp == " << u_tmp
+ DEBUG(dbgs() << "KnuthDiv: u_tmp == " << u_tmp
<< ", subtrahend == " << subtrahend
<< ", borrow = " << borrow << '\n');
@@ -1666,12 +1666,12 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
k++;
}
isNeg |= borrow;
- DEBUG(errs() << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " <<
+ DEBUG(dbgs() << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " <<
u[j+i+1] << '\n');
}
- DEBUG(errs() << "KnuthDiv: after subtraction:");
- DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
- DEBUG(errs() << '\n');
+ DEBUG(dbgs() << "KnuthDiv: after subtraction:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
+ DEBUG(dbgs() << '\n');
// The digits (u[j+n]...u[j]) should be kept positive; if the result of
// this step is actually negative, (u[j+n]...u[j]) should be left as the
// true value plus b**(n+1), namely as the b's complement of
@@ -1684,9 +1684,9 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
carry = carry && u[i] == 0;
}
}
- DEBUG(errs() << "KnuthDiv: after complement:");
- DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
- DEBUG(errs() << '\n');
+ DEBUG(dbgs() << "KnuthDiv: after complement:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
+ DEBUG(dbgs() << '\n');
// D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
// negative, go to step D6; otherwise go on to step D7.
@@ -1707,16 +1707,16 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
}
u[j+n] += carry;
}
- DEBUG(errs() << "KnuthDiv: after correction:");
- DEBUG(for (int i = m+n; i >=0; i--) errs() <<" " << u[i]);
- DEBUG(errs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
+ DEBUG(dbgs() << "KnuthDiv: after correction:");
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() <<" " << u[i]);
+ DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
// D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
} while (--j >= 0);
- DEBUG(errs() << "KnuthDiv: quotient:");
- DEBUG(for (int i = m; i >=0; i--) errs() <<" " << q[i]);
- DEBUG(errs() << '\n');
+ DEBUG(dbgs() << "KnuthDiv: quotient:");
+ DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]);
+ DEBUG(dbgs() << '\n');
// D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
// remainder may be obtained by dividing u[...] by d. If r is non-null we
@@ -1727,22 +1727,22 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
// shift right here. In order to mak
if (shift) {
unsigned carry = 0;
- DEBUG(errs() << "KnuthDiv: remainder:");
+ DEBUG(dbgs() << "KnuthDiv: remainder:");
for (int i = n-1; i >= 0; i--) {
r[i] = (u[i] >> shift) | carry;
carry = u[i] << (32 - shift);
- DEBUG(errs() << " " << r[i]);
+ DEBUG(dbgs() << " " << r[i]);
}
} else {
for (int i = n-1; i >= 0; i--) {
r[i] = u[i];
- DEBUG(errs() << " " << r[i]);
+ DEBUG(dbgs() << " " << r[i]);
}
}
- DEBUG(errs() << '\n');
+ DEBUG(dbgs() << '\n');
}
#if 0
- DEBUG(errs() << '\n');
+ DEBUG(dbgs() << '\n');
#endif
}
@@ -2191,7 +2191,7 @@ void APInt::dump() const {
SmallString<40> S, U;
this->toStringUnsigned(U);
this->toStringSigned(S);
- errs() << "APInt(" << BitWidth << "b, "
+ dbgs() << "APInt(" << BitWidth << "b, "
<< U.str() << "u " << S.str() << "s)";
}
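
The D3 step traced by the DEBUG lines above is the subtle part of Knuth's Algorithm D. A self-contained sketch of just the quotient-digit estimate and its correction, assuming 32-bit digits, base b = 2^32, and a normalized divisor (v1 >= b/2, which step D1 guarantees):

#include <cstdint>
#include <iostream>

// Estimate one quotient digit qp from the top two dividend digits (u2,u1)
// and the top divisor digit v1, then apply Knuth's correction using the
// next digits u0 and v0. This mirrors step D3 above; the real loop also
// performs the multiply/subtract (D4) and add-back (D6) steps.
static uint64_t estimateQhat(uint32_t u2, uint32_t u1, uint32_t u0,
                             uint32_t v1, uint32_t v0) {
  const uint64_t b = uint64_t(1) << 32;
  uint64_t dividend = (uint64_t(u2) << 32) | u1;
  uint64_t qp = dividend / v1;
  uint64_t rp = dividend % v1;
  while (qp == b || qp * v0 > b * rp + u0) {
    --qp;                 // the estimate is at most two too large
    rp += v1;
    if (rp >= b) break;   // the test only applies while rp fits in a digit
  }
  return qp;
}

int main() {
  // 0x00000001_00000000 divided by the normalized digit 0x80000000: qp = 2.
  std::cout << estimateQhat(1, 0, 0, 0x80000000u, 0) << '\n';
}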
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index b6c0e08..fa692be8 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -354,7 +354,7 @@ static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value,
// we don't need to pass argc/argv in.
assert(PGOpt->getValueExpectedFlag() != cl::ValueRequired &&
"Option can not be cl::Grouping AND cl::ValueRequired!");
- int Dummy;
+ int Dummy = 0;
ErrorParsing |= ProvideOption(PGOpt, OneArgName,
StringRef(), 0, 0, Dummy);
@@ -778,10 +778,10 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
free(*i);
}
- DEBUG(errs() << "Args: ";
+ DEBUG(dbgs() << "Args: ";
for (int i = 0; i < argc; ++i)
- errs() << argv[i] << ' ';
- errs() << '\n';
+ dbgs() << argv[i] << ' ';
+ dbgs() << '\n';
);
// If we had an error processing our arguments, don't let the program execute
diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp
index e427f82..ddf14e3 100644
--- a/lib/Support/ConstantRange.cpp
+++ b/lib/Support/ConstantRange.cpp
@@ -22,6 +22,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Instructions.h"
using namespace llvm;
@@ -655,7 +656,7 @@ void ConstantRange::print(raw_ostream &OS) const {
/// dump - Allow printing from a debugger easily...
///
void ConstantRange::dump() const {
- print(errs());
+ print(dbgs());
}
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index dff4f03..8bb1566 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Threading.h"
@@ -62,11 +63,11 @@ void llvm_unreachable_internal(const char *msg, const char *file,
// llvm_unreachable is intended to be used to indicate "impossible"
// situations, and not legitimate runtime errors.
if (msg)
- errs() << msg << "\n";
- errs() << "UNREACHABLE executed";
+ dbgs() << msg << "\n";
+ dbgs() << "UNREACHABLE executed";
if (file)
- errs() << " at " << file << ":" << line;
- errs() << "!\n";
+ dbgs() << " at " << file << ":" << line;
+ dbgs() << "!\n";
abort();
}
}
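
Typical use of the function above goes through the llvm_unreachable macro, which supplies __FILE__ and __LINE__ in debug builds. A small sketch (widthFor is hypothetical):

#include "llvm/Support/ErrorHandling.h"

int widthFor(char Kind) {
  switch (Kind) {
  case 'b': return 8;
  case 'h': return 16;
  case 'w': return 32;
  }
  // Impossible by construction; aborts via llvm_unreachable_internal.
  llvm_unreachable("unknown width kind");
}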
diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp
index 70f2cfa..9ab3666 100644
--- a/lib/Support/FormattedStream.cpp
+++ b/lib/Support/FormattedStream.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
@@ -91,3 +92,10 @@ formatted_raw_ostream &llvm::ferrs() {
static formatted_raw_ostream S(errs());
return S;
}
+
+/// fdbgs() - This returns a reference to a formatted_raw_ostream for
+/// the debug stream. Use it like: fdbgs() << "foo" << "bar";
+formatted_raw_ostream &llvm::fdbgs() {
+ static formatted_raw_ostream S(dbgs());
+ return S;
+}
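
A short usage sketch for the new fdbgs(): because it wraps dbgs() in a formatted_raw_ostream, column-tracking helpers such as PadToColumn become available on debug output. Everything below other than fdbgs and PadToColumn is illustrative:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;

// Hypothetical helper: print opcode and operands in aligned columns on the
// debug stream, something a plain raw_ostream cannot do without manual
// column counting.
void printAligned(StringRef Opcode, StringRef Operands) {
  fdbgs() << Opcode;
  fdbgs().PadToColumn(12);
  fdbgs() << Operands << '\n';
}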
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index 14f94bc..e787670 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Mutex.h"
@@ -127,6 +128,6 @@ StatisticInfo::~StatisticInfo() {
OutStream << '\n'; // Flush the output stream...
OutStream.flush();
- if (&OutStream != &outs() && &OutStream != &errs())
+ if (&OutStream != &outs() && &OutStream != &errs() && &OutStream != &dbgs())
delete &OutStream; // Close the file.
}
diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp
index 1b233ab..785e0ec 100644
--- a/lib/Support/StringExtras.cpp
+++ b/lib/Support/StringExtras.cpp
@@ -11,50 +11,53 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include <cstring>
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
using namespace llvm;
+/// StrInStrNoCase - Portable version of strcasestr. Locates the first
+/// occurrence of string 's2' in string 's1', ignoring case. Returns
+/// the offset of the occurrence in s1, or npos if s2 cannot be found.
+StringRef::size_type llvm::StrInStrNoCase(StringRef s1, StringRef s2) {
+ size_t N = s2.size(), M = s1.size();
+ if (N > M)
+ return StringRef::npos;
+ for (size_t i = 0, e = M - N + 1; i != e; ++i)
+ if (s1.substr(i, N).equals_lower(s2))
+ return i;
+ return StringRef::npos;
+}
+
/// getToken - This function extracts one token from source, ignoring any
/// leading characters that appear in the Delimiters string, and ending the
/// token at any of the characters that appear in the Delimiters string. If
/// there are no tokens in the source string, an empty string is returned.
-/// The Source source string is updated in place to remove the returned string
-/// and any delimiter prefix from it.
-std::string llvm::getToken(std::string &Source, const char *Delimiters) {
- size_t NumDelimiters = std::strlen(Delimiters);
-
+/// The function returns a pair containing the extracted token and the
+/// remaining tail string.
+std::pair<StringRef, StringRef> llvm::getToken(StringRef Source,
+ StringRef Delimiters) {
// Figure out where the token starts.
- std::string::size_type Start =
- Source.find_first_not_of(Delimiters, 0, NumDelimiters);
- if (Start == std::string::npos) Start = Source.size();
-
- // Find the next occurance of the delimiter.
- std::string::size_type End =
- Source.find_first_of(Delimiters, Start, NumDelimiters);
- if (End == std::string::npos) End = Source.size();
-
- // Create the return token.
- std::string Result = std::string(Source.begin()+Start, Source.begin()+End);
+ StringRef::size_type Start = Source.find_first_not_of(Delimiters);
+ if (Start == StringRef::npos) Start = Source.size();
- // Erase the token that we read in.
- Source.erase(Source.begin(), Source.begin()+End);
+ // Find the next occurrence of the delimiter.
+ StringRef::size_type End = Source.find_first_of(Delimiters, Start);
+ if (End == StringRef::npos) End = Source.size();
- return Result;
+ return std::make_pair(Source.slice(Start, End), Source.substr(End));
}
/// SplitString - Split up the specified string according to the specified
/// delimiters, appending the result fragments to the output list.
-void llvm::SplitString(const std::string &Source,
- std::vector<std::string> &OutFragments,
- const char *Delimiters) {
- std::string S = Source;
-
- std::string S2 = getToken(S, Delimiters);
+void llvm::SplitString(StringRef Source,
+ SmallVectorImpl<StringRef> &OutFragments,
+ StringRef Delimiters) {
+ StringRef S2, S;
+ tie(S2, S) = getToken(Source, Delimiters);
while (!S2.empty()) {
OutFragments.push_back(S2);
- S2 = getToken(S, Delimiters);
+ tie(S2, S) = getToken(S, Delimiters);
}
}
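
A usage sketch for the three StringRef-based helpers above (the surrounding function is illustrative). Note that getToken now returns a (token, tail) pair instead of mutating its input, and that StrInStrNoCase searches for its second argument inside its first:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include <cassert>
using namespace llvm;

void demo() {
  // The needle is the second argument; the offset is into the first.
  assert(StrInStrNoCase("Hello, World", "WORLD") == 7);

  // getToken returns (token, tail); the leading delimiters are skipped.
  StringRef Tok, Rest;
  tie(Tok, Rest) = getToken("  add r0, r1", " ");
  assert(Tok == "add" && Rest == " r0, r1");

  // SplitString collects every token.
  SmallVector<StringRef, 8> Words;
  SplitString("one two  three", Words, " ");
  assert(Words.size() == 3 && Words[1] == "two");
}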
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index e4a9984..ae2640b 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/SmallVector.h"
+
using namespace llvm;
// MSVC emits references to this into the translation units which reference it.
@@ -51,13 +51,18 @@ unsigned StringRef::edit_distance(llvm::StringRef Other,
size_type m = size();
size_type n = Other.size();
- SmallVector<unsigned, 32> previous(n+1, 0);
- for (SmallVector<unsigned, 32>::size_type i = 0; i <= n; ++i)
+ const unsigned SmallBufferSize = 64;
+ unsigned SmallBuffer[SmallBufferSize];
+ unsigned *Allocated = 0;
+ unsigned *previous = SmallBuffer;
+ if (2*(n + 1) > SmallBufferSize)
+ Allocated = previous = new unsigned [2*(n+1)];
+ unsigned *current = previous + (n + 1);
+
+ for (unsigned i = 0; i <= n; ++i)
previous[i] = i;
- SmallVector<unsigned, 32> current(n+1, 0);
for (size_type y = 1; y <= m; ++y) {
- current.assign(n+1, 0);
current[0] = y;
for (size_type x = 1; x <= n; ++x) {
if (AllowReplacements) {
@@ -69,10 +74,16 @@ unsigned StringRef::edit_distance(llvm::StringRef Other,
else current[x] = min(current[x-1], previous[x]) + 1;
}
}
- current.swap(previous);
+
+ unsigned *tmp = current;
+ current = previous;
+ previous = tmp;
}
- return previous[n];
+ unsigned Result = previous[n];
+ delete [] Allocated;
+
+ return Result;
}
//===----------------------------------------------------------------------===//
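
The hunk above trades the two SmallVectors for a single buffer holding both dynamic-programming rows. The scheme in miniature, independent of StringRef:

#include <algorithm>
#include <cstring>
#include <iostream>

// Two-row Levenshtein distance: only the previous and current rows are
// live, and both share one stack buffer when 2*(n+1) entries fit, so the
// common case does no heap allocation at all.
static unsigned editDistance(const char *A, const char *B) {
  size_t m = std::strlen(A), n = std::strlen(B);
  unsigned Small[128];
  unsigned *Heap = (2 * (n + 1) > 128) ? new unsigned[2 * (n + 1)] : 0;
  unsigned *prev = Heap ? Heap : Small;
  unsigned *cur = prev + (n + 1);
  for (size_t j = 0; j <= n; ++j)
    prev[j] = j;
  for (size_t i = 1; i <= m; ++i) {
    cur[0] = i;
    for (size_t j = 1; j <= n; ++j)
      cur[j] = std::min(prev[j - 1] + (A[i - 1] != B[j - 1]),
                        std::min(prev[j], cur[j - 1]) + 1);
    std::swap(prev, cur); // reuse the old row as the next current row
  }
  unsigned Result = prev[n];
  delete[] Heap;
  return Result;
}

int main() { std::cout << editDistance("kitten", "sitting") << '\n'; } // 3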
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 7d32ee6..4bdfac2 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
@@ -373,7 +374,7 @@ void TimerGroup::removeTimer() {
TimersToPrint.clear();
- if (OutStream != &errs() && OutStream != &outs())
+ if (OutStream != &errs() && OutStream != &outs() && OutStream != &dbgs())
delete OutStream; // Close the file...
}
}
diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp
index 292c0c2..21504e9 100644
--- a/lib/Support/Twine.cpp
+++ b/lib/Support/Twine.cpp
@@ -9,13 +9,13 @@
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
std::string Twine::str() const {
SmallString<256> Vec;
- toVector(Vec);
- return std::string(Vec.begin(), Vec.end());
+ return toStringRef(Vec).str();
}
void Twine::toVector(SmallVectorImpl<char> &Out) const {
@@ -23,6 +23,13 @@ void Twine::toVector(SmallVectorImpl<char> &Out) const {
print(OS);
}
+StringRef Twine::toStringRef(SmallVectorImpl<char> &Out) const {
+ if (isSingleStringRef())
+ return getSingleStringRef();
+ toVector(Out);
+ return StringRef(Out.data(), Out.size());
+}
+
void Twine::printOneChild(raw_ostream &OS, const void *Ptr,
NodeKind Kind) const {
switch (Kind) {
@@ -125,9 +132,9 @@ void Twine::printRepr(raw_ostream &OS) const {
}
void Twine::dump() const {
- print(llvm::errs());
+ print(llvm::dbgs());
}
void Twine::dumpRepr() const {
- printRepr(llvm::errs());
+ printRepr(llvm::dbgs());
}
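
A usage sketch for the new Twine::toStringRef: the scratch buffer is written only when the twine really is a concatenation, which is exactly what the slimmed-down Twine::str() above exploits. printSymbol is illustrative:

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void printSymbol(StringRef Prefix, StringRef Name) {
  SmallString<64> Storage;
  // Build and consume the twine in one expression (twines reference their
  // operands, so they must not outlive them). A single-StringRef twine is
  // returned as-is; a concatenation is flattened into Storage.
  StringRef S = (Twine(Prefix) + Name).toStringRef(Storage);
  outs() << S << '\n';
}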
diff --git a/lib/System/Win32/DynamicLibrary.inc b/lib/System/Win32/DynamicLibrary.inc
index 10e64aa..c9a89e5 100644
--- a/lib/System/Win32/DynamicLibrary.inc
+++ b/lib/System/Win32/DynamicLibrary.inc
@@ -79,7 +79,7 @@ extern "C" {
// Mingw32 uses msvcrt.dll by default. Don't ignore it.
// Otherwise, user should be aware, what he's doing :)
stricmp(ModuleName, "msvcrt") != 0 &&
-#endif
+#endif
stricmp(ModuleName, "msvcrt20") != 0 &&
stricmp(ModuleName, "msvcrt40") != 0) {
OpenedHandles.push_back((HMODULE)ModuleBase);
@@ -119,24 +119,24 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *filename,
extern "C" { extern void *SYM; }
#if defined(__MINGW32__)
- EXPLICIT_SYMBOL_DEF(_alloca);
- EXPLICIT_SYMBOL_DEF(__main);
- EXPLICIT_SYMBOL_DEF(__ashldi3);
- EXPLICIT_SYMBOL_DEF(__ashrdi3);
- EXPLICIT_SYMBOL_DEF(__cmpdi2);
- EXPLICIT_SYMBOL_DEF(__divdi3);
- EXPLICIT_SYMBOL_DEF(__fixdfdi);
- EXPLICIT_SYMBOL_DEF(__fixsfdi);
- EXPLICIT_SYMBOL_DEF(__fixunsdfdi);
- EXPLICIT_SYMBOL_DEF(__fixunssfdi);
- EXPLICIT_SYMBOL_DEF(__floatdidf);
- EXPLICIT_SYMBOL_DEF(__floatdisf);
- EXPLICIT_SYMBOL_DEF(__lshrdi3);
- EXPLICIT_SYMBOL_DEF(__moddi3);
- EXPLICIT_SYMBOL_DEF(__udivdi3);
- EXPLICIT_SYMBOL_DEF(__umoddi3);
+ EXPLICIT_SYMBOL_DEF(_alloca)
+ EXPLICIT_SYMBOL_DEF(__main)
+ EXPLICIT_SYMBOL_DEF(__ashldi3)
+ EXPLICIT_SYMBOL_DEF(__ashrdi3)
+ EXPLICIT_SYMBOL_DEF(__cmpdi2)
+ EXPLICIT_SYMBOL_DEF(__divdi3)
+ EXPLICIT_SYMBOL_DEF(__fixdfdi)
+ EXPLICIT_SYMBOL_DEF(__fixsfdi)
+ EXPLICIT_SYMBOL_DEF(__fixunsdfdi)
+ EXPLICIT_SYMBOL_DEF(__fixunssfdi)
+ EXPLICIT_SYMBOL_DEF(__floatdidf)
+ EXPLICIT_SYMBOL_DEF(__floatdisf)
+ EXPLICIT_SYMBOL_DEF(__lshrdi3)
+ EXPLICIT_SYMBOL_DEF(__moddi3)
+ EXPLICIT_SYMBOL_DEF(__udivdi3)
+ EXPLICIT_SYMBOL_DEF(__umoddi3)
#elif defined(_MSC_VER)
- EXPLICIT_SYMBOL_DEF(_alloca_probe);
+ EXPLICIT_SYMBOL_DEF(_alloca_probe)
#endif
#endif
@@ -181,7 +181,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
EXPLICIT_SYMBOL2(alloca, _alloca);
#undef EXPLICIT_SYMBOL
#undef EXPLICIT_SYMBOL2
-#undef EXPLICIT_SYMBOL_DEF
+#undef EXPLICIT_SYMBOL_DEF
}
#elif defined(_MSC_VER)
{
@@ -189,8 +189,8 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
EXPLICIT_SYMBOL2(_alloca, _alloca_probe);
#undef EXPLICIT_SYMBOL
#undef EXPLICIT_SYMBOL2
-#undef EXPLICIT_SYMBOL_DEF
- }
+#undef EXPLICIT_SYMBOL_DEF
+ }
#endif
return 0;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 7cfa097..969c4a4 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -938,6 +938,35 @@ ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
return false;
}
+/// Create a copy of a const pool value. Update CPI to the new index and return
+/// the label UID.
+static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
+ MachineConstantPool *MCP = MF.getConstantPool();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
+ assert(MCPE.isMachineConstantPoolEntry() &&
+ "Expecting a machine constantpool entry!");
+ ARMConstantPoolValue *ACPV =
+ static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
+
+ unsigned PCLabelId = AFI->createConstPoolEntryUId();
+ ARMConstantPoolValue *NewCPV = 0;
+ if (ACPV->isGlobalValue())
+ NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
+ ARMCP::CPValue, 4);
+ else if (ACPV->isExtSymbol())
+ NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(),
+ ACPV->getSymbol(), PCLabelId, 4);
+ else if (ACPV->isBlockAddress())
+ NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
+ ARMCP::CPBlockAddress, 4);
+ else
+ llvm_unreachable("Unexpected ARM constantpool value type!!");
+ CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
+ return PCLabelId;
+}
+
void ARMBaseInstrInfo::
reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
@@ -960,28 +989,8 @@ reMaterialize(MachineBasicBlock &MBB,
case ARM::tLDRpci_pic:
case ARM::t2LDRpci_pic: {
MachineFunction &MF = *MBB.getParent();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- MachineConstantPool *MCP = MF.getConstantPool();
unsigned CPI = Orig->getOperand(1).getIndex();
- const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
- assert(MCPE.isMachineConstantPoolEntry() &&
- "Expecting a machine constantpool entry!");
- ARMConstantPoolValue *ACPV =
- static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
- unsigned PCLabelId = AFI->createConstPoolEntryUId();
- ARMConstantPoolValue *NewCPV = 0;
- if (ACPV->isGlobalValue())
- NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
- ARMCP::CPValue, 4);
- else if (ACPV->isExtSymbol())
- NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(),
- ACPV->getSymbol(), PCLabelId, 4);
- else if (ACPV->isBlockAddress())
- NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
- ARMCP::CPBlockAddress, 4);
- else
- llvm_unreachable("Unexpected ARM constantpool value type!!");
- CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
+ unsigned PCLabelId = duplicateCPV(MF, CPI);
MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
DestReg)
.addConstantPoolIndex(CPI).addImm(PCLabelId);
@@ -994,6 +1003,22 @@ reMaterialize(MachineBasicBlock &MBB,
NewMI->getOperand(0).setSubReg(SubIdx);
}
+MachineInstr *
+ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
+ MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF);
+ switch(Orig->getOpcode()) {
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ unsigned CPI = Orig->getOperand(1).getIndex();
+ unsigned PCLabelId = duplicateCPV(MF, CPI);
+ Orig->getOperand(1).setIndex(CPI);
+ Orig->getOperand(2).setImm(PCLabelId);
+ break;
+ }
+ }
+ return MI;
+}
+
bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0,
const MachineInstr *MI1,
const MachineRegisterInfo *MRI) const {
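
The shape shared by reMaterialize and the new duplicate hook, reduced to a sketch (the helper name is hypothetical; only duplicateCPV above is real): clone the instruction generically, then rewrite the operands that must stay unique per copy.

// Each PIC constant-pool load needs its own pool entry and PC label, so a
// generic clone is followed by a fix-up of operand 1 (the CP index) and
// operand 2 (the label UID).
MachineInstr *cloneWithFreshCPEntry(MachineInstr *Orig, MachineFunction &MF) {
  MachineInstr *MI = MF.CloneMachineInstr(Orig);
  unsigned CPI = MI->getOperand(1).getIndex();
  unsigned PCLabelId = duplicateCPV(MF, CPI); // fresh entry, updates CPI
  MI->getOperand(1).setIndex(CPI);
  MI->getOperand(2).setImm(PCLabelId);
  return MI;
}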
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 78d9135..0d9d4a7 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -287,6 +287,8 @@ public:
const MachineInstr *Orig,
const TargetRegisterInfo *TRI) const;
+ MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
+
virtual bool isIdentical(const MachineInstr *MI, const MachineInstr *Other,
const MachineRegisterInfo *MRI) const;
};
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 7aebdf4..f1b6e1d 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -217,7 +217,8 @@ ARMBaseRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
? DarwinCalleeSavedRegClasses : CalleeSavedRegClasses;
}
-BitVector ARMBaseRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+BitVector ARMBaseRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const {
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
Reserved.set(ARM::SP);
@@ -494,7 +495,8 @@ needsStackRealignment(const MachineFunction &MF) const {
!MFI->hasVarSizedObjects());
}
-bool ARMBaseRegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const {
+bool ARMBaseRegisterInfo::
+cannotEliminateFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
if (NoFramePointerElim && MFI->hasCalls())
return true;
@@ -523,7 +525,7 @@ static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) {
/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
-/// instructions will require scratch register during their expansion later.
+/// instructions will require a scratch register during their expansion later.
unsigned
ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
unsigned Limit = (1 << 12) - 1;
@@ -547,6 +549,9 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
// When the stack offset is negative, we will end up using
// the i8 instructions instead.
return (1 << 8) - 1;
+
+ if (AddrMode == ARMII::AddrMode6)
+ return 0;
break; // At most one FI per instruction
}
}
@@ -557,7 +562,7 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
void
ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
+ RegScavenger *RS) const {
// This tells PEI to spill the FP as if it is any other callee-save register
// to take advantage the eliminateFrameIndex machinery. This also ensures it
// is spilled in the order specified by getCalleeSavedRegs() to make it easier
@@ -852,7 +857,7 @@ int ARMBaseRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
}
unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
- const MachineFunction &MF) const {
+ const MachineFunction &MF) const {
switch (Reg) {
default: break;
// Return 0 if either register of the pair is a special register.
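
Why the new AddrMode6 case returns 0: NEON's vld/vst addressing carries no immediate offset field at all, so any frame reference needs a scratch register regardless of the offset's size. The "keep the most restrictive limit" folding that estimateRSStackSizeLimit performs, sketched with a made-up enum (the real code switches on ARMII addressing modes):

#include <algorithm>

enum AddrMode { Imm12, Imm8, NoOffsetField };

unsigned foldLimit(unsigned Limit, AddrMode AM) {
  switch (AM) {
  case Imm12:         return std::min(Limit, (1u << 12) - 1);
  case Imm8:          return std::min(Limit, (1u << 8) - 1);
  case NoOffsetField: return 0; // e.g. AddrMode6: always scavenge
  }
  return Limit;
}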
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index d63f3e6..14a45b3 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -64,53 +64,53 @@ public:
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
- SDNode *Select(SDValue Op);
+ SDNode *Select(SDNode *N);
virtual void InstructionSelect();
- bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A,
+ bool SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &A,
SDValue &B, SDValue &C);
- bool SelectAddrMode2(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectAddrMode2(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Offset, SDValue &Opc);
- bool SelectAddrMode2Offset(SDValue Op, SDValue N,
+ bool SelectAddrMode2Offset(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc);
- bool SelectAddrMode3(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectAddrMode3(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Offset, SDValue &Opc);
- bool SelectAddrMode3Offset(SDValue Op, SDValue N,
+ bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc);
- bool SelectAddrMode4(SDValue Op, SDValue N, SDValue &Addr,
+ bool SelectAddrMode4(SDNode *Op, SDValue N, SDValue &Addr,
SDValue &Mode);
- bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectAddrMode5(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Offset);
- bool SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update,
+ bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Update,
SDValue &Opc, SDValue &Align);
- bool SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset,
+ bool SelectAddrModePC(SDNode *Op, SDValue N, SDValue &Offset,
SDValue &Label);
- bool SelectThumbAddrModeRR(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectThumbAddrModeRR(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Offset);
- bool SelectThumbAddrModeRI5(SDValue Op, SDValue N, unsigned Scale,
+ bool SelectThumbAddrModeRI5(SDNode *Op, SDValue N, unsigned Scale,
SDValue &Base, SDValue &OffImm,
SDValue &Offset);
- bool SelectThumbAddrModeS1(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectThumbAddrModeS1(SDNode *Op, SDValue N, SDValue &Base,
SDValue &OffImm, SDValue &Offset);
- bool SelectThumbAddrModeS2(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectThumbAddrModeS2(SDNode *Op, SDValue N, SDValue &Base,
SDValue &OffImm, SDValue &Offset);
- bool SelectThumbAddrModeS4(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectThumbAddrModeS4(SDNode *Op, SDValue N, SDValue &Base,
SDValue &OffImm, SDValue &Offset);
- bool SelectThumbAddrModeSP(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectThumbAddrModeSP(SDNode *Op, SDValue N, SDValue &Base,
SDValue &OffImm);
- bool SelectT2ShifterOperandReg(SDValue Op, SDValue N,
+ bool SelectT2ShifterOperandReg(SDNode *Op, SDValue N,
SDValue &BaseReg, SDValue &Opc);
- bool SelectT2AddrModeImm12(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectT2AddrModeImm12(SDNode *Op, SDValue N, SDValue &Base,
SDValue &OffImm);
- bool SelectT2AddrModeImm8(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectT2AddrModeImm8(SDNode *Op, SDValue N, SDValue &Base,
SDValue &OffImm);
- bool SelectT2AddrModeImm8Offset(SDValue Op, SDValue N,
+ bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
SDValue &OffImm);
- bool SelectT2AddrModeImm8s4(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectT2AddrModeImm8s4(SDNode *Op, SDValue N, SDValue &Base,
SDValue &OffImm);
- bool SelectT2AddrModeSoReg(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectT2AddrModeSoReg(SDNode *Op, SDValue N, SDValue &Base,
SDValue &OffReg, SDValue &ShImm);
// Include the pieces autogenerated from the target description.
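
In miniature, the pattern of this file's mechanical change (SelectFoo is hypothetical): the root being selected is passed as the node itself, since these selectors only ever used the node's identity, never a specific result number of an SDValue.

// before: bool SelectFoo(SDValue Op, SDValue N, SDValue &Base);
//         ... Op.getOpcode();  Op.getDebugLoc();  Op.getNode() ...
// after:  bool SelectFoo(SDNode *Op, SDValue N, SDValue &Base);
//         ... Op->getOpcode(); Op->getDebugLoc(); Op ...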
@@ -119,48 +119,48 @@ public:
private:
/// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
/// ARM.
- SDNode *SelectARMIndexedLoad(SDValue Op);
- SDNode *SelectT2IndexedLoad(SDValue Op);
+ SDNode *SelectARMIndexedLoad(SDNode *N);
+ SDNode *SelectT2IndexedLoad(SDNode *N);
/// SelectDYN_ALLOC - Select dynamic alloc for Thumb.
- SDNode *SelectDYN_ALLOC(SDValue Op);
+ SDNode *SelectDYN_ALLOC(SDNode *N);
/// SelectVLD - Select NEON load intrinsics. NumVecs should
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
/// loads of D registers and even subregs and odd subregs of Q registers.
/// For NumVecs == 2, QOpcodes1 is not used.
- SDNode *SelectVLD(SDValue Op, unsigned NumVecs, unsigned *DOpcodes,
+ SDNode *SelectVLD(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
unsigned *QOpcodes0, unsigned *QOpcodes1);
/// SelectVST - Select NEON store intrinsics. NumVecs should
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
/// stores of D registers and even subregs and odd subregs of Q registers.
/// For NumVecs == 2, QOpcodes1 is not used.
- SDNode *SelectVST(SDValue Op, unsigned NumVecs, unsigned *DOpcodes,
+ SDNode *SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
unsigned *QOpcodes0, unsigned *QOpcodes1);
/// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
/// load/store of D registers and even subregs and odd subregs of Q registers.
- SDNode *SelectVLDSTLane(SDValue Op, bool IsLoad, unsigned NumVecs,
+ SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1);
/// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
- SDNode *SelectV6T2BitfieldExtractOp(SDValue Op, unsigned Opc);
+ SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, unsigned Opc);
/// SelectCMOVOp - Select CMOV instructions for ARM.
- SDNode *SelectCMOVOp(SDValue Op);
- SDNode *SelectT2CMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ SDNode *SelectCMOVOp(SDNode *N);
+ SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
- SDNode *SelectARMCMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
- SDNode *SelectT2CMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ SDNode *SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
- SDNode *SelectARMCMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+ SDNode *SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
@@ -206,7 +206,7 @@ void ARMDAGToDAGISel::InstructionSelect() {
CurDAG->RemoveDeadNodes();
}
-bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op,
+bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op,
SDValue N,
SDValue &BaseReg,
SDValue &ShReg,
@@ -230,7 +230,7 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op,
return true;
}
-bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N,
SDValue &Base, SDValue &Offset,
SDValue &Opc) {
if (N.getOpcode() == ISD::MUL) {
@@ -340,9 +340,9 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N,
return true;
}
-bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDValue Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc) {
- unsigned Opcode = Op.getOpcode();
+ unsigned Opcode = Op->getOpcode();
ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
? cast<LoadSDNode>(Op)->getAddressingMode()
: cast<StoreSDNode>(Op)->getAddressingMode();
@@ -379,7 +379,7 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDValue Op, SDValue N,
}
-bool ARMDAGToDAGISel::SelectAddrMode3(SDValue Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrMode3(SDNode *Op, SDValue N,
SDValue &Base, SDValue &Offset,
SDValue &Opc) {
if (N.getOpcode() == ISD::SUB) {
@@ -429,9 +429,9 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue Op, SDValue N,
return true;
}
-bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDValue Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc) {
- unsigned Opcode = Op.getOpcode();
+ unsigned Opcode = Op->getOpcode();
ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
? cast<LoadSDNode>(Op)->getAddressingMode()
: cast<StoreSDNode>(Op)->getAddressingMode();
@@ -451,14 +451,14 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDValue Op, SDValue N,
return true;
}
-bool ARMDAGToDAGISel::SelectAddrMode4(SDValue Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrMode4(SDNode *Op, SDValue N,
SDValue &Addr, SDValue &Mode) {
Addr = N;
Mode = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
-bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N,
SDValue &Base, SDValue &Offset) {
if (N.getOpcode() != ISD::ADD) {
Base = N;
@@ -506,7 +506,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N,
return true;
}
-bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Op, SDValue N,
SDValue &Addr, SDValue &Update,
SDValue &Opc, SDValue &Align) {
Addr = N;
@@ -518,7 +518,7 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N,
return true;
}
-bool ARMDAGToDAGISel::SelectAddrModePC(SDValue Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrModePC(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Label) {
if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
Offset = N.getOperand(0);
@@ -530,10 +530,10 @@ bool ARMDAGToDAGISel::SelectAddrModePC(SDValue Op, SDValue N,
return false;
}
-bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue Op, SDValue N,
+bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDNode *Op, SDValue N,
SDValue &Base, SDValue &Offset){
// FIXME dl should come from the parent load or store, not the address
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc dl = Op->getDebugLoc();
if (N.getOpcode() != ISD::ADD) {
ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
if (!NC || NC->getZExtValue() != 0)
@@ -549,7 +549,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue Op, SDValue N,
}
bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDValue Op, SDValue N,
+ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDNode *Op, SDValue N,
unsigned Scale, SDValue &Base,
SDValue &OffImm, SDValue &Offset) {
if (Scale == 4) {
@@ -605,25 +605,25 @@ ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDValue Op, SDValue N,
return true;
}
-bool ARMDAGToDAGISel::SelectThumbAddrModeS1(SDValue Op, SDValue N,
+bool ARMDAGToDAGISel::SelectThumbAddrModeS1(SDNode *Op, SDValue N,
SDValue &Base, SDValue &OffImm,
SDValue &Offset) {
return SelectThumbAddrModeRI5(Op, N, 1, Base, OffImm, Offset);
}
-bool ARMDAGToDAGISel::SelectThumbAddrModeS2(SDValue Op, SDValue N,
+bool ARMDAGToDAGISel::SelectThumbAddrModeS2(SDNode *Op, SDValue N,
SDValue &Base, SDValue &OffImm,
SDValue &Offset) {
return SelectThumbAddrModeRI5(Op, N, 2, Base, OffImm, Offset);
}
-bool ARMDAGToDAGISel::SelectThumbAddrModeS4(SDValue Op, SDValue N,
+bool ARMDAGToDAGISel::SelectThumbAddrModeS4(SDNode *Op, SDValue N,
SDValue &Base, SDValue &OffImm,
SDValue &Offset) {
return SelectThumbAddrModeRI5(Op, N, 4, Base, OffImm, Offset);
}
-bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N,
+bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDNode *Op, SDValue N,
SDValue &Base, SDValue &OffImm) {
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
@@ -659,7 +659,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N,
return false;
}
-bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue Op, SDValue N,
+bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDNode *Op, SDValue N,
SDValue &BaseReg,
SDValue &Opc) {
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
@@ -679,7 +679,7 @@ bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue Op, SDValue N,
return false;
}
-bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue Op, SDValue N,
+bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDNode *Op, SDValue N,
SDValue &Base, SDValue &OffImm) {
// Match simple R + imm12 operands.
@@ -729,7 +729,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue Op, SDValue N,
return true;
}
-bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue Op, SDValue N,
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDNode *Op, SDValue N,
SDValue &Base, SDValue &OffImm) {
// Match simple R - imm8 operands.
if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::SUB) {
@@ -753,9 +753,9 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue Op, SDValue N,
return false;
}
-bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDValue Op, SDValue N,
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
SDValue &OffImm){
- unsigned Opcode = Op.getOpcode();
+ unsigned Opcode = Op->getOpcode();
ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
? cast<LoadSDNode>(Op)->getAddressingMode()
: cast<StoreSDNode>(Op)->getAddressingMode();
@@ -772,7 +772,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDValue Op, SDValue N,
return false;
}
-bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDValue Op, SDValue N,
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N,
SDValue &Base, SDValue &OffImm) {
if (N.getOpcode() == ISD::ADD) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
@@ -798,7 +798,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDValue Op, SDValue N,
return false;
}
-bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue Op, SDValue N,
+bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDNode *Op, SDValue N,
SDValue &Base,
SDValue &OffReg, SDValue &ShImm) {
// (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
@@ -854,8 +854,8 @@ static inline SDValue getAL(SelectionDAG *CurDAG) {
return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32);
}
-SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDValue Op) {
- LoadSDNode *LD = cast<LoadSDNode>(Op);
+SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::MemIndexedMode AM = LD->getAddressingMode();
if (AM == ISD::UNINDEXED)
return NULL;
@@ -866,23 +866,23 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDValue Op) {
unsigned Opcode = 0;
bool Match = false;
if (LoadedVT == MVT::i32 &&
- SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) {
Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST;
Match = true;
} else if (LoadedVT == MVT::i16 &&
- SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
Match = true;
Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
: (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
} else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
if (LD->getExtensionType() == ISD::SEXTLOAD) {
- if (SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
Match = true;
Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
}
} else {
- if (SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ if (SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) {
Match = true;
Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST;
}
@@ -894,15 +894,15 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDValue Op) {
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
- return CurDAG->getMachineNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32,
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
MVT::Other, Ops, 6);
}
return NULL;
}
-SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) {
- LoadSDNode *LD = cast<LoadSDNode>(Op);
+SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::MemIndexedMode AM = LD->getAddressingMode();
if (AM == ISD::UNINDEXED)
return NULL;
@@ -913,7 +913,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) {
bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
unsigned Opcode = 0;
bool Match = false;
- if (SelectT2AddrModeImm8Offset(Op, LD->getOffset(), Offset)) {
+ if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
switch (LoadedVT.getSimpleVT().SimpleTy) {
case MVT::i32:
Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
@@ -942,20 +942,19 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) {
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, Offset, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
- return CurDAG->getMachineNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32,
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
MVT::Other, Ops, 5);
}
return NULL;
}
-SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDValue Op) {
- SDNode *N = Op.getNode();
+SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
- EVT VT = Op.getValueType();
- SDValue Chain = Op.getOperand(0);
- SDValue Size = Op.getOperand(1);
- SDValue Align = Op.getOperand(2);
+ EVT VT = N->getValueType(0);
+ SDValue Chain = N->getOperand(0);
+ SDValue Size = N->getOperand(1);
+ SDValue Align = N->getOperand(2);
SDValue SP = CurDAG->getRegister(ARM::SP, MVT::i32);
int32_t AlignVal = cast<ConstantSDNode>(Align)->getSExtValue();
if (AlignVal < 0)
@@ -1030,15 +1029,14 @@ static EVT GetNEONSubregVT(EVT VT) {
}
}
-SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
+SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLD NumVecs out-of-range");
- SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, MemUpdate, MemOpc, Align;
- if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
+ if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@@ -1124,15 +1122,14 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
return NULL;
}
-SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
+SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range");
- SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, MemUpdate, MemOpc, Align;
- if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
+ if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@@ -1225,16 +1222,15 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
return NULL;
}
-SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
+SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
unsigned NumVecs, unsigned *DOpcodes,
unsigned *QOpcodes0,
unsigned *QOpcodes1) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
- SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, MemUpdate, MemOpc, Align;
- if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
+ if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@@ -1324,38 +1320,38 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
return NULL;
}
-SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDValue Op,
+SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
unsigned Opc) {
if (!Subtarget->hasV6T2Ops())
return NULL;
unsigned Shl_imm = 0;
- if (isOpcWithIntImmediate(Op.getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
unsigned Srl_imm = 0;
- if (isInt32Immediate(Op.getOperand(1), Srl_imm)) {
+ if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
unsigned Width = 32 - Srl_imm;
int LSB = Srl_imm - Shl_imm;
if (LSB < 0)
return NULL;
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SDValue Ops[] = { Op.getOperand(0).getOperand(0),
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, MVT::i32),
CurDAG->getTargetConstant(Width, MVT::i32),
getAL(CurDAG), Reg0 };
- return CurDAG->SelectNodeTo(Op.getNode(), Opc, MVT::i32, Ops, 5);
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
}
}
return NULL;
}
SDNode *ARMDAGToDAGISel::
-SelectT2CMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
SDValue CPTmp0;
SDValue CPTmp1;
- if (SelectT2ShifterOperandReg(Op, TrueVal, CPTmp0, CPTmp1)) {
+ if (SelectT2ShifterOperandReg(N, TrueVal, CPTmp0, CPTmp1)) {
unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
unsigned Opc = 0;
@@ -1372,27 +1368,27 @@ SelectT2CMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(Op.getNode(), Opc, MVT::i32,Ops, 6);
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32,Ops, 6);
}
return 0;
}
SDNode *ARMDAGToDAGISel::
-SelectARMCMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
SDValue CPTmp0;
SDValue CPTmp1;
SDValue CPTmp2;
- if (SelectShifterOperandReg(Op, TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
+ if (SelectShifterOperandReg(N, TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCs, MVT::i32, Ops, 7);
+ return CurDAG->SelectNodeTo(N, ARM::MOVCCs, MVT::i32, Ops, 7);
}
return 0;
}
SDNode *ARMDAGToDAGISel::
-SelectT2CMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
if (!T)
@@ -1402,14 +1398,14 @@ SelectT2CMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(Op.getNode(),
+ return CurDAG->SelectNodeTo(N,
ARM::t2MOVCCi, MVT::i32, Ops, 5);
}
return 0;
}
SDNode *ARMDAGToDAGISel::
-SelectARMCMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
+SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
if (!T)
@@ -1419,19 +1415,19 @@ SelectARMCMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal,
SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(Op.getNode(),
+ return CurDAG->SelectNodeTo(N,
ARM::MOVCCi, MVT::i32, Ops, 5);
}
return 0;
}
-SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDValue Op) {
- EVT VT = Op.getValueType();
- SDValue FalseVal = Op.getOperand(0);
- SDValue TrueVal = Op.getOperand(1);
- SDValue CC = Op.getOperand(2);
- SDValue CCR = Op.getOperand(3);
- SDValue InFlag = Op.getOperand(4);
+SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue FalseVal = N->getOperand(0);
+ SDValue TrueVal = N->getOperand(1);
+ SDValue CC = N->getOperand(2);
+ SDValue CCR = N->getOperand(3);
+ SDValue InFlag = N->getOperand(4);
assert(CC.getOpcode() == ISD::Constant);
assert(CCR.getOpcode() == ISD::Register);
ARMCC::CondCodes CCVal =
@@ -1445,18 +1441,18 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDValue Op) {
SDValue CPTmp1;
SDValue CPTmp2;
if (Subtarget->isThumb()) {
- SDNode *Res = SelectT2CMOVShiftOp(Op, FalseVal, TrueVal,
+ SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal,
CCVal, CCR, InFlag);
if (!Res)
- Res = SelectT2CMOVShiftOp(Op, TrueVal, FalseVal,
+ Res = SelectT2CMOVShiftOp(N, TrueVal, FalseVal,
ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
if (Res)
return Res;
} else {
- SDNode *Res = SelectARMCMOVShiftOp(Op, FalseVal, TrueVal,
+ SDNode *Res = SelectARMCMOVShiftOp(N, FalseVal, TrueVal,
CCVal, CCR, InFlag);
if (!Res)
- Res = SelectARMCMOVShiftOp(Op, TrueVal, FalseVal,
+ Res = SelectARMCMOVShiftOp(N, TrueVal, FalseVal,
ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
if (Res)
return Res;
@@ -1469,18 +1465,18 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDValue Op) {
// (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
// Pattern complexity = 10 cost = 1 size = 0
if (Subtarget->isThumb()) {
- SDNode *Res = SelectT2CMOVSoImmOp(Op, FalseVal, TrueVal,
+ SDNode *Res = SelectT2CMOVSoImmOp(N, FalseVal, TrueVal,
CCVal, CCR, InFlag);
if (!Res)
- Res = SelectT2CMOVSoImmOp(Op, TrueVal, FalseVal,
+ Res = SelectT2CMOVSoImmOp(N, TrueVal, FalseVal,
ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
if (Res)
return Res;
} else {
- SDNode *Res = SelectARMCMOVSoImmOp(Op, FalseVal, TrueVal,
+ SDNode *Res = SelectARMCMOVSoImmOp(N, FalseVal, TrueVal,
CCVal, CCR, InFlag);
if (!Res)
- Res = SelectARMCMOVSoImmOp(Op, TrueVal, FalseVal,
+ Res = SelectARMCMOVSoImmOp(N, TrueVal, FalseVal,
ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
if (Res)
return Res;
@@ -1514,11 +1510,10 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDValue Op) {
Opc = ARM::VMOVDcc;
break;
}
- return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5);
+ return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
}
-SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
- SDNode *N = Op.getNode();
+SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
if (N->isMachineOpcode())
@@ -1569,7 +1564,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
Ops, 6);
}
- ReplaceUses(Op, SDValue(ResNode, 0));
+ ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
return NULL;
}
@@ -1593,28 +1588,28 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
}
}
case ARMISD::DYN_ALLOC:
- return SelectDYN_ALLOC(Op);
+ return SelectDYN_ALLOC(N);
case ISD::SRL:
- if (SDNode *I = SelectV6T2BitfieldExtractOp(Op,
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N,
Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX))
return I;
break;
case ISD::SRA:
- if (SDNode *I = SelectV6T2BitfieldExtractOp(Op,
+ if (SDNode *I = SelectV6T2BitfieldExtractOp(N,
Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX))
return I;
break;
case ISD::MUL:
if (Subtarget->isThumb1Only())
break;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
unsigned RHSV = C->getZExtValue();
if (!RHSV) break;
if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
unsigned ShImm = Log2_32(RHSV-1);
if (ShImm >= 32)
break;
- SDValue V = Op.getOperand(0);
+ SDValue V = N->getOperand(0);
ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
@@ -1630,7 +1625,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
unsigned ShImm = Log2_32(RHSV+1);
if (ShImm >= 32)
break;
- SDValue V = Op.getOperand(0);
+ SDValue V = N->getOperand(0);
ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
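
The two strength-reduction cases these hunks touch, worked through with illustrative registers (the 2^n-1 case lowers to a reverse-subtract):

//   mul r0, r1, #9                 ; 9 = 2^3 + 1
//     => add r0, r1, r1, lsl #3    ; r1 + (r1 << 3)
//   mul r0, r1, #7                 ; 7 = 2^3 - 1
//     => rsb r0, r1, r1, lsl #3    ; (r1 << 3) - r1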
@@ -1650,7 +1645,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
// are entirely contributed by c2 and lower 16-bits are entirely contributed
// by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
// Select it to: "movt x, ((c1 & 0xffff0000) >> 16)"
- EVT VT = Op.getValueType();
+ EVT VT = N->getValueType(0);
if (VT != MVT::i32)
break;
unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
@@ -1658,7 +1653,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
: (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
if (!Opc)
break;
- SDValue N0 = Op.getOperand(0), N1 = Op.getOperand(1);
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (!N1C)
break;
@@ -1683,18 +1678,18 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
}
case ARMISD::VMOVRRD:
return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
- Op.getOperand(0), getAL(CurDAG),
+ N->getOperand(0), getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32));
case ISD::UMUL_LOHI: {
if (Subtarget->isThumb1Only())
break;
if (Subtarget->isThumb()) {
- SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops,4);
} else {
- SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(ARM::UMULL, dl, MVT::i32, MVT::i32, Ops, 5);
@@ -1704,11 +1699,11 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
if (Subtarget->isThumb1Only())
break;
if (Subtarget->isThumb()) {
- SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops,4);
} else {
- SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5);
@@ -1717,9 +1712,9 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case ISD::LOAD: {
SDNode *ResNode = 0;
if (Subtarget->isThumb() && Subtarget->hasThumb2())
- ResNode = SelectT2IndexedLoad(Op);
+ ResNode = SelectT2IndexedLoad(N);
else
- ResNode = SelectARMIndexedLoad(Op);
+ ResNode = SelectARMIndexedLoad(N);
if (ResNode)
return ResNode;
// Other cases are autogenerated.
@@ -1740,11 +1735,11 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
unsigned Opc = Subtarget->isThumb() ?
((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
- SDValue Chain = Op.getOperand(0);
- SDValue N1 = Op.getOperand(1);
- SDValue N2 = Op.getOperand(2);
- SDValue N3 = Op.getOperand(3);
- SDValue InFlag = Op.getOperand(4);
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue InFlag = N->getOperand(4);
assert(N1.getOpcode() == ISD::BasicBlock);
assert(N2.getOpcode() == ISD::Constant);
assert(N3.getOpcode() == ISD::Register);
@@ -1756,23 +1751,23 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
MVT::Flag, Ops, 5);
Chain = SDValue(ResNode, 0);
- if (Op.getNode()->getNumValues() == 2) {
+ if (N->getNumValues() == 2) {
InFlag = SDValue(ResNode, 1);
- ReplaceUses(SDValue(Op.getNode(), 1), InFlag);
+ ReplaceUses(SDValue(N, 1), InFlag);
}
- ReplaceUses(SDValue(Op.getNode(), 0),
+ ReplaceUses(SDValue(N, 0),
SDValue(Chain.getNode(), Chain.getResNo()));
return NULL;
}
case ARMISD::CMOV:
- return SelectCMOVOp(Op);
+ return SelectCMOVOp(N);
case ARMISD::CNEG: {
- EVT VT = Op.getValueType();
- SDValue N0 = Op.getOperand(0);
- SDValue N1 = Op.getOperand(1);
- SDValue N2 = Op.getOperand(2);
- SDValue N3 = Op.getOperand(3);
- SDValue InFlag = Op.getOperand(4);
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue InFlag = N->getOperand(4);
assert(N2.getOpcode() == ISD::Constant);
assert(N3.getOpcode() == ISD::Register);
@@ -1791,7 +1786,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
Opc = ARM::VNEGDcc;
break;
}
- return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5);
+ return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
}
case ARMISD::VZIP: {
@@ -1863,7 +1858,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
ARM::VLD2d32, ARM::VLD2d64 };
unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 };
- return SelectVLD(Op, 2, DOpcodes, QOpcodes, 0);
+ return SelectVLD(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld3: {
@@ -1871,7 +1866,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
ARM::VLD3d32, ARM::VLD3d64 };
unsigned QOpcodes0[] = { ARM::VLD3q8a, ARM::VLD3q16a, ARM::VLD3q32a };
unsigned QOpcodes1[] = { ARM::VLD3q8b, ARM::VLD3q16b, ARM::VLD3q32b };
- return SelectVLD(Op, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld4: {
@@ -1879,35 +1874,35 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
ARM::VLD4d32, ARM::VLD4d64 };
unsigned QOpcodes0[] = { ARM::VLD4q8a, ARM::VLD4q16a, ARM::VLD4q32a };
unsigned QOpcodes1[] = { ARM::VLD4q8b, ARM::VLD4q16b, ARM::VLD4q32b };
- return SelectVLD(Op, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld2lane: {
unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 };
unsigned QOpcodes0[] = { ARM::VLD2LNq16a, ARM::VLD2LNq32a };
unsigned QOpcodes1[] = { ARM::VLD2LNq16b, ARM::VLD2LNq32b };
- return SelectVLDSTLane(Op, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
+ return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld3lane: {
unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 };
unsigned QOpcodes0[] = { ARM::VLD3LNq16a, ARM::VLD3LNq32a };
unsigned QOpcodes1[] = { ARM::VLD3LNq16b, ARM::VLD3LNq32b };
- return SelectVLDSTLane(Op, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld4lane: {
unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 };
unsigned QOpcodes0[] = { ARM::VLD4LNq16a, ARM::VLD4LNq32a };
unsigned QOpcodes1[] = { ARM::VLD4LNq16b, ARM::VLD4LNq32b };
- return SelectVLDSTLane(Op, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst2: {
unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
ARM::VST2d32, ARM::VST2d64 };
unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 };
- return SelectVST(Op, 2, DOpcodes, QOpcodes, 0);
+ return SelectVST(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst3: {
@@ -1915,7 +1910,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
ARM::VST3d32, ARM::VST3d64 };
unsigned QOpcodes0[] = { ARM::VST3q8a, ARM::VST3q16a, ARM::VST3q32a };
unsigned QOpcodes1[] = { ARM::VST3q8b, ARM::VST3q16b, ARM::VST3q32b };
- return SelectVST(Op, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst4: {
@@ -1923,34 +1918,34 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
ARM::VST4d32, ARM::VST4d64 };
unsigned QOpcodes0[] = { ARM::VST4q8a, ARM::VST4q16a, ARM::VST4q32a };
unsigned QOpcodes1[] = { ARM::VST4q8b, ARM::VST4q16b, ARM::VST4q32b };
- return SelectVST(Op, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst2lane: {
unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 };
unsigned QOpcodes0[] = { ARM::VST2LNq16a, ARM::VST2LNq32a };
unsigned QOpcodes1[] = { ARM::VST2LNq16b, ARM::VST2LNq32b };
- return SelectVLDSTLane(Op, false, 2, DOpcodes, QOpcodes0, QOpcodes1);
+ return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst3lane: {
unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 };
unsigned QOpcodes0[] = { ARM::VST3LNq16a, ARM::VST3LNq32a };
unsigned QOpcodes1[] = { ARM::VST3LNq16b, ARM::VST3LNq32b };
- return SelectVLDSTLane(Op, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst4lane: {
unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 };
unsigned QOpcodes0[] = { ARM::VST4LNq16a, ARM::VST4LNq32a };
unsigned QOpcodes1[] = { ARM::VST4LNq16b, ARM::VST4LNq32b };
- return SelectVLDSTLane(Op, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
}
}
}
- return SelectCode(Op);
+ return SelectCode(N);
}
bool ARMDAGToDAGISel::
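The rewrite running through this selector is mechanical; a minimal sketch of the before/after shape (abbreviated, with Op0 standing in for whatever replacement value a case computes — not part of the patch itself):

    // Before: the selector took an SDValue and unwrapped it on entry.
    SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
      SDNode *N = Op.getNode();
      EVT VT = Op.getValueType();        // implicitly result 0 of N
      SDValue Op0 = Op.getOperand(0);
      ReplaceUses(Op, Op0);              // Op carried the result number
      return SelectCode(Op);
    }

    // After: the selector takes the SDNode directly; result 0 is explicit.
    SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
      EVT VT = N->getValueType(0);       // was Op.getValueType()
      SDValue Op0 = N->getOperand(0);    // was Op.getOperand(0)
      ReplaceUses(SDValue(N, 0), Op0);   // was ReplaceUses(Op, Op0)
      return SelectCode(N);
    }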
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 334baae..7b62c00 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3130,6 +3130,9 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
// exitMBB:
// ...
BB = exitMBB;
+
+ MF->DeleteMachineInstr(MI); // The instruction is gone now.
+
return BB;
}
@@ -3140,7 +3143,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction *F = BB->getParent();
+ MachineFunction *MF = BB->getParent();
MachineFunction::iterator It = BB;
++It;
@@ -3155,7 +3158,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
case 1:
ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
break;
case 2:
ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
@@ -3167,13 +3170,13 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
break;
}
- MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, loopMBB);
- F->insert(It, exitMBB);
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
exitMBB->transferSuccessors(BB);
- MachineRegisterInfo &RegInfo = F->getRegInfo();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
unsigned scratch2 = (!BinOpcode) ? incr :
RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
@@ -3216,7 +3219,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// ...
BB = exitMBB;
- F->DeleteMachineInstr(MI); // The instruction is gone now.
+ MF->DeleteMachineInstr(MI); // The instruction is gone now.
return BB;
}
@@ -4258,10 +4261,10 @@ std::pair<unsigned, const TargetRegisterClass*>
ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const {
if (Constraint.size() == 1) {
- // GCC RS6000 Constraint Letters
+ // GCC ARM Constraint Letters
switch (Constraint[0]) {
case 'l':
- if (Subtarget->isThumb1Only())
+ if (Subtarget->isThumb())
return std::make_pair(0U, ARM::tGPRRegisterClass);
else
return std::make_pair(0U, ARM::GPRRegisterClass);
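The one-character opcode fix above corrects a copy/paste error: the byte-sized case was selecting the exclusive *load* opcode for its store half. For reference, the corrected pairing in EmitAtomicBinary reads as follows (a sketch; the 2- and 4-byte arms are assumed unchanged from the surrounding context):

    switch (Size) {
    default: llvm_unreachable("unsupported size for atomic binary op!");
    case 1:
      ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
      strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;  // was t2LDREXB
      break;
    case 2:
      ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
      strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
      break;
    case 4:
      ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
      strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
      break;
    }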
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index da8b373..f67e74a 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -127,8 +127,8 @@ def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
def IsARM : Predicate<"!Subtarget->isThumb()">;
def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
-def CarryDefIsUnused : Predicate<"!N.getNode()->hasAnyUseOfValue(1)">;
-def CarryDefIsUsed : Predicate<"N.getNode()->hasAnyUseOfValue(1)">;
+def CarryDefIsUnused : Predicate<"!N->hasAnyUseOfValue(1)">;
+def CarryDefIsUsed : Predicate<"N->hasAnyUseOfValue(1)">;
// FIXME: Eventually this will be just "hasV6T2Ops".
def UseMovt : Predicate<"Subtarget->useMovt()">;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 34d7d8f..603ccf5 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -113,7 +113,7 @@ def t_addrmode_s1 : Operand<i32>,
def t_addrmode_sp : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
let PrintMethod = "printThumbAddrModeSPOperand";
- let MIOperandInfo = (ops JustSP:$base, i32imm:$offsimm);
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
//===----------------------------------------------------------------------===//
@@ -208,9 +208,8 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def tBRIND : TI<(outs), (ins GPR:$dst), IIC_Br, "mov\tpc, $dst",
[(brind GPR:$dst)]>,
- T1Special<{1,0,?,?}> {
- // <Rd> = pc
- let Inst{7} = 1;
+ T1Special<{1,0,1,1}> {
+ // <Rd> = Inst{7:2-0} = pc
let Inst{2-0} = 0b111;
}
}
@@ -342,16 +341,28 @@ def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
"ldr", "\t$dst, $addr",
[(set tGPR:$dst, (load t_addrmode_s4:$addr))]>,
T1LdSt<0b100>;
+def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
+ "ldr", "\t$dst, $addr",
+ []>,
+ T1LdSt4Imm<{1,?,?}>;
def tLDRB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr,
"ldrb", "\t$dst, $addr",
[(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>,
T1LdSt<0b110>;
+def tLDRBi: T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr,
+ "ldrb", "\t$dst, $addr",
+ []>,
+ T1LdSt1Imm<{1,?,?}>;
def tLDRH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr,
"ldrh", "\t$dst, $addr",
[(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>,
T1LdSt<0b101>;
+def tLDRHi: T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr,
+ "ldrh", "\t$dst, $addr",
+ []>,
+ T1LdSt2Imm<{1,?,?}>;
let AddedComplexity = 10 in
def tLDRSB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr,
@@ -397,16 +408,28 @@ def tSTR : T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer,
"str", "\t$src, $addr",
[(store tGPR:$src, t_addrmode_s4:$addr)]>,
T1LdSt<0b000>;
+def tSTRi: T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer,
+ "str", "\t$src, $addr",
+ []>,
+ T1LdSt4Imm<{0,?,?}>;
def tSTRB : T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer,
"strb", "\t$src, $addr",
[(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>,
T1LdSt<0b010>;
+def tSTRBi: T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer,
+ "strb", "\t$src, $addr",
+ []>,
+ T1LdSt1Imm<{0,?,?}>;
def tSTRH : T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer,
"strh", "\t$src, $addr",
[(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>,
T1LdSt<0b001>;
+def tSTRHi: T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer,
+ "strh", "\t$src, $addr",
+ []>,
+ T1LdSt2Imm<{0,?,?}>;
def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
"str", "\t$src, $addr",
@@ -748,7 +771,7 @@ let usesCustomInserter = 1 in // Expanded after instruction selection.
// 16-bit movcc in IT blocks for Thumb2.
def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr,
"mov", "\t$dst, $rhs", []>,
- T1Special<{1,0,?,?}>;
+ T1Special<{1,0,1,1}>;
def tMOVCCi : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iCMOVi,
"mov", "\t$dst, $rhs", []>,
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 6f20ed4..769df7e 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -360,8 +360,8 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
opc, ".w\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
let Inst{31-27} = 0b11101;
- let Inst{24} = 1;
let Inst{26-25} = 0b01;
+ let Inst{24} = 1;
let Inst{23-21} = op23_21;
let Inst{20} = 0; // The S bit.
}
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index b13f98a..b78b95b 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -740,6 +740,18 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
/// isMemoryOp - Returns true if the instruction is a memory operation (that
/// this pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
+ if (MI->hasOneMemOperand()) {
+ const MachineMemOperand *MMO = *MI->memoperands_begin();
+
+ // Don't touch volatile memory accesses - we may be changing their order.
+ if (MMO->isVolatile())
+ return false;
+
+    // Unaligned ldr/str is emulated by some kernels, but unaligned
+    // ldm/stm is not.
+ if (MMO->getAlignment() < 4)
+ return false;
+ }
+
int Opcode = MI->getOpcode();
switch (Opcode) {
default: break;
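The new early-out keeps the merger away from accesses it must not rewrite. The same check in isolation, as a hypothetical helper (isMergeCandidate is not a name from the patch):

    static bool isMergeCandidate(const MachineInstr *MI) {
      if (!MI->hasOneMemOperand())
        return true;  // no memoperand info; the opcode switch decides below
      const MachineMemOperand *MMO = *MI->memoperands_begin();
      if (MMO->isVolatile())
        return false; // merging into ldm/stm could reorder the accesses
      if (MMO->getAlignment() < 4)
        return false; // kernels emulate unaligned ldr/str, never ldm/stm
      return true;
    }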
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 9fbde81..d393e8d 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -367,19 +367,6 @@ def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
// Condition code registers.
def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>;
-// Just the stack pointer (for tSTRspi and friends).
-def JustSP : RegisterClass<"ARM", [i32], 32, [SP]> {
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- JustSPClass::iterator
- JustSPClass::allocation_order_end(const MachineFunction &MF) const {
- return allocation_order_begin(MF);
- }
- }];
-}
-
//===----------------------------------------------------------------------===//
// Subregister Set Definitions... now that we have all of the pieces, define the
// sub registers for each register.
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index ed4667b..132738e 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmLexer.h"
#include "llvm/MC/MCAsmParser.h"
+#include "llvm/MC/MCParsedAsmOperand.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -78,7 +79,7 @@ private:
/// @name Auto-generated Match Functions
/// {
- bool MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
+ bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCInst &Inst);
/// MatchRegisterName - Match the given string to a register name and return
@@ -94,14 +95,15 @@ public:
ARMAsmParser(const Target &T, MCAsmParser &_Parser)
: TargetAsmParser(T), Parser(_Parser) {}
- virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst);
+ virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
virtual bool ParseDirective(AsmToken DirectiveID);
};
/// ARMOperand - Instances of this class represent a parsed ARM machine
/// instruction.
-struct ARMOperand {
+struct ARMOperand : public MCParsedAsmOperand {
enum {
Token,
Register,
@@ -515,9 +517,10 @@ int ARMAsmParser::MatchRegisterName(const StringRef &Name) {
}
/// A hack to allow some testing, to be replaced by a real table gen version.
-bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands,
- MCInst &Inst) {
- struct ARMOperand Op0 = Operands[0];
+bool ARMAsmParser::
+MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCInst &Inst) {
+ ARMOperand &Op0 = *(ARMOperand*)Operands[0];
assert(Op0.Kind == ARMOperand::Token && "First operand not a Token");
const StringRef &Mnemonic = Op0.getToken();
if (Mnemonic == "add" ||
@@ -578,33 +581,27 @@ bool ARMAsmParser::ParseOperand(ARMOperand &Op) {
}
/// Parse an arm instruction mnemonic followed by its operands.
-bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {
- SmallVector<ARMOperand, 7> Operands;
-
- Operands.push_back(ARMOperand::CreateToken(Name));
+bool ARMAsmParser::ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ Operands.push_back(new ARMOperand(ARMOperand::CreateToken(Name)));
SMLoc Loc = getLexer().getTok().getLoc();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
- Operands.push_back(ARMOperand());
- if (ParseOperand(Operands.back()))
- return true;
+ ARMOperand Op;
+ if (ParseOperand(Op)) return true;
+ Operands.push_back(new ARMOperand(Op));
while (getLexer().is(AsmToken::Comma)) {
getLexer().Lex(); // Eat the comma.
// Parse and remember the operand.
- Operands.push_back(ARMOperand());
- if (ParseOperand(Operands.back()))
- return true;
+ if (ParseOperand(Op)) return true;
+ Operands.push_back(new ARMOperand(Op));
}
}
- if (!MatchInstruction(Operands, Inst))
- return false;
-
- Error(Loc, "ARMAsmParser::ParseInstruction only partly implemented");
- return true;
+ return false;
}
/// ParseDirective parses the ARM-specific directives
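ParseInstruction no longer builds an MCInst itself; it only fills the operand vector, and matching becomes a separate step. A hypothetical caller, to show the new contract (names, ownership handling, and diagnostics here are illustrative only):

    SmallVector<MCParsedAsmOperand*, 7> Operands;
    if (TargetParser.ParseInstruction(Name, NameLoc, Operands))
      return true;  // parse error; diagnostics already emitted

    MCInst Inst;
    if (TargetParser.MatchInstruction(Operands, Inst))
      return Error(NameLoc, "unrecognized instruction");
    Out.EmitInstruction(Inst);  // Out is the MCStreamer
    // (freeing the heap-allocated operands is elided here)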
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 931d8df..2d13533 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -175,16 +175,16 @@ namespace {
printDataDirective(MCPV->getType());
ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
- std::string Name;
+ SmallString<128> TmpNameStr;
if (ACPV->isLSDA()) {
- SmallString<16> LSDAName;
- raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() <<
+ raw_svector_ostream(TmpNameStr) << MAI->getPrivateGlobalPrefix() <<
"_LSDA_" << getFunctionNumber();
- Name = LSDAName.str();
+ O << TmpNameStr.str();
} else if (ACPV->isBlockAddress()) {
- Name = GetBlockAddressSymbol(ACPV->getBlockAddress())->getName();
+ O << GetBlockAddressSymbol(ACPV->getBlockAddress())->getName();
} else if (ACPV->isGlobalValue()) {
+ std::string Name;
GlobalValue *GV = ACPV->getGV();
bool isIndirect = Subtarget->isTargetDarwin() &&
Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
@@ -201,16 +201,16 @@ namespace {
GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(Sym) :
MMIMachO.getGVStubEntry(Sym);
if (StubSym == 0) {
- SmallString<128> NameStr;
- Mang->getNameWithPrefix(NameStr, GV, false);
- StubSym = OutContext.GetOrCreateSymbol(NameStr.str());
+ Mang->getNameWithPrefix(TmpNameStr, GV, false);
+ StubSym = OutContext.GetOrCreateSymbol(TmpNameStr.str());
}
}
+ O << Name;
} else {
assert(ACPV->isExtSymbol() && "unrecognized constant pool value");
- Name = Mang->makeNameProper(ACPV->getSymbol());
+ Mang->getNameWithPrefix(TmpNameStr, ACPV->getSymbol());
+ OutContext.GetOrCreateSymbol(TmpNameStr.str())->print(O, MAI);
}
- O << Name;
if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")";
if (ACPV->getPCAdjustment() != 0) {
@@ -392,9 +392,10 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
}
case MachineOperand::MO_ExternalSymbol: {
bool isCallOp = Modifier && !strcmp(Modifier, "call");
- std::string Name = Mang->makeNameProper(MO.getSymbolName());
-
- O << Name;
+ SmallString<128> NameStr;
+ Mang->getNameWithPrefix(NameStr, MO.getSymbolName());
+ OutContext.GetOrCreateSymbol(NameStr.str())->print(O, MAI);
+
if (isCallOp && Subtarget->isTargetELF() &&
TM.getRelocationModel() == Reloc::PIC_)
O << "(PLT)";
diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
index 5b0a89d..eaefef9 100644
--- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
+++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -157,7 +157,7 @@ namespace {
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
- SDNode *Select(SDValue Op);
+ SDNode *Select(SDNode *N);
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
@@ -202,7 +202,7 @@ private:
SDNode *getGlobalBaseReg();
SDNode *getGlobalRetAddr();
- void SelectCALL(SDValue Op);
+ void SelectCALL(SDNode *Op);
};
}
@@ -232,8 +232,7 @@ void AlphaDAGToDAGISel::InstructionSelect() {
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
-SDNode *AlphaDAGToDAGISel::Select(SDValue Op) {
- SDNode *N = Op.getNode();
+SDNode *AlphaDAGToDAGISel::Select(SDNode *N) {
if (N->isMachineOpcode()) {
return NULL; // Already selected.
}
@@ -242,7 +241,7 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) {
switch (N->getOpcode()) {
default: break;
case AlphaISD::CALL:
- SelectCALL(Op);
+ SelectCALL(N);
return NULL;
case ISD::FrameIndex: {
@@ -258,9 +257,9 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) {
case AlphaISD::DivCall: {
SDValue Chain = CurDAG->getEntryNode();
- SDValue N0 = Op.getOperand(0);
- SDValue N1 = Op.getOperand(1);
- SDValue N2 = Op.getOperand(2);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R24, N1,
SDValue(0,0));
Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R25, N2,
@@ -287,7 +286,7 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) {
if (uval == 0) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
Alpha::R31, MVT::i64);
- ReplaceUses(Op, Result);
+ ReplaceUses(SDValue(N, 0), Result);
return NULL;
}
@@ -415,13 +414,12 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) {
}
- return SelectCode(Op);
+ return SelectCode(N);
}
-void AlphaDAGToDAGISel::SelectCALL(SDValue Op) {
+void AlphaDAGToDAGISel::SelectCALL(SDNode *N) {
//TODO: add flag stuff to prevent nondeterministic breakage!
- SDNode *N = Op.getNode();
SDValue Chain = N->getOperand(0);
SDValue Addr = N->getOperand(1);
SDValue InFlag = N->getOperand(N->getNumOperands() - 1);
@@ -442,8 +440,8 @@ void AlphaDAGToDAGISel::SelectCALL(SDValue Op) {
}
InFlag = Chain.getValue(1);
- ReplaceUses(Op.getValue(0), Chain);
- ReplaceUses(Op.getValue(1), InFlag);
+ ReplaceUses(SDValue(N, 0), Chain);
+ ReplaceUses(SDValue(N, 1), InFlag);
}
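SelectCALL shows why the SDNode* form is clearer for multi-result nodes: the call produces a chain and a flag, and each result of the original node is now rewired by explicit index (the final two lines above, annotated):

    // AlphaISD::CALL yields (chain, flag); hook both results up by number.
    ReplaceUses(SDValue(N, 0), Chain);   // result 0: the output chain
    ReplaceUses(SDValue(N, 1), InFlag);  // result 1: the output flag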
diff --git a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
index 917f7f5..0bd94d4 100644
--- a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
+++ b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
@@ -25,12 +25,14 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/Mangler.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
@@ -179,9 +181,12 @@ void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
O << Mang->getMangledName(MO.getGlobal());
printOffset(MO.getOffset());
break;
- case MachineOperand::MO_ExternalSymbol:
- O << Mang->makeNameProper(MO.getSymbolName());
+ case MachineOperand::MO_ExternalSymbol: {
+ SmallString<60> NameStr;
+ Mang->getNameWithPrefix(NameStr, MO.getSymbolName());
+ OutContext.GetOrCreateSymbol(NameStr.str())->print(O, MAI);
break;
+ }
case MachineOperand::MO_ConstantPoolIndex:
O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
<< MO.getIndex();
diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
index 2217af4..e1b6008 100644
--- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
+++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
@@ -51,8 +51,8 @@ namespace {
#include "BlackfinGenDAGISel.inc"
private:
- SDNode *Select(SDValue Op);
- bool SelectADDRspii(SDValue Op, SDValue Addr,
+ SDNode *Select(SDNode *N);
+ bool SelectADDRspii(SDNode *Op, SDValue Addr,
SDValue &Base, SDValue &Offset);
// Walk the DAG after instruction selection, fixing register class issues.
@@ -82,8 +82,7 @@ void BlackfinDAGToDAGISel::InstructionSelect() {
FixRegisterClasses(*CurDAG);
}
-SDNode *BlackfinDAGToDAGISel::Select(SDValue Op) {
- SDNode *N = Op.getNode();
+SDNode *BlackfinDAGToDAGISel::Select(SDNode *N) {
if (N->isMachineOpcode())
return NULL; // Already selected.
@@ -99,10 +98,10 @@ SDNode *BlackfinDAGToDAGISel::Select(SDValue Op) {
}
}
- return SelectCode(Op);
+ return SelectCode(N);
}
-bool BlackfinDAGToDAGISel::SelectADDRspii(SDValue Op,
+bool BlackfinDAGToDAGISel::SelectADDRspii(SDNode *Op,
SDValue Addr,
SDValue &Base,
SDValue &Offset) {
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 1ab3c0a..0fd975c 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -25,6 +25,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/InlineAsm.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/ConstantsScanner.h"
#include "llvm/Analysis/FindUsedTypes.h"
@@ -34,6 +35,7 @@
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/CallSite.h"
@@ -341,6 +343,15 @@ namespace {
char CWriter::ID = 0;
+
+static std::string Mangle(const std::string &S) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+ MCSymbol::printMangledName(S, OS, 0);
+ return OS.str();
+}
+
+
/// This method inserts names for any unnamed structure types that are used by
/// the program, and removes names from structure types that are not used by the
/// program.
@@ -1431,8 +1442,11 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
std::string CWriter::GetValueName(const Value *Operand) {
// Mangle globals with the standard mangler interface for LLC compatibility.
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(Operand))
- return Mang->getMangledName(GV);
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(Operand)) {
+ SmallString<128> Str;
+ Mang->getNameWithPrefix(Str, GV, false);
+ return Mangle(Str.str().str());
+ }
std::string Name = Operand->getName();
@@ -1857,7 +1871,6 @@ bool CWriter::doInitialization(Module &M) {
// Ensure that all structure types have names...
Mang = new Mangler(M);
- Mang->markCharUnacceptable('.');
// Keep track of which functions are static ctors/dtors so they can have
// an attribute added to their prototypes.
@@ -2210,7 +2223,7 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) {
// Print out forward declarations for structure types before anything else!
Out << "/* Structure forward decls */\n";
for (; I != End; ++I) {
- std::string Name = "struct l_" + Mang->makeNameProper(I->first);
+ std::string Name = "struct " + Mangle("l_"+I->first);
Out << Name << ";\n";
TypeNames.insert(std::make_pair(I->second, Name));
}
@@ -2221,7 +2234,7 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) {
// for struct or opaque types.
Out << "/* Typedefs */\n";
for (I = TST.begin(); I != End; ++I) {
- std::string Name = "l_" + Mang->makeNameProper(I->first);
+ std::string Name = Mangle("l_"+I->first);
Out << "typedef ";
printType(Out, I->second, false, Name);
Out << ";\n";
@@ -2921,7 +2934,6 @@ void CWriter::lowerIntrinsics(Function &F) {
case Intrinsic::setjmp:
case Intrinsic::longjmp:
case Intrinsic::prefetch:
- case Intrinsic::dbg_stoppoint:
case Intrinsic::powi:
case Intrinsic::x86_sse_cmp_ss:
case Intrinsic::x86_sse_cmp_ps:
@@ -3178,20 +3190,6 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
Out << "0; *((void**)&" << GetValueName(&I)
<< ") = __builtin_stack_save()";
return true;
- case Intrinsic::dbg_stoppoint: {
- // If we use writeOperand directly we get a "u" suffix which is rejected
- // by gcc.
- DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
- std::string dir;
- GetConstantStringInfo(SPI.getDirectory(), dir);
- std::string file;
- GetConstantStringInfo(SPI.getFileName(), file);
- Out << "\n#line "
- << SPI.getLine()
- << " \""
- << dir << '/' << file << "\"\n";
- return true;
- }
case Intrinsic::x86_sse_cmp_ss:
case Intrinsic::x86_sse_cmp_ps:
case Intrinsic::x86_sse2_cmp_sd:
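The new static Mangle() routes identifier cleanup through MCSymbol::printMangledName instead of the old Mangler::makeNameProper path. A hypothetical use, assuming a source struct type named "foo.bar":

    // '.' is not a valid C identifier character; printMangledName escapes
    // whatever it cannot keep, so the result is a legal C name.
    std::string Name = "struct " + Mangle("l_" + std::string("foo.bar"));
    Out << Name << ";\n";  // exact escaping is whatever MCSymbol produces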
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index c69a751..80693e1 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -277,10 +277,9 @@ namespace {
return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
}
- SDNode *emitBuildVector(SDValue build_vec) {
- EVT vecVT = build_vec.getValueType();
+ SDNode *emitBuildVector(SDNode *bvNode) {
+ EVT vecVT = bvNode->getValueType(0);
EVT eltVT = vecVT.getVectorElementType();
- SDNode *bvNode = build_vec.getNode();
DebugLoc dl = bvNode->getDebugLoc();
// Check to see if this vector can be represented as a CellSPU immediate
@@ -296,13 +295,13 @@ namespace {
((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
(SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
(SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0))))
- return Select(build_vec);
+ return Select(bvNode);
// No, need to emit a constant pool spill:
std::vector<Constant*> CV;
- for (size_t i = 0; i < build_vec.getNumOperands(); ++i) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode > (build_vec.getOperand(i));
+ for (size_t i = 0; i < bvNode->getNumOperands(); ++i) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode > (bvNode->getOperand(i));
CV.push_back(const_cast<ConstantInt *> (V->getConstantIntValue()));
}
@@ -312,49 +311,49 @@ namespace {
SDValue CGPoolOffset =
SPU::LowerConstantPool(CPIdx, *CurDAG,
SPUtli.getSPUTargetMachine());
- return SelectCode(CurDAG->getLoad(build_vec.getValueType(), dl,
+ return SelectCode(CurDAG->getLoad(vecVT, dl,
CurDAG->getEntryNode(), CGPoolOffset,
PseudoSourceValue::getConstantPool(), 0,
- false, Alignment));
+ false, Alignment).getNode());
}
/// Select - Convert the specified operand from a target-independent to a
/// target-specific node if it hasn't already been changed.
- SDNode *Select(SDValue Op);
+ SDNode *Select(SDNode *N);
//! Emit the instruction sequence for i64 shl
- SDNode *SelectSHLi64(SDValue &Op, EVT OpVT);
+ SDNode *SelectSHLi64(SDNode *N, EVT OpVT);
//! Emit the instruction sequence for i64 srl
- SDNode *SelectSRLi64(SDValue &Op, EVT OpVT);
+ SDNode *SelectSRLi64(SDNode *N, EVT OpVT);
//! Emit the instruction sequence for i64 sra
- SDNode *SelectSRAi64(SDValue &Op, EVT OpVT);
+ SDNode *SelectSRAi64(SDNode *N, EVT OpVT);
//! Emit the necessary sequence for loading i64 constants:
- SDNode *SelectI64Constant(SDValue &Op, EVT OpVT, DebugLoc dl);
+ SDNode *SelectI64Constant(SDNode *N, EVT OpVT, DebugLoc dl);
//! Alternate instruction emit sequence for loading i64 constants
SDNode *SelectI64Constant(uint64_t i64const, EVT OpVT, DebugLoc dl);
//! Returns true if the address N is an A-form (local store) address
- bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Index);
//! D-form address predicate
- bool SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Index);
/// Alternate D-form address using i7 offset predicate
- bool SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp,
+ bool SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp,
SDValue &Base);
/// D-form address selection workhorse
- bool DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Disp,
+ bool DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Disp,
SDValue &Base, int minOffset, int maxOffset);
//! Address predicate if N can be expressed as an indexed [r+r] operation.
- bool SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Index);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
@@ -366,13 +365,13 @@ namespace {
switch (ConstraintCode) {
default: return true;
case 'm': // memory
- if (!SelectDFormAddr(Op, Op, Op0, Op1)
- && !SelectAFormAddr(Op, Op, Op0, Op1))
- SelectXFormAddr(Op, Op, Op0, Op1);
+ if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1)
+ && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1))
+ SelectXFormAddr(Op.getNode(), Op, Op0, Op1);
break;
case 'o': // offsetable
- if (!SelectDFormAddr(Op, Op, Op0, Op1)
- && !SelectAFormAddr(Op, Op, Op0, Op1)) {
+ if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1)
+ && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1)) {
Op0 = Op;
Op1 = getSmallIPtrImm(0);
}
@@ -429,7 +428,7 @@ SPUDAGToDAGISel::InstructionSelect()
\arg Index The base address index
*/
bool
-SPUDAGToDAGISel::SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
+SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Index) {
// These match the addr256k operand type:
EVT OffsVT = MVT::i16;
@@ -479,7 +478,7 @@ SPUDAGToDAGISel::SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
}
bool
-SPUDAGToDAGISel::SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp,
+SPUDAGToDAGISel::SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp,
SDValue &Base) {
const int minDForm2Offset = -(1 << 7);
const int maxDForm2Offset = (1 << 7) - 1;
@@ -500,7 +499,7 @@ SPUDAGToDAGISel::SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp,
to non-empty SDValue instances.
*/
bool
-SPUDAGToDAGISel::SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base,
+SPUDAGToDAGISel::SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Index) {
return DFormAddressPredicate(Op, N, Base, Index,
SPUFrameInfo::minFrameOffset(),
@@ -508,7 +507,7 @@ SPUDAGToDAGISel::SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base,
}
bool
-SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base,
+SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Index, int minOffset,
int maxOffset) {
unsigned Opc = N.getOpcode();
@@ -618,7 +617,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base,
Index = N;
return true;
} else if (Opc == ISD::Register || Opc == ISD::CopyFromReg) {
- unsigned OpOpc = Op.getOpcode();
+ unsigned OpOpc = Op->getOpcode();
if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) {
// Direct load/store without getelementptr
@@ -630,7 +629,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base,
else
Addr = N; // Register
- Offs = ((OpOpc == ISD::STORE) ? Op.getOperand(3) : Op.getOperand(2));
+ Offs = ((OpOpc == ISD::STORE) ? Op->getOperand(3) : Op->getOperand(2));
if (Offs.getOpcode() == ISD::Constant || Offs.getOpcode() == ISD::UNDEF) {
if (Offs.getOpcode() == ISD::UNDEF)
@@ -667,7 +666,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base,
(r)(r) X-form address.
*/
bool
-SPUDAGToDAGISel::SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base,
+SPUDAGToDAGISel::SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Index) {
if (!SelectAFormAddr(Op, N, Base, Index)
&& !SelectDFormAddr(Op, N, Base, Index)) {
@@ -685,12 +684,11 @@ SPUDAGToDAGISel::SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base,
/*!
*/
SDNode *
-SPUDAGToDAGISel::Select(SDValue Op) {
- SDNode *N = Op.getNode();
+SPUDAGToDAGISel::Select(SDNode *N) {
unsigned Opc = N->getOpcode();
int n_ops = -1;
unsigned NewOpc;
- EVT OpVT = Op.getValueType();
+ EVT OpVT = N->getValueType(0);
SDValue Ops[8];
DebugLoc dl = N->getDebugLoc();
@@ -700,8 +698,8 @@ SPUDAGToDAGISel::Select(SDValue Op) {
if (Opc == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType());
- SDValue Imm0 = CurDAG->getTargetConstant(0, Op.getValueType());
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
+ SDValue Imm0 = CurDAG->getTargetConstant(0, N->getValueType(0));
if (FI < 128) {
NewOpc = SPU::AIr32;
@@ -710,9 +708,9 @@ SPUDAGToDAGISel::Select(SDValue Op) {
n_ops = 2;
} else {
NewOpc = SPU::Ar32;
- Ops[0] = CurDAG->getRegister(SPU::R1, Op.getValueType());
+ Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0));
Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl,
- Op.getValueType(), TFI, Imm0),
+ N->getValueType(0), TFI, Imm0),
0);
n_ops = 2;
}
@@ -720,10 +718,10 @@ SPUDAGToDAGISel::Select(SDValue Op) {
// Catch the i64 constants that end up here. Note: The backend doesn't
// attempt to legalize the constant (it's useless because DAGCombiner
// will insert 64-bit constants and we can't stop it).
- return SelectI64Constant(Op, OpVT, Op.getDebugLoc());
+ return SelectI64Constant(N, OpVT, N->getDebugLoc());
} else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
&& OpVT == MVT::i64) {
- SDValue Op0 = Op.getOperand(0);
+ SDValue Op0 = N->getOperand(0);
EVT Op0VT = Op0.getValueType();
EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(),
Op0VT, (128 / Op0VT.getSizeInBits()));
@@ -760,9 +758,10 @@ SPUDAGToDAGISel::Select(SDValue Op) {
break;
}
- SDNode *shufMaskLoad = emitBuildVector(shufMask);
+ SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode());
SDNode *PromoteScalar =
- SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl, Op0VecVT, Op0));
+ SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
+ Op0VecVT, Op0).getNode());
SDValue zextShuffle =
CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
@@ -773,32 +772,32 @@ SPUDAGToDAGISel::Select(SDValue Op) {
// N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we
// re-use it in the VEC2PREFSLOT selection without needing to explicitly
// call SelectCode (it's already done for us.)
- SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, dl, OpVecVT, zextShuffle));
+ SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, dl, OpVecVT, zextShuffle).getNode());
return SelectCode(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
- zextShuffle));
+ zextShuffle).getNode());
} else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
- emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl));
+ emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
- Op.getOperand(0), Op.getOperand(1),
- SDValue(CGLoad, 0)));
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)).getNode());
} else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
- emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl));
+ emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode());
return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
- Op.getOperand(0), Op.getOperand(1),
- SDValue(CGLoad, 0)));
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)).getNode());
} else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
- emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl));
+ emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
- Op.getOperand(0), Op.getOperand(1),
- SDValue(CGLoad, 0)));
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)).getNode());
} else if (Opc == ISD::TRUNCATE) {
- SDValue Op0 = Op.getOperand(0);
+ SDValue Op0 = N->getOperand(0);
if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL)
&& OpVT == MVT::i32
&& Op0.getValueType() == MVT::i64) {
@@ -834,22 +833,22 @@ SPUDAGToDAGISel::Select(SDValue Op) {
}
} else if (Opc == ISD::SHL) {
if (OpVT == MVT::i64) {
- return SelectSHLi64(Op, OpVT);
+ return SelectSHLi64(N, OpVT);
}
} else if (Opc == ISD::SRL) {
if (OpVT == MVT::i64) {
- return SelectSRLi64(Op, OpVT);
+ return SelectSRLi64(N, OpVT);
}
} else if (Opc == ISD::SRA) {
if (OpVT == MVT::i64) {
- return SelectSRAi64(Op, OpVT);
+ return SelectSRAi64(N, OpVT);
}
} else if (Opc == ISD::FNEG
&& (OpVT == MVT::f64 || OpVT == MVT::v2f64)) {
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc dl = N->getDebugLoc();
// Check if the pattern is a special form of DFNMS:
// (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))
- SDValue Op0 = Op.getOperand(0);
+ SDValue Op0 = N->getOperand(0);
if (Op0.getOpcode() == ISD::FSUB) {
SDValue Op00 = Op0.getOperand(0);
if (Op00.getOpcode() == ISD::FMUL) {
@@ -869,28 +868,28 @@ SPUDAGToDAGISel::Select(SDValue Op) {
unsigned Opc = SPU::XORfneg64;
if (OpVT == MVT::f64) {
- signMask = SelectI64Constant(negConst, MVT::i64, dl);
+ signMask = SelectI64Constant(negConst.getNode(), MVT::i64, dl);
} else if (OpVT == MVT::v2f64) {
Opc = SPU::XORfnegvec;
signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl,
MVT::v2i64,
- negConst, negConst));
+ negConst, negConst).getNode());
}
return CurDAG->getMachineNode(Opc, dl, OpVT,
- Op.getOperand(0), SDValue(signMask, 0));
+ N->getOperand(0), SDValue(signMask, 0));
} else if (Opc == ISD::FABS) {
if (OpVT == MVT::f64) {
SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl);
return CurDAG->getMachineNode(SPU::ANDfabs64, dl, OpVT,
- Op.getOperand(0), SDValue(signMask, 0));
+ N->getOperand(0), SDValue(signMask, 0));
} else if (OpVT == MVT::v2f64) {
SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64);
SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
absConst, absConst);
- SDNode *signMask = emitBuildVector(absVec);
+ SDNode *signMask = emitBuildVector(absVec.getNode());
return CurDAG->getMachineNode(SPU::ANDfabsvec, dl, OpVT,
- Op.getOperand(0), SDValue(signMask, 0));
+ N->getOperand(0), SDValue(signMask, 0));
}
} else if (Opc == SPUISD::LDRESULT) {
// Custom select instructions for LDRESULT
@@ -925,7 +924,7 @@ SPUDAGToDAGISel::Select(SDValue Op) {
// SPUInstrInfo catches the following patterns:
// (SPUindirect (SPUhi ...), (SPUlo ...))
// (SPUindirect $sp, imm)
- EVT VT = Op.getValueType();
+ EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
RegisterSDNode *RN;
@@ -952,7 +951,7 @@ SPUDAGToDAGISel::Select(SDValue Op) {
else
return CurDAG->getMachineNode(NewOpc, dl, OpVT, Ops, n_ops);
} else
- return SelectCode(Op);
+ return SelectCode(N);
}
/*!
@@ -968,15 +967,15 @@ SPUDAGToDAGISel::Select(SDValue Op) {
* @return The SDNode with the entire instruction sequence
*/
SDNode *
-SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, EVT OpVT) {
- SDValue Op0 = Op.getOperand(0);
+SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) {
+ SDValue Op0 = N->getOperand(0);
EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
OpVT, (128 / OpVT.getSizeInBits()));
- SDValue ShiftAmt = Op.getOperand(1);
+ SDValue ShiftAmt = N->getOperand(1);
EVT ShiftAmtVT = ShiftAmt.getValueType();
SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0;
SDValue SelMaskVal;
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc dl = N->getDebugLoc();
VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16);
@@ -1034,14 +1033,14 @@ SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, EVT OpVT) {
* @return The SDNode with the entire instruction sequence
*/
SDNode *
-SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, EVT OpVT) {
- SDValue Op0 = Op.getOperand(0);
+SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) {
+ SDValue Op0 = N->getOperand(0);
EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
OpVT, (128 / OpVT.getSizeInBits()));
- SDValue ShiftAmt = Op.getOperand(1);
+ SDValue ShiftAmt = N->getOperand(1);
EVT ShiftAmtVT = ShiftAmt.getValueType();
SDNode *VecOp0, *Shift = 0;
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc dl = N->getDebugLoc();
VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
@@ -1101,16 +1100,16 @@ SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, EVT OpVT) {
* @return The SDNode with the entire instruction sequence
*/
SDNode *
-SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, EVT OpVT) {
+SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) {
// Promote Op0 to vector
EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
OpVT, (128 / OpVT.getSizeInBits()));
- SDValue ShiftAmt = Op.getOperand(1);
+ SDValue ShiftAmt = N->getOperand(1);
EVT ShiftAmtVT = ShiftAmt.getValueType();
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc dl = N->getDebugLoc();
SDNode *VecOp0 =
- CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op.getOperand(0));
+ CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, N->getOperand(0));
SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT);
SDNode *SignRot =
@@ -1170,9 +1169,9 @@ SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, EVT OpVT) {
/*!
Do the magic necessary to load an i64 constant
*/
-SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, EVT OpVT,
+SDNode *SPUDAGToDAGISel::SelectI64Constant(SDNode *N, EVT OpVT,
DebugLoc dl) {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
+ ConstantSDNode *CN = cast<ConstantSDNode>(N);
return SelectI64Constant(CN->getZExtValue(), OpVT, dl);
}
@@ -1192,7 +1191,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
ReplaceUses(i64vec, Op0);
return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT,
- SDValue(emitBuildVector(Op0), 0));
+ SDValue(emitBuildVector(Op0.getNode()), 0));
} else if (i64vec.getOpcode() == SPUISD::SHUFB) {
SDValue lhs = i64vec.getOperand(0);
SDValue rhs = i64vec.getOperand(1);
@@ -1205,7 +1204,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
SDNode *lhsNode = (lhs.getNode()->isMachineOpcode()
? lhs.getNode()
- : emitBuildVector(lhs));
+ : emitBuildVector(lhs.getNode()));
if (rhs.getOpcode() == ISD::BIT_CONVERT) {
ReplaceUses(rhs, rhs.getOperand(0));
@@ -1214,7 +1213,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
SDNode *rhsNode = (rhs.getNode()->isMachineOpcode()
? rhs.getNode()
- : emitBuildVector(rhs));
+ : emitBuildVector(rhs.getNode()));
if (shufmask.getOpcode() == ISD::BIT_CONVERT) {
ReplaceUses(shufmask, shufmask.getOperand(0));
@@ -1223,18 +1222,18 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode()
? shufmask.getNode()
- : emitBuildVector(shufmask));
+ : emitBuildVector(shufmask.getNode()));
SDNode *shufNode =
Select(CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
SDValue(lhsNode, 0), SDValue(rhsNode, 0),
- SDValue(shufMaskNode, 0)));
+ SDValue(shufMaskNode, 0)).getNode());
return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT,
SDValue(shufNode, 0));
} else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT,
- SDValue(emitBuildVector(i64vec), 0));
+ SDValue(emitBuildVector(i64vec.getNode()), 0));
} else {
llvm_report_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec"
"condition");
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 4d40769..4eec757 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -146,12 +146,12 @@ namespace {
private:
DenseMap<SDNode*, SDNode*> RMWStores;
void PreprocessForRMW();
- SDNode *Select(SDValue Op);
- SDNode *SelectIndexedLoad(SDValue Op);
- SDNode *SelectIndexedBinOp(SDValue Op, SDValue N1, SDValue N2,
+ SDNode *Select(SDNode *N);
+ SDNode *SelectIndexedLoad(SDNode *Op);
+ SDNode *SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2,
unsigned Opc8, unsigned Opc16);
- bool SelectAddr(SDValue Op, SDValue Addr, SDValue &Base, SDValue &Disp);
+ bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp);
#ifndef NDEBUG
unsigned Indent;
@@ -283,7 +283,7 @@ bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) {
/// SelectAddr - returns true if it is able pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
-bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue N,
+bool MSP430DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N,
SDValue &Base, SDValue &Disp) {
MSP430ISelAddressMode AM;
@@ -326,7 +326,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
switch (ConstraintCode) {
default: return true;
case 'm': // memory
- if (!SelectAddr(Op, Op, Op0, Op1))
+ if (!SelectAddr(Op.getNode(), Op, Op0, Op1))
return true;
break;
}
@@ -627,8 +627,8 @@ static bool isValidIndexedLoad(const LoadSDNode *LD) {
return true;
}
-SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDValue Op) {
- LoadSDNode *LD = cast<LoadSDNode>(Op);
+SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
if (!isValidIndexedLoad(LD))
return NULL;
@@ -646,17 +646,17 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDValue Op) {
return NULL;
}
- return CurDAG->getMachineNode(Opcode, Op.getDebugLoc(),
+ return CurDAG->getMachineNode(Opcode, N->getDebugLoc(),
VT, MVT::i16, MVT::Other,
LD->getBasePtr(), LD->getChain());
}
-SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDValue Op,
+SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op,
SDValue N1, SDValue N2,
unsigned Opc8, unsigned Opc16) {
if (N1.getOpcode() == ISD::LOAD &&
N1.hasOneUse() &&
- IsLegalAndProfitableToFold(N1.getNode(), Op.getNode(), Op.getNode())) {
+ IsLegalAndProfitableToFold(N1.getNode(), Op, Op)) {
LoadSDNode *LD = cast<LoadSDNode>(N1);
if (!isValidIndexedLoad(LD))
return NULL;
@@ -667,7 +667,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDValue Op,
MemRefs0[0] = cast<MemSDNode>(N1)->getMemOperand();
SDValue Ops0[] = { N2, LD->getBasePtr(), LD->getChain() };
SDNode *ResNode =
- CurDAG->SelectNodeTo(Op.getNode(), Opc,
+ CurDAG->SelectNodeTo(Op, Opc,
VT, MVT::i16, MVT::Other,
Ops0, 3);
cast<MachineSDNode>(ResNode)->setMemRefs(MemRefs0, MemRefs0 + 1);
@@ -707,9 +707,8 @@ void MSP430DAGToDAGISel::InstructionSelect() {
RMWStores.clear();
}
-SDNode *MSP430DAGToDAGISel::Select(SDValue Op) {
- SDNode *Node = Op.getNode();
- DebugLoc dl = Op.getDebugLoc();
+SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) {
+ DebugLoc dl = Node->getDebugLoc();
// Dump information about the Node being selected
DEBUG(errs().indent(Indent) << "Selecting: ");
@@ -730,7 +729,7 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) {
switch (Node->getOpcode()) {
default: break;
case ISD::FrameIndex: {
- assert(Op.getValueType() == MVT::i16);
+ assert(Node->getValueType(0) == MVT::i16);
int FI = cast<FrameIndexSDNode>(Node)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i16);
if (Node->hasOneUse())
@@ -740,18 +739,18 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) {
TFI, CurDAG->getTargetConstant(0, MVT::i16));
}
case ISD::LOAD:
- if (SDNode *ResNode = SelectIndexedLoad(Op))
+ if (SDNode *ResNode = SelectIndexedLoad(Node))
return ResNode;
// Other cases are autogenerated.
break;
case ISD::ADD:
if (SDNode *ResNode =
- SelectIndexedBinOp(Op,
- Op.getOperand(0), Op.getOperand(1),
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
return ResNode;
else if (SDNode *ResNode =
- SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0),
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
return ResNode;
@@ -759,8 +758,8 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) {
break;
case ISD::SUB:
if (SDNode *ResNode =
- SelectIndexedBinOp(Op,
- Op.getOperand(0), Op.getOperand(1),
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
MSP430::SUB8rm_POST, MSP430::SUB16rm_POST))
return ResNode;
@@ -768,12 +767,12 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) {
break;
case ISD::AND:
if (SDNode *ResNode =
- SelectIndexedBinOp(Op,
- Op.getOperand(0), Op.getOperand(1),
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
MSP430::AND8rm_POST, MSP430::AND16rm_POST))
return ResNode;
else if (SDNode *ResNode =
- SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0),
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
MSP430::AND8rm_POST, MSP430::AND16rm_POST))
return ResNode;
@@ -781,12 +780,12 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) {
break;
case ISD::OR:
if (SDNode *ResNode =
- SelectIndexedBinOp(Op,
- Op.getOperand(0), Op.getOperand(1),
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
MSP430::OR8rm_POST, MSP430::OR16rm_POST))
return ResNode;
else if (SDNode *ResNode =
- SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0),
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
MSP430::OR8rm_POST, MSP430::OR16rm_POST))
return ResNode;
@@ -794,12 +793,12 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) {
break;
case ISD::XOR:
if (SDNode *ResNode =
- SelectIndexedBinOp(Op,
- Op.getOperand(0), Op.getOperand(1),
+ SelectIndexedBinOp(Node,
+ Node->getOperand(0), Node->getOperand(1),
MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
return ResNode;
else if (SDNode *ResNode =
- SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0),
+ SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
return ResNode;
@@ -808,11 +807,11 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) {
}
// Select the default instruction
- SDNode *ResNode = SelectCode(Op);
+ SDNode *ResNode = SelectCode(Node);
DEBUG(errs() << std::string(Indent-2, ' ') << "=> ");
- if (ResNode == NULL || ResNode == Op.getNode())
- DEBUG(Op.getNode()->dump(CurDAG));
+ if (ResNode == NULL || ResNode == Node)
+ DEBUG(Node->dump(CurDAG));
else
DEBUG(ResNode->dump(CurDAG));
DEBUG(errs() << "\n");
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 5fe9b20..d3dce4b 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -660,16 +660,16 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
default: llvm_unreachable("Invalid integer condition!");
case ISD::SETEQ:
TCC = MSP430CC::COND_E; // aka COND_Z
- // Minor optimization: if RHS is a constant, swap operands, then the
+ // Minor optimization: if LHS is a constant, swap operands, then the
// constant can be folded into comparison.
- if (RHS.getOpcode() == ISD::Constant)
+ if (LHS.getOpcode() == ISD::Constant)
std::swap(LHS, RHS);
break;
case ISD::SETNE:
TCC = MSP430CC::COND_NE; // aka COND_NZ
- // Minor optimization: if RHS is a constant, swap operands, then the
+ // Minor optimization: if LHS is a constant, swap operands, then the
// constant can be folded into comparison.
- if (RHS.getOpcode() == ISD::Constant)
+ if (LHS.getOpcode() == ISD::Constant)
std::swap(LHS, RHS);
break;
case ISD::SETULE:
@@ -1014,8 +1014,8 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
// BB:
// cmp 0, N
// je RemBB
- BuildMI(BB, dl, TII.get(MSP430::CMP8ir))
- .addImm(0).addReg(ShiftAmtSrcReg);
+ BuildMI(BB, dl, TII.get(MSP430::CMP8ri))
+ .addReg(ShiftAmtSrcReg).addImm(0);
BuildMI(BB, dl, TII.get(MSP430::JCC))
.addMBB(RemBB)
.addImm(MSP430CC::COND_E);
@@ -1045,6 +1045,7 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
.addReg(SrcReg).addMBB(BB)
.addReg(ShiftReg2).addMBB(LoopBB);
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
return RemBB;
}
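The EmitCMP hunk re-canonicalizes comparisons for the rewritten instruction definitions below: a constant operand is now moved to the RHS, where the new CMPri/CMPmi patterns can fold it (the old code moved constants to the LHS for the removed CMPir/CMPim forms). In effect:

    // (setcc 4, x, seteq): move the constant over before matching.
    if (LHS.getOpcode() == ISD::Constant)  // was: RHS.getOpcode()
      std::swap(LHS, RHS);                 // now LHS = x, RHS = 4
    // -> selected as CMP16ri x, 4, with the immediate folded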
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index d67ba90..022d171 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -819,38 +819,40 @@ def SWPB16r : Pseudo<(outs GR16:$dst), (ins GR16:$src),
// Integer comparisons
let Defs = [SRW] in {
def CMP8rr : Pseudo<(outs), (ins GR8:$src1, GR8:$src2),
- "cmp.b\t{$src1, $src2}",
+ "cmp.b\t{$src2, $src1}",
[(MSP430cmp GR8:$src1, GR8:$src2), (implicit SRW)]>;
def CMP16rr : Pseudo<(outs), (ins GR16:$src1, GR16:$src2),
- "cmp.w\t{$src1, $src2}",
+ "cmp.w\t{$src2, $src1}",
[(MSP430cmp GR16:$src1, GR16:$src2), (implicit SRW)]>;
-def CMP8ir : Pseudo<(outs), (ins i8imm:$src1, GR8:$src2),
- "cmp.b\t{$src1, $src2}",
- [(MSP430cmp imm:$src1, GR8:$src2), (implicit SRW)]>;
-def CMP16ir : Pseudo<(outs), (ins i16imm:$src1, GR16:$src2),
- "cmp.w\t{$src1, $src2}",
- [(MSP430cmp imm:$src1, GR16:$src2), (implicit SRW)]>;
-
-def CMP8im : Pseudo<(outs), (ins i8imm:$src1, memsrc:$src2),
- "cmp.b\t{$src1, $src2}",
- [(MSP430cmp (i8 imm:$src1), (load addr:$src2)), (implicit SRW)]>;
-def CMP16im : Pseudo<(outs), (ins i16imm:$src1, memsrc:$src2),
- "cmp.w\t{$src1, $src2}",
- [(MSP430cmp (i16 imm:$src1), (load addr:$src2)), (implicit SRW)]>;
+def CMP8ri : Pseudo<(outs), (ins GR8:$src1, i8imm:$src2),
+ "cmp.b\t{$src2, $src1}",
+ [(MSP430cmp GR8:$src1, imm:$src2), (implicit SRW)]>;
+def CMP16ri : Pseudo<(outs), (ins GR16:$src1, i16imm:$src2),
+ "cmp.w\t{$src2, $src1}",
+ [(MSP430cmp GR16:$src1, imm:$src2), (implicit SRW)]>;
+
+def CMP8mi : Pseudo<(outs), (ins memsrc:$src1, i8imm:$src2),
+ "cmp.b\t{$src2, $src1}",
+ [(MSP430cmp (load addr:$src1),
+ (i8 imm:$src2)), (implicit SRW)]>;
+def CMP16mi : Pseudo<(outs), (ins memsrc:$src1, i16imm:$src2),
+ "cmp.w\t{$src2, $src1}",
+ [(MSP430cmp (load addr:$src1),
+ (i16 imm:$src2)), (implicit SRW)]>;
def CMP8rm : Pseudo<(outs), (ins GR8:$src1, memsrc:$src2),
- "cmp.b\t{$src1, $src2}",
+ "cmp.b\t{$src2, $src1}",
[(MSP430cmp GR8:$src1, (load addr:$src2)), (implicit SRW)]>;
def CMP16rm : Pseudo<(outs), (ins GR16:$src1, memsrc:$src2),
- "cmp.w\t{$src1, $src2}",
+ "cmp.w\t{$src2, $src1}",
[(MSP430cmp GR16:$src1, (load addr:$src2)), (implicit SRW)]>;
def CMP8mr : Pseudo<(outs), (ins memsrc:$src1, GR8:$src2),
- "cmp.b\t{$src1, $src2}",
+ "cmp.b\t{$src2, $src1}",
[(MSP430cmp (load addr:$src1), GR8:$src2), (implicit SRW)]>;
def CMP16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2),
- "cmp.w\t{$src1, $src2}",
+ "cmp.w\t{$src2, $src1}",
[(MSP430cmp (load addr:$src1), GR16:$src2), (implicit SRW)]>;
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index ede111d..a53e918 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -84,14 +84,14 @@ private:
}
SDNode *getGlobalBaseReg();
- SDNode *Select(SDValue N);
+ SDNode *Select(SDNode *N);
// Complex Pattern.
- bool SelectAddr(SDValue Op, SDValue N,
+ bool SelectAddr(SDNode *Op, SDValue N,
SDValue &Base, SDValue &Offset);
- SDNode *SelectLoadFp64(SDValue N);
- SDNode *SelectStoreFp64(SDValue N);
+ SDNode *SelectLoadFp64(SDNode *N);
+ SDNode *SelectStoreFp64(SDNode *N);
// getI32Imm - Return a target constant with the specified
// value, of type i32.
@@ -132,7 +132,7 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
/// ComplexPattern used on MipsInstrInfo
/// Used on Mips Load/Store instructions
bool MipsDAGToDAGISel::
-SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base)
+SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base)
{
// if Address is FI, get the TargetFrameIndex.
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
@@ -199,19 +199,19 @@ SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base)
return true;
}
-SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDValue N) {
+SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
MVT::SimpleValueType NVT =
- N.getNode()->getValueType(0).getSimpleVT().SimpleTy;
+ N->getValueType(0).getSimpleVT().SimpleTy;
if (!Subtarget.isMips1() || NVT != MVT::f64)
return NULL;
- if (!Predicate_unindexedload(N.getNode()) ||
- !Predicate_load(N.getNode()))
+ if (!Predicate_unindexedload(N) ||
+ !Predicate_load(N))
return NULL;
- SDValue Chain = N.getOperand(0);
- SDValue N1 = N.getOperand(1);
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
SDValue Offset0, Offset1, Base;
if (!SelectAddr(N, N1, Offset0, Base) ||
@@ -220,7 +220,7 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDValue N) {
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- DebugLoc dl = N.getDebugLoc();
+ DebugLoc dl = N->getDebugLoc();
  // The second load should start 4 bytes after the first.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0))
@@ -255,27 +255,27 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDValue N) {
SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPODD, dl,
MVT::f64, I0, SDValue(LD1, 0));
- ReplaceUses(N, I1);
- ReplaceUses(N.getValue(1), Chain);
+ ReplaceUses(SDValue(N, 0), I1);
+ ReplaceUses(SDValue(N, 1), Chain);
cast<MachineSDNode>(LD0)->setMemRefs(MemRefs0, MemRefs0 + 1);
cast<MachineSDNode>(LD1)->setMemRefs(MemRefs0, MemRefs0 + 1);
return I1.getNode();
}
-SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDValue N) {
+SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
if (!Subtarget.isMips1() ||
- N.getOperand(1).getValueType() != MVT::f64)
+ N->getOperand(1).getValueType() != MVT::f64)
return NULL;
- SDValue Chain = N.getOperand(0);
+ SDValue Chain = N->getOperand(0);
- if (!Predicate_unindexedstore(N.getNode()) ||
- !Predicate_store(N.getNode()))
+ if (!Predicate_unindexedstore(N) ||
+ !Predicate_store(N))
return NULL;
- SDValue N1 = N.getOperand(1);
- SDValue N2 = N.getOperand(2);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
SDValue Offset0, Offset1, Base;
if (!SelectAddr(N, N2, Offset0, Base) ||
@@ -285,7 +285,7 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDValue N) {
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- DebugLoc dl = N.getDebugLoc();
+ DebugLoc dl = N->getDebugLoc();
// Get the even and odd part from the f64 register
SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::SUBREG_FPODD,
@@ -318,14 +318,13 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDValue N) {
MVT::Other, Ops1, 4), 0);
cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1);
- ReplaceUses(N.getValue(0), Chain);
+ ReplaceUses(SDValue(N, 0), Chain);
return Chain.getNode();
}
/// Select instructions not customized! Used for
/// expanded, promoted and normal instructions
-SDNode* MipsDAGToDAGISel::Select(SDValue N) {
- SDNode *Node = N.getNode();
+SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
unsigned Opcode = Node->getOpcode();
DebugLoc dl = Node->getDebugLoc();
@@ -379,7 +378,7 @@ SDNode* MipsDAGToDAGISel::Select(SDValue N) {
SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT,
SDValue(Carry,0), RHS);
- return CurDAG->SelectNodeTo(N.getNode(), MOp, VT, MVT::Flag,
+ return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Flag,
LHS, SDValue(AddCarry,0));
}
@@ -405,11 +404,11 @@ SDNode* MipsDAGToDAGISel::Select(SDValue N) {
InFlag = SDValue(Lo,1);
SDNode *Hi = CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag);
- if (!N.getValue(0).use_empty())
- ReplaceUses(N.getValue(0), SDValue(Lo,0));
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0), SDValue(Lo,0));
- if (!N.getValue(1).use_empty())
- ReplaceUses(N.getValue(1), SDValue(Hi,0));
+ if (!SDValue(Node, 1).use_empty())
+ ReplaceUses(SDValue(Node, 1), SDValue(Hi,0));
return NULL;
}
@@ -460,23 +459,23 @@ SDNode* MipsDAGToDAGISel::Select(SDValue N) {
return getGlobalBaseReg();
case ISD::ConstantFP: {
- ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
- if (N.getValueType() == MVT::f64 && CN->isExactlyValue(+0.0)) {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
+ if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32);
- ReplaceUses(N, Zero);
+ ReplaceUses(SDValue(Node, 0), Zero);
return Zero.getNode();
}
break;
}
case ISD::LOAD:
- if (SDNode *ResNode = SelectLoadFp64(N))
+ if (SDNode *ResNode = SelectLoadFp64(Node))
return ResNode;
// Other cases are autogenerated.
break;
case ISD::STORE:
- if (SDNode *ResNode = SelectStoreFp64(N))
+ if (SDNode *ResNode = SelectStoreFp64(Node))
return ResNode;
// Other cases are autogenerated.
break;
@@ -523,11 +522,11 @@ SDNode* MipsDAGToDAGISel::Select(SDValue N) {
}
// Select the default instruction
- SDNode *ResNode = SelectCode(N);
+ SDNode *ResNode = SelectCode(Node);
DEBUG(errs().indent(Indent-2) << "=> ");
- if (ResNode == NULL || ResNode == N.getNode())
- DEBUG(N.getNode()->dump(CurDAG));
+ if (ResNode == NULL || ResNode == Node)
+ DEBUG(Node->dump(CurDAG));
else
DEBUG(ResNode->dump(CurDAG));
DEBUG(errs() << "\n");
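SelectLoadFp64 and SelectStoreFp64 exist because MIPS1 has no 64-bit FPU memory operations: an f64 is moved as two 4-byte accesses targeting the even and odd halves of the register pair. A sketch of how the second displacement is derived in the constant-offset case, matching the handling visible above:

    // Illustrative: the real code also handles global-address offsets.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0))
      Offset1 = CurDAG->getTargetConstant(C->getSExtValue() + 4, MVT::i32);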
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
index e13e6cd..82197ae 100644
--- a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
+++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
@@ -36,7 +36,7 @@ void PIC16DAGToDAGISel::InstructionSelect() {
/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
-SDNode* PIC16DAGToDAGISel::Select(SDValue N) {
+SDNode* PIC16DAGToDAGISel::Select(SDNode *N) {
// Select the default instruction.
SDNode *ResNode = SelectCode(N);
@@ -47,7 +47,7 @@ SDNode* PIC16DAGToDAGISel::Select(SDValue N) {
// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a globaladdress or externalsymbol.
-bool PIC16DAGToDAGISel::SelectDirectAddr(SDValue Op, SDValue N,
+bool PIC16DAGToDAGISel::SelectDirectAddr(SDNode *Op, SDValue N,
SDValue &Address) {
// Return true if TGA or ES.
if (N.getOpcode() == ISD::TargetGlobalAddress
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
index d9172f2..813a540 100644
--- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h
+++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
@@ -52,10 +52,10 @@ private:
// Include the pieces autogenerated from the target description.
#include "PIC16GenDAGISel.inc"
- SDNode *Select(SDValue N);
+ SDNode *Select(SDNode *N);
// Match direct address complex pattern.
- bool SelectDirectAddr(SDValue Op, SDValue N, SDValue &Address);
+ bool SelectDirectAddr(SDNode *Op, SDValue N, SDValue &Address);
};
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index aae4607..d505d38 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -49,6 +50,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/SmallString.h"
using namespace llvm;
STATISTIC(EmittedInsts, "Number of machine instrs printed");
@@ -57,27 +59,42 @@ namespace {
class PPCAsmPrinter : public AsmPrinter {
protected:
struct FnStubInfo {
- std::string Stub, LazyPtr, AnonSymbol;
+ MCSymbol *Stub, *LazyPtr, *AnonSymbol;
- FnStubInfo() {}
+ FnStubInfo() {
+ Stub = LazyPtr = AnonSymbol = 0;
+ }
- void Init(const GlobalValue *GV, Mangler *Mang) {
+ void Init(const GlobalValue *GV, Mangler *Mang, MCContext &Ctx) {
// Already initialized.
- if (!Stub.empty()) return;
- Stub = Mang->getMangledName(GV, "$stub", true);
- LazyPtr = Mang->getMangledName(GV, "$lazy_ptr", true);
- AnonSymbol = Mang->getMangledName(GV, "$stub$tmp", true);
+ if (Stub != 0) return;
+
+ // Get the names.
+ SmallString<128> TmpStr;
+ Mang->getNameWithPrefix(TmpStr, GV, true);
+ MakeSymbols(TmpStr, Ctx);
}
- void Init(const std::string &GV, Mangler *Mang) {
- // Already initialized.
- if (!Stub.empty()) return;
- Stub = Mang->makeNameProper(GV + "$stub",
- Mangler::Private);
- LazyPtr = Mang->makeNameProper(GV + "$lazy_ptr",
- Mangler::Private);
- AnonSymbol = Mang->makeNameProper(GV + "$stub$tmp",
- Mangler::Private);
+ void Init(StringRef GVName, Mangler *Mang, MCContext &Ctx) {
+ assert(!GVName.empty() && "external symbol name shouldn't be empty");
+ if (Stub != 0) return; // Already initialized.
+ // Get the names for the external symbol name.
+ SmallString<128> TmpStr;
+ Mang->getNameWithPrefix(TmpStr, GVName, Mangler::Private);
+ MakeSymbols(TmpStr, Ctx);
+ }
+
+ void MakeSymbols(SmallString<128> &TmpStr, MCContext &Ctx) {
+ TmpStr += "$stub";
+ Stub = Ctx.GetOrCreateSymbol(TmpStr.str());
+ TmpStr.erase(TmpStr.end()-5, TmpStr.end()); // Remove $stub
+
+ TmpStr += "$lazy_ptr";
+ LazyPtr = Ctx.GetOrCreateSymbol(TmpStr.str());
+ TmpStr.erase(TmpStr.end()-9, TmpStr.end()); // Remove $lazy_ptr
+
+ TmpStr += "$stub$tmp";
+ AnonSymbol = Ctx.GetOrCreateSymbol(TmpStr.str());
}
};
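With FnStubInfo holding MCSymbols instead of std::strings, callers build the three stub symbols once and print through the symbol rather than streaming raw text. A hedged usage sketch (GV, Mang, OutContext, O, MAI as in the surrounding printer code):

    FnStubInfo Info;
    Info.Init(GV, Mang, OutContext); // <name>$stub, $lazy_ptr, $stub$tmp
    Info.Stub->print(O, MAI);        // MCSymbol handles quoting for us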
@@ -224,15 +241,17 @@ namespace {
if (GV->isDeclaration() || GV->isWeakForLinker()) {
// Dynamically-resolved functions need a stub for the function.
FnStubInfo &FnInfo = FnStubs[Mang->getMangledName(GV)];
- FnInfo.Init(GV, Mang);
- O << FnInfo.Stub;
+ FnInfo.Init(GV, Mang, OutContext);
+ FnInfo.Stub->print(O, MAI);
return;
}
}
if (MO.getType() == MachineOperand::MO_ExternalSymbol) {
- FnStubInfo &FnInfo =FnStubs[Mang->makeNameProper(MO.getSymbolName())];
- FnInfo.Init(MO.getSymbolName(), Mang);
- O << FnInfo.Stub;
+ SmallString<128> MangledName;
+ Mang->getNameWithPrefix(MangledName, MO.getSymbolName());
+ FnStubInfo &FnInfo = FnStubs[MangledName.str()];
+ FnInfo.Init(MO.getSymbolName(), Mang, OutContext);
+ FnInfo.Stub->print(O, MAI);
return;
}
}
@@ -550,50 +569,49 @@ void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
processDebugLoc(MI, true);
// Check for slwi/srwi mnemonics.
+ bool useSubstituteMnemonic = false;
if (MI->getOpcode() == PPC::RLWINM) {
- bool FoundMnemonic = false;
unsigned char SH = MI->getOperand(2).getImm();
unsigned char MB = MI->getOperand(3).getImm();
unsigned char ME = MI->getOperand(4).getImm();
if (SH <= 31 && MB == 0 && ME == (31-SH)) {
- O << "\tslwi "; FoundMnemonic = true;
+ O << "\tslwi "; useSubstituteMnemonic = true;
}
if (SH <= 31 && MB == (32-SH) && ME == 31) {
- O << "\tsrwi "; FoundMnemonic = true;
+ O << "\tsrwi "; useSubstituteMnemonic = true;
SH = 32-SH;
}
- if (FoundMnemonic) {
+ if (useSubstituteMnemonic) {
printOperand(MI, 0);
O << ", ";
printOperand(MI, 1);
- O << ", " << (unsigned int)SH << '\n';
- return;
+ O << ", " << (unsigned int)SH;
}
} else if (MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) {
if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+ useSubstituteMnemonic = true;
O << "\tmr ";
printOperand(MI, 0);
O << ", ";
printOperand(MI, 1);
- O << '\n';
- return;
}
} else if (MI->getOpcode() == PPC::RLDICR) {
unsigned char SH = MI->getOperand(2).getImm();
unsigned char ME = MI->getOperand(3).getImm();
// rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
if (63-SH == ME) {
+ useSubstituteMnemonic = true;
O << "\tsldi ";
printOperand(MI, 0);
O << ", ";
printOperand(MI, 1);
- O << ", " << (unsigned int)SH << '\n';
- return;
+ O << ", " << (unsigned int)SH;
}
}
- printInstruction(MI);
-
+ if (!useSubstituteMnemonic)
+ printInstruction(MI);
+
if (VerboseAsm)
EmitComments(*MI);
O << '\n';
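The substituted mnemonics are rotate-and-mask special cases; the conditions above encode the standard equivalences slwi rD,rS,SH == rlwinm rD,rS,SH,0,31-SH and srwi rD,rS,SH == rlwinm rD,rS,32-SH,SH,31. The same tests as standalone predicates (illustrative names, not in the tree):

    static bool isSlwi(unsigned SH, unsigned MB, unsigned ME) {
      return SH <= 31 && MB == 0 && ME == 31 - SH;  // shift left by SH
    }
    static bool isSrwi(unsigned SH, unsigned MB, unsigned ME) {
      return SH <= 31 && MB == 32 - SH && ME == 31; // shift right by 32-SH
    }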
@@ -1038,27 +1056,38 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
MCSectionMachO::S_SYMBOL_STUBS |
MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
32, SectionKind::getText());
- for (StringMap<FnStubInfo>::iterator I = FnStubs.begin(), E = FnStubs.end();
+ for (StringMap<FnStubInfo>::iterator I = FnStubs.begin(), E = FnStubs.end();
I != E; ++I) {
OutStreamer.SwitchSection(StubSection);
EmitAlignment(4);
const FnStubInfo &Info = I->second;
- O << Info.Stub << ":\n";
+ Info.Stub->print(O, MAI);
+ O << ":\n";
O << "\t.indirect_symbol " << I->getKeyData() << '\n';
O << "\tmflr r0\n";
- O << "\tbcl 20,31," << Info.AnonSymbol << '\n';
- O << Info.AnonSymbol << ":\n";
+ O << "\tbcl 20,31,";
+ Info.AnonSymbol->print(O, MAI);
+ O << '\n';
+ Info.AnonSymbol->print(O, MAI);
+ O << ":\n";
O << "\tmflr r11\n";
- O << "\taddis r11,r11,ha16(" << Info.LazyPtr << "-" << Info.AnonSymbol;
+ O << "\taddis r11,r11,ha16(";
+ Info.LazyPtr->print(O, MAI);
+ O << '-';
+ Info.AnonSymbol->print(O, MAI);
O << ")\n";
O << "\tmtlr r0\n";
O << (isPPC64 ? "\tldu" : "\tlwzu") << " r12,lo16(";
- O << Info.LazyPtr << "-" << Info.AnonSymbol << ")(r11)\n";
+ Info.LazyPtr->print(O, MAI);
+ O << '-';
+ Info.AnonSymbol->print(O, MAI);
+ O << ")(r11)\n";
O << "\tmtctr r12\n";
O << "\tbctr\n";
OutStreamer.SwitchSection(LSPSection);
- O << Info.LazyPtr << ":\n";
+ Info.LazyPtr->print(O, MAI);
+ O << ":\n";
O << "\t.indirect_symbol " << I->getKeyData() << '\n';
O << (isPPC64 ? "\t.quad" : "\t.long") << " dyld_stub_binding_helper\n";
}
@@ -1074,15 +1103,20 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
OutStreamer.SwitchSection(StubSection);
EmitAlignment(4);
const FnStubInfo &Info = I->second;
- O << Info.Stub << ":\n";
+ Info.Stub->print(O, MAI);
+ O << ":\n";
O << "\t.indirect_symbol " << I->getKeyData() << '\n';
- O << "\tlis r11,ha16(" << Info.LazyPtr << ")\n";
+ O << "\tlis r11,ha16(";
+ Info.LazyPtr->print(O, MAI);
+ O << ")\n";
O << (isPPC64 ? "\tldu" : "\tlwzu") << " r12,lo16(";
- O << Info.LazyPtr << ")(r11)\n";
+ Info.LazyPtr->print(O, MAI);
+ O << ")(r11)\n";
O << "\tmtctr r12\n";
O << "\tbctr\n";
OutStreamer.SwitchSection(LSPSection);
- O << Info.LazyPtr << ":\n";
+ Info.LazyPtr->print(O, MAI);
+ O << ":\n";
O << "\t.indirect_symbol " << I->getKeyData() << '\n';
O << (isPPC64 ? "\t.quad" : "\t.long") << " dyld_stub_binding_helper\n";
}
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index e7334b5..32c1879 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -95,7 +95,7 @@ namespace {
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
- SDNode *Select(SDValue Op);
+ SDNode *Select(SDNode *N);
SDNode *SelectBitfieldInsert(SDNode *N);
@@ -105,7 +105,7 @@ namespace {
/// SelectAddrImm - Returns true if the address N can be represented by
/// a base register plus a signed 16-bit displacement [r+imm].
- bool SelectAddrImm(SDValue Op, SDValue N, SDValue &Disp,
+ bool SelectAddrImm(SDNode *Op, SDValue N, SDValue &Disp,
SDValue &Base) {
return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG);
}
@@ -113,7 +113,7 @@ namespace {
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
/// immediate field. Because preinc imms have already been validated, just
/// accept it.
- bool SelectAddrImmOffs(SDValue Op, SDValue N, SDValue &Out) const {
+ bool SelectAddrImmOffs(SDNode *Op, SDValue N, SDValue &Out) const {
Out = N;
return true;
}
@@ -121,14 +121,14 @@ namespace {
  /// SelectAddrIdx - Given the specified address, check to see if it can be
/// represented as an indexed [r+r] operation. Returns false if it can
/// be represented by [r+imm], which are preferred.
- bool SelectAddrIdx(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectAddrIdx(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Index) {
return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG);
}
  /// SelectAddrIdxOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
- bool SelectAddrIdxOnly(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectAddrIdxOnly(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Index) {
return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
}
@@ -136,7 +136,7 @@ namespace {
/// SelectAddrImmShift - Returns true if the address N can be represented by
/// a base register plus a signed 14-bit displacement [r+imm*4]. Suitable
/// for use by STD and friends.
- bool SelectAddrImmShift(SDValue Op, SDValue N, SDValue &Disp,
+ bool SelectAddrImmShift(SDNode *Op, SDValue N, SDValue &Disp,
SDValue &Base) {
return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
}
@@ -180,7 +180,7 @@ namespace {
#include "PPCGenDAGISel.inc"
private:
- SDNode *SelectSETCC(SDValue Op);
+ SDNode *SelectSETCC(SDNode *N);
};
}
@@ -635,8 +635,7 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) {
return 0;
}
-SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) {
- SDNode *N = Op.getNode();
+SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
unsigned Imm;
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
@@ -756,9 +755,8 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) {
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
-SDNode *PPCDAGToDAGISel::Select(SDValue Op) {
- SDNode *N = Op.getNode();
- DebugLoc dl = Op.getDebugLoc();
+SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
if (N->isMachineOpcode())
return NULL; // Already selected.
@@ -841,18 +839,18 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) {
}
case ISD::SETCC:
- return SelectSETCC(Op);
+ return SelectSETCC(N);
case PPCISD::GlobalBaseReg:
return getGlobalBaseReg();
case ISD::FrameIndex: {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType());
- unsigned Opc = Op.getValueType() == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
+ unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
if (N->hasOneUse())
- return CurDAG->SelectNodeTo(N, Opc, Op.getValueType(), TFI,
+ return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), TFI,
getSmallIPtrImm(0));
- return CurDAG->getMachineNode(Opc, dl, Op.getValueType(), TFI,
+ return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
getSmallIPtrImm(0));
}
@@ -899,7 +897,7 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) {
case ISD::LOAD: {
// Handle preincrement loads.
- LoadSDNode *LD = cast<LoadSDNode>(Op);
+ LoadSDNode *LD = cast<LoadSDNode>(N);
EVT LoadedVT = LD->getMemoryVT();
// Normal loads are handled by code generated from the .td file.
@@ -1092,7 +1090,7 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) {
}
}
- return SelectCode(Op);
+ return SelectCode(N);
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 8fe151a..842f8ee 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -430,9 +430,7 @@ let isCall = 1, PPC970_Unit = 7,
F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
LR,CTR,
- CR0,CR1,CR5,CR6,CR7,
- CR0LT,CR0GT,CR0EQ,CR0UN,CR1LT,CR1GT,CR1EQ,CR1UN,CR5LT,CR5GT,CR5EQ,
- CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN,CARRY] in {
+ CR0,CR1,CR5,CR6,CR7,CARRY] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL_Darwin : IForm<18, 0, 1,
@@ -457,9 +455,7 @@ let isCall = 1, PPC970_Unit = 7,
F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
LR,CTR,
- CR0,CR1,CR5,CR6,CR7,
- CR0LT,CR0GT,CR0EQ,CR0UN,CR1LT,CR1GT,CR1EQ,CR1UN,CR5LT,CR5GT,CR5EQ,
- CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN,CARRY] in {
+ CR0,CR1,CR5,CR6,CR7,CARRY] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL_SVR4 : IForm<18, 0, 1,
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index be6e51e..daf4ec6 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -308,6 +308,7 @@ extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4,
// Rewrite the stub with an unconditional branch to the target, for any users
// who took the address of the stub.
EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache(StubCallAddr, 7*4);
// Put the address of the target function to call and the address to return to
// after calling the target function in a place that is easy to get on the
@@ -441,4 +442,5 @@ void PPCJITInfo::relocate(void *Function, MachineRelocation *MR,
void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache(Old, 7*4);
}
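Both call sites follow the same patch-then-flush pattern: the PowerPC instruction cache is not coherent with data-side stores, so after rewriting the 7-instruction (28-byte) stub the range must be invalidated or a core may keep executing the stale branch. A sketch with illustrative Stub/Target names:

    EmitBranchToAt((intptr_t)Stub, (intptr_t)Target, false, is64Bit);
    sys::Memory::InvalidateInstructionCache(Stub, 7*4); // 7 insts x 4 bytes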
diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/PPCMCAsmInfo.cpp
index c87879b..ee6deb5 100644
--- a/lib/Target/PowerPC/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/PPCMCAsmInfo.cpp
@@ -22,6 +22,7 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
if (!is64Bit)
Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode.
AssemblerDialect = 1; // New-Style mnemonics.
+  SupportsDebugInformation = true; // Debug information.
}
PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index f5e50fc..060d6a5 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -7,6 +7,39 @@ TODO:
===-------------------------------------------------------------------------===
+On PPC64, this:
+
+long f2 (long x) { return 0xfffffff000000000UL; }
+long f3 (long x) { return 0x1ffffffffUL; }
+
+could compile into:
+
+_f2:
+ li r3,-1
+ rldicr r3,r3,0,27
+ blr
+_f3:
+ li r3,-1
+ rldicl r3,r3,0,31
+ blr
+
+we produce:
+
+_f2:
+ lis r2, 4095
+ ori r2, r2, 65535
+ sldi r3, r2, 36
+ blr
+_f3:
+ li r2, 1
+ sldi r2, r2, 32
+ oris r2, r2, 65535
+ ori r3, r2, 65535
+ blr
+
+
+===-------------------------------------------------------------------------===
+
Support 'update' load/store instructions. These are cracked on the G5, but are
still a codesize win.
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index a6e05fa..69da35f 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -282,19 +282,6 @@ this requires TBAA.
//===---------------------------------------------------------------------===//
-This should be optimized to one 'and' and one 'or', from PR4216:
-
-define i32 @test_bitfield(i32 %bf.prev.low) nounwind ssp {
-entry:
- %bf.prev.lo.cleared10 = or i32 %bf.prev.low, 32962 ; <i32> [#uses=1]
- %0 = and i32 %bf.prev.low, -65536 ; <i32> [#uses=1]
- %1 = and i32 %bf.prev.lo.cleared10, 40186 ; <i32> [#uses=1]
- %2 = or i32 %1, %0 ; <i32> [#uses=1]
- ret i32 %2
-}
-
-//===---------------------------------------------------------------------===//
-
This isn't recognized as bswap by instcombine (yes, it really is bswap):
unsigned long reverse(unsigned v) {
@@ -1661,38 +1648,9 @@ would delete the or instruction for us.
//===---------------------------------------------------------------------===//
-FunctionAttrs is not marking this function as readnone (just readonly):
-$ clang t.c -emit-llvm -S -o - -O0 | opt -mem2reg -S -functionattrs
-
-int t(int a, int b, int c) {
- int *p;
- if (a)
- p = &a;
- else
- p = &c;
- return *p;
-}
-
-This is because we codegen this to:
-
-define i32 @t(i32 %a, i32 %b, i32 %c) nounwind readonly ssp {
-entry:
- %a.addr = alloca i32 ; <i32*> [#uses=3]
- %c.addr = alloca i32 ; <i32*> [#uses=2]
-...
-
-if.end:
- %p.0 = phi i32* [ %a.addr, %if.then ], [ %c.addr, %if.else ]
- %tmp2 = load i32* %p.0 ; <i32> [#uses=1]
- ret i32 %tmp2
-}
-
-And functionattrs doesn't realize that the p.0 load points to function local
-memory.
-
-Also, functionattrs doesn't know about memcpy/memset. This function should be
-marked readnone, since it only twiddles local memory, but functionattrs doesn't
-handle memset/memcpy/memmove aggressively:
+functionattrs doesn't know much about memcpy/memset. This function should be
+marked readnone rather than readonly, since it only twiddles local memory, but
+functionattrs doesn't handle memset/memcpy/memmove aggressively:
struct X { int *p; int *q; };
int foo() {
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index b41917e..e1b3299 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -43,11 +43,11 @@ public:
TM(tm) {
}
- SDNode *Select(SDValue Op);
+ SDNode *Select(SDNode *N);
// Complex Pattern Selectors.
- bool SelectADDRrr(SDValue Op, SDValue N, SDValue &R1, SDValue &R2);
- bool SelectADDRri(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectADDRrr(SDNode *Op, SDValue N, SDValue &R1, SDValue &R2);
+ bool SelectADDRri(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Offset);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
@@ -87,7 +87,7 @@ SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
-bool SparcDAGToDAGISel::SelectADDRri(SDValue Op, SDValue Addr,
+bool SparcDAGToDAGISel::SelectADDRri(SDNode *Op, SDValue Addr,
SDValue &Base, SDValue &Offset) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
@@ -128,7 +128,7 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Op, SDValue Addr,
return true;
}
-bool SparcDAGToDAGISel::SelectADDRrr(SDValue Op, SDValue Addr,
+bool SparcDAGToDAGISel::SelectADDRrr(SDNode *Op, SDValue Addr,
SDValue &R1, SDValue &R2) {
if (Addr.getOpcode() == ISD::FrameIndex) return false;
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
@@ -152,8 +152,7 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Op, SDValue Addr,
return true;
}
-SDNode *SparcDAGToDAGISel::Select(SDValue Op) {
- SDNode *N = Op.getNode();
+SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
if (N->isMachineOpcode())
return NULL; // Already selected.
@@ -199,7 +198,7 @@ SDNode *SparcDAGToDAGISel::Select(SDValue Op) {
}
}
- return SelectCode(Op);
+ return SelectCode(N);
}
@@ -213,8 +212,8 @@ SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
switch (ConstraintCode) {
default: return true;
case 'm': // memory
- if (!SelectADDRrr(Op, Op, Op0, Op1))
- SelectADDRri(Op, Op, Op0, Op1);
+ if (!SelectADDRrr(Op.getNode(), Op, Op0, Op1))
+ SelectADDRri(Op.getNode(), Op, Op0, Op1);
break;
}
diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp
index 590574e..7cc4fd1 100644
--- a/lib/Target/SubtargetFeature.cpp
+++ b/lib/Target/SubtargetFeature.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/SubtargetFeature.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/StringExtras.h"
#include <algorithm>
@@ -355,7 +356,7 @@ void SubtargetFeatures::print(raw_ostream &OS) const {
/// dump - Dump feature info.
///
void SubtargetFeatures::dump() const {
- print(errs());
+ print(dbgs());
}
/// getDefaultSubtargetFeatures - Return a string listing
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index d64611d..7096c0e 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -128,23 +128,23 @@ namespace {
#include "SystemZGenDAGISel.inc"
private:
- bool SelectAddrRI12Only(SDValue Op, SDValue& Addr,
+ bool SelectAddrRI12Only(SDNode *Op, SDValue& Addr,
SDValue &Base, SDValue &Disp);
- bool SelectAddrRI12(SDValue Op, SDValue& Addr,
+ bool SelectAddrRI12(SDNode *Op, SDValue& Addr,
SDValue &Base, SDValue &Disp,
bool is12BitOnly = false);
- bool SelectAddrRI(SDValue Op, SDValue& Addr,
+ bool SelectAddrRI(SDNode *Op, SDValue& Addr,
SDValue &Base, SDValue &Disp);
- bool SelectAddrRRI12(SDValue Op, SDValue Addr,
+ bool SelectAddrRRI12(SDNode *Op, SDValue Addr,
SDValue &Base, SDValue &Disp, SDValue &Index);
- bool SelectAddrRRI20(SDValue Op, SDValue Addr,
+ bool SelectAddrRRI20(SDNode *Op, SDValue Addr,
SDValue &Base, SDValue &Disp, SDValue &Index);
- bool SelectLAAddr(SDValue Op, SDValue Addr,
+ bool SelectLAAddr(SDNode *Op, SDValue Addr,
SDValue &Base, SDValue &Disp, SDValue &Index);
- SDNode *Select(SDValue Op);
+ SDNode *Select(SDNode *Node);
- bool TryFoldLoad(SDValue P, SDValue N,
+ bool TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Disp, SDValue &Index);
bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
@@ -367,12 +367,12 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZRRIAddressMode &AM,
/// Returns true if the address can be represented by a base register plus
/// an unsigned 12-bit displacement [r+imm].
-bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDValue Op, SDValue& Addr,
+bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDNode *Op, SDValue& Addr,
SDValue &Base, SDValue &Disp) {
return SelectAddrRI12(Op, Addr, Base, Disp, /*is12BitOnly*/true);
}
-bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue Op, SDValue& Addr,
+bool SystemZDAGToDAGISel::SelectAddrRI12(SDNode *Op, SDValue& Addr,
SDValue &Base, SDValue &Disp,
bool is12BitOnly) {
SystemZRRIAddressMode AM20(/*isRI*/true), AM12(/*isRI*/true);
@@ -422,7 +422,7 @@ bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue Op, SDValue& Addr,
/// Returns true if the address can be represented by a base register plus
/// a signed 20-bit displacement [r+imm].
-bool SystemZDAGToDAGISel::SelectAddrRI(SDValue Op, SDValue& Addr,
+bool SystemZDAGToDAGISel::SelectAddrRI(SDNode *Op, SDValue& Addr,
SDValue &Base, SDValue &Disp) {
SystemZRRIAddressMode AM(/*isRI*/true);
bool Done = false;
@@ -465,7 +465,7 @@ bool SystemZDAGToDAGISel::SelectAddrRI(SDValue Op, SDValue& Addr,
/// Returns true if the address can be represented by a base register plus
/// index register plus an unsigned 12-bit displacement [base + idx + imm].
-bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Op, SDValue Addr,
+bool SystemZDAGToDAGISel::SelectAddrRRI12(SDNode *Op, SDValue Addr,
SDValue &Base, SDValue &Disp, SDValue &Index) {
SystemZRRIAddressMode AM20, AM12;
bool Done = false;
@@ -514,7 +514,7 @@ bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Op, SDValue Addr,
/// Returns true if the address can be represented by a base register plus
/// index register plus a signed 20-bit displacement [base + idx + imm].
-bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Op, SDValue Addr,
+bool SystemZDAGToDAGISel::SelectAddrRRI20(SDNode *Op, SDValue Addr,
SDValue &Base, SDValue &Disp, SDValue &Index) {
SystemZRRIAddressMode AM;
bool Done = false;
@@ -558,7 +558,7 @@ bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Op, SDValue Addr,
/// SelectLAAddr - Calls SelectAddr and determines whether the maximal addressing
/// mode it matches can be cost-effectively emitted as an LA/LAY instruction.
-bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Op, SDValue Addr,
+bool SystemZDAGToDAGISel::SelectLAAddr(SDNode *Op, SDValue Addr,
SDValue &Base, SDValue &Disp, SDValue &Index) {
SystemZRRIAddressMode AM;
@@ -591,11 +591,11 @@ bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Op, SDValue Addr,
return false;
}
-bool SystemZDAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
+bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Disp, SDValue &Index) {
if (ISD::isNON_EXTLoad(N.getNode()) &&
N.hasOneUse() &&
- IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode()))
+ IsLegalAndProfitableToFold(N.getNode(), P, P))
return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index);
return false;
}
@@ -612,10 +612,9 @@ void SystemZDAGToDAGISel::InstructionSelect() {
CurDAG->RemoveDeadNodes();
}
-SDNode *SystemZDAGToDAGISel::Select(SDValue Op) {
- SDNode *Node = Op.getNode();
+SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
EVT NVT = Node->getValueType(0);
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc dl = Node->getDebugLoc();
unsigned Opcode = Node->getOpcode();
// Dump information about the Node being selected
@@ -643,20 +642,20 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) {
EVT ResVT;
bool is32Bit = false;
switch (NVT.getSimpleVT().SimpleTy) {
- default: assert(0 && "Unsupported VT!");
- case MVT::i32:
- Opc = SystemZ::SDIVREM32r; MOpc = SystemZ::SDIVREM32m;
- ResVT = MVT::v2i64;
- is32Bit = true;
- break;
- case MVT::i64:
- Opc = SystemZ::SDIVREM64r; MOpc = SystemZ::SDIVREM64m;
- ResVT = MVT::v2i64;
- break;
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i32:
+ Opc = SystemZ::SDIVREM32r; MOpc = SystemZ::SDIVREM32m;
+ ResVT = MVT::v2i64;
+ is32Bit = true;
+ break;
+ case MVT::i64:
+ Opc = SystemZ::SDIVREM64r; MOpc = SystemZ::SDIVREM64m;
+ ResVT = MVT::v2i64;
+ break;
}
SDValue Tmp0, Tmp1, Tmp2;
- bool foldedLoad = TryFoldLoad(Op, N1, Tmp0, Tmp1, Tmp2);
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2);
// Prepare the dividend
SDNode *Dividend;
@@ -677,16 +676,16 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) {
SDValue DivVal = SDValue(Dividend, 0);
if (foldedLoad) {
SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
- Result = CurDAG->getMachineNode(MOpc, dl, ResVT,
+ Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other,
Ops, array_lengthof(Ops));
// Update the chain.
- ReplaceUses(N1.getValue(1), SDValue(Result, 0));
+ ReplaceUses(N1.getValue(1), SDValue(Result, 1));
} else {
Result = CurDAG->getMachineNode(Opc, dl, ResVT, SDValue(Dividend, 0), N1);
}
// Copy the division (odd subreg) result, if it is needed.
- if (!Op.getValue(0).use_empty()) {
+ if (!SDValue(Node, 0).use_empty()) {
unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd);
SDNode *Div = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG,
dl, NVT,
@@ -694,14 +693,14 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) {
CurDAG->getTargetConstant(SubRegIdx,
MVT::i32));
- ReplaceUses(Op.getValue(0), SDValue(Div, 0));
+ ReplaceUses(SDValue(Node, 0), SDValue(Div, 0));
DEBUG(errs().indent(Indent-2) << "=> ";
Result->dump(CurDAG);
errs() << "\n");
}
// Copy the remainder (even subreg) result, if it is needed.
- if (!Op.getValue(1).use_empty()) {
+ if (!SDValue(Node, 1).use_empty()) {
unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even);
SDNode *Rem = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG,
dl, NVT,
@@ -709,7 +708,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) {
CurDAG->getTargetConstant(SubRegIdx,
MVT::i32));
- ReplaceUses(Op.getValue(1), SDValue(Rem, 0));
+ ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0));
DEBUG(errs().indent(Indent-2) << "=> ";
Result->dump(CurDAG);
errs() << "\n");
@@ -729,22 +728,22 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) {
bool is32Bit = false;
switch (NVT.getSimpleVT().SimpleTy) {
- default: assert(0 && "Unsupported VT!");
- case MVT::i32:
- Opc = SystemZ::UDIVREM32r; MOpc = SystemZ::UDIVREM32m;
- ClrOpc = SystemZ::MOV64Pr0_even;
- ResVT = MVT::v2i32;
- is32Bit = true;
- break;
- case MVT::i64:
- Opc = SystemZ::UDIVREM64r; MOpc = SystemZ::UDIVREM64m;
- ClrOpc = SystemZ::MOV128r0_even;
- ResVT = MVT::v2i64;
- break;
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i32:
+ Opc = SystemZ::UDIVREM32r; MOpc = SystemZ::UDIVREM32m;
+ ClrOpc = SystemZ::MOV64Pr0_even;
+ ResVT = MVT::v2i32;
+ is32Bit = true;
+ break;
+ case MVT::i64:
+ Opc = SystemZ::UDIVREM64r; MOpc = SystemZ::UDIVREM64m;
+ ClrOpc = SystemZ::MOV128r0_even;
+ ResVT = MVT::v2i64;
+ break;
}
SDValue Tmp0, Tmp1, Tmp2;
- bool foldedLoad = TryFoldLoad(Op, N1, Tmp0, Tmp1, Tmp2);
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2);
// Prepare the dividend
SDNode *Dividend = N0.getNode();
@@ -767,37 +766,37 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) {
SDNode *Result;
if (foldedLoad) {
SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
- Result = CurDAG->getMachineNode(MOpc, dl,ResVT,
+ Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other,
Ops, array_lengthof(Ops));
// Update the chain.
- ReplaceUses(N1.getValue(1), SDValue(Result, 0));
+ ReplaceUses(N1.getValue(1), SDValue(Result, 1));
} else {
Result = CurDAG->getMachineNode(Opc, dl, ResVT, DivVal, N1);
}
// Copy the division (odd subreg) result, if it is needed.
- if (!Op.getValue(0).use_empty()) {
+ if (!SDValue(Node, 0).use_empty()) {
unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd);
SDNode *Div = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG,
dl, NVT,
SDValue(Result, 0),
CurDAG->getTargetConstant(SubRegIdx,
MVT::i32));
- ReplaceUses(Op.getValue(0), SDValue(Div, 0));
+ ReplaceUses(SDValue(Node, 0), SDValue(Div, 0));
DEBUG(errs().indent(Indent-2) << "=> ";
Result->dump(CurDAG);
errs() << "\n");
}
// Copy the remainder (even subreg) result, if it is needed.
- if (!Op.getValue(1).use_empty()) {
+ if (!SDValue(Node, 1).use_empty()) {
unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even);
SDNode *Rem = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG,
dl, NVT,
SDValue(Result, 0),
CurDAG->getTargetConstant(SubRegIdx,
MVT::i32));
- ReplaceUses(Op.getValue(1), SDValue(Rem, 0));
+ ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0));
DEBUG(errs().indent(Indent-2) << "=> ";
Result->dump(CurDAG);
errs() << "\n");
@@ -812,11 +811,11 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) {
}
// Select the default instruction
- SDNode *ResNode = SelectCode(Op);
+ SDNode *ResNode = SelectCode(Node);
DEBUG(errs().indent(Indent-2) << "=> ";
- if (ResNode == NULL || ResNode == Op.getNode())
- Op.getNode()->dump(CurDAG);
+ if (ResNode == NULL || ResNode == Node)
+ Node->dump(CurDAG);
else
ResNode->dump(CurDAG);
errs() << "\n";
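The MVT::Other additions are load-bearing: once the divide folds its load operand, the resulting machine node must also produce the load's chain. Declaring a second (chain) result and redirecting the old chain users to result 1 is what keeps the DAG consistent:

    // Mirrors the folded-load case above: value is result 0, chain result 1.
    SDNode *Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other,
                                            Ops, array_lengthof(Ops));
    ReplaceUses(N1.getValue(1), SDValue(Result, 1));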
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index cddf49e..f5c969a 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -34,7 +34,7 @@ char *LLVMCopyStringRepOfTargetData(LLVMTargetDataRef TD) {
}
LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef TD) {
- return unwrap(TD)->isLittleEndian();
+ return unwrap(TD)->isLittleEndian() ? LLVMLittleEndian : LLVMBigEndian;
}
unsigned LLVMPointerSize(LLVMTargetDataRef TD) {
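Returning the enumerator explicitly keeps the C binding honest: the old code relied on an implicit bool-to-enum conversion, which is only correct while the enum happens to list LLVMBigEndian first. A hedged usage sketch of the C API (the leading "e" marks a little-endian layout string):

    LLVMTargetDataRef TD = LLVMCreateTargetData("e-p:32:32");
    if (LLVMByteOrder(TD) == LLVMLittleEndian) {
      // little-endian target data
    }
    LLVMDisposeTargetData(TD);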
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index f887523..70e8008 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -21,11 +21,13 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
using namespace llvm;
@@ -492,16 +494,15 @@ getELFKindForNamedSection(const char *Name, SectionKind K) {
}
-static unsigned
-getELFSectionType(const char *Name, SectionKind K) {
+static unsigned getELFSectionType(StringRef Name, SectionKind K) {
- if (strcmp(Name, ".init_array") == 0)
+ if (Name == ".init_array")
return MCSectionELF::SHT_INIT_ARRAY;
- if (strcmp(Name, ".fini_array") == 0)
+ if (Name == ".fini_array")
return MCSectionELF::SHT_FINI_ARRAY;
- if (strcmp(Name, ".preinit_array") == 0)
+ if (Name == ".preinit_array")
return MCSectionELF::SHT_PREINIT_ARRAY;
if (K.isBSS() || K.isThreadBSS())
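Taking a StringRef lets section names be compared by content without requiring NUL termination, which matters now that uniqued names are built in SmallString buffers. The idiom in isolation (illustrative helper):

    static bool isInitArraySection(StringRef Name) {
      return Name == ".init_array"; // length+bytes compare, no c_str() needed
    }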
@@ -577,10 +578,16 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// into a 'uniqued' section name, create and return the section now.
if (GV->isWeakForLinker()) {
const char *Prefix = getSectionPrefixForUniqueGlobal(Kind);
- std::string Name = Mang->makeNameProper(GV->getNameStr());
-
- return getELFSection((Prefix+Name).c_str(),
- getELFSectionType((Prefix+Name).c_str(), Kind),
+ SmallString<128> Name, MangledName;
+ Name.append(Prefix, Prefix+strlen(Prefix));
+ Mang->getNameWithPrefix(Name, GV, false);
+
+ raw_svector_ostream OS(MangledName);
+ MCSymbol::printMangledName(Name, OS, 0);
+ OS.flush();
+
+ return getELFSection(MangledName.str(),
+ getELFSectionType(MangledName.str(), Kind),
getELFSectionFlags(Kind),
Kind);
}
@@ -922,7 +929,7 @@ const MCSection *
TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const {
// If this constant requires a relocation, we have to put it in the data
// segment, not in the text segment.
- if (Kind.isDataRel())
+ if (Kind.isDataRel() || Kind.isReadOnlyWithRel())
return ConstDataSection;
if (Kind.isMergeableConst4())
@@ -983,7 +990,7 @@ TargetLoweringObjectFileCOFF::~TargetLoweringObjectFileCOFF() {
const MCSection *TargetLoweringObjectFileCOFF::
-getCOFFSection(const char *Name, bool isDirective, SectionKind Kind) const {
+getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const {
// Create the map if it doesn't already exist.
if (UniquingMap == 0)
UniquingMap = new MachOUniqueMapTy();
@@ -1078,8 +1085,9 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// into a 'uniqued' section name, create and return the section now.
if (GV->isWeakForLinker()) {
const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind);
- std::string Name = Mang->makeNameProper(GV->getNameStr());
- return getCOFFSection((Prefix+Name).c_str(), false, Kind);
+ SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+ Mang->getNameWithPrefix(Name, GV, false);
+ return getCOFFSection(Name.str(), false, Kind);
}
if (Kind.isText())
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index c357b4d..c4ae5d2 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Target/TargetAsmParser.h"
#include "X86.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
@@ -15,6 +16,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParsedAsmOperand.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetAsmParser.h"
@@ -46,7 +48,7 @@ private:
/// @name Auto-generated Match Functions
/// {
- bool MatchInstruction(SmallVectorImpl<X86Operand> &Operands,
+ bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCInst &Inst);
/// MatchRegisterName - Match the given string to a register name, or 0 if
@@ -59,7 +61,8 @@ public:
X86ATTAsmParser(const Target &T, MCAsmParser &_Parser)
: TargetAsmParser(T), Parser(_Parser) {}
- virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst);
+ virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
virtual bool ParseDirective(AsmToken DirectiveID);
};
@@ -71,7 +74,7 @@ namespace {
/// X86Operand - Instances of this class represent a parsed X86 machine
/// instruction.
-struct X86Operand {
+struct X86Operand : public MCParsedAsmOperand {
enum {
Token,
Register,
@@ -400,10 +403,11 @@ bool X86ATTAsmParser::ParseMemOperand(X86Operand &Op) {
return false;
}
-bool X86ATTAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {
- SmallVector<X86Operand, 8> Operands;
+bool X86ATTAsmParser::
+ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- Operands.push_back(X86Operand::CreateToken(Name));
+ Operands.push_back(new X86Operand(X86Operand::CreateToken(Name)));
SMLoc Loc = getLexer().getTok().getLoc();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
@@ -411,31 +415,27 @@ bool X86ATTAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {
// Parse '*' modifier.
if (getLexer().is(AsmToken::Star)) {
getLexer().Lex(); // Eat the star.
- Operands.push_back(X86Operand::CreateToken("*"));
+ Operands.push_back(new X86Operand(X86Operand::CreateToken("*")));
}
// Read the first operand.
- Operands.push_back(X86Operand());
- if (ParseOperand(Operands.back()))
+ X86Operand Op;
+ if (ParseOperand(Op))
return true;
+ Operands.push_back(new X86Operand(Op));
+
while (getLexer().is(AsmToken::Comma)) {
getLexer().Lex(); // Eat the comma.
// Parse and remember the operand.
- Operands.push_back(X86Operand());
- if (ParseOperand(Operands.back()))
+ if (ParseOperand(Op))
return true;
+ Operands.push_back(new X86Operand(Op));
}
}
- if (!MatchInstruction(Operands, Inst))
- return false;
-
- // FIXME: We should give nicer diagnostics about the exact failure.
-
- Error(Loc, "unrecognized instruction");
- return true;
+ return false;
}
bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index b88063f..70c6dd0 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -201,6 +201,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
/// jump tables, constant pools, global address and external symbols, all of
/// which print to a label with various suffixes for relocation types etc.
void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) {
+ SmallString<128> TempNameStr;
switch (MO.getType()) {
default: llvm_unreachable("unknown symbol type!");
case MachineOperand::MO_JumpTableIndex:
@@ -236,41 +237,38 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) {
if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
- SmallString<128> NameStr;
- Mang->getNameWithPrefix(NameStr, GV, true);
- NameStr += "$non_lazy_ptr";
- MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str());
+ Mang->getNameWithPrefix(TempNameStr, GV, true);
+ TempNameStr += "$non_lazy_ptr";
+ MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str());
const MCSymbol *&StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
if (StubSym == 0) {
- NameStr.clear();
- Mang->getNameWithPrefix(NameStr, GV, false);
- StubSym = OutContext.GetOrCreateSymbol(NameStr.str());
+ TempNameStr.clear();
+ Mang->getNameWithPrefix(TempNameStr, GV, false);
+ StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str());
}
} else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){
- SmallString<128> NameStr;
- Mang->getNameWithPrefix(NameStr, GV, true);
- NameStr += "$non_lazy_ptr";
- MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str());
+ Mang->getNameWithPrefix(TempNameStr, GV, true);
+ TempNameStr += "$non_lazy_ptr";
+ MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str());
const MCSymbol *&StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym);
if (StubSym == 0) {
- NameStr.clear();
- Mang->getNameWithPrefix(NameStr, GV, false);
- StubSym = OutContext.GetOrCreateSymbol(NameStr.str());
+ TempNameStr.clear();
+ Mang->getNameWithPrefix(TempNameStr, GV, false);
+ StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str());
}
} else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
- SmallString<128> NameStr;
- Mang->getNameWithPrefix(NameStr, GV, true);
- NameStr += "$stub";
- MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str());
+ Mang->getNameWithPrefix(TempNameStr, GV, true);
+ TempNameStr += "$stub";
+ MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str());
const MCSymbol *&StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
if (StubSym == 0) {
- NameStr.clear();
- Mang->getNameWithPrefix(NameStr, GV, false);
- StubSym = OutContext.GetOrCreateSymbol(NameStr.str());
+ TempNameStr.clear();
+ Mang->getNameWithPrefix(TempNameStr, GV, false);
+ StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str());
}
}
@@ -285,24 +283,32 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) {
break;
}
case MachineOperand::MO_ExternalSymbol: {
- std::string Name = Mang->makeNameProper(MO.getSymbolName());
+ const MCSymbol *SymToPrint;
if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
- Name += "$stub";
- MCSymbol *Sym = OutContext.GetOrCreateSymbol(StringRef(Name));
+ Mang->getNameWithPrefix(TempNameStr,
+ StringRef(MO.getSymbolName())+"$stub");
+ const MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str());
const MCSymbol *&StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
if (StubSym == 0) {
- Name.erase(Name.end()-5, Name.end());
- StubSym = OutContext.GetOrCreateSymbol(StringRef(Name));
+ TempNameStr.erase(TempNameStr.end()-5, TempNameStr.end());
+ StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str());
}
+ SymToPrint = StubSym;
+ } else {
+ Mang->getNameWithPrefix(TempNameStr, MO.getSymbolName());
+ SymToPrint = OutContext.GetOrCreateSymbol(TempNameStr.str());
}
// If the name begins with a dollar-sign, enclose it in parens. We do this
// to avoid having it look like an integer immediate to the assembler.
- if (Name[0] == '$')
- O << '(' << Name << ')';
- else
- O << Name;
+ if (SymToPrint->getName()[0] != '$')
+ SymToPrint->print(O, MAI);
+ else {
+ O << '(';
+ SymToPrint->print(O, MAI);
+      O << ')';
+ }
break;
}
}
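The parenthesization branch is easy to get backwards: the character emitted after the symbol must be the closing paren. As an illustrative standalone helper:

    static void printSymWithDollarParens(const MCSymbol *Sym, raw_ostream &O,
                                         const MCAsmInfo *MAI) {
      // "$tmp0" would parse as an integer immediate; emit "($tmp0)" instead.
      bool Parens = !Sym->getName().empty() && Sym->getName()[0] == '$';
      if (Parens) O << '(';
      Sym->print(O, MAI);
      if (Parens) O << ')';
    }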
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index 1015b69..9ee118c 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -25,6 +25,7 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Mangler.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/DebugInfo.h"
using namespace llvm;
@@ -399,6 +400,14 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(X86::MOVZX32rm16);
lower_subreg32(&OutMI, 0);
break;
+ case X86::MOV16r0:
+ OutMI.setOpcode(X86::MOV32r0);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOV64r0:
+ OutMI.setOpcode(X86::MOV32r0);
+ lower_subreg32(&OutMI, 0);
+ break;
}
}
@@ -412,6 +421,25 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
case TargetInstrInfo::GC_LABEL:
printLabel(MI);
return;
+ case TargetInstrInfo::DEBUG_VALUE: {
+ if (!VerboseAsm)
+ return;
+ O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIetc do not take const operands for some reason
+ DIVariable V((MDNode*)(MI->getOperand(2).getMetadata()));
+ O << V.getName();
+ O << " <- ";
+ if (MI->getOperand(0).getType()==MachineOperand::MO_Register)
+ printOperand(MI, 0);
+ else {
+ assert(MI->getOperand(0).getType()==MachineOperand::MO_Immediate);
+ int64_t imm = MI->getOperand(0).getImm();
+ O << '[' << ((imm<0) ? "EBP" : "ESP+") << imm << ']';
+ }
+ O << "+";
+ printOperand(MI, 1);
+ return;
+ }
case TargetInstrInfo::INLINEASM:
printInlineAsm(MI);
return;
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 71ad51c..0f3e44b 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -916,3 +916,23 @@ cheaper to do fld1 than load from a constant pool for example, so
"load, add 1.0, store" is better done in the fp stack, etc.
//===---------------------------------------------------------------------===//
+
+The X86 backend should be able to if-convert SSE comparisons like "ucomisd" to
+"cmpsd". For example, this code:
+
+double d1(double x) { return x == x ? x : x + x; }
+
+Compiles into:
+
+_d1:
+ ucomisd %xmm0, %xmm0
+ jnp LBB1_2
+ addsd %xmm0, %xmm0
+ ret
+LBB1_2:
+ ret
+
+Also, the 'ret's should be shared. This is PR6032.
+
+//===---------------------------------------------------------------------===//
+
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index afd9f53..aa7bb3d 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -530,7 +530,7 @@ We should inline lrintf and probably other libc functions.
//===---------------------------------------------------------------------===//
-Start using the flags more. For example, compile:
+Use the FLAGS values from arithmetic instructions more. For example, compile:
int add_zf(int *x, int y, int a, int b) {
if ((*x += y) == 0)
@@ -554,31 +554,8 @@ _add_zf:
movl %ecx, %eax
ret
-and:
-
-int add_zf(int *x, int y, int a, int b) {
- if ((*x + y) < 0)
- return a;
- else
- return b;
-}
-
-to:
-
-add_zf:
- addl (%rdi), %esi
- movl %edx, %eax
- cmovns %ecx, %eax
- ret
-
-instead of:
-
-_add_zf:
- addl (%rdi), %esi
- testl %esi, %esi
- cmovs %edx, %ecx
- movl %ecx, %eax
- ret
+As another example, compile function f2 in test/CodeGen/X86/cmp-test.ll
+without a test instruction.
//===---------------------------------------------------------------------===//
@@ -685,55 +662,6 @@ Though this probably isn't worth it.
//===---------------------------------------------------------------------===//
-We need to teach the codegen to convert two-address INC instructions to LEA
-when the flags are dead (likewise dec). For example, on X86-64, compile:
-
-int foo(int A, int B) {
- return A+1;
-}
-
-to:
-
-_foo:
- leal 1(%edi), %eax
- ret
-
-instead of:
-
-_foo:
- incl %edi
- movl %edi, %eax
- ret
-
-Another example is:
-
-;; X's live range extends beyond the shift, so the register allocator
-;; cannot coalesce it with Y. Because of this, a copy needs to be
-;; emitted before the shift to save the register value before it is
-;; clobbered. However, this copy is not needed if the register
-;; allocator turns the shift into an LEA. This also occurs for ADD.
-
-; Check that the shift gets turned into an LEA.
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
-; RUN: not grep {mov E.X, E.X}
-
-@G = external global i32 ; <i32*> [#uses=3]
-
-define i32 @test1(i32 %X, i32 %Y) {
- %Z = add i32 %X, %Y ; <i32> [#uses=1]
- volatile store i32 %Y, i32* @G
- volatile store i32 %Z, i32* @G
- ret i32 %X
-}
-
-define i32 @test2(i32 %X) {
- %Z = add i32 %X, 1 ; <i32> [#uses=1]
- volatile store i32 %Z, i32* @G
- ret i32 %X
-}
-
-//===---------------------------------------------------------------------===//
-
Sometimes it is better to codegen subtractions from a constant (e.g. 7-x) with
a neg instead of a sub instruction. Consider:
@@ -854,11 +782,6 @@ __Z11no_overflowjj:
//===---------------------------------------------------------------------===//
-Re-materialize MOV32r0 etc. with xor instead of changing them to moves if the
-condition register is dead. xor reg reg is shorter than mov reg, #0.
-
-//===---------------------------------------------------------------------===//
-
The following code:
bb114.preheader: ; preds = %cond_next94
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index a6e1ca3..7919559 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -23,6 +23,7 @@ include "llvm/Target/Target.td"
def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
"Enable conditional move instructions">;
+
def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
"Enable MMX instructions">;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
@@ -66,6 +67,9 @@ def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true",
"Enable three-operand fused multiple-add">;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
"Enable four-operand fused multiple-add">;
+def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
+ "HasVectorUAMem", "true",
+ "Allow unaligned memory operands on vector/SIMD instructions">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 4892e17..828e872 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -135,7 +135,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
IsPIC = TM.getRelocationModel() == Reloc::PIC_;
do {
- DEBUG(errs() << "JITTing function '"
+ DEBUG(dbgs() << "JITTing function '"
<< MF.getFunction()->getName() << "'\n");
MCE.startFunction(MF);
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
@@ -477,7 +477,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
const TargetInstrDesc *Desc) {
- DEBUG(errs() << MI);
+ DEBUG(dbgs() << MI);
MCE.processDebugLoc(MI.getDebugLoc(), true);
@@ -618,11 +618,11 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
const MachineOperand &MO = MI.getOperand(CurOp++);
- DEBUG(errs() << "RawFrm CurOp " << CurOp << "\n");
- DEBUG(errs() << "isMBB " << MO.isMBB() << "\n");
- DEBUG(errs() << "isGlobal " << MO.isGlobal() << "\n");
- DEBUG(errs() << "isSymbol " << MO.isSymbol() << "\n");
- DEBUG(errs() << "isImm " << MO.isImm() << "\n");
+ DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n");
+ DEBUG(dbgs() << "isMBB " << MO.isMBB() << "\n");
+ DEBUG(dbgs() << "isGlobal " << MO.isGlobal() << "\n");
+ DEBUG(dbgs() << "isSymbol " << MO.isSymbol() << "\n");
+ DEBUG(dbgs() << "isImm " << MO.isImm() << "\n");
if (MO.isMBB()) {
emitPCRelativeBlockAddress(MO.getMBB());
@@ -843,7 +843,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
if (!Desc->isVariadic() && CurOp != NumOps) {
#ifndef NDEBUG
- errs() << "Cannot encode all operands of: " << MI << "\n";
+ dbgs() << "Cannot encode all operands of: " << MI << "\n";
#endif
llvm_unreachable(0);
}
@@ -1082,9 +1082,9 @@ public:
}
if (!OK) {
- errs() << "couldn't convert inst '";
+ dbgs() << "couldn't convert inst '";
MI.dump();
- errs() << "' to machine instr:\n";
+ dbgs() << "' to machine instr:\n";
Instr->dump();
}
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 431c120..7e02d59 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -786,8 +786,8 @@ bool X86FastISel::X86SelectCmp(Instruction *I) {
bool X86FastISel::X86SelectZExt(Instruction *I) {
// Handle zero-extension from i1 to i8, which is common.
- if (I->getType() == Type::getInt8Ty(I->getContext()) &&
- I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext())) {
+ if (I->getType()->isInteger(8) &&
+ I->getOperand(0)->getType()->isInteger(1)) {
unsigned ResultReg = getRegForValue(I->getOperand(0));
if (ResultReg == 0) return false;
// Set the high bits to zero.
@@ -948,7 +948,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {
bool X86FastISel::X86SelectShift(Instruction *I) {
unsigned CReg = 0, OpReg = 0, OpImm = 0;
const TargetRegisterClass *RC = NULL;
- if (I->getType() == Type::getInt8Ty(I->getContext())) {
+ if (I->getType()->isInteger(8)) {
CReg = X86::CL;
RC = &X86::GR8RegClass;
switch (I->getOpcode()) {
@@ -957,7 +957,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
default: return false;
}
- } else if (I->getType() == Type::getInt16Ty(I->getContext())) {
+ } else if (I->getType()->isInteger(16)) {
CReg = X86::CX;
RC = &X86::GR16RegClass;
switch (I->getOpcode()) {
@@ -966,7 +966,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
default: return false;
}
- } else if (I->getType() == Type::getInt32Ty(I->getContext())) {
+ } else if (I->getType()->isInteger(32)) {
CReg = X86::ECX;
RC = &X86::GR32RegClass;
switch (I->getOpcode()) {
@@ -975,7 +975,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
default: return false;
}
- } else if (I->getType() == Type::getInt64Ty(I->getContext())) {
+ } else if (I->getType()->isInteger(64)) {
CReg = X86::RCX;
RC = &X86::GR64RegClass;
switch (I->getOpcode()) {
@@ -1230,8 +1230,8 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
CC != CallingConv::X86_FastCall)
return false;
- // On X86, -tailcallopt changes the fastcc ABI. FastISel doesn't
- // handle this for now.
+  // fastcc with -tailcallopt is intended to provide a guaranteed
+  // tail call optimization. FastISel doesn't know how to do that.
if (CC == CallingConv::Fast && PerformTailCallOpt)
return false;
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 044bd4b..503ac14 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -75,12 +75,12 @@ namespace {
unsigned StackTop; // The current top of the FP stack.
void dumpStack() const {
- errs() << "Stack contents:";
+ dbgs() << "Stack contents:";
for (unsigned i = 0; i != StackTop; ++i) {
- errs() << " FP" << Stack[i];
+ dbgs() << " FP" << Stack[i];
assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
}
- errs() << "\n";
+ dbgs() << "\n";
}
private:
/// isStackEmpty - Return true if the FP stack is empty.
@@ -246,7 +246,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
PrevMI = prior(I);
++NumFP; // Keep track of # of pseudo instrs
- DEBUG(errs() << "\nFPInst:\t" << *MI);
+ DEBUG(dbgs() << "\nFPInst:\t" << *MI);
// Get dead variables list now because the MI pointer may be deleted as part
// of processing!
@@ -273,7 +273,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
unsigned Reg = DeadRegs[i];
if (Reg >= X86::FP0 && Reg <= X86::FP6) {
- DEBUG(errs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
+ DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
freeStackSlotAfter(I, Reg-X86::FP0);
}
}
@@ -282,13 +282,13 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
DEBUG(
MachineBasicBlock::iterator PrevI(PrevMI);
if (I == PrevI) {
- errs() << "Just deleted pseudo instruction\n";
+ dbgs() << "Just deleted pseudo instruction\n";
} else {
MachineBasicBlock::iterator Start = I;
// Rewind to first instruction newly inserted.
while (Start != BB.begin() && prior(Start) != PrevI) --Start;
- errs() << "Inserted instructions:\n\t";
- Start->print(errs(), &MF.getTarget());
+ dbgs() << "Inserted instructions:\n\t";
+ Start->print(dbgs(), &MF.getTarget());
while (++Start != llvm::next(I)) {}
}
dumpStack();
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index cb82383..e2a53d1 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -113,37 +113,37 @@ namespace {
}
void dump() {
- errs() << "X86ISelAddressMode " << this << '\n';
- errs() << "Base.Reg ";
+ dbgs() << "X86ISelAddressMode " << this << '\n';
+ dbgs() << "Base.Reg ";
if (Base.Reg.getNode() != 0)
Base.Reg.getNode()->dump();
else
- errs() << "nul";
- errs() << " Base.FrameIndex " << Base.FrameIndex << '\n'
+ dbgs() << "nul";
+ dbgs() << " Base.FrameIndex " << Base.FrameIndex << '\n'
<< " Scale" << Scale << '\n'
<< "IndexReg ";
if (IndexReg.getNode() != 0)
IndexReg.getNode()->dump();
else
- errs() << "nul";
- errs() << " Disp " << Disp << '\n'
+ dbgs() << "nul";
+ dbgs() << " Disp " << Disp << '\n'
<< "GV ";
if (GV)
GV->dump();
else
- errs() << "nul";
- errs() << " CP ";
+ dbgs() << "nul";
+ dbgs() << " CP ";
if (CP)
CP->dump();
else
- errs() << "nul";
- errs() << '\n'
+ dbgs() << "nul";
+ dbgs() << '\n'
<< "ES ";
if (ES)
- errs() << ES;
+ dbgs() << ES;
else
- errs() << "nul";
- errs() << " JT" << JT << " Align" << Align << '\n';
+ dbgs() << "nul";
+ dbgs() << " JT" << JT << " Align" << Align << '\n';
}
};
}
@@ -190,7 +190,7 @@ namespace {
#include "X86GenDAGISel.inc"
private:
- SDNode *Select(SDValue N);
+ SDNode *Select(SDNode *N);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
@@ -201,19 +201,19 @@ namespace {
bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth);
bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
- bool SelectAddr(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp);
- bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp);
- bool SelectScalarSSELoad(SDValue Op, SDValue Pred,
+ bool SelectScalarSSELoad(SDNode *Op, SDValue Pred,
SDValue N, SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment,
SDValue &InChain, SDValue &OutChain);
- bool TryFoldLoad(SDValue P, SDValue N,
+ bool TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment);
@@ -310,6 +310,11 @@ bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
if (U == Root)
switch (U->getOpcode()) {
default: break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::AND:
+ case X86ISD::XOR:
+ case X86ISD::OR:
case ISD::ADD:
case ISD::ADDC:
case ISD::ADDE:
@@ -675,12 +680,12 @@ void X86DAGToDAGISel::InstructionSelect() {
// Codegen the basic block.
#ifndef NDEBUG
- DEBUG(errs() << "===== Instruction selection begins:\n");
+ DEBUG(dbgs() << "===== Instruction selection begins:\n");
Indent = 0;
#endif
SelectRoot(*CurDAG);
#ifndef NDEBUG
- DEBUG(errs() << "===== Instruction selection ends:\n");
+ DEBUG(dbgs() << "===== Instruction selection ends:\n");
#endif
CurDAG->RemoveDeadNodes();
@@ -850,7 +855,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
bool is64Bit = Subtarget->is64Bit();
DebugLoc dl = N.getDebugLoc();
DEBUG({
- errs() << "MatchAddress: ";
+ dbgs() << "MatchAddress: ";
AM.dump();
});
// Limit recursion.
@@ -1268,7 +1273,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
/// SelectAddr - returns true if it is able pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
-bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
+bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
X86ISelAddressMode AM;
@@ -1291,7 +1296,7 @@ bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
/// match a load whose top elements are either undef or zeros. The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
-bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,
+bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred,
SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment,
@@ -1302,7 +1307,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,
if (ISD::isNON_EXTLoad(InChain.getNode()) &&
InChain.getValue(0).hasOneUse() &&
N.hasOneUse() &&
- IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) {
+ IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op)) {
LoadSDNode *LD = cast<LoadSDNode>(InChain);
if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
return false;
@@ -1333,7 +1338,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,
/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
-bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
+bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp) {
X86ISelAddressMode AM;
@@ -1395,10 +1400,10 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
}
/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
-bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
+bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp) {
- assert(Op.getOpcode() == X86ISD::TLSADDR);
+ assert(Op->getOpcode() == X86ISD::TLSADDR);
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
@@ -1421,13 +1426,13 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
}
-bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
+bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment) {
if (ISD::isNON_EXTLoad(N.getNode()) &&
N.hasOneUse() &&
- IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode()))
+ IsLegalAndProfitableToFold(N.getNode(), P, P))
return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
return false;
}
@@ -1454,7 +1459,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
SDValue In2L = Node->getOperand(2);
SDValue In2H = Node->getOperand(3);
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ if (!SelectAddr(In1.getNode(), In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
return NULL;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
@@ -1480,7 +1485,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
SDValue Ptr = Node->getOperand(1);
SDValue Val = Node->getOperand(2);
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ if (!SelectAddr(Ptr.getNode(), Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
return 0;
bool isInc = false, isDec = false, isSub = false, isCN = false;
@@ -1678,8 +1683,7 @@ static bool HasNoSignedComparisonUses(SDNode *N) {
return true;
}
-SDNode *X86DAGToDAGISel::Select(SDValue N) {
- SDNode *Node = N.getNode();
+SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
EVT NVT = Node->getValueType(0);
unsigned Opc, MOpc;
unsigned Opcode = Node->getOpcode();
@@ -1687,9 +1691,9 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
#ifndef NDEBUG
DEBUG({
- errs() << std::string(Indent, ' ') << "Selecting: ";
+ dbgs() << std::string(Indent, ' ') << "Selecting: ";
Node->dump(CurDAG);
- errs() << '\n';
+ dbgs() << '\n';
});
Indent += 2;
#endif
@@ -1697,9 +1701,9 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
if (Node->isMachineOpcode()) {
#ifndef NDEBUG
DEBUG({
- errs() << std::string(Indent-2, ' ') << "== ";
+ dbgs() << std::string(Indent-2, ' ') << "== ";
Node->dump(CurDAG);
- errs() << '\n';
+ dbgs() << '\n';
});
Indent -= 2;
#endif
@@ -1767,10 +1771,10 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
    // Multiply is commutative.
if (!foldedLoad) {
- foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
if (foldedLoad)
std::swap(N0, N1);
}
@@ -1793,21 +1797,21 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
}
// Copy the low half of the result, if it is needed.
- if (!N.getValue(0).use_empty()) {
+ if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
LoReg, NVT, InFlag);
InFlag = Result.getValue(2);
- ReplaceUses(N.getValue(0), Result);
+ ReplaceUses(SDValue(Node, 0), Result);
#ifndef NDEBUG
DEBUG({
- errs() << std::string(Indent-2, ' ') << "=> ";
+ dbgs() << std::string(Indent-2, ' ') << "=> ";
Result.getNode()->dump(CurDAG);
- errs() << '\n';
+ dbgs() << '\n';
});
#endif
}
// Copy the high half of the result, if it is needed.
- if (!N.getValue(1).use_empty()) {
+ if (!SDValue(Node, 1).use_empty()) {
SDValue Result;
if (HiReg == X86::AH && Subtarget->is64Bit()) {
// Prevent use of AH in a REX instruction by referencing AX instead.
@@ -1826,12 +1830,12 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
HiReg, NVT, InFlag);
InFlag = Result.getValue(2);
}
- ReplaceUses(N.getValue(1), Result);
+ ReplaceUses(SDValue(Node, 1), Result);
#ifndef NDEBUG
DEBUG({
- errs() << std::string(Indent-2, ' ') << "=> ";
+ dbgs() << std::string(Indent-2, ' ') << "=> ";
Result.getNode()->dump(CurDAG);
- errs() << '\n';
+ dbgs() << '\n';
});
#endif
}
@@ -1869,7 +1873,6 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
unsigned LoReg, HiReg, ClrReg;
unsigned ClrOpcode, SExtOpcode;
- EVT ClrVT = NVT;
switch (NVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8:
@@ -1879,7 +1882,7 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
break;
case MVT::i16:
LoReg = X86::AX; HiReg = X86::DX;
- ClrOpcode = X86::MOV32r0; ClrReg = X86::EDX; ClrVT = MVT::i32;
+ ClrOpcode = X86::MOV16r0; ClrReg = X86::DX;
SExtOpcode = X86::CWD;
break;
case MVT::i32:
@@ -1889,13 +1892,13 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
break;
case MVT::i64:
LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
- ClrOpcode = ~0U; // NOT USED.
+ ClrOpcode = X86::MOV64r0;
SExtOpcode = X86::CQO;
break;
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
bool signBitIsZero = CurDAG->SignBitIsZero(N0);
SDValue InFlag;
@@ -1903,7 +1906,7 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
// Special case for div8, just use a move with zero extension to AX to
// clear the upper 8 bits (AH).
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
- if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
Move =
SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16,
@@ -1928,24 +1931,8 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
} else {
// Zero out the high part, effectively zero extending the input.
- SDValue ClrNode;
-
- if (NVT.getSimpleVT() == MVT::i64) {
- ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32),
- 0);
- // We just did a 32-bit clear, insert it into a 64-bit register to
- // clear the whole 64-bit reg.
- SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64);
- SDValue SubRegNo =
- CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);
- ClrNode =
- SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl,
- MVT::i64, Zero, ClrNode, SubRegNo),
- 0);
- } else {
- ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, ClrVT), 0);
- }
-
+ SDValue ClrNode =
+ SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
ClrNode, InFlag).getValue(1);
}
@@ -1966,21 +1953,21 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
}
// Copy the division (low) result, if it is needed.
- if (!N.getValue(0).use_empty()) {
+ if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
LoReg, NVT, InFlag);
InFlag = Result.getValue(2);
- ReplaceUses(N.getValue(0), Result);
+ ReplaceUses(SDValue(Node, 0), Result);
#ifndef NDEBUG
DEBUG({
- errs() << std::string(Indent-2, ' ') << "=> ";
+ dbgs() << std::string(Indent-2, ' ') << "=> ";
Result.getNode()->dump(CurDAG);
- errs() << '\n';
+ dbgs() << '\n';
});
#endif
}
// Copy the remainder (high) result, if it is needed.
- if (!N.getValue(1).use_empty()) {
+ if (!SDValue(Node, 1).use_empty()) {
SDValue Result;
if (HiReg == X86::AH && Subtarget->is64Bit()) {
// Prevent use of AH in a REX instruction by referencing AX instead.
@@ -2000,12 +1987,12 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
HiReg, NVT, InFlag);
InFlag = Result.getValue(2);
}
- ReplaceUses(N.getValue(1), Result);
+ ReplaceUses(SDValue(Node, 1), Result);
#ifndef NDEBUG
DEBUG({
- errs() << std::string(Indent-2, ' ') << "=> ";
+ dbgs() << std::string(Indent-2, ' ') << "=> ";
Result.getNode()->dump(CurDAG);
- errs() << '\n';
+ dbgs() << '\n';
});
#endif
}
@@ -2124,16 +2111,16 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
}
}
- SDNode *ResNode = SelectCode(N);
+ SDNode *ResNode = SelectCode(Node);
#ifndef NDEBUG
DEBUG({
- errs() << std::string(Indent-2, ' ') << "=> ";
- if (ResNode == NULL || ResNode == N.getNode())
- N.getNode()->dump(CurDAG);
+ dbgs() << std::string(Indent-2, ' ') << "=> ";
+ if (ResNode == NULL || ResNode == Node)
+ Node->dump(CurDAG);
else
ResNode->dump(CurDAG);
- errs() << '\n';
+ dbgs() << '\n';
});
Indent -= 2;
#endif
@@ -2150,7 +2137,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
case 'v': // not offsetable ??
default: return true;
case 'm': // memory
- if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3, Op4))
+ if (!SelectAddr(Op.getNode(), Op, Op0, Op1, Op2, Op3, Op4))
return true;
break;
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index c722fbf..228ec9f 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -978,6 +978,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::MEMBARRIER);
setTargetDAGCombine(ISD::ZERO_EXTEND);
@@ -2077,10 +2078,10 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
assert(((Callee.getOpcode() == ISD::Register &&
(cast<RegisterSDNode>(Callee)->getReg() == X86::EAX ||
- cast<RegisterSDNode>(Callee)->getReg() == X86::R9)) ||
+ cast<RegisterSDNode>(Callee)->getReg() == X86::R11)) ||
Callee.getOpcode() == ISD::TargetExternalSymbol ||
Callee.getOpcode() == ISD::TargetGlobalAddress) &&
- "Expecting an global address, external symbol, or register");
+ "Expecting a global address, external symbol, or scratch register");
return DAG.getNode(X86ISD::TC_RETURN, dl,
NodeTys, &Ops[0], Ops.size());
@@ -5610,13 +5611,21 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
// because a TEST instruction will be better.
bool NonFlagUse = false;
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() != ISD::BRCOND &&
- UI->getOpcode() != ISD::SELECT &&
- UI->getOpcode() != ISD::SETCC) {
+ UE = Op.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ unsigned UOpNo = UI.getOperandNo();
+ if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
+        // Look past the truncate.
+ UOpNo = User->use_begin().getOperandNo();
+ User = *User->use_begin();
+ }
+ if (User->getOpcode() != ISD::BRCOND &&
+ User->getOpcode() != ISD::SETCC &&
+ (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
NonFlagUse = true;
break;
}
+ }
if (!NonFlagUse)
break;
}
@@ -5680,6 +5689,56 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
}
+/// LowerToBT - The result of an 'and' is being compared against zero. Turn it
+/// into a BT node if possible.
+static SDValue LowerToBT(SDValue Op0, ISD::CondCode CC,
+ DebugLoc dl, SelectionDAG &DAG) {
+ SDValue LHS, RHS;
+ if (Op0.getOperand(1).getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *Op010C =
+ dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0)))
+ if (Op010C->getZExtValue() == 1) {
+ LHS = Op0.getOperand(0);
+ RHS = Op0.getOperand(1).getOperand(1);
+ }
+ } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *Op000C =
+ dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0)))
+ if (Op000C->getZExtValue() == 1) {
+ LHS = Op0.getOperand(1);
+ RHS = Op0.getOperand(0).getOperand(1);
+ }
+ } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) {
+ ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1));
+ SDValue AndLHS = Op0.getOperand(0);
+ if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
+ LHS = AndLHS.getOperand(0);
+ RHS = AndLHS.getOperand(1);
+ }
+ }
+
+ if (LHS.getNode()) {
+ // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT
+ // instruction. Since the shift amount is in-range-or-undefined, we know
+ // that doing a bittest on the i16 value is ok. We extend to i32 because
+ // the encoding for the i16 version is larger than the i32 version.
+ if (LHS.getValueType() == MVT::i8)
+ LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
+
+ // If the operand types disagree, extend the shift amount to match. Since
+ // BT ignores high bits (like shifts) we can use anyextend.
+ if (LHS.getValueType() != RHS.getValueType())
+ RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
+
+ SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
+ unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(Cond, MVT::i8), BT);
+ }
+
+ return SDValue();
+}
+
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
SDValue Op0 = Op.getOperand(0);
@@ -5687,6 +5746,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ // Optimize to BT if possible.
// Lower (X & (1 << N)) == 0 to BT(X, N).
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
@@ -5695,48 +5755,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
Op1.getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(Op1)->getZExtValue() == 0 &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
- SDValue LHS, RHS;
- if (Op0.getOperand(1).getOpcode() == ISD::SHL) {
- if (ConstantSDNode *Op010C =
- dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0)))
- if (Op010C->getZExtValue() == 1) {
- LHS = Op0.getOperand(0);
- RHS = Op0.getOperand(1).getOperand(1);
- }
- } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) {
- if (ConstantSDNode *Op000C =
- dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0)))
- if (Op000C->getZExtValue() == 1) {
- LHS = Op0.getOperand(1);
- RHS = Op0.getOperand(0).getOperand(1);
- }
- } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) {
- ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1));
- SDValue AndLHS = Op0.getOperand(0);
- if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
- LHS = AndLHS.getOperand(0);
- RHS = AndLHS.getOperand(1);
- }
- }
-
- if (LHS.getNode()) {
- // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT
- // instruction. Since the shift amount is in-range-or-undefined, we know
- // that doing a bittest on the i16 value is ok. We extend to i32 because
- // the encoding for the i16 version is larger than the i32 version.
- if (LHS.getValueType() == MVT::i8)
- LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
-
- // If the operand types disagree, extend the shift amount to match. Since
- // BT ignores high bits (like shifts) we can use anyextend.
- if (LHS.getValueType() != RHS.getValueType())
- RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
-
- SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
- unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(Cond, MVT::i8), BT);
- }
+ SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
+ if (NewSetCC.getNode())
+ return NewSetCC;
}
bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
@@ -5936,6 +5957,23 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
}
if (addTest) {
+    // Look past the truncate.
+ if (Cond.getOpcode() == ISD::TRUNCATE)
+ Cond = Cond.getOperand(0);
+
+ // We know the result of AND is compared against zero. Try to match
+ // it to BT.
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
+ if (NewSetCC.getNode()) {
+ CC = NewSetCC.getOperand(0);
+ Cond = NewSetCC.getOperand(1);
+ addTest = false;
+ }
+ }
+ }
+
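+  // If the BT match above succeeded, CC and Cond now describe the bit test
+  // and addTest was cleared, so the fallback TEST emission below is skipped.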
+ if (addTest) {
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Cond = EmitTest(Cond, X86::COND_NE, DAG);
}
@@ -6093,6 +6131,23 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
}
if (addTest) {
+    // Look past the truncate.
+ if (Cond.getOpcode() == ISD::TRUNCATE)
+ Cond = Cond.getOperand(0);
+
+ // We know the result of AND is compared against zero. Try to match
+ // it to BT.
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
+ if (NewSetCC.getNode()) {
+ CC = NewSetCC.getOperand(0);
+ Cond = NewSetCC.getOperand(1);
+ addTest = false;
+ }
+ }
+ }
+
+ if (addTest) {
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Cond = EmitTest(Cond, X86::COND_NE, DAG);
}
@@ -7524,8 +7579,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {
// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
- return Ty1 == Type::getInt32Ty(Ty1->getContext()) &&
- Ty2 == Type::getInt64Ty(Ty1->getContext()) && Subtarget->is64Bit();
+  return Ty1->isInteger(32) && Ty2->isInteger(64) && Subtarget->is64Bit();
}
bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
@@ -7749,7 +7803,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
for (int i=0; i < 2 + X86AddrNumOperands; ++i)
argOpers[i] = &bInstr->getOperand(i+2);
- // x86 address has 4 operands: base, index, scale, and displacement
+ // x86 address has 5 operands: base, index, scale, displacement, and segment.
int lastAddrIndx = X86AddrNumOperands - 1; // [0,3]
unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
@@ -7777,14 +7831,16 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg())
.addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);
- unsigned tt1 = F->getRegInfo().createVirtualRegister(RC);
- unsigned tt2 = F->getRegInfo().createVirtualRegister(RC);
+  // The subsequent operations should use the destination registers of
+  // the PHI instructions.
if (invSrc) {
- MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt1).addReg(t1);
- MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt2).addReg(t2);
+ t1 = F->getRegInfo().createVirtualRegister(RC);
+ t2 = F->getRegInfo().createVirtualRegister(RC);
+ MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t1).addReg(dest1Oper.getReg());
+ MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t2).addReg(dest2Oper.getReg());
} else {
- tt1 = t1;
- tt2 = t2;
+ t1 = dest1Oper.getReg();
+ t2 = dest2Oper.getReg();
}
int valArgIndx = lastAddrIndx + 1;
@@ -7798,7 +7854,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
else
MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5);
if (regOpcL != X86::MOV32rr)
- MIB.addReg(tt1);
+ MIB.addReg(t1);
(*MIB).addOperand(*argOpers[valArgIndx]);
assert(argOpers[valArgIndx + 1]->isReg() ==
argOpers[valArgIndx]->isReg());
@@ -7809,7 +7865,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
else
MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6);
if (regOpcH != X86::MOV32rr)
- MIB.addReg(tt2);
+ MIB.addReg(t2);
(*MIB).addOperand(*argOpers[valArgIndx + 1]);
MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX);
@@ -9108,6 +9164,64 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64 || !Subtarget->is64Bit())
+ return SDValue();
+
+  // fold (or (x << c), (y >> (64 - c))) ==> (shld64 x, y, c)
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ SDValue ShAmt0 = N0.getOperand(1);
+ if (ShAmt0.getValueType() != MVT::i8)
+ return SDValue();
+ SDValue ShAmt1 = N1.getOperand(1);
+ if (ShAmt1.getValueType() != MVT::i8)
+ return SDValue();
+ if (ShAmt0.getOpcode() == ISD::TRUNCATE)
+ ShAmt0 = ShAmt0.getOperand(0);
+ if (ShAmt1.getOpcode() == ISD::TRUNCATE)
+ ShAmt1 = ShAmt1.getOperand(0);
+
+ DebugLoc DL = N->getDebugLoc();
+ unsigned Opc = X86ISD::SHLD;
+ SDValue Op0 = N0.getOperand(0);
+ SDValue Op1 = N1.getOperand(0);
+ if (ShAmt0.getOpcode() == ISD::SUB) {
+ Opc = X86ISD::SHRD;
+ std::swap(Op0, Op1);
+ std::swap(ShAmt0, ShAmt1);
+ }
+
+ if (ShAmt1.getOpcode() == ISD::SUB) {
+ SDValue Sum = ShAmt1.getOperand(0);
+ if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
+ if (SumC->getSExtValue() == 64 &&
+ ShAmt1.getOperand(1) == ShAmt0)
+ return DAG.getNode(Opc, DL, VT,
+ Op0, Op1,
+ DAG.getNode(ISD::TRUNCATE, DL,
+ MVT::i8, ShAmt0));
+ }
+ } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
+ ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
+ if (ShAmt0C &&
+ ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == 64)
+ return DAG.getNode(Opc, DL, VT,
+ N0.getOperand(0), N1.getOperand(0),
+ DAG.getNode(ISD::TRUNCATE, DL,
+ MVT::i8, ShAmt0));
+ }
+
+ return SDValue();
+}
+
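+// For illustration, a sketch of the C source shape this combine targets:
+//
+//   uint64_t f(uint64_t x, uint64_t y, unsigned c) {
+//     return (x << c) | (y >> (64 - c));
+//   }
+//
+// which would now be expected to select to a single shldq (amount in %cl)
+// rather than two shifts and an or.
+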
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
@@ -9370,6 +9484,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
+ case ISD::OR: return PerformOrCombine(N, DAG, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
@@ -9423,7 +9538,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
std::string AsmStr = IA->getAsmString();
// TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
- std::vector<std::string> AsmPieces;
+ SmallVector<StringRef, 4> AsmPieces;
SplitString(AsmStr, AsmPieces, "\n"); // ; as separator?
switch (AsmPieces.size()) {
@@ -9445,7 +9560,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
return LowerToBSwap(CI);
}
// rorw $$8, ${0:w} --> llvm.bswap.i16
- if (CI->getType() == Type::getInt16Ty(CI->getContext()) &&
+ if (CI->getType()->isInteger(16) &&
AsmPieces.size() == 3 &&
AsmPieces[0] == "rorw" &&
AsmPieces[1] == "$$8," &&
@@ -9455,12 +9570,12 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
}
break;
case 3:
- if (CI->getType() == Type::getInt64Ty(CI->getContext()) &&
+ if (CI->getType()->isInteger(64) &&
Constraints.size() >= 2 &&
Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
// bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
- std::vector<std::string> Words;
+ SmallVector<StringRef, 4> Words;
SplitString(AsmPieces[0], Words, " \t");
if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
Words.clear();
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 65fbbda..08e1dd1 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1106,13 +1106,13 @@ def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst),
def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst),
(ins GR64:$src1, i64i8imm:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)),
+ (implicit EFLAGS)]>;
def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst),
(ins GR64:$src1, i64i32imm:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)),
+ (implicit EFLAGS)]>;
} // isTwoAddress
def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
@@ -1598,17 +1598,21 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
// Alias Instructions
//===----------------------------------------------------------------------===//
-// Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's
-// equivalent due to implicit zero-extending, and it sometimes has a smaller
-// encoding.
+// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
+// smaller encoding, but doing so at isel time interferes with rematerialization
+// in the current register allocator. For now, this is rewritten when the
+// instruction is lowered to an MCInst.
// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
// when we have a better way to specify isel priority.
-let AddedComplexity = 1 in
-def : Pat<(i64 0),
- (SUBREG_TO_REG (i64 0), (MOV32r0), x86_subreg_32bit)>;
-
-
-// Materialize i64 constant where top 32-bits are zero.
+let Defs = [EFLAGS],
+ AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins),
+ "",
+ [(set GR64:$dst, 0)]>;
+
+// Materialize an i64 constant whose top 32 bits are zero. This could
+// theoretically use MOV32ri with a SUBREG_TO_REG to represent the
+// zero-extension, but that would make it more difficult to rematerialize.
let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
"", [(set GR64:$dst, i64immZExt32:$src)]>;
@@ -1683,6 +1687,7 @@ def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
"cmpxchg16b\t$dst", []>, TB;
@@ -1962,6 +1967,17 @@ def : Pat<(add GR64:$src1, 0x0000000080000000),
def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst),
(SUB64mi32 addr:$dst, 0xffffffff80000000)>;
+// Use a 32-bit and with implicit zero-extension instead of a 64-bit and if it
+// has an immediate with at least 32 bits of leading zeros, to avoid needing to
+// materialize that immediate in a register first.
+def : Pat<(and GR64:$src, i64immZExt32:$imm),
+ (SUBREG_TO_REG
+ (i64 0),
+ (AND32ri
+ (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit),
+ imm:$imm),
+ x86_subreg_32bit)>;
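+//
+// For example (a sketch): 'x & 0x00000000FFFF0000' selects as an andl on the
+// low 32-bit sub-register; the implicit zero-extension of 32-bit results
+// supplies the cleared upper half.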
+
// r & (2^32-1) ==> movz
def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
(MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
@@ -2028,7 +2044,7 @@ def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
x86_subreg_8bit_hi))>,
Requires<[In64BitMode]>;
-def : Pat<(srl_su GR16:$src, (i8 8)),
+def : Pat<(srl GR16:$src, (i8 8)),
(EXTRACT_SUBREG
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
@@ -2098,24 +2114,7 @@ def : Pat<(sra GR64:$src1, (and CL:$amt, 63)),
def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
(SAR64mCL addr:$dst)>;
-// (or (x >> c) | (y << (64 - c))) ==> (shrd64 x, y, c)
-def : Pat<(or (srl GR64:$src1, CL:$amt),
- (shl GR64:$src2, (sub 64, CL:$amt))),
- (SHRD64rrCL GR64:$src1, GR64:$src2)>;
-
-def : Pat<(store (or (srl (loadi64 addr:$dst), CL:$amt),
- (shl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
- (SHRD64mrCL addr:$dst, GR64:$src2)>;
-
-def : Pat<(or (srl GR64:$src1, (i8 (trunc RCX:$amt))),
- (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
- (SHRD64rrCL GR64:$src1, GR64:$src2)>;
-
-def : Pat<(store (or (srl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))),
- (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
- addr:$dst),
- (SHRD64mrCL addr:$dst, GR64:$src2)>;
-
+// Double shift patterns
def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
(SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
@@ -2123,24 +2122,6 @@ def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1),
GR64:$src2, (i8 imm:$amt2)), addr:$dst),
(SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
-// (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
-def : Pat<(or (shl GR64:$src1, CL:$amt),
- (srl GR64:$src2, (sub 64, CL:$amt))),
- (SHLD64rrCL GR64:$src1, GR64:$src2)>;
-
-def : Pat<(store (or (shl (loadi64 addr:$dst), CL:$amt),
- (srl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
- (SHLD64mrCL addr:$dst, GR64:$src2)>;
-
-def : Pat<(or (shl GR64:$src1, (i8 (trunc RCX:$amt))),
- (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
- (SHLD64rrCL GR64:$src1, GR64:$src2)>;
-
-def : Pat<(store (or (shl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))),
- (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
- addr:$dst),
- (SHLD64mrCL addr:$dst, GR64:$src2)>;
-
def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
(SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
@@ -2148,6 +2129,19 @@ def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1),
GR64:$src2, (i8 imm:$amt2)), addr:$dst),
(SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
+// (or x1, x2) -> (add x1, x2) if the two operands are known not to share bits.
+let AddedComplexity = 5 in { // Try this before selecting to OR.
+def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt8:$src2),
+ (implicit EFLAGS)),
+ (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt32:$src2),
+ (implicit EFLAGS)),
+ (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(parallel (or_is_add GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)),
+ (ADD64rr GR64:$src1, GR64:$src2)>;
+} // AddedComplexity
+
// X86 specific add which produces a flag.
def : Pat<(addc GR64:$src1, GR64:$src2),
(ADD64rr GR64:$src1, GR64:$src2)>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index e555cd1..7b39fb3 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
@@ -711,6 +712,62 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
}
}
+bool
+X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+ switch (MI.getOpcode()) {
+ default: break;
+ case X86::MOVSX16rr8:
+ case X86::MOVZX16rr8:
+ case X86::MOVSX32rr8:
+ case X86::MOVZX32rr8:
+ case X86::MOVSX64rr8:
+ case X86::MOVZX64rr8:
+ if (!TM.getSubtarget<X86Subtarget>().is64Bit())
+ // It's not always legal to reference the low 8-bit of the larger
+ // register in 32-bit mode.
+ return false;
+ case X86::MOVSX32rr16:
+ case X86::MOVZX32rr16:
+ case X86::MOVSX64rr16:
+ case X86::MOVZX64rr16:
+ case X86::MOVSX64rr32:
+ case X86::MOVZX64rr32: {
+ if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
+ // Be conservative.
+ return false;
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable(0);
+ break;
+ case X86::MOVSX16rr8:
+ case X86::MOVZX16rr8:
+ case X86::MOVSX32rr8:
+ case X86::MOVZX32rr8:
+ case X86::MOVSX64rr8:
+ case X86::MOVZX64rr8:
+ SubIdx = 1;
+ break;
+ case X86::MOVSX32rr16:
+ case X86::MOVZX32rr16:
+ case X86::MOVSX64rr16:
+ case X86::MOVZX64rr16:
+ SubIdx = 3;
+ break;
+ case X86::MOVSX64rr32:
+ case X86::MOVZX64rr32:
+ SubIdx = 4;
+ break;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
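+// For example (a sketch): a MOVZX32rr16 from %AX to %EAX is coalescable;
+// after coalescing, the original i16 value is still available as the low
+// 16-bit sub-register (SubIdx 3) of the result.
+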
/// isFrameOperand - Return true and the FrameIndex if the specified
/// operand and follow operands form a reference to the stack frame.
bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
@@ -1018,12 +1075,16 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
switch (Opc) {
default: break;
case X86::MOV8r0:
- case X86::MOV32r0: {
+ case X86::MOV16r0:
+ case X86::MOV32r0:
+ case X86::MOV64r0: {
if (!isSafeToClobberEFLAGS(MBB, I)) {
switch (Opc) {
default: break;
case X86::MOV8r0: Opc = X86::MOV8ri; break;
+ case X86::MOV16r0: Opc = X86::MOV16ri; break;
case X86::MOV32r0: Opc = X86::MOV32ri; break;
+ case X86::MOV64r0: Opc = X86::MOV64ri; break;
}
Clone = false;
}
@@ -2290,8 +2351,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
isTwoAddrFold = true;
} else if (i == 0) { // If operand 0
- if (MI->getOpcode() == X86::MOV32r0)
+ if (MI->getOpcode() == X86::MOV64r0)
+ NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV32r0)
NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV16r0)
+ NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
else if (MI->getOpcode() == X86::MOV8r0)
NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
if (NewMI)
@@ -2354,7 +2419,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// No fusion
if (PrintFailedFusing)
- errs() << "We failed to fuse operand " << i << " in " << *MI;
+ dbgs() << "We failed to fuse operand " << i << " in " << *MI;
return NULL;
}
@@ -2559,7 +2624,9 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
} else if (OpNum == 0) { // If operand 0
switch (Opc) {
case X86::MOV8r0:
+ case X86::MOV16r0:
case X86::MOV32r0:
+ case X86::MOV64r0:
return true;
default: break;
}
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index b83441d..0ab85f4 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -448,6 +448,16 @@ public:
unsigned &SrcReg, unsigned &DstReg,
unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+ /// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
+ /// extension instruction. That is, it's like a copy where it's legal for the
+  /// source to overlap the destination, e.g. X86::MOVSX64rr32. If this returns
+ /// true, then it's expected the pre-extension value is available as a subreg
+ /// of the result register. This also returns the sub-register index in
+ /// SubIdx.
+ virtual bool isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const;
+
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
/// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination
/// stack locations as well. This uses a heuristic so it isn't
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 4d922a5..396cb53 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -160,15 +160,21 @@ def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
[SDNPHasChain, SDNPOptInFlag]>;
-def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags>;
+def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
-def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags>;
-def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags>;
+def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags,
+ [SDNPCommutative]>;
def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
-def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags>;
-def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags>;
-def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags>;
+def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
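+
+// Marking these commutative lets the generated matcher try the operands in
+// either order, e.g. folding a load on either side of an X86add_flag, just
+// as it already can for the plain ISD nodes.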
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
@@ -487,6 +493,21 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
return N->hasOneUse();
}]>;
+// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.
+def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+ else {
+ unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero0, KnownOne0;
+ CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+ APInt KnownZero1, KnownOne1;
+ CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+ return (~KnownZero0 & ~KnownZero1) == 0;
+ }
+}]>;
+
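+// Worked example (a sketch): in (or (shl x, 8), (and y, 255)) the operands'
+// possible-one bits are disjoint, so the 'or' behaves exactly like an 'add'
+// and can be selected as one (see the or_is_add -> ADD patterns below).
+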
// 'shld' and 'shrd' instruction patterns. Note that even though these have
// the srl and shl in their patterns, the C++ code must still check for them,
// because predicates are tested before children nodes are explored.
@@ -3700,18 +3721,21 @@ let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins),
"xor{b}\t$dst, $dst",
[(set GR8:$dst, 0)]>;
+
+// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
+// encoding and sometimes avoids a partial-register update, but doing so
+// at isel time interferes with rematerialization in the current register
+// allocator. For now, this is rewritten when the instruction is lowered
+// to an MCInst.
+def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
+ "",
+ [(set GR16:$dst, 0)]>, OpSize;
def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins),
"xor{l}\t$dst, $dst",
[(set GR32:$dst, 0)]>;
}
-// Use xorl instead of xorw since we don't care about the high 16 bits,
-// it's smaller, and it avoids a partial-register update.
-let AddedComplexity = 1 in
-def : Pat<(i16 0),
- (EXTRACT_SUBREG (MOV32r0), x86_subreg_16bit)>;
-
//===----------------------------------------------------------------------===//
// Thread Local Storage Instructions
//
@@ -3792,7 +3816,7 @@ def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
[(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
}
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in {
-def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i32mem:$ptr),
+def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
"lock\n\t"
"cmpxchg8b\t$ptr",
[(X86cas8 addr:$ptr)]>, TB, LOCK;
@@ -3858,6 +3882,7 @@ def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
"cmpxchg8b\t$dst", []>, TB;
@@ -4466,7 +4491,7 @@ def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
x86_subreg_8bit_hi)>,
Requires<[In32BitMode]>;
-def : Pat<(srl_su GR16:$src, (i8 8)),
+def : Pat<(srl GR16:$src, (i8 8)),
(EXTRACT_SUBREG
(MOVZX32rr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
@@ -4640,6 +4665,28 @@ def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C32r)>;
+// (or x1, x2) -> (add x1, x2) if the two operands are known not to share bits.
+let AddedComplexity = 5 in { // Try this before selecting to OR.
+def : Pat<(parallel (or_is_add GR16:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(parallel (or_is_add GR32:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(parallel (or_is_add GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (or_is_add GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(parallel (or_is_add GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)),
+ (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(parallel (or_is_add GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)),
+ (ADD32rr GR32:$src1, GR32:$src2)>;
+} // AddedComplexity
+
//===----------------------------------------------------------------------===//
// EFLAGS-defining Patterns
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index b26e508..94b9b55 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -131,11 +131,13 @@ def alignedloadv2i64 : PatFrag<(ops node:$ptr),
// Like 'load', but uses special alignment checks suitable for use in
// memory operands in most SSE instructions, which are required to
-// be naturally aligned on some targets but not on others.
-// FIXME: Actually implement support for targets that don't require the
-// alignment. This probably wants a subtarget predicate.
+// be naturally aligned on some targets but not on others. If the subtarget
+// allows unaligned accesses, match any load, though this may require
+// setting a feature bit in the processor (on startup, for example).
+// Opteron 10h and later implement such a feature.
def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return cast<LoadSDNode>(N)->getAlignment() >= 16;
+ return Subtarget->hasVectorUAMem()
+ || cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
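+
+// A sketch of intended use: on subtargets built with the
+// "vector-unaligned-mem" feature (e.g. llc -mattr=+vector-unaligned-mem),
+// loads of any alignment may be folded into SSE memory operands.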
def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index c69cc83..f363903 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -348,7 +348,7 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
#endif
#if 0
- DEBUG(errs() << "In callback! Addr=" << (void*)RetAddr
+ DEBUG(dbgs() << "In callback! Addr=" << (void*)RetAddr
<< " ESP=" << (void*)StackPtr
<< ": Resolving call to function: "
<< TheVM->getFunctionReferencedName((void*)RetAddr) << "\n");
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index d96aafd..9bd96af 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -591,6 +591,15 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FrameIndex = MI.getOperand(i).getIndex();
unsigned BasePtr;
+ // DEBUG_VALUE has a special representation, and is only robust enough to
+  // represent SP (or BP) +- offset addressing modes. We rewrite the
+  // FrameIndex to be a constant; by convention, positive constants are
+  // relative to ESP and negative ones to EBP.
+ if (MI.getOpcode()==TargetInstrInfo::DEBUG_VALUE) {
+ MI.getOperand(i).ChangeToImmediate(getFrameIndexOffset(MF, FrameIndex));
+ return 0;
+ }
+
if (needsStackRealignment(MF))
BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
else
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 75cdbad..2039be7 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -286,6 +286,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
, HasFMA3(false)
, HasFMA4(false)
, IsBTMemSlow(false)
+ , HasVectorUAMem(false)
, DarwinVers(0)
, stackAlignment(8)
// FIXME: this is a known good value for Yonah. How about others?
@@ -317,7 +318,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
if (Is64Bit)
HasX86_64 = true;
- DEBUG(errs() << "Subtarget features: SSELevel " << X86SSELevel
+ DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
<< ", 3DNowLevel " << X863DNowLevel
<< ", 64bit " << HasX86_64 << "\n");
assert((!Is64Bit || HasX86_64) &&
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index ef6dbaf..618dd10 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -78,6 +78,10 @@ protected:
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
+ /// HasVectorUAMem - True if SIMD operations can have unaligned memory operands.
+ /// This may require setting a feature bit in the processor.
+ bool HasVectorUAMem;
+
/// DarwinVers - Nonzero if this is a darwin platform: the numeric
/// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
unsigned char DarwinVers; // Is any darwin-x86 platform.
@@ -142,6 +146,7 @@ public:
bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; }
bool isBTMemSlow() const { return IsBTMemSlow; }
+ bool hasVectorUAMem() const { return HasVectorUAMem; }
bool isTargetDarwin() const { return TargetType == isDarwin; }
bool isTargetELF() const { return TargetType == isELF; }
@@ -169,7 +174,7 @@ public:
p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64";
else if (isTargetDarwin())
p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32";
- else if (isTargetCygMing() || isTargetWindows())
+ else if (isTargetMingw() || isTargetWindows())
p = "e-p:32:32-f64:64:64-i64:64:64-f80:128:128-n8:16:32";
else
p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32";
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index da2fb04..383fd91 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -49,7 +49,7 @@ namespace {
Lowering(*TM.getTargetLowering()),
Subtarget(*TM.getSubtargetImpl()) { }
- SDNode *Select(SDValue Op);
+ SDNode *Select(SDNode *N);
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
@@ -58,11 +58,11 @@ namespace {
}
// Complex Pattern Selectors.
- bool SelectADDRspii(SDValue Op, SDValue Addr, SDValue &Base,
+ bool SelectADDRspii(SDNode *Op, SDValue Addr, SDValue &Base,
SDValue &Offset);
- bool SelectADDRdpii(SDValue Op, SDValue Addr, SDValue &Base,
+ bool SelectADDRdpii(SDNode *Op, SDValue Addr, SDValue &Base,
SDValue &Offset);
- bool SelectADDRcpii(SDValue Op, SDValue Addr, SDValue &Base,
+ bool SelectADDRcpii(SDNode *Op, SDValue Addr, SDValue &Base,
SDValue &Offset);
virtual void InstructionSelect();
@@ -83,7 +83,7 @@ FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM) {
return new XCoreDAGToDAGISel(TM);
}
-bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Op, SDValue Addr,
+bool XCoreDAGToDAGISel::SelectADDRspii(SDNode *Op, SDValue Addr,
SDValue &Base, SDValue &Offset) {
FrameIndexSDNode *FIN = 0;
if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
@@ -105,7 +105,7 @@ bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Op, SDValue Addr,
return false;
}
-bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Op, SDValue Addr,
+bool XCoreDAGToDAGISel::SelectADDRdpii(SDNode *Op, SDValue Addr,
SDValue &Base, SDValue &Offset) {
if (Addr.getOpcode() == XCoreISD::DPRelativeWrapper) {
Base = Addr.getOperand(0);
@@ -126,7 +126,7 @@ bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Op, SDValue Addr,
return false;
}
-bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Op, SDValue Addr,
+bool XCoreDAGToDAGISel::SelectADDRcpii(SDNode *Op, SDValue Addr,
SDValue &Base, SDValue &Offset) {
if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) {
Base = Addr.getOperand(0);
@@ -156,8 +156,7 @@ void XCoreDAGToDAGISel::InstructionSelect() {
CurDAG->RemoveDeadNodes();
}
-SDNode *XCoreDAGToDAGISel::Select(SDValue Op) {
- SDNode *N = Op.getNode();
+SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
EVT NVT = N->getValueType(0);
if (NVT == MVT::i32) {
@@ -185,7 +184,7 @@ SDNode *XCoreDAGToDAGISel::Select(SDValue Op) {
// FIXME fold addition into the macc instruction
SDValue Zero(CurDAG->getMachineNode(XCore::LDC_ru6, dl, MVT::i32,
CurDAG->getTargetConstant(0, MVT::i32)), 0);
- SDValue Ops[] = { Zero, Zero, Op.getOperand(0), Op.getOperand(1) };
+ SDValue Ops[] = { Zero, Zero, N->getOperand(0), N->getOperand(1) };
SDNode *ResNode = CurDAG->getMachineNode(XCore::MACCS_l4r, dl,
MVT::i32, MVT::i32, Ops, 4);
ReplaceUses(SDValue(N, 0), SDValue(ResNode, 1));
@@ -196,7 +195,7 @@ SDNode *XCoreDAGToDAGISel::Select(SDValue Op) {
// FIXME fold addition into the macc / lmul instruction
SDValue Zero(CurDAG->getMachineNode(XCore::LDC_ru6, dl, MVT::i32,
CurDAG->getTargetConstant(0, MVT::i32)), 0);
- SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
Zero, Zero };
SDNode *ResNode = CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32,
MVT::i32, Ops, 4);
@@ -205,19 +204,19 @@ SDNode *XCoreDAGToDAGISel::Select(SDValue Op) {
return NULL;
}
case XCoreISD::LADD: {
- SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
- Op.getOperand(2) };
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2) };
return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32,
Ops, 3);
}
case XCoreISD::LSUB: {
- SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
- Op.getOperand(2) };
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2) };
return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32,
Ops, 3);
}
// Other cases are autogenerated.
}
}
- return SelectCode(Op);
+ return SelectCode(N);
}
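
The mechanical change in this file (applied to the other targets elsewhere in this patch) is that Select and the complex-pattern selectors take the SDNode* directly instead of an SDValue, removing the Op.getNode() unwrap and any chance of mixing Op and N. In miniature, with stand-in types:

    struct Node { int Dummy; };
    struct Val { Node *N; Node *getNode() const { return N; } };

    // Before: every selector began by unwrapping the value to its node.
    Node *selectOld(Val Op) { Node *N = Op.getNode(); return N; }

    // After: callers hand over the node itself.
    Node *selectNew(Node *N) { return N; }
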
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index dd5a6d8..d8190a4 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -147,7 +147,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
if (maxElements > 0 && STy->getNumElements() > maxElements) {
- DEBUG(errs() << "argpromotion disable promoting argument '"
+ DEBUG(dbgs() << "argpromotion disable promoting argument '"
<< PtrArg->getName() << "' because it would require adding more"
<< " than " << maxElements << " arguments to the function.\n");
} else {
@@ -409,7 +409,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
// to do.
if (ToPromote.find(Operands) == ToPromote.end()) {
if (maxElements > 0 && ToPromote.size() == maxElements) {
- DEBUG(errs() << "argpromotion not promoting argument '"
+ DEBUG(dbgs() << "argpromotion not promoting argument '"
<< Arg->getName() << "' because it would require adding more "
<< "than " << maxElements << " arguments to the function.\n");
// We limit aggregate promotion to only promoting up to a fixed number
@@ -593,7 +593,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
NF->copyAttributesFrom(F);
- DEBUG(errs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
+ DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
<< "From: " << *F);
// Recompute the parameter attributes list based on the new arguments for
@@ -808,7 +808,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
LI->replaceAllUsesWith(I2);
AA.replaceWithNewValue(LI, I2);
LI->eraseFromParent();
- DEBUG(errs() << "*** Promoted load of argument '" << I->getName()
+ DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
<< "' in function '" << F->getName() << "'\n");
} else {
GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back());
@@ -835,7 +835,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
NewName += ".val";
TheArg->setName(NewName);
- DEBUG(errs() << "*** Promoted agg argument '" << TheArg->getName()
+ DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
<< "' of function '" << NF->getName() << "'\n");
// All of the uses must be load instructions. Replace them all with
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index a3db836..1749b1e 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -425,7 +425,7 @@ void DAE::SurveyFunction(Function &F) {
return;
}
- DEBUG(errs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n");
+ DEBUG(dbgs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n");
// Keep track of the number of live retvals, so we can skip checks once all
// of them turn out to be live.
unsigned NumLiveRetVals = 0;
@@ -488,7 +488,7 @@ void DAE::SurveyFunction(Function &F) {
for (unsigned i = 0; i != RetCount; ++i)
MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]);
- DEBUG(errs() << "DAE - Inspecting args for fn: " << F.getName() << "\n");
+ DEBUG(dbgs() << "DAE - Inspecting args for fn: " << F.getName() << "\n");
// Now, check all of our arguments.
unsigned i = 0;
@@ -530,7 +530,7 @@ void DAE::MarkValue(const RetOrArg &RA, Liveness L,
/// mark any values that are used as this function's parameters or by its return
/// values (according to Uses) live as well.
void DAE::MarkLive(const Function &F) {
- DEBUG(errs() << "DAE - Intrinsically live fn: " << F.getName() << "\n");
+ DEBUG(dbgs() << "DAE - Intrinsically live fn: " << F.getName() << "\n");
// Mark the function as live.
LiveFunctions.insert(&F);
// Mark all arguments as live.
@@ -551,7 +551,7 @@ void DAE::MarkLive(const RetOrArg &RA) {
if (!LiveValues.insert(RA).second)
return; // We were already marked Live.
- DEBUG(errs() << "DAE - Marking " << RA.getDescription() << " live\n");
+ DEBUG(dbgs() << "DAE - Marking " << RA.getDescription() << " live\n");
PropagateLiveness(RA);
}
@@ -616,7 +616,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
NewRetIdxs[i] = RetTypes.size() - 1;
} else {
++NumRetValsEliminated;
- DEBUG(errs() << "DAE - Removing return value " << i << " from "
+ DEBUG(dbgs() << "DAE - Removing return value " << i << " from "
<< F->getName() << "\n");
}
}
@@ -626,7 +626,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
RetTypes.push_back(RetTy);
NewRetIdxs[0] = 0;
} else {
- DEBUG(errs() << "DAE - Removing return value from " << F->getName()
+ DEBUG(dbgs() << "DAE - Removing return value from " << F->getName()
<< "\n");
++NumRetValsEliminated;
}
@@ -681,7 +681,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs));
} else {
++NumArgumentsEliminated;
- DEBUG(errs() << "DAE - Removing argument " << i << " (" << I->getName()
+ DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName()
<< ") from " << F->getName() << "\n");
}
}
@@ -915,7 +915,7 @@ bool DAE::runOnModule(Module &M) {
// removed. We can do this if they never call va_start. This loop cannot be
// fused with the next loop, because deleting a function invalidates
// information computed while surveying other functions.
- DEBUG(errs() << "DAE - Deleting dead varargs\n");
+ DEBUG(dbgs() << "DAE - Deleting dead varargs\n");
for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
Function &F = *I++;
if (F.getFunctionType()->isVarArg())
@@ -926,7 +926,7 @@ bool DAE::runOnModule(Module &M) {
// We assume all arguments are dead unless proven otherwise (allowing us to
// determine that dead arguments passed into recursive functions are dead).
//
- DEBUG(errs() << "DAE - Determining liveness\n");
+ DEBUG(dbgs() << "DAE - Determining liveness\n");
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
SurveyFunction(*I);
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index a16d335..64a6d78 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -79,16 +79,47 @@ Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
/// memory that is local to the function. Global constants are considered
/// local to all functions.
bool FunctionAttrs::PointsToLocalMemory(Value *V) {
- V = V->getUnderlyingObject();
- // An alloca instruction defines local memory.
- if (isa<AllocaInst>(V))
- return true;
- // A global constant counts as local memory for our purposes.
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- return GV->isConstant();
- // Could look through phi nodes and selects here, but it doesn't seem
- // to be useful in practice.
- return false;
+ SmallVector<Value*, 16> Worklist;
+ unsigned MaxLookup = 8;
+
+ Worklist.push_back(V);
+
+ do {
+ V = Worklist.pop_back_val()->getUnderlyingObject();
+
+ // An alloca instruction defines local memory.
+ if (isa<AllocaInst>(V))
+ continue;
+
+ // A global constant counts as local memory for our purposes.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ if (!GV->isConstant())
+ return false;
+ continue;
+ }
+
+ // If both select values point to local memory, then so does the select.
+ if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+
+ // If all values incoming to a phi node point to local memory, then so does
+ // the phi.
+ if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ // Don't bother inspecting phi nodes with many operands.
+ if (PN->getNumIncomingValues() > MaxLookup)
+ return false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ Worklist.push_back(PN->getIncomingValue(i));
+ continue;
+ }
+
+ return false;
+ } while (!Worklist.empty() && --MaxLookup);
+
+ return Worklist.empty();
}
/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
@@ -136,6 +167,21 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {
// Ignore calls to functions in the same SCC.
if (SCCNodes.count(CS.getCalledFunction()))
continue;
+ // Ignore intrinsics that only access local memory.
+ if (unsigned id = CS.getCalledFunction()->getIntrinsicID())
+ if (AliasAnalysis::getModRefBehavior(id) ==
+ AliasAnalysis::AccessesArguments) {
+ // Check that all pointer arguments point to local memory.
+ for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI) {
+ Value *Arg = *CI;
+ if (isa<PointerType>(Arg->getType()) && !PointsToLocalMemory(Arg))
+ // Writes memory. Just give up.
+ return false;
+ }
+ // Only reads and writes local memory.
+ continue;
+ }
} else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
// Ignore loads from local memory.
if (PointsToLocalMemory(LI->getPointerOperand()))
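
The PointsToLocalMemory rewrite above turns a one-step test into a bounded graph walk: allocas and constant globals are accepted as local leaves, selects and phis push their inputs onto a worklist, anything else bails out, and the MaxLookup budget (also used to cap phi fan-in) keeps the walk cheap. A self-contained model of that control flow, with a stand-in node type instead of llvm::Value:

    #include <vector>

    struct Node {
      bool IsLocalLeaf;            // models an alloca / constant global
      std::vector<Node *> Inputs;  // non-empty models a select / phi
    };

    bool pointsToLocal(Node *Root, unsigned MaxLookup = 8) {
      std::vector<Node *> Worklist;
      Worklist.push_back(Root);
      do {
        Node *N = Worklist.back();
        Worklist.pop_back();
        if (N->IsLocalLeaf)
          continue;                // proven local; keep checking the rest
        if (N->Inputs.empty())
          return false;            // unknown leaf: give up immediately
        for (size_t i = 0, e = N->Inputs.size(); i != e; ++i)
          Worklist.push_back(N->Inputs[i]);
      } while (!Worklist.empty() && --MaxLookup);
      // Leftover work when the budget runs out means "don't know",
      // which must be treated as not-local.
      return Worklist.empty();
    }
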
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 1793bbf..ee260e9 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -544,7 +544,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
if (NewGlobals.empty())
return 0;
- DEBUG(errs() << "PERFORMING GLOBAL SRA ON: " << *GV);
+ DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
@@ -771,14 +771,14 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
}
if (Changed) {
- DEBUG(errs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
+ DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
++NumGlobUses;
}
// If we nuked all of the loads, then none of the stores are needed either,
// nor is the global.
if (AllNonStoreUsesGone) {
- DEBUG(errs() << " *** GLOBAL NOW DEAD!\n");
+ DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
CleanupConstantGlobalUsers(GV, 0);
if (GV->use_empty()) {
GV->eraseFromParent();
@@ -815,7 +815,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
const Type *AllocTy,
Value* NElems,
TargetData* TD) {
- DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
+ DEBUG(dbgs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
const Type *IntPtrTy = TD->getIntPtrType(GV->getContext());
@@ -1268,7 +1268,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
/// it up into multiple allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Value* NElems, TargetData *TD) {
- DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
+ DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
const Type* MAT = getMallocAllocatedType(CI);
const StructType *STy = cast<StructType>(MAT);
@@ -1600,7 +1600,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
return false;
- DEBUG(errs() << " *** SHRINKING TO BOOL: " << *GV);
+ DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV);
// Create the new global, initializing it to false.
GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
@@ -1681,7 +1681,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GV->removeDeadConstantUsers();
if (GV->use_empty()) {
- DEBUG(errs() << "GLOBAL DEAD: " << *GV);
+ DEBUG(dbgs() << "GLOBAL DEAD: " << *GV);
GV->eraseFromParent();
++NumDeleted;
return true;
@@ -1689,26 +1689,26 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
if (!AnalyzeGlobal(GV, GS, PHIUsers)) {
#if 0
- DEBUG(errs() << "Global: " << *GV);
- DEBUG(errs() << " isLoaded = " << GS.isLoaded << "\n");
- DEBUG(errs() << " StoredType = ");
+ DEBUG(dbgs() << "Global: " << *GV);
+ DEBUG(dbgs() << " isLoaded = " << GS.isLoaded << "\n");
+ DEBUG(dbgs() << " StoredType = ");
switch (GS.StoredType) {
- case GlobalStatus::NotStored: DEBUG(errs() << "NEVER STORED\n"); break;
- case GlobalStatus::isInitializerStored: DEBUG(errs() << "INIT STORED\n");
+ case GlobalStatus::NotStored: DEBUG(dbgs() << "NEVER STORED\n"); break;
+ case GlobalStatus::isInitializerStored: DEBUG(dbgs() << "INIT STORED\n");
break;
- case GlobalStatus::isStoredOnce: DEBUG(errs() << "STORED ONCE\n"); break;
- case GlobalStatus::isStored: DEBUG(errs() << "stored\n"); break;
+ case GlobalStatus::isStoredOnce: DEBUG(dbgs() << "STORED ONCE\n"); break;
+ case GlobalStatus::isStored: DEBUG(dbgs() << "stored\n"); break;
}
if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue)
- DEBUG(errs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n");
+ DEBUG(dbgs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n");
if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions)
- DEBUG(errs() << " AccessingFunction = " << GS.AccessingFunction->getName()
+ DEBUG(dbgs() << " AccessingFunction = " << GS.AccessingFunction->getName()
<< "\n");
- DEBUG(errs() << " HasMultipleAccessingFunctions = "
+ DEBUG(dbgs() << " HasMultipleAccessingFunctions = "
<< GS.HasMultipleAccessingFunctions << "\n");
- DEBUG(errs() << " HasNonInstructionUser = "
+ DEBUG(dbgs() << " HasNonInstructionUser = "
<< GS.HasNonInstructionUser<<"\n");
- DEBUG(errs() << "\n");
+ DEBUG(dbgs() << "\n");
#endif
// If this is a first class global and has only one accessing function
@@ -1726,7 +1726,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GS.AccessingFunction->getName() == "main" &&
GS.AccessingFunction->hasExternalLinkage() &&
GV->getType()->getAddressSpace() == 0) {
- DEBUG(errs() << "LOCALIZING GLOBAL: " << *GV);
+ DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin();
const Type* ElemTy = GV->getType()->getElementType();
// FIXME: Pass Global's alignment when globals have alignment
@@ -1743,7 +1743,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// If the global is never loaded (but may be stored to), it is dead.
// Delete it now.
if (!GS.isLoaded) {
- DEBUG(errs() << "GLOBAL NEVER LOADED: " << *GV);
+ DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
// Delete any stores we can find to the global. We may not be able to
// make it completely dead though.
@@ -1758,7 +1758,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
return Changed;
} else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
- DEBUG(errs() << "MARKING CONSTANT: " << *GV);
+ DEBUG(dbgs() << "MARKING CONSTANT: " << *GV);
GV->setConstant(true);
// Clean up any obviously simplifiable users now.
@@ -1766,7 +1766,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// If the global is dead now, just nuke it.
if (GV->use_empty()) {
- DEBUG(errs() << " *** Marking constant allowed us to simplify "
+ DEBUG(dbgs() << " *** Marking constant allowed us to simplify "
<< "all users and delete global!\n");
GV->eraseFromParent();
++NumDeleted;
@@ -1794,7 +1794,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
CleanupConstantGlobalUsers(GV, GV->getInitializer());
if (GV->use_empty()) {
- DEBUG(errs() << " *** Substituting initializer allowed us to "
+ DEBUG(dbgs() << " *** Substituting initializer allowed us to "
<< "simplify all users and delete global!\n");
GV->eraseFromParent();
++NumDeleted;
@@ -1925,11 +1925,11 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
if (!ATy) return 0;
const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
if (!STy || STy->getNumElements() != 2 ||
- STy->getElementType(0) != Type::getInt32Ty(M.getContext())) return 0;
+ !STy->getElementType(0)->isInteger(32)) return 0;
const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1));
if (!PFTy) return 0;
const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType());
- if (!FTy || FTy->getReturnType() != Type::getVoidTy(M.getContext()) ||
+ if (!FTy || !FTy->getReturnType()->isVoidTy() ||
FTy->isVarArg() || FTy->getNumParams() != 0)
return 0;
@@ -2091,8 +2091,8 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
return Val;
}
+ std::vector<Constant*> Elts;
if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
- std::vector<Constant*> Elts;
// Break up the constant into its elements.
if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
@@ -2120,28 +2120,38 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
STy->isPacked());
} else {
ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
- const ArrayType *ATy = cast<ArrayType>(Init->getType());
+ const SequentialType *InitTy = cast<SequentialType>(Init->getType());
+ uint64_t NumElts;
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
+ NumElts = ATy->getNumElements();
+ else
+ NumElts = cast<VectorType>(InitTy)->getNumElements();
+
// Break up the array into elements.
- std::vector<Constant*> Elts;
if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
Elts.push_back(cast<Constant>(*i));
+ } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) {
+ for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i)
+ Elts.push_back(cast<Constant>(*i));
} else if (isa<ConstantAggregateZero>(Init)) {
- Constant *Elt = Constant::getNullValue(ATy->getElementType());
- Elts.assign(ATy->getNumElements(), Elt);
- } else if (isa<UndefValue>(Init)) {
- Constant *Elt = UndefValue::get(ATy->getElementType());
- Elts.assign(ATy->getNumElements(), Elt);
+ Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType()));
} else {
- llvm_unreachable("This code is out of sync with "
+ assert(isa<UndefValue>(Init) && "This code is out of sync with "
" ConstantFoldLoadThroughGEPConstantExpr");
+ Elts.assign(NumElts, UndefValue::get(InitTy->getElementType()));
}
- assert(CI->getZExtValue() < ATy->getNumElements());
+ assert(CI->getZExtValue() < NumElts);
Elts[CI->getZExtValue()] =
EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
- return ConstantArray::get(ATy, Elts);
+
+ if (isa<ArrayType>(Init->getType()))
+ return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
+ else
+ return ConstantVector::get(&Elts[0], Elts.size());
}
}
@@ -2153,13 +2163,10 @@ static void CommitValueTo(Constant *Val, Constant *Addr) {
GV->setInitializer(Val);
return;
}
-
+
ConstantExpr *CE = cast<ConstantExpr>(Addr);
GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
-
- Constant *Init = GV->getInitializer();
- Init = EvaluateStoreInto(Init, Val, CE, 2);
- GV->setInitializer(Init);
+ GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
}
/// ComputeLoadResult - Return the value that would be computed by a load from
@@ -2402,7 +2409,7 @@ static bool EvaluateStaticConstructor(Function *F) {
MutatedMemory, AllocaTmps);
if (EvalSuccess) {
// We succeeded at evaluation: commit the result.
- DEBUG(errs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
+ DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
<< F->getName() << "' to " << MutatedMemory.size()
<< " stores.\n");
for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(),
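
For orientation on the EvaluateStoreInto changes above (vectors now handled alongside arrays and structs): the function rebuilds a constant initializer with exactly one element replaced, recursing one GEP index at a time and reconstituting each aggregate level on the way back up. A toy version over a nested aggregate, plain C++ rather than Constant nodes:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct Agg {
      long Scalar;             // meaningful only for leaves
      std::vector<Agg> Elts;   // empty => this node is a scalar leaf
    };

    // Replace the element addressed by Path with Val, rebuilding each
    // aggregate level on the way back up, as EvaluateStoreInto does.
    Agg storeInto(Agg Init, long Val, const std::vector<size_t> &Path,
                  size_t Depth = 0) {
      if (Depth == Path.size()) { // reached the addressed scalar
        Init.Scalar = Val;
        return Init;
      }
      assert(Path[Depth] < Init.Elts.size());
      Init.Elts[Path[Depth]] =
          storeInto(Init.Elts[Path[Depth]], Val, Path, Depth + 1);
      return Init;
    }
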
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 6918fe8..5725db1 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -147,7 +147,7 @@ static bool InlineCallIfPossible(CallSite CS, CallGraph &CG,
// Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
// success!
- DEBUG(errs() << " ***MERGED ALLOCA: " << *AI);
+ DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI);
AI->replaceAllUsesWith(AvailableAlloca);
AI->eraseFromParent();
@@ -178,13 +178,13 @@ bool Inliner::shouldInline(CallSite CS) {
InlineCost IC = getInlineCost(CS);
if (IC.isAlways()) {
- DEBUG(errs() << " Inlining: cost=always"
+ DEBUG(dbgs() << " Inlining: cost=always"
<< ", Call: " << *CS.getInstruction() << "\n");
return true;
}
if (IC.isNever()) {
- DEBUG(errs() << " NOT Inlining: cost=never"
+ DEBUG(dbgs() << " NOT Inlining: cost=never"
<< ", Call: " << *CS.getInstruction() << "\n");
return false;
}
@@ -200,7 +200,7 @@ bool Inliner::shouldInline(CallSite CS) {
float FudgeFactor = getInlineFudgeFactor(CS);
if (Cost >= (int)(CurrentThreshold * FudgeFactor)) {
- DEBUG(errs() << " NOT Inlining: cost=" << Cost
+ DEBUG(dbgs() << " NOT Inlining: cost=" << Cost
<< ", Call: " << *CS.getInstruction() << "\n");
return false;
}
@@ -263,14 +263,14 @@ bool Inliner::shouldInline(CallSite CS) {
if (outerCallsFound && someOuterCallWouldNotBeInlined &&
TotalSecondaryCost < Cost) {
- DEBUG(errs() << " NOT Inlining: " << *CS.getInstruction() <<
+ DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
" Cost = " << Cost <<
", outer Cost = " << TotalSecondaryCost << '\n');
return false;
}
}
- DEBUG(errs() << " Inlining: cost=" << Cost
+ DEBUG(dbgs() << " Inlining: cost=" << Cost
<< ", Call: " << *CS.getInstruction() << '\n');
return true;
}
@@ -280,11 +280,11 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
SmallPtrSet<Function*, 8> SCCFunctions;
- DEBUG(errs() << "Inliner visiting SCC:");
+ DEBUG(dbgs() << "Inliner visiting SCC:");
for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
Function *F = SCC[i]->getFunction();
if (F) SCCFunctions.insert(F);
- DEBUG(errs() << " " << (F ? F->getName() : "INDIRECTNODE"));
+ DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
}
// Scan through and identify all call sites ahead of time so that we only
@@ -314,7 +314,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
}
}
- DEBUG(errs() << ": " << CallSites.size() << " call sites.\n");
+ DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
// Now that we have all of the call sites, move the ones to functions in the
// current SCC to the end of the list.
@@ -346,7 +346,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
// size. This happens because IPSCCP propagates the result out of the
// call and then we're left with the dead call.
if (isInstructionTriviallyDead(CS.getInstruction())) {
- DEBUG(errs() << " -> Deleting dead call: "
+ DEBUG(dbgs() << " -> Deleting dead call: "
<< *CS.getInstruction() << "\n");
// Update the call graph by deleting the edge from Callee to Caller.
CG[Caller]->removeCallEdgeFor(CS);
@@ -377,7 +377,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
// callgraph references to the node, we cannot delete it yet, this
// could invalidate the CGSCC iterator.
CG[Callee]->getNumReferences() == 0) {
- DEBUG(errs() << " -> Deleting dead function: "
+ DEBUG(dbgs() << " -> Deleting dead function: "
<< Callee->getName() << "\n");
CallGraphNode *CalleeNode = CG[Callee];
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index 20ae0d5..3d31932 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -131,7 +131,7 @@ bool InternalizePass::runOnModule(Module &M) {
if (ExternalNode) ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);
Changed = true;
++NumFunctions;
- DEBUG(errs() << "Internalizing func " << I->getName() << "\n");
+ DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n");
}
// Never internalize the llvm.used symbol. It is used to implement
@@ -160,7 +160,7 @@ bool InternalizePass::runOnModule(Module &M) {
I->setLinkage(GlobalValue::InternalLinkage);
Changed = true;
++NumGlobals;
- DEBUG(errs() << "Internalized gvar " << I->getName() << "\n");
+ DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n");
}
// Mark all aliases that are not in the api as internal as well.
@@ -171,7 +171,7 @@ bool InternalizePass::runOnModule(Module &M) {
I->setLinkage(GlobalValue::InternalLinkage);
Changed = true;
++NumAliases;
- DEBUG(errs() << "Internalized alias " << I->getName() << "\n");
+ DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n");
}
return Changed;
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index b2bdabc..fa8845b 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -498,7 +498,7 @@ static void ThunkGToF(Function *F, Function *G) {
CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB);
CI->setTailCall();
CI->setCallingConv(F->getCallingConv());
- if (NewG->getReturnType() == Type::getVoidTy(F->getContext())) {
+ if (NewG->getReturnType()->isVoidTy()) {
ReturnInst::Create(F->getContext(), BB);
} else if (CI->getType() != NewG->getReturnType()) {
Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB);
@@ -633,17 +633,17 @@ bool MergeFunctions::runOnModule(Module &M) {
bool LocalChanged;
do {
LocalChanged = false;
- DEBUG(errs() << "size: " << FnMap.size() << "\n");
+ DEBUG(dbgs() << "size: " << FnMap.size() << "\n");
for (std::map<unsigned long, std::vector<Function *> >::iterator
I = FnMap.begin(), E = FnMap.end(); I != E; ++I) {
std::vector<Function *> &FnVec = I->second;
- DEBUG(errs() << "hash (" << I->first << "): " << FnVec.size() << "\n");
+ DEBUG(dbgs() << "hash (" << I->first << "): " << FnVec.size() << "\n");
for (int i = 0, e = FnVec.size(); i != e; ++i) {
for (int j = i + 1; j != e; ++j) {
bool isEqual = equals(FnVec[i], FnVec[j]);
- DEBUG(errs() << " " << FnVec[i]->getName()
+ DEBUG(dbgs() << " " << FnVec[i]->getName()
<< (isEqual ? " == " : " != ")
<< FnVec[j]->getName() << "\n");
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index b955b97..f40902f 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -145,7 +145,7 @@ bool PartialInliner::runOnModule(Module& M) {
worklist.reserve(M.size());
for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
if (!FI->use_empty() && !FI->isDeclaration())
- worklist.push_back(&*FI);
+ worklist.push_back(&*FI);
bool changed = false;
while (!worklist.empty()) {
diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp
index 67fc934..dda32d0 100644
--- a/lib/Transforms/IPO/StructRetPromotion.cpp
+++ b/lib/Transforms/IPO/StructRetPromotion.cpp
@@ -93,11 +93,10 @@ CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) {
if (F->arg_size() == 0 || !F->hasStructRetAttr() || F->doesNotReturn())
return 0;
- DEBUG(errs() << "SretPromotion: Looking at sret function "
+ DEBUG(dbgs() << "SretPromotion: Looking at sret function "
<< F->getName() << "\n");
- assert(F->getReturnType() == Type::getVoidTy(F->getContext()) &&
- "Invalid function return type");
+ assert(F->getReturnType()->isVoidTy() && "Invalid function return type");
Function::arg_iterator AI = F->arg_begin();
const llvm::PointerType *FArgType = dyn_cast<PointerType>(AI->getType());
assert(FArgType && "Invalid sret parameter type");
@@ -107,12 +106,12 @@ CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) {
// Check if it is ok to perform this promotion.
if (isSafeToUpdateAllCallers(F) == false) {
- DEBUG(errs() << "SretPromotion: Not all callers can be updated\n");
+ DEBUG(dbgs() << "SretPromotion: Not all callers can be updated\n");
NumRejectedSRETUses++;
return 0;
}
- DEBUG(errs() << "SretPromotion: sret argument will be promoted\n");
+ DEBUG(dbgs() << "SretPromotion: sret argument will be promoted\n");
NumSRET++;
// [1] Replace use of sret parameter
AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv",
@@ -358,7 +357,7 @@ bool SRETPromotion::nestedStructType(const StructType *STy) {
unsigned Num = STy->getNumElements();
for (unsigned i = 0; i < Num; i++) {
const Type *Ty = STy->getElementType(i);
- if (!Ty->isSingleValueType() && Ty != Type::getVoidTy(STy->getContext()))
+ if (!Ty->isSingleValueType() && !Ty->isVoidTy())
return true;
}
return false;
diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt
new file mode 100644
index 0000000..5b1ff3e
--- /dev/null
+++ b/lib/Transforms/InstCombine/CMakeLists.txt
@@ -0,0 +1,17 @@
+add_llvm_library(LLVMInstCombine
+ InstructionCombining.cpp
+ InstCombineAddSub.cpp
+ InstCombineAndOrXor.cpp
+ InstCombineCalls.cpp
+ InstCombineCasts.cpp
+ InstCombineCompares.cpp
+ InstCombineLoadStoreAlloca.cpp
+ InstCombineMulDivRem.cpp
+ InstCombinePHI.cpp
+ InstCombineSelect.cpp
+ InstCombineShifts.cpp
+ InstCombineSimplifyDemanded.cpp
+ InstCombineVectorOps.cpp
+ )
+
+target_link_libraries (LLVMInstCombine LLVMTransformUtils)
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
new file mode 100644
index 0000000..5367900
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -0,0 +1,349 @@
+//===- InstCombine.h - Main InstCombine pass definition -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTCOMBINE_INSTCOMBINE_H
+#define INSTCOMBINE_INSTCOMBINE_H
+
+#include "InstCombineWorklist.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/TargetFolder.h"
+
+namespace llvm {
+ class CallSite;
+ class TargetData;
+ class DbgDeclareInst;
+ class MemIntrinsic;
+ class MemSetInst;
+
+/// SelectPatternFlavor - We can match a variety of different patterns for
+/// select operations.
+enum SelectPatternFlavor {
+ SPF_UNKNOWN = 0,
+ SPF_SMIN, SPF_UMIN,
+ SPF_SMAX, SPF_UMAX
+ //SPF_ABS - TODO.
+};
+
+/// getComplexity: Assign a complexity or rank value to LLVM Values...
+/// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
+static inline unsigned getComplexity(Value *V) {
+ if (isa<Instruction>(V)) {
+ if (BinaryOperator::isNeg(V) ||
+ BinaryOperator::isFNeg(V) ||
+ BinaryOperator::isNot(V))
+ return 3;
+ return 4;
+ }
+ if (isa<Argument>(V)) return 3;
+ return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
+}
+
+
+/// InstCombineIRInserter - This is an IRBuilder insertion helper that works
+/// just like the normal insertion helper, but also adds any new instructions
+/// to the instcombine worklist.
+class VISIBILITY_HIDDEN InstCombineIRInserter
+ : public IRBuilderDefaultInserter<true> {
+ InstCombineWorklist &Worklist;
+public:
+ InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {}
+
+ void InsertHelper(Instruction *I, const Twine &Name,
+ BasicBlock *BB, BasicBlock::iterator InsertPt) const {
+ IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt);
+ Worklist.Add(I);
+ }
+};
+
+/// InstCombiner - The -instcombine pass.
+class VISIBILITY_HIDDEN InstCombiner
+ : public FunctionPass,
+ public InstVisitor<InstCombiner, Instruction*> {
+ TargetData *TD;
+ bool MustPreserveLCSSA;
+ bool MadeIRChange;
+public:
+ /// Worklist - All of the instructions that need to be simplified.
+ InstCombineWorklist Worklist;
+
+ /// Builder - This is an IRBuilder that automatically inserts new
+ /// instructions into the worklist when they are created.
+ typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy;
+ BuilderTy *Builder;
+
+ static char ID; // Pass identification, replacement for typeid
+ InstCombiner() : FunctionPass(&ID), TD(0), Builder(0) {}
+
+public:
+ virtual bool runOnFunction(Function &F);
+
+ bool DoOneIteration(Function &F, unsigned ItNum);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ TargetData *getTargetData() const { return TD; }
+
+ // Visitation implementation - Implement instruction combining for different
+ // instruction types. The semantics are as follows:
+ // Return Value:
+ // null - No change was made
+ // I - Change was made, I is still valid, I may be dead though
+ // otherwise - Change was made, replace I with returned instruction
+ //
+ Instruction *visitAdd(BinaryOperator &I);
+ Instruction *visitFAdd(BinaryOperator &I);
+ Value *OptimizePointerDifference(Value *LHS, Value *RHS, const Type *Ty);
+ Instruction *visitSub(BinaryOperator &I);
+ Instruction *visitFSub(BinaryOperator &I);
+ Instruction *visitMul(BinaryOperator &I);
+ Instruction *visitFMul(BinaryOperator &I);
+ Instruction *visitURem(BinaryOperator &I);
+ Instruction *visitSRem(BinaryOperator &I);
+ Instruction *visitFRem(BinaryOperator &I);
+ bool SimplifyDivRemOfSelect(BinaryOperator &I);
+ Instruction *commonRemTransforms(BinaryOperator &I);
+ Instruction *commonIRemTransforms(BinaryOperator &I);
+ Instruction *commonDivTransforms(BinaryOperator &I);
+ Instruction *commonIDivTransforms(BinaryOperator &I);
+ Instruction *visitUDiv(BinaryOperator &I);
+ Instruction *visitSDiv(BinaryOperator &I);
+ Instruction *visitFDiv(BinaryOperator &I);
+ Instruction *FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS);
+ Instruction *FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS);
+ Instruction *visitAnd(BinaryOperator &I);
+ Instruction *FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS);
+ Instruction *FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS);
+ Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C);
+ Instruction *visitOr (BinaryOperator &I);
+ Instruction *visitXor(BinaryOperator &I);
+ Instruction *visitShl(BinaryOperator &I);
+ Instruction *visitAShr(BinaryOperator &I);
+ Instruction *visitLShr(BinaryOperator &I);
+ Instruction *commonShiftTransforms(BinaryOperator &I);
+ Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI,
+ Constant *RHSC);
+ Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
+ GlobalVariable *GV, CmpInst &ICI,
+ ConstantInt *AndCst = 0);
+ Instruction *visitFCmpInst(FCmpInst &I);
+ Instruction *visitICmpInst(ICmpInst &I);
+ Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI);
+ Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
+ Instruction *LHS,
+ ConstantInt *RHS);
+ Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS);
+ Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred, Value *TheAdd);
+ Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
+ ICmpInst::Predicate Cond, Instruction &I);
+ Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
+ BinaryOperator &I);
+ Instruction *commonCastTransforms(CastInst &CI);
+ Instruction *commonPointerCastTransforms(CastInst &CI);
+ Instruction *visitTrunc(TruncInst &CI);
+ Instruction *visitZExt(ZExtInst &CI);
+ Instruction *visitSExt(SExtInst &CI);
+ Instruction *visitFPTrunc(FPTruncInst &CI);
+ Instruction *visitFPExt(CastInst &CI);
+ Instruction *visitFPToUI(FPToUIInst &FI);
+ Instruction *visitFPToSI(FPToSIInst &FI);
+ Instruction *visitUIToFP(CastInst &CI);
+ Instruction *visitSIToFP(CastInst &CI);
+ Instruction *visitPtrToInt(PtrToIntInst &CI);
+ Instruction *visitIntToPtr(IntToPtrInst &CI);
+ Instruction *visitBitCast(BitCastInst &CI);
+ Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
+ Instruction *FI);
+ Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*);
+ Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1,
+ Value *A, Value *B, Instruction &Outer,
+ SelectPatternFlavor SPF2, Value *C);
+ Instruction *visitSelectInst(SelectInst &SI);
+ Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
+ Instruction *visitCallInst(CallInst &CI);
+ Instruction *visitInvokeInst(InvokeInst &II);
+
+ Instruction *SliceUpIllegalIntegerPHI(PHINode &PN);
+ Instruction *visitPHINode(PHINode &PN);
+ Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
+ Instruction *visitAllocaInst(AllocaInst &AI);
+ Instruction *visitFree(Instruction &FI);
+ Instruction *visitLoadInst(LoadInst &LI);
+ Instruction *visitStoreInst(StoreInst &SI);
+ Instruction *visitBranchInst(BranchInst &BI);
+ Instruction *visitSwitchInst(SwitchInst &SI);
+ Instruction *visitInsertElementInst(InsertElementInst &IE);
+ Instruction *visitExtractElementInst(ExtractElementInst &EI);
+ Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI);
+ Instruction *visitExtractValueInst(ExtractValueInst &EV);
+
+ // visitInstruction - Specify what to return for unhandled instructions...
+ Instruction *visitInstruction(Instruction &I) { return 0; }
+
+private:
+ bool ShouldChangeType(const Type *From, const Type *To) const;
+ Value *dyn_castNegVal(Value *V) const;
+ Value *dyn_castFNegVal(Value *V) const;
+ const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices);
+ Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
+
+ /// ValueRequiresCast - Return true if the cast from "V to Ty" actually
+ /// results in any code being generated. It does not require codegen if V is
+ /// simple enough or if the cast can be folded into other casts.
+ bool ValueRequiresCast(Instruction::CastOps opcode,const Value *V,
+ const Type *Ty);
+
+ Instruction *visitCallSite(CallSite CS);
+ bool transformConstExprCastCall(CallSite CS);
+ Instruction *transformCallThroughTrampoline(CallSite CS);
+ Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+ bool DoXform = true);
+ bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
+ DbgDeclareInst *hasOneUsePlusDeclare(Value *V);
+ Value *EmitGEPOffset(User *GEP);
+
+public:
+ // InsertNewInstBefore - insert an instruction New before instruction Old
+ // in the program. Add the new instruction to the worklist.
+ //
+ Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) {
+ assert(New && New->getParent() == 0 &&
+ "New instruction already inserted into a basic block!");
+ BasicBlock *BB = Old.getParent();
+ BB->getInstList().insert(&Old, New); // Insert inst
+ Worklist.Add(New);
+ return New;
+ }
+
+ // ReplaceInstUsesWith - This method is to be used when an instruction is
+ // found to be dead, replaceable with another preexisting expression. Here
+ // we add all users of I to the worklist, replace all uses of I with the new
+ // value, then return I, so that the inst combiner will know that I was
+ // modified.
+ //
+ Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
+ Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
+
+ // If we are replacing the instruction with itself, this must be in a
+ // segment of unreachable code, so just clobber the instruction.
+ if (&I == V)
+ V = UndefValue::get(I.getType());
+
+ I.replaceAllUsesWith(V);
+ return &I;
+ }
+
+ // EraseInstFromFunction - When dealing with an instruction that has side
+ // effects or produces a void value, we can't rely on DCE to delete the
+ // instruction. Instead, visit methods should return the value returned by
+ // this function.
+ Instruction *EraseInstFromFunction(Instruction &I) {
+ DEBUG(errs() << "IC: ERASE " << I << '\n');
+
+ assert(I.use_empty() && "Cannot erase instruction that is used!");
+ // Make sure that we reprocess all operands now that we reduced their
+ // use counts.
+ if (I.getNumOperands() < 8) {
+ for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(*i))
+ Worklist.Add(Op);
+ }
+ Worklist.Remove(&I);
+ I.eraseFromParent();
+ MadeIRChange = true;
+ return 0; // I was erased; there is nothing more to do with it.
+ }
+
+ void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth = 0) const {
+ return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+ }
+
+ bool MaskedValueIsZero(Value *V, const APInt &Mask,
+ unsigned Depth = 0) const {
+ return llvm::MaskedValueIsZero(V, Mask, TD, Depth);
+ }
+ unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const {
+ return llvm::ComputeNumSignBits(Op, TD, Depth);
+ }
+
+private:
+
+ /// SimplifyCommutative - This performs a few simplifications for
+ /// commutative operators.
+ bool SimplifyCommutative(BinaryOperator &I);
+
+ /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
+ /// based on the demanded bits.
+ Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
+ APInt& KnownZero, APInt& KnownOne,
+ unsigned Depth);
+ bool SimplifyDemandedBits(Use &U, APInt DemandedMask,
+ APInt& KnownZero, APInt& KnownOne,
+ unsigned Depth=0);
+
+ /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
+ /// SimplifyDemandedBits knows about. See if the instruction has any
+ /// properties that allow us to simplify its operands.
+ bool SimplifyDemandedInstructionBits(Instruction &Inst);
+
+ Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
+ APInt& UndefElts, unsigned Depth = 0);
+
+ // FoldOpIntoPhi - Given a binary operator, cast instruction, or select
+ // which has a PHI node as operand #0, see if we can fold the instruction
+ // into the PHI (which is only possible if all operands to the PHI are
+ // constants).
+ //
+ // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
+ // that would normally be unprofitable because they strongly encourage jump
+ // threading.
+ Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false);
+
+ // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
+ // operator and they all are only used by the PHI, PHI together their
+ // inputs, and do the operation once, to the result of the PHI.
+ Instruction *FoldPHIArgOpIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
+ Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN);
+
+
+ Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
+ ConstantInt *AndRHS, BinaryOperator &TheAnd);
+
+ Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
+ bool isSub, Instruction &I);
+ Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+ bool isSigned, bool Inside, Instruction &IB);
+ Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
+ Instruction *MatchBSwap(BinaryOperator &I);
+ bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
+ Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
+ Instruction *SimplifyMemSet(MemSetInst *MI);
+
+
+ Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
+
+ unsigned GetOrEnforceKnownAlignment(Value *V,
+ unsigned PrefAlign = 0);
+
+};
+
+
+
+} // end namespace llvm.
+
+#endif
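
The visitor return-value contract documented in this header (null / the instruction itself / a different instruction) is what the driver loop keys off when deciding whether to move on, requeue, or substitute. A compressed illustration with a stand-in type:

    struct Inst {};  // stand-in for llvm::Instruction

    enum Action { DoNothing, Revisit, Substitute };

    // How a driver reads a visit method's result under the contract above.
    Action interpretVisitResult(Inst &I, Inst *Result) {
      if (!Result)      return DoNothing;  // no change was made
      if (Result == &I) return Revisit;    // changed in place, still valid
      return Substitute;                   // replace I with Result
    }
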
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
new file mode 100644
index 0000000..4891ff0
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -0,0 +1,740 @@
+//===- InstCombineAddSub.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for add, fadd, sub, and fsub.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// AddOne - Add one to a constant.
+static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+}
+/// SubOne - Subtract one from a ConstantInt.
+static Constant *SubOne(ConstantInt *C) {
+ return ConstantInt::get(C->getContext(), C->getValue()-1);
+}
+
+
+// dyn_castFoldableMul - If this value is a multiply that can be folded into
+// other computations (because it has a constant operand), return the
+// non-constant operand of the multiply, and set CST to point to the multiplier.
+// Otherwise, return null.
+//
+static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
+ if (!V->hasOneUse() || !V->getType()->isInteger())
+ return 0;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return 0;
+
+ if (I->getOpcode() == Instruction::Mul)
+ if ((CST = dyn_cast<ConstantInt>(I->getOperand(1))))
+ return I->getOperand(0);
+ if (I->getOpcode() == Instruction::Shl)
+ if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) {
+ // The multiplier is really 1 << CST.
+ uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+ uint32_t CSTVal = CST->getLimitedValue(BitWidth);
+ CST = ConstantInt::get(V->getType()->getContext(),
+ APInt(BitWidth, 1).shl(CSTVal));
+ return I->getOperand(0);
+ }
+ return 0;
+}
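+// The shift case above leans on the identity x << c == x * (1 << c), which
+// is why CST is rewritten to that power of two. A standalone spot check of
+// the identity (illustrative only, not part of this file):
+//
+//   #include <cassert>
+//   #include <cstdint>
+//   int main() {
+//     uint32_t X = 0xDEADBEEF;
+//     for (uint32_t C = 0; C < 32; ++C)
+//       assert((X << C) == X * (1u << C)); // x << c == x * 2^c (mod 2^32)
+//     return 0;
+//   }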
+
+
+/// WillNotOverflowSignedAdd - Return true if we can prove that:
+/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS))
+/// This basically requires proving that the add in the original type would not
+/// overflow to change the sign bit or have a carry out.
+bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
+ // There are different heuristics we can use for this. Here are some simple
+ // ones.
+
+ // Add has the property that adding any two 2's complement numbers can only
+ // have one carry bit which can change a sign. As such, if LHS and RHS each
+ // have at least two sign bits, we know that the addition of the two values
+ // will sign extend fine.
+ if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
+ return true;
+
+
+ // If one of the operands has only one non-zero bit, and the other operand
+ // has a known-zero bit in a more significant place than it (not including
+ // the sign bit), the ripple may go up to and fill the zero, but won't
+ // change the sign. For example, (X & ~4) + 1.
+
+ // TODO: Implement.
+
+ return false;
+}
+
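+// The two-sign-bit argument above can be checked exhaustively at 8 bits:
+// every int8_t value with at least two sign bits lies in [-64, 63], and no
+// pair of such values overflows signed 8-bit addition. Standalone sketch,
+// not part of this file:
+//
+//   #include <cassert>
+//   #include <cstdint>
+//   int main() {
+//     for (int a = -64; a <= 63; ++a)        // >= 2 sign bits in int8_t
+//       for (int b = -64; b <= 63; ++b)
+//         assert(a + b == (int8_t)(a + b));  // in range => no overflow
+//     return 0;
+//   }
+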
+Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+ if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) {
+ // X + (signbit) --> X ^ signbit
+ const APInt& Val = CI->getValue();
+ uint32_t BitWidth = Val.getBitWidth();
+ if (Val == APInt::getSignBit(BitWidth))
+ return BinaryOperator::CreateXor(LHS, RHS);
+
+ // See if SimplifyDemandedBits can simplify this. This handles stuff like
+ // (X & 254)+1 -> (X&254)|1
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // zext(bool) + C -> bool ? C + 1 : C
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
+ if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
+ return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
+ }
+
+ if (isa<PHINode>(LHS))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ ConstantInt *XorRHS = 0;
+ Value *XorLHS = 0;
+ if (isa<ConstantInt>(RHSC) &&
+ match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
+ uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
+ const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue();
+
+ uint32_t Size = TySizeBits / 2;
+ APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1));
+ APInt CFF80Val(-C0080Val);
+ do {
+ if (TySizeBits > Size) {
+ // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
+ // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
+ if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) ||
+ (RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) {
+ // This is a sign extend if the top bits are known zero.
+ if (!MaskedValueIsZero(XorLHS,
+ APInt::getHighBitsSet(TySizeBits, TySizeBits - Size)))
+ Size = 0; // Not a sign ext, but can't be any others either.
+ break;
+ }
+ }
+ Size >>= 1;
+ C0080Val = APIntOps::lshr(C0080Val, Size);
+ CFF80Val = APIntOps::ashr(CFF80Val, Size);
+ } while (Size >= 1);
+
+ // FIXME: This shouldn't be necessary. When the backends can handle types
+ // with funny bit widths then this switch statement should be removed. It
+ // is just here to get the size of the "middle" type back up to something
+ // that the back ends can handle.
+ const Type *MiddleType = 0;
+ switch (Size) {
+ default: break;
+ case 32:
+ case 16:
+ case 8: MiddleType = IntegerType::get(I.getContext(), Size); break;
+ }
+ if (MiddleType) {
+ Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext");
+ return new SExtInst(NewTrunc, I.getType(), I.getName());
+ }
+ }
+ }
+
+ if (I.getType()->isInteger(1))
+ return BinaryOperator::CreateXor(LHS, RHS);
+
+ if (I.getType()->isInteger()) {
+ // X + X --> X << 1
+ if (LHS == RHS)
+ return BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1));
+
+ if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
+ if (RHSI->getOpcode() == Instruction::Sub)
+ if (LHS == RHSI->getOperand(1)) // A + (B - A) --> B
+ return ReplaceInstUsesWith(I, RHSI->getOperand(0));
+ }
+ if (Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
+ if (LHSI->getOpcode() == Instruction::Sub)
+ if (RHS == LHSI->getOperand(1)) // (B - A) + A --> B
+ return ReplaceInstUsesWith(I, LHSI->getOperand(0));
+ }
+ }
+
+ // -A + B --> B - A
+ // -A + -B --> -(A + B)
+ if (Value *LHSV = dyn_castNegVal(LHS)) {
+ if (LHS->getType()->isIntOrIntVector()) {
+ if (Value *RHSV = dyn_castNegVal(RHS)) {
+ Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
+ return BinaryOperator::CreateNeg(NewAdd);
+ }
+ }
+
+ return BinaryOperator::CreateSub(RHS, LHSV);
+ }
+
+ // A + -B --> A - B
+ if (!isa<Constant>(RHS))
+ if (Value *V = dyn_castNegVal(RHS))
+ return BinaryOperator::CreateSub(LHS, V);
+
+
+ ConstantInt *C2;
+ if (Value *X = dyn_castFoldableMul(LHS, C2)) {
+ if (X == RHS) // X*C + X --> X * (C+1)
+ return BinaryOperator::CreateMul(RHS, AddOne(C2));
+
+ // X*C1 + X*C2 --> X * (C1+C2)
+ ConstantInt *C1;
+ if (X == dyn_castFoldableMul(RHS, C1))
+ return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
+ }
+
+ // X + X*C --> X * (C+1)
+ if (dyn_castFoldableMul(RHS, C2) == LHS)
+ return BinaryOperator::CreateMul(LHS, AddOne(C2));
+
+ // X + ~X --> -1 since ~X = -X-1
+ if (match(LHS, m_Not(m_Specific(RHS))) ||
+ match(RHS, m_Not(m_Specific(LHS))))
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+
+ // A+B --> A|B iff A and B have no bits set in common.
+ if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+ APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
+ APInt LHSKnownOne(IT->getBitWidth(), 0);
+ APInt LHSKnownZero(IT->getBitWidth(), 0);
+ ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ if (LHSKnownZero != 0) {
+ APInt RHSKnownOne(IT->getBitWidth(), 0);
+ APInt RHSKnownZero(IT->getBitWidth(), 0);
+ ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+
+ // No bits in common -> bitwise or.
+ if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
+ return BinaryOperator::CreateOr(LHS, RHS);
+ }
+ }
+
+ // W*X + Y*Z --> W * (X+Z) iff W == Y
+ if (I.getType()->isIntOrIntVector()) {
+ Value *W, *X, *Y, *Z;
+ if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
+ match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
+ if (W != Y) {
+ if (W == Z) {
+ std::swap(Y, Z);
+ } else if (Y == X) {
+ std::swap(W, X);
+ } else if (X == Z) {
+ std::swap(Y, Z);
+ std::swap(W, X);
+ }
+ }
+
+ if (W == Y) {
+ Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName());
+ return BinaryOperator::CreateMul(W, NewAdd);
+ }
+ }
+ }
+
+ if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
+ Value *X = 0;
+ if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X
+ return BinaryOperator::CreateSub(SubOne(CRHS), X);
+
+ // (X & FF00) + xx00 -> (X+xx00) & FF00
+ if (LHS->hasOneUse() &&
+ match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
+ Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
+ if (Anded == CRHS) {
+ // See if all bits from the first set bit in the Add RHS upward are
+ // included in the mask. First, grab the add's constant value.
+ const APInt &AddRHSV = CRHS->getValue();
+
+ // Form a mask of all bits from the lowest bit added through the top.
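+ // ((AddRHSV & -AddRHSV) isolates the lowest set bit; subtracting one and
+ // complementing then yields ones from that bit up through the MSB.)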
+ APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
+
+ // See if the and mask includes all of these bits.
+ APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());
+
+ if (AddRHSHighBits == AddRHSHighBitsAnd) {
+ // Okay, the xform is safe. Insert the new add pronto.
+ Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
+ return BinaryOperator::CreateAnd(NewAdd, C2);
+ }
+ }
+ }
+
+ // Try to fold constant add into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(LHS))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ }
+
+ // add (select X 0 (sub n A)) A --> select X A n
+ {
+ SelectInst *SI = dyn_cast<SelectInst>(LHS);
+ Value *A = RHS;
+ if (!SI) {
+ SI = dyn_cast<SelectInst>(RHS);
+ A = LHS;
+ }
+ if (SI && SI->hasOneUse()) {
+ Value *TV = SI->getTrueValue();
+ Value *FV = SI->getFalseValue();
+ Value *N;
+
+ // Can we fold the add into the argument of the select?
+ // We check both true and false select arguments for a matching subtract.
+ if (match(FV, m_Zero()) &&
+ match(TV, m_Sub(m_Value(N), m_Specific(A))))
+ // Fold the add into the true select value.
+ return SelectInst::Create(SI->getCondition(), N, A);
+ if (match(TV, m_Zero()) &&
+ match(FV, m_Sub(m_Value(N), m_Specific(A))))
+ // Fold the add into the false select value.
+ return SelectInst::Create(SI->getCondition(), A, N);
+ }
+ }
+
+ // Check for (add (sext x), y), see if we can merge this into an
+ // integer add followed by a sext.
+ if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
+ // (add (sext x), cst) --> (sext (add x, cst'))
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
+ Constant *CI =
+ ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+ // Insert the new, smaller add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ CI, "addconv");
+ return new SExtInst(NewAdd, I.getType());
+ }
+ }
+
+ // (add (sext x), (sext y)) --> (sext (add int x, y))
+ if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
+ // Only do this if x/y have the same type, if at least one of them has a
+ // single use (so we don't increase the number of sexts), and if the
+ // integer add will not overflow.
+ if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0))) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0), "addconv");
+ return new SExtInst(NewAdd, I.getType());
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
+
+Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ // X + -0.0 --> X; -0.0 is the identity for fadd (+0.0 is handled below).
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
+ if (CFP->isExactlyValue(ConstantFP::getNegativeZero
+ (I.getType())->getValueAPF()))
+ return ReplaceInstUsesWith(I, LHS);
+ }
+
+ if (isa<PHINode>(LHS))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ // -A + B --> B - A
+ // -A + -B --> -(A + B)
+ if (Value *LHSV = dyn_castFNegVal(LHS))
+ return BinaryOperator::CreateFSub(RHS, LHSV);
+
+ // A + -B --> A - B
+ if (!isa<Constant>(RHS))
+ if (Value *V = dyn_castFNegVal(RHS))
+ return BinaryOperator::CreateFSub(LHS, V);
+
+ // Check for X+0.0. Simplify it to X if we know X is not -0.0.
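+ // (The guard is needed because (-0.0) + (+0.0) is +0.0 under IEEE rules,
+ // so dropping the add would be wrong if X could be -0.0.)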
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
+ if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
+ return ReplaceInstUsesWith(I, LHS);
+
+ // Check for (add double (sitofp x), y), see if we can merge this into an
+ // integer add followed by a promotion.
+ if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
+ // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
+ // ... if the constant fits in the integer value. This is useful for things
+ // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
+ // requires a constant pool load, and generally allows the add to be better
+ // instcombined.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
+ Constant *CI =
+ ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ CI, "addconv");
+ return new SIToFPInst(NewAdd, I.getType());
+ }
+ }
+
+ // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
+ if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
+ // Only do this if x/y have the same type, if at least one of them has a
+ // single use (so we don't increase the number of int->fp conversions),
+ // and if the integer add will not overflow.
+ if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0))) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0),"addconv");
+ return new SIToFPInst(NewAdd, I.getType());
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
+
+
+/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
+/// code necessary to compute the offset from the base pointer (without adding
+/// in the base pointer). Return the result as a signed integer of intptr size.
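+/// For example (an illustrative sketch with hypothetical IR): given
+///   %gep = getelementptr [10 x i32]* %A, i64 0, i64 %i
+/// on a target with 64-bit pointers, this emits roughly "mul i64 %i, 4";
+/// the constant zero index contributes nothing to the offset.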
+Value *InstCombiner::EmitGEPOffset(User *GEP) {
+ TargetData &TD = *getTargetData();
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
+ Value *Result = Constant::getNullValue(IntPtrTy);
+
+ // Build a mask for high order bits.
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
+ ++i, ++GTI) {
+ Value *Op = *i;
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
+ if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
+ if (OpC->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+
+ Result = Builder->CreateAdd(Result,
+ ConstantInt::get(IntPtrTy, Size),
+ GEP->getName()+".offs");
+ continue;
+ }
+
+ Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+ Constant *OC =
+ ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
+ Scale = ConstantExpr::getMul(OC, Scale);
+ // Emit an add instruction.
+ Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
+ continue;
+ }
+ // Convert to correct type.
+ if (Op->getType() != IntPtrTy)
+ Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
+ if (Size != 1) {
+ Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+ // We'll let instcombine(mul) convert this to a shl if possible.
+ Op = Builder->CreateMul(Op, Scale, GEP->getName()+".idx");
+ }
+
+ // Emit an add instruction.
+ Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs");
+ }
+ return Result;
+}
+
+
+
+
+/// Optimize pointer differences in the same array into a size. Consider:
+/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
+/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
+///
+Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
+ const Type *Ty) {
+ assert(TD && "Must have target data info for this");
+
+ // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
+ // this.
+ bool Swapped = false;
+ GetElementPtrInst *GEP = 0;
+ ConstantExpr *CstGEP = 0;
+
+ // TODO: Could also optimize &A[i] - &A[j] -> "i-j", and "&A.foo[i] - &A.foo".
+ // For now we require one side to be the base pointer "A" or a constant
+ // expression derived from it.
+ if (GetElementPtrInst *LHSGEP = dyn_cast<GetElementPtrInst>(LHS)) {
+ // (gep X, ...) - X
+ if (LHSGEP->getOperand(0) == RHS) {
+ GEP = LHSGEP;
+ Swapped = false;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(RHS)) {
+ // (gep X, ...) - (ce_gep X, ...)
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ LHSGEP->getOperand(0) == CE->getOperand(0)) {
+ CstGEP = CE;
+ GEP = LHSGEP;
+ Swapped = false;
+ }
+ }
+ }
+
+ if (GetElementPtrInst *RHSGEP = dyn_cast<GetElementPtrInst>(RHS)) {
+ // X - (gep X, ...)
+ if (RHSGEP->getOperand(0) == LHS) {
+ GEP = RHSGEP;
+ Swapped = true;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(LHS)) {
+ // (ce_gep X, ...) - (gep X, ...)
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ RHSGEP->getOperand(0) == CE->getOperand(0)) {
+ CstGEP = CE;
+ GEP = RHSGEP;
+ Swapped = true;
+ }
+ }
+ }
+
+ if (GEP == 0)
+ return 0;
+
+ // Emit the offset of the GEP as an intptr_t.
+ Value *Result = EmitGEPOffset(GEP);
+
+ // If we had a constant expression GEP on the other side offsetting the
+ // pointer, subtract it from the offset we have.
+ if (CstGEP) {
+ Value *CstOffset = EmitGEPOffset(CstGEP);
+ Result = Builder->CreateSub(Result, CstOffset);
+ }
+
+
+ // If we have p - gep(p, ...) then we have to negate the result.
+ if (Swapped)
+ Result = Builder->CreateNeg(Result, "diff.neg");
+
+ return Builder->CreateIntCast(Result, Ty, true);
+}
+
+
+Instruction *InstCombiner::visitSub(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Op0 == Op1) // sub X, X -> 0
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW.
+ if (Value *V = dyn_castNegVal(Op1)) {
+ BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V);
+ Res->setHasNoSignedWrap(I.hasNoSignedWrap());
+ Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ return Res;
+ }
+
+ if (isa<UndefValue>(Op0))
+ return ReplaceInstUsesWith(I, Op0); // undef - X -> undef
+ if (isa<UndefValue>(Op1))
+ return ReplaceInstUsesWith(I, Op1); // X - undef -> undef
+ if (I.getType()->isInteger(1))
+ return BinaryOperator::CreateXor(Op0, Op1);
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
+ // Replace (-1 - A) with (~A).
+ if (C->isAllOnesValue())
+ return BinaryOperator::CreateNot(Op1);
+
+ // C - ~X == X + (1+C)
+ Value *X = 0;
+ if (match(Op1, m_Not(m_Value(X))))
+ return BinaryOperator::CreateAdd(X, AddOne(C));
+
+ // -(X >>u 31) -> (X >>s 31)
+ // -(X >>s 31) -> (X >>u 31)
+ if (C->isZero()) {
+ if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) {
+ if (SI->getOpcode() == Instruction::LShr) {
+ if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
+ // Check to see if we are shifting out everything but the sign bit.
+ if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
+ SI->getType()->getPrimitiveSizeInBits()-1) {
+ // Ok, the transformation is safe. Insert AShr.
+ return BinaryOperator::Create(Instruction::AShr,
+ SI->getOperand(0), CU, SI->getName());
+ }
+ }
+ } else if (SI->getOpcode() == Instruction::AShr) {
+ if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
+ // Check to see if we are shifting out everything but the sign bit.
+ if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
+ SI->getType()->getPrimitiveSizeInBits()-1) {
+ // Ok, the transformation is safe. Insert LShr.
+ return BinaryOperator::CreateLShr(
+ SI->getOperand(0), CU, SI->getName());
+ }
+ }
+ }
+ }
+ }
+
+ // Try to fold constant sub into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ // C - zext(bool) -> bool ? C - 1 : C
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1))
+ if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
+ return SelectInst::Create(ZI->getOperand(0), SubOne(C), C);
+ }
+
+ if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
+ if (Op1I->getOpcode() == Instruction::Add) {
+ if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y
+ return BinaryOperator::CreateNeg(Op1I->getOperand(1),
+ I.getName());
+ else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y
+ return BinaryOperator::CreateNeg(Op1I->getOperand(0),
+ I.getName());
+ else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
+ if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
+ // C1-(X+C2) --> (C1-C2)-X
+ return BinaryOperator::CreateSub(
+ ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0));
+ }
+ }
+
+ if (Op1I->hasOneUse()) {
+ // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
+ // is not used by anyone else...
+ //
+ if (Op1I->getOpcode() == Instruction::Sub) {
+ // Swap the two operands of the subexpr...
+ Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
+ Op1I->setOperand(0, IIOp1);
+ Op1I->setOperand(1, IIOp0);
+
+ // Create the new top level add instruction...
+ return BinaryOperator::CreateAdd(Op0, Op1);
+ }
+
+ // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)...
+ //
+ if (Op1I->getOpcode() == Instruction::And &&
+ (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) {
+ Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0);
+
+ Value *NewNot = Builder->CreateNot(OtherOp, "B.not");
+ return BinaryOperator::CreateAnd(Op0, NewNot);
+ }
+
+ // 0 - (X sdiv C) -> (X sdiv -C)
+ if (Op1I->getOpcode() == Instruction::SDiv)
+ if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
+ if (CSI->isZero())
+ if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
+ return BinaryOperator::CreateSDiv(Op1I->getOperand(0),
+ ConstantExpr::getNeg(DivRHS));
+
+ // X - X*C --> X * (1-C)
+ ConstantInt *C2 = 0;
+ if (dyn_castFoldableMul(Op1I, C2) == Op0) {
+ Constant *CP1 =
+ ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
+ C2);
+ return BinaryOperator::CreateMul(Op0, CP1);
+ }
+ }
+ }
+
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ if (Op0I->getOpcode() == Instruction::Add) {
+ if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X
+ return ReplaceInstUsesWith(I, Op0I->getOperand(1));
+ else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X
+ return ReplaceInstUsesWith(I, Op0I->getOperand(0));
+ } else if (Op0I->getOpcode() == Instruction::Sub) {
+ if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y
+ return BinaryOperator::CreateNeg(Op0I->getOperand(1),
+ I.getName());
+ }
+ }
+
+ ConstantInt *C1;
+ if (Value *X = dyn_castFoldableMul(Op0, C1)) {
+ if (X == Op1) // X*C - X --> X * (C-1)
+ return BinaryOperator::CreateMul(Op1, SubOne(C1));
+
+ ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2)
+ if (X == dyn_castFoldableMul(Op1, C2))
+ return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
+ }
+
+ // Optimize pointer differences in the same array into a size. Consider:
+ // &A[10] - &A[0]: we should compile this to "10".
+ if (TD) {
+ Value *LHSOp, *RHSOp;
+ if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
+ match(Op1, m_PtrToInt(m_Value(RHSOp))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
+
+ // trunc(p)-trunc(q) -> trunc(p-q)
+ if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
+ match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // If this is a 'B = x-(-A)', change to B = x+A...
+ if (Value *V = dyn_castFNegVal(Op1))
+ return BinaryOperator::CreateFAdd(Op0, V);
+
+ return 0;
+}
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
new file mode 100644
index 0000000..af300fc
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -0,0 +1,1990 @@
+//===- InstCombineAndOrXor.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitAnd, visitOr, and visitXor functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+
+/// AddOne - Add one to a ConstantInt.
+static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+}
+/// SubOne - Subtract one from a ConstantInt.
+static Constant *SubOne(ConstantInt *C) {
+ return ConstantInt::get(C->getContext(), C->getValue()-1);
+}
+
+/// isFreeToInvert - Return true if the specified value is free to invert (apply
+/// ~ to). This happens in cases where the ~ can be eliminated.
+static inline bool isFreeToInvert(Value *V) {
+ // ~(~(X)) -> X.
+ if (BinaryOperator::isNot(V))
+ return true;
+
+ // Constants can be considered to be not'ed values.
+ if (isa<ConstantInt>(V))
+ return true;
+
+ // Compares can be inverted if they have a single use.
+ if (CmpInst *CI = dyn_cast<CmpInst>(V))
+ return CI->hasOneUse();
+
+ return false;
+}
+
+static inline Value *dyn_castNotVal(Value *V) {
+ // If this is not(not(x)) don't return that this is a not: we want the two
+ // not's to be folded first.
+ if (BinaryOperator::isNot(V)) {
+ Value *Operand = BinaryOperator::getNotArgument(V);
+ if (!isFreeToInvert(Operand))
+ return Operand;
+ }
+
+ // Constants can be considered to be not'ed values...
+ if (ConstantInt *C = dyn_cast<ConstantInt>(V))
+ return ConstantInt::get(C->getType(), ~C->getValue());
+ return 0;
+}
+
+
+/// getICmpCode - Encode an icmp predicate into a three-bit mask. These bits
+/// are carefully arranged to allow folding of expressions such as:
+///
+/// (A < B) | (A > B) --> (A != B)
+///
+/// Note that this is only valid if the first and second predicates have the
+/// same sign. It is illegal to do: (A u< B) | (A s> B)
+///
+/// Three bits are used to represent the condition, as follows:
+/// 0 A > B
+/// 1 A == B
+/// 2 A < B
+///
+/// <=> Value Definition
+/// 000 0 Always false
+/// 001 1 A > B
+/// 010 2 A == B
+/// 011 3 A >= B
+/// 100 4 A < B
+/// 101 5 A != B
+/// 110 6 A <= B
+/// 111 7 Always true
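+///
+/// For example (illustrative): and'ing (A u< B) with (A != B) intersects
+/// codes 100 & 101 = 100, which decodes back to (A u< B).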
+///
+static unsigned getICmpCode(const ICmpInst *ICI) {
+ switch (ICI->getPredicate()) {
+ // False -> 0
+ case ICmpInst::ICMP_UGT: return 1; // 001
+ case ICmpInst::ICMP_SGT: return 1; // 001
+ case ICmpInst::ICMP_EQ: return 2; // 010
+ case ICmpInst::ICMP_UGE: return 3; // 011
+ case ICmpInst::ICMP_SGE: return 3; // 011
+ case ICmpInst::ICMP_ULT: return 4; // 100
+ case ICmpInst::ICMP_SLT: return 4; // 100
+ case ICmpInst::ICMP_NE: return 5; // 101
+ case ICmpInst::ICMP_ULE: return 6; // 110
+ case ICmpInst::ICMP_SLE: return 6; // 110
+ // True -> 7
+ default:
+ llvm_unreachable("Invalid ICmp predicate!");
+ return 0;
+ }
+}
+
+/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes an
+/// fcmp predicate into a three-bit mask. It also returns whether it is an
+/// ordered predicate by reference.
+static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
+ isOrdered = false;
+ switch (CC) {
+ case FCmpInst::FCMP_ORD: isOrdered = true; return 0; // 000
+ case FCmpInst::FCMP_UNO: return 0; // 000
+ case FCmpInst::FCMP_OGT: isOrdered = true; return 1; // 001
+ case FCmpInst::FCMP_UGT: return 1; // 001
+ case FCmpInst::FCMP_OEQ: isOrdered = true; return 2; // 010
+ case FCmpInst::FCMP_UEQ: return 2; // 010
+ case FCmpInst::FCMP_OGE: isOrdered = true; return 3; // 011
+ case FCmpInst::FCMP_UGE: return 3; // 011
+ case FCmpInst::FCMP_OLT: isOrdered = true; return 4; // 100
+ case FCmpInst::FCMP_ULT: return 4; // 100
+ case FCmpInst::FCMP_ONE: isOrdered = true; return 5; // 101
+ case FCmpInst::FCMP_UNE: return 5; // 101
+ case FCmpInst::FCMP_OLE: isOrdered = true; return 6; // 110
+ case FCmpInst::FCMP_ULE: return 6; // 110
+ // True -> 7
+ default:
+ // Not expecting FCMP_FALSE or FCMP_TRUE here.
+ llvm_unreachable("Unexpected FCmp predicate!");
+ return 0;
+ }
+}
+
+/// getICmpValue - This is the complement of getICmpCode, which turns an
+/// opcode and two operands into either a constant true or false, or a brand
+/// new ICmp instruction. The sign is passed in to determine which kind
+/// of predicate to use in the new icmp instruction.
+static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS) {
+ switch (Code) {
+ default: assert(0 && "Illegal ICmp code!");
+ case 0:
+ return ConstantInt::getFalse(LHS->getContext());
+ case 1:
+ if (Sign)
+ return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS);
+ return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS);
+ case 2:
+ return new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS);
+ case 3:
+ if (Sign)
+ return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS);
+ return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS);
+ case 4:
+ if (Sign)
+ return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS);
+ return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS);
+ case 5:
+ return new ICmpInst(ICmpInst::ICMP_NE, LHS, RHS);
+ case 6:
+ if (Sign)
+ return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS);
+ return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS);
+ case 7:
+ return ConstantInt::getTrue(LHS->getContext());
+ }
+}
+
+/// getFCmpValue - This is the complement of getFCmpCode, which turns an
+/// opcode and two operands into either a constant true or a new FCmp
+/// instruction. isordered is passed in to determine which kind of predicate
+/// to use in the new fcmp instruction.
+static Value *getFCmpValue(bool isordered, unsigned code,
+ Value *LHS, Value *RHS) {
+ switch (code) {
+ default: llvm_unreachable("Illegal FCmp code!");
+ case 0:
+ if (isordered)
+ return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS);
+ else
+ return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS);
+ case 1:
+ if (isordered)
+ return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS);
+ else
+ return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS);
+ case 2:
+ if (isordered)
+ return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS);
+ else
+ return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS);
+ case 3:
+ if (isordered)
+ return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS);
+ else
+ return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS);
+ case 4:
+ if (isordered)
+ return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS);
+ else
+ return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS);
+ case 5:
+ if (isordered)
+ return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS);
+ else
+ return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS);
+ case 6:
+ if (isordered)
+ return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS);
+ else
+ return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS);
+ case 7: return ConstantInt::getTrue(LHS->getContext());
+ }
+}
+
+/// PredicatesFoldable - Return true if both predicates match sign or if at
+/// least one of them is an equality comparison (which is signless).
+static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
+ return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
+ (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
+ (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
+}
+
+// OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where
+// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is
+// guaranteed to be a binary operator.
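+// For example (illustrative): ((X ^ 0xFF) & 0x0F) is rewritten by the Xor
+// case below to ((X & 0x0F) ^ 0x0F), folding the two constants together.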
+Instruction *InstCombiner::OptAndOp(Instruction *Op,
+ ConstantInt *OpRHS,
+ ConstantInt *AndRHS,
+ BinaryOperator &TheAnd) {
+ Value *X = Op->getOperand(0);
+ Constant *Together = 0;
+ if (!Op->isShift())
+ Together = ConstantExpr::getAnd(AndRHS, OpRHS);
+
+ switch (Op->getOpcode()) {
+ case Instruction::Xor:
+ if (Op->hasOneUse()) {
+ // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
+ Value *And = Builder->CreateAnd(X, AndRHS);
+ And->takeName(Op);
+ return BinaryOperator::CreateXor(And, Together);
+ }
+ break;
+ case Instruction::Or:
+ if (Together == AndRHS) // (X | C) & C --> C
+ return ReplaceInstUsesWith(TheAnd, AndRHS);
+
+ if (Op->hasOneUse() && Together != OpRHS) {
+ // (X | C1) & C2 --> (X | (C1&C2)) & C2
+ Value *Or = Builder->CreateOr(X, Together);
+ Or->takeName(Op);
+ return BinaryOperator::CreateAnd(Or, AndRHS);
+ }
+ break;
+ case Instruction::Add:
+ if (Op->hasOneUse()) {
+ // Adding one to a single-bit bit-field should be turned into an XOR
+ // of the bit. First thing to check is to see if this AND is with a
+ // single bit constant.
+ const APInt &AndRHSV = cast<ConstantInt>(AndRHS)->getValue();
+
+ // If there is only one bit set.
+ if (AndRHSV.isPowerOf2()) {
+ // Ok, at this point, we know that we are masking the result of the
+ // ADD down to exactly one bit. If the constant we are adding has
+ // no bits set below this bit, then we can eliminate the ADD.
+ const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue();
+
+ // Check to see if any bits below the one bit set in AndRHSV are set.
+ if ((AddRHS & (AndRHSV-1)) == 0) {
+ // If not, the only thing that can affect the output of the AND is
+ // the bit specified by AndRHSV. If that bit is set, the effect of
+ // the XOR is to toggle the bit. If it is clear, then the ADD has
+ // no effect.
+ if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop
+ TheAnd.setOperand(0, X);
+ return &TheAnd;
+ } else {
+ // Pull the XOR out of the AND.
+ Value *NewAnd = Builder->CreateAnd(X, AndRHS);
+ NewAnd->takeName(Op);
+ return BinaryOperator::CreateXor(NewAnd, AndRHS);
+ }
+ }
+ }
+ }
+ break;
+
+ case Instruction::Shl: {
+ // We know that the AND will not produce any of the bits shifted in, so if
+ // the anded constant includes them, clear them now!
+ //
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
+ ConstantInt *CI = ConstantInt::get(AndRHS->getContext(),
+ AndRHS->getValue() & ShlMask);
+
+ if (CI->getValue() == ShlMask) {
+ // Masking out bits that the shift already masks
+ return ReplaceInstUsesWith(TheAnd, Op); // No need for the and.
+ } else if (CI != AndRHS) { // Reducing bits set in and.
+ TheAnd.setOperand(1, CI);
+ return &TheAnd;
+ }
+ break;
+ }
+ case Instruction::LShr: {
+ // We know that the AND will not produce any of the bits shifted in, so if
+ // the anded constant includes them, clear them now! This only applies to
+ // unsigned shifts, because a signed shr may bring in set bits!
+ //
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
+ ConstantInt *CI = ConstantInt::get(Op->getContext(),
+ AndRHS->getValue() & ShrMask);
+
+ if (CI->getValue() == ShrMask) {
+ // Masking out bits that the shift already masks.
+ return ReplaceInstUsesWith(TheAnd, Op);
+ } else if (CI != AndRHS) {
+ TheAnd.setOperand(1, CI); // Reduce bits set in and cst.
+ return &TheAnd;
+ }
+ break;
+ }
+ case Instruction::AShr:
+ // Signed shr.
+ // See if this is shifting in some sign extension, then masking it out
+ // with an and.
+ if (Op->hasOneUse()) {
+ uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+ uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+ APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
+ Constant *C = ConstantInt::get(Op->getContext(),
+ AndRHS->getValue() & ShrMask);
+ if (C == AndRHS) { // Masking out bits shifted in.
+ // (Val ashr C1) & C2 -> (Val lshr C1) & C2
+ // Make the argument unsigned.
+ Value *ShVal = Op->getOperand(0);
+ ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
+ return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
+ }
+ }
+ break;
+ }
+ return 0;
+}
+
+
+/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
+/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient
+/// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
+/// whether to treat V, Lo, and Hi as signed or not. IB is the location to
+/// insert new instructions.
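+/// For example (illustrative): with Inside set, Lo = 5, and Hi = 10, the test
+/// "V >= 5 && V < 10" is emitted as the single unsigned compare (V-5) <u 5.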
+Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+ bool isSigned, bool Inside,
+ Instruction &IB) {
+ assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
+ ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
+ "Lo is not <= Hi in range emission code!");
+
+ if (Inside) {
+ if (Lo == Hi) // Trivially false.
+ return new ICmpInst(ICmpInst::ICMP_NE, V, V);
+
+ // V >= Min && V < Hi --> V < Hi
+ if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
+ ICmpInst::Predicate pred = (isSigned ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
+ return new ICmpInst(pred, V, Hi);
+ }
+
+ // Emit V-Lo <u Hi-Lo
+ Constant *NegLo = ConstantExpr::getNeg(Lo);
+ Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
+ Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
+ return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
+ }
+
+ if (Lo == Hi) // Trivially true.
+ return new ICmpInst(ICmpInst::ICMP_EQ, V, V);
+
+ // V < Min || V >= Hi -> V > Hi-1
+ Hi = SubOne(cast<ConstantInt>(Hi));
+ if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
+ ICmpInst::Predicate pred = (isSigned ?
+ ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
+ return new ICmpInst(pred, V, Hi);
+ }
+
+ // Emit V-Lo >u Hi-1-Lo
+ // Note that Hi has already had one subtracted from it, above.
+ ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
+ Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
+ Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
+ return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);
+}
+
+// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
+// any number of 0s on either side. The 1s are allowed to wrap from LSB to
+// MSB, so 0x0000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is
+// not, since all 1s are not contiguous.
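+// For example (illustrative): for the 32-bit value 0x00FF0000 the run spans
+// bits 16..23, so MB is set to 17 and ME to 24 (both 1-based, inclusive).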
+static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
+ const APInt& V = Val->getValue();
+ uint32_t BitWidth = Val->getType()->getBitWidth();
+ if (!APIntOps::isShiftedMask(BitWidth, V)) return false;
+
+ // MB is the 1-based position of the lowest bit in the run.
+ MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
+ // ME is the 1-based position of the highest bit in the run.
+ ME = V.getActiveBits();
+ return true;
+}
+
+/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
+/// where isSub determines whether the operator is a sub. If we can fold one of
+/// the following xforms:
+///
+/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
+/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
+/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
+///
+/// return (A +/- B).
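+/// For example (illustrative): ((A & 15) + B) & 15 folds to (A + B) & 15,
+/// since the inner mask 15 already covers every bit the outer mask keeps.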
+///
+Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
+ ConstantInt *Mask, bool isSub,
+ Instruction &I) {
+ Instruction *LHSI = dyn_cast<Instruction>(LHS);
+ if (!LHSI || LHSI->getNumOperands() != 2 ||
+ !isa<ConstantInt>(LHSI->getOperand(1))) return 0;
+
+ ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1));
+
+ switch (LHSI->getOpcode()) {
+ default: return 0;
+ case Instruction::And:
+ if (ConstantExpr::getAnd(N, Mask) == Mask) {
+ // If the AndRHS is a power of two minus one (0+1+), this is simple.
+ if ((Mask->getValue().countLeadingZeros() +
+ Mask->getValue().countPopulation()) ==
+ Mask->getValue().getBitWidth())
+ break;
+
+ // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+
+ // part, we don't need any explicit masks to take them out of A. If that
+ // is all N is, ignore it.
+ uint32_t MB = 0, ME = 0;
+ if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive
+ uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth();
+ APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1));
+ if (MaskedValueIsZero(RHS, Mask))
+ break;
+ }
+ }
+ return 0;
+ case Instruction::Or:
+ case Instruction::Xor:
+ // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
+ if ((Mask->getValue().countLeadingZeros() +
+ Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
+ && ConstantExpr::getAnd(N, Mask)->isNullValue())
+ break;
+ return 0;
+ }
+
+ if (isSub)
+ return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold");
+ return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
+}
+
+/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
+Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
+ ICmpInst *LHS, ICmpInst *RHS) {
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+
+ // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
+ if (PredicatesFoldable(LHSCC, RHSCC)) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) & getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ Value *RV = getICmpValue(isSigned, Code, Op0, Op1);
+ if (Instruction *I = dyn_cast<Instruction>(RV))
+ return I;
+ // Otherwise, it's a constant boolean value.
+ return ReplaceInstUsesWith(I, RV);
+ }
+ }
+
+ // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
+ Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
+ ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
+ if (LHSCst == 0 || RHSCst == 0) return 0;
+
+ if (LHSCst == RHSCst && LHSCC == RHSCC) {
+ // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
+ // where C is a power of 2
+ if (LHSCC == ICmpInst::ICMP_ULT &&
+ LHSCst->getValue().isPowerOf2()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return new ICmpInst(LHSCC, NewOr, LHSCst);
+ }
+
+ // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
+ if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return new ICmpInst(LHSCC, NewOr, LHSCst);
+ }
+ }
+
+ // From here on, we only handle:
+ // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
+ if (Val != Val2) return 0;
+
+ // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
+ if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
+ RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
+ LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
+ RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
+ return 0;
+
+ // We can't fold (ugt x, C) & (sgt x, C2).
+ if (!PredicatesFoldable(LHSCC, RHSCC))
+ return 0;
+
+ // Ensure that the larger constant is on the RHS.
+ bool ShouldSwap;
+ if (CmpInst::isSigned(LHSCC) ||
+ (ICmpInst::isEquality(LHSCC) &&
+ CmpInst::isSigned(RHSCC)))
+ ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
+ else
+ ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
+
+ if (ShouldSwap) {
+ std::swap(LHS, RHS);
+ std::swap(LHSCst, RHSCst);
+ std::swap(LHSCC, RHSCC);
+ }
+
+ // At this point, we know we have two icmp instructions
+ // comparing a value against two constants and and'ing the result
+ // together. Because of the above check, we know that we only have
+ // icmp eq, icmp ne, icmp [su]lt, and icmp [su]gt here. We also know
+ // (from the icmp folding check above) that the two constants
+ // are not equal and that the larger constant is on the RHS.
+ assert(LHSCst != RHSCst && "Compares not folded above?");
+
+ switch (LHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false
+ case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false
+ case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13
+ case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13
+ case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13
+ return ReplaceInstUsesWith(I, LHS);
+ }
+ case ICmpInst::ICMP_NE:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_ULT:
+ if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
+ return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst);
+ break; // (X != 13 & X u< 15) -> no change
+ case ICmpInst::ICMP_SLT:
+ if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
+ return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst);
+ break; // (X != 13 & X s< 15) -> no change
+ case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15
+ case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15
+ return ReplaceInstUsesWith(I, RHS);
+ case ICmpInst::ICMP_NE:
+ if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+ return new ICmpInst(ICmpInst::ICMP_UGT, Add,
+ ConstantInt::get(Add->getType(), 1));
+ }
+ break; // (X != 13 & X != 15) -> no change
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false
+ case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13
+ case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13
+ return ReplaceInstUsesWith(I, LHS);
+ case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false
+ case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13
+ case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13
+ return ReplaceInstUsesWith(I, LHS);
+ case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15
+ return ReplaceInstUsesWith(I, RHS);
+ case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE:
+ if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
+ return new ICmpInst(LHSCC, Val, RHSCst);
+ break; // (X u> 13 & X != 15) -> no change
+ case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1
+ return InsertRangeTest(Val, AddOne(LHSCst),
+ RHSCst, false, true, I);
+ case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15
+ return ReplaceInstUsesWith(I, RHS);
+ case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE:
+ if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
+ return new ICmpInst(LHSCC, Val, RHSCst);
+ break; // (X s> 13 & X != 15) -> no change
+ case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1
+ return InsertRangeTest(Val, AddOne(LHSCst),
+ RHSCst, true, true, I);
+ case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change
+ break;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS,
+ FCmpInst *RHS) {
+
+ if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
+ RHS->getPredicate() == FCmpInst::FCMP_ORD) {
+ // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y)
+ if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+ if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+ // If either of the constants are nans, then the whole thing returns
+ // false.
+ if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return new FCmpInst(FCmpInst::FCMP_ORD,
+ LHS->getOperand(0), RHS->getOperand(0));
+ }
+
+ // Handle vector zeros. This occurs because the canonical form of
+ // "fcmp ord x,x" is "fcmp ord x, 0".
+ if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
+ isa<ConstantAggregateZero>(RHS->getOperand(1)))
+ return new FCmpInst(FCmpInst::FCMP_ORD,
+ LHS->getOperand(0), RHS->getOperand(0));
+ return 0;
+ }
+
+ Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
+ Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
+ FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
+
+
+ if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+ // Swap RHS operands to match LHS.
+ Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+ std::swap(Op1LHS, Op1RHS);
+ }
+
+ if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
+ // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
+ if (Op0CC == Op1CC)
+ return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
+
+ if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ if (Op0CC == FCmpInst::FCMP_TRUE)
+ return ReplaceInstUsesWith(I, RHS);
+ if (Op1CC == FCmpInst::FCMP_TRUE)
+ return ReplaceInstUsesWith(I, LHS);
+
+ bool Op0Ordered;
+ bool Op1Ordered;
+ unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
+ unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
+ if (Op1Pred == 0) {
+ std::swap(LHS, RHS);
+ std::swap(Op0Pred, Op1Pred);
+ std::swap(Op0Ordered, Op1Ordered);
+ }
+ if (Op0Pred == 0) {
+ // uno && ueq -> uno && (uno || eq) -> ueq
+ // ord && olt -> ord && (ord && lt) -> olt
+ if (Op0Ordered == Op1Ordered)
+ return ReplaceInstUsesWith(I, RHS);
+
+ // uno && oeq -> uno && (ord && eq) -> false
+ // uno && ord -> false
+ if (!Op0Ordered)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ // ord && ueq -> ord && (uno || eq) -> oeq
+ return cast<Instruction>(getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS));
+ }
+ }
+
+ return 0;
+}
+
+
+Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyAndInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
+ const APInt &AndRHSMask = AndRHS->getValue();
+ APInt NotAndRHS(~AndRHSMask);
+
+ // Optimize a variety of ((val OP C1) & C2) combinations...
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ Value *Op0LHS = Op0I->getOperand(0);
+ Value *Op0RHS = Op0I->getOperand(1);
+ switch (Op0I->getOpcode()) {
+ default: break;
+ case Instruction::Xor:
+ case Instruction::Or:
+ // If the mask is only needed on one incoming arm, push it up.
+ if (!Op0I->hasOneUse()) break;
+
+ if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
+ // Not masking anything out for the LHS, move to RHS.
+ Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
+ Op0RHS->getName()+".masked");
+ return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS);
+ }
+ if (!isa<Constant>(Op0RHS) &&
+ MaskedValueIsZero(Op0RHS, NotAndRHS)) {
+ // Not masking anything out for the RHS, move to LHS.
+ Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS,
+ Op0LHS->getName()+".masked");
+ return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS);
+ }
+
+ break;
+ case Instruction::Add:
+ // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS.
+ // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
+ // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
+ if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I))
+ return BinaryOperator::CreateAnd(V, AndRHS);
+ if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I))
+ return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes
+ break;
+
+ case Instruction::Sub:
+ // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS.
+ // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
+ // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
+ if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
+ return BinaryOperator::CreateAnd(V, AndRHS);
+
+ // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
+ // has 1's for all bits that the subtraction with A might affect.
+ if (Op0I->hasOneUse()) {
+ uint32_t BitWidth = AndRHSMask.getBitWidth();
+ uint32_t Zeros = AndRHSMask.countLeadingZeros();
+ APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);
+
+ ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS);
+ if (!(A && A->isZero()) && // avoid infinite recursion.
+ MaskedValueIsZero(Op0LHS, Mask)) {
+ Value *NewNeg = Builder->CreateNeg(Op0RHS);
+ return BinaryOperator::CreateAnd(NewNeg, AndRHS);
+ }
+ }
+ break;
+
+ case Instruction::Shl:
+ case Instruction::LShr:
+ // (1 << x) & 1 --> zext(x == 0)
+ // (1 >> x) & 1 --> zext(x == 0)
+ if (AndRHSMask == 1 && Op0LHS == AndRHS) {
+ Value *NewICmp =
+ Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType()));
+ return new ZExtInst(NewICmp, I.getType());
+ }
+ break;
+ }
+
+ if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
+ if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
+ return Res;
+ } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) {
+ // If this is an integer truncation or change from signed-to-unsigned, and
+ // if the source is an and/or with an immediate, transform it. This
+ // frequently occurs for bitfield accesses.
+ if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) {
+ if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) &&
+ CastOp->getNumOperands() == 2)
+ if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){
+ if (CastOp->getOpcode() == Instruction::And) {
+ // Change: and (cast (and X, C1) to T), C2
+ // into : and (cast X to T), trunc_or_bitcast(C1)&C2
+ // This will fold the two constants together, which may allow
+ // other simplifications.
+ Value *NewCast = Builder->CreateTruncOrBitCast(
+ CastOp->getOperand(0), I.getType(),
+ CastOp->getName()+".shrunk");
+ // trunc_or_bitcast(C1)&C2
+ Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
+ C3 = ConstantExpr::getAnd(C3, AndRHS);
+ return BinaryOperator::CreateAnd(NewCast, C3);
+ } else if (CastOp->getOpcode() == Instruction::Or) {
+ // Change: and (cast (or X, C1) to T), C2
+ // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2
+ Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
+ if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS)
+ // trunc(C1)&C2
+ return ReplaceInstUsesWith(I, AndRHS);
+ }
+ }
+ }
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+
+ // (~A & ~B) == (~(A | B)) - De Morgan's Law
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ return BinaryOperator::CreateNot(Or);
+ }
+
+ {
+ Value *A = 0, *B = 0, *C = 0, *D = 0;
+ // (A|B) & ~(A&B) -> A^B
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
+
+ // ~(A&B) & (A|B) -> A^B
+ if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
+
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
+ if (A == Op1) { // (A^B)&A -> A&(A^B)
+ I.swapOperands(); // Simplify below
+ std::swap(Op0, Op1);
+ } else if (B == Op1) { // (A^B)&B -> B&(B^A)
+ cast<BinaryOperator>(Op0)->swapOperands();
+ I.swapOperands(); // Simplify below
+ std::swap(Op0, Op1);
+ }
+ }
+
+ if (Op1->hasOneUse() &&
+ match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
+ if (B == Op0) { // B&(A^B) -> B&(B^A)
+ cast<BinaryOperator>(Op1)->swapOperands();
+ std::swap(A, B);
+ }
+ if (A == Op0) // A&(A^B) -> A & ~B
+ return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp"));
+ }
+
+ // (A&((~A)|B)) -> A&B
+ if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) ||
+ match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1)))))
+ return BinaryOperator::CreateAnd(A, Op1);
+ if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) ||
+ match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0)))))
+ return BinaryOperator::CreateAnd(A, Op0);
+ }
+
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
+ if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS))
+ return Res;
+
+ // fold (and (cast A), (cast B)) -> (cast (and A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+ if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ?
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() &&
+ SrcTy->isIntOrIntVector() &&
+ // Only do this if the casts both really cause code to be generated.
+ ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
+ I.getType()) &&
+ ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
+ I.getType())) {
+ Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0),
+ Op1C->getOperand(0), I.getName());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
+ }
+
+ // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts.
+ if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
+ if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
+ if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
+ SI0->getOperand(1) == SI1->getOperand(1) &&
+ (SI0->hasOneUse() || SI1->hasOneUse())) {
+ Value *NewOp =
+ Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0),
+ SI0->getName());
+ return BinaryOperator::Create(SI1->getOpcode(), NewOp,
+ SI1->getOperand(1));
+ }
+ }
+
+ // If and'ing two fcmp, try combine them into one.
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+ if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
+ return Res;
+ }
+
+ return Changed ? &I : 0;
+}
+
+/// CollectBSwapParts - Analyze the specified subexpression and see if it is
+/// capable of providing pieces of a bswap. The subexpression provides pieces
+/// of a bswap if it is proven that each of the non-zero bytes in the output of
+/// the expression came from the corresponding "byte swapped" byte in some other
+/// value. For example, if the current subexpression is "(shl i32 %X, 24)" then
+/// we know that the expression deposits the low byte of %X into the high byte
+/// of the bswap result and that all other bytes are zero. If the expression is
+/// accepted, the high byte of ByteValues is set to %X to indicate a correct
+/// match.
+///
+/// This function returns true if the match was unsuccessful, false otherwise.
+/// On entry to the function the "OverallLeftShift" is a signed integer value
+/// indicating the number of bytes that the subexpression is later shifted. For
+/// example, if the expression is later right shifted by 16 bits, the
+/// OverallLeftShift value would be -2 on entry. This is used to specify which
+/// byte of ByteValues is actually being set.
+///
+/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
+/// byte is masked to zero by a user. For example, in (X & 255), X will be
+/// processed with a bytemask of 1. Because ByteMask is 32 bits wide, this
+/// limits the function to values of up to 32 bytes (256 bits). ByteMask is
+/// always in the local (OverallLeftShift) coordinate space.
+///
+static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
+ SmallVector<Value*, 8> &ByteValues) {
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ // If this is an or instruction, it may be an inner node of the bswap.
+ if (I->getOpcode() == Instruction::Or) {
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ ByteValues) ||
+ CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
+ ByteValues);
+ }
+
+ // If this is a logical shift by a constant multiple of 8, recurse with
+ // OverallLeftShift and ByteMask adjusted.
+ if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
+ unsigned ShAmt =
+ cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
+ // Ensure the shift amount is defined and of a byte value.
+ if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
+ return true;
+
+ unsigned ByteShift = ShAmt >> 3;
+ if (I->getOpcode() == Instruction::Shl) {
+ // X << 2 -> collect(X, +2)
+ OverallLeftShift += ByteShift;
+ ByteMask >>= ByteShift;
+ } else {
+ // X >>u 2 -> collect(X, -2)
+ OverallLeftShift -= ByteShift;
+ ByteMask <<= ByteShift;
+ ByteMask &= (~0U >> (32-ByteValues.size()));
+ }
+
+ if (OverallLeftShift >= (int)ByteValues.size()) return true;
+ if (OverallLeftShift <= -(int)ByteValues.size()) return true;
+
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ ByteValues);
+ }
+
+ // If this is a logical 'and' with a mask that clears bytes, clear the
+ // corresponding bytes in ByteMask.
+ if (I->getOpcode() == Instruction::And &&
+ isa<ConstantInt>(I->getOperand(1))) {
+ // Scan every byte of the and mask, seeing if the byte is either 0 or 255.
+ unsigned NumBytes = ByteValues.size();
+ APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
+ const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
+
+ for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
+ // If this byte is masked out by a later operation, we don't care what
+ // the and mask is.
+ if ((ByteMask & (1 << i)) == 0)
+ continue;
+
+ // If the AndMask is all zeros for this byte, clear the bit.
+ APInt MaskB = AndMask & Byte;
+ if (MaskB == 0) {
+ ByteMask &= ~(1U << i);
+ continue;
+ }
+
+ // If the AndMask is not all ones for this byte, it's not a bytezap.
+ if (MaskB != Byte)
+ return true;
+
+ // Otherwise, this byte is kept.
+ }
+
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ ByteValues);
+ }
+ }
+
+ // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
+ // the input value to the bswap. Some observations: 1) if more than one byte
+ // is demanded from this input, then it could not be successfully assembled
+ // into a byteswap. At least one of the two bytes would not be aligned with
+ // its ultimate destination.
+ if (!isPowerOf2_32(ByteMask)) return true;
+ unsigned InputByteNo = CountTrailingZeros_32(ByteMask);
+
+ // 2) The input and ultimate destinations must line up: if byte 3 of an i32
+ // is demanded, it needs to go into byte 0 of the result. This means that the
+ // byte needs to be shifted until it lands in the right byte bucket. The
+ // shift amount depends on the position: if the byte is coming from the high
+ // part of the value (e.g. byte 3) then it must be shifted right. If from the
+ // low part, it must be shifted left.
+ unsigned DestByteNo = InputByteNo + OverallLeftShift;
+ if (ByteValues.size()-1-DestByteNo != InputByteNo)
+ return true;
+
+ // If the destination byte value is already defined, the values are or'd
+ // together, which isn't a bswap (unless it's an or of the same bits).
+ if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V)
+ return true;
+ ByteValues[DestByteNo] = V;
+ return false;
+}
+
+/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
+/// If so, insert the new bswap intrinsic and return it.
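+/// For example (illustrative), the i16 idiom (X << 8) | (X >>u 8) becomes a
+/// call to llvm.bswap.i16.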
+Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
+ const IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
+ if (!ITy || ITy->getBitWidth() % 16 ||
+ // ByteMask only allows up to 32-byte values.
+ ITy->getBitWidth() > 32*8)
+ return 0; // Can only bswap pairs of bytes. Can't do vectors.
+
+ /// ByteValues - For each byte of the result, we keep track of which value
+ /// defines each byte.
+ SmallVector<Value*, 8> ByteValues;
+ ByteValues.resize(ITy->getBitWidth()/8);
+
+ // Try to find all the pieces corresponding to the bswap.
+ uint32_t ByteMask = ~0U >> (32-ByteValues.size());
+ if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
+ return 0;
+
+ // Grab the value that defines the first byte.
+ Value *V = ByteValues[0];
+ if (V == 0) return 0; // Didn't find a byte? Must be zero.
+
+ // Check to make sure that all of the bytes come from the same value.
+ for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
+ if (ByteValues[i] != V)
+ return 0;
+ const Type *Tys[] = { ITy };
+ Module *M = I.getParent()->getParent()->getParent();
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+ return CallInst::Create(F, V);
+}
+
+/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). Check
+/// if A is (cond?-1:0) and either B or D is ~(cond?-1:0) or (cond?0:-1), then
+/// we can simplify this expression to "cond ? C : D" (or "cond ? C : B").
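+///
+/// For example (illustrative), with A = (cond ? -1 : 0) and
+/// D = (cond ? 0 : -1), (A & C) | (B & D) simplifies to "select cond, C, B".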
+static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
+ Value *C, Value *D) {
+ // If A is not a select of -1/0, this cannot match.
+ Value *Cond = 0;
+ if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond))))
+ return 0;
+
+ // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B.
+ if (match(D, m_SelectCst<0, -1>(m_Specific(Cond))))
+ return SelectInst::Create(Cond, C, B);
+ if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, B);
+ // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D.
+ if (match(B, m_SelectCst<0, -1>(m_Specific(Cond))))
+ return SelectInst::Create(Cond, C, D);
+ if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
+ return SelectInst::Create(Cond, C, D);
+ return 0;
+}
+
+/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
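+/// For example (illustrative; see the EQ/EQ case below):
+/// (icmp eq X, 13) | (icmp eq X, 14) --> icmp ult (X - 13), 2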
+Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
+ ICmpInst *LHS, ICmpInst *RHS) {
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+
+ // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
+ if (PredicatesFoldable(LHSCC, RHSCC)) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) | getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ Value *RV = getICmpValue(isSigned, Code, Op0, Op1);
+ if (Instruction *I = dyn_cast<Instruction>(RV))
+ return I;
+ // Otherwise, it's a constant boolean value.
+ return ReplaceInstUsesWith(I, RV);
+ }
+ }
+
+ // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
+ Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
+ ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
+ if (LHSCst == 0 || RHSCst == 0) return 0;
+
+ // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
+ if (LHSCst == RHSCst && LHSCC == RHSCC &&
+ LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return new ICmpInst(LHSCC, NewOr, LHSCst);
+ }
+
+ // From here on, we only handle:
+ // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
+ if (Val != Val2) return 0;
+
+ // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
+ if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
+ RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
+ LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
+ RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
+ return 0;
+
+ // We can't fold (ugt x, C) | (sgt x, C2).
+ if (!PredicatesFoldable(LHSCC, RHSCC))
+ return 0;
+
+ // Ensure that the larger constant is on the RHS.
+ bool ShouldSwap;
+ if (CmpInst::isSigned(LHSCC) ||
+ (ICmpInst::isEquality(LHSCC) &&
+ CmpInst::isSigned(RHSCC)))
+ ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
+ else
+ ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
+
+ if (ShouldSwap) {
+ std::swap(LHS, RHS);
+ std::swap(LHSCst, RHSCst);
+ std::swap(LHSCC, RHSCC);
+ }
+
+ // At this point, we know we have two icmp instructions
+ // comparing a value against two constants and or'ing the results
+ // together. Because of the above check, we know that we only have
+ // ICMP_EQ, ICMP_NE, ICMP_ULT/SLT, and ICMP_UGT/SGT here. We also know
+ // (from the icmp folding check above) that the two constants are not
+ // equal.
+ assert(LHSCst != RHSCst && "Compares not folded above?");
+
+ switch (LHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ:
+ if (LHSCst == SubOne(RHSCst)) {
+ // (X == 13 | X == 14) -> X-13 <u 2
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+ AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
+ return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
+ }
+ break; // (X == 13 | X == 15) -> no change
+ case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change
+ case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15
+ case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15
+ return ReplaceInstUsesWith(I, RHS);
+ }
+ break;
+ case ICmpInst::ICMP_NE:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13
+ case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13
+ case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13
+ return ReplaceInstUsesWith(I, LHS);
+ case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true
+ case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true
+ case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change
+ break;
+ case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2
+ // If RHSCst is UMAXINT, the RHS icmp is always false, so the 'or' is
+ // just LHS. Not handling this would let AddOne(RHSCst) overflow.
+ if (RHSCst->isMaxValue(false))
+ return ReplaceInstUsesWith(I, LHS);
+ return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
+ false, false, I);
+ case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15
+ return ReplaceInstUsesWith(I, RHS);
+ case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change
+ break;
+ case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2
+ // If RHSCst is SMAXINT, the RHS icmp is always false, so the 'or' is
+ // just LHS. Not handling this would let AddOne(RHSCst) overflow.
+ if (RHSCst->isMaxValue(true))
+ return ReplaceInstUsesWith(I, LHS);
+ return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
+ true, false, I);
+ case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15
+ return ReplaceInstUsesWith(I, RHS);
+ case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13
+ case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13
+ return ReplaceInstUsesWith(I, LHS);
+ case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true
+ case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X s> 13
+ case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X s> 13
+ return ReplaceInstUsesWith(I, LHS);
+ case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true
+ case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change
+ break;
+ }
+ break;
+ }
+ return 0;
+}
+
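+/// FoldOrOfFCmps - Fold (fcmp)|(fcmp) if possible. For example (illustrative),
+/// (fcmp uno X, 0.0) | (fcmp uno Y, 0.0) --> fcmp uno X, Y.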
+Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS,
+ FCmpInst *RHS) {
+ if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ RHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
+ if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+ if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+ // If either of the constants are nans, then the whole thing returns
+ // true.
+ if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+
+ // Otherwise, no need to compare the two constants, compare the
+ // rest.
+ return new FCmpInst(FCmpInst::FCMP_UNO,
+ LHS->getOperand(0), RHS->getOperand(0));
+ }
+
+ // Handle vector zeros. This occurs because the canonical form of
+ // "fcmp uno x,x" is "fcmp uno x, 0".
+ if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
+ isa<ConstantAggregateZero>(RHS->getOperand(1)))
+ return new FCmpInst(FCmpInst::FCMP_UNO,
+ LHS->getOperand(0), RHS->getOperand(0));
+
+ return 0;
+ }
+
+ Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
+ Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
+ FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
+
+ if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+ // Swap RHS operands to match LHS.
+ Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+ std::swap(Op1LHS, Op1RHS);
+ }
+ if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
+ // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
+ if (Op0CC == Op1CC)
+ return new FCmpInst((FCmpInst::Predicate)Op0CC,
+ Op0LHS, Op0RHS);
+ if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ if (Op0CC == FCmpInst::FCMP_FALSE)
+ return ReplaceInstUsesWith(I, RHS);
+ if (Op1CC == FCmpInst::FCMP_FALSE)
+ return ReplaceInstUsesWith(I, LHS);
+ bool Op0Ordered;
+ bool Op1Ordered;
+ unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
+ unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
+ if (Op0Ordered == Op1Ordered) {
+ // If both are ordered or unordered, return a new fcmp with
+ // or'ed predicates.
+ Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS);
+ if (Instruction *I = dyn_cast<Instruction>(RV))
+ return I;
+ // Otherwise, it's a constant boolean value...
+ return ReplaceInstUsesWith(I, RV);
+ }
+ }
+ return 0;
+}
+
+/// FoldOrWithConstants - This helper function folds:
+///
+/// ((A | B) & C1) | (B & C2)
+///
+/// into:
+///
+/// (A & C1) | B
+///
+/// when the XOR of the two constants is "all ones" (-1).
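+///
+/// For example (illustrative), ((A | B) & 1) | (B & -2) --> (A & 1) | B,
+/// since 1 ^ -2 == -1.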
+Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C) {
+ ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
+ if (!CI1) return 0;
+
+ Value *V1 = 0;
+ ConstantInt *CI2 = 0;
+ if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0;
+
+ APInt Xor = CI1->getValue() ^ CI2->getValue();
+ if (!Xor.isAllOnesValue()) return 0;
+
+ if (V1 == A || V1 == B) {
+ Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1);
+ return BinaryOperator::CreateOr(NewOp, V1);
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitOr(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyOrInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ ConstantInt *C1 = 0; Value *X = 0;
+ // (X & C1) | C2 --> (X | C2) & (C1|C2)
+ if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) &&
+ Op0->hasOneUse()) {
+ Value *Or = Builder->CreateOr(X, RHS);
+ Or->takeName(Op0);
+ return BinaryOperator::CreateAnd(Or,
+ ConstantInt::get(I.getContext(),
+ RHS->getValue() | C1->getValue()));
+ }
+
+ // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
+ if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) &&
+ Op0->hasOneUse()) {
+ Value *Or = Builder->CreateOr(X, RHS);
+ Or->takeName(Op0);
+ return BinaryOperator::CreateXor(Or,
+ ConstantInt::get(I.getContext(),
+ C1->getValue() & ~RHS->getValue()));
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ Value *A = 0, *B = 0;
+ ConstantInt *C1 = 0, *C2 = 0;
+
+ // (A | B) | C and A | (B | C) -> bswap if possible.
+ // (A >> B) | (C << D) and (A << B) | (C >> D) -> bswap if possible.
+ if (match(Op0, m_Or(m_Value(), m_Value())) ||
+ match(Op1, m_Or(m_Value(), m_Value())) ||
+ (match(Op0, m_Shift(m_Value(), m_Value())) &&
+ match(Op1, m_Shift(m_Value(), m_Value())))) {
+ if (Instruction *BSwap = MatchBSwap(I))
+ return BSwap;
+ }
+
+ // (X^C)|Y -> (X|Y)^C iff Y&C == 0
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
+ MaskedValueIsZero(Op1, C1->getValue())) {
+ Value *NOr = Builder->CreateOr(A, Op1);
+ NOr->takeName(Op0);
+ return BinaryOperator::CreateXor(NOr, C1);
+ }
+
+ // Y|(X^C) -> (X|Y)^C iff Y&C == 0
+ if (Op1->hasOneUse() &&
+ match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
+ MaskedValueIsZero(Op0, C1->getValue())) {
+ Value *NOr = Builder->CreateOr(A, Op0);
+ NOr->takeName(Op0);
+ return BinaryOperator::CreateXor(NOr, C1);
+ }
+
+ // (A & C)|(B & D)
+ Value *C = 0, *D = 0;
+ if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
+ match(Op1, m_And(m_Value(B), m_Value(D)))) {
+ Value *V1 = 0, *V2 = 0, *V3 = 0;
+ C1 = dyn_cast<ConstantInt>(C);
+ C2 = dyn_cast<ConstantInt>(D);
+ if (C1 && C2) { // (A & C1)|(B & C2)
+ // If we have: ((V + N) & C1) | (V & C2)
+ // .. and C2 = ~C1 and C2 is a mask of low bits (0+1+) and (N & C2) == 0
+ // replace with V+N.
+ if (C1->getValue() == ~C2->getValue()) {
+ if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+
+ match(A, m_Add(m_Value(V1), m_Value(V2)))) {
+ // Add commutes, try both ways.
+ if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
+ return ReplaceInstUsesWith(I, A);
+ if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
+ return ReplaceInstUsesWith(I, A);
+ }
+ // Or commutes, try both ways.
+ if ((C1->getValue() & (C1->getValue()+1)) == 0 &&
+ match(B, m_Add(m_Value(V1), m_Value(V2)))) {
+ // Add commutes, try both ways.
+ if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
+ return ReplaceInstUsesWith(I, B);
+ if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
+ return ReplaceInstUsesWith(I, B);
+ }
+ }
+
+ if ((C1->getValue() & C2->getValue()) == 0) {
+ // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
+ // iff (C1&C2) == 0 and (N&~C1) == 0
+ if (match(A, m_Or(m_Value(V1), m_Value(V2))) &&
+ ((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) || // (V|N)
+ (V2 == B && MaskedValueIsZero(V1, ~C1->getValue())))) // (N|V)
+ return BinaryOperator::CreateAnd(A,
+ ConstantInt::get(A->getContext(),
+ C1->getValue()|C2->getValue()));
+ // Or commutes, try both ways.
+ if (match(B, m_Or(m_Value(V1), m_Value(V2))) &&
+ ((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) || // (V|N)
+ (V2 == A && MaskedValueIsZero(V1, ~C2->getValue())))) // (N|V)
+ return BinaryOperator::CreateAnd(B,
+ ConstantInt::get(B->getContext(),
+ C1->getValue()|C2->getValue()));
+
+ // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2)
+ // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0.
+ ConstantInt *C3 = 0, *C4 = 0;
+ if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) &&
+ (C3->getValue() & ~C1->getValue()) == 0 &&
+ match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) &&
+ (C4->getValue() & ~C2->getValue()) == 0) {
+ V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield");
+ return BinaryOperator::CreateAnd(V2,
+ ConstantInt::get(B->getContext(),
+ C1->getValue()|C2->getValue()));
+ }
+ }
+ }
+
+ // Check to see if we have any common things being and'ed. If so, find the
+ // terms for V1 & (V2|V3).
+ if (Op0->hasOneUse() || Op1->hasOneUse()) {
+ V1 = 0;
+ if (A == B) // (A & C)|(A & D) == A & (C|D)
+ V1 = A, V2 = C, V3 = D;
+ else if (A == D) // (A & C)|(B & A) == A & (B|C)
+ V1 = A, V2 = B, V3 = C;
+ else if (C == B) // (A & C)|(C & D) == C & (A|D)
+ V1 = C, V2 = A, V3 = D;
+ else if (C == D) // (A & C)|(B & C) == C & (A|B)
+ V1 = C, V2 = A, V3 = B;
+
+ if (V1) {
+ Value *Or = Builder->CreateOr(V2, V3, "tmp");
+ return BinaryOperator::CreateAnd(V1, Or);
+ }
+ }
+
+ // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants
+ if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C))
+ return Match;
+
+ // ((A&~B)|(~A&B)) -> A^B
+ if ((match(C, m_Not(m_Specific(D))) &&
+ match(B, m_Not(m_Specific(A)))))
+ return BinaryOperator::CreateXor(A, D);
+ // ((~B&A)|(~A&B)) -> A^B
+ if ((match(A, m_Not(m_Specific(D))) &&
+ match(B, m_Not(m_Specific(C)))))
+ return BinaryOperator::CreateXor(C, D);
+ // ((A&~B)|(B&~A)) -> A^B
+ if ((match(C, m_Not(m_Specific(B))) &&
+ match(D, m_Not(m_Specific(A)))))
+ return BinaryOperator::CreateXor(A, B);
+ // ((~B&A)|(B&~A)) -> A^B
+ if ((match(A, m_Not(m_Specific(B))) &&
+ match(D, m_Not(m_Specific(C)))))
+ return BinaryOperator::CreateXor(C, B);
+ }
+
+ // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts.
+ if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
+ if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
+ if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
+ SI0->getOperand(1) == SI1->getOperand(1) &&
+ (SI0->hasOneUse() || SI1->hasOneUse())) {
+ Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0),
+ SI0->getName());
+ return BinaryOperator::Create(SI1->getOpcode(), NewOp,
+ SI1->getOperand(1));
+ }
+ }
+
+ // ((A|B)&1)|(B&-2) -> (A&1) | B
+ if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
+ match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
+ Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C);
+ if (Ret) return Ret;
+ }
+ // (B&-2)|((A|B)&1) -> (A&1) | B
+ if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
+ match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
+ Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C);
+ if (Ret) return Ret;
+ }
+
+ // (~A | ~B) == (~(A & B)) - De Morgan's Law
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ return BinaryOperator::CreateNot(And);
+ }
+
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
+ if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS))
+ return Res;
+
+ // fold (or (cast A), (cast B)) -> (cast (or A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+ if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
+ if (!isa<ICmpInst>(Op0C->getOperand(0)) ||
+ !isa<ICmpInst>(Op1C->getOperand(0))) {
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() &&
+ SrcTy->isIntOrIntVector() &&
+ // Only do this if the casts both really cause code to be
+ // generated.
+ ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
+ I.getType()) &&
+ ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
+ I.getType())) {
+ Value *NewOp = Builder->CreateOr(Op0C->getOperand(0),
+ Op1C->getOperand(0), I.getName());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
+ }
+ }
+ }
+
+
+ // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+ if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
+ return Res;
+ }
+
+ return Changed ? &I : 0;
+}
+
+Instruction *InstCombiner::visitXor(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (isa<UndefValue>(Op1)) {
+ if (isa<UndefValue>(Op0))
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef
+ }
+
+ // xor X, X = 0
+ if (Op0 == Op1)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ if (isa<VectorType>(I.getType()))
+ if (isa<ConstantAggregateZero>(Op1))
+ return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X
+
+ // Is this a ~ operation?
+ if (Value *NotOp = dyn_castNotVal(&I)) {
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
+ if (Op0I->getOpcode() == Instruction::And ||
+ Op0I->getOpcode() == Instruction::Or) {
+ // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
+ // ~(~X | Y) --> (X & ~Y) - De Morgan's Law
+ if (dyn_castNotVal(Op0I->getOperand(1)))
+ Op0I->swapOperands();
+ if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
+ Value *NotY =
+ Builder->CreateNot(Op0I->getOperand(1),
+ Op0I->getOperand(1)->getName()+".not");
+ if (Op0I->getOpcode() == Instruction::And)
+ return BinaryOperator::CreateOr(Op0NotVal, NotY);
+ return BinaryOperator::CreateAnd(Op0NotVal, NotY);
+ }
+
+ // ~(X & Y) --> (~X | ~Y) - De Morgan's Law
+ // ~(X | Y) --> (~X & ~Y) - De Morgan's Law
+ if (isFreeToInvert(Op0I->getOperand(0)) &&
+ isFreeToInvert(Op0I->getOperand(1))) {
+ Value *NotX =
+ Builder->CreateNot(Op0I->getOperand(0), "notlhs");
+ Value *NotY =
+ Builder->CreateNot(Op0I->getOperand(1), "notrhs");
+ if (Op0I->getOpcode() == Instruction::And)
+ return BinaryOperator::CreateOr(NotX, NotY);
+ return BinaryOperator::CreateAnd(NotX, NotY);
+ }
+ }
+ }
+ }
+
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ if (RHS->isOne() && Op0->hasOneUse()) {
+ // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
+ return new ICmpInst(ICI->getInversePredicate(),
+ ICI->getOperand(0), ICI->getOperand(1));
+
+ if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0))
+ return new FCmpInst(FCI->getInversePredicate(),
+ FCI->getOperand(0), FCI->getOperand(1));
+ }
+
+ // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
+ if (CI->hasOneUse() && Op0C->hasOneUse()) {
+ Instruction::CastOps Opcode = Op0C->getOpcode();
+ if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+ (RHS == ConstantExpr::getCast(Opcode,
+ ConstantInt::getTrue(I.getContext()),
+ Op0C->getDestTy()))) {
+ CI->setPredicate(CI->getInversePredicate());
+ return CastInst::Create(Opcode, CI, Op0C->getType());
+ }
+ }
+ }
+ }
+
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ // ~(c-X) == X-c-1 == X+(-c-1)
+ if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
+ if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
+ Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
+ Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
+ ConstantInt::get(I.getType(), 1));
+ return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
+ }
+
+ if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+ if (Op0I->getOpcode() == Instruction::Add) {
+ // ~(X+c) --> (-c-1)-X
+ if (RHS->isAllOnesValue()) {
+ Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
+ return BinaryOperator::CreateSub(
+ ConstantExpr::getSub(NegOp0CI,
+ ConstantInt::get(I.getType(), 1)),
+ Op0I->getOperand(0));
+ } else if (RHS->getValue().isSignBit()) {
+ // (X + C) ^ signbit -> (X + C + signbit)
+ Constant *C = ConstantInt::get(I.getContext(),
+ RHS->getValue() + Op0CI->getValue());
+ return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
+
+ }
+ } else if (Op0I->getOpcode() == Instruction::Or) {
+ // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0
+ if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) {
+ Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
+ // Anything in both C1 and C2 is known to be zero, remove it from
+ // NewRHS.
+ Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
+ NewRHS = ConstantExpr::getAnd(NewRHS,
+ ConstantExpr::getNot(CommonBits));
+ Worklist.Add(Op0I);
+ I.setOperand(0, Op0I->getOperand(0));
+ I.setOperand(1, NewRHS);
+ return &I;
+ }
+ }
+ }
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1
+ if (X == Op1)
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+
+ if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1
+ if (X == Op0)
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+
+
+ BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
+ if (Op1I) {
+ Value *A, *B;
+ if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) {
+ if (A == Op0) { // A^(A|B) == (B|A)^A
+ Op1I->swapOperands();
+ I.swapOperands();
+ std::swap(Op0, Op1);
+ } else if (B == Op0) { // B^(A|B) == (A|B)^B
+ I.swapOperands(); // Simplified below.
+ std::swap(Op0, Op1);
+ }
+ } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) {
+ return ReplaceInstUsesWith(I, B); // A^(A^B) == B
+ } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) {
+ return ReplaceInstUsesWith(I, A); // A^(B^A) == B
+ } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&
+ Op1I->hasOneUse()){
+ if (A == Op0) { // A^(A&B) -> A^(B&A)
+ Op1I->swapOperands();
+ std::swap(A, B);
+ }
+ if (B == Op0) { // A^(B&A) -> (B&A)^A
+ I.swapOperands(); // Simplified below.
+ std::swap(Op0, Op1);
+ }
+ }
+ }
+
+ BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
+ if (Op0I) {
+ Value *A, *B;
+ if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+ Op0I->hasOneUse()) {
+ if (A == Op1) // (B|A)^B == (A|B)^B
+ std::swap(A, B);
+ if (B == Op1) // (A|B)^B == A & ~B
+ return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp"));
+ } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) {
+ return ReplaceInstUsesWith(I, B); // (A^B)^A == B
+ } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) {
+ return ReplaceInstUsesWith(I, A); // (B^A)^A == B
+ } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ Op0I->hasOneUse()){
+ if (A == Op1) // (A&B)^A -> (B&A)^A
+ std::swap(A, B);
+ if (B == Op1 && // (B&A)^A == ~B & A
+ !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C
+ return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1);
+ }
+ }
+ }
+
+ // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts.
+ if (Op0I && Op1I && Op0I->isShift() &&
+ Op0I->getOpcode() == Op1I->getOpcode() &&
+ Op0I->getOperand(1) == Op1I->getOperand(1) &&
+ (Op0I->hasOneUse() || Op1I->hasOneUse())) {
+ Value *NewOp =
+ Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0),
+ Op0I->getName());
+ return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
+ Op1I->getOperand(1));
+ }
+
+ if (Op0I && Op1I) {
+ Value *A, *B, *C, *D;
+ // (A & B)^(A | B) -> A ^ B
+ if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
+ if ((A == C && B == D) || (A == D && B == C))
+ return BinaryOperator::CreateXor(A, B);
+ }
+ // (A | B)^(A & B) -> A ^ B
+ if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_And(m_Value(C), m_Value(D)))) {
+ if ((A == C && B == D) || (A == D && B == C))
+ return BinaryOperator::CreateXor(A, B);
+ }
+
+ // (A & B)^(C & D)
+ if ((Op0I->hasOneUse() || Op1I->hasOneUse()) &&
+ match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_And(m_Value(C), m_Value(D)))) {
+ // (X & Y)^(X & Z) -> (Y^Z) & X
+ Value *X = 0, *Y = 0, *Z = 0;
+ if (A == C)
+ X = A, Y = B, Z = D;
+ else if (A == D)
+ X = A, Y = B, Z = C;
+ else if (B == C)
+ X = B, Y = A, Z = D;
+ else if (B == D)
+ X = B, Y = A, Z = C;
+
+ if (X) {
+ Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName());
+ return BinaryOperator::CreateAnd(NewOp, X);
+ }
+ }
+ }
+
+ // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
+ if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ Value *RV = getICmpValue(isSigned, Code, Op0, Op1);
+ if (Instruction *I = dyn_cast<Instruction>(RV))
+ return I;
+ // Otherwise, it's a constant boolean value.
+ return ReplaceInstUsesWith(I, RV);
+ }
+ }
+
+ // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+ if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind?
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
+ // Only do this if the casts both really cause code to be generated.
+ ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
+ I.getType()) &&
+ ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
+ I.getType())) {
+ Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
+ Op1C->getOperand(0), I.getName());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
new file mode 100644
index 0000000..47c37c4
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -0,0 +1,1142 @@
+//===- InstCombineCalls.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitCall and visitInvoke functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+using namespace llvm;
+
+/// getPromotedType - Return the specified type promoted as it would be to pass
+/// through a va_arg area.
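+/// For example (illustrative), i8 and i16 promote to i32, while i32 and wider
+/// integer types are returned unchanged.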
+static const Type *getPromotedType(const Type *Ty) {
+ if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
+ if (ITy->getBitWidth() < 32)
+ return Type::getInt32Ty(Ty->getContext());
+ }
+ return Ty;
+}
+
+/// EnforceKnownAlignment - If the specified pointer points to an object that
+/// we control, modify the object's alignment to PrefAlign. This isn't
+/// often possible though. If alignment is important, a more reliable approach
+/// is to simply align all global variables and allocation instructions to
+/// their preferred alignment from the beginning.
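+///
+/// For example (illustrative), a global with alignment 4 may have its
+/// alignment raised to 16 so that a 16-byte-aligned access to it is valid.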
+///
+static unsigned EnforceKnownAlignment(Value *V,
+ unsigned Align, unsigned PrefAlign) {
+
+ User *U = dyn_cast<User>(V);
+ if (!U) return Align;
+
+ switch (Operator::getOpcode(U)) {
+ default: break;
+ case Instruction::BitCast:
+ return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+ case Instruction::GetElementPtr: {
+ // If all indexes are zero, it is just the alignment of the base pointer.
+ bool AllZeroOperands = true;
+ for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
+ if (!isa<Constant>(*i) ||
+ !cast<Constant>(*i)->isNullValue()) {
+ AllZeroOperands = false;
+ break;
+ }
+
+ if (AllZeroOperands) {
+ // Treat this like a bitcast.
+ return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+ }
+ break;
+ }
+ }
+
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // If there is a large requested alignment and we can, bump up the alignment
+ // of the global.
+ if (!GV->isDeclaration()) {
+ if (GV->getAlignment() >= PrefAlign)
+ Align = GV->getAlignment();
+ else {
+ GV->setAlignment(PrefAlign);
+ Align = PrefAlign;
+ }
+ }
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ // If there is a requested alignment and if this is an alloca, round up.
+ if (AI->getAlignment() >= PrefAlign)
+ Align = AI->getAlignment();
+ else {
+ AI->setAlignment(PrefAlign);
+ Align = PrefAlign;
+ }
+ }
+
+ return Align;
+}
+
+/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
+/// we can determine, return it, otherwise return 0. If PrefAlign is specified,
+/// and it is more than the alignment of the ultimate object, see if we can
+/// increase the alignment of the ultimate object, making this check succeed.
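+///
+/// For example (illustrative), a pointer whose low four bits are known to be
+/// zero yields a computed alignment of 16.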
+unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
+ unsigned PrefAlign) {
+ unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) :
+ sizeof(PrefAlign) * CHAR_BIT;
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne);
+ unsigned TrailZ = KnownZero.countTrailingOnes();
+ unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
+
+ if (PrefAlign > Align)
+ Align = EnforceKnownAlignment(V, Align, PrefAlign);
+
+ return Align;
+}
+
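+/// SimplifyMemTransfer - Raise the alignment of a memcpy/memmove when a larger
+/// one can be proven, and turn small constant-size transfers into a load/store
+/// pair. For example (illustrative), an 8-byte memcpy whose operands are
+/// really double* becomes a double load followed by a double store.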
+Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
+ unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1));
+ unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2));
+ unsigned MinAlign = std::min(DstAlign, SrcAlign);
+ unsigned CopyAlign = MI->getAlignment();
+
+ if (CopyAlign < MinAlign) {
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
+ MinAlign, false));
+ return MI;
+ }
+
+ // If the memcpy/memmove length is 1/2/4/8 bytes then replace it with a
+ // load/store pair.
+ ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3));
+ if (MemOpLength == 0) return 0;
+
+ // Source and destination pointer types are always "i8*" for the intrinsic. See
+ // if the size is something we can handle with a single primitive load/store.
+ // A single load+store correctly handles overlapping memory in the memmove
+ // case.
+ unsigned Size = MemOpLength->getZExtValue();
+ if (Size == 0) return MI; // Delete this mem transfer.
+
+ if (Size > 8 || (Size&(Size-1)))
+ return 0; // If not 1/2/4/8 bytes, exit.
+
+ // Use an integer load+store unless we can find something better.
+ Type *NewPtrTy =
+ PointerType::getUnqual(IntegerType::get(MI->getContext(), Size<<3));
+
+ // Memcpy forces the use of i8* for the source and destination. That means
+ // that if you're using memcpy to move one double around, you'll get a cast
+ // from double* to i8*. We'd much rather use a double load+store than an
+ // i64 load+store here, because that improves the odds that the source or
+ // dest address will be promotable. See if we can find a better type than the
+ // integer datatype.
+ Value *StrippedDest = MI->getOperand(1)->stripPointerCasts();
+ if (StrippedDest != MI->getOperand(1)) {
+ const Type *SrcETy = cast<PointerType>(StrippedDest->getType())
+ ->getElementType();
+ if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
+ // The SrcETy might be something like {{{double}}} or [1 x double]. Rip
+ // down through these levels if so.
+ while (!SrcETy->isSingleValueType()) {
+ if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
+ if (STy->getNumElements() == 1)
+ SrcETy = STy->getElementType(0);
+ else
+ break;
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
+ if (ATy->getNumElements() == 1)
+ SrcETy = ATy->getElementType();
+ else
+ break;
+ } else
+ break;
+ }
+
+ if (SrcETy->isSingleValueType())
+ NewPtrTy = PointerType::getUnqual(SrcETy);
+ }
+ }
+
+
+ // If the memcpy/memmove provides better alignment info than we can
+ // infer, use it.
+ SrcAlign = std::max(SrcAlign, CopyAlign);
+ DstAlign = std::max(DstAlign, CopyAlign);
+
+ Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy);
+ Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy);
+ Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign);
+ InsertNewInstBefore(L, *MI);
+ InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI);
+
+ // Set the size of the copy to 0, it will be deleted on the next iteration.
+ MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
+ return MI;
+}
+
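+/// SimplifyMemSet - Raise the alignment of a memset when possible, and turn
+/// small constant-size fills into a single store. For example (illustrative),
+/// a 4-byte memset of the value 1 becomes a store of the i32 constant
+/// 0x01010101.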
+Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
+ unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
+ if (MI->getAlignment() < Alignment) {
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
+ Alignment, false));
+ return MI;
+ }
+
+ // Extract the length and alignment and fill if they are constant.
+ ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
+ ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
+ if (!LenC || !FillC || !FillC->getType()->isInteger(8))
+ return 0;
+ uint64_t Len = LenC->getZExtValue();
+ Alignment = MI->getAlignment();
+
+ // If the length is zero, this is a no-op
+ if (Len == 0) return MI; // memset(d,c,0,a) -> noop
+
+ // memset(s,c,n) -> store s, c (for n=1,2,4,8)
+ if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
+ const Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
+
+ Value *Dest = MI->getDest();
+ Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy));
+
+ // For memset, an alignment of 0 means the same as 1, but not for store;
+ // normalize it.
+ if (Alignment == 0) Alignment = 1;
+
+ // Extract the fill value and store.
+ uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
+ InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill),
+ Dest, false, Alignment), *MI);
+
+ // Set the size of the copy to 0, it will be deleted on the next iteration.
+ MI->setLength(Constant::getNullValue(LenC->getType()));
+ return MI;
+ }
+
+ return 0;
+}
+
+
+/// visitCallInst - CallInst simplification. This mostly only handles folding
+/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
+/// the heavy lifting.
+///
+Instruction *InstCombiner::visitCallInst(CallInst &CI) {
+ if (isFreeCall(&CI))
+ return visitFree(CI);
+
+ // If the caller function is nounwind, mark the call as nounwind, even if the
+ // callee isn't.
+ if (CI.getParent()->getParent()->doesNotThrow() &&
+ !CI.doesNotThrow()) {
+ CI.setDoesNotThrow();
+ return &CI;
+ }
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
+ if (!II) return visitCallSite(&CI);
+
+ // Intrinsics cannot occur in an invoke, so handle them here instead of in
+ // visitCallSite.
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
+ bool Changed = false;
+
+ // memmove/cpy/set of zero bytes is a noop.
+ if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
+ if (NumBytes->isNullValue()) return EraseInstFromFunction(CI);
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
+ if (CI->getZExtValue() == 1) {
+ // Replace the instruction with just byte operations. We would
+ // transform other cases to loads/stores, but we don't know if
+ // alignment is sufficient. (No transform is performed here yet.)
+ }
+ }
+
+ // If we have a memmove and the source operation is a constant global,
+ // then the source and dest pointers can't alias, so we can change this
+ // into a call to memcpy.
+ if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
+ if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
+ if (GVSrc->isConstant()) {
+ Module *M = CI.getParent()->getParent()->getParent();
+ Intrinsic::ID MemCpyID = Intrinsic::memcpy;
+ const Type *Tys[1];
+ Tys[0] = CI.getOperand(3)->getType();
+ CI.setOperand(0,
+ Intrinsic::getDeclaration(M, MemCpyID, Tys, 1));
+ Changed = true;
+ }
+ }
+
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
+ // memmove(x,x,size) -> noop.
+ if (MTI->getSource() == MTI->getDest())
+ return EraseInstFromFunction(CI);
+ }
+
+ // If we can determine a pointer alignment that is bigger than currently
+ // set, update the alignment.
+ if (isa<MemTransferInst>(MI)) {
+ if (Instruction *I = SimplifyMemTransfer(MI))
+ return I;
+ } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
+ if (Instruction *I = SimplifyMemSet(MSI))
+ return I;
+ }
+
+ if (Changed) return II;
+ }
+
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap:
+ // bswap(bswap(x)) -> x
+ if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1)))
+ if (Operand->getIntrinsicID() == Intrinsic::bswap)
+ return ReplaceInstUsesWith(CI, Operand->getOperand(1));
+
+ // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
+ if (TruncInst *TI = dyn_cast<TruncInst>(II->getOperand(1))) {
+ if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
+ if (Operand->getIntrinsicID() == Intrinsic::bswap) {
+ unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
+ TI->getType()->getPrimitiveSizeInBits();
+ Value *CV = ConstantInt::get(Operand->getType(), C);
+ Value *V = Builder->CreateLShr(Operand->getOperand(1), CV);
+ return new TruncInst(V, TI->getType());
+ }
+ }
+
+ break;
+ case Intrinsic::powi:
+ if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getOperand(2))) {
+ // powi(x, 0) -> 1.0
+ if (Power->isZero())
+ return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
+ // powi(x, 1) -> x
+ if (Power->isOne())
+ return ReplaceInstUsesWith(CI, II->getOperand(1));
+ // powi(x, -1) -> 1/x
+ if (Power->isAllOnesValue())
+ return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
+ II->getOperand(1));
+ }
+ break;
+ case Intrinsic::cttz: {
+ // If all bits below the first known one are known zero,
+ // this value is constant.
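+ // For example (illustrative), if the operand is known to have the binary
+ // form xxx...x1000, cttz folds to the constant 3.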
+ const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
+ uint32_t BitWidth = IT->getBitWidth();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth),
+ KnownZero, KnownOne);
+ unsigned TrailingZeros = KnownOne.countTrailingZeros();
+ APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
+ if ((Mask & KnownZero) == Mask)
+ return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
+ APInt(BitWidth, TrailingZeros)));
+
+ }
+ break;
+ case Intrinsic::ctlz: {
+ // If all bits above the first known one are known zero,
+ // this value is constant.
+ const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
+ uint32_t BitWidth = IT->getBitWidth();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth),
+ KnownZero, KnownOne);
+ unsigned LeadingZeros = KnownOne.countLeadingZeros();
+ APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
+ if ((Mask & KnownZero) == Mask)
+ return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
+ APInt(BitWidth, LeadingZeros)));
+
+ }
+ break;
+ case Intrinsic::uadd_with_overflow: {
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
+ uint32_t BitWidth = IT->getBitWidth();
+ APInt Mask = APInt::getSignBit(BitWidth);
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
+ bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
+
+ if (LHSKnownNegative || LHSKnownPositive) {
+ APInt RHSKnownZero(BitWidth, 0);
+ APInt RHSKnownOne(BitWidth, 0);
+ ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+ bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
+ bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
+ if (LHSKnownNegative && RHSKnownNegative) {
+ // The sign bit is set in both cases: this MUST overflow.
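+ // (Illustrative: for i8 operands, both being >= 128 means the true sum
+ // is >= 256, which cannot be represented, so the carry is certain.)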
+ // Create a simple add instruction, and insert it into the struct.
+ Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, "", &CI);
+ Worklist.Add(Add);
+ Constant *V[] = {
+ UndefValue::get(LHS->getType()),
+ ConstantInt::getTrue(II->getContext())
+ };
+ Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ return InsertValueInst::Create(Struct, Add, 0);
+ }
+
+ if (LHSKnownPositive && RHSKnownPositive) {
+ // The sign bit is clear in both cases: this CANNOT overflow.
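+ // (Illustrative: for i8 operands, both being <= 127 means the sum is at
+ // most 254, which fits, so no carry is possible.)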
+ // Create a simple add instruction, and insert it into the struct.
+ Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, "", &CI);
+ Worklist.Add(Add);
+ Constant *V[] = {
+ UndefValue::get(LHS->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ return InsertValueInst::Create(Struct, Add, 0);
+ }
+ }
+ }
+ // FALL THROUGH: the remaining sadd folds apply to uadd as well.
+ case Intrinsic::sadd_with_overflow:
+ // Canonicalize constants into the RHS.
+ if (isa<Constant>(II->getOperand(1)) &&
+ !isa<Constant>(II->getOperand(2))) {
+ Value *LHS = II->getOperand(1);
+ II->setOperand(1, II->getOperand(2));
+ II->setOperand(2, LHS);
+ return II;
+ }
+
+ // X + undef -> undef
+ if (isa<UndefValue>(II->getOperand(2)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
+ // X + 0 -> {X, false}
+ if (RHS->isZero()) {
+ Constant *V[] = {
+ UndefValue::get(II->getOperand(1)->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+ }
+ }
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ // undef - X -> undef
+ // X - undef -> undef
+ if (isa<UndefValue>(II->getOperand(1)) ||
+ isa<UndefValue>(II->getOperand(2)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
+ // X - 0 -> {X, false}
+ if (RHS->isZero()) {
+ Constant *V[] = {
+ UndefValue::get(II->getOperand(1)->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+ }
+ }
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ // Canonicalize constants into the RHS.
+ if (isa<Constant>(II->getOperand(1)) &&
+ !isa<Constant>(II->getOperand(2))) {
+ Value *LHS = II->getOperand(1);
+ II->setOperand(1, II->getOperand(2));
+ II->setOperand(2, LHS);
+ return II;
+ }
+
+ // X * undef -> undef
+ if (isa<UndefValue>(II->getOperand(2)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getOperand(2))) {
+ // X*0 -> {0, false}
+ if (RHSI->isZero())
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
+
+ // X * 1 -> {X, false}
+ if (RHSI->equalsInt(1)) {
+ Constant *V[] = {
+ UndefValue::get(II->getOperand(1)->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+ }
+ }
+ break;
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ case Intrinsic::x86_sse_loadu_ps:
+ case Intrinsic::x86_sse2_loadu_pd:
+ case Intrinsic::x86_sse2_loadu_dq:
+ // Turn PPC lvx -> load if the pointer is known aligned.
+ // Turn X86 loadups -> load if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+ Value *Ptr = Builder->CreateBitCast(II->getOperand(1),
+ PointerType::getUnqual(II->getType()));
+ return new LoadInst(Ptr);
+ }
+ break;
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ // Turn stvx -> store if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
+ const Type *OpPtrTy =
+ PointerType::getUnqual(II->getOperand(1)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy);
+ return new StoreInst(II->getOperand(1), Ptr);
+ }
+ break;
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ // Turn X86 storeu -> store if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+ const Type *OpPtrTy =
+ PointerType::getUnqual(II->getOperand(2)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy);
+ return new StoreInst(II->getOperand(2), Ptr);
+ }
+ break;
+
+ case Intrinsic::x86_sse_cvttss2si: {
+ // This intrinsic only demands the 0th element of its input vector. If
+ // we can simplify the input based on that, do so now.
+ unsigned VWidth =
+ cast<VectorType>(II->getOperand(1)->getType())->getNumElements();
+ APInt DemandedElts(VWidth, 1);
+ APInt UndefElts(VWidth, 0);
+ if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
+ UndefElts)) {
+ II->setOperand(1, V);
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::ppc_altivec_vperm:
+ // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+ if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
+ assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
+
+ // Check that all of the elements are integer constants or undefs.
+ bool AllEltsOk = true;
+ for (unsigned i = 0; i != 16; ++i) {
+ if (!isa<ConstantInt>(Mask->getOperand(i)) &&
+ !isa<UndefValue>(Mask->getOperand(i))) {
+ AllEltsOk = false;
+ break;
+ }
+ }
+
+ if (AllEltsOk) {
+ // Cast the input vectors to byte vectors.
+ Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType());
+ Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType());
+ Value *Result = UndefValue::get(Op0->getType());
+
+ // Only extract each element once.
+ Value *ExtractedElts[32];
+ memset(ExtractedElts, 0, sizeof(ExtractedElts));
+
+ for (unsigned i = 0; i != 16; ++i) {
+ if (isa<UndefValue>(Mask->getOperand(i)))
+ continue;
+ unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
+ Idx &= 31; // Match the hardware behavior.
+
+ if (ExtractedElts[Idx] == 0) {
+ ExtractedElts[Idx] =
+ Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
+ ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ Idx&15, false), "tmp");
+ }
+
+ // Insert this value into the result vector.
+ Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
+ ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ i, false), "tmp");
+ }
+ return CastInst::Create(Instruction::BitCast, Result, CI.getType());
+ }
+ }
+ break;
+
+ case Intrinsic::stackrestore: {
+ // If the save is right next to the restore, remove the restore. This can
+ // happen when variable allocas are DCE'd.
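+ // For example (illustrative):
+ // %ss = call i8* @llvm.stacksave()
+ // call void @llvm.stackrestore(i8* %ss) ; removable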
+ if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
+ if (SS->getIntrinsicID() == Intrinsic::stacksave) {
+ BasicBlock::iterator BI = SS;
+ if (&*++BI == II)
+ return EraseInstFromFunction(CI);
+ }
+ }
+
+ // Scan down this block to see if there is another stack restore in the
+ // same block without an intervening call/alloca.
+ BasicBlock::iterator BI = II;
+ TerminatorInst *TI = II->getParent()->getTerminator();
+ bool CannotRemove = false;
+ for (++BI; &*BI != TI; ++BI) {
+ if (isa<AllocaInst>(BI) || isMalloc(BI)) {
+ CannotRemove = true;
+ break;
+ }
+ if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
+ // If there is a stackrestore below this one, remove this one.
+ if (II->getIntrinsicID() == Intrinsic::stackrestore)
+ return EraseInstFromFunction(CI);
+ // Otherwise, ignore the intrinsic.
+ } else {
+ // If we found a non-intrinsic call, we can't remove the stack
+ // restore.
+ CannotRemove = true;
+ break;
+ }
+ }
+ }
+
+ // If the stack restore is in a return/unwind block and if there are no
+ // allocas or calls between the restore and the return, nuke the restore.
+ if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
+ return EraseInstFromFunction(CI);
+ break;
+ }
+ case Intrinsic::objectsize: {
+ ConstantInt *Const = cast<ConstantInt>(II->getOperand(2));
+ const Type *Ty = CI.getType();
+
+ // 0 is maximum number of bytes left, 1 is minimum number of bytes left.
+ // TODO: actually compute these values; the current return values are
+ // "don't know".
+ if (Const->getZExtValue() == 0)
+ return ReplaceInstUsesWith(CI, Constant::getAllOnesValue(Ty));
+ else
+ return ReplaceInstUsesWith(CI, ConstantInt::get(Ty, 0));
+ }
+ }
+
+ return visitCallSite(II);
+}
+
+// InvokeInst simplification
+//
+Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
+ return visitCallSite(&II);
+}
+
+/// isSafeToEliminateVarargsCast - If this cast does not affect the value
+/// passed through the varargs area, we can eliminate the use of the cast.
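+/// For example (illustrative), a lossless bitcast from i32* to i8* passed
+/// through the varargs area can be dropped in favor of the original pointer,
+/// provided the argument is not byval with a differing allocation size.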
+static bool isSafeToEliminateVarargsCast(const CallSite CS,
+ const CastInst * const CI,
+ const TargetData * const TD,
+ const int ix) {
+ if (!CI->isLosslessCast())
+ return false;
+
+ // The size of ByVal arguments is derived from the type, so we
+ // can't change to a type with a different size. If the size were
+ // passed explicitly we could avoid this check.
+ if (!CS.paramHasAttr(ix, Attribute::ByVal))
+ return true;
+
+ const Type* SrcTy =
+ cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
+ const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
+ if (!SrcTy->isSized() || !DstTy->isSized())
+ return false;
+ if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
+ return false;
+ return true;
+}
+
+// visitCallSite - Improvements for call and invoke instructions.
+//
+Instruction *InstCombiner::visitCallSite(CallSite CS) {
+ bool Changed = false;
+
+ // If the callee is a constexpr cast of a function, attempt to move the cast
+ // to the arguments of the call/invoke.
+ if (transformConstExprCastCall(CS)) return 0;
+
+ Value *Callee = CS.getCalledValue();
+
+ if (Function *CalleeF = dyn_cast<Function>(Callee))
+ if (CalleeF->getCallingConv() != CS.getCallingConv()) {
+ Instruction *OldCall = CS.getInstruction();
+ // If the call and callee calling conventions don't match, this call must
+ // be unreachable, as the call is undefined.
+ new StoreInst(ConstantInt::getTrue(Callee->getContext()),
+ UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+ OldCall);
+ // If OldCall does not return void then replaceAllUsesWith undef.
+ // This allows ValueHandles and custom metadata to adjust themselves.
+ if (!OldCall->getType()->isVoidTy())
+ OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
+ if (isa<CallInst>(OldCall)) // Not worth removing an invoke here.
+ return EraseInstFromFunction(*OldCall);
+ return 0;
+ }
+
+ if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+ // This instruction is not reachable, just remove it. We insert a store to
+ // undef so that we know that this code is not reachable, despite the fact
+ // that we can't modify the CFG here.
+ new StoreInst(ConstantInt::getTrue(Callee->getContext()),
+ UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+ CS.getInstruction());
+
+ // If CS does not return void then replaceAllUsesWith undef.
+ // This allows ValueHandles and custom metadata to adjust themselves.
+ if (!CS.getInstruction()->getType()->isVoidTy())
+ CS.getInstruction()->
+ replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));
+
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ // Don't break the CFG, insert a dummy cond branch.
+ BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
+ ConstantInt::getTrue(Callee->getContext()), II);
+ }
+ return EraseInstFromFunction(*CS.getInstruction());
+ }
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
+ if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
+ if (In->getIntrinsicID() == Intrinsic::init_trampoline)
+ return transformCallThroughTrampoline(CS);
+
+ const PointerType *PTy = cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ if (FTy->isVarArg()) {
+ int ix = FTy->getNumParams() + (isa<InvokeInst>(CS.getInstruction()) ? 3 : 1);
+ // See if we can optimize any arguments passed through the varargs area of
+ // the call.
+ for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
+ E = CS.arg_end(); I != E; ++I, ++ix) {
+ CastInst *CI = dyn_cast<CastInst>(*I);
+ if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
+ *I = CI->getOperand(0);
+ Changed = true;
+ }
+ }
+ }
+
+ if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
+ // Inline asm calls cannot throw - mark them 'nounwind'.
+ CS.setDoesNotThrow();
+ Changed = true;
+ }
+
+ return Changed ? CS.getInstruction() : 0;
+}
+
+// transformConstExprCastCall - If the callee is a constexpr cast of a function,
+// attempt to move the cast to the arguments of the call/invoke.
+//
+bool InstCombiner::transformConstExprCastCall(CallSite CS) {
+ if (!isa<ConstantExpr>(CS.getCalledValue())) return false;
+ ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue());
+ if (CE->getOpcode() != Instruction::BitCast ||
+ !isa<Function>(CE->getOperand(0)))
+ return false;
+ Function *Callee = cast<Function>(CE->getOperand(0));
+ Instruction *Caller = CS.getInstruction();
+ const AttrListPtr &CallerPAL = CS.getAttributes();
+
+ // Okay, this is a cast from a function to a different type. Unless doing so
+ // would cause a type conversion of one of our arguments, change this call to
+ // be a direct call with arguments cast to the appropriate types.
+ //
+ const FunctionType *FT = Callee->getFunctionType();
+ const Type *OldRetTy = Caller->getType();
+ const Type *NewRetTy = FT->getReturnType();
+
+ if (isa<StructType>(NewRetTy))
+ return false; // TODO: Handle multiple return values.
+
+ // Check to see if we are changing the return type...
+ if (OldRetTy != NewRetTy) {
+ if (Callee->isDeclaration() &&
+ // Conversion is ok if changing from one pointer type to another or from
+ // a pointer to an integer of the same size.
+ !((isa<PointerType>(OldRetTy) || !TD ||
+ OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
+ (isa<PointerType>(NewRetTy) || !TD ||
+ NewRetTy == TD->getIntPtrType(Caller->getContext()))))
+ return false; // Cannot transform this return value.
+
+ if (!Caller->use_empty() &&
+ // void -> non-void is handled specially
+ !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
+ return false; // Cannot transform this return value.
+
+ if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
+ Attributes RAttrs = CallerPAL.getRetAttributes();
+ if (RAttrs & Attribute::typeIncompatible(NewRetTy))
+ return false; // Attribute not compatible with transformed value.
+ }
+
+ // If the callsite is an invoke instruction, and the return value is used by
+ // a PHI node in a successor, we cannot change the return type of the call
+ // because there is no place to put the cast instruction (without breaking
+ // the critical edge). Bail out in this case.
+ if (!Caller->use_empty())
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
+ for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
+ UI != E; ++UI)
+ if (PHINode *PN = dyn_cast<PHINode>(*UI))
+ if (PN->getParent() == II->getNormalDest() ||
+ PN->getParent() == II->getUnwindDest())
+ return false;
+ }
+
+ unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
+ unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
+
+ CallSite::arg_iterator AI = CS.arg_begin();
+ for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
+ const Type *ParamTy = FT->getParamType(i);
+ const Type *ActTy = (*AI)->getType();
+
+ if (!CastInst::isCastable(ActTy, ParamTy))
+ return false; // Cannot transform this parameter value.
+
+ if (CallerPAL.getParamAttributes(i + 1)
+ & Attribute::typeIncompatible(ParamTy))
+ return false; // Attribute not compatible with transformed value.
+
+ // Converting from one pointer type to another or between a pointer and an
+ // integer of the same size is safe even if we do not have a body.
+ bool isConvertible = ActTy == ParamTy ||
+ (TD && ((isa<PointerType>(ParamTy) ||
+ ParamTy == TD->getIntPtrType(Caller->getContext())) &&
+ (isa<PointerType>(ActTy) ||
+ ActTy == TD->getIntPtrType(Caller->getContext()))));
+ if (Callee->isDeclaration() && !isConvertible) return false;
+ }
+
+ if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() &&
+ Callee->isDeclaration())
+ return false; // Do not delete arguments unless we have a function body.
+
+ if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
+ !CallerPAL.isEmpty())
+ // In this case we have more arguments than the new function type, but we
+ // won't be dropping them. Check that these extra arguments have attributes
+ // that are compatible with being a vararg call argument.
+ for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
+ if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
+ break;
+ Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
+ if (PAttrs & Attribute::VarArgsIncompatible)
+ return false;
+ }
+
+ // Okay, we decided that this is a safe thing to do: go ahead and start
+ // inserting cast instructions as necessary...
+ std::vector<Value*> Args;
+ Args.reserve(NumActualArgs);
+ SmallVector<AttributeWithIndex, 8> attrVec;
+ attrVec.reserve(NumCommonArgs);
+
+ // Get any return attributes.
+ Attributes RAttrs = CallerPAL.getRetAttributes();
+
+ // If the return value is not being used, the type may not be compatible
+ // with the existing attributes. Wipe out any problematic attributes.
+ RAttrs &= ~Attribute::typeIncompatible(NewRetTy);
+
+ // Add the new return attributes.
+ if (RAttrs)
+ attrVec.push_back(AttributeWithIndex::get(0, RAttrs));
+
+ AI = CS.arg_begin();
+ for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
+ const Type *ParamTy = FT->getParamType(i);
+ if ((*AI)->getType() == ParamTy) {
+ Args.push_back(*AI);
+ } else {
+ Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
+ false, ParamTy, false);
+ Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp"));
+ }
+
+ // Add any parameter attributes.
+ if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+ attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+ }
+
+ // If the function takes more arguments than the call was taking, add them
+ // now.
+ for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
+ Args.push_back(Constant::getNullValue(FT->getParamType(i)));
+
+ // If we are removing arguments to the function, emit an obnoxious warning.
+ if (FT->getNumParams() < NumActualArgs) {
+ if (!FT->isVarArg()) {
+ errs() << "WARNING: While resolving call to function '"
+ << Callee->getName() << "' arguments were dropped!\n";
+ } else {
+ // Add all of the arguments in their promoted form to the arg list.
+ for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
+ const Type *PTy = getPromotedType((*AI)->getType());
+ if (PTy != (*AI)->getType()) {
+ // Must promote to pass through va_arg area!
+ Instruction::CastOps opcode =
+ CastInst::getCastOpcode(*AI, false, PTy, false);
+ Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));
+ } else {
+ Args.push_back(*AI);
+ }
+
+ // Add any parameter attributes.
+ if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+ attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+ }
+ }
+ }
+
+ if (Attributes FnAttrs = CallerPAL.getFnAttributes())
+ attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ if (NewRetTy->isVoidTy())
+ Caller->setName(""); // Void type should not have a name.
+
+ const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
+ attrVec.end());
+
+ Instruction *NC;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(),
+ Args.begin(), Args.end(),
+ Caller->getName(), Caller);
+ cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
+ cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
+ } else {
+ NC = CallInst::Create(Callee, Args.begin(), Args.end(),
+ Caller->getName(), Caller);
+ CallInst *CI = cast<CallInst>(Caller);
+ if (CI->isTailCall())
+ cast<CallInst>(NC)->setTailCall();
+ cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
+ cast<CallInst>(NC)->setAttributes(NewCallerPAL);
+ }
+
+ // Insert a cast of the return type as necessary.
+ Value *NV = NC;
+ if (OldRetTy != NV->getType() && !Caller->use_empty()) {
+ if (!NV->getType()->isVoidTy()) {
+ Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false,
+ OldRetTy, false);
+ NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
+
+ // If this is an invoke instruction, we should insert it after the first
+ // non-PHI instruction in the normal successor block.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
+ InsertNewInstBefore(NC, *I);
+ } else {
+ // Otherwise, it's a call; just insert the cast right after the call instruction.
+ InsertNewInstBefore(NC, *Caller);
+ }
+ Worklist.AddUsersToWorkList(*Caller);
+ } else {
+ NV = UndefValue::get(Caller->getType());
+ }
+ }
+
+
+ if (!Caller->use_empty())
+ Caller->replaceAllUsesWith(NV);
+
+ EraseInstFromFunction(*Caller);
+ return true;
+}
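+
+// For illustration (assumed IR, not from this patch): given a function @g of
+// type i8 (i8) that is defined in this module, a call through a constexpr
+// cast such as
+//   %r = call i32 bitcast (i8 (i8)* @g to i32 (i32)*)(i32 %v)
+// is rewritten as a direct call with the casts moved onto the argument and
+// the result:
+//   %a = trunc i32 %v to i8
+//   %c = call i8 @g(i8 %a)
+//   %r = zext i8 %c to i32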
+
+// transformCallThroughTrampoline - Turn a call to a function created by the
+// init_trampoline intrinsic into a direct call to the underlying function.
+//
+Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
+ Value *Callee = CS.getCalledValue();
+ const PointerType *PTy = cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ const AttrListPtr &Attrs = CS.getAttributes();
+
+ // If the call already has the 'nest' attribute somewhere then give up -
+ // otherwise 'nest' would occur twice after splicing in the chain.
+ if (Attrs.hasAttrSomewhere(Attribute::Nest))
+ return 0;
+
+ IntrinsicInst *Tramp =
+ cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
+
+ Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts());
+ const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
+ const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
+
+ const AttrListPtr &NestAttrs = NestF->getAttributes();
+ if (!NestAttrs.isEmpty()) {
+ unsigned NestIdx = 1;
+ const Type *NestTy = 0;
+ Attributes NestAttr = Attribute::None;
+
+ // Look for a parameter marked with the 'nest' attribute.
+ for (FunctionType::param_iterator I = NestFTy->param_begin(),
+ E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
+ if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
+ // Record the parameter type and any other attributes.
+ NestTy = *I;
+ NestAttr = NestAttrs.getParamAttributes(NestIdx);
+ break;
+ }
+
+ if (NestTy) {
+ Instruction *Caller = CS.getInstruction();
+ std::vector<Value*> NewArgs;
+ NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
+
+ SmallVector<AttributeWithIndex, 8> NewAttrs;
+ NewAttrs.reserve(Attrs.getNumSlots() + 1);
+
+ // Insert the nest argument into the call argument list, which may
+ // mean appending it. Likewise for attributes.
+
+ // Add any result attributes.
+ if (Attributes Attr = Attrs.getRetAttributes())
+ NewAttrs.push_back(AttributeWithIndex::get(0, Attr));
+
+ {
+ unsigned Idx = 1;
+ CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ do {
+ if (Idx == NestIdx) {
+ // Add the chain argument and attributes.
+ Value *NestVal = Tramp->getOperand(3);
+ if (NestVal->getType() != NestTy)
+ NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
+ NewArgs.push_back(NestVal);
+ NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
+ }
+
+ if (I == E)
+ break;
+
+ // Add the original argument and attributes.
+ NewArgs.push_back(*I);
+ if (Attributes Attr = Attrs.getParamAttributes(Idx))
+ NewAttrs.push_back
+ (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
+
+ ++Idx, ++I;
+ } while (1);
+ }
+
+ // Add any function attributes.
+ if (Attributes Attr = Attrs.getFnAttributes())
+ NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
+
+ // The trampoline may have been bitcast to a bogus type (FTy).
+ // Handle this by synthesizing a new function type, equal to FTy
+ // with the chain parameter inserted.
+
+ std::vector<const Type*> NewTypes;
+ NewTypes.reserve(FTy->getNumParams()+1);
+
+ // Insert the chain's type into the list of parameter types, which may
+ // mean appending it.
+ {
+ unsigned Idx = 1;
+ FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end();
+
+ do {
+ if (Idx == NestIdx)
+ // Add the chain's type.
+ NewTypes.push_back(NestTy);
+
+ if (I == E)
+ break;
+
+ // Add the original type.
+ NewTypes.push_back(*I);
+
+ ++Idx, ++I;
+ } while (1);
+ }
+
+ // Replace the trampoline call with a direct call. Let the generic
+ // code sort out any function type mismatches.
+ FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
+ FTy->isVarArg());
+ Constant *NewCallee =
+ NestF->getType() == PointerType::getUnqual(NewFTy) ?
+ NestF : ConstantExpr::getBitCast(NestF,
+ PointerType::getUnqual(NewFTy));
+ const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
+ NewAttrs.end());
+
+ Instruction *NewCaller;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ NewCaller = InvokeInst::Create(NewCallee,
+ II->getNormalDest(), II->getUnwindDest(),
+ NewArgs.begin(), NewArgs.end(),
+ Caller->getName(), Caller);
+ cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
+ cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
+ } else {
+ NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(),
+ Caller->getName(), Caller);
+ if (cast<CallInst>(Caller)->isTailCall())
+ cast<CallInst>(NewCaller)->setTailCall();
+ cast<CallInst>(NewCaller)->
+ setCallingConv(cast<CallInst>(Caller)->getCallingConv());
+ cast<CallInst>(NewCaller)->setAttributes(NewPAL);
+ }
+ if (!Caller->getType()->isVoidTy())
+ Caller->replaceAllUsesWith(NewCaller);
+ Caller->eraseFromParent();
+ Worklist.Remove(Caller);
+ return 0;
+ }
+ }
+
+ // Replace the trampoline call with a direct call. Since there is no 'nest'
+ // parameter, there is no need to adjust the argument list. Let the generic
+ // code sort out any function type mismatches.
+ Constant *NewCallee =
+ NestF->getType() == PTy ? NestF :
+ ConstantExpr::getBitCast(NestF, PTy);
+ CS.setCalledFunction(NewCallee);
+ return CS.getInstruction();
+}
+
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
new file mode 100644
index 0000000..e018b35
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -0,0 +1,1301 @@
+//===- InstCombineCasts.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for cast operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear
+/// expression. If so, decompose it, returning some value X, such that Val is
+/// X*Scale+Offset.
+///
+static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
+ int &Offset) {
+ assert(Val->getType()->isInteger(32) && "Unexpected allocation size type!");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+ Offset = CI->getZExtValue();
+ Scale = 0;
+ return ConstantInt::get(Type::getInt32Ty(Val->getContext()), 0);
+ }
+
+ if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (I->getOpcode() == Instruction::Shl) {
+ // This is a value scaled by '1 << the shift amt'.
+ Scale = 1U << RHS->getZExtValue();
+ Offset = 0;
+ return I->getOperand(0);
+ }
+
+ if (I->getOpcode() == Instruction::Mul) {
+ // This value is scaled by 'RHS'.
+ Scale = RHS->getZExtValue();
+ Offset = 0;
+ return I->getOperand(0);
+ }
+
+ if (I->getOpcode() == Instruction::Add) {
+ // We have X+C. Check to see if we really have (X*C2)+C1,
+ // where C1 is divisible by C2.
+ unsigned SubScale;
+ Value *SubVal =
+ DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
+ Offset += RHS->getZExtValue();
+ Scale = SubScale;
+ return SubVal;
+ }
+ }
+ }
+
+ // Otherwise, we can't look past this.
+ Scale = 1;
+ Offset = 0;
+ return Val;
+}
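+
+// For illustration (assumed IR, not from this patch): given an allocation
+// size such as
+//   %t = shl i32 %n, 2
+//   %sz = add i32 %t, 12
+// DecomposeSimpleLinearExpr(%sz) returns %n with Scale = 4 and Offset = 12,
+// modeling the size as %n*4 + 12.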
+
+/// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
+/// try to eliminate the cast by moving the type information into the alloc.
+Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
+ AllocaInst &AI) {
+ // This requires TargetData to get the alloca alignment and size information.
+ if (!TD) return 0;
+
+ const PointerType *PTy = cast<PointerType>(CI.getType());
+
+ BuilderTy AllocaBuilder(*Builder);
+ AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
+
+ // Get the type really allocated and the type casted to.
+ const Type *AllocElTy = AI.getAllocatedType();
+ const Type *CastElTy = PTy->getElementType();
+ if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0;
+
+ unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy);
+ unsigned CastElTyAlign = TD->getABITypeAlignment(CastElTy);
+ if (CastElTyAlign < AllocElTyAlign) return 0;
+
+ // If the allocation has multiple uses, only promote it if we are strictly
+ // increasing the alignment of the resultant allocation. If we keep it the
+ // same, we open the door to infinite loops of various kinds. (A reference
+ // from a dbg.declare doesn't count as a use for this purpose.)
+ if (!AI.hasOneUse() && !hasOneUsePlusDeclare(&AI) &&
+ CastElTyAlign == AllocElTyAlign) return 0;
+
+ uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy);
+ uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
+ if (CastElTySize == 0 || AllocElTySize == 0) return 0;
+
+ // See if we can satisfy the modulus by pulling a scale out of the array
+ // size argument.
+ unsigned ArraySizeScale;
+ int ArrayOffset;
+ Value *NumElements = // See if the array size is a decomposable linear expr.
+ DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
+
+ // If using a non-1 scale now satisfies the modulus, we really can do the
+ // xform.
+ if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
+ (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0;
+
+ unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
+ Value *Amt = 0;
+ if (Scale == 1) {
+ Amt = NumElements;
+ } else {
+ Amt = ConstantInt::get(Type::getInt32Ty(CI.getContext()), Scale);
+ // Insert before the alloca, not before the cast.
+ Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp");
+ }
+
+ if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
+ Value *Off = ConstantInt::get(Type::getInt32Ty(CI.getContext()),
+ Offset, true);
+ Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
+ }
+
+ AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
+ New->setAlignment(AI.getAlignment());
+ New->takeName(&AI);
+
+ // If the allocation has one real use plus a dbg.declare, just remove the
+ // declare.
+ if (DbgDeclareInst *DI = hasOneUsePlusDeclare(&AI)) {
+ EraseInstFromFunction(*(Instruction*)DI);
+ }
+ // If the allocation has multiple real uses, insert a cast and change all
+ // things that used it to use the new cast. This will also hack on CI, but it
+ // will die soon.
+ else if (!AI.hasOneUse()) {
+ // New is the allocation instruction, pointer typed. AI is the original
+ // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
+ Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast");
+ AI.replaceAllUsesWith(NewCast);
+ }
+ return ReplaceInstUsesWith(CI, New);
+}
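+
+// For illustration (assuming TargetData gives i32 size 4 and alignment 4, and
+// that the cast is the alloca's only use):
+//   %n = shl i32 %m, 2
+//   %a = alloca i8, i32 %n
+//   %c = bitcast i8* %a to i32*
+// is promoted to
+//   %a = alloca i32, i32 %m
+// and the bitcast is replaced by the new alloca.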
+
+
+
+/// EvaluateInDifferentType - Given an expression that CanEvaluateTruncated,
+/// CanEvaluateZExtd, or CanEvaluateSExtd returns true for, actually insert the
+/// code to evaluate the expression.
+Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
+ bool isSigned) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
+ // If we got a constantexpr back, try to simplify it with TD info.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ C = ConstantFoldConstantExpression(CE, TD);
+ return C;
+ }
+
+ // Otherwise, it must be an instruction.
+ Instruction *I = cast<Instruction>(V);
+ Instruction *Res = 0;
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::AShr:
+ case Instruction::LShr:
+ case Instruction::Shl:
+ case Instruction::UDiv:
+ case Instruction::URem: {
+ Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
+ Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
+ Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // If the source type of the cast is the type we're trying for then we can
+ // just return the source. There's no need to insert it because it is not
+ // new.
+ if (I->getOperand(0)->getType() == Ty)
+ return I->getOperand(0);
+
+ // Otherwise, must be the same type of cast, so just reinsert a new one.
+ // This also handles the case of zext(trunc(x)) -> zext(x).
+ Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
+ Opc == Instruction::SExt);
+ break;
+ case Instruction::Select: {
+ Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
+ Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
+ Res = SelectInst::Create(I->getOperand(0), True, False);
+ break;
+ }
+ case Instruction::PHI: {
+ PHINode *OPN = cast<PHINode>(I);
+ PHINode *NPN = PHINode::Create(Ty);
+ for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
+ Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
+ NPN->addIncoming(V, OPN->getIncomingBlock(i));
+ }
+ Res = NPN;
+ break;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ llvm_unreachable("Unreachable!");
+ break;
+ }
+
+ Res->takeName(I);
+ return InsertNewInstBefore(Res, *I);
+}
+
+
+/// This function is a wrapper around CastInst::isEliminableCastPair. It
+/// simply extracts arguments and returns what that function returns.
+static Instruction::CastOps
+isEliminableCastPair(
+ const CastInst *CI, ///< The first cast instruction
+ unsigned opcode, ///< The opcode of the second cast instruction
+ const Type *DstTy, ///< The target type for the second cast instruction
+ TargetData *TD ///< The target data for pointer size
+) {
+
+ const Type *SrcTy = CI->getOperand(0)->getType(); // A in the A->B->C chain
+ const Type *MidTy = CI->getType(); // B in the A->B->C chain
+
+ // Get the opcodes of the two Cast instructions
+ Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
+ Instruction::CastOps secondOp = Instruction::CastOps(opcode);
+
+ unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
+ DstTy,
+ TD ? TD->getIntPtrType(CI->getContext()) : 0);
+
+ // We don't want to form an inttoptr or ptrtoint that converts to an integer
+ // type that differs from the pointer size.
+ if ((Res == Instruction::IntToPtr &&
+ (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) ||
+ (Res == Instruction::PtrToInt &&
+ (!TD || DstTy != TD->getIntPtrType(CI->getContext()))))
+ Res = 0;
+
+ return Instruction::CastOps(Res);
+}
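+
+// For illustration (assumed IR, not from this patch): for the pair
+//   %b = zext i16 %x to i32
+//   %c = zext i32 %b to i64
+// isEliminableCastPair returns ZExt, and commonCastTransforms below collapses
+// the sequence into a single zext i16 %x to i64.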
+
+/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results
+/// in any code being generated. It does not require codegen if V is simple
+/// enough or if the cast can be folded into other casts.
+bool InstCombiner::ValueRequiresCast(Instruction::CastOps opcode,const Value *V,
+ const Type *Ty) {
+ if (V->getType() == Ty || isa<Constant>(V)) return false;
+
+ // If this is another cast that can be eliminated, it isn't codegen either.
+ if (const CastInst *CI = dyn_cast<CastInst>(V))
+ if (isEliminableCastPair(CI, opcode, Ty, TD))
+ return false;
+ return true;
+}
+
+
+/// @brief Implement the transforms common to all CastInst visitors.
+Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
+ Value *Src = CI.getOperand(0);
+
+ // Many cases of "cast of a cast" are eliminable. If this one is, eliminate
+ // it now.
+ if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
+ if (Instruction::CastOps opc =
+ isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
+ // The first cast (CSrc) is eliminable so we need to fix up or replace
+ // the second cast (CI). CSrc will then have a good chance of being dead.
+ return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
+ }
+ }
+
+ // If we are casting a select then fold the cast into the select
+ if (SelectInst *SI = dyn_cast<SelectInst>(Src))
+ if (Instruction *NV = FoldOpIntoSelect(CI, SI))
+ return NV;
+
+ // If we are casting a PHI then fold the cast into the PHI
+ if (isa<PHINode>(Src)) {
+ // We don't do this if it would turn a PHI node of a currently legal type
+ // into one of an illegal type.
+ if (!isa<IntegerType>(Src->getType()) ||
+ !isa<IntegerType>(CI.getType()) ||
+ ShouldChangeType(CI.getType(), Src->getType()))
+ if (Instruction *NV = FoldOpIntoPhi(CI))
+ return NV;
+ }
+
+ return 0;
+}
+
+/// CanEvaluateTruncated - Return true if we can evaluate the specified
+/// expression tree as type Ty instead of its larger type, and arrive with the
+/// same value. This is used by code that tries to eliminate truncates.
+///
+/// Ty will always be a type smaller than V's type. We should return true if
+/// trunc(V)
+/// can be computed by computing V in the smaller type. If V is an instruction,
+/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
+/// makes sense if x and y can be efficiently truncated.
+///
+/// This function works on both vectors and scalars.
+///
+static bool CanEvaluateTruncated(Value *V, const Type *Ty) {
+ // We can always evaluate constants in another type.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ const Type *OrigTy = V->getType();
+
+ // If this is an extension from the dest type, we can eliminate it, even if it
+ // has multiple uses.
+ if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+ I->getOperand(0)->getType() == Ty)
+ return true;
+
+ // We can't extend or shrink something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // These operators can all arbitrarily be extended or truncated.
+ return CanEvaluateTruncated(I->getOperand(0), Ty) &&
+ CanEvaluateTruncated(I->getOperand(1), Ty);
+
+ case Instruction::UDiv:
+ case Instruction::URem: {
+ // UDiv and URem can be truncated if all the truncated bits are zero.
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (BitWidth < OrigBitWidth) {
+ APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth);
+ if (MaskedValueIsZero(I->getOperand(0), Mask) &&
+ MaskedValueIsZero(I->getOperand(1), Mask)) {
+ return CanEvaluateTruncated(I->getOperand(0), Ty) &&
+ CanEvaluateTruncated(I->getOperand(1), Ty);
+ }
+ }
+ break;
+ }
+ case Instruction::Shl:
+ // If we are truncating the result of this SHL, and if it's a shift of a
+ // constant amount, we can always perform a SHL in a smaller type.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (CI->getLimitedValue(BitWidth) < BitWidth)
+ return CanEvaluateTruncated(I->getOperand(0), Ty);
+ }
+ break;
+ case Instruction::LShr:
+ // If this is a truncate of a logical shr, we can truncate it to a smaller
+ // lshr iff we know that the bits we would otherwise be shifting in are
+ // already zeros.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+ CI->getLimitedValue(BitWidth) < BitWidth) {
+ return CanEvaluateTruncated(I->getOperand(0), Ty);
+ }
+ }
+ break;
+ case Instruction::Trunc:
+ // trunc(trunc(x)) -> trunc(x)
+ return true;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ return CanEvaluateTruncated(SI->getTrueValue(), Ty) &&
+ CanEvaluateTruncated(SI->getFalseValue(), Ty);
+ }
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateTruncated(PN->getIncomingValue(i), Ty))
+ return false;
+ return true;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ break;
+ }
+
+ return false;
+}
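+
+// For illustration (assumed IR, not from this patch): if %a is used only by
+// the trunc in
+//   %a = add i32 %x, %y
+//   %t = trunc i32 %a to i16
+// then CanEvaluateTruncated(%a, i16) is true, and visitTrunc rebuilds the
+// expression as
+//   %x16 = trunc i32 %x to i16
+//   %y16 = trunc i32 %y to i16
+//   %t   = add i16 %x16, %y16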
+
+Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
+ if (Instruction *Result = commonCastTransforms(CI))
+ return Result;
+
+ // See if we can simplify any instructions used by the input whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(CI))
+ return &CI;
+
+ Value *Src = CI.getOperand(0);
+ const Type *DestTy = CI.getType(), *SrcTy = Src->getType();
+
+ // Attempt to truncate the entire input expression tree to the destination
+ // type. Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
+ CanEvaluateTruncated(Src, DestTy)) {
+
+ // If this cast is a truncate, evaluating in a different type always
+ // eliminates the cast, so it is always a win.
+ DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+ " to avoid cast: " << CI);
+ Value *Res = EvaluateInDifferentType(Src, DestTy, false);
+ assert(Res->getType() == DestTy);
+ return ReplaceInstUsesWith(CI, Res);
+ }
+
+ // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
+ if (DestTy->getScalarSizeInBits() == 1) {
+ Constant *One = ConstantInt::get(Src->getType(), 1);
+ Src = Builder->CreateAnd(Src, One, "tmp");
+ Value *Zero = Constant::getNullValue(Src->getType());
+ return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
+ }
+
+ return 0;
+}
+
+/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
+/// in order to eliminate the icmp.
+Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+ bool DoXform) {
+ // If we are just checking for a icmp eq of a single bit and zext'ing it
+ // to an integer, then shift the bit to the appropriate place and then
+ // cast to integer to avoid the comparison.
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+ const APInt &Op1CV = Op1C->getValue();
+
+ // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
+ // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
+ if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+ (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())) {
+ if (!DoXform) return ICI;
+
+ Value *In = ICI->getOperand(0);
+ Value *Sh = ConstantInt::get(In->getType(),
+ In->getType()->getScalarSizeInBits()-1);
+ In = Builder->CreateLShr(In, Sh, In->getName()+".lobit");
+ if (In->getType() != CI.getType())
+ In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp");
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
+ Constant *One = ConstantInt::get(In->getType(), 1);
+ In = Builder->CreateXor(In, One, In->getName()+".not");
+ }
+
+ return ReplaceInstUsesWith(CI, In);
+ }
+
+
+
+ // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
+ // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+ // zext (X == 1) to i32 --> X iff X has only the low bit set.
+ // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set.
+ // zext (X != 0) to i32 --> X iff X has only the low bit set.
+ // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
+ // zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
+ // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+ if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
+ // This only works for EQ and NE
+ ICI->isEquality()) {
+ // If Op1C is some other power of two, convert:
+ uint32_t BitWidth = Op1C->getType()->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+ ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne);
+
+ APInt KnownZeroMask(~KnownZero);
+ if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
+ if (!DoXform) return ICI;
+
+ bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
+ if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
+ // (X&4) == 2 --> false
+ // (X&4) != 2 --> true
+ Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()),
+ isNE);
+ Res = ConstantExpr::getZExt(Res, CI.getType());
+ return ReplaceInstUsesWith(CI, Res);
+ }
+
+ uint32_t ShiftAmt = KnownZeroMask.logBase2();
+ Value *In = ICI->getOperand(0);
+ if (ShiftAmt) {
+ // Perform a logical shr by shiftamt.
+ // Insert the shift to put the result in the low bit.
+ In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt),
+ In->getName()+".lobit");
+ }
+
+ if ((Op1CV != 0) == isNE) { // Toggle the low bit.
+ Constant *One = ConstantInt::get(In->getType(), 1);
+ In = Builder->CreateXor(In, One, "tmp");
+ }
+
+ if (CI.getType() == In->getType())
+ return ReplaceInstUsesWith(CI, In);
+ else
+ return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
+ }
+ }
+ }
+
+ // icmp ne A, B is equal to xor A, B when A and B have identical known bits
+ // and only a single unknown bit. It is also profitable to transform icmp eq
+ // into not(xor(A, B)) because that may lead to additional simplifications.
+ if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) {
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
+ uint32_t BitWidth = ITy->getBitWidth();
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+
+ APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
+ APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
+ APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+ ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS);
+ ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS);
+
+ if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
+ APInt KnownBits = KnownZeroLHS | KnownOneLHS;
+ APInt UnknownBit = ~KnownBits;
+ if (UnknownBit.countPopulation() == 1) {
+ if (!DoXform) return ICI;
+
+ Value *Result = Builder->CreateXor(LHS, RHS);
+
+ // Mask off any bits that are set and won't be shifted away.
+ if (KnownOneLHS.uge(UnknownBit))
+ Result = Builder->CreateAnd(Result,
+ ConstantInt::get(ITy, UnknownBit));
+
+ // Shift the bit we're testing down to the lsb.
+ Result = Builder->CreateLShr(
+ Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1));
+ Result->takeName(ICI);
+ return ReplaceInstUsesWith(CI, Result);
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// CanEvaluateZExtd - Determine if the specified value can be computed in the
+/// specified wider type and produce the same low bits. If not, return false.
+///
+/// If this function returns true, it can also return a non-zero number of bits
+/// (in BitsToClear) which indicates that the value it computes is correct for
+/// the zero extend, but that the additional BitsToClear bits need to be zero'd
+/// out. For example, to promote something like:
+///
+/// %B = trunc i64 %A to i32
+/// %C = lshr i32 %B, 8
+/// %E = zext i32 %C to i64
+///
+/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
+/// set to 8 to indicate that the promoted value needs to have bits 24-31
+/// cleared in addition to bits 32-63. Since an 'and' will be generated to
+/// clear the top bits anyway, doing this has no extra cost.
+///
+/// This function works on both vectors and scalars.
+static bool CanEvaluateZExtd(Value *V, const Type *Ty, unsigned &BitsToClear) {
+ BitsToClear = 0;
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If the input is a truncate from the destination type, we can trivially
+ // eliminate it, even if it has multiple uses.
+ // FIXME: This is currently disabled until codegen can handle this without
+ // pessimizing code, PR5997.
+ if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+ return true;
+
+ // We can't extend or shrink something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ unsigned Opc = I->getOpcode(), Tmp;
+ switch (Opc) {
+ case Instruction::ZExt: // zext(zext(x)) -> zext(x).
+ case Instruction::SExt: // zext(sext(x)) -> sext(x).
+ case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
+ return true;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::Shl:
+ if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear) ||
+ !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp))
+ return false;
+ // These can all be promoted if neither operand has 'bits to clear'.
+ if (BitsToClear == 0 && Tmp == 0)
+ return true;
+
+ // If the operation is an AND/OR/XOR and the bits to clear are zero in the
+ // other side, BitsToClear is ok.
+ if (Tmp == 0 &&
+ (Opc == Instruction::And || Opc == Instruction::Or ||
+ Opc == Instruction::Xor)) {
+ // We use MaskedValueIsZero here for generality, but the case we care
+ // about the most is constant RHS.
+ unsigned VSize = V->getType()->getScalarSizeInBits();
+ if (MaskedValueIsZero(I->getOperand(1),
+ APInt::getHighBitsSet(VSize, BitsToClear)))
+ return true;
+ }
+
+ // Otherwise, we don't know how to analyze this BitsToClear case yet.
+ return false;
+
+ case Instruction::LShr:
+ // We can promote lshr(x, cst) if we can promote x, though this relies on
+ // the ultimate 'and' to clear out the high bits we are shifting in.
+ if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear))
+ return false;
+ BitsToClear += Amt->getZExtValue();
+ if (BitsToClear > V->getType()->getScalarSizeInBits())
+ BitsToClear = V->getType()->getScalarSizeInBits();
+ return true;
+ }
+ // Cannot promote variable LSHR.
+ return false;
+ case Instruction::Select:
+ if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp) ||
+ !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear) ||
+ // TODO: If important, we could handle the case when the BitsToClear are
+ // known zero in the disagreeing side.
+ Tmp != BitsToClear)
+ return false;
+ return true;
+
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear))
+ return false;
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp) ||
+ // TODO: If important, we could handle the case when the BitsToClear
+ // are known zero in the disagreeing input.
+ Tmp != BitsToClear)
+ return false;
+ return true;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ return false;
+ }
+}
+
+Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
+ // If this zero extend is only used by a truncate, let the truncate be
+ // eliminated before we try to optimize this zext.
+ if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
+ return 0;
+
+ // If one of the common conversions will work, do it.
+ if (Instruction *Result = commonCastTransforms(CI))
+ return Result;
+
+ // See if we can simplify any instructions used by the input whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(CI))
+ return &CI;
+
+ Value *Src = CI.getOperand(0);
+ const Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+
+ // Attempt to extend the entire input expression tree to the destination
+ // type. Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ unsigned BitsToClear;
+ if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
+ CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
+ assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
+ "Unreasonable BitsToClear");
+
+ // Okay, we can transform this! Insert the new expression now.
+ DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+ " to avoid zero extend: " << CI);
+ Value *Res = EvaluateInDifferentType(Src, DestTy, false);
+ assert(Res->getType() == DestTy);
+
+ uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear;
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+ // If the high bits are already filled with zeros, just replace this
+ // cast with the result.
+ if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize,
+ DestBitSize-SrcBitsKept)))
+ return ReplaceInstUsesWith(CI, Res);
+
+ // We need to emit an AND to clear the high bits.
+ Constant *C = ConstantInt::get(Res->getType(),
+ APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
+ return BinaryOperator::CreateAnd(Res, C);
+ }
+
+ // If this is a TRUNC followed by a ZEXT then we are dealing with integral
+ // types and if the sizes are just right we can convert this into a logical
+ // 'and' which will be much cheaper than the pair of casts.
+ if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
+ // TODO: Subsume this into EvaluateInDifferentType.
+
+ // Get the sizes of the types involved. We know that the intermediate type
+ // will be smaller than A or C, but don't know the relation between A and C.
+ Value *A = CSrc->getOperand(0);
+ unsigned SrcSize = A->getType()->getScalarSizeInBits();
+ unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
+ unsigned DstSize = CI.getType()->getScalarSizeInBits();
+ // If we're actually extending zero bits, then if
+ // SrcSize < DstSize: zext(a & mask)
+ // SrcSize == DstSize: a & mask
+ // SrcSize > DstSize: trunc(a) & mask
+ if (SrcSize < DstSize) {
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+ Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
+ Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");
+ return new ZExtInst(And, CI.getType());
+ }
+
+ if (SrcSize == DstSize) {
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+ return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
+ AndValue));
+ }
+ if (SrcSize > DstSize) {
+ Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp");
+ APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
+ return BinaryOperator::CreateAnd(Trunc,
+ ConstantInt::get(Trunc->getType(),
+ AndValue));
+ }
+ }
+
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
+ return transformZExtICmp(ICI, CI);
+
+ BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
+ if (SrcI && SrcI->getOpcode() == Instruction::Or) {
+ // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one
+ // of the (zext icmp) will be transformed.
+ ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
+ ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
+ if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
+ (transformZExtICmp(LHS, CI, false) ||
+ transformZExtICmp(RHS, CI, false))) {
+ Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName());
+ Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName());
+ return BinaryOperator::Create(Instruction::Or, LCast, RCast);
+ }
+ }
+
+ // zext(trunc(t) & C) -> (t & zext(C)).
+ if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse())
+ if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+ if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) {
+ Value *TI0 = TI->getOperand(0);
+ if (TI0->getType() == CI.getType())
+ return
+ BinaryOperator::CreateAnd(TI0,
+ ConstantExpr::getZExt(C, CI.getType()));
+ }
+
+ // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
+ if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse())
+ if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+ if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0)))
+ if (And->getOpcode() == Instruction::And && And->hasOneUse() &&
+ And->getOperand(1) == C)
+ if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) {
+ Value *TI0 = TI->getOperand(0);
+ if (TI0->getType() == CI.getType()) {
+ Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
+ Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp");
+ return BinaryOperator::CreateXor(NewAnd, ZC);
+ }
+ }
+
+ // zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1
+ Value *X;
+ if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isInteger(1) &&
+ match(SrcI, m_Not(m_Value(X))) &&
+ (!X->hasOneUse() || !isa<CmpInst>(X))) {
+ Value *New = Builder->CreateZExt(X, CI.getType());
+ return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
+ }
+
+ return 0;
+}
+
+/// CanEvaluateSExtd - Return true if we can take the specified value
+/// and return it as type Ty without inserting any new casts and without
+/// changing the value of the common low bits. This is used by code that tries
+/// to promote integer operations to a wider type, which allows us to eliminate
+/// the extension.
+///
+/// This function works on both vectors and scalars.
+///
+static bool CanEvaluateSExtd(Value *V, const Type *Ty) {
+ assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
+ "Can't sign extend type to a smaller type");
+ // If this is a constant, it can be trivially promoted.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If this is a truncate from the dest type, we can trivially eliminate it,
+ // even if it has multiple uses.
+ // FIXME: This is currently disabled until codegen can handle this without
+ // pessimizing code, PR5997.
+ if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+ return true;
+
+ // We can't extend or shrink something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::SExt: // sext(sext(x)) -> sext(x)
+ case Instruction::ZExt: // sext(zext(x)) -> zext(x)
+ case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
+ return true;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // These operators can all arbitrarily be extended if their inputs can.
+ return CanEvaluateSExtd(I->getOperand(0), Ty) &&
+ CanEvaluateSExtd(I->getOperand(1), Ty);
+
+ //case Instruction::Shl: TODO
+ //case Instruction::LShr: TODO
+
+ case Instruction::Select:
+ return CanEvaluateSExtd(I->getOperand(1), Ty) &&
+ CanEvaluateSExtd(I->getOperand(2), Ty);
+
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateSExtd(PN->getIncomingValue(i), Ty)) return false;
+ return true;
+ }
+ default:
+ // TODO: Can handle more cases here.
+ break;
+ }
+
+ return false;
+}
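+
+// For illustration (assumed IR, not from this patch): if %a is used only by
+// the sext in
+//   %a = add i16 %x, %y
+//   %s = sext i16 %a to i32
+// then CanEvaluateSExtd(%a, i32) is true, and visitSExt rebuilds the tree as
+//   %x32 = sext i16 %x to i32
+//   %y32 = sext i16 %y to i32
+//   %a32 = add i32 %x32, %y32
+// re-extending with shl/ashr by 16 unless ComputeNumSignBits proves the top
+// bits are already sign bits.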
+
+Instruction *InstCombiner::visitSExt(SExtInst &CI) {
+ // If this sign extend is only used by a truncate, let the truncate be
+ // eliminated before we try to optimize this sext.
+ if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
+ return 0;
+
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ // See if we can simplify any instructions used by the input whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(CI))
+ return &CI;
+
+ Value *Src = CI.getOperand(0);
+ const Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+
+ // Canonicalize sign-extend from i1 to a select.
+ if (Src->getType()->isInteger(1))
+ return SelectInst::Create(Src,
+ Constant::getAllOnesValue(CI.getType()),
+ Constant::getNullValue(CI.getType()));
+
+ // Attempt to extend the entire input expression tree to the destination
+ // type. Only do this if the dest type is a simple type, don't convert the
+ // expression tree to something weird like i93 unless the source is also
+ // strange.
+ if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
+ CanEvaluateSExtd(Src, DestTy)) {
+ // Okay, we can transform this! Insert the new expression now.
+ DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+ " to avoid sign extend: " << CI);
+ Value *Res = EvaluateInDifferentType(Src, DestTy, true);
+ assert(Res->getType() == DestTy);
+
+ uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+ // If the high bits are already filled with sign bit, just replace this
+ // cast with the result.
+ if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize)
+ return ReplaceInstUsesWith(CI, Res);
+
+ // We need to emit a shl + ashr to do the sign extend.
+ Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
+ return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"),
+ ShAmt);
+ }
+
+ // If the input is a shl/ashr pair by the same constant, then this is a sign
+ // extension from a smaller value. If we could trust arbitrary bitwidth
+ // integers, we could turn this into a truncate to the smaller bitwidth and
+ // then use a sext for the whole extension. Since we don't, look deeper and
+ // check
+ // for a truncate. If the source and dest are the same type, eliminate the
+ // trunc and extend and just do shifts. For example, turn:
+ // %a = trunc i32 %i to i8
+ // %b = shl i8 %a, 6
+ // %c = ashr i8 %b, 6
+ // %d = sext i8 %c to i32
+ // into:
+ // %a = shl i32 %i, 30
+ // %d = ashr i32 %a, 30
+ Value *A = 0;
+ // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
+ ConstantInt *BA = 0, *CA = 0;
+ if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_ConstantInt(BA)),
+ m_ConstantInt(CA))) &&
+ BA == CA && A->getType() == CI.getType()) {
+ unsigned MidSize = Src->getType()->getScalarSizeInBits();
+ unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
+ unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
+ Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
+ A = Builder->CreateShl(A, ShAmtV, CI.getName());
+ return BinaryOperator::CreateAShr(A, ShAmtV);
+ }
+
+ return 0;
+}
+
+
+/// FitsInFPType - Return a Constant* for the specified FP constant if it fits
+/// in the specified FP type without changing its value.
+static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
+ bool losesInfo;
+ APFloat F = CFP->getValueAPF();
+ (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
+ if (!losesInfo)
+ return ConstantFP::get(CFP->getContext(), F);
+ return 0;
+}
+
+/// LookThroughFPExtensions - If this is an fp extension instruction, look
+/// through it until we get the source value.
+static Value *LookThroughFPExtensions(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (I->getOpcode() == Instruction::FPExt)
+ return LookThroughFPExtensions(I->getOperand(0));
+
+ // If this value is a constant, return the constant in the smallest FP type
+ // that can accurately represent it. This allows us to turn
+ // (float)((double)X+2.0) into x+2.0f.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext()))
+ return V; // No constant folding of this.
+ // See if the value can be truncated to float and then reextended.
+ if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle))
+ return V;
+ if (CFP->getType()->isDoubleTy())
+ return V; // Won't shrink.
+ if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble))
+ return V;
+ // Don't try to shrink to various long double types.
+ }
+
+ return V;
+}
+
+Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
+ // smaller than the destination type, we can eliminate the truncate by doing
+ // the add in the smaller type. This applies to fadd/fsub/fmul/fdiv as well
+ // as many builtins (sqrt, etc).
+ BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
+ if (OpI && OpI->hasOneUse()) {
+ switch (OpI->getOpcode()) {
+ default: break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ const Type *SrcTy = OpI->getType();
+ Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
+ Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
+ if (LHSTrunc->getType() != SrcTy &&
+ RHSTrunc->getType() != SrcTy) {
+ unsigned DstSize = CI.getType()->getScalarSizeInBits();
+ // If the source types were both smaller than the destination type of
+ // the cast, do this xform.
+ if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
+ RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
+ LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType());
+ RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType());
+ return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
+ }
+ }
+ break;
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitFPExt(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
+ Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+ if (OpI == 0)
+ return commonCastTransforms(FI);
+
+ // fptoui(uitofp(X)) --> X
+ // fptoui(sitofp(X)) --> X
+ // This is safe if the intermediate type has enough bits in its mantissa to
+ // accurately represent all values of X. For example, do not do this with
+  // i64->float->i64. This is also safe for the sitofp case, because any
+  // negative 'X' value would cause an undefined result for the fptoui.
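+  //
+  // For example (illustrative IR), "fptoui (uitofp i16 %x to float) to i16"
+  // becomes %x, because a float's 24-bit mantissa holds every i16 value; the
+  // same is not true for an i64 round-trip through float.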
+ if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
+ OpI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
+ OpI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+
+ return commonCastTransforms(FI);
+}
+
+Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
+ Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+ if (OpI == 0)
+ return commonCastTransforms(FI);
+
+ // fptosi(sitofp(X)) --> X
+ // fptosi(uitofp(X)) --> X
+  // This is safe if the intermediate type has enough bits in its mantissa to
+  // accurately represent all values of X. For example, do not do this with
+  // i64->float->i64. This is also safe for the uitofp case, because any 'X'
+  // value with the sign bit set would make the fptosi overflow, which is
+  // undefined.
+ if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
+ OpI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getScalarSizeInBits() <=
+ OpI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+
+ return commonCastTransforms(FI);
+}
+
+Instruction *InstCombiner::visitUIToFP(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitSIToFP(CastInst &CI) {
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
+ // If the source integer type is larger than the intptr_t type for
+ // this target, do a trunc to the intptr_t type, then inttoptr of it. This
+ // allows the trunc to be exposed to other transforms. Don't do this for
+ // extending inttoptr's, because we don't know if the target sign or zero
+ // extends to pointers.
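+  //
+  // For example (illustrative, assuming a target with 64-bit pointers):
+  //   inttoptr i128 %x to i8*
+  // becomes:
+  //   %t = trunc i128 %x to i64
+  //   inttoptr i64 %t to i8*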
+ if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() >
+ TD->getPointerSizeInBits()) {
+ Value *P = Builder->CreateTrunc(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()), "tmp");
+ return new IntToPtrInst(P, CI.getType());
+ }
+
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ return 0;
+}
+
+/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
+Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
+ Value *Src = CI.getOperand(0);
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
+ // If casting the result of a getelementptr instruction with no offset, turn
+ // this into a cast of the original pointer!
+ if (GEP->hasAllZeroIndices()) {
+      // Changing the cast operand is usually not a good idea, but it is safe
+      // here because the pointer operand is being replaced with another
+      // pointer operand, so the opcode doesn't need to change.
+ Worklist.Add(GEP);
+ CI.setOperand(0, GEP->getOperand(0));
+ return &CI;
+ }
+
+ // If the GEP has a single use, and the base pointer is a bitcast, and the
+ // GEP computes a constant offset, see if we can convert these three
+ // instructions into fewer. This typically happens with unions and other
+ // non-type-safe code.
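+    //
+    // For example (illustrative): a constant-offset gep on
+    // "bitcast %struct.S* %p to i8*" may become a gep directly on %p that
+    // indexes to the field at that offset, followed by a single cast.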
+ if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) &&
+ GEP->hasAllConstantIndices()) {
+ // We are guaranteed to get a constant from EmitGEPOffset.
+ ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP));
+ int64_t Offset = OffsetV->getSExtValue();
+
+ // Get the base pointer input of the bitcast, and the type it points to.
+ Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
+ const Type *GEPIdxTy =
+ cast<PointerType>(OrigBase->getType())->getElementType();
+ SmallVector<Value*, 8> NewIndices;
+ if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices)) {
+ // If we were able to index down into an element, create the GEP
+ // and bitcast the result. This eliminates one bitcast, potentially
+ // two.
+ Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
+ Builder->CreateInBoundsGEP(OrigBase,
+ NewIndices.begin(), NewIndices.end()) :
+ Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end());
+ NGEP->takeName(GEP);
+
+ if (isa<BitCastInst>(CI))
+ return new BitCastInst(NGEP, CI.getType());
+ assert(isa<PtrToIntInst>(CI));
+ return new PtrToIntInst(NGEP, CI.getType());
+ }
+ }
+ }
+
+ return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
+ // If the destination integer type is smaller than the intptr_t type for
+ // this target, do a ptrtoint to intptr_t then do a trunc. This allows the
+ // trunc to be exposed to other transforms. Don't do this for extending
+ // ptrtoint's, because we don't know if the target sign or zero extends its
+ // pointers.
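+  //
+  // For example (illustrative, assuming a target with 64-bit pointers):
+  //   ptrtoint i8* %p to i32
+  // becomes:
+  //   %t = ptrtoint i8* %p to i64
+  //   trunc i64 %t to i32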
+ if (TD &&
+ CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
+ TD->getIntPtrType(CI.getContext()),
+ "tmp");
+ return new TruncInst(P, CI.getType());
+ }
+
+ return commonPointerCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
+ // If the operands are integer typed then apply the integer transforms,
+ // otherwise just apply the common ones.
+ Value *Src = CI.getOperand(0);
+ const Type *SrcTy = Src->getType();
+ const Type *DestTy = CI.getType();
+
+ // Get rid of casts from one type to the same type. These are useless and can
+ // be replaced by the operand.
+  if (DestTy == SrcTy)
+ return ReplaceInstUsesWith(CI, Src);
+
+ if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
+ const PointerType *SrcPTy = cast<PointerType>(SrcTy);
+ const Type *DstElTy = DstPTy->getElementType();
+ const Type *SrcElTy = SrcPTy->getElementType();
+
+ // If the address spaces don't match, don't eliminate the bitcast, which is
+ // required for changing types.
+ if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
+ return 0;
+
+    // If we are casting an alloca to a pointer to a type of the same
+ // size, rewrite the allocation instruction to allocate the "right" type.
+ // There is no need to modify malloc calls because it is their bitcast that
+ // needs to be cleaned up.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
+ if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
+ return V;
+
+ // If the source and destination are pointers, and this cast is equivalent
+ // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
+ // This can enhance SROA and other transforms that want type-safe pointers.
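+    //
+    // For example (illustrative):
+    //   bitcast [4 x i32]* %p to i32*
+    // becomes:
+    //   getelementptr inbounds [4 x i32]* %p, i32 0, i32 0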
+ Constant *ZeroUInt =
+ Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
+ unsigned NumZeros = 0;
+ while (SrcElTy != DstElTy &&
+ isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
+ SrcElTy->getNumContainedTypes() /* not "{}" */) {
+ SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
+ ++NumZeros;
+ }
+
+ // If we found a path from the src to dest, create the getelementptr now.
+ if (SrcElTy == DstElTy) {
+ SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
+ return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(),"",
+ ((Instruction*)NULL));
+ }
+ }
+
+ if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
+ if (DestVTy->getNumElements() == 1 && !isa<VectorType>(SrcTy)) {
+ Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
+ return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
+ Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
+ // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
+ }
+ }
+
+ if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
+ if (SrcVTy->getNumElements() == 1 && !isa<VectorType>(DestTy)) {
+ Value *Elem =
+ Builder->CreateExtractElement(Src,
+ Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
+ return CastInst::Create(Instruction::BitCast, Elem, DestTy);
+ }
+ }
+
+ if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
+ // Okay, we have (bitcast (shuffle ..)). Check to see if this is
+ // a bitconvert to a vector with the same # elts.
+ if (SVI->hasOneUse() && isa<VectorType>(DestTy) &&
+ cast<VectorType>(DestTy)->getNumElements() ==
+ SVI->getType()->getNumElements() &&
+ SVI->getType()->getNumElements() ==
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) {
+ BitCastInst *Tmp;
+ // If either of the operands is a cast from CI.getType(), then
+ // evaluating the shuffle in the casted destination's type will allow
+ // us to eliminate at least one cast.
+ if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) &&
+ Tmp->getOperand(0)->getType() == DestTy) ||
+ ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) &&
+ Tmp->getOperand(0)->getType() == DestTy)) {
+ Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
+ Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
+ // Return a new shuffle vector. Use the same element ID's, as we
+ // know the vector types match #elts.
+ return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2));
+ }
+ }
+ }
+
+ if (isa<PointerType>(SrcTy))
+ return commonPointerCastTransforms(CI);
+ return commonCastTransforms(CI);
+}
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
new file mode 100644
index 0000000..e59406c6
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -0,0 +1,2475 @@
+//===- InstCombineCompares.cpp --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitICmp and visitFCmp functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// AddOne - Add one to a ConstantInt
+static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+}
+/// SubOne - Subtract one from a ConstantInt
+static Constant *SubOne(ConstantInt *C) {
+ return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
+}
+
+static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
+ return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
+}
+
+static bool HasAddOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
+  if (IsSigned) {
+    if (In2->getValue().isNegative())
+      return Result->getValue().sgt(In1->getValue());
+    return Result->getValue().slt(In1->getValue());
+  }
+  return Result->getValue().ult(In1->getValue());
+}
+
+/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
+/// overflowed for this type.
+static bool AddWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getAdd(In1, In2);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
+ if (HasAddOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasAddOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+
+static bool HasSubOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
+  if (IsSigned) {
+    if (In2->getValue().isNegative())
+      return Result->getValue().slt(In1->getValue());
+    return Result->getValue().sgt(In1->getValue());
+  }
+  return Result->getValue().ugt(In1->getValue());
+}
+
+/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
+/// overflowed for this type.
+static bool SubWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getSub(In1, In2);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
+ if (HasSubOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasSubOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+
+/// isSignBitCheck - Given an exploded icmp instruction, return true if the
+/// comparison only checks the sign bit. If it only checks the sign bit, set
+/// TrueIfSigned if the result of the comparison is true when the input value is
+/// signed.
+static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
+ bool &TrueIfSigned) {
+ switch (pred) {
+ case ICmpInst::ICMP_SLT: // True if LHS s< 0
+ TrueIfSigned = true;
+ return RHS->isZero();
+ case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1
+ TrueIfSigned = true;
+ return RHS->isAllOnesValue();
+ case ICmpInst::ICMP_SGT: // True if LHS s> -1
+ TrueIfSigned = false;
+ return RHS->isAllOnesValue();
+ case ICmpInst::ICMP_UGT:
+ // True if LHS u> RHS and RHS == high-bit-mask - 1
+ TrueIfSigned = true;
+ return RHS->getValue() ==
+ APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits());
+ case ICmpInst::ICMP_UGE:
+ // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
+ TrueIfSigned = true;
+ return RHS->getValue().isSignBit();
+ default:
+ return false;
+ }
+}
+
+// isHighOnes - Return true if the constant is of the form 1+0+.
+// This is the same as lowones(~X).
+static bool isHighOnes(const ConstantInt *CI) {
+ return (~CI->getValue() + 1).isPowerOf2();
+}
+
+/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a
+/// set of known zero and one bits, compute the maximum and minimum values that
+/// could have the specified known zero and known one bits, returning them in
+/// min/max.
+static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero,
+ const APInt& KnownOne,
+ APInt& Min, APInt& Max) {
+ assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
+ KnownZero.getBitWidth() == Min.getBitWidth() &&
+ KnownZero.getBitWidth() == Max.getBitWidth() &&
+ "KnownZero, KnownOne and Min, Max must have equal bitwidth.");
+ APInt UnknownBits = ~(KnownZero|KnownOne);
+
+ // The minimum value is when all unknown bits are zeros, EXCEPT for the sign
+ // bit if it is unknown.
+ Min = KnownOne;
+ Max = KnownOne|UnknownBits;
+
+ if (UnknownBits.isNegative()) { // Sign bit is unknown
+ Min.set(Min.getBitWidth()-1);
+ Max.clear(Max.getBitWidth()-1);
+ }
+}
+
+// ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and
+// a set of known zero and one bits, compute the maximum and minimum values that
+// could have the specified known zero and known one bits, returning them in
+// min/max.
+static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
+ const APInt &KnownOne,
+ APInt &Min, APInt &Max) {
+ assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
+ KnownZero.getBitWidth() == Min.getBitWidth() &&
+ KnownZero.getBitWidth() == Max.getBitWidth() &&
+ "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
+ APInt UnknownBits = ~(KnownZero|KnownOne);
+
+ // The minimum value is when the unknown bits are all zeros.
+ Min = KnownOne;
+ // The maximum value is when the unknown bits are all ones.
+ Max = KnownOne|UnknownBits;
+}
+
+
+
+/// FoldCmpLoadFromIndexedGlobal - Called when we see this pattern:
+/// cmp pred (load (gep GV, ...)), cmpcst
+/// where GV is a global variable with a constant initializer. Try to simplify
+/// this into some simple computation that does not need the load. For example
+/// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3".
+///
+/// If AndCst is non-null, then the loaded value is masked with that constant
+/// before doing the comparison. This handles cases like "A[i]&4 == 0".
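+///
+/// For example (illustrative), with @foo = constant [4 x i8] c"abca", the
+/// comparison "icmp eq (load (gep @foo, 0, i)), 'a'" is true only for i == 0
+/// and i == 3, so it can be lowered to "i == 0 | i == 3".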
+Instruction *InstCombiner::
+FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
+ CmpInst &ICI, ConstantInt *AndCst) {
+ // We need TD information to know the pointer size unless this is inbounds.
+ if (!GEP->isInBounds() && TD == 0) return 0;
+
+ ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (Init == 0 || Init->getNumOperands() > 1024) return 0;
+
+  // There are many forms of this optimization we can handle; for now, just do
+  // the simple index into a single-dimensional array.
+ //
+ // Require: GEP GV, 0, i {{, constant indices}}
+ if (GEP->getNumOperands() < 3 ||
+ !isa<ConstantInt>(GEP->getOperand(1)) ||
+ !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
+ isa<Constant>(GEP->getOperand(2)))
+ return 0;
+
+ // Check that indices after the variable are constants and in-range for the
+ // type they index. Collect the indices. This is typically for arrays of
+ // structs.
+ SmallVector<unsigned, 4> LaterIndices;
+
+ const Type *EltTy = cast<ArrayType>(Init->getType())->getElementType();
+ for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
+ ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (Idx == 0) return 0; // Variable index.
+
+ uint64_t IdxVal = Idx->getZExtValue();
+    if ((unsigned)IdxVal != IdxVal) return 0; // Array index too large.
+
+ if (const StructType *STy = dyn_cast<StructType>(EltTy))
+ EltTy = STy->getElementType(IdxVal);
+ else if (const ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
+ if (IdxVal >= ATy->getNumElements()) return 0;
+ EltTy = ATy->getElementType();
+ } else {
+ return 0; // Unknown type.
+ }
+
+ LaterIndices.push_back(IdxVal);
+ }
+
+ enum { Overdefined = -3, Undefined = -2 };
+
+ // Variables for our state machines.
+
+ // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
+ // "i == 47 | i == 87", where 47 is the first index the condition is true for,
+ // and 87 is the second (and last) index. FirstTrueElement is -2 when
+ // undefined, otherwise set to the first true element. SecondTrueElement is
+ // -2 when undefined, -3 when overdefined and >= 0 when that index is true.
+ int FirstTrueElement = Undefined, SecondTrueElement = Undefined;
+
+ // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
+ // form "i != 47 & i != 87". Same state transitions as for true elements.
+ int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
+
+ /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
+ /// define a state machine that triggers for ranges of values that the index
+ /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'.
+ /// This is -2 when undefined, -3 when overdefined, and otherwise the last
+ /// index in the range (inclusive). We use -2 for undefined here because we
+ /// use relative comparisons and don't want 0-1 to match -1.
+ int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
+
+ // MagicBitvector - This is a magic bitvector where we set a bit if the
+  // comparison is true for element 'i'. If there are 64 or fewer elements in
+ // the array, this will fully represent all the comparison results.
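+  //
+  // For example (illustrative), for the initializer c"abca" compared against
+  // 'a', elements 0 and 3 are true, so MagicBitvector would be 0b1001.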
+ uint64_t MagicBitvector = 0;
+
+
+ // Scan the array and see if one of our patterns matches.
+ Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
+ for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
+ Constant *Elt = Init->getOperand(i);
+
+ // If this is indexing an array of structures, get the structure element.
+ if (!LaterIndices.empty())
+ Elt = ConstantExpr::getExtractValue(Elt, LaterIndices.data(),
+ LaterIndices.size());
+
+ // If the element is masked, handle it.
+ if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
+
+ // Find out if the comparison would be true or false for the i'th element.
+ Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
+ CompareRHS, TD);
+ // If the result is undef for this element, ignore it.
+ if (isa<UndefValue>(C)) {
+ // Extend range state machines to cover this element in case there is an
+ // undef in the middle of the range.
+ if (TrueRangeEnd == (int)i-1)
+ TrueRangeEnd = i;
+ if (FalseRangeEnd == (int)i-1)
+ FalseRangeEnd = i;
+ continue;
+ }
+
+ // If we can't compute the result for any of the elements, we have to give
+ // up evaluating the entire conditional.
+ if (!isa<ConstantInt>(C)) return 0;
+
+ // Otherwise, we know if the comparison is true or false for this element,
+ // update our state machines.
+ bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
+
+ // State machine for single/double/range index comparison.
+ if (IsTrueForElt) {
+ // Update the TrueElement state machine.
+ if (FirstTrueElement == Undefined)
+ FirstTrueElement = TrueRangeEnd = i; // First true element.
+ else {
+ // Update double-compare state machine.
+ if (SecondTrueElement == Undefined)
+ SecondTrueElement = i;
+ else
+ SecondTrueElement = Overdefined;
+
+ // Update range state machine.
+ if (TrueRangeEnd == (int)i-1)
+ TrueRangeEnd = i;
+ else
+ TrueRangeEnd = Overdefined;
+ }
+ } else {
+ // Update the FalseElement state machine.
+ if (FirstFalseElement == Undefined)
+ FirstFalseElement = FalseRangeEnd = i; // First false element.
+ else {
+ // Update double-compare state machine.
+ if (SecondFalseElement == Undefined)
+ SecondFalseElement = i;
+ else
+ SecondFalseElement = Overdefined;
+
+ // Update range state machine.
+ if (FalseRangeEnd == (int)i-1)
+ FalseRangeEnd = i;
+ else
+ FalseRangeEnd = Overdefined;
+ }
+ }
+
+
+ // If this element is in range, update our magic bitvector.
+ if (i < 64 && IsTrueForElt)
+ MagicBitvector |= 1ULL << i;
+
+    // If all of our states become overdefined, bail out early. Since the
+    // predicate is expensive, only check it every 8 elements. This is only
+    // useful for very large arrays.
+    if ((i & 7) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
+ SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
+ FalseRangeEnd == Overdefined)
+ return 0;
+ }
+
+ // Now that we've scanned the entire array, emit our new comparison(s). We
+ // order the state machines in complexity of the generated code.
+ Value *Idx = GEP->getOperand(2);
+
+ // If the index is larger than the pointer size of the target, truncate the
+ // index down like the GEP would do implicitly. We don't have to do this for
+ // an inbounds GEP because the index can't be out of range.
+ if (!GEP->isInBounds() &&
+ Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits())
+ Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext()));
+
+ // If the comparison is only true for one or two elements, emit direct
+ // comparisons.
+ if (SecondTrueElement != Overdefined) {
+ // None true -> false.
+ if (FirstTrueElement == Undefined)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(GEP->getContext()));
+
+ Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
+
+ // True for one element -> 'i == 47'.
+ if (SecondTrueElement == Undefined)
+ return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
+
+    // True for two elements -> 'i == 47 | i == 87'.
+ Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx);
+ Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
+ Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx);
+ return BinaryOperator::CreateOr(C1, C2);
+ }
+
+ // If the comparison is only false for one or two elements, emit direct
+ // comparisons.
+ if (SecondFalseElement != Overdefined) {
+ // None false -> true.
+ if (FirstFalseElement == Undefined)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(GEP->getContext()));
+
+ Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
+
+ // False for one element -> 'i != 47'.
+ if (SecondFalseElement == Undefined)
+ return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
+
+    // False for two elements -> 'i != 47 & i != 87'.
+ Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx);
+ Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement);
+ Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx);
+ return BinaryOperator::CreateAnd(C1, C2);
+ }
+
+ // If the comparison can be replaced with a range comparison for the elements
+ // where it is true, emit the range check.
+ if (TrueRangeEnd != Overdefined) {
+ assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
+
+ // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
+ if (FirstTrueElement) {
+ Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
+ Idx = Builder->CreateAdd(Idx, Offs);
+ }
+
+ Value *End = ConstantInt::get(Idx->getType(),
+ TrueRangeEnd-FirstTrueElement+1);
+ return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
+ }
+
+ // False range check.
+ if (FalseRangeEnd != Overdefined) {
+ assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
+ // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
+ if (FirstFalseElement) {
+ Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
+ Idx = Builder->CreateAdd(Idx, Offs);
+ }
+
+ Value *End = ConstantInt::get(Idx->getType(),
+ FalseRangeEnd-FirstFalseElement);
+ return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
+ }
+
+
+ // If a 32-bit or 64-bit magic bitvector captures the entire comparison state
+ // of this load, replace it with computation that does:
+ // ((magic_cst >> i) & 1) != 0
+ if (Init->getNumOperands() <= 32 ||
+ (TD && Init->getNumOperands() <= 64 && TD->isLegalInteger(64))) {
+ const Type *Ty;
+ if (Init->getNumOperands() <= 32)
+ Ty = Type::getInt32Ty(Init->getContext());
+ else
+ Ty = Type::getInt64Ty(Init->getContext());
+ Value *V = Builder->CreateIntCast(Idx, Ty, false);
+ V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
+ V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
+ return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
+ }
+
+ return 0;
+}
+
+
+/// EvaluateGEPOffsetExpression - Return a value that can be used to compare
+/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we
+/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can
+/// be complex, and scales are involved. The above expression would also be
+/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).
+/// This latter form is less amenable to optimization though, and we are allowed
+/// to generate the first by knowing that pointer arithmetic doesn't overflow.
+///
+/// If we can't emit an optimized form for this expression, this returns null.
+///
+static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
+ InstCombiner &IC) {
+ TargetData &TD = *IC.getTargetData();
+ gep_type_iterator GTI = gep_type_begin(GEP);
+
+ // Check to see if this gep only has a single variable index. If so, and if
+ // any constant indices are a multiple of its scale, then we can compute this
+ // in terms of the scale of the variable index. For example, if the GEP
+ // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
+ // because the expression will cross zero at the same point.
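+  //
+  // For example (illustrative): for &A[3 + i] on an i32 array, the offset is
+  // "12 + i*4"; it is zero exactly when "3 + i" is zero, so the cheaper form
+  // can be compared against zero instead.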
+ unsigned i, e = GEP->getNumOperands();
+ int64_t Offset = 0;
+ for (i = 1; i != e; ++i, ++GTI) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ } else {
+ // Found our variable index.
+ break;
+ }
+ }
+
+ // If there are no variable indices, we must have a constant offset, just
+ // evaluate it the general way.
+ if (i == e) return 0;
+
+ Value *VariableIdx = GEP->getOperand(i);
+ // Determine the scale factor of the variable element. For example, this is
+ // 4 if the variable index is into an array of i32.
+ uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType());
+
+ // Verify that there are no other variable indices. If so, emit the hard way.
+ for (++i, ++GTI; i != e; ++i, ++GTI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!CI) return 0;
+
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ }
+
+ // Okay, we know we have a single variable index, which must be a
+ // pointer/array/vector index. If there is no offset, life is simple, return
+ // the index.
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ if (Offset == 0) {
+    // Cast to intptr_t in case a truncation occurs. If an extension is needed,
+ // we don't need to bother extending: the extension won't affect where the
+ // computation crosses zero.
+ if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth)
+ VariableIdx = new TruncInst(VariableIdx,
+ TD.getIntPtrType(VariableIdx->getContext()),
+ VariableIdx->getName(), &I);
+ return VariableIdx;
+ }
+
+ // Otherwise, there is an index. The computation we will do will be modulo
+ // the pointer size, so get it.
+ uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+ Offset &= PtrSizeMask;
+ VariableScale &= PtrSizeMask;
+
+ // To do this transformation, any constant index must be a multiple of the
+ // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
+ // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a
+ // multiple of the variable scale.
+ int64_t NewOffs = Offset / (int64_t)VariableScale;
+ if (Offset != NewOffs*(int64_t)VariableScale)
+ return 0;
+
+ // Okay, we can do this evaluation. Start by converting the index to intptr.
+ const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
+ if (VariableIdx->getType() != IntPtrTy)
+ VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy,
+ true /*SExt*/,
+ VariableIdx->getName(), &I);
+ Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
+ return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I);
+}
+
+/// FoldGEPICmp - Fold comparisons between a GEP instruction and something
+/// else. At this point we know that the GEP is on the LHS of the comparison.
+Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
+ ICmpInst::Predicate Cond,
+ Instruction &I) {
+ // Look through bitcasts.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
+ RHS = BCI->getOperand(0);
+
+ Value *PtrBase = GEPLHS->getOperand(0);
+ if (TD && PtrBase == RHS && GEPLHS->isInBounds()) {
+ // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
+ // This transformation (ignoring the base and scales) is valid because we
+ // know pointers can't overflow since the gep is inbounds. See if we can
+ // output an optimized form.
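+    //
+    // For example (illustrative):
+    //   icmp ult (gep inbounds i32* %p, i64 %i), %p
+    // becomes "icmp slt i64 %i, 0" once the offset expression is reduced to
+    // the variable index.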
+ Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this);
+
+ // If not, synthesize the offset the hard way.
+ if (Offset == 0)
+ Offset = EmitGEPOffset(GEPLHS);
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
+ Constant::getNullValue(Offset->getType()));
+ } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
+ // If the base pointers are different, but the indices are the same, just
+ // compare the base pointer.
+ if (PtrBase != GEPRHS->getOperand(0)) {
+ bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands();
+ IndicesTheSame &= GEPLHS->getOperand(0)->getType() ==
+ GEPRHS->getOperand(0)->getType();
+ if (IndicesTheSame)
+ for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
+ if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
+ IndicesTheSame = false;
+ break;
+ }
+
+ // If all indices are the same, just compare the base pointers.
+ if (IndicesTheSame)
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
+ GEPLHS->getOperand(0), GEPRHS->getOperand(0));
+
+      // Otherwise, the base pointers are different and the indices are
+      // different; bail out.
+ return 0;
+ }
+
+ // If one of the GEPs has all zero indices, recurse.
+ bool AllZeros = true;
+ for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(GEPLHS->getOperand(i)) ||
+ !cast<Constant>(GEPLHS->getOperand(i))->isNullValue()) {
+ AllZeros = false;
+ break;
+ }
+ if (AllZeros)
+ return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
+ ICmpInst::getSwappedPredicate(Cond), I);
+
+ // If the other GEP has all zero indices, recurse.
+ AllZeros = true;
+ for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(GEPRHS->getOperand(i)) ||
+ !cast<Constant>(GEPRHS->getOperand(i))->isNullValue()) {
+ AllZeros = false;
+ break;
+ }
+ if (AllZeros)
+ return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
+
+ if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
+ // If the GEPs only differ by one index, compare it.
+ unsigned NumDifferences = 0; // Keep track of # differences.
+ unsigned DiffOperand = 0; // The operand that differs.
+ for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
+ if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
+ if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() !=
+ GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) {
+ // Irreconcilable differences.
+ NumDifferences = 2;
+ break;
+ } else {
+ if (NumDifferences++) break;
+ DiffOperand = i;
+ }
+ }
+
+ if (NumDifferences == 0) // SAME GEP?
+ return ReplaceInstUsesWith(I, // No comparison is needed here.
+ ConstantInt::get(Type::getInt1Ty(I.getContext()),
+ ICmpInst::isTrueWhenEqual(Cond)));
+
+ else if (NumDifferences == 1) {
+ Value *LHSV = GEPLHS->getOperand(DiffOperand);
+ Value *RHSV = GEPRHS->getOperand(DiffOperand);
+ // Make sure we do a signed comparison here.
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
+ }
+ }
+
+ // Only lower this if the icmp is the only user of the GEP or if we expect
+ // the result to fold to a constant!
+ if (TD &&
+ (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
+ (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
+      // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)) ---> (OFFSET1 cmp OFFSET2)
+ Value *L = EmitGEPOffset(GEPLHS);
+ Value *R = EmitGEPOffset(GEPRHS);
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
+ }
+ }
+ return 0;
+}
+
+/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X".
+Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
+ Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred,
+ Value *TheAdd) {
+ // If we have X+0, exit early (simplifying logic below) and let it get folded
+ // elsewhere. icmp X+0, X -> icmp X, X
+ if (CI->isZero()) {
+ bool isTrue = ICmpInst::isTrueWhenEqual(Pred);
+ return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
+ }
+
+ // (X+4) == X -> false.
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
+
+ // (X+4) != X -> true.
+ if (Pred == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
+
+ // If this is an instruction (as opposed to constantexpr) get NUW/NSW info.
+ bool isNUW = false, isNSW = false;
+ if (BinaryOperator *Add = dyn_cast<BinaryOperator>(TheAdd)) {
+ isNUW = Add->hasNoUnsignedWrap();
+ isNSW = Add->hasNoSignedWrap();
+ }
+
+ // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
+  // so the values can never be equal. Similarly for all other "or equals"
+ // operators.
+
+ // (X+1) <u X --> X >u (MAXUINT-1) --> X == 255
+  // (X+2) <u X --> X >u (MAXUINT-2) --> X >u 253
+ // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0
+ if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
+ // If this is an NUW add, then this is always false.
+ if (isNUW)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
+
+ Value *R =
+ ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
+ return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
+ }
+
+ // (X+1) >u X --> X <u (0-1) --> X != 255
+ // (X+2) >u X --> X <u (0-2) --> X <u 254
+ // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
+ // If this is an NUW add, then this is always true.
+ if (isNUW)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
+ return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
+ }
+
+ unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
+ ConstantInt *SMax = ConstantInt::get(X->getContext(),
+ APInt::getSignedMaxValue(BitWidth));
+
+ // (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127
+ // (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125
+ // (X+MAXSINT) <s X --> X >s (MAXSINT-MAXSINT) --> X >s 0
+ // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1
+ // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126
+ // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127
+ if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
+ // If this is an NSW add, then we have two cases: if the constant is
+ // positive, then this is always false, if negative, this is always true.
+ if (isNSW) {
+ bool isTrue = CI->getValue().isNegative();
+ return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
+ }
+
+ return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
+ }
+
+ // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127
+ // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126
+ // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
+ // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
+ // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126
+ // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
+
+ // If this is an NSW add, then we have two cases: if the constant is
+ // positive, then this is always true, if negative, this is always false.
+ if (isNSW) {
+ bool isTrue = !CI->getValue().isNegative();
+ return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
+ }
+
+ assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
+ Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1);
+ return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
+}
+
+/// FoldICmpDivCst - Fold "icmp pred ([su]div X, DivRHS), CmpRHS" where DivRHS
+/// and CmpRHS are both known to be integer constants.
+Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS) {
+ ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
+ const APInt &CmpRHSV = CmpRHS->getValue();
+
+ // FIXME: If the operand types don't match the type of the divide
+ // then don't attempt this transform. The code below doesn't have the
+ // logic to deal with a signed divide and an unsigned compare (and
+ // vice versa). This is because (x /s C1) <s C2 produces different
+ // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
+ // (x /u C1) <u C2. Simply casting the operands and result won't
+ // work. :( The if statement below tests that condition and bails
+ // if it finds it.
+ bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
+ if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
+ return 0;
+ if (DivRHS->isZero())
+ return 0; // The ProdOV computation fails on divide by zero.
+ if (DivIsSigned && DivRHS->isAllOnesValue())
+    return 0; // The overflow computation also screws up here.
+ if (DivRHS->isOne())
+ return 0; // Not worth bothering, and eliminates some funny cases
+ // with INT_MIN.
+
+ // Compute Prod = CI * DivRHS. We are essentially solving an equation
+ // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
+ // C2 (CI). By solving for X we can turn this into a range check
+ // instead of computing a divide.
+ Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
+
+ // Determine if the product overflows by seeing if the product is
+ // not equal to the divide. Make sure we do the same kind of divide
+ // as in the LHS instruction that we're folding.
+ bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
+ ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
+
+ // Get the ICmp opcode
+ ICmpInst::Predicate Pred = ICI.getPredicate();
+
+ // Figure out the interval that is being checked. For example, a comparison
+ // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
+ // Compute this interval based on the constants involved and the signedness of
+ // the compare/divide. This computes a half-open interval, keeping track of
+ // whether either value in the interval overflows. After analysis each
+ // overflow variable is set to 0 if it's corresponding bound variable is valid
+ // -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
+ int LoOverflow = 0, HiOverflow = 0;
+ Constant *LoBound = 0, *HiBound = 0;
+
+ if (!DivIsSigned) { // udiv
+ // e.g. X/5 op 3 --> [15, 20)
+ LoBound = Prod;
+ HiOverflow = LoOverflow = ProdOV;
+ if (!HiOverflow)
+ HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false);
+ } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
+ if (CmpRHSV == 0) { // (X / pos) op 0
+ // Can't overflow. e.g. X/2 op 0 --> [-1, 2)
+ LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
+ HiBound = DivRHS;
+ } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos
+ LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
+ HiOverflow = LoOverflow = ProdOV;
+ if (!HiOverflow)
+ HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true);
+ } else { // (X / pos) op neg
+ // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14)
+ HiBound = AddOne(Prod);
+ LoOverflow = HiOverflow = ProdOV ? -1 : 0;
+ if (!LoOverflow) {
+ ConstantInt* DivNeg =
+ cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
+ LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
+ }
+ }
+ } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
+ if (CmpRHSV == 0) { // (X / neg) op 0
+ // e.g. X/-5 op 0 --> [-4, 5)
+ LoBound = AddOne(DivRHS);
+ HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
+ if (HiBound == DivRHS) { // -INTMIN = INTMIN
+ HiOverflow = 1; // [INTMIN+1, overflow)
+ HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN
+ }
+ } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos
+ // e.g. X/-5 op 3 --> [-19, -14)
+ HiBound = AddOne(Prod);
+ HiOverflow = LoOverflow = ProdOV ? -1 : 0;
+ if (!LoOverflow)
+ LoOverflow = AddWithOverflow(LoBound, HiBound, DivRHS, true) ? -1 : 0;
+ } else { // (X / neg) op neg
+ LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
+ LoOverflow = HiOverflow = ProdOV;
+ if (!HiOverflow)
+ HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, true);
+ }
+
+ // Dividing by a negative swaps the condition. LT <-> GT
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ Value *X = DivI->getOperand(0);
+ switch (Pred) {
+ default: llvm_unreachable("Unhandled icmp opcode!");
+ case ICmpInst::ICMP_EQ:
+ if (LoOverflow && HiOverflow)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ else if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ ICmpInst::ICMP_UGE, X, LoBound);
+ else if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT, X, HiBound);
+ else
+ return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI);
+ case ICmpInst::ICMP_NE:
+ if (LoOverflow && HiOverflow)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ else if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT, X, LoBound);
+ else if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ ICmpInst::ICMP_UGE, X, HiBound);
+ else
+ return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI);
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT:
+ if (LoOverflow == +1) // Low bound is greater than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ if (LoOverflow == -1) // Low bound is less than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ return new ICmpInst(Pred, X, LoBound);
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ if (HiOverflow == +1) // High bound greater than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ else if (HiOverflow == -1) // High bound less than input range.
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ if (Pred == ICmpInst::ICMP_UGT)
+ return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
+ else
+ return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+ }
+}
+
+
+/// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)".
+///
+Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
+ Instruction *LHSI,
+ ConstantInt *RHS) {
+ const APInt &RHSV = RHS->getValue();
+
+ switch (LHSI->getOpcode()) {
+ case Instruction::Trunc:
+ if (ICI.isEquality() && LHSI->hasOneUse()) {
+ // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
+ // of the high bits truncated out of x are known.
+ unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(),
+ SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits));
+ APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
+ ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne);
+
+ // If all the high bits are known, we can do this xform.
+ if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
+ // Pull in the high bits from known-ones set.
+ APInt NewRHS(RHS->getValue());
+ NewRHS.zext(SrcBits);
+ NewRHS |= KnownOne;
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(), NewRHS));
+ }
+ }
+ break;
+
+ case Instruction::Xor: // (icmp pred (xor X, XorCST), CI)
+ if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+ // If this is a comparison that tests the signbit (X < 0) or (x > -1),
+ // fold the xor.
+ if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) ||
+ (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) {
+ Value *CompareVal = LHSI->getOperand(0);
+
+ // If the sign bit of the XorCST is not set, there is no change to
+ // the operation, just stop using the Xor.
+ if (!XorCST->getValue().isNegative()) {
+ ICI.setOperand(0, CompareVal);
+ Worklist.Add(LHSI);
+ return &ICI;
+ }
+
+ // Was the old condition true if the operand is positive?
+ bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT;
+
+ // If so, the new one isn't.
+ isTrueIfPositive ^= true;
+
+ if (isTrueIfPositive)
+ return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal,
+ SubOne(RHS));
+ else
+ return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal,
+ AddOne(RHS));
+ }
+
+ if (LHSI->hasOneUse()) {
+ // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit))
+ if (!ICI.isEquality() && XorCST->getValue().isSignBit()) {
+ const APInt &SignBit = XorCST->getValue();
+ ICmpInst::Predicate Pred = ICI.isSigned()
+ ? ICI.getUnsignedPredicate()
+ : ICI.getSignedPredicate();
+ return new ICmpInst(Pred, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),
+ RHSV ^ SignBit));
+ }
+
+ // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
+ if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) {
+ const APInt &NotSignBit = XorCST->getValue();
+ ICmpInst::Predicate Pred = ICI.isSigned()
+ ? ICI.getUnsignedPredicate()
+ : ICI.getSignedPredicate();
+ Pred = ICI.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),
+ RHSV ^ NotSignBit));
+ }
+ }
+ }
+ break;
+ case Instruction::And: // (icmp pred (and X, AndCST), RHS)
+ if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) &&
+ LHSI->getOperand(0)->hasOneUse()) {
+ ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1));
+
+ // If the LHS is an AND of a truncating cast, we can widen the
+ // and/compare to be the input width without changing the value
+ // produced, eliminating a cast.
+ if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) {
+ // We can do this transformation if either the AND constant does not
+ // have its sign bit set or if it is an equality comparison.
+ // Extending a relational comparison when we're checking the sign
+ // bit would not work.
+ if (Cast->hasOneUse() &&
+ (ICI.isEquality() ||
+ (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) {
+ uint32_t BitWidth =
+ cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth();
+ APInt NewCST = AndCST->getValue();
+ NewCST.zext(BitWidth);
+ APInt NewCI = RHSV;
+ NewCI.zext(BitWidth);
+ Value *NewAnd =
+ Builder->CreateAnd(Cast->getOperand(0),
+ ConstantInt::get(ICI.getContext(), NewCST),
+ LHSI->getName());
+ return new ICmpInst(ICI.getPredicate(), NewAnd,
+ ConstantInt::get(ICI.getContext(), NewCI));
+ }
+ }
+
+ // If this is: (X >> C1) & C2 != C3 (where any shift and any compare
+ // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This
+ // happens a LOT in code produced by the C front-end, for bitfield
+ // access.
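+      //
+      // For example (illustrative): "((X >> 3) & 15) == 2" becomes
+      // "(X & (15 << 3)) == (2 << 3)", i.e. "(X & 120) == 16".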
+ BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0));
+ if (Shift && !Shift->isShift())
+ Shift = 0;
+
+ ConstantInt *ShAmt;
+ ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0;
+ const Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift.
+ const Type *AndTy = AndCST->getType(); // Type of the and.
+
+ // We can fold this as long as we can't shift unknown bits
+ // into the mask. This can only happen with signed shift
+ // rights, as they sign-extend.
+ if (ShAmt) {
+ bool CanFold = Shift->isLogicalShift();
+ if (!CanFold) {
+ // To test for the bad case of the signed shr, see if any
+ // of the bits shifted in could be tested after the mask.
+ uint32_t TyBits = Ty->getPrimitiveSizeInBits();
+ int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits);
+
+ uint32_t BitWidth = AndTy->getPrimitiveSizeInBits();
+ if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
+ AndCST->getValue()) == 0)
+ CanFold = true;
+ }
+
+ if (CanFold) {
+ Constant *NewCst;
+ if (Shift->getOpcode() == Instruction::Shl)
+ NewCst = ConstantExpr::getLShr(RHS, ShAmt);
+ else
+ NewCst = ConstantExpr::getShl(RHS, ShAmt);
+
+ // Check to see if we are shifting out any of the bits being
+ // compared.
+ if (ConstantExpr::get(Shift->getOpcode(),
+ NewCst, ShAmt) != RHS) {
+ // If we shifted bits out, the fold is not going to work out.
+ // As a special case, check to see if this means that the
+ // result is always true or false now.
+ if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::getFalse(ICI.getContext()));
+ if (ICI.getPredicate() == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::getTrue(ICI.getContext()));
+ } else {
+ ICI.setOperand(1, NewCst);
+ Constant *NewAndCST;
+ if (Shift->getOpcode() == Instruction::Shl)
+ NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt);
+ else
+ NewAndCST = ConstantExpr::getShl(AndCST, ShAmt);
+ LHSI->setOperand(1, NewAndCST);
+ LHSI->setOperand(0, Shift->getOperand(0));
+ Worklist.Add(Shift); // Shift is dead.
+ return &ICI;
+ }
+ }
+ }
+
+      // Turn ((X >> Y) & C) == 0  into  (X & (C << Y)) == 0. The latter is
+      // preferable because it allows the C<<Y expression to be hoisted out
+      // of a loop if Y is invariant and X is not.
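+      //
+      // For example (illustrative): "((X >> Y) & 4) == 0" becomes
+      // "(X & (4 << Y)) == 0", where "4 << Y" can then be hoisted when Y is
+      // loop-invariant.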
+ if (Shift && Shift->hasOneUse() && RHSV == 0 &&
+ ICI.isEquality() && !Shift->isArithmeticShift() &&
+ !isa<Constant>(Shift->getOperand(0))) {
+ // Compute C << Y.
+ Value *NS;
+ if (Shift->getOpcode() == Instruction::LShr) {
+ NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp");
+ } else {
+ // Insert a logical shift.
+ NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp");
+ }
+
+ // Compute X & (C << Y).
+ Value *NewAnd =
+ Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
+
+ ICI.setOperand(0, NewAnd);
+ return &ICI;
+ }
+ }
+
+ // Try to optimize things like "A[i]&42 == 0" to index computations.
+ if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) {
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !LI->isVolatile() && isa<ConstantInt>(LHSI->getOperand(1))) {
+ ConstantInt *C = cast<ConstantInt>(LHSI->getOperand(1));
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV,ICI, C))
+ return Res;
+ }
+ }
+ break;
+
+ case Instruction::Or: {
+ if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse())
+ break;
+ Value *P, *Q;
+ if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
+ // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
+ // -> and (icmp eq P, null), (icmp eq Q, null).
+
+ Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P,
+ Constant::getNullValue(P->getType()));
+ Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q,
+ Constant::getNullValue(Q->getType()));
+ Instruction *Op;
+ if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+ Op = BinaryOperator::CreateAnd(ICIP, ICIQ);
+ else
+ Op = BinaryOperator::CreateOr(ICIP, ICIQ);
+ return Op;
+ }
+ break;
+ }
+
+ case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI)
+ ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!ShAmt) break;
+
+ uint32_t TypeBits = RHSV.getBitWidth();
+
+ // Check that the shift amount is in range. If not, don't perform
+ // undefined shifts. When the shift is visited it will be
+ // simplified.
+ if (ShAmt->uge(TypeBits))
+ break;
+
+ if (ICI.isEquality()) {
+ // If we are comparing against bits always shifted out, the
+ // comparison cannot succeed.
+ Constant *Comp =
+ ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt),
+ ShAmt);
+ if (Comp != RHS) {// Comparing against a bit that we know is zero.
+ bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+ Constant *Cst =
+ ConstantInt::get(Type::getInt1Ty(ICI.getContext()), IsICMP_NE);
+ return ReplaceInstUsesWith(ICI, Cst);
+ }
+
+ if (LHSI->hasOneUse()) {
+ // Otherwise strength reduce the shift into an and.
+ uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+ Constant *Mask =
+ ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits,
+ TypeBits-ShAmtVal));
+
+ Value *And =
+ Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask");
+ return new ICmpInst(ICI.getPredicate(), And,
+ ConstantInt::get(ICI.getContext(),
+ RHSV.lshr(ShAmtVal)));
+ }
+ }
+
+ // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
+ bool TrueIfSigned = false;
+ if (LHSI->hasOneUse() &&
+ isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
+ // (X << 31) <s 0 --> (X&1) != 0
+ Constant *Mask = ConstantInt::get(ICI.getContext(), APInt(TypeBits, 1) <<
+ (TypeBits-ShAmt->getZExtValue()-1));
+ Value *And =
+ Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
+ return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
+ And, Constant::getNullValue(And->getType()));
+ }
+ break;
+ }
+
+ case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI)
+ case Instruction::AShr: {
+ // Only handle equality comparisons of shift-by-constant.
+ ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!ShAmt || !ICI.isEquality()) break;
+
+ // Check that the shift amount is in range. If not, don't perform
+ // undefined shifts. When the shift is visited it will be
+ // simplified.
+ uint32_t TypeBits = RHSV.getBitWidth();
+ if (ShAmt->uge(TypeBits))
+ break;
+
+ uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+
+ // If we are comparing against bits always shifted out, the
+ // comparison cannot succeed.
+ APInt Comp = RHSV << ShAmtVal;
+ if (LHSI->getOpcode() == Instruction::LShr)
+ Comp = Comp.lshr(ShAmtVal);
+ else
+ Comp = Comp.ashr(ShAmtVal);
+
+ if (Comp != RHSV) { // Comparing against a bit that we know is zero.
+ bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+ Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+ IsICMP_NE);
+ return ReplaceInstUsesWith(ICI, Cst);
+ }
+
+ // Otherwise, check to see if the bits shifted out are known to be zero.
+ // If so, we can compare against the unshifted value:
+ // (X & 4) >> 1 == 2 --> (X & 4) == 4.
+ if (LHSI->hasOneUse() &&
+ MaskedValueIsZero(LHSI->getOperand(0),
+ APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) {
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantExpr::getShl(RHS, ShAmt));
+ }
+
+ if (LHSI->hasOneUse()) {
+ // Otherwise strength reduce the shift into an and.
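+ // e.g., in i8: (X >>u 4) == 3 --> (X & 0xF0) == 0x30.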
+ APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+ Constant *Mask = ConstantInt::get(ICI.getContext(), Val);
+
+ Value *And = Builder->CreateAnd(LHSI->getOperand(0),
+ Mask, LHSI->getName()+".mask");
+ return new ICmpInst(ICI.getPredicate(), And,
+ ConstantExpr::getShl(RHS, ShAmt));
+ }
+ break;
+ }
+
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ // Fold: icmp pred ([us]div X, C1), C2 -> range test
+ // Fold this div into the comparison, producing a range check.
+ // Determine, based on the divide type, what range is being checked. If
+ // there is an overflow on the low or high side, remember it; otherwise
+ // compute the range [low, hi) bounding the new value.
+ // See: InsertRangeTest above for the kinds of replacements possible.
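+ // e.g., (X /u 5) == 2 holds exactly when X is in the range [10, 15).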
+ if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
+ if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI),
+ DivRHS))
+ return R;
+ break;
+
+ case Instruction::Add:
+ // Fold: icmp pred (add X, C1), C2
+ if (!ICI.isEquality()) {
+ ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!LHSC) break;
+ const APInt &LHSV = LHSC->getValue();
+
+ ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV)
+ .subtract(LHSV);
+
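+ // e.g., (X + 5) <u 5 --> X >=u -5.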
+ if (ICI.isSigned()) {
+ if (CR.getLower().isSignBit()) {
+ return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getUpper()));
+ } else if (CR.getUpper().isSignBit()) {
+ return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getLower()));
+ }
+ } else {
+ if (CR.getLower().isMinValue()) {
+ return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getUpper()));
+ } else if (CR.getUpper().isMinValue()) {
+ return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
+ ConstantInt::get(ICI.getContext(),CR.getLower()));
+ }
+ }
+ }
+ break;
+ }
+
+ // Simplify icmp_eq and icmp_ne instructions with integer constant RHS.
+ if (ICI.isEquality()) {
+ bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+
+ // If the first operand is (add|sub|and|or|xor|rem) with a constant, and
+ // the second operand is a constant, simplify a bit.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) {
+ switch (BO->getOpcode()) {
+ case Instruction::SRem:
+ // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
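+ // e.g., (X srem 8) == 0 holds exactly when (X urem 8) == 0.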
+ if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) && BO->hasOneUse()) {
+ const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
+ if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) {
+ Value *NewRem =
+ Builder->CreateURem(BO->getOperand(0), BO->getOperand(1),
+ BO->getName());
+ return new ICmpInst(ICI.getPredicate(), NewRem,
+ Constant::getNullValue(BO->getType()));
+ }
+ }
+ break;
+ case Instruction::Add:
+ // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
+ if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ if (BO->hasOneUse())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ ConstantExpr::getSub(RHS, BOp1C));
+ } else if (RHSV == 0) {
+ // Replace ((add A, B) != 0) with (A != -B) if A or B is
+ // efficiently invertible, or if the add has just this one use.
+ Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
+
+ if (Value *NegVal = dyn_castNegVal(BOp1))
+ return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
+ else if (Value *NegVal = dyn_castNegVal(BOp0))
+ return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
+ else if (BO->hasOneUse()) {
+ Value *Neg = Builder->CreateNeg(BOp1);
+ Neg->takeName(BO);
+ return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
+ }
+ }
+ break;
+ case Instruction::Xor:
+ // For the xor case, we can xor two constants together, eliminating
+ // the explicit xor.
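+ // e.g., (X ^ 5) == 12 --> X == 9.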
+ if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1)))
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ ConstantExpr::getXor(RHS, BOC));
+
+ // FALLTHROUGH
+ case Instruction::Sub:
+ // Replace (([sub|xor] A, B) != 0) with (A != B)
+ if (RHSV == 0)
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ BO->getOperand(1));
+ break;
+
+ case Instruction::Or:
+ // If bits are being or'd in that are not present in the constant we
+ // are comparing against, then the comparison could never succeed!
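+ // e.g., (X | 8) == 5 is always false, since bit 3 is set on the left.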
+ if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
+ Constant *NotCI = ConstantExpr::getNot(RHS);
+ if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+ isICMP_NE));
+ }
+ break;
+
+ case Instruction::And:
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ // If bits are being compared against that are and'd out, then the
+ // comparison can never succeed!
+ if ((RHSV & ~BOC->getValue()) != 0)
+ return ReplaceInstUsesWith(ICI,
+ ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+ isICMP_NE));
+
+ // If we have ((X & C) == C), turn it into ((X & C) != 0).
+ if (RHS == BOC && RHSV.isPowerOf2())
+ return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
+ ICmpInst::ICMP_NE, LHSI,
+ Constant::getNullValue(RHS->getType()));
+
+ // Replace ((and X, (1 << size(X)-1)) != 0) with (X s< 0).
+ if (BOC->getValue().isSignBit()) {
+ Value *X = BO->getOperand(0);
+ Constant *Zero = Constant::getNullValue(X->getType());
+ ICmpInst::Predicate pred = isICMP_NE ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
+ return new ICmpInst(pred, X, Zero);
+ }
+
+ // ((X & ~7) == 0) --> X < 8
+ if (RHSV == 0 && isHighOnes(BOC)) {
+ Value *X = BO->getOperand(0);
+ Constant *NegX = ConstantExpr::getNeg(BOC);
+ ICmpInst::Predicate pred = isICMP_NE ?
+ ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
+ return new ICmpInst(pred, X, NegX);
+ }
+ }
+ default: break;
+ }
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
+ // Handle icmp {eq|ne} <intrinsic>, intcst.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::bswap:
+ Worklist.Add(II);
+ ICI.setOperand(0, II->getOperand(1));
+ ICI.setOperand(1, ConstantInt::get(II->getContext(), RHSV.byteSwap()));
+ return &ICI;
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ // ctz(A) == bitwidth(A) -> A == 0 and likewise for !=
+ if (RHSV == RHS->getType()->getBitWidth()) {
+ Worklist.Add(II);
+ ICI.setOperand(0, II->getOperand(1));
+ ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0));
+ return &ICI;
+ }
+ break;
+ case Intrinsic::ctpop:
+ // popcount(A) == 0 -> A == 0 and likewise for !=
+ if (RHS->isZero()) {
+ Worklist.Add(II);
+ ICI.setOperand(0, II->getOperand(1));
+ ICI.setOperand(1, RHS);
+ return &ICI;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+/// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst).
+/// We only handle extending casts so far.
+///
+Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
+ const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0));
+ Value *LHSCIOp = LHSCI->getOperand(0);
+ const Type *SrcTy = LHSCIOp->getType();
+ const Type *DestTy = LHSCI->getType();
+ Value *RHSCIOp;
+
+ // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
+ // integer type is the same size as the pointer type.
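+ // e.g., on a 64-bit target: icmp eq (ptrtoint i8* %P to i64), 0
+ // --> icmp eq i8* %P, null.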
+ if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
+ TD->getPointerSizeInBits() ==
+ cast<IntegerType>(DestTy)->getBitWidth()) {
+ Value *RHSOp = 0;
+ if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
+ RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
+ } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
+ RHSOp = RHSC->getOperand(0);
+ // If the pointer types don't match, insert a bitcast.
+ if (LHSCIOp->getType() != RHSOp->getType())
+ RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
+ }
+
+ if (RHSOp)
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
+ }
+
+ // The code below only handles extension cast instructions, so far.
+ // Enforce this.
+ if (LHSCI->getOpcode() != Instruction::ZExt &&
+ LHSCI->getOpcode() != Instruction::SExt)
+ return 0;
+
+ bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
+ bool isSignedCmp = ICI.isSigned();
+
+ if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) {
+ // Not an extension from the same type?
+ RHSCIOp = CI->getOperand(0);
+ if (RHSCIOp->getType() != LHSCIOp->getType())
+ return 0;
+
+ // If the signedness of the two casts doesn't agree (i.e. one is a sext
+ // and the other is a zext), then we can't handle this.
+ if (CI->getOpcode() != LHSCI->getOpcode())
+ return 0;
+
+ // Deal with equality cases early.
+ if (ICI.isEquality())
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+ // A signed comparison of sign extended values simplifies into a
+ // signed comparison.
+ if (isSignedCmp && isSignedExt)
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+ // The other three cases all fold into an unsigned comparison.
+ return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
+ }
+
+ // If we aren't dealing with a constant on the RHS, exit early
+ ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1));
+ if (!CI)
+ return 0;
+
+ // Compute the constant that would result if we truncated to SrcTy and then
+ // reextended to DestTy.
+ Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(),
+ Res1, DestTy);
+
+ // If the re-extended constant didn't change...
+ if (Res2 == CI) {
+ // Deal with equality cases early.
+ if (ICI.isEquality())
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
+
+ // A signed comparison of sign extended values simplifies into a
+ // signed comparison.
+ if (isSignedExt && isSignedCmp)
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
+
+ // The other three cases all fold into an unsigned comparison.
+ return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1);
+ }
+
+ // The re-extended constant changed so the constant cannot be represented
+ // in the shorter type. Consequently, we cannot emit a simple comparison.
+
+ // First, handle some easy cases. We know the result cannot be equal at this
+ // point so handle the ICI.isEquality() cases
+ if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ if (ICI.getPredicate() == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+
+ // Evaluate the comparison for LT (we invert for GT below). LE and GE cases
+ // should have been folded away previously and should not reach here.
+ Value *Result;
+ if (isSignedCmp) {
+ // We're performing a signed comparison.
+ if (cast<ConstantInt>(CI)->getValue().isNegative())
+ Result = ConstantInt::getFalse(ICI.getContext()); // X < (small) --> false
+ else
+ Result = ConstantInt::getTrue(ICI.getContext()); // X < (large) --> true
+ } else {
+ // We're performing an unsigned comparison.
+ if (isSignedExt) {
+ // We're performing an unsigned comp with a sign extended value.
+ // This is true if the input is >= 0. [aka >s -1]
+ Constant *NegOne = Constant::getAllOnesValue(SrcTy);
+ Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName());
+ } else {
+ // Unsigned extend & unsigned compare -> always true.
+ Result = ConstantInt::getTrue(ICI.getContext());
+ }
+ }
+
+ // Finally, return the value computed.
+ if (ICI.getPredicate() == ICmpInst::ICMP_ULT ||
+ ICI.getPredicate() == ICmpInst::ICMP_SLT)
+ return ReplaceInstUsesWith(ICI, Result);
+
+ assert((ICI.getPredicate()==ICmpInst::ICMP_UGT ||
+ ICI.getPredicate()==ICmpInst::ICMP_SGT) &&
+ "ICmp should be folded!");
+ if (Constant *CI = dyn_cast<Constant>(Result))
+ return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));
+ return BinaryOperator::CreateNot(Result);
+}
+
+
+
+Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
+ bool Changed = false;
+
+ /// Orders the operands of the compare so that they are listed from most
+ /// complex to least complex. This puts binary operators before unary
+ /// operators, which in turn come before constants.
+ if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
+ I.swapOperands();
+ Changed = true;
+ }
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ const Type *Ty = Op0->getType();
+
+ // icmp's with boolean values can always be turned into bitwise operations
+ if (Ty == Type::getInt1Ty(I.getContext())) {
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Invalid icmp instruction!");
+ case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B)
+ Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp");
+ return BinaryOperator::CreateNot(Xor);
+ }
+ case ICmpInst::ICMP_NE: // icmp ne i1 A, B -> A^B
+ return BinaryOperator::CreateXor(Op0, Op1);
+
+ case ICmpInst::ICMP_UGT:
+ std::swap(Op0, Op1); // Change icmp ugt -> icmp ult
+ // FALL THROUGH
+ case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B
+ Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
+ return BinaryOperator::CreateAnd(Not, Op1);
+ }
+ case ICmpInst::ICMP_SGT:
+ std::swap(Op0, Op1); // Change icmp sgt -> icmp slt
+ // FALL THROUGH
+ case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B
+ Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
+ return BinaryOperator::CreateAnd(Not, Op0);
+ }
+ case ICmpInst::ICMP_UGE:
+ std::swap(Op0, Op1); // Change icmp uge -> icmp ule
+ // FALL THROUGH
+ case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B
+ Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
+ return BinaryOperator::CreateOr(Not, Op1);
+ }
+ case ICmpInst::ICMP_SGE:
+ std::swap(Op0, Op1); // Change icmp sge -> icmp sle
+ // FALL THROUGH
+ case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B
+ Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+ }
+ }
+
+ unsigned BitWidth = 0;
+ if (TD)
+ BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
+ else if (Ty->isIntOrIntVector())
+ BitWidth = Ty->getScalarSizeInBits();
+
+ bool isSignBit = false;
+
+ // See if we are doing a comparison with a constant.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ Value *A = 0, *B = 0;
+
+ // (icmp ne/eq (sub A, B) 0) -> (icmp ne/eq A, B)
+ if (I.isEquality() && CI->isZero() &&
+ match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
+ // (icmp cond A B) if cond is equality
+ return new ICmpInst(I.getPredicate(), A, B);
+ }
+
+ // If we have an icmp le or icmp ge instruction, turn it into the
+ // appropriate icmp lt or icmp gt instruction. This allows us to rely on
+ // them being folded in the code below. The SimplifyICmpInst code has
+ // already handled the edge cases for us, so we just assert on them.
+ switch (I.getPredicate()) {
+ default: break;
+ case ICmpInst::ICMP_ULE:
+ assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_ULT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ case ICmpInst::ICMP_SLE:
+ assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ case ICmpInst::ICMP_UGE:
+ assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_UGT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ case ICmpInst::ICMP_SGE:
+ assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE
+ return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ }
+
+ // If this comparison is a normal comparison, it demands all bits; if it is
+ // a sign bit comparison, it only demands the sign bit.
+ bool UnusedBit;
+ isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit);
+ }
+
+ // See if we can fold the comparison based on range information we can get
+ // by checking whether bits are known to be zero or one in the input.
+ if (BitWidth != 0) {
+ APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0);
+ APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);
+
+ if (SimplifyDemandedBits(I.getOperandUse(0),
+ isSignBit ? APInt::getSignBit(BitWidth)
+ : APInt::getAllOnesValue(BitWidth),
+ Op0KnownZero, Op0KnownOne, 0))
+ return &I;
+ if (SimplifyDemandedBits(I.getOperandUse(1),
+ APInt::getAllOnesValue(BitWidth),
+ Op1KnownZero, Op1KnownOne, 0))
+ return &I;
+
+ // Given the known and unknown bits, compute a range that the LHS could be
+ // in. Compute the Min, Max and RHS values based on the known bits. For
+ // EQ and NE we use unsigned values.
+ APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
+ APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
+ if (I.isSigned()) {
+ ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
+ Op0Min, Op0Max);
+ ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
+ Op1Min, Op1Max);
+ } else {
+ ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
+ Op0Min, Op0Max);
+ ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
+ Op1Min, Op1Max);
+ }
+
+ // If Min and Max are known to be the same, then SimplifyDemandedBits
+ // figured out that the LHS is a constant. Just constant fold this now so
+ // that code below can assume that Min != Max.
+ if (!isa<Constant>(Op0) && Op0Min == Op0Max)
+ return new ICmpInst(I.getPredicate(),
+ ConstantInt::get(I.getContext(), Op0Min), Op1);
+ if (!isa<Constant>(Op1) && Op1Min == Op1Max)
+ return new ICmpInst(I.getPredicate(), Op0,
+ ConstantInt::get(I.getContext(), Op1Min));
+
+ // Based on the range information we know about the LHS, see if we can
+ // simplify this comparison. For example, (x&4) < 8 is always true.
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Unknown icmp opcode!");
+ case ICmpInst::ICMP_EQ:
+ if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ break;
+ case ICmpInst::ICMP_NE:
+ if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ break;
+ case ICmpInst::ICMP_ULT:
+ if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+
+ // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear
+ if (CI->isMinValue(true))
+ return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
+ Constant::getAllOnesValue(Op0->getType()));
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+
+ if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(A)-1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+
+ // (x >u 2147483647) -> (x <s 0) -> true if sign bit set
+ if (CI->isMaxValue(true))
+ return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+ Constant::getNullValue(Op0->getType()));
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+
+ if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ }
+ break;
+ case ICmpInst::ICMP_SGE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
+ if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ break;
+ case ICmpInst::ICMP_SLE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
+ if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ break;
+ case ICmpInst::ICMP_UGE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
+ if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ break;
+ case ICmpInst::ICMP_ULE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
+ if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ break;
+ }
+
+ // Turn a signed comparison into an unsigned one if both operands
+ // are known to have the same sign.
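+ // e.g., if both operands are known non-negative, slt and ult agree.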
+ if (I.isSigned() &&
+ ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) ||
+ (Op0KnownOne.isNegative() && Op1KnownOne.isNegative())))
+ return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
+ }
+
+ // Test if the ICmpInst instruction is used exclusively by a select as
+ // part of a minimum or maximum operation. If so, refrain from doing
+ // any other folding. This helps out other analyses which understand
+ // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+ // and CodeGen. And in this case, at least one of the comparison
+ // operands has at least one user besides the compare (the select),
+ // which would often largely negate the benefit of folding anyway.
+ if (I.hasOneUse())
+ if (SelectInst *SI = dyn_cast<SelectInst>(*I.use_begin()))
+ if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+ (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+ return 0;
+
+ // See if we are doing a comparison between a constant and an instruction that
+ // can be folded into the comparison.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ // Since the RHS is a ConstantInt (CI), if the left hand side is an
+ // instruction, see if that instruction also has constants so that the
+ // instruction can be folded into the icmp
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI))
+ return Res;
+ }
+
+ // Handle icmp with constant (but not simple integer constant) RHS
+ if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ switch (LHSI->getOpcode()) {
+ case Instruction::GetElementPtr:
+ // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
+ if (RHSC->isNullValue() &&
+ cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices())
+ return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
+ Constant::getNullValue(LHSI->getOperand(0)->getType()));
+ break;
+ case Instruction::PHI:
+ // Only fold icmp into the PHI if the phi and icmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I, true))
+ return NV;
+ break;
+ case Instruction::Select: {
+ // If either operand of the select is a constant, we can fold the
+ // comparison into the select arms, which will cause one to be
+ // constant folded and the select turned into a bitwise or.
+ Value *Op1 = 0, *Op2 = 0;
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1)))
+ Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2)))
+ Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+
+ // We only want to perform this transformation if it will not lead to
+ // additional code. This is true if either both sides of the select
+ // fold to a constant (in which case the icmp is replaced with a select
+ // which will usually simplify) or this is the only user of the
+ // select (in which case we are trading a select+icmp for a simpler
+ // select+icmp).
+ if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) {
+ if (!Op1)
+ Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1),
+ RHSC, I.getName());
+ if (!Op2)
+ Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2),
+ RHSC, I.getName());
+ return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
+ }
+ break;
+ }
+ case Instruction::Call:
+ // If we have (malloc != null), and if the malloc has a single use, we
+ // can assume it is successful and remove the malloc.
+ if (isMalloc(LHSI) && LHSI->hasOneUse() &&
+ isa<ConstantPointerNull>(RHSC)) {
+ // Need to explicitly erase malloc call here, instead of adding it to
+ // Worklist, because it won't get DCE'd from the Worklist since
+ // isInstructionTriviallyDead() returns false for function calls.
+ // It is OK to replace LHSI/MallocCall with Undef because the
+ // instruction that uses it will be erased via Worklist.
+ if (extractMallocCall(LHSI)) {
+ LHSI->replaceAllUsesWith(UndefValue::get(LHSI->getType()));
+ EraseInstFromFunction(*LHSI);
+ return ReplaceInstUsesWith(I,
+ ConstantInt::get(Type::getInt1Ty(I.getContext()),
+ !I.isTrueWhenEqual()));
+ }
+ if (CallInst* MallocCall = extractMallocCallFromBitCast(LHSI))
+ if (MallocCall->hasOneUse()) {
+ MallocCall->replaceAllUsesWith(
+ UndefValue::get(MallocCall->getType()));
+ EraseInstFromFunction(*MallocCall);
+ Worklist.Add(LHSI); // The malloc's bitcast use.
+ return ReplaceInstUsesWith(I,
+ ConstantInt::get(Type::getInt1Ty(I.getContext()),
+ !I.isTrueWhenEqual()));
+ }
+ }
+ break;
+ case Instruction::IntToPtr:
+ // icmp pred inttoptr(X), null -> icmp pred X, 0
+ if (RHSC->isNullValue() && TD &&
+ TD->getIntPtrType(RHSC->getContext()) ==
+ LHSI->getOperand(0)->getType())
+ return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
+ Constant::getNullValue(LHSI->getOperand(0)->getType()));
+ break;
+
+ case Instruction::Load:
+ // Try to optimize things like "A[i] > 4" to index computations.
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !cast<LoadInst>(LHSI)->isVolatile())
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
+ return Res;
+ }
+ break;
+ }
+ }
+
+ // If we can optimize an 'icmp GEP, P' or 'icmp P, GEP', do so now.
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
+ if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I))
+ return NI;
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
+ if (Instruction *NI = FoldGEPICmp(GEP, Op0,
+ ICmpInst::getSwappedPredicate(I.getPredicate()), I))
+ return NI;
+
+ // Test to see if the operands of the icmp are cast versions of other
+ // values. If the ptr->ptr cast can be stripped off both arguments, we do so
+ // now.
+ if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
+ if (isa<PointerType>(Op0->getType()) &&
+ (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
+ // We keep moving the cast from the left operand over to the right
+ // operand, where it can often be eliminated completely.
+ Op0 = CI->getOperand(0);
+
+ // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
+ // so eliminate it as well.
+ if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1))
+ Op1 = CI2->getOperand(0);
+
+ // If Op1 is a constant, we can fold the cast into the constant.
+ if (Op0->getType() != Op1->getType()) {
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+ Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
+ } else {
+ // Otherwise, cast the RHS right before the icmp
+ Op1 = Builder->CreateBitCast(Op1, Op0->getType());
+ }
+ }
+ return new ICmpInst(I.getPredicate(), Op0, Op1);
+ }
+ }
+
+ if (isa<CastInst>(Op0)) {
+ // Handle the special case of: icmp (cast bool to X), <cst>
+ // This comes up when you have code like
+ // int X = A < B;
+ // if (X) ...
+ // For generality, we handle any zero-extension of any operand comparison
+ // with a constant or another cast from the same type.
+ if (isa<Constant>(Op1) || isa<CastInst>(Op1))
+ if (Instruction *R = visitICmpInstWithCastAndCast(I))
+ return R;
+ }
+
+ // See if it's the same type of instruction on the left and right.
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
+ if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() &&
+ Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1)) {
+ switch (Op0I->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Xor:
+ if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
+ return new ICmpInst(I.getPredicate(), Op0I->getOperand(0),
+ Op1I->getOperand(0));
+ // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+ if (CI->getValue().isSignBit()) {
+ ICmpInst::Predicate Pred = I.isSigned()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ return new ICmpInst(Pred, Op0I->getOperand(0),
+ Op1I->getOperand(0));
+ }
+
+ if (CI->getValue().isMaxSignedValue()) {
+ ICmpInst::Predicate Pred = I.isSigned()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ Pred = I.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, Op0I->getOperand(0),
+ Op1I->getOperand(0));
+ }
+ }
+ break;
+ case Instruction::Mul:
+ if (!I.isEquality())
+ break;
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+ // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
+ // Mask = -1 >> count-trailing-zeros(Cst).
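+ // e.g., for Cst = 12 in i8 (two trailing zeros), Mask = 0x3F.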
+ if (!CI->isZero() && !CI->isOne()) {
+ const APInt &AP = CI->getValue();
+ ConstantInt *Mask = ConstantInt::get(I.getContext(),
+ APInt::getLowBitsSet(AP.getBitWidth(),
+ AP.getBitWidth() -
+ AP.countTrailingZeros()));
+ Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask);
+ Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask);
+ return new ICmpInst(I.getPredicate(), And1, And2);
+ }
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ // ~x < ~y --> y < x
+ { Value *A, *B;
+ if (match(Op0, m_Not(m_Value(A))) &&
+ match(Op1, m_Not(m_Value(B))))
+ return new ICmpInst(I.getPredicate(), B, A);
+ }
+
+ if (I.isEquality()) {
+ Value *A, *B, *C, *D;
+
+ // -x == -y --> x == y
+ if (match(Op0, m_Neg(m_Value(A))) &&
+ match(Op1, m_Neg(m_Value(B))))
+ return new ICmpInst(I.getPredicate(), A, B);
+
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
+ if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
+ Value *OtherVal = A == Op1 ? B : A;
+ return new ICmpInst(I.getPredicate(), OtherVal,
+ Constant::getNullValue(A->getType()));
+ }
+
+ if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
+ // A^c1 == C^c2 --> A == C^(c1^c2)
+ ConstantInt *C1, *C2;
+ if (match(B, m_ConstantInt(C1)) &&
+ match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
+ Constant *NC = ConstantInt::get(I.getContext(),
+ C1->getValue() ^ C2->getValue());
+ Value *Xor = Builder->CreateXor(C, NC, "tmp");
+ return new ICmpInst(I.getPredicate(), A, Xor);
+ }
+
+ // A^B == A^D -> B == D
+ if (A == C) return new ICmpInst(I.getPredicate(), B, D);
+ if (A == D) return new ICmpInst(I.getPredicate(), B, C);
+ if (B == C) return new ICmpInst(I.getPredicate(), A, D);
+ if (B == D) return new ICmpInst(I.getPredicate(), A, C);
+ }
+ }
+
+ if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0)) {
+ // A == (A^B) -> B == 0
+ Value *OtherVal = A == Op0 ? B : A;
+ return new ICmpInst(I.getPredicate(), OtherVal,
+ Constant::getNullValue(A->getType()));
+ }
+
+ // (A-B) == A -> B == 0
+ if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B))))
+ return new ICmpInst(I.getPredicate(), B,
+ Constant::getNullValue(B->getType()));
+
+ // A == (A-B) -> B == 0
+ if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B))))
+ return new ICmpInst(I.getPredicate(), B,
+ Constant::getNullValue(B->getType()));
+
+ // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
+ if (Op0->hasOneUse() && Op1->hasOneUse() &&
+ match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_And(m_Value(C), m_Value(D)))) {
+ Value *X = 0, *Y = 0, *Z = 0;
+
+ if (A == C) {
+ X = B; Y = D; Z = A;
+ } else if (A == D) {
+ X = B; Y = C; Z = A;
+ } else if (B == C) {
+ X = A; Y = D; Z = B;
+ } else if (B == D) {
+ X = A; Y = C; Z = B;
+ }
+
+ if (X) { // Build (X^Y) & Z
+ Op1 = Builder->CreateXor(X, Y, "tmp");
+ Op1 = Builder->CreateAnd(Op1, Z, "tmp");
+ I.setOperand(0, Op1);
+ I.setOperand(1, Constant::getNullValue(Op1->getType()));
+ return &I;
+ }
+ }
+ }
+
+ {
+ Value *X; ConstantInt *Cst;
+ // icmp X+Cst, X
+ if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
+ return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0);
+
+ // icmp X, X+Cst
+ if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
+ return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1);
+ }
+ return Changed ? &I : 0;
+}
+
+
+
+
+
+
+/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
+///
+Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
+ Instruction *LHSI,
+ Constant *RHSC) {
+ if (!isa<ConstantFP>(RHSC)) return 0;
+ const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
+
+ // Get the width of the mantissa. We don't want to hack on conversions that
+ // might lose information from the integer, e.g. "i64 -> float"
+ int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
+ if (MantissaWidth == -1) return 0; // Unknown.
+
+ // Check to see that the input is converted from an integer type that is
+ // small enough to preserve all bits. TODO: check here for "known" sign bits.
+ // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64,
+ // for example.
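+ // e.g., sitofp i16 -> float is exact (24-bit significand); i64 -> float
+ // is not.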
+ unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits();
+
+ // If this is a uitofp instruction, we need an extra bit to hold the sign.
+ bool LHSUnsigned = isa<UIToFPInst>(LHSI);
+ if (LHSUnsigned)
+ ++InputSize;
+
+ // If the conversion would lose info, don't hack on this.
+ if ((int)InputSize > MantissaWidth)
+ return 0;
+
+ // Otherwise, we can potentially simplify the comparison. We know that it
+ // will always come through as an integer value and we know the constant is
+ // not a NAN (it would have been previously simplified).
+ assert(!RHS.isNaN() && "NaN comparison not already folded!");
+
+ ICmpInst::Predicate Pred;
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Unexpected predicate!");
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_OEQ:
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_OGT:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT;
+ break;
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OGE:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE;
+ break;
+ case FCmpInst::FCMP_ULT:
+ case FCmpInst::FCMP_OLT:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT;
+ break;
+ case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_OLE:
+ Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE;
+ break;
+ case FCmpInst::FCMP_UNE:
+ case FCmpInst::FCMP_ONE:
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ case FCmpInst::FCMP_ORD:
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ case FCmpInst::FCMP_UNO:
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+
+ const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
+
+ // Now we know that the APFloat is a normal number, zero or inf.
+
+ // See if the FP constant is too large for the integer. For example,
+ // comparing an i8 to 300.0.
+ unsigned IntWidth = IntTy->getScalarSizeInBits();
+
+ if (!LHSUnsigned) {
+ // If the RHS value is > SignedMax, fold the comparison. This handles +INF
+ // and large values.
+ APFloat SMax(RHS.getSemantics(), APFloat::fcZero, false);
+ SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT ||
+ Pred == ICmpInst::ICMP_SLE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+ } else {
+ // If the RHS value is > UnsignedMax, fold the comparison. This handles
+ // +INF and large values.
+ APFloat UMax(RHS.getSemantics(), APFloat::fcZero, false);
+ UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false,
+ APFloat::rmNearestTiesToEven);
+ if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT ||
+ Pred == ICmpInst::ICMP_ULE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+ }
+
+ if (!LHSUnsigned) {
+ // See if the RHS value is < SignedMin.
+ APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false);
+ SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
+ Pred == ICmpInst::ICMP_SGE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
+ }
+
+ // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or
+ // [0, UMAX], but it may still be fractional. See if it is fractional by
+ // casting the FP value to the integer value and back, checking for equality.
+ // Don't do this for zero, because -0.0 is not fractional.
+ Constant *RHSInt = LHSUnsigned
+ ? ConstantExpr::getFPToUI(RHSC, IntTy)
+ : ConstantExpr::getFPToSI(RHSC, IntTy);
+ if (!RHS.isZero()) {
+ bool Equal = LHSUnsigned
+ ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
+ : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
+ if (!Equal) {
+ // If we had a comparison against a fractional value, we have to adjust
+ // the compare predicate and sometimes the value. RHSC is rounded towards
+ // zero at this point.
+ switch (Pred) {
+ default: llvm_unreachable("Unexpected integer comparison!");
+ case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ case ICmpInst::ICMP_ULE:
+ // (float)int <= 4.4 --> int <= 4
+ // (float)int <= -4.4 --> false
+ if (RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ break;
+ case ICmpInst::ICMP_SLE:
+ // (float)int <= 4.4 --> int <= 4
+ // (float)int <= -4.4 --> int < -4
+ if (RHS.isNegative())
+ Pred = ICmpInst::ICMP_SLT;
+ break;
+ case ICmpInst::ICMP_ULT:
+ // (float)int < -4.4 --> false
+ // (float)int < 4.4 --> int <= 4
+ if (RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ Pred = ICmpInst::ICMP_ULE;
+ break;
+ case ICmpInst::ICMP_SLT:
+ // (float)int < -4.4 --> int < -4
+ // (float)int < 4.4 --> int <= 4
+ if (!RHS.isNegative())
+ Pred = ICmpInst::ICMP_SLE;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // (float)int > 4.4 --> int > 4
+ // (float)int > -4.4 --> true
+ if (RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ break;
+ case ICmpInst::ICMP_SGT:
+ // (float)int > 4.4 --> int > 4
+ // (float)int > -4.4 --> int >= -4
+ if (RHS.isNegative())
+ Pred = ICmpInst::ICMP_SGE;
+ break;
+ case ICmpInst::ICMP_UGE:
+ // (float)int >= -4.4 --> true
+ // (float)int >= 4.4 --> int > 4
+ if (!RHS.isNegative())
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ Pred = ICmpInst::ICMP_UGT;
+ break;
+ case ICmpInst::ICMP_SGE:
+ // (float)int >= -4.4 --> int >= -4
+ // (float)int >= 4.4 --> int > 4
+ if (!RHS.isNegative())
+ Pred = ICmpInst::ICMP_SGT;
+ break;
+ }
+ }
+ }
+
+ // Lower this FP comparison into an appropriate integer version of the
+ // comparison.
+ return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
+}
+
+Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
+ bool Changed = false;
+
+ /// Orders the operands of the compare so that they are listed from most
+ /// complex to least complex. This puts binary operators before unary
+ /// operators, which in turn come before constants.
+ if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
+ I.swapOperands();
+ Changed = true;
+ }
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // Simplify 'fcmp pred X, X'
+ if (Op0 == Op1) {
+ switch (I.getPredicate()) {
+ default: llvm_unreachable("Unknown predicate!");
+ case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y)
+ case FCmpInst::FCMP_ULT: // True if unordered or less than
+ case FCmpInst::FCMP_UGT: // True if unordered or greater than
+ case FCmpInst::FCMP_UNE: // True if unordered or not equal
+ // Canonicalize these to be 'fcmp uno %X, 0.0'.
+ I.setPredicate(FCmpInst::FCMP_UNO);
+ I.setOperand(1, Constant::getNullValue(Op0->getType()));
+ return &I;
+
+ case FCmpInst::FCMP_ORD: // True if ordered (no nans)
+ case FCmpInst::FCMP_OEQ: // True if ordered and equal
+ case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal
+ case FCmpInst::FCMP_OLE: // True if ordered and less than or equal
+ // Canonicalize these to be 'fcmp ord %X, 0.0'.
+ I.setPredicate(FCmpInst::FCMP_ORD);
+ I.setOperand(1, Constant::getNullValue(Op0->getType()));
+ return &I;
+ }
+ }
+
+ // Handle fcmp with constant RHS
+ if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+ if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
+ switch (LHSI->getOpcode()) {
+ case Instruction::PHI:
+ // Only fold fcmp into the PHI if the phi and fcmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I, true))
+ return NV;
+ break;
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC))
+ return NV;
+ break;
+ case Instruction::Select: {
+ // If either operand of the select is a constant, we can fold the
+ // comparison into the select arms, which will cause one to be
+ // constant folded and the select turned into a bitwise or.
+ Value *Op1 = 0, *Op2 = 0;
+ if (LHSI->hasOneUse()) {
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
+ // Fold the known value into the constant operand.
+ Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
+ // Insert a new FCmp of the other select operand.
+ Op2 = Builder->CreateFCmp(I.getPredicate(),
+ LHSI->getOperand(2), RHSC, I.getName());
+ } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
+ // Fold the known value into the constant operand.
+ Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
+ // Insert a new FCmp of the other select operand.
+ Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1),
+ RHSC, I.getName());
+ }
+ }
+
+ if (Op1)
+ return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
+ break;
+ }
+ case Instruction::Load:
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !cast<LoadInst>(LHSI)->isVolatile())
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
+ return Res;
+ }
+ break;
+ }
+ }
+
+ return Changed ? &I : 0;
+}
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
new file mode 100644
index 0000000..6c0ecc9
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -0,0 +1,613 @@
+//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for load, store and alloca.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDeadStore, "Number of dead stores eliminated");
+
+Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
+ // Convert "alloca Ty, C" (where C is a constant != 1) into "alloca [C x Ty], 1".
+ if (AI.isArrayAllocation()) { // Check C != 1
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
+ const Type *NewTy =
+ ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
+ assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
+ AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
+ New->setAlignment(AI.getAlignment());
+
+ // Scan to the end of the allocation instructions, to skip over a block of
+ // allocas if possible; also skip interleaved debug info.
+ //
+ BasicBlock::iterator It = New;
+ while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
+
+ // Now that It is pointing to the first non-alloca instruction in the block,
+ // insert our getelementptr instruction...
+ //
+ Value *NullIdx = Constant::getNullValue(Type::getInt32Ty(AI.getContext()));
+ Value *Idx[2];
+ Idx[0] = NullIdx;
+ Idx[1] = NullIdx;
+ Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2,
+ New->getName()+".sub", It);
+
+ // Now make everything use the getelementptr instead of the original
+ // allocation.
+ return ReplaceInstUsesWith(AI, V);
+ } else if (isa<UndefValue>(AI.getArraySize())) {
+ return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+ }
+ }
+
+ if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {
+ // If alloca'ing a zero byte object, replace the alloca with a null pointer.
+ // Note that we only do this for alloca's, because malloc should allocate
+ // and return a unique pointer, even for a zero byte allocation.
+ if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
+ return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+
+ // If the alignment is 0 (unspecified), assign it the preferred alignment.
+ if (AI.getAlignment() == 0)
+ AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
+ }
+
+ return 0;
+}
+
+
+ /// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
+static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
+ const TargetData *TD) {
+ User *CI = cast<User>(LI.getOperand(0));
+ Value *CastOp = CI->getOperand(0);
+
+ const PointerType *DestTy = cast<PointerType>(CI->getType());
+ const Type *DestPTy = DestTy->getElementType();
+ if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
+
+ // If the address spaces don't match, don't eliminate the cast.
+ if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
+ return 0;
+
+ const Type *SrcPTy = SrcTy->getElementType();
+
+ if (DestPTy->isInteger() || isa<PointerType>(DestPTy) ||
+ isa<VectorType>(DestPTy)) {
+ // If the source is an array, the code below will not succeed. Check to
+ // see if a trivial 'gep P, 0, 0' will help matters. Only do this for
+ // constants.
+ if (const ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
+ if (Constant *CSrc = dyn_cast<Constant>(CastOp))
+ if (ASrcTy->getNumElements() != 0) {
+ Value *Idxs[2];
+ Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
+ Idxs[1] = Idxs[0];
+ CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2);
+ SrcTy = cast<PointerType>(CastOp->getType());
+ SrcPTy = SrcTy->getElementType();
+ }
+
+ if (IC.getTargetData() &&
+ (SrcPTy->isInteger() || isa<PointerType>(SrcPTy) ||
+ isa<VectorType>(SrcPTy)) &&
+ // Do not allow turning this into a load of an integer, which is then
+ // cast to a pointer; that pessimizes pointer analysis a lot.
+ (isa<PointerType>(SrcPTy) == isa<PointerType>(LI.getType())) &&
+ IC.getTargetData()->getTypeSizeInBits(SrcPTy) ==
+ IC.getTargetData()->getTypeSizeInBits(DestPTy)) {
+
+ // Okay, we are casting from one integer or pointer type to another of
+ // the same size. Instead of casting the pointer before the load, cast
+ // the result of the loaded value.
+ Value *NewLoad =
+ IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
+ // Now cast the result of the load.
+ return new BitCastInst(NewLoad, LI.getType());
+ }
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
+ Value *Op = LI.getOperand(0);
+
+ // Attempt to improve the alignment.
+ if (TD) {
+ unsigned KnownAlign =
+ GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
+ if (KnownAlign >
+ (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
+ LI.getAlignment()))
+ LI.setAlignment(KnownAlign);
+ }
+
+ // load (cast X) --> cast (load X) iff safe.
+ if (isa<CastInst>(Op))
+ if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
+ return Res;
+
+ // None of the following transforms are legal for volatile loads.
+ if (LI.isVolatile()) return 0;
+
+ // Do really simple store-to-load forwarding and load CSE, to catch cases
+ // where there are several consecutive memory accesses to the same location,
+ // separated by a few arithmetic operations.
+ BasicBlock::iterator BBI = &LI;
+ if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI, 6))
+ return ReplaceInstUsesWith(LI, AvailableVal);
+
+ // load(gep null, ...) -> unreachable
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
+ const Value *GEPI0 = GEPI->getOperand(0);
+ // TODO: Consider a target hook for valid address spaces for this xform.
+ if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
+ // Insert a new store to null instruction before the load to indicate
+ // that this code is not reachable. We do this instead of inserting
+ // an unreachable instruction directly because we cannot modify the
+ // CFG.
+ new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+ }
+ }
+
+ // load null/undef -> unreachable
+ // TODO: Consider a target hook for valid address spaces for this xform.
+ if (isa<UndefValue>(Op) ||
+ (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
+ // Insert a new store to null instruction before the load to indicate that
+ // this code is not reachable. We do this instead of inserting an
+ // unreachable instruction directly because we cannot modify the CFG.
+ new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+ }
+
+ // Instcombine load (constantexpr_cast global) -> cast (load global)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
+ if (CE->isCast())
+ if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
+ return Res;
+
+ if (Op->hasOneUse()) {
+ // Change select and PHI nodes to select values instead of addresses: this
+ // helps alias analysis out a lot, allows many other simplifications, and
+ // exposes redundancy in the code.
+ //
+ // Note that we cannot do the transformation unless we know that the
+ // introduced loads cannot trap! Something like this is valid as long as
+ // the condition is always false: load (select bool %C, int* null, int* %G),
+ // but it would not be valid if we transformed it to load from null
+ // unconditionally.
+ //
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
+ // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
+ if (isSafeToLoadUnconditionally(SI->getOperand(1), SI) &&
+ isSafeToLoadUnconditionally(SI->getOperand(2), SI)) {
+ Value *V1 = Builder->CreateLoad(SI->getOperand(1),
+ SI->getOperand(1)->getName()+".val");
+ Value *V2 = Builder->CreateLoad(SI->getOperand(2),
+ SI->getOperand(2)->getName()+".val");
+ return SelectInst::Create(SI->getCondition(), V1, V2);
+ }
+
+ // load (select (cond, null, P)) -> load P
+ if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
+ if (C->isNullValue()) {
+ LI.setOperand(0, SI->getOperand(2));
+ return &LI;
+ }
+
+ // load (select (cond, P, null)) -> load P
+ if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
+ if (C->isNullValue()) {
+ LI.setOperand(0, SI->getOperand(1));
+ return &LI;
+ }
+ }
+ }
+ return 0;
+}
+
+/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
+/// when possible. This makes it generally easy to do alias analysis and/or
+/// SROA/mem2reg of the memory object.
+static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
+ User *CI = cast<User>(SI.getOperand(1));
+ Value *CastOp = CI->getOperand(0);
+
+ const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
+ const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
+ if (SrcTy == 0) return 0;
+
+ const Type *SrcPTy = SrcTy->getElementType();
+
+ if (!DestPTy->isInteger() && !isa<PointerType>(DestPTy))
+ return 0;
+
+ /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
+ /// to its first element. This allows us to handle things like:
+ /// store i32 xxx, (bitcast {foo*, float}* %P to i32*)
+ /// on 32-bit hosts.
+ SmallVector<Value*, 4> NewGEPIndices;
+
+ // If the source is an array, the code below will not succeed. Check to
+ // see if a trivial 'gep P, 0, 0' will help matters. Only do this for
+ // constants.
+ if (isa<ArrayType>(SrcPTy) || isa<StructType>(SrcPTy)) {
+ // Index through pointer.
+ Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
+ NewGEPIndices.push_back(Zero);
+
+ while (1) {
+ if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) {
+ if (!STy->getNumElements()) /* Struct can be empty {} */
+ break;
+ NewGEPIndices.push_back(Zero);
+ SrcPTy = STy->getElementType(0);
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
+ NewGEPIndices.push_back(Zero);
+ SrcPTy = ATy->getElementType();
+ } else {
+ break;
+ }
+ }
+
+ SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
+ }
+
+ if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy))
+ return 0;
+
+ // If the pointers point into different address spaces or if they point to
+ // values with different sizes, we can't do the transformation.
+ if (!IC.getTargetData() ||
+ SrcTy->getAddressSpace() !=
+ cast<PointerType>(CI->getType())->getAddressSpace() ||
+ IC.getTargetData()->getTypeSizeInBits(SrcPTy) !=
+ IC.getTargetData()->getTypeSizeInBits(DestPTy))
+ return 0;
+
+ // Okay, we are casting from one integer or pointer type to another of
+ // the same size. Instead of casting the pointer before
+ // the store, cast the value to be stored.
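+ //
+ // For example (illustrative), on a target with 32-bit pointers:
+ //   store i32 %x, i32* (bitcast i8** %P to i32*)
+ // becomes
+ //   %x.c = inttoptr i32 %x to i8*
+ //   store i8* %x.c, i8** %P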
+ Value *NewCast;
+ Value *SIOp0 = SI.getOperand(0);
+ Instruction::CastOps opcode = Instruction::BitCast;
+ const Type* CastSrcTy = SIOp0->getType();
+ const Type* CastDstTy = SrcPTy;
+ if (isa<PointerType>(CastDstTy)) {
+ if (CastSrcTy->isInteger())
+ opcode = Instruction::IntToPtr;
+ } else if (isa<IntegerType>(CastDstTy)) {
+ if (isa<PointerType>(SIOp0->getType()))
+ opcode = Instruction::PtrToInt;
+ }
+
+ // If the cast's source pointer was an aggregate, this is a store to its
+ // first scalar field; emit a GEP to index down to that field.
+ if (!NewGEPIndices.empty())
+ CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(),
+ NewGEPIndices.end());
+
+ NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
+ SIOp0->getName()+".c");
+ return new StoreInst(NewCast, CastOp);
+}
+
+/// equivalentAddressValues - Test if A and B will obviously have the same
+/// value. This includes recognizing that %t0 and %t1 will have the same
+/// value in code like this:
+/// %t0 = getelementptr @a, 0, 3
+/// store i32 0, i32* %t0
+/// %t1 = getelementptr @a, 0, 3
+/// %t2 = load i32* %t1
+///
+static bool equivalentAddressValues(Value *A, Value *B) {
+ // Test if the values are trivially equivalent.
+ if (A == B) return true;
+
+ // Test if the values come from identical arithmetic instructions.
+ // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
+ // it's only used to compare two uses within the same basic block, which
+ // means that they'll always either have the same value or one of them
+ // will have an undefined value.
+ if (isa<BinaryOperator>(A) ||
+ isa<CastInst>(A) ||
+ isa<PHINode>(A) ||
+ isa<GetElementPtrInst>(A))
+ if (Instruction *BI = dyn_cast<Instruction>(B))
+ if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
+ return true;
+
+ // Otherwise they may not be equivalent.
+ return false;
+}
+
+// If this instruction has two uses, one of which is an llvm.dbg.declare,
+// return the llvm.dbg.declare.
+DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) {
+ if (!V->hasNUses(2))
+ return 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI))
+ return DI;
+ if (isa<BitCastInst>(UI) && UI->hasOneUse()) {
+ if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI->use_begin()))
+ return DI;
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
+ Value *Val = SI.getOperand(0);
+ Value *Ptr = SI.getOperand(1);
+
+ // If the RHS is an alloca with a single use, zapify the store, making the
+ // alloca dead.
+ // If the RHS is an alloca with two uses, the other one being an
+ // llvm.dbg.declare, zapify the store and the declare, making the
+ // alloca dead. We must do this to prevent declares from affecting
+ // codegen.
+ if (!SI.isVolatile()) {
+ if (Ptr->hasOneUse()) {
+ if (isa<AllocaInst>(Ptr))
+ return EraseInstFromFunction(SI);
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+ if (isa<AllocaInst>(GEP->getOperand(0))) {
+ if (GEP->getOperand(0)->hasOneUse())
+ return EraseInstFromFunction(SI);
+ if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) {
+ EraseInstFromFunction(*DI);
+ return EraseInstFromFunction(SI);
+ }
+ }
+ }
+ }
+ if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) {
+ EraseInstFromFunction(*DI);
+ return EraseInstFromFunction(SI);
+ }
+ }
+
+ // Attempt to improve the alignment.
+ if (TD) {
+ unsigned KnownAlign =
+ GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
+ if (KnownAlign >
+ (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
+ SI.getAlignment()))
+ SI.setAlignment(KnownAlign);
+ }
+
+ // Do really simple DSE, to catch cases where there are several consecutive
+ // stores to the same location, separated by a few arithmetic operations. This
+ // situation often occurs with bitfield accesses.
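+ // For example (illustrative):
+ //   store i32 1, i32* %p
+ //   %t = add i32 %a, %b      ; no memory access, so the scan continues
+ //   store i32 2, i32* %p     ; makes the first store dead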
+ BasicBlock::iterator BBI = &SI;
+ for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
+ --ScanInsts) {
+ --BBI;
+ // Don't count debug info directives, lest they affect codegen,
+ // and skip pointer-to-pointer bitcasts, which are NOPs.
+ // It is necessary for correctness to skip those that feed into an
+ // llvm.dbg.declare, as these are not present when debugging is off.
+ if (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) {
+ ScanInsts++;
+ continue;
+ }
+
+ if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
+ // Prev store isn't volatile, and stores to the same location?
+ if (!PrevSI->isVolatile() && equivalentAddressValues(PrevSI->getOperand(1),
+ SI.getOperand(1))) {
+ ++NumDeadStore;
+ ++BBI;
+ EraseInstFromFunction(*PrevSI);
+ continue;
+ }
+ break;
+ }
+
+ // If this is a load, we have to stop. However, if the loaded value is the
+ // value being stored and it was loaded from the same pointer we are storing
+ // to, then *this* store is dead (X = load P; store X -> P).
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
+ !SI.isVolatile())
+ return EraseInstFromFunction(SI);
+
+ // Otherwise, this is a load from some other location. Stores before it
+ // may not be dead.
+ break;
+ }
+
+ // Don't skip over loads or things that can modify memory.
+ if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
+ break;
+ }
+
+
+ if (SI.isVolatile()) return 0; // Don't hack volatile stores.
+
+ // store X, null -> turns into 'unreachable' in SimplifyCFG
+ if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
+ if (!isa<UndefValue>(Val)) {
+ SI.setOperand(0, UndefValue::get(Val->getType()));
+ if (Instruction *U = dyn_cast<Instruction>(Val))
+ Worklist.Add(U); // Dropped a use.
+ }
+ return 0; // Do not modify these!
+ }
+
+ // store undef, Ptr -> noop
+ if (isa<UndefValue>(Val))
+ return EraseInstFromFunction(SI);
+
+ // If the pointer destination is a cast, see if we can fold the cast into the
+ // source instead.
+ if (isa<CastInst>(Ptr))
+ if (Instruction *Res = InstCombineStoreToCast(*this, SI))
+ return Res;
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (CE->isCast())
+ if (Instruction *Res = InstCombineStoreToCast(*this, SI))
+ return Res;
+
+
+ // If this store is the last instruction in the basic block (possibly
+ // excepting debug info instructions and the pointer bitcasts that feed
+ // into them), and if the block ends with an unconditional branch, try
+ // to move it to the successor block.
+ BBI = &SI;
+ do {
+ ++BBI;
+ } while (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType())));
+ if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
+ if (BI->isUnconditional())
+ if (SimplifyStoreAtEndOfBlock(SI))
+ return 0; // xform done!
+
+ return 0;
+}
+
+/// SimplifyStoreAtEndOfBlock - Turn things like:
+/// if () { *P = v1; } else { *P = v2 }
+/// into a phi node with a store in the successor.
+///
+/// Simplify things like:
+/// *P = v1; if () { *P = v2; }
+/// into a phi node with a store in the successor.
+///
+bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
+ BasicBlock *StoreBB = SI.getParent();
+
+ // Check to see if the successor block has exactly two incoming edges. If
+ // so, see if the other predecessor contains a store to the same location.
+ // If so, insert a PHI node (if needed) and move the stores down.
+ BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
+
+ // Determine whether Dest has exactly two predecessors and, if so, compute
+ // the other predecessor.
+ pred_iterator PI = pred_begin(DestBB);
+ BasicBlock *OtherBB = 0;
+ if (*PI != StoreBB)
+ OtherBB = *PI;
+ ++PI;
+ if (PI == pred_end(DestBB))
+ return false;
+
+ if (*PI != StoreBB) {
+ if (OtherBB)
+ return false;
+ OtherBB = *PI;
+ }
+ if (++PI != pred_end(DestBB))
+ return false;
+
+ // Bail out if all the relevant blocks aren't distinct (this can happen,
+ // for example, if SI is in an infinite loop)
+ if (StoreBB == DestBB || OtherBB == DestBB)
+ return false;
+
+ // Verify that the other block ends in a branch and is not otherwise empty.
+ BasicBlock::iterator BBI = OtherBB->getTerminator();
+ BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
+ if (!OtherBr || BBI == OtherBB->begin())
+ return false;
+
+ // If the other block ends in an unconditional branch, check for the 'if then
+ // else' case: there must be an instruction before the branch to match against.
+ StoreInst *OtherStore = 0;
+ if (OtherBr->isUnconditional()) {
+ --BBI;
+ // Skip over debugging info.
+ while (isa<DbgInfoIntrinsic>(BBI) ||
+ (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) {
+ if (BBI == OtherBB->begin())
+ return false;
+ --BBI;
+ }
+ // If this isn't a store, isn't a store to the same location, or if the
+ // alignments differ, bail out.
+ OtherStore = dyn_cast<StoreInst>(BBI);
+ if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
+ OtherStore->getAlignment() != SI.getAlignment())
+ return false;
+ } else {
+ // Otherwise, the other block ended with a conditional branch. If one of the
+ // destinations is StoreBB, then we have the if/then case.
+ if (OtherBr->getSuccessor(0) != StoreBB &&
+ OtherBr->getSuccessor(1) != StoreBB)
+ return false;
+
+ // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
+ // if/then triangle. See if there is a store to the same ptr as SI that
+ // lives in OtherBB.
+ for (;; --BBI) {
+ // Check to see if we find the matching store.
+ if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
+ if (OtherStore->getOperand(1) != SI.getOperand(1) ||
+ OtherStore->getAlignment() != SI.getAlignment())
+ return false;
+ break;
+ }
+ // If we find something that may be using or overwriting the stored
+ // value, or if we run out of instructions, we can't do the xform.
+ if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
+ BBI == OtherBB->begin())
+ return false;
+ }
+
+ // In order to eliminate the store in OtherBr, we have to
+ // make sure nothing reads or overwrites the stored value in
+ // StoreBB.
+ for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
+ // FIXME: This should really be AA driven.
+ if (I->mayReadFromMemory() || I->mayWriteToMemory())
+ return false;
+ }
+ }
+
+ // Insert a PHI node now if we need it.
+ Value *MergedVal = OtherStore->getOperand(0);
+ if (MergedVal != SI.getOperand(0)) {
+ PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge");
+ PN->reserveOperandSpace(2);
+ PN->addIncoming(SI.getOperand(0), SI.getParent());
+ PN->addIncoming(OtherStore->getOperand(0), OtherBB);
+ MergedVal = InsertNewInstBefore(PN, DestBB->front());
+ }
+
+ // Advance to a place where it is safe to insert the new store and
+ // insert it.
+ BBI = DestBB->getFirstNonPHI();
+ InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
+ OtherStore->isVolatile(),
+ SI.getAlignment()), *BBI);
+
+ // Nuke the old stores.
+ EraseInstFromFunction(SI);
+ EraseInstFromFunction(*OtherStore);
+ return true;
+}
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
new file mode 100644
index 0000000..6afc0cd
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -0,0 +1,695 @@
+//===- InstCombineMulDivRem.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for mul, fmul, sdiv, udiv, fdiv,
+// srem, urem, frem.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// SubOne - Subtract one from a ConstantInt.
+static Constant *SubOne(ConstantInt *C) {
+ return ConstantInt::get(C->getContext(), C->getValue()-1);
+}
+
+/// MultiplyOverflows - True if the multiply cannot be expressed in an int
+/// this size.
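+/// For example (illustrative, i8 with sign=true): 100 * 3 = 300 in the
+/// widened type, which exceeds the i8 signed maximum of 127, so this
+/// returns true.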
+static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
+ uint32_t W = C1->getBitWidth();
+ APInt LHSExt = C1->getValue(), RHSExt = C2->getValue();
+ if (sign) {
+ LHSExt.sext(W * 2);
+ RHSExt.sext(W * 2);
+ } else {
+ LHSExt.zext(W * 2);
+ RHSExt.zext(W * 2);
+ }
+
+ APInt MulExt = LHSExt * RHSExt;
+
+ if (!sign)
+ return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));
+
+ APInt Min = APInt::getSignedMinValue(W).sext(W * 2);
+ APInt Max = APInt::getSignedMaxValue(W).sext(W * 2);
+ return MulExt.slt(Min) || MulExt.sgt(Max);
+}
+
+Instruction *InstCombiner::visitMul(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (isa<UndefValue>(Op1)) // undef * X -> 0
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ // Simplify mul instructions with a constant RHS.
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1C)) {
+
+ // ((X << C1)*C2) == (X * (C2 << C1))
+ if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
+ if (SI->getOpcode() == Instruction::Shl)
+ if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
+ return BinaryOperator::CreateMul(SI->getOperand(0),
+ ConstantExpr::getShl(CI, ShOp));
+
+ if (CI->isZero())
+ return ReplaceInstUsesWith(I, Op1C); // X * 0 == 0
+ if (CI->equalsInt(1)) // X * 1 == X
+ return ReplaceInstUsesWith(I, Op0);
+ if (CI->isAllOnesValue()) // X * -1 == 0 - X
+ return BinaryOperator::CreateNeg(Op0, I.getName());
+
+ const APInt& Val = CI->getValue();
+ if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
+ return BinaryOperator::CreateShl(Op0,
+ ConstantInt::get(Op0->getType(), Val.logBase2()));
+ }
+ } else if (isa<VectorType>(Op1C->getType())) {
+ if (Op1C->isNullValue())
+ return ReplaceInstUsesWith(I, Op1C);
+
+ if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
+ if (Op1V->isAllOnesValue()) // X * -1 == 0 - X
+ return BinaryOperator::CreateNeg(Op0, I.getName());
+
+ // As above, vector X * splat(1) -> X.
+ if (Constant *Splat = Op1V->getSplatValue()) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Splat))
+ if (CI->equalsInt(1))
+ return ReplaceInstUsesWith(I, Op0);
+ }
+ }
+ }
+
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
+ if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() &&
+ isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1C)) {
+ // Canonicalize (X+C1)*C2 -> X*C2+C1*C2.
+ Value *Mul = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp");
+ Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1));
+ return BinaryOperator::CreateAdd(Mul, C1C2);
+ }
+
+ // Try to fold constant mul into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y
+ if (Value *Op1v = dyn_castNegVal(Op1))
+ return BinaryOperator::CreateMul(Op0v, Op1v);
+
+ // (X / Y) * Y = X - (X % Y)
+ // (X / Y) * -Y = (X % Y) - X
+ {
+ Value *Op1C = Op1;
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
+ if (!BO ||
+ (BO->getOpcode() != Instruction::UDiv &&
+ BO->getOpcode() != Instruction::SDiv)) {
+ Op1C = Op0;
+ BO = dyn_cast<BinaryOperator>(Op1);
+ }
+ Value *Neg = dyn_castNegVal(Op1C);
+ if (BO && BO->hasOneUse() &&
+ (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) &&
+ (BO->getOpcode() == Instruction::UDiv ||
+ BO->getOpcode() == Instruction::SDiv)) {
+ Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1);
+
+ // If the division is exact, X % Y is zero.
+ if (SDivOperator *SDiv = dyn_cast<SDivOperator>(BO))
+ if (SDiv->isExact()) {
+ if (Op1BO == Op1C)
+ return ReplaceInstUsesWith(I, Op0BO);
+ return BinaryOperator::CreateNeg(Op0BO);
+ }
+
+ Value *Rem;
+ if (BO->getOpcode() == Instruction::UDiv)
+ Rem = Builder->CreateURem(Op0BO, Op1BO);
+ else
+ Rem = Builder->CreateSRem(Op0BO, Op1BO);
+ Rem->takeName(BO);
+
+ if (Op1BO == Op1C)
+ return BinaryOperator::CreateSub(Op0BO, Rem);
+ return BinaryOperator::CreateSub(Rem, Op0BO);
+ }
+ }
+
+ // i1 mul -> i1 and.
+ if (I.getType()->isInteger(1))
+ return BinaryOperator::CreateAnd(Op0, Op1);
+
+ // X*(1 << Y) --> X << Y
+ // (1 << Y)*X --> X << Y
+ {
+ Value *Y;
+ if (match(Op0, m_Shl(m_One(), m_Value(Y))))
+ return BinaryOperator::CreateShl(Op1, Y);
+ if (match(Op1, m_Shl(m_One(), m_Value(Y))))
+ return BinaryOperator::CreateShl(Op0, Y);
+ }
+
+ // If one of the operands of the multiply is a cast from a boolean value, then
+ // we know the bool is either zero or one, so this is a 'masking' multiply.
+ // X * Y (where Y is 0 or 1) -> X & (0-Y)
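+ // For example (illustrative): if %b = zext i1 %c to i32, then
+ //   mul i32 %x, %b --> and i32 (sub i32 0, %b), %x
+ // since 0-%b is either all zeros or all ones.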
+ if (!isa<VectorType>(I.getType())) {
+ // -2 is "-1 << 1" so it is all bits set except the low one.
+ APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
+
+ Value *BoolCast = 0, *OtherOp = 0;
+ if (MaskedValueIsZero(Op0, Negative2))
+ BoolCast = Op0, OtherOp = Op1;
+ else if (MaskedValueIsZero(Op1, Negative2))
+ BoolCast = Op1, OtherOp = Op0;
+
+ if (BoolCast) {
+ Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()),
+ BoolCast, "tmp");
+ return BinaryOperator::CreateAnd(V, OtherOp);
+ }
+ }
+
+ return Changed ? &I : 0;
+}
+
+Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Simplify mul instructions with a constant RHS...
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
+ if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) {
+ // "In IEEE floating point, x*1 is not equivalent to x for nans. However,
+ // ANSI says we can drop signals, so we can do this anyway." (from GCC)
+ if (Op1F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0'
+ } else if (isa<VectorType>(Op1C->getType())) {
+ if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
+ // As above, vector X*splat(1.0) -> X in all defined cases.
+ if (Constant *Splat = Op1V->getSplatValue()) {
+ if (ConstantFP *F = dyn_cast<ConstantFP>(Splat))
+ if (F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0);
+ }
+ }
+ }
+
+ // Try to fold constant mul into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y
+ if (Value *Op1v = dyn_castFNegVal(Op1))
+ return BinaryOperator::CreateFMul(Op0v, Op1v);
+
+ return Changed ? &I : 0;
+}
+
+/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select
+/// instruction.
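+/// For example (illustrative):
+///   udiv i32 %x, (select i1 %c, i32 0, i32 %y) --> udiv i32 %x, %y
+/// because choosing the zero arm would make the division undefined anyway.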
+bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
+ SelectInst *SI = cast<SelectInst>(I.getOperand(1));
+
+ // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
+ int NonNullOperand = -1;
+ if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
+ if (ST->isNullValue())
+ NonNullOperand = 2;
+ // div/rem X, (Cond ? Y : 0) -> div/rem X, Y
+ if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
+ if (ST->isNullValue())
+ NonNullOperand = 1;
+
+ if (NonNullOperand == -1)
+ return false;
+
+ Value *SelectCond = SI->getOperand(0);
+
+ // Change the div/rem to use 'Y' instead of the select.
+ I.setOperand(1, SI->getOperand(NonNullOperand));
+
+ // Okay, we know we can replace the operand of the div/rem with 'Y' with no
+ // problem. However, the select, or the condition of the select, may have
+ // multiple uses. Based on our knowledge that the operand must be non-zero,
+ // propagate the known value for the select into other uses of it, and
+ // propagate a known value of the condition into its other users.
+
+ // If the select and condition have no other uses, don't bother scanning;
+ // early exit.
+ if (SI->use_empty() && SelectCond->hasOneUse())
+ return true;
+
+ // Scan the current block backward, looking for other uses of SI.
+ BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
+
+ while (BBI != BBFront) {
+ --BBI;
+ // If we found a call to a function, we can't assume it will return, so
+ // information from below it cannot be propagated above it.
+ if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
+ break;
+
+ // Replace uses of the select or its condition with the known values.
+ for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
+ I != E; ++I) {
+ if (*I == SI) {
+ *I = SI->getOperand(NonNullOperand);
+ Worklist.Add(BBI);
+ } else if (*I == SelectCond) {
+ *I = NonNullOperand == 1 ? ConstantInt::getTrue(BBI->getContext()) :
+ ConstantInt::getFalse(BBI->getContext());
+ Worklist.Add(BBI);
+ }
+ }
+
+ // If we passed the instruction, stop looking for it.
+ if (&*BBI == SI)
+ SI = 0;
+ if (&*BBI == SelectCond)
+ SelectCond = 0;
+
+ // If we ran out of things to eliminate, break out of the loop.
+ if (SelectCond == 0 && SI == 0)
+ break;
+ }
+ return true;
+}
+
+
+/// This function implements the transforms on div instructions that work
+/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is
+/// used by the visitors to those instructions.
+/// @brief Transforms common to all three div instructions
+Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // undef / X -> 0 for integer.
+ // undef / X -> undef for FP (the undef could be a snan).
+ if (isa<UndefValue>(Op0)) {
+ if (Op0->getType()->isFPOrFPVector())
+ return ReplaceInstUsesWith(I, Op0);
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ }
+
+ // X / undef -> undef
+ if (isa<UndefValue>(Op1))
+ return ReplaceInstUsesWith(I, Op1);
+
+ return 0;
+}
+
+/// This function implements the transforms common to both integer division
+/// instructions (udiv and sdiv). It is called by the visitors to those integer
+/// division instructions.
+/// @brief Common integer divide transforms
+Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // (sdiv X, X) --> 1 (udiv X, X) --> 1
+ if (Op0 == Op1) {
+ if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
+ Constant *CI = ConstantInt::get(Ty->getElementType(), 1);
+ std::vector<Constant*> Elts(Ty->getNumElements(), CI);
+ return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
+ }
+
+ Constant *CI = ConstantInt::get(I.getType(), 1);
+ return ReplaceInstUsesWith(I, CI);
+ }
+
+ if (Instruction *Common = commonDivTransforms(I))
+ return Common;
+
+ // Handle cases involving: [su]div X, (select Cond, Y, Z)
+ // This does not apply for fdiv.
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // div X, 1 == X
+ if (RHS->equalsInt(1))
+ return ReplaceInstUsesWith(I, Op0);
+
+ // (X / C1) / C2 -> X / (C1*C2)
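+ // e.g. (X udiv 4) udiv 8 --> X udiv 32. If C1*C2 overflows the type
+ // (illustrative, i8: 16 * 32 = 512), the quotient is known to be zero.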
+ if (Instruction *LHS = dyn_cast<Instruction>(Op0))
+ if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
+ if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {
+ if (MultiplyOverflows(RHS, LHSRHS,
+ I.getOpcode()==Instruction::SDiv))
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ else
+ return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
+ ConstantExpr::getMul(RHS, LHSRHS));
+ }
+
+ if (!RHS->isZero()) { // avoid X udiv 0
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+ }
+
+ // 0 / X == 0, we don't need to preserve faults!
+ if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0))
+ if (LHS->equalsInt(0))
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ // For i1 types the divisor cannot be zero (that would be undefined), so it
+ // must be one, and X / 1 == X.
+ if (I.getType()->isInteger(1))
+ return ReplaceInstUsesWith(I, Op0);
+
+ if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
+ if (ConstantInt *X = cast_or_null<ConstantInt>(Op1V->getSplatValue()))
+ // div X, 1 == X
+ if (X->isOne())
+ return ReplaceInstUsesWith(I, Op0);
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Handle the integer div common cases
+ if (Instruction *Common = commonIDivTransforms(I))
+ return Common;
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
+ // X udiv 2^C -> X >> C
+ // Check to see if this is an unsigned division with an exact power of 2,
+ // if so, convert to a right shift.
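+ // For example: udiv i32 %x, 16 --> lshr i32 %x, 4.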
+ if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2
+ return BinaryOperator::CreateLShr(Op0,
+ ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
+
+ // X udiv C, where C >= signbit
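+ // The quotient can only be 0 or 1 here, so (illustratively) this becomes
+ // select (icmp ult X, C), 0, 1.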
+ if (C->getValue().isNegative()) {
+ Value *IC = Builder->CreateICmpULT( Op0, C);
+ return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
+ ConstantInt::get(I.getType(), 1));
+ }
+ }
+
+ // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
+ if (BinaryOperator *RHSI = dyn_cast<BinaryOperator>(I.getOperand(1))) {
+ if (RHSI->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(RHSI->getOperand(0))) {
+ const APInt& C1 = cast<ConstantInt>(RHSI->getOperand(0))->getValue();
+ if (C1.isPowerOf2()) {
+ Value *N = RHSI->getOperand(1);
+ const Type *NTy = N->getType();
+ if (uint32_t C2 = C1.logBase2())
+ N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp");
+ return BinaryOperator::CreateLShr(Op0, N);
+ }
+ }
+ }
+
+ // udiv X, (Select Cond, C1, C2)
+ // --> Select Cond, (shr X, log2(C1)), (shr X, log2(C2))
+ // where C1 and C2 are powers of two.
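+ // For example (illustrative):
+ //   udiv i32 %x, (select i1 %c, i32 8, i32 16)
+ // --> select i1 %c, (lshr i32 %x, 3), (lshr i32 %x, 4)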
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
+ if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) {
+ const APInt &TVA = STO->getValue(), &FVA = SFO->getValue();
+ if (TVA.isPowerOf2() && FVA.isPowerOf2()) {
+ // Compute the shift amounts
+ uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2();
+ // Construct the "on true" case of the select
+ Constant *TC = ConstantInt::get(Op0->getType(), TSA);
+ Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t");
+
+ // Construct the "on false" case of the select
+ Constant *FC = ConstantInt::get(Op0->getType(), FSA);
+ Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f");
+
+ // Construct the select instruction and return it.
+ return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName());
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Handle the integer div common cases
+ if (Instruction *Common = commonIDivTransforms(I))
+ return Common;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // sdiv X, -1 == -X
+ if (RHS->isAllOnesValue())
+ return BinaryOperator::CreateNeg(Op0);
+
+ // sdiv X, C --> ashr X, log2(C)
+ if (cast<SDivOperator>(&I)->isExact() &&
+ RHS->getValue().isNonNegative() &&
+ RHS->getValue().isPowerOf2()) {
+ Value *ShAmt = llvm::ConstantInt::get(RHS->getType(),
+ RHS->getValue().exactLogBase2());
+ return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName());
+ }
+
+ // -X/C --> X/-C provided the negation doesn't overflow.
+ if (SubOperator *Sub = dyn_cast<SubOperator>(Op0))
+ if (isa<Constant>(Sub->getOperand(0)) &&
+ cast<Constant>(Sub->getOperand(0))->isNullValue() &&
+ Sub->hasNoSignedWrap())
+ return BinaryOperator::CreateSDiv(Sub->getOperand(1),
+ ConstantExpr::getNeg(RHS));
+ }
+
+ // If the sign bits of both operands are zero (i.e. we can prove they are
+ // unsigned inputs), turn this into a udiv.
+ if (I.getType()->isInteger()) {
+ APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
+ if (MaskedValueIsZero(Op0, Mask)) {
+ if (MaskedValueIsZero(Op1, Mask)) {
+ // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
+ return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ }
+ ConstantInt *ShiftedInt;
+ if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) &&
+ ShiftedInt->getValue().isPowerOf2()) {
+ // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
+ // Safe because the only negative value (1 << Y) can take on is
+ // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
+ // the sign bit set.
+ return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
+ }
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
+ return commonDivTransforms(I);
+}
+
+/// This function implements the transforms on rem instructions that work
+/// regardless of the kind of rem instruction it is (urem, srem, or frem). It
+/// is used by the visitors to those instructions.
+/// @brief Transforms common to all three rem instructions
+Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (isa<UndefValue>(Op0)) { // undef % X -> 0 for integer
+ if (I.getType()->isFPOrFPVector())
+ return ReplaceInstUsesWith(I, Op0); // undef % X -> undef for FP (could be SNaN)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ }
+ if (isa<UndefValue>(Op1))
+ return ReplaceInstUsesWith(I, Op1); // X % undef -> undef
+
+ // Handle cases involving: rem X, (select Cond, Y, Z)
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
+
+ return 0;
+}
+
+/// This function implements the transforms common to both integer remainder
+/// instructions (urem and srem). It is called by the visitors to those integer
+/// remainder instructions.
+/// @brief Common integer remainder transforms
+Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Instruction *common = commonRemTransforms(I))
+ return common;
+
+ // 0 % X == 0 for integer, we don't need to preserve faults!
+ if (Constant *LHS = dyn_cast<Constant>(Op0))
+ if (LHS->isNullValue())
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // X % 0 == undef, we don't need to preserve faults!
+ if (RHS->equalsInt(0))
+ return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
+
+ if (RHS->equalsInt(1)) // X % 1 == 0
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ } else if (isa<PHINode>(Op0I)) {
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ // See if we can fold away this rem instruction.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitURem(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Instruction *common = commonIRemTransforms(I))
+ return common;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // X urem 2^C -> X & (2^C - 1)
+ // Check to see if this is an unsigned remainder with an exact power of 2,
+ // if so, convert to a bitwise and.
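+ // For example: urem i32 %x, 8 --> and i32 %x, 7.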
+ if (RHS->getValue().isPowerOf2())
+ return BinaryOperator::CreateAnd(Op0, SubOne(RHS));
+ }
+
+ if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) {
+ // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
+ if (RHSI->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(RHSI->getOperand(0))) {
+ if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) {
+ Constant *N1 = Constant::getAllOnesValue(I.getType());
+ Value *Add = Builder->CreateAdd(RHSI, N1, "tmp");
+ return BinaryOperator::CreateAnd(Op0, Add);
+ }
+ }
+ }
+
+ // urem X, (select Cond, 2^C1, 2^C2)
+ // --> select Cond, (and X, 2^C1-1), (and X, 2^C2-1)
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) {
+ if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
+ if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) {
+ // STO == 0 and SFO == 0 handled above.
+ if ((STO->getValue().isPowerOf2()) &&
+ (SFO->getValue().isPowerOf2())) {
+ Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO),
+ SI->getName()+".t");
+ Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO),
+ SI->getName()+".f");
+ return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd);
+ }
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Handle the integer rem common cases
+ if (Instruction *Common = commonIRemTransforms(I))
+ return Common;
+
+ if (Value *RHSNeg = dyn_castNegVal(Op1))
+ if (!isa<Constant>(RHSNeg) ||
+ (isa<ConstantInt>(RHSNeg) &&
+ cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) {
+ // X % -Y -> X % Y
+ Worklist.AddValue(I.getOperand(1));
+ I.setOperand(1, RHSNeg);
+ return &I;
+ }
+
+ // If the sign bits of both operands are zero (i.e. we can prove they are
+ // unsigned inputs), turn this into a urem.
+ if (I.getType()->isInteger()) {
+ APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
+ if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
+ // X srem Y -> X urem Y, iff X and Y don't have sign bit set
+ return BinaryOperator::CreateURem(Op0, Op1, I.getName());
+ }
+ }
+
+ // If it's a constant vector, flip any negative values positive.
+ if (ConstantVector *RHSV = dyn_cast<ConstantVector>(Op1)) {
+ unsigned VWidth = RHSV->getNumOperands();
+
+ bool hasNegative = false;
+ for (unsigned i = 0; !hasNegative && i != VWidth; ++i)
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i)))
+ if (RHS->getValue().isNegative())
+ hasNegative = true;
+
+ if (hasNegative) {
+ std::vector<Constant *> Elts(VWidth);
+ for (unsigned i = 0; i != VWidth; ++i) {
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) {
+ if (RHS->getValue().isNegative())
+ Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
+ else
+ Elts[i] = RHS;
+ }
+ }
+
+ Constant *NewRHSV = ConstantVector::get(Elts);
+ if (NewRHSV != RHSV) {
+ Worklist.AddValue(I.getOperand(1));
+ I.setOperand(1, NewRHSV);
+ return &I;
+ }
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
+ return commonRemTransforms(I);
+}
+
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
new file mode 100644
index 0000000..bb7632f
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -0,0 +1,841 @@
+//===- InstCombinePHI.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitPHINode function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)]
+/// and if a/b/c and the adds all have a single use, turn this into a phi
+/// and a single binop.
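+///
+/// For example (illustrative), with %a = add i32 %x, %y1 in %bb1 and
+/// %b = add i32 %x, %y2 in %bb2:
+///   %r = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]
+/// becomes
+///   %y1.pn = phi i32 [ %y1, %bb1 ], [ %y2, %bb2 ]
+///   %r = add i32 %x, %y1.pn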
+Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
+ Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
+ assert(isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst));
+ unsigned Opc = FirstInst->getOpcode();
+ Value *LHSVal = FirstInst->getOperand(0);
+ Value *RHSVal = FirstInst->getOperand(1);
+
+ const Type *LHSType = LHSVal->getType();
+ const Type *RHSType = RHSVal->getType();
+
+ // Scan to see if all operands are the same opcode, and all have one use.
+ for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
+ Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ if (!I || I->getOpcode() != Opc || !I->hasOneUse() ||
+ // Verify type of the LHS matches so we don't fold cmps of different
+ // types or GEPs with different index types.
+ I->getOperand(0)->getType() != LHSType ||
+ I->getOperand(1)->getType() != RHSType)
+ return 0;
+
+ // If they are CmpInst instructions, check their predicates
+ if (Opc == Instruction::ICmp || Opc == Instruction::FCmp)
+ if (cast<CmpInst>(I)->getPredicate() !=
+ cast<CmpInst>(FirstInst)->getPredicate())
+ return 0;
+
+ // Keep track of which operand needs a phi node.
+ if (I->getOperand(0) != LHSVal) LHSVal = 0;
+ if (I->getOperand(1) != RHSVal) RHSVal = 0;
+ }
+
+ // If both LHS and RHS would need a PHI, don't do this transformation,
+ // because it would increase the number of PHIs entering the block,
+ // which leads to higher register pressure. This is especially
+ // bad when the PHIs are in the header of a loop.
+ if (!LHSVal && !RHSVal)
+ return 0;
+
+ // Otherwise, this is safe to transform!
+
+ Value *InLHS = FirstInst->getOperand(0);
+ Value *InRHS = FirstInst->getOperand(1);
+ PHINode *NewLHS = 0, *NewRHS = 0;
+ if (LHSVal == 0) {
+ NewLHS = PHINode::Create(LHSType,
+ FirstInst->getOperand(0)->getName() + ".pn");
+ NewLHS->reserveOperandSpace(PN.getNumOperands()/2);
+ NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0));
+ InsertNewInstBefore(NewLHS, PN);
+ LHSVal = NewLHS;
+ }
+
+ if (RHSVal == 0) {
+ NewRHS = PHINode::Create(RHSType,
+ FirstInst->getOperand(1)->getName() + ".pn");
+ NewRHS->reserveOperandSpace(PN.getNumOperands()/2);
+ NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0));
+ InsertNewInstBefore(NewRHS, PN);
+ RHSVal = NewRHS;
+ }
+
+ // Add all operands to the new PHIs.
+ if (NewLHS || NewRHS) {
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i));
+ if (NewLHS) {
+ Value *NewInLHS = InInst->getOperand(0);
+ NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i));
+ }
+ if (NewRHS) {
+ Value *NewInRHS = InInst->getOperand(1);
+ NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i));
+ }
+ }
+ }
+
+ if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
+ return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
+ CmpInst *CIOp = cast<CmpInst>(FirstInst);
+ return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+ LHSVal, RHSVal);
+}
+
+Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
+ GetElementPtrInst *FirstInst = cast<GetElementPtrInst>(PN.getIncomingValue(0));
+
+ SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
+ FirstInst->op_end());
+ // This is true if all GEP bases are allocas and if all indices into them are
+ // constants.
+ bool AllBasePointersAreAllocas = true;
+
+ // We don't want to replace this phi if the replacement would require
+ // more than one phi, which leads to higher register pressure. This is
+ // especially bad when the PHIs are in the header of a loop.
+ bool NeededPhi = false;
+
+ // Scan to see if all operands are GEPs of the same shape, all with one use.
+ for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
+ if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() ||
+ GEP->getNumOperands() != FirstInst->getNumOperands())
+ return 0;
+
+ // Keep track of whether or not all GEPs are of alloca pointers.
+ if (AllBasePointersAreAllocas &&
+ (!isa<AllocaInst>(GEP->getOperand(0)) ||
+ !GEP->hasAllConstantIndices()))
+ AllBasePointersAreAllocas = false;
+
+ // Compare the operand lists.
+ for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
+ if (FirstInst->getOperand(op) == GEP->getOperand(op))
+ continue;
+
+ // Don't merge two GEPs when two operands differ (introducing phi nodes)
+ // if one of the PHIs has a constant for the index. The index may be
+ // substantially cheaper to compute for the constants, so making it a
+ // variable index could pessimize the path. This also handles the case
+ // for struct indices, which must always be constant.
+ if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
+ isa<ConstantInt>(GEP->getOperand(op)))
+ return 0;
+
+ if (FirstInst->getOperand(op)->getType() != GEP->getOperand(op)->getType())
+ return 0;
+
+ // If we already needed a PHI for an earlier operand, and another operand
+ // also requires a PHI, we'd be introducing more PHIs than we're
+ // eliminating, which increases register pressure on entry to the PHI's
+ // block.
+ if (NeededPhi)
+ return 0;
+
+ FixedOperands[op] = 0; // Needs a PHI.
+ NeededPhi = true;
+ }
+ }
+
+ // If all of the base pointers of the PHI'd GEPs are from allocas, don't
+ // bother doing this transformation. At best, this will just save a bit of
+ // offset calculation, but all the predecessors will have to materialize the
+ // stack address into a register anyway. We'd actually rather *clone* the
+ // load up into the predecessors so that we have a load of a gep of an alloca,
+ // which can usually all be folded into the load.
+ if (AllBasePointersAreAllocas)
+ return 0;
+
+ // Otherwise, this is safe to transform. Insert PHI nodes for each operand
+ // that is variable.
+ SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
+
+ bool HasAnyPHIs = false;
+ for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
+ if (FixedOperands[i]) continue; // operand doesn't need a phi.
+ Value *FirstOp = FirstInst->getOperand(i);
+ PHINode *NewPN = PHINode::Create(FirstOp->getType(),
+ FirstOp->getName()+".pn");
+ InsertNewInstBefore(NewPN, PN);
+
+ NewPN->reserveOperandSpace(e);
+ NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
+ OperandPhis[i] = NewPN;
+ FixedOperands[i] = NewPN;
+ HasAnyPHIs = true;
+ }
+
+
+ // Add all operands to the new PHIs.
+ if (HasAnyPHIs) {
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ GetElementPtrInst *InGEP = cast<GetElementPtrInst>(PN.getIncomingValue(i));
+ BasicBlock *InBB = PN.getIncomingBlock(i);
+
+ for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
+ if (PHINode *OpPhi = OperandPhis[op])
+ OpPhi->addIncoming(InGEP->getOperand(op), InBB);
+ }
+ }
+
+ Value *Base = FixedOperands[0];
+ return cast<GEPOperator>(FirstInst)->isInBounds() ?
+ GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1,
+ FixedOperands.end()) :
+ GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
+ FixedOperands.end());
+}
+
+
+/// isSafeAndProfitableToSinkLoad - Return true if we know that it is safe to
+/// sink the load out of the block that defines it. This means that it must be
+/// obvious the value of the load is not changed from the point of the load to
+/// the end of the block it is in.
+///
+/// Finally, it is safe, but not profitable, to sink a load targeting a
+/// non-address-taken alloca. Doing so will cause us to not promote the alloca
+/// to a register.
+static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
+ BasicBlock::iterator BBI = L, E = L->getParent()->end();
+
+ for (++BBI; BBI != E; ++BBI)
+ if (BBI->mayWriteToMemory())
+ return false;
+
+ // Check for a non-address-taken alloca. If the alloca isn't already
+ // address-taken, it isn't profitable to do this xform.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
+ bool isAddressTaken = false;
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI) {
+ if (isa<LoadInst>(UI)) continue;
+ if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+ // If storing TO the alloca, then the address isn't taken.
+ if (SI->getOperand(1) == AI) continue;
+ }
+ isAddressTaken = true;
+ break;
+ }
+
+ if (!isAddressTaken && AI->isStaticAlloca())
+ return false;
+ }
+
+ // If this load is a load from a GEP with a constant offset from an alloca,
+ // then we don't want to sink it. In its present form, it will be
+ // load [constant stack offset]. Sinking it will cause us to have to
+ // materialize the stack addresses in each predecessor in a register only to
+ // do a shared load from register in the successor.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(L->getOperand(0)))
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))
+ if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())
+ return false;
+
+ return true;
+}
+
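+/// FoldPHIArgLoadIntoPHI - A sketch of the transform (illustrative):
+///   %v = phi [ load %p1 (from %bb1) ], [ load %p2 (from %bb2) ]
+/// becomes
+///   %v.in = phi [ %p1, %bb1 ], [ %p2, %bb2 ]
+///   %v = load %v.in
+/// provided every incoming load is safe and profitable to sink.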
+Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
+ LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0));
+
+ // When processing loads, we need to propagate two bits of information to the
+ // sunk load: whether it is volatile, and what its alignment is. We currently
+ // don't sink loads when some have their alignment specified and some don't.
+ // visitLoadInst will propagate an alignment onto the load when TD is around,
+ // and if TD isn't around, we can't handle the mixed case.
+ bool isVolatile = FirstLI->isVolatile();
+ unsigned LoadAlignment = FirstLI->getAlignment();
+
+ // We can't sink the load if the loaded value could be modified between the
+ // load and the PHI.
+ if (FirstLI->getParent() != PN.getIncomingBlock(0) ||
+ !isSafeAndProfitableToSinkLoad(FirstLI))
+ return 0;
+
+ // If the PHI is of volatile loads and the load block has multiple
+ // successors, sinking it would remove a load of the volatile value from
+ // the path through the other successor.
+ if (isVolatile &&
+ FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+
+ // Check to see if all arguments are the same operation.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
+ if (!LI || !LI->hasOneUse())
+ return 0;
+
+ // We can't sink the load if the loaded value could be modified between
+ // the load and the PHI.
+ if (LI->isVolatile() != isVolatile ||
+ LI->getParent() != PN.getIncomingBlock(i) ||
+ !isSafeAndProfitableToSinkLoad(LI))
+ return 0;
+
+ // If some of the loads have an alignment specified but not all of them,
+ // we can't do the transformation.
+ if ((LoadAlignment != 0) != (LI->getAlignment() != 0))
+ return 0;
+
+ LoadAlignment = std::min(LoadAlignment, LI->getAlignment());
+
+ // If the PHI is of volatile loads and the load block has multiple
+ // successors, sinking it would remove a load of the volatile value from
+ // the path through the other successor.
+ if (isVolatile &&
+ LI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+ }
+
+ // Okay, they are all the same operation. Create a new PHI node of the
+ // correct type, and PHI together all of the LHS's of the instructions.
+ PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(),
+ PN.getName()+".in");
+ NewPN->reserveOperandSpace(PN.getNumOperands()/2);
+
+ Value *InVal = FirstLI->getOperand(0);
+ NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
+
+ // Add all operands to the new PHI.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0);
+ if (NewInVal != InVal)
+ InVal = 0;
+ NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ }
+
+ Value *PhiVal;
+ if (InVal) {
+ // The new PHI merges the same value from every predecessor. This is really
+ // common, so we handle it intelligently here for compile-time speed.
+ PhiVal = InVal;
+ delete NewPN;
+ } else {
+ InsertNewInstBefore(NewPN, PN);
+ PhiVal = NewPN;
+ }
+
+ // If this was a volatile load that we are merging, make sure to loop through
+ // and mark all the input loads as non-volatile. If we don't do this, we will
+ // insert a new volatile load and the old ones will not be deletable.
+ if (isVolatile)
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
+
+ return new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
+}
+
+/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
+/// operator and they all are only used by the PHI, PHI together their
+/// inputs, and do the operation once, to the result of the PHI.
+Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
+ Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
+
+ if (isa<GetElementPtrInst>(FirstInst))
+ return FoldPHIArgGEPIntoPHI(PN);
+ if (isa<LoadInst>(FirstInst))
+ return FoldPHIArgLoadIntoPHI(PN);
+
+ // Scan the instruction, looking for input operations that can be folded away.
+ // If all input operands to the phi are the same instruction (e.g. a cast from
+ // the same type or "+42") we can pull the operation through the PHI, reducing
+ // code size and simplifying code.
+ Constant *ConstantOp = 0;
+ const Type *CastSrcTy = 0;
+
+ if (isa<CastInst>(FirstInst)) {
+ CastSrcTy = FirstInst->getOperand(0)->getType();
+
+ // Be careful about transforming integer PHIs. We don't want to pessimize
+ // the code by turning an i32 into an i1293.
+ if (isa<IntegerType>(PN.getType()) && isa<IntegerType>(CastSrcTy)) {
+ if (!ShouldChangeType(PN.getType(), CastSrcTy))
+ return 0;
+ }
+ } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
+ // Can fold binop, compare or shift here if the RHS is a constant,
+ // otherwise call FoldPHIArgBinOpIntoPHI.
+ ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
+ if (ConstantOp == 0)
+ return FoldPHIArgBinOpIntoPHI(PN);
+ } else {
+ return 0; // Cannot fold this operation.
+ }
+
+ // Check to see if all arguments are the same operation.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst))
+ return 0;
+ if (CastSrcTy) {
+ if (I->getOperand(0)->getType() != CastSrcTy)
+ return 0; // Cast operation must match.
+ } else if (I->getOperand(1) != ConstantOp) {
+ return 0;
+ }
+ }
+
+ // Okay, they are all the same operation. Create a new PHI node of the
+ // correct type, and PHI together all of the LHS's of the instructions.
+ PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(),
+ PN.getName()+".in");
+ NewPN->reserveOperandSpace(PN.getNumOperands()/2);
+
+ Value *InVal = FirstInst->getOperand(0);
+ NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
+
+ // Add all operands to the new PHI.
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0);
+ if (NewInVal != InVal)
+ InVal = 0;
+ NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ }
+
+ Value *PhiVal;
+ if (InVal) {
+ // The new PHI merges the same value from every predecessor. This is really
+ // common, so we handle it intelligently here for compile-time speed.
+ PhiVal = InVal;
+ delete NewPN;
+ } else {
+ InsertNewInstBefore(NewPN, PN);
+ PhiVal = NewPN;
+ }
+
+ // Insert and return the new operation.
+ if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst))
+ return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType());
+
+ if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
+ return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
+
+ CmpInst *CIOp = cast<CmpInst>(FirstInst);
+ return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+ PhiVal, ConstantOp);
+}
+
+/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle
+/// that is dead.
+static bool DeadPHICycle(PHINode *PN,
+ SmallPtrSet<PHINode*, 16> &PotentiallyDeadPHIs) {
+ if (PN->use_empty()) return true;
+ if (!PN->hasOneUse()) return false;
+
+ // Remember this node, and if we find the cycle, return.
+ if (!PotentiallyDeadPHIs.insert(PN))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PotentiallyDeadPHIs.size() == 16)
+ return false;
+
+ if (PHINode *PU = dyn_cast<PHINode>(PN->use_back()))
+ return DeadPHICycle(PU, PotentiallyDeadPHIs);
+
+ return false;
+}
+
+/// PHIsEqualValue - Return true if this phi node is always equal to
+/// NonPhiInVal. This happens with mutually cyclic phi nodes like:
+/// z = some value; x = phi (y, z); y = phi (x, z)
+static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
+ SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
+ // See if we already saw this PHI node.
+ if (!ValueEqualPHIs.insert(PN))
+ return true;
+
+ // Don't scan crazily complex things.
+ if (ValueEqualPHIs.size() == 16)
+ return false;
+
+ // Scan the operands to see if they are either phi nodes or are equal to
+ // the value.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Op = PN->getIncomingValue(i);
+ if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
+ if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
+ return false;
+ } else if (Op != NonPhiInVal)
+ return false;
+ }
+
+ return true;
+}
+
+
+namespace {
+struct PHIUsageRecord {
+ unsigned PHIId; // The ID # of the PHI (something deterministic to sort on)
+ unsigned Shift; // The amount shifted.
+ Instruction *Inst; // The trunc instruction.
+
+ PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
+ : PHIId(pn), Shift(Sh), Inst(User) {}
+
+ bool operator<(const PHIUsageRecord &RHS) const {
+ if (PHIId < RHS.PHIId) return true;
+ if (PHIId > RHS.PHIId) return false;
+ if (Shift < RHS.Shift) return true;
+ if (Shift > RHS.Shift) return false;
+ return Inst->getType()->getPrimitiveSizeInBits() <
+ RHS.Inst->getType()->getPrimitiveSizeInBits();
+ }
+};
+
+struct LoweredPHIRecord {
+ PHINode *PN; // The PHI that was lowered.
+ unsigned Shift; // The amount shifted.
+ unsigned Width; // The width extracted.
+
+ LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty)
+ : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
+
+ // Ctor form used by DenseMap.
+ LoweredPHIRecord(PHINode *pn, unsigned Sh)
+ : PN(pn), Shift(Sh), Width(0) {}
+};
+}
+
+namespace llvm {
+ template<>
+ struct DenseMapInfo<LoweredPHIRecord> {
+ static inline LoweredPHIRecord getEmptyKey() {
+ return LoweredPHIRecord(0, 0);
+ }
+ static inline LoweredPHIRecord getTombstoneKey() {
+ return LoweredPHIRecord(0, 1);
+ }
+ static unsigned getHashValue(const LoweredPHIRecord &Val) {
+ return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
+ (Val.Width>>3);
+ }
+ static bool isEqual(const LoweredPHIRecord &LHS,
+ const LoweredPHIRecord &RHS) {
+ return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
+ LHS.Width == RHS.Width;
+ }
+ };
+ template <>
+ struct isPodLike<LoweredPHIRecord> { static const bool value = true; };
+}
+
+
+/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an
+/// illegal type: see if it is only used by trunc or trunc(lshr) operations. If
+/// so, we split the PHI into the various pieces being extracted. This sort of
+/// thing is introduced when SROA promotes an aggregate to large integer values.
+///
+/// TODO: The user of the trunc may be a bitcast to float/double/vector or an
+/// inttoptr. We should produce new PHIs in the right type.
+///
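+/// For example (illustrative): an i64 PHI whose only users are
+///   %lo = trunc i64 %phi to i32
+///   %hi = lshr i64 %phi, 32     ; single use: a trunc to i32
+/// is rewritten as separate i32 PHIs, one per extracted piece.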
+Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
+ // PHIUsers - Keep track of all of the truncated values extracted from a set
+ // of PHIs, along with their offset. These are the things we want to rewrite.
+ SmallVector<PHIUsageRecord, 16> PHIUsers;
+
+ // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
+ // nodes which are extracted from. PHIsToSlice is an ordered list of PHIs
+ // whose uses we still need to check (to ensure they are all extracts), and
+ // PHIsInspected is a set we use to avoid revisiting PHIs.
+ SmallVector<PHINode*, 8> PHIsToSlice;
+ SmallPtrSet<PHINode*, 8> PHIsInspected;
+
+ PHIsToSlice.push_back(&FirstPhi);
+ PHIsInspected.insert(&FirstPhi);
+
+ for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
+ PHINode *PN = PHIsToSlice[PHIId];
+
+ // Scan the input list of the PHI. If any input is an invoke, and if the
+ // input is defined in the predecessor, then we won't be able to split the
+ // critical edge required to insert a truncate. Because of this, we have to
+ // bail out.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i));
+ if (II == 0) continue;
+ if (II->getParent() != PN->getIncomingBlock(i))
+ continue;
+
+ // If we have an invoke defined directly in the predecessor, then we have
+ // a critical edge where we need to put the truncate. Since we can't
+ // split the edge in instcombine, we have to bail out.
+ return 0;
+ }
+
+
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // If the user is a PHI, inspect its uses recursively.
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (PHIsInspected.insert(UserPN))
+ PHIsToSlice.push_back(UserPN);
+ continue;
+ }
+
+ // Truncates are always ok.
+ if (isa<TruncInst>(User)) {
+ PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
+ continue;
+ }
+
+ // Otherwise it must be an lshr which can only be used by one trunc.
+ if (User->getOpcode() != Instruction::LShr ||
+ !User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
+ !isa<ConstantInt>(User->getOperand(1)))
+ return 0;
+
+ unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
+ PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
+ }
+ }
+
+ // If we have no extract users, all uses must be self uses within the PHI
+ // cycle; just nuke the PHI.
+ if (PHIUsers.empty())
+ return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
+
+ // If this phi node is transformable, create new PHIs for all the pieces
+ // extracted out of it. First, sort the users by their offset and size.
+ array_pod_sort(PHIUsers.begin(), PHIUsers.end());
+
+ DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
+ );
+
+ // PredValues - This is a temporary used when rewriting PHI nodes. It is
+ // hoisted out here to avoid construction/destruction thrashing.
+ DenseMap<BasicBlock*, Value*> PredValues;
+
+ // ExtractedVals - Each new PHI we introduce is saved here so we don't
+ // introduce redundant PHIs.
+ DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
+
+ for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
+ unsigned PHIId = PHIUsers[UserI].PHIId;
+ PHINode *PN = PHIsToSlice[PHIId];
+ unsigned Offset = PHIUsers[UserI].Shift;
+ const Type *Ty = PHIUsers[UserI].Inst->getType();
+
+ PHINode *EltPHI;
+
+ // If we've already lowered a user like this, reuse the previously lowered
+ // value.
+ if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
+
+ // Otherwise, create the new PHI node for this user.
+ EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN);
+ assert(EltPHI->getType() != PN->getType() &&
+ "Truncate didn't shrink phi?");
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ Value *&PredVal = PredValues[Pred];
+
+ // If we already have a value for this predecessor, reuse it.
+ if (PredVal) {
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+
+ // Handle the PHI self-reuse case.
+ Value *InVal = PN->getIncomingValue(i);
+ if (InVal == PN) {
+ PredVal = EltPHI;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+
+ if (PHINode *InPHI = dyn_cast<PHINode>(InVal)) {
+ // If the incoming value was a PHI, and if it was one of the PHIs we
+ // already rewrote, just use the lowered value.
+ if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) {
+ PredVal = Res;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+ }
+
+ // Otherwise, do an extract in the predecessor.
+ Builder->SetInsertPoint(Pred, Pred->getTerminator());
+ Value *Res = InVal;
+ if (Offset)
+ Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(),
+ Offset), "extract");
+ Res = Builder->CreateTrunc(Res, Ty, "extract.t");
+ PredVal = Res;
+ EltPHI->addIncoming(Res, Pred);
+
+ // If the incoming value was a PHI, and if it was one of the PHIs we are
+ // rewriting, we will ultimately delete the code we inserted. This
+ // means we need to revisit that PHI to make sure we extract out the
+ // needed piece.
+ if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i)))
+ if (PHIsInspected.count(OldInVal)) {
+ unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
+ OldInVal)-PHIsToSlice.begin();
+ PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
+ cast<Instruction>(Res)));
+ ++UserE;
+ }
+ }
+ PredValues.clear();
+
+ DEBUG(errs() << " Made element PHI for offset " << Offset << ": "
+ << *EltPHI << '\n');
+ ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
+ }
+
+ // Replace the use of this piece with the PHI node.
+ ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
+ }
+
+ // Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
+ // with undefs.
+ Value *Undef = UndefValue::get(FirstPhi.getType());
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ ReplaceInstUsesWith(*PHIsToSlice[i], Undef);
+ return ReplaceInstUsesWith(FirstPhi, Undef);
+}
+
+// PHINode simplification
+//
+Instruction *InstCombiner::visitPHINode(PHINode &PN) {
+ // If LCSSA is around, don't mess with Phi nodes
+ if (MustPreserveLCSSA) return 0;
+
+ if (Value *V = PN.hasConstantValue())
+ return ReplaceInstUsesWith(PN, V);
+
+ // If all PHI operands are the same operation, pull them through the PHI,
+ // reducing code size.
+ if (isa<Instruction>(PN.getIncomingValue(0)) &&
+ isa<Instruction>(PN.getIncomingValue(1)) &&
+ cast<Instruction>(PN.getIncomingValue(0))->getOpcode() ==
+ cast<Instruction>(PN.getIncomingValue(1))->getOpcode() &&
+ // FIXME: The hasOneUse check will fail for PHIs that use the value more
+ // than once.
+ PN.getIncomingValue(0)->hasOneUse())
+ if (Instruction *Result = FoldPHIArgOpIntoPHI(PN))
+ return Result;
+
+ // If this is a trivial cycle in the PHI node graph, remove it. Basically, if
+ // this PHI only has a single use (a PHI), and if that PHI only has one use (a
+ // PHI)... break the cycle.
+ if (PN.hasOneUse()) {
+ Instruction *PHIUser = cast<Instruction>(PN.use_back());
+ if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) {
+ SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;
+ PotentiallyDeadPHIs.insert(&PN);
+ if (DeadPHICycle(PU, PotentiallyDeadPHIs))
+ return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
+ }
+
+ // If this phi has a single use, and if that use just computes a value for
+ // the next iteration of a loop, delete the phi. This occurs with unused
+ // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this
+ // common case here is good because the only other things that catch this
+ // are induction variable analysis (sometimes) and ADCE, which is only run
+ // late.
+ if (PHIUser->hasOneUse() &&
+ (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) &&
+ PHIUser->use_back() == &PN) {
+ return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
+ }
+ }
+
+ // We sometimes end up with phi cycles that non-obviously end up being the
+ // same value, for example:
+ // z = some value; x = phi (y, z); y = phi (x, z)
+ // where the phi nodes don't necessarily need to be in the same block. Do a
+ // quick check to see if the PHI node only contains a single non-phi value, if
+ // so, scan to see if the phi cycle is actually equal to that value.
+ {
+ unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues();
+ // Scan for the first non-phi operand.
+ while (InValNo != NumOperandVals &&
+ isa<PHINode>(PN.getIncomingValue(InValNo)))
+ ++InValNo;
+
+ if (InValNo != NumOperandVals) {
+ Value *NonPhiInVal = PN.getOperand(InValNo);
+
+ // Scan the rest of the operands to see if there are any conflicts, if so
+ // there is no need to recursively scan other phis.
+ for (++InValNo; InValNo != NumOperandVals; ++InValNo) {
+ Value *OpVal = PN.getIncomingValue(InValNo);
+ if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
+ break;
+ }
+
+ // If we scanned over all operands, then we have one unique value plus
+ // phi values. Scan PHI nodes to see if they all merge in each other or
+ // the value.
+ if (InValNo == NumOperandVals) {
+ SmallPtrSet<PHINode*, 16> ValueEqualPHIs;
+ if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
+ return ReplaceInstUsesWith(PN, NonPhiInVal);
+ }
+ }
+ }
+
+ // If there are multiple PHIs, sort their operands so that they all list
+ // the blocks in the same order. This will help identical PHIs be eliminated
+ // by other passes. Other passes shouldn't depend on this for correctness
+ // however.
+ PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin());
+ if (&PN != FirstPN)
+ for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *BBA = PN.getIncomingBlock(i);
+ BasicBlock *BBB = FirstPN->getIncomingBlock(i);
+ if (BBA != BBB) {
+ Value *VA = PN.getIncomingValue(i);
+ unsigned j = PN.getBasicBlockIndex(BBB);
+ Value *VB = PN.getIncomingValue(j);
+ PN.setIncomingBlock(i, BBB);
+ PN.setIncomingValue(i, VB);
+ PN.setIncomingBlock(j, BBA);
+ PN.setIncomingValue(j, VA);
+ // NOTE: Instcombine normally would want us to "return &PN" if we
+ // modified any of the operands of an instruction. However, since we
+ // aren't adding or removing uses (just rearranging them) we don't do
+ // this in this case.
+ }
+ }
+
+ // If this is an integer PHI and we know that it has an illegal type, see if
+ // it is only used by trunc or trunc(lshr) operations. If so, we split the
+ // PHI into the various pieces being extracted. This sort of thing is
+ // introduced when SROA promotes an aggregate to a single large integer type.
+ if (isa<IntegerType>(PN.getType()) && TD &&
+ !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
+ if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
+ return Res;
+
+ return 0;
+}
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
new file mode 100644
index 0000000..18b2dff
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -0,0 +1,703 @@
+//===- InstCombineSelect.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitSelect function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms,
+/// returning the kind and providing the out parameter results if we
+/// successfully match.
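+///
+/// For example (an illustrative smax; names are hypothetical):
+///   %c = icmp sgt i32 %x, %y
+///   %m = select i1 %c, i32 %x, i32 %y
+/// matches as SPF_SMAX with LHS = %x and RHS = %y.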
+static SelectPatternFlavor
+MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
+ SelectInst *SI = dyn_cast<SelectInst>(V);
+ if (SI == 0) return SPF_UNKNOWN;
+
+ ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition());
+ if (ICI == 0) return SPF_UNKNOWN;
+
+ LHS = ICI->getOperand(0);
+ RHS = ICI->getOperand(1);
+
+ // (icmp X, Y) ? X : Y
+ if (SI->getTrueValue() == ICI->getOperand(0) &&
+ SI->getFalseValue() == ICI->getOperand(1)) {
+ switch (ICI->getPredicate()) {
+ default: return SPF_UNKNOWN; // Equality.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return SPF_UMAX;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return SPF_SMAX;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: return SPF_UMIN;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: return SPF_SMIN;
+ }
+ }
+
+ // (icmp X, Y) ? Y : X
+ if (SI->getTrueValue() == ICI->getOperand(1) &&
+ SI->getFalseValue() == ICI->getOperand(0)) {
+ switch (ICI->getPredicate()) {
+ default: return SPF_UNKNOWN; // Equality.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return SPF_UMIN;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return SPF_SMIN;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: return SPF_UMAX;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: return SPF_SMAX;
+ }
+ }
+
+ // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
+
+ return SPF_UNKNOWN;
+}
+
+
+/// GetSelectFoldableOperands - We want to turn code that looks like this:
+/// %C = or %A, %B
+/// %D = select %cond, %C, %A
+/// into:
+/// %C = select %cond, %B, 0
+/// %D = or %A, %C
+///
+/// Assuming that the specified instruction is an operand to the select, return
+/// a bitmask indicating which operands of this instruction are foldable if they
+/// equal the other incoming value of the select.
+///
+static unsigned GetSelectFoldableOperands(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return 3; // Can fold through either operand.
+ case Instruction::Sub: // Can only fold on the amount subtracted.
+ case Instruction::Shl: // Can only fold on the shift amount.
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return 1;
+ default:
+ return 0; // Cannot fold
+ }
+}
+
+/// GetSelectFoldableConstant - For the same transformation as the previous
+/// function, return the identity constant that goes into the select.
+static Constant *GetSelectFoldableConstant(Instruction *I) {
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("This cannot happen!");
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return Constant::getNullValue(I->getType());
+ case Instruction::And:
+ return Constant::getAllOnesValue(I->getType());
+ case Instruction::Mul:
+ return ConstantInt::get(I->getType(), 1);
+ }
+}
+
+/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI
+/// have the same opcode and only one use each. Try to simplify this.
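+///
+/// For example (illustrative; names are hypothetical), two single-use truncs
+///   %t = trunc i32 %x to i8
+///   %f = trunc i32 %y to i8
+///   %s = select i1 %c, i8 %t, i8 %f
+/// become a select on the wide values followed by one trunc:
+///   %s.v = select i1 %c, i32 %x, i32 %y
+///   %s   = trunc i32 %s.v to i8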
+Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
+ Instruction *FI) {
+ if (TI->getNumOperands() == 1) {
+ // If both sides are casts from the same source type, merge them by
+ // selecting between the uncast values.
+ if (TI->isCast()) {
+ if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType())
+ return 0;
+ } else {
+ return 0; // unknown unary op.
+ }
+
+ // Fold this by inserting a select from the input values.
+ SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0),
+ FI->getOperand(0), SI.getName()+".v");
+ InsertNewInstBefore(NewSI, SI);
+ return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
+ TI->getType());
+ }
+
+ // Only handle binary operators here.
+ if (!isa<BinaryOperator>(TI))
+ return 0;
+
+ // Figure out if the operations have any operands in common.
+ Value *MatchOp, *OtherOpT, *OtherOpF;
+ bool MatchIsOpZero;
+ if (TI->getOperand(0) == FI->getOperand(0)) {
+ MatchOp = TI->getOperand(0);
+ OtherOpT = TI->getOperand(1);
+ OtherOpF = FI->getOperand(1);
+ MatchIsOpZero = true;
+ } else if (TI->getOperand(1) == FI->getOperand(1)) {
+ MatchOp = TI->getOperand(1);
+ OtherOpT = TI->getOperand(0);
+ OtherOpF = FI->getOperand(0);
+ MatchIsOpZero = false;
+ } else if (!TI->isCommutative()) {
+ return 0;
+ } else if (TI->getOperand(0) == FI->getOperand(1)) {
+ MatchOp = TI->getOperand(0);
+ OtherOpT = TI->getOperand(1);
+ OtherOpF = FI->getOperand(0);
+ MatchIsOpZero = true;
+ } else if (TI->getOperand(1) == FI->getOperand(0)) {
+ MatchOp = TI->getOperand(1);
+ OtherOpT = TI->getOperand(0);
+ OtherOpF = FI->getOperand(1);
+ MatchIsOpZero = true;
+ } else {
+ return 0;
+ }
+
+ // If we reach here, they do have operations in common.
+ SelectInst *NewSI = SelectInst::Create(SI.getCondition(), OtherOpT,
+ OtherOpF, SI.getName()+".v");
+ InsertNewInstBefore(NewSI, SI);
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) {
+ if (MatchIsOpZero)
+ return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI);
+ else
+ return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
+ }
+ llvm_unreachable("Shouldn't get here");
+ return 0;
+}
+
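+/// isSelect01 - Return true if C1 and C2 are both integer constants drawn
+/// from {0, 1}; selects between such constants are the only
+/// constant/constant selects FoldSelectIntoOp is willing to create.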
+static bool isSelect01(Constant *C1, Constant *C2) {
+ ConstantInt *C1I = dyn_cast<ConstantInt>(C1);
+ if (!C1I)
+ return false;
+ ConstantInt *C2I = dyn_cast<ConstantInt>(C2);
+ if (!C2I)
+ return false;
+ return (C1I->isZero() || C1I->isOne()) && (C2I->isZero() || C2I->isOne());
+}
+
+/// FoldSelectIntoOp - Try to fold the select into one of the operands to
+/// facilitate further optimization.
+Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
+ Value *FalseVal) {
+ // See the comment above GetSelectFoldableOperands for a description of the
+ // transformation we are doing here.
+ if (Instruction *TVI = dyn_cast<Instruction>(TrueVal)) {
+ if (TVI->hasOneUse() && TVI->getNumOperands() == 2 &&
+ !isa<Constant>(FalseVal)) {
+ if (unsigned SFO = GetSelectFoldableOperands(TVI)) {
+ unsigned OpToFold = 0;
+ if ((SFO & 1) && FalseVal == TVI->getOperand(0)) {
+ OpToFold = 1;
+ } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) {
+ OpToFold = 2;
+ }
+
+ if (OpToFold) {
+ Constant *C = GetSelectFoldableConstant(TVI);
+ Value *OOp = TVI->getOperand(2-OpToFold);
+ // Avoid creating select between 2 constants unless it's selecting
+ // between 0 and 1.
+ if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
+ Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C);
+ InsertNewInstBefore(NewSel, SI);
+ NewSel->takeName(TVI);
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI))
+ return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel);
+ llvm_unreachable("Unknown instruction!!");
+ }
+ }
+ }
+ }
+ }
+
+ if (Instruction *FVI = dyn_cast<Instruction>(FalseVal)) {
+ if (FVI->hasOneUse() && FVI->getNumOperands() == 2 &&
+ !isa<Constant>(TrueVal)) {
+ if (unsigned SFO = GetSelectFoldableOperands(FVI)) {
+ unsigned OpToFold = 0;
+ if ((SFO & 1) && TrueVal == FVI->getOperand(0)) {
+ OpToFold = 1;
+ } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) {
+ OpToFold = 2;
+ }
+
+ if (OpToFold) {
+ Constant *C = GetSelectFoldableConstant(FVI);
+ Value *OOp = FVI->getOperand(2-OpToFold);
+ // Avoid creating select between 2 constants unless it's selecting
+ // between 0 and 1.
+ if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
+ Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp);
+ InsertNewInstBefore(NewSel, SI);
+ NewSel->takeName(FVI);
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI))
+ return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel);
+ llvm_unreachable("Unknown instruction!!");
+ }
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// visitSelectInstWithICmp - Visit a SelectInst that has an
+/// ICmpInst as its first operand.
+///
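+/// For example (illustrative; constants are hypothetical), the
+/// canonicalization below rewrites
+///   %c = icmp slt i32 %x, 10
+///   %s = select i1 %c, i32 %x, i32 9     ; x < 10 ? x : 9
+/// into the swapped form (x > 9 ? 9 : x), exposing an SMIN idiom.
+///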
+Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
+ ICmpInst *ICI) {
+ bool Changed = false;
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *CmpLHS = ICI->getOperand(0);
+ Value *CmpRHS = ICI->getOperand(1);
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+
+ // Check cases where the comparison is with a constant that
+ // can be adjusted to fit the min/max idiom. We may edit ICI in
+ // place here, so make sure the select is the only user.
+ if (ICI->hasOneUse())
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) {
+ switch (Pred) {
+ default: break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT: {
+ // X < MIN ? T : F --> F
+ if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ // X < C ? X : C-1 --> X > C-1 ? C-1 : X
+ Constant *AdjustedRHS =
+ ConstantInt::get(CI->getContext(), CI->getValue()-1);
+ if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
+ (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ CmpRHS = AdjustedRHS;
+ std::swap(FalseVal, TrueVal);
+ ICI->setPredicate(Pred);
+ ICI->setOperand(1, CmpRHS);
+ SI.setOperand(1, TrueVal);
+ SI.setOperand(2, FalseVal);
+ Changed = true;
+ }
+ break;
+ }
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT: {
+ // X > MAX ? T : F --> F
+ if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ // X > C ? X : C+1 --> X < C+1 ? C+1 : X
+ Constant *AdjustedRHS =
+ ConstantInt::get(CI->getContext(), CI->getValue()+1);
+ if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
+ (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ CmpRHS = AdjustedRHS;
+ std::swap(FalseVal, TrueVal);
+ ICI->setPredicate(Pred);
+ ICI->setOperand(1, CmpRHS);
+ SI.setOperand(1, TrueVal);
+ SI.setOperand(2, FalseVal);
+ Changed = true;
+ }
+ break;
+ }
+ }
+
+ // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed
+ // (x >s -1) ? -1 : 0 -> ashr x, 31 -> all ones if not signed
+ CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
+ if (match(TrueVal, m_ConstantInt<-1>()) &&
+ match(FalseVal, m_ConstantInt<0>()))
+ Pred = ICI->getPredicate();
+ else if (match(TrueVal, m_ConstantInt<0>()) &&
+ match(FalseVal, m_ConstantInt<-1>()))
+ Pred = CmpInst::getInversePredicate(ICI->getPredicate());
+
+ if (Pred != CmpInst::BAD_ICMP_PREDICATE) {
+ // We're selecting between -1 and 0 based on a comparison of CmpLHS
+ // against a constant; if it reduces to a sign-bit test, we can compute
+ // the result with an arithmetic shift right and avoid the select.
+ const APInt &Op1CV = CI->getValue();
+
+ // (x <s 0) ? -1 : 0 --> x >>s 31, all ones if signbit set.
+ // (x >s -1) ? -1 : 0 --> not (x >>s 31), all ones if signbit clear.
+ if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+ (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
+ Value *In = ICI->getOperand(0);
+ Value *Sh = ConstantInt::get(In->getType(),
+ In->getType()->getScalarSizeInBits()-1);
+ In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh,
+ In->getName()+".lobit"),
+ *ICI);
+ if (In->getType() != SI.getType())
+ In = CastInst::CreateIntegerCast(In, SI.getType(),
+ true/*SExt*/, "tmp", ICI);
+
+ if (Pred == ICmpInst::ICMP_SGT)
+ In = InsertNewInstBefore(BinaryOperator::CreateNot(In,
+ In->getName()+".not"), *ICI);
+
+ return ReplaceInstUsesWith(SI, In);
+ }
+ }
+ }
+
+ if (CmpLHS == TrueVal && CmpRHS == FalseVal) {
+ // Transform (X == Y) ? X : Y -> Y
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(SI, FalseVal);
+ // Transform (X != Y) ? X : Y -> X
+ if (Pred == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(SI, TrueVal);
+ /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
+
+ } else if (CmpLHS == FalseVal && CmpRHS == TrueVal) {
+ // Transform (X == Y) ? Y : X -> X
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(SI, FalseVal);
+ // Transform (X != Y) ? Y : X -> Y
+ if (Pred == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(SI, TrueVal);
+ /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
+ }
+ return Changed ? &SI : 0;
+}
+
+
+/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a
+/// PHI node (but the two may be in different blocks). See if the true/false
+/// values (V) are live in all of the predecessor blocks of the PHI. For
+/// example, cases like this cannot be mapped:
+///
+/// X = phi [ C1, BB1], [C2, BB2]
+/// Y = add
+/// Z = select X, Y, 0
+///
+/// because Y is not live in BB1/BB2.
+///
+static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
+ const SelectInst &SI) {
+ // If the value is a non-instruction value like a constant or argument, it
+ // can always be mapped.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return true;
+
+ // If V is a PHI node defined in the same block as the condition PHI, we can
+ // map the arguments.
+ const PHINode *CondPHI = cast<PHINode>(SI.getCondition());
+
+ if (const PHINode *VP = dyn_cast<PHINode>(I))
+ if (VP->getParent() == CondPHI->getParent())
+ return true;
+
+ // Otherwise, if the PHI and select are defined in the same block and if V is
+ // defined in a different block, then we can transform it.
+ if (SI.getParent() == CondPHI->getParent() &&
+ I->getParent() != CondPHI->getParent())
+ return true;
+
+ // Otherwise we have a 'hard' case and we can't tell without doing more
+ // detailed dominator-based analysis; punt.
+ return false;
+}
+
+/// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form:
+/// SPF2(SPF1(A, B), C)
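+///
+/// For example (illustrative): smax(smax(A, B), B) simplifies to the inner
+/// smax(A, B), and smin(smax(A, B), A) simplifies to A.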
+Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
+ SelectPatternFlavor SPF1,
+ Value *A, Value *B,
+ Instruction &Outer,
+ SelectPatternFlavor SPF2, Value *C) {
+ if (C == A || C == B) {
+ // MAX(MAX(A, B), B) -> MAX(A, B)
+ // MIN(MIN(A, B), A) -> MIN(A, B)
+ if (SPF1 == SPF2)
+ return ReplaceInstUsesWith(Outer, Inner);
+
+ // MAX(MIN(A, B), A) -> A
+ // MIN(MAX(A, B), A) -> A
+ if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) ||
+ (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) ||
+ (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) ||
+ (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN))
+ return ReplaceInstUsesWith(Outer, C);
+ }
+
+ // TODO: MIN(MIN(A, 23), 97)
+ return 0;
+}
+
+
+
+
+Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
+ Value *CondVal = SI.getCondition();
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+
+ // select true, X, Y -> X
+ // select false, X, Y -> Y
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CondVal))
+ return ReplaceInstUsesWith(SI, C->getZExtValue() ? TrueVal : FalseVal);
+
+ // select C, X, X -> X
+ if (TrueVal == FalseVal)
+ return ReplaceInstUsesWith(SI, TrueVal);
+
+ if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X
+ return ReplaceInstUsesWith(SI, FalseVal);
+ if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
+ return ReplaceInstUsesWith(SI, TrueVal);
+ if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y
+ if (isa<Constant>(TrueVal))
+ return ReplaceInstUsesWith(SI, TrueVal);
+ else
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
+
+ if (SI.getType()->isInteger(1)) {
+ if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) {
+ if (C->getZExtValue()) {
+ // Change: A = select B, true, C --> A = or B, C
+ return BinaryOperator::CreateOr(CondVal, FalseVal);
+ } else {
+ // Change: A = select B, false, C --> A = and !B, C
+ Value *NotCond =
+ InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
+ "not."+CondVal->getName()), SI);
+ return BinaryOperator::CreateAnd(NotCond, FalseVal);
+ }
+ } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
+ if (!C->getZExtValue()) {
+ // Change: A = select B, C, false --> A = and B, C
+ return BinaryOperator::CreateAnd(CondVal, TrueVal);
+ } else {
+ // Change: A = select B, C, true --> A = or !B, C
+ Value *NotCond =
+ InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
+ "not."+CondVal->getName()), SI);
+ return BinaryOperator::CreateOr(NotCond, TrueVal);
+ }
+ }
+
+ // select a, b, a -> a&b
+ // select a, a, b -> a|b
+ if (CondVal == TrueVal)
+ return BinaryOperator::CreateOr(CondVal, FalseVal);
+ else if (CondVal == FalseVal)
+ return BinaryOperator::CreateAnd(CondVal, TrueVal);
+ }
+
+ // Selecting between two integer constants?
+ if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal))
+ if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) {
+ // select C, 1, 0 -> zext C to int
+ if (FalseValC->isZero() && TrueValC->getValue() == 1) {
+ return CastInst::Create(Instruction::ZExt, CondVal, SI.getType());
+ } else if (TrueValC->isZero() && FalseValC->getValue() == 1) {
+ // select C, 0, 1 -> zext !C to int
+ Value *NotCond =
+ InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
+ "not."+CondVal->getName()), SI);
+ return CastInst::Create(Instruction::ZExt, NotCond, SI.getType());
+ }
+
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) {
+ // If one of the constants is zero (we know they can't both be) and we
+ // have an icmp instruction with zero, and we have an 'and' with the
+ // non-constant value, eliminate this whole mess. This corresponds to
+ // cases like this: ((X & 8) ? 8 : 0)
+ if (TrueValC->isZero() || FalseValC->isZero())
+ if (IC->isEquality() && isa<ConstantInt>(IC->getOperand(1)) &&
+ cast<Constant>(IC->getOperand(1))->isNullValue())
+ if (Instruction *ICA = dyn_cast<Instruction>(IC->getOperand(0)))
+ if (ICA->getOpcode() == Instruction::And &&
+ isa<ConstantInt>(ICA->getOperand(1)) &&
+ (ICA->getOperand(1) == TrueValC ||
+ ICA->getOperand(1) == FalseValC) &&
+ cast<ConstantInt>(ICA->getOperand(1))->getValue().isPowerOf2()) {
+ // Okay, now we know that everything is set up, we just don't
+ // know whether we have an icmp_ne or icmp_eq and whether the
+ // true or false val is the zero.
+ bool ShouldNotVal = !TrueValC->isZero();
+ ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
+ Value *V = ICA;
+ if (ShouldNotVal)
+ V = InsertNewInstBefore(BinaryOperator::Create(
+ Instruction::Xor, V, ICA->getOperand(1)), SI);
+ return ReplaceInstUsesWith(SI, V);
+ }
+ }
+ }
+
+ // See if we are selecting two values based on a comparison of the two values.
+ if (FCmpInst *FCI = dyn_cast<FCmpInst>(CondVal)) {
+ if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) {
+ // Transform (X == Y) ? X : Y -> Y
+ if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
+ // Transform (X != Y) ? X : Y -> X
+ if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
+ return ReplaceInstUsesWith(SI, TrueVal);
+ // NOTE: if we wanted to, this is where to detect MIN/MAX
+
+ } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
+ // Transform (X == Y) ? Y : X -> X
+ if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
+ // Transform (X != Y) ? Y : X -> Y
+ if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
+ return ReplaceInstUsesWith(SI, TrueVal);
+ // NOTE: if we wanted to, this is where to detect MIN/MAX
+ }
+ // NOTE: if we wanted to, this is where to detect ABS
+ }
+
+ // See if we are selecting two values based on a comparison of the two values.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal))
+ if (Instruction *Result = visitSelectInstWithICmp(SI, ICI))
+ return Result;
+
+ if (Instruction *TI = dyn_cast<Instruction>(TrueVal))
+ if (Instruction *FI = dyn_cast<Instruction>(FalseVal))
+ if (TI->hasOneUse() && FI->hasOneUse()) {
+ Instruction *AddOp = 0, *SubOp = 0;
+
+ // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
+ if (TI->getOpcode() == FI->getOpcode())
+ if (Instruction *IV = FoldSelectOpOp(SI, TI, FI))
+ return IV;
+
+ // Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))). This is
+ // even legal for FP.
+ if ((TI->getOpcode() == Instruction::Sub &&
+ FI->getOpcode() == Instruction::Add) ||
+ (TI->getOpcode() == Instruction::FSub &&
+ FI->getOpcode() == Instruction::FAdd)) {
+ AddOp = FI; SubOp = TI;
+ } else if ((FI->getOpcode() == Instruction::Sub &&
+ TI->getOpcode() == Instruction::Add) ||
+ (FI->getOpcode() == Instruction::FSub &&
+ TI->getOpcode() == Instruction::FAdd)) {
+ AddOp = TI; SubOp = FI;
+ }
+
+ if (AddOp) {
+ Value *OtherAddOp = 0;
+ if (SubOp->getOperand(0) == AddOp->getOperand(0)) {
+ OtherAddOp = AddOp->getOperand(1);
+ } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) {
+ OtherAddOp = AddOp->getOperand(0);
+ }
+
+ if (OtherAddOp) {
+ // So at this point we know we have (Y -> OtherAddOp):
+ // select C, (add X, Y), (sub X, Z)
+ Value *NegVal; // Compute -Z
+ if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) {
+ NegVal = ConstantExpr::getNeg(C);
+ } else {
+ NegVal = InsertNewInstBefore(
+ BinaryOperator::CreateNeg(SubOp->getOperand(1),
+ "tmp"), SI);
+ }
+
+ Value *NewTrueOp = OtherAddOp;
+ Value *NewFalseOp = NegVal;
+ if (AddOp != TI)
+ std::swap(NewTrueOp, NewFalseOp);
+ Instruction *NewSel =
+ SelectInst::Create(CondVal, NewTrueOp,
+ NewFalseOp, SI.getName() + ".p");
+
+ NewSel = InsertNewInstBefore(NewSel, SI);
+ return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
+ }
+ }
+ }
+
+ // See if we can fold the select into one of our operands.
+ if (SI.getType()->isInteger()) {
+ if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
+ return FoldI;
+
+ // MAX(MAX(a, b), a) -> MAX(a, b)
+ // MIN(MIN(a, b), a) -> MIN(a, b)
+ // MAX(MIN(a, b), a) -> a
+ // MIN(MAX(a, b), a) -> a
+ Value *LHS, *RHS, *LHS2, *RHS2;
+ if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) {
+ if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2))
+ if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
+ SI, SPF, RHS))
+ return R;
+ if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2))
+ if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2,
+ SI, SPF, LHS))
+ return R;
+ }
+
+ // TODO.
+ // ABS(-X) -> ABS(X)
+ // ABS(ABS(X)) -> ABS(X)
+ }
+
+ // See if we can fold the select into a phi node if the condition is a select.
+ if (isa<PHINode>(SI.getCondition()))
+ // The true/false values have to be live in the PHI predecessor's blocks.
+ if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) &&
+ CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI))
+ if (Instruction *NV = FoldOpIntoPhi(SI))
+ return NV;
+
+ if (BinaryOperator::isNot(CondVal)) {
+ SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));
+ SI.setOperand(1, FalseVal);
+ SI.setOperand(2, TrueVal);
+ return &SI;
+ }
+
+ return 0;
+}
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
new file mode 100644
index 0000000..fe91da1
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -0,0 +1,427 @@
+//===- InstCombineShifts.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitShl, visitLShr, and visitAShr functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
+ assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // shl X, 0 == X and shr X, 0 == X
+ // shl 0, X == 0 and shr 0, X == 0
+ if (Op1 == Constant::getNullValue(Op1->getType()) ||
+ Op0 == Constant::getNullValue(Op0->getType()))
+ return ReplaceInstUsesWith(I, Op0);
+
+ if (isa<UndefValue>(Op0)) {
+ if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef
+ return ReplaceInstUsesWith(I, Op0);
+ else // undef << X -> 0, undef >>u X -> 0
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ }
+ if (isa<UndefValue>(Op1)) {
+ if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X
+ return ReplaceInstUsesWith(I, Op0);
+ else // X << undef, X >>u undef -> 0
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ }
+
+ // See if we can fold away this shift.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // Try to fold constant and into select arguments.
+ if (isa<Constant>(Op0))
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
+ if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
+ return Res;
+ return 0;
+}
+
+Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
+ BinaryOperator &I) {
+ bool isLeftShift = I.getOpcode() == Instruction::Shl;
+
+ // All of the transforms below key off the constant shift amount and the
+ // bit width of the shifted type.
+ uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
+
+ // shl i32 X, 32 = 0 and lshr i8 Y, 9 = 0, ... but don't eliminate an
+ // arithmetic (ashr) shift; clamp its amount instead.
+ //
+ if (Op1->uge(TypeBits)) {
+ if (I.getOpcode() != Instruction::AShr)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
+ else {
+ I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
+ return &I;
+ }
+ }
+
+ // ((X*C1) << C2) == (X * (C1 << C2))
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
+ if (BO->getOpcode() == Instruction::Mul && isLeftShift)
+ if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
+ return BinaryOperator::CreateMul(BO->getOperand(0),
+ ConstantExpr::getShl(BOOp, Op1));
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
+ // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
+ if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
+ Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
+ // If 'shift2' is an ashr, we would have to get the sign bit into a funny
+ // place. Don't try to do this transformation in this case. Also, we
+ // require that the input operand is a shift-by-constant so that we have
+ // confidence that the shifts will get folded together. We could do this
+ // xform in more cases, but it is unlikely to be profitable.
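+ // As an illustrative sketch (values are hypothetical): for
+ //   %s1 = lshr i32 %x, 8
+ //   %t  = trunc i32 %s1 to i16
+ //   %s2 = shl i16 %t, 2
+ // we emit 'shl i32 %s1, 2', mask it with the bits the trunc would have
+ // kept (shifted left by 2), and truncate the result to i16.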
+ if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
+ isa<ConstantInt>(TrOp->getOperand(1))) {
+ // Okay, we'll do this xform. Make the shift of shift.
+ Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
+ // (shift2 (shift1 & 0x00FF), c2)
+ Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());
+
+ // For logical shifts, the truncation has the effect of making the high
+ // part of the register be zeros. Emulate this by inserting an AND to
+ // clear the top bits as needed. This 'and' will usually be zapped by
+ // other xforms later if dead.
+ unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
+ unsigned DstSize = TI->getType()->getScalarSizeInBits();
+ APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
+
+ // The mask we constructed says what the trunc would do if occurring
+ // between the shifts. We want to know the effect *after* the second
+ // shift. We know that it is a logical shift by a constant, so adjust the
+ // mask as appropriate.
+ if (I.getOpcode() == Instruction::Shl)
+ MaskV <<= Op1->getZExtValue();
+ else {
+ assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
+ MaskV = MaskV.lshr(Op1->getZExtValue());
+ }
+
+ // shift1 & 0x00FF
+ Value *And = Builder->CreateAnd(NSh,
+ ConstantInt::get(I.getContext(), MaskV),
+ TI->getName());
+
+ // Return the value truncated to the interesting size.
+ return new TruncInst(And, I.getType());
+ }
+ }
+
+ if (Op0->hasOneUse()) {
+ if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
+ // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
+ Value *V1, *V2;
+ ConstantInt *CC;
+ switch (Op0BO->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // These operators commute.
+ // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C)
+ if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
+ match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
+ m_Specific(Op1)))) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
+ // (X + (Y << C))
+ Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
+ Op0BO->getOperand(1)->getName());
+ uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
+ APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+ }
+
+ // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C))
+ Value *Op0BOOp1 = Op0BO->getOperand(1);
+ if (isLeftShift && Op0BOOp1->hasOneUse() &&
+ match(Op0BOOp1,
+ m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
+ m_ConstantInt(CC))) &&
+ cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(0), Op1,
+ Op0BO->getName());
+ // X & (CC << C)
+ Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
+ return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
+ }
+ }
+
+ // FALL THROUGH.
+ case Instruction::Sub: {
+ // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
+ if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
+ match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
+ m_Specific(Op1)))) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+ // (X + (Y << C))
+ Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
+ Op0BO->getOperand(0)->getName());
+ uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
+ APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+ }
+
+ // Turn (((X >> C)&CC) + Y) << C -> ((X & (CC << C)) + (Y << C))
+ if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
+ match(Op0BO->getOperand(0),
+ m_And(m_Shr(m_Value(V1), m_Value(V2)),
+ m_ConstantInt(CC))) && V2 == Op1 &&
+ cast<BinaryOperator>(Op0BO->getOperand(0))
+ ->getOperand(0)->hasOneUse()) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+ // X & (CC << C)
+ Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
+
+ return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
+ }
+
+ break;
+ }
+ }
+
+
+ // If the operand is a bitwise operator with a constant RHS, and the
+ // shift is the only use, we can pull it out of the shift.
+ if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
+ bool isValid = true; // Valid only for And, Or, Xor
+ bool highBitSet = false; // Transform if high bit of constant set?
+
+ switch (Op0BO->getOpcode()) {
+ default: isValid = false; break; // Do not perform transform!
+ case Instruction::Add:
+ isValid = isLeftShift;
+ break;
+ case Instruction::Or:
+ case Instruction::Xor:
+ highBitSet = false;
+ break;
+ case Instruction::And:
+ highBitSet = true;
+ break;
+ }
+
+ // If this is a signed shift right, and the high bit is modified
+ // by the logical operation, do not perform the transformation.
+ // The highBitSet boolean indicates the value of the high bit of
+ // the constant which would cause it to be modified for this
+ // operation.
+ //
+ if (isValid && I.getOpcode() == Instruction::AShr)
+ isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
+
+ if (isValid) {
+ Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
+
+ Value *NewShift =
+ Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
+ NewShift->takeName(Op0BO);
+
+ return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
+ NewRHS);
+ }
+ }
+ }
+ }
+
+ // Find out if this is a shift of a shift by a constant.
+ BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
+ if (ShiftOp && !ShiftOp->isShift())
+ ShiftOp = 0;
+
+ if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
+ ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
+ uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
+ uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
+ assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
+ if (ShiftAmt1 == 0) return 0; // Will be simplified in the future.
+ Value *X = ShiftOp->getOperand(0);
+
+ uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift.
+
+ const IntegerType *Ty = cast<IntegerType>(I.getType());
+
+ // Check for (X << c1) << c2 and (X >> c1) >> c2
+ if (I.getOpcode() == ShiftOp->getOpcode()) {
+ // If this is an oversized composite shift, then unsigned shifts get 0, ashr
+ // saturates.
+ if (AmtSum >= TypeBits) {
+ if (I.getOpcode() != Instruction::AShr)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr.
+ }
+
+ return BinaryOperator::Create(I.getOpcode(), X,
+ ConstantInt::get(Ty, AmtSum));
+ }
+
+ if (ShiftOp->getOpcode() == Instruction::LShr &&
+ I.getOpcode() == Instruction::AShr) {
+ if (AmtSum >= TypeBits)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ // ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0.
+ return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
+ }
+
+ if (ShiftOp->getOpcode() == Instruction::AShr &&
+ I.getOpcode() == Instruction::LShr) {
+ // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0.
+ if (AmtSum >= TypeBits)
+ AmtSum = TypeBits-1;
+
+ Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum));
+
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(), Mask));
+ }
+
+ // Okay, if we get here, one shift must be left, and the other shift must be
+ // right. See if the amounts are equal.
+ if (ShiftAmt1 == ShiftAmt2) {
+ // If we have ((X >>? C) << C), turn this into X & (-1 << C).
+ if (I.getOpcode() == Instruction::Shl) {
+ APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
+ return BinaryOperator::CreateAnd(X,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+ // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
+ if (I.getOpcode() == Instruction::LShr) {
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
+ return BinaryOperator::CreateAnd(X,
+ ConstantInt::get(I.getContext(), Mask));
+ }
+ } else if (ShiftAmt1 < ShiftAmt2) {
+ uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
+
+ // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
+ if (I.getOpcode() == Instruction::Shl) {
+ assert(ShiftOp->getOpcode() == Instruction::LShr ||
+ ShiftOp->getOpcode() == Instruction::AShr);
+ Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
+
+ APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
+ if (I.getOpcode() == Instruction::LShr) {
+ assert(ShiftOp->getOpcode() == Instruction::Shl);
+ Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
+
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // We can't handle (X << C1) >>s C2; it shifts arbitrary bits in.
+ } else {
+ assert(ShiftAmt2 < ShiftAmt1);
+ uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
+
+ // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
+ if (I.getOpcode() == Instruction::Shl) {
+ assert(ShiftOp->getOpcode() == Instruction::LShr ||
+ ShiftOp->getOpcode() == Instruction::AShr);
+ Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
+ ConstantInt::get(Ty, ShiftDiff));
+
+ APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
+ if (I.getOpcode() == Instruction::LShr) {
+ assert(ShiftOp->getOpcode() == Instruction::Shl);
+ Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
+
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(Shift,
+ ConstantInt::get(I.getContext(),Mask));
+ }
+
+ // We can't handle (X << C1) >>a C2; it shifts arbitrary bits in.
+ }
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::visitShl(BinaryOperator &I) {
+ return commonShiftTransforms(I);
+}
+
+Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
+ return commonShiftTransforms(I);
+}
+
+Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
+ if (Instruction *R = commonShiftTransforms(I))
+ return R;
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) {
+ // ashr -1, X = -1 (any arithmetic shift right of all-ones is all-ones)
+ if (CSI->isAllOnesValue())
+ return ReplaceInstUsesWith(I, CSI);
+ }
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ // If the input is a SHL by the same constant (ashr (shl X, C), C), then we
+ // have a sign-extend idiom. If the input value is known to already be sign
+ // extended enough, delete the extension.
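+ // For example (illustrative): given
+ //   %s = shl i32 %x, 24
+ //   %r = ashr i32 %s, 24
+ // if %x is known to have more than 24 sign bits (it already fits in i8
+ // when sign-extended), %r is just %x.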
+ Value *X;
+ if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) &&
+ ComputeNumSignBits(X) > Op1C->getZExtValue())
+ return ReplaceInstUsesWith(I, X);
+ }
+
+ // See if we can turn a signed shr into an unsigned shr.
+ if (MaskedValueIsZero(Op0,
+ APInt::getSignBit(I.getType()->getScalarSizeInBits())))
+ return BinaryOperator::CreateLShr(Op0, Op1);
+
+ // Arithmetic shifting an all-sign-bit value is a no-op.
+ unsigned NumSignBits = ComputeNumSignBits(Op0);
+ if (NumSignBits == Op0->getType()->getScalarSizeInBits())
+ return ReplaceInstUsesWith(I, Op0);
+
+ return 0;
+}
+
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
new file mode 100644
index 0000000..74a1b68
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -0,0 +1,1106 @@
+//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains logic for simplifying instructions based on information
+// about how they are used.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "InstCombine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/IntrinsicInst.h"
+
+using namespace llvm;
+
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// specified instruction is a constant integer. If so, check to see if there
+/// are any bits set in the constant that are not demanded. If so, shrink the
+/// constant and return true.
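+///
+/// For example (illustrative): if I is 'and i32 %x, 255' but only bits 0-3
+/// are demanded, the constant is shrunk to 15.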
+static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
+ APInt Demanded) {
+ assert(I && "No instruction?");
+ assert(OpNo < I->getNumOperands() && "Operand index too large");
+
+ // If the operand is not a constant integer, nothing to do.
+ ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
+ if (!OpC) return false;
+
+ // If there are no bits set that aren't demanded, nothing to do.
+ Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
+ if ((~Demanded & OpC->getValue()) == 0)
+ return false;
+
+ // This instruction is producing bits that are not demanded. Shrink the RHS.
+ Demanded &= OpC->getValue();
+ I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
+ return true;
+}
+
+
+
+/// SimplifyDemandedInstructionBits - Inst is an integer instruction that
+/// SimplifyDemandedBits knows about. See if the instruction has any
+/// properties that allow us to simplify its operands.
+bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
+ unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
+
+ Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
+ KnownZero, KnownOne, 0);
+ if (V == 0) return false;
+ if (V == &Inst) return true;
+ ReplaceInstUsesWith(Inst, V);
+ return true;
+}
+
+/// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
+/// specified instruction operand if possible, updating it in place. It returns
+/// true if it made any change and false otherwise.
+bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth) {
+ Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
+ KnownZero, KnownOne, Depth);
+ if (NewVal == 0) return false;
+ U = NewVal;
+ return true;
+}
+
+
+/// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
+/// value based on the demanded bits. When this function is called, it is known
+/// that only the bits set in DemandedMask of the result of V are ever used
+/// downstream. Consequently, depending on the mask and V, it may be possible
+/// to replace V with a constant or one of its operands. In such cases, this
+/// function does the replacement and returns the simpler value. In all other
+/// cases, it returns null after analyzing the expression and setting KnownOne
+/// to the bits of the expression known to be one and KnownZero to the bits
+/// known to be zero. These are provided to potentially allow the
+/// caller (which might recursively be SimplifyDemandedBits itself) to simplify
+/// the expression. KnownOne and KnownZero always follow the invariant that
+/// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
+/// the bits in KnownOne and KnownZero may only be accurate for those bits set
+/// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
+/// and KnownOne must all be the same.
+///
+/// This returns null if it did not change anything and it permits no
+/// simplification. This returns V itself if it did some simplification of V's
+/// operands based on the information about what bits are demanded. This returns
+/// some other non-null value if it found out that V is equal to another value
+/// in the context where the specified bits are demanded, but not for all users.
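+///
+/// For example (illustrative): if V is 'or i8 %x, 240' and DemandedMask is
+/// 0x0F, the 'or' contributes nothing to the demanded bits, so V can be
+/// replaced by %x in that context.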
+Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth) {
+ assert(V != 0 && "Null pointer of Value???");
+ assert(Depth <= 6 && "Limit Search Depth");
+ uint32_t BitWidth = DemandedMask.getBitWidth();
+ const Type *VTy = V->getType();
+ assert((TD || !isa<PointerType>(VTy)) &&
+ "SimplifyDemandedBits needs to know bit widths!");
+ assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
+ (!VTy->isIntOrIntVector() ||
+ VTy->getScalarSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "Value *V, DemandedMask, KnownZero and KnownOne "
+ "must have same BitWidth");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne = CI->getValue() & DemandedMask;
+ KnownZero = ~KnownOne & DemandedMask;
+ return 0;
+ }
+ if (isa<ConstantPointerNull>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne.clear();
+ KnownZero = DemandedMask;
+ return 0;
+ }
+
+ KnownZero.clear();
+ KnownOne.clear();
+ if (DemandedMask == 0) { // Not demanding any bits from V.
+ if (isa<UndefValue>(V))
+ return 0;
+ return UndefValue::get(VTy);
+ }
+
+ if (Depth == 6) // Limit search depth.
+ return 0;
+
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
+ return 0; // Only analyze instructions.
+ }
+
+ // If there are multiple uses of this value and we aren't at the root, then
+ // we can't do any simplifications of the operands, because DemandedMask
+ // only reflects the bits demanded by *one* of the users.
+ if (Depth != 0 && !I->hasOneUse()) {
+ // Despite the fact that we can't simplify this instruction in all users'
+ // contexts, we can at least compute the knownzero/knownone bits, and we can
+ // do simplifications that apply to *just* the one user if we know that
+ // this instruction has a simpler value in that context.
+ if (I->getOpcode() == Instruction::And) {
+ // If either the LHS or the RHS are Zero, the result is zero.
+ ComputeMaskedBits(I->getOperand(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero,
+ LHSKnownZero, LHSKnownOne, Depth+1);
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and' in this
+ // context.
+ if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
+ (DemandedMask & ~LHSKnownZero))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
+ (DemandedMask & ~RHSKnownZero))
+ return I->getOperand(1);
+
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
+ return Constant::getNullValue(VTy);
+
+ } else if (I->getOpcode() == Instruction::Or) {
+ // We can simplify (X|Y) -> X or Y in the user's context if we know that
+ // only bits from X or Y are demanded.
+
+ // If either the LHS or the RHS are One, the result is One.
+ ComputeMaskedBits(I->getOperand(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne,
+ LHSKnownZero, LHSKnownOne, Depth+1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other. These bits cannot contribute to the result of the 'or' in this
+ // context.
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
+ (DemandedMask & ~LHSKnownOne))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
+ (DemandedMask & ~RHSKnownOne))
+ return I->getOperand(1);
+
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
+ (DemandedMask & (~RHSKnownZero)))
+ return I->getOperand(0);
+ if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
+ (DemandedMask & (~LHSKnownZero)))
+ return I->getOperand(1);
+ }
+
+ // Compute the KnownZero/KnownOne bits to simplify things downstream.
+ ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth);
+ return 0;
+ }
+
+ // If this is the root being simplified, allow it to have multiple uses,
+ // just set the DemandedMask to all bits so that we can try to simplify the
+ // operands. This allows visitTruncInst (for example) to simplify the
+ // operand of a trunc without duplicating all the logic below.
+ if (Depth == 0 && !V->hasOneUse())
+ DemandedMask = APInt::getAllOnesValue(BitWidth);
+
+ switch (I->getOpcode()) {
+ default:
+ ComputeMaskedBits(I, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
+ break;
+ case Instruction::And:
+ // If either the LHS or the RHS are Zero, the result is zero.
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
+ (DemandedMask & ~LHSKnownZero))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
+ (DemandedMask & ~RHSKnownZero))
+ return I->getOperand(1);
+
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
+ return Constant::getNullValue(VTy);
+
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
+ return I;
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ RHSKnownOne &= LHSKnownOne;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ RHSKnownZero |= LHSKnownZero;
+ break;
+ case Instruction::Or:
+ // If either the LHS or the RHS are One, the result is One.
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
+ (DemandedMask & ~LHSKnownOne))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
+ (DemandedMask & ~RHSKnownOne))
+ return I->getOperand(1);
+
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
+ (DemandedMask & (~RHSKnownZero)))
+ return I->getOperand(0);
+ if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
+ (DemandedMask & (~LHSKnownZero)))
+ return I->getOperand(1);
+
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask))
+ return I;
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ RHSKnownZero &= LHSKnownZero;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ RHSKnownOne |= LHSKnownOne;
+ break;
+ case Instruction::Xor: {
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'xor'.
+ if ((DemandedMask & RHSKnownZero) == DemandedMask)
+ return I->getOperand(0);
+ if ((DemandedMask & LHSKnownZero) == DemandedMask)
+ return I->getOperand(1);
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt KnownZeroOut = (RHSKnownZero & LHSKnownZero) |
+ (RHSKnownOne & LHSKnownOne);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ APInt KnownOneOut = (RHSKnownZero & LHSKnownOne) |
+ (RHSKnownOne & LHSKnownZero);
+
+ // If all of the demanded bits are known to be zero on one side or the
+ // other, turn this into an *inclusive* or.
+ // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
+ Instruction *Or =
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
+ I->getName());
+ return InsertNewInstBefore(Or, *I);
+ }
+
+ // If all of the demanded bits on one side are known, and all of the set
+ // bits on that side are also known to be set on the other side, turn this
+ // into an AND, as we know the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
+ // All of the demanded bits on the RHS are known.
+ if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
+ Constant *AndC = Constant::getIntegerValue(VTy,
+ ~RHSKnownOne & DemandedMask);
+ Instruction *And =
+ BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
+ return InsertNewInstBefore(And, *I);
+ }
+ }
+
+ // If the RHS is a constant, see if we can simplify it.
+ // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask))
+ return I;
+
+ // If our LHS is an 'and' and if it has one use, and if any of the bits we
+ // are flipping are known to be set, then the xor is just resetting those
+ // bits to zero. We can just knock out bits from the 'and' and the 'xor',
+ // simplifying both of them.
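+ // For instance (illustrative), if bits 2-3 of X are known one, then in
+ // (X & 0x0F) ^ 0x0D those bits are only being cleared, and
+ // NewMask == ~0x0C shrinks the pair to (X & 0x03) ^ 0x01.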
+ if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
+ if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
+ isa<ConstantInt>(I->getOperand(1)) &&
+ isa<ConstantInt>(LHSInst->getOperand(1)) &&
+ (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
+ ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
+ ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
+ APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
+
+ Constant *AndC =
+ ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
+ Instruction *NewAnd =
+ BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
+ InsertNewInstBefore(NewAnd, *I);
+
+ Constant *XorC =
+ ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
+ Instruction *NewXor =
+ BinaryOperator::CreateXor(NewAnd, XorC, "tmp");
+ return InsertNewInstBefore(NewXor, *I);
+ }
+
+ RHSKnownZero = KnownZeroOut;
+ RHSKnownOne = KnownOneOut;
+ break;
+ }
+ case Instruction::Select:
+ if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
+ ShrinkDemandedConstant(I, 2, DemandedMask))
+ return I;
+
+ // Only known if known in both the LHS and RHS.
+ RHSKnownOne &= LHSKnownOne;
+ RHSKnownZero &= LHSKnownZero;
+ break;
+ case Instruction::Trunc: {
+ unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
+ DemandedMask.zext(truncBf);
+ RHSKnownZero.zext(truncBf);
+ RHSKnownOne.zext(truncBf);
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1))
+ return I;
+ DemandedMask.trunc(BitWidth);
+ RHSKnownZero.trunc(BitWidth);
+ RHSKnownOne.trunc(BitWidth);
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ break;
+ }
+ case Instruction::BitCast:
+ if (!I->getOperand(0)->getType()->isIntOrIntVector())
+ return 0; // vector->int or fp->int?
+
+ if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
+ if (const VectorType *SrcVTy =
+ dyn_cast<VectorType>(I->getOperand(0)->getType())) {
+ if (DstVTy->getNumElements() != SrcVTy->getNumElements())
+ // Don't touch a bitcast between vectors of different element counts.
+ return 0;
+ } else
+ // Don't touch a scalar-to-vector bitcast.
+ return 0;
+ } else if (isa<VectorType>(I->getOperand(0)->getType()))
+ // Don't touch a vector-to-scalar bitcast.
+ return 0;
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ break;
+ case Instruction::ZExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ DemandedMask.trunc(SrcBitWidth);
+ RHSKnownZero.trunc(SrcBitWidth);
+ RHSKnownOne.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1))
+ return I;
+ DemandedMask.zext(BitWidth);
+ RHSKnownZero.zext(BitWidth);
+ RHSKnownOne.zext(BitWidth);
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ // The top bits are known to be zero.
+ RHSKnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ break;
+ }
+ case Instruction::SExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ APInt InputDemandedBits = DemandedMask &
+ APInt::getLowBitsSet(BitWidth, SrcBitWidth);
+
+ APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
+ // If any of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ if ((NewBits & DemandedMask) != 0)
+ InputDemandedBits.set(SrcBitWidth-1);
+
+ InputDemandedBits.trunc(SrcBitWidth);
+ RHSKnownZero.trunc(SrcBitWidth);
+ RHSKnownOne.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
+ RHSKnownZero, RHSKnownOne, Depth+1))
+ return I;
+ InputDemandedBits.zext(BitWidth);
+ RHSKnownZero.zext(BitWidth);
+ RHSKnownOne.zext(BitWidth);
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+
+ // If the input sign bit is known zero, or if the NewBits are not demanded,
+ // convert this into a zero extension.
+ if (RHSKnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
+ // Convert to ZExt cast
+ CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
+ return InsertNewInstBefore(NewCast, *I);
+ } else if (RHSKnownOne[SrcBitWidth-1]) { // Input sign bit known set
+ RHSKnownOne |= NewBits;
+ }
+ break;
+ }
+ case Instruction::Add: {
+ // Figure out what the input bits are. If the top bits of the add result
+ // are not demanded, then the add doesn't demand them from its input
+ // either.
+ unsigned NLZ = DemandedMask.countLeadingZeros();
+
+ // If there is a constant on the RHS, there are a variety of xformations
+ // we can do.
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // If the RHS constant is zero, this should be simplified elsewhere; some
+ // of the xforms here won't work if the RHS is zero.
+ if (RHS->isZero())
+ break;
+
+ // If the top bit of the output is demanded, demand everything from the
+ // input. Otherwise, we demand all the input bits except NLZ top bits.
+ APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
+
+ // Find information about known zero/one bits in the input.
+ if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+
+ // If the RHS of the add has bits set that can't affect the input, reduce
+ // the constant.
+ if (ShrinkDemandedConstant(I, 1, InDemandedBits))
+ return I;
+
+ // Avoid excess work.
+ if (LHSKnownZero == 0 && LHSKnownOne == 0)
+ break;
+
+ // Turn it into OR if input bits are zero.
+ if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
+ Instruction *Or =
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
+ I->getName());
+ return InsertNewInstBefore(Or, *I);
+ }
+
+ // We can say something about the output known-zero and known-one bits,
+ // depending on potential carries from the input constant and the
+ // unknowns. For example if the LHS is known to have at most the 0x0F0F0
+ // bits set and the RHS constant is 0x01001, then we know we have a known
+ // one mask of 0x00001 and a known zero mask of 0xE0F0E.
+
+ // To compute this, we first compute the potential carry bits. These are
+ // the bits which may be modified. I'm not aware of a better way to do
+ // this scan.
+ const APInt &RHSVal = RHS->getValue();
+ APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
+
+ // Now that we know which bits have carries, compute the known-1/0 sets.
+
+ // Bits are known one if they are known zero in one operand and one in the
+ // other, and there is no input carry.
+ RHSKnownOne = ((LHSKnownZero & RHSVal) |
+ (LHSKnownOne & ~RHSVal)) & ~CarryBits;
+
+ // Bits are known zero if they are known zero in both operands and there
+ // is no input carry.
+ RHSKnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
+ } else {
+ // If the high-bits of this ADD are not demanded, then it does not demand
+ // the high bits of its LHS or RHS.
+ if (DemandedMask[BitWidth-1] == 0) {
+ // Right fill the mask of bits for this ADD to demand the most
+ // significant bit and all those below it.
+ APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ }
+ }
+ break;
+ }
+ case Instruction::Sub:
+ // If the high-bits of this SUB are not demanded, then it does not demand
+ // the high bits of its LHS or RHS.
+ if (DemandedMask[BitWidth-1] == 0) {
+ // Right fill the mask of bits for this SUB to demand the most
+ // significant bit and all those below it.
+ uint32_t NLZ = DemandedMask.countLeadingZeros();
+ APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ }
+ // Otherwise just hand the sub off to ComputeMaskedBits to fill in
+ // the known zeros and ones.
+ ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
+ break;
+ case Instruction::Shl:
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
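+ // Bit (i+ShiftAmt) of the result is bit i of the input; e.g.
+ // (illustrative) if only bit 7 of 'shl i8 %x, 3' is demanded, then only
+ // bit 4 of %x is demanded.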
+ APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+ RHSKnownZero, RHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ RHSKnownZero <<= ShiftAmt;
+ RHSKnownOne <<= ShiftAmt;
+ // low bits known zero.
+ if (ShiftAmt)
+ RHSKnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::LShr:
+ // Logical (unsigned) shift right.
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+
+ // Bit i of the result is bit (i+ShiftAmt) of the input, so the demanded
+ // input bits are the demanded result bits shifted left.
+ APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+ RHSKnownZero, RHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt);
+ RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt);
+ if (ShiftAmt) {
+ // Compute the new bits that are at the top now.
+ APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+ RHSKnownZero |= HighBits; // high bits known zero.
+ }
+ }
+ break;
+ case Instruction::AShr:
+ // If this is an arithmetic shift right and only the low-bit is set, we can
+ // always convert this into a logical shr, even if the shift amount is
+ // variable. The low bit of the shift cannot be an input sign bit unless
+ // the shift amount is >= the size of the datatype, which is undefined.
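+ // For example (illustrative), if only bit 0 of %r is demanded:
+ // %r = ashr i32 %x, %n --> %r = lshr i32 %x, %n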
+ if (DemandedMask == 1) {
+ // Perform the logical shift right.
+ Instruction *NewVal = BinaryOperator::CreateLShr(
+ I->getOperand(0), I->getOperand(1), I->getName());
+ return InsertNewInstBefore(NewVal, *I);
+ }
+
+ // If the sign bit is the only bit demanded by this ashr, then there is no
+ // need to do it, the shift doesn't change the high bit.
+ if (DemandedMask.isSignBit())
+ return I->getOperand(0);
+
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint32_t ShiftAmt = SA->getLimitedValue(BitWidth);
+
+ // Signed shift right.
+ APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+ // If any of the "high bits" are demanded, we should set the sign bit as
+ // demanded.
+ if (DemandedMask.countLeadingZeros() <= ShiftAmt)
+ DemandedMaskIn.set(BitWidth-1);
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+ RHSKnownZero, RHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ // Compute the new bits that are at the top now.
+ APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+ RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt);
+ RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt);
+
+ // Handle the sign bits.
+ APInt SignBit(APInt::getSignBit(BitWidth));
+ // Adjust to where it is now in the mask.
+ SignBit = APIntOps::lshr(SignBit, ShiftAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (BitWidth <= ShiftAmt || RHSKnownZero[BitWidth-ShiftAmt-1] ||
+ (HighBits & ~DemandedMask) == HighBits) {
+ // Perform the logical shift right.
+ Instruction *NewVal = BinaryOperator::CreateLShr(
+ I->getOperand(0), SA, I->getName());
+ return InsertNewInstBefore(NewVal, *I);
+ } else if ((RHSKnownOne & SignBit) != 0) { // New bits are known one.
+ RHSKnownOne |= HighBits;
+ }
+ }
+ break;
+ case Instruction::SRem:
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ APInt RA = Rem->getValue().abs();
+ if (RA.isPowerOf2()) {
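+ // For example (illustrative), with only bit 0 demanded,
+ // 'srem i32 %x, 4' simplifies to %x: subtracting a multiple of 4
+ // leaves the low two bits unchanged.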
+ if (DemandedMask.ult(RA)) // srem won't affect demanded bits
+ return I->getOperand(0);
+
+ APInt LowBits = RA - 1;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+
+ if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
+ LHSKnownZero |= ~LowBits;
+
+ KnownZero |= LHSKnownZero & DemandedMask;
+
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ }
+ }
+ break;
+ case Instruction::URem: {
+ APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
+ KnownZero2, KnownOne2, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
+ KnownZero2, KnownOne2, Depth+1))
+ return I;
+
+ // The result of urem is smaller than the divisor (operand 1), so the
+ // divisor's known leading zero bits also hold for the result.
+ unsigned Leaders = KnownZero2.countLeadingOnes();
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
+ break;
+ }
+ case Instruction::Call:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap: {
+ // If the only bits demanded come from one byte of the bswap result,
+ // just shift the input byte into position to eliminate the bswap.
+ unsigned NLZ = DemandedMask.countLeadingZeros();
+ unsigned NTZ = DemandedMask.countTrailingZeros();
+
+ // Round NTZ down to the next byte. If we have 11 trailing zeros, then
+ // we need all the bits down to bit 8. Likewise, round NLZ. If we
+ // have 14 leading zeros, round to 8.
+ NLZ &= ~7;
+ NTZ &= ~7;
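+ // For example (illustrative), for an i32 bswap with
+ // DemandedMask == 0x0000FF00: NLZ == 16 and NTZ == 8, so exactly one
+ // byte is demanded; InputBit == 16 > ResultBit == 8, and the bswap
+ // becomes 'lshr %x, 8'.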
+ // If we need exactly one byte, we can do this transformation.
+ if (BitWidth-NLZ-NTZ == 8) {
+ unsigned ResultBit = NTZ;
+ unsigned InputBit = BitWidth-NTZ-8;
+
+ // Replace this with either a left or right shift to get the byte into
+ // the right place.
+ Instruction *NewVal;
+ if (InputBit > ResultBit)
+ NewVal = BinaryOperator::CreateLShr(I->getOperand(1),
+ ConstantInt::get(I->getType(), InputBit-ResultBit));
+ else
+ NewVal = BinaryOperator::CreateShl(I->getOperand(1),
+ ConstantInt::get(I->getType(), ResultBit-InputBit));
+ NewVal->takeName(I);
+ return InsertNewInstBefore(NewVal, *I);
+ }
+
+ // TODO: Could compute known zero/one bits based on the input.
+ break;
+ }
+ }
+ }
+ ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
+ break;
+ }
+
+ // If the client is only demanding bits that we know, return the known
+ // constant.
+ if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask)
+ return Constant::getIntegerValue(VTy, RHSKnownOne);
+ return 0;
+}
+
+
+/// SimplifyDemandedVectorElts - The specified value produces a vector with
+/// any number of elements. DemandedElts contains the set of elements that are
+/// actually used by the caller. This method analyzes which elements of the
+/// operand are undef and returns that information in UndefElts.
+///
+/// If the information about demanded elements can be used to simplify the
+/// operation, the operation is simplified and the resultant value is
+/// returned. This returns null if no change was made.
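+///
+/// For example (illustrative), if only element 0 of
+/// %v = insertelement <4 x float> %w, float %s, i32 3
+/// is demanded, the insert writes an element nobody reads, so %v simplifies
+/// to %w.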
+Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
+ APInt& UndefElts,
+ unsigned Depth) {
+ unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
+ APInt EltMask(APInt::getAllOnesValue(VWidth));
+ assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
+
+ if (isa<UndefValue>(V)) {
+ // If the entire vector is undefined, just return this info.
+ UndefElts = EltMask;
+ return 0;
+ } else if (DemandedElts == 0) { // If nothing is demanded, provide undef.
+ UndefElts = EltMask;
+ return UndefValue::get(V->getType());
+ }
+
+ UndefElts = 0;
+ if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) {
+ const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
+ Constant *Undef = UndefValue::get(EltTy);
+
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i != VWidth; ++i)
+ if (!DemandedElts[i]) { // If not demanded, set to undef.
+ Elts.push_back(Undef);
+ UndefElts.set(i);
+ } else if (isa<UndefValue>(CP->getOperand(i))) { // Already undef.
+ Elts.push_back(Undef);
+ UndefElts.set(i);
+ } else { // Otherwise, defined.
+ Elts.push_back(CP->getOperand(i));
+ }
+
+ // If we changed the constant, return it.
+ Constant *NewCP = ConstantVector::get(Elts);
+ return NewCP != CP ? NewCP : 0;
+ } else if (isa<ConstantAggregateZero>(V)) {
+ // Simplify the CAZ to a ConstantVector where the non-demanded elements are
+ // set to undef.
+
+ // Check if this is identity (all elements demanded). If so, return 0
+ // since we are not simplifying anything.
+ if (DemandedElts.isAllOnesValue())
+ return 0;
+
+ const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
+ Constant *Zero = Constant::getNullValue(EltTy);
+ Constant *Undef = UndefValue::get(EltTy);
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i != VWidth; ++i) {
+ Constant *Elt = DemandedElts[i] ? Zero : Undef;
+ Elts.push_back(Elt);
+ }
+ UndefElts = DemandedElts ^ EltMask;
+ return ConstantVector::get(Elts);
+ }
+
+ // Limit search depth.
+ if (Depth == 10)
+ return 0;
+
+ // If multiple users are using the root value, proceed with
+ // simplification conservatively assuming that all elements
+ // are needed.
+ if (!V->hasOneUse()) {
+ // Quit if we find multiple users of a non-root value though.
+ // They'll be handled when it's their turn to be visited by
+ // the main instcombine process.
+ if (Depth != 0)
+ // TODO: Just compute the UndefElts information recursively.
+ return 0;
+
+ // Conservatively assume that all elements are needed.
+ DemandedElts = EltMask;
+ }
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return 0; // Only analyze instructions.
+
+ bool MadeChange = false;
+ APInt UndefElts2(VWidth, 0);
+ Value *TmpV;
+ switch (I->getOpcode()) {
+ default: break;
+
+ case Instruction::InsertElement: {
+ // If this is a variable index, we don't know which element it overwrites,
+ // so demand exactly the same input as we produce.
+ ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
+ if (Idx == 0) {
+ // Note that we can't propagate undef elt info, because we don't know
+ // which elt is getting updated.
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ break;
+ }
+
+ // If this is inserting an element that isn't demanded, remove this
+ // insertelement.
+ unsigned IdxNo = Idx->getZExtValue();
+ if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
+ Worklist.Add(I);
+ return I->getOperand(0);
+ }
+
+ // Otherwise, the element inserted overwrites whatever was there, so the
+ // input demanded set is simpler than the output set.
+ APInt DemandedElts2 = DemandedElts;
+ DemandedElts2.clear(IdxNo);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+ // The inserted element is defined.
+ UndefElts.clear(IdxNo);
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
+ uint64_t LHSVWidth =
+ cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
+ APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
+ for (unsigned i = 0; i < VWidth; i++) {
+ if (DemandedElts[i]) {
+ unsigned MaskVal = Shuffle->getMaskValue(i);
+ if (MaskVal != -1u) {
+ assert(MaskVal < LHSVWidth * 2 &&
+ "shufflevector mask index out of range!");
+ if (MaskVal < LHSVWidth)
+ LeftDemanded.set(MaskVal);
+ else
+ RightDemanded.set(MaskVal - LHSVWidth);
+ }
+ }
+ }
+
+ APInt UndefElts4(LHSVWidth, 0);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
+ UndefElts4, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+ APInt UndefElts3(LHSVWidth, 0);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
+ UndefElts3, Depth+1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ bool NewUndefElts = false;
+ for (unsigned i = 0; i < VWidth; i++) {
+ unsigned MaskVal = Shuffle->getMaskValue(i);
+ if (MaskVal == -1u) {
+ UndefElts.set(i);
+ } else if (MaskVal < LHSVWidth) {
+ if (UndefElts4[MaskVal]) {
+ NewUndefElts = true;
+ UndefElts.set(i);
+ }
+ } else {
+ if (UndefElts3[MaskVal - LHSVWidth]) {
+ NewUndefElts = true;
+ UndefElts.set(i);
+ }
+ }
+ }
+
+ if (NewUndefElts) {
+ // Add additional discovered undefs.
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0; i < VWidth; ++i) {
+ if (UndefElts[i])
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
+ else
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
+ Shuffle->getMaskValue(i)));
+ }
+ I->setOperand(2, ConstantVector::get(Elts));
+ MadeChange = true;
+ }
+ break;
+ }
+ case Instruction::BitCast: {
+ // Vector->vector casts only.
+ const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
+ if (!VTy) break;
+ unsigned InVWidth = VTy->getNumElements();
+ APInt InputDemandedElts(InVWidth, 0);
+ unsigned Ratio;
+
+ if (VWidth == InVWidth) {
+ // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
+ // elements as are demanded of us.
+ Ratio = 1;
+ InputDemandedElts = DemandedElts;
+ } else if (VWidth > InVWidth) {
+ // Untested so far.
+ break;
+
+ // If there are more elements in the result than there are in the source,
+ // then an input element is live if any of the corresponding output
+ // elements are live.
+ Ratio = VWidth/InVWidth;
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
+ if (DemandedElts[OutIdx])
+ InputDemandedElts.set(OutIdx/Ratio);
+ }
+ } else {
+ // Untested so far.
+ break;
+
+ // If there are more elements in the source than there are in the result,
+ // then an input element is live if the corresponding output element is
+ // live.
+ Ratio = InVWidth/VWidth;
+ for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+ if (DemandedElts[InIdx/Ratio])
+ InputDemandedElts.set(InIdx);
+ }
+
+ // Simplify the source operand based on the demanded elements.
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) {
+ I->setOperand(0, TmpV);
+ MadeChange = true;
+ }
+
+ UndefElts = UndefElts2;
+ if (VWidth > InVWidth) {
+ llvm_unreachable("Unimp");
+ // If there are more elements in the result than there are in the source,
+ // then an output element is undef if the corresponding input element is
+ // undef.
+ for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
+ if (UndefElts2[OutIdx/Ratio])
+ UndefElts.set(OutIdx);
+ } else if (VWidth < InVWidth) {
+ llvm_unreachable("Unimp");
+ // If there are more elements in the source than there are in the result,
+ // then a result element is undef if all of the corresponding input
+ // elements are undef.
+ UndefElts = ~0ULL >> (64-VWidth); // Start out all undef.
+ for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+ if (!UndefElts2[InIdx]) // Not undef?
+ UndefElts.clear(InIdx/Ratio); // Clear undef bit.
+ }
+ break;
+ }
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // These ops operate element-wise, so simplify each operand based on the
+ // demanded elements. (div/rem are excluded: they demand all elements,
+ // because turning unused elements into undef could create a divide by
+ // zero.)
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ // Output elements are undefined if both are undefined. Consider things
+ // like undef&0. The result is known zero, not undef.
+ UndefElts &= UndefElts2;
+ break;
+
+ case Instruction::Call: {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
+ if (!II) break;
+ switch (II->getIntrinsicID()) {
+ default: break;
+
+ // Binary vector operations that work column-wise. A dest element is a
+ // function of the corresponding input elements from the two inputs.
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse_min_ss:
+ case Intrinsic::x86_sse_max_ss:
+ case Intrinsic::x86_sse2_sub_sd:
+ case Intrinsic::x86_sse2_mul_sd:
+ case Intrinsic::x86_sse2_min_sd:
+ case Intrinsic::x86_sse2_max_sd:
+ TmpV = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
+ UndefElts, Depth+1);
+ if (TmpV) { II->setOperand(1, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(II->getOperand(2), DemandedElts,
+ UndefElts2, Depth+1);
+ if (TmpV) { II->setOperand(2, TmpV); MadeChange = true; }
+
+ // If only the low elt is demanded and this is a scalarizable intrinsic,
+ // scalarize it now.
+ if (DemandedElts == 1) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse2_sub_sd:
+ case Intrinsic::x86_sse2_mul_sd:
+ // TODO: Lower MIN/MAX/ABS/etc
+ Value *LHS = II->getOperand(1);
+ Value *RHS = II->getOperand(2);
+ // Extract the element as scalars.
+ LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS,
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
+ RHS = InsertNewInstBefore(ExtractElementInst::Create(RHS,
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
+
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("Case stmts out of sync!");
+ case Intrinsic::x86_sse_sub_ss:
+ case Intrinsic::x86_sse2_sub_sd:
+ TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS,
+ II->getName()), *II);
+ break;
+ case Intrinsic::x86_sse_mul_ss:
+ case Intrinsic::x86_sse2_mul_sd:
+ TmpV = InsertNewInstBefore(BinaryOperator::CreateFMul(LHS, RHS,
+ II->getName()), *II);
+ break;
+ }
+
+ Instruction *New =
+ InsertElementInst::Create(
+ UndefValue::get(II->getType()), TmpV,
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
+ II->getName());
+ InsertNewInstBefore(New, *II);
+ return New;
+ }
+ }
+
+ // Output elements are undefined if both are undefined. Consider things
+ // like undef&0. The result is known zero, not undef.
+ UndefElts &= UndefElts2;
+ break;
+ }
+ break;
+ }
+ }
+ return MadeChange ? I : 0;
+}
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
new file mode 100644
index 0000000..f11f557
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -0,0 +1,577 @@
+//===- InstCombineVectorOps.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements instcombine for ExtractElement, InsertElement and
+// ShuffleVector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+using namespace llvm;
+
+/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
+/// is to leave as a vector operation.
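+///
+/// For example (illustrative), for a single-use
+/// %v = add <4 x i32> %a, <i32 1, i32 1, i32 1, i32 1>
+/// extracting one lane can be rewritten as a scalar add of the corresponding
+/// lane of %a and 1.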
+static bool CheapToScalarize(Value *V, bool isConstant) {
+ if (isa<ConstantAggregateZero>(V))
+ return true;
+ if (ConstantVector *C = dyn_cast<ConstantVector>(V)) {
+ if (isConstant) return true;
+ // If all elts are the same, we can extract.
+ Constant *Op0 = C->getOperand(0);
+ for (unsigned i = 1; i < C->getNumOperands(); ++i)
+ if (C->getOperand(i) != Op0)
+ return false;
+ return true;
+ }
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // Insertelement gets simplified to the inserted element or is deleted if
+ // this is a constant-index extract from a constant-index insertelement.
+ if (I->getOpcode() == Instruction::InsertElement && isConstant &&
+ isa<ConstantInt>(I->getOperand(2)))
+ return true;
+ if (I->getOpcode() == Instruction::Load && I->hasOneUse())
+ return true;
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
+ if (BO->hasOneUse() &&
+ (CheapToScalarize(BO->getOperand(0), isConstant) ||
+ CheapToScalarize(BO->getOperand(1), isConstant)))
+ return true;
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ if (CI->hasOneUse() &&
+ (CheapToScalarize(CI->getOperand(0), isConstant) ||
+ CheapToScalarize(CI->getOperand(1), isConstant)))
+ return true;
+
+ return false;
+}
+
+/// Read and decode a shufflevector mask.
+///
+/// It turns undef elements into 2*NElts, a value that is out of range for
+/// both inputs and therefore serves as the undef marker.
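+///
+/// For example (illustrative), a <4 x i32> shuffle mask of
+/// <i32 0, i32 5, i32 undef, i32 2> decodes to {0, 5, 8, 2}, where
+/// 8 == 2*NElts marks the undef lane.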
+static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) {
+ unsigned NElts = SVI->getType()->getNumElements();
+ if (isa<ConstantAggregateZero>(SVI->getOperand(2)))
+ return std::vector<unsigned>(NElts, 0);
+ if (isa<UndefValue>(SVI->getOperand(2)))
+ return std::vector<unsigned>(NElts, 2*NElts);
+
+ std::vector<unsigned> Result;
+ const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
+ for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
+ if (isa<UndefValue>(*i))
+ Result.push_back(NElts*2); // undef -> 2*NElts
+ else
+ Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
+ return Result;
+}
+
+/// FindScalarElement - Given a vector and an element number, see if the scalar
+/// value is already around as a register, for example if it were inserted then
+/// extracted from the vector.
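+///
+/// For example (illustrative), given
+/// %v = insertelement <4 x i32> %w, i32 %s, i32 2
+/// FindScalarElement(%v, 2) returns %s.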
+static Value *FindScalarElement(Value *V, unsigned EltNo) {
+ assert(isa<VectorType>(V->getType()) && "Not looking at a vector?");
+ const VectorType *PTy = cast<VectorType>(V->getType());
+ unsigned Width = PTy->getNumElements();
+ if (EltNo >= Width) // Out of range access.
+ return UndefValue::get(PTy->getElementType());
+
+ if (isa<UndefValue>(V))
+ return UndefValue::get(PTy->getElementType());
+ if (isa<ConstantAggregateZero>(V))
+ return Constant::getNullValue(PTy->getElementType());
+ if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
+ return CP->getOperand(EltNo);
+
+ if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert to a variable element, we don't know what it is.
+ if (!isa<ConstantInt>(III->getOperand(2)))
+ return 0;
+ unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
+
+ // If this is an insert to the element we are looking for, return the
+ // inserted value.
+ if (EltNo == IIElt)
+ return III->getOperand(1);
+
+ // Otherwise, the insertelement doesn't modify the value, recurse on its
+ // vector input.
+ return FindScalarElement(III->getOperand(0), EltNo);
+ }
+
+ if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
+ unsigned LHSWidth =
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+ unsigned InEl = getShuffleMask(SVI)[EltNo];
+ if (InEl < LHSWidth)
+ return FindScalarElement(SVI->getOperand(0), InEl);
+ else if (InEl < LHSWidth*2)
+ return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
+ else
+ return UndefValue::get(PTy->getElementType());
+ }
+
+ // Otherwise, we don't know.
+ return 0;
+}
+
+Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
+ // If vector val is undef, replace extract with scalar undef.
+ if (isa<UndefValue>(EI.getOperand(0)))
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+
+ // If vector val is constant 0, replace extract with scalar 0.
+ if (isa<ConstantAggregateZero>(EI.getOperand(0)))
+ return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
+
+ if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
+ // If vector val is constant with all elements the same, replace EI with
+ // that element. When the elements are not identical, we cannot replace yet
+ // (we do that below, but only when the index is constant).
+ Constant *op0 = C->getOperand(0);
+ for (unsigned i = 1; i != C->getNumOperands(); ++i)
+ if (C->getOperand(i) != op0) {
+ op0 = 0;
+ break;
+ }
+ if (op0)
+ return ReplaceInstUsesWith(EI, op0);
+ }
+
+ // If extracting a specified index from the vector, see if we can recursively
+ // find a previously computed scalar that was inserted into the vector.
+ if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+ unsigned IndexVal = IdxC->getZExtValue();
+ unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
+
+ // If this is extracting an invalid index, turn this into undef, to avoid
+ // crashing the code below.
+ if (IndexVal >= VectorWidth)
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+
+ // This instruction only demands the single element from the input vector.
+ // If the input vector has a single use, simplify it based on this use
+ // property.
+ if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
+ APInt UndefElts(VectorWidth, 0);
+ APInt DemandedMask(VectorWidth, 1 << IndexVal);
+ if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
+ DemandedMask, UndefElts)) {
+ EI.setOperand(0, V);
+ return &EI;
+ }
+ }
+
+ if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
+ return ReplaceInstUsesWith(EI, Elt);
+
+ // If this extractelement is directly using a bitcast from a vector of
+ // the same number of elements, see if we can find the source element from
+ // it. In this case, we will end up needing to bitcast the scalars.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
+ if (const VectorType *VT =
+ dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
+ if (VT->getNumElements() == VectorWidth)
+ if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
+ return new BitCastInst(Elt, EI.getType());
+ }
+ }
+
+ if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
+ // Push extractelement into predecessor operation if legal and
+ // profitable to do so.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ if (I->hasOneUse() &&
+ CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
+ Value *newEI0 =
+ Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
+ EI.getName()+".lhs");
+ Value *newEI1 =
+ Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
+ EI.getName()+".rhs");
+ return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
+ }
+ } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
+ // Extracting the inserted element?
+ if (IE->getOperand(2) == EI.getOperand(1))
+ return ReplaceInstUsesWith(EI, IE->getOperand(1));
+ // If the inserted and extracted indices are constants, they must differ
+ // (handled above), so extract from the pre-inserted value instead.
+ if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
+ Worklist.AddValue(EI.getOperand(0));
+ EI.setOperand(0, IE->getOperand(0));
+ return &EI;
+ }
+ } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
+ // If this is extracting an element from a shufflevector, figure out where
+ // it came from and extract from the appropriate input element instead.
+ if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+ unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()];
+ Value *Src;
+ unsigned LHSWidth =
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+
+ if (SrcIdx < LHSWidth)
+ Src = SVI->getOperand(0);
+ else if (SrcIdx < LHSWidth*2) {
+ SrcIdx -= LHSWidth;
+ Src = SVI->getOperand(1);
+ } else {
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+ }
+ return ExtractElementInst::Create(Src,
+ ConstantInt::get(Type::getInt32Ty(EI.getContext()),
+ SrcIdx, false));
+ }
+ }
+ // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement)
+ }
+ return 0;
+}
+
+/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
+/// elements from either LHS or RHS, return the shuffle mask and true.
+/// Otherwise, return false.
+static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
+ std::vector<Constant*> &Mask) {
+ assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
+ "Invalid CollectSingleShuffleElements");
+ unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+
+ if (isa<UndefValue>(V)) {
+ Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
+ return true;
+ }
+
+ if (V == LHS) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
+ return true;
+ }
+
+ if (V == RHS) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ i+NumElts));
+ return true;
+ }
+
+ if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert of an extract from some other vector, include it.
+ Value *VecOp = IEI->getOperand(0);
+ Value *ScalarOp = IEI->getOperand(1);
+ Value *IdxOp = IEI->getOperand(2);
+
+ if (!isa<ConstantInt>(IdxOp))
+ return false;
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
+ // Okay, we can handle this if the vector we are inserting into is
+ // transitively ok.
+ if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ // If so, update the mask to reflect the inserted undef.
+ Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
+ return true;
+ }
+ } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
+ if (isa<ConstantInt>(EI->getOperand(1)) &&
+ EI->getOperand(0)->getType() == V->getType()) {
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+
+ // This must be extracting from either LHS or RHS.
+ if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
+ // Okay, we can handle this if the vector we are inserting into is
+ // transitively ok.
+ if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ // If so, update the mask to reflect the inserted value.
+ if (EI->getOperand(0) == LHS) {
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ ExtractedIdx);
+ } else {
+ assert(EI->getOperand(0) == RHS);
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ ExtractedIdx+NumElts);
+
+ }
+ return true;
+ }
+ }
+ }
+ }
+ }
+ // TODO: Handle shufflevector here!
+
+ return false;
+}
+
+/// CollectShuffleElements - We are building a shuffle to compute V, using RHS
+/// as the RHS of the shuffle if it is not null. Fill in Mask and return the
+/// LHS value of the shuffle, so that shuffle(LHS, RHS, Mask) recreates V.
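+///
+/// For example (illustrative), for a chain that inserts element 0 of %b into
+/// element 3 of %a, this returns %a as the LHS, sets RHS to %b, and builds
+/// the mask <i32 0, i32 1, i32 2, i32 4>.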
+static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
+ Value *&RHS) {
+ assert(isa<VectorType>(V->getType()) &&
+ (RHS == 0 || V->getType() == RHS->getType()) &&
+ "Invalid shuffle!");
+ unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+
+ if (isa<UndefValue>(V)) {
+ Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
+ return V;
+ } else if (isa<ConstantAggregateZero>(V)) {
+ Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
+ return V;
+ } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert of an extract from some other vector, include it.
+ Value *VecOp = IEI->getOperand(0);
+ Value *ScalarOp = IEI->getOperand(1);
+ Value *IdxOp = IEI->getOperand(2);
+
+ if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+ if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
+ EI->getOperand(0)->getType() == V->getType()) {
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ // Either the vector extracted from or the vector inserted into must be
+ // RHS, otherwise we'd end up with a shuffle of three inputs.
+ if (EI->getOperand(0) == RHS || RHS == 0) {
+ RHS = EI->getOperand(0);
+ Value *V = CollectShuffleElements(VecOp, Mask, RHS);
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ NumElts+ExtractedIdx);
+ return V;
+ }
+
+ if (VecOp == RHS) {
+ Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
+ // Everything but the extracted element is replaced with the RHS.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (i != InsertedIdx)
+ Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ NumElts+i);
+ }
+ return V;
+ }
+
+ // If this insertelement is a chain that comes from exactly these two
+ // vectors, return the vector and the effective shuffle.
+ if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
+ return EI->getOperand(0);
+ }
+ }
+ }
+ // TODO: Handle shufflevector here!
+
+ // Otherwise, can't do anything fancy. Return an identity vector.
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
+ return V;
+}
+
+Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
+ Value *VecOp = IE.getOperand(0);
+ Value *ScalarOp = IE.getOperand(1);
+ Value *IdxOp = IE.getOperand(2);
+
+ // Inserting an undef, or inserting into an undefined place; remove this.
+ if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
+ return ReplaceInstUsesWith(IE, VecOp);
+
+ // If the inserted element was extracted from some other vector, and if the
+ // indexes are constant, try to turn this into a shufflevector operation.
+ if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+ if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
+ EI->getOperand(0)->getType() == IE.getType()) {
+ unsigned NumVectorElts = IE.getType()->getNumElements();
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+ if (ExtractedIdx >= NumVectorElts) // Out of range extract.
+ return ReplaceInstUsesWith(IE, VecOp);
+
+ if (InsertedIdx >= NumVectorElts) // Out of range insert.
+ return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
+
+ // If we are extracting a value from a vector, then inserting it right
+ // back into the same place, just use the input vector.
+ if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
+ return ReplaceInstUsesWith(IE, VecOp);
+
+ // If this insertelement isn't used by some other insertelement, turn it
+ // (and any insertelements it points to), into one big shuffle.
+ if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
+ std::vector<Constant*> Mask;
+ Value *RHS = 0;
+ Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
+ if (RHS == 0) RHS = UndefValue::get(LHS->getType());
+ // We now have a shuffle of LHS, RHS, Mask.
+ return new ShuffleVectorInst(LHS, RHS,
+ ConstantVector::get(Mask));
+ }
+ }
+ }
+
+ unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts))
+ return &IE;
+
+ return 0;
+}
+
+
+Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+ Value *LHS = SVI.getOperand(0);
+ Value *RHS = SVI.getOperand(1);
+ std::vector<unsigned> Mask = getShuffleMask(&SVI);
+
+ bool MadeChange = false;
+
+ // Undefined shuffle mask -> undefined value.
+ if (isa<UndefValue>(SVI.getOperand(2)))
+ return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
+
+ unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
+
+ if (VWidth != cast<VectorType>(LHS->getType())->getNumElements())
+ return 0;
+
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+ LHS = SVI.getOperand(0);
+ RHS = SVI.getOperand(1);
+ MadeChange = true;
+ }
+
+ // Canonicalize shuffle(x, x, mask) -> shuffle(x, undef, mask')
+ // Canonicalize shuffle(undef, x, mask) -> shuffle(x, undef, mask').
+ if (LHS == RHS || isa<UndefValue>(LHS)) {
+ if (isa<UndefValue>(LHS) && LHS == RHS) {
+ // shuffle(undef,undef,mask) -> undef.
+ return ReplaceInstUsesWith(SVI, LHS);
+ }
+
+ // Remap any references to RHS to use LHS.
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] >= 2*e)
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ else {
+ if ((Mask[i] >= e && isa<UndefValue>(RHS)) ||
+ (Mask[i] < e && isa<UndefValue>(LHS))) {
+ Mask[i] = 2*e; // Turn into undef.
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ } else {
+ Mask[i] = Mask[i] % e; // Force to LHS.
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
+ Mask[i]));
+ }
+ }
+ }
+ SVI.setOperand(0, SVI.getOperand(1));
+ SVI.setOperand(1, UndefValue::get(RHS->getType()));
+ SVI.setOperand(2, ConstantVector::get(Elts));
+ LHS = SVI.getOperand(0);
+ RHS = SVI.getOperand(1);
+ MadeChange = true;
+ }
+
+ // Analyze the shuffle: is the LHS or the RHS an identity shuffle?
+ bool isLHSID = true, isRHSID = true;
+
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] >= e*2) continue; // Ignore undef values.
+ // Is this an identity shuffle of the LHS value?
+ isLHSID &= (Mask[i] == i);
+
+ // Is this an identity shuffle of the RHS value?
+ isRHSID &= (Mask[i]-e == i);
+ }
+
+ // Eliminate identity shuffles.
+ if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
+ if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
+
+ // If the LHS is a shufflevector itself, see if we can combine it with this
+ // one without producing an unusual shuffle. Here we are really conservative:
+ // we are absolutely afraid of producing a shuffle mask not in the input
+ // program, because the code gen may not be smart enough to turn a merged
+ // shuffle into two specific shuffles: it may produce worse code. As such,
+ // we only merge two shuffles if the result is one of the two input shuffle
+ // masks. In this case, merging the shuffles just removes one instruction,
+ // which we know is safe. This is good for things like turning:
+ // (splat(splat)) -> splat.
+ if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) {
+ if (isa<UndefValue>(RHS)) {
+ std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI);
+
+ if (LHSMask.size() == Mask.size()) {
+ std::vector<unsigned> NewMask;
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i)
+ if (Mask[i] >= e)
+ NewMask.push_back(2*e);
+ else
+ NewMask.push_back(LHSMask[Mask[i]]);
+
+ // If the result mask is equal to the src shuffle or this
+ // shuffle mask, do the replacement.
+ if (NewMask == LHSMask || NewMask == Mask) {
+ unsigned LHSInNElts =
+ cast<VectorType>(LHSSVI->getOperand(0)->getType())->
+ getNumElements();
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
+ if (NewMask[i] >= LHSInNElts*2) {
+ Elts.push_back(UndefValue::get(
+ Type::getInt32Ty(SVI.getContext())));
+ } else {
+ Elts.push_back(ConstantInt::get(
+ Type::getInt32Ty(SVI.getContext()),
+ NewMask[i]));
+ }
+ }
+ return new ShuffleVectorInst(LHSSVI->getOperand(0),
+ LHSSVI->getOperand(1),
+ ConstantVector::get(Elts));
+ }
+ }
+ }
+ }
+
+ return MadeChange ? &SVI : 0;
+}
+
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
new file mode 100644
index 0000000..9d88621
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -0,0 +1,105 @@
+//===- InstCombineWorklist.h - Worklist for the InstCombine pass ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTCOMBINE_WORKLIST_H
+#define INSTCOMBINE_WORKLIST_H
+
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Instruction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// InstCombineWorklist - This is the worklist management logic for
+/// InstCombine.
+class VISIBILITY_HIDDEN InstCombineWorklist {
+ SmallVector<Instruction*, 256> Worklist;
+ DenseMap<Instruction*, unsigned> WorklistMap;
+
+ void operator=(const InstCombineWorklist &RHS); // DO NOT IMPLEMENT
+ InstCombineWorklist(const InstCombineWorklist&); // DO NOT IMPLEMENT
+public:
+ InstCombineWorklist() {}
+
+ bool isEmpty() const { return Worklist.empty(); }
+
+ /// Add - Add the specified instruction to the worklist if it isn't already
+ /// in it.
+ void Add(Instruction *I) {
+ if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
+ DEBUG(errs() << "IC: ADD: " << *I << '\n');
+ Worklist.push_back(I);
+ }
+ }
+
+ void AddValue(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ Add(I);
+ }
+
+ /// AddInitialGroup - Add the specified batch of instructions in reverse
+ /// order, which should only be done when the worklist is empty and when
+ /// the group has no duplicates.
+ void AddInitialGroup(Instruction *const *List, unsigned NumEntries) {
+ assert(Worklist.empty() && "Worklist must be empty to add initial group");
+ Worklist.reserve(NumEntries+16);
+ DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
+ for (; NumEntries; --NumEntries) {
+ Instruction *I = List[NumEntries-1];
+ WorklistMap.insert(std::make_pair(I, Worklist.size()));
+ Worklist.push_back(I);
+ }
+ }
+
+ /// Remove - Remove I from the worklist if it exists.
+ void Remove(Instruction *I) {
+ DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
+ if (It == WorklistMap.end()) return; // Not in worklist.
+
+ // Don't bother moving everything down, just null out the slot.
+ Worklist[It->second] = 0;
+
+ WorklistMap.erase(It);
+ }
+
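+ /// RemoveOne - Pop the most recently added instruction. This may return a
+ /// null entry if Remove() previously nulled out the slot, so callers must
+ /// be prepared to skip null results.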
+ Instruction *RemoveOne() {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
+ WorklistMap.erase(I);
+ return I;
+ }
+
+ /// AddUsersToWorkList - When an instruction is simplified, add all users of
+ /// the instruction to the work lists because they might get more simplified
+ /// now.
+ ///
+ void AddUsersToWorkList(Instruction &I) {
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI)
+ Add(cast<Instruction>(*UI));
+ }
+
+
+ /// Zap - check that the worklist is empty and nuke the backing store for
+ /// the map if it is large.
+ void Zap() {
+ assert(WorklistMap.empty() && "Worklist empty, but map not?");
+
+ // Do an explicit clear, this shrinks the map if needed.
+ WorklistMap.clear();
+ }
+};
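+
+// Illustrative driver loop (a sketch of how a client drains the worklist;
+// see InstCombiner::DoOneIteration for the real one):
+//
+//   while (!Worklist.isEmpty()) {
+//     Instruction *I = Worklist.RemoveOne();
+//     if (I == 0) continue;   // Slot was nulled out by Remove().
+//     // ... visit I, calling Worklist.Add() on anything newly created ...
+//   }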
+
+} // end namespace llvm.
+
+#endif
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
new file mode 100644
index 0000000..93b1961
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -0,0 +1,1274 @@
+//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InstructionCombining - Combine instructions to form fewer, simple
+// instructions. This pass does not modify the CFG. This pass is where
+// algebraic simplification happens.
+//
+// This pass combines things like:
+// %Y = add i32 %X, 1
+// %Z = add i32 %Y, 1
+// into:
+// %Z = add i32 %X, 2
+//
+// This is a simple worklist driven algorithm.
+//
+// This pass guarantees that the following canonicalizations are performed on
+// the program:
+// 1. If a binary operator has a constant operand, it is moved to the RHS
+// 2. Bitwise operators with constant operands are always grouped so that
+// shifts are performed first, then or's, then and's, then xor's.
+// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
+// 4. All cmp instructions on boolean values are replaced with logical ops
+// 5. add X, X is represented as (X*2) => (X << 1)
+// 6. Multiplies with a power-of-two constant argument are transformed into
+// shifts.
+// ... etc.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Scalar.h"
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+#include <climits>
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+STATISTIC(NumCombined , "Number of insts combined");
+STATISTIC(NumConstProp, "Number of constant folds");
+STATISTIC(NumDeadInst , "Number of dead inst eliminated");
+STATISTIC(NumSunkInst , "Number of instructions sunk");
+
+
+char InstCombiner::ID = 0;
+static RegisterPass<InstCombiner>
+X("instcombine", "Combine redundant instructions");
+
+void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreservedID(LCSSAID);
+ AU.setPreservesCFG();
+}
+
+
+/// ShouldChangeType - Return true if it is desirable to convert a computation
+/// from 'From' to 'To'. We don't want to convert from a legal to an illegal
+/// type for example, or from a smaller to a larger illegal type.
+bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const {
+ assert(isa<IntegerType>(From) && isa<IntegerType>(To));
+
+ // If we don't have TD, we don't know if the source/dest are legal.
+ if (!TD) return false;
+
+ unsigned FromWidth = From->getPrimitiveSizeInBits();
+ unsigned ToWidth = To->getPrimitiveSizeInBits();
+ bool FromLegal = TD->isLegalInteger(FromWidth);
+ bool ToLegal = TD->isLegalInteger(ToWidth);
+
+ // If the source is a legal integer type and the result would be an illegal
+ // type, don't do the transformation.
+ if (FromLegal && !ToLegal)
+ return false;
+
+ // Otherwise, if both are illegal, do not increase the size of the result. We
+ // do allow things like i160 -> i64, but not i64 -> i160.
+ if (!FromLegal && !ToLegal && ToWidth > FromWidth)
+ return false;
+
+ return true;
+}
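+
+// Illustrative behavior (assuming a target whose TargetData marks i32/i64 as
+// the legal integer widths): ShouldChangeType(i160, i64) is true, since an
+// illegal type shrinks to a legal one, while ShouldChangeType(i64, i160) is
+// false, since a legal type would widen to an illegal one.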
+
+
+// SimplifyCommutative - This performs a few simplifications for commutative
+// operators:
+//
+// 1. Order operands such that they are listed from right (least complex) to
+// left (most complex). This puts constants before unary operators before
+// binary operators.
+//
+// 2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2))
+// 3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
+//
+bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
+ bool Changed = false;
+ if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1)))
+ Changed = !I.swapOperands();
+
+ if (!I.isAssociative()) return Changed;
+
+ Instruction::BinaryOps Opcode = I.getOpcode();
+ if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0)))
+ if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) {
+ if (isa<Constant>(I.getOperand(1))) {
+ Constant *Folded = ConstantExpr::get(I.getOpcode(),
+ cast<Constant>(I.getOperand(1)),
+ cast<Constant>(Op->getOperand(1)));
+ I.setOperand(0, Op->getOperand(0));
+ I.setOperand(1, Folded);
+ return true;
+ }
+
+ if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1)))
+ if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) &&
+ Op->hasOneUse() && Op1->hasOneUse()) {
+ Constant *C1 = cast<Constant>(Op->getOperand(1));
+ Constant *C2 = cast<Constant>(Op1->getOperand(1));
+
+ // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
+ Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2);
+ Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0),
+ Op1->getOperand(0),
+ Op1->getName(), &I);
+ Worklist.Add(New);
+ I.setOperand(0, New);
+ I.setOperand(1, Folded);
+ return true;
+ }
+ }
+ return Changed;
+}
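+
+// Illustrative instance of transform 2 above (example IR only):
+//   %t = mul i32 %x, 3
+//   %r = mul i32 %t, 5
+// becomes
+//   %r = mul i32 %x, 15
+// because the two constants are folded with ConstantExpr::get.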
+
+// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
+// if the LHS is a constant zero (which is the 'negate' form).
+//
+Value *InstCombiner::dyn_castNegVal(Value *V) const {
+ if (BinaryOperator::isNeg(V))
+ return BinaryOperator::getNegArgument(V);
+
+ // Constants can be considered to be negated values if they can be folded.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(V))
+ return ConstantExpr::getNeg(C);
+
+ if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+ if (C->getType()->getElementType()->isInteger())
+ return ConstantExpr::getNeg(C);
+
+ return 0;
+}
+
+// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the
+// instruction if the LHS is a constant negative zero (which is the 'negate'
+// form).
+//
+Value *InstCombiner::dyn_castFNegVal(Value *V) const {
+ if (BinaryOperator::isFNeg(V))
+ return BinaryOperator::getFNegArgument(V);
+
+ // Constants can be considered to be negated values if they can be folded.
+ if (ConstantFP *C = dyn_cast<ConstantFP>(V))
+ return ConstantExpr::getFNeg(C);
+
+ if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+ if (C->getType()->getElementType()->isFloatingPoint())
+ return ConstantExpr::getFNeg(C);
+
+ return 0;
+}
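+
+// For reference, the 'negate' forms these helpers match look like:
+//   %neg = sub i32 0, %x         ; recognized by dyn_castNegVal
+//   %fneg = fsub float -0.0, %x  ; recognized by dyn_castFNegVal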
+
+static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
+ InstCombiner *IC) {
+ if (CastInst *CI = dyn_cast<CastInst>(&I))
+ return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());
+
+ // Figure out if the constant is the left or the right argument.
+ bool ConstIsRHS = isa<Constant>(I.getOperand(1));
+ Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
+
+ if (Constant *SOC = dyn_cast<Constant>(SO)) {
+ if (ConstIsRHS)
+ return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
+ return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
+ }
+
+ Value *Op0 = SO, *Op1 = ConstOperand;
+ if (!ConstIsRHS)
+ std::swap(Op0, Op1);
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
+ return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
+ SO->getName()+".op");
+ if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
+ return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
+ SO->getName()+".cmp");
+ if (FCmpInst *CI = dyn_cast<FCmpInst>(&I))
+ return IC->Builder->CreateFCmp(CI->getPredicate(), Op0, Op1,
+ SO->getName()+".cmp");
+ llvm_unreachable("Unknown binary instruction type!");
+}
+
+// FoldOpIntoSelect - Given an instruction with a select as one operand and a
+// constant as the other operand, try to fold the binary operator into the
+// select arguments. This also works for Cast instructions, which obviously do
+// not have a second operand.
+Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
+ // Don't modify shared select instructions
+ if (!SI->hasOneUse()) return 0;
+ Value *TV = SI->getOperand(1);
+ Value *FV = SI->getOperand(2);
+
+ if (isa<Constant>(TV) || isa<Constant>(FV)) {
+ // Bool selects with constant operands can be folded to logical ops.
+ if (SI->getType()->isInteger(1)) return 0;
+
+ Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
+ Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);
+
+ return SelectInst::Create(SI->getCondition(), SelectTrueVal,
+ SelectFalseVal);
+ }
+ return 0;
+}
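+
+// Illustrative fold performed by FoldOpIntoSelect (example IR only):
+//   %s = select i1 %c, i32 4, i32 8
+//   %r = add i32 %s, 1
+// becomes
+//   %r = select i1 %c, i32 5, i32 9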
+
+
+/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
+/// has a PHI node as operand #0, see if we can fold the instruction into the
+/// PHI (which is only possible if all operands to the PHI are constants).
+///
+/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
+/// that would normally be unprofitable because they strongly encourage jump
+/// threading.
+Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
+ bool AllowAggressive) {
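+ // Note: the next line unconditionally turns off the aggressive mode, so
+ // the AllowAggressive parameter currently has no effect.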
+ AllowAggressive = false;
+ PHINode *PN = cast<PHINode>(I.getOperand(0));
+ unsigned NumPHIValues = PN->getNumIncomingValues();
+ if (NumPHIValues == 0 ||
+ // We normally only transform phis with a single use, unless we're trying
+ // hard to make jump threading happen.
+ (!PN->hasOneUse() && !AllowAggressive))
+ return 0;
+
+
+ // Check to see if all of the operands of the PHI are simple constants
+ // (constantint/constantfp/undef). If there is one non-constant value,
+ // remember the BB it is in. If there is more than one or if *it* is a PHI,
+ // bail out. We don't do arbitrary constant expressions here because moving
+ // their computation can be expensive without a cost model.
+ BasicBlock *NonConstBB = 0;
+ for (unsigned i = 0; i != NumPHIValues; ++i)
+ if (!isa<Constant>(PN->getIncomingValue(i)) ||
+ isa<ConstantExpr>(PN->getIncomingValue(i))) {
+ if (NonConstBB) return 0; // More than one non-const value.
+ if (isa<PHINode>(PN->getIncomingValue(i))) return 0; // Itself a phi.
+ NonConstBB = PN->getIncomingBlock(i);
+
+ // If the incoming non-constant value is in I's block, we have an infinite
+ // loop.
+ if (NonConstBB == I.getParent())
+ return 0;
+ }
+
+ // If there is exactly one non-constant value, we can insert a copy of the
+ // operation in that block. However, if this is a critical edge, we would be
+ // inserting the computation on some other paths (e.g. inside a loop). Only
+ // do this if the pred block is unconditionally branching into the phi block.
+ if (NonConstBB != 0 && !AllowAggressive) {
+ BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
+ if (!BI || !BI->isUnconditional()) return 0;
+ }
+
+ // Okay, we can do the transformation: create the new PHI node.
+ PHINode *NewPN = PHINode::Create(I.getType(), "");
+ NewPN->reserveOperandSpace(PN->getNumOperands()/2);
+ InsertNewInstBefore(NewPN, *PN);
+ NewPN->takeName(PN);
+
+ // Next, add all of the operands to the PHI.
+ if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
+ // We currently only try to fold the condition of a select when it is a phi,
+ // not the true/false values.
+ Value *TrueV = SI->getTrueValue();
+ Value *FalseV = SI->getFalseValue();
+ BasicBlock *PhiTransBB = PN->getParent();
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ BasicBlock *ThisBB = PN->getIncomingBlock(i);
+ Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
+ Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
+ Value *InV = 0;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+ InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
+ } else {
+ assert(PN->getIncomingBlock(i) == NonConstBB);
+ InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred,
+ FalseVInPred,
+ "phitmp", NonConstBB->getTerminator());
+ Worklist.Add(cast<Instruction>(InV));
+ }
+ NewPN->addIncoming(InV, ThisBB);
+ }
+ } else if (I.getNumOperands() == 2) {
+ Constant *C = cast<Constant>(I.getOperand(1));
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV = 0;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(&I))
+ InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
+ else
+ InV = ConstantExpr::get(I.getOpcode(), InC, C);
+ } else {
+ assert(PN->getIncomingBlock(i) == NonConstBB);
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
+ InV = BinaryOperator::Create(BO->getOpcode(),
+ PN->getIncomingValue(i), C, "phitmp",
+ NonConstBB->getTerminator());
+ else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
+ InV = CmpInst::Create(CI->getOpcode(),
+ CI->getPredicate(),
+ PN->getIncomingValue(i), C, "phitmp",
+ NonConstBB->getTerminator());
+ else
+ llvm_unreachable("Unknown binop!");
+
+ Worklist.Add(cast<Instruction>(InV));
+ }
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
+ } else {
+ CastInst *CI = cast<CastInst>(&I);
+ const Type *RetTy = CI->getType();
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+ InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
+ } else {
+ assert(PN->getIncomingBlock(i) == NonConstBB);
+ InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i),
+ I.getType(), "phitmp",
+ NonConstBB->getTerminator());
+ Worklist.Add(cast<Instruction>(InV));
+ }
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
+ }
+ return ReplaceInstUsesWith(I, NewPN);
+}
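+
+// Illustrative fold performed by FoldOpIntoPhi (example IR only), for a
+// single-use phi whose incoming values are all constants:
+//   %p = phi i32 [ 1, %bb0 ], [ 2, %bb1 ]
+//   %r = add i32 %p, 10
+// becomes
+//   %r = phi i32 [ 11, %bb0 ], [ 12, %bb1 ]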
+
+/// FindElementAtOffset - Given a type and a constant offset, determine whether
+/// or not there is a sequence of GEP indices into the type that will land us at
+/// the specified offset. If so, fill them into NewIndices and return the
+/// resultant element type, otherwise return null.
+const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices) {
+ if (!TD) return 0;
+ if (!Ty->isSized()) return 0;
+
+ // Start with the index over the outer type. Note that the type size
+ // might be zero (even if the offset isn't zero) if the indexed type
+ // is something like [0 x {int, int}]
+ const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
+ int64_t FirstIdx = 0;
+ if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
+ FirstIdx = Offset/TySize;
+ Offset -= FirstIdx*TySize;
+
+ // Handle hosts where % returns negative instead of values [0..TySize).
+ if (Offset < 0) {
+ --FirstIdx;
+ Offset += TySize;
+ assert(Offset >= 0);
+ }
+ assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
+ }
+
+ NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
+
+ // Index into the types. If we fail, return null.
+ while (Offset) {
+ // Indexing into tail padding between struct/array elements.
+ if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
+ return 0;
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = TD->getStructLayout(STy);
+ assert(Offset < (int64_t)SL->getSizeInBytes() &&
+ "Offset must stay within the indexed type");
+
+ unsigned Elt = SL->getElementContainingOffset(Offset);
+ NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
+ Elt));
+
+ Offset -= SL->getElementOffset(Elt);
+ Ty = STy->getElementType(Elt);
+ } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
+ assert(EltSize && "Cannot index into a zero-sized array");
+ NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
+ Offset %= EltSize;
+ Ty = AT->getElementType();
+ } else {
+ // Otherwise, we can't index into the middle of this atomic type, bail.
+ return 0;
+ }
+ }
+
+ return Ty;
+}
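+
+// Worked example (illustrative): for Ty = { i32, [4 x i16] } and Offset = 6,
+// NewIndices becomes [0, 1, 1]: the array starts at byte 4, and byte 2 within
+// it is element 1, so the function returns i16 with no remaining offset.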
+
+
+
+Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+ SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
+
+ if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD))
+ return ReplaceInstUsesWith(GEP, V);
+
+ Value *PtrOp = GEP.getOperand(0);
+
+ if (isa<UndefValue>(GEP.getOperand(0)))
+ return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));
+
+ // Eliminate unneeded casts for indices.
+ if (TD) {
+ bool MadeChange = false;
+ unsigned PtrSize = TD->getPointerSizeInBits();
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
+ I != E; ++I, ++GTI) {
+ if (!isa<SequentialType>(*GTI)) continue;
+
+ // If we are using a wider index than needed for this platform, shrink it
+ // to what we need. If narrower, sign-extend it to what we need. This
+ // explicit cast can make subsequent optimizations more obvious.
+ unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth();
+ if (OpBits == PtrSize)
+ continue;
+
+ *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()), true);
+ MadeChange = true;
+ }
+ if (MadeChange) return &GEP;
+ }
+
+ // Combine Indices - If the source pointer to this getelementptr instruction
+ // is a getelementptr instruction, combine the indices of the two
+ // getelementptr instructions into a single instruction.
+ //
+ if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
+ // Note that if our source is a gep chain itself, we wait for that
+ // chain to be resolved before we perform this transformation. This
+ // avoids creating a ton of code in some cases.
+ //
+ if (GetElementPtrInst *SrcGEP =
+ dyn_cast<GetElementPtrInst>(Src->getOperand(0)))
+ if (SrcGEP->getNumOperands() == 2)
+ return 0; // Wait until our source is folded to completion.
+
+ SmallVector<Value*, 8> Indices;
+
+ // Find out whether the last index in the source GEP is a sequential idx.
+ bool EndsWithSequential = false;
+ for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
+ I != E; ++I)
+ EndsWithSequential = !isa<StructType>(*I);
+
+ // Can we combine the two pointer arithmetic offsets?
+ if (EndsWithSequential) {
+ // Replace: gep (gep %P, long B), long A, ...
+ // With: T = long A+B; gep %P, T, ...
+ //
+ Value *Sum;
+ Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
+ Value *GO1 = GEP.getOperand(1);
+ if (SO1 == Constant::getNullValue(SO1->getType())) {
+ Sum = GO1;
+ } else if (GO1 == Constant::getNullValue(GO1->getType())) {
+ Sum = SO1;
+ } else {
+ // If they aren't the same type, then the input hasn't been processed
+ // by the loop above yet (which canonicalizes sequential index types to
+ // intptr_t). Just avoid transforming this until the input has been
+ // normalized.
+ if (SO1->getType() != GO1->getType())
+ return 0;
+ Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
+ }
+
+ // Update the GEP in place if possible.
+ if (Src->getNumOperands() == 2) {
+ GEP.setOperand(0, Src->getOperand(0));
+ GEP.setOperand(1, Sum);
+ return &GEP;
+ }
+ Indices.append(Src->op_begin()+1, Src->op_end()-1);
+ Indices.push_back(Sum);
+ Indices.append(GEP.op_begin()+2, GEP.op_end());
+ } else if (isa<Constant>(*GEP.idx_begin()) &&
+ cast<Constant>(*GEP.idx_begin())->isNullValue() &&
+ Src->getNumOperands() != 1) {
+ // Otherwise, we can do the fold if the first index of the GEP is zero.
+ Indices.append(Src->op_begin()+1, Src->op_end());
+ Indices.append(GEP.idx_begin()+1, GEP.idx_end());
+ }
+
+ if (!Indices.empty())
+ return (GEP.isInBounds() && Src->isInBounds()) ?
+ GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(),
+ Indices.end(), GEP.getName()) :
+ GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
+ Indices.end(), GEP.getName());
+ }
+
+ // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
+ Value *StrippedPtr = PtrOp->stripPointerCasts();
+ if (StrippedPtr != PtrOp) {
+ const PointerType *StrippedPtrTy = cast<PointerType>(StrippedPtr->getType());
+
+ bool HasZeroPointerIndex = false;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
+ HasZeroPointerIndex = C->isZero();
+
+ // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
+ // into : GEP [10 x i8]* X, i32 0, ...
+ //
+ // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
+ // into : GEP i8* X, ...
+ //
+ // This occurs when the program declares an array extern like "int X[];"
+ if (HasZeroPointerIndex) {
+ const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
+ if (const ArrayType *CATy =
+ dyn_cast<ArrayType>(CPTy->getElementType())) {
+ // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
+ if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
+ // -> GEP i8* X, ...
+ SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
+ GetElementPtrInst *Res =
+ GetElementPtrInst::Create(StrippedPtr, Idx.begin(),
+ Idx.end(), GEP.getName());
+ Res->setIsInBounds(GEP.isInBounds());
+ return Res;
+ }
+
+ if (const ArrayType *XATy =
+ dyn_cast<ArrayType>(StrippedPtrTy->getElementType())){
+ // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
+ if (CATy->getElementType() == XATy->getElementType()) {
+ // -> GEP [10 x i8]* X, i32 0, ...
+ // At this point, we know that the cast source type is a pointer
+ // to an array of the same type as the destination pointer
+ // array. Because the array type is never stepped over (there
+ // is a leading zero) we can fold the cast into this GEP.
+ GEP.setOperand(0, StrippedPtr);
+ return &GEP;
+ }
+ }
+ }
+ } else if (GEP.getNumOperands() == 2) {
+ // Transform things like:
+ // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
+ // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
+ const Type *SrcElTy = StrippedPtrTy->getElementType();
+ const Type *ResElTy = cast<PointerType>(PtrOp->getType())->getElementType();
+ if (TD && isa<ArrayType>(SrcElTy) &&
+ TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
+ TD->getTypeAllocSize(ResElTy)) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
+ Idx[1] = GEP.getOperand(1);
+ Value *NewGEP = GEP.isInBounds() ?
+ Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2, GEP.getName()) :
+ Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName());
+ // V and GEP are both pointer types --> BitCast
+ return new BitCastInst(NewGEP, GEP.getType());
+ }
+
+ // Transform things like:
+ // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
+ // (where tmp = 8*tmp2) into:
+ // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
+
+ if (TD && isa<ArrayType>(SrcElTy) && ResElTy->isInteger(8)) {
+ uint64_t ArrayEltSize =
+ TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
+
+ // Check to see if "tmp" is a scale by a multiple of ArrayEltSize. We
+ // allow either a mul, shift, or constant here.
+ Value *NewIdx = 0;
+ ConstantInt *Scale = 0;
+ if (ArrayEltSize == 1) {
+ NewIdx = GEP.getOperand(1);
+ Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
+ } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
+ NewIdx = ConstantInt::get(CI->getType(), 1);
+ Scale = CI;
+ } else if (Instruction *Inst = dyn_cast<Instruction>(GEP.getOperand(1))) {
+ if (Inst->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
+ uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
+ Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
+ 1ULL << ShAmtVal);
+ NewIdx = Inst->getOperand(0);
+ } else if (Inst->getOpcode() == Instruction::Mul &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ Scale = cast<ConstantInt>(Inst->getOperand(1));
+ NewIdx = Inst->getOperand(0);
+ }
+ }
+
+ // If the index will be to exactly the right offset with the scale taken
+ // out, perform the transformation. Note that we don't know whether Scale
+ // is signed or not; we use the unsigned versions of the division/modulo
+ // operations after making sure Scale doesn't have the sign bit set.
+ if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
+ Scale->getZExtValue() % ArrayEltSize == 0) {
+ Scale = ConstantInt::get(Scale->getType(),
+ Scale->getZExtValue() / ArrayEltSize);
+ if (Scale->getZExtValue() != 1) {
+ Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
+ false /*ZExt*/);
+ NewIdx = Builder->CreateMul(NewIdx, C, "idxscale");
+ }
+
+ // Insert the new GEP instruction.
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
+ Idx[1] = NewIdx;
+ Value *NewGEP = GEP.isInBounds() ?
+ Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2,GEP.getName()):
+ Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName());
+ // The NewGEP must be pointer typed, so must the old one -> BitCast
+ return new BitCastInst(NewGEP, GEP.getType());
+ }
+ }
+ }
+ }
+
+ /// See if we can simplify:
+ /// X = bitcast A* to B*
+ /// Y = gep X, <...constant indices...>
+ /// into a gep of the original struct. This is important for SROA and alias
+ /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
+ if (TD &&
+ !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
+ // Determine how much the GEP moves the pointer. We are guaranteed to get
+ // a constant back from EmitGEPOffset.
+ ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP));
+ int64_t Offset = OffsetV->getSExtValue();
+
+ // If this GEP instruction doesn't move the pointer, just replace the GEP
+ // with a bitcast of the real input to the dest type.
+ if (Offset == 0) {
+ // If the bitcast is of an allocation, and the allocation will be
+ // converted to match the type of the cast, don't touch this.
+ if (isa<AllocaInst>(BCI->getOperand(0)) ||
+ isMalloc(BCI->getOperand(0))) {
+ // See if the bitcast simplifies, if so, don't nuke this GEP yet.
+ if (Instruction *I = visitBitCast(*BCI)) {
+ if (I != BCI) {
+ I->takeName(BCI);
+ BCI->getParent()->getInstList().insert(BCI, I);
+ ReplaceInstUsesWith(*BCI, I);
+ }
+ return &GEP;
+ }
+ }
+ return new BitCastInst(BCI->getOperand(0), GEP.getType());
+ }
+
+ // Otherwise, if the offset is non-zero, we need to find out if there is a
+ // field at Offset in 'A's type. If so, we can pull the cast through the
+ // GEP.
+ SmallVector<Value*, 8> NewIndices;
+ const Type *InTy =
+ cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
+ if (FindElementAtOffset(InTy, Offset, NewIndices)) {
+ Value *NGEP = GEP.isInBounds() ?
+ Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(),
+ NewIndices.end()) :
+ Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(),
+ NewIndices.end());
+
+ if (NGEP->getType() == GEP.getType())
+ return ReplaceInstUsesWith(GEP, NGEP);
+ NGEP->takeName(&GEP);
+ return new BitCastInst(NGEP, GEP.getType());
+ }
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitFree(Instruction &FI) {
+ Value *Op = FI.getOperand(1);
+
+ // free undef -> unreachable.
+ if (isa<UndefValue>(Op)) {
+ // Insert a new store to null because we cannot modify the CFG here.
+ new StoreInst(ConstantInt::getTrue(FI.getContext()),
+ UndefValue::get(Type::getInt1PtrTy(FI.getContext())), &FI);
+ return EraseInstFromFunction(FI);
+ }
+
+ // If we have 'free null', delete the instruction. This can happen in STL
+ // code when lots of inlining happens.
+ if (isa<ConstantPointerNull>(Op))
+ return EraseInstFromFunction(FI);
+
+ // If we have a malloc call whose only use is a free call, delete both.
+ if (isMalloc(Op)) {
+ if (CallInst* CI = extractMallocCallFromBitCast(Op)) {
+ if (Op->hasOneUse() && CI->hasOneUse()) {
+ EraseInstFromFunction(FI);
+ EraseInstFromFunction(*CI);
+ return EraseInstFromFunction(*cast<Instruction>(Op));
+ }
+ } else {
+ // Op is a call to malloc
+ if (Op->hasOneUse()) {
+ EraseInstFromFunction(FI);
+ return EraseInstFromFunction(*cast<Instruction>(Op));
+ }
+ }
+ }
+
+ return 0;
+}
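+
+// Illustrative malloc/free elimination (example IR only):
+//   %p = call i8* @malloc(i64 4)
+//   call void @free(i8* %p)
+// When the free is the malloc's only use, both calls are erased.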
+
+
+
+Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
+ // Change br (not X), label True, label False to: br X, label False, True
+ Value *X = 0;
+ BasicBlock *TrueDest;
+ BasicBlock *FalseDest;
+ if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
+ !isa<Constant>(X)) {
+ // Swap Destinations and condition...
+ BI.setCondition(X);
+ BI.setSuccessor(0, FalseDest);
+ BI.setSuccessor(1, TrueDest);
+ return &BI;
+ }
+
+ // Canonicalize fcmp_one -> fcmp_oeq
+ FCmpInst::Predicate FPred; Value *Y;
+ if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
+ TrueDest, FalseDest)) &&
+ BI.getCondition()->hasOneUse())
+ if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
+ FPred == FCmpInst::FCMP_OGE) {
+ FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
+ Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
+
+ // Swap Destinations and condition.
+ BI.setSuccessor(0, FalseDest);
+ BI.setSuccessor(1, TrueDest);
+ Worklist.Add(Cond);
+ return &BI;
+ }
+
+ // Canonicalize icmp_ne -> icmp_eq
+ ICmpInst::Predicate IPred;
+ if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
+ TrueDest, FalseDest)) &&
+ BI.getCondition()->hasOneUse())
+ if (IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE ||
+ IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
+ IPred == ICmpInst::ICMP_SGE) {
+ ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
+ Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
+ // Swap Destinations and condition.
+ BI.setSuccessor(0, FalseDest);
+ BI.setSuccessor(1, TrueDest);
+ Worklist.Add(Cond);
+ return &BI;
+ }
+
+ return 0;
+}
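+
+// Illustrative branch canonicalization (example IR only): the predicate is
+// inverted and the successors are swapped, e.g.
+//   %c = icmp ne i32 %a, %b
+//   br i1 %c, label %T, label %F
+// becomes
+//   %c = icmp eq i32 %a, %b
+//   br i1 %c, label %F, label %T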
+
+Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
+ Value *Cond = SI.getCondition();
+ if (Instruction *I = dyn_cast<Instruction>(Cond)) {
+ if (I->getOpcode() == Instruction::Add)
+ if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // change 'switch (X+4) case 1:' into 'switch (X) case -3'
+ for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
+ SI.setOperand(i,
+ ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
+ AddRHS));
+ SI.setOperand(0, I->getOperand(0));
+ Worklist.Add(I);
+ return &SI;
+ }
+ }
+ return 0;
+}
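+
+// Illustrative switch canonicalization (example IR only):
+//   %s = add i32 %x, 4
+//   switch i32 %s, label %def [ i32 1, label %bb ]
+// becomes
+//   switch i32 %x, label %def [ i32 -3, label %bb ]
+// by subtracting the add's constant from every case value.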
+
+Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
+ Value *Agg = EV.getAggregateOperand();
+
+ if (!EV.hasIndices())
+ return ReplaceInstUsesWith(EV, Agg);
+
+ if (Constant *C = dyn_cast<Constant>(Agg)) {
+ if (isa<UndefValue>(C))
+ return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
+
+ if (isa<ConstantAggregateZero>(C))
+ return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
+
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
+ // Extract the element indexed by the first index out of the constant
+ Value *V = C->getOperand(*EV.idx_begin());
+ if (EV.getNumIndices() > 1)
+ // Extract the remaining indices out of the constant indexed by the
+ // first index
+ return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
+ else
+ return ReplaceInstUsesWith(EV, V);
+ }
+ return 0; // Can't handle other constants
+ }
+ if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
+ // We're extracting from an insertvalue instruction, compare the indices
+ const unsigned *exti, *exte, *insi, *inse;
+ for (exti = EV.idx_begin(), insi = IV->idx_begin(),
+ exte = EV.idx_end(), inse = IV->idx_end();
+ exti != exte && insi != inse;
+ ++exti, ++insi) {
+ if (*insi != *exti)
+ // The insert and extract reference distinct elements.
+ // This means the extract is not influenced by the insert, and we can
+ // replace the aggregate operand of the extract with the aggregate
+ // operand of the insert. i.e., replace
+ // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+ // %E = extractvalue { i32, { i32 } } %I, 0
+ // with
+ // %E = extractvalue { i32, { i32 } } %A, 0
+ return ExtractValueInst::Create(IV->getAggregateOperand(),
+ EV.idx_begin(), EV.idx_end());
+ }
+ if (exti == exte && insi == inse)
+ // Both iterators are at the end: Index lists are identical. Replace
+ // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+ // %C = extractvalue { i32, { i32 } } %B, 1, 0
+ // with "i32 42"
+ return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand());
+ if (exti == exte) {
+ // The extract list is a prefix of the insert list. i.e. replace
+ // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+ // %E = extractvalue { i32, { i32 } } %I, 1
+ // with
+ // %X = extractvalue { i32, { i32 } } %A, 1
+ // %E = insertvalue { i32 } %X, i32 42, 0
+ // by switching the order of the insert and extract (though the
+ // insertvalue should be left in, since it may have other uses).
+ Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
+ EV.idx_begin(), EV.idx_end());
+ return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
+ insi, inse);
+ }
+ if (insi == inse)
+ // The insert list is a prefix of the extract list
+ // We can simply remove the common indices from the extract and make it
+ // operate on the inserted value instead of the insertvalue result.
+ // i.e., replace
+ // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+ // %E = extractvalue { i32, { i32 } } %I, 1, 0
+ // with
+ // %E = extractvalue { i32 } { i32 42 }, 0
+ return ExtractValueInst::Create(IV->getInsertedValueOperand(),
+ exti, exte);
+ }
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
+ // We're extracting from an intrinsic. See if we're its only user, which
+ // lets us simplify multiple-result intrinsics into simpler things that
+ // produce just one value.
+ if (II->hasOneUse()) {
+ // Check if we're grabbing the overflow bit or the result of a 'with
+ // overflow' intrinsic. If it's the latter we can remove the intrinsic
+ // and replace it with a traditional binary instruction.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateAdd(LHS, RHS);
+ }
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateSub(LHS, RHS);
+ }
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateMul(LHS, RHS);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ // Can't simplify extracts from other values. Note that nested extracts are
+ // already simplified implicitly by the above: extract (extract (insert)) is
+ // first rewritten as extract (insert (extract)) and then reduced to just
+ // the inserted value, if appropriate.
+ return 0;
+}
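+
+// Illustrative with-overflow fold (example IR only): when the intrinsic's
+// sole user extracts the arithmetic result,
+//   %agg = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+//   %v = extractvalue { i32, i1 } %agg, 0
+// the pair is rewritten into a plain
+//   %v = add i32 %a, %b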
+
+
+
+
+/// TryToSinkInstruction - Try to move the specified instruction from its
+/// current block into the beginning of DestBlock, which can only happen if it's
+/// safe to move the instruction past all of the instructions between it and the
+/// end of its block.
+static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
+ assert(I->hasOneUse() && "Invariants didn't hold!");
+
+ // Cannot move control-flow-involving instructions, volatile loads, vaarg, etc.
+ if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I))
+ return false;
+
+ // Do not sink alloca instructions out of the entry block.
+ if (isa<AllocaInst>(I) && I->getParent() ==
+ &DestBlock->getParent()->getEntryBlock())
+ return false;
+
+ // We can only sink load instructions if there is nothing between the load and
+ // the end of its block that could change the value.
+ if (I->mayReadFromMemory()) {
+ for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
+ Scan != E; ++Scan)
+ if (Scan->mayWriteToMemory())
+ return false;
+ }
+
+ BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();
+
+ I->moveBefore(InsertPos);
+ ++NumSunkInst;
+ return true;
+}
+
+
+/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
+/// all reachable code to the worklist.
+///
+/// This has a couple of tricks to make the code faster and more powerful. In
+/// particular, we constant fold and DCE instructions as we go, to avoid adding
+/// them to the worklist (this significantly speeds up instcombine on code where
+/// many instructions are dead or constant). Additionally, if we find a branch
+/// whose condition is a known constant, we only visit the reachable successors.
+///
+static bool AddReachableCodeToWorklist(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 64> &Visited,
+ InstCombiner &IC,
+ const TargetData *TD) {
+ bool MadeIRChange = false;
+ SmallVector<BasicBlock*, 256> Worklist;
+ Worklist.push_back(BB);
+
+ std::vector<Instruction*> InstrsForInstCombineWorklist;
+ InstrsForInstCombineWorklist.reserve(128);
+
+ SmallPtrSet<ConstantExpr*, 64> FoldedConstants;
+
+ do {
+ BB = Worklist.pop_back_val();
+
+ // We have now visited this block! If we've already been here, ignore it.
+ if (!Visited.insert(BB)) continue;
+
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+ Instruction *Inst = BBI++;
+
+ // DCE instruction if trivially dead.
+ if (isInstructionTriviallyDead(Inst)) {
+ ++NumDeadInst;
+ DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // ConstantProp instruction if trivially constant.
+ if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
+ if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
+ DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
+ << *Inst << '\n');
+ Inst->replaceAllUsesWith(C);
+ ++NumConstProp;
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ if (TD) {
+ // See if we can constant fold its operands.
+ for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
+ i != e; ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
+ if (CE == 0) continue;
+
+ // If we already folded this constant, don't try again.
+ if (!FoldedConstants.insert(CE))
+ continue;
+
+ Constant *NewC = ConstantFoldConstantExpression(CE, TD);
+ if (NewC && NewC != CE) {
+ *i = NewC;
+ MadeIRChange = true;
+ }
+ }
+ }
+
+ InstrsForInstCombineWorklist.push_back(Inst);
+ }
+
+ // Recursively visit successors. If this is a branch or switch on a
+ // constant, only visit the reachable successor.
+ TerminatorInst *TI = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
+ bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
+ BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
+ // Find the single reachable successor: the matching explicit case
+ // destination if there is one, otherwise the default destination.
+ BasicBlock *ReachableBB = SI->getSuccessor(0);
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
+ if (SI->getCaseValue(i) == Cond) {
+ ReachableBB = SI->getSuccessor(i);
+ break;
+ }
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+ }
+
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ Worklist.push_back(TI->getSuccessor(i));
+ } while (!Worklist.empty());
+
+ // Once we've found all of the instructions to add to instcombine's worklist,
+ // add them in reverse order. This way instcombine will visit from the top
+ // of the function down. This jibes well with the way that it adds all uses
+ // of instructions to the worklist after doing a transformation, thus avoiding
+ // some N^2 behavior in pathological cases.
+ IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
+ InstrsForInstCombineWorklist.size());
+
+ return MadeIRChange;
+}
+
+bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
+ MadeIRChange = false;
+
+ DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+ << F.getNameStr() << "\n");
+
+ {
+ // Do a depth-first traversal of the function, populate the worklist with
+ // the reachable instructions. Ignore blocks that are not reachable. Keep
+ // track of which blocks we visit.
+ SmallPtrSet<BasicBlock*, 64> Visited;
+ MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
+
+ // Do a quick scan over the function. If we find any blocks that are
+ // unreachable, remove any instructions inside of them. This prevents
+ // the instcombine code from having to deal with some bad special cases.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (!Visited.count(BB)) {
+ Instruction *Term = BB->getTerminator();
+ while (Term != BB->begin()) { // Remove instrs bottom-up
+ BasicBlock::iterator I = Term; --I;
+
+ DEBUG(errs() << "IC: DCE: " << *I << '\n');
+ // A debug intrinsic shouldn't force another iteration if we weren't
+ // going to do one without it.
+ if (!isa<DbgInfoIntrinsic>(I)) {
+ ++NumDeadInst;
+ MadeIRChange = true;
+ }
+
+ // If I is not of void type, replace all of its uses with undef.
+ // This allows ValueHandles and custom metadata to adjust themselves.
+ if (!I->getType()->isVoidTy())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->eraseFromParent();
+ }
+ }
+ }
+
+ while (!Worklist.isEmpty()) {
+ Instruction *I = Worklist.RemoveOne();
+ if (I == 0) continue; // skip null values.
+
+ // Check to see if we can DCE the instruction.
+ if (isInstructionTriviallyDead(I)) {
+ DEBUG(errs() << "IC: DCE: " << *I << '\n');
+ EraseInstFromFunction(*I);
+ ++NumDeadInst;
+ MadeIRChange = true;
+ continue;
+ }
+
+ // Instruction isn't dead, see if we can constant propagate it.
+ if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
+ if (Constant *C = ConstantFoldInstruction(I, TD)) {
+ DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
+
+ // Add operands to the worklist.
+ ReplaceInstUsesWith(*I, C);
+ ++NumConstProp;
+ EraseInstFromFunction(*I);
+ MadeIRChange = true;
+ continue;
+ }
+
+ // See if we can trivially sink this instruction to a successor basic block.
+ if (I->hasOneUse()) {
+ BasicBlock *BB = I->getParent();
+ Instruction *UserInst = cast<Instruction>(I->use_back());
+ BasicBlock *UserParent;
+
+ // Get the block the use occurs in.
+ if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+ UserParent = PN->getIncomingBlock(I->use_begin().getUse());
+ else
+ UserParent = UserInst->getParent();
+
+ if (UserParent != BB) {
+ bool UserIsSuccessor = false;
+ // See if the user is one of our successors.
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+ if (*SI == UserParent) {
+ UserIsSuccessor = true;
+ break;
+ }
+
+ // If the user is one of our immediate successors, and if that successor
+ // only has us as a predecessor (we'd have to split the critical edge
+ // otherwise), we can keep going.
+ if (UserIsSuccessor && UserParent->getSinglePredecessor())
+ // Okay, the CFG is simple enough, try to sink this instruction.
+ MadeIRChange |= TryToSinkInstruction(I, UserParent);
+ }
+ }
+
+ // Now that we have an instruction, try combining it to simplify it.
+ Builder->SetInsertPoint(I->getParent(), I);
+
+#ifndef NDEBUG
+ std::string OrigI;
+#endif
+ DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
+ DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
+
+ if (Instruction *Result = visit(*I)) {
+ ++NumCombined;
+ // Should we replace the old instruction with a new one?
+ if (Result != I) {
+ DEBUG(errs() << "IC: Old = " << *I << '\n'
+ << " New = " << *Result << '\n');
+
+ // Everything uses the new instruction now.
+ I->replaceAllUsesWith(Result);
+
+ // Push the new instruction and any users onto the worklist.
+ Worklist.Add(Result);
+ Worklist.AddUsersToWorkList(*Result);
+
+ // Move the name to the new instruction first.
+ Result->takeName(I);
+
+ // Insert the new instruction into the basic block...
+ BasicBlock *InstParent = I->getParent();
+ BasicBlock::iterator InsertPos = I;
+
+ if (!isa<PHINode>(Result)) // If combining a PHI, don't insert into
+ while (isa<PHINode>(InsertPos)) // the middle of a block of PHIs.
+ ++InsertPos;
+
+ InstParent->getInstList().insert(InsertPos, Result);
+
+ EraseInstFromFunction(*I);
+ } else {
+#ifndef NDEBUG
+ DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
+ << " New = " << *I << '\n');
+#endif
+
+ // If the instruction was modified, it's possible that it is now dead.
+ // If so, remove it.
+ if (isInstructionTriviallyDead(I)) {
+ EraseInstFromFunction(*I);
+ } else {
+ Worklist.Add(I);
+ Worklist.AddUsersToWorkList(*I);
+ }
+ }
+ MadeIRChange = true;
+ }
+ }
+
+ Worklist.Zap();
+ return MadeIRChange;
+}
+
+
+bool InstCombiner::runOnFunction(Function &F) {
+ MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+ TD = getAnalysisIfAvailable<TargetData>();
+
+
+ /// Builder - This is an IRBuilder that automatically inserts new
+ /// instructions into the worklist when they are created.
+ IRBuilder<true, TargetFolder, InstCombineIRInserter>
+ TheBuilder(F.getContext(), TargetFolder(TD),
+ InstCombineIRInserter(Worklist));
+ Builder = &TheBuilder;
+
+ bool EverMadeChange = false;
+
+ // Iterate while there is work to do.
+ unsigned Iteration = 0;
+ while (DoOneIteration(F, Iteration++))
+ EverMadeChange = true;
+
+ Builder = 0;
+ return EverMadeChange;
+}
+
+FunctionPass *llvm::createInstructionCombiningPass() {
+ return new InstCombiner();
+}
diff --git a/lib/Transforms/InstCombine/Makefile b/lib/Transforms/InstCombine/Makefile
new file mode 100644
index 0000000..0c488e78
--- /dev/null
+++ b/lib/Transforms/InstCombine/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Transforms/InstCombine/Makefile -----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMInstCombine
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Transforms/Instrumentation/BlockProfiling.cpp b/lib/Transforms/Instrumentation/BlockProfiling.cpp
deleted file mode 100644
index 211a6d6..0000000
--- a/lib/Transforms/Instrumentation/BlockProfiling.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-//===- BlockProfiling.cpp - Insert counters for block profiling -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass instruments the specified program with counters for basic block or
-// function profiling. This is the most basic form of profiling, which can tell
-// which blocks are hot, but cannot reliably detect hot paths through the CFG.
-// Block profiling counts the number of times each basic block executes, and
-// function profiling counts the number of times each function is called.
-//
-// Note that this implementation is very naive. Control equivalent regions of
-// the CFG should not require duplicate counters, but we do put duplicate
-// counters in.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Instrumentation.h"
-#include "RSProfiling.h"
-#include "ProfilingUtils.h"
-using namespace llvm;
-
-namespace {
- class FunctionProfiler : public RSProfilers_std {
- public:
- static char ID;
- bool runOnModule(Module &M);
- };
-}
-
-char FunctionProfiler::ID = 0;
-
-static RegisterPass<FunctionProfiler>
-X("insert-function-profiling",
- "Insert instrumentation for function profiling");
-static RegisterAnalysisGroup<RSProfilers> XG(X);
-
-ModulePass *llvm::createFunctionProfilerPass() {
- return new FunctionProfiler();
-}
-
-bool FunctionProfiler::runOnModule(Module &M) {
- Function *Main = M.getFunction("main");
- if (Main == 0) {
- errs() << "WARNING: cannot insert function profiling into a module"
- << " with no main function!\n";
- return false; // No main, no instrumentation!
- }
-
- unsigned NumFunctions = 0;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!I->isDeclaration())
- ++NumFunctions;
-
- const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()),
- NumFunctions);
- GlobalVariable *Counters =
- new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
- Constant::getNullValue(ATy), "FuncProfCounters");
-
- // Instrument all of the functions...
- unsigned i = 0;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!I->isDeclaration())
- // Insert counter at the start of the function
- IncrementCounterInBlock(&I->getEntryBlock(), i++, Counters);
-
- // Add the initialization call to main.
- InsertProfilingInitCall(Main, "llvm_start_func_profiling", Counters);
- return true;
-}
-
-
-namespace {
- class BlockProfiler : public RSProfilers_std {
- bool runOnModule(Module &M);
- public:
- static char ID;
- };
-}
-
-char BlockProfiler::ID = 0;
-static RegisterPass<BlockProfiler>
-Y("insert-block-profiling", "Insert instrumentation for block profiling");
-static RegisterAnalysisGroup<RSProfilers> YG(Y);
-
-ModulePass *llvm::createBlockProfilerPass() { return new BlockProfiler(); }
-
-bool BlockProfiler::runOnModule(Module &M) {
- Function *Main = M.getFunction("main");
- if (Main == 0) {
- errs() << "WARNING: cannot insert block profiling into a module"
- << " with no main function!\n";
- return false; // No main, no instrumentation!
- }
-
- unsigned NumBlocks = 0;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!I->isDeclaration())
- NumBlocks += I->size();
-
- const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumBlocks);
- GlobalVariable *Counters =
- new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
- Constant::getNullValue(ATy), "BlockProfCounters");
-
- // Instrument all of the blocks...
- unsigned i = 0;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- if (I->isDeclaration()) continue;
- for (Function::iterator BB = I->begin(), E = I->end(); BB != E; ++BB)
- // Insert counter at the start of the block
- IncrementCounterInBlock(BB, i++, Counters);
- }
-
- // Add the initialization call to main.
- InsertProfilingInitCall(Main, "llvm_start_block_profiling", Counters);
- return true;
-}
-
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 494928e..128bf48 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -1,7 +1,5 @@
add_llvm_library(LLVMInstrumentation
- BlockProfiling.cpp
EdgeProfiling.cpp
OptimalEdgeProfiling.cpp
ProfilingUtils.cpp
- RSProfiling.cpp
)
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
index 0a46fe5..94b0671 100644
--- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -61,7 +61,7 @@ ModulePass *llvm::createOptimalEdgeProfilerPass() {
inline static void printEdgeCounter(ProfileInfo::Edge e,
BasicBlock* b,
unsigned i) {
- DEBUG(errs() << "--Edge Counter for " << (e) << " in " \
+ DEBUG(dbgs() << "--Edge Counter for " << (e) << " in " \
<< ((b)?(b)->getNameStr():"0") << " (# " << (i) << ")\n");
}
@@ -120,7 +120,7 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
unsigned i = 0;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
if (F->isDeclaration()) continue;
- DEBUG(errs()<<"Working on "<<F->getNameStr()<<"\n");
+ DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n");
// Calculate a Maximum Spanning Tree with the edge weights determined by
// ProfileEstimator. ProfileEstimator also assign weights to the virtual
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index 1679bea..3214c8c 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -84,7 +84,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
AI = MainFn->arg_begin();
// If the program looked at argc, have it look at the return value of the
// init call instead.
- if (AI->getType() != Type::getInt32Ty(Context)) {
+ if (!AI->getType()->isInteger(32)) {
Instruction::CastOps opcode;
if (!AI->use_empty()) {
opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true);
diff --git a/lib/Transforms/Instrumentation/RSProfiling.cpp b/lib/Transforms/Instrumentation/RSProfiling.cpp
deleted file mode 100644
index c08efc1..0000000
--- a/lib/Transforms/Instrumentation/RSProfiling.cpp
+++ /dev/null
@@ -1,662 +0,0 @@
-//===- RSProfiling.cpp - Various profiling using random sampling ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// These passes implement a random sampling based profiling. Different methods
-// of choosing when to sample are supported, as well as different types of
-// profiling. This is done as two passes. The first is a sequence of profiling
-// passes which insert profiling into the program, and remember what they
-// inserted.
-//
-// The second stage duplicates all instructions in a function, ignoring the
-// profiling code, then connects the two versions together at the entry and at
-// backedges. At each connection point a choice is made as to whether to jump
-// to the profiled code (take a sample) or execute the unprofiled code.
-//
-// It is highly recommended that after this pass one runs mem2reg and adce
-// (instcombine, load-vn, gdce, and dse are also good to run afterwards).
-//
-// This design is intended to make the profiling passes independent of the RS
-// framework, but any profiling pass that implements the RSProfiling interface
-// is compatible with the RS framework (and thus can be sampled).
-//
-// TODO: obviously the block and function profiling are almost identical to the
-// existing ones, so they can be unified (especially since these passes are valid
-// without the rs framework).
-// TODO: Fix the choice code so that the sampling frequency is not hard-coded
-//
-//===----------------------------------------------------------------------===//
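
The design above is easiest to see in a plain C++ analogue of the global-counter
policy. This is only a sketch of the semantics the pass builds in IR: the helper
names and the driving loop are invented for illustration, and the reset value
mirrors the (1 << 14) - 1 used in doInitialization further down.

    #include <cstdio>

    static unsigned Counter = (1u << 14) - 1;  // reset value, as in doInitialization

    static void profiledBody(int i)   { std::printf("sampled iteration %d\n", i); }
    static void unprofiledBody(int i) { (void)i; /* fast path, uninstrumented */ }

    int main() {
      for (int i = 0; i < 100000; ++i) {
        // The choice point: test the counter against zero, decrement it, and
        // take one sample (run the instrumented copy) whenever it was zero.
        if (Counter-- == 0) {
          Counter = (1u << 14) - 1;            // the "reset" block
          profiledBody(i);
        } else {
          unprofiledBody(i);
        }
      }
      return 0;
    }
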
-
-#include "llvm/Pass.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Instrumentation.h"
-#include "RSProfiling.h"
-#include <set>
-#include <map>
-#include <queue>
-using namespace llvm;
-
-namespace {
- enum RandomMeth {
- GBV, GBVO, HOSTCC
- };
-}
-
-static cl::opt<RandomMeth> RandomMethod("profile-randomness",
- cl::desc("How to randomly choose to profile:"),
- cl::values(
- clEnumValN(GBV, "global", "global counter"),
- clEnumValN(GBVO, "ra_global",
- "register allocated global counter"),
- clEnumValN(HOSTCC, "rdcc", "cycle counter"),
- clEnumValEnd));
-
-namespace {
- /// NullProfilerRS - The basic profiler that does nothing. It is the default
- /// profiler and thus terminates RSProfiler chains. It is useful for
- /// measuring framework overhead
- class NullProfilerRS : public RSProfilers {
- public:
- static char ID; // Pass identification, replacement for typeid
- bool isProfiling(Value* v) {
- return false;
- }
- bool runOnModule(Module &M) {
- return false;
- }
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
- };
-}
-
-static RegisterAnalysisGroup<RSProfilers> A("Profiling passes");
-static RegisterPass<NullProfilerRS> NP("insert-null-profiling-rs",
- "Measure profiling framework overhead");
-static RegisterAnalysisGroup<RSProfilers, true> NPT(NP);
-
-namespace {
-  /// Chooser - Something that chooses when to take a sample of the profiled code
- class Chooser {
- public:
-    /// ProcessChoicePoint - called for each inserted basic block to choose
-    /// between the normal and the sampled code.
- virtual void ProcessChoicePoint(BasicBlock*) = 0;
- /// PrepFunction - is called once per function before other work is done.
-    /// This gives the opportunity to insert new allocas and such.
- virtual void PrepFunction(Function*) = 0;
- virtual ~Chooser() {}
- };
-
- //Things that implement sampling policies
- //A global value that is read-mod-stored to choose when to sample.
- //A sample is taken when the global counter hits 0
- class GlobalRandomCounter : public Chooser {
- GlobalVariable* Counter;
- Value* ResetValue;
- const IntegerType* T;
- public:
- GlobalRandomCounter(Module& M, const IntegerType* t, uint64_t resetval);
- virtual ~GlobalRandomCounter();
- virtual void PrepFunction(Function* F);
- virtual void ProcessChoicePoint(BasicBlock* bb);
- };
-
-  //Same as GRC, but allows register allocation of the global counter.
- class GlobalRandomCounterOpt : public Chooser {
- GlobalVariable* Counter;
- Value* ResetValue;
- AllocaInst* AI;
- const IntegerType* T;
- public:
- GlobalRandomCounterOpt(Module& M, const IntegerType* t, uint64_t resetval);
- virtual ~GlobalRandomCounterOpt();
- virtual void PrepFunction(Function* F);
- virtual void ProcessChoicePoint(BasicBlock* bb);
- };
-
-  //Use the cycle counter intrinsic as a source of pseudo-randomness when
- //deciding when to sample.
- class CycleCounter : public Chooser {
- uint64_t rm;
- Constant *F;
- public:
- CycleCounter(Module& m, uint64_t resetmask);
- virtual ~CycleCounter();
- virtual void PrepFunction(Function* F);
- virtual void ProcessChoicePoint(BasicBlock* bb);
- };
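
CycleCounter above takes a different policy: mask the hardware cycle counter
with resetmask and sample when the masked value is zero. A rough standalone
sketch of that test, using a portable clock as a stand-in for the
llvm.readcyclecounter intrinsic (the function names here are invented; the
comparison itself is the "mrdccc" test built in ProcessChoicePoint below):

    #include <cassert>
    #include <cstdint>
    #include <ctime>

    // Portable stand-in for the llvm.readcyclecounter intrinsic.
    static uint64_t cycleCounter() {
      return (uint64_t)std::clock();
    }

    // Fires roughly once every (resetMask + 1) calls, assuming the low bits
    // of the counter are close to uniformly distributed.
    static bool shouldSample(uint64_t resetMask) {
      return (cycleCounter() & resetMask) == 0;
    }

    int main() {
      assert(shouldSample(0));  // a mask of 0 samples on every call
      return 0;
    }
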
-
- /// ProfilerRS - Insert the random sampling framework
- struct ProfilerRS : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- ProfilerRS() : FunctionPass(&ID) {}
-
- std::map<Value*, Value*> TransCache;
- std::set<BasicBlock*> ChoicePoints;
- Chooser* c;
-
-    //Translate and duplicate values for the new profile-free version of the code
- Value* Translate(Value* v);
-    //Duplicate an entire function (without profiling)
- void Duplicate(Function& F, RSProfilers& LI);
-    //Called once for each backedge; handles the insertion of choice points and
-    //the interconnection of the two versions of the code.
- void ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F);
- bool runOnFunction(Function& F);
- bool doInitialization(Module &M);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- };
-}
-
-static RegisterPass<ProfilerRS>
-X("insert-rs-profiling-framework",
- "Insert random sampling instrumentation framework");
-
-char RSProfilers::ID = 0;
-char NullProfilerRS::ID = 0;
-char ProfilerRS::ID = 0;
-
-//Local utilities
-static void ReplacePhiPred(BasicBlock* btarget,
- BasicBlock* bold, BasicBlock* bnew);
-
-static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc);
-
-template<class T>
-static void recBackEdge(BasicBlock* bb, T& BackEdges,
- std::map<BasicBlock*, int>& color,
- std::map<BasicBlock*, int>& depth,
- std::map<BasicBlock*, int>& finish,
- int& time);
-
-//find the back edges and where they go
-template<class T>
-static void getBackEdges(Function& F, T& BackEdges);
-
-
-///////////////////////////////////////
-// Methods of choosing when to profile
-///////////////////////////////////////
-
-GlobalRandomCounter::GlobalRandomCounter(Module& M, const IntegerType* t,
- uint64_t resetval) : T(t) {
- ConstantInt* Init = ConstantInt::get(T, resetval);
- ResetValue = Init;
- Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage,
- Init, "RandomSteeringCounter");
-}
-
-GlobalRandomCounter::~GlobalRandomCounter() {}
-
-void GlobalRandomCounter::PrepFunction(Function* F) {}
-
-void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {
- BranchInst* t = cast<BranchInst>(bb->getTerminator());
-
- //decrement counter
- LoadInst* l = new LoadInst(Counter, "counter", t);
-
- ICmpInst* s = new ICmpInst(t, ICmpInst::ICMP_EQ, l,
- ConstantInt::get(T, 0),
- "countercc");
-
- Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1),
- "counternew", t);
- new StoreInst(nv, Counter, t);
- t->setCondition(s);
-
- //reset counter
- BasicBlock* oldnext = t->getSuccessor(0);
- BasicBlock* resetblock = BasicBlock::Create(bb->getContext(),
- "reset", oldnext->getParent(),
- oldnext);
- TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock);
- t->setSuccessor(0, resetblock);
- new StoreInst(ResetValue, Counter, t2);
- ReplacePhiPred(oldnext, bb, resetblock);
-}
-
-GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const IntegerType* t,
- uint64_t resetval)
- : AI(0), T(t) {
- ConstantInt* Init = ConstantInt::get(T, resetval);
- ResetValue = Init;
- Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage,
- Init, "RandomSteeringCounter");
-}
-
-GlobalRandomCounterOpt::~GlobalRandomCounterOpt() {}
-
-void GlobalRandomCounterOpt::PrepFunction(Function* F) {
- //make a local temporary to cache the global
- BasicBlock& bb = F->getEntryBlock();
- BasicBlock::iterator InsertPt = bb.begin();
- AI = new AllocaInst(T, 0, "localcounter", InsertPt);
- LoadInst* l = new LoadInst(Counter, "counterload", InsertPt);
- new StoreInst(l, AI, InsertPt);
-
-  //around every call, invoke, and return, sync the local copy of the counter
-  //to/from the global variable
- for(Function::iterator fib = F->begin(), fie = F->end();
- fib != fie; ++fib)
- for(BasicBlock::iterator bib = fib->begin(), bie = fib->end();
- bib != bie; ++bib)
- if (isa<CallInst>(bib)) {
- LoadInst* l = new LoadInst(AI, "counter", bib);
- new StoreInst(l, Counter, bib);
- l = new LoadInst(Counter, "counter", ++bib);
- new StoreInst(l, AI, bib--);
- } else if (isa<InvokeInst>(bib)) {
- LoadInst* l = new LoadInst(AI, "counter", bib);
- new StoreInst(l, Counter, bib);
-
- BasicBlock* bb = cast<InvokeInst>(bib)->getNormalDest();
- BasicBlock::iterator i = bb->getFirstNonPHI();
- l = new LoadInst(Counter, "counter", i);
-
- bb = cast<InvokeInst>(bib)->getUnwindDest();
- i = bb->getFirstNonPHI();
- l = new LoadInst(Counter, "counter", i);
- new StoreInst(l, AI, i);
- } else if (isa<UnwindInst>(&*bib) || isa<ReturnInst>(&*bib)) {
- LoadInst* l = new LoadInst(AI, "counter", bib);
- new StoreInst(l, Counter, bib);
- }
-}
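
PrepFunction above caches the global counter in a stack slot so that, after
mem2reg, the hot path can keep it in a register; around calls, invokes, and
returns the local copy is written back and reloaded. A plain C++ analogue of
that discipline (all names invented for the sketch):

    static unsigned GlobalCounter = 100;

    static void someCall() {}  // stands in for any call or invoke

    static void hotFunction() {
      unsigned local = GlobalCounter;  // entry block: cache the global
      for (int i = 0; i < 10; ++i) {
        --local;                       // fast-path updates stay local
        GlobalCounter = local;         // write back before the call...
        someCall();
        local = GlobalCounter;         // ...and reload afterwards
      }
      GlobalCounter = local;           // restore the global before returning
    }

    int main() {
      hotFunction();
      return GlobalCounter == 90 ? 0 : 1;
    }
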
-
-void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) {
- BranchInst* t = cast<BranchInst>(bb->getTerminator());
-
- //decrement counter
- LoadInst* l = new LoadInst(AI, "counter", t);
-
- ICmpInst* s = new ICmpInst(t, ICmpInst::ICMP_EQ, l,
- ConstantInt::get(T, 0),
- "countercc");
-
- Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1),
- "counternew", t);
- new StoreInst(nv, AI, t);
- t->setCondition(s);
-
- //reset counter
- BasicBlock* oldnext = t->getSuccessor(0);
- BasicBlock* resetblock = BasicBlock::Create(bb->getContext(),
- "reset", oldnext->getParent(),
- oldnext);
- TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock);
- t->setSuccessor(0, resetblock);
- new StoreInst(ResetValue, AI, t2);
- ReplacePhiPred(oldnext, bb, resetblock);
-}
-
-
-CycleCounter::CycleCounter(Module& m, uint64_t resetmask) : rm(resetmask) {
- F = Intrinsic::getDeclaration(&m, Intrinsic::readcyclecounter);
-}
-
-CycleCounter::~CycleCounter() {}
-
-void CycleCounter::PrepFunction(Function* F) {}
-
-void CycleCounter::ProcessChoicePoint(BasicBlock* bb) {
- BranchInst* t = cast<BranchInst>(bb->getTerminator());
-
- CallInst* c = CallInst::Create(F, "rdcc", t);
- BinaryOperator* b =
- BinaryOperator::CreateAnd(c,
- ConstantInt::get(Type::getInt64Ty(bb->getContext()), rm),
- "mrdcc", t);
-
- ICmpInst *s = new ICmpInst(t, ICmpInst::ICMP_EQ, b,
- ConstantInt::get(Type::getInt64Ty(bb->getContext()), 0),
- "mrdccc");
-
- t->setCondition(s);
-}
-
-///////////////////////////////////////
-// Profiling:
-///////////////////////////////////////
-bool RSProfilers_std::isProfiling(Value* v) {
- if (profcode.find(v) != profcode.end())
- return true;
- //else
- RSProfilers& LI = getAnalysis<RSProfilers>();
- return LI.isProfiling(v);
-}
-
-void RSProfilers_std::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
- GlobalValue *CounterArray) {
- // Insert the increment after any alloca or PHI instructions...
- BasicBlock::iterator InsertPos = BB->getFirstNonPHI();
- while (isa<AllocaInst>(InsertPos))
- ++InsertPos;
-
- // Create the getelementptr constant expression
- std::vector<Constant*> Indices(2);
- Indices[0] = Constant::getNullValue(Type::getInt32Ty(BB->getContext()));
- Indices[1] = ConstantInt::get(Type::getInt32Ty(BB->getContext()), CounterNum);
- Constant *ElementPtr =ConstantExpr::getGetElementPtr(CounterArray,
- &Indices[0], 2);
-
- // Load, increment and store the value back.
- Value *OldVal = new LoadInst(ElementPtr, "OldCounter", InsertPos);
- profcode.insert(OldVal);
- Value *NewVal = BinaryOperator::CreateAdd(OldVal,
- ConstantInt::get(Type::getInt32Ty(BB->getContext()), 1),
- "NewCounter", InsertPos);
- profcode.insert(NewVal);
- profcode.insert(new StoreInst(NewVal, ElementPtr, InsertPos));
-}
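
What IncrementCounterInBlock emits reduces to a read-modify-write of one slot
of the counter array. A plain C++ analogue (the array size is invented; in the
pass the array is the CounterArray global):

    #include <cstdint>

    static uint32_t Counters[1024];  // stands in for the CounterArray global

    static void incrementCounterInBlock(unsigned counterNum) {
      uint32_t oldVal = Counters[counterNum];  // the "OldCounter" load
      uint32_t newVal = oldVal + 1;            // the "NewCounter" add
      Counters[counterNum] = newVal;           // the final store
    }

    int main() {
      incrementCounterInBlock(3);
      incrementCounterInBlock(3);
      return Counters[3] == 2 ? 0 : 1;
    }
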
-
-void RSProfilers_std::getAnalysisUsage(AnalysisUsage &AU) const {
- //grab any outstanding profiler, or get the null one
- AU.addRequired<RSProfilers>();
-}
-
-///////////////////////////////////////
-// RS Framework
-///////////////////////////////////////
-
-Value* ProfilerRS::Translate(Value* v) {
- if(TransCache[v])
- return TransCache[v];
-
- if (BasicBlock* bb = dyn_cast<BasicBlock>(v)) {
- if (bb == &bb->getParent()->getEntryBlock())
- TransCache[bb] = bb; //don't translate entry block
- else
- TransCache[bb] = BasicBlock::Create(v->getContext(),
- "dup_" + bb->getName(),
- bb->getParent(), NULL);
- return TransCache[bb];
- } else if (Instruction* i = dyn_cast<Instruction>(v)) {
-    //do not translate instructions in the entry block (such as allocas);
-    //the entry block is shared between the two versions
- if(&i->getParent()->getParent()->getEntryBlock() == i->getParent()) {
- TransCache[i] = i;
- return i;
- } else {
- //translate this
- Instruction* i2 = i->clone();
- if (i->hasName())
- i2->setName("dup_" + i->getName());
- TransCache[i] = i2;
- //NumNewInst++;
- for (unsigned x = 0; x < i2->getNumOperands(); ++x)
- i2->setOperand(x, Translate(i2->getOperand(x)));
- return i2;
- }
- } else if (isa<Function>(v) || isa<Constant>(v) || isa<Argument>(v)) {
- TransCache[v] = v;
- return v;
- }
- llvm_unreachable("Value not handled");
- return 0;
-}
-
-void ProfilerRS::Duplicate(Function& F, RSProfilers& LI)
-{
- //perform a breadth first search, building up a duplicate of the code
- std::queue<BasicBlock*> worklist;
- std::set<BasicBlock*> seen;
-
- //This loop ensures proper BB order, to help performance
- for (Function::iterator fib = F.begin(), fie = F.end(); fib != fie; ++fib)
- worklist.push(fib);
- while (!worklist.empty()) {
- Translate(worklist.front());
- worklist.pop();
- }
-
-  //remember that reg2mem created a new entry block we don't want to duplicate
- worklist.push(F.getEntryBlock().getTerminator()->getSuccessor(0));
- seen.insert(&F.getEntryBlock());
-
- while (!worklist.empty()) {
- BasicBlock* bb = worklist.front();
- worklist.pop();
- if(seen.find(bb) == seen.end()) {
- BasicBlock* bbtarget = cast<BasicBlock>(Translate(bb));
- BasicBlock::InstListType& instlist = bbtarget->getInstList();
- for (BasicBlock::iterator iib = bb->begin(), iie = bb->end();
- iib != iie; ++iib) {
- //NumOldInst++;
- if (!LI.isProfiling(&*iib)) {
- Instruction* i = cast<Instruction>(Translate(iib));
- instlist.insert(bbtarget->end(), i);
- }
- }
-      //update search state
- seen.insert(bb);
- TerminatorInst* ti = bb->getTerminator();
- for (unsigned x = 0; x < ti->getNumSuccessors(); ++x) {
- BasicBlock* bbs = ti->getSuccessor(x);
- if (seen.find(bbs) == seen.end()) {
- worklist.push(bbs);
- }
- }
- }
- }
-}
-
-void ProfilerRS::ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F) {
- //given a backedge from B -> A, and translations A' and B',
- //a: insert C and C'
- //b: add branches in C to A and A' and in C' to A and A'
- //c: mod terminators@B, replace A with C
- //d: mod terminators@B', replace A' with C'
- //e: mod phis@A for pred B to be pred C
- // if multiple entries, simplify to one
- //f: mod phis@A' for pred B' to be pred C'
- // if multiple entries, simplify to one
- //g: for all phis@A with pred C using x
- // add in edge from C' using x'
- // add in edge from C using x in A'
-
- //a:
- Function::iterator BBN = src; ++BBN;
- BasicBlock* bbC = BasicBlock::Create(F.getContext(), "choice", &F, BBN);
- //ChoicePoints.insert(bbC);
- BBN = cast<BasicBlock>(Translate(src));
- BasicBlock* bbCp = BasicBlock::Create(F.getContext(), "choice", &F, ++BBN);
- ChoicePoints.insert(bbCp);
-
- //b:
- BranchInst::Create(cast<BasicBlock>(Translate(dst)), bbC);
- BranchInst::Create(dst, cast<BasicBlock>(Translate(dst)),
- ConstantInt::get(Type::getInt1Ty(src->getContext()), true), bbCp);
- //c:
- {
- TerminatorInst* iB = src->getTerminator();
- for (unsigned x = 0; x < iB->getNumSuccessors(); ++x)
- if (iB->getSuccessor(x) == dst)
- iB->setSuccessor(x, bbC);
- }
- //d:
- {
- TerminatorInst* iBp = cast<TerminatorInst>(Translate(src->getTerminator()));
- for (unsigned x = 0; x < iBp->getNumSuccessors(); ++x)
- if (iBp->getSuccessor(x) == cast<BasicBlock>(Translate(dst)))
- iBp->setSuccessor(x, bbCp);
- }
- //e:
- ReplacePhiPred(dst, src, bbC);
-  //src could be a switch, in which case we are replacing several edges with one;
-  //thus collapse those edges into the Phi
- CollapsePhi(dst, bbC);
- //f:
- ReplacePhiPred(cast<BasicBlock>(Translate(dst)),
- cast<BasicBlock>(Translate(src)),bbCp);
- CollapsePhi(cast<BasicBlock>(Translate(dst)), bbCp);
- //g:
- for(BasicBlock::iterator ib = dst->begin(), ie = dst->end(); ib != ie;
- ++ib)
- if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
- for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x)
- if(bbC == phi->getIncomingBlock(x)) {
- phi->addIncoming(Translate(phi->getIncomingValue(x)), bbCp);
- cast<PHINode>(Translate(phi))->addIncoming(phi->getIncomingValue(x),
- bbC);
- }
- phi->removeIncomingValue(bbC);
- }
-}
-
-bool ProfilerRS::runOnFunction(Function& F) {
- if (!F.isDeclaration()) {
- std::set<std::pair<BasicBlock*, BasicBlock*> > BackEdges;
- RSProfilers& LI = getAnalysis<RSProfilers>();
-
- getBackEdges(F, BackEdges);
- Duplicate(F, LI);
-    //now connect the duplicated basic blocks with the originals in such a
-    //way as to preserve SSA form
- for (std::set<std::pair<BasicBlock*, BasicBlock*> >::iterator
- ib = BackEdges.begin(), ie = BackEdges.end(); ib != ie; ++ib)
- ProcessBackEdge(ib->first, ib->second, F);
-
- //oh, and add the edge from the reg2mem created entry node to the
- //duplicated second node
- TerminatorInst* T = F.getEntryBlock().getTerminator();
- ReplaceInstWithInst(T, BranchInst::Create(T->getSuccessor(0),
- cast<BasicBlock>(
- Translate(T->getSuccessor(0))),
- ConstantInt::get(Type::getInt1Ty(F.getContext()), true)));
-
- //do whatever is needed now that the function is duplicated
- c->PrepFunction(&F);
-
- //add entry node to choice points
- ChoicePoints.insert(&F.getEntryBlock());
-
- for (std::set<BasicBlock*>::iterator
- ii = ChoicePoints.begin(), ie = ChoicePoints.end(); ii != ie; ++ii)
- c->ProcessChoicePoint(*ii);
-
- ChoicePoints.clear();
- TransCache.clear();
-
- return true;
- }
- return false;
-}
-
-bool ProfilerRS::doInitialization(Module &M) {
- switch (RandomMethod) {
- case GBV:
- c = new GlobalRandomCounter(M, Type::getInt32Ty(M.getContext()),
- (1 << 14) - 1);
- break;
- case GBVO:
- c = new GlobalRandomCounterOpt(M, Type::getInt32Ty(M.getContext()),
- (1 << 14) - 1);
- break;
- case HOSTCC:
- c = new CycleCounter(M, (1 << 14) - 1);
- break;
- };
- return true;
-}
-
-void ProfilerRS::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<RSProfilers>();
- AU.addRequiredID(DemoteRegisterToMemoryID);
-}
-
-///////////////////////////////////////
-// Utilities:
-///////////////////////////////////////
-static void ReplacePhiPred(BasicBlock* btarget,
- BasicBlock* bold, BasicBlock* bnew) {
- for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end();
- ib != ie; ++ib)
- if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
- for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x)
- if(bold == phi->getIncomingBlock(x))
- phi->setIncomingBlock(x, bnew);
- }
-}
-
-static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc) {
- for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end();
- ib != ie; ++ib)
- if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
- std::map<BasicBlock*, Value*> counter;
- for(unsigned i = 0; i < phi->getNumIncomingValues(); ) {
- if (counter[phi->getIncomingBlock(i)]) {
- assert(phi->getIncomingValue(i) == counter[phi->getIncomingBlock(i)]);
- phi->removeIncomingValue(i, false);
- } else {
- counter[phi->getIncomingBlock(i)] = phi->getIncomingValue(i);
- ++i;
- }
- }
- }
-}
-
-template<class T>
-static void recBackEdge(BasicBlock* bb, T& BackEdges,
- std::map<BasicBlock*, int>& color,
- std::map<BasicBlock*, int>& depth,
- std::map<BasicBlock*, int>& finish,
- int& time)
-{
- color[bb] = 1;
- ++time;
- depth[bb] = time;
- TerminatorInst* t= bb->getTerminator();
- for(unsigned i = 0; i < t->getNumSuccessors(); ++i) {
- BasicBlock* bbnew = t->getSuccessor(i);
- if (color[bbnew] == 0)
- recBackEdge(bbnew, BackEdges, color, depth, finish, time);
- else if (color[bbnew] == 1) {
- BackEdges.insert(std::make_pair(bb, bbnew));
- //NumBackEdges++;
- }
- }
- color[bb] = 2;
- ++time;
- finish[bb] = time;
-}
-
-
-
-//find the back edges and where they go
-template<class T>
-static void getBackEdges(Function& F, T& BackEdges) {
- std::map<BasicBlock*, int> color;
- std::map<BasicBlock*, int> depth;
- std::map<BasicBlock*, int> finish;
- int time = 0;
- recBackEdge(&F.getEntryBlock(), BackEdges, color, depth, finish, time);
- DEBUG(errs() << F.getName() << " " << BackEdges.size() << "\n");
-}
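
recBackEdge above is a standard depth-first search with three colors: 0 for
unvisited, 1 for "on the current DFS path", and 2 for finished. An edge into a
color-1 node closes a cycle, so it is recorded as a back edge. A self-contained
sketch of the same scheme over a toy integer graph (the types and the example
graph are invented):

    #include <cstdio>
    #include <map>
    #include <set>
    #include <utility>
    #include <vector>

    typedef std::map<int, std::vector<int> > Graph;

    static void dfs(int n, const Graph &g, std::map<int, int> &color,
                    std::set<std::pair<int, int> > &backEdges) {
      color[n] = 1;                                  // gray: on the DFS path
      Graph::const_iterator it = g.find(n);
      if (it != g.end())
        for (size_t i = 0; i < it->second.size(); ++i) {
          int s = it->second[i];
          if (color[s] == 0)
            dfs(s, g, color, backEdges);
          else if (color[s] == 1)
            backEdges.insert(std::make_pair(n, s));  // back edge n -> s
        }
      color[n] = 2;                                  // black: finished
    }

    int main() {
      Graph g;
      g[0].push_back(1);
      g[1].push_back(2);
      g[2].push_back(1);  // the loop 1 -> 2 -> 1
      std::map<int, int> color;
      std::set<std::pair<int, int> > backEdges;
      dfs(0, g, color, backEdges);
      std::printf("%u back edge(s)\n", (unsigned)backEdges.size());  // prints 1
      return 0;
    }
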
-
-
-//Creation functions
-ModulePass* llvm::createNullProfilerRSPass() {
- return new NullProfilerRS();
-}
-
-FunctionPass* llvm::createRSProfilingPass() {
- return new ProfilerRS();
-}
diff --git a/lib/Transforms/Instrumentation/RSProfiling.h b/lib/Transforms/Instrumentation/RSProfiling.h
deleted file mode 100644
index 8bbe7c7..0000000
--- a/lib/Transforms/Instrumentation/RSProfiling.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===- RSProfiling.h - Various profiling using random sampling ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// See notes in RSProfiling.cpp
-//
-//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/RSProfiling.h"
-#include <set>
-
-namespace llvm {
- /// RSProfilers_std - a simple support class for profilers that handles most
- /// of the work of chaining and tracking inserted code.
- struct RSProfilers_std : public RSProfilers {
- static char ID;
- std::set<Value*> profcode;
-    // Look up values in profcode
- virtual bool isProfiling(Value* v);
- // handles required chaining
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-    // places counter updates in basic blocks and records added instructions in
-    // profcode
- void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
- GlobalValue *CounterArray);
- };
-}
diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile
index 025d02a..ea4a115 100644
--- a/lib/Transforms/Makefile
+++ b/lib/Transforms/Makefile
@@ -8,7 +8,7 @@
##===----------------------------------------------------------------------===##
LEVEL = ../..
-PARALLEL_DIRS = Utils Instrumentation Scalar IPO Hello
+PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Hello
include $(LEVEL)/Makefile.config
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp
index e58fa63..cf5e8c0 100644
--- a/lib/Transforms/Scalar/ABCD.cpp
+++ b/lib/Transforms/Scalar/ABCD.cpp
@@ -451,7 +451,7 @@ bool ABCD::runOnFunction(Function &F) {
modified = false;
createSSI(F);
executeABCD(F);
- DEBUG(inequality_graph.printGraph(errs(), F));
+ DEBUG(inequality_graph.printGraph(dbgs(), F));
removePhis();
inequality_graph.clear();
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index 37f383f..5a49841 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -62,8 +62,7 @@ bool ADCE::runOnFunction(Function& F) {
// Propagate liveness backwards to operands.
while (!worklist.empty()) {
- Instruction* curr = worklist.back();
- worklist.pop_back();
+ Instruction* curr = worklist.pop_back_val();
for (Instruction::op_iterator OI = curr->op_begin(), OE = curr->op_end();
OI != OE; ++OI)
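
The change above swaps a back()/pop_back() pair for SmallVector's
pop_back_val(), which removes and returns the last element in one call. A
minimal sketch of the same helper over std::vector (simplified; this mirrors
the idiom, not LLVM's implementation):

    #include <cassert>
    #include <vector>

    template <typename T>
    static T pop_back_val(std::vector<T> &v) {
      T t = v.back();  // copy the last element...
      v.pop_back();    // ...shrink the vector...
      return t;        // ...and hand the element back
    }

    int main() {
      std::vector<int> worklist;
      worklist.push_back(1);
      worklist.push_back(2);
      assert(pop_back_val(worklist) == 2 && worklist.size() == 1);
      return 0;
    }
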
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 5a92399..683c1c2 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -9,7 +9,6 @@ add_llvm_library(LLVMScalarOpts
GEPSplitter.cpp
GVN.cpp
IndVarSimplify.cpp
- InstructionCombining.cpp
JumpThreading.cpp
LICM.cpp
LoopDeletion.cpp
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 372616c..9c1b440 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -237,7 +237,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
BranchInst *BI = cast<BranchInst>(BB->getTerminator());
BasicBlock *DestBB = BI->getSuccessor(0);
- DEBUG(errs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
+ DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
// If the destination block has a single pred, then this is a trivial edge,
// just collapse it.
@@ -251,7 +251,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
- DEBUG(errs() << "AFTER:\n" << *DestBB << "\n\n\n");
+ DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
return;
}
}
@@ -294,7 +294,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
}
BB->eraseFromParent();
- DEBUG(errs() << "AFTER:\n" << *DestBB << "\n\n\n");
+ DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
@@ -591,7 +591,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// If all the instructions matched are already in this BB, don't do anything.
if (!AnyNonLocal) {
- DEBUG(errs() << "CGP: Found local addrmode: " << AddrMode << "\n");
+ DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
return false;
}
@@ -606,12 +606,12 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// computation.
Value *&SunkAddr = SunkAddrs[Addr];
if (SunkAddr) {
- DEBUG(errs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
+ DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
if (SunkAddr->getType() != Addr->getType())
SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
} else {
- DEBUG(errs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
+ DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
const Type *IntPtrTy =
TLI->getTargetData()->getIntPtrType(AccessTy->getContext());
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 1cfde8f..320afa1 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -52,9 +52,9 @@ namespace {
bool runOnBasicBlock(BasicBlock &BB);
bool handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep);
bool handleEndBlock(BasicBlock &BB);
- bool RemoveUndeadPointers(Value* Ptr, uint64_t killPointerSize,
- BasicBlock::iterator& BBI,
- SmallPtrSet<Value*, 64>& deadPointers);
+ bool RemoveUndeadPointers(Value *Ptr, uint64_t killPointerSize,
+ BasicBlock::iterator &BBI,
+ SmallPtrSet<Value*, 64> &deadPointers);
void DeleteDeadInstruction(Instruction *I,
SmallPtrSet<Value*, 64> *deadPointers = 0);
@@ -70,6 +70,8 @@ namespace {
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
+
+ unsigned getPointerSize(Value *V) const;
};
}
@@ -173,7 +175,7 @@ static bool isStoreAtLeastAsWideAs(Instruction *I1, Instruction *I2,
}
bool DSE::runOnBasicBlock(BasicBlock &BB) {
- MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+ MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
TD = getAnalysisIfAvailable<TargetData>();
bool MadeChange = false;
@@ -355,7 +357,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
continue;
}
- Value* killPointer = 0;
+ Value *killPointer = 0;
uint64_t killPointerSize = ~0UL;
// If we encounter a use of the pointer, it is no longer considered dead
@@ -371,14 +373,14 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
}
killPointer = L->getPointerOperand();
- } else if (VAArgInst* V = dyn_cast<VAArgInst>(BBI)) {
+ } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
killPointer = V->getOperand(0);
} else if (isa<MemTransferInst>(BBI) &&
isa<ConstantInt>(cast<MemTransferInst>(BBI)->getLength())) {
killPointer = cast<MemTransferInst>(BBI)->getSource();
killPointerSize = cast<ConstantInt>(
cast<MemTransferInst>(BBI)->getLength())->getZExtValue();
- } else if (AllocaInst* A = dyn_cast<AllocaInst>(BBI)) {
+ } else if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
deadPointers.erase(A);
      // Dead allocas can be DCE'd when we reach them
@@ -412,23 +414,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
deadPointers.clear();
return MadeChange;
}
-
- // Get size information for the alloca
- unsigned pointerSize = ~0U;
- if (TD) {
- if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) {
- if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize()))
- pointerSize = C->getZExtValue() *
- TD->getTypeAllocSize(A->getAllocatedType());
- } else {
- const PointerType* PT = cast<PointerType>(
- cast<Argument>(*I)->getType());
- pointerSize = TD->getTypeAllocSize(PT->getElementType());
- }
- }
-
+
// See if the call site touches it
- AliasAnalysis::ModRefResult A = AA.getModRefInfo(CS, *I, pointerSize);
+ AliasAnalysis::ModRefResult A = AA.getModRefInfo(CS, *I,
+ getPointerSize(*I));
if (A == AliasAnalysis::ModRef)
modRef++;
@@ -469,11 +458,11 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
/// RemoveUndeadPointers - check for uses of a pointer that make it
 /// undead when scanning for dead stores to allocas.
-bool DSE::RemoveUndeadPointers(Value* killPointer, uint64_t killPointerSize,
+bool DSE::RemoveUndeadPointers(Value *killPointer, uint64_t killPointerSize,
BasicBlock::iterator &BBI,
- SmallPtrSet<Value*, 64>& deadPointers) {
+ SmallPtrSet<Value*, 64> &deadPointers) {
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
+
// If the kill pointer can be easily reduced to an alloca,
// don't bother doing extraneous AA queries.
if (deadPointers.count(killPointer)) {
@@ -488,32 +477,19 @@ bool DSE::RemoveUndeadPointers(Value* killPointer, uint64_t killPointerSize,
bool MadeChange = false;
SmallVector<Value*, 16> undead;
-
+
for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(),
- E = deadPointers.end(); I != E; ++I) {
- // Get size information for the alloca.
- unsigned pointerSize = ~0U;
- if (TD) {
- if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) {
- if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize()))
- pointerSize = C->getZExtValue() *
- TD->getTypeAllocSize(A->getAllocatedType());
- } else {
- const PointerType* PT = cast<PointerType>(cast<Argument>(*I)->getType());
- pointerSize = TD->getTypeAllocSize(PT->getElementType());
- }
- }
-
+ E = deadPointers.end(); I != E; ++I) {
// See if this pointer could alias it
- AliasAnalysis::AliasResult A = AA.alias(*I, pointerSize,
+ AliasAnalysis::AliasResult A = AA.alias(*I, getPointerSize(*I),
killPointer, killPointerSize);
// If it must-alias and a store, we can delete it
if (isa<StoreInst>(BBI) && A == AliasAnalysis::MustAlias) {
- StoreInst* S = cast<StoreInst>(BBI);
+ StoreInst *S = cast<StoreInst>(BBI);
// Remove it!
- BBI++;
+ ++BBI;
DeleteDeadInstruction(S, &deadPointers);
NumFastStores++;
MadeChange = true;
@@ -547,9 +523,8 @@ void DSE::DeleteDeadInstruction(Instruction *I,
// Before we touch this instruction, remove it from memdep!
MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
- while (!NowDeadInsts.empty()) {
- Instruction *DeadInst = NowDeadInsts.back();
- NowDeadInsts.pop_back();
+ do {
+ Instruction *DeadInst = NowDeadInsts.pop_back_val();
++NumFastOther;
@@ -573,5 +548,20 @@ void DSE::DeleteDeadInstruction(Instruction *I,
DeadInst->eraseFromParent();
if (ValueSet) ValueSet->erase(DeadInst);
+ } while (!NowDeadInsts.empty());
+}
+
+unsigned DSE::getPointerSize(Value *V) const {
+ if (TD) {
+ if (AllocaInst *A = dyn_cast<AllocaInst>(V)) {
+ // Get size information for the alloca
+ if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
+ return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType());
+ } else {
+ assert(isa<Argument>(V) && "Expected AllocaInst or Argument!");
+ const PointerType *PT = cast<PointerType>(V->getType());
+ return TD->getTypeAllocSize(PT->getElementType());
+ }
}
+ return ~0U;
}
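
The new getPointerSize helper folds the two duplicated blocks removed above
into one place: for an alloca with a constant array size it returns
arraySize * allocSize(elementType), for an argument it returns the pointee's
alloc size, and it falls back to ~0U when nothing is known. A small numeric
sketch of the alloca case, with a 4-byte element size assumed:

    #include <cassert>

    // Sketch only: constSize, n, and elemBytes stand in for the ConstantInt
    // array size and TargetData::getTypeAllocSize of the allocated type.
    static unsigned allocaPointerSize(bool haveTD, bool constSize,
                                      unsigned n, unsigned elemBytes) {
      if (haveTD && constSize)
        return n * elemBytes;
      return ~0U;  // unknown size: stay conservative
    }

    int main() {
      assert(allocaPointerSize(true, true, 4, 4) == 16);    // e.g. 4 x i32
      assert(allocaPointerSize(false, true, 4, 4) == ~0U);  // no TargetData
      return 0;
    }
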
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 612b415..ac0d850 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -829,7 +829,7 @@ SpeculationFailure:
SmallVector<BasicBlock*, 32> BBWorklist;
BBWorklist.push_back(BB);
- while (!BBWorklist.empty()) {
+ do {
BasicBlock *Entry = BBWorklist.pop_back_val();
// Note that this sets blocks to 0 (unavailable) if they happen to not
// already be in FullyAvailableBlocks. This is safe.
@@ -841,7 +841,7 @@ SpeculationFailure:
for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
BBWorklist.push_back(*I);
- }
+ } while (!BBWorklist.empty());
return false;
}
@@ -1022,7 +1022,7 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
// FIXME: Study to see if/when this happens.
if (LoadOffset == StoreOffset) {
#if 0
- errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
+ dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
<< "Base = " << *StoreBase << "\n"
<< "Store Ptr = " << *WritePtr << "\n"
<< "Store Offs = " << StoreOffset << "\n"
@@ -1053,7 +1053,7 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
}
if (isAAFailure) {
#if 0
- errs() << "STORE LOAD DEP WITH COMMON BASE:\n"
+ dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
<< "Base = " << *StoreBase << "\n"
<< "Store Ptr = " << *WritePtr << "\n"
<< "Store Offs = " << StoreOffset << "\n"
@@ -1362,7 +1362,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
SmallVector<NonLocalDepResult, 64> Deps;
MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
Deps);
- //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: "
+ //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
// << Deps.size() << *LI << '\n');
// If we had to process more than one hundred blocks to find the
@@ -1375,9 +1375,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// clobber in the current block. Reject this early.
if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
DEBUG(
- errs() << "GVN: non-local load ";
- WriteAsOperand(errs(), LI);
- errs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
+ dbgs() << "GVN: non-local load ";
+ WriteAsOperand(dbgs(), LI);
+ dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
);
return false;
}
@@ -1500,7 +1500,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// load, then it is fully redundant and we can use PHI insertion to compute
// its value. Insert PHIs and remove the fully redundant value now.
if (UnavailableBlocks.empty()) {
- DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
+ DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
// Perform PHI construction.
Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
@@ -1614,7 +1614,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// We don't currently handle critical edges :(
if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) {
- DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
+ DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
<< UnavailablePred->getName() << "': " << *LI << '\n');
return false;
}
@@ -1646,7 +1646,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// we fail PRE.
if (LoadPtr == 0) {
assert(NewInsts.empty() && "Shouldn't insert insts on failure");
- DEBUG(errs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
+ DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
<< *LI->getOperand(0) << "\n");
return false;
}
@@ -1679,9 +1679,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// Okay, we can eliminate this load by inserting a reload in the predecessor
// and using PHI construction to get the value in the other predecessors, do
// it.
- DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
+ DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
DEBUG(if (!NewInsts.empty())
- errs() << "INSERTED " << NewInsts.size() << " INSTS: "
+ dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
<< *NewInsts.back() << '\n');
Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
@@ -1752,7 +1752,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
}
if (AvailVal) {
- DEBUG(errs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
+ DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
<< *AvailVal << '\n' << *L << "\n\n\n");
// Replace the load!
@@ -1766,10 +1766,10 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
DEBUG(
// fast print dep, using operator<< on instruction would be too slow
- errs() << "GVN: load ";
- WriteAsOperand(errs(), L);
+ dbgs() << "GVN: load ";
+ WriteAsOperand(dbgs(), L);
Instruction *I = Dep.getInst();
- errs() << " is clobbered by " << *I << '\n';
+ dbgs() << " is clobbered by " << *I << '\n';
);
return false;
}
@@ -1793,7 +1793,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
if (StoredVal == 0)
return false;
- DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
+ DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
<< '\n' << *L << "\n\n\n");
}
else
@@ -1822,7 +1822,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
if (AvailableVal == 0)
return false;
- DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
+ DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
<< "\n" << *L << "\n\n\n");
}
else
@@ -1990,7 +1990,7 @@ bool GVN::runOnFunction(Function& F) {
unsigned Iteration = 0;
while (ShouldContinue) {
- DEBUG(errs() << "GVN iteration: " << Iteration << "\n");
+ DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
ShouldContinue = iterateOnFunction(F);
Changed |= ShouldContinue;
++Iteration;
@@ -2038,7 +2038,7 @@ bool GVN::processBlock(BasicBlock *BB) {
for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
E = toErase.end(); I != E; ++I) {
- DEBUG(errs() << "GVN removed: " << **I << '\n');
+ DEBUG(dbgs() << "GVN removed: " << **I << '\n');
if (MD) MD->removeInstruction(*I);
(*I)->eraseFromParent();
DEBUG(verifyRemoved(*I));
@@ -2196,7 +2196,7 @@ bool GVN::performPRE(Function &F) {
MD->invalidateCachedPointerInfo(Phi);
VN.erase(CurInst);
- DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n');
+ DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
if (MD) MD->removeInstruction(CurInst);
CurInst->eraseFromParent();
DEBUG(verifyRemoved(CurInst));
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 3aa4fd3..ce1307c 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -182,7 +182,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
else
Opcode = ICmpInst::ICMP_EQ;
- DEBUG(errs() << "INDVARS: Rewriting loop exit condition to:\n"
+ DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
<< " LHS:" << *CmpIndVar << '\n'
<< " op:\t"
<< (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
@@ -273,7 +273,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
- DEBUG(errs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
+ DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
<< " LoopVal = " << *Inst << "\n");
PN->setIncomingValue(i, ExitVal);
@@ -401,7 +401,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
++NumInserted;
Changed = true;
- DEBUG(errs() << "INDVARS: New CanIV: " << *IndVar << '\n');
+ DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n');
// Now that the official induction variable is established, reinsert
// the old canonical-looking variable after it so that the IR remains
@@ -438,7 +438,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)));
// Clean up dead instructions.
- DeleteDeadPHIs(L->getHeader());
+ Changed |= DeleteDeadPHIs(L->getHeader());
// Check a post-condition.
assert(L->isLCSSAForm() && "Indvars did not leave the loop in lcssa form!");
return Changed;
@@ -506,7 +506,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
NewVal->takeName(Op);
User->replaceUsesOfWith(Op, NewVal);
UI->setOperandValToReplace(NewVal);
- DEBUG(errs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
+ DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
<< " into = " << *NewVal << "\n");
++NumRemoved;
Changed = true;
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
deleted file mode 100644
index 516d72e..0000000
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ /dev/null
@@ -1,13736 +0,0 @@
-//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// InstructionCombining - Combine instructions to form fewer, simpler
-// instructions. This pass does not modify the CFG. This pass is where
-// algebraic simplification happens.
-//
-// This pass combines things like:
-// %Y = add i32 %X, 1
-// %Z = add i32 %Y, 1
-// into:
-// %Z = add i32 %X, 2
-//
-// This is a simple worklist-driven algorithm.
-//
-// This pass guarantees that the following canonicalizations are performed on
-// the program:
-// 1. If a binary operator has a constant operand, it is moved to the RHS
-// 2. Bitwise operators with constant operands are always grouped so that
-// shifts are performed first, then or's, then and's, then xor's.
-// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
-// 4. All cmp instructions on boolean values are replaced with logical ops
-// 5. add X, X is represented as (X*2) => (X << 1)
-// 6. Multiplies with a power-of-two constant argument are transformed into
-// shifts.
-// ... etc.
-//
-//===----------------------------------------------------------------------===//
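
Rule 6 in the header above (multiplies by a power-of-two constant become
shifts) is easy to check in isolation. A standalone C++ sketch of that single
rewrite, with the helper name invented:

    #include <cassert>
    #include <cstdint>

    // mulByConst models what the canonicalized IR computes: when c is a
    // power of two, x * c is rewritten as x << log2(c).
    static uint32_t mulByConst(uint32_t x, uint32_t c) {
      bool pow2 = c != 0 && (c & (c - 1)) == 0;
      if (pow2) {
        unsigned sh = 0;
        while (c >>= 1) ++sh;  // sh = log2(c)
        return x << sh;
      }
      return x * c;            // not a power of two: keep the multiply
    }

    int main() {
      assert(mulByConst(7, 8) == (7u << 3));  // e.g. %Z = shl i32 %X, 3
      assert(mulByConst(7, 6) == 42);
      return 0;
    }
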
-
-#define DEBUG_TYPE "instcombine"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Pass.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Operator.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/ConstantRange.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/InstVisitor.h"
-#include "llvm/Support/IRBuilder.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/PatternMatch.h"
-#include "llvm/Support/TargetFolder.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include <algorithm>
-#include <climits>
-using namespace llvm;
-using namespace llvm::PatternMatch;
-
-STATISTIC(NumCombined , "Number of insts combined");
-STATISTIC(NumConstProp, "Number of constant folds");
-STATISTIC(NumDeadInst , "Number of dead inst eliminated");
-STATISTIC(NumDeadStore, "Number of dead stores eliminated");
-STATISTIC(NumSunkInst , "Number of instructions sunk");
-
-/// SelectPatternFlavor - We can match a variety of different patterns for
-/// select operations.
-enum SelectPatternFlavor {
- SPF_UNKNOWN = 0,
- SPF_SMIN, SPF_UMIN,
- SPF_SMAX, SPF_UMAX
- //SPF_ABS - TODO.
-};
-
-namespace {
- /// InstCombineWorklist - This is the worklist management logic for
- /// InstCombine.
- class InstCombineWorklist {
- SmallVector<Instruction*, 256> Worklist;
- DenseMap<Instruction*, unsigned> WorklistMap;
-
- void operator=(const InstCombineWorklist&RHS); // DO NOT IMPLEMENT
- InstCombineWorklist(const InstCombineWorklist&); // DO NOT IMPLEMENT
- public:
- InstCombineWorklist() {}
-
- bool isEmpty() const { return Worklist.empty(); }
-
- /// Add - Add the specified instruction to the worklist if it isn't already
- /// in it.
- void Add(Instruction *I) {
- if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
- DEBUG(errs() << "IC: ADD: " << *I << '\n');
- Worklist.push_back(I);
- }
- }
-
- void AddValue(Value *V) {
- if (Instruction *I = dyn_cast<Instruction>(V))
- Add(I);
- }
-
-    /// AddInitialGroup - Add the specified batch of instructions in reverse
-    /// order; this should only be done when the worklist is empty and the
-    /// group has no duplicates.
- void AddInitialGroup(Instruction *const *List, unsigned NumEntries) {
- assert(Worklist.empty() && "Worklist must be empty to add initial group");
- Worklist.reserve(NumEntries+16);
- DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
- for (; NumEntries; --NumEntries) {
- Instruction *I = List[NumEntries-1];
- WorklistMap.insert(std::make_pair(I, Worklist.size()));
- Worklist.push_back(I);
- }
- }
-
- // Remove - remove I from the worklist if it exists.
- void Remove(Instruction *I) {
- DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
- if (It == WorklistMap.end()) return; // Not in worklist.
-
- // Don't bother moving everything down, just null out the slot.
- Worklist[It->second] = 0;
-
- WorklistMap.erase(It);
- }
-
- Instruction *RemoveOne() {
- Instruction *I = Worklist.back();
- Worklist.pop_back();
- WorklistMap.erase(I);
- return I;
- }
-
- /// AddUsersToWorkList - When an instruction is simplified, add all users of
- /// the instruction to the work lists because they might get more simplified
- /// now.
- ///
- void AddUsersToWorkList(Instruction &I) {
- for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
- UI != UE; ++UI)
- Add(cast<Instruction>(*UI));
- }
-
-
- /// Zap - check that the worklist is empty and nuke the backing store for
- /// the map if it is large.
- void Zap() {
- assert(WorklistMap.empty() && "Worklist empty, but map not?");
-
- // Do an explicit clear, this shrinks the map if needed.
- WorklistMap.clear();
- }
- };
-} // end anonymous namespace.
-
-
-namespace {
- /// InstCombineIRInserter - This is an IRBuilder insertion helper that works
- /// just like the normal insertion helper, but also adds any new instructions
- /// to the instcombine worklist.
- class InstCombineIRInserter : public IRBuilderDefaultInserter<true> {
- InstCombineWorklist &Worklist;
- public:
- InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {}
-
- void InsertHelper(Instruction *I, const Twine &Name,
- BasicBlock *BB, BasicBlock::iterator InsertPt) const {
- IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt);
- Worklist.Add(I);
- }
- };
-} // end anonymous namespace
-
-
-namespace {
- class InstCombiner : public FunctionPass,
- public InstVisitor<InstCombiner, Instruction*> {
- TargetData *TD;
- bool MustPreserveLCSSA;
- bool MadeIRChange;
- public:
- /// Worklist - All of the instructions that need to be simplified.
- InstCombineWorklist Worklist;
-
- /// Builder - This is an IRBuilder that automatically inserts new
- /// instructions into the worklist when they are created.
- typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy;
- BuilderTy *Builder;
-
- static char ID; // Pass identification, replacement for typeid
- InstCombiner() : FunctionPass(&ID), TD(0), Builder(0) {}
-
- LLVMContext *Context;
- LLVMContext *getContext() const { return Context; }
-
- public:
- virtual bool runOnFunction(Function &F);
-
- bool DoOneIteration(Function &F, unsigned ItNum);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreservedID(LCSSAID);
- AU.setPreservesCFG();
- }
-
- TargetData *getTargetData() const { return TD; }
-
- // Visitation implementation - Implement instruction combining for different
- // instruction types. The semantics are as follows:
- // Return Value:
- // null - No change was made
- // I - Change was made, I is still valid, I may be dead though
- // otherwise - Change was made, replace I with returned instruction
- //
- Instruction *visitAdd(BinaryOperator &I);
- Instruction *visitFAdd(BinaryOperator &I);
- Value *OptimizePointerDifference(Value *LHS, Value *RHS, const Type *Ty);
- Instruction *visitSub(BinaryOperator &I);
- Instruction *visitFSub(BinaryOperator &I);
- Instruction *visitMul(BinaryOperator &I);
- Instruction *visitFMul(BinaryOperator &I);
- Instruction *visitURem(BinaryOperator &I);
- Instruction *visitSRem(BinaryOperator &I);
- Instruction *visitFRem(BinaryOperator &I);
- bool SimplifyDivRemOfSelect(BinaryOperator &I);
- Instruction *commonRemTransforms(BinaryOperator &I);
- Instruction *commonIRemTransforms(BinaryOperator &I);
- Instruction *commonDivTransforms(BinaryOperator &I);
- Instruction *commonIDivTransforms(BinaryOperator &I);
- Instruction *visitUDiv(BinaryOperator &I);
- Instruction *visitSDiv(BinaryOperator &I);
- Instruction *visitFDiv(BinaryOperator &I);
- Instruction *FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS);
- Instruction *FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS);
- Instruction *visitAnd(BinaryOperator &I);
- Instruction *FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS);
- Instruction *FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS);
- Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
- Value *A, Value *B, Value *C);
- Instruction *visitOr (BinaryOperator &I);
- Instruction *visitXor(BinaryOperator &I);
- Instruction *visitShl(BinaryOperator &I);
- Instruction *visitAShr(BinaryOperator &I);
- Instruction *visitLShr(BinaryOperator &I);
- Instruction *commonShiftTransforms(BinaryOperator &I);
- Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI,
- Constant *RHSC);
- Instruction *visitFCmpInst(FCmpInst &I);
- Instruction *visitICmpInst(ICmpInst &I);
- Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI);
- Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
- Instruction *LHS,
- ConstantInt *RHS);
- Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
- ConstantInt *DivRHS);
- Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI,
- ICmpInst::Predicate Pred, Value *TheAdd);
- Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
- ICmpInst::Predicate Cond, Instruction &I);
- Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
- BinaryOperator &I);
- Instruction *commonCastTransforms(CastInst &CI);
- Instruction *commonIntCastTransforms(CastInst &CI);
- Instruction *commonPointerCastTransforms(CastInst &CI);
- Instruction *visitTrunc(TruncInst &CI);
- Instruction *visitZExt(ZExtInst &CI);
- Instruction *visitSExt(SExtInst &CI);
- Instruction *visitFPTrunc(FPTruncInst &CI);
- Instruction *visitFPExt(CastInst &CI);
- Instruction *visitFPToUI(FPToUIInst &FI);
- Instruction *visitFPToSI(FPToSIInst &FI);
- Instruction *visitUIToFP(CastInst &CI);
- Instruction *visitSIToFP(CastInst &CI);
- Instruction *visitPtrToInt(PtrToIntInst &CI);
- Instruction *visitIntToPtr(IntToPtrInst &CI);
- Instruction *visitBitCast(BitCastInst &CI);
- Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
- Instruction *FI);
- Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*);
- Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1,
- Value *A, Value *B, Instruction &Outer,
- SelectPatternFlavor SPF2, Value *C);
- Instruction *visitSelectInst(SelectInst &SI);
- Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
- Instruction *visitCallInst(CallInst &CI);
- Instruction *visitInvokeInst(InvokeInst &II);
-
- Instruction *SliceUpIllegalIntegerPHI(PHINode &PN);
- Instruction *visitPHINode(PHINode &PN);
- Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
- Instruction *visitAllocaInst(AllocaInst &AI);
- Instruction *visitFree(Instruction &FI);
- Instruction *visitLoadInst(LoadInst &LI);
- Instruction *visitStoreInst(StoreInst &SI);
- Instruction *visitBranchInst(BranchInst &BI);
- Instruction *visitSwitchInst(SwitchInst &SI);
- Instruction *visitInsertElementInst(InsertElementInst &IE);
- Instruction *visitExtractElementInst(ExtractElementInst &EI);
- Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI);
- Instruction *visitExtractValueInst(ExtractValueInst &EV);
-
- // visitInstruction - Specify what to return for unhandled instructions...
- Instruction *visitInstruction(Instruction &I) { return 0; }
-
- private:
- Instruction *visitCallSite(CallSite CS);
- bool transformConstExprCastCall(CallSite CS);
- Instruction *transformCallThroughTrampoline(CallSite CS);
- Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
- bool DoXform = true);
- bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
- DbgDeclareInst *hasOneUsePlusDeclare(Value *V);
-
-
- public:
- // InsertNewInstBefore - insert an instruction New before instruction Old
- // in the program. Add the new instruction to the worklist.
- //
- Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) {
- assert(New && New->getParent() == 0 &&
- "New instruction already inserted into a basic block!");
- BasicBlock *BB = Old.getParent();
- BB->getInstList().insert(&Old, New); // Insert inst
- Worklist.Add(New);
- return New;
- }
-
- // ReplaceInstUsesWith - This method is to be used when an instruction is
-  // found to be dead, replaceable with another preexisting expression. Here
- // we add all uses of I to the worklist, replace all uses of I with the new
- // value, then return I, so that the inst combiner will know that I was
- // modified.
- //
- Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
- Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
-
- // If we are replacing the instruction with itself, this must be in a
- // segment of unreachable code, so just clobber the instruction.
- if (&I == V)
- V = UndefValue::get(I.getType());
-
- I.replaceAllUsesWith(V);
- return &I;
- }
-
- // EraseInstFromFunction - When dealing with an instruction that has side
- // effects or produces a void value, we can't rely on DCE to delete the
- // instruction. Instead, visit methods should return the value returned by
- // this function.
- Instruction *EraseInstFromFunction(Instruction &I) {
- DEBUG(errs() << "IC: ERASE " << I << '\n');
-
- assert(I.use_empty() && "Cannot erase instruction that is used!");
- // Make sure that we reprocess all operands now that we reduced their
- // use counts.
- if (I.getNumOperands() < 8) {
- for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
- if (Instruction *Op = dyn_cast<Instruction>(*i))
- Worklist.Add(Op);
- }
- Worklist.Remove(&I);
- I.eraseFromParent();
- MadeIRChange = true;
-      return 0;  // Don't do anything with I
- }
-
- void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero,
- APInt &KnownOne, unsigned Depth = 0) const {
- return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
- }
-
- bool MaskedValueIsZero(Value *V, const APInt &Mask,
- unsigned Depth = 0) const {
- return llvm::MaskedValueIsZero(V, Mask, TD, Depth);
- }
- unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const {
- return llvm::ComputeNumSignBits(Op, TD, Depth);
- }
-
- private:
-
- /// SimplifyCommutative - This performs a few simplifications for
- /// commutative operators.
- bool SimplifyCommutative(BinaryOperator &I);
-
- /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
- /// based on the demanded bits.
- Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
- APInt& KnownZero, APInt& KnownOne,
- unsigned Depth);
- bool SimplifyDemandedBits(Use &U, APInt DemandedMask,
- APInt& KnownZero, APInt& KnownOne,
- unsigned Depth=0);
-
- /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
- /// SimplifyDemandedBits knows about. See if the instruction has any
- /// properties that allow us to simplify its operands.
- bool SimplifyDemandedInstructionBits(Instruction &Inst);
-
- Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
- APInt& UndefElts, unsigned Depth = 0);
-
- // FoldOpIntoPhi - Given a binary operator, cast instruction, or select
- // which has a PHI node as operand #0, see if we can fold the instruction
- // into the PHI (which is only possible if all operands to the PHI are
- // constants).
- //
- // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
- // that would normally be unprofitable because they strongly encourage jump
- // threading.
- Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false);
-
- // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
- // operator and they all are only used by the PHI, PHI together their
- // inputs, and do the operation once, to the result of the PHI.
- Instruction *FoldPHIArgOpIntoPHI(PHINode &PN);
- Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN);
- Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
- Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN);
-
-
- Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
- ConstantInt *AndRHS, BinaryOperator &TheAnd);
-
- Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
- bool isSub, Instruction &I);
- Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
- bool isSigned, bool Inside, Instruction &IB);
- Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
- Instruction *MatchBSwap(BinaryOperator &I);
- bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
- Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
- Instruction *SimplifyMemSet(MemSetInst *MI);
-
-
- Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
-
- bool CanEvaluateInDifferentType(Value *V, const Type *Ty,
- unsigned CastOpc, int &NumCastsRemoved);
- unsigned GetOrEnforceKnownAlignment(Value *V,
- unsigned PrefAlign = 0);
-
- };
-} // end anonymous namespace
-
-char InstCombiner::ID = 0;
-static RegisterPass<InstCombiner>
-X("instcombine", "Combine redundant instructions");
-
-// getComplexity: Assign a complexity or rank value to LLVM Values...
-// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg/Unary, 4 -> OtherInst
-static unsigned getComplexity(Value *V) {
- if (isa<Instruction>(V)) {
- if (BinaryOperator::isNeg(V) ||
- BinaryOperator::isFNeg(V) ||
- BinaryOperator::isNot(V))
- return 3;
- return 4;
- }
- if (isa<Argument>(V)) return 3;
- return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
-}
-
-// isOnlyUse - Return true if this value will be dead once this use of it goes
-// away: it either has a single use or is a constant.
-static bool isOnlyUse(Value *V) {
- return V->hasOneUse() || isa<Constant>(V);
-}
-
-// getPromotedType - Return the specified type promoted as it would be to pass
-// through a va_arg area.
-static const Type *getPromotedType(const Type *Ty) {
- if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
- if (ITy->getBitWidth() < 32)
- return Type::getInt32Ty(Ty->getContext());
- }
- return Ty;
-}
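-
-// e.g. (illustrative): i1, i8, and i16 all promote to i32 here, while i32,
-// i64, and non-integer types such as float are returned unchanged.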
-
-/// ShouldChangeType - Return true if it is desirable to convert a computation
-/// from 'From' to 'To'. We don't want to convert from a legal to an illegal
-/// type, for example, or from a smaller to a larger illegal type.
-static bool ShouldChangeType(const Type *From, const Type *To,
- const TargetData *TD) {
- assert(isa<IntegerType>(From) && isa<IntegerType>(To));
-
- // If we don't have TD, we don't know if the source/dest are legal.
- if (!TD) return false;
-
- unsigned FromWidth = From->getPrimitiveSizeInBits();
- unsigned ToWidth = To->getPrimitiveSizeInBits();
- bool FromLegal = TD->isLegalInteger(FromWidth);
- bool ToLegal = TD->isLegalInteger(ToWidth);
-
- // If this is a legal integer from type, and the result would be an illegal
- // type, don't do the transformation.
- if (FromLegal && !ToLegal)
- return false;
-
- // Otherwise, if both are illegal, do not increase the size of the result. We
- // do allow things like i160 -> i64, but not i64 -> i160.
- if (!FromLegal && !ToLegal && ToWidth > FromWidth)
- return false;
-
- return true;
-}
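-
-// Illustrative sketch, assuming a target whose only legal integer types are
-// i32 and i64:
-//   i32  -> i17 : false (legal source, illegal destination)
-//   i128 -> i96 : true  (both illegal, result shrinks)
-//   i96  -> i128: false (both illegal, result grows)
-//   i32  -> i64 : true  (both legal)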
-
-/// getBitCastOperand - If the specified value is a bitcast (whether an
-/// instruction or a constant expression) or a GetElementPtrInst with all zero
-/// indices, return the source operand value; otherwise return null.
-static Value *getBitCastOperand(Value *V) {
- if (Operator *O = dyn_cast<Operator>(V)) {
- if (O->getOpcode() == Instruction::BitCast)
- return O->getOperand(0);
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
- if (GEP->hasAllZeroIndices())
- return GEP->getPointerOperand();
- }
- return 0;
-}
-
-/// This function is a wrapper around CastInst::isEliminableCastPair. It
-/// simply extracts arguments and returns what that function returns.
-static Instruction::CastOps
-isEliminableCastPair(
- const CastInst *CI, ///< The first cast instruction
- unsigned opcode, ///< The opcode of the second cast instruction
- const Type *DstTy, ///< The target type for the second cast instruction
- TargetData *TD ///< The target data for pointer size
-) {
-
- const Type *SrcTy = CI->getOperand(0)->getType(); // The first cast's source
- const Type *MidTy = CI->getType(); // The intermediate type
-
- // Get the opcodes of the two Cast instructions
- Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
- Instruction::CastOps secondOp = Instruction::CastOps(opcode);
-
- unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
- DstTy,
- TD ? TD->getIntPtrType(CI->getContext()) : 0);
-
- // We don't want to form an inttoptr or ptrtoint that converts to an integer
- // type that differs from the pointer size.
- if ((Res == Instruction::IntToPtr &&
- (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) ||
- (Res == Instruction::PtrToInt &&
- (!TD || DstTy != TD->getIntPtrType(CI->getContext()))))
- Res = 0;
-
- return Instruction::CastOps(Res);
-}
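-
-// e.g. (illustrative): given CI = (zext i8 %x to i16) and a second zext to
-// i32, the pair folds to a single zext i8 -> i32, so ZExt is returned; a
-// result of IntToPtr or PtrToInt whose integer width differs from the pointer
-// size is rejected above and yields 0.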
-
-/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results
-/// in any code being generated. It does not require codegen if V is simple
-/// enough or if the cast can be folded into other casts.
-static bool ValueRequiresCast(Instruction::CastOps opcode, const Value *V,
- const Type *Ty, TargetData *TD) {
- if (V->getType() == Ty || isa<Constant>(V)) return false;
-
- // If this is another cast that can be eliminated, it isn't codegen either.
- if (const CastInst *CI = dyn_cast<CastInst>(V))
- if (isEliminableCastPair(CI, opcode, Ty, TD))
- return false;
- return true;
-}
-
-// SimplifyCommutative - This performs a few simplifications for commutative
-// operators:
-//
-// 1. Order operands such that they are listed from right (least complex) to
-// left (most complex). This puts constants rightmost, then unary operators,
-// then other instructions.
-//
-// 2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2))
-// 3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
-//
-bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
- bool Changed = false;
- if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1)))
- Changed = !I.swapOperands();
-
- if (!I.isAssociative()) return Changed;
- Instruction::BinaryOps Opcode = I.getOpcode();
- if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0)))
- if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) {
- if (isa<Constant>(I.getOperand(1))) {
- Constant *Folded = ConstantExpr::get(I.getOpcode(),
- cast<Constant>(I.getOperand(1)),
- cast<Constant>(Op->getOperand(1)));
- I.setOperand(0, Op->getOperand(0));
- I.setOperand(1, Folded);
- return true;
- } else if (BinaryOperator *Op1=dyn_cast<BinaryOperator>(I.getOperand(1)))
- if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) &&
- isOnlyUse(Op) && isOnlyUse(Op1)) {
- Constant *C1 = cast<Constant>(Op->getOperand(1));
- Constant *C2 = cast<Constant>(Op1->getOperand(1));
-
- // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
- Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2);
- Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0),
- Op1->getOperand(0),
- Op1->getName(), &I);
- Worklist.Add(New);
- I.setOperand(0, New);
- I.setOperand(1, Folded);
- return true;
- }
- }
- return Changed;
-}
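-
-// Worked example (illustrative): for transform #2,
-//   add (add %x, 1), 2            --> add %x, 3
-// and for transform #3, assuming the inner adds have no other uses,
-//   add (add %x, 1), (add %y, 2)  --> add (add %x, %y), 3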
-
-// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
-// if the LHS is a constant zero (which is the 'negate' form).
-//
-static inline Value *dyn_castNegVal(Value *V) {
- if (BinaryOperator::isNeg(V))
- return BinaryOperator::getNegArgument(V);
-
- // Constants can be considered to be negated values if they can be folded.
- if (ConstantInt *C = dyn_cast<ConstantInt>(V))
- return ConstantExpr::getNeg(C);
-
- if (ConstantVector *C = dyn_cast<ConstantVector>(V))
- if (C->getType()->getElementType()->isInteger())
- return ConstantExpr::getNeg(C);
-
- return 0;
-}
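-
-// e.g. (illustrative): returns %x when given (sub 0, %x), and returns the
-// folded constant -5 when given the constant 5.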
-
-// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the
-// instruction if the LHS is a constant negative zero (which is the 'negate'
-// form).
-//
-static inline Value *dyn_castFNegVal(Value *V) {
- if (BinaryOperator::isFNeg(V))
- return BinaryOperator::getFNegArgument(V);
-
- // Constants can be considered to be negated values if they can be folded.
- if (ConstantFP *C = dyn_cast<ConstantFP>(V))
- return ConstantExpr::getFNeg(C);
-
- if (ConstantVector *C = dyn_cast<ConstantVector>(V))
- if (C->getType()->getElementType()->isFloatingPoint())
- return ConstantExpr::getFNeg(C);
-
- return 0;
-}
-
-/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms,
-/// returning the kind and providing the out parameter results if we
-/// successfully match.
-static SelectPatternFlavor
-MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
- SelectInst *SI = dyn_cast<SelectInst>(V);
- if (SI == 0) return SPF_UNKNOWN;
-
- ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition());
- if (ICI == 0) return SPF_UNKNOWN;
-
- LHS = ICI->getOperand(0);
- RHS = ICI->getOperand(1);
-
- // (icmp X, Y) ? X : Y
- if (SI->getTrueValue() == ICI->getOperand(0) &&
- SI->getFalseValue() == ICI->getOperand(1)) {
- switch (ICI->getPredicate()) {
- default: return SPF_UNKNOWN; // Equality.
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE: return SPF_UMAX;
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE: return SPF_SMAX;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE: return SPF_UMIN;
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE: return SPF_SMIN;
- }
- }
-
- // (icmp X, Y) ? Y : X
- if (SI->getTrueValue() == ICI->getOperand(1) &&
- SI->getFalseValue() == ICI->getOperand(0)) {
- switch (ICI->getPredicate()) {
- default: return SPF_UNKNOWN; // Equality.
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE: return SPF_UMIN;
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE: return SPF_SMIN;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE: return SPF_UMAX;
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE: return SPF_SMAX;
- }
- }
-
- // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
-
- return SPF_UNKNOWN;
-}
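-
-// e.g. (illustrative):
-//   %c = icmp sgt i32 %a, %b
-//   %s = select i1 %c, i32 %a, i32 %b
-// matches as SPF_SMAX with LHS = %a and RHS = %b; swapping the select arms
-// would match as SPF_SMIN instead.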
-
-/// isFreeToInvert - Return true if the specified value is free to invert (apply
-/// ~ to). This happens in cases where the ~ can be eliminated.
-static inline bool isFreeToInvert(Value *V) {
- // ~(~(X)) -> X.
- if (BinaryOperator::isNot(V))
- return true;
-
- // Constants can be considered to be not'ed values.
- if (isa<ConstantInt>(V))
- return true;
-
- // Compares can be inverted if they have a single use.
- if (CmpInst *CI = dyn_cast<CmpInst>(V))
- return CI->hasOneUse();
-
- return false;
-}
-
-static inline Value *dyn_castNotVal(Value *V) {
- // If this is not(not(x)) don't return that this is a not: we want the two
- // not's to be folded first.
- if (BinaryOperator::isNot(V)) {
- Value *Operand = BinaryOperator::getNotArgument(V);
- if (!isFreeToInvert(Operand))
- return Operand;
- }
-
- // Constants can be considered to be not'ed values...
- if (ConstantInt *C = dyn_cast<ConstantInt>(V))
- return ConstantInt::get(C->getType(), ~C->getValue());
- return 0;
-}
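-
-// e.g. (illustrative): for the i8 constant 12 this returns ~12 (i.e. -13);
-// for (xor %x, -1) it returns %x, but only when %x is not itself free to
-// invert.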
-
-
-
-// dyn_castFoldableMul - If this value is a multiply that can be folded into
-// other computations (because it has a constant operand), return the
-// non-constant operand of the multiply, and set CST to point to the multiplier.
-// Otherwise, return null.
-//
-static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
- if (V->hasOneUse() && V->getType()->isInteger())
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- if (I->getOpcode() == Instruction::Mul)
- if ((CST = dyn_cast<ConstantInt>(I->getOperand(1))))
- return I->getOperand(0);
- if (I->getOpcode() == Instruction::Shl)
- if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) {
- // The multiplier is really 1 << CST.
- uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
- uint32_t CSTVal = CST->getLimitedValue(BitWidth);
- CST = ConstantInt::get(V->getType()->getContext(),
- APInt(BitWidth, 1).shl(CSTVal));
- return I->getOperand(0);
- }
- }
- return 0;
-}
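-
-// e.g. (illustrative): for a single-use %t = shl i32 %x, 3, this returns %x
-// and sets CST to 8, since the shift multiplies by 1 << 3.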
-
-/// AddOne - Add one to a Constant.
-static Constant *AddOne(Constant *C) {
- return ConstantExpr::getAdd(C,
- ConstantInt::get(C->getType(), 1));
-}
-/// SubOne - Subtract one from a ConstantInt
-static Constant *SubOne(ConstantInt *C) {
- return ConstantExpr::getSub(C,
- ConstantInt::get(C->getType(), 1));
-}
-/// MultiplyOverflows - True if the multiply cannot be represented in an
-/// integer of this size.
-static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
- uint32_t W = C1->getBitWidth();
- APInt LHSExt = C1->getValue(), RHSExt = C2->getValue();
- if (sign) {
- LHSExt.sext(W * 2);
- RHSExt.sext(W * 2);
- } else {
- LHSExt.zext(W * 2);
- RHSExt.zext(W * 2);
- }
-
- APInt MulExt = LHSExt * RHSExt;
-
- if (!sign)
- return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));
-
- APInt Min = APInt::getSignedMinValue(W).sext(W * 2);
- APInt Max = APInt::getSignedMaxValue(W).sext(W * 2);
- return MulExt.slt(Min) || MulExt.sgt(Max);
-}
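-
-// Worked example (illustrative) at i8: unsigned 16 * 16 = 256 does not fit in
-// 8 bits, so this returns true; signed 16 * 8 = 128 exceeds the i8 maximum of
-// 127 and also returns true, while signed 16 * 7 = 112 fits and returns false.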
-
-
-/// ShrinkDemandedConstant - Check to see if the specified operand of the
-/// specified instruction is a constant integer. If so, check to see if there
-/// are any bits set in the constant that are not demanded. If so, shrink the
-/// constant and return true.
-static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
- APInt Demanded) {
- assert(I && "No instruction?");
- assert(OpNo < I->getNumOperands() && "Operand index too large");
-
- // If the operand is not a constant integer, nothing to do.
- ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
- if (!OpC) return false;
-
- // If there are no bits set that aren't demanded, nothing to do.
- Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
- if ((~Demanded & OpC->getValue()) == 0)
- return false;
-
- // This instruction is producing bits that are not demanded. Shrink the RHS.
- Demanded &= OpC->getValue();
- I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
- return true;
-}
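-
-// e.g. (illustrative): for %t = and i32 %x, 255 where only the low four bits
-// are demanded, the constant shrinks to 15 and this returns true.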
-
-// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a
-// set of known zero and one bits, compute the maximum and minimum values that
-// could have the specified known zero and known one bits, returning them in
-// min/max.
-static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero,
- const APInt& KnownOne,
- APInt& Min, APInt& Max) {
- assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
- KnownZero.getBitWidth() == Min.getBitWidth() &&
- KnownZero.getBitWidth() == Max.getBitWidth() &&
- "KnownZero, KnownOne and Min, Max must have equal bitwidth.");
- APInt UnknownBits = ~(KnownZero|KnownOne);
-
- // The minimum value is when all unknown bits are zeros, EXCEPT for the sign
- // bit if it is unknown.
- Min = KnownOne;
- Max = KnownOne|UnknownBits;
-
- if (UnknownBits.isNegative()) { // Sign bit is unknown
- Min.set(Min.getBitWidth()-1);
- Max.clear(Max.getBitWidth()-1);
- }
-}
-
-// ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and
-// a set of known zero and one bits, compute the maximum and minimum values that
-// could have the specified known zero and known one bits, returning them in
-// min/max.
-static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
- const APInt &KnownOne,
- APInt &Min, APInt &Max) {
- assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
- KnownZero.getBitWidth() == Min.getBitWidth() &&
- KnownZero.getBitWidth() == Max.getBitWidth() &&
- "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
- APInt UnknownBits = ~(KnownZero|KnownOne);
-
- // The minimum value is when the unknown bits are all zeros.
- Min = KnownOne;
- // The maximum value is when the unknown bits are all ones.
- Max = KnownOne|UnknownBits;
-}
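-
-// Worked example (illustrative) at 4 bits with KnownZero = 0100 and
-// KnownOne = 0001: the unknown bits are 1010, so the unsigned range is
-// [0001, 1011], while the signed version sets the unknown sign bit for the
-// minimum and clears it for the maximum, giving [1001, 0011].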
-
-/// SimplifyDemandedInstructionBits - Inst is an integer instruction that
-/// SimplifyDemandedBits knows about. See if the instruction has any
-/// properties that allow us to simplify its operands.
-bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
- unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
-
- Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
- KnownZero, KnownOne, 0);
- if (V == 0) return false;
- if (V == &Inst) return true;
- ReplaceInstUsesWith(Inst, V);
- return true;
-}
-
-/// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
-/// specified instruction operand if possible, updating it in place. It returns
-/// true if it made any change and false otherwise.
-bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
- APInt &KnownZero, APInt &KnownOne,
- unsigned Depth) {
- Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
- KnownZero, KnownOne, Depth);
- if (NewVal == 0) return false;
- U = NewVal;
- return true;
-}
-
-
-/// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
-/// value based on the demanded bits. When this function is called, it is known
-/// that only the bits set in DemandedMask of the result of V are ever used
-/// downstream. Consequently, depending on the mask and V, it may be possible
-/// to replace V with a constant or one of its operands. In such cases, this
-/// function does the replacement and returns the simplified value. In all
-/// other cases, it analyzes the expression and sets KnownOne to all the bits
-/// that are known to be one in the expression and KnownZero to all the bits
-/// that are known to be zero. These are provided to potentially allow the
-/// caller (which might recursively be SimplifyDemandedBits itself) to simplify
-/// the expression. KnownOne and KnownZero always follow the invariant that
-/// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
-/// the bits in KnownOne and KnownZero may only be accurate for those bits set
-/// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
-/// and KnownOne must all be the same.
-///
-/// This returns null if it did not change anything and it permits no
-/// simplification. This returns V itself if it did some simplification of V's
-/// operands based on the information about what bits are demanded. This returns
-/// some other non-null value if it found out that V is equal to another value
-/// in the context where the specified bits are demanded, but not for all users.
-Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
- APInt &KnownZero, APInt &KnownOne,
- unsigned Depth) {
- assert(V != 0 && "Null pointer of Value???");
- assert(Depth <= 6 && "Limit Search Depth");
- uint32_t BitWidth = DemandedMask.getBitWidth();
- const Type *VTy = V->getType();
- assert((TD || !isa<PointerType>(VTy)) &&
- "SimplifyDemandedBits needs to know bit widths!");
- assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
- (!VTy->isIntOrIntVector() ||
- VTy->getScalarSizeInBits() == BitWidth) &&
- KnownZero.getBitWidth() == BitWidth &&
- KnownOne.getBitWidth() == BitWidth &&
- "Value *V, DemandedMask, KnownZero and KnownOne "
- "must have same BitWidth");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- // We know all of the bits for a constant!
- KnownOne = CI->getValue() & DemandedMask;
- KnownZero = ~KnownOne & DemandedMask;
- return 0;
- }
- if (isa<ConstantPointerNull>(V)) {
- // We know all of the bits for a constant!
- KnownOne.clear();
- KnownZero = DemandedMask;
- return 0;
- }
-
- KnownZero.clear();
- KnownOne.clear();
- if (DemandedMask == 0) { // Not demanding any bits from V.
- if (isa<UndefValue>(V))
- return 0;
- return UndefValue::get(VTy);
- }
-
- if (Depth == 6) // Limit search depth.
- return 0;
-
- APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne;
-
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I) {
- ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
- return 0; // Only analyze instructions.
- }
-
- // If there are multiple uses of this value and we aren't at the root, then
- // we can't do any simplifications of the operands, because DemandedMask
- // only reflects the bits demanded by *one* of the users.
- if (Depth != 0 && !I->hasOneUse()) {
- // Despite the fact that we can't simplify this instruction in every user's
- // context, we can at least compute the knownzero/knownone bits, and we can
- // do simplifications that apply to *just* the one user if we know that
- // this instruction has a simpler value in that context.
- if (I->getOpcode() == Instruction::And) {
- // If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(I->getOperand(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero,
- LHSKnownZero, LHSKnownOne, Depth+1);
-
- // If all of the demanded bits are known 1 on one side, return the other.
- // These bits cannot contribute to the result of the 'and' in this
- // context.
- if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
- (DemandedMask & ~LHSKnownZero))
- return I->getOperand(0);
- if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
- (DemandedMask & ~RHSKnownZero))
- return I->getOperand(1);
-
- // If all of the demanded bits in the inputs are known zeros, return zero.
- if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
- return Constant::getNullValue(VTy);
-
- } else if (I->getOpcode() == Instruction::Or) {
- // We can simplify (X|Y) -> X or Y in the user's context if we know that
- // only bits from X or Y are demanded.
-
- // If either the LHS or the RHS are One, the result is One.
- ComputeMaskedBits(I->getOperand(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne,
- LHSKnownZero, LHSKnownOne, Depth+1);
-
- // If all of the demanded bits are known zero on one side, return the
- // other. These bits cannot contribute to the result of the 'or' in this
- // context.
- if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
- (DemandedMask & ~LHSKnownOne))
- return I->getOperand(0);
- if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
- (DemandedMask & ~RHSKnownOne))
- return I->getOperand(1);
-
- // If all of the potentially set bits on one side are known to be set on
- // the other side, just use the 'other' side.
- if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
- (DemandedMask & (~RHSKnownZero)))
- return I->getOperand(0);
- if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
- (DemandedMask & (~LHSKnownZero)))
- return I->getOperand(1);
- }
-
- // Compute the KnownZero/KnownOne bits to simplify things downstream.
- ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth);
- return 0;
- }
-
- // If this is the root being simplified, allow it to have multiple uses,
- // just set the DemandedMask to all bits so that we can try to simplify the
- // operands. This allows visitTruncInst (for example) to simplify the
- // operand of a trunc without duplicating all the logic below.
- if (Depth == 0 && !V->hasOneUse())
- DemandedMask = APInt::getAllOnesValue(BitWidth);
-
- switch (I->getOpcode()) {
- default:
- ComputeMaskedBits(I, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
- break;
- case Instruction::And:
- // If either the LHS or the RHS are Zero, the result is zero.
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
- LHSKnownZero, LHSKnownOne, Depth+1))
- return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
-
- // If all of the demanded bits are known 1 on one side, return the other.
- // These bits cannot contribute to the result of the 'and'.
- if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
- (DemandedMask & ~LHSKnownZero))
- return I->getOperand(0);
- if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
- (DemandedMask & ~RHSKnownZero))
- return I->getOperand(1);
-
- // If all of the demanded bits in the inputs are known zeros, return zero.
- if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
- return Constant::getNullValue(VTy);
-
- // If the RHS is a constant, see if we can simplify it.
- if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
- return I;
-
- // Output known-1 bits are only known if set in both the LHS & RHS.
- RHSKnownOne &= LHSKnownOne;
- // Output known-0 are known to be clear if zero in either the LHS | RHS.
- RHSKnownZero |= LHSKnownZero;
- break;
- case Instruction::Or:
- // If either the LHS or the RHS are One, the result is One.
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
- LHSKnownZero, LHSKnownOne, Depth+1))
- return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
-
- // If all of the demanded bits are known zero on one side, return the other.
- // These bits cannot contribute to the result of the 'or'.
- if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
- (DemandedMask & ~LHSKnownOne))
- return I->getOperand(0);
- if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
- (DemandedMask & ~RHSKnownOne))
- return I->getOperand(1);
-
- // If all of the potentially set bits on one side are known to be set on
- // the other side, just use the 'other' side.
- if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
- (DemandedMask & (~RHSKnownZero)))
- return I->getOperand(0);
- if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
- (DemandedMask & (~LHSKnownZero)))
- return I->getOperand(1);
-
- // If the RHS is a constant, see if we can simplify it.
- if (ShrinkDemandedConstant(I, 1, DemandedMask))
- return I;
-
- // Output known-0 bits are only known if clear in both the LHS & RHS.
- RHSKnownZero &= LHSKnownZero;
- // Output known-1 are known to be set if set in either the LHS | RHS.
- RHSKnownOne |= LHSKnownOne;
- break;
- case Instruction::Xor: {
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- LHSKnownZero, LHSKnownOne, Depth+1))
- return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
-
- // If all of the demanded bits are known zero on one side, return the other.
- // These bits cannot contribute to the result of the 'xor'.
- if ((DemandedMask & RHSKnownZero) == DemandedMask)
- return I->getOperand(0);
- if ((DemandedMask & LHSKnownZero) == DemandedMask)
- return I->getOperand(1);
-
- // Output known-0 bits are known if clear or set in both the LHS & RHS.
- APInt KnownZeroOut = (RHSKnownZero & LHSKnownZero) |
- (RHSKnownOne & LHSKnownOne);
- // Output known-1 are known to be set if set in only one of the LHS, RHS.
- APInt KnownOneOut = (RHSKnownZero & LHSKnownOne) |
- (RHSKnownOne & LHSKnownZero);
-
- // If all of the demanded bits are known to be zero on one side or the
- // other, turn this into an *inclusive* or.
- // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
- if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
- Instruction *Or =
- BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
- I->getName());
- return InsertNewInstBefore(Or, *I);
- }
-
- // If all of the demanded bits on one side are known, and all of the set
- // bits on that side are also known to be set on the other side, turn this
- // into an AND, as we know the bits will be cleared.
- // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
- if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
- // all known
- if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
- Constant *AndC = Constant::getIntegerValue(VTy,
- ~RHSKnownOne & DemandedMask);
- Instruction *And =
- BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
- return InsertNewInstBefore(And, *I);
- }
- }
-
- // If the RHS is a constant, see if we can simplify it.
- // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
- if (ShrinkDemandedConstant(I, 1, DemandedMask))
- return I;
-
- // If our LHS is an 'and' and if it has one use, and if any of the bits we
- // are flipping are known to be set, then the xor is just resetting those
- // bits to zero. We can just knock out bits from the 'and' and the 'xor',
- // simplifying both of them.
- if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
- if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
- isa<ConstantInt>(I->getOperand(1)) &&
- isa<ConstantInt>(LHSInst->getOperand(1)) &&
- (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
- ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
- ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
- APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
-
- Constant *AndC =
- ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
- Instruction *NewAnd =
- BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
- InsertNewInstBefore(NewAnd, *I);
-
- Constant *XorC =
- ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
- Instruction *NewXor =
- BinaryOperator::CreateXor(NewAnd, XorC, "tmp");
- return InsertNewInstBefore(NewXor, *I);
- }
-
-
- RHSKnownZero = KnownZeroOut;
- RHSKnownOne = KnownOneOut;
- break;
- }
- case Instruction::Select:
- if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- LHSKnownZero, LHSKnownOne, Depth+1))
- return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
-
- // If the operands are constants, see if we can simplify them.
- if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
- ShrinkDemandedConstant(I, 2, DemandedMask))
- return I;
-
- // Only known if known in both the LHS and RHS.
- RHSKnownOne &= LHSKnownOne;
- RHSKnownZero &= LHSKnownZero;
- break;
- case Instruction::Trunc: {
- unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
- DemandedMask.zext(truncBf);
- RHSKnownZero.zext(truncBf);
- RHSKnownOne.zext(truncBf);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1))
- return I;
- DemandedMask.trunc(BitWidth);
- RHSKnownZero.trunc(BitWidth);
- RHSKnownOne.trunc(BitWidth);
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- break;
- }
- case Instruction::BitCast:
- if (!I->getOperand(0)->getType()->isIntOrIntVector())
- return 0; // vector->int or fp->int?
-
- if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
- if (const VectorType *SrcVTy =
- dyn_cast<VectorType>(I->getOperand(0)->getType())) {
- if (DstVTy->getNumElements() != SrcVTy->getNumElements())
- // Don't touch a bitcast between vectors of different element counts.
- return 0;
- } else
- // Don't touch a scalar-to-vector bitcast.
- return 0;
- } else if (isa<VectorType>(I->getOperand(0)->getType()))
- // Don't touch a vector-to-scalar bitcast.
- return 0;
-
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1))
- return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- break;
- case Instruction::ZExt: {
- // Compute the bits in the result that are not present in the input.
- unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
-
- DemandedMask.trunc(SrcBitWidth);
- RHSKnownZero.trunc(SrcBitWidth);
- RHSKnownOne.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1))
- return I;
- DemandedMask.zext(BitWidth);
- RHSKnownZero.zext(BitWidth);
- RHSKnownOne.zext(BitWidth);
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- // The top bits are known to be zero.
- RHSKnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
- break;
- }
- case Instruction::SExt: {
- // Compute the bits in the result that are not present in the input.
- unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
-
- APInt InputDemandedBits = DemandedMask &
- APInt::getLowBitsSet(BitWidth, SrcBitWidth);
-
- APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
- // If any of the sign extended bits are demanded, we know that the sign
- // bit is demanded.
- if ((NewBits & DemandedMask) != 0)
- InputDemandedBits.set(SrcBitWidth-1);
-
- InputDemandedBits.trunc(SrcBitWidth);
- RHSKnownZero.trunc(SrcBitWidth);
- RHSKnownOne.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
- RHSKnownZero, RHSKnownOne, Depth+1))
- return I;
- InputDemandedBits.zext(BitWidth);
- RHSKnownZero.zext(BitWidth);
- RHSKnownOne.zext(BitWidth);
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
-
- // If the sign bit of the input is known set or clear, then we know the
- // top bits of the result.
-
- // If the input sign bit is known zero, or if the NewBits are not demanded,
- // convert this into a zero extension.
- if (RHSKnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
- // Convert to ZExt cast
- CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
- return InsertNewInstBefore(NewCast, *I);
- } else if (RHSKnownOne[SrcBitWidth-1]) { // Input sign bit known set
- RHSKnownOne |= NewBits;
- }
- break;
- }
- case Instruction::Add: {
- // Figure out what the input bits are. If the top bits of the add result
- // are not demanded, then the add doesn't demand them from its input
- // either.
- unsigned NLZ = DemandedMask.countLeadingZeros();
-
- // If there is a constant on the RHS, there are a variety of transformations
- // we can do.
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
- // If null, this should be simplified elsewhere. Some of the transforms
- // here won't work if the RHS is zero.
- if (RHS->isZero())
- break;
-
- // If the top bit of the output is demanded, demand everything from the
- // input. Otherwise, we demand all the input bits except NLZ top bits.
- APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
-
- // Find information about known zero/one bits in the input.
- if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
- LHSKnownZero, LHSKnownOne, Depth+1))
- return I;
-
- // If the RHS of the add has bits set that can't affect the input, reduce
- // the constant.
- if (ShrinkDemandedConstant(I, 1, InDemandedBits))
- return I;
-
- // Avoid excess work.
- if (LHSKnownZero == 0 && LHSKnownOne == 0)
- break;
-
- // Turn it into OR if input bits are zero.
- if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
- Instruction *Or =
- BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
- I->getName());
- return InsertNewInstBefore(Or, *I);
- }
-
- // We can say something about the output known-zero and known-one bits,
- // depending on potential carries from the input constant and the
- // unknowns. For example if the LHS is known to have at most the 0x0F0F0
- // bits set and the RHS constant is 0x01001, then we know we have a known
- // one mask of 0x00001 and a known zero mask of 0xE0F0E.
-
- // To compute this, we first compute the potential carry bits. These are
- // the bits which may be modified. I'm not aware of a better way to do
- // this scan.
- const APInt &RHSVal = RHS->getValue();
- APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
-
- // Now that we know which bits have carries, compute the known-1/0 sets.
-
- // Bits are known one if they are known zero in one operand and one in the
- // other, and there is no input carry.
- RHSKnownOne = ((LHSKnownZero & RHSVal) |
- (LHSKnownOne & ~RHSVal)) & ~CarryBits;
-
- // Bits are known zero if they are known zero in both operands and there
- // is no input carry.
- RHSKnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
- } else {
- // If the high-bits of this ADD are not demanded, then it does not demand
- // the high bits of its LHS or RHS.
- if (DemandedMask[BitWidth-1] == 0) {
- // Right fill the mask of bits for this ADD to demand the most
- // significant bit and all those below it.
- APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1))
- return I;
- }
- }
- break;
- }
- case Instruction::Sub:
- // If the high-bits of this SUB are not demanded, then it does not demand
- // the high bits of its LHS or RHS.
- if (DemandedMask[BitWidth-1] == 0) {
- // Right fill the mask of bits for this SUB to demand the most
- // significant bit and all those below it.
- uint32_t NLZ = DemandedMask.countLeadingZeros();
- APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1))
- return I;
- }
- // Otherwise just hand the sub off to ComputeMaskedBits to fill in
- // the known zeros and ones.
- ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
- break;
- case Instruction::Shl:
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
- APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
- RHSKnownZero, RHSKnownOne, Depth+1))
- return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- RHSKnownZero <<= ShiftAmt;
- RHSKnownOne <<= ShiftAmt;
- // low bits known zero.
- if (ShiftAmt)
- RHSKnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
- }
- break;
- case Instruction::LShr:
- // For a logical shift right
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
-
- // Unsigned shift right.
- APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
- RHSKnownZero, RHSKnownOne, Depth+1))
- return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt);
- RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt);
- if (ShiftAmt) {
- // Compute the new bits that are at the top now.
- APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
- RHSKnownZero |= HighBits; // high bits known zero.
- }
- }
- break;
- case Instruction::AShr:
- // If this is an arithmetic shift right and only the low-bit is set, we can
- // always convert this into a logical shr, even if the shift amount is
- // variable. The low bit of the shift cannot be an input sign bit unless
- // the shift amount is >= the size of the datatype, which is undefined.
- if (DemandedMask == 1) {
- // Perform the logical shift right.
- Instruction *NewVal = BinaryOperator::CreateLShr(
- I->getOperand(0), I->getOperand(1), I->getName());
- return InsertNewInstBefore(NewVal, *I);
- }
-
- // If the sign bit is the only bit demanded by this ashr, then there is no
- // need to do it, the shift doesn't change the high bit.
- if (DemandedMask.isSignBit())
- return I->getOperand(0);
-
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint32_t ShiftAmt = SA->getLimitedValue(BitWidth);
-
- // Signed shift right.
- APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
- // If any of the "high bits" are demanded, we should set the sign bit as
- // demanded.
- if (DemandedMask.countLeadingZeros() <= ShiftAmt)
- DemandedMaskIn.set(BitWidth-1);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
- RHSKnownZero, RHSKnownOne, Depth+1))
- return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- // Compute the new bits that are at the top now.
- APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
- RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt);
- RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt);
-
- // Handle the sign bits.
- APInt SignBit(APInt::getSignBit(BitWidth));
- // Adjust to where it is now in the mask.
- SignBit = APIntOps::lshr(SignBit, ShiftAmt);
-
- // If the input sign bit is known to be zero, or if none of the top bits
- // are demanded, turn this into an unsigned shift right.
- if (BitWidth <= ShiftAmt || RHSKnownZero[BitWidth-ShiftAmt-1] ||
- (HighBits & ~DemandedMask) == HighBits) {
- // Perform the logical shift right.
- Instruction *NewVal = BinaryOperator::CreateLShr(
- I->getOperand(0), SA, I->getName());
- return InsertNewInstBefore(NewVal, *I);
- } else if ((RHSKnownOne & SignBit) != 0) { // New bits are known one.
- RHSKnownOne |= HighBits;
- }
- }
- break;
- case Instruction::SRem:
- if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
- APInt RA = Rem->getValue().abs();
- if (RA.isPowerOf2()) {
- if (DemandedMask.ult(RA)) // srem won't affect demanded bits
- return I->getOperand(0);
-
- APInt LowBits = RA - 1;
- APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
- LHSKnownZero, LHSKnownOne, Depth+1))
- return I;
-
- if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
- LHSKnownZero |= ~LowBits;
-
- KnownZero |= LHSKnownZero & DemandedMask;
-
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
- }
- }
- break;
- case Instruction::URem: {
- APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
- KnownZero2, KnownOne2, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
- KnownZero2, KnownOne2, Depth+1))
- return I;
-
- unsigned Leaders = KnownZero2.countLeadingOnes();
- KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
- break;
- }
- case Instruction::Call:
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::bswap: {
- // If the only bits demanded come from one byte of the bswap result,
- // just shift the input byte into position to eliminate the bswap.
- unsigned NLZ = DemandedMask.countLeadingZeros();
- unsigned NTZ = DemandedMask.countTrailingZeros();
-
- // Round NTZ down to the next byte. If we have 11 trailing zeros, then
- // we need all the bits down to bit 8. Likewise, round NLZ. If we
- // have 14 leading zeros, round to 8.
- NLZ &= ~7;
- NTZ &= ~7;
- // If we need exactly one byte, we can do this transformation.
- if (BitWidth-NLZ-NTZ == 8) {
- unsigned ResultBit = NTZ;
- unsigned InputBit = BitWidth-NTZ-8;
-
- // Replace this with either a left or right shift to get the byte into
- // the right place.
- Instruction *NewVal;
- if (InputBit > ResultBit)
- NewVal = BinaryOperator::CreateLShr(I->getOperand(1),
- ConstantInt::get(I->getType(), InputBit-ResultBit));
- else
- NewVal = BinaryOperator::CreateShl(I->getOperand(1),
- ConstantInt::get(I->getType(), ResultBit-InputBit));
- NewVal->takeName(I);
- return InsertNewInstBefore(NewVal, *I);
- }
-
- // TODO: Could compute known zero/one bits based on the input.
- break;
- }
- }
- }
- ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
- break;
- }
-
- // If the client is only demanding bits that we know, return the known
- // constant.
- if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask)
- return Constant::getIntegerValue(VTy, RHSKnownOne);
- return 0;
-}
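-
-// e.g. (illustrative): if only the low four bits of (shl i8 %x, 4) are
-// demanded, those bits are known zero, so repeated application of this
-// routine folds the shift down to the constant 0.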
-
-
-/// SimplifyDemandedVectorElts - The specified value produces a vector with
-/// any number of elements. DemandedElts contains the set of elements that are
-/// actually used by the caller. This method analyzes which elements of the
-/// operand are undef and returns that information in UndefElts.
-///
-/// If the information about demanded elements can be used to simplify the
-/// operation, the operation is simplified and the resulting value is
-/// returned. This returns null if no change was made.
-Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
- APInt& UndefElts,
- unsigned Depth) {
- unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
- APInt EltMask(APInt::getAllOnesValue(VWidth));
- assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
-
- if (isa<UndefValue>(V)) {
- // If the entire vector is undefined, just return this info.
- UndefElts = EltMask;
- return 0;
- } else if (DemandedElts == 0) { // If nothing is demanded, provide undef.
- UndefElts = EltMask;
- return UndefValue::get(V->getType());
- }
-
- UndefElts = 0;
- if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) {
- const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
- Constant *Undef = UndefValue::get(EltTy);
-
- std::vector<Constant*> Elts;
- for (unsigned i = 0; i != VWidth; ++i)
- if (!DemandedElts[i]) { // If not demanded, set to undef.
- Elts.push_back(Undef);
- UndefElts.set(i);
- } else if (isa<UndefValue>(CP->getOperand(i))) { // Already undef.
- Elts.push_back(Undef);
- UndefElts.set(i);
- } else { // Otherwise, defined.
- Elts.push_back(CP->getOperand(i));
- }
-
- // If we changed the constant, return it.
- Constant *NewCP = ConstantVector::get(Elts);
- return NewCP != CP ? NewCP : 0;
- } else if (isa<ConstantAggregateZero>(V)) {
- // Simplify the CAZ to a ConstantVector where the non-demanded elements are
- // set to undef.
-
- // Check if this is identity. If so, return 0 since we are not simplifying
- // anything.
- if (DemandedElts.isAllOnesValue())
- return 0;
-
- const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
- Constant *Zero = Constant::getNullValue(EltTy);
- Constant *Undef = UndefValue::get(EltTy);
- std::vector<Constant*> Elts;
- for (unsigned i = 0; i != VWidth; ++i) {
- Constant *Elt = DemandedElts[i] ? Zero : Undef;
- Elts.push_back(Elt);
- }
- UndefElts = DemandedElts ^ EltMask;
- return ConstantVector::get(Elts);
- }
-
- // Limit search depth.
- if (Depth == 10)
- return 0;
-
- // If multiple users are using the root value, proceed with
- // simplification conservatively assuming that all elements
- // are needed.
- if (!V->hasOneUse()) {
- // Quit if we find multiple users of a non-root value though.
- // They'll be handled when it's their turn to be visited by
- // the main instcombine process.
- if (Depth != 0)
- // TODO: Just compute the UndefElts information recursively.
- return 0;
-
- // Conservatively assume that all elements are needed.
- DemandedElts = EltMask;
- }
-
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I) return 0; // Only analyze instructions.
-
- bool MadeChange = false;
- APInt UndefElts2(VWidth, 0);
- Value *TmpV;
- switch (I->getOpcode()) {
- default: break;
-
- case Instruction::InsertElement: {
- // If this is a variable index, we don't know which element it overwrites,
- // so demand exactly the same input as we produce.
- ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
- if (Idx == 0) {
- // Note that we can't propagate undef elt info, because we don't know
- // which elt is getting updated.
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts2, Depth+1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
- break;
- }
-
- // If this is inserting an element that isn't demanded, remove this
- // insertelement.
- unsigned IdxNo = Idx->getZExtValue();
- if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
- Worklist.Add(I);
- return I->getOperand(0);
- }
-
- // Otherwise, the element inserted overwrites whatever was there, so the
- // input demanded set is simpler than the output set.
- APInt DemandedElts2 = DemandedElts;
- DemandedElts2.clear(IdxNo);
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
- UndefElts, Depth+1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
-
- // The inserted element is defined.
- UndefElts.clear(IdxNo);
- break;
- }
- case Instruction::ShuffleVector: {
- ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
- uint64_t LHSVWidth =
- cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
- APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
- for (unsigned i = 0; i < VWidth; i++) {
- if (DemandedElts[i]) {
- unsigned MaskVal = Shuffle->getMaskValue(i);
- if (MaskVal != -1u) {
- assert(MaskVal < LHSVWidth * 2 &&
- "shufflevector mask index out of range!");
- if (MaskVal < LHSVWidth)
- LeftDemanded.set(MaskVal);
- else
- RightDemanded.set(MaskVal - LHSVWidth);
- }
- }
- }
-
- APInt UndefElts4(LHSVWidth, 0);
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
- UndefElts4, Depth+1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
-
- APInt UndefElts3(LHSVWidth, 0);
- TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
- UndefElts3, Depth+1);
- if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
-
- bool NewUndefElts = false;
- for (unsigned i = 0; i < VWidth; i++) {
- unsigned MaskVal = Shuffle->getMaskValue(i);
- if (MaskVal == -1u) {
- UndefElts.set(i);
- } else if (MaskVal < LHSVWidth) {
- if (UndefElts4[MaskVal]) {
- NewUndefElts = true;
- UndefElts.set(i);
- }
- } else {
- if (UndefElts3[MaskVal - LHSVWidth]) {
- NewUndefElts = true;
- UndefElts.set(i);
- }
- }
- }
-
- if (NewUndefElts) {
- // Add additional discovered undefs.
- std::vector<Constant*> Elts;
- for (unsigned i = 0; i < VWidth; ++i) {
- if (UndefElts[i])
- Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
- else
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context),
- Shuffle->getMaskValue(i)));
- }
- I->setOperand(2, ConstantVector::get(Elts));
- MadeChange = true;
- }
- break;
- }
- case Instruction::BitCast: {
- // Vector->vector casts only.
- const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
- if (!VTy) break;
- unsigned InVWidth = VTy->getNumElements();
- APInt InputDemandedElts(InVWidth, 0);
- unsigned Ratio;
-
- if (VWidth == InVWidth) {
- // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
- // elements as are demanded of us.
- Ratio = 1;
- InputDemandedElts = DemandedElts;
- } else if (VWidth > InVWidth) {
- // Untested so far.
- break;
-
- // If there are more elements in the result than there are in the source,
- // then an input element is live if any of the corresponding output
- // elements are live.
- Ratio = VWidth/InVWidth;
- for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
- if (DemandedElts[OutIdx])
- InputDemandedElts.set(OutIdx/Ratio);
- }
- } else {
- // Untested so far.
- break;
-
- // If there are more elements in the source than there are in the result,
- // then an input element is live if the corresponding output element is
- // live.
- Ratio = InVWidth/VWidth;
- for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
- if (DemandedElts[InIdx/Ratio])
- InputDemandedElts.set(InIdx);
- }
-
- // Simplify the operand based on the demanded elements computed above.
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
- UndefElts2, Depth+1);
- if (TmpV) {
- I->setOperand(0, TmpV);
- MadeChange = true;
- }
-
- UndefElts = UndefElts2;
- if (VWidth > InVWidth) {
- llvm_unreachable("Unimp");
- // If there are more elements in the result than there are in the source,
- // then an output element is undef if the corresponding input element is
- // undef.
- for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
- if (UndefElts2[OutIdx/Ratio])
- UndefElts.set(OutIdx);
- } else if (VWidth < InVWidth) {
- llvm_unreachable("Unimp");
- // If there are more elements in the source than there are in the result,
- // then a result element is undef if all of the corresponding input
- // elements are undef.
- UndefElts = ~0ULL >> (64-VWidth); // Start out all undef.
- for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
- if (!UndefElts2[InIdx]) // Not undef?
- UndefElts.clear(InIdx/Ratio); // Clear undef bit.
- }
- break;
- }
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- // These ops operate element-wise, so each demanded output element demands
- // the corresponding element of both inputs.
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts, Depth+1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
- TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
- UndefElts2, Depth+1);
- if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
-
- // Output elements are undefined if both are undefined. Consider things
- // like undef&0. The result is known zero, not undef.
- UndefElts &= UndefElts2;
- break;
-
- case Instruction::Call: {
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
- if (!II) break;
- switch (II->getIntrinsicID()) {
- default: break;
-
- // Binary vector operations that work column-wise. A dest element is a
- // function of the corresponding input elements from the two inputs.
- case Intrinsic::x86_sse_sub_ss:
- case Intrinsic::x86_sse_mul_ss:
- case Intrinsic::x86_sse_min_ss:
- case Intrinsic::x86_sse_max_ss:
- case Intrinsic::x86_sse2_sub_sd:
- case Intrinsic::x86_sse2_mul_sd:
- case Intrinsic::x86_sse2_min_sd:
- case Intrinsic::x86_sse2_max_sd:
- TmpV = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
- UndefElts, Depth+1);
- if (TmpV) { II->setOperand(1, TmpV); MadeChange = true; }
- TmpV = SimplifyDemandedVectorElts(II->getOperand(2), DemandedElts,
- UndefElts2, Depth+1);
- if (TmpV) { II->setOperand(2, TmpV); MadeChange = true; }
-
- // If only the low elt is demanded and this is a scalarizable intrinsic,
- // scalarize it now.
- if (DemandedElts == 1) {
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::x86_sse_sub_ss:
- case Intrinsic::x86_sse_mul_ss:
- case Intrinsic::x86_sse2_sub_sd:
- case Intrinsic::x86_sse2_mul_sd:
- // TODO: Lower MIN/MAX/ABS/etc
- Value *LHS = II->getOperand(1);
- Value *RHS = II->getOperand(2);
- // Extract element 0 of each operand as a scalar.
- LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS,
- ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), "tmp"), *II);
- RHS = InsertNewInstBefore(ExtractElementInst::Create(RHS,
- ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), "tmp"), *II);
-
- switch (II->getIntrinsicID()) {
- default: llvm_unreachable("Case stmts out of sync!");
- case Intrinsic::x86_sse_sub_ss:
- case Intrinsic::x86_sse2_sub_sd:
- TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS,
- II->getName()), *II);
- break;
- case Intrinsic::x86_sse_mul_ss:
- case Intrinsic::x86_sse2_mul_sd:
- TmpV = InsertNewInstBefore(BinaryOperator::CreateFMul(LHS, RHS,
- II->getName()), *II);
- break;
- }
-
- Instruction *New =
- InsertElementInst::Create(
- UndefValue::get(II->getType()), TmpV,
- ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), II->getName());
- InsertNewInstBefore(New, *II);
- return New;
- }
- }
-
- // Output elements are undefined if both are undefined. Consider things
- // like undef&0. The result is known zero, not undef.
- UndefElts &= UndefElts2;
- break;
- }
- break;
- }
- }
- return MadeChange ? I : 0;
-}
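-
-// e.g. (illustrative): for a <4 x float> shuffle whose mask reads only
-// elements 0 and 1 of its first operand, elements 2 and 3 of that operand are
-// not demanded, so the recursive calls above may rewrite them to undef.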
-
-
-/// AssociativeOpt - Perform an optimization on an associative operator. This
-/// function is designed to check a chain of associative operators for a
-/// potential to apply a certain optimization. Since the optimization may be
-/// applicable if the expression was reassociated, this checks the chain, then
-/// reassociates the expression as necessary to expose the optimization
-/// opportunity. This makes use of a special Functor, which must define
-/// 'shouldApply' and 'apply' methods.
-///
-template<typename Functor>
-static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
- unsigned Opcode = Root.getOpcode();
- Value *LHS = Root.getOperand(0);
-
- // Quick check, see if the immediate LHS matches...
- if (F.shouldApply(LHS))
- return F.apply(Root);
-
- // Otherwise, if the LHS is not of the same opcode as the root, return.
- Instruction *LHSI = dyn_cast<Instruction>(LHS);
- while (LHSI && LHSI->getOpcode() == Opcode && LHSI->hasOneUse()) {
- // Should we apply this transform to the RHS?
- bool ShouldApply = F.shouldApply(LHSI->getOperand(1));
-
- // If not to the RHS, check to see if we should apply to the LHS...
- if (!ShouldApply && F.shouldApply(LHSI->getOperand(0))) {
- cast<BinaryOperator>(LHSI)->swapOperands(); // Make the LHS the RHS
- ShouldApply = true;
- }
-
- // If the functor wants to apply the optimization to the RHS of LHSI,
- // reassociate the expression from ((? op A) op B) to (? op (A op B))
- if (ShouldApply) {
- // Now all of the instructions are in the current basic block, go ahead
- // and perform the reassociation.
- Instruction *TmpLHSI = cast<Instruction>(Root.getOperand(0));
-
- // First move the selected RHS to the LHS of the root...
- Root.setOperand(0, LHSI->getOperand(1));
-
- // Make what used to be the LHS of the root be the user of the root...
- Value *ExtraOperand = TmpLHSI->getOperand(1);
- if (&Root == TmpLHSI) {
- Root.replaceAllUsesWith(Constant::getNullValue(TmpLHSI->getType()));
- return 0;
- }
- Root.replaceAllUsesWith(TmpLHSI); // Users now use TmpLHSI
- TmpLHSI->setOperand(1, &Root); // TmpLHSI now uses the root
- BasicBlock::iterator ARI = &Root; ++ARI;
- TmpLHSI->moveBefore(ARI); // Move TmpLHSI to after Root
- ARI = Root;
-
- // Now propagate the ExtraOperand down the chain of instructions until we
- // get to LHSI.
- while (TmpLHSI != LHSI) {
- Instruction *NextLHSI = cast<Instruction>(TmpLHSI->getOperand(0));
- // Move the instruction to immediately before the chain we are
- // constructing to avoid breaking dominance properties.
- NextLHSI->moveBefore(ARI);
- ARI = NextLHSI;
-
- Value *NextOp = NextLHSI->getOperand(1);
- NextLHSI->setOperand(1, ExtraOperand);
- TmpLHSI = NextLHSI;
- ExtraOperand = NextOp;
- }
-
- // Now that the instructions are reassociated, have the functor perform
- // the transformation...
- return F.apply(Root);
- }
-
- LHSI = dyn_cast<Instruction>(LHSI->getOperand(0));
- }
- return 0;
-}
-
-namespace {
-
-// AddRHS - Implements: X + X --> X << 1
-struct AddRHS {
- Value *RHS;
- explicit AddRHS(Value *rhs) : RHS(rhs) {}
- bool shouldApply(Value *LHS) const { return LHS == RHS; }
- Instruction *apply(BinaryOperator &Add) const {
- return BinaryOperator::CreateShl(Add.getOperand(0),
- ConstantInt::get(Add.getType(), 1));
- }
-};
-
-// AddMaskingAnd - Implements (A & C1)+(B & C2) --> (A & C1)|(B & C2)
-// iff C1&C2 == 0
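- // For example, (A & 0xF0) + (B & 0x0F) --> (A & 0xF0) | (B & 0x0F): the
- // masks share no bits, so no carry can propagate and add equals or.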
-struct AddMaskingAnd {
- Constant *C2;
- explicit AddMaskingAnd(Constant *c) : C2(c) {}
- bool shouldApply(Value *LHS) const {
- ConstantInt *C1;
- return match(LHS, m_And(m_Value(), m_ConstantInt(C1))) &&
- ConstantExpr::getAnd(C1, C2)->isNullValue();
- }
- Instruction *apply(BinaryOperator &Add) const {
- return BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1));
- }
-};
-
-}
-
-static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
- InstCombiner *IC) {
- if (CastInst *CI = dyn_cast<CastInst>(&I))
- return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());
-
- // Figure out if the constant is the left or the right argument.
- bool ConstIsRHS = isa<Constant>(I.getOperand(1));
- Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
-
- if (Constant *SOC = dyn_cast<Constant>(SO)) {
- if (ConstIsRHS)
- return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
- return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
- }
-
- Value *Op0 = SO, *Op1 = ConstOperand;
- if (!ConstIsRHS)
- std::swap(Op0, Op1);
-
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
- return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
- SO->getName()+".op");
- if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
- return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
- SO->getName()+".cmp");
- if (FCmpInst *CI = dyn_cast<FCmpInst>(&I))
- return IC->Builder->CreateFCmp(CI->getPredicate(), Op0, Op1,
- SO->getName()+".cmp");
- llvm_unreachable("Unknown binary instruction type!");
-}
-
-// FoldOpIntoSelect - Given an instruction with a select as one operand and a
-// constant as the other operand, try to fold the binary operator into the
-// select arguments. This also works for Cast instructions, which obviously do
-// not have a second operand.
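- //
- // For example, "add (select %C, 1, 2), 8" folds to "select %C, 9, 10".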
-static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
- InstCombiner *IC) {
- // Don't modify shared select instructions
- if (!SI->hasOneUse()) return 0;
- Value *TV = SI->getOperand(1);
- Value *FV = SI->getOperand(2);
-
- if (isa<Constant>(TV) || isa<Constant>(FV)) {
- // Bool selects with constant operands can be folded to logical ops, so
- // don't handle them here.
- if (SI->getType() == Type::getInt1Ty(*IC->getContext())) return 0;
-
- Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, IC);
- Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, IC);
-
- return SelectInst::Create(SI->getCondition(), SelectTrueVal,
- SelectFalseVal);
- }
- return 0;
-}
-
-
-/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
-/// has a PHI node as operand #0, see if we can fold the instruction into the
-/// PHI (which is only possible if all operands to the PHI are constants).
-///
-/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
-/// that would normally be unprofitable because they strongly encourage jump
-/// threading.
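- ///
- /// For example, if %p = phi i32 [ 1, %bb0 ], [ 2, %bb1 ], then
- /// "add i32 %p, 4" folds to phi i32 [ 5, %bb0 ], [ 6, %bb1 ].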
-Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
- bool AllowAggressive) {
- // Note: aggressive folding is currently disabled unconditionally.
- AllowAggressive = false;
- PHINode *PN = cast<PHINode>(I.getOperand(0));
- unsigned NumPHIValues = PN->getNumIncomingValues();
- if (NumPHIValues == 0 ||
- // We normally only transform phis with a single use, unless we're trying
- // hard to make jump threading happen.
- (!PN->hasOneUse() && !AllowAggressive))
- return 0;
-
-
- // Check to see if all of the operands of the PHI are simple constants
- // (constantint/constantfp/undef). If there is one non-constant value,
- // remember the BB it is in. If there is more than one or if *it* is a PHI,
- // bail out. We don't do arbitrary constant expressions here because moving
- // their computation can be expensive without a cost model.
- BasicBlock *NonConstBB = 0;
- for (unsigned i = 0; i != NumPHIValues; ++i)
- if (!isa<Constant>(PN->getIncomingValue(i)) ||
- isa<ConstantExpr>(PN->getIncomingValue(i))) {
- if (NonConstBB) return 0; // More than one non-const value.
- if (isa<PHINode>(PN->getIncomingValue(i))) return 0; // Itself a phi.
- NonConstBB = PN->getIncomingBlock(i);
-
- // If the incoming non-constant value is in I's block, we have an infinite
- // loop.
- if (NonConstBB == I.getParent())
- return 0;
- }
-
- // If there is exactly one non-constant value, we can insert a copy of the
- // operation in that block. However, if this is a critical edge, we would be
- // inserting the computation on some other paths (e.g. inside a loop). Only
- // do this if the pred block is unconditionally branching into the phi block.
- if (NonConstBB != 0 && !AllowAggressive) {
- BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
- if (!BI || !BI->isUnconditional()) return 0;
- }
-
- // Okay, we can do the transformation: create the new PHI node.
- PHINode *NewPN = PHINode::Create(I.getType(), "");
- NewPN->reserveOperandSpace(PN->getNumOperands()/2);
- InsertNewInstBefore(NewPN, *PN);
- NewPN->takeName(PN);
-
- // Next, add all of the operands to the PHI.
- if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
- // We currently only try to fold the condition of a select when it is a phi,
- // not the true/false values.
- Value *TrueV = SI->getTrueValue();
- Value *FalseV = SI->getFalseValue();
- BasicBlock *PhiTransBB = PN->getParent();
- for (unsigned i = 0; i != NumPHIValues; ++i) {
- BasicBlock *ThisBB = PN->getIncomingBlock(i);
- Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
- Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
- Value *InV = 0;
- if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
- InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
- } else {
- assert(PN->getIncomingBlock(i) == NonConstBB);
- InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred,
- FalseVInPred,
- "phitmp", NonConstBB->getTerminator());
- Worklist.Add(cast<Instruction>(InV));
- }
- NewPN->addIncoming(InV, ThisBB);
- }
- } else if (I.getNumOperands() == 2) {
- Constant *C = cast<Constant>(I.getOperand(1));
- for (unsigned i = 0; i != NumPHIValues; ++i) {
- Value *InV = 0;
- if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
- if (CmpInst *CI = dyn_cast<CmpInst>(&I))
- InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
- else
- InV = ConstantExpr::get(I.getOpcode(), InC, C);
- } else {
- assert(PN->getIncomingBlock(i) == NonConstBB);
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
- InV = BinaryOperator::Create(BO->getOpcode(),
- PN->getIncomingValue(i), C, "phitmp",
- NonConstBB->getTerminator());
- else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
- InV = CmpInst::Create(CI->getOpcode(),
- CI->getPredicate(),
- PN->getIncomingValue(i), C, "phitmp",
- NonConstBB->getTerminator());
- else
- llvm_unreachable("Unknown binop!");
-
- Worklist.Add(cast<Instruction>(InV));
- }
- NewPN->addIncoming(InV, PN->getIncomingBlock(i));
- }
- } else {
- CastInst *CI = cast<CastInst>(&I);
- const Type *RetTy = CI->getType();
- for (unsigned i = 0; i != NumPHIValues; ++i) {
- Value *InV;
- if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
- InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
- } else {
- assert(PN->getIncomingBlock(i) == NonConstBB);
- InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i),
- I.getType(), "phitmp",
- NonConstBB->getTerminator());
- Worklist.Add(cast<Instruction>(InV));
- }
- NewPN->addIncoming(InV, PN->getIncomingBlock(i));
- }
- }
- return ReplaceInstUsesWith(I, NewPN);
-}
-
-
-/// WillNotOverflowSignedAdd - Return true if we can prove that:
-/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS))
-/// This basically requires proving that the add in the original type would not
-/// overflow to change the sign bit or have a carry out.
-bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
- // There are different heuristics we can use for this. Here are some simple
- // ones.
-
- // Add has the property that adding any two 2's complement numbers can only
- // have one carry bit which can change a sign. As such, if LHS and RHS each
- // have at least two sign bits, we know that the addition of the two values
- // will sign extend fine.
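- // For example, i8 values with at least two sign bits lie in [-64, 63],
- // so any sum lies in [-128, 126] and cannot overflow an i8 add.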
- if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
- return true;
-
-
- // If one of the operands only has one non-zero bit, and if the other operand
- // has a known-zero bit in a more significant place than it (not including
- // the sign bit), the ripple may go up to and fill the zero, but won't change
- // the sign. For example, (X & ~4) + 1.
-
- // TODO: Implement.
-
- return false;
-}
-
-
-Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
- Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
-
- if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), TD))
- return ReplaceInstUsesWith(I, V);
-
-
- if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) {
- // X + (signbit) --> X ^ signbit
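- // (e.g. for i8, X + 0x80 == X ^ 0x80: adding the sign bit can only flip
- // it, since any carry out of the top bit is discarded.)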
- const APInt& Val = CI->getValue();
- uint32_t BitWidth = Val.getBitWidth();
- if (Val == APInt::getSignBit(BitWidth))
- return BinaryOperator::CreateXor(LHS, RHS);
-
- // See if SimplifyDemandedBits can simplify this. This handles stuff like
- // (X & 254)+1 -> (X&254)|1
- if (SimplifyDemandedInstructionBits(I))
- return &I;
-
- // zext(bool) + C -> bool ? C + 1 : C
- if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
- if (ZI->getSrcTy() == Type::getInt1Ty(*Context))
- return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
- }
-
- if (isa<PHINode>(LHS))
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
-
- ConstantInt *XorRHS = 0;
- Value *XorLHS = 0;
- if (isa<ConstantInt>(RHSC) &&
- match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
- uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
- const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue();
-
- uint32_t Size = TySizeBits / 2;
- APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1));
- APInt CFF80Val(-C0080Val);
- do {
- if (TySizeBits > Size) {
- // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
- // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
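- // E.g. for i32 with Size == 8: ((X & 0xFF) ^ 0x80) + 0xFFFFFF80 is
- // equivalent to sext(trunc(X to i8) to i32), because (y ^ 0x80) - 0x80
- // sign-extends an 8-bit value y.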
- if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) ||
- (RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) {
- // This is a sign extend if the top bits are known zero.
- if (!MaskedValueIsZero(XorLHS,
- APInt::getHighBitsSet(TySizeBits, TySizeBits - Size)))
- Size = 0; // Not a sign ext, but can't be any others either.
- break;
- }
- }
- Size >>= 1;
- C0080Val = APIntOps::lshr(C0080Val, Size);
- CFF80Val = APIntOps::ashr(CFF80Val, Size);
- } while (Size >= 1);
-
- // FIXME: This shouldn't be necessary. When the backends can handle types
- // with funny bit widths then this switch statement should be removed. It
- // is just here to get the size of the "middle" type back up to something
- // that the back ends can handle.
- const Type *MiddleType = 0;
- switch (Size) {
- default: break;
- case 32: MiddleType = Type::getInt32Ty(*Context); break;
- case 16: MiddleType = Type::getInt16Ty(*Context); break;
- case 8: MiddleType = Type::getInt8Ty(*Context); break;
- }
- if (MiddleType) {
- Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext");
- return new SExtInst(NewTrunc, I.getType(), I.getName());
- }
- }
- }
-
- // i1 add -> i1 xor.
- if (I.getType() == Type::getInt1Ty(*Context))
- return BinaryOperator::CreateXor(LHS, RHS);
-
- // X + X --> X << 1
- if (I.getType()->isInteger()) {
- if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS)))
- return Result;
-
- if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
- if (RHSI->getOpcode() == Instruction::Sub)
- if (LHS == RHSI->getOperand(1)) // A + (B - A) --> B
- return ReplaceInstUsesWith(I, RHSI->getOperand(0));
- }
- if (Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
- if (LHSI->getOpcode() == Instruction::Sub)
- if (RHS == LHSI->getOperand(1)) // (B - A) + A --> B
- return ReplaceInstUsesWith(I, LHSI->getOperand(0));
- }
- }
-
- // -A + B --> B - A
- // -A + -B --> -(A + B)
- if (Value *LHSV = dyn_castNegVal(LHS)) {
- if (LHS->getType()->isIntOrIntVector()) {
- if (Value *RHSV = dyn_castNegVal(RHS)) {
- Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
- return BinaryOperator::CreateNeg(NewAdd);
- }
- }
-
- return BinaryOperator::CreateSub(RHS, LHSV);
- }
-
- // A + -B --> A - B
- if (!isa<Constant>(RHS))
- if (Value *V = dyn_castNegVal(RHS))
- return BinaryOperator::CreateSub(LHS, V);
-
-
- ConstantInt *C2;
- if (Value *X = dyn_castFoldableMul(LHS, C2)) {
- if (X == RHS) // X*C + X --> X * (C+1)
- return BinaryOperator::CreateMul(RHS, AddOne(C2));
-
- // X*C1 + X*C2 --> X * (C1+C2)
- ConstantInt *C1;
- if (X == dyn_castFoldableMul(RHS, C1))
- return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
- }
-
- // X + X*C --> X * (C+1)
- if (dyn_castFoldableMul(RHS, C2) == LHS)
- return BinaryOperator::CreateMul(LHS, AddOne(C2));
-
- // X + ~X --> -1 since ~X = -X-1
- if (dyn_castNotVal(LHS) == RHS ||
- dyn_castNotVal(RHS) == LHS)
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
-
- // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0
- if (match(RHS, m_And(m_Value(), m_ConstantInt(C2))))
- if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2)))
- return R;
-
- // A+B --> A|B iff A and B have no bits set in common.
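- // For example, (X & 12) + (Y & 3) --> (X & 12) | (Y & 3): no bit position
- // can be set in both operands, so no carries are generated.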
- if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
- APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
- APInt LHSKnownOne(IT->getBitWidth(), 0);
- APInt LHSKnownZero(IT->getBitWidth(), 0);
- ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
- if (LHSKnownZero != 0) {
- APInt RHSKnownOne(IT->getBitWidth(), 0);
- APInt RHSKnownZero(IT->getBitWidth(), 0);
- ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
-
- // No bits in common -> bitwise or.
- if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
- return BinaryOperator::CreateOr(LHS, RHS);
- }
- }
-
- // W*X + Y*Z --> W * (X+Z) iff W == Y
- if (I.getType()->isIntOrIntVector()) {
- Value *W, *X, *Y, *Z;
- if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
- match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
- if (W != Y) {
- if (W == Z) {
- std::swap(Y, Z);
- } else if (Y == X) {
- std::swap(W, X);
- } else if (X == Z) {
- std::swap(Y, Z);
- std::swap(W, X);
- }
- }
-
- if (W == Y) {
- Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName());
- return BinaryOperator::CreateMul(W, NewAdd);
- }
- }
- }
-
- if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
- Value *X = 0;
- if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X
- return BinaryOperator::CreateSub(SubOne(CRHS), X);
-
- // (X & FF00) + xx00 -> (X+xx00) & FF00
- if (LHS->hasOneUse() &&
- match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
- Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
- if (Anded == CRHS) {
- // See if all bits from the first bit set in the Add RHS up are included
- // in the mask. First, get the rightmost bit.
- const APInt& AddRHSV = CRHS->getValue();
-
- // Form a mask of all bits from the lowest bit added through the top.
- APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
-
- // See if the and mask includes all of these bits.
- APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());
-
- if (AddRHSHighBits == AddRHSHighBitsAnd) {
- // Okay, the xform is safe. Insert the new add pronto.
- Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
- return BinaryOperator::CreateAnd(NewAdd, C2);
- }
- }
- }
-
- // Try to fold constant add into select arguments.
- if (SelectInst *SI = dyn_cast<SelectInst>(LHS))
- if (Instruction *R = FoldOpIntoSelect(I, SI, this))
- return R;
- }
-
- // add (select X 0 (sub n A)) A --> select X A n
- {
- SelectInst *SI = dyn_cast<SelectInst>(LHS);
- Value *A = RHS;
- if (!SI) {
- SI = dyn_cast<SelectInst>(RHS);
- A = LHS;
- }
- if (SI && SI->hasOneUse()) {
- Value *TV = SI->getTrueValue();
- Value *FV = SI->getFalseValue();
- Value *N;
-
- // Can we fold the add into the argument of the select?
- // We check both true and false select arguments for a matching subtract.
- if (match(FV, m_Zero()) &&
- match(TV, m_Sub(m_Value(N), m_Specific(A))))
- // Fold the add into the true select value.
- return SelectInst::Create(SI->getCondition(), N, A);
- if (match(TV, m_Zero()) &&
- match(FV, m_Sub(m_Value(N), m_Specific(A))))
- // Fold the add into the false select value.
- return SelectInst::Create(SI->getCondition(), A, N);
- }
- }
-
- // Check for (add (sext x), y), see if we can merge this into an
- // integer add followed by a sext.
- if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
- // (add (sext x), cst) --> (sext (add x, cst'))
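- // E.g. "add (sext i8 %x to i32), 5" --> "sext (add nsw i8 %x, 5) to i32"
- // when the narrow add provably cannot overflow.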
- if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
- Constant *CI =
- ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
- if (LHSConv->hasOneUse() &&
- ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
- // Insert the new, smaller add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
- CI, "addconv");
- return new SExtInst(NewAdd, I.getType());
- }
- }
-
- // (add (sext x), (sext y)) --> (sext (add int x, y))
- if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
- // Only do this if x/y have the same type, if at least one of them has a
- // single use (so we don't increase the number of sexts), and if the
- // integer add will not overflow.
- if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
- (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0))) {
- // Insert the new integer add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), "addconv");
- return new SExtInst(NewAdd, I.getType());
- }
- }
- }
-
- return Changed ? &I : 0;
-}
-
-Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
- Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
-
- if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
- // X + -0.0 --> X
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
- if (CFP->isExactlyValue(
- ConstantFP::getNegativeZero(I.getType())->getValueAPF()))
- return ReplaceInstUsesWith(I, LHS);
- }
-
- if (isa<PHINode>(LHS))
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
- }
-
- // -A + B --> B - A
- if (Value *LHSV = dyn_castFNegVal(LHS))
- return BinaryOperator::CreateFSub(RHS, LHSV);
-
- // A + -B --> A - B
- if (!isa<Constant>(RHS))
- if (Value *V = dyn_castFNegVal(RHS))
- return BinaryOperator::CreateFSub(LHS, V);
-
- // Check for X+0.0. Simplify it to X if we know X is not -0.0.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
- if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
- return ReplaceInstUsesWith(I, LHS);
-
- // Check for (add double (sitofp x), y), see if we can merge this into an
- // integer add followed by a promotion.
- if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
- // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
- // ... if the constant fits in the integer value. This is useful for things
- // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
- // requires a constant pool load, and generally exposes the add to further
- // instcombine optimization.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
- Constant *CI =
- ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
- if (LHSConv->hasOneUse() &&
- ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
- // Insert the new integer add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
- CI, "addconv");
- return new SIToFPInst(NewAdd, I.getType());
- }
- }
-
- // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
- if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
- // Only do this if x/y have the same type, if at least one of them has a
- // single use (so we don't increase the number of int->fp conversions),
- // and if the integer add will not overflow.
- if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
- (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0))) {
- // Insert the new integer add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0),"addconv");
- return new SIToFPInst(NewAdd, I.getType());
- }
- }
- }
-
- return Changed ? &I : 0;
-}
-
-
-/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
-/// code necessary to compute the offset from the base pointer (without adding
-/// in the base pointer). Return the result as a signed integer of intptr size.
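- ///
- /// For example, on a 32-bit target, "gep [10 x i32]* %A, i32 0, i32 %i"
- /// emits an offset expression equivalent to "4 * %i" as an i32.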
-static Value *EmitGEPOffset(User *GEP, InstCombiner &IC) {
- TargetData &TD = *IC.getTargetData();
- gep_type_iterator GTI = gep_type_begin(GEP);
- const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
- Value *Result = Constant::getNullValue(IntPtrTy);
-
- // Build a mask for high order bits.
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
- uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
-
- for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
- ++i, ++GTI) {
- Value *Op = *i;
- uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
- if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
- if (OpC->isZero()) continue;
-
- // Handle a struct index, which adds its field offset to the pointer.
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
- Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
-
- Result = IC.Builder->CreateAdd(Result,
- ConstantInt::get(IntPtrTy, Size),
- GEP->getName()+".offs");
- continue;
- }
-
- Constant *Scale = ConstantInt::get(IntPtrTy, Size);
- Constant *OC =
- ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
- Scale = ConstantExpr::getMul(OC, Scale);
- // Emit an add instruction.
- Result = IC.Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
- continue;
- }
- // Convert to correct type.
- if (Op->getType() != IntPtrTy)
- Op = IC.Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
- if (Size != 1) {
- Constant *Scale = ConstantInt::get(IntPtrTy, Size);
- // We'll let instcombine(mul) convert this to a shl if possible.
- Op = IC.Builder->CreateMul(Op, Scale, GEP->getName()+".idx");
- }
-
- // Emit an add instruction.
- Result = IC.Builder->CreateAdd(Op, Result, GEP->getName()+".offs");
- }
- return Result;
-}
-
-
-/// EvaluateGEPOffsetExpression - Return a value that can be used to compare
-/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we
-/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can
-/// be complex, and scales are involved. The above expression would also be
-/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).
- /// This latter form is less amenable to optimization though, and we are allowed
-/// to generate the first by knowing that pointer arithmetic doesn't overflow.
-///
-/// If we can't emit an optimized form for this expression, this returns null.
-///
-static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
- InstCombiner &IC) {
- TargetData &TD = *IC.getTargetData();
- gep_type_iterator GTI = gep_type_begin(GEP);
-
- // Check to see if this gep only has a single variable index. If so, and if
- // any constant indices are a multiple of its scale, then we can compute this
- // in terms of the scale of the variable index. For example, if the GEP
- // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
- // because the expression will cross zero at the same point.
- unsigned i, e = GEP->getNumOperands();
- int64_t Offset = 0;
- for (i = 1; i != e; ++i, ++GTI) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
- // Compute the aggregate offset of constant indices.
- if (CI->isZero()) continue;
-
- // Handle a struct index, which adds its field offset to the pointer.
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
- Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
- } else {
- uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
- Offset += Size*CI->getSExtValue();
- }
- } else {
- // Found our variable index.
- break;
- }
- }
-
- // If there are no variable indices, we must have a constant offset, just
- // evaluate it the general way.
- if (i == e) return 0;
-
- Value *VariableIdx = GEP->getOperand(i);
- // Determine the scale factor of the variable element. For example, this is
- // 4 if the variable index is into an array of i32.
- uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType());
-
- // Verify that there are no other variable indices. If there are, punt and
- // let the caller evaluate the offset the general way.
- for (++i, ++GTI; i != e; ++i, ++GTI) {
- ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (!CI) return 0;
-
- // Compute the aggregate offset of constant indices.
- if (CI->isZero()) continue;
-
- // Handle a struct index, which adds its field offset to the pointer.
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
- Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
- } else {
- uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
- Offset += Size*CI->getSExtValue();
- }
- }
-
- // Okay, we know we have a single variable index, which must be a
- // pointer/array/vector index. If there is no offset, life is simple, return
- // the index.
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
- if (Offset == 0) {
- // Cast to IntPtrTy in case a truncation occurs. If an extension is needed,
- // we don't need to bother extending: the extension won't affect where the
- // computation crosses zero.
- if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth)
- VariableIdx = new TruncInst(VariableIdx,
- TD.getIntPtrType(VariableIdx->getContext()),
- VariableIdx->getName(), &I);
- return VariableIdx;
- }
-
- // Otherwise, there is a constant offset. The computation we will do will
- // be modulo the pointer size, so get the mask for it.
- uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
-
- Offset &= PtrSizeMask;
- VariableScale &= PtrSizeMask;
-
- // To do this transformation, any constant index must be a multiple of the
- // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
- // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a
- // multiple of the variable scale.
- int64_t NewOffs = Offset / (int64_t)VariableScale;
- if (Offset != NewOffs*(int64_t)VariableScale)
- return 0;
-
- // Okay, we can do this evaluation. Start by converting the index to intptr.
- const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
- if (VariableIdx->getType() != IntPtrTy)
- VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy,
- true /*SExt*/,
- VariableIdx->getName(), &I);
- Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
- return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I);
-}
-
-
- /// Optimize pointer differences in the same array into a size. Consider:
-/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
-/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
-///
-Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
- const Type *Ty) {
- assert(TD && "Must have target data info for this");
-
- // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
- // this.
- bool Swapped;
- GetElementPtrInst *GEP;
-
- if ((GEP = dyn_cast<GetElementPtrInst>(LHS)) &&
- GEP->getOperand(0) == RHS)
- Swapped = false;
- else if ((GEP = dyn_cast<GetElementPtrInst>(RHS)) &&
- GEP->getOperand(0) == LHS)
- Swapped = true;
- else
- return 0;
-
- // TODO: Could also optimize &A[i] - &A[j] -> "i-j".
-
- // Emit the offset of the GEP as an intptr_t.
- Value *Result = EmitGEPOffset(GEP, *this);
-
- // If we have p - gep(p, ...) then we have to negate the result.
- if (Swapped)
- Result = Builder->CreateNeg(Result, "diff.neg");
-
- return Builder->CreateIntCast(Result, Ty, true);
-}
-
-
-Instruction *InstCombiner::visitSub(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (Op0 == Op1) // sub X, X -> 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
- // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW.
- if (Value *V = dyn_castNegVal(Op1)) {
- BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V);
- Res->setHasNoSignedWrap(I.hasNoSignedWrap());
- Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
- return Res;
- }
-
- if (isa<UndefValue>(Op0))
- return ReplaceInstUsesWith(I, Op0); // undef - X -> undef
- if (isa<UndefValue>(Op1))
- return ReplaceInstUsesWith(I, Op1); // X - undef -> undef
- if (I.getType() == Type::getInt1Ty(*Context))
- return BinaryOperator::CreateXor(Op0, Op1);
-
- if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
- // Replace (-1 - A) with (~A).
- if (C->isAllOnesValue())
- return BinaryOperator::CreateNot(Op1);
-
- // C - ~X == X + (1+C)
- Value *X = 0;
- if (match(Op1, m_Not(m_Value(X))))
- return BinaryOperator::CreateAdd(X, AddOne(C));
-
- // -(X >>u 31) -> (X >>s 31)
- // -(X >>s 31) -> (X >>u 31)
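- // (X >>u 31 is 0 or 1, so negating it gives 0 or -1, which is exactly
- // X >>s 31; the same reasoning applies in reverse for X >>s 31.)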
- if (C->isZero()) {
- if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) {
- if (SI->getOpcode() == Instruction::LShr) {
- if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
- // Check to see if we are shifting out everything but the sign bit.
- if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
- SI->getType()->getPrimitiveSizeInBits()-1) {
- // Ok, the transformation is safe. Insert AShr.
- return BinaryOperator::Create(Instruction::AShr,
- SI->getOperand(0), CU, SI->getName());
- }
- }
- } else if (SI->getOpcode() == Instruction::AShr) {
- if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
- // Check to see if we are shifting out everything but the sign bit.
- if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
- SI->getType()->getPrimitiveSizeInBits()-1) {
- // Ok, the transformation is safe. Insert LShr.
- return BinaryOperator::CreateLShr(
- SI->getOperand(0), CU, SI->getName());
- }
- }
- }
- }
- }
-
- // Try to fold constant sub into select arguments.
- if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
- if (Instruction *R = FoldOpIntoSelect(I, SI, this))
- return R;
-
- // C - zext(bool) -> bool ? C - 1 : C
- if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1))
- if (ZI->getSrcTy() == Type::getInt1Ty(*Context))
- return SelectInst::Create(ZI->getOperand(0), SubOne(C), C);
- }
-
- if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
- if (Op1I->getOpcode() == Instruction::Add) {
- if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y
- return BinaryOperator::CreateNeg(Op1I->getOperand(1),
- I.getName());
- else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y
- return BinaryOperator::CreateNeg(Op1I->getOperand(0),
- I.getName());
- else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
- if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
- // C1-(X+C2) --> (C1-C2)-X
- return BinaryOperator::CreateSub(
- ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0));
- }
- }
-
- if (Op1I->hasOneUse()) {
- // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
- // is not used by anyone else...
- //
- if (Op1I->getOpcode() == Instruction::Sub) {
- // Swap the two operands of the subexpr...
- Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
- Op1I->setOperand(0, IIOp1);
- Op1I->setOperand(1, IIOp0);
-
- // Create the new top level add instruction...
- return BinaryOperator::CreateAdd(Op0, Op1);
- }
-
- // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)...
- //
- if (Op1I->getOpcode() == Instruction::And &&
- (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) {
- // Select whichever operand of the 'and' is not Op0.
- Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0);
-
- Value *NewNot = Builder->CreateNot(OtherOp, "B.not");
- return BinaryOperator::CreateAnd(Op0, NewNot);
- }
-
- // 0 - (X sdiv C) -> (X sdiv -C)
- if (Op1I->getOpcode() == Instruction::SDiv)
- if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
- if (CSI->isZero())
- if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
- return BinaryOperator::CreateSDiv(Op1I->getOperand(0),
- ConstantExpr::getNeg(DivRHS));
-
- // X - X*C --> X * (1-C)
- ConstantInt *C2 = 0;
- if (dyn_castFoldableMul(Op1I, C2) == Op0) {
- Constant *CP1 =
- ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
- C2);
- return BinaryOperator::CreateMul(Op0, CP1);
- }
- }
- }
-
- if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
- if (Op0I->getOpcode() == Instruction::Add) {
- if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X
- return ReplaceInstUsesWith(I, Op0I->getOperand(1));
- else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X
- return ReplaceInstUsesWith(I, Op0I->getOperand(0));
- } else if (Op0I->getOpcode() == Instruction::Sub) {
- if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y
- return BinaryOperator::CreateNeg(Op0I->getOperand(1),
- I.getName());
- }
- }
-
- ConstantInt *C1;
- if (Value *X = dyn_castFoldableMul(Op0, C1)) {
- if (X == Op1) // X*C - X --> X * (C-1)
- return BinaryOperator::CreateMul(Op1, SubOne(C1));
-
- ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2)
- if (X == dyn_castFoldableMul(Op1, C2))
- return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
- }
-
- // Optimize pointer differences in the same array into a size. Consider:
- // &A[10] - &A[0]: we should compile this to "10".
- if (TD) {
- if (PtrToIntInst *LHS = dyn_cast<PtrToIntInst>(Op0))
- if (PtrToIntInst *RHS = dyn_cast<PtrToIntInst>(Op1))
- if (Value *Res = OptimizePointerDifference(LHS->getOperand(0),
- RHS->getOperand(0),
- I.getType()))
- return ReplaceInstUsesWith(I, Res);
-
- // trunc(p)-trunc(q) -> trunc(p-q)
- if (TruncInst *LHST = dyn_cast<TruncInst>(Op0))
- if (TruncInst *RHST = dyn_cast<TruncInst>(Op1))
- if (PtrToIntInst *LHS = dyn_cast<PtrToIntInst>(LHST->getOperand(0)))
- if (PtrToIntInst *RHS = dyn_cast<PtrToIntInst>(RHST->getOperand(0)))
- if (Value *Res = OptimizePointerDifference(LHS->getOperand(0),
- RHS->getOperand(0),
- I.getType()))
- return ReplaceInstUsesWith(I, Res);
- }
-
- return 0;
-}
-
-Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- // If this is a 'B = x-(-A)', change to B = x+A...
- if (Value *V = dyn_castFNegVal(Op1))
- return BinaryOperator::CreateFAdd(Op0, V);
-
- if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
- if (Op1I->getOpcode() == Instruction::FAdd) {
- if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y
- return BinaryOperator::CreateFNeg(Op1I->getOperand(1),
- I.getName());
- else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y
- return BinaryOperator::CreateFNeg(Op1I->getOperand(0),
- I.getName());
- }
- }
-
- return 0;
-}
-
-/// isSignBitCheck - Given an exploded icmp instruction, return true if the
-/// comparison only checks the sign bit. If it only checks the sign bit, set
- /// TrueIfSigned if the result of the comparison is true when the input
- /// value is negative (i.e. has its sign bit set).
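- ///
- /// For example, "icmp ugt i8 %X, 127" only checks the sign bit: it is
- /// true exactly for 128-255, i.e. the values with the top bit set.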
-static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
- bool &TrueIfSigned) {
- switch (pred) {
- case ICmpInst::ICMP_SLT: // True if LHS s< 0
- TrueIfSigned = true;
- return RHS->isZero();
- case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1
- TrueIfSigned = true;
- return RHS->isAllOnesValue();
- case ICmpInst::ICMP_SGT: // True if LHS s> -1
- TrueIfSigned = false;
- return RHS->isAllOnesValue();
- case ICmpInst::ICMP_UGT:
- // True if LHS u> RHS and RHS == high-bit-mask - 1
- TrueIfSigned = true;
- return RHS->getValue() ==
- APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits());
- case ICmpInst::ICMP_UGE:
- // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
- TrueIfSigned = true;
- return RHS->getValue().isSignBit();
- default:
- return false;
- }
-}
-
-Instruction *InstCombiner::visitMul(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (isa<UndefValue>(Op1)) // undef * X -> 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
- // Simplify mul instructions with a constant RHS.
- if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1C)) {
-
- // ((X << C1)*C2) == (X * (C2 << C1))
- if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
- if (SI->getOpcode() == Instruction::Shl)
- if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
- return BinaryOperator::CreateMul(SI->getOperand(0),
- ConstantExpr::getShl(CI, ShOp));
-
- if (CI->isZero())
- return ReplaceInstUsesWith(I, Op1C); // X * 0 == 0
- if (CI->equalsInt(1)) // X * 1 == X
- return ReplaceInstUsesWith(I, Op0);
- if (CI->isAllOnesValue()) // X * -1 == 0 - X
- return BinaryOperator::CreateNeg(Op0, I.getName());
-
- const APInt& Val = CI->getValue();
- if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
- return BinaryOperator::CreateShl(Op0,
- ConstantInt::get(Op0->getType(), Val.logBase2()));
- }
- } else if (isa<VectorType>(Op1C->getType())) {
- if (Op1C->isNullValue())
- return ReplaceInstUsesWith(I, Op1C);
-
- if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
- if (Op1V->isAllOnesValue()) // X * -1 == 0 - X
- return BinaryOperator::CreateNeg(Op0, I.getName());
-
- // Vector X * splat(1) -> X in all defined cases.
- if (Constant *Splat = Op1V->getSplatValue()) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Splat))
- if (CI->equalsInt(1))
- return ReplaceInstUsesWith(I, Op0);
- }
- }
- }
-
- if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
- if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() &&
- isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1C)) {
- // Canonicalize (X+C1)*C2 -> X*C2+C1*C2.
- Value *XC2 = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp");
- Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1));
- return BinaryOperator::CreateAdd(XC2, C1C2);
- }
-
- // Try to fold constant mul into select arguments.
- if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
- if (Instruction *R = FoldOpIntoSelect(I, SI, this))
- return R;
-
- if (isa<PHINode>(Op0))
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
- }
-
- if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y
- if (Value *Op1v = dyn_castNegVal(Op1))
- return BinaryOperator::CreateMul(Op0v, Op1v);
-
- // (X / Y) * Y = X - (X % Y)
- // (X / Y) * -Y = (X % Y) - X
- {
- Value *Op1C = Op1;
- BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
- if (!BO ||
- (BO->getOpcode() != Instruction::UDiv &&
- BO->getOpcode() != Instruction::SDiv)) {
- Op1C = Op0;
- BO = dyn_cast<BinaryOperator>(Op1);
- }
- Value *Neg = dyn_castNegVal(Op1C);
- if (BO && BO->hasOneUse() &&
- (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) &&
- (BO->getOpcode() == Instruction::UDiv ||
- BO->getOpcode() == Instruction::SDiv)) {
- Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1);
-
- // If the division is exact, X % Y is zero.
- if (SDivOperator *SDiv = dyn_cast<SDivOperator>(BO))
- if (SDiv->isExact()) {
- if (Op1BO == Op1C)
- return ReplaceInstUsesWith(I, Op0BO);
- return BinaryOperator::CreateNeg(Op0BO);
- }
-
- Value *Rem;
- if (BO->getOpcode() == Instruction::UDiv)
- Rem = Builder->CreateURem(Op0BO, Op1BO);
- else
- Rem = Builder->CreateSRem(Op0BO, Op1BO);
- Rem->takeName(BO);
-
- if (Op1BO == Op1C)
- return BinaryOperator::CreateSub(Op0BO, Rem);
- return BinaryOperator::CreateSub(Rem, Op0BO);
- }
- }
-
- // i1 mul -> i1 and.
- if (I.getType() == Type::getInt1Ty(*Context))
- return BinaryOperator::CreateAnd(Op0, Op1);
-
- // X*(1 << Y) --> X << Y
- // (1 << Y)*X --> X << Y
- {
- Value *Y;
- if (match(Op0, m_Shl(m_One(), m_Value(Y))))
- return BinaryOperator::CreateShl(Op1, Y);
- if (match(Op1, m_Shl(m_One(), m_Value(Y))))
- return BinaryOperator::CreateShl(Op0, Y);
- }
-
- // If one of the operands of the multiply is a cast from a boolean value, then
- // we know the bool is either zero or one, so this is a 'masking' multiply.
- // X * Y (where Y is 0 or 1) -> X & (0-Y)
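- // (0-Y is 0 when Y==0 and all-ones when Y==1, so the 'and' yields either
- // 0 or X, matching the multiply.)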
- if (!isa<VectorType>(I.getType())) {
- // -2 is "-1 << 1" so it is all bits set except the low one.
- APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
-
- Value *BoolCast = 0, *OtherOp = 0;
- if (MaskedValueIsZero(Op0, Negative2))
- BoolCast = Op0, OtherOp = Op1;
- else if (MaskedValueIsZero(Op1, Negative2))
- BoolCast = Op1, OtherOp = Op0;
-
- if (BoolCast) {
- Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()),
- BoolCast, "tmp");
- return BinaryOperator::CreateAnd(V, OtherOp);
- }
- }
-
- return Changed ? &I : 0;
-}
-
-Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- // Simplify mul instructions with a constant RHS...
- if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
- if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) {
- // "In IEEE floating point, x*1 is not equivalent to x for nans. However,
- // ANSI says we can drop signals, so we can do this anyway." (from GCC)
- if (Op1F->isExactlyValue(1.0))
- return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0'
- } else if (isa<VectorType>(Op1C->getType())) {
- if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
- // As above, vector X*splat(1.0) -> X in all defined cases.
- if (Constant *Splat = Op1V->getSplatValue()) {
- if (ConstantFP *F = dyn_cast<ConstantFP>(Splat))
- if (F->isExactlyValue(1.0))
- return ReplaceInstUsesWith(I, Op0);
- }
- }
- }
-
- // Try to fold constant mul into select arguments.
- if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
- if (Instruction *R = FoldOpIntoSelect(I, SI, this))
- return R;
-
- if (isa<PHINode>(Op0))
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
- }
-
- if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y
- if (Value *Op1v = dyn_castFNegVal(Op1))
- return BinaryOperator::CreateFMul(Op0v, Op1v);
-
- return Changed ? &I : 0;
-}
-
-/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select
-/// instruction.
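- ///
- /// For example, "udiv X, (select %C, Y, 0)" can only avoid dividing by
- /// zero when %C is true, so it is simplified to "udiv X, Y".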
-bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
- SelectInst *SI = cast<SelectInst>(I.getOperand(1));
-
- // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
- int NonNullOperand = -1;
- if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
- if (ST->isNullValue())
- NonNullOperand = 2;
- // div/rem X, (Cond ? Y : 0) -> div/rem X, Y
- if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
- if (ST->isNullValue())
- NonNullOperand = 1;
-
- if (NonNullOperand == -1)
- return false;
-
- Value *SelectCond = SI->getOperand(0);
-
- // Change the div/rem to use 'Y' instead of the select.
- I.setOperand(1, SI->getOperand(NonNullOperand));
-
- // Okay, we know we can replace the operand of the div/rem with 'Y' with no
- // problem. However, the select, or the condition of the select may have
- // multiple uses. Based on our knowledge that the operand must be non-zero,
- // propagate the known value for the select into other uses of it, and
- // propagate a known value of the condition into its other users.
-
- // If the select and condition have no other uses, there is nothing to
- // propagate; exit early.
- if (SI->use_empty() && SelectCond->hasOneUse())
- return true;
-
- // Scan the current block backward, looking for other uses of SI.
- BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
-
- while (BBI != BBFront) {
- --BBI;
- // If we found a call to a function, we can't assume it will return, so
- // information from below it cannot be propagated above it.
- if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
- break;
-
- // Replace uses of the select or its condition with the known values.
- for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
- I != E; ++I) {
- if (*I == SI) {
- *I = SI->getOperand(NonNullOperand);
- Worklist.Add(BBI);
- } else if (*I == SelectCond) {
- *I = NonNullOperand == 1 ? ConstantInt::getTrue(*Context) :
- ConstantInt::getFalse(*Context);
- Worklist.Add(BBI);
- }
- }
-
- // If we passed the instruction, quit looking for it.
- if (&*BBI == SI)
- SI = 0;
- if (&*BBI == SelectCond)
- SelectCond = 0;
-
- // If we ran out of things to eliminate, break out of the loop.
- if (SelectCond == 0 && SI == 0)
- break;
-
- }
- return true;
-}
-
-
-/// This function implements the transforms on div instructions that work
-/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is
-/// used by the visitors to those instructions.
-/// @brief Transforms common to all three div instructions
-Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- // undef / X -> 0 for integer.
- // undef / X -> undef for FP (the undef could be a snan).
- if (isa<UndefValue>(Op0)) {
- if (Op0->getType()->isFPOrFPVector())
- return ReplaceInstUsesWith(I, Op0);
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- }
-
- // X / undef -> undef
- if (isa<UndefValue>(Op1))
- return ReplaceInstUsesWith(I, Op1);
-
- return 0;
-}
-
-/// This function implements the transforms common to both integer division
-/// instructions (udiv and sdiv). It is called by the visitors to those integer
-/// division instructions.
-/// @brief Common integer divide transforms
-Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- // (sdiv X, X) --> 1 (udiv X, X) --> 1
- if (Op0 == Op1) {
- if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
- Constant *CI = ConstantInt::get(Ty->getElementType(), 1);
- std::vector<Constant*> Elts(Ty->getNumElements(), CI);
- return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
- }
-
- Constant *CI = ConstantInt::get(I.getType(), 1);
- return ReplaceInstUsesWith(I, CI);
- }
-
- if (Instruction *Common = commonDivTransforms(I))
- return Common;
-
- // Handle cases involving: [su]div X, (select Cond, Y, Z)
- // This does not apply for fdiv.
- if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
- return &I;
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- // div X, 1 == X
- if (RHS->equalsInt(1))
- return ReplaceInstUsesWith(I, Op0);
-
- // (X / C1) / C2 -> X / (C1*C2)
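- // E.g. (X /u 10) /u 20 --> X /u 200. If C1*C2 overflows the bit width,
- // X must be smaller than C1*C2, so the result is simply zero.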
- if (Instruction *LHS = dyn_cast<Instruction>(Op0))
- if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
- if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {
- if (MultiplyOverflows(RHS, LHSRHS,
- I.getOpcode()==Instruction::SDiv))
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- else
- return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
- ConstantExpr::getMul(RHS, LHSRHS));
- }
-
- if (!RHS->isZero()) { // avoid X udiv 0
- if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
- if (Instruction *R = FoldOpIntoSelect(I, SI, this))
- return R;
- if (isa<PHINode>(Op0))
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
- }
- }
-
- // 0 / X == 0, we don't need to preserve faults!
- if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0))
- if (LHS->equalsInt(0))
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
- // For i1, division by zero is undefined, so the divisor must be one and
- // the result is the dividend.
- if (I.getType() == Type::getInt1Ty(*Context))
- return ReplaceInstUsesWith(I, Op0);
-
- if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
- if (ConstantInt *X = cast_or_null<ConstantInt>(Op1V->getSplatValue()))
- // div X, 1 == X
- if (X->isOne())
- return ReplaceInstUsesWith(I, Op0);
- }
-
- return 0;
-}
-
-Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- // Handle the integer div common cases
- if (Instruction *Common = commonIDivTransforms(I))
- return Common;
-
- if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
- // X udiv 2^C -> X >> C
- // Check to see if this is an unsigned division with an exact power of 2,
- // if so, convert to a right shift.
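- // E.g. X udiv 16 --> X >>u 4.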
- if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2
- return BinaryOperator::CreateLShr(Op0,
- ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
-
- // X udiv C, where C >= signbit
- if (C->getValue().isNegative()) {
- Value *IC = Builder->CreateICmpULT( Op0, C);
- return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
- ConstantInt::get(I.getType(), 1));
- }
- }
-
- // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
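- // E.g. X udiv (4 << N) --> X >>u (N+2), since 4 == 1 << 2.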
- if (BinaryOperator *RHSI = dyn_cast<BinaryOperator>(I.getOperand(1))) {
- if (RHSI->getOpcode() == Instruction::Shl &&
- isa<ConstantInt>(RHSI->getOperand(0))) {
- const APInt& C1 = cast<ConstantInt>(RHSI->getOperand(0))->getValue();
- if (C1.isPowerOf2()) {
- Value *N = RHSI->getOperand(1);
- const Type *NTy = N->getType();
- if (uint32_t C2 = C1.logBase2())
- N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp");
- return BinaryOperator::CreateLShr(Op0, N);
- }
- }
- }
-
- // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
- // where C1&C2 are powers of two.
- if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
- if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
- if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) {
- const APInt &TVA = STO->getValue(), &FVA = SFO->getValue();
- if (TVA.isPowerOf2() && FVA.isPowerOf2()) {
- // Compute the shift amounts
- uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2();
- // Construct the "on true" case of the select
- Constant *TC = ConstantInt::get(Op0->getType(), TSA);
- Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t");
-
- // Construct the "on false" case of the select
- Constant *FC = ConstantInt::get(Op0->getType(), FSA);
- Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f");
-
- // Construct the select instruction and return it.
- return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName());
- }
- }
- return 0;
-}
-
-Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- // Handle the integer div common cases
- if (Instruction *Common = commonIDivTransforms(I))
- return Common;
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- // sdiv X, -1 == -X
- if (RHS->isAllOnesValue())
- return BinaryOperator::CreateNeg(Op0);
-
- // sdiv X, C --> ashr X, log2(C)
- if (cast<SDivOperator>(&I)->isExact() &&
- RHS->getValue().isNonNegative() &&
- RHS->getValue().isPowerOf2()) {
- Value *ShAmt = llvm::ConstantInt::get(RHS->getType(),
- RHS->getValue().exactLogBase2());
- return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName());
- }
-
- // -X/C --> X/-C provided the negation doesn't overflow.
- if (SubOperator *Sub = dyn_cast<SubOperator>(Op0))
- if (isa<Constant>(Sub->getOperand(0)) &&
- cast<Constant>(Sub->getOperand(0))->isNullValue() &&
- Sub->hasNoSignedWrap())
- return BinaryOperator::CreateSDiv(Sub->getOperand(1),
- ConstantExpr::getNeg(RHS));
- }
-
- // If the sign bits of both operands are zero (i.e. we can prove they are
- // unsigned inputs), turn this into a udiv.
- if (I.getType()->isInteger()) {
- APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
- if (MaskedValueIsZero(Op0, Mask)) {
- if (MaskedValueIsZero(Op1, Mask)) {
- // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
- return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
- }
- ConstantInt *ShiftedInt;
- if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) &&
- ShiftedInt->getValue().isPowerOf2()) {
- // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
- // Safe because the only negative value (1 << Y) can take on is
- // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
- // the sign bit set.
- return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
- }
- }
- }
-
- return 0;
-}
-
-Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
- return commonDivTransforms(I);
-}
-
-/// This function implements the transforms on rem instructions that work
-/// regardless of the kind of rem instruction it is (urem, srem, or frem). It
-/// is used by the visitors to those instructions.
-/// @brief Transforms common to all three rem instructions
-Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (isa<UndefValue>(Op0)) { // undef % X -> 0 for integer
- if (I.getType()->isFPOrFPVector())
- return ReplaceInstUsesWith(I, Op0); // undef % X -> undef for FP (could be SNaN)
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- }
- if (isa<UndefValue>(Op1))
- return ReplaceInstUsesWith(I, Op1); // X % undef -> undef
-
- // Handle cases involving: rem X, (select Cond, Y, Z)
- if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
- return &I;
-
- return 0;
-}
-
-/// This function implements the transforms common to both integer remainder
-/// instructions (urem and srem). It is called by the visitors to those integer
-/// remainder instructions.
-/// @brief Common integer remainder transforms
-Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (Instruction *common = commonRemTransforms(I))
- return common;
-
- // 0 % X == 0 for integer, we don't need to preserve faults!
- if (Constant *LHS = dyn_cast<Constant>(Op0))
- if (LHS->isNullValue())
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- // X % 0 == undef, we don't need to preserve faults!
- if (RHS->equalsInt(0))
- return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
-
- if (RHS->equalsInt(1)) // X % 1 == 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
- if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
- if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
- if (Instruction *R = FoldOpIntoSelect(I, SI, this))
- return R;
- } else if (isa<PHINode>(Op0I)) {
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
- }
-
- // See if we can fold away this rem instruction.
- if (SimplifyDemandedInstructionBits(I))
- return &I;
- }
- }
-
- return 0;
-}
-
-Instruction *InstCombiner::visitURem(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (Instruction *common = commonIRemTransforms(I))
- return common;
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- // X urem 2^k -> X and 2^k-1
- // Check to see if this is an unsigned remainder with an exact power of 2,
- // if so, convert to a bitwise and.
- if (RHS->getValue().isPowerOf2())
- return BinaryOperator::CreateAnd(Op0, SubOne(RHS));
- }
-
- if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) {
- // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
- if (RHSI->getOpcode() == Instruction::Shl &&
- isa<ConstantInt>(RHSI->getOperand(0))) {
- if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) {
- Constant *N1 = Constant::getAllOnesValue(I.getType());
- Value *Add = Builder->CreateAdd(RHSI, N1, "tmp");
- return BinaryOperator::CreateAnd(Op0, Add);
- }
- }
- }
-
- // urem X, (select Cond, C1, C2) --> select Cond, (and X, C1-1), (and X, C2-1)
- // where C1 and C2 are powers of two.
- if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) {
- if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
- if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) {
- // STO == 0 and SFO == 0 handled above.
- if ((STO->getValue().isPowerOf2()) &&
- (SFO->getValue().isPowerOf2())) {
- Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO),
- SI->getName()+".t");
- Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO),
- SI->getName()+".f");
- return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd);
- }
- }
- }
-
- return 0;
-}
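-
-// A sketch (not from the original file) of the power-of-two fold above: for
-// unsigned values, X urem 2^K == X & (2^K - 1), and SubOne(C) builds the
-// mask. Assuming uint32_t and K < 32:
-//
-//   #include <cstdint>
-//   bool uremMatchesMask(uint32_t X, unsigned K) {     // hypothetical helper
-//     uint32_t C = uint32_t(1) << K;
-//     return X % C == (X & (C - 1));
-//   }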
-
-Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- // Handle the integer rem common cases
- if (Instruction *Common = commonIRemTransforms(I))
- return Common;
-
- if (Value *RHSNeg = dyn_castNegVal(Op1))
- if (!isa<Constant>(RHSNeg) ||
- (isa<ConstantInt>(RHSNeg) &&
- cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) {
- // X % -Y -> X % Y
- Worklist.AddValue(I.getOperand(1));
- I.setOperand(1, RHSNeg);
- return &I;
- }
-
- // If the sign bits of both operands are zero (i.e. we can prove they are
- // unsigned inputs), turn this into a urem.
- if (I.getType()->isInteger()) {
- APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
- if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
- // X srem Y -> X urem Y, iff X and Y don't have sign bit set
- return BinaryOperator::CreateURem(Op0, Op1, I.getName());
- }
- }
-
- // If it's a constant vector, flip any negative values positive.
- if (ConstantVector *RHSV = dyn_cast<ConstantVector>(Op1)) {
- unsigned VWidth = RHSV->getNumOperands();
-
- bool hasNegative = false;
- for (unsigned i = 0; !hasNegative && i != VWidth; ++i)
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i)))
- if (RHS->getValue().isNegative())
- hasNegative = true;
-
- if (hasNegative) {
- std::vector<Constant *> Elts(VWidth);
- for (unsigned i = 0; i != VWidth; ++i) {
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) {
- if (RHS->getValue().isNegative())
- Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
- else
- Elts[i] = RHS;
- }
- }
-
- Constant *NewRHSV = ConstantVector::get(Elts);
- if (NewRHSV != RHSV) {
- Worklist.AddValue(I.getOperand(1));
- I.setOperand(1, NewRHSV);
- return &I;
- }
- }
- }
-
- return 0;
-}
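-
-// The "X % -Y -> X % Y" rewrite above matches C++'s truncating remainder,
-// whose sign follows the dividend alone. A sketch (not from the original
-// file), assuming Y != 0 and no INT_MIN overflow in -Y or the divisions:
-//
-//   bool sremIgnoresDivisorSign(int X, int Y) {        // hypothetical helper
-//     return X % -Y == X % Y;
-//   }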
-
-Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
- return commonRemTransforms(I);
-}
-
-// isOneBitSet - Return true if there is exactly one bit set in the specified
-// constant.
-static bool isOneBitSet(const ConstantInt *CI) {
- return CI->getValue().isPowerOf2();
-}
-
-// isHighOnes - Return true if the constant is of the form 1+0+.
-// Equivalently, ~X is of the form 0+1+ (a run of low ones).
-static bool isHighOnes(const ConstantInt *CI) {
- return (~CI->getValue() + 1).isPowerOf2();
-}
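-
-// A sketch (not from the original file) of the isHighOnes test: ~C + 1 is
-// the two's-complement negation -C, and -C is a power of two exactly when C
-// is a run of high ones, e.g. C == 0xFF000000 gives ~C + 1 == 0x01000000.
-//
-//   #include <cstdint>
-//   bool isHighOnes32(uint32_t C) {                    // hypothetical helper
-//     uint32_t V = ~C + 1;                             // == -C
-//     return V != 0 && (V & (V - 1)) == 0;             // power-of-two test
-//   }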
-
-/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits
-/// are carefully arranged to allow folding of expressions such as:
-///
-/// (A < B) | (A > B) --> (A != B)
-///
-/// Note that this is only valid if the first and second predicates have the
-/// same sign. It is illegal to do: (A u< B) | (A s> B)
-///
-/// Three bits are used to represent the condition, as follows:
-/// 0 A > B
-/// 1 A == B
-/// 2 A < B
-///
-/// <=> Value Definition
-/// 000 0 Always false
-/// 001 1 A > B
-/// 010 2 A == B
-/// 011 3 A >= B
-/// 100 4 A < B
-/// 101 5 A != B
-/// 110 6 A <= B
-/// 111 7 Always true
-///
-static unsigned getICmpCode(const ICmpInst *ICI) {
- switch (ICI->getPredicate()) {
- // False -> 0
- case ICmpInst::ICMP_UGT: return 1; // 001
- case ICmpInst::ICMP_SGT: return 1; // 001
- case ICmpInst::ICMP_EQ: return 2; // 010
- case ICmpInst::ICMP_UGE: return 3; // 011
- case ICmpInst::ICMP_SGE: return 3; // 011
- case ICmpInst::ICMP_ULT: return 4; // 100
- case ICmpInst::ICMP_SLT: return 4; // 100
- case ICmpInst::ICMP_NE: return 5; // 101
- case ICmpInst::ICMP_ULE: return 6; // 110
- case ICmpInst::ICMP_SLE: return 6; // 110
- // True -> 7
- default:
- llvm_unreachable("Invalid ICmp predicate!");
- return 0;
- }
-}
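-
-// With this encoding, the logical folds reduce to bit arithmetic on the
-// codes. A sketch using hypothetical constants (not from the original file):
-//
-//   enum { CmpGT = 1, CmpEQ = 2, CmpLT = 4 };  // the three bits above
-//   // (A < B) | (A > B):   CmpLT | CmpGT == 5, the code for A != B
-//   // (A <= B) & (A >= B): 6 & 3 == 2,         the code for A == B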
-
-/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp
-/// predicate into a three bit mask. It also returns whether it is an ordered
-/// predicate by reference.
-static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
- isOrdered = false;
- switch (CC) {
- case FCmpInst::FCMP_ORD: isOrdered = true; return 0; // 000
- case FCmpInst::FCMP_UNO: return 0; // 000
- case FCmpInst::FCMP_OGT: isOrdered = true; return 1; // 001
- case FCmpInst::FCMP_UGT: return 1; // 001
- case FCmpInst::FCMP_OEQ: isOrdered = true; return 2; // 010
- case FCmpInst::FCMP_UEQ: return 2; // 010
- case FCmpInst::FCMP_OGE: isOrdered = true; return 3; // 011
- case FCmpInst::FCMP_UGE: return 3; // 011
- case FCmpInst::FCMP_OLT: isOrdered = true; return 4; // 100
- case FCmpInst::FCMP_ULT: return 4; // 100
- case FCmpInst::FCMP_ONE: isOrdered = true; return 5; // 101
- case FCmpInst::FCMP_UNE: return 5; // 101
- case FCmpInst::FCMP_OLE: isOrdered = true; return 6; // 110
- case FCmpInst::FCMP_ULE: return 6; // 110
- // True -> 7
- default:
- // Not expecting FCMP_FALSE or FCMP_TRUE.
- llvm_unreachable("Unexpected FCmp predicate!");
- return 0;
- }
-}
-
-/// getICmpValue - This is the complement of getICmpCode, which turns an
-/// opcode and two operands into either a constant true or false, or a brand
-/// new ICmp instruction. The sign is passed in to determine which kind
-/// of predicate to use in the new icmp instruction.
-static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS,
- LLVMContext *Context) {
- switch (code) {
- default: llvm_unreachable("Illegal ICmp code!");
- case 0: return ConstantInt::getFalse(*Context);
- case 1:
- if (sign)
- return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS);
- else
- return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS);
- case 2: return new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS);
- case 3:
- if (sign)
- return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS);
- else
- return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS);
- case 4:
- if (sign)
- return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS);
- else
- return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS);
- case 5: return new ICmpInst(ICmpInst::ICMP_NE, LHS, RHS);
- case 6:
- if (sign)
- return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS);
- else
- return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS);
- case 7: return ConstantInt::getTrue(*Context);
- }
-}
-
-/// getFCmpValue - This is the complement of getFCmpCode, which turns a
-/// predicate code and two operands into either a constant true or a new FCmp
-/// instruction. isordered is passed in to determine which kind of predicate
-/// to use in the new fcmp instruction.
-static Value *getFCmpValue(bool isordered, unsigned code,
- Value *LHS, Value *RHS, LLVMContext *Context) {
- switch (code) {
- default: llvm_unreachable("Illegal FCmp code!");
- case 0:
- if (isordered)
- return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS);
- else
- return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS);
- case 1:
- if (isordered)
- return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS);
- else
- return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS);
- case 2:
- if (isordered)
- return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS);
- else
- return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS);
- case 3:
- if (isordered)
- return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS);
- else
- return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS);
- case 4:
- if (isordered)
- return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS);
- else
- return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS);
- case 5:
- if (isordered)
- return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS);
- else
- return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS);
- case 6:
- if (isordered)
- return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS);
- else
- return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS);
- case 7: return ConstantInt::getTrue(*Context);
- }
-}
-
-/// PredicatesFoldable - Return true if both predicates match sign or if at
-/// least one of them is an equality comparison (which is signless).
-static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
- return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
- (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
- (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
-}
-
-namespace {
-// FoldICmpLogical - Implements (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
-struct FoldICmpLogical {
- InstCombiner &IC;
- Value *LHS, *RHS;
- ICmpInst::Predicate pred;
- FoldICmpLogical(InstCombiner &ic, ICmpInst *ICI)
- : IC(ic), LHS(ICI->getOperand(0)), RHS(ICI->getOperand(1)),
- pred(ICI->getPredicate()) {}
- bool shouldApply(Value *V) const {
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(V))
- if (PredicatesFoldable(pred, ICI->getPredicate()))
- return ((ICI->getOperand(0) == LHS && ICI->getOperand(1) == RHS) ||
- (ICI->getOperand(0) == RHS && ICI->getOperand(1) == LHS));
- return false;
- }
- Instruction *apply(Instruction &Log) const {
- ICmpInst *ICI = cast<ICmpInst>(Log.getOperand(0));
- if (ICI->getOperand(0) != LHS) {
- assert(ICI->getOperand(1) == LHS);
- ICI->swapOperands(); // Swap the LHS and RHS of the ICmp
- }
-
- ICmpInst *RHSICI = cast<ICmpInst>(Log.getOperand(1));
- unsigned LHSCode = getICmpCode(ICI);
- unsigned RHSCode = getICmpCode(RHSICI);
- unsigned Code;
- switch (Log.getOpcode()) {
- case Instruction::And: Code = LHSCode & RHSCode; break;
- case Instruction::Or: Code = LHSCode | RHSCode; break;
- case Instruction::Xor: Code = LHSCode ^ RHSCode; break;
- default: llvm_unreachable("Illegal logical opcode!"); return 0;
- }
-
- bool isSigned = RHSICI->isSigned() || ICI->isSigned();
- Value *RV = getICmpValue(isSigned, Code, LHS, RHS, IC.getContext());
- if (Instruction *I = dyn_cast<Instruction>(RV))
- return I;
- // Otherwise, it's a constant boolean value...
- return IC.ReplaceInstUsesWith(Log, RV);
- }
-};
-} // end anonymous namespace
-
-// OptAndOp - This handles expressions of the form ((val OP C1) & C2), where
-// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is
-// guaranteed to be a binary operator.
-Instruction *InstCombiner::OptAndOp(Instruction *Op,
- ConstantInt *OpRHS,
- ConstantInt *AndRHS,
- BinaryOperator &TheAnd) {
- Value *X = Op->getOperand(0);
- Constant *Together = 0;
- if (!Op->isShift())
- Together = ConstantExpr::getAnd(AndRHS, OpRHS);
-
- switch (Op->getOpcode()) {
- case Instruction::Xor:
- if (Op->hasOneUse()) {
- // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
- Value *And = Builder->CreateAnd(X, AndRHS);
- And->takeName(Op);
- return BinaryOperator::CreateXor(And, Together);
- }
- break;
- case Instruction::Or:
- if (Together == AndRHS) // (X | C) & C --> C
- return ReplaceInstUsesWith(TheAnd, AndRHS);
-
- if (Op->hasOneUse() && Together != OpRHS) {
- // (X | C1) & C2 --> (X | (C1&C2)) & C2
- Value *Or = Builder->CreateOr(X, Together);
- Or->takeName(Op);
- return BinaryOperator::CreateAnd(Or, AndRHS);
- }
- break;
- case Instruction::Add:
- if (Op->hasOneUse()) {
- // Adding one to a single-bit bit-field should be turned into an XOR
- // of the bit. First thing to check is to see if this AND is with a
- // single bit constant.
- const APInt& AndRHSV = cast<ConstantInt>(AndRHS)->getValue();
-
- // If there is only one bit set...
- if (isOneBitSet(cast<ConstantInt>(AndRHS))) {
- // Ok, at this point, we know that we are masking the result of the
- // ADD down to exactly one bit. If the constant we are adding has
- // no bits set below this bit, then we can eliminate the ADD.
- const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue();
-
- // Check to see if any bits below the one bit set in AndRHSV are set.
- if ((AddRHS & (AndRHSV-1)) == 0) {
- // If not, the only thing that can affect the output of the AND is
- // the bit specified by AndRHSV. If that bit is set, the effect of
- // the XOR is to toggle the bit. If it is clear, then the ADD has
- // no effect.
- if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop
- TheAnd.setOperand(0, X);
- return &TheAnd;
- } else {
- // Pull the XOR out of the AND.
- Value *NewAnd = Builder->CreateAnd(X, AndRHS);
- NewAnd->takeName(Op);
- return BinaryOperator::CreateXor(NewAnd, AndRHS);
- }
- }
- }
- }
- break;
-
- case Instruction::Shl: {
- // We know that the AND will not produce any of the bits shifted in, so if
- // the anded constant includes them, clear them now!
- //
- uint32_t BitWidth = AndRHS->getType()->getBitWidth();
- uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
- APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
- ConstantInt *CI = ConstantInt::get(*Context, AndRHS->getValue() & ShlMask);
-
- if (CI->getValue() == ShlMask) {
- // Masking out bits that the shift already masks
- return ReplaceInstUsesWith(TheAnd, Op); // No need for the and.
- } else if (CI != AndRHS) { // Reducing bits set in and.
- TheAnd.setOperand(1, CI);
- return &TheAnd;
- }
- break;
- }
- case Instruction::LShr:
- {
- // We know that the AND will not produce any of the bits shifted in, so if
- // the anded constant includes them, clear them now! This only applies to
- // unsigned shifts, because a signed shr may bring in set bits!
- //
- uint32_t BitWidth = AndRHS->getType()->getBitWidth();
- uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
- APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
- ConstantInt *CI = ConstantInt::get(*Context, AndRHS->getValue() & ShrMask);
-
- if (CI->getValue() == ShrMask) {
- // Masking out bits that the shift already masks.
- return ReplaceInstUsesWith(TheAnd, Op);
- } else if (CI != AndRHS) {
- TheAnd.setOperand(1, CI); // Reduce bits set in and cst.
- return &TheAnd;
- }
- break;
- }
- case Instruction::AShr:
- // Signed shr.
- // See if this is shifting in some sign extension, then masking it out
- // with an and.
- if (Op->hasOneUse()) {
- uint32_t BitWidth = AndRHS->getType()->getBitWidth();
- uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
- APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
- Constant *C = ConstantInt::get(*Context, AndRHS->getValue() & ShrMask);
- if (C == AndRHS) { // Masking out bits shifted in.
- // (Val ashr C1) & C2 -> (Val lshr C1) & C2
- // Make the argument unsigned.
- Value *ShVal = Op->getOperand(0);
- ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
- return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
- }
- }
- break;
- }
- return 0;
-}
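-
-// Two instances (not from the original file) of the Add case above with a
-// single-bit AndRHS: a carry can never reach the masked bit from below, so
-// the add either toggles that bit or does nothing.
-//
-//   ((X + 1) & 1) == ((X & 1) ^ 1)   // add reaches bit 0: becomes an xor
-//   ((X + 2) & 1) == (X & 1)         // add never reaches bit 0: a no-op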
-
-
-/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
-/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient
-/// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
-/// whether to treat V, Lo and Hi as signed or not. IB is the location to
-/// insert new instructions.
-Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
- bool isSigned, bool Inside,
- Instruction &IB) {
- assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
- ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
- "Lo is not <= Hi in range emission code!");
-
- if (Inside) {
- if (Lo == Hi) // Trivially false.
- return new ICmpInst(ICmpInst::ICMP_NE, V, V);
-
- // V >= Min && V < Hi --> V < Hi
- if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
- ICmpInst::Predicate pred = (isSigned ?
- ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
- return new ICmpInst(pred, V, Hi);
- }
-
- // Emit V-Lo <u Hi-Lo
- Constant *NegLo = ConstantExpr::getNeg(Lo);
- Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
- Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
- return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
- }
-
- if (Lo == Hi) // Trivially true.
- return new ICmpInst(ICmpInst::ICMP_EQ, V, V);
-
- // V < Min || V >= Hi -> V > Hi-1
- Hi = SubOne(cast<ConstantInt>(Hi));
- if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
- ICmpInst::Predicate pred = (isSigned ?
- ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
- return new ICmpInst(pred, V, Hi);
- }
-
- // Emit V-Lo >u Hi-1-Lo
- // Note that Hi has already had one subtracted from it, above.
- ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
- Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
- Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
- return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);
-}
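-
-// The emitted form is the classic single-compare range test. A sketch (not
-// from the original file) of the unsigned case, relying on wraparound to
-// push values below Lo past Hi - Lo:
-//
-//   #include <cstdint>
-//   bool inRange(uint32_t V, uint32_t Lo, uint32_t Hi) { // assumes Lo <= Hi
-//     return V - Lo < Hi - Lo;   // == (V >= Lo && V < Hi)
-//   }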
-
-// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
-// any number of 0s on either side. The 1s are allowed to wrap from LSB to
-// MSB, so 0x0000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is
-// not, since all 1s are not contiguous.
-static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
- const APInt& V = Val->getValue();
- uint32_t BitWidth = Val->getType()->getBitWidth();
- if (!APIntOps::isShiftedMask(BitWidth, V)) return false;
-
- // look for the first zero bit after the run of ones
- MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
- // look for the first non-zero bit
- ME = V.getActiveBits();
- return true;
-}
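-
-// A worked instance (not from the original file): for Val == 0x0FF0 in 32
-// bits, the run is bits 4..11, (V - 1) ^ V == 0x001F has 27 leading zeros,
-// so MB == 32 - 27 == 5 and ME == getActiveBits() == 12; the run is thus
-// reported with 1-based bit positions.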
-
-/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
-/// where isSub determines whether the operator is a sub. If we can fold one of
-/// the following xforms:
-///
-/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
-/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
-/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
-///
-/// return (A +/- B).
-///
-Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
- ConstantInt *Mask, bool isSub,
- Instruction &I) {
- Instruction *LHSI = dyn_cast<Instruction>(LHS);
- if (!LHSI || LHSI->getNumOperands() != 2 ||
- !isa<ConstantInt>(LHSI->getOperand(1))) return 0;
-
- ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1));
-
- switch (LHSI->getOpcode()) {
- default: return 0;
- case Instruction::And:
- if (ConstantExpr::getAnd(N, Mask) == Mask) {
- // If the AndRHS is a power of two minus one (0+1+), this is simple.
- if ((Mask->getValue().countLeadingZeros() +
- Mask->getValue().countPopulation()) ==
- Mask->getValue().getBitWidth())
- break;
-
- // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+
- // part, we don't need any explicit masks to take them out of A. If that
- // is all N is, ignore it.
- uint32_t MB = 0, ME = 0;
- if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive
- uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth();
- APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1));
- if (MaskedValueIsZero(RHS, Mask))
- break;
- }
- }
- return 0;
- case Instruction::Or:
- case Instruction::Xor:
- // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
- if ((Mask->getValue().countLeadingZeros() +
- Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
- && ConstantExpr::getAnd(N, Mask)->isNullValue())
- break;
- return 0;
- }
-
- if (isSub)
- return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold");
- return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
-}
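-
-// Why the And case is safe when Mask is low ones (a sketch, not from the
-// original file): with Mask == 0xFF and (N & Mask) == Mask, A & N agrees
-// with A on the low eight bits, and the low bits of a sum depend only on the
-// low bits of its addends, so:
-//
-//   ((A & N) + B) & 0xFF == (A + B) & 0xFF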
-
-/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
-Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
- ICmpInst *LHS, ICmpInst *RHS) {
- // (icmp eq A, null) & (icmp eq B, null) -->
- // (icmp eq (ptrtoint(A)|ptrtoint(B)), 0)
- if (TD &&
- LHS->getPredicate() == ICmpInst::ICMP_EQ &&
- RHS->getPredicate() == ICmpInst::ICMP_EQ &&
- isa<ConstantPointerNull>(LHS->getOperand(1)) &&
- isa<ConstantPointerNull>(RHS->getOperand(1))) {
- const Type *IntPtrTy = TD->getIntPtrType(I.getContext());
- Value *A = Builder->CreatePtrToInt(LHS->getOperand(0), IntPtrTy);
- Value *B = Builder->CreatePtrToInt(RHS->getOperand(0), IntPtrTy);
- Value *NewOr = Builder->CreateOr(A, B);
- return new ICmpInst(ICmpInst::ICMP_EQ, NewOr,
- Constant::getNullValue(IntPtrTy));
- }
-
- Value *Val, *Val2;
- ConstantInt *LHSCst, *RHSCst;
- ICmpInst::Predicate LHSCC, RHSCC;
-
- // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
- if (!match(LHS, m_ICmp(LHSCC, m_Value(Val),
- m_ConstantInt(LHSCst))) ||
- !match(RHS, m_ICmp(RHSCC, m_Value(Val2),
- m_ConstantInt(RHSCst))))
- return 0;
-
- if (LHSCst == RHSCst && LHSCC == RHSCC) {
- // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
- // where C is a power of 2
- if (LHSCC == ICmpInst::ICMP_ULT &&
- LHSCst->getValue().isPowerOf2()) {
- Value *NewOr = Builder->CreateOr(Val, Val2);
- return new ICmpInst(LHSCC, NewOr, LHSCst);
- }
-
- // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
- if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
- Value *NewOr = Builder->CreateOr(Val, Val2);
- return new ICmpInst(LHSCC, NewOr, LHSCst);
- }
- }
-
- // From here on, we only handle:
- // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
- if (Val != Val2) return 0;
-
- // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
- if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
- RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
- LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
- RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
- return 0;
-
- // We can't fold (ugt x, C) & (sgt x, C2).
- if (!PredicatesFoldable(LHSCC, RHSCC))
- return 0;
-
- // Ensure that the larger constant is on the RHS.
- bool ShouldSwap;
- if (CmpInst::isSigned(LHSCC) ||
- (ICmpInst::isEquality(LHSCC) &&
- CmpInst::isSigned(RHSCC)))
- ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
- else
- ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
-
- if (ShouldSwap) {
- std::swap(LHS, RHS);
- std::swap(LHSCst, RHSCst);
- std::swap(LHSCC, RHSCC);
- }
-
- // At this point, we know we have two icmp instructions
- // comparing a value against two constants and and'ing the result
- // together. Because of the above check, we know that we only have
- // icmp eq, icmp ne, icmp [su]lt, and icmp [su]gt here. We also know
- // (from the FoldICmpLogical check above), that the two constants
- // are not equal and that the larger constant is on the RHS.
- assert(LHSCst != RHSCst && "Compares not folded above?");
-
- switch (LHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false
- case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false
- case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13
- case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13
- case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13
- return ReplaceInstUsesWith(I, LHS);
- }
- case ICmpInst::ICMP_NE:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_ULT:
- if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
- return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst);
- break; // (X != 13 & X u< 15) -> no change
- case ICmpInst::ICMP_SLT:
- if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
- return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst);
- break; // (X != 13 & X s< 15) -> no change
- case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15
- case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15
- case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15
- return ReplaceInstUsesWith(I, RHS);
- case ICmpInst::ICMP_NE:
- if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
- Constant *AddCST = ConstantExpr::getNeg(LHSCst);
- Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
- return new ICmpInst(ICmpInst::ICMP_UGT, Add,
- ConstantInt::get(Add->getType(), 1));
- }
- break; // (X != 13 & X != 15) -> no change
- }
- break;
- case ICmpInst::ICMP_ULT:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false
- case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change
- break;
- case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13
- case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13
- return ReplaceInstUsesWith(I, LHS);
- case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change
- break;
- }
- break;
- case ICmpInst::ICMP_SLT:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false
- case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change
- break;
- case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13
- case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13
- return ReplaceInstUsesWith(I, LHS);
- case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change
- break;
- }
- break;
- case ICmpInst::ICMP_UGT:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15
- case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15
- return ReplaceInstUsesWith(I, RHS);
- case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change
- break;
- case ICmpInst::ICMP_NE:
- if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
- return new ICmpInst(LHSCC, Val, RHSCst);
- break; // (X u> 13 & X != 15) -> no change
- case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1
- return InsertRangeTest(Val, AddOne(LHSCst),
- RHSCst, false, true, I);
- case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change
- break;
- }
- break;
- case ICmpInst::ICMP_SGT:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15
- case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15
- return ReplaceInstUsesWith(I, RHS);
- case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change
- break;
- case ICmpInst::ICMP_NE:
- if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
- return new ICmpInst(LHSCC, Val, RHSCst);
- break; // (X s> 13 & X != 15) -> no change
- case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1
- return InsertRangeTest(Val, AddOne(LHSCst),
- RHSCst, true, true, I);
- case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change
- break;
- }
- break;
- }
-
- return 0;
-}
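-
-// The ICMP_NE/ICMP_NE arm above uses the same subtract-and-compare trick as
-// the range test. A sketch in unsigned arithmetic (not from the original
-// file):
-//
-//   (X != 13 && X != 14)  ==  ((uint32_t)(X - 13) > 1u)
-//
-// since X - 13 lands in {0, 1} exactly when X is 13 or 14.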
-
-Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS,
- FCmpInst *RHS) {
-
- if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
- RHS->getPredicate() == FCmpInst::FCMP_ORD) {
- // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y)
- if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
- if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
- // If either of the constants are nans, then the whole thing returns
- // false.
- if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- return new FCmpInst(FCmpInst::FCMP_ORD,
- LHS->getOperand(0), RHS->getOperand(0));
- }
-
- // Handle vector zeros. This occurs because the canonical form of
- // "fcmp ord x,x" is "fcmp ord x, 0".
- if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
- isa<ConstantAggregateZero>(RHS->getOperand(1)))
- return new FCmpInst(FCmpInst::FCMP_ORD,
- LHS->getOperand(0), RHS->getOperand(0));
- return 0;
- }
-
- Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
- Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
- FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
-
-
- if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
- // Swap RHS operands to match LHS.
- Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
- std::swap(Op1LHS, Op1RHS);
- }
-
- if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
- // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
- if (Op0CC == Op1CC)
- return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
-
- if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- if (Op0CC == FCmpInst::FCMP_TRUE)
- return ReplaceInstUsesWith(I, RHS);
- if (Op1CC == FCmpInst::FCMP_TRUE)
- return ReplaceInstUsesWith(I, LHS);
-
- bool Op0Ordered;
- bool Op1Ordered;
- unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
- unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
- if (Op1Pred == 0) {
- std::swap(LHS, RHS);
- std::swap(Op0Pred, Op1Pred);
- std::swap(Op0Ordered, Op1Ordered);
- }
- if (Op0Pred == 0) {
- // uno && ueq -> uno && (uno || eq) -> ueq
- // ord && olt -> ord && (ord && lt) -> olt
- if (Op0Ordered == Op1Ordered)
- return ReplaceInstUsesWith(I, RHS);
-
- // uno && oeq -> uno && (ord && eq) -> false
- // uno && ord -> false
- if (!Op0Ordered)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- // ord && ueq -> ord && (uno || eq) -> oeq
- return cast<Instruction>(getFCmpValue(true, Op1Pred,
- Op0LHS, Op0RHS, Context));
- }
- }
-
- return 0;
-}
-
-
-Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (Value *V = SimplifyAndInst(Op0, Op1, TD))
- return ReplaceInstUsesWith(I, V);
-
- // See if we can simplify any instructions used by the instruction whose sole
- // purpose is to compute bits we don't care about.
- if (SimplifyDemandedInstructionBits(I))
- return &I;
-
-
- if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
- const APInt &AndRHSMask = AndRHS->getValue();
- APInt NotAndRHS(~AndRHSMask);
-
- // Optimize a variety of ((val OP C1) & C2) combinations...
- if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
- Value *Op0LHS = Op0I->getOperand(0);
- Value *Op0RHS = Op0I->getOperand(1);
- switch (Op0I->getOpcode()) {
- default: break;
- case Instruction::Xor:
- case Instruction::Or:
- // If the mask is only needed on one incoming arm, push it up.
- if (!Op0I->hasOneUse()) break;
-
- if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
- // Not masking anything out for the LHS, move to RHS.
- Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
- Op0RHS->getName()+".masked");
- return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS);
- }
- if (!isa<Constant>(Op0RHS) &&
- MaskedValueIsZero(Op0RHS, NotAndRHS)) {
- // Not masking anything out for the RHS, move to LHS.
- Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS,
- Op0LHS->getName()+".masked");
- return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS);
- }
-
- break;
- case Instruction::Add:
- // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS.
- // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
- // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
- if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I))
- return BinaryOperator::CreateAnd(V, AndRHS);
- if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I))
- return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes
- break;
-
- case Instruction::Sub:
- // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS.
- // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
- // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
- if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
- return BinaryOperator::CreateAnd(V, AndRHS);
-
- // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
- // has 1's for all bits that the subtraction with A might affect.
- if (Op0I->hasOneUse()) {
- uint32_t BitWidth = AndRHSMask.getBitWidth();
- uint32_t Zeros = AndRHSMask.countLeadingZeros();
- APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);
-
- ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS);
- if (!(A && A->isZero()) && // avoid infinite recursion.
- MaskedValueIsZero(Op0LHS, Mask)) {
- Value *NewNeg = Builder->CreateNeg(Op0RHS);
- return BinaryOperator::CreateAnd(NewNeg, AndRHS);
- }
- }
- break;
-
- case Instruction::Shl:
- case Instruction::LShr:
- // (1 << x) & 1 --> zext(x == 0)
- // (1 >> x) & 1 --> zext(x == 0)
- if (AndRHSMask == 1 && Op0LHS == AndRHS) {
- Value *NewICmp =
- Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType()));
- return new ZExtInst(NewICmp, I.getType());
- }
- break;
- }
-
- if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
- if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
- return Res;
- } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) {
- // If this is an integer truncation or change from signed-to-unsigned, and
- // if the source is an and/or with immediate, transform it. This
- // frequently occurs for bitfield accesses.
- if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) {
- if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) &&
- CastOp->getNumOperands() == 2)
- if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){
- if (CastOp->getOpcode() == Instruction::And) {
- // Change: and (cast (and X, C1) to T), C2
- // into : and (cast X to T), trunc_or_bitcast(C1)&C2
- // This will fold the two constants together, which may allow
- // other simplifications.
- Value *NewCast = Builder->CreateTruncOrBitCast(
- CastOp->getOperand(0), I.getType(),
- CastOp->getName()+".shrunk");
- // trunc_or_bitcast(C1)&C2
- Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
- C3 = ConstantExpr::getAnd(C3, AndRHS);
- return BinaryOperator::CreateAnd(NewCast, C3);
- } else if (CastOp->getOpcode() == Instruction::Or) {
- // Change: and (cast (or X, C1) to T), C2
- // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2
- Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
- if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS)
- // trunc(C1)&C2
- return ReplaceInstUsesWith(I, AndRHS);
- }
- }
- }
- }
-
- // Try to fold constant and into select arguments.
- if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
- if (Instruction *R = FoldOpIntoSelect(I, SI, this))
- return R;
- if (isa<PHINode>(Op0))
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
- }
-
-
- // (~A & ~B) == (~(A | B)) - De Morgan's Law
- if (Value *Op0NotVal = dyn_castNotVal(Op0))
- if (Value *Op1NotVal = dyn_castNotVal(Op1))
- if (Op0->hasOneUse() && Op1->hasOneUse()) {
- Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
- I.getName()+".demorgan");
- return BinaryOperator::CreateNot(Or);
- }
-
- {
- Value *A = 0, *B = 0, *C = 0, *D = 0;
- // (A|B) & ~(A&B) -> A^B
- if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
- match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
- ((A == C && B == D) || (A == D && B == C)))
- return BinaryOperator::CreateXor(A, B);
-
- // ~(A&B) & (A|B) -> A^B
- if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
- match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
- ((A == C && B == D) || (A == D && B == C)))
- return BinaryOperator::CreateXor(A, B);
-
- if (Op0->hasOneUse() &&
- match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
- if (A == Op1) { // (A^B)&A -> A&(A^B)
- I.swapOperands(); // Simplify below
- std::swap(Op0, Op1);
- } else if (B == Op1) { // (A^B)&B -> B&(B^A)
- cast<BinaryOperator>(Op0)->swapOperands();
- I.swapOperands(); // Simplify below
- std::swap(Op0, Op1);
- }
- }
-
- if (Op1->hasOneUse() &&
- match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
- if (B == Op0) { // B&(A^B) -> B&(B^A)
- cast<BinaryOperator>(Op1)->swapOperands();
- std::swap(A, B);
- }
- if (A == Op0) // A&(A^B) -> A & ~B
- return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp"));
- }
-
- // (A&((~A)|B)) -> A&B
- if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) ||
- match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1)))))
- return BinaryOperator::CreateAnd(A, Op1);
- if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) ||
- match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0)))))
- return BinaryOperator::CreateAnd(A, Op0);
- }
-
- if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) {
- // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
- if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
- return R;
-
- if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
- if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS))
- return Res;
- }
-
- // fold (and (cast A), (cast B)) -> (cast (and A, B))
- if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
- if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
- if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ?
- const Type *SrcTy = Op0C->getOperand(0)->getType();
- if (SrcTy == Op1C->getOperand(0)->getType() &&
- SrcTy->isIntOrIntVector() &&
- // Only do this if the casts both really cause code to be generated.
- ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
- I.getType(), TD) &&
- ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
- I.getType(), TD)) {
- Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0),
- Op1C->getOperand(0), I.getName());
- return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
- }
- }
-
- // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts.
- if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
- if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
- if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
- SI0->getOperand(1) == SI1->getOperand(1) &&
- (SI0->hasOneUse() || SI1->hasOneUse())) {
- Value *NewOp =
- Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0),
- SI0->getName());
- return BinaryOperator::Create(SI1->getOpcode(), NewOp,
- SI1->getOperand(1));
- }
- }
-
- // If and'ing two fcmp, try combine them into one.
- if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
- if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
- if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
- return Res;
- }
-
- return Changed ? &I : 0;
-}
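-
-// Bitwise checks (not from the original file) for two of the folds above;
-// both identities hold for every pair of uint32_t values:
-//
-//   (~A & ~B) == ~(A | B)             // De Morgan's Law
-//   ((A | B) & ~(A & B)) == (A ^ B)   // (A|B) & ~(A&B) -> A^B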
-
-/// CollectBSwapParts - Analyze the specified subexpression and see if it is
-/// capable of providing pieces of a bswap. The subexpression provides pieces
-/// of a bswap if it is proven that each of the non-zero bytes in the output of
-/// the expression came from the corresponding "byte swapped" byte in some other
-/// value. For example, if the current subexpression is "(shl i32 %X, 24)" then
-/// we know that the expression deposits the low byte of %X into the high byte
-/// of the bswap result and that all other bytes are zero. If this expression
-/// is accepted, the high byte of ByteValues is set to %X to record the
-/// correct match.
-///
-/// This function returns true if the match was unsuccessful and false if it
-/// succeeded.
-/// On entry to the function the "OverallLeftShift" is a signed integer value
-/// indicating the number of bytes that the subexpression is later shifted. For
-/// example, if the expression is later right shifted by 16 bits, the
-/// OverallLeftShift value would be -2 on entry. This is used to specify which
-/// byte of ByteValues is actually being set.
-///
-/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
-/// byte is masked to zero by a user. For example, in (X & 255), X will be
-/// processed with a bytemask of 1. Because bytemask is 32-bits, this limits
-/// this function to working on up to 32-byte (256 bit) values. ByteMask is
-/// always in the local (OverallLeftShift) coordinate space.
-///
-static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
- SmallVector<Value*, 8> &ByteValues) {
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- // If this is an or instruction, it may be an inner node of the bswap.
- if (I->getOpcode() == Instruction::Or) {
- return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
- ByteValues) ||
- CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
- ByteValues);
- }
-
- // If this is a logical shift by a constant multiple of 8, recurse with
- // OverallLeftShift and ByteMask adjusted.
- if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
- unsigned ShAmt =
- cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
- // Ensure the shift amount is a multiple of 8 bits and within the width.
- if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
- return true;
-
- unsigned ByteShift = ShAmt >> 3;
- if (I->getOpcode() == Instruction::Shl) {
- // X << 2 -> collect(X, +2)
- OverallLeftShift += ByteShift;
- ByteMask >>= ByteShift;
- } else {
- // X >>u 2 -> collect(X, -2)
- OverallLeftShift -= ByteShift;
- ByteMask <<= ByteShift;
- ByteMask &= (~0U >> (32-ByteValues.size()));
- }
-
- if (OverallLeftShift >= (int)ByteValues.size()) return true;
- if (OverallLeftShift <= -(int)ByteValues.size()) return true;
-
- return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
- ByteValues);
- }
-
- // If this is a logical 'and' with a mask that clears bytes, clear the
- // corresponding bytes in ByteMask.
- if (I->getOpcode() == Instruction::And &&
- isa<ConstantInt>(I->getOperand(1))) {
- // Scan every byte of the and mask, seeing if the byte is either 0 or 255.
- unsigned NumBytes = ByteValues.size();
- APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
- const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
-
- for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
- // If this byte is masked out by a later operation, we don't care what
- // the and mask is.
- if ((ByteMask & (1 << i)) == 0)
- continue;
-
- // If the AndMask is all zeros for this byte, clear the bit.
- APInt MaskB = AndMask & Byte;
- if (MaskB == 0) {
- ByteMask &= ~(1U << i);
- continue;
- }
-
- // If the AndMask is not all ones for this byte, it's not a bytezap.
- if (MaskB != Byte)
- return true;
-
- // Otherwise, this byte is kept.
- }
-
- return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
- ByteValues);
- }
- }
-
- // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
- // the input value to the bswap. Some observations: 1) if more than one byte
- // is demanded from this input, then it could not be successfully assembled
- // into a byteswap. At least one of the two bytes would not be aligned with
-// its ultimate destination.
- if (!isPowerOf2_32(ByteMask)) return true;
- unsigned InputByteNo = CountTrailingZeros_32(ByteMask);
-
- // 2) The input and ultimate destinations must line up: if byte 3 of an i32
- // is demanded, it needs to go into byte 0 of the result. This means that the
- // byte needs to be shifted until it lands in the right byte bucket. The
- // shift amount depends on the position: if the byte is coming from the high
- // part of the value (e.g. byte 3) then it must be shifted right. If from the
- // low part, it must be shifted left.
- unsigned DestByteNo = InputByteNo + OverallLeftShift;
- if (ByteValues.size()-1-DestByteNo != InputByteNo)
- return true;
-
- // If the destination byte value is already defined, the values are or'd
- // together, which isn't a bswap (unless it's an or of the same bits).
- if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V)
- return true;
- ByteValues[DestByteNo] = V;
- return false;
-}
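-
-// A worked trace (not from the original file) for an i32: ByteValues.size()
-// is 4 and the initial ByteMask is 0xF. On "(shl i32 %X, 24)", ByteShift is
-// 3, so %X is visited with OverallLeftShift == 3 and ByteMask == 0x1; at the
-// leaf, InputByteNo == 0, DestByteNo == 3, the mirror check 4-1-3 == 0
-// passes, and ByteValues[3] is set to %X.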
-
-/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
-/// If so, insert the new bswap intrinsic and return it.
-Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
- const IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
- if (!ITy || ITy->getBitWidth() % 16 ||
- // ByteMask only allows up to 32-byte values.
- ITy->getBitWidth() > 32*8)
- return 0; // Can only bswap pairs of bytes. Can't do vectors.
-
- /// ByteValues - For each byte of the result, we keep track of which value
- /// defines each byte.
- SmallVector<Value*, 8> ByteValues;
- ByteValues.resize(ITy->getBitWidth()/8);
-
- // Try to find all the pieces corresponding to the bswap.
- uint32_t ByteMask = ~0U >> (32-ByteValues.size());
- if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
- return 0;
-
- // Check that every byte is defined and that all of the bytes come from the
- // same value.
- Value *V = ByteValues[0];
- if (V == 0) return 0; // Byte 0 is undefined; this can't be a full bswap.
-
- for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
- if (ByteValues[i] != V)
- return 0;
- const Type *Tys[] = { ITy };
- Module *M = I.getParent()->getParent()->getParent();
- Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
- return CallInst::Create(F, V);
-}
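-
-// The i32 shape this matcher recognizes, written out as a sketch (not from
-// the original file):
-//
-//   #include <cstdint>
-//   uint32_t bswapIdiom(uint32_t X) {                  // hypothetical helper
-//     return (X << 24) | ((X & 0xFF00u) << 8) |
-//            ((X >> 8) & 0xFF00u) | (X >> 24);         // llvm.bswap.i32
-//   }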
-
-/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). If A
-/// is (cond?-1:0) and either B or D is ~(cond?-1:0) or (cond?0:-1), then we
-/// can simplify this expression to "cond ? C : (D or B)".
-static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
- Value *C, Value *D,
- LLVMContext *Context) {
- // If A is not a select of -1/0, this cannot match.
- Value *Cond = 0;
- if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond))))
- return 0;
-
- // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B.
- if (match(D, m_SelectCst<0, -1>(m_Specific(Cond))))
- return SelectInst::Create(Cond, C, B);
- if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
- return SelectInst::Create(Cond, C, B);
- // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D.
- if (match(B, m_SelectCst<0, -1>(m_Specific(Cond))))
- return SelectInst::Create(Cond, C, D);
- if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
- return SelectInst::Create(Cond, C, D);
- return 0;
-}
-
-/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
-Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
- ICmpInst *LHS, ICmpInst *RHS) {
- // (icmp ne A, null) | (icmp ne B, null) -->
- // (icmp ne (ptrtoint(A)|ptrtoint(B)), 0)
- if (TD &&
- LHS->getPredicate() == ICmpInst::ICMP_NE &&
- RHS->getPredicate() == ICmpInst::ICMP_NE &&
- isa<ConstantPointerNull>(LHS->getOperand(1)) &&
- isa<ConstantPointerNull>(RHS->getOperand(1))) {
- const Type *IntPtrTy = TD->getIntPtrType(I.getContext());
- Value *A = Builder->CreatePtrToInt(LHS->getOperand(0), IntPtrTy);
- Value *B = Builder->CreatePtrToInt(RHS->getOperand(0), IntPtrTy);
- Value *NewOr = Builder->CreateOr(A, B);
- return new ICmpInst(ICmpInst::ICMP_NE, NewOr,
- Constant::getNullValue(IntPtrTy));
- }
-
- Value *Val, *Val2;
- ConstantInt *LHSCst, *RHSCst;
- ICmpInst::Predicate LHSCC, RHSCC;
-
- // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
- if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) ||
- !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst))))
- return 0;
-
-
- // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
- if (LHSCst == RHSCst && LHSCC == RHSCC &&
- LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
- Value *NewOr = Builder->CreateOr(Val, Val2);
- return new ICmpInst(LHSCC, NewOr, LHSCst);
- }
-
- // From here on, we only handle:
- // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
- if (Val != Val2) return 0;
-
- // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
- if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
- RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
- LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
- RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
- return 0;
-
- // We can't fold (ugt x, C) | (sgt x, C2).
- if (!PredicatesFoldable(LHSCC, RHSCC))
- return 0;
-
- // Ensure that the larger constant is on the RHS.
- bool ShouldSwap;
- if (CmpInst::isSigned(LHSCC) ||
- (ICmpInst::isEquality(LHSCC) &&
- CmpInst::isSigned(RHSCC)))
- ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
- else
- ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
-
- if (ShouldSwap) {
- std::swap(LHS, RHS);
- std::swap(LHSCst, RHSCst);
- std::swap(LHSCC, RHSCC);
- }
-
- // At this point, we know we have two icmp instructions
- // comparing a value against two constants and or'ing the result
- // together. Because of the above check, we know that we only have
- // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the
- // FoldICmpLogical check above), that the two constants are not
- // equal.
- assert(LHSCst != RHSCst && "Compares not folded above?");
-
- switch (LHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ:
- if (LHSCst == SubOne(RHSCst)) {
- // (X == 13 | X == 14) -> X-13 <u 2
- Constant *AddCST = ConstantExpr::getNeg(LHSCst);
- Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
- AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
- return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
- }
- break; // (X == 13 | X == 15) -> no change
- case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change
- case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change
- break;
- case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15
- case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15
- case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15
- return ReplaceInstUsesWith(I, RHS);
- }
- break;
- case ICmpInst::ICMP_NE:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13
- case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13
- case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13
- return ReplaceInstUsesWith(I, LHS);
- case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true
- case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true
- case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- }
- break;
- case ICmpInst::ICMP_ULT:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change
- break;
- case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2
- // If RHSCst is [us]MAXINT, it is always false. Not handling
- // this can cause overflow.
- if (RHSCst->isMaxValue(false))
- return ReplaceInstUsesWith(I, LHS);
- return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
- false, false, I);
- case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change
- break;
- case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15
- case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15
- return ReplaceInstUsesWith(I, RHS);
- case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change
- break;
- }
- break;
- case ICmpInst::ICMP_SLT:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change
- break;
- case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2
- // If RHSCst is [us]MAXINT, it is always false. Not handling
- // this can cause overflow.
- if (RHSCst->isMaxValue(true))
- return ReplaceInstUsesWith(I, LHS);
- return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
- true, false, I);
- case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change
- break;
- case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15
- case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15
- return ReplaceInstUsesWith(I, RHS);
- case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change
- break;
- }
- break;
- case ICmpInst::ICMP_UGT:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13
- case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13
- return ReplaceInstUsesWith(I, LHS);
- case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change
- break;
- case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true
- case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change
- break;
- }
- break;
- case ICmpInst::ICMP_SGT:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13
- case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13
- return ReplaceInstUsesWith(I, LHS);
- case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change
- break;
- case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true
- case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change
- break;
- }
- break;
- }
- return 0;
-}
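-
-// The ICMP_EQ/ICMP_EQ arm above is the dual of the 'and' trick: two adjacent
-// equalities collapse to one unsigned compare (a sketch, not from the
-// original file):
-//
-//   (X == 13 || X == 14)  ==  ((uint32_t)(X - 13) < 2u)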
-
-Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS,
- FCmpInst *RHS) {
- if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
- RHS->getPredicate() == FCmpInst::FCMP_UNO &&
- LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
- if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
- if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
- // If either of the constants are nans, then the whole thing returns
- // true.
- if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
-
- // Otherwise, no need to compare the two constants, compare the
- // rest.
- return new FCmpInst(FCmpInst::FCMP_UNO,
- LHS->getOperand(0), RHS->getOperand(0));
- }
-
- // Handle vector zeros. This occurs because the canonical form of
- // "fcmp uno x,x" is "fcmp uno x, 0".
- if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
- isa<ConstantAggregateZero>(RHS->getOperand(1)))
- return new FCmpInst(FCmpInst::FCMP_UNO,
- LHS->getOperand(0), RHS->getOperand(0));
-
- return 0;
- }
-
- Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
- Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
- FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
-
- if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
- // Swap RHS operands to match LHS.
- Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
- std::swap(Op1LHS, Op1RHS);
- }
- if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
- // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
- if (Op0CC == Op1CC)
- return new FCmpInst((FCmpInst::Predicate)Op0CC,
- Op0LHS, Op0RHS);
- if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- if (Op0CC == FCmpInst::FCMP_FALSE)
- return ReplaceInstUsesWith(I, RHS);
- if (Op1CC == FCmpInst::FCMP_FALSE)
- return ReplaceInstUsesWith(I, LHS);
- bool Op0Ordered;
- bool Op1Ordered;
- unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
- unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
- if (Op0Ordered == Op1Ordered) {
- // If both are ordered or unordered, return a new fcmp with
- // or'ed predicates.
- Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred,
- Op0LHS, Op0RHS, Context);
- if (Instruction *I = dyn_cast<Instruction>(RV))
- return I;
- // Otherwise, it's a constant boolean value...
- return ReplaceInstUsesWith(I, RV);
- }
- }
- return 0;
-}
-
-/// FoldOrWithConstants - This helper function folds:
-///
-/// ((A | B) & C1) | (B & C2)
-///
-/// into:
-///
-/// (A & C1) | B
-///
-/// when the XOR of the two constants is "all ones" (-1).
-Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
- Value *A, Value *B, Value *C) {
- ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
- if (!CI1) return 0;
-
- Value *V1 = 0;
- ConstantInt *CI2 = 0;
- if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0;
-
- APInt Xor = CI1->getValue() ^ CI2->getValue();
- if (!Xor.isAllOnesValue()) return 0;
-
- if (V1 == A || V1 == B) {
- Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1);
- return BinaryOperator::CreateOr(NewOp, V1);
- }
-
- return 0;
-}
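-
-// Editor's worked example for FoldOrWithConstants (not original source):
-// with C1 = 1 and C2 = -2, C1 ^ C2 == -1, so
-//   %t0 = or  i32 %a, %b
-//   %t1 = and i32 %t0, 1
-//   %t2 = and i32 %b, -2
-//   %or = or  i32 %t1, %t2   ; --> or i32 (and i32 %a, 1), %b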
-
-Instruction *InstCombiner::visitOr(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (Value *V = SimplifyOrInst(Op0, Op1, TD))
- return ReplaceInstUsesWith(I, V);
-
-
- // See if we can simplify any instructions used by the instruction whose sole
- // purpose is to compute bits we don't care about.
- if (SimplifyDemandedInstructionBits(I))
- return &I;
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- ConstantInt *C1 = 0; Value *X = 0;
- // (X & C1) | C2 --> (X | C2) & (C1|C2)
- if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) &&
- isOnlyUse(Op0)) {
- Value *Or = Builder->CreateOr(X, RHS);
- Or->takeName(Op0);
- return BinaryOperator::CreateAnd(Or,
- ConstantInt::get(*Context, RHS->getValue() | C1->getValue()));
- }
-
- // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
- if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) &&
- isOnlyUse(Op0)) {
- Value *Or = Builder->CreateOr(X, RHS);
- Or->takeName(Op0);
- return BinaryOperator::CreateXor(Or,
- ConstantInt::get(*Context, C1->getValue() & ~RHS->getValue()));
- }
-
- // Try to fold constant and into select arguments.
- if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
- if (Instruction *R = FoldOpIntoSelect(I, SI, this))
- return R;
- if (isa<PHINode>(Op0))
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
- }
-
- Value *A = 0, *B = 0;
- ConstantInt *C1 = 0, *C2 = 0;
-
- // (A | B) | C and A | (B | C) -> bswap if possible.
-  // (A >> B) | (C << D) and (A << B) | (C >> D) -> bswap if possible.
- if (match(Op0, m_Or(m_Value(), m_Value())) ||
- match(Op1, m_Or(m_Value(), m_Value())) ||
- (match(Op0, m_Shift(m_Value(), m_Value())) &&
- match(Op1, m_Shift(m_Value(), m_Value())))) {
- if (Instruction *BSwap = MatchBSwap(I))
- return BSwap;
- }
-
- // (X^C)|Y -> (X|Y)^C iff Y&C == 0
- if (Op0->hasOneUse() &&
- match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
- MaskedValueIsZero(Op1, C1->getValue())) {
- Value *NOr = Builder->CreateOr(A, Op1);
- NOr->takeName(Op0);
- return BinaryOperator::CreateXor(NOr, C1);
- }
-
- // Y|(X^C) -> (X|Y)^C iff Y&C == 0
- if (Op1->hasOneUse() &&
- match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
- MaskedValueIsZero(Op0, C1->getValue())) {
- Value *NOr = Builder->CreateOr(A, Op0);
- NOr->takeName(Op0);
- return BinaryOperator::CreateXor(NOr, C1);
- }
-
- // (A & C)|(B & D)
- Value *C = 0, *D = 0;
- if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
- match(Op1, m_And(m_Value(B), m_Value(D)))) {
- Value *V1 = 0, *V2 = 0, *V3 = 0;
- C1 = dyn_cast<ConstantInt>(C);
- C2 = dyn_cast<ConstantInt>(D);
- if (C1 && C2) { // (A & C1)|(B & C2)
- // If we have: ((V + N) & C1) | (V & C2)
- // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
- // replace with V+N.
- if (C1->getValue() == ~C2->getValue()) {
- if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+
- match(A, m_Add(m_Value(V1), m_Value(V2)))) {
- // Add commutes, try both ways.
- if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
- return ReplaceInstUsesWith(I, A);
- if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
- return ReplaceInstUsesWith(I, A);
- }
- // Or commutes, try both ways.
- if ((C1->getValue() & (C1->getValue()+1)) == 0 &&
- match(B, m_Add(m_Value(V1), m_Value(V2)))) {
- // Add commutes, try both ways.
- if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
- return ReplaceInstUsesWith(I, B);
- if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
- return ReplaceInstUsesWith(I, B);
- }
- }
- V1 = 0; V2 = 0; V3 = 0;
- }
-
- // Check to see if we have any common things being and'ed. If so, find the
- // terms for V1 & (V2|V3).
- if (isOnlyUse(Op0) || isOnlyUse(Op1)) {
- if (A == B) // (A & C)|(A & D) == A & (C|D)
- V1 = A, V2 = C, V3 = D;
- else if (A == D) // (A & C)|(B & A) == A & (B|C)
- V1 = A, V2 = B, V3 = C;
- else if (C == B) // (A & C)|(C & D) == C & (A|D)
- V1 = C, V2 = A, V3 = D;
- else if (C == D) // (A & C)|(B & C) == C & (A|B)
- V1 = C, V2 = A, V3 = B;
-
- if (V1) {
- Value *Or = Builder->CreateOr(V2, V3, "tmp");
- return BinaryOperator::CreateAnd(V1, Or);
- }
- }
-
- // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants
- if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D, Context))
- return Match;
- if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C, Context))
- return Match;
- if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D, Context))
- return Match;
- if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C, Context))
- return Match;
-
- // ((A&~B)|(~A&B)) -> A^B
- if ((match(C, m_Not(m_Specific(D))) &&
- match(B, m_Not(m_Specific(A)))))
- return BinaryOperator::CreateXor(A, D);
- // ((~B&A)|(~A&B)) -> A^B
- if ((match(A, m_Not(m_Specific(D))) &&
- match(B, m_Not(m_Specific(C)))))
- return BinaryOperator::CreateXor(C, D);
- // ((A&~B)|(B&~A)) -> A^B
- if ((match(C, m_Not(m_Specific(B))) &&
- match(D, m_Not(m_Specific(A)))))
- return BinaryOperator::CreateXor(A, B);
- // ((~B&A)|(B&~A)) -> A^B
- if ((match(A, m_Not(m_Specific(B))) &&
- match(D, m_Not(m_Specific(C)))))
- return BinaryOperator::CreateXor(C, B);
- }
-
- // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts.
- if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
- if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
- if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
- SI0->getOperand(1) == SI1->getOperand(1) &&
- (SI0->hasOneUse() || SI1->hasOneUse())) {
- Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0),
- SI0->getName());
- return BinaryOperator::Create(SI1->getOpcode(), NewOp,
- SI1->getOperand(1));
- }
- }
-
- // ((A|B)&1)|(B&-2) -> (A&1) | B
- if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
- match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
- Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C);
- if (Ret) return Ret;
- }
- // (B&-2)|((A|B)&1) -> (A&1) | B
- if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
- match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
- Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C);
- if (Ret) return Ret;
- }
-
- // (~A | ~B) == (~(A & B)) - De Morgan's Law
- if (Value *Op0NotVal = dyn_castNotVal(Op0))
- if (Value *Op1NotVal = dyn_castNotVal(Op1))
- if (Op0->hasOneUse() && Op1->hasOneUse()) {
- Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal,
- I.getName()+".demorgan");
- return BinaryOperator::CreateNot(And);
- }
-
- // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
- if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) {
- if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
- return R;
-
- if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
- if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS))
- return Res;
- }
-
- // fold (or (cast A), (cast B)) -> (cast (or A, B))
- if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
- if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
- if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
- if (!isa<ICmpInst>(Op0C->getOperand(0)) ||
- !isa<ICmpInst>(Op1C->getOperand(0))) {
- const Type *SrcTy = Op0C->getOperand(0)->getType();
- if (SrcTy == Op1C->getOperand(0)->getType() &&
- SrcTy->isIntOrIntVector() &&
- // Only do this if the casts both really cause code to be
- // generated.
- ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
- I.getType(), TD) &&
- ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
- I.getType(), TD)) {
- Value *NewOp = Builder->CreateOr(Op0C->getOperand(0),
- Op1C->getOperand(0), I.getName());
- return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
- }
- }
- }
- }
-
-
- // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
- if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
- if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
- if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
- return Res;
- }
-
- return Changed ? &I : 0;
-}
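-
-// Editor's sketch of the De Morgan fold in visitOr (not original source):
-//   %na = xor i32 %a, -1
-//   %nb = xor i32 %b, -1
-//   %or = or  i32 %na, %nb
-// becomes
-//   %and = and i32 %a, %b
-//   %r   = xor i32 %and, -1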
-
-namespace {
-
-// XorSelf - Implements: X ^ X --> 0
-struct XorSelf {
- Value *RHS;
- XorSelf(Value *rhs) : RHS(rhs) {}
- bool shouldApply(Value *LHS) const { return LHS == RHS; }
- Instruction *apply(BinaryOperator &Xor) const {
- return &Xor;
- }
-};
-
-}
-
-Instruction *InstCombiner::visitXor(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (isa<UndefValue>(Op1)) {
- if (isa<UndefValue>(Op0))
- // Handle undef ^ undef -> 0 special case. This is a common
- // idiom (misuse).
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef
- }
-
- // xor X, X = 0, even if X is nested in a sequence of Xor's.
- if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) {
-    assert(Result == &I && "AssociativeOpt didn't work?");
-    (void)Result;  // Silence unused-variable warnings in release builds.
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- }
-
- // See if we can simplify any instructions used by the instruction whose sole
- // purpose is to compute bits we don't care about.
- if (SimplifyDemandedInstructionBits(I))
- return &I;
- if (isa<VectorType>(I.getType()))
- if (isa<ConstantAggregateZero>(Op1))
- return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X
-
- // Is this a ~ operation?
- if (Value *NotOp = dyn_castNotVal(&I)) {
- if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
- if (Op0I->getOpcode() == Instruction::And ||
- Op0I->getOpcode() == Instruction::Or) {
- // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
-      // ~(~X | Y) --> (X & ~Y) - De Morgan's Law
- if (dyn_castNotVal(Op0I->getOperand(1)))
- Op0I->swapOperands();
- if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
- Value *NotY =
- Builder->CreateNot(Op0I->getOperand(1),
- Op0I->getOperand(1)->getName()+".not");
- if (Op0I->getOpcode() == Instruction::And)
- return BinaryOperator::CreateOr(Op0NotVal, NotY);
- return BinaryOperator::CreateAnd(Op0NotVal, NotY);
- }
-
- // ~(X & Y) --> (~X | ~Y) - De Morgan's Law
-      // ~(X | Y) --> (~X & ~Y) - De Morgan's Law
- if (isFreeToInvert(Op0I->getOperand(0)) &&
- isFreeToInvert(Op0I->getOperand(1))) {
- Value *NotX =
- Builder->CreateNot(Op0I->getOperand(0), "notlhs");
- Value *NotY =
- Builder->CreateNot(Op0I->getOperand(1), "notrhs");
- if (Op0I->getOpcode() == Instruction::And)
- return BinaryOperator::CreateOr(NotX, NotY);
- return BinaryOperator::CreateAnd(NotX, NotY);
- }
- }
- }
- }
-
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- if (RHS->isOne() && Op0->hasOneUse()) {
- // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
- return new ICmpInst(ICI->getInversePredicate(),
- ICI->getOperand(0), ICI->getOperand(1));
-
- if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0))
- return new FCmpInst(FCI->getInversePredicate(),
- FCI->getOperand(0), FCI->getOperand(1));
- }
-
- // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
- if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
- if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
- if (CI->hasOneUse() && Op0C->hasOneUse()) {
- Instruction::CastOps Opcode = Op0C->getOpcode();
- if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
- (RHS == ConstantExpr::getCast(Opcode,
- ConstantInt::getTrue(*Context),
- Op0C->getDestTy()))) {
- CI->setPredicate(CI->getInversePredicate());
- return CastInst::Create(Opcode, CI, Op0C->getType());
- }
- }
- }
- }
-
- if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
- // ~(c-X) == X-c-1 == X+(-c-1)
- if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
- if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
- Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
- Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
- ConstantInt::get(I.getType(), 1));
- return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
- }
-
- if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
- if (Op0I->getOpcode() == Instruction::Add) {
-          // ~(X+c) --> (-c-1)-X
- if (RHS->isAllOnesValue()) {
- Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
- return BinaryOperator::CreateSub(
- ConstantExpr::getSub(NegOp0CI,
- ConstantInt::get(I.getType(), 1)),
- Op0I->getOperand(0));
- } else if (RHS->getValue().isSignBit()) {
- // (X + C) ^ signbit -> (X + C + signbit)
- Constant *C = ConstantInt::get(*Context,
- RHS->getValue() + Op0CI->getValue());
- return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
-
- }
- } else if (Op0I->getOpcode() == Instruction::Or) {
-        // (X|C1)^C2 -> X^(C1|C2) iff (X & C1) == 0
- if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) {
- Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
- // Anything in both C1 and C2 is known to be zero, remove it from
- // NewRHS.
- Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
- NewRHS = ConstantExpr::getAnd(NewRHS,
- ConstantExpr::getNot(CommonBits));
- Worklist.Add(Op0I);
- I.setOperand(0, Op0I->getOperand(0));
- I.setOperand(1, NewRHS);
- return &I;
- }
- }
- }
- }
-
- // Try to fold constant and into select arguments.
- if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
- if (Instruction *R = FoldOpIntoSelect(I, SI, this))
- return R;
- if (isa<PHINode>(Op0))
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
- }
-
- if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1
- if (X == Op1)
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
- if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1
- if (X == Op0)
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
-
- BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
- if (Op1I) {
- Value *A, *B;
- if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) {
- if (A == Op0) { // B^(B|A) == (A|B)^B
- Op1I->swapOperands();
- I.swapOperands();
- std::swap(Op0, Op1);
- } else if (B == Op0) { // B^(A|B) == (A|B)^B
- I.swapOperands(); // Simplified below.
- std::swap(Op0, Op1);
- }
- } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) {
- return ReplaceInstUsesWith(I, B); // A^(A^B) == B
- } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) {
- return ReplaceInstUsesWith(I, A); // A^(B^A) == B
- } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&
- Op1I->hasOneUse()){
- if (A == Op0) { // A^(A&B) -> A^(B&A)
- Op1I->swapOperands();
- std::swap(A, B);
- }
- if (B == Op0) { // A^(B&A) -> (B&A)^A
- I.swapOperands(); // Simplified below.
- std::swap(Op0, Op1);
- }
- }
- }
-
- BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
- if (Op0I) {
- Value *A, *B;
- if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
- Op0I->hasOneUse()) {
- if (A == Op1) // (B|A)^B == (A|B)^B
- std::swap(A, B);
- if (B == Op1) // (A|B)^B == A & ~B
- return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp"));
- } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) {
- return ReplaceInstUsesWith(I, B); // (A^B)^A == B
- } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) {
- return ReplaceInstUsesWith(I, A); // (B^A)^A == B
- } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
- Op0I->hasOneUse()){
- if (A == Op1) // (A&B)^A -> (B&A)^A
- std::swap(A, B);
- if (B == Op1 && // (B&A)^A == ~B & A
- !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C
- return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1);
- }
- }
- }
-
- // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts.
- if (Op0I && Op1I && Op0I->isShift() &&
- Op0I->getOpcode() == Op1I->getOpcode() &&
- Op0I->getOperand(1) == Op1I->getOperand(1) &&
-      (Op0I->hasOneUse() || Op1I->hasOneUse())) {
- Value *NewOp =
- Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0),
- Op0I->getName());
- return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
- Op1I->getOperand(1));
- }
-
- if (Op0I && Op1I) {
- Value *A, *B, *C, *D;
- // (A & B)^(A | B) -> A ^ B
- if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
- match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
- if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::CreateXor(A, B);
- }
- // (A | B)^(A & B) -> A ^ B
- if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
- match(Op1I, m_And(m_Value(C), m_Value(D)))) {
- if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::CreateXor(A, B);
- }
-
- // (A & B)^(C & D)
- if ((Op0I->hasOneUse() || Op1I->hasOneUse()) &&
- match(Op0I, m_And(m_Value(A), m_Value(B))) &&
- match(Op1I, m_And(m_Value(C), m_Value(D)))) {
-      // (X & Y)^(X & Z) -> (Y^Z) & X
- Value *X = 0, *Y = 0, *Z = 0;
- if (A == C)
- X = A, Y = B, Z = D;
- else if (A == D)
- X = A, Y = B, Z = C;
- else if (B == C)
- X = B, Y = A, Z = D;
- else if (B == D)
- X = B, Y = A, Z = C;
-
- if (X) {
- Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName());
- return BinaryOperator::CreateAnd(NewOp, X);
- }
- }
- }
-
- // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
- if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
- if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
- return R;
-
- // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
- if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
- if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
- if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind?
- const Type *SrcTy = Op0C->getOperand(0)->getType();
- if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
- // Only do this if the casts both really cause code to be generated.
- ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
- I.getType(), TD) &&
- ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
- I.getType(), TD)) {
- Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
- Op1C->getOperand(0), I.getName());
- return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
- }
- }
- }
-
- return Changed ? &I : 0;
-}
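-
-// Editor's sketch of the (A & B)^(A | B) --> A ^ B fold in visitXor (not
-// original source):
-//   %t0 = and i32 %a, %b
-//   %t1 = or  i32 %a, %b
-//   %t2 = xor i32 %t0, %t1   ; --> xor i32 %a, %b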
-
-static ConstantInt *ExtractElement(Constant *V, Constant *Idx,
- LLVMContext *Context) {
- return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
-}
-
-static bool HasAddOverflow(ConstantInt *Result,
- ConstantInt *In1, ConstantInt *In2,
- bool IsSigned) {
- if (IsSigned)
- if (In2->getValue().isNegative())
- return Result->getValue().sgt(In1->getValue());
- else
- return Result->getValue().slt(In1->getValue());
- else
- return Result->getValue().ult(In1->getValue());
-}
-
-/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
-/// overflowed for this type.
-static bool AddWithOverflow(Constant *&Result, Constant *In1,
- Constant *In2, LLVMContext *Context,
- bool IsSigned = false) {
- Result = ConstantExpr::getAdd(In1, In2);
-
- if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- Constant *Idx = ConstantInt::get(Type::getInt32Ty(*Context), i);
- if (HasAddOverflow(ExtractElement(Result, Idx, Context),
- ExtractElement(In1, Idx, Context),
- ExtractElement(In2, Idx, Context),
- IsSigned))
- return true;
- }
- return false;
- }
-
- return HasAddOverflow(cast<ConstantInt>(Result),
- cast<ConstantInt>(In1), cast<ConstantInt>(In2),
- IsSigned);
-}
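-
-// Editor's note on HasAddOverflow (not original source): for signed addition,
-// In1 + In2 wrapped iff adding a negative In2 moved the value up (Result s>
-// In1) or adding a non-negative In2 moved it down (Result s< In1).  E.g. on
-// i8, 100 + 100 wraps to -56, and -56 s< 100, so overflow is reported.
-// Unsigned wraparound is simply Result u< In1.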
-
-static bool HasSubOverflow(ConstantInt *Result,
- ConstantInt *In1, ConstantInt *In2,
- bool IsSigned) {
- if (IsSigned)
- if (In2->getValue().isNegative())
- return Result->getValue().slt(In1->getValue());
- else
- return Result->getValue().sgt(In1->getValue());
- else
- return Result->getValue().ugt(In1->getValue());
-}
-
-/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
-/// overflowed for this type.
-static bool SubWithOverflow(Constant *&Result, Constant *In1,
- Constant *In2, LLVMContext *Context,
- bool IsSigned = false) {
- Result = ConstantExpr::getSub(In1, In2);
-
- if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- Constant *Idx = ConstantInt::get(Type::getInt32Ty(*Context), i);
- if (HasSubOverflow(ExtractElement(Result, Idx, Context),
- ExtractElement(In1, Idx, Context),
- ExtractElement(In2, Idx, Context),
- IsSigned))
- return true;
- }
- return false;
- }
-
- return HasSubOverflow(cast<ConstantInt>(Result),
- cast<ConstantInt>(In1), cast<ConstantInt>(In2),
- IsSigned);
-}
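-
-// Editor's note, mirroring the addition case (not original source): on i8,
-// -100 - 100 wraps to 56, and 56 s> -100 with a non-negative In2, so
-// HasSubOverflow reports overflow; unsigned underflow is Result u> In1,
-// e.g. 5 - 10 wraps to 251 and 251 u> 5.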
-
-
-/// FoldGEPICmp - Fold comparisons between a GEP instruction and something
-/// else. At this point we know that the GEP is on the LHS of the comparison.
-Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
- ICmpInst::Predicate Cond,
- Instruction &I) {
- // Look through bitcasts.
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
- RHS = BCI->getOperand(0);
-
- Value *PtrBase = GEPLHS->getOperand(0);
- if (TD && PtrBase == RHS && GEPLHS->isInBounds()) {
- // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
- // This transformation (ignoring the base and scales) is valid because we
- // know pointers can't overflow since the gep is inbounds. See if we can
- // output an optimized form.
- Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this);
-
- // If not, synthesize the offset the hard way.
- if (Offset == 0)
- Offset = EmitGEPOffset(GEPLHS, *this);
- return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
- Constant::getNullValue(Offset->getType()));
- } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
- // If the base pointers are different, but the indices are the same, just
- // compare the base pointer.
- if (PtrBase != GEPRHS->getOperand(0)) {
- bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands();
- IndicesTheSame &= GEPLHS->getOperand(0)->getType() ==
- GEPRHS->getOperand(0)->getType();
- if (IndicesTheSame)
- for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
- if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
- IndicesTheSame = false;
- break;
- }
-
- // If all indices are the same, just compare the base pointers.
- if (IndicesTheSame)
- return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
- GEPLHS->getOperand(0), GEPRHS->getOperand(0));
-
-      // Otherwise, the base pointers are different and the indices are
-      // different; bail out.
- return 0;
- }
-
- // If one of the GEPs has all zero indices, recurse.
- bool AllZeros = true;
- for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
- if (!isa<Constant>(GEPLHS->getOperand(i)) ||
- !cast<Constant>(GEPLHS->getOperand(i))->isNullValue()) {
- AllZeros = false;
- break;
- }
- if (AllZeros)
- return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
- ICmpInst::getSwappedPredicate(Cond), I);
-
- // If the other GEP has all zero indices, recurse.
- AllZeros = true;
- for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
- if (!isa<Constant>(GEPRHS->getOperand(i)) ||
- !cast<Constant>(GEPRHS->getOperand(i))->isNullValue()) {
- AllZeros = false;
- break;
- }
- if (AllZeros)
- return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
-
- if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
- // If the GEPs only differ by one index, compare it.
- unsigned NumDifferences = 0; // Keep track of # differences.
- unsigned DiffOperand = 0; // The operand that differs.
- for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
- if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
- if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() !=
- GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) {
- // Irreconcilable differences.
- NumDifferences = 2;
- break;
- } else {
- if (NumDifferences++) break;
- DiffOperand = i;
- }
- }
-
- if (NumDifferences == 0) // SAME GEP?
- return ReplaceInstUsesWith(I, // No comparison is needed here.
- ConstantInt::get(Type::getInt1Ty(*Context),
- ICmpInst::isTrueWhenEqual(Cond)));
-
- else if (NumDifferences == 1) {
- Value *LHSV = GEPLHS->getOperand(DiffOperand);
- Value *RHSV = GEPRHS->getOperand(DiffOperand);
- // Make sure we do a signed comparison here.
- return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
- }
- }
-
- // Only lower this if the icmp is the only user of the GEP or if we expect
- // the result to fold to a constant!
- if (TD &&
- (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
- (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
-      // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)) ---> (OFFSET1 cmp OFFSET2)
- Value *L = EmitGEPOffset(GEPLHS, *this);
- Value *R = EmitGEPOffset(GEPRHS, *this);
- return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
- }
- }
- return 0;
-}
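-
-// Editor's sketch of the inbounds-GEP fold above (not original source):
-//   %p2 = getelementptr inbounds i32* %p, i64 %i
-//   %c  = icmp eq i32* %p2, %p
-// Since an inbounds GEP cannot wrap, this becomes a signed compare of the
-// computed byte offset against zero, e.g. icmp eq i64 %offset, 0.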
-
-/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
-///
-Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
- Instruction *LHSI,
- Constant *RHSC) {
- if (!isa<ConstantFP>(RHSC)) return 0;
- const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
-
- // Get the width of the mantissa. We don't want to hack on conversions that
- // might lose information from the integer, e.g. "i64 -> float"
- int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
- if (MantissaWidth == -1) return 0; // Unknown.
-
-  // Check to see that the input is converted from an integer type that is
-  // small enough to preserve all bits.  TODO: check here for "known" sign
-  // bits.  This would allow us to handle (fptosi (x >>s 62) to float) when
-  // x is i64, for example.
- unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits();
-
- // If this is a uitofp instruction, we need an extra bit to hold the sign.
- bool LHSUnsigned = isa<UIToFPInst>(LHSI);
- if (LHSUnsigned)
- ++InputSize;
-
- // If the conversion would lose info, don't hack on this.
- if ((int)InputSize > MantissaWidth)
- return 0;
-
- // Otherwise, we can potentially simplify the comparison. We know that it
- // will always come through as an integer value and we know the constant is
- // not a NAN (it would have been previously simplified).
- assert(!RHS.isNaN() && "NaN comparison not already folded!");
-
- ICmpInst::Predicate Pred;
- switch (I.getPredicate()) {
- default: llvm_unreachable("Unexpected predicate!");
- case FCmpInst::FCMP_UEQ:
- case FCmpInst::FCMP_OEQ:
- Pred = ICmpInst::ICMP_EQ;
- break;
- case FCmpInst::FCMP_UGT:
- case FCmpInst::FCMP_OGT:
- Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT;
- break;
- case FCmpInst::FCMP_UGE:
- case FCmpInst::FCMP_OGE:
- Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE;
- break;
- case FCmpInst::FCMP_ULT:
- case FCmpInst::FCMP_OLT:
- Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT;
- break;
- case FCmpInst::FCMP_ULE:
- case FCmpInst::FCMP_OLE:
- Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE;
- break;
- case FCmpInst::FCMP_UNE:
- case FCmpInst::FCMP_ONE:
- Pred = ICmpInst::ICMP_NE;
- break;
- case FCmpInst::FCMP_ORD:
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- case FCmpInst::FCMP_UNO:
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- }
-
- const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
-
- // Now we know that the APFloat is a normal number, zero or inf.
-
- // See if the FP constant is too large for the integer. For example,
- // comparing an i8 to 300.0.
- unsigned IntWidth = IntTy->getScalarSizeInBits();
-
- if (!LHSUnsigned) {
- // If the RHS value is > SignedMax, fold the comparison. This handles +INF
- // and large values.
- APFloat SMax(RHS.getSemantics(), APFloat::fcZero, false);
- SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
- APFloat::rmNearestTiesToEven);
- if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0
- if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT ||
- Pred == ICmpInst::ICMP_SLE)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- }
- } else {
- // If the RHS value is > UnsignedMax, fold the comparison. This handles
- // +INF and large values.
- APFloat UMax(RHS.getSemantics(), APFloat::fcZero, false);
- UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false,
- APFloat::rmNearestTiesToEven);
- if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0
- if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT ||
- Pred == ICmpInst::ICMP_ULE)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- }
- }
-
- if (!LHSUnsigned) {
- // See if the RHS value is < SignedMin.
- APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false);
- SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
- APFloat::rmNearestTiesToEven);
- if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
- if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
- Pred == ICmpInst::ICMP_SGE)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- }
- }
-
- // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or
- // [0, UMAX], but it may still be fractional. See if it is fractional by
- // casting the FP value to the integer value and back, checking for equality.
- // Don't do this for zero, because -0.0 is not fractional.
- Constant *RHSInt = LHSUnsigned
- ? ConstantExpr::getFPToUI(RHSC, IntTy)
- : ConstantExpr::getFPToSI(RHSC, IntTy);
- if (!RHS.isZero()) {
- bool Equal = LHSUnsigned
- ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
- : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
- if (!Equal) {
- // If we had a comparison against a fractional value, we have to adjust
- // the compare predicate and sometimes the value. RHSC is rounded towards
- // zero at this point.
- switch (Pred) {
- default: llvm_unreachable("Unexpected integer comparison!");
- case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- case ICmpInst::ICMP_ULE:
- // (float)int <= 4.4 --> int <= 4
- // (float)int <= -4.4 --> false
- if (RHS.isNegative())
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- break;
- case ICmpInst::ICMP_SLE:
- // (float)int <= 4.4 --> int <= 4
- // (float)int <= -4.4 --> int < -4
- if (RHS.isNegative())
- Pred = ICmpInst::ICMP_SLT;
- break;
- case ICmpInst::ICMP_ULT:
- // (float)int < -4.4 --> false
- // (float)int < 4.4 --> int <= 4
- if (RHS.isNegative())
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- Pred = ICmpInst::ICMP_ULE;
- break;
- case ICmpInst::ICMP_SLT:
- // (float)int < -4.4 --> int < -4
- // (float)int < 4.4 --> int <= 4
- if (!RHS.isNegative())
- Pred = ICmpInst::ICMP_SLE;
- break;
- case ICmpInst::ICMP_UGT:
- // (float)int > 4.4 --> int > 4
- // (float)int > -4.4 --> true
- if (RHS.isNegative())
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- break;
- case ICmpInst::ICMP_SGT:
- // (float)int > 4.4 --> int > 4
- // (float)int > -4.4 --> int >= -4
- if (RHS.isNegative())
- Pred = ICmpInst::ICMP_SGE;
- break;
- case ICmpInst::ICMP_UGE:
- // (float)int >= -4.4 --> true
- // (float)int >= 4.4 --> int > 4
- if (!RHS.isNegative())
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- Pred = ICmpInst::ICMP_UGT;
- break;
- case ICmpInst::ICMP_SGE:
- // (float)int >= -4.4 --> int >= -4
- // (float)int >= 4.4 --> int > 4
- if (!RHS.isNegative())
- Pred = ICmpInst::ICMP_SGT;
- break;
- }
- }
- }
-
- // Lower this FP comparison into an appropriate integer version of the
- // comparison.
- return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
-}
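-
-// Editor's worked example for FoldFCmp_IntToFP_Cst (not original source):
-// 4.4 is fractional and rounds toward zero to 4, so with an i8 source
-//   %f = sitofp i8 %x to float
-//   %c = fcmp olt float %f, 4.4   ; --> icmp sle i8 %x, 4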
-
-Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
- bool Changed = false;
-
-  /// Orders the operands of the compare so that they are listed from most
-  /// complex to least complex.  This puts constants last, after unary
-  /// operators, which in turn come after binary operators.
- if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
- I.swapOperands();
- Changed = true;
- }
-
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD))
- return ReplaceInstUsesWith(I, V);
-
- // Simplify 'fcmp pred X, X'
- if (Op0 == Op1) {
- switch (I.getPredicate()) {
- default: llvm_unreachable("Unknown predicate!");
- case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y)
- case FCmpInst::FCMP_ULT: // True if unordered or less than
- case FCmpInst::FCMP_UGT: // True if unordered or greater than
- case FCmpInst::FCMP_UNE: // True if unordered or not equal
- // Canonicalize these to be 'fcmp uno %X, 0.0'.
- I.setPredicate(FCmpInst::FCMP_UNO);
- I.setOperand(1, Constant::getNullValue(Op0->getType()));
- return &I;
-
- case FCmpInst::FCMP_ORD: // True if ordered (no nans)
- case FCmpInst::FCMP_OEQ: // True if ordered and equal
- case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal
- case FCmpInst::FCMP_OLE: // True if ordered and less than or equal
- // Canonicalize these to be 'fcmp ord %X, 0.0'.
- I.setPredicate(FCmpInst::FCMP_ORD);
- I.setOperand(1, Constant::getNullValue(Op0->getType()));
- return &I;
- }
- }
-
- // Handle fcmp with constant RHS
- if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
- if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
- switch (LHSI->getOpcode()) {
- case Instruction::PHI:
- // Only fold fcmp into the PHI if the phi and fcmp are in the same
- // block. If in the same block, we're encouraging jump threading. If
- // not, we are just pessimizing the code by making an i1 phi.
- if (LHSI->getParent() == I.getParent())
- if (Instruction *NV = FoldOpIntoPhi(I, true))
- return NV;
- break;
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC))
- return NV;
- break;
- case Instruction::Select:
- // If either operand of the select is a constant, we can fold the
- // comparison into the select arms, which will cause one to be
- // constant folded and the select turned into a bitwise or.
- Value *Op1 = 0, *Op2 = 0;
- if (LHSI->hasOneUse()) {
- if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
- // Fold the known value into the constant operand.
- Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
- // Insert a new FCmp of the other select operand.
- Op2 = Builder->CreateFCmp(I.getPredicate(),
- LHSI->getOperand(2), RHSC, I.getName());
- } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
- // Fold the known value into the constant operand.
- Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
- // Insert a new FCmp of the other select operand.
- Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1),
- RHSC, I.getName());
- }
- }
-
- if (Op1)
- return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
- break;
- }
- }
-
- return Changed ? &I : 0;
-}
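-
-// Editor's sketch of the 'fcmp pred X, X' canonicalization above (not
-// original source):
-//   %c = fcmp ult double %x, %x   ; true only when %x is NaN
-// becomes
-//   %c = fcmp uno double %x, 0.0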
-
-Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
- bool Changed = false;
-
-  /// Orders the operands of the compare so that they are listed from most
-  /// complex to least complex.  This puts constants last, after unary
-  /// operators, which in turn come after binary operators.
- if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
- I.swapOperands();
- Changed = true;
- }
-
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
- return ReplaceInstUsesWith(I, V);
-
- const Type *Ty = Op0->getType();
-
- // icmp's with boolean values can always be turned into bitwise operations
- if (Ty == Type::getInt1Ty(*Context)) {
- switch (I.getPredicate()) {
- default: llvm_unreachable("Invalid icmp instruction!");
- case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B)
- Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp");
- return BinaryOperator::CreateNot(Xor);
- }
-    case ICmpInst::ICMP_NE: // icmp ne i1 A, B -> A^B
- return BinaryOperator::CreateXor(Op0, Op1);
-
- case ICmpInst::ICMP_UGT:
- std::swap(Op0, Op1); // Change icmp ugt -> icmp ult
- // FALL THROUGH
- case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B
- Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
- return BinaryOperator::CreateAnd(Not, Op1);
- }
- case ICmpInst::ICMP_SGT:
- std::swap(Op0, Op1); // Change icmp sgt -> icmp slt
- // FALL THROUGH
- case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B
- Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
- return BinaryOperator::CreateAnd(Not, Op0);
- }
- case ICmpInst::ICMP_UGE:
- std::swap(Op0, Op1); // Change icmp uge -> icmp ule
- // FALL THROUGH
- case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B
- Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
- return BinaryOperator::CreateOr(Not, Op1);
- }
- case ICmpInst::ICMP_SGE:
- std::swap(Op0, Op1); // Change icmp sge -> icmp sle
- // FALL THROUGH
- case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B
- Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
- return BinaryOperator::CreateOr(Not, Op0);
- }
- }
- }
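-
-  // Editor's sketch for the i1 lowering above (not original source): on i1
-  // the unsigned order is false u< true, so
-  //   %c = icmp ult i1 %a, %b
-  // becomes
-  //   %na = xor i1 %a, true
-  //   %c  = and i1 %na, %b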
-
- unsigned BitWidth = 0;
- if (TD)
- BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
- else if (Ty->isIntOrIntVector())
- BitWidth = Ty->getScalarSizeInBits();
-
- bool isSignBit = false;
-
- // See if we are doing a comparison with a constant.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- Value *A = 0, *B = 0;
-
- // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
- if (I.isEquality() && CI->isNullValue() &&
- match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
- // (icmp cond A B) if cond is equality
- return new ICmpInst(I.getPredicate(), A, B);
- }
-
- // If we have an icmp le or icmp ge instruction, turn it into the
- // appropriate icmp lt or icmp gt instruction. This allows us to rely on
- // them being folded in the code below. The SimplifyICmpInst code has
- // already handled the edge cases for us, so we just assert on them.
- switch (I.getPredicate()) {
- default: break;
- case ICmpInst::ICMP_ULE:
- assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE
- return new ICmpInst(ICmpInst::ICMP_ULT, Op0,
- AddOne(CI));
- case ICmpInst::ICMP_SLE:
- assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE
- return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
- AddOne(CI));
- case ICmpInst::ICMP_UGE:
- assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE
- return new ICmpInst(ICmpInst::ICMP_UGT, Op0,
- SubOne(CI));
- case ICmpInst::ICMP_SGE:
- assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE
- return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
- SubOne(CI));
- }
-
-    // If this comparison is a normal comparison, it demands all bits; if it
-    // is a sign bit comparison, it only demands the sign bit.
- bool UnusedBit;
- isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit);
- }
-
- // See if we can fold the comparison based on range information we can get
- // by checking whether bits are known to be zero or one in the input.
- if (BitWidth != 0) {
- APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0);
- APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);
-
- if (SimplifyDemandedBits(I.getOperandUse(0),
- isSignBit ? APInt::getSignBit(BitWidth)
- : APInt::getAllOnesValue(BitWidth),
- Op0KnownZero, Op0KnownOne, 0))
- return &I;
- if (SimplifyDemandedBits(I.getOperandUse(1),
- APInt::getAllOnesValue(BitWidth),
- Op1KnownZero, Op1KnownOne, 0))
- return &I;
-
- // Given the known and unknown bits, compute a range that the LHS could be
- // in. Compute the Min, Max and RHS values based on the known bits. For the
- // EQ and NE we use unsigned values.
- APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
- APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
- if (I.isSigned()) {
- ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
- Op0Min, Op0Max);
- ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
- Op1Min, Op1Max);
- } else {
- ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
- Op0Min, Op0Max);
- ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
- Op1Min, Op1Max);
- }
-
- // If Min and Max are known to be the same, then SimplifyDemandedBits
- // figured out that the LHS is a constant. Just constant fold this now so
- // that code below can assume that Min != Max.
- if (!isa<Constant>(Op0) && Op0Min == Op0Max)
- return new ICmpInst(I.getPredicate(),
- ConstantInt::get(*Context, Op0Min), Op1);
- if (!isa<Constant>(Op1) && Op1Min == Op1Max)
- return new ICmpInst(I.getPredicate(), Op0,
- ConstantInt::get(*Context, Op1Min));
-
- // Based on the range information we know about the LHS, see if we can
- // simplify this comparison. For example, (x&4) < 8 is always true.
- switch (I.getPredicate()) {
- default: llvm_unreachable("Unknown icmp opcode!");
- case ICmpInst::ICMP_EQ:
- if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- break;
- case ICmpInst::ICMP_NE:
- if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- break;
- case ICmpInst::ICMP_ULT:
- if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- SubOne(CI));
-
- // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear
- if (CI->isMinValue(true))
- return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
- Constant::getAllOnesValue(Op0->getType()));
- }
- break;
- case ICmpInst::ICMP_UGT:
- if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
-      if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= min(B)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
-
- if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
-        if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(A)-1 == C
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- AddOne(CI));
-
- // (x >u 2147483647) -> (x <s 0) -> true if sign bit set
- if (CI->isMaxValue(true))
- return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
- Constant::getNullValue(Op0->getType()));
- }
- break;
- case ICmpInst::ICMP_SLT:
-      if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
-      if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(B)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- SubOne(CI));
- }
- break;
- case ICmpInst::ICMP_SGT:
- if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
-
- if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- AddOne(CI));
- }
- break;
- case ICmpInst::ICMP_SGE:
- assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
- if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- break;
- case ICmpInst::ICMP_SLE:
- assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
- if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- break;
- case ICmpInst::ICMP_UGE:
- assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
- if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- break;
- case ICmpInst::ICMP_ULE:
- assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
- if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- break;
- }
-
- // Turn a signed comparison into an unsigned one if both operands
- // are known to have the same sign.
- if (I.isSigned() &&
- ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) ||
- (Op0KnownOne.isNegative() && Op1KnownOne.isNegative())))
- return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
- }
-
- // Test if the ICmpInst instruction is used exclusively by a select as
- // part of a minimum or maximum operation. If so, refrain from doing
- // any other folding. This helps out other analyses which understand
- // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
- // and CodeGen. And in this case, at least one of the comparison
- // operands has at least one user besides the compare (the select),
- // which would often largely negate the benefit of folding anyway.
- if (I.hasOneUse())
- if (SelectInst *SI = dyn_cast<SelectInst>(*I.use_begin()))
- if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
- (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
- return 0;
-
- // See if we are doing a comparison between a constant and an instruction that
- // can be folded into the comparison.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- // Since the RHS is a ConstantInt (CI), if the left hand side is an
-    // instruction, see if that instruction has constant operands that allow
-    // it to be folded into the icmp.
- if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
- if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI))
- return Res;
- }
-
- // Handle icmp with constant (but not simple integer constant) RHS
- if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
- if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
- switch (LHSI->getOpcode()) {
- case Instruction::GetElementPtr:
- if (RHSC->isNullValue()) {
- // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
- bool isAllZeros = true;
- for (unsigned i = 1, e = LHSI->getNumOperands(); i != e; ++i)
- if (!isa<Constant>(LHSI->getOperand(i)) ||
- !cast<Constant>(LHSI->getOperand(i))->isNullValue()) {
- isAllZeros = false;
- break;
- }
- if (isAllZeros)
- return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
- Constant::getNullValue(LHSI->getOperand(0)->getType()));
- }
- break;
-
- case Instruction::PHI:
- // Only fold icmp into the PHI if the phi and icmp are in the same
- // block. If in the same block, we're encouraging jump threading. If
- // not, we are just pessimizing the code by making an i1 phi.
- if (LHSI->getParent() == I.getParent())
- if (Instruction *NV = FoldOpIntoPhi(I, true))
- return NV;
- break;
- case Instruction::Select: {
- // If either operand of the select is a constant, we can fold the
- // comparison into the select arms, which will cause one to be
- // constant folded and the select turned into a bitwise or.
- Value *Op1 = 0, *Op2 = 0;
- if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1)))
- Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
- if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2)))
- Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
-
- // We only want to perform this transformation if it will not lead to
- // additional code. This is true if either both sides of the select
- // fold to a constant (in which case the icmp is replaced with a select
- // which will usually simplify) or this is the only user of the
- // select (in which case we are trading a select+icmp for a simpler
- // select+icmp).
- if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) {
- if (!Op1)
- Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1),
- RHSC, I.getName());
- if (!Op2)
- Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2),
- RHSC, I.getName());
- return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
- }
- break;
- }
- case Instruction::Call:
- // If we have (malloc != null), and if the malloc has a single use, we
- // can assume it is successful and remove the malloc.
- if (isMalloc(LHSI) && LHSI->hasOneUse() &&
- isa<ConstantPointerNull>(RHSC)) {
- // Need to explicitly erase malloc call here, instead of adding it to
- // Worklist, because it won't get DCE'd from the Worklist since
- // isInstructionTriviallyDead() returns false for function calls.
- // It is OK to replace LHSI/MallocCall with Undef because the
- // instruction that uses it will be erased via Worklist.
- if (extractMallocCall(LHSI)) {
- LHSI->replaceAllUsesWith(UndefValue::get(LHSI->getType()));
- EraseInstFromFunction(*LHSI);
- return ReplaceInstUsesWith(I,
- ConstantInt::get(Type::getInt1Ty(*Context),
- !I.isTrueWhenEqual()));
- }
- if (CallInst* MallocCall = extractMallocCallFromBitCast(LHSI))
- if (MallocCall->hasOneUse()) {
- MallocCall->replaceAllUsesWith(
- UndefValue::get(MallocCall->getType()));
- EraseInstFromFunction(*MallocCall);
- Worklist.Add(LHSI); // The malloc's bitcast use.
- return ReplaceInstUsesWith(I,
- ConstantInt::get(Type::getInt1Ty(*Context),
- !I.isTrueWhenEqual()));
- }
- }
- break;
- }
- }
-
- // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
- if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I))
- return NI;
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
- if (Instruction *NI = FoldGEPICmp(GEP, Op0,
- ICmpInst::getSwappedPredicate(I.getPredicate()), I))
- return NI;
-
-  // Test to see if the operands of the icmp are cast versions of other
-  // values. If the ptr->ptr cast can be stripped off both arguments, we do so
- // now.
- if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
- if (isa<PointerType>(Op0->getType()) &&
- (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
- // We keep moving the cast from the left operand over to the right
- // operand, where it can often be eliminated completely.
- Op0 = CI->getOperand(0);
-
- // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
- // so eliminate it as well.
- if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1))
- Op1 = CI2->getOperand(0);
-
- // If Op1 is a constant, we can fold the cast into the constant.
- if (Op0->getType() != Op1->getType()) {
- if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
- Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
- } else {
- // Otherwise, cast the RHS right before the icmp
- Op1 = Builder->CreateBitCast(Op1, Op0->getType());
- }
- }
- return new ICmpInst(I.getPredicate(), Op0, Op1);
- }
- }
-
- if (isa<CastInst>(Op0)) {
- // Handle the special case of: icmp (cast bool to X), <cst>
- // This comes up when you have code like
- // int X = A < B;
- // if (X) ...
- // For generality, we handle any zero-extension of any operand comparison
- // with a constant or another cast from the same type.
- if (isa<Constant>(Op1) || isa<CastInst>(Op1))
- if (Instruction *R = visitICmpInstWithCastAndCast(I))
- return R;
- }
-
- // See if it's the same type of instruction on the left and right.
- if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
- if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
- if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() &&
- Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1)) {
- switch (Op0I->getOpcode()) {
- default: break;
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Xor:
- if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
- return new ICmpInst(I.getPredicate(), Op0I->getOperand(0),
- Op1I->getOperand(0));
- // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
- if (CI->getValue().isSignBit()) {
- ICmpInst::Predicate Pred = I.isSigned()
- ? I.getUnsignedPredicate()
- : I.getSignedPredicate();
- return new ICmpInst(Pred, Op0I->getOperand(0),
- Op1I->getOperand(0));
- }
-
- if (CI->getValue().isMaxSignedValue()) {
- ICmpInst::Predicate Pred = I.isSigned()
- ? I.getUnsignedPredicate()
- : I.getSignedPredicate();
- Pred = I.getSwappedPredicate(Pred);
- return new ICmpInst(Pred, Op0I->getOperand(0),
- Op1I->getOperand(0));
- }
- }
- break;
- case Instruction::Mul:
- if (!I.isEquality())
- break;
-
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
- // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
- // Mask = -1 >> count-trailing-zeros(Cst).
- if (!CI->isZero() && !CI->isOne()) {
- const APInt &AP = CI->getValue();
- ConstantInt *Mask = ConstantInt::get(*Context,
- APInt::getLowBitsSet(AP.getBitWidth(),
- AP.getBitWidth() -
- AP.countTrailingZeros()));
- Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask);
- Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask);
- return new ICmpInst(I.getPredicate(), And1, And2);
- }
- }
- break;
- }
- }
- }
- }
-
- // ~x < ~y --> y < x
- { Value *A, *B;
- if (match(Op0, m_Not(m_Value(A))) &&
- match(Op1, m_Not(m_Value(B))))
- return new ICmpInst(I.getPredicate(), B, A);
- }
-
- if (I.isEquality()) {
- Value *A, *B, *C, *D;
-
- // -x == -y --> x == y
- if (match(Op0, m_Neg(m_Value(A))) &&
- match(Op1, m_Neg(m_Value(B))))
- return new ICmpInst(I.getPredicate(), A, B);
-
- if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
- if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
- Value *OtherVal = A == Op1 ? B : A;
- return new ICmpInst(I.getPredicate(), OtherVal,
- Constant::getNullValue(A->getType()));
- }
-
- if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
- // A^c1 == C^c2 --> A == C^(c1^c2)
- ConstantInt *C1, *C2;
- if (match(B, m_ConstantInt(C1)) &&
- match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
- Constant *NC =
- ConstantInt::get(*Context, C1->getValue() ^ C2->getValue());
- Value *Xor = Builder->CreateXor(C, NC, "tmp");
- return new ICmpInst(I.getPredicate(), A, Xor);
- }
-
- // A^B == A^D -> B == D
- if (A == C) return new ICmpInst(I.getPredicate(), B, D);
- if (A == D) return new ICmpInst(I.getPredicate(), B, C);
- if (B == C) return new ICmpInst(I.getPredicate(), A, D);
- if (B == D) return new ICmpInst(I.getPredicate(), A, C);
- }
- }
-
- if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
- (A == Op0 || B == Op0)) {
- // A == (A^B) -> B == 0
- Value *OtherVal = A == Op0 ? B : A;
- return new ICmpInst(I.getPredicate(), OtherVal,
- Constant::getNullValue(A->getType()));
- }
-
- // (A-B) == A -> B == 0
- if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B))))
- return new ICmpInst(I.getPredicate(), B,
- Constant::getNullValue(B->getType()));
-
- // A == (A-B) -> B == 0
- if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B))))
- return new ICmpInst(I.getPredicate(), B,
- Constant::getNullValue(B->getType()));
-
- // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
- if (Op0->hasOneUse() && Op1->hasOneUse() &&
- match(Op0, m_And(m_Value(A), m_Value(B))) &&
- match(Op1, m_And(m_Value(C), m_Value(D)))) {
- Value *X = 0, *Y = 0, *Z = 0;
-
- if (A == C) {
- X = B; Y = D; Z = A;
- } else if (A == D) {
- X = B; Y = C; Z = A;
- } else if (B == C) {
- X = A; Y = D; Z = B;
- } else if (B == D) {
- X = A; Y = C; Z = B;
- }
-
- if (X) { // Build (X^Y) & Z
- Op1 = Builder->CreateXor(X, Y, "tmp");
- Op1 = Builder->CreateAnd(Op1, Z, "tmp");
- I.setOperand(0, Op1);
- I.setOperand(1, Constant::getNullValue(Op1->getType()));
- return &I;
- }
- }
- }
-
- {
- Value *X; ConstantInt *Cst;
- // icmp X+Cst, X
- if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
- return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0);
-
- // icmp X, X+Cst
- if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
- return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1);
- }
- return Changed ? &I : 0;
-}
-
-/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X".
-Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
- Value *X, ConstantInt *CI,
- ICmpInst::Predicate Pred,
- Value *TheAdd) {
- // If we have X+0, exit early (simplifying logic below) and let it get folded
- // elsewhere. icmp X+0, X -> icmp X, X
- if (CI->isZero()) {
- bool isTrue = ICmpInst::isTrueWhenEqual(Pred);
- return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
- }
-
- // (X+4) == X -> false.
- if (Pred == ICmpInst::ICMP_EQ)
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
-
- // (X+4) != X -> true.
- if (Pred == ICmpInst::ICMP_NE)
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
-
- // If this is an instruction (as opposed to constantexpr) get NUW/NSW info.
- bool isNUW = false, isNSW = false;
- if (BinaryOperator *Add = dyn_cast<BinaryOperator>(TheAdd)) {
- isNUW = Add->hasNoUnsignedWrap();
- isNSW = Add->hasNoSignedWrap();
- }
-
- // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
-  // so the values can never be equal. Similarly for all other "or equals"
- // operators.
-
- // (X+1) <u X --> X >u (MAXUINT-1) --> X != 255
-  // (X+2) <u X  --> X >u (MAXUINT-2) --> X >u 253
- // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0
- if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
- // If this is an NUW add, then this is always false.
- if (isNUW)
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
-
- Value *R = ConstantExpr::getSub(ConstantInt::get(CI->getType(), -1ULL), CI);
- return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
- }
-
- // (X+1) >u X --> X <u (0-1) --> X != 255
- // (X+2) >u X --> X <u (0-2) --> X <u 254
- // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0
- if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
- // If this is an NUW add, then this is always true.
- if (isNUW)
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
- return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
- }
-
- unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
- ConstantInt *SMax = ConstantInt::get(X->getContext(),
- APInt::getSignedMaxValue(BitWidth));
-
- // (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127
- // (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125
- // (X+MAXSINT) <s X --> X >s (MAXSINT-MAXSINT) --> X >s 0
- // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1
- // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126
- // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127
- if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
- // If this is an NSW add, then we have two cases: if the constant is
- // positive, then this is always false, if negative, this is always true.
- if (isNSW) {
- bool isTrue = CI->getValue().isNegative();
- return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
- }
-
- return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
- }
-
- // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127
- // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126
- // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
- // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
- // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126
- // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
-
- // If this is an NSW add, then we have two cases: if the constant is
- // positive, then this is always true, if negative, this is always false.
- if (isNSW) {
- bool isTrue = !CI->getValue().isNegative();
- return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
- }
-
- assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
- Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1);
- return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
-}
-
-/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS
-/// and CmpRHS are both known to be integer constants.
-Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
- ConstantInt *DivRHS) {
- ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
- const APInt &CmpRHSV = CmpRHS->getValue();
-
- // FIXME: If the operand types don't match the type of the divide
- // then don't attempt this transform. The code below doesn't have the
- // logic to deal with a signed divide and an unsigned compare (and
- // vice versa). This is because (x /s C1) <s C2 produces different
- // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
- // (x /u C1) <u C2. Simply casting the operands and result won't
- // work. :( The if statement below tests that condition and bails
- // if it finds it.
- bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
- if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
- return 0;
- if (DivRHS->isZero())
- return 0; // The ProdOV computation fails on divide by zero.
- if (DivIsSigned && DivRHS->isAllOnesValue())
- return 0; // The overflow computation also screws up here
- if (DivRHS->isOne())
- return 0; // Not worth bothering, and eliminates some funny cases
- // with INT_MIN.
-
- // Compute Prod = CI * DivRHS. We are essentially solving an equation
- // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
- // C2 (CI). By solving for X we can turn this into a range check
- // instead of computing a divide.
- Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
-
- // Determine if the product overflows by seeing if the product is
- // not equal to the divide. Make sure we do the same kind of divide
- // as in the LHS instruction that we're folding.
- bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
- ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
-
- // Get the ICmp opcode
- ICmpInst::Predicate Pred = ICI.getPredicate();
-
- // Figure out the interval that is being checked. For example, a comparison
- // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
- // Compute this interval based on the constants involved and the signedness of
- // the compare/divide. This computes a half-open interval, keeping track of
-  // whether either value in the interval overflows. After analysis, each
-  // overflow variable is set to 0 if its corresponding bound variable is
-  // valid, -1 if it overflowed off the bottom end, or +1 if it overflowed
-  // off the top end.
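-  // Illustrative i8 case: for "X /u 64 == 5", Prod = 5*64 wraps to 64 and
-  // 64 /u 64 != 5, so ProdOV is set and the icmp eq folds to false below.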
- int LoOverflow = 0, HiOverflow = 0;
- Constant *LoBound = 0, *HiBound = 0;
-
- if (!DivIsSigned) { // udiv
- // e.g. X/5 op 3 --> [15, 20)
- LoBound = Prod;
- HiOverflow = LoOverflow = ProdOV;
- if (!HiOverflow)
- HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, Context, false);
- } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
- if (CmpRHSV == 0) { // (X / pos) op 0
- // Can't overflow. e.g. X/2 op 0 --> [-1, 2)
- LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
- HiBound = DivRHS;
- } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos
- LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
- HiOverflow = LoOverflow = ProdOV;
- if (!HiOverflow)
- HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, Context, true);
- } else { // (X / pos) op neg
- // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14)
- HiBound = AddOne(Prod);
- LoOverflow = HiOverflow = ProdOV ? -1 : 0;
- if (!LoOverflow) {
- ConstantInt* DivNeg =
- cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
- LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, Context,
- true) ? -1 : 0;
- }
- }
- } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
- if (CmpRHSV == 0) { // (X / neg) op 0
- // e.g. X/-5 op 0 --> [-4, 5)
- LoBound = AddOne(DivRHS);
- HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
- if (HiBound == DivRHS) { // -INTMIN = INTMIN
- HiOverflow = 1; // [INTMIN+1, overflow)
- HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN
- }
- } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos
- // e.g. X/-5 op 3 --> [-19, -14)
- HiBound = AddOne(Prod);
- HiOverflow = LoOverflow = ProdOV ? -1 : 0;
- if (!LoOverflow)
- LoOverflow = AddWithOverflow(LoBound, HiBound,
- DivRHS, Context, true) ? -1 : 0;
- } else { // (X / neg) op neg
- LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
- LoOverflow = HiOverflow = ProdOV;
- if (!HiOverflow)
- HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, Context, true);
- }
-
- // Dividing by a negative swaps the condition. LT <-> GT
- Pred = ICmpInst::getSwappedPredicate(Pred);
- }
-
- Value *X = DivI->getOperand(0);
- switch (Pred) {
- default: llvm_unreachable("Unhandled icmp opcode!");
- case ICmpInst::ICMP_EQ:
- if (LoOverflow && HiOverflow)
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
- else if (HiOverflow)
- return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
- ICmpInst::ICMP_UGE, X, LoBound);
- else if (LoOverflow)
- return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
- ICmpInst::ICMP_ULT, X, HiBound);
- else
- return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI);
- case ICmpInst::ICMP_NE:
- if (LoOverflow && HiOverflow)
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
- else if (HiOverflow)
- return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
- ICmpInst::ICMP_ULT, X, LoBound);
- else if (LoOverflow)
- return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
- ICmpInst::ICMP_UGE, X, HiBound);
- else
- return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI);
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_SLT:
- if (LoOverflow == +1) // Low bound is greater than input range.
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
- if (LoOverflow == -1) // Low bound is less than input range.
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
- return new ICmpInst(Pred, X, LoBound);
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_SGT:
- if (HiOverflow == +1) // High bound greater than input range.
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
- else if (HiOverflow == -1) // High bound less than input range.
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
- if (Pred == ICmpInst::ICMP_UGT)
- return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
- else
- return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
- }
-}
-
-
-/// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)".
-///
-Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
- Instruction *LHSI,
- ConstantInt *RHS) {
- const APInt &RHSV = RHS->getValue();
-
- switch (LHSI->getOpcode()) {
- case Instruction::Trunc:
- if (ICI.isEquality() && LHSI->hasOneUse()) {
- // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
- // of the high bits truncated out of x are known.
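-      // Illustrative case: if the top 24 bits of an i32 %x are known to be
-      // 0x123456, then icmp eq (trunc i32 %x to i8), 42 becomes
-      // icmp eq i32 %x, 0x1234562A.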
- unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(),
- SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
- APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits));
- APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
- ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne);
-
- // If all the high bits are known, we can do this xform.
- if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
- // Pull in the high bits from known-ones set.
- APInt NewRHS(RHS->getValue());
- NewRHS.zext(SrcBits);
- NewRHS |= KnownOne;
- return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
- ConstantInt::get(*Context, NewRHS));
- }
- }
- break;
-
- case Instruction::Xor: // (icmp pred (xor X, XorCST), CI)
- if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
-      // If this is a comparison that tests the sign bit (X < 0) or (X > -1),
- // fold the xor.
- if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) ||
- (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) {
- Value *CompareVal = LHSI->getOperand(0);
-
- // If the sign bit of the XorCST is not set, there is no change to
- // the operation, just stop using the Xor.
- if (!XorCST->getValue().isNegative()) {
- ICI.setOperand(0, CompareVal);
- Worklist.Add(LHSI);
- return &ICI;
- }
-
- // Was the old condition true if the operand is positive?
- bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT;
-
- // If so, the new one isn't.
- isTrueIfPositive ^= true;
-
- if (isTrueIfPositive)
- return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal,
- SubOne(RHS));
- else
- return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal,
- AddOne(RHS));
- }
-
- if (LHSI->hasOneUse()) {
- // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit))
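-        // Illustrative i8 case: icmp ult (xor i8 %a, -128), 10 becomes
-        // icmp slt i8 %a, -118; flipping the sign bit maps unsigned order
-        // onto signed order.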
- if (!ICI.isEquality() && XorCST->getValue().isSignBit()) {
- const APInt &SignBit = XorCST->getValue();
- ICmpInst::Predicate Pred = ICI.isSigned()
- ? ICI.getUnsignedPredicate()
- : ICI.getSignedPredicate();
- return new ICmpInst(Pred, LHSI->getOperand(0),
- ConstantInt::get(*Context, RHSV ^ SignBit));
- }
-
- // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
- if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) {
- const APInt &NotSignBit = XorCST->getValue();
- ICmpInst::Predicate Pred = ICI.isSigned()
- ? ICI.getUnsignedPredicate()
- : ICI.getSignedPredicate();
- Pred = ICI.getSwappedPredicate(Pred);
- return new ICmpInst(Pred, LHSI->getOperand(0),
- ConstantInt::get(*Context, RHSV ^ NotSignBit));
- }
- }
- }
- break;
- case Instruction::And: // (icmp pred (and X, AndCST), RHS)
- if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) &&
- LHSI->getOperand(0)->hasOneUse()) {
- ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1));
-
- // If the LHS is an AND of a truncating cast, we can widen the
- // and/compare to be the input width without changing the value
- // produced, eliminating a cast.
- if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) {
- // We can do this transformation if either the AND constant does not
- // have its sign bit set or if it is an equality comparison.
- // Extending a relational comparison when we're checking the sign
- // bit would not work.
- if (Cast->hasOneUse() &&
- (ICI.isEquality() ||
- (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) {
- uint32_t BitWidth =
- cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth();
- APInt NewCST = AndCST->getValue();
- NewCST.zext(BitWidth);
- APInt NewCI = RHSV;
- NewCI.zext(BitWidth);
- Value *NewAnd =
- Builder->CreateAnd(Cast->getOperand(0),
- ConstantInt::get(*Context, NewCST), LHSI->getName());
- return new ICmpInst(ICI.getPredicate(), NewAnd,
- ConstantInt::get(*Context, NewCI));
- }
- }
-
- // If this is: (X >> C1) & C2 != C3 (where any shift and any compare
- // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This
- // happens a LOT in code produced by the C front-end, for bitfield
- // access.
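-      // Illustrative one-bit bitfield test: ((X >> 3) & 1) != 0 becomes
-      // (X & 8) != 0, with no shift at all.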
- BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0));
- if (Shift && !Shift->isShift())
- Shift = 0;
-
- ConstantInt *ShAmt;
- ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0;
- const Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift.
- const Type *AndTy = AndCST->getType(); // Type of the and.
-
- // We can fold this as long as we can't shift unknown bits
-      // into the mask. That can only happen with signed right shifts,
-      // since they sign-extend.
- if (ShAmt) {
- bool CanFold = Shift->isLogicalShift();
- if (!CanFold) {
- // To test for the bad case of the signed shr, see if any
- // of the bits shifted in could be tested after the mask.
- uint32_t TyBits = Ty->getPrimitiveSizeInBits();
- int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits);
-
- uint32_t BitWidth = AndTy->getPrimitiveSizeInBits();
- if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
- AndCST->getValue()) == 0)
- CanFold = true;
- }
-
- if (CanFold) {
- Constant *NewCst;
- if (Shift->getOpcode() == Instruction::Shl)
- NewCst = ConstantExpr::getLShr(RHS, ShAmt);
- else
- NewCst = ConstantExpr::getShl(RHS, ShAmt);
-
- // Check to see if we are shifting out any of the bits being
- // compared.
- if (ConstantExpr::get(Shift->getOpcode(),
- NewCst, ShAmt) != RHS) {
- // If we shifted bits out, the fold is not going to work out.
- // As a special case, check to see if this means that the
- // result is always true or false now.
- if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
- if (ICI.getPredicate() == ICmpInst::ICMP_NE)
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
- } else {
- ICI.setOperand(1, NewCst);
- Constant *NewAndCST;
- if (Shift->getOpcode() == Instruction::Shl)
- NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt);
- else
- NewAndCST = ConstantExpr::getShl(AndCST, ShAmt);
- LHSI->setOperand(1, NewAndCST);
- LHSI->setOperand(0, Shift->getOperand(0));
- Worklist.Add(Shift); // Shift is dead.
- return &ICI;
- }
- }
- }
-
-      // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The latter is
- // preferable because it allows the C<<Y expression to be hoisted out
- // of a loop if Y is invariant and X is not.
- if (Shift && Shift->hasOneUse() && RHSV == 0 &&
- ICI.isEquality() && !Shift->isArithmeticShift() &&
- !isa<Constant>(Shift->getOperand(0))) {
- // Compute C << Y.
- Value *NS;
- if (Shift->getOpcode() == Instruction::LShr) {
- NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp");
- } else {
- // Insert a logical shift.
- NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp");
- }
-
- // Compute X & (C << Y).
- Value *NewAnd =
- Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
-
- ICI.setOperand(0, NewAnd);
- return &ICI;
- }
- }
- break;
-
- case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI)
- ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
- if (!ShAmt) break;
-
- uint32_t TypeBits = RHSV.getBitWidth();
-
- // Check that the shift amount is in range. If not, don't perform
- // undefined shifts. When the shift is visited it will be
- // simplified.
- if (ShAmt->uge(TypeBits))
- break;
-
- if (ICI.isEquality()) {
- // If we are comparing against bits always shifted out, the
- // comparison cannot succeed.
- Constant *Comp =
- ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt),
- ShAmt);
- if (Comp != RHS) {// Comparing against a bit that we know is zero.
- bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
- Constant *Cst = ConstantInt::get(Type::getInt1Ty(*Context), IsICMP_NE);
- return ReplaceInstUsesWith(ICI, Cst);
- }
-
- if (LHSI->hasOneUse()) {
- // Otherwise strength reduce the shift into an and.
- uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
- Constant *Mask =
- ConstantInt::get(*Context, APInt::getLowBitsSet(TypeBits,
- TypeBits-ShAmtVal));
-
- Value *And =
- Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask");
- return new ICmpInst(ICI.getPredicate(), And,
- ConstantInt::get(*Context, RHSV.lshr(ShAmtVal)));
- }
- }
-
- // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
- bool TrueIfSigned = false;
- if (LHSI->hasOneUse() &&
- isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
- // (X << 31) <s 0 --> (X&1) != 0
- Constant *Mask = ConstantInt::get(*Context, APInt(TypeBits, 1) <<
- (TypeBits-ShAmt->getZExtValue()-1));
- Value *And =
- Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
- return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
- And, Constant::getNullValue(And->getType()));
- }
- break;
- }
-
- case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI)
- case Instruction::AShr: {
- // Only handle equality comparisons of shift-by-constant.
- ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
- if (!ShAmt || !ICI.isEquality()) break;
-
- // Check that the shift amount is in range. If not, don't perform
- // undefined shifts. When the shift is visited it will be
- // simplified.
- uint32_t TypeBits = RHSV.getBitWidth();
- if (ShAmt->uge(TypeBits))
- break;
-
- uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
-
- // If we are comparing against bits always shifted out, the
- // comparison cannot succeed.
- APInt Comp = RHSV << ShAmtVal;
- if (LHSI->getOpcode() == Instruction::LShr)
- Comp = Comp.lshr(ShAmtVal);
- else
- Comp = Comp.ashr(ShAmtVal);
-
- if (Comp != RHSV) { // Comparing against a bit that we know is zero.
- bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
- Constant *Cst = ConstantInt::get(Type::getInt1Ty(*Context), IsICMP_NE);
- return ReplaceInstUsesWith(ICI, Cst);
- }
-
- // Otherwise, check to see if the bits shifted out are known to be zero.
- // If so, we can compare against the unshifted value:
- // (X & 4) >> 1 == 2 --> (X & 4) == 4.
- if (LHSI->hasOneUse() &&
- MaskedValueIsZero(LHSI->getOperand(0),
- APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) {
- return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
- ConstantExpr::getShl(RHS, ShAmt));
- }
-
- if (LHSI->hasOneUse()) {
- // Otherwise strength reduce the shift into an and.
- APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
- Constant *Mask = ConstantInt::get(*Context, Val);
-
- Value *And = Builder->CreateAnd(LHSI->getOperand(0),
- Mask, LHSI->getName()+".mask");
- return new ICmpInst(ICI.getPredicate(), And,
- ConstantExpr::getShl(RHS, ShAmt));
- }
- break;
- }
-
- case Instruction::SDiv:
- case Instruction::UDiv:
- // Fold: icmp pred ([us]div X, C1), C2 -> range test
- // Fold this div into the comparison, producing a range check.
- // Determine, based on the divide type, what the range is being
- // checked. If there is an overflow on the low or high side, remember
- // it, otherwise compute the range [low, hi) bounding the new value.
- // See: InsertRangeTest above for the kinds of replacements possible.
- if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
- if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI),
- DivRHS))
- return R;
- break;
-
- case Instruction::Add:
- // Fold: icmp pred (add X, C1), C2
- if (!ICI.isEquality()) {
- ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1));
- if (!LHSC) break;
- const APInt &LHSV = LHSC->getValue();
-
- ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV)
- .subtract(LHSV);
-
- if (ICI.isSigned()) {
- if (CR.getLower().isSignBit()) {
- return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
- ConstantInt::get(*Context, CR.getUpper()));
- } else if (CR.getUpper().isSignBit()) {
- return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
- ConstantInt::get(*Context, CR.getLower()));
- }
- } else {
- if (CR.getLower().isMinValue()) {
- return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
- ConstantInt::get(*Context, CR.getUpper()));
- } else if (CR.getUpper().isMinValue()) {
- return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
- ConstantInt::get(*Context, CR.getLower()));
- }
- }
- }
- break;
- }
-
- // Simplify icmp_eq and icmp_ne instructions with integer constant RHS.
- if (ICI.isEquality()) {
- bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
-
- // If the first operand is (add|sub|and|or|xor|rem) with a constant, and
- // the second operand is a constant, simplify a bit.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) {
- switch (BO->getOpcode()) {
- case Instruction::SRem:
- // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
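-        // e.g. (srem X, 4) == 0 becomes (urem X, 4) == 0; for a power-of-2
-        // divisor both are just a test of the low bits.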
-        if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&
-            BO->hasOneUse()) {
- const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
- if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) {
- Value *NewRem =
- Builder->CreateURem(BO->getOperand(0), BO->getOperand(1),
- BO->getName());
- return new ICmpInst(ICI.getPredicate(), NewRem,
- Constant::getNullValue(BO->getType()));
- }
- }
- break;
- case Instruction::Add:
- // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
- if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
- if (BO->hasOneUse())
- return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
- ConstantExpr::getSub(RHS, BOp1C));
- } else if (RHSV == 0) {
- // Replace ((add A, B) != 0) with (A != -B) if A or B is
- // efficiently invertible, or if the add has just this one use.
- Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
-
- if (Value *NegVal = dyn_castNegVal(BOp1))
- return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
- else if (Value *NegVal = dyn_castNegVal(BOp0))
- return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
- else if (BO->hasOneUse()) {
- Value *Neg = Builder->CreateNeg(BOp1);
- Neg->takeName(BO);
- return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
- }
- }
- break;
- case Instruction::Xor:
- // For the xor case, we can xor two constants together, eliminating
- // the explicit xor.
- if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1)))
- return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
- ConstantExpr::getXor(RHS, BOC));
-
- // FALLTHROUGH
- case Instruction::Sub:
- // Replace (([sub|xor] A, B) != 0) with (A != B)
- if (RHSV == 0)
- return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
- BO->getOperand(1));
- break;
-
- case Instruction::Or:
- // If bits are being or'd in that are not present in the constant we
- // are comparing against, then the comparison could never succeed!
- if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
- Constant *NotCI = ConstantExpr::getNot(RHS);
- if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
- return ReplaceInstUsesWith(ICI,
- ConstantInt::get(Type::getInt1Ty(*Context),
- isICMP_NE));
- }
- break;
-
- case Instruction::And:
- if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
- // If bits are being compared against that are and'd out, then the
- // comparison can never succeed!
- if ((RHSV & ~BOC->getValue()) != 0)
- return ReplaceInstUsesWith(ICI,
- ConstantInt::get(Type::getInt1Ty(*Context),
- isICMP_NE));
-
- // If we have ((X & C) == C), turn it into ((X & C) != 0).
- if (RHS == BOC && RHSV.isPowerOf2())
- return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
- ICmpInst::ICMP_NE, LHSI,
- Constant::getNullValue(RHS->getType()));
-
- // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
- if (BOC->getValue().isSignBit()) {
- Value *X = BO->getOperand(0);
- Constant *Zero = Constant::getNullValue(X->getType());
- ICmpInst::Predicate pred = isICMP_NE ?
- ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
- return new ICmpInst(pred, X, Zero);
- }
-
- // ((X & ~7) == 0) --> X < 8
- if (RHSV == 0 && isHighOnes(BOC)) {
- Value *X = BO->getOperand(0);
- Constant *NegX = ConstantExpr::getNeg(BOC);
- ICmpInst::Predicate pred = isICMP_NE ?
- ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
- return new ICmpInst(pred, X, NegX);
- }
- }
- default: break;
- }
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
- // Handle icmp {eq|ne} <intrinsic>, intcst.
- if (II->getIntrinsicID() == Intrinsic::bswap) {
- Worklist.Add(II);
- ICI.setOperand(0, II->getOperand(1));
- ICI.setOperand(1, ConstantInt::get(*Context, RHSV.byteSwap()));
- return &ICI;
- }
- }
- }
- return 0;
-}
-
-/// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst).
-/// We only handle extending casts so far.
-///
-Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
- const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0));
- Value *LHSCIOp = LHSCI->getOperand(0);
- const Type *SrcTy = LHSCIOp->getType();
- const Type *DestTy = LHSCI->getType();
- Value *RHSCIOp;
-
- // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
- // integer type is the same size as the pointer type.
- if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
- TD->getPointerSizeInBits() ==
- cast<IntegerType>(DestTy)->getBitWidth()) {
- Value *RHSOp = 0;
- if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
- RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
- } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
- RHSOp = RHSC->getOperand(0);
- // If the pointer types don't match, insert a bitcast.
- if (LHSCIOp->getType() != RHSOp->getType())
- RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
- }
-
- if (RHSOp)
- return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
- }
-
- // The code below only handles extension cast instructions, so far.
- // Enforce this.
- if (LHSCI->getOpcode() != Instruction::ZExt &&
- LHSCI->getOpcode() != Instruction::SExt)
- return 0;
-
- bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
- bool isSignedCmp = ICI.isSigned();
-
- if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) {
- // Not an extension from the same type?
- RHSCIOp = CI->getOperand(0);
- if (RHSCIOp->getType() != LHSCIOp->getType())
- return 0;
-
- // If the signedness of the two casts doesn't agree (i.e. one is a sext
- // and the other is a zext), then we can't handle this.
- if (CI->getOpcode() != LHSCI->getOpcode())
- return 0;
-
- // Deal with equality cases early.
- if (ICI.isEquality())
- return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
-
- // A signed comparison of sign extended values simplifies into a
- // signed comparison.
- if (isSignedCmp && isSignedExt)
- return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
-
- // The other three cases all fold into an unsigned comparison.
- return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
- }
-
- // If we aren't dealing with a constant on the RHS, exit early
- ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1));
- if (!CI)
- return 0;
-
- // Compute the constant that would happen if we truncated to SrcTy then
- // reextended to DestTy.
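-  // Illustrative case: for icmp slt (sext i8 %x to i32), 1000, truncating
-  // 1000 to i8 gives -24 and sign-extending back gives -24 != 1000, so the
-  // constant is not representable and we fall into the code further below.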
- Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy);
- Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(),
- Res1, DestTy);
-
- // If the re-extended constant didn't change...
- if (Res2 == CI) {
- // Deal with equality cases early.
- if (ICI.isEquality())
- return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
-
- // A signed comparison of sign extended values simplifies into a
- // signed comparison.
- if (isSignedExt && isSignedCmp)
- return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
-
- // The other three cases all fold into an unsigned comparison.
- return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1);
- }
-
- // The re-extended constant changed so the constant cannot be represented
- // in the shorter type. Consequently, we cannot emit a simple comparison.
-
- // First, handle some easy cases. We know the result cannot be equal at this
-  // point, so handle the ICI.isEquality() cases first.
- if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
- if (ICI.getPredicate() == ICmpInst::ICMP_NE)
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
-
- // Evaluate the comparison for LT (we invert for GT below). LE and GE cases
-  // should have been folded away previously and should not reach here.
- Value *Result;
- if (isSignedCmp) {
- // We're performing a signed comparison.
- if (cast<ConstantInt>(CI)->getValue().isNegative())
- Result = ConstantInt::getFalse(*Context); // X < (small) --> false
- else
- Result = ConstantInt::getTrue(*Context); // X < (large) --> true
- } else {
- // We're performing an unsigned comparison.
- if (isSignedExt) {
- // We're performing an unsigned comp with a sign extended value.
- // This is true if the input is >= 0. [aka >s -1]
- Constant *NegOne = Constant::getAllOnesValue(SrcTy);
- Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName());
- } else {
- // Unsigned extend & unsigned compare -> always true.
- Result = ConstantInt::getTrue(*Context);
- }
- }
-
- // Finally, return the value computed.
- if (ICI.getPredicate() == ICmpInst::ICMP_ULT ||
- ICI.getPredicate() == ICmpInst::ICMP_SLT)
- return ReplaceInstUsesWith(ICI, Result);
-
- assert((ICI.getPredicate()==ICmpInst::ICMP_UGT ||
- ICI.getPredicate()==ICmpInst::ICMP_SGT) &&
- "ICmp should be folded!");
- if (Constant *CI = dyn_cast<Constant>(Result))
- return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));
- return BinaryOperator::CreateNot(Result);
-}
-
-Instruction *InstCombiner::visitShl(BinaryOperator &I) {
- return commonShiftTransforms(I);
-}
-
-Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
- return commonShiftTransforms(I);
-}
-
-Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
- if (Instruction *R = commonShiftTransforms(I))
- return R;
-
- Value *Op0 = I.getOperand(0);
-
-  // ashr int -1, X = -1 (any arithmetic right shift of ~0 is still ~0)
- if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
- if (CSI->isAllOnesValue())
- return ReplaceInstUsesWith(I, CSI);
-
- // See if we can turn a signed shr into an unsigned shr.
- if (MaskedValueIsZero(Op0,
- APInt::getSignBit(I.getType()->getScalarSizeInBits())))
- return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
-
- // Arithmetic shifting an all-sign-bit value is a no-op.
- unsigned NumSignBits = ComputeNumSignBits(Op0);
- if (NumSignBits == Op0->getType()->getScalarSizeInBits())
- return ReplaceInstUsesWith(I, Op0);
-
- return 0;
-}
-
-Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
- assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- // shl X, 0 == X and shr X, 0 == X
- // shl 0, X == 0 and shr 0, X == 0
- if (Op1 == Constant::getNullValue(Op1->getType()) ||
- Op0 == Constant::getNullValue(Op0->getType()))
- return ReplaceInstUsesWith(I, Op0);
-
- if (isa<UndefValue>(Op0)) {
- if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef
- return ReplaceInstUsesWith(I, Op0);
- else // undef << X -> 0, undef >>u X -> 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- }
- if (isa<UndefValue>(Op1)) {
- if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X
- return ReplaceInstUsesWith(I, Op0);
- else // X << undef, X >>u undef -> 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- }
-
- // See if we can fold away this shift.
- if (SimplifyDemandedInstructionBits(I))
- return &I;
-
- // Try to fold constant and into select arguments.
- if (isa<Constant>(Op0))
- if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
- if (Instruction *R = FoldOpIntoSelect(I, SI, this))
- return R;
-
- if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
- if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
- return Res;
- return 0;
-}
-
-Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
- BinaryOperator &I) {
- bool isLeftShift = I.getOpcode() == Instruction::Shl;
-
- // See if we can simplify any instructions used by the instruction whose sole
- // purpose is to compute bits we don't care about.
- uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
-
-  // shl i32 X, 32 = 0 and lshr i8 Y, 9 = 0, ... but don't eliminate an
-  // oversized arithmetic shift; clamp its amount to TypeBits-1 instead.
- //
- if (Op1->uge(TypeBits)) {
- if (I.getOpcode() != Instruction::AShr)
- return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
- else {
- I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
- return &I;
- }
- }
-
- // ((X*C1) << C2) == (X * (C1 << C2))
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
- if (BO->getOpcode() == Instruction::Mul && isLeftShift)
- if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
- return BinaryOperator::CreateMul(BO->getOperand(0),
- ConstantExpr::getShl(BOOp, Op1));
-
- // Try to fold constant and into select arguments.
- if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
- if (Instruction *R = FoldOpIntoSelect(I, SI, this))
- return R;
- if (isa<PHINode>(Op0))
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
-
- // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
- if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
- Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
- // If 'shift2' is an ashr, we would have to get the sign bit into a funny
- // place. Don't try to do this transformation in this case. Also, we
- // require that the input operand is a shift-by-constant so that we have
- // confidence that the shifts will get folded together. We could do this
- // xform in more cases, but it is unlikely to be profitable.
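-    // Illustrative case: with %s = lshr i32 %x, 3, the expression
-    // lshr (trunc i32 %s to i8), 2 becomes trunc (and (lshr i32 %s, 2), 63)
-    // to i8, letting the two lshr's fold together later.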
- if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
- isa<ConstantInt>(TrOp->getOperand(1))) {
- // Okay, we'll do this xform. Make the shift of shift.
- Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
- // (shift2 (shift1 & 0x00FF), c2)
- Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());
-
- // For logical shifts, the truncation has the effect of making the high
- // part of the register be zeros. Emulate this by inserting an AND to
- // clear the top bits as needed. This 'and' will usually be zapped by
- // other xforms later if dead.
- unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
- unsigned DstSize = TI->getType()->getScalarSizeInBits();
- APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
-
- // The mask we constructed says what the trunc would do if occurring
- // between the shifts. We want to know the effect *after* the second
- // shift. We know that it is a logical shift by a constant, so adjust the
- // mask as appropriate.
- if (I.getOpcode() == Instruction::Shl)
- MaskV <<= Op1->getZExtValue();
- else {
- assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
- MaskV = MaskV.lshr(Op1->getZExtValue());
- }
-
- // shift1 & 0x00FF
- Value *And = Builder->CreateAnd(NSh, ConstantInt::get(*Context, MaskV),
- TI->getName());
-
- // Return the value truncated to the interesting size.
- return new TruncInst(And, I.getType());
- }
- }
-
- if (Op0->hasOneUse()) {
- if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
- // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
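-      // Illustrative case with C = 4 on i32: ((X >>u 4) + Y) << 4 becomes
-      // (X + (Y << 4)) & 0xFFFFFFF0, trading a shift for a mask.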
- Value *V1, *V2;
- ConstantInt *CC;
- switch (Op0BO->getOpcode()) {
- default: break;
- case Instruction::Add:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- // These operators commute.
- // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C)
- if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
- match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
- m_Specific(Op1)))) {
- Value *YS = // (Y << C)
- Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
- // (X + (Y << C))
- Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
- Op0BO->getOperand(1)->getName());
- uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
- return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context,
- APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
- }
-
- // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C))
- Value *Op0BOOp1 = Op0BO->getOperand(1);
- if (isLeftShift && Op0BOOp1->hasOneUse() &&
- match(Op0BOOp1,
- m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
- m_ConstantInt(CC))) &&
- cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
- Value *YS = // (Y << C)
- Builder->CreateShl(Op0BO->getOperand(0), Op1,
- Op0BO->getName());
- // X & (CC << C)
- Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
- V1->getName()+".mask");
- return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
- }
- }
-
- // FALL THROUGH.
- case Instruction::Sub: {
- // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
- if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
- match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
- m_Specific(Op1)))) {
- Value *YS = // (Y << C)
- Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
- // (X + (Y << C))
- Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
- Op0BO->getOperand(0)->getName());
- uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
- return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context,
- APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
- }
-
- // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C)
- if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
- match(Op0BO->getOperand(0),
- m_And(m_Shr(m_Value(V1), m_Value(V2)),
- m_ConstantInt(CC))) && V2 == Op1 &&
- cast<BinaryOperator>(Op0BO->getOperand(0))
- ->getOperand(0)->hasOneUse()) {
- Value *YS = // (Y << C)
- Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
- // X & (CC << C)
- Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
- V1->getName()+".mask");
-
- return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
- }
-
- break;
- }
- }
-
-
-      // If the operand is a bitwise operator with a constant RHS, and the
- // shift is the only use, we can pull it out of the shift.
- if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
- bool isValid = true; // Valid only for And, Or, Xor
- bool highBitSet = false; // Transform if high bit of constant set?
-
- switch (Op0BO->getOpcode()) {
- default: isValid = false; break; // Do not perform transform!
- case Instruction::Add:
- isValid = isLeftShift;
- break;
- case Instruction::Or:
- case Instruction::Xor:
- highBitSet = false;
- break;
- case Instruction::And:
- highBitSet = true;
- break;
- }
-
- // If this is a signed shift right, and the high bit is modified
- // by the logical operation, do not perform the transformation.
- // The highBitSet boolean indicates the value of the high bit of
- // the constant which would cause it to be modified for this
- // operation.
- //
- if (isValid && I.getOpcode() == Instruction::AShr)
- isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
-
- if (isValid) {
- Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
-
- Value *NewShift =
- Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
- NewShift->takeName(Op0BO);
-
- return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
- NewRHS);
- }
- }
- }
- }
-
- // Find out if this is a shift of a shift by a constant.
- BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
- if (ShiftOp && !ShiftOp->isShift())
- ShiftOp = 0;
-
- if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
- ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
- uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
- uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
- assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
- if (ShiftAmt1 == 0) return 0; // Will be simplified in the future.
- Value *X = ShiftOp->getOperand(0);
-
- uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift.
-
- const IntegerType *Ty = cast<IntegerType>(I.getType());
-
- // Check for (X << c1) << c2 and (X >> c1) >> c2
- if (I.getOpcode() == ShiftOp->getOpcode()) {
-      // If this is an oversized composite shift, then unsigned shifts get 0
-      // and ashr saturates.
- if (AmtSum >= TypeBits) {
- if (I.getOpcode() != Instruction::AShr)
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr.
- }
-
- return BinaryOperator::Create(I.getOpcode(), X,
- ConstantInt::get(Ty, AmtSum));
- }
-
- if (ShiftOp->getOpcode() == Instruction::LShr &&
- I.getOpcode() == Instruction::AShr) {
- if (AmtSum >= TypeBits)
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
- // ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0.
- return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
- }
-
- if (ShiftOp->getOpcode() == Instruction::AShr &&
- I.getOpcode() == Instruction::LShr) {
- // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0.
- if (AmtSum >= TypeBits)
- AmtSum = TypeBits-1;
-
- Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum));
-
- APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::CreateAnd(Shift, ConstantInt::get(*Context, Mask));
- }
-
- // Okay, if we get here, one shift must be left, and the other shift must be
- // right. See if the amounts are equal.
- if (ShiftAmt1 == ShiftAmt2) {
- // If we have ((X >>? C) << C), turn this into X & (-1 << C).
- if (I.getOpcode() == Instruction::Shl) {
- APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
- return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, Mask));
- }
- // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
- if (I.getOpcode() == Instruction::LShr) {
- APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
- return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, Mask));
- }
- // We can simplify ((X << C) >>s C) into a trunc + sext.
- // NOTE: we could do this for any C, but that would make 'unusual' integer
- // types. For now, just stick to ones well-supported by the code
- // generators.
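-      // Illustrative case on i32 with C = 24: ((X << 24) >>s 24) becomes
-      // sext (trunc i32 %X to i8) to i32.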
- const Type *SExtType = 0;
- switch (Ty->getBitWidth() - ShiftAmt1) {
- case 1 :
- case 8 :
- case 16 :
- case 32 :
- case 64 :
- case 128:
- SExtType = IntegerType::get(*Context, Ty->getBitWidth() - ShiftAmt1);
- break;
- default: break;
- }
- if (SExtType)
- return new SExtInst(Builder->CreateTrunc(X, SExtType, "sext"), Ty);
- // Otherwise, we can't handle it yet.
- } else if (ShiftAmt1 < ShiftAmt2) {
- uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
-
- // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
- if (I.getOpcode() == Instruction::Shl) {
- assert(ShiftOp->getOpcode() == Instruction::LShr ||
- ShiftOp->getOpcode() == Instruction::AShr);
- Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
-
- APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::CreateAnd(Shift,
- ConstantInt::get(*Context, Mask));
- }
-
- // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
- if (I.getOpcode() == Instruction::LShr) {
- assert(ShiftOp->getOpcode() == Instruction::Shl);
- Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
-
- APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::CreateAnd(Shift,
- ConstantInt::get(*Context, Mask));
- }
-
- // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
- } else {
- assert(ShiftAmt2 < ShiftAmt1);
- uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
-
- // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
- if (I.getOpcode() == Instruction::Shl) {
- assert(ShiftOp->getOpcode() == Instruction::LShr ||
- ShiftOp->getOpcode() == Instruction::AShr);
- Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
- ConstantInt::get(Ty, ShiftDiff));
-
- APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::CreateAnd(Shift,
- ConstantInt::get(*Context, Mask));
- }
-
- // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
- if (I.getOpcode() == Instruction::LShr) {
- assert(ShiftOp->getOpcode() == Instruction::Shl);
- Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
-
- APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::CreateAnd(Shift,
- ConstantInt::get(*Context, Mask));
- }
-
- // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in.
- }
- }
- return 0;
-}
-
-
-/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear
-/// expression. If so, decompose it, returning some value X, such that Val is
-/// X*Scale+Offset.
-///
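-/// For example (illustrative), ((X << 2) + 12) decomposes to X with Scale = 4
-/// and Offset = 12, i.e. Val == X*4+12.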
-static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
- int &Offset, LLVMContext *Context) {
- assert(Val->getType() == Type::getInt32Ty(*Context) &&
- "Unexpected allocation size type!");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
- Offset = CI->getZExtValue();
- Scale = 0;
- return ConstantInt::get(Type::getInt32Ty(*Context), 0);
- } else if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
- if (I->getOpcode() == Instruction::Shl) {
- // This is a value scaled by '1 << the shift amt'.
- Scale = 1U << RHS->getZExtValue();
- Offset = 0;
- return I->getOperand(0);
- } else if (I->getOpcode() == Instruction::Mul) {
- // This value is scaled by 'RHS'.
- Scale = RHS->getZExtValue();
- Offset = 0;
- return I->getOperand(0);
- } else if (I->getOpcode() == Instruction::Add) {
- // We have X+C. Check to see if we really have (X*C2)+C1,
- // where C1 is divisible by C2.
- unsigned SubScale;
- Value *SubVal =
- DecomposeSimpleLinearExpr(I->getOperand(0), SubScale,
- Offset, Context);
- Offset += RHS->getZExtValue();
- Scale = SubScale;
- return SubVal;
- }
- }
- }
-
- // Otherwise, we can't look past this.
- Scale = 1;
- Offset = 0;
- return Val;
-}
-
-
-/// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
-/// try to eliminate the cast by moving the type information into the alloc.
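-/// For example (illustrative, assuming i64's ABI alignment is >= i32's), a
-/// bitcast of "alloca i32, i32 (shl %m, 1)" to i64* can be rewritten as
-/// "alloca i64, i32 %m", eliminating the cast.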
-Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
- AllocaInst &AI) {
- const PointerType *PTy = cast<PointerType>(CI.getType());
-
- BuilderTy AllocaBuilder(*Builder);
- AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
-
- // Remove any uses of AI that are dead.
- assert(!CI.use_empty() && "Dead instructions should be removed earlier!");
-
- for (Value::use_iterator UI = AI.use_begin(), E = AI.use_end(); UI != E; ) {
- Instruction *User = cast<Instruction>(*UI++);
- if (isInstructionTriviallyDead(User)) {
- while (UI != E && *UI == User)
- ++UI; // If this instruction uses AI more than once, don't break UI.
-
- ++NumDeadInst;
- DEBUG(errs() << "IC: DCE: " << *User << '\n');
- EraseInstFromFunction(*User);
- }
- }
-
- // This requires TargetData to get the alloca alignment and size information.
- if (!TD) return 0;
-
- // Get the type really allocated and the type casted to.
- const Type *AllocElTy = AI.getAllocatedType();
- const Type *CastElTy = PTy->getElementType();
- if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0;
-
- unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy);
- unsigned CastElTyAlign = TD->getABITypeAlignment(CastElTy);
- if (CastElTyAlign < AllocElTyAlign) return 0;
-
- // If the allocation has multiple uses, only promote it if we are strictly
- // increasing the alignment of the resultant allocation. If we keep it the
- // same, we open the door to infinite loops of various kinds. (A reference
- // from a dbg.declare doesn't count as a use for this purpose.)
- if (!AI.hasOneUse() && !hasOneUsePlusDeclare(&AI) &&
- CastElTyAlign == AllocElTyAlign) return 0;
-
- uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy);
- uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
- if (CastElTySize == 0 || AllocElTySize == 0) return 0;
-
- // See if we can satisfy the modulus by pulling a scale out of the array
- // size argument.
- unsigned ArraySizeScale;
- int ArrayOffset;
- Value *NumElements = // See if the array size is a decomposable linear expr.
- DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale,
- ArrayOffset, Context);
-
-  // If we can now satisfy the modulus by using a non-1 scale, we really can
- // do the xform.
- if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
- (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0;
-
- unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
- Value *Amt = 0;
- if (Scale == 1) {
- Amt = NumElements;
- } else {
- Amt = ConstantInt::get(Type::getInt32Ty(*Context), Scale);
- // Insert before the alloca, not before the cast.
- Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp");
- }
-
- if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
- Value *Off = ConstantInt::get(Type::getInt32Ty(*Context), Offset, true);
- Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
- }
-
- AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
- New->setAlignment(AI.getAlignment());
- New->takeName(&AI);
-
- // If the allocation has one real use plus a dbg.declare, just remove the
- // declare.
- if (DbgDeclareInst *DI = hasOneUsePlusDeclare(&AI)) {
- EraseInstFromFunction(*DI);
- }
- // If the allocation has multiple real uses, insert a cast and change all
- // things that used it to use the new cast. This will also hack on CI, but it
- // will die soon.
- else if (!AI.hasOneUse()) {
- // New is the allocation instruction, pointer typed. AI is the original
-    // allocation instruction, also pointer typed. Thus, the cast is a BitCast.
- Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast");
- AI.replaceAllUsesWith(NewCast);
- }
- return ReplaceInstUsesWith(CI, New);
-}
-
-/// CanEvaluateInDifferentType - Return true if we can take the specified value
-/// and return it as type Ty without inserting any new casts and without
-/// changing the computed value. This is used by code that tries to decide
-/// whether promoting or shrinking integer operations to wider or smaller types
-/// will allow us to eliminate a truncate or extend.
-///
-/// This is a truncation operation if Ty is smaller than V->getType(), or an
-/// extension operation if Ty is larger.
-///
-/// If CastOpc is a truncation, then Ty will be a type smaller than V. We
-/// should return true if trunc(V) can be computed by computing V in the smaller
-/// type. If V is an instruction, then trunc(inst(x,y)) can be computed as
-/// inst(trunc(x),trunc(y)), which only makes sense if x and y can be
-/// efficiently truncated.
-///
-/// If CastOpc is a sext or zext, we are asking if the low bits of the value can
-/// be computed in a larger type, which is then and'd or sext_in_reg'd to get
-/// the final result.
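-/// For example (illustrative), trunc (and (zext i8 %x to i32), 15) to i8 can
-/// be evaluated directly as and i8 %x, 15, removing both casts.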
-bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,
- unsigned CastOpc,
- int &NumCastsRemoved){
- // We can always evaluate constants in another type.
- if (isa<Constant>(V))
- return true;
-
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I) return false;
-
- const Type *OrigTy = V->getType();
-
- // If this is an extension or truncate, we can often eliminate it.
- if (isa<TruncInst>(I) || isa<ZExtInst>(I) || isa<SExtInst>(I)) {
- // If this is a cast from the destination type, we can trivially eliminate
- // it, and this will remove a cast overall.
- if (I->getOperand(0)->getType() == Ty) {
- // If the first operand is itself a cast, and is eliminable, do not count
- // this as an eliminable cast. We would prefer to eliminate those two
- // casts first.
- if (!isa<CastInst>(I->getOperand(0)) && I->hasOneUse())
- ++NumCastsRemoved;
- return true;
- }
- }
-
- // We can't extend or shrink something that has multiple uses: doing so would
- // require duplicating the instruction in general, which isn't profitable.
- if (!I->hasOneUse()) return false;
-
- unsigned Opc = I->getOpcode();
- switch (Opc) {
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- // These operators can all arbitrarily be extended or truncated.
- return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
- NumCastsRemoved) &&
- CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
- NumCastsRemoved);
-
- case Instruction::UDiv:
- case Instruction::URem: {
- // UDiv and URem can be truncated if all the truncated bits are zero.
- uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
- uint32_t BitWidth = Ty->getScalarSizeInBits();
- if (BitWidth < OrigBitWidth) {
- APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth);
- if (MaskedValueIsZero(I->getOperand(0), Mask) &&
- MaskedValueIsZero(I->getOperand(1), Mask)) {
- return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
- NumCastsRemoved) &&
- CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
- NumCastsRemoved);
- }
- }
- break;
- }
- case Instruction::Shl:
- // If we are truncating the result of this SHL, and if it's a shift of a
- // constant amount, we can always perform a SHL in a smaller type.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint32_t BitWidth = Ty->getScalarSizeInBits();
- if (BitWidth < OrigTy->getScalarSizeInBits() &&
- CI->getLimitedValue(BitWidth) < BitWidth)
- return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
- NumCastsRemoved);
- }
- break;
- case Instruction::LShr:
- // If this is a truncate of a logical shr, we can truncate it to a smaller
- // lshr iff we know that the bits we would otherwise be shifting in are
- // already zeros.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
- uint32_t BitWidth = Ty->getScalarSizeInBits();
- if (BitWidth < OrigBitWidth &&
- MaskedValueIsZero(I->getOperand(0),
- APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
- CI->getLimitedValue(BitWidth) < BitWidth) {
- return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
- NumCastsRemoved);
- }
- }
- break;
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::Trunc:
- // If this is the same kind of case as our original (e.g. zext+zext), we
- // can safely replace it. Note that replacing it does not reduce the number
- // of casts in the input.
- if (Opc == CastOpc)
- return true;
-
- // sext (zext ty1), ty2 -> zext ty2
- if (CastOpc == Instruction::SExt && Opc == Instruction::ZExt)
- return true;
- break;
- case Instruction::Select: {
- SelectInst *SI = cast<SelectInst>(I);
- return CanEvaluateInDifferentType(SI->getTrueValue(), Ty, CastOpc,
- NumCastsRemoved) &&
- CanEvaluateInDifferentType(SI->getFalseValue(), Ty, CastOpc,
- NumCastsRemoved);
- }
- case Instruction::PHI: {
- // We can change a phi if we can change all operands.
- PHINode *PN = cast<PHINode>(I);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateInDifferentType(PN->getIncomingValue(i), Ty, CastOpc,
- NumCastsRemoved))
- return false;
- return true;
- }
- default:
- // TODO: Can handle more cases here.
- break;
- }
-
- return false;
-}
-
-/// EvaluateInDifferentType - Given an expression that
-/// CanEvaluateInDifferentType returns true for, actually insert the code to
-/// evaluate the expression.
-Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
- bool isSigned) {
- if (Constant *C = dyn_cast<Constant>(V))
- return ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
-
- // Otherwise, it must be an instruction.
- Instruction *I = cast<Instruction>(V);
- Instruction *Res = 0;
- unsigned Opc = I->getOpcode();
- switch (Opc) {
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::AShr:
- case Instruction::LShr:
- case Instruction::Shl:
- case Instruction::UDiv:
- case Instruction::URem: {
- Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
- Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
- Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
- break;
- }
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- // If the source type of the cast is the type we're trying for then we can
- // just return the source. There's no need to insert it because it is not
- // new.
- if (I->getOperand(0)->getType() == Ty)
- return I->getOperand(0);
-
- // Otherwise, must be the same type of cast, so just reinsert a new one.
- Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0), Ty);
- break;
- case Instruction::Select: {
- Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
- Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
- Res = SelectInst::Create(I->getOperand(0), True, False);
- break;
- }
- case Instruction::PHI: {
- PHINode *OPN = cast<PHINode>(I);
- PHINode *NPN = PHINode::Create(Ty);
- for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
- Value *V = EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
- NPN->addIncoming(V, OPN->getIncomingBlock(i));
- }
- Res = NPN;
- break;
- }
- default:
- // CanEvaluateInDifferentType only lets the cases above through.
- llvm_unreachable("Unreachable!");
- break;
- }
-
- Res->takeName(I);
- return InsertNewInstBefore(Res, *I);
-}
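-
-// Illustrative sketch (names hypothetical, not from this file): together,
-// the two helpers above let a truncate be sunk through integer arithmetic,
-// e.g.
-//   %a = add i32 %x, %y
-//   %t = trunc i32 %a to i8
-// can be evaluated directly in i8 as
-//   %x8 = trunc i32 %x to i8
-//   %y8 = trunc i32 %y to i8
-//   %t  = add i8 %x8, %y8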
-
-/// @brief Implement the transforms common to all CastInst visitors.
-Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
- Value *Src = CI.getOperand(0);
-
- // Many cases of "cast of a cast" are eliminable. If it's eliminable we just
- // eliminate it now.
- if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
- if (Instruction::CastOps opc =
- isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
- // The first cast (CSrc) is eliminable so we need to fix up or replace
- // the second cast (CI). CSrc will then have a good chance of being dead.
- return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
- }
- }
-
- // If we are casting a select then fold the cast into the select
- if (SelectInst *SI = dyn_cast<SelectInst>(Src))
- if (Instruction *NV = FoldOpIntoSelect(CI, SI, this))
- return NV;
-
- // If we are casting a PHI then fold the cast into the PHI
- if (isa<PHINode>(Src)) {
- // We don't do this if it would create a PHI node with an illegal type
- // when the current type is legal.
- if (!isa<IntegerType>(Src->getType()) ||
- !isa<IntegerType>(CI.getType()) ||
- ShouldChangeType(CI.getType(), Src->getType(), TD))
- if (Instruction *NV = FoldOpIntoPhi(CI))
- return NV;
- }
-
- return 0;
-}
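-
-// For example (illustrative IR), the cast-pair elimination above turns
-//   %b = zext i8 %a to i16
-//   %c = zext i16 %b to i32
-// into the single cast
-//   %c = zext i8 %a to i32
-// after which %b is typically dead.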
-
-/// FindElementAtOffset - Given a type and a constant offset, determine whether
-/// or not there is a sequence of GEP indices into the type that will land us at
-/// the specified offset. If so, fill them into NewIndices and return the
-/// resultant element type, otherwise return null.
-static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,
- SmallVectorImpl<Value*> &NewIndices,
- const TargetData *TD,
- LLVMContext *Context) {
- if (!TD) return 0;
- if (!Ty->isSized()) return 0;
-
- // Start with the index over the outer type. Note that the type size
- // might be zero (even if the offset isn't zero) if the indexed type
- // is something like [0 x {int, int}]
- const Type *IntPtrTy = TD->getIntPtrType(*Context);
- int64_t FirstIdx = 0;
- if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
- FirstIdx = Offset/TySize;
- Offset -= FirstIdx*TySize;
-
- // Handle hosts where % returns negative instead of values [0..TySize).
- if (Offset < 0) {
- --FirstIdx;
- Offset += TySize;
- assert(Offset >= 0);
- }
- assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
- }
-
- NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
-
- // Index into the types. If we fail, return null.
- while (Offset) {
- // Indexing into tail padding between struct/array elements.
- if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
- return 0;
-
- if (const StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = TD->getStructLayout(STy);
- assert(Offset < (int64_t)SL->getSizeInBytes() &&
- "Offset must stay within the indexed type");
-
- unsigned Elt = SL->getElementContainingOffset(Offset);
- NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Elt));
-
- Offset -= SL->getElementOffset(Elt);
- Ty = STy->getElementType(Elt);
- } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
- uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
- assert(EltSize && "Cannot index into a zero-sized array");
- NewIndices.push_back(ConstantInt::get(IntPtrTy, Offset/EltSize));
- Offset %= EltSize;
- Ty = AT->getElementType();
- } else {
- // Otherwise, we can't index into the middle of this scalar
- // (non-aggregate) type; bail.
- return 0;
- }
- }
-
- return Ty;
-}
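-
-// Worked example (hypothetical types, usual TargetData layouts): for
-//   %struct.S = type { i32, [4 x i16] }
-// and Offset = 6, the function fills NewIndices with [0, 1, 1] -- element 1
-// of the struct (the array starting at byte 4), entry 1 within the array --
-// and returns i16.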
-
-/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
-Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
- Value *Src = CI.getOperand(0);
-
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
- // If casting the result of a getelementptr instruction with no offset, turn
- // this into a cast of the original pointer!
- if (GEP->hasAllZeroIndices()) {
- // Changing the cast operand is usually not a good idea but it is safe
- // here because the pointer operand is being replaced with another
- // pointer operand so the opcode doesn't need to change.
- Worklist.Add(GEP);
- CI.setOperand(0, GEP->getOperand(0));
- return &CI;
- }
-
- // If the GEP has a single use, and the base pointer is a bitcast, and the
- // GEP computes a constant offset, see if we can convert these three
- // instructions into fewer. This typically happens with unions and other
- // non-type-safe code.
- if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) {
- if (GEP->hasAllConstantIndices()) {
- // We are guaranteed to get a constant from EmitGEPOffset.
- ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP, *this));
- int64_t Offset = OffsetV->getSExtValue();
-
- // Get the base pointer input of the bitcast, and the type it points to.
- Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
- const Type *GEPIdxTy =
- cast<PointerType>(OrigBase->getType())->getElementType();
- SmallVector<Value*, 8> NewIndices;
- if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices, TD, Context)) {
- // If we were able to index down into an element, create the GEP
- // and bitcast the result. This eliminates one bitcast, potentially
- // two.
- Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
- Builder->CreateInBoundsGEP(OrigBase,
- NewIndices.begin(), NewIndices.end()) :
- Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end());
- NGEP->takeName(GEP);
-
- if (isa<BitCastInst>(CI))
- return new BitCastInst(NGEP, CI.getType());
- assert(isa<PtrToIntInst>(CI));
- return new PtrToIntInst(NGEP, CI.getType());
- }
- }
- }
- }
-
- return commonCastTransforms(CI);
-}
-
-/// commonIntCastTransforms - This function implements the common transforms
-/// for trunc, zext, and sext.
-Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
- if (Instruction *Result = commonCastTransforms(CI))
- return Result;
-
- Value *Src = CI.getOperand(0);
- const Type *SrcTy = Src->getType();
- const Type *DestTy = CI.getType();
- uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
- uint32_t DestBitSize = DestTy->getScalarSizeInBits();
-
- // See if we can simplify any instructions used by the LHS whose sole
- // purpose is to compute bits we don't care about.
- if (SimplifyDemandedInstructionBits(CI))
- return &CI;
-
- // If the source isn't an instruction or has more than one use then we
- // can't do anything more.
- Instruction *SrcI = dyn_cast<Instruction>(Src);
- if (!SrcI || !Src->hasOneUse())
- return 0;
-
- // Attempt to propagate the cast into the instruction for int->int casts.
- int NumCastsRemoved = 0;
- // Only do this if the dest type is a simple type; don't convert the
- // expression tree to something weird like i93 unless the source is also
- // strange.
- if ((isa<VectorType>(DestTy) ||
- ShouldChangeType(SrcI->getType(), DestTy, TD)) &&
- CanEvaluateInDifferentType(SrcI, DestTy,
- CI.getOpcode(), NumCastsRemoved)) {
- // If this cast is a truncate, evaluating in a different type always
- // eliminates the cast, so it is always a win. If this is a zero-extension,
- // we need to do an AND to maintain the clear top-part of the computation,
- // so we require that the input have eliminated at least one cast. If this
- // is a sign extension, we insert two new casts (to do the extension) so we
- // require that two casts have been eliminated.
- bool DoXForm = false;
- bool JustReplace = false;
- switch (CI.getOpcode()) {
- default:
- // commonIntCastTransforms is only called for trunc, zext and sext,
- // so no other opcode should reach this switch.
- llvm_unreachable("Unknown cast type");
- case Instruction::Trunc:
- DoXForm = true;
- break;
- case Instruction::ZExt: {
- DoXForm = NumCastsRemoved >= 1;
-
- if (!DoXForm && 0) {
- // If it's unnecessary to issue an AND to clear the high bits, it's
- // always profitable to do this xform.
- Value *TryRes = EvaluateInDifferentType(SrcI, DestTy, false);
- APInt Mask(APInt::getBitsSet(DestBitSize, SrcBitSize, DestBitSize));
- if (MaskedValueIsZero(TryRes, Mask))
- return ReplaceInstUsesWith(CI, TryRes);
-
- if (Instruction *TryI = dyn_cast<Instruction>(TryRes))
- if (TryI->use_empty())
- EraseInstFromFunction(*TryI);
- }
- break;
- }
- case Instruction::SExt: {
- DoXForm = NumCastsRemoved >= 2;
- if (!DoXForm && !isa<TruncInst>(SrcI) && 0) {
- // If we do not have to emit the truncate + sext pair, then it's always
- // profitable to do this xform.
- //
- // It's not safe to eliminate the trunc + sext pair if one of the
- // eliminated cast is a truncate. e.g.
- // t2 = trunc i32 t1 to i16
- // t3 = sext i16 t2 to i32
- // !=
- // i32 t1
- Value *TryRes = EvaluateInDifferentType(SrcI, DestTy, true);
- unsigned NumSignBits = ComputeNumSignBits(TryRes);
- if (NumSignBits > (DestBitSize - SrcBitSize))
- return ReplaceInstUsesWith(CI, TryRes);
-
- if (Instruction *TryI = dyn_cast<Instruction>(TryRes))
- if (TryI->use_empty())
- EraseInstFromFunction(*TryI);
- }
- break;
- }
- }
-
- if (DoXForm) {
- DEBUG(errs() << "ICE: EvaluateInDifferentType converting expression type"
- " to avoid cast: " << CI);
- Value *Res = EvaluateInDifferentType(SrcI, DestTy,
- CI.getOpcode() == Instruction::SExt);
- if (JustReplace)
- // Just replace this cast with the result.
- return ReplaceInstUsesWith(CI, Res);
-
- assert(Res->getType() == DestTy);
- switch (CI.getOpcode()) {
- default: llvm_unreachable("Unknown cast type!");
- case Instruction::Trunc:
- // Just replace this cast with the result.
- return ReplaceInstUsesWith(CI, Res);
- case Instruction::ZExt: {
- assert(SrcBitSize < DestBitSize && "Not a zext?");
-
- // If the high bits are already zero, just replace this cast with the
- // result.
- APInt Mask(APInt::getBitsSet(DestBitSize, SrcBitSize, DestBitSize));
- if (MaskedValueIsZero(Res, Mask))
- return ReplaceInstUsesWith(CI, Res);
-
- // We need to emit an AND to clear the high bits.
- Constant *C = ConstantInt::get(*Context,
- APInt::getLowBitsSet(DestBitSize, SrcBitSize));
- return BinaryOperator::CreateAnd(Res, C);
- }
- case Instruction::SExt: {
- // If the high bits are already filled with sign bit, just replace this
- // cast with the result.
- unsigned NumSignBits = ComputeNumSignBits(Res);
- if (NumSignBits > (DestBitSize - SrcBitSize))
- return ReplaceInstUsesWith(CI, Res);
-
- // We need to emit a cast to truncate, then a cast to sext.
- return new SExtInst(Builder->CreateTrunc(Res, Src->getType()), DestTy);
- }
- }
- }
- }
-
- Value *Op0 = SrcI->getNumOperands() > 0 ? SrcI->getOperand(0) : 0;
- Value *Op1 = SrcI->getNumOperands() > 1 ? SrcI->getOperand(1) : 0;
-
- switch (SrcI->getOpcode()) {
- case Instruction::Add:
- case Instruction::Mul:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- // If we are discarding information, rewrite.
- if (DestBitSize < SrcBitSize && DestBitSize != 1) {
- // Don't insert two casts unless at least one can be eliminated.
- if (!ValueRequiresCast(CI.getOpcode(), Op1, DestTy, TD) ||
- !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) {
- Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName());
- Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName());
- return BinaryOperator::Create(
- cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c);
- }
- }
-
- // cast (xor bool X, true) to int --> xor (cast bool X to int), 1
- if (isa<ZExtInst>(CI) && SrcBitSize == 1 &&
- SrcI->getOpcode() == Instruction::Xor &&
- Op1 == ConstantInt::getTrue(*Context) &&
- (!Op0->hasOneUse() || !isa<CmpInst>(Op0))) {
- Value *New = Builder->CreateZExt(Op0, DestTy, Op0->getName());
- return BinaryOperator::CreateXor(New,
- ConstantInt::get(CI.getType(), 1));
- }
- break;
-
- case Instruction::Shl: {
- // Canonicalize trunc inside shl, if we can.
- ConstantInt *CI = dyn_cast<ConstantInt>(Op1);
- if (CI && DestBitSize < SrcBitSize &&
- CI->getLimitedValue(DestBitSize) < DestBitSize) {
- Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName());
- Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName());
- return BinaryOperator::CreateShl(Op0c, Op1c);
- }
- break;
- }
- }
- return 0;
-}
-
-Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
- if (Instruction *Result = commonIntCastTransforms(CI))
- return Result;
-
- Value *Src = CI.getOperand(0);
- const Type *Ty = CI.getType();
- uint32_t DestBitWidth = Ty->getScalarSizeInBits();
- uint32_t SrcBitWidth = Src->getType()->getScalarSizeInBits();
-
- // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0)
- if (DestBitWidth == 1) {
- Constant *One = ConstantInt::get(Src->getType(), 1);
- Src = Builder->CreateAnd(Src, One, "tmp");
- Value *Zero = Constant::getNullValue(Src->getType());
- return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
- }
-
- // Optimize trunc(lshr(), c) to pull the shift through the truncate.
- ConstantInt *ShAmtV = 0;
- Value *ShiftOp = 0;
- if (Src->hasOneUse() &&
- match(Src, m_LShr(m_Value(ShiftOp), m_ConstantInt(ShAmtV)))) {
- uint32_t ShAmt = ShAmtV->getLimitedValue(SrcBitWidth);
-
- // Get a mask for the bits shifting in.
- APInt Mask(APInt::getLowBitsSet(SrcBitWidth, ShAmt).shl(DestBitWidth));
- if (MaskedValueIsZero(ShiftOp, Mask)) {
- if (ShAmt >= DestBitWidth) // All zeros.
- return ReplaceInstUsesWith(CI, Constant::getNullValue(Ty));
-
- // Okay, we can shrink this. Truncate the input, then return a new
- // shift.
- Value *V1 = Builder->CreateTrunc(ShiftOp, Ty, ShiftOp->getName());
- Value *V2 = ConstantExpr::getTrunc(ShAmtV, Ty);
- return BinaryOperator::CreateLShr(V1, V2);
- }
- }
-
- return 0;
-}
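-
-// Illustrative example of the trunc(lshr) fold above: if %s has one use and
-// %x is known to have zero bits above bit 11,
-//   %s = lshr i32 %x, 4
-//   %t = trunc i32 %s to i16
-// shrinks to
-//   %x16 = trunc i32 %x to i16
-//   %t   = lshr i16 %x16, 4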
-
-/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
-/// in order to eliminate the icmp.
-Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
- bool DoXform) {
- // If we are just checking for an icmp eq of a single bit and zext'ing it
- // to an integer, then shift the bit to the appropriate place and then
- // cast to integer to avoid the comparison.
- if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
- const APInt &Op1CV = Op1C->getValue();
-
- // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
- // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
- if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
- (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
- if (!DoXform) return ICI;
-
- Value *In = ICI->getOperand(0);
- Value *Sh = ConstantInt::get(In->getType(),
- In->getType()->getScalarSizeInBits()-1);
- In = Builder->CreateLShr(In, Sh, In->getName()+".lobit");
- if (In->getType() != CI.getType())
- In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp");
-
- if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
- Constant *One = ConstantInt::get(In->getType(), 1);
- In = Builder->CreateXor(In, One, In->getName()+".not");
- }
-
- return ReplaceInstUsesWith(CI, In);
- }
-
- // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
- // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
- // zext (X == 1) to i32 --> X iff X has only the low bit set.
- // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set.
- // zext (X != 0) to i32 --> X iff X has only the low bit set.
- // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
- // zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
- // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
- if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
- // This only works for EQ and NE
- ICI->isEquality()) {
- // If Op1C is some other power of two, convert:
- uint32_t BitWidth = Op1C->getType()->getBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- APInt TypeMask(APInt::getAllOnesValue(BitWidth));
- ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne);
-
- APInt KnownZeroMask(~KnownZero);
- if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
- if (!DoXform) return ICI;
-
- bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
- if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
- // (X&4) == 2 --> false
- // (X&4) != 2 --> true
- Constant *Res = ConstantInt::get(Type::getInt1Ty(*Context), isNE);
- Res = ConstantExpr::getZExt(Res, CI.getType());
- return ReplaceInstUsesWith(CI, Res);
- }
-
- uint32_t ShiftAmt = KnownZeroMask.logBase2();
- Value *In = ICI->getOperand(0);
- if (ShiftAmt) {
- // Perform a logical shr by shiftamt.
- // Insert the shift to put the result in the low bit.
- In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt),
- In->getName()+".lobit");
- }
-
- if ((Op1CV != 0) == isNE) { // Toggle the low bit.
- Constant *One = ConstantInt::get(In->getType(), 1);
- In = Builder->CreateXor(In, One, "tmp");
- }
-
- if (CI.getType() == In->getType())
- return ReplaceInstUsesWith(CI, In);
- else
- return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
- }
- }
- }
-
- // icmp ne A, B reduces to a single bit of (xor A, B) when A and B can
- // differ in only one (unknown) bit. It is also profitable to transform
- // icmp eq into not(xor(A, B)) because that may lead to additional
- // simplifications.
- if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) {
- if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
- uint32_t BitWidth = ITy->getBitWidth();
- Value *LHS = ICI->getOperand(0);
- Value *RHS = ICI->getOperand(1);
-
- APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
- APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
- APInt TypeMask(APInt::getAllOnesValue(BitWidth));
- ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS);
- ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS);
-
- if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
- APInt KnownBits = KnownZeroLHS | KnownOneLHS;
- APInt UnknownBit = ~KnownBits;
- if (UnknownBit.countPopulation() == 1) {
- if (!DoXform) return ICI;
-
- Value *Result = Builder->CreateXor(LHS, RHS);
-
- // Mask off any bits that are set and won't be shifted away.
- if (KnownOneLHS.uge(UnknownBit))
- Result = Builder->CreateAnd(Result,
- ConstantInt::get(ITy, UnknownBit));
-
- // Shift the bit we're testing down to the lsb.
- Result = Builder->CreateLShr(
- Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));
-
- if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
- Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1));
- Result->takeName(ICI);
- return ReplaceInstUsesWith(CI, Result);
- }
- }
- }
- }
-
- return 0;
-}
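-
-// Worked example (illustrative) of the single-possible-bit case above:
-// with %m = and i8 %x, 4,
-//   %c = icmp eq i8 %m, 0
-//   %z = zext i1 %c to i32
-// becomes
-//   %lo = lshr i8 %m, 2
-//   %nb = xor i8 %lo, 1
-//   %z  = zext i8 %nb to i32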
-
-Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
- // If one of the common conversions will work, do it.
- if (Instruction *Result = commonIntCastTransforms(CI))
- return Result;
-
- Value *Src = CI.getOperand(0);
-
- // If this is a TRUNC followed by a ZEXT, then we are dealing with integral
- // types, and if the sizes are just right we can convert this into a logical
- // 'and', which will be much cheaper than the pair of casts.
- if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
- // Get the sizes of the types involved. We know that the intermediate type
- // will be smaller than A or C, but don't know the relation between A and C.
- Value *A = CSrc->getOperand(0);
- unsigned SrcSize = A->getType()->getScalarSizeInBits();
- unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
- unsigned DstSize = CI.getType()->getScalarSizeInBits();
- // If we're actually extending zero bits, then if
- // SrcSize < DstSize: zext(a & mask)
- // SrcSize == DstSize: a & mask
- // SrcSize > DstSize: trunc(a) & mask
- if (SrcSize < DstSize) {
- APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
- Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
- Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");
- return new ZExtInst(And, CI.getType());
- }
-
- if (SrcSize == DstSize) {
- APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
- return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
- AndValue));
- }
- if (SrcSize > DstSize) {
- Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp");
- APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
- return BinaryOperator::CreateAnd(Trunc,
- ConstantInt::get(Trunc->getType(),
- AndValue));
- }
- }
-
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
- return transformZExtICmp(ICI, CI);
-
- BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
- if (SrcI && SrcI->getOpcode() == Instruction::Or) {
- // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one
- // of the (zext icmp) will be transformed.
- ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
- ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
- if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
- (transformZExtICmp(LHS, CI, false) ||
- transformZExtICmp(RHS, CI, false))) {
- Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName());
- Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName());
- return BinaryOperator::Create(Instruction::Or, LCast, RCast);
- }
- }
-
- // zext(trunc(t) & C) -> (t & zext(C)).
- if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse())
- if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
- if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) {
- Value *TI0 = TI->getOperand(0);
- if (TI0->getType() == CI.getType())
- return
- BinaryOperator::CreateAnd(TI0,
- ConstantExpr::getZExt(C, CI.getType()));
- }
-
- // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
- if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse())
- if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
- if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0)))
- if (And->getOpcode() == Instruction::And && And->hasOneUse() &&
- And->getOperand(1) == C)
- if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) {
- Value *TI0 = TI->getOperand(0);
- if (TI0->getType() == CI.getType()) {
- Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
- Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp");
- return BinaryOperator::CreateXor(NewAnd, ZC);
- }
- }
-
- return 0;
-}
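-
-// For example (illustrative), the zext-of-trunc case above with equal outer
-// sizes turns
-//   %t = trunc i32 %a to i16
-//   %z = zext i16 %t to i32
-// into
-//   %z = and i32 %a, 65535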
-
-Instruction *InstCombiner::visitSExt(SExtInst &CI) {
- if (Instruction *I = commonIntCastTransforms(CI))
- return I;
-
- Value *Src = CI.getOperand(0);
-
- // Canonicalize sign-extend from i1 to a select.
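- // (e.g. sext i1 %b to i32 becomes select i1 %b, i32 -1, i32 0)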
- if (Src->getType() == Type::getInt1Ty(*Context))
- return SelectInst::Create(Src,
- Constant::getAllOnesValue(CI.getType()),
- Constant::getNullValue(CI.getType()));
-
- // See if the value being truncated is already sign extended. If so, just
- // eliminate the trunc/sext pair.
- if (Operator::getOpcode(Src) == Instruction::Trunc) {
- Value *Op = cast<User>(Src)->getOperand(0);
- unsigned OpBits = Op->getType()->getScalarSizeInBits();
- unsigned MidBits = Src->getType()->getScalarSizeInBits();
- unsigned DestBits = CI.getType()->getScalarSizeInBits();
- unsigned NumSignBits = ComputeNumSignBits(Op);
-
- if (OpBits == DestBits) {
- // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
- // bits, the value is already sign-extended enough; just use Op.
- if (NumSignBits > DestBits-MidBits)
- return ReplaceInstUsesWith(CI, Op);
- } else if (OpBits < DestBits) {
- // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
- // bits, just sext from i32.
- if (NumSignBits > OpBits-MidBits)
- return new SExtInst(Op, CI.getType(), "tmp");
- } else {
- // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
- // bits, just truncate to i32.
- if (NumSignBits > OpBits-MidBits)
- return new TruncInst(Op, CI.getType(), "tmp");
- }
- }
-
- // If the input is a shl/ashr pair of a same constant, then this is a sign
- // extension from a smaller value. If we could trust arbitrary bitwidth
- // integers, we could turn this into a truncate to the smaller bit and then
- // use a sext for the whole extension. Since we don't, look deeper and check
- // for a truncate. If the source and dest are the same type, eliminate the
- // trunc and extend and just do shifts. For example, turn:
- // %a = trunc i32 %i to i8
- // %b = shl i8 %a, 6
- // %c = ashr i8 %b, 6
- // %d = sext i8 %c to i32
- // into:
- // %a = shl i32 %i, 30
- // %d = ashr i32 %a, 30
- Value *A = 0;
- ConstantInt *BA = 0, *CA = 0;
- if (match(Src, m_AShr(m_Shl(m_Value(A), m_ConstantInt(BA)),
- m_ConstantInt(CA))) &&
- BA == CA && isa<TruncInst>(A)) {
- Value *I = cast<TruncInst>(A)->getOperand(0);
- if (I->getType() == CI.getType()) {
- unsigned MidSize = Src->getType()->getScalarSizeInBits();
- unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
- unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
- Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
- I = Builder->CreateShl(I, ShAmtV, CI.getName());
- return BinaryOperator::CreateAShr(I, ShAmtV);
- }
- }
-
- return 0;
-}
-
-/// FitsInFPType - Return a Constant* for the specified FP constant if it fits
-/// in the specified FP type without changing its value.
-static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem,
- LLVMContext *Context) {
- bool losesInfo;
- APFloat F = CFP->getValueAPF();
- (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
- if (!losesInfo)
- return ConstantFP::get(*Context, F);
- return 0;
-}
-
-/// LookThroughFPExtensions - If this is an fp extension instruction, look
-/// through it until we get the source value.
-static Value *LookThroughFPExtensions(Value *V, LLVMContext *Context) {
- if (Instruction *I = dyn_cast<Instruction>(V))
- if (I->getOpcode() == Instruction::FPExt)
- return LookThroughFPExtensions(I->getOperand(0), Context);
-
- // If this value is a constant, return the constant in the smallest FP type
- // that can accurately represent it. This allows us to turn
- // (float)((double)X+2.0) into x+2.0f.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
- if (CFP->getType() == Type::getPPC_FP128Ty(*Context))
- return V; // No constant folding of this.
- // See if the value can be truncated to float and then reextended.
- if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle, Context))
- return V;
- if (CFP->getType() == Type::getDoubleTy(*Context))
- return V; // Won't shrink.
- if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble, Context))
- return V;
- // Don't try to shrink to various long double types.
- }
-
- return V;
-}
-
-Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
- if (Instruction *I = commonCastTransforms(CI))
- return I;
-
- // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
- // smaller than the destination type, we can eliminate the truncate by doing
- // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well as
- // many builtins (sqrt, etc).
- BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
- if (OpI && OpI->hasOneUse()) {
- switch (OpI->getOpcode()) {
- default: break;
- case Instruction::FAdd:
- case Instruction::FSub:
- case Instruction::FMul:
- case Instruction::FDiv:
- case Instruction::FRem:
- const Type *SrcTy = OpI->getType();
- Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0), Context);
- Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1), Context);
- if (LHSTrunc->getType() != SrcTy &&
- RHSTrunc->getType() != SrcTy) {
- unsigned DstSize = CI.getType()->getScalarSizeInBits();
- // If the source types were both smaller than the destination type of
- // the cast, do this xform.
- if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
- RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
- LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType());
- RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType());
- return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
- }
- }
- break;
- }
- }
- return 0;
-}
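-
-// Illustrative example of the fold above (names hypothetical):
-//   %xd = fpext float %x to double
-//   %yd = fpext float %y to double
-//   %s  = fadd double %xd, %yd
-//   %r  = fptrunc double %s to float
-// is performed directly in float as
-//   %r = fadd float %x, %y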
-
-Instruction *InstCombiner::visitFPExt(CastInst &CI) {
- return commonCastTransforms(CI);
-}
-
-Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
- Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
- if (OpI == 0)
- return commonCastTransforms(FI);
-
- // fptoui(uitofp(X)) --> X
- // fptoui(sitofp(X)) --> X
- // This is safe if the intermediate type has enough bits in its mantissa to
- // accurately represent all values of X. For example, do not do this with
- // i64->float->i64. This is also safe for the sitofp case, because any
- // negative 'X' value would cause an undefined result for the fptoui.
- if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
- OpI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
- OpI->getType()->getFPMantissaWidth())
- return ReplaceInstUsesWith(FI, OpI->getOperand(0));
-
- return commonCastTransforms(FI);
-}
-
-Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
- Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
- if (OpI == 0)
- return commonCastTransforms(FI);
-
- // fptosi(sitofp(X)) --> X
- // fptosi(uitofp(X)) --> X
- // This is safe if the intermediate type has enough bits in its mantissa to
- // accurately represent all values of X. For example, do not do this with
- // i64->float->i64. This is also safe for the uitofp case, because an 'X'
- // value too large for the signed result would cause an undefined fptosi.
- if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
- OpI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getScalarSizeInBits() <=
- OpI->getType()->getFPMantissaWidth())
- return ReplaceInstUsesWith(FI, OpI->getOperand(0));
-
- return commonCastTransforms(FI);
-}
-
-Instruction *InstCombiner::visitUIToFP(CastInst &CI) {
- return commonCastTransforms(CI);
-}
-
-Instruction *InstCombiner::visitSIToFP(CastInst &CI) {
- return commonCastTransforms(CI);
-}
-
-Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
- // If the destination integer type is smaller than the intptr_t type for
- // this target, do a ptrtoint to intptr_t then do a trunc. This allows the
- // trunc to be exposed to other transforms. Don't do this for extending
- // ptrtoint's, because we don't know if the target sign or zero extends its
- // pointers.
- if (TD &&
- CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
- Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()),
- "tmp");
- return new TruncInst(P, CI.getType());
- }
-
- return commonPointerCastTransforms(CI);
-}
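-
-// For example (illustrative, assuming TargetData with 64-bit pointers):
-//   %i = ptrtoint i8* %p to i32
-// is rewritten as
-//   %t = ptrtoint i8* %p to i64
-//   %i = trunc i64 %t to i32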
-
-Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
- // If the source integer type is larger than the intptr_t type for
- // this target, do a trunc to the intptr_t type, then inttoptr of it. This
- // allows the trunc to be exposed to other transforms. Don't do this for
- // extending inttoptr's, because we don't know if the target sign or zero
- // extends its pointers.
- if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() >
- TD->getPointerSizeInBits()) {
- Value *P = Builder->CreateTrunc(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()), "tmp");
- return new IntToPtrInst(P, CI.getType());
- }
-
- if (Instruction *I = commonCastTransforms(CI))
- return I;
-
- return 0;
-}
-
-Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
- // If the operand is a pointer, apply the pointer cast transforms;
- // otherwise just apply the common ones.
- Value *Src = CI.getOperand(0);
- const Type *SrcTy = Src->getType();
- const Type *DestTy = CI.getType();
-
- if (isa<PointerType>(SrcTy)) {
- if (Instruction *I = commonPointerCastTransforms(CI))
- return I;
- } else {
- if (Instruction *Result = commonCastTransforms(CI))
- return Result;
- }
-
- // Get rid of casts from one type to the same type. These are useless and can
- // be replaced by the operand.
- if (DestTy == Src->getType())
- return ReplaceInstUsesWith(CI, Src);
-
- if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
- const PointerType *SrcPTy = cast<PointerType>(SrcTy);
- const Type *DstElTy = DstPTy->getElementType();
- const Type *SrcElTy = SrcPTy->getElementType();
-
- // If the address spaces don't match, don't eliminate the bitcast, which is
- // required for changing types.
- if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
- return 0;
-
- // If we are casting an alloca to a pointer to a type of the same
- // size, rewrite the allocation instruction to allocate the "right" type.
- // There is no need to modify malloc calls because it is their bitcast that
- // needs to be cleaned up.
- if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
- if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
- return V;
-
- // If the source and destination are pointers, and this cast is equivalent
- // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
- // This can enhance SROA and other transforms that want type-safe pointers.
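- // For illustration (hypothetical types):
- //   bitcast [10 x i32]* %p to i32*
- // becomes
- //   getelementptr inbounds [10 x i32]* %p, i32 0, i32 0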
- Constant *ZeroUInt = Constant::getNullValue(Type::getInt32Ty(*Context));
- unsigned NumZeros = 0;
- while (SrcElTy != DstElTy &&
- isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
- SrcElTy->getNumContainedTypes() /* not "{}" */) {
- SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
- ++NumZeros;
- }
-
- // If we found a path from the src to dest, create the getelementptr now.
- if (SrcElTy == DstElTy) {
- SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
- return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(), "",
- ((Instruction*) NULL));
- }
- }
-
- if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
- if (DestVTy->getNumElements() == 1) {
- if (!isa<VectorType>(SrcTy)) {
- Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
- return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
- Constant::getNullValue(Type::getInt32Ty(*Context)));
- }
- // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
- }
- }
-
- if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
- if (SrcVTy->getNumElements() == 1) {
- if (!isa<VectorType>(DestTy)) {
- Value *Elem =
- Builder->CreateExtractElement(Src,
- Constant::getNullValue(Type::getInt32Ty(*Context)));
- return CastInst::Create(Instruction::BitCast, Elem, DestTy);
- }
- }
- }
-
- if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
- if (SVI->hasOneUse()) {
- // Okay, we have (bitconvert (shuffle ..)). Check to see if this is
- // a bitconvert to a vector with the same # elts.
- if (isa<VectorType>(DestTy) &&
- cast<VectorType>(DestTy)->getNumElements() ==
- SVI->getType()->getNumElements() &&
- SVI->getType()->getNumElements() ==
- cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) {
- CastInst *Tmp;
- // If either of the operands is a cast from CI.getType(), then
- // evaluating the shuffle in the casted destination's type will allow
- // us to eliminate at least one cast.
- if (((Tmp = dyn_cast<CastInst>(SVI->getOperand(0))) &&
- Tmp->getOperand(0)->getType() == DestTy) ||
- ((Tmp = dyn_cast<CastInst>(SVI->getOperand(1))) &&
- Tmp->getOperand(0)->getType() == DestTy)) {
- Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
- Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
- // Return a new shuffle vector. Use the same element ID's, as we
- // know the vector types match #elts.
- return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2));
- }
- }
- }
- }
- return 0;
-}
-
-/// GetSelectFoldableOperands - We want to turn code that looks like this:
-/// %C = or %A, %B
-/// %D = select %cond, %C, %A
-/// into:
-/// %C = select %cond, %B, 0
-/// %D = or %A, %C
-///
-/// Assuming that the specified instruction is an operand to the select, return
-/// a bitmask indicating which operands of this instruction are foldable if they
-/// equal the other incoming value of the select.
-///
-static unsigned GetSelectFoldableOperands(Instruction *I) {
- switch (I->getOpcode()) {
- case Instruction::Add:
- case Instruction::Mul:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- return 3; // Can fold through either operand.
- case Instruction::Sub: // Can only fold on the amount subtracted.
- case Instruction::Shl: // Can only fold on the shift amount.
- case Instruction::LShr:
- case Instruction::AShr:
- return 1;
- default:
- return 0; // Cannot fold
- }
-}
-
-/// GetSelectFoldableConstant - For the same transformation as the previous
-/// function, return the identity constant that goes into the select.
-static Constant *GetSelectFoldableConstant(Instruction *I,
- LLVMContext *Context) {
- switch (I->getOpcode()) {
- default: llvm_unreachable("This cannot happen!");
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- return Constant::getNullValue(I->getType());
- case Instruction::And:
- return Constant::getAllOnesValue(I->getType());
- case Instruction::Mul:
- return ConstantInt::get(I->getType(), 1);
- }
-}
-
-/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI
-/// have the same opcode and only one use each. Try to simplify this.
-Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
- Instruction *FI) {
- if (TI->getNumOperands() == 1) {
- // Casts are the only unary ops handled here, and the two casts must
- // have the same source type for the merge below to be well typed.
- if (TI->isCast()) {
- if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType())
- return 0;
- } else {
- return 0; // unknown unary op.
- }
-
- // Fold this by inserting a select from the input values.
- SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0),
- FI->getOperand(0), SI.getName()+".v");
- InsertNewInstBefore(NewSI, SI);
- return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
- TI->getType());
- }
-
- // Only handle binary operators here.
- if (!isa<BinaryOperator>(TI))
- return 0;
-
- // Figure out if the operations have any operands in common.
- Value *MatchOp, *OtherOpT, *OtherOpF;
- bool MatchIsOpZero;
- if (TI->getOperand(0) == FI->getOperand(0)) {
- MatchOp = TI->getOperand(0);
- OtherOpT = TI->getOperand(1);
- OtherOpF = FI->getOperand(1);
- MatchIsOpZero = true;
- } else if (TI->getOperand(1) == FI->getOperand(1)) {
- MatchOp = TI->getOperand(1);
- OtherOpT = TI->getOperand(0);
- OtherOpF = FI->getOperand(0);
- MatchIsOpZero = false;
- } else if (!TI->isCommutative()) {
- return 0;
- } else if (TI->getOperand(0) == FI->getOperand(1)) {
- MatchOp = TI->getOperand(0);
- OtherOpT = TI->getOperand(1);
- OtherOpF = FI->getOperand(0);
- MatchIsOpZero = true;
- } else if (TI->getOperand(1) == FI->getOperand(0)) {
- MatchOp = TI->getOperand(1);
- OtherOpT = TI->getOperand(0);
- OtherOpF = FI->getOperand(1);
- MatchIsOpZero = true;
- } else {
- return 0;
- }
-
- // If we reach here, they do have operations in common.
- SelectInst *NewSI = SelectInst::Create(SI.getCondition(), OtherOpT,
- OtherOpF, SI.getName()+".v");
- InsertNewInstBefore(NewSI, SI);
-
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) {
- if (MatchIsOpZero)
- return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI);
- else
- return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
- }
- llvm_unreachable("Shouldn't get here");
- return 0;
-}
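-
-// For example (illustrative), with both operands single-use:
-//   select i1 %c, (add i32 %x, %y), (add i32 %x, %z)
-// becomes
-//   %s = select i1 %c, i32 %y, i32 %z
-//   add i32 %x, %s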
-
-static bool isSelect01(Constant *C1, Constant *C2) {
- ConstantInt *C1I = dyn_cast<ConstantInt>(C1);
- if (!C1I)
- return false;
- ConstantInt *C2I = dyn_cast<ConstantInt>(C2);
- if (!C2I)
- return false;
- return (C1I->isZero() || C1I->isOne()) && (C2I->isZero() || C2I->isOne());
-}
-
-/// FoldSelectIntoOp - Try to fold the select into one of the operands to
-/// facilitate further optimization.
-Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
- Value *FalseVal) {
- // See the comment above GetSelectFoldableOperands for a description of the
- // transformation we are doing here.
- if (Instruction *TVI = dyn_cast<Instruction>(TrueVal)) {
- if (TVI->hasOneUse() && TVI->getNumOperands() == 2 &&
- !isa<Constant>(FalseVal)) {
- if (unsigned SFO = GetSelectFoldableOperands(TVI)) {
- unsigned OpToFold = 0;
- if ((SFO & 1) && FalseVal == TVI->getOperand(0)) {
- OpToFold = 1;
- } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) {
- OpToFold = 2;
- }
-
- if (OpToFold) {
- Constant *C = GetSelectFoldableConstant(TVI, Context);
- Value *OOp = TVI->getOperand(2-OpToFold);
- // Avoid creating select between 2 constants unless it's selecting
- // between 0 and 1.
- if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
- Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C);
- InsertNewInstBefore(NewSel, SI);
- NewSel->takeName(TVI);
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI))
- return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel);
- llvm_unreachable("Unknown instruction!!");
- }
- }
- }
- }
- }
-
- if (Instruction *FVI = dyn_cast<Instruction>(FalseVal)) {
- if (FVI->hasOneUse() && FVI->getNumOperands() == 2 &&
- !isa<Constant>(TrueVal)) {
- if (unsigned SFO = GetSelectFoldableOperands(FVI)) {
- unsigned OpToFold = 0;
- if ((SFO & 1) && TrueVal == FVI->getOperand(0)) {
- OpToFold = 1;
- } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) {
- OpToFold = 2;
- }
-
- if (OpToFold) {
- Constant *C = GetSelectFoldableConstant(FVI, Context);
- Value *OOp = FVI->getOperand(2-OpToFold);
- // Avoid creating select between 2 constants unless it's selecting
- // between 0 and 1.
- if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
- Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp);
- InsertNewInstBefore(NewSel, SI);
- NewSel->takeName(FVI);
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI))
- return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel);
- llvm_unreachable("Unknown instruction!!");
- }
- }
- }
- }
- }
-
- return 0;
-}
-
-/// visitSelectInstWithICmp - Visit a SelectInst that has an
-/// ICmpInst as its first operand.
-///
-Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
- ICmpInst *ICI) {
- bool Changed = false;
- ICmpInst::Predicate Pred = ICI->getPredicate();
- Value *CmpLHS = ICI->getOperand(0);
- Value *CmpRHS = ICI->getOperand(1);
- Value *TrueVal = SI.getTrueValue();
- Value *FalseVal = SI.getFalseValue();
-
- // Check cases where the comparison is with a constant that
- // can be adjusted to fit the min/max idiom. We may edit ICI in
- // place here, so make sure the select is the only user.
- if (ICI->hasOneUse())
- if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) {
- switch (Pred) {
- default: break;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_SLT: {
- // X < MIN ? T : F --> F
- if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
- return ReplaceInstUsesWith(SI, FalseVal);
- // X < C ? X : C-1 --> X > C-1 ? C-1 : X
- Constant *AdjustedRHS = SubOne(CI);
- if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
- (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
- Pred = ICmpInst::getSwappedPredicate(Pred);
- CmpRHS = AdjustedRHS;
- std::swap(FalseVal, TrueVal);
- ICI->setPredicate(Pred);
- ICI->setOperand(1, CmpRHS);
- SI.setOperand(1, TrueVal);
- SI.setOperand(2, FalseVal);
- Changed = true;
- }
- break;
- }
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_SGT: {
- // X > MAX ? T : F --> F
- if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
- return ReplaceInstUsesWith(SI, FalseVal);
- // X > C ? X : C+1 --> X < C+1 ? C+1 : X
- Constant *AdjustedRHS = AddOne(CI);
- if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
- (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
- Pred = ICmpInst::getSwappedPredicate(Pred);
- CmpRHS = AdjustedRHS;
- std::swap(FalseVal, TrueVal);
- ICI->setPredicate(Pred);
- ICI->setOperand(1, CmpRHS);
- SI.setOperand(1, TrueVal);
- SI.setOperand(2, FalseVal);
- Changed = true;
- }
- break;
- }
- }
-
- // (x <s 0) ? -1 : 0 -> ashr x, 31 (all ones if the sign bit is set)
- // (x >s -1) ? -1 : 0 -> not (ashr x, 31) (all ones if the sign bit is clear)
- CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
- if (match(TrueVal, m_ConstantInt<-1>()) &&
- match(FalseVal, m_ConstantInt<0>()))
- Pred = ICI->getPredicate();
- else if (match(TrueVal, m_ConstantInt<0>()) &&
- match(FalseVal, m_ConstantInt<-1>()))
- Pred = CmpInst::getInversePredicate(ICI->getPredicate());
-
- if (Pred != CmpInst::BAD_ICMP_PREDICATE) {
- // This select is equivalent to a sext of the comparison result. If we
- // are just testing the sign bit, shift it into place with an ashr and
- // avoid the comparison.
- const APInt &Op1CV = CI->getValue();
-
- // sext (x <s 0) to i32 --> x>>s31 true if signbit set.
- // sext (x >s -1) to i32 --> (x>>s31)^-1 true if signbit clear.
- if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) ||
- (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
- Value *In = ICI->getOperand(0);
- Value *Sh = ConstantInt::get(In->getType(),
- In->getType()->getScalarSizeInBits()-1);
- In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh,
- In->getName()+".lobit"),
- *ICI);
- if (In->getType() != SI.getType())
- In = CastInst::CreateIntegerCast(In, SI.getType(),
- true/*SExt*/, "tmp", ICI);
-
- if (Pred == ICmpInst::ICMP_SGT)
- In = InsertNewInstBefore(BinaryOperator::CreateNot(In,
- In->getName()+".not"), *ICI);
-
- return ReplaceInstUsesWith(SI, In);
- }
- }
- }
-
- if (CmpLHS == TrueVal && CmpRHS == FalseVal) {
- // Transform (X == Y) ? X : Y -> Y
- if (Pred == ICmpInst::ICMP_EQ)
- return ReplaceInstUsesWith(SI, FalseVal);
- // Transform (X != Y) ? X : Y -> X
- if (Pred == ICmpInst::ICMP_NE)
- return ReplaceInstUsesWith(SI, TrueVal);
- /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
-
- } else if (CmpLHS == FalseVal && CmpRHS == TrueVal) {
- // Transform (X == Y) ? Y : X -> X
- if (Pred == ICmpInst::ICMP_EQ)
- return ReplaceInstUsesWith(SI, FalseVal);
- // Transform (X != Y) ? Y : X -> Y
- if (Pred == ICmpInst::ICMP_NE)
- return ReplaceInstUsesWith(SI, TrueVal);
- /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
- }
- return Changed ? &SI : 0;
-}
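-
-// Illustrative example of the constant adjustment above:
-//   %c = icmp slt i32 %x, 5
-//   %s = select i1 %c, i32 %x, i32 4
-// is rewritten in place as
-//   %c = icmp sgt i32 %x, 4
-//   %s = select i1 %c, i32 4, i32 %x
-// exposing the canonical smin(%x, 4) form.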
-
-
-/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a
-/// PHI node (but the two may be in different blocks). See if the true/false
-/// values (V) are live in all of the predecessor blocks of the PHI. For
-/// example, cases like this cannot be mapped:
-///
-/// X = phi [ C1, BB1], [C2, BB2]
-/// Y = add
-/// Z = select X, Y, 0
-///
-/// because Y is not live in BB1/BB2.
-///
-static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
- const SelectInst &SI) {
- // If the value is a non-instruction value like a constant or argument, it
- // can always be mapped.
- const Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0) return true;
-
- // If V is a PHI node defined in the same block as the condition PHI, we can
- // map the arguments.
- const PHINode *CondPHI = cast<PHINode>(SI.getCondition());
-
- if (const PHINode *VP = dyn_cast<PHINode>(I))
- if (VP->getParent() == CondPHI->getParent())
- return true;
-
- // Otherwise, if the PHI and select are defined in the same block and if V is
- // defined in a different block, then we can transform it.
- if (SI.getParent() == CondPHI->getParent() &&
- I->getParent() != CondPHI->getParent())
- return true;
-
- // Otherwise we have a 'hard' case and we can't tell without doing more
- // detailed dominator based analysis, punt.
- return false;
-}
-
-/// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form:
-/// SPF2(SPF1(A, B), C)
-Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
- SelectPatternFlavor SPF1,
- Value *A, Value *B,
- Instruction &Outer,
- SelectPatternFlavor SPF2, Value *C) {
- if (C == A || C == B) {
- // MAX(MAX(A, B), B) -> MAX(A, B)
- // MIN(MIN(A, B), A) -> MIN(A, B)
- if (SPF1 == SPF2)
- return ReplaceInstUsesWith(Outer, Inner);
-
- // MAX(MIN(A, B), A) -> A
- // MIN(MAX(A, B), A) -> A
- if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) ||
- (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) ||
- (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) ||
- (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN))
- return ReplaceInstUsesWith(Outer, C);
- }
-
- // TODO: MIN(MIN(A, 23), 97)
- return 0;
-}
-
-Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
- Value *CondVal = SI.getCondition();
- Value *TrueVal = SI.getTrueValue();
- Value *FalseVal = SI.getFalseValue();
-
- // select true, X, Y -> X
- // select false, X, Y -> Y
- if (ConstantInt *C = dyn_cast<ConstantInt>(CondVal))
- return ReplaceInstUsesWith(SI, C->getZExtValue() ? TrueVal : FalseVal);
-
- // select C, X, X -> X
- if (TrueVal == FalseVal)
- return ReplaceInstUsesWith(SI, TrueVal);
-
- if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X
- return ReplaceInstUsesWith(SI, FalseVal);
- if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
- return ReplaceInstUsesWith(SI, TrueVal);
- if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y
- if (isa<Constant>(TrueVal))
- return ReplaceInstUsesWith(SI, TrueVal);
- else
- return ReplaceInstUsesWith(SI, FalseVal);
- }
-
- if (SI.getType() == Type::getInt1Ty(*Context)) {
- if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) {
- if (C->getZExtValue()) {
- // Change: A = select B, true, C --> A = or B, C
- return BinaryOperator::CreateOr(CondVal, FalseVal);
- } else {
- // Change: A = select B, false, C --> A = and !B, C
- Value *NotCond =
- InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
- "not."+CondVal->getName()), SI);
- return BinaryOperator::CreateAnd(NotCond, FalseVal);
- }
- } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
- if (!C->getZExtValue()) {
- // Change: A = select B, C, false --> A = and B, C
- return BinaryOperator::CreateAnd(CondVal, TrueVal);
- } else {
- // Change: A = select B, C, true --> A = or !B, C
- Value *NotCond =
- InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
- "not."+CondVal->getName()), SI);
- return BinaryOperator::CreateOr(NotCond, TrueVal);
- }
- }
-
- // select a, b, a -> a&b
- // select a, a, b -> a|b
- if (CondVal == TrueVal)
- return BinaryOperator::CreateOr(CondVal, FalseVal);
- else if (CondVal == FalseVal)
- return BinaryOperator::CreateAnd(CondVal, TrueVal);
- }
-
- // Selecting between two integer constants?
- if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal))
- if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) {
- // select C, 1, 0 -> zext C to int
- if (FalseValC->isZero() && TrueValC->getValue() == 1) {
- return CastInst::Create(Instruction::ZExt, CondVal, SI.getType());
- } else if (TrueValC->isZero() && FalseValC->getValue() == 1) {
- // select C, 0, 1 -> zext !C to int
- Value *NotCond =
- InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
- "not."+CondVal->getName()), SI);
- return CastInst::Create(Instruction::ZExt, NotCond, SI.getType());
- }
-
- if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) {
- // If one of the constants is zero (we know they can't both be) and we
- // have an icmp instruction with zero, and we have an 'and' with the
- // non-constant value, eliminate this whole mess. This corresponds to
- // cases like this: ((X & 8) ? 8 : 0)
- if (TrueValC->isZero() || FalseValC->isZero())
- if (IC->isEquality() && isa<ConstantInt>(IC->getOperand(1)) &&
- cast<Constant>(IC->getOperand(1))->isNullValue())
- if (Instruction *ICA = dyn_cast<Instruction>(IC->getOperand(0)))
- if (ICA->getOpcode() == Instruction::And &&
- isa<ConstantInt>(ICA->getOperand(1)) &&
- (ICA->getOperand(1) == TrueValC ||
- ICA->getOperand(1) == FalseValC) &&
- isOneBitSet(cast<ConstantInt>(ICA->getOperand(1)))) {
- // Okay, now we know that everything is set up, we just don't
- // know whether we have a icmp_ne or icmp_eq and whether the
- // true or false val is the zero.
- bool ShouldNotVal = !TrueValC->isZero();
- ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
- Value *V = ICA;
- if (ShouldNotVal)
- V = InsertNewInstBefore(BinaryOperator::Create(
- Instruction::Xor, V, ICA->getOperand(1)), SI);
- return ReplaceInstUsesWith(SI, V);
- }
- }
- }
-
- // See if we are selecting two values based on a comparison of the two values.
- if (FCmpInst *FCI = dyn_cast<FCmpInst>(CondVal)) {
- if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) {
- // Transform (X == Y) ? X : Y -> Y
- if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
- // This is not safe in general for floating point:
- // consider X== -0, Y== +0.
- // It becomes safe if either operand is a nonzero constant.
- ConstantFP *CFPt, *CFPf;
- if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
- !CFPt->getValueAPF().isZero()) ||
- ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
- !CFPf->getValueAPF().isZero()))
- return ReplaceInstUsesWith(SI, FalseVal);
- }
- // Transform (X != Y) ? X : Y -> X
- if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
- return ReplaceInstUsesWith(SI, TrueVal);
- // NOTE: if we wanted to, this is where to detect MIN/MAX
-
- } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
- // Transform (X == Y) ? Y : X -> X
- if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
- // This is not safe in general for floating point:
- // consider X== -0, Y== +0.
- // It becomes safe if either operand is a nonzero constant.
- ConstantFP *CFPt, *CFPf;
- if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
- !CFPt->getValueAPF().isZero()) ||
- ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
- !CFPf->getValueAPF().isZero()))
- return ReplaceInstUsesWith(SI, FalseVal);
- }
- // Transform (X != Y) ? Y : X -> Y
- if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
- return ReplaceInstUsesWith(SI, TrueVal);
- // NOTE: if we wanted to, this is where to detect MIN/MAX
- }
- // NOTE: if we wanted to, this is where to detect ABS
- }
-
- // See if we are selecting two values based on a comparison of the two values.
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal))
- if (Instruction *Result = visitSelectInstWithICmp(SI, ICI))
- return Result;
-
- if (Instruction *TI = dyn_cast<Instruction>(TrueVal))
- if (Instruction *FI = dyn_cast<Instruction>(FalseVal))
- if (TI->hasOneUse() && FI->hasOneUse()) {
- Instruction *AddOp = 0, *SubOp = 0;
-
- // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
- if (TI->getOpcode() == FI->getOpcode())
- if (Instruction *IV = FoldSelectOpOp(SI, TI, FI))
- return IV;
-
- // Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))). This is
- // even legal for FP.
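- // For example, select C, (add X, Y), (sub X, Y) becomes
- //   %sel = select C, Y, (-Y)
- //   %res = add X, %sel
- // so the add executes unconditionally and only its operand is selected.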
- if ((TI->getOpcode() == Instruction::Sub &&
- FI->getOpcode() == Instruction::Add) ||
- (TI->getOpcode() == Instruction::FSub &&
- FI->getOpcode() == Instruction::FAdd)) {
- AddOp = FI; SubOp = TI;
- } else if ((FI->getOpcode() == Instruction::Sub &&
- TI->getOpcode() == Instruction::Add) ||
- (FI->getOpcode() == Instruction::FSub &&
- TI->getOpcode() == Instruction::FAdd)) {
- AddOp = TI; SubOp = FI;
- }
-
- if (AddOp) {
- Value *OtherAddOp = 0;
- if (SubOp->getOperand(0) == AddOp->getOperand(0)) {
- OtherAddOp = AddOp->getOperand(1);
- } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) {
- OtherAddOp = AddOp->getOperand(0);
- }
-
- if (OtherAddOp) {
- // So at this point we know we have (Y -> OtherAddOp):
- // select C, (add X, Y), (sub X, Z)
- Value *NegVal; // Compute -Z
- if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) {
- NegVal = ConstantExpr::getNeg(C);
- } else {
- NegVal = InsertNewInstBefore(
- BinaryOperator::CreateNeg(SubOp->getOperand(1),
- "tmp"), SI);
- }
-
- Value *NewTrueOp = OtherAddOp;
- Value *NewFalseOp = NegVal;
- if (AddOp != TI)
- std::swap(NewTrueOp, NewFalseOp);
- Instruction *NewSel =
- SelectInst::Create(CondVal, NewTrueOp,
- NewFalseOp, SI.getName() + ".p");
-
- NewSel = InsertNewInstBefore(NewSel, SI);
- return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
- }
- }
- }
-
- // See if we can fold the select into one of our operands.
- if (SI.getType()->isInteger()) {
- if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
- return FoldI;
-
- // MAX(MAX(a, b), a) -> MAX(a, b)
- // MIN(MIN(a, b), a) -> MIN(a, b)
- // MAX(MIN(a, b), a) -> a
- // MIN(MAX(a, b), a) -> a
- Value *LHS, *RHS, *LHS2, *RHS2;
- if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) {
- if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2))
- if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
- SI, SPF, RHS))
- return R;
- if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2))
- if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2,
- SI, SPF, LHS))
- return R;
- }
-
- // TODO.
- // ABS(-X) -> ABS(X)
- // ABS(ABS(X)) -> ABS(X)
- }
-
- // See if we can fold the select into a phi node if the condition is a phi.
- if (isa<PHINode>(SI.getCondition()))
- // The true/false values have to be live in the PHI predecessor's blocks.
- if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) &&
- CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI))
- if (Instruction *NV = FoldOpIntoPhi(SI))
- return NV;
-
- if (BinaryOperator::isNot(CondVal)) {
- SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));
- SI.setOperand(1, FalseVal);
- SI.setOperand(2, TrueVal);
- return &SI;
- }
-
- return 0;
-}
-
-/// EnforceKnownAlignment - If the specified pointer points to an object that
-/// we control, modify the object's alignment to PrefAlign. This isn't
-/// often possible though. If alignment is important, a more reliable approach
-/// is to simply align all global variables and allocation instructions to
-/// their preferred alignment from the beginning.
-///
-static unsigned EnforceKnownAlignment(Value *V,
- unsigned Align, unsigned PrefAlign) {
-
- User *U = dyn_cast<User>(V);
- if (!U) return Align;
-
- switch (Operator::getOpcode(U)) {
- default: break;
- case Instruction::BitCast:
- return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
- case Instruction::GetElementPtr: {
- // If all indexes are zero, it is just the alignment of the base pointer.
- bool AllZeroOperands = true;
- for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
- if (!isa<Constant>(*i) ||
- !cast<Constant>(*i)->isNullValue()) {
- AllZeroOperands = false;
- break;
- }
-
- if (AllZeroOperands) {
- // Treat this like a bitcast.
- return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
- }
- break;
- }
- }
-
- if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- // If there is a large requested alignment and we can, bump up the alignment
- // of the global.
- if (!GV->isDeclaration()) {
- if (GV->getAlignment() >= PrefAlign)
- Align = GV->getAlignment();
- else {
- GV->setAlignment(PrefAlign);
- Align = PrefAlign;
- }
- }
- } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
- // If there is a requested alignment and if this is an alloca, round up.
- if (AI->getAlignment() >= PrefAlign)
- Align = AI->getAlignment();
- else {
- AI->setAlignment(PrefAlign);
- Align = PrefAlign;
- }
- }
-
- return Align;
-}
-
-/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
-/// we can determine, return it, otherwise return 0. If PrefAlign is specified,
-/// and it is more than the alignment of the ultimate object, see if we can
-/// increase the alignment of the ultimate object, making this check succeed.
-unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
- unsigned PrefAlign) {
- unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) :
- sizeof(PrefAlign) * CHAR_BIT;
- APInt Mask = APInt::getAllOnesValue(BitWidth);
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(V, Mask, KnownZero, KnownOne);
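- // Every known-zero low bit doubles the provable alignment: for example, a
- // pointer whose low three bits are known zero is at least 8-byte aligned.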
- unsigned TrailZ = KnownZero.countTrailingOnes();
- // Cap the shift amount: TrailZ can be as large as BitWidth (e.g. for a
- // pointer known to be null), and shifting 1u by 32 or more bits is
- // undefined.
- TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
- unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
-
- if (PrefAlign > Align)
- Align = EnforceKnownAlignment(V, Align, PrefAlign);
-
- // Return the alignment, which may have been raised to PrefAlign above.
- return Align;
-}
-
-Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
- unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1));
- unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2));
- unsigned MinAlign = std::min(DstAlign, SrcAlign);
- unsigned CopyAlign = MI->getAlignment();
-
- if (CopyAlign < MinAlign) {
- MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
- MinAlign, false));
- return MI;
- }
-
- // If the memory transfer length is 1/2/4/8 bytes then replace the
- // memcpy/memmove with a load/store.
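- // For example, a 4-byte copy becomes a single i32 load from the source and
- // an i32 store to the destination (through bitcast pointers), unless a more
- // precise element type is found below.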
- ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3));
- if (MemOpLength == 0) return 0;
-
- // Source and destination pointer types are always "i8*" for these intrinsics. See
- // if the size is something we can handle with a single primitive load/store.
- // A single load+store correctly handles overlapping memory in the memmove
- // case.
- unsigned Size = MemOpLength->getZExtValue();
- if (Size == 0) return MI; // Delete this mem transfer.
-
- if (Size > 8 || (Size&(Size-1)))
- return 0; // If not 1/2/4/8 bytes, exit.
-
- // Use an integer load+store unless we can find something better.
- Type *NewPtrTy =
- PointerType::getUnqual(IntegerType::get(*Context, Size<<3));
-
- // Memcpy forces the use of i8* for the source and destination. That means
- // that if you're using memcpy to move one double around, you'll get a cast
- // from double* to i8*. We'd much rather use a double load+store than an
- // i64 load+store here, because that improves the odds that the source or
- // dest address will be promotable. See if we can find a better type than
- // the integer datatype.
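- // For example, an 8-byte memcpy whose source is a double* cast to i8*
- // becomes a double load+store instead of an i64 load+store.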
- if (Value *Op = getBitCastOperand(MI->getOperand(1))) {
- const Type *SrcETy = cast<PointerType>(Op->getType())->getElementType();
- if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
- // The SrcETy might be something like {{{double}}} or [1 x double]. Rip
- // down through these levels if so.
- while (!SrcETy->isSingleValueType()) {
- if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
- if (STy->getNumElements() == 1)
- SrcETy = STy->getElementType(0);
- else
- break;
- } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
- if (ATy->getNumElements() == 1)
- SrcETy = ATy->getElementType();
- else
- break;
- } else
- break;
- }
-
- if (SrcETy->isSingleValueType())
- NewPtrTy = PointerType::getUnqual(SrcETy);
- }
- }
-
-
- // If the memcpy/memmove provides better alignment info than we can
- // infer, use it.
- SrcAlign = std::max(SrcAlign, CopyAlign);
- DstAlign = std::max(DstAlign, CopyAlign);
-
- Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy);
- Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy);
- Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign);
- InsertNewInstBefore(L, *MI);
- InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI);
-
- // Set the size of the copy to 0, it will be deleted on the next iteration.
- MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
- return MI;
-}
-
-Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
- unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
- if (MI->getAlignment() < Alignment) {
- MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
- Alignment, false));
- return MI;
- }
-
- // Extract the length and alignment and fill if they are constant.
- ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
- ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
- if (!LenC || !FillC || FillC->getType() != Type::getInt8Ty(*Context))
- return 0;
- uint64_t Len = LenC->getZExtValue();
- Alignment = MI->getAlignment();
-
- // If the length is zero, this is a no-op
- if (Len == 0) return MI; // memset(d,c,0,a) -> noop
-
- // memset(s,c,n) -> store s, c (for n=1,2,4,8)
- if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
- const Type *ITy = IntegerType::get(*Context, Len*8); // n=1 -> i8.
-
- Value *Dest = MI->getDest();
- Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy));
-
- // For memset, alignment 0 is equivalent to alignment 1, but a store
- // requires an explicit alignment.
- if (Alignment == 0) Alignment = 1;
-
- // Extract the fill value and store.
- uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
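- // e.g. a fill byte of 0xAB becomes 0xABABABABABABABAB; constructing the
- // ConstantInt below truncates it to the store width (0xABAB for i16).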
- InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill),
- Dest, false, Alignment), *MI);
-
- // Set the size of the copy to 0, it will be deleted on the next iteration.
- MI->setLength(Constant::getNullValue(LenC->getType()));
- return MI;
- }
-
- return 0;
-}
-
-
-/// visitCallInst - CallInst simplification. This mostly only handles folding
-/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
-/// the heavy lifting.
-///
-Instruction *InstCombiner::visitCallInst(CallInst &CI) {
- if (isFreeCall(&CI))
- return visitFree(CI);
-
- // If the caller function is nounwind, mark the call as nounwind, even if the
- // callee isn't.
- if (CI.getParent()->getParent()->doesNotThrow() &&
- !CI.doesNotThrow()) {
- CI.setDoesNotThrow();
- return &CI;
- }
-
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
- if (!II) return visitCallSite(&CI);
-
- // Intrinsics cannot occur in an invoke, so handle them here instead of in
- // visitCallSite.
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
- bool Changed = false;
-
- // memmove/cpy/set of zero bytes is a noop.
- if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
- if (NumBytes->isNullValue()) return EraseInstFromFunction(CI);
-
- if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
- if (CI->getZExtValue() == 1) {
- // Replace the instruction with just byte operations. We would
- // transform other cases to loads/stores, but we don't know if
- // alignment is sufficient.
- }
- }
-
- // If we have a memmove and the source operation is a constant global,
- // then the source and dest pointers can't alias, so we can change this
- // into a call to memcpy.
- if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
- if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
- if (GVSrc->isConstant()) {
- Module *M = CI.getParent()->getParent()->getParent();
- Intrinsic::ID MemCpyID = Intrinsic::memcpy;
- const Type *Tys[1];
- Tys[0] = CI.getOperand(3)->getType();
- CI.setOperand(0,
- Intrinsic::getDeclaration(M, MemCpyID, Tys, 1));
- Changed = true;
- }
- }
-
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
- // memmove(x,x,size) -> noop.
- if (MTI->getSource() == MTI->getDest())
- return EraseInstFromFunction(CI);
- }
-
- // If we can determine a pointer alignment that is bigger than currently
- // set, update the alignment.
- if (isa<MemTransferInst>(MI)) {
- if (Instruction *I = SimplifyMemTransfer(MI))
- return I;
- } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
- if (Instruction *I = SimplifyMemSet(MSI))
- return I;
- }
-
- if (Changed) return II;
- }
-
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::bswap:
- // bswap(bswap(x)) -> x
- if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1)))
- if (Operand->getIntrinsicID() == Intrinsic::bswap)
- return ReplaceInstUsesWith(CI, Operand->getOperand(1));
- break;
- case Intrinsic::powi:
- if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getOperand(2))) {
- // powi(x, 0) -> 1.0
- if (Power->isZero())
- return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
- // powi(x, 1) -> x
- if (Power->isOne())
- return ReplaceInstUsesWith(CI, II->getOperand(1));
- // powi(x, -1) -> 1/x
- if (Power->isAllOnesValue())
- return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
- II->getOperand(1));
- }
- break;
-
- case Intrinsic::uadd_with_overflow: {
- Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
- const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
- uint32_t BitWidth = IT->getBitWidth();
- APInt Mask = APInt::getSignBit(BitWidth);
- APInt LHSKnownZero(BitWidth, 0);
- APInt LHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
- bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
- bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
-
- if (LHSKnownNegative || LHSKnownPositive) {
- APInt RHSKnownZero(BitWidth, 0);
- APInt RHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
- bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
- bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
- if (LHSKnownNegative && RHSKnownNegative) {
- // The sign bit is set in both cases: this MUST overflow.
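- // (For example, with i8 operands 0x80 + 0x80 always carries out of
- // bit 7, so the overflow result is a constant true.)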
- // Create a simple add instruction, and insert it into the struct.
- Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, "", &CI);
- Worklist.Add(Add);
- Constant *V[] = {
- UndefValue::get(LHS->getType()), ConstantInt::getTrue(*Context)
- };
- Constant *Struct = ConstantStruct::get(*Context, V, 2, false);
- return InsertValueInst::Create(Struct, Add, 0);
- }
-
- if (LHSKnownPositive && RHSKnownPositive) {
- // The sign bit is clear in both cases: this CANNOT overflow.
- // Create a simple add instruction, and insert it into the struct.
- Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, "", &CI);
- Worklist.Add(Add);
- Constant *V[] = {
- UndefValue::get(LHS->getType()), ConstantInt::getFalse(*Context)
- };
- Constant *Struct = ConstantStruct::get(*Context, V, 2, false);
- return InsertValueInst::Create(Struct, Add, 0);
- }
- }
- }
- // FALL THROUGH uadd into sadd
- case Intrinsic::sadd_with_overflow:
- // Canonicalize constants into the RHS.
- if (isa<Constant>(II->getOperand(1)) &&
- !isa<Constant>(II->getOperand(2))) {
- Value *LHS = II->getOperand(1);
- II->setOperand(1, II->getOperand(2));
- II->setOperand(2, LHS);
- return II;
- }
-
- // X + undef -> undef
- if (isa<UndefValue>(II->getOperand(2)))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
- // X + 0 -> {X, false}
- if (RHS->isZero()) {
- Constant *V[] = {
- UndefValue::get(II->getOperand(1)->getType()),
- ConstantInt::getFalse(*Context)
- };
- Constant *Struct = ConstantStruct::get(*Context, V, 2, false);
- return InsertValueInst::Create(Struct, II->getOperand(1), 0);
- }
- }
- break;
- case Intrinsic::usub_with_overflow:
- case Intrinsic::ssub_with_overflow:
- // undef - X -> undef
- // X - undef -> undef
- if (isa<UndefValue>(II->getOperand(1)) ||
- isa<UndefValue>(II->getOperand(2)))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
- // X - 0 -> {X, false}
- if (RHS->isZero()) {
- Constant *V[] = {
- UndefValue::get(II->getOperand(1)->getType()),
- ConstantInt::getFalse(*Context)
- };
- Constant *Struct = ConstantStruct::get(*Context, V, 2, false);
- return InsertValueInst::Create(Struct, II->getOperand(1), 0);
- }
- }
- break;
- case Intrinsic::umul_with_overflow:
- case Intrinsic::smul_with_overflow:
- // Canonicalize constants into the RHS.
- if (isa<Constant>(II->getOperand(1)) &&
- !isa<Constant>(II->getOperand(2))) {
- Value *LHS = II->getOperand(1);
- II->setOperand(1, II->getOperand(2));
- II->setOperand(2, LHS);
- return II;
- }
-
- // X * undef -> undef
- if (isa<UndefValue>(II->getOperand(2)))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
- if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getOperand(2))) {
- // X*0 -> {0, false}
- if (RHSI->isZero())
- return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
-
- // X * 1 -> {X, false}
- if (RHSI->equalsInt(1)) {
- Constant *V[] = {
- UndefValue::get(II->getOperand(1)->getType()),
- ConstantInt::getFalse(*Context)
- };
- Constant *Struct = ConstantStruct::get(*Context, V, 2, false);
- return InsertValueInst::Create(Struct, II->getOperand(1), 0);
- }
- }
- break;
- case Intrinsic::ppc_altivec_lvx:
- case Intrinsic::ppc_altivec_lvxl:
- case Intrinsic::x86_sse_loadu_ps:
- case Intrinsic::x86_sse2_loadu_pd:
- case Intrinsic::x86_sse2_loadu_dq:
- // Turn PPC lvx -> load if the pointer is known aligned.
- // Turn X86 loadups -> load if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
- Value *Ptr = Builder->CreateBitCast(II->getOperand(1),
- PointerType::getUnqual(II->getType()));
- return new LoadInst(Ptr);
- }
- break;
- case Intrinsic::ppc_altivec_stvx:
- case Intrinsic::ppc_altivec_stvxl:
- // Turn stvx -> store if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
- const Type *OpPtrTy =
- PointerType::getUnqual(II->getOperand(1)->getType());
- Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy);
- return new StoreInst(II->getOperand(1), Ptr);
- }
- break;
- case Intrinsic::x86_sse_storeu_ps:
- case Intrinsic::x86_sse2_storeu_pd:
- case Intrinsic::x86_sse2_storeu_dq:
- // Turn X86 storeu -> store if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
- const Type *OpPtrTy =
- PointerType::getUnqual(II->getOperand(2)->getType());
- Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy);
- return new StoreInst(II->getOperand(2), Ptr);
- }
- break;
-
- case Intrinsic::x86_sse_cvttss2si: {
- // This intrinsic only demands the 0th element of its input vector. If
- // we can simplify the input based on that, do so now.
- unsigned VWidth =
- cast<VectorType>(II->getOperand(1)->getType())->getNumElements();
- APInt DemandedElts(VWidth, 1);
- APInt UndefElts(VWidth, 0);
- if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
- UndefElts)) {
- II->setOperand(1, V);
- return II;
- }
- break;
- }
-
- case Intrinsic::ppc_altivec_vperm:
- // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
- if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
- assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
-
- // Check that all of the elements are integer constants or undefs.
- bool AllEltsOk = true;
- for (unsigned i = 0; i != 16; ++i) {
- if (!isa<ConstantInt>(Mask->getOperand(i)) &&
- !isa<UndefValue>(Mask->getOperand(i))) {
- AllEltsOk = false;
- break;
- }
- }
-
- if (AllEltsOk) {
- // Cast the input vectors to byte vectors.
- Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType());
- Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType());
- Value *Result = UndefValue::get(Op0->getType());
-
- // Only extract each element once.
- Value *ExtractedElts[32];
- memset(ExtractedElts, 0, sizeof(ExtractedElts));
-
- for (unsigned i = 0; i != 16; ++i) {
- if (isa<UndefValue>(Mask->getOperand(i)))
- continue;
- unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
- Idx &= 31; // Match the hardware behavior.
-
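- // A mask byte in 0..15 selects from the first input vector and 16..31
- // selects from the second, hence the Idx < 16 test below.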
- if (ExtractedElts[Idx] == 0) {
- ExtractedElts[Idx] =
- Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
- ConstantInt::get(Type::getInt32Ty(*Context), Idx&15, false),
- "tmp");
- }
-
- // Insert this value into the result vector.
- Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
- ConstantInt::get(Type::getInt32Ty(*Context), i, false),
- "tmp");
- }
- return CastInst::Create(Instruction::BitCast, Result, CI.getType());
- }
- }
- break;
-
- case Intrinsic::stackrestore: {
- // If the save is right next to the restore, remove the restore. This can
- // happen when variable allocas are DCE'd.
- if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
- if (SS->getIntrinsicID() == Intrinsic::stacksave) {
- BasicBlock::iterator BI = SS;
- if (&*++BI == II)
- return EraseInstFromFunction(CI);
- }
- }
-
- // Scan down this block to see if there is another stack restore in the
- // same block without an intervening call/alloca.
- BasicBlock::iterator BI = II;
- TerminatorInst *TI = II->getParent()->getTerminator();
- bool CannotRemove = false;
- for (++BI; &*BI != TI; ++BI) {
- if (isa<AllocaInst>(BI) || isMalloc(BI)) {
- CannotRemove = true;
- break;
- }
- if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
- // If there is a stackrestore below this one, remove this one.
- if (II->getIntrinsicID() == Intrinsic::stackrestore)
- return EraseInstFromFunction(CI);
- // Otherwise, ignore the intrinsic.
- } else {
- // If we found a non-intrinsic call, we can't remove the stack
- // restore.
- CannotRemove = true;
- break;
- }
- }
- }
-
- // If the stack restore is in a return/unwind block and if there are no
- // allocas or calls between the restore and the return, nuke the restore.
- if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
- return EraseInstFromFunction(CI);
- break;
- }
- }
-
- return visitCallSite(II);
-}
-
-// InvokeInst simplification
-//
-Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
- return visitCallSite(&II);
-}
-
-/// isSafeToEliminateVarargsCast - If this cast does not affect the value
-/// passed through the varargs area, we can eliminate the use of the cast.
-static bool isSafeToEliminateVarargsCast(const CallSite CS,
- const CastInst * const CI,
- const TargetData * const TD,
- const int ix) {
- if (!CI->isLosslessCast())
- return false;
-
- // The size of ByVal arguments is derived from the type, so we
- // can't change to a type with a different size. If the size were
- // passed explicitly we could avoid this check.
- if (!CS.paramHasAttr(ix, Attribute::ByVal))
- return true;
-
- const Type* SrcTy =
- cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
- const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
- if (!SrcTy->isSized() || !DstTy->isSized())
- return false;
- if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
- return false;
- return true;
-}
-
-// visitCallSite - Improvements for call and invoke instructions.
-//
-Instruction *InstCombiner::visitCallSite(CallSite CS) {
- bool Changed = false;
-
- // If the callee is a constexpr cast of a function, attempt to move the cast
- // to the arguments of the call/invoke.
- if (transformConstExprCastCall(CS)) return 0;
-
- Value *Callee = CS.getCalledValue();
-
- if (Function *CalleeF = dyn_cast<Function>(Callee))
- if (CalleeF->getCallingConv() != CS.getCallingConv()) {
- Instruction *OldCall = CS.getInstruction();
- // If the call and callee calling conventions don't match, this call must
- // be unreachable, as the call is undefined.
- new StoreInst(ConstantInt::getTrue(*Context),
- UndefValue::get(Type::getInt1PtrTy(*Context)),
- OldCall);
- // If OldCall does not return void then replaceAllUsesWith undef.
- // This allows ValueHandlers and custom metadata to adjust themselves.
- if (!OldCall->getType()->isVoidTy())
- OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
- if (isa<CallInst>(OldCall)) // Not worth removing an invoke here.
- return EraseInstFromFunction(*OldCall);
- return 0;
- }
-
- if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
- // This instruction is not reachable, just remove it. We insert a store to
- // undef so that we know that this code is not reachable, despite the fact
- // that we can't modify the CFG here.
- new StoreInst(ConstantInt::getTrue(*Context),
- UndefValue::get(Type::getInt1PtrTy(*Context)),
- CS.getInstruction());
-
- // If CS does not return void then replaceAllUsesWith undef.
- // This allows ValueHandlers and custom metadata to adjust themselves.
- if (!CS.getInstruction()->getType()->isVoidTy())
- CS.getInstruction()->
- replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));
-
- if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
- // Don't break the CFG, insert a dummy cond branch.
- BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
- ConstantInt::getTrue(*Context), II);
- }
- return EraseInstFromFunction(*CS.getInstruction());
- }
-
- if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
- if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
- if (In->getIntrinsicID() == Intrinsic::init_trampoline)
- return transformCallThroughTrampoline(CS);
-
- const PointerType *PTy = cast<PointerType>(Callee->getType());
- const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
- if (FTy->isVarArg()) {
- int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1);
- // See if we can optimize any arguments passed through the varargs area of
- // the call.
- for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
- E = CS.arg_end(); I != E; ++I, ++ix) {
- CastInst *CI = dyn_cast<CastInst>(*I);
- if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
- *I = CI->getOperand(0);
- Changed = true;
- }
- }
- }
-
- if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
- // Inline asm calls cannot throw - mark them 'nounwind'.
- CS.setDoesNotThrow();
- Changed = true;
- }
-
- return Changed ? CS.getInstruction() : 0;
-}
-
-// transformConstExprCastCall - If the callee is a constexpr cast of a function,
-// attempt to move the cast to the arguments of the call/invoke.
-//
-bool InstCombiner::transformConstExprCastCall(CallSite CS) {
- if (!isa<ConstantExpr>(CS.getCalledValue())) return false;
- ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue());
- if (CE->getOpcode() != Instruction::BitCast ||
- !isa<Function>(CE->getOperand(0)))
- return false;
- Function *Callee = cast<Function>(CE->getOperand(0));
- Instruction *Caller = CS.getInstruction();
- const AttrListPtr &CallerPAL = CS.getAttributes();
-
- // Okay, this is a cast from a function to a different type. Unless doing so
- // would cause a type conversion of one of our arguments, change this call to
- // be a direct call with arguments cast to the appropriate types.
- //
- const FunctionType *FT = Callee->getFunctionType();
- const Type *OldRetTy = Caller->getType();
- const Type *NewRetTy = FT->getReturnType();
-
- if (isa<StructType>(NewRetTy))
- return false; // TODO: Handle multiple return values.
-
- // Check to see if we are changing the return type...
- if (OldRetTy != NewRetTy) {
- if (Callee->isDeclaration() &&
- // Conversion is ok if changing from one pointer type to another or from
- // a pointer to an integer of the same size.
- !((isa<PointerType>(OldRetTy) || !TD ||
- OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
- (isa<PointerType>(NewRetTy) || !TD ||
- NewRetTy == TD->getIntPtrType(Caller->getContext()))))
- return false; // Cannot transform this return value.
-
- if (!Caller->use_empty() &&
- // void -> non-void is handled specially
- !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
- return false; // Cannot transform this return value.
-
- if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
- Attributes RAttrs = CallerPAL.getRetAttributes();
- if (RAttrs & Attribute::typeIncompatible(NewRetTy))
- return false; // Attribute not compatible with transformed value.
- }
-
- // If the callsite is an invoke instruction, and the return value is used by
- // a PHI node in a successor, we cannot change the return type of the call
- // because there is no place to put the cast instruction (without breaking
- // the critical edge). Bail out in this case.
- if (!Caller->use_empty())
- if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
- for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
- UI != E; ++UI)
- if (PHINode *PN = dyn_cast<PHINode>(*UI))
- if (PN->getParent() == II->getNormalDest() ||
- PN->getParent() == II->getUnwindDest())
- return false;
- }
-
- unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
- unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
-
- CallSite::arg_iterator AI = CS.arg_begin();
- for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
- const Type *ParamTy = FT->getParamType(i);
- const Type *ActTy = (*AI)->getType();
-
- if (!CastInst::isCastable(ActTy, ParamTy))
- return false; // Cannot transform this parameter value.
-
- if (CallerPAL.getParamAttributes(i + 1)
- & Attribute::typeIncompatible(ParamTy))
- return false; // Attribute not compatible with transformed value.
-
- // Converting from one pointer type to another or between a pointer and an
- // integer of the same size is safe even if we do not have a body.
- bool isConvertible = ActTy == ParamTy ||
- (TD && ((isa<PointerType>(ParamTy) ||
- ParamTy == TD->getIntPtrType(Caller->getContext())) &&
- (isa<PointerType>(ActTy) ||
- ActTy == TD->getIntPtrType(Caller->getContext()))));
- if (Callee->isDeclaration() && !isConvertible) return false;
- }
-
- if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() &&
- Callee->isDeclaration())
- return false; // Do not delete arguments unless we have a function body.
-
- if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
- !CallerPAL.isEmpty())
- // In this case we have more arguments than the new function type, but we
- // won't be dropping them. Check that these extra arguments have attributes
- // that are compatible with being a vararg call argument.
- for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
- if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
- break;
- Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
- if (PAttrs & Attribute::VarArgsIncompatible)
- return false;
- }
-
- // Okay, we decided that this is a safe thing to do: go ahead and start
- // inserting cast instructions as necessary...
- std::vector<Value*> Args;
- Args.reserve(NumActualArgs);
- SmallVector<AttributeWithIndex, 8> attrVec;
- attrVec.reserve(NumCommonArgs);
-
- // Get any return attributes.
- Attributes RAttrs = CallerPAL.getRetAttributes();
-
- // If the return value is not being used, the type may not be compatible
- // with the existing attributes. Wipe out any problematic attributes.
- RAttrs &= ~Attribute::typeIncompatible(NewRetTy);
-
- // Add the new return attributes.
- if (RAttrs)
- attrVec.push_back(AttributeWithIndex::get(0, RAttrs));
-
- AI = CS.arg_begin();
- for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
- const Type *ParamTy = FT->getParamType(i);
- if ((*AI)->getType() == ParamTy) {
- Args.push_back(*AI);
- } else {
- Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
- false, ParamTy, false);
- Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp"));
- }
-
- // Add any parameter attributes.
- if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
- attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
- }
-
- // If the function takes more arguments than the call was taking, add them
- // now.
- for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
- Args.push_back(Constant::getNullValue(FT->getParamType(i)));
-
- // If we are removing arguments to the function, emit an obnoxious warning.
- if (FT->getNumParams() < NumActualArgs) {
- if (!FT->isVarArg()) {
- errs() << "WARNING: While resolving call to function '"
- << Callee->getName() << "' arguments were dropped!\n";
- } else {
- // Add all of the arguments in their promoted form to the arg list.
- for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
- const Type *PTy = getPromotedType((*AI)->getType());
- if (PTy != (*AI)->getType()) {
- // Must promote to pass through va_arg area!
- Instruction::CastOps opcode =
- CastInst::getCastOpcode(*AI, false, PTy, false);
- Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));
- } else {
- Args.push_back(*AI);
- }
-
- // Add any parameter attributes.
- if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
- attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
- }
- }
- }
-
- if (Attributes FnAttrs = CallerPAL.getFnAttributes())
- attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
-
- if (NewRetTy->isVoidTy())
- Caller->setName(""); // Void type should not have a name.
-
- const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
- attrVec.end());
-
- Instruction *NC;
- if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(),
- Args.begin(), Args.end(),
- Caller->getName(), Caller);
- cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
- cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
- } else {
- NC = CallInst::Create(Callee, Args.begin(), Args.end(),
- Caller->getName(), Caller);
- CallInst *CI = cast<CallInst>(Caller);
- if (CI->isTailCall())
- cast<CallInst>(NC)->setTailCall();
- cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
- cast<CallInst>(NC)->setAttributes(NewCallerPAL);
- }
-
- // Insert a cast of the return type as necessary.
- Value *NV = NC;
- if (OldRetTy != NV->getType() && !Caller->use_empty()) {
- if (!NV->getType()->isVoidTy()) {
- Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false,
- OldRetTy, false);
- NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
-
- // If this is an invoke instruction, we should insert it after the first
- // non-PHI instruction in the normal successor block.
- if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
- InsertNewInstBefore(NC, *I);
- } else {
- // Otherwise, it's a call, just insert cast right after the call instr
- InsertNewInstBefore(NC, *Caller);
- }
- Worklist.AddUsersToWorkList(*Caller);
- } else {
- NV = UndefValue::get(Caller->getType());
- }
- }
-
-
- if (!Caller->use_empty())
- Caller->replaceAllUsesWith(NV);
-
- EraseInstFromFunction(*Caller);
- return true;
-}
-
-// transformCallThroughTrampoline - Turn a call to a function created by the
-// init_trampoline intrinsic into a direct call to the underlying function.
-//
-Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
- Value *Callee = CS.getCalledValue();
- const PointerType *PTy = cast<PointerType>(Callee->getType());
- const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
- const AttrListPtr &Attrs = CS.getAttributes();
-
- // If the call already has the 'nest' attribute somewhere then give up -
- // otherwise 'nest' would occur twice after splicing in the chain.
- if (Attrs.hasAttrSomewhere(Attribute::Nest))
- return 0;
-
- IntrinsicInst *Tramp =
- cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
-
- Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts());
- const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
- const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
-
- const AttrListPtr &NestAttrs = NestF->getAttributes();
- if (!NestAttrs.isEmpty()) {
- unsigned NestIdx = 1;
- const Type *NestTy = 0;
- Attributes NestAttr = Attribute::None;
-
- // Look for a parameter marked with the 'nest' attribute.
- for (FunctionType::param_iterator I = NestFTy->param_begin(),
- E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
- if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
- // Record the parameter type and any other attributes.
- NestTy = *I;
- NestAttr = NestAttrs.getParamAttributes(NestIdx);
- break;
- }
-
- if (NestTy) {
- Instruction *Caller = CS.getInstruction();
- std::vector<Value*> NewArgs;
- NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
-
- SmallVector<AttributeWithIndex, 8> NewAttrs;
- NewAttrs.reserve(Attrs.getNumSlots() + 1);
-
- // Insert the nest argument into the call argument list, which may
- // mean appending it. Likewise for attributes.
-
- // Add any result attributes.
- if (Attributes Attr = Attrs.getRetAttributes())
- NewAttrs.push_back(AttributeWithIndex::get(0, Attr));
-
- {
- unsigned Idx = 1;
- CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
- do {
- if (Idx == NestIdx) {
- // Add the chain argument and attributes.
- Value *NestVal = Tramp->getOperand(3);
- if (NestVal->getType() != NestTy)
- NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
- NewArgs.push_back(NestVal);
- NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
- }
-
- if (I == E)
- break;
-
- // Add the original argument and attributes.
- NewArgs.push_back(*I);
- if (Attributes Attr = Attrs.getParamAttributes(Idx))
- NewAttrs.push_back
- (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
-
- ++Idx, ++I;
- } while (1);
- }
-
- // Add any function attributes.
- if (Attributes Attr = Attrs.getFnAttributes())
- NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
-
- // The trampoline may have been bitcast to a bogus type (FTy).
- // Handle this by synthesizing a new function type, equal to FTy
- // with the chain parameter inserted.
-
- std::vector<const Type*> NewTypes;
- NewTypes.reserve(FTy->getNumParams()+1);
-
- // Insert the chain's type into the list of parameter types, which may
- // mean appending it.
- {
- unsigned Idx = 1;
- FunctionType::param_iterator I = FTy->param_begin(),
- E = FTy->param_end();
-
- do {
- if (Idx == NestIdx)
- // Add the chain's type.
- NewTypes.push_back(NestTy);
-
- if (I == E)
- break;
-
- // Add the original type.
- NewTypes.push_back(*I);
-
- ++Idx, ++I;
- } while (1);
- }
-
- // Replace the trampoline call with a direct call. Let the generic
- // code sort out any function type mismatches.
- FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
- FTy->isVarArg());
- Constant *NewCallee =
- NestF->getType() == PointerType::getUnqual(NewFTy) ?
- NestF : ConstantExpr::getBitCast(NestF,
- PointerType::getUnqual(NewFTy));
- const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
- NewAttrs.end());
-
- Instruction *NewCaller;
- if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- NewCaller = InvokeInst::Create(NewCallee,
- II->getNormalDest(), II->getUnwindDest(),
- NewArgs.begin(), NewArgs.end(),
- Caller->getName(), Caller);
- cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
- cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
- } else {
- NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(),
- Caller->getName(), Caller);
- if (cast<CallInst>(Caller)->isTailCall())
- cast<CallInst>(NewCaller)->setTailCall();
- cast<CallInst>(NewCaller)->
- setCallingConv(cast<CallInst>(Caller)->getCallingConv());
- cast<CallInst>(NewCaller)->setAttributes(NewPAL);
- }
- if (!Caller->getType()->isVoidTy())
- Caller->replaceAllUsesWith(NewCaller);
- Worklist.Remove(Caller);
- Caller->eraseFromParent();
- return 0;
- }
- }
-
- // Replace the trampoline call with a direct call. Since there is no 'nest'
- // parameter, there is no need to adjust the argument list. Let the generic
- // code sort out any function type mismatches.
- Constant *NewCallee =
- NestF->getType() == PTy ? NestF :
- ConstantExpr::getBitCast(NestF, PTy);
- CS.setCalledFunction(NewCallee);
- return CS.getInstruction();
-}
-
-/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)]
-/// and if a/b/c and the add's all have a single use, turn this into a phi
-/// and a single binop.
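-/// For example (illustrative IR):
-///   %t1 = add i32 %a, %b   ; in %pred1
-///   %t2 = add i32 %a, %c   ; in %pred2
-///   %x  = phi i32 [ %t1, %pred1 ], [ %t2, %pred2 ]
-/// becomes
-///   %p = phi i32 [ %b, %pred1 ], [ %c, %pred2 ]
-///   %x = add i32 %a, %p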
-Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
- Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
- assert(isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst));
- unsigned Opc = FirstInst->getOpcode();
- Value *LHSVal = FirstInst->getOperand(0);
- Value *RHSVal = FirstInst->getOperand(1);
-
- const Type *LHSType = LHSVal->getType();
- const Type *RHSType = RHSVal->getType();
-
- // Scan to see if all operands are the same opcode, and all have one use.
- for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
- Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
- if (!I || I->getOpcode() != Opc || !I->hasOneUse() ||
- // Verify type of the LHS matches so we don't fold cmp's of different
- // types or GEP's with different index types.
- I->getOperand(0)->getType() != LHSType ||
- I->getOperand(1)->getType() != RHSType)
- return 0;
-
- // If they are CmpInst instructions, check their predicates
- if (Opc == Instruction::ICmp || Opc == Instruction::FCmp)
- if (cast<CmpInst>(I)->getPredicate() !=
- cast<CmpInst>(FirstInst)->getPredicate())
- return 0;
-
- // Keep track of which operand needs a phi node.
- if (I->getOperand(0) != LHSVal) LHSVal = 0;
- if (I->getOperand(1) != RHSVal) RHSVal = 0;
- }
-
- // If both LHS and RHS would need a PHI, don't do this transformation,
- // because it would increase the number of PHIs entering the block,
- // which leads to higher register pressure. This is especially
- // bad when the PHIs are in the header of a loop.
- if (!LHSVal && !RHSVal)
- return 0;
-
- // Otherwise, this is safe to transform!
-
- Value *InLHS = FirstInst->getOperand(0);
- Value *InRHS = FirstInst->getOperand(1);
- PHINode *NewLHS = 0, *NewRHS = 0;
- if (LHSVal == 0) {
- NewLHS = PHINode::Create(LHSType,
- FirstInst->getOperand(0)->getName() + ".pn");
- NewLHS->reserveOperandSpace(PN.getNumOperands()/2);
- NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0));
- InsertNewInstBefore(NewLHS, PN);
- LHSVal = NewLHS;
- }
-
- if (RHSVal == 0) {
- NewRHS = PHINode::Create(RHSType,
- FirstInst->getOperand(1)->getName() + ".pn");
- NewRHS->reserveOperandSpace(PN.getNumOperands()/2);
- NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0));
- InsertNewInstBefore(NewRHS, PN);
- RHSVal = NewRHS;
- }
-
- // Add all operands to the new PHIs.
- if (NewLHS || NewRHS) {
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i));
- if (NewLHS) {
- Value *NewInLHS = InInst->getOperand(0);
- NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i));
- }
- if (NewRHS) {
- Value *NewInRHS = InInst->getOperand(1);
- NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i));
- }
- }
- }
-
- if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
- return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
- CmpInst *CIOp = cast<CmpInst>(FirstInst);
- return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
- LHSVal, RHSVal);
-}
-
-Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
- GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0));
-
- SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
- FirstInst->op_end());
- // This is true if all GEP bases are allocas and if all indices into them are
- // constants.
- bool AllBasePointersAreAllocas = true;
-
- // We don't want to replace this phi if the replacement would require
- // more than one phi, which leads to higher register pressure. This is
- // especially bad when the PHIs are in the header of a loop.
- bool NeededPhi = false;
-
- // Scan to see if all operands are the same opcode, and all have one use.
- for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
- GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
- if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() ||
- GEP->getNumOperands() != FirstInst->getNumOperands())
- return 0;
-
- // Keep track of whether or not all GEPs are of alloca pointers.
- if (AllBasePointersAreAllocas &&
- (!isa<AllocaInst>(GEP->getOperand(0)) ||
- !GEP->hasAllConstantIndices()))
- AllBasePointersAreAllocas = false;
-
- // Compare the operand lists.
- for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
- if (FirstInst->getOperand(op) == GEP->getOperand(op))
- continue;
-
- // Don't merge two GEPs when two operands differ (introducing phi nodes)
- // if one of the PHIs has a constant for the index. The index may be
- // substantially cheaper to compute for the constants, so making it a
- // variable index could pessimize the path. This also handles the case
- // for struct indices, which must always be constant.
- if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
- isa<ConstantInt>(GEP->getOperand(op)))
- return 0;
-
- if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
- return 0;
-
- // If we already needed a PHI for an earlier operand, and another operand
- // also requires a PHI, we'd be introducing more PHIs than we're
- // eliminating, which increases register pressure on entry to the PHI's
- // block.
- if (NeededPhi)
- return 0;
-
- FixedOperands[op] = 0; // Needs a PHI.
- NeededPhi = true;
- }
- }
-
- // If all of the base pointers of the PHI'd GEPs are from allocas, don't
- // bother doing this transformation. At best, this will just save a bit of
- // offset calculation, but all the predecessors will have to materialize the
- // stack address into a register anyway. We'd actually rather *clone* the
- // load up into the predecessors so that we have a load of a gep of an alloca,
- // which can usually all be folded into the load.
- if (AllBasePointersAreAllocas)
- return 0;
-
- // Otherwise, this is safe to transform. Insert PHI nodes for each operand
- // that is variable.
- SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
-
- bool HasAnyPHIs = false;
- for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
- if (FixedOperands[i]) continue; // operand doesn't need a phi.
- Value *FirstOp = FirstInst->getOperand(i);
- PHINode *NewPN = PHINode::Create(FirstOp->getType(),
- FirstOp->getName()+".pn");
- InsertNewInstBefore(NewPN, PN);
-
- NewPN->reserveOperandSpace(e);
- NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
- OperandPhis[i] = NewPN;
- FixedOperands[i] = NewPN;
- HasAnyPHIs = true;
- }
-
-
- // Add all operands to the new PHIs.
- if (HasAnyPHIs) {
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i));
- BasicBlock *InBB = PN.getIncomingBlock(i);
-
- for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
- if (PHINode *OpPhi = OperandPhis[op])
- OpPhi->addIncoming(InGEP->getOperand(op), InBB);
- }
- }
-
- Value *Base = FixedOperands[0];
- return cast<GEPOperator>(FirstInst)->isInBounds() ?
- GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1,
- FixedOperands.end()) :
- GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
- FixedOperands.end());
-}
-
-
-/// isSafeAndProfitableToSinkLoad - Return true if we know that it is safe to
-/// sink the load out of the block that defines it. This means that it must be
-/// obvious the value of the load is not changed from the point of the load to
-/// the end of the block it is in.
-///
-/// Finally, it is safe, but not profitable, to sink a load targeting a
-/// non-address-taken alloca. Doing so will cause us to not promote the alloca
-/// to a register.
-static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
- BasicBlock::iterator BBI = L, E = L->getParent()->end();
-
- for (++BBI; BBI != E; ++BBI)
- if (BBI->mayWriteToMemory())
- return false;
-
- // Check for a non-address-taken alloca. If the alloca is not already
- // address-taken, sinking the load would block promoting it to a register,
- // so this transform isn't profitable.
- if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
- bool isAddressTaken = false;
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
- UI != E; ++UI) {
- if (isa<LoadInst>(*UI)) continue;
- if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
- // If storing TO the alloca, then the address isn't taken.
- if (SI->getOperand(1) == AI) continue;
- }
- isAddressTaken = true;
- break;
- }
-
- if (!isAddressTaken && AI->isStaticAlloca())
- return false;
- }
-
- // If this load is a load from a GEP with a constant offset from an alloca,
- // then we don't want to sink it. In its present form, it will be
- // load [constant stack offset]. Sinking it will cause us to have to
- // materialize the stack addresses in each predecessor in a register only to
- // do a shared load from register in the successor.
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(L->getOperand(0)))
- if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))
- if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())
- return false;
-
- return true;
-}
-
-Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
- LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0));
-
- // When processing loads, we need to propagate two bits of information to the
- // sunk load: whether it is volatile, and what its alignment is. We currently
- // don't sink loads when some have their alignment specified and some don't.
- // visitLoadInst will propagate an alignment onto the load when TD is around,
- // and if TD isn't around, we can't handle the mixed case.
- bool isVolatile = FirstLI->isVolatile();
- unsigned LoadAlignment = FirstLI->getAlignment();
-
- // We can't sink the load if the loaded value could be modified between the
- // load and the PHI.
- if (FirstLI->getParent() != PN.getIncomingBlock(0) ||
- !isSafeAndProfitableToSinkLoad(FirstLI))
- return 0;
-
- // If the PHI is of volatile loads and the load block has multiple
- // successors, sinking it would remove a load of the volatile value from
- // the path through the other successor.
- if (isVolatile &&
- FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
- return 0;
-
- // Check to see if all arguments are the same operation.
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
- if (!LI || !LI->hasOneUse())
- return 0;
-
- // We can't sink the load if the loaded value could be modified between
- // the load and the PHI.
- if (LI->isVolatile() != isVolatile ||
- LI->getParent() != PN.getIncomingBlock(i) ||
- !isSafeAndProfitableToSinkLoad(LI))
- return 0;
-
- // If some of the loads have an alignment specified but not all of them,
- // we can't do the transformation.
- if ((LoadAlignment != 0) != (LI->getAlignment() != 0))
- return 0;
-
- LoadAlignment = std::min(LoadAlignment, LI->getAlignment());
-
- // If the PHI is of volatile loads and the load block has multiple
- // successors, sinking it would remove a load of the volatile value from
- // the path through the other successor.
- if (isVolatile &&
- LI->getParent()->getTerminator()->getNumSuccessors() != 1)
- return 0;
- }
-
- // Okay, they are all the same operation. Create a new PHI node of the
- // correct type, and PHI together all of the pointer operands of the loads.
- PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(),
- PN.getName()+".in");
- NewPN->reserveOperandSpace(PN.getNumOperands()/2);
-
- Value *InVal = FirstLI->getOperand(0);
- NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
-
- // Add all operands to the new PHI.
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0);
- if (NewInVal != InVal)
- InVal = 0;
- NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
- }
-
- Value *PhiVal;
- if (InVal) {
- // The new PHI unions all of the same values together. This is really
- // common, so we handle it intelligently here for compile-time speed.
- PhiVal = InVal;
- delete NewPN;
- } else {
- InsertNewInstBefore(NewPN, PN);
- PhiVal = NewPN;
- }
-
- // If this was a volatile load that we are merging, make sure to loop through
- // and mark all the input loads as non-volatile. If we don't do this, we will
- // insert a new volatile load and the old ones will not be deletable.
- if (isVolatile)
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
- cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
-
- return new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
-}
-
-
-
-/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
-/// operator and they all are only used by the PHI, PHI together their
-/// inputs, and do the operation once, to the result of the PHI.
-Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
- Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
-
- if (isa<GetElementPtrInst>(FirstInst))
- return FoldPHIArgGEPIntoPHI(PN);
- if (isa<LoadInst>(FirstInst))
- return FoldPHIArgLoadIntoPHI(PN);
-
- // Scan the instruction, looking for input operations that can be folded away.
- // If all input operands to the phi are the same instruction (e.g. a cast from
- // the same type or "+42") we can pull the operation through the PHI, reducing
- // code size and simplifying code.
- Constant *ConstantOp = 0;
- const Type *CastSrcTy = 0;
-
- if (isa<CastInst>(FirstInst)) {
- CastSrcTy = FirstInst->getOperand(0)->getType();
-
- // Be careful about transforming integer PHIs. We don't want to pessimize
- // the code by turning an i32 into an i1293.
- if (isa<IntegerType>(PN.getType()) && isa<IntegerType>(CastSrcTy)) {
- if (!ShouldChangeType(PN.getType(), CastSrcTy, TD))
- return 0;
- }
- } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
- // Can fold binop, compare or shift here if the RHS is a constant,
- // otherwise call FoldPHIArgBinOpIntoPHI.
- ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
- if (ConstantOp == 0)
- return FoldPHIArgBinOpIntoPHI(PN);
- } else {
- return 0; // Cannot fold this operation.
- }
-
- // Check to see if all arguments are the same operation.
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
- if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst))
- return 0;
- if (CastSrcTy) {
- if (I->getOperand(0)->getType() != CastSrcTy)
- return 0; // Cast operation must match.
- } else if (I->getOperand(1) != ConstantOp) {
- return 0;
- }
- }
-
- // Okay, they are all the same operation. Create a new PHI node of the
- // correct type, and PHI together all of the LHS's of the instructions.
- PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(),
- PN.getName()+".in");
- NewPN->reserveOperandSpace(PN.getNumOperands()/2);
-
- Value *InVal = FirstInst->getOperand(0);
- NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
-
- // Add all operands to the new PHI.
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0);
- if (NewInVal != InVal)
- InVal = 0;
- NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
- }
-
- Value *PhiVal;
- if (InVal) {
- // The new PHI unions all of the same values together. This is really
- // common, so we handle it intelligently here for compile-time speed.
- PhiVal = InVal;
- delete NewPN;
- } else {
- InsertNewInstBefore(NewPN, PN);
- PhiVal = NewPN;
- }
-
- // Insert and return the new operation.
- if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst))
- return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType());
-
- if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
- return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
-
- CmpInst *CIOp = cast<CmpInst>(FirstInst);
- return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
- PhiVal, ConstantOp);
-}
-
-/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle
-/// that is dead.
-static bool DeadPHICycle(PHINode *PN,
- SmallPtrSet<PHINode*, 16> &PotentiallyDeadPHIs) {
- if (PN->use_empty()) return true;
- if (!PN->hasOneUse()) return false;
-
- // Remember this node, and if we find the cycle, return.
- if (!PotentiallyDeadPHIs.insert(PN))
- return true;
-
- // Don't scan crazily complex things.
- if (PotentiallyDeadPHIs.size() == 16)
- return false;
-
- if (PHINode *PU = dyn_cast<PHINode>(PN->use_back()))
- return DeadPHICycle(PU, PotentiallyDeadPHIs);
-
- return false;
-}
-
-/// PHIsEqualValue - Return true if this phi node is always equal to
-/// NonPhiInVal. This happens with mutually cyclic phi nodes like:
-/// z = some value; x = phi (y, z); y = phi (x, z)
-static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
- SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
- // See if we already saw this PHI node.
- if (!ValueEqualPHIs.insert(PN))
- return true;
-
- // Don't scan crazily complex things.
- if (ValueEqualPHIs.size() == 16)
- return false;
-
- // Scan the operands to see if they are either phi nodes or are equal to
- // the value.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *Op = PN->getIncomingValue(i);
- if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
- if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
- return false;
- } else if (Op != NonPhiInVal)
- return false;
- }
-
- return true;
-}
-
-
-namespace {
-struct PHIUsageRecord {
- unsigned PHIId; // The ID # of the PHI (something deterministic to sort on)
- unsigned Shift; // The amount shifted.
- Instruction *Inst; // The trunc instruction.
-
- PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
- : PHIId(pn), Shift(Sh), Inst(User) {}
-
- bool operator<(const PHIUsageRecord &RHS) const {
- if (PHIId < RHS.PHIId) return true;
- if (PHIId > RHS.PHIId) return false;
- if (Shift < RHS.Shift) return true;
- if (Shift > RHS.Shift) return false;
- return Inst->getType()->getPrimitiveSizeInBits() <
- RHS.Inst->getType()->getPrimitiveSizeInBits();
- }
-};
-
-struct LoweredPHIRecord {
- PHINode *PN; // The PHI that was lowered.
- unsigned Shift; // The amount shifted.
- unsigned Width; // The width extracted.
-
- LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty)
- : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
-
- // Ctor form used by DenseMap.
- LoweredPHIRecord(PHINode *pn, unsigned Sh)
- : PN(pn), Shift(Sh), Width(0) {}
-};
-}
-
-namespace llvm {
- template<>
- struct DenseMapInfo<LoweredPHIRecord> {
- static inline LoweredPHIRecord getEmptyKey() {
- return LoweredPHIRecord(0, 0);
- }
- static inline LoweredPHIRecord getTombstoneKey() {
- return LoweredPHIRecord(0, 1);
- }
- static unsigned getHashValue(const LoweredPHIRecord &Val) {
- return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
- (Val.Width>>3);
- }
- static bool isEqual(const LoweredPHIRecord &LHS,
- const LoweredPHIRecord &RHS) {
- return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
- LHS.Width == RHS.Width;
- }
- };
- template <>
- struct isPodLike<LoweredPHIRecord> { static const bool value = true; };
-}
-
-
-/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an
-/// illegal type: see if it is only used by trunc or trunc(lshr) operations. If
-/// so, we split the PHI into the various pieces being extracted. This sort of
-/// thing is introduced when SROA promotes an aggregate to large integer values.
-///
-/// TODO: The user of the trunc may be a bitcast to float/double/vector or an
-/// inttoptr. We should produce new PHIs in the right type.
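-///
-/// Illustrative IR (hypothetical; assume only i32 is legal on the target):
-///   %p  = phi i64 [ %a, %bb1 ], [ %b, %bb2 ]
-///   %lo = trunc i64 %p to i32
-///   %t  = lshr i64 %p, 32
-///   %hi = trunc i64 %t to i32
-/// is sliced into two i32 PHIs, one per extracted piece, with the lshr/trunc
-/// pairs folded into extracts in the predecessors.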
-///
-Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
- // PHIUsers - Keep track of all of the truncated values extracted from a set
- // of PHIs, along with their offset. These are the things we want to rewrite.
- SmallVector<PHIUsageRecord, 16> PHIUsers;
-
- // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
- // nodes which are extracted from. PHIsInspected is the set we use to avoid
- // revisiting PHIs, and PHIsToSlice is an ordered list of PHIs whose uses we
- // need to check (to ensure they are all extracts).
- SmallVector<PHINode*, 8> PHIsToSlice;
- SmallPtrSet<PHINode*, 8> PHIsInspected;
-
- PHIsToSlice.push_back(&FirstPhi);
- PHIsInspected.insert(&FirstPhi);
-
- for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
- PHINode *PN = PHIsToSlice[PHIId];
-
- // Scan the input list of the PHI. If any input is an invoke, and if the
- // input is defined in the predecessor, then we can't split the critical
- // edge that is required to insert a truncate. Because of this, we have to
- // bail out.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i));
- if (II == 0) continue;
- if (II->getParent() != PN->getIncomingBlock(i))
- continue;
-
- // If we have an invoke directly in the predecessor, then we have a
- // critical edge where we need to put the truncate. Since we can't
- // split the edge in instcombine, we have to bail out.
- return 0;
- }
-
-
- for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
- UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
-
- // If the user is a PHI, inspect its uses recursively.
- if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
- if (PHIsInspected.insert(UserPN))
- PHIsToSlice.push_back(UserPN);
- continue;
- }
-
- // Truncates are always ok.
- if (isa<TruncInst>(User)) {
- PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
- continue;
- }
-
- // Otherwise it must be an lshr which can only be used by one trunc.
- if (User->getOpcode() != Instruction::LShr ||
- !User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
- !isa<ConstantInt>(User->getOperand(1)))
- return 0;
-
- unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
- PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
- }
- }
-
- // If there are no extraction users, all uses must be PHI self-uses;
- // just nuke the PHI.
- if (PHIUsers.empty())
- return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
-
- // If this phi node is transformable, create new PHIs for all the pieces
- // extracted out of it. First, sort the users by their offset and size.
- array_pod_sort(PHIUsers.begin(), PHIUsers.end());
-
- DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
- for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
- errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
- );
-
- // PredValues - This is a temporary used when rewriting PHI nodes. It is
- // hoisted out here to avoid construction/destruction thrashing.
- DenseMap<BasicBlock*, Value*> PredValues;
-
- // ExtractedVals - Each new PHI we introduce is saved here so we don't
- // introduce redundant PHIs.
- DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
-
- for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
- unsigned PHIId = PHIUsers[UserI].PHIId;
- PHINode *PN = PHIsToSlice[PHIId];
- unsigned Offset = PHIUsers[UserI].Shift;
- const Type *Ty = PHIUsers[UserI].Inst->getType();
-
- PHINode *EltPHI;
-
- // If we've already lowered a user like this, reuse the previously lowered
- // value.
- if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
-
- // Otherwise, create the new PHI node for this user.
- EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN);
- assert(EltPHI->getType() != PN->getType() &&
- "Truncate didn't shrink phi?");
-
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *Pred = PN->getIncomingBlock(i);
- Value *&PredVal = PredValues[Pred];
-
- // If we already have a value for this predecessor, reuse it.
- if (PredVal) {
- EltPHI->addIncoming(PredVal, Pred);
- continue;
- }
-
- // Handle the PHI self-reuse case.
- Value *InVal = PN->getIncomingValue(i);
- if (InVal == PN) {
- PredVal = EltPHI;
- EltPHI->addIncoming(PredVal, Pred);
- continue;
- }
-
- if (PHINode *InPHI = dyn_cast<PHINode>(InVal)) {
- // If the incoming value was a PHI, and if it was one of the PHIs we
- // already rewrote, just use the lowered value.
- if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) {
- PredVal = Res;
- EltPHI->addIncoming(PredVal, Pred);
- continue;
- }
- }
-
- // Otherwise, do an extract in the predecessor.
- Builder->SetInsertPoint(Pred, Pred->getTerminator());
- Value *Res = InVal;
- if (Offset)
- Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(),
- Offset), "extract");
- Res = Builder->CreateTrunc(Res, Ty, "extract.t");
- PredVal = Res;
- EltPHI->addIncoming(Res, Pred);
-
- // If the incoming value was a PHI, and if it was one of the PHIs we are
- // rewriting, we will ultimately delete the code we inserted. This
- // means we need to revisit that PHI to make sure we extract out the
- // needed piece.
- if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i)))
- if (PHIsInspected.count(OldInVal)) {
- unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
- OldInVal)-PHIsToSlice.begin();
- PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
- cast<Instruction>(Res)));
- ++UserE;
- }
- }
- PredValues.clear();
-
- DEBUG(errs() << " Made element PHI for offset " << Offset << ": "
- << *EltPHI << '\n');
- ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
- }
-
- // Replace the use of this piece with the PHI node.
- ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
- }
-
- // Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
- // with undefs.
- Value *Undef = UndefValue::get(FirstPhi.getType());
- for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
- ReplaceInstUsesWith(*PHIsToSlice[i], Undef);
- return ReplaceInstUsesWith(FirstPhi, Undef);
-}
-
-// PHINode simplification
-//
-Instruction *InstCombiner::visitPHINode(PHINode &PN) {
- // If LCSSA is around, don't mess with Phi nodes
- if (MustPreserveLCSSA) return 0;
-
- if (Value *V = PN.hasConstantValue())
- return ReplaceInstUsesWith(PN, V);
-
- // If all PHI operands are the same operation, pull them through the PHI,
- // reducing code size.
- if (isa<Instruction>(PN.getIncomingValue(0)) &&
- isa<Instruction>(PN.getIncomingValue(1)) &&
- cast<Instruction>(PN.getIncomingValue(0))->getOpcode() ==
- cast<Instruction>(PN.getIncomingValue(1))->getOpcode() &&
- // FIXME: The hasOneUse check will fail for PHIs that use the value more
- // than once, e.g. when the same incoming value is listed for two blocks.
- PN.getIncomingValue(0)->hasOneUse())
- if (Instruction *Result = FoldPHIArgOpIntoPHI(PN))
- return Result;
-
- // If this is a trivial cycle in the PHI node graph, remove it. Basically, if
- // this PHI only has a single use (a PHI), and if that PHI only has one use (a
- // PHI)... break the cycle.
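- // e.g. (illustrative): %x = phi i32 [ %y, ... ] and %y = phi i32 [ %x, ... ]
- // with no other users compute nothing; the whole cycle folds to undef.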
- if (PN.hasOneUse()) {
- Instruction *PHIUser = cast<Instruction>(PN.use_back());
- if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) {
- SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;
- PotentiallyDeadPHIs.insert(&PN);
- if (DeadPHICycle(PU, PotentiallyDeadPHIs))
- return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
- }
-
- // If this phi has a single use, and if that use just computes a value for
- // the next iteration of a loop, delete the phi. This occurs with unused
- // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this
- // common case here is good because the only other things that catch this
- // are induction variable analysis (sometimes) and ADCE, which is only run
- // late.
- if (PHIUser->hasOneUse() &&
- (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) &&
- PHIUser->use_back() == &PN) {
- return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
- }
- }
-
- // We sometimes end up with phi cycles that non-obviously turn out to be
- // the same value, for example:
- // z = some value; x = phi (y, z); y = phi (x, z)
- // where the phi nodes don't necessarily need to be in the same block. Do a
- // quick check to see if the PHI node only contains a single non-phi value, if
- // so, scan to see if the phi cycle is actually equal to that value.
- {
- unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues();
- // Scan for the first non-phi operand.
- while (InValNo != NumOperandVals &&
- isa<PHINode>(PN.getIncomingValue(InValNo)))
- ++InValNo;
-
- if (InValNo != NumOperandVals) {
- Value *NonPhiInVal = PN.getOperand(InValNo);
-
- // Scan the rest of the operands to see if there are any conflicts, if so
- // there is no need to recursively scan other phis.
- for (++InValNo; InValNo != NumOperandVals; ++InValNo) {
- Value *OpVal = PN.getIncomingValue(InValNo);
- if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
- break;
- }
-
- // If we scanned over all operands, then we have one unique value plus
- // phi values. Scan PHI nodes to see if they all merge in each other or
- // the value.
- if (InValNo == NumOperandVals) {
- SmallPtrSet<PHINode*, 16> ValueEqualPHIs;
- if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
- return ReplaceInstUsesWith(PN, NonPhiInVal);
- }
- }
- }
-
- // If there are multiple PHIs, sort their operands so that they all list
- // the blocks in the same order. This will help identical PHIs be eliminated
- // by other passes. Other passes shouldn't depend on this for correctness,
- // however.
- PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin());
- if (&PN != FirstPN)
- for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *BBA = PN.getIncomingBlock(i);
- BasicBlock *BBB = FirstPN->getIncomingBlock(i);
- if (BBA != BBB) {
- Value *VA = PN.getIncomingValue(i);
- unsigned j = PN.getBasicBlockIndex(BBB);
- Value *VB = PN.getIncomingValue(j);
- PN.setIncomingBlock(i, BBB);
- PN.setIncomingValue(i, VB);
- PN.setIncomingBlock(j, BBA);
- PN.setIncomingValue(j, VA);
- // NOTE: Instcombine normally would want us to "return &PN" if we
- // modified any of the operands of an instruction. However, since we
- // aren't adding or removing uses (just rearranging them) we don't do
- // this in this case.
- }
- }
-
- // If this is an integer PHI and we know that it has an illegal type, see if
- // it is only used by trunc or trunc(lshr) operations. If so, we split the
- // PHI into the various pieces being extracted. This sort of thing is
- // introduced when SROA promotes an aggregate to a single large integer type.
- if (isa<IntegerType>(PN.getType()) && TD &&
- !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
- if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
- return Res;
-
- return 0;
-}
-
-Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
- SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
-
- if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD))
- return ReplaceInstUsesWith(GEP, V);
-
- Value *PtrOp = GEP.getOperand(0);
-
- if (isa<UndefValue>(GEP.getOperand(0)))
- return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));
-
- // Eliminate unneeded casts for indices.
- if (TD) {
- bool MadeChange = false;
- unsigned PtrSize = TD->getPointerSizeInBits();
-
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
- I != E; ++I, ++GTI) {
- if (!isa<SequentialType>(*GTI)) continue;
-
- // If we are using a wider index than needed for this platform, shrink it
- // to what we need. If narrower, sign-extend it to what we need. This
- // explicit cast can make subsequent optimizations more obvious.
- unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth();
- if (OpBits == PtrSize)
- continue;
-
- *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true);
- MadeChange = true;
- }
- if (MadeChange) return &GEP;
- }
-
- // Combine Indices - If the source pointer to this getelementptr instruction
- // is a getelementptr instruction, combine the indices of the two
- // getelementptr instructions into a single instruction.
- //
- if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
- // Note that if our source is a gep chain itself, we wait for that
- // chain to be resolved before we perform this transformation. This
- // avoids us creating a TON of code in some cases.
- //
- if (GetElementPtrInst *SrcGEP =
- dyn_cast<GetElementPtrInst>(Src->getOperand(0)))
- if (SrcGEP->getNumOperands() == 2)
- return 0; // Wait until our source is folded to completion.
-
- SmallVector<Value*, 8> Indices;
-
- // Find out whether the last index in the source GEP is a sequential idx.
- bool EndsWithSequential = false;
- for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
- I != E; ++I)
- EndsWithSequential = !isa<StructType>(*I);
-
- // Can we combine the two pointer arithmetic offsets?
- if (EndsWithSequential) {
- // Replace: gep (gep %P, long B), long A, ...
- // With: T = long A+B; gep %P, T, ...
- //
- Value *Sum;
- Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
- Value *GO1 = GEP.getOperand(1);
- if (SO1 == Constant::getNullValue(SO1->getType())) {
- Sum = GO1;
- } else if (GO1 == Constant::getNullValue(GO1->getType())) {
- Sum = SO1;
- } else {
- // If they aren't the same type, then the input hasn't been processed
- // by the loop above yet (which canonicalizes sequential index types to
- // intptr_t). Just avoid transforming this until the input has been
- // normalized.
- if (SO1->getType() != GO1->getType())
- return 0;
- Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
- }
-
- // Update the GEP in place if possible.
- if (Src->getNumOperands() == 2) {
- GEP.setOperand(0, Src->getOperand(0));
- GEP.setOperand(1, Sum);
- return &GEP;
- }
- Indices.append(Src->op_begin()+1, Src->op_end()-1);
- Indices.push_back(Sum);
- Indices.append(GEP.op_begin()+2, GEP.op_end());
- } else if (isa<Constant>(*GEP.idx_begin()) &&
- cast<Constant>(*GEP.idx_begin())->isNullValue() &&
- Src->getNumOperands() != 1) {
- // Otherwise we can do the fold if the first index of the GEP is a zero
- Indices.append(Src->op_begin()+1, Src->op_end());
- Indices.append(GEP.idx_begin()+1, GEP.idx_end());
- }
-
- if (!Indices.empty())
- return (cast<GEPOperator>(&GEP)->isInBounds() &&
- Src->isInBounds()) ?
- GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(),
- Indices.end(), GEP.getName()) :
- GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
- Indices.end(), GEP.getName());
- }
-
- // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
- if (Value *X = getBitCastOperand(PtrOp)) {
- assert(isa<PointerType>(X->getType()) && "Must be cast from pointer");
-
- // If the input bitcast is actually "bitcast(bitcast(x))", then we don't
- // want to change the gep until the bitcasts are eliminated.
- if (getBitCastOperand(X)) {
- Worklist.AddValue(PtrOp);
- return 0;
- }
-
- bool HasZeroPointerIndex = false;
- if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
- HasZeroPointerIndex = C->isZero();
-
- // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
- // into : GEP [10 x i8]* X, i32 0, ...
- //
- // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
- // into : GEP i8* X, ...
- //
- // This occurs when the program declares an array extern like "int X[];"
- if (HasZeroPointerIndex) {
- const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
- const PointerType *XTy = cast<PointerType>(X->getType());
- if (const ArrayType *CATy =
- dyn_cast<ArrayType>(CPTy->getElementType())) {
- // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
- if (CATy->getElementType() == XTy->getElementType()) {
- // -> GEP i8* X, ...
- SmallVector<Value*, 8> Indices(GEP.idx_begin()+1, GEP.idx_end());
- return cast<GEPOperator>(&GEP)->isInBounds() ?
- GetElementPtrInst::CreateInBounds(X, Indices.begin(), Indices.end(),
- GEP.getName()) :
- GetElementPtrInst::Create(X, Indices.begin(), Indices.end(),
- GEP.getName());
- }
-
- if (const ArrayType *XATy = dyn_cast<ArrayType>(XTy->getElementType())){
- // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
- if (CATy->getElementType() == XATy->getElementType()) {
- // -> GEP [10 x i8]* X, i32 0, ...
- // At this point, we know that the cast source type is a pointer
- // to an array of the same type as the destination pointer
- // array. Because the array type is never stepped over (there
- // is a leading zero) we can fold the cast into this GEP.
- GEP.setOperand(0, X);
- return &GEP;
- }
- }
- }
- } else if (GEP.getNumOperands() == 2) {
- // Transform things like:
- // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
- // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
- const Type *SrcElTy = cast<PointerType>(X->getType())->getElementType();
- const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
- if (TD && isa<ArrayType>(SrcElTy) &&
- TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
- TD->getTypeAllocSize(ResElTy)) {
- Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
- Idx[1] = GEP.getOperand(1);
- Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
- Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) :
- Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
- // V and GEP are both pointer types --> BitCast
- return new BitCastInst(NewGEP, GEP.getType());
- }
-
- // Transform things like:
- // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
- // (where tmp = 8*tmp2) into:
- // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
-
- if (TD && isa<ArrayType>(SrcElTy) && ResElTy == Type::getInt8Ty(*Context)) {
- uint64_t ArrayEltSize =
- TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
-
- // Check to see if "tmp" is a scale by a multiple of ArrayEltSize. We
- // allow either a mul, shift, or constant here.
- Value *NewIdx = 0;
- ConstantInt *Scale = 0;
- if (ArrayEltSize == 1) {
- NewIdx = GEP.getOperand(1);
- Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
- } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
- NewIdx = ConstantInt::get(CI->getType(), 1);
- Scale = CI;
- } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){
- if (Inst->getOpcode() == Instruction::Shl &&
- isa<ConstantInt>(Inst->getOperand(1))) {
- ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
- uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
- Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
- 1ULL << ShAmtVal);
- NewIdx = Inst->getOperand(0);
- } else if (Inst->getOpcode() == Instruction::Mul &&
- isa<ConstantInt>(Inst->getOperand(1))) {
- Scale = cast<ConstantInt>(Inst->getOperand(1));
- NewIdx = Inst->getOperand(0);
- }
- }
-
- // If the index will be to exactly the right offset with the scale taken
- // out, perform the transformation. Note, we don't know whether Scale is
- // signed or not. We'll use the unsigned versions of the division/modulo
- // operations after making sure Scale doesn't have the sign bit set.
- if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
- Scale->getZExtValue() % ArrayEltSize == 0) {
- Scale = ConstantInt::get(Scale->getType(),
- Scale->getZExtValue() / ArrayEltSize);
- if (Scale->getZExtValue() != 1) {
- Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
- false /*ZExt*/);
- NewIdx = Builder->CreateMul(NewIdx, C, "idxscale");
- }
-
- // Insert the new GEP instruction.
- Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
- Idx[1] = NewIdx;
- Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
- Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) :
- Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
- // The NewGEP must be pointer typed, so must the old one -> BitCast
- return new BitCastInst(NewGEP, GEP.getType());
- }
- }
- }
- }
-
- /// See if we can simplify:
- /// X = bitcast A* to B*
- /// Y = gep X, <...constant indices...>
- /// into a gep of the original struct. This is important for SROA and alias
- /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
- if (TD &&
- !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
- // Determine how much the GEP moves the pointer. We are guaranteed to get
- // a constant back from EmitGEPOffset.
- ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP, *this));
- int64_t Offset = OffsetV->getSExtValue();
-
- // If this GEP instruction doesn't move the pointer, just replace the GEP
- // with a bitcast of the real input to the dest type.
- if (Offset == 0) {
- // If the bitcast is of an allocation, and the allocation will be
- // converted to match the type of the cast, don't touch this.
- if (isa<AllocaInst>(BCI->getOperand(0)) ||
- isMalloc(BCI->getOperand(0))) {
- // See if the bitcast simplifies, if so, don't nuke this GEP yet.
- if (Instruction *I = visitBitCast(*BCI)) {
- if (I != BCI) {
- I->takeName(BCI);
- BCI->getParent()->getInstList().insert(BCI, I);
- ReplaceInstUsesWith(*BCI, I);
- }
- return &GEP;
- }
- }
- return new BitCastInst(BCI->getOperand(0), GEP.getType());
- }
-
- // Otherwise, if the offset is non-zero, we need to find out if there is a
- // field at Offset in 'A's type. If so, we can pull the cast through the
- // GEP.
- SmallVector<Value*, 8> NewIndices;
- const Type *InTy =
- cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
- if (FindElementAtOffset(InTy, Offset, NewIndices, TD, Context)) {
- Value *NGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
- Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(),
- NewIndices.end()) :
- Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(),
- NewIndices.end());
-
- if (NGEP->getType() == GEP.getType())
- return ReplaceInstUsesWith(GEP, NGEP);
- NGEP->takeName(&GEP);
- return new BitCastInst(NGEP, GEP.getType());
- }
- }
- }
-
- return 0;
-}
-
-Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
- // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
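- // e.g. (illustrative): "%p = alloca i32, i32 4" becomes "%v = alloca [4 x i32]"
- // followed by "%p = getelementptr [4 x i32]* %v, i32 0, i32 0".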
- if (AI.isArrayAllocation()) { // Check C != 1
- if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
- const Type *NewTy =
- ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
- assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
- AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
- New->setAlignment(AI.getAlignment());
-
- // Scan to the end of the allocation instructions, to skip over a block of
- // allocas if possible...also skip interleaved debug info
- //
- BasicBlock::iterator It = New;
- while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
-
- // Now that It is pointing to the first non-allocation-inst in the block,
- // insert our getelementptr instruction...
- //
- Value *NullIdx = Constant::getNullValue(Type::getInt32Ty(*Context));
- Value *Idx[2];
- Idx[0] = NullIdx;
- Idx[1] = NullIdx;
- Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2,
- New->getName()+".sub", It);
-
- // Now make everything use the getelementptr instead of the original
- // allocation.
- return ReplaceInstUsesWith(AI, V);
- } else if (isa<UndefValue>(AI.getArraySize())) {
- return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
- }
- }
-
- if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {
- // If alloca'ing a zero byte object, replace the alloca with a null pointer.
- // Note that we only do this for alloca's, because malloc should allocate
- // and return a unique pointer, even for a zero byte allocation.
- if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
- return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
-
- // If the alignment is 0 (unspecified), assign it the preferred alignment.
- if (AI.getAlignment() == 0)
- AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
- }
-
- return 0;
-}
-
-Instruction *InstCombiner::visitFree(Instruction &FI) {
- Value *Op = FI.getOperand(1);
-
- // free undef -> unreachable.
- if (isa<UndefValue>(Op)) {
- // Insert a new store to null because we cannot modify the CFG here.
- new StoreInst(ConstantInt::getTrue(*Context),
- UndefValue::get(Type::getInt1PtrTy(*Context)), &FI);
- return EraseInstFromFunction(FI);
- }
-
- // If we have 'free null' delete the instruction. This can happen in stl code
- // when lots of inlining happens.
- if (isa<ConstantPointerNull>(Op))
- return EraseInstFromFunction(FI);
-
- // If we have a malloc call whose only use is a free call, delete both.
- if (isMalloc(Op)) {
- if (CallInst* CI = extractMallocCallFromBitCast(Op)) {
- if (Op->hasOneUse() && CI->hasOneUse()) {
- EraseInstFromFunction(FI);
- EraseInstFromFunction(*CI);
- return EraseInstFromFunction(*cast<Instruction>(Op));
- }
- } else {
- // Op is a call to malloc
- if (Op->hasOneUse()) {
- EraseInstFromFunction(FI);
- return EraseInstFromFunction(*cast<Instruction>(Op));
- }
- }
- }
-
- return 0;
-}
-
-/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
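-/// Illustrative IR (hypothetical; assumes a vector and an integer of equal
-/// 64-bit size on the target):
-///   %c = bitcast <2 x i32>* %P to i64*
-///   %v = load i64* %c
-/// becomes:
-///   %v.v = load <2 x i32>* %P
-///   %v   = bitcast <2 x i32> %v.v to i64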
-static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
- const TargetData *TD) {
- User *CI = cast<User>(LI.getOperand(0));
- Value *CastOp = CI->getOperand(0);
- LLVMContext *Context = IC.getContext();
-
- const PointerType *DestTy = cast<PointerType>(CI->getType());
- const Type *DestPTy = DestTy->getElementType();
- if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
-
- // If the address spaces don't match, don't eliminate the cast.
- if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
- return 0;
-
- const Type *SrcPTy = SrcTy->getElementType();
-
- if (DestPTy->isInteger() || isa<PointerType>(DestPTy) ||
- isa<VectorType>(DestPTy)) {
- // If the source is an array, the code below will not succeed. Check to
- // see if a trivial 'gep P, 0, 0' will help matters. Only do this for
- // constants.
- if (const ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
- if (Constant *CSrc = dyn_cast<Constant>(CastOp))
- if (ASrcTy->getNumElements() != 0) {
- Value *Idxs[2];
- Idxs[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
- Idxs[1] = Idxs[0];
- CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2);
- SrcTy = cast<PointerType>(CastOp->getType());
- SrcPTy = SrcTy->getElementType();
- }
-
- if (IC.getTargetData() &&
- (SrcPTy->isInteger() || isa<PointerType>(SrcPTy) ||
- isa<VectorType>(SrcPTy)) &&
- // Do not allow turning this into a load of an integer, which is then
- // cast to a pointer; that pessimizes pointer analysis a lot.
- (isa<PointerType>(SrcPTy) == isa<PointerType>(LI.getType())) &&
- IC.getTargetData()->getTypeSizeInBits(SrcPTy) ==
- IC.getTargetData()->getTypeSizeInBits(DestPTy)) {
-
- // Okay, we are casting from one integer or pointer type to another of
- // the same size. Instead of casting the pointer before the load, cast
- // the result of the loaded value.
- Value *NewLoad =
- IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
- // Now cast the result of the load.
- return new BitCastInst(NewLoad, LI.getType());
- }
- }
- }
- return 0;
-}
-
-Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
- Value *Op = LI.getOperand(0);
-
- // Attempt to improve the alignment.
- if (TD) {
- unsigned KnownAlign =
- GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
- if (KnownAlign >
- (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
- LI.getAlignment()))
- LI.setAlignment(KnownAlign);
- }
-
- // load (cast X) --> cast (load X) iff safe.
- if (isa<CastInst>(Op))
- if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
- return Res;
-
- // None of the following transforms are legal for volatile loads.
- if (LI.isVolatile()) return 0;
-
- // Do really simple store-to-load forwarding and load CSE, to catch cases
- // where there are several consecutive memory accesses to the same location,
- // separated by a few arithmetic operations.
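- // e.g. (illustrative): "store i32 %x, i32* %P" followed a few instructions
- // later by "%y = load i32* %P" lets us replace %y with %x directly.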
- BasicBlock::iterator BBI = &LI;
- if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
- return ReplaceInstUsesWith(LI, AvailableVal);
-
- // load(gep null, ...) -> unreachable
- if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
- const Value *GEPI0 = GEPI->getOperand(0);
- // TODO: Consider a target hook for valid address spaces for this xform.
- if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
- // Insert a new store to null instruction before the load to indicate
- // that this code is not reachable. We do this instead of inserting
- // an unreachable instruction directly because we cannot modify the
- // CFG.
- new StoreInst(UndefValue::get(LI.getType()),
- Constant::getNullValue(Op->getType()), &LI);
- return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
- }
- }
-
- // load null/undef -> unreachable
- // TODO: Consider a target hook for valid address spaces for this xform.
- if (isa<UndefValue>(Op) ||
- (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
- // Insert a new store to null instruction before the load to indicate that
- // this code is not reachable. We do this instead of inserting an
- // unreachable instruction directly because we cannot modify the CFG.
- new StoreInst(UndefValue::get(LI.getType()),
- Constant::getNullValue(Op->getType()), &LI);
- return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
- }
-
- // Instcombine load (constantexpr_cast global) -> cast (load global)
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
- if (CE->isCast())
- if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
- return Res;
-
- if (Op->hasOneUse()) {
- // Change select and PHI nodes to select values instead of addresses: this
- // helps alias analysis out a lot, allows many other simplifications, and
- // exposes redundancy in the code.
- //
- // Note that we cannot do the transformation unless we know that the
- // introduced loads cannot trap! Something like this is valid as long as
- // the condition is always false: load (select bool %C, int* null, int* %G),
- // but it would not be valid if we transformed it to load from null
- // unconditionally.
- //
- if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
- // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
- if (isSafeToLoadUnconditionally(SI->getOperand(1), SI) &&
- isSafeToLoadUnconditionally(SI->getOperand(2), SI)) {
- Value *V1 = Builder->CreateLoad(SI->getOperand(1),
- SI->getOperand(1)->getName()+".val");
- Value *V2 = Builder->CreateLoad(SI->getOperand(2),
- SI->getOperand(2)->getName()+".val");
- return SelectInst::Create(SI->getCondition(), V1, V2);
- }
-
- // load (select (cond, null, P)) -> load P
- if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
- if (C->isNullValue()) {
- LI.setOperand(0, SI->getOperand(2));
- return &LI;
- }
-
- // load (select (cond, P, null)) -> load P
- if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
- if (C->isNullValue()) {
- LI.setOperand(0, SI->getOperand(1));
- return &LI;
- }
- }
- }
- return 0;
-}
-
-/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
-/// when possible. This makes it generally easy to do alias analysis and/or
-/// SROA/mem2reg of the memory object.
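-/// Illustrative IR (hypothetical; assumes pointer-sized i32 on the target):
-///   %c = bitcast i32** %P to i32*
-///   store i32 %v, i32* %c
-/// becomes:
-///   %v.c = inttoptr i32 %v to i32*
-///   store i32* %v.c, i32** %P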
-static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
- User *CI = cast<User>(SI.getOperand(1));
- Value *CastOp = CI->getOperand(0);
-
- const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
- const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
- if (SrcTy == 0) return 0;
-
- const Type *SrcPTy = SrcTy->getElementType();
-
- if (!DestPTy->isInteger() && !isa<PointerType>(DestPTy))
- return 0;
-
- /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
- /// to its first element. This allows us to handle things like:
- /// store i32 xxx, (bitcast {foo*, float}* %P to i32*)
- /// on 32-bit hosts.
- SmallVector<Value*, 4> NewGEPIndices;
-
- // If the source is an array, the code below will not succeed. Check to
- // see if a trivial 'gep P, 0, 0' will help matters. Only do this for
- // constants.
- if (isa<ArrayType>(SrcPTy) || isa<StructType>(SrcPTy)) {
- // Index through pointer.
- Constant *Zero = Constant::getNullValue(Type::getInt32Ty(*IC.getContext()));
- NewGEPIndices.push_back(Zero);
-
- while (1) {
- if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) {
- if (!STy->getNumElements()) /* Struct can be empty {} */
- break;
- NewGEPIndices.push_back(Zero);
- SrcPTy = STy->getElementType(0);
- } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
- NewGEPIndices.push_back(Zero);
- SrcPTy = ATy->getElementType();
- } else {
- break;
- }
- }
-
- SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
- }
-
- if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy))
- return 0;
-
- // If the pointers point into different address spaces or if they point to
- // values with different sizes, we can't do the transformation.
- if (!IC.getTargetData() ||
- SrcTy->getAddressSpace() !=
- cast<PointerType>(CI->getType())->getAddressSpace() ||
- IC.getTargetData()->getTypeSizeInBits(SrcPTy) !=
- IC.getTargetData()->getTypeSizeInBits(DestPTy))
- return 0;
-
- // Okay, we are casting from one integer or pointer type to another of
- // the same size. Instead of casting the pointer before
- // the store, cast the value to be stored.
- Value *NewCast;
- Value *SIOp0 = SI.getOperand(0);
- Instruction::CastOps opcode = Instruction::BitCast;
- const Type* CastSrcTy = SIOp0->getType();
- const Type* CastDstTy = SrcPTy;
- if (isa<PointerType>(CastDstTy)) {
- if (CastSrcTy->isInteger())
- opcode = Instruction::IntToPtr;
- } else if (isa<IntegerType>(CastDstTy)) {
- if (isa<PointerType>(SIOp0->getType()))
- opcode = Instruction::PtrToInt;
- }
-
- // If the original pointer points to an aggregate, this is a store to its
- // first field; emit a GEP to index into that first field.
- if (!NewGEPIndices.empty())
- CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(),
- NewGEPIndices.end());
-
- NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
- SIOp0->getName()+".c");
- return new StoreInst(NewCast, CastOp);
-}
-
-/// equivalentAddressValues - Test if A and B will obviously have the same
-/// value. This includes recognizing that %t0 and %t1 will have the same
-/// value in code like this:
-/// %t0 = getelementptr \@a, 0, 3
-/// store i32 0, i32* %t0
-/// %t1 = getelementptr \@a, 0, 3
-/// %t2 = load i32* %t1
-///
-static bool equivalentAddressValues(Value *A, Value *B) {
- // Test if the values are trivially equivalent.
- if (A == B) return true;
-
- // Test if the values come from identical arithmetic instructions.
- // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
- // it's only used to compare two uses within the same basic block, which
- // means that they'll always either have the same value or one of them
- // will have an undefined value.
- if (isa<BinaryOperator>(A) ||
- isa<CastInst>(A) ||
- isa<PHINode>(A) ||
- isa<GetElementPtrInst>(A))
- if (Instruction *BI = dyn_cast<Instruction>(B))
- if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
- return true;
-
- // Otherwise they may not be equivalent.
- return false;
-}
-
-// If this instruction has two uses, one of which is a llvm.dbg.declare,
-// return the llvm.dbg.declare.
-DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) {
- if (!V->hasNUses(2))
- return 0;
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
- UI != E; ++UI) {
- if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI))
- return DI;
- if (isa<BitCastInst>(UI) && UI->hasOneUse()) {
- if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI->use_begin()))
- return DI;
- }
- }
- return 0;
-}
-
-Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
- Value *Val = SI.getOperand(0);
- Value *Ptr = SI.getOperand(1);
-
- // If the RHS is an alloca with a single use, zapify the store, making the
- // alloca dead.
- // If the RHS is an alloca with two uses, the other one being a
- // llvm.dbg.declare, zapify the store and the declare, making the
- // alloca dead. We must do this to prevent declares from affecting
- // codegen.
- if (!SI.isVolatile()) {
- if (Ptr->hasOneUse()) {
- if (isa<AllocaInst>(Ptr)) {
- EraseInstFromFunction(SI);
- ++NumCombined;
- return 0;
- }
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
- if (isa<AllocaInst>(GEP->getOperand(0))) {
- if (GEP->getOperand(0)->hasOneUse()) {
- EraseInstFromFunction(SI);
- ++NumCombined;
- return 0;
- }
- if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) {
- EraseInstFromFunction(*DI);
- EraseInstFromFunction(SI);
- ++NumCombined;
- return 0;
- }
- }
- }
- }
- if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) {
- EraseInstFromFunction(*DI);
- EraseInstFromFunction(SI);
- ++NumCombined;
- return 0;
- }
- }
-
- // Attempt to improve the alignment.
- if (TD) {
- unsigned KnownAlign =
- GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
- if (KnownAlign >
- (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
- SI.getAlignment()))
- SI.setAlignment(KnownAlign);
- }
-
- // Do really simple DSE, to catch cases where there are several consecutive
- // stores to the same location, separated by a few arithmetic operations. This
- // situation often occurs with bitfield accesses.
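- // e.g. (illustrative): "store i32 0, i32* %P" followed shortly by
- // "store i32 %x, i32* %P" lets us delete the first, now-dead store.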
- BasicBlock::iterator BBI = &SI;
- for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
- --ScanInsts) {
- --BBI;
- // Don't count debug info directives, lest they affect codegen;
- // also skip pointer-to-pointer bitcasts, which are NOPs.
- // It is necessary for correctness to skip those that feed into a
- // llvm.dbg.declare, as these are not present when debugging is off.
- if (isa<DbgInfoIntrinsic>(BBI) ||
- (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) {
- ScanInsts++;
- continue;
- }
-
- if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
- // Prev store isn't volatile, and stores to the same location?
- if (!PrevSI->isVolatile() && equivalentAddressValues(PrevSI->getOperand(1),
- SI.getOperand(1))) {
- ++NumDeadStore;
- ++BBI;
- EraseInstFromFunction(*PrevSI);
- continue;
- }
- break;
- }
-
- // If this is a load, we have to stop. However, if the loaded value is
- // loaded from the pointer we're storing to and is the value being stored,
- // then *this* store is dead (X = load P; store X -> P).
- if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
- if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
- !SI.isVolatile()) {
- EraseInstFromFunction(SI);
- ++NumCombined;
- return 0;
- }
- // Otherwise, this is a load from some other location. Stores before it
- // may not be dead.
- break;
- }
-
- // Don't skip over loads or things that can modify memory.
- if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
- break;
- }
-
-
- if (SI.isVolatile()) return 0; // Don't hack volatile stores.
-
- // store X, null -> turns into 'unreachable' in SimplifyCFG
- if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
- if (!isa<UndefValue>(Val)) {
- SI.setOperand(0, UndefValue::get(Val->getType()));
- if (Instruction *U = dyn_cast<Instruction>(Val))
- Worklist.Add(U); // Dropped a use.
- ++NumCombined;
- }
- return 0; // Do not modify these!
- }
-
- // store undef, Ptr -> noop
- if (isa<UndefValue>(Val)) {
- EraseInstFromFunction(SI);
- ++NumCombined;
- return 0;
- }
-
- // If the pointer destination is a cast, see if we can fold the cast into the
- // source instead.
- if (isa<CastInst>(Ptr))
- if (Instruction *Res = InstCombineStoreToCast(*this, SI))
- return Res;
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
- if (CE->isCast())
- if (Instruction *Res = InstCombineStoreToCast(*this, SI))
- return Res;
-
-
- // If this store is the last instruction in the basic block (possibly
- // excepting debug info instructions and the pointer bitcasts that feed
- // into them), and if the block ends with an unconditional branch, try
- // to move it to the successor block.
- BBI = &SI;
- do {
- ++BBI;
- } while (isa<DbgInfoIntrinsic>(BBI) ||
- (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType())));
- if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
- if (BI->isUnconditional())
- if (SimplifyStoreAtEndOfBlock(SI))
- return 0; // xform done!
-
- return 0;
-}
-
-/// SimplifyStoreAtEndOfBlock - Turn things like:
-/// if () { *P = v1; } else { *P = v2 }
-/// into a phi node with a store in the successor.
-///
-/// Simplify things like:
-/// *P = v1; if () { *P = v2; }
-/// into a phi node with a store in the successor.
-///
-bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
- BasicBlock *StoreBB = SI.getParent();
-
- // Check to see if the successor block has exactly two incoming edges. If
- // so, see if the other predecessor contains a store to the same location.
- // If so, insert a PHI node (if needed) and move the stores down.
- BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
-
- // Determine whether Dest has exactly two predecessors and, if so, compute
- // the other predecessor.
- pred_iterator PI = pred_begin(DestBB);
- BasicBlock *OtherBB = 0;
- if (*PI != StoreBB)
- OtherBB = *PI;
- ++PI;
- if (PI == pred_end(DestBB))
- return false;
-
- if (*PI != StoreBB) {
- if (OtherBB)
- return false;
- OtherBB = *PI;
- }
- if (++PI != pred_end(DestBB))
- return false;
-
- // Bail out if all the relevant blocks aren't distinct (this can happen,
- // for example, if SI is in an infinite loop)
- if (StoreBB == DestBB || OtherBB == DestBB)
- return false;
-
- // Verify that the other block ends in a branch and is not otherwise empty.
- BasicBlock::iterator BBI = OtherBB->getTerminator();
- BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
- if (!OtherBr || BBI == OtherBB->begin())
- return false;
-
- // If the other block ends in an unconditional branch, check for the 'if then
- // else' case. There must be an instruction before the branch.
- StoreInst *OtherStore = 0;
- if (OtherBr->isUnconditional()) {
- --BBI;
- // Skip over debugging info.
- while (isa<DbgInfoIntrinsic>(BBI) ||
- (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) {
- if (BBI==OtherBB->begin())
- return false;
- --BBI;
- }
- // If this isn't a store, isn't a store to the same location, or if the
- // alignments differ, bail out.
- OtherStore = dyn_cast<StoreInst>(BBI);
- if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
- OtherStore->getAlignment() != SI.getAlignment())
- return false;
- } else {
- // Otherwise, the other block ended with a conditional branch. If one of the
- // destinations is StoreBB, then we have the if/then case.
- if (OtherBr->getSuccessor(0) != StoreBB &&
- OtherBr->getSuccessor(1) != StoreBB)
- return false;
-
- // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
- // if/then triangle. See if there is a store to the same ptr as SI that
- // lives in OtherBB.
- for (;; --BBI) {
- // Check to see if we find the matching store.
- if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
- if (OtherStore->getOperand(1) != SI.getOperand(1) ||
- OtherStore->getAlignment() != SI.getAlignment())
- return false;
- break;
- }
- // If we find something that may be using or overwriting the stored
- // value, or if we run out of instructions, we can't do the xform.
- if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
- BBI == OtherBB->begin())
- return false;
- }
-
- // In order to eliminate the store in OtherBr, we have to
- // make sure nothing reads or overwrites the stored value in
- // StoreBB.
- for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
- // FIXME: This should really be AA driven.
- if (I->mayReadFromMemory() || I->mayWriteToMemory())
- return false;
- }
- }
-
- // Insert a PHI node now if we need it.
- Value *MergedVal = OtherStore->getOperand(0);
- if (MergedVal != SI.getOperand(0)) {
- PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge");
- PN->reserveOperandSpace(2);
- PN->addIncoming(SI.getOperand(0), SI.getParent());
- PN->addIncoming(OtherStore->getOperand(0), OtherBB);
- MergedVal = InsertNewInstBefore(PN, DestBB->front());
- }
-
- // Advance to a place where it is safe to insert the new store and
- // insert it.
- BBI = DestBB->getFirstNonPHI();
- InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
- OtherStore->isVolatile(),
- SI.getAlignment()), *BBI);
-
- // Nuke the old stores.
- EraseInstFromFunction(SI);
- EraseInstFromFunction(*OtherStore);
- ++NumCombined;
- return true;
-}
-
-
-Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
- // Change br (not X), label True, label False to: br X, label False, True
- Value *X = 0;
- BasicBlock *TrueDest;
- BasicBlock *FalseDest;
- if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
- !isa<Constant>(X)) {
- // Swap Destinations and condition...
- BI.setCondition(X);
- BI.setSuccessor(0, FalseDest);
- BI.setSuccessor(1, TrueDest);
- return &BI;
- }
-
- // Canonicalize fcmp_one -> fcmp_oeq
- FCmpInst::Predicate FPred; Value *Y;
- if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
- TrueDest, FalseDest)) &&
- BI.getCondition()->hasOneUse())
- if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
- FPred == FCmpInst::FCMP_OGE) {
- FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
- Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
-
- // Swap Destinations and condition.
- BI.setSuccessor(0, FalseDest);
- BI.setSuccessor(1, TrueDest);
- Worklist.Add(Cond);
- return &BI;
- }
-
- // Canonicalize icmp_ne -> icmp_eq
- ICmpInst::Predicate IPred;
- if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
- TrueDest, FalseDest)) &&
- BI.getCondition()->hasOneUse())
- if (IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE ||
- IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
- IPred == ICmpInst::ICMP_SGE) {
- ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
- Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
- // Swap Destinations and condition.
- BI.setSuccessor(0, FalseDest);
- BI.setSuccessor(1, TrueDest);
- Worklist.Add(Cond);
- return &BI;
- }
-
- return 0;
-}
-
-Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
- Value *Cond = SI.getCondition();
- if (Instruction *I = dyn_cast<Instruction>(Cond)) {
- if (I->getOpcode() == Instruction::Add)
- if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
- // change 'switch (X+4) case 1:' into 'switch (X) case -3'
- for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
- SI.setOperand(i,
- ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
- AddRHS));
- SI.setOperand(0, I->getOperand(0));
- Worklist.Add(I);
- return &SI;
- }
- }
- return 0;
-}
-
-Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
- Value *Agg = EV.getAggregateOperand();
-
- if (!EV.hasIndices())
- return ReplaceInstUsesWith(EV, Agg);
-
- if (Constant *C = dyn_cast<Constant>(Agg)) {
- if (isa<UndefValue>(C))
- return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
-
- if (isa<ConstantAggregateZero>(C))
- return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
-
- if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
- // Extract the element indexed by the first index out of the constant
- Value *V = C->getOperand(*EV.idx_begin());
- if (EV.getNumIndices() > 1)
- // Extract the remaining indices out of the constant indexed by the
- // first index
- return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
- else
- return ReplaceInstUsesWith(EV, V);
- }
- return 0; // Can't handle other constants
- }
- if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
- // We're extracting from an insertvalue instruction, compare the indices
- const unsigned *exti, *exte, *insi, *inse;
- for (exti = EV.idx_begin(), insi = IV->idx_begin(),
- exte = EV.idx_end(), inse = IV->idx_end();
- exti != exte && insi != inse;
- ++exti, ++insi) {
- if (*insi != *exti)
- // The insert and extract reference distinct elements.
- // This means the extract is not influenced by the insert, and we can
- // replace the aggregate operand of the extract with the aggregate
- // operand of the insert. i.e., replace
- // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
- // %E = extractvalue { i32, { i32 } } %I, 0
- // with
- // %E = extractvalue { i32, { i32 } } %A, 0
- return ExtractValueInst::Create(IV->getAggregateOperand(),
- EV.idx_begin(), EV.idx_end());
- }
- if (exti == exte && insi == inse)
- // Both iterators are at the end: Index lists are identical. Replace
- // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
- // %C = extractvalue { i32, { i32 } } %B, 1, 0
- // with "i32 42"
- return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand());
- if (exti == exte) {
- // The extract list is a prefix of the insert list. i.e. replace
- // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
- // %E = extractvalue { i32, { i32 } } %I, 1
- // with
- // %X = extractvalue { i32, { i32 } } %A, 1
- // %E = insertvalue { i32 } %X, i32 42, 0
- // by switching the order of the insert and extract (though the
- // insertvalue should be left in, since it may have other uses).
- Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
- EV.idx_begin(), EV.idx_end());
- return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
- insi, inse);
- }
- if (insi == inse)
- // The insert list is a prefix of the extract list
- // We can simply remove the common indices from the extract and make it
- // operate on the inserted value instead of the insertvalue result.
- // i.e., replace
- // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
- // %E = extractvalue { i32, { i32 } } %I, 1, 0
- // with
- // %E = extractvalue { i32 } { i32 42 }, 0
- return ExtractValueInst::Create(IV->getInsertedValueOperand(),
- exti, exte);
- }
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
- // We're extracting from an intrinsic; see if we're its only user, which
- // allows us to simplify multiple-result intrinsics into simpler things
- // that just produce one value.
- if (II->hasOneUse()) {
- // Check if we're grabbing the overflow bit or the result of a 'with
- // overflow' intrinsic. If it's the latter we can remove the intrinsic
- // and replace it with a traditional binary instruction.
- switch (II->getIntrinsicID()) {
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::sadd_with_overflow:
- if (*EV.idx_begin() == 0) { // Normal result.
- Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
- II->replaceAllUsesWith(UndefValue::get(II->getType()));
- EraseInstFromFunction(*II);
- return BinaryOperator::CreateAdd(LHS, RHS);
- }
- break;
- case Intrinsic::usub_with_overflow:
- case Intrinsic::ssub_with_overflow:
- if (*EV.idx_begin() == 0) { // Normal result.
- Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
- II->replaceAllUsesWith(UndefValue::get(II->getType()));
- EraseInstFromFunction(*II);
- return BinaryOperator::CreateSub(LHS, RHS);
- }
- break;
- case Intrinsic::umul_with_overflow:
- case Intrinsic::smul_with_overflow:
- if (*EV.idx_begin() == 0) { // Normal result.
- Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
- II->replaceAllUsesWith(UndefValue::get(II->getType()));
- EraseInstFromFunction(*II);
- return BinaryOperator::CreateMul(LHS, RHS);
- }
- break;
- default:
- break;
- }
- }
- }
- // Can't simplify extracts from other values. Note that nested extracts are
- // already simplified implicitly by the above (extract (extract (insert))
- // will be translated into extract (insert (extract)) first and then just
- // the value inserted, if appropriate).
- return 0;
-}
-
-/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
-/// is to leave as a vector operation.
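-/// e.g. (illustrative): extracting lane 0 of "add <4 x i32> %a, %b" where %b
-/// is a constant vector is cheaper as a scalar add of the extracted lanes.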
-static bool CheapToScalarize(Value *V, bool isConstant) {
- if (isa<ConstantAggregateZero>(V))
- return true;
- if (ConstantVector *C = dyn_cast<ConstantVector>(V)) {
- if (isConstant) return true;
- // If all elts are the same, we can extract.
- Constant *Op0 = C->getOperand(0);
- for (unsigned i = 1; i < C->getNumOperands(); ++i)
- if (C->getOperand(i) != Op0)
- return false;
- return true;
- }
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I) return false;
-
- // Insert element gets simplified to the inserted element or is deleted if
- // this is a constant idx extractelement and it's a constant idx insertelt.
- if (I->getOpcode() == Instruction::InsertElement && isConstant &&
- isa<ConstantInt>(I->getOperand(2)))
- return true;
- if (I->getOpcode() == Instruction::Load && I->hasOneUse())
- return true;
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
- if (BO->hasOneUse() &&
- (CheapToScalarize(BO->getOperand(0), isConstant) ||
- CheapToScalarize(BO->getOperand(1), isConstant)))
- return true;
- if (CmpInst *CI = dyn_cast<CmpInst>(I))
- if (CI->hasOneUse() &&
- (CheapToScalarize(CI->getOperand(0), isConstant) ||
- CheapToScalarize(CI->getOperand(1), isConstant)))
- return true;
-
- return false;
-}
-
-/// Read and decode a shufflevector mask.
-///
-/// It turns undef elements into values that are larger than the number of
-/// elements in the input.
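-/// e.g. (illustrative): for a <4 x i32> result, the mask
-/// <i32 0, i32 undef, i32 5, i32 3> decodes to {0, 8, 5, 3}, since each
-/// undef element becomes 2*NElts = 8.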
-static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) {
- unsigned NElts = SVI->getType()->getNumElements();
- if (isa<ConstantAggregateZero>(SVI->getOperand(2)))
- return std::vector<unsigned>(NElts, 0);
- if (isa<UndefValue>(SVI->getOperand(2)))
- return std::vector<unsigned>(NElts, 2*NElts);
-
- std::vector<unsigned> Result;
- const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
- for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
- if (isa<UndefValue>(*i))
- Result.push_back(NElts*2); // undef -> out of range (2*NElts)
- else
- Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
- return Result;
-}
-
-/// FindScalarElement - Given a vector and an element number, see if the scalar
-/// value is already around as a register, for example if it were inserted then
-/// extracted from the vector.
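-///
-/// For example (sketch): for %v = insertelement <4 x i32> %w, i32 %x, i32 1,
-/// FindScalarElement(%v, 1) returns %x, and FindScalarElement(%v, 0) recurses
-/// into %w.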
-static Value *FindScalarElement(Value *V, unsigned EltNo,
- LLVMContext *Context) {
- assert(isa<VectorType>(V->getType()) && "Not looking at a vector?");
- const VectorType *PTy = cast<VectorType>(V->getType());
- unsigned Width = PTy->getNumElements();
- if (EltNo >= Width) // Out of range access.
- return UndefValue::get(PTy->getElementType());
-
- if (isa<UndefValue>(V))
- return UndefValue::get(PTy->getElementType());
- else if (isa<ConstantAggregateZero>(V))
- return Constant::getNullValue(PTy->getElementType());
- else if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
- return CP->getOperand(EltNo);
- else if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
- // If this is an insert to a variable element, we don't know what it is.
- if (!isa<ConstantInt>(III->getOperand(2)))
- return 0;
- unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
-
- // If this is an insert to the element we are looking for, return the
- // inserted value.
- if (EltNo == IIElt)
- return III->getOperand(1);
-
- // Otherwise, the insertelement doesn't modify the value, recurse on its
- // vector input.
- return FindScalarElement(III->getOperand(0), EltNo, Context);
- } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
- unsigned LHSWidth =
- cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
- unsigned InEl = getShuffleMask(SVI)[EltNo];
- if (InEl < LHSWidth)
- return FindScalarElement(SVI->getOperand(0), InEl, Context);
- else if (InEl < LHSWidth*2)
- return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth, Context);
- else
- return UndefValue::get(PTy->getElementType());
- }
-
- // Otherwise, we don't know.
- return 0;
-}
-
-Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
- // If vector val is undef, replace extract with scalar undef.
- if (isa<UndefValue>(EI.getOperand(0)))
- return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
-
- // If vector val is constant 0, replace extract with scalar 0.
- if (isa<ConstantAggregateZero>(EI.getOperand(0)))
- return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
-
- if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
- // If vector val is constant with all elements the same, replace EI with
- // that element. When the elements are not identical, we cannot replace yet
- // (we do that below, but only when the index is constant).
- Constant *op0 = C->getOperand(0);
- for (unsigned i = 1; i != C->getNumOperands(); ++i)
- if (C->getOperand(i) != op0) {
- op0 = 0;
- break;
- }
- if (op0)
- return ReplaceInstUsesWith(EI, op0);
- }
-
- // If extracting a specified index from the vector, see if we can recursively
- // find a previously computed scalar that was inserted into the vector.
- if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
- unsigned IndexVal = IdxC->getZExtValue();
- unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
-
- // If this is extracting an invalid index, turn this into undef, to avoid
- // crashing the code below.
- if (IndexVal >= VectorWidth)
- return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
-
- // This instruction only demands the single element from the input vector.
- // If the input vector has a single use, simplify it based on this use
- // property.
- if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
- APInt UndefElts(VectorWidth, 0);
- APInt DemandedMask(VectorWidth, 1 << IndexVal);
- if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
- DemandedMask, UndefElts)) {
- EI.setOperand(0, V);
- return &EI;
- }
- }
-
- if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal, Context))
- return ReplaceInstUsesWith(EI, Elt);
-
- // If this extractelement is directly using a bitcast from a vector of
- // the same number of elements, see if we can find the source element from
- // it. In this case, we will end up needing to bitcast the scalars.
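- // For example (sketch):
- //   %b = bitcast <4 x i32> %v to <4 x float>
- //   %e = extractelement <4 x float> %b, i32 1
- // can become, when element 1 of %v is available as a scalar %s:
- //   %e = bitcast i32 %s to float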
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
- if (const VectorType *VT =
- dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
- if (VT->getNumElements() == VectorWidth)
- if (Value *Elt = FindScalarElement(BCI->getOperand(0),
- IndexVal, Context))
- return new BitCastInst(Elt, EI.getType());
- }
- }
-
- if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
- // Push extractelement into predecessor operation if legal and
- // profitable to do so
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- if (I->hasOneUse() &&
- CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
- Value *newEI0 =
- Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
- EI.getName()+".lhs");
- Value *newEI1 =
- Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
- EI.getName()+".rhs");
- return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
- }
- } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
- // Extracting the inserted element?
- if (IE->getOperand(2) == EI.getOperand(1))
- return ReplaceInstUsesWith(EI, IE->getOperand(1));
- // If the inserted and extracted elements are constants, they must not
- // be the same value; extract from the pre-inserted value instead.
- if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
- Worklist.AddValue(EI.getOperand(0));
- EI.setOperand(0, IE->getOperand(0));
- return &EI;
- }
- } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
- // If this is extracting an element from a shufflevector, figure out where
- // it came from and extract from the appropriate input vector instead.
- if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
- unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()];
- Value *Src;
- unsigned LHSWidth =
- cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
-
- if (SrcIdx < LHSWidth)
- Src = SVI->getOperand(0);
- else if (SrcIdx < LHSWidth*2) {
- SrcIdx -= LHSWidth;
- Src = SVI->getOperand(1);
- } else {
- return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
- }
- return ExtractElementInst::Create(Src,
- ConstantInt::get(Type::getInt32Ty(*Context), SrcIdx,
- false));
- }
- }
- // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement)
- }
- return 0;
-}
-
-/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
-/// elements from either LHS or RHS, return the shuffle mask and true.
-/// Otherwise, return false.
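-///
-/// For example (sketch), with 4-element vectors, a V that inserts element 2 of
-/// RHS into position 0 of LHS yields the mask <i32 6, i32 1, i32 2, i32 3>
-/// (RHS elements are numbered NumElts..2*NumElts-1).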
-static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
- std::vector<Constant*> &Mask,
- LLVMContext *Context) {
- assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
- "Invalid CollectSingleShuffleElements");
- unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
-
- if (isa<UndefValue>(V)) {
- Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(*Context)));
- return true;
- } else if (V == LHS) {
- for (unsigned i = 0; i != NumElts; ++i)
- Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i));
- return true;
- } else if (V == RHS) {
- for (unsigned i = 0; i != NumElts; ++i)
- Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i+NumElts));
- return true;
- } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
- // If this is an insert of an extract from some other vector, include it.
- Value *VecOp = IEI->getOperand(0);
- Value *ScalarOp = IEI->getOperand(1);
- Value *IdxOp = IEI->getOperand(2);
-
- if (!isa<ConstantInt>(IdxOp))
- return false;
- unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
-
- if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
- // Okay, we can handle this if the vector we are inserting into is
- // transitively ok.
- if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask, Context)) {
- // If so, update the mask to reflect the inserted undef.
- Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(*Context));
- return true;
- }
- } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
- if (isa<ConstantInt>(EI->getOperand(1)) &&
- EI->getOperand(0)->getType() == V->getType()) {
- unsigned ExtractedIdx =
- cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
-
- // This must be extracting from either LHS or RHS.
- if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
- // Okay, we can handle this if the vector we are inserting into is
- // transitively ok.
- if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask, Context)) {
- // If so, update the mask to reflect the inserted value.
- if (EI->getOperand(0) == LHS) {
- Mask[InsertedIdx % NumElts] =
- ConstantInt::get(Type::getInt32Ty(*Context), ExtractedIdx);
- } else {
- assert(EI->getOperand(0) == RHS);
- Mask[InsertedIdx % NumElts] =
- ConstantInt::get(Type::getInt32Ty(*Context), ExtractedIdx+NumElts);
-
- }
- return true;
- }
- }
- }
- }
- }
- // TODO: Handle shufflevector here!
-
- return false;
-}
-
-/// CollectShuffleElements - We are building a shuffle of V, using RHS as the
-/// RHS of the shuffle instruction, if it is not null. Compute a shuffle mask
-/// that produces V, store it in Mask, and return the LHS value of the shuffle.
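-///
-/// For example (sketch), a chain of insertelements that assembles V entirely
-/// out of elements extracted from two vectors collapses to a single
-/// shufflevector of those vectors with the accumulated mask.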
-static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
- Value *&RHS, LLVMContext *Context) {
- assert(isa<VectorType>(V->getType()) &&
- (RHS == 0 || V->getType() == RHS->getType()) &&
- "Invalid shuffle!");
- unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
-
- if (isa<UndefValue>(V)) {
- Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(*Context)));
- return V;
- } else if (isa<ConstantAggregateZero>(V)) {
- Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(*Context), 0));
- return V;
- } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
- // If this is an insert of an extract from some other vector, include it.
- Value *VecOp = IEI->getOperand(0);
- Value *ScalarOp = IEI->getOperand(1);
- Value *IdxOp = IEI->getOperand(2);
-
- if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
- if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
- EI->getOperand(0)->getType() == V->getType()) {
- unsigned ExtractedIdx =
- cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
- unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
-
- // Either the vector extracted from or the vector inserted into must be RHS;
- // otherwise we'd end up with a shuffle of three inputs.
- if (EI->getOperand(0) == RHS || RHS == 0) {
- RHS = EI->getOperand(0);
- Value *V = CollectShuffleElements(VecOp, Mask, RHS, Context);
- Mask[InsertedIdx % NumElts] =
- ConstantInt::get(Type::getInt32Ty(*Context), NumElts+ExtractedIdx);
- return V;
- }
-
- if (VecOp == RHS) {
- Value *V = CollectShuffleElements(EI->getOperand(0), Mask,
- RHS, Context);
- // Everything but the extracted element is replaced with the RHS.
- for (unsigned i = 0; i != NumElts; ++i) {
- if (i != InsertedIdx)
- Mask[i] = ConstantInt::get(Type::getInt32Ty(*Context), NumElts+i);
- }
- return V;
- }
-
- // If this insertelement is a chain that comes from exactly these two
- // vectors, return the vector and the effective shuffle.
- if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask,
- Context))
- return EI->getOperand(0);
-
- }
- }
- }
- // TODO: Handle shufflevector here!
-
- // Otherwise, we can't do anything fancy. Return an identity mask.
- for (unsigned i = 0; i != NumElts; ++i)
- Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i));
- return V;
-}
-
-Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
- Value *VecOp = IE.getOperand(0);
- Value *ScalarOp = IE.getOperand(1);
- Value *IdxOp = IE.getOperand(2);
-
- // If we are inserting an undef value, or inserting into an undefined
- // position, remove this instruction.
- if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
- ReplaceInstUsesWith(IE, VecOp);
-
- // If the inserted element was extracted from some other vector, and if the
- // indexes are constant, try to turn this into a shufflevector operation.
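- // For example (sketch), with <4 x i32> vectors:
- //   %e = extractelement <4 x i32> %B, i32 1
- //   %r = insertelement <4 x i32> %A, i32 %e, i32 0
- // becomes:
- //   %r = shufflevector <4 x i32> %A, <4 x i32> %B,
- //                      <4 x i32> <i32 5, i32 1, i32 2, i32 3>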
- if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
- if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
- EI->getOperand(0)->getType() == IE.getType()) {
- unsigned NumVectorElts = IE.getType()->getNumElements();
- unsigned ExtractedIdx =
- cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
- unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
-
- if (ExtractedIdx >= NumVectorElts) // Out of range extract.
- return ReplaceInstUsesWith(IE, VecOp);
-
- if (InsertedIdx >= NumVectorElts) // Out of range insert.
- return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
-
- // If we are extracting a value from a vector, then inserting it right
- // back into the same place, just use the input vector.
- if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
- return ReplaceInstUsesWith(IE, VecOp);
-
- // If this insertelement isn't used by some other insertelement, turn it
- // (and any insertelements it points to) into one big shuffle.
- if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
- std::vector<Constant*> Mask;
- Value *RHS = 0;
- Value *LHS = CollectShuffleElements(&IE, Mask, RHS, Context);
- if (RHS == 0) RHS = UndefValue::get(LHS->getType());
- // We now have a shuffle of LHS, RHS, Mask.
- return new ShuffleVectorInst(LHS, RHS,
- ConstantVector::get(Mask));
- }
- }
- }
-
- unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
- APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
- if (SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts))
- return &IE;
-
- return 0;
-}
-
-
-Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
- Value *LHS = SVI.getOperand(0);
- Value *RHS = SVI.getOperand(1);
- std::vector<unsigned> Mask = getShuffleMask(&SVI);
-
- bool MadeChange = false;
-
- // Undefined shuffle mask -> undefined value.
- if (isa<UndefValue>(SVI.getOperand(2)))
- return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
-
- unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
-
- if (VWidth != cast<VectorType>(LHS->getType())->getNumElements())
- return 0;
-
- APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
- if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
- LHS = SVI.getOperand(0);
- RHS = SVI.getOperand(1);
- MadeChange = true;
- }
-
- // Canonicalize shuffle(x, x, mask) -> shuffle(x, undef, mask')
- // Canonicalize shuffle(undef, x, mask) -> shuffle(x, undef, mask').
- if (LHS == RHS || isa<UndefValue>(LHS)) {
- if (isa<UndefValue>(LHS) && LHS == RHS) {
- // shuffle(undef,undef,mask) -> undef.
- return ReplaceInstUsesWith(SVI, LHS);
- }
-
- // Remap any references to RHS to use LHS.
- std::vector<Constant*> Elts;
- for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
- if (Mask[i] >= 2*e)
- Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
- else {
- if ((Mask[i] >= e && isa<UndefValue>(RHS)) ||
- (Mask[i] < e && isa<UndefValue>(LHS))) {
- Mask[i] = 2*e; // Turn into undef.
- Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
- } else {
- Mask[i] = Mask[i] % e; // Force to LHS.
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Mask[i]));
- }
- }
- }
- SVI.setOperand(0, SVI.getOperand(1));
- SVI.setOperand(1, UndefValue::get(RHS->getType()));
- SVI.setOperand(2, ConstantVector::get(Elts));
- LHS = SVI.getOperand(0);
- RHS = SVI.getOperand(1);
- MadeChange = true;
- }
-
- // Analyze the shuffle: is the LHS or the RHS an identity shuffle?
- bool isLHSID = true, isRHSID = true;
-
- for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
- if (Mask[i] >= e*2) continue; // Ignore undef values.
- // Is this an identity shuffle of the LHS value?
- isLHSID &= (Mask[i] == i);
-
- // Is this an identity shuffle of the RHS value?
- isRHSID &= (Mask[i]-e == i);
- }
-
- // Eliminate identity shuffles.
- if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
- if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
-
- // If the LHS is a shufflevector itself, see if we can combine it with this
- // one without producing an unusual shuffle. Here we are really conservative:
- // we are absolutely afraid of producing a shuffle mask not in the input
- // program, because the codegen may not be smart enough to turn a merged
- // shuffle into two specific shuffles: it may produce worse code. As such,
- // we only merge two shuffles if the result is one of the two input shuffle
- // masks. In this case, merging the shuffles just removes one instruction,
- // which we know is safe. This is good for things like turning:
- // (splat(splat)) -> splat.
- if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) {
- if (isa<UndefValue>(RHS)) {
- std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI);
-
- if (LHSMask.size() == Mask.size()) {
- std::vector<unsigned> NewMask;
- for (unsigned i = 0, e = Mask.size(); i != e; ++i)
- if (Mask[i] >= e)
- NewMask.push_back(2*e);
- else
- NewMask.push_back(LHSMask[Mask[i]]);
-
- // If the result mask is equal to the src shuffle or this
- // shuffle mask, do the replacement.
- if (NewMask == LHSMask || NewMask == Mask) {
- unsigned LHSInNElts =
- cast<VectorType>(LHSSVI->getOperand(0)->getType())->
- getNumElements();
- std::vector<Constant*> Elts;
- for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
- if (NewMask[i] >= LHSInNElts*2) {
- Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
- } else {
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context),
- NewMask[i]));
- }
- }
- return new ShuffleVectorInst(LHSSVI->getOperand(0),
- LHSSVI->getOperand(1),
- ConstantVector::get(Elts));
- }
- }
- }
- }
-
- return MadeChange ? &SVI : 0;
-}
-
-
-/// TryToSinkInstruction - Try to move the specified instruction from its
-/// current block into the beginning of DestBlock, which can only happen if it's
-/// safe to move the instruction past all of the instructions between it and the
-/// end of its block.
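-///
-/// For example (sketch), a load whose only use is in a successor block can be
-/// sunk there, but only if no instruction between it and the end of its block
-/// may write to memory.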
-static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
- assert(I->hasOneUse() && "Invariants didn't hold!");
-
- // Cannot move PHI nodes, instructions that may have side effects (volatile
- // loads, vaarg, etc.), or terminators.
- if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I))
- return false;
-
- // Do not sink alloca instructions out of the entry block.
- if (isa<AllocaInst>(I) && I->getParent() ==
- &DestBlock->getParent()->getEntryBlock())
- return false;
-
- // We can only sink load instructions if there is nothing between the load
- // and the end of its block that could change the value.
- if (I->mayReadFromMemory()) {
- for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
- Scan != E; ++Scan)
- if (Scan->mayWriteToMemory())
- return false;
- }
-
- BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();
-
- CopyPrecedingStopPoint(I, InsertPos);
- I->moveBefore(InsertPos);
- ++NumSunkInst;
- return true;
-}
-
-
-/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
-/// all reachable code to the worklist.
-///
-/// This has a couple of tricks to make the code faster and more powerful. In
-/// particular, we constant fold and DCE instructions as we go, to avoid adding
-/// them to the worklist (this significantly speeds up instcombine on code where
-/// many instructions are dead or constant). Additionally, if we find a branch
-/// whose condition is a known constant, we only visit the reachable successors.
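-/// For example (sketch), given "br i1 true, label %T, label %F", only %T is
-/// pushed onto the block worklist; %F is never visited unless it is reachable
-/// some other way.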
-///
-static bool AddReachableCodeToWorklist(BasicBlock *BB,
- SmallPtrSet<BasicBlock*, 64> &Visited,
- InstCombiner &IC,
- const TargetData *TD) {
- bool MadeIRChange = false;
- SmallVector<BasicBlock*, 256> Worklist;
- Worklist.push_back(BB);
-
- std::vector<Instruction*> InstrsForInstCombineWorklist;
- InstrsForInstCombineWorklist.reserve(128);
-
- SmallPtrSet<ConstantExpr*, 64> FoldedConstants;
-
- while (!Worklist.empty()) {
- BB = Worklist.back();
- Worklist.pop_back();
-
- // We have now visited this block! If we've already been here, ignore it.
- if (!Visited.insert(BB)) continue;
-
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
- Instruction *Inst = BBI++;
-
- // DCE instruction if trivially dead.
- if (isInstructionTriviallyDead(Inst)) {
- ++NumDeadInst;
- DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
- Inst->eraseFromParent();
- continue;
- }
-
- // ConstantProp instruction if trivially constant.
- if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
- DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
- << *Inst << '\n');
- Inst->replaceAllUsesWith(C);
- ++NumConstProp;
- Inst->eraseFromParent();
- continue;
- }
-
- if (TD) {
- // See if we can constant fold its operands.
- for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
- i != e; ++i) {
- ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
- if (CE == 0) continue;
-
- // If we already folded this constant, don't try again.
- if (!FoldedConstants.insert(CE))
- continue;
-
- Constant *NewC = ConstantFoldConstantExpression(CE, TD);
- if (NewC && NewC != CE) {
- *i = NewC;
- MadeIRChange = true;
- }
- }
- }
-
-
- InstrsForInstCombineWorklist.push_back(Inst);
- }
-
- // Recursively visit successors. If this is a branch or switch on a
- // constant, only visit the reachable successor.
- TerminatorInst *TI = BB->getTerminator();
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
- bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
- BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
- Worklist.push_back(ReachableBB);
- continue;
- }
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
- // See if this is an explicit destination.
- for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
- if (SI->getCaseValue(i) == Cond) {
- BasicBlock *ReachableBB = SI->getSuccessor(i);
- Worklist.push_back(ReachableBB);
- continue;
- }
-
- // Otherwise it is the default destination.
- Worklist.push_back(SI->getSuccessor(0));
- continue;
- }
- }
-
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- Worklist.push_back(TI->getSuccessor(i));
- }
-
- // Once we've found all of the instructions to add to instcombine's worklist,
- // add them in reverse order. This way instcombine will visit from the top
- // of the function down. This jibes well with the way that it adds all uses
- // of instructions to the worklist after doing a transformation, thus avoiding
- // some N^2 behavior in pathological cases.
- IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
- InstrsForInstCombineWorklist.size());
-
- return MadeIRChange;
-}
-
-bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
- MadeIRChange = false;
-
- DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
- << F.getNameStr() << "\n");
-
- {
- // Do a depth-first traversal of the function, populate the worklist with
- // the reachable instructions. Ignore blocks that are not reachable. Keep
- // track of which blocks we visit.
- SmallPtrSet<BasicBlock*, 64> Visited;
- MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
-
- // Do a quick scan over the function. If we find any blocks that are
- // unreachable, remove any instructions inside of them. This prevents
- // the instcombine code from having to deal with some bad special cases.
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- if (!Visited.count(BB)) {
- Instruction *Term = BB->getTerminator();
- while (Term != BB->begin()) { // Remove instrs bottom-up
- BasicBlock::iterator I = Term; --I;
-
- DEBUG(errs() << "IC: DCE: " << *I << '\n');
- // A debug intrinsic shouldn't force another iteration if we weren't
- // going to do one without it.
- if (!isa<DbgInfoIntrinsic>(I)) {
- ++NumDeadInst;
- MadeIRChange = true;
- }
-
- // If I is not of void type, then replaceAllUsesWith undef.
- // This allows ValueHandles and custom metadata to adjust themselves.
- if (!I->getType()->isVoidTy())
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
- I->eraseFromParent();
- }
- }
- }
-
- while (!Worklist.isEmpty()) {
- Instruction *I = Worklist.RemoveOne();
- if (I == 0) continue; // skip null values.
-
- // Check to see if we can DCE the instruction.
- if (isInstructionTriviallyDead(I)) {
- DEBUG(errs() << "IC: DCE: " << *I << '\n');
- EraseInstFromFunction(*I);
- ++NumDeadInst;
- MadeIRChange = true;
- continue;
- }
-
- // Instruction isn't dead, see if we can constant propagate it.
- if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(I, TD)) {
- DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
-
- // Add operands to the worklist.
- ReplaceInstUsesWith(*I, C);
- ++NumConstProp;
- EraseInstFromFunction(*I);
- MadeIRChange = true;
- continue;
- }
-
- // See if we can trivially sink this instruction to a successor basic block.
- if (I->hasOneUse()) {
- BasicBlock *BB = I->getParent();
- Instruction *UserInst = cast<Instruction>(I->use_back());
- BasicBlock *UserParent;
-
- // Get the block the use occurs in.
- if (PHINode *PN = dyn_cast<PHINode>(UserInst))
- UserParent = PN->getIncomingBlock(I->use_begin().getUse());
- else
- UserParent = UserInst->getParent();
-
- if (UserParent != BB) {
- bool UserIsSuccessor = false;
- // See if the user is one of our successors.
- for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
- if (*SI == UserParent) {
- UserIsSuccessor = true;
- break;
- }
-
- // If the user is one of our immediate successors, and if that successor
- // only has us as a predecessor (we'd have to split the critical edge
- // otherwise), we can keep going.
- if (UserIsSuccessor && UserParent->getSinglePredecessor())
- // Okay, the CFG is simple enough, try to sink this instruction.
- MadeIRChange |= TryToSinkInstruction(I, UserParent);
- }
- }
-
- // Now that we have an instruction, try combining it to simplify it.
- Builder->SetInsertPoint(I->getParent(), I);
-
-#ifndef NDEBUG
- std::string OrigI;
-#endif
- DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
- DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
-
- if (Instruction *Result = visit(*I)) {
- ++NumCombined;
- // Should we replace the old instruction with a new one?
- if (Result != I) {
- DEBUG(errs() << "IC: Old = " << *I << '\n'
- << " New = " << *Result << '\n');
-
- // Everything uses the new instruction now.
- I->replaceAllUsesWith(Result);
-
- // Push the new instruction and any users onto the worklist.
- Worklist.Add(Result);
- Worklist.AddUsersToWorkList(*Result);
-
- // Move the name to the new instruction first.
- Result->takeName(I);
-
- // Insert the new instruction into the basic block...
- BasicBlock *InstParent = I->getParent();
- BasicBlock::iterator InsertPos = I;
-
- if (!isa<PHINode>(Result)) // If combining a PHI, don't insert
- while (isa<PHINode>(InsertPos)) // into the middle of a block of PHIs.
- ++InsertPos;
-
- InstParent->getInstList().insert(InsertPos, Result);
-
- EraseInstFromFunction(*I);
- } else {
-#ifndef NDEBUG
- DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
- << " New = " << *I << '\n');
-#endif
-
- // If the instruction was modified, it's possible that it is now dead.
- // If so, remove it.
- if (isInstructionTriviallyDead(I)) {
- EraseInstFromFunction(*I);
- } else {
- Worklist.Add(I);
- Worklist.AddUsersToWorkList(*I);
- }
- }
- MadeIRChange = true;
- }
- }
-
- Worklist.Zap();
- return MadeIRChange;
-}
-
-
-bool InstCombiner::runOnFunction(Function &F) {
- MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
- Context = &F.getContext();
- TD = getAnalysisIfAvailable<TargetData>();
-
-
- /// Builder - This is an IRBuilder that automatically inserts new
- /// instructions into the worklist when they are created.
- IRBuilder<true, TargetFolder, InstCombineIRInserter>
- TheBuilder(F.getContext(), TargetFolder(TD),
- InstCombineIRInserter(Worklist));
- Builder = &TheBuilder;
-
- bool EverMadeChange = false;
-
- // Iterate while there is work to do.
- unsigned Iteration = 0;
- while (DoOneIteration(F, Iteration++))
- EverMadeChange = true;
-
- Builder = 0;
- return EverMadeChange;
-}
-
-FunctionPass *llvm::createInstructionCombiningPass() {
- return new InstCombiner();
-}
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 7e6cf79..9531311 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -89,7 +89,7 @@ namespace {
bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
BasicBlock *SuccBB);
bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
- BasicBlock *PredBB);
+ const SmallVectorImpl<BasicBlock *> &PredBBs);
typedef SmallVectorImpl<std::pair<ConstantInt*,
BasicBlock*> > PredValueInfo;
@@ -102,7 +102,8 @@ namespace {
bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
- bool ProcessJumpOnPHI(PHINode *PN);
+ bool ProcessBranchOnPHI(PHINode *PN);
+ bool ProcessBranchOnXOR(BinaryOperator *BO);
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
};
@@ -118,16 +119,15 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
/// runOnFunction - Top level algorithm.
///
bool JumpThreading::runOnFunction(Function &F) {
- DEBUG(errs() << "Jump threading on function '" << F.getName() << "'\n");
+ DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TD = getAnalysisIfAvailable<TargetData>();
LVI = EnableLVI ? &getAnalysis<LazyValueInfo>() : 0;
FindLoopHeaders(F);
- bool AnotherIteration = true, EverChanged = false;
- while (AnotherIteration) {
- AnotherIteration = false;
- bool Changed = false;
+ bool Changed, EverChanged = false;
+ do {
+ Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
BasicBlock *BB = I;
// Thread all of the branches we can over this block.
@@ -140,7 +140,7 @@ bool JumpThreading::runOnFunction(Function &F) {
// edges which simplifies the CFG.
if (pred_begin(BB) == pred_end(BB) &&
BB != &BB->getParent()->getEntryBlock()) {
- DEBUG(errs() << " JT: Deleting dead block '" << BB->getName()
+ DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName()
<< "' with terminator: " << *BB->getTerminator() << '\n');
LoopHeaders.erase(BB);
DeleteDeadBlock(BB);
@@ -176,9 +176,8 @@ bool JumpThreading::runOnFunction(Function &F) {
}
}
}
- AnotherIteration = Changed;
EverChanged |= Changed;
- }
+ } while (Changed);
LoopHeaders.clear();
return EverChanged;
@@ -490,7 +489,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// terminator to an unconditional branch. This can occur due to threading in
// other blocks.
if (isa<ConstantInt>(Condition)) {
- DEBUG(errs() << " In block '" << BB->getName()
+ DEBUG(dbgs() << " In block '" << BB->getName()
<< "' folding terminator: " << *BB->getTerminator() << '\n');
++NumFolds;
ConstantFoldTerminator(BB);
@@ -509,7 +508,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
RemovePredecessorAndSimplify(BBTerm->getSuccessor(i), BB, TD);
}
- DEBUG(errs() << " In block '" << BB->getName()
+ DEBUG(dbgs() << " In block '" << BB->getName()
<< "' folding undef terminator: " << *BBTerm << '\n');
BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
BBTerm->eraseFromParent();
@@ -552,11 +551,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
}
- // See if this is a phi node in the current block.
- if (PHINode *PN = dyn_cast<PHINode>(CondInst))
- if (PN->getParent() == BB)
- return ProcessJumpOnPHI(PN);
-
if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
if (!LVI &&
(!isa<PHINode>(CondCmp->getOperand(0)) ||
@@ -585,8 +579,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// we see one, check to see if it's partially redundant. If so, insert a PHI
// which can then be used to thread the values.
//
- // This is particularly important because reg2mem inserts loads and stores all
- // over the place, and this blocks jump threading if we don't zap them.
Value *SimplifyValue = CondInst;
if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
if (isa<Constant>(CondCmp->getOperand(1)))
@@ -606,9 +598,21 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
if (ProcessThreadableEdges(CondInst, BB))
return true;
+ // If this is an otherwise-unfoldable branch on a phi node in the current
+ // block, see if we can simplify.
+ if (PHINode *PN = dyn_cast<PHINode>(CondInst))
+ if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+ return ProcessBranchOnPHI(PN);
+
+ // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
+ if (CondInst->getOpcode() == Instruction::Xor &&
+ CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+ return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
+
// TODO: If we have: "br (X > 0)" and we have a predecessor where we know
- // "(X == 4)" thread through this block.
+ // "(X == 4)", thread through this block.
return false;
}
@@ -636,7 +640,7 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
else if (PredBI->getSuccessor(0) != BB)
BranchDir = false;
else {
- DEBUG(errs() << " In block '" << PredBB->getName()
+ DEBUG(dbgs() << " In block '" << PredBB->getName()
<< "' folding terminator: " << *PredBB->getTerminator() << '\n');
++NumFolds;
ConstantFoldTerminator(PredBB);
@@ -648,7 +652,7 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
// If the dest block has one predecessor, just fix the branch condition to a
// constant and fold it.
if (BB->getSinglePredecessor()) {
- DEBUG(errs() << " In block '" << BB->getName()
+ DEBUG(dbgs() << " In block '" << BB->getName()
<< "' folding condition to '" << BranchDir << "': "
<< *BB->getTerminator() << '\n');
++NumFolds;
@@ -727,8 +731,8 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,
// Otherwise, we're safe to make the change. Make sure that the edge from
// DestSI to DestSucc is not critical and has no PHI nodes.
- DEBUG(errs() << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI);
- DEBUG(errs() << "THROUGH: " << *DestSI);
+ DEBUG(dbgs() << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI);
+ DEBUG(dbgs() << "THROUGH: " << *DestSI);
// If the destination has PHI nodes, just split the edge for updating
// simplicity.
@@ -979,14 +983,14 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
assert(!PredValues.empty() &&
"ComputeValueKnownInPredecessors returned true with no values");
- DEBUG(errs() << "IN BB: " << *BB;
+ DEBUG(dbgs() << "IN BB: " << *BB;
for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
- errs() << " BB '" << BB->getName() << "': FOUND condition = ";
+ dbgs() << " BB '" << BB->getName() << "': FOUND condition = ";
if (PredValues[i].first)
- errs() << *PredValues[i].first;
+ dbgs() << *PredValues[i].first;
else
- errs() << "UNDEF";
- errs() << " for pred '" << PredValues[i].second->getName()
+ dbgs() << "UNDEF";
+ dbgs() << " for pred '" << PredValues[i].second->getName()
<< "'.\n";
});
@@ -1070,36 +1074,110 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
return ThreadEdge(BB, PredsToFactor, MostPopularDest);
}
-/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in
-/// the current block. See if there are any simplifications we can do based on
-/// inputs to the phi node.
+/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
+/// a PHI node in the current block. See if there are any simplifications we
+/// can do based on inputs to the phi node.
///
-bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
+bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
BasicBlock *BB = PN->getParent();
- // If any of the predecessor blocks end in an unconditional branch, we can
- // *duplicate* the jump into that block in order to further encourage jump
- // threading and to eliminate cases where we have branch on a phi of an icmp
- // (branch on icmp is much better).
-
- // We don't want to do this transformation for switches, because we don't
- // really want to duplicate a switch.
- if (isa<SwitchInst>(BB->getTerminator()))
- return false;
+ // TODO: We could make use of this to do it once for blocks with common PHI
+ // values.
+ SmallVector<BasicBlock*, 1> PredBBs;
+ PredBBs.resize(1);
- // Look for unconditional branch predecessors.
+ // If any of the predecessor blocks end in an unconditional branch, we can
+ // *duplicate* the conditional branch into that block in order to further
+ // encourage jump threading and to eliminate cases where we have branch on a
+ // phi of an icmp (branch on icmp is much better).
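+ //
+ // For example (sketch), if PredBB ends in "br label %BB" and BB ends in
+ // "br i1 %phi, ...", the cloned branch in PredBB sees the phi operand coming
+ // from PredBB and can often fold to an unconditional jump.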
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *PredBB = PN->getIncomingBlock(i);
if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
- if (PredBr->isUnconditional() &&
- // Try to duplicate BB into PredBB.
- DuplicateCondBranchOnPHIIntoPred(BB, PredBB))
- return true;
+ if (PredBr->isUnconditional()) {
+ PredBBs[0] = PredBB;
+ // Try to duplicate BB into PredBB.
+ if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs))
+ return true;
+ }
}
return false;
}
+/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
+/// a xor instruction in the current block. See if there are any
+/// simplifications we can do based on inputs to the xor.
+///
+bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
+ BasicBlock *BB = BO->getParent();
+
+ // If either the LHS or RHS of the xor is a constant, don't do this
+ // optimization.
+ if (isa<ConstantInt>(BO->getOperand(0)) ||
+ isa<ConstantInt>(BO->getOperand(1)))
+ return false;
+
+ // If we have a xor as the branch input to this block, and we know that the
+ // LHS or RHS of the xor in any predecessor is true/false, then we can clone
+ // the condition into the predecessor and fix that value to true, saving some
+ // logical ops on that path and encouraging other paths to simplify.
+ //
+ // This copies something like this:
+ //
+ // BB:
+ // %X = phi i1 [1], [%X']
+ // %Y = icmp eq i32 %A, %B
+ // %Z = xor i1 %X, %Y
+ // br i1 %Z, ...
+ //
+ // Into:
+ // BB':
+ // %Y = icmp ne i32 %A, %B
+ // br i1 %Y, ...
+
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> XorOpValues;
+ bool isLHS = true;
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues)) {
+ assert(XorOpValues.empty());
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues))
+ return false;
+ isLHS = false;
+ }
+
+ assert(!XorOpValues.empty() &&
+ "ComputeValueKnownInPredecessors returned true with no values");
+
+ // Scan the information to see which is most popular: true or false. The
+ // predecessors can be of the set true, false, or undef.
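+ //
+ // For example (sketch), with predecessor values {true, true, false, undef},
+ // true wins (NumTrue = 2 vs. NumFalse = 1), so SplitVal becomes true and the
+ // true and undef predecessors are the ones folded into below.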
+ unsigned NumTrue = 0, NumFalse = 0;
+ for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
+ if (!XorOpValues[i].first) continue; // Ignore undefs for the count.
+ if (XorOpValues[i].first->isZero())
+ ++NumFalse;
+ else
+ ++NumTrue;
+ }
+
+ // Determine which value to split on: true, false, or undef if neither.
+ ConstantInt *SplitVal = 0;
+ if (NumTrue > NumFalse)
+ SplitVal = ConstantInt::getTrue(BB->getContext());
+ else if (NumTrue != 0 || NumFalse != 0)
+ SplitVal = ConstantInt::getFalse(BB->getContext());
+
+ // Collect all of the blocks that this can be folded into so that we can
+ // factor this once and clone it once.
+ SmallVector<BasicBlock*, 8> BlocksToFoldInto;
+ for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
+ if (XorOpValues[i].first != SplitVal && XorOpValues[i].first != 0) continue;
+
+ BlocksToFoldInto.push_back(XorOpValues[i].second);
+ }
+
+ // Try to duplicate BB into PredBB.
+ return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
+}
+
/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
@@ -1133,7 +1211,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
BasicBlock *SuccBB) {
// If threading to the same block as we come from, we would infinite loop.
if (SuccBB == BB) {
- DEBUG(errs() << " Not threading across BB '" << BB->getName()
+ DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
<< "' - would thread to self!\n");
return false;
}
@@ -1141,7 +1219,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// If threading this would thread across a loop header, don't thread the edge.
// See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) {
- DEBUG(errs() << " Not threading across loop header BB '" << BB->getName()
+ DEBUG(dbgs() << " Not threading across loop header BB '" << BB->getName()
<< "' to dest BB '" << SuccBB->getName()
<< "' - it might create an irreducible loop!\n");
return false;
@@ -1149,7 +1227,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
if (JumpThreadCost > Threshold) {
- DEBUG(errs() << " Not threading BB '" << BB->getName()
+ DEBUG(dbgs() << " Not threading BB '" << BB->getName()
<< "' - Cost is too high: " << JumpThreadCost << "\n");
return false;
}
@@ -1159,14 +1237,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
if (PredBBs.size() == 1)
PredBB = PredBBs[0];
else {
- DEBUG(errs() << " Factoring out " << PredBBs.size()
+ DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
".thr_comm", this);
}
// And finally, do it!
- DEBUG(errs() << " Threading edge from '" << PredBB->getName() << "' to '"
+ DEBUG(dbgs() << " Threading edge from '" << PredBB->getName() << "' to '"
<< SuccBB->getName() << "' with cost: " << JumpThreadCost
<< ", across block:\n "
<< *BB << "\n");
@@ -1235,7 +1313,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
if (UsesToRename.empty())
continue;
- DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n");
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
@@ -1246,7 +1324,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
- DEBUG(errs() << "\n");
+ DEBUG(dbgs() << "\n");
}
@@ -1263,20 +1341,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
// frequently happens because of phi translation.
- BI = NewBB->begin();
- for (BasicBlock::iterator E = NewBB->end(); BI != E; ) {
- Instruction *Inst = BI++;
-
- if (Value *V = SimplifyInstruction(Inst, TD)) {
- WeakVH BIHandle(BI);
- ReplaceAndSimplifyAllUses(Inst, V, TD);
- if (BIHandle == 0)
- BI = NewBB->begin();
- continue;
- }
-
- RecursivelyDeleteTriviallyDeadInstructions(Inst);
- }
+ SimplifyInstructionsInBlock(NewBB, TD);
// Threaded an edge!
++NumThreads;
@@ -1289,30 +1354,52 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
/// improves the odds that the branch will be on an analyzable instruction like
/// a compare.
bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
- BasicBlock *PredBB) {
+ const SmallVectorImpl<BasicBlock *> &PredBBs) {
+ assert(!PredBBs.empty() && "Can't handle an empty set");
+
// If BB is a loop header, then duplicating this block outside the loop would
// cause us to transform this into an irreducible loop; don't do this.
// See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) {
- DEBUG(errs() << " Not duplicating loop header '" << BB->getName()
- << "' into predecessor block '" << PredBB->getName()
+ DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
+ << "' into predecessor block '" << PredBBs[0]->getName()
<< "' - it might create an irreducible loop!\n");
return false;
}
unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
if (DuplicationCost > Threshold) {
- DEBUG(errs() << " Not duplicating BB '" << BB->getName()
+ DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
<< "' - Cost is too high: " << DuplicationCost << "\n");
return false;
}
+ // And finally, do it! Start by factoring the predecessors if needed.
+ BasicBlock *PredBB;
+ if (PredBBs.size() == 1)
+ PredBB = PredBBs[0];
+ else {
+ DEBUG(dbgs() << " Factoring out " << PredBBs.size()
+ << " common predecessors.\n");
+ PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
+ ".thr_comm", this);
+ }
+
// Okay, we decided to do this! Clone all the instructions in BB onto the end
// of PredBB.
- DEBUG(errs() << " Duplicating block '" << BB->getName() << "' into end of '"
+ DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '"
<< PredBB->getName() << "' to eliminate branch on phi. Cost: "
<< DuplicationCost << " block is:" << *BB << "\n");
+ // Unless PredBB ends with an unconditional branch, split the edge so that we
+ // can just clone the bits from BB into the end of the new PredBB.
+ BranchInst *OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
+
+ if (!OldPredBranch->isUnconditional()) {
+ PredBB = SplitEdge(PredBB, BB, this);
+ OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
+ }
+
// We are going to have to map operands from the original BB into PredBB.
// Evaluate PHI nodes in BB.
DenseMap<Instruction*, Value*> ValueMapping;
@@ -1321,15 +1408,10 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
- BranchInst *OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
-
// Clone the non-phi instructions of BB into PredBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
for (; BI != BB->end(); ++BI) {
Instruction *New = BI->clone();
- New->setName(BI->getName());
- PredBB->getInstList().insert(OldPredBranch, New);
- ValueMapping[BI] = New;
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
@@ -1338,6 +1420,19 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
if (I != ValueMapping.end())
New->setOperand(i, I->second);
}
+
+ // If this instruction can be simplified after the operands are updated,
+ // just use the simplified value instead. This frequently happens due to
+ // phi translation.
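+ // For example (sketch), if phi translation turned an operand into a
+ // constant, a cloned "icmp eq i32 4, 4" simplifies to true and is never
+ // inserted into PredBB.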
+ if (Value *IV = SimplifyInstruction(New, TD)) {
+ delete New;
+ ValueMapping[BI] = IV;
+ } else {
+ // Otherwise, insert the new instruction into the block.
+ New->setName(BI->getName());
+ PredBB->getInstList().insert(OldPredBranch, New);
+ ValueMapping[BI] = New;
+ }
}
// Check to see if the targets of the branch had PHI nodes. If so, we need to
@@ -1373,7 +1468,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
if (UsesToRename.empty())
continue;
- DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n");
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
@@ -1384,7 +1479,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
- DEBUG(errs() << "\n");
+ DEBUG(dbgs() << "\n");
}
// PredBB no longer jumps to BB, remove entries in the PHI node for the edge
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 99f3ae0..81f9ae6 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -384,10 +384,6 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
Size = AA->getTypeStoreSize(LI->getType());
return !pointerInvalidatedByLoop(LI->getOperand(0), Size);
} else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
- if (isa<DbgStopPointInst>(CI)) {
- // Don't hoist/sink dbgstoppoints, we handle them separately
- return false;
- }
// Handle obvious cases efficiently.
AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
if (Behavior == AliasAnalysis::DoesNotAccessMemory)
@@ -461,7 +457,7 @@ bool LICM::isLoopInvariantInst(Instruction &I) {
/// position, and may either delete it or move it to outside of the loop.
///
void LICM::sink(Instruction &I) {
- DEBUG(errs() << "LICM sinking instruction: " << I);
+ DEBUG(dbgs() << "LICM sinking instruction: " << I);
SmallVector<BasicBlock*, 8> ExitBlocks;
CurLoop->getExitBlocks(ExitBlocks);
@@ -603,7 +599,7 @@ void LICM::sink(Instruction &I) {
/// that is safe to hoist, this instruction is called to do the dirty work.
///
void LICM::hoist(Instruction &I) {
- DEBUG(errs() << "LICM hoisting to " << Preheader->getName() << ": "
+ DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": "
<< I << "\n");
// Remove the instruction from its current basic block... but don't delete the
@@ -859,7 +855,7 @@ void LICM::FindPromotableValuesInLoop(
for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI));
- DEBUG(errs() << "LICM: Promoting value: " << *V << "\n");
+ DEBUG(dbgs() << "LICM: Promoting value: " << *V << "\n");
}
}
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 1d9dd68..16d3f2f 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -708,7 +708,7 @@ void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP,
}
while (!WorkList.empty()) {
- BasicBlock *BB = WorkList.back(); WorkList.pop_back();
+ BasicBlock *BB = WorkList.pop_back_val();
LPM->deleteSimpleAnalysisValue(BB, LP);
for(BasicBlock::iterator BBI = BB->begin(), BBE = BB->end();
BBI != BBE; ) {
@@ -726,7 +726,7 @@ void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP,
// Update Frontier BBs' dominator info.
while (!FrontierBBs.empty()) {
- BasicBlock *FBB = FrontierBBs.back(); FrontierBBs.pop_back();
+ BasicBlock *FBB = FrontierBBs.pop_back_val();
BasicBlock *NewDominator = FBB->getSinglePredecessor();
if (!NewDominator) {
pred_iterator PI = pred_begin(FBB), PE = pred_end(FBB);
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 85f7368..fa820ed 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -2723,7 +2723,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
// At this point, it is worth checking to see if any recurrence PHIs are also
// dead, so that we can remove them as well.
- DeleteDeadPHIs(L->getHeader());
+ Changed |= DeleteDeadPHIs(L->getHeader());
return Changed;
}
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index c2bf9f2..ee8cb4f 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -89,7 +89,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
LoopInfo *LI = &getAnalysis<LoopInfo>();
BasicBlock *Header = L->getHeader();
- DEBUG(errs() << "Loop Unroll: F[" << Header->getParent()->getName()
+ DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
<< "] Loop %" << Header->getName() << "\n");
(void)Header;
@@ -111,13 +111,13 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Enforce the threshold.
if (UnrollThreshold != NoThreshold) {
unsigned LoopSize = ApproximateLoopSize(L);
- DEBUG(errs() << " Loop Size = " << LoopSize << "\n");
+ DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
uint64_t Size = (uint64_t)LoopSize*Count;
if (TripCount != 1 && Size > UnrollThreshold) {
- DEBUG(errs() << " Too large to fully unroll with count: " << Count
+ DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
<< " because size: " << Size << ">" << UnrollThreshold << "\n");
if (!UnrollAllowPartial) {
- DEBUG(errs() << " will not try to unroll partially because "
+ DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
return false;
}
@@ -127,10 +127,10 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
Count--;
}
if (Count < 2) {
- DEBUG(errs() << " could not unroll partially\n");
+ DEBUG(dbgs() << " could not unroll partially\n");
return false;
}
- DEBUG(errs() << " partially unrolling with count: " << Count << "\n");
+ DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n");
}
}
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 0c19133..527a7b5 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -436,7 +436,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){
if (Metrics.NumInsts > Threshold ||
Metrics.NumBlocks * 5 > Threshold ||
Metrics.NeverInline) {
- DEBUG(errs() << "NOT unswitching loop %"
+ DEBUG(dbgs() << "NOT unswitching loop %"
<< currentLoop->getHeader()->getName() << ", cost too high: "
<< currentLoop->getBlocks().size() << "\n");
return false;
@@ -522,7 +522,7 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
Constant *Val,
BasicBlock *ExitBlock) {
- DEBUG(errs() << "loop-unswitch: Trivial-Unswitch loop %"
+ DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"
<< loopHeader->getName() << " [" << L->getBlocks().size()
<< " blocks] in Function " << L->getHeader()->getParent()->getName()
<< " on cond: " << *Val << " == " << *Cond << "\n");
@@ -581,7 +581,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
Loop *L) {
Function *F = loopHeader->getParent();
- DEBUG(errs() << "loop-unswitch: Unswitching loop %"
+ DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
<< loopHeader->getName() << " [" << L->getBlocks().size()
<< " blocks] in Function " << F->getName()
<< " when '" << *Val << "' == " << *LIC << "\n");
@@ -707,7 +707,7 @@ static void RemoveFromWorklist(Instruction *I,
static void ReplaceUsesOfWith(Instruction *I, Value *V,
std::vector<Instruction*> &Worklist,
Loop *L, LPPassManager *LPM) {
- DEBUG(errs() << "Replace with '" << *V << "': " << *I);
+ DEBUG(dbgs() << "Replace with '" << *V << "': " << *I);
// Add uses to the worklist, which may be dead now.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
@@ -769,7 +769,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
return;
}
- DEBUG(errs() << "Nuking dead block: " << *BB);
+ DEBUG(dbgs() << "Nuking dead block: " << *BB);
// Remove the instructions in the basic block from the worklist.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
@@ -867,7 +867,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
// in the loop with the appropriate one directly.
if (IsEqual || (isa<ConstantInt>(Val) &&
- Val->getType() == Type::getInt1Ty(Val->getContext()))) {
+ Val->getType()->isInteger(1))) {
Value *Replacement;
if (IsEqual)
Replacement = Val;
@@ -968,7 +968,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// Simple DCE.
if (isInstructionTriviallyDead(I)) {
- DEBUG(errs() << "Remove dead instruction '" << *I);
+ DEBUG(dbgs() << "Remove dead instruction '" << *I);
// Add uses to the worklist, which may be dead now.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
@@ -993,10 +993,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
case Instruction::And:
if (isa<ConstantInt>(I->getOperand(0)) &&
// constant -> RHS
- I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext()))
+ I->getOperand(0)->getType()->isInteger(1))
cast<BinaryOperator>(I)->swapOperands();
if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1)))
- if (CB->getType() == Type::getInt1Ty(I->getContext())) {
+ if (CB->getType()->isInteger(1)) {
if (CB->isOne()) // X & 1 -> X
ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM);
else // X & 0 -> 0
@@ -1007,10 +1007,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
case Instruction::Or:
if (isa<ConstantInt>(I->getOperand(0)) &&
// constant -> RHS
- I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext()))
+ I->getOperand(0)->getType()->isInteger(1))
cast<BinaryOperator>(I)->swapOperands();
if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1)))
- if (CB->getType() == Type::getInt1Ty(I->getContext())) {
+ if (CB->getType()->isInteger(1)) {
if (CB->isOne()) // X | 1 -> 1
ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM);
else // X | 0 -> X
@@ -1029,7 +1029,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
if (!SinglePred) continue; // Nothing to do.
assert(SinglePred == Pred && "CFG broken");
- DEBUG(errs() << "Merging blocks: " << Pred->getName() << " <- "
+ DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- "
<< Succ->getName() << "\n");
// Resolve any single entry PHI nodes in Succ.
@@ -1057,7 +1057,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// remove dead blocks.
break; // FIXME: Enable.
- DEBUG(errs() << "Folded branch: " << *BI);
+ DEBUG(dbgs() << "Folded branch: " << *BI);
BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue());
BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue());
DeadSucc->removePredecessor(BI->getParent(), true);
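The Type::getInt1Ty comparisons above become Type::isInteger(1), a predicate on the type itself that needs no LLVMContext at the call site. A minimal sketch of the two spellings (the helper name is made up):

    #include "llvm/Type.h"
    #include "llvm/Value.h"
    using namespace llvm;

    // Old spelling: V->getType() == Type::getInt1Ty(V->getContext())
    static bool isBoolTyped(const Value *V) {
      return V->getType()->isInteger(1);  // integer type of width 1
    }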
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index c922814..e0aa491 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -42,7 +42,7 @@ static Value *isBytewiseValue(Value *V) {
LLVMContext &Context = V->getContext();
// All byte-wide stores are splatable, even of arbitrary variables.
- if (V->getType() == Type::getInt8Ty(Context)) return V;
+ if (V->getType()->isInteger(8)) return V;
// Constant float and double values can be handled as integer values if the
// corresponding integer value is "byteable". An important case is 0.0.
@@ -456,10 +456,10 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
ConstantInt::get(Type::getInt32Ty(Context), Range.Alignment)
};
Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt);
- DEBUG(errs() << "Replace stores:\n";
+ DEBUG(dbgs() << "Replace stores:\n";
for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
- errs() << *Range.TheStores[i];
- errs() << "With: " << *C); C=C;
+ dbgs() << *Range.TheStores[i];
+ dbgs() << "With: " << *C); C=C;
// Don't invalidate the iterator
BBI = BI;
@@ -562,8 +562,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
srcAlloca->use_end());
while (!srcUseList.empty()) {
- User *UI = srcUseList.back();
- srcUseList.pop_back();
+ User *UI = srcUseList.pop_back_val();
if (isa<BitCastInst>(UI)) {
for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
@@ -725,7 +724,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
AliasAnalysis::NoAlias)
return false;
- DEBUG(errs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
+ DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
// If not, then we know we can transform this.
Module *Mod = M->getParent()->getParent()->getParent();
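The "C=C;" self-assignment retained above is the old trick for silencing unused-variable warnings when a value is consumed only by DEBUG() or assert(), both of which vanish in release builds; later in this patch Reassociate uses the more conventional (void) cast for the same purpose. A small self-contained illustration:

    #include <cassert>

    static void checkTreeSize(unsigned NumAddedValues) {
      assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
      // The assert is compiled out under NDEBUG, leaving the parameter
      // otherwise unread; the cast keeps -Wunused warnings quiet.
      (void)NumAddedValues;
    }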
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 827b47d..4a99f4a 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -60,12 +60,12 @@ namespace {
///
static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
Module *M = I->getParent()->getParent()->getParent();
- errs() << Instruction::getOpcodeName(I->getOpcode()) << " "
+ dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
<< *Ops[0].Op->getType() << '\t';
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- errs() << "[ ";
- WriteAsOperand(errs(), Ops[i].Op, false, M);
- errs() << ", #" << Ops[i].Rank << "] ";
+ dbgs() << "[ ";
+ WriteAsOperand(dbgs(), Ops[i].Op, false, M);
+ dbgs() << ", #" << Ops[i].Rank << "] ";
}
}
#endif
@@ -186,7 +186,7 @@ unsigned Reassociate::getRank(Value *V) {
(!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I)))
++Rank;
- //DEBUG(errs() << "Calculated Rank[" << V->getName() << "] = "
+ //DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = "
// << Rank << "\n");
return ValueRankMap[I] = Rank;
@@ -226,7 +226,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) {
isReassociableOp(RHS, I->getOpcode()) &&
"Not an expression that needs linearization?");
- DEBUG(errs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n');
+ DEBUG(dbgs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n');
// Move the RHS instruction to live immediately before I, avoiding breaking
// dominator properties.
@@ -239,7 +239,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) {
++NumLinear;
MadeChange = true;
- DEBUG(errs() << "Linearized: " << *I << '\n');
+ DEBUG(dbgs() << "Linearized: " << *I << '\n');
// If D is part of this expression tree, tail recurse.
if (isReassociableOp(I->getOperand(1), I->getOpcode()))
@@ -335,10 +335,10 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
if (I->getOperand(0) != Ops[i].Op ||
I->getOperand(1) != Ops[i+1].Op) {
Value *OldLHS = I->getOperand(0);
- DEBUG(errs() << "RA: " << *I << '\n');
+ DEBUG(dbgs() << "RA: " << *I << '\n');
I->setOperand(0, Ops[i].Op);
I->setOperand(1, Ops[i+1].Op);
- DEBUG(errs() << "TO: " << *I << '\n');
+ DEBUG(dbgs() << "TO: " << *I << '\n');
MadeChange = true;
++NumChanged;
@@ -351,9 +351,9 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
assert(i+2 < Ops.size() && "Ops index out of range!");
if (I->getOperand(1) != Ops[i].Op) {
- DEBUG(errs() << "RA: " << *I << '\n');
+ DEBUG(dbgs() << "RA: " << *I << '\n');
I->setOperand(1, Ops[i].Op);
- DEBUG(errs() << "TO: " << *I << '\n');
+ DEBUG(dbgs() << "TO: " << *I << '\n');
MadeChange = true;
++NumChanged;
}
@@ -414,6 +414,10 @@ static Value *NegateValue(Value *V, Instruction *BI) {
// non-instruction value) or right after the definition. These negates will
// be zapped by reassociate later, so we don't need much finesse here.
BinaryOperator *TheNeg = cast<BinaryOperator>(*UI);
+
+  // Verify that the negate is in this function; V might be a constant expr.
+ if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
+ continue;
BasicBlock::iterator InsertPt;
if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
@@ -480,7 +484,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub,
Sub->replaceAllUsesWith(New);
Sub->eraseFromParent();
- DEBUG(errs() << "Negated: " << *New << '\n');
+ DEBUG(dbgs() << "Negated: " << *New << '\n');
return New;
}
@@ -788,6 +792,11 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
SmallVector<Value*, 4> NewMulOps;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ // Only try to remove factors from expressions we're allowed to.
+ BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
+ if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty())
+ continue;
+
if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
NewMulOps.push_back(V);
Ops.erase(Ops.begin()+i);
@@ -797,14 +806,15 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// No need for extra uses anymore.
delete DummyInst;
-
+
unsigned NumAddedValues = NewMulOps.size();
Value *V = EmitAddTreeOfValues(I, NewMulOps);
-
+
// Now that we have inserted the add tree, optimize it. This allows us to
// handle cases that require multiple factoring steps, such as this:
// A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
+ (void)NumAddedValues;
V = ReassociateExpression(cast<BinaryOperator>(V));
// Create the multiply.
@@ -928,6 +938,10 @@ void Reassociate::ReassociateBB(BasicBlock *BB) {
if (BI->getOpcode() == Instruction::Sub) {
if (ShouldBreakUpSubtract(BI)) {
BI = BreakUpSubtract(BI, ValueRankMap);
+ // Reset the BBI iterator in case BreakUpSubtract changed the
+ // instruction it points to.
+ BBI = BI;
+ ++BBI;
MadeChange = true;
} else if (BinaryOperator::isNeg(BI)) {
// Otherwise, this is a negation. See if the operand is a multiply tree
@@ -967,7 +981,7 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
SmallVector<ValueEntry, 8> Ops;
LinearizeExprTree(I, Ops);
- DEBUG(errs() << "RAIn:\t"; PrintOps(I, Ops); errs() << '\n');
+ DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
// Now that we have linearized the tree to a list and have gathered all of
// the operands and their ranks, sort the operands by their rank. Use a
@@ -982,7 +996,7 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
if (Value *V = OptimizeExpression(I, Ops)) {
// This expression tree simplified to something that isn't a tree,
// eliminate it.
- DEBUG(errs() << "Reassoc to scalar: " << *V << '\n');
+ DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
I->replaceAllUsesWith(V);
RemoveDeadBinaryOp(I);
++NumAnnihil;
@@ -1001,7 +1015,7 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
Ops.insert(Ops.begin(), Tmp);
}
- DEBUG(errs() << "RAOut:\t"; PrintOps(I, Ops); errs() << '\n');
+ DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
if (Ops.size() == 1) {
// This expression tree simplified to something that isn't a tree,
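The ReassociateBB hunk above re-anchors BBI after BreakUpSubtract because the transform erases the instruction the iterator referred to and returns its replacement. A container-agnostic sketch of the same re-anchoring pattern, with std::list standing in for the instruction list:

    #include <list>

    static void negateNegatives(std::list<int> &Block) {
      for (std::list<int>::iterator BBI = Block.begin(), E = Block.end();
           BBI != E; ++BBI) {
        if (*BBI < 0) {
          std::list<int>::iterator NewI = Block.insert(BBI, -*BBI);
          Block.erase(BBI);  // the old element is gone; BBI now dangles
          BBI = NewI;        // reset so ++BBI lands on the next element
        }
      }
    }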
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index d8c59b1..02b45a1 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -218,7 +218,7 @@ public:
/// This returns true if the block was not considered live before.
bool MarkBlockExecutable(BasicBlock *BB) {
if (!BBExecutable.insert(BB)) return false;
- DEBUG(errs() << "Marking Block Executable: " << BB->getName() << "\n");
+ DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n");
BBWorkList.push_back(BB); // Add the block to the work list!
return true;
}
@@ -316,7 +316,7 @@ private:
//
void markConstant(LatticeVal &IV, Value *V, Constant *C) {
if (!IV.markConstant(C)) return;
- DEBUG(errs() << "markConstant: " << *C << ": " << *V << '\n');
+ DEBUG(dbgs() << "markConstant: " << *C << ": " << *V << '\n');
InstWorkList.push_back(V);
}
@@ -328,7 +328,7 @@ private:
void markForcedConstant(Value *V, Constant *C) {
assert(!isa<StructType>(V->getType()) && "Should use other method");
ValueState[V].markForcedConstant(C);
- DEBUG(errs() << "markForcedConstant: " << *C << ": " << *V << '\n');
+ DEBUG(dbgs() << "markForcedConstant: " << *C << ": " << *V << '\n');
InstWorkList.push_back(V);
}
@@ -339,11 +339,11 @@ private:
void markOverdefined(LatticeVal &IV, Value *V) {
if (!IV.markOverdefined()) return;
- DEBUG(errs() << "markOverdefined: ";
+ DEBUG(dbgs() << "markOverdefined: ";
if (Function *F = dyn_cast<Function>(V))
- errs() << "Function '" << F->getName() << "'\n";
+ dbgs() << "Function '" << F->getName() << "'\n";
else
- errs() << *V << '\n');
+ dbgs() << *V << '\n');
// Only instructions go on the work list
OverdefinedInstWorkList.push_back(V);
}
@@ -431,7 +431,7 @@ private:
// If the destination is already executable, we just made an *edge*
// feasible that wasn't before. Revisit the PHI nodes in the block
// because they have potentially new operands.
- DEBUG(errs() << "Marking Edge Executable: " << Source->getName()
+ DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
<< " -> " << Dest->getName() << "\n");
PHINode *PN;
@@ -516,7 +516,7 @@ private:
void visitInstruction(Instruction &I) {
// If a new instruction is added to LLVM that we don't handle.
- errs() << "SCCP: Don't know how to handle: " << I;
+ dbgs() << "SCCP: Don't know how to handle: " << I;
markAnythingOverdefined(&I); // Just in case
}
};
@@ -580,7 +580,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
}
#ifndef NDEBUG
- errs() << "Unknown terminator instruction: " << TI << '\n';
+ dbgs() << "Unknown terminator instruction: " << TI << '\n';
#endif
llvm_unreachable("SCCP: Don't know how to handle this terminator!");
}
@@ -640,7 +640,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
return true;
#ifndef NDEBUG
- errs() << "Unknown terminator instruction: " << *TI << '\n';
+ dbgs() << "Unknown terminator instruction: " << *TI << '\n';
#endif
llvm_unreachable(0);
}
@@ -1324,7 +1324,7 @@ void SCCPSolver::Solve() {
while (!OverdefinedInstWorkList.empty()) {
Value *I = OverdefinedInstWorkList.pop_back_val();
- DEBUG(errs() << "\nPopped off OI-WL: " << *I << '\n');
+ DEBUG(dbgs() << "\nPopped off OI-WL: " << *I << '\n');
// "I" got into the work list because it either made the transition from
// bottom to constant
@@ -1343,7 +1343,7 @@ void SCCPSolver::Solve() {
while (!InstWorkList.empty()) {
Value *I = InstWorkList.pop_back_val();
- DEBUG(errs() << "\nPopped off I-WL: " << *I << '\n');
+ DEBUG(dbgs() << "\nPopped off I-WL: " << *I << '\n');
// "I" got into the work list because it made the transition from undef to
// constant.
@@ -1364,7 +1364,7 @@ void SCCPSolver::Solve() {
BasicBlock *BB = BBWorkList.back();
BBWorkList.pop_back();
- DEBUG(errs() << "\nPopped off BBWL: " << *BB << '\n');
+ DEBUG(dbgs() << "\nPopped off BBWL: " << *BB << '\n');
// Notify all instructions in this basic block that they are newly
// executable.
@@ -1597,7 +1597,7 @@ FunctionPass *llvm::createSCCPPass() {
}
static void DeleteInstructionInBlock(BasicBlock *BB) {
- DEBUG(errs() << " BasicBlock Dead:" << *BB);
+ DEBUG(dbgs() << " BasicBlock Dead:" << *BB);
++NumDeadBlocks;
// Delete the instructions backwards, as it has a reduced likelihood of
@@ -1616,7 +1616,7 @@ static void DeleteInstructionInBlock(BasicBlock *BB) {
// and return true if the function was modified.
//
bool SCCP::runOnFunction(Function &F) {
- DEBUG(errs() << "SCCP on function '" << F.getName() << "'\n");
+ DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
// Mark the first block of the function as being executable.
@@ -1630,7 +1630,7 @@ bool SCCP::runOnFunction(Function &F) {
bool ResolvedUndefs = true;
while (ResolvedUndefs) {
Solver.Solve();
- DEBUG(errs() << "RESOLVING UNDEFs\n");
+ DEBUG(dbgs() << "RESOLVING UNDEFs\n");
ResolvedUndefs = Solver.ResolvedUndefsIn(F);
}
@@ -1665,7 +1665,7 @@ bool SCCP::runOnFunction(Function &F) {
Constant *Const = IV.isConstant()
? IV.getConstant() : UndefValue::get(Inst->getType());
- DEBUG(errs() << " Constant: " << *Const << " = " << *Inst);
+ DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst);
// Replaces all of the uses of a variable with uses of the constant.
Inst->replaceAllUsesWith(Const);
@@ -1775,7 +1775,7 @@ bool IPSCCP::runOnModule(Module &M) {
while (ResolvedUndefs) {
Solver.Solve();
- DEBUG(errs() << "RESOLVING UNDEFS\n");
+ DEBUG(dbgs() << "RESOLVING UNDEFS\n");
ResolvedUndefs = false;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
ResolvedUndefs |= Solver.ResolvedUndefsIn(*F);
@@ -1802,7 +1802,7 @@ bool IPSCCP::runOnModule(Module &M) {
Constant *CST = IV.isConstant() ?
IV.getConstant() : UndefValue::get(AI->getType());
- DEBUG(errs() << "*** Arg " << *AI << " = " << *CST <<"\n");
+ DEBUG(dbgs() << "*** Arg " << *AI << " = " << *CST <<"\n");
// Replaces all of the uses of a variable with uses of the
// constant.
@@ -1847,7 +1847,7 @@ bool IPSCCP::runOnModule(Module &M) {
Constant *Const = IV.isConstant()
? IV.getConstant() : UndefValue::get(Inst->getType());
- DEBUG(errs() << " Constant: " << *Const << " = " << *Inst);
+ DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst);
// Replaces all of the uses of a variable with uses of the
// constant.
@@ -1944,7 +1944,7 @@ bool IPSCCP::runOnModule(Module &M) {
GlobalVariable *GV = I->first;
assert(!I->second.isOverdefined() &&
"Overdefined values should have been taken out of the map!");
- DEBUG(errs() << "Found that GV '" << GV->getName() << "' is constant!\n");
+ DEBUG(dbgs() << "Found that GV '" << GV->getName() << "' is constant!\n");
while (!GV->use_empty()) {
StoreInst *SI = cast<StoreInst>(GV->use_back());
SI->eraseFromParent();
diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp
index f91fbda..9685a29 100644
--- a/lib/Transforms/Scalar/SCCVN.cpp
+++ b/lib/Transforms/Scalar/SCCVN.cpp
@@ -678,8 +678,7 @@ bool SCCVN::runOnFunction(Function& F) {
stack.push_back(*PI);
while (!stack.empty()) {
- BasicBlock* CurrBB = stack.back();
- stack.pop_back();
+ BasicBlock* CurrBB = stack.pop_back_val();
visited.insert(CurrBB);
ValueNumberScope* S = BBMap[CurrBB];
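MemCpyOptimizer, SCCVN, and (below) ScalarReplAggregates all fuse back()/pop_back() pairs into SmallVector::pop_back_val(), which removes and returns the last element in one call. A minimal sketch with a made-up worklist:

    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    static int drainWorklist(SmallVectorImpl<int> &Worklist) {
      int Sum = 0;
      while (!Worklist.empty())
        Sum += Worklist.pop_back_val();  // back() and pop_back() fused
      return Sum;
    }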
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 79bb7c5..9e1e79a 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -252,8 +252,8 @@ bool SROA::performScalarRepl(Function &F) {
// constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
// is only subsequently read.
if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
- DEBUG(errs() << "Found alloca equal to global: " << *AI << '\n');
- DEBUG(errs() << " memcpy = " << *TheCopy << '\n');
+ DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
+ DEBUG(dbgs() << " memcpy = " << *TheCopy << '\n');
Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2));
AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
TheCopy->eraseFromParent(); // Don't mutate the global.
@@ -314,14 +314,14 @@ bool SROA::performScalarRepl(Function &F) {
// we just get a lot of insert/extracts. If at least one vector is
// involved, then we probably really do have a union of vector/array.
if (VectorTy && isa<VectorType>(VectorTy) && HadAVector) {
- DEBUG(errs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
+ DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
<< *VectorTy << '\n');
// Create and insert the vector alloca.
NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin());
ConvertUsesToScalar(AI, NewAI, 0);
} else {
- DEBUG(errs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
+ DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
// Create and insert the integer alloca.
const Type *NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
@@ -345,7 +345,7 @@ bool SROA::performScalarRepl(Function &F) {
/// predicate, do SROA now.
void SROA::DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList) {
- DEBUG(errs() << "Found inst to SROA: " << *AI << '\n');
+ DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
SmallVector<AllocaInst*, 32> ElementAllocas;
if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
ElementAllocas.reserve(ST->getNumContainedTypes());
@@ -919,7 +919,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
IntegerType::get(SI->getContext(), AllocaSizeBits),
"", SI);
- DEBUG(errs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
+ DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
<< '\n');
// There are two forms here: AI could be an array or struct. Both cases
@@ -1029,7 +1029,7 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
const Type *AllocaEltTy = AI->getAllocatedType();
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
- DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
+ DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
<< '\n');
// There are two forms here: AI could be an array or struct. Both cases
@@ -1153,7 +1153,7 @@ int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
isSafeForScalarRepl(AI, AI, 0, Info);
if (Info.isUnsafe) {
- DEBUG(errs() << "Cannot transform: " << *AI << '\n');
+ DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
return 0;
}
@@ -1181,7 +1181,7 @@ void SROA::CleanupAllocaUsers(Value *V) {
if (!isa<StoreInst>(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) {
// Safe to remove debug info uses.
while (!DbgInUses.empty()) {
- DbgInfoIntrinsic *DI = DbgInUses.back(); DbgInUses.pop_back();
+ DbgInfoIntrinsic *DI = DbgInUses.pop_back_val();
DI->eraseFromParent();
}
I->eraseFromParent();
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index a36da78..43447de 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -99,9 +99,8 @@ static bool MarkAliveBlocks(BasicBlock *BB,
SmallVector<BasicBlock*, 128> Worklist;
Worklist.push_back(BB);
bool Changed = false;
- while (!Worklist.empty()) {
- BB = Worklist.back();
- Worklist.pop_back();
+ do {
+ BB = Worklist.pop_back_val();
if (!Reachable.insert(BB))
continue;
@@ -150,7 +149,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
Changed |= ConstantFoldTerminator(BB);
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
Worklist.push_back(*SI);
- }
+ } while (!Worklist.empty());
return Changed;
}
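MarkAliveBlocks here, and CloneLoop and PromoteMem2Reg further below, switch from while to do/while because each worklist is seeded before the loop, so the initial emptiness test of a while-loop could never fire. A standalone sketch of the pattern:

    #include <vector>

    // Precondition: Worklist already holds at least one entry.
    static unsigned visitAll(std::vector<unsigned> &Worklist) {
      unsigned Visited = 0;
      do {
        unsigned BB = Worklist.back();
        Worklist.pop_back();
        ++Visited;
        (void)BB;  // a real pass would visit BB and push successors here
      } while (!Worklist.empty());
      return Visited;
    }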
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 3c28ad2..9183f3a 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -80,7 +80,7 @@ public:
/// specified pointer and character. Ptr is required to be some pointer type,
/// and the return value has 'i8*' type.
Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B);
-
+
/// EmitMemCpy - Emit a call to the memcpy function to the builder. This
/// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len,
@@ -101,10 +101,11 @@ public:
/// EmitMemSet - Emit a call to the memset function
Value *EmitMemSet(Value *Dst, Value *Val, Value *Len, IRBuilder<> &B);
- /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
- /// 'floor'). This function is known to take a single of type matching 'Op'
- /// and returns one value with the same type. If 'Op' is a long double, 'l'
- /// is added as the suffix of name, if 'Op' is a float, we add a 'f' suffix.
+ /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name'
+  /// (e.g. 'floor'). This function is known to take a single argument of
+  /// type matching 'Op' and returns one value with the same type. If 'Op'
+  /// is a long double, 'l' is added as a suffix to the name; if 'Op' is a
+  /// float, an 'f' suffix is added.
Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B,
const AttrListPtr &Attrs);
@@ -163,7 +164,7 @@ Value *LibCallOptimization::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B) {
Module *M = Caller->getParent();
AttributeWithIndex AWI =
AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
-
+
const Type *I8Ptr = Type::getInt8PtrTy(*Context);
const Type *I32Ty = Type::getInt32Ty(*Context);
Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1),
@@ -236,8 +237,8 @@ Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2,
Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3),
Type::getInt32Ty(*Context),
- Type::getInt8PtrTy(*Context),
- Type::getInt8PtrTy(*Context),
+ Type::getInt8PtrTy(*Context),
+ Type::getInt8PtrTy(*Context),
TD->getIntPtrType(*Context), NULL);
CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
Len, "memcmp");
@@ -504,8 +505,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
// Must be a Constant Array
ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
- if (!Array ||
- Array->getType()->getElementType() != Type::getInt8Ty(V->getContext()))
+ if (!Array || !Array->getType()->getElementType()->isInteger(8))
return false;
// Get the number of elements in the array
@@ -677,8 +677,7 @@ struct StrChrOpt : public LibCallOptimization {
if (!TD) return 0;
uint64_t Len = GetStringLength(SrcStr);
- if (Len == 0 ||
- FT->getParamType(1) != Type::getInt32Ty(*Context)) // memchr needs i32.
+ if (Len == 0 || !FT->getParamType(1)->isInteger(32)) // memchr needs i32.
return 0;
return EmitMemChr(SrcStr, CI->getOperand(2), // include nul.
@@ -720,7 +719,7 @@ struct StrCmpOpt : public LibCallOptimization {
// Verify the "strcmp" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
- FT->getReturnType() != Type::getInt32Ty(*Context) ||
+ !FT->getReturnType()->isInteger(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(*Context))
return 0;
@@ -768,7 +767,7 @@ struct StrNCmpOpt : public LibCallOptimization {
// Verify the "strncmp" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 ||
- FT->getReturnType() != Type::getInt32Ty(*Context) ||
+ !FT->getReturnType()->isInteger(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
!isa<IntegerType>(FT->getParamType(2)))
@@ -949,20 +948,20 @@ struct StrStrOpt : public LibCallOptimization {
// fold strstr(x, x) -> x.
if (CI->getOperand(1) == CI->getOperand(2))
return B.CreateBitCast(CI->getOperand(1), CI->getType());
-
+
// See if either input string is a constant string.
std::string SearchStr, ToFindStr;
bool HasStr1 = GetConstantStringInfo(CI->getOperand(1), SearchStr);
bool HasStr2 = GetConstantStringInfo(CI->getOperand(2), ToFindStr);
-
+
// fold strstr(x, "") -> x.
if (HasStr2 && ToFindStr.empty())
return B.CreateBitCast(CI->getOperand(1), CI->getType());
-
+
// If both strings are known, constant fold it.
if (HasStr1 && HasStr2) {
std::string::size_type Offset = SearchStr.find(ToFindStr);
-
+
if (Offset == std::string::npos) // strstr("foo", "bar") -> null
return Constant::getNullValue(CI->getType());
@@ -971,7 +970,7 @@ struct StrStrOpt : public LibCallOptimization {
Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
return B.CreateBitCast(Result, CI->getType());
}
-
+
// fold strstr(x, "y") -> strchr(x, 'y').
if (HasStr2 && ToFindStr.size() == 1)
return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B),
@@ -979,7 +978,7 @@ struct StrStrOpt : public LibCallOptimization {
return 0;
}
};
-
+
//===---------------------------------------===//
// 'memcmp' Optimizations
@@ -989,7 +988,7 @@ struct MemCmpOpt : public LibCallOptimization {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || !isa<PointerType>(FT->getParamType(0)) ||
!isa<PointerType>(FT->getParamType(1)) ||
- FT->getReturnType() != Type::getInt32Ty(*Context))
+ !FT->getReturnType()->isInteger(32))
return 0;
Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2);
@@ -1096,27 +1095,6 @@ struct MemSetOpt : public LibCallOptimization {
//===----------------------------------------------------------------------===//
//===---------------------------------------===//
-// 'object size'
-namespace {
-struct SizeOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // TODO: We can do more with this, but delaying to here should be no change
- // in behavior.
- ConstantInt *Const = dyn_cast<ConstantInt>(CI->getOperand(2));
-
- if (!Const) return 0;
-
- const Type *Ty = Callee->getFunctionType()->getReturnType();
-
- if (Const->getZExtValue() == 0)
- return Constant::getAllOnesValue(Ty);
- else
- return ConstantInt::get(Ty, 0);
- }
-};
-}
-
-//===---------------------------------------===//
// 'memcpy_chk' Optimizations
struct MemCpyChkOpt : public LibCallOptimization {
@@ -1351,7 +1329,7 @@ struct FFSOpt : public LibCallOptimization {
  // Just make sure this takes a single integer argument and that it
  // returns an i32 result, matching the checks below.
if (FT->getNumParams() != 1 ||
- FT->getReturnType() != Type::getInt32Ty(*Context) ||
+ !FT->getReturnType()->isInteger(32) ||
!isa<IntegerType>(FT->getParamType(0)))
return 0;
@@ -1387,7 +1365,7 @@ struct IsDigitOpt : public LibCallOptimization {
const FunctionType *FT = Callee->getFunctionType();
// We require integer(i32)
if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
- FT->getParamType(0) != Type::getInt32Ty(*Context))
+ !FT->getParamType(0)->isInteger(32))
return 0;
// isdigit(c) -> (c-'0') <u 10
@@ -1408,7 +1386,7 @@ struct IsAsciiOpt : public LibCallOptimization {
const FunctionType *FT = Callee->getFunctionType();
// We require integer(i32)
if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
- FT->getParamType(0) != Type::getInt32Ty(*Context))
+ !FT->getParamType(0)->isInteger(32))
return 0;
// isascii(c) -> c <u 128
@@ -1449,7 +1427,7 @@ struct ToAsciiOpt : public LibCallOptimization {
const FunctionType *FT = Callee->getFunctionType();
// We require i32(i32)
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != Type::getInt32Ty(*Context))
+ !FT->getParamType(0)->isInteger(32))
return 0;
// isascii(c) -> c & 0x7f
@@ -1558,7 +1536,8 @@ struct SPrintFOpt : public LibCallOptimization {
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte.
- ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()+1),1,B);
+               ConstantInt::get(TD->getIntPtrType(*Context),
+                                FormatStr.size() + 1), 1, B);
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1688,8 +1667,9 @@ struct FPrintFOpt : public LibCallOptimization {
// These optimizations require TargetData.
if (!TD) return 0;
- EmitFWrite(CI->getOperand(2), ConstantInt::get(TD->getIntPtrType(*Context),
- FormatStr.size()),
+ EmitFWrite(CI->getOperand(2),
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ FormatStr.size()),
CI->getOperand(1), B);
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1744,7 +1724,6 @@ namespace {
FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
// Object Size Checking
- SizeOpt ObjectSize;
MemCpyChkOpt MemCpyChk; MemSetChkOpt MemSetChk; MemMoveChkOpt MemMoveChk;
bool Modified; // This is only used by doInitialization.
@@ -1854,8 +1833,6 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["fprintf"] = &FPrintF;
// Object Size Checking
- Optimizations["llvm.objectsize.i32"] = &ObjectSize;
- Optimizations["llvm.objectsize.i64"] = &ObjectSize;
Optimizations["__memcpy_chk"] = &MemCpyChk;
Optimizations["__memset_chk"] = &MemSetChk;
Optimizations["__memmove_chk"] = &MemMoveChk;
@@ -1896,8 +1873,8 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
Value *Result = LCO->OptimizeCall(CI, TD, Builder);
if (Result == 0) continue;
- DEBUG(errs() << "SimplifyLibCalls simplified: " << *CI;
- errs() << " into: " << *Result << "\n");
+ DEBUG(dbgs() << "SimplifyLibCalls simplified: " << *CI;
+ dbgs() << " into: " << *Result << "\n");
// Something changed!
Changed = true;
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
index b06ae3d..2306a77 100644
--- a/lib/Transforms/Scalar/TailDuplication.cpp
+++ b/lib/Transforms/Scalar/TailDuplication.cpp
@@ -243,13 +243,13 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
BasicBlock *DestBlock = Branch->getSuccessor(0);
assert(SourceBlock != DestBlock && "Our predicate is broken!");
- DEBUG(errs() << "TailDuplication[" << SourceBlock->getParent()->getName()
+ DEBUG(dbgs() << "TailDuplication[" << SourceBlock->getParent()->getName()
<< "]: Eliminating branch: " << *Branch);
// See if we can avoid duplicating code by moving it up to a dominator of both
// blocks.
if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) {
- DEBUG(errs() << "Found shared dominator: " << DomBlock->getName() << "\n");
+ DEBUG(dbgs() << "Found shared dominator: " << DomBlock->getName() << "\n");
// If there are non-phi instructions in DestBlock that have no operands
// defined in DestBlock, and if the instruction has no side effects, we can
@@ -272,7 +272,7 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
// Remove from DestBlock, move right before the term in DomBlock.
DestBlock->getInstList().remove(I);
DomBlock->getInstList().insert(DomBlock->getTerminator(), I);
- DEBUG(errs() << "Hoisted: " << *I);
+ DEBUG(dbgs() << "Hoisted: " << *I);
}
}
}
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index 135a621..8c4aa59 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -17,6 +17,7 @@
#include "llvm/Instruction.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
@@ -54,8 +55,8 @@ void ExtAddrMode::print(raw_ostream &OS) const {
}
void ExtAddrMode::dump() const {
- print(errs());
- errs() << '\n';
+ print(dbgs());
+ dbgs() << '\n';
}
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 2962e84..e902688 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -78,7 +78,7 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB) {
/// is dead. Also recursively delete any operands that become dead as
/// a result. This includes tracing the def-use list from the PHI to see if
/// it is ultimately unused or if it reaches an unused cycle.
-void llvm::DeleteDeadPHIs(BasicBlock *BB) {
+bool llvm::DeleteDeadPHIs(BasicBlock *BB) {
// Recursively deleting a PHI may cause multiple PHIs to be deleted
// or RAUW'd undef, so use an array of WeakVH for the PHIs to delete.
SmallVector<WeakVH, 8> PHIs;
@@ -86,9 +86,12 @@ void llvm::DeleteDeadPHIs(BasicBlock *BB) {
PHINode *PN = dyn_cast<PHINode>(I); ++I)
PHIs.push_back(PN);
+ bool Changed = false;
for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*()))
- RecursivelyDeleteDeadPHINode(PN);
+ Changed |= RecursivelyDeleteDeadPHINode(PN);
+
+ return Changed;
}
/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
@@ -252,7 +255,7 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
Value *RetVal = 0;
// Create a value to return... if the function doesn't return null...
- if (BB->getParent()->getReturnType() != Type::getVoidTy(TI->getContext()))
+ if (!BB->getParent()->getReturnType()->isVoidTy())
RetVal = Constant::getNullValue(BB->getParent()->getReturnType());
// Create the return...
@@ -673,16 +676,3 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
return 0;
}
-/// CopyPrecedingStopPoint - If I is immediately preceded by a StopPoint,
-/// make a copy of the stoppoint before InsertPos (presumably before copying
-/// or moving I).
-void llvm::CopyPrecedingStopPoint(Instruction *I,
- BasicBlock::iterator InsertPos) {
- if (I != I->getParent()->begin()) {
- BasicBlock::iterator BBI = I; --BBI;
- if (DbgStopPointInst *DSPI = dyn_cast<DbgStopPointInst>(BBI)) {
- CallInst *newDSPI = cast<CallInst>(DSPI->clone());
- newDSPI->insertBefore(InsertPos);
- }
- }
-}
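DeleteDeadPHIs (and, below, the Local.cpp utilities) now report whether they modified anything, so callers can fold the answer into their own Changed flag instead of guessing. A sketch of a hypothetical caller:

    #include "llvm/Function.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"
    using namespace llvm;

    static bool cleanupDeadPHIs(Function &F) {
      bool Changed = false;
      for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
        Changed |= DeleteDeadPHIs(BB);  // true whenever a PHI was removed
      return Changed;  // an honest answer for the pass manager
    }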
diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp
index b5ffe06..c580b8f 100644
--- a/lib/Transforms/Utils/BasicInliner.cpp
+++ b/lib/Transforms/Utils/BasicInliner.cpp
@@ -89,7 +89,7 @@ void BasicInlinerImpl::inlineFunctions() {
}
}
- DEBUG(errs() << ": " << CallSites.size() << " call sites.\n");
+ DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
// Inline call sites.
bool Changed = false;
@@ -109,21 +109,21 @@ void BasicInlinerImpl::inlineFunctions() {
}
InlineCost IC = CA.getInlineCost(CS, NeverInline);
if (IC.isAlways()) {
- DEBUG(errs() << " Inlining: cost=always"
+ DEBUG(dbgs() << " Inlining: cost=always"
<<", call: " << *CS.getInstruction());
} else if (IC.isNever()) {
- DEBUG(errs() << " NOT Inlining: cost=never"
+ DEBUG(dbgs() << " NOT Inlining: cost=never"
<<", call: " << *CS.getInstruction());
continue;
} else {
int Cost = IC.getValue();
if (Cost >= (int) BasicInlineThreshold) {
- DEBUG(errs() << " NOT Inlining: cost = " << Cost
+ DEBUG(dbgs() << " NOT Inlining: cost = " << Cost
<< ", call: " << *CS.getInstruction());
continue;
} else {
- DEBUG(errs() << " Inlining: cost = " << Cost
+ DEBUG(dbgs() << " Inlining: cost = " << Cost
<< ", call: " << *CS.getInstruction());
}
}
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index c287747..bd750cc 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -184,7 +184,6 @@ namespace {
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
const TargetData *TD;
- Value *DbgFnStart;
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
DenseMap<const Value*, Value*> &valueMap,
@@ -193,7 +192,7 @@ namespace {
ClonedCodeInfo *codeInfo,
const TargetData *td)
: NewFunc(newFunc), OldFunc(oldFunc), ValueMap(valueMap), Returns(returns),
- NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td), DbgFnStart(NULL) {
+ NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
}
/// CloneBlock - The specified block is found to be reachable, clone it and
@@ -235,19 +234,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
continue;
}
- // Do not clone llvm.dbg.region.end. It will be adjusted by the inliner.
- if (const DbgFuncStartInst *DFSI = dyn_cast<DbgFuncStartInst>(II)) {
- if (DbgFnStart == NULL) {
- DISubprogram SP(DFSI->getSubprogram());
- if (SP.describes(BB->getParent()))
- DbgFnStart = DFSI->getSubprogram();
- }
- }
- if (const DbgRegionEndInst *DREIS = dyn_cast<DbgRegionEndInst>(II)) {
- if (DREIS->getContext() == DbgFnStart)
- continue;
- }
-
Instruction *NewInst = II->clone();
if (II->hasName())
NewInst->setName(II->getName()+NameSuffix);
diff --git a/lib/Transforms/Utils/CloneLoop.cpp b/lib/Transforms/Utils/CloneLoop.cpp
index 7e000a1..38928dc 100644
--- a/lib/Transforms/Utils/CloneLoop.cpp
+++ b/lib/Transforms/Utils/CloneLoop.cpp
@@ -91,7 +91,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
Loop *NewParentLoop = NULL;
- while (!LoopNest.empty()) {
+ do {
Loop *L = LoopNest.pop_back_val();
Loop *NewLoop = new Loop();
@@ -123,7 +123,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
// Process sub loops
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
LoopNest.push_back(*I);
- }
+ } while (!LoopNest.empty());
// Remap instructions to reference operands from ValueMap.
for(SmallVector<BasicBlock *, 16>::iterator NBItr = NewBlocks.begin(),
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index f966681..b208494 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringExtras.h"
#include <algorithm>
#include <set>
@@ -44,8 +45,8 @@ AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
namespace {
class CodeExtractor {
- typedef std::vector<Value*> Values;
- std::set<BasicBlock*> BlocksToExtract;
+ typedef SetVector<Value*> Values;
+ SetVector<BasicBlock*> BlocksToExtract;
DominatorTree* DT;
bool AggregateArgs;
unsigned NumExitBlocks;
@@ -135,7 +136,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
// We only want to code extract the second block now, and it becomes the new
// header of the region.
BasicBlock *OldPred = Header;
- BlocksToExtract.erase(OldPred);
+ BlocksToExtract.remove(OldPred);
BlocksToExtract.insert(NewBB);
Header = NewBB;
@@ -180,7 +181,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
}
void CodeExtractor::splitReturnBlocks() {
- for (std::set<BasicBlock*>::iterator I = BlocksToExtract.begin(),
+ for (SetVector<BasicBlock*>::iterator I = BlocksToExtract.begin(),
E = BlocksToExtract.end(); I != E; ++I)
if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) {
BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
@@ -206,7 +207,7 @@ void CodeExtractor::splitReturnBlocks() {
//
void CodeExtractor::findInputsOutputs(Values &inputs, Values &outputs) {
std::set<BasicBlock*> ExitBlocks;
- for (std::set<BasicBlock*>::const_iterator ci = BlocksToExtract.begin(),
+ for (SetVector<BasicBlock*>::const_iterator ci = BlocksToExtract.begin(),
ce = BlocksToExtract.end(); ci != ce; ++ci) {
BasicBlock *BB = *ci;
@@ -215,13 +216,13 @@ void CodeExtractor::findInputsOutputs(Values &inputs, Values &outputs) {
// instruction is used outside the region, it's an output.
for (User::op_iterator O = I->op_begin(), E = I->op_end(); O != E; ++O)
if (definedInCaller(*O))
- inputs.push_back(*O);
+ inputs.insert(*O);
// Consider uses of this instruction (outputs).
for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
UI != E; ++UI)
if (!definedInRegion(*UI)) {
- outputs.push_back(I);
+ outputs.insert(I);
break;
}
} // for: insts
@@ -234,12 +235,6 @@ void CodeExtractor::findInputsOutputs(Values &inputs, Values &outputs) {
} // for: basic blocks
NumExitBlocks = ExitBlocks.size();
-
- // Eliminate duplicates.
- std::sort(inputs.begin(), inputs.end());
- inputs.erase(std::unique(inputs.begin(), inputs.end()), inputs.end());
- std::sort(outputs.begin(), outputs.end());
- outputs.erase(std::unique(outputs.begin(), outputs.end()), outputs.end());
}
/// constructFunction - make a function based on inputs and outputs, as follows:
@@ -252,8 +247,8 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
BasicBlock *newHeader,
Function *oldFunction,
Module *M) {
- DEBUG(errs() << "inputs: " << inputs.size() << "\n");
- DEBUG(errs() << "outputs: " << outputs.size() << "\n");
+ DEBUG(dbgs() << "inputs: " << inputs.size() << "\n");
+ DEBUG(dbgs() << "outputs: " << outputs.size() << "\n");
// This function returns unsigned, outputs will go back by reference.
switch (NumExitBlocks) {
@@ -269,25 +264,25 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
for (Values::const_iterator i = inputs.begin(),
e = inputs.end(); i != e; ++i) {
const Value *value = *i;
- DEBUG(errs() << "value used in func: " << *value << "\n");
+ DEBUG(dbgs() << "value used in func: " << *value << "\n");
paramTy.push_back(value->getType());
}
// Add the types of the output values to the function's argument list.
for (Values::const_iterator I = outputs.begin(), E = outputs.end();
I != E; ++I) {
- DEBUG(errs() << "instr used in func: " << **I << "\n");
+ DEBUG(dbgs() << "instr used in func: " << **I << "\n");
if (AggregateArgs)
paramTy.push_back((*I)->getType());
else
paramTy.push_back(PointerType::getUnqual((*I)->getType()));
}
- DEBUG(errs() << "Function type: " << *RetTy << " f(");
+ DEBUG(dbgs() << "Function type: " << *RetTy << " f(");
for (std::vector<const Type*>::iterator i = paramTy.begin(),
e = paramTy.end(); i != e; ++i)
- DEBUG(errs() << **i << ", ");
- DEBUG(errs() << ")\n");
+ DEBUG(dbgs() << **i << ", ");
+ DEBUG(dbgs() << ")\n");
if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
PointerType *StructPtr =
@@ -482,7 +477,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
std::map<BasicBlock*, BasicBlock*> ExitBlockMap;
unsigned switchVal = 0;
- for (std::set<BasicBlock*>::const_iterator i = BlocksToExtract.begin(),
+ for (SetVector<BasicBlock*>::const_iterator i = BlocksToExtract.begin(),
e = BlocksToExtract.end(); i != e; ++i) {
TerminatorInst *TI = (*i)->getTerminator();
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
@@ -593,7 +588,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
// this should be rewritten as a `ret'
// Check if the function should return a value
- if (OldFnRetTy == Type::getVoidTy(Context)) {
+ if (OldFnRetTy->isVoidTy()) {
ReturnInst::Create(Context, 0, TheSwitch); // Return void
} else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
// return what we have
@@ -633,7 +628,7 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) {
Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList();
Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList();
- for (std::set<BasicBlock*>::const_iterator i = BlocksToExtract.begin(),
+ for (SetVector<BasicBlock*>::const_iterator i = BlocksToExtract.begin(),
e = BlocksToExtract.end(); i != e; ++i) {
// Delete the basic block from the old function, and the list of blocks
oldBlocks.remove(*i);
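CodeExtractor's move from std::set plus an explicit sort/unique pass to SetVector gets deduplication on insert while keeping first-insertion iteration order, which keeps the extracted function's argument order deterministic. A minimal sketch (the helper is invented):

    #include "llvm/ADT/SetVector.h"
    using namespace llvm;

    static unsigned countUnique(const int *Begin, const int *End) {
      SetVector<int> Inputs;
      for (const int *I = Begin; I != End; ++I)
        Inputs.insert(*I);  // duplicate inserts are no-ops
      // Iteration visits elements in first-insertion order -- no
      // sort/unique step needed for deterministic output.
      return Inputs.size();
    }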
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 043046c..17f8827 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -210,34 +210,6 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
CallerNode->removeCallEdgeFor(CS);
}
-/// findFnRegionEndMarker - This is a utility routine that is used by
-/// InlineFunction. Return llvm.dbg.region.end intrinsic that corresponds
-/// to the llvm.dbg.func.start of the function F. Otherwise return NULL.
-///
-static const DbgRegionEndInst *findFnRegionEndMarker(const Function *F) {
-
- MDNode *FnStart = NULL;
- const DbgRegionEndInst *FnEnd = NULL;
- for (Function::const_iterator FI = F->begin(), FE =F->end(); FI != FE; ++FI)
- for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); BI != BE;
- ++BI) {
- if (FnStart == NULL) {
- if (const DbgFuncStartInst *FSI = dyn_cast<DbgFuncStartInst>(BI)) {
- DISubprogram SP(FSI->getSubprogram());
- assert (SP.isNull() == false && "Invalid llvm.dbg.func.start");
- if (SP.describes(F))
- FnStart = SP.getNode();
- }
- continue;
- }
-
- if (const DbgRegionEndInst *REI = dyn_cast<DbgRegionEndInst>(BI))
- if (REI->getContext() == FnStart)
- FnEnd = REI;
- }
- return FnEnd;
-}
-
// InlineFunction - This function inlines the called function into the basic
// block of the caller. This returns false if it is not possible to inline this
// call. The program is still in a well defined state if this occurs though.
@@ -364,23 +336,6 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
ValueMap[I] = ActualArg;
}
- // Adjust llvm.dbg.region.end. If the CalledFunc has region end
- // marker then clone that marker after next stop point at the
- // call site. The function body cloner does not clone original
- // region end marker from the CalledFunc. This will ensure that
- // inlined function's scope ends at the right place.
- if (const DbgRegionEndInst *DREI = findFnRegionEndMarker(CalledFunc)) {
- for (BasicBlock::iterator BI = TheCall, BE = TheCall->getParent()->end();
- BI != BE; ++BI) {
- if (DbgStopPointInst *DSPI = dyn_cast<DbgStopPointInst>(BI)) {
- if (DbgRegionEndInst *NewDREI =
- dyn_cast<DbgRegionEndInst>(DREI->clone()))
- NewDREI->insertAfter(DSPI);
- break;
- }
- }
- }
-
// We want the inliner to prune the code as it copies. We would LOVE to
// have no dead or constant instructions leftover after inlining occurs
// (which can happen, e.g., because an argument was constant), but we'll be
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index 7f11acf..090af95 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -32,7 +32,7 @@ namespace {
bool runOnFunction(Function &F) {
for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
AI != AE; ++AI)
- if (!AI->hasName() && AI->getType() != Type::getVoidTy(F.getContext()))
+ if (!AI->hasName() && !AI->getType()->isVoidTy())
AI->setName("arg");
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
@@ -40,7 +40,7 @@ namespace {
BB->setName("bb");
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (!I->hasName() && I->getType() != Type::getVoidTy(F.getContext()))
+ if (!I->hasName() && !I->getType()->isVoidTy())
I->setName("tmp");
}
return true;
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 2426e3e..90e929e 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -268,16 +268,17 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) {
/// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a
/// trivially dead instruction, delete it. If that makes any of its operands
-/// trivially dead, delete them too, recursively.
-void llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
+/// trivially dead, delete them too, recursively. Return true if any
+/// instructions were deleted.
+bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I || !I->use_empty() || !isInstructionTriviallyDead(I))
- return;
+ return false;
SmallVector<Instruction*, 16> DeadInsts;
DeadInsts.push_back(I);
- while (!DeadInsts.empty()) {
+ do {
I = DeadInsts.pop_back_val();
// Null out all of the instruction's operands to see if any operand becomes
@@ -297,22 +298,25 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
}
I->eraseFromParent();
- }
+ } while (!DeadInsts.empty());
+
+ return true;
}
/// RecursivelyDeleteDeadPHINode - If the specified value is an effectively
/// dead PHI node, due to being a def-use chain of single-use nodes that
/// either forms a cycle or is terminated by a trivially dead instruction,
/// delete it. If that makes any of its operands trivially dead, delete them
-/// too, recursively.
-void
+/// too, recursively. Return true if the PHI node is actually deleted.
+bool
llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
// We can remove a PHI if it is on a cycle in the def-use graph
// where each node in the cycle has degree one, i.e. only one use,
// and is an instruction with no side effects.
if (!PN->hasOneUse())
- return;
+ return false;
+ bool Changed = false;
SmallPtrSet<PHINode *, 4> PHIs;
PHIs.insert(PN);
for (Instruction *J = cast<Instruction>(*PN->use_begin());
@@ -324,9 +328,35 @@ llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
if (!PHIs.insert(cast<PHINode>(JP))) {
// Break the cycle and delete the PHI and its operands.
JP->replaceAllUsesWith(UndefValue::get(JP->getType()));
- RecursivelyDeleteTriviallyDeadInstructions(JP);
+ (void)RecursivelyDeleteTriviallyDeadInstructions(JP);
+ Changed = true;
break;
}
+ return Changed;
+}
+
+/// SimplifyInstructionsInBlock - Scan the specified basic block and try to
+/// simplify any instructions in it and recursively delete dead instructions.
+///
+/// This returns true if it changed the code. Note that it can delete
+/// instructions in other blocks as well as in this block.
+bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) {
+ bool MadeChange = false;
+ for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+ Instruction *Inst = BI++;
+
+ if (Value *V = SimplifyInstruction(Inst, TD)) {
+ WeakVH BIHandle(BI);
+ ReplaceAndSimplifyAllUses(Inst, V, TD);
+ MadeChange = true;
+ if (BIHandle == 0)
+ BI = BB->begin();
+ continue;
+ }
+
+ MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ }
+ return MadeChange;
}
//===----------------------------------------------------------------------===//
@@ -421,7 +451,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
- DEBUG(errs() << "Looking to fold " << BB->getName() << " into "
+ DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into "
<< Succ->getName() << "\n");
// Shortcut, if there is only a single predecessor it must be BB and merging
// is always safe
@@ -456,7 +486,7 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
PI != PE; PI++) {
if (BBPN->getIncomingValueForBlock(*PI)
!= PN->getIncomingValueForBlock(*PI)) {
- DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in "
+ DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
<< Succ->getName() << " is conflicting with "
<< BBPN->getName() << " with regard to common predecessor "
<< (*PI)->getName() << "\n");
@@ -471,7 +501,7 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
// one for BB, in which case this phi node will not prevent the merging
// of the block.
if (Val != PN->getIncomingValueForBlock(*PI)) {
- DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in "
+ DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
<< Succ->getName() << " is conflicting with regard to common "
<< "predecessor " << (*PI)->getName() << "\n");
return false;
@@ -525,7 +555,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
}
}
- DEBUG(errs() << "Killing Trivial BB: \n" << *BB);
+ DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
if (isa<PHINode>(Succ->begin())) {
// If there is more than one pred of succ, and there are PHI nodes in
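The new SimplifyInstructionsInBlock above guards its scan with a WeakVH: the handle nulls itself if ReplaceAndSimplifyAllUses happens to delete the next instruction, letting the loop restart from the block top instead of touching freed memory. A small sketch of the handle's behavior (the callback is hypothetical):

    #include "llvm/Instruction.h"
    #include "llvm/Support/ValueHandle.h"
    using namespace llvm;

    static bool survivedTransform(Instruction *Next, void (*Transform)()) {
      WeakVH Handle(Next);  // tracks Next; becomes null if Next is deleted
      Transform();          // may erase Next as a side effect
      return Handle != 0;   // non-null only if Next is still alive
    }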
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 6b2c591..53117a0 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -63,7 +63,7 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
return 0;
- DEBUG(errs() << "Merging: " << *BB << "into: " << *OnlyPred);
+ DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred);
// Resolve any PHI nodes at the start of the block. They are all
// guaranteed to have exactly one entry if they exist, unless there are
@@ -110,13 +110,13 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
- DEBUG(errs() << " Can't unroll; loop preheader-insertion failed.\n");
+ DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
return false;
}
BasicBlock *LatchBlock = L->getLoopLatch();
if (!LatchBlock) {
- DEBUG(errs() << " Can't unroll; loop exit-block-insertion failed.\n");
+ DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
return false;
}
@@ -125,7 +125,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
if (!BI || BI->isUnconditional()) {
// The loop-rotate pass can be helpful to avoid this in many cases.
- DEBUG(errs() <<
+ DEBUG(dbgs() <<
" Can't unroll; loop not terminated by a conditional branch.\n");
return false;
}
@@ -138,9 +138,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
TripMultiple = L->getSmallConstantTripMultiple();
if (TripCount != 0)
- DEBUG(errs() << " Trip Count = " << TripCount << "\n");
+ DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
if (TripMultiple != 1)
- DEBUG(errs() << " Trip Multiple = " << TripMultiple << "\n");
+ DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n");
// Effectively "DCE" unrolled iterations that are beyond the tripcount
// and will never be executed.
@@ -166,17 +166,17 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
}
if (CompletelyUnroll) {
- DEBUG(errs() << "COMPLETELY UNROLLING loop %" << Header->getName()
+ DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
<< " with trip count " << TripCount << "!\n");
} else {
- DEBUG(errs() << "UNROLLING loop %" << Header->getName()
+ DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
<< " by " << Count);
if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
- DEBUG(errs() << " with a breakout at trip " << BreakoutTrip);
+ DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
} else if (TripMultiple != 1) {
- DEBUG(errs() << " with " << TripMultiple << " trips per branch");
+ DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
}
- DEBUG(errs() << "!\n");
+ DEBUG(dbgs() << "!\n");
}
std::vector<BasicBlock*> LoopBlocks = L->getBlocks();
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 6e6e8d2..766c4d9 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -255,7 +255,7 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
// Insert a return instruction. This really should be a "barrier", as it
// is unreachable.
ReturnInst::Create(F.getContext(),
- F.getReturnType() == Type::getVoidTy(F.getContext()) ?
+ F.getReturnType()->isVoidTy() ?
0 : Constant::getNullValue(F.getReturnType()), UI);
// Remove the unwind instruction now.
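
The Type::getVoidTy comparison rewrites here and throughout this commit swap a pointer comparison against a context-owned singleton for a predicate on the type itself, which also avoids threading an LLVMContext through call sites whose only use for it was fetching the void type. A plausible shape for the helper (an assumption; the real definition lives in llvm/Type.h):

    // Hypothetical condensed form of the predicate these hunks call:
    bool Type::isVoidTy() const { return getTypeID() == VoidTyID; }
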
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 743bb6e..468a5fe 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -137,12 +137,12 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
unsigned Mid = Size / 2;
std::vector<CaseRange> LHS(Begin, Begin + Mid);
- DEBUG(errs() << "LHS: " << LHS << "\n");
+ DEBUG(dbgs() << "LHS: " << LHS << "\n");
std::vector<CaseRange> RHS(Begin + Mid, End);
- DEBUG(errs() << "RHS: " << RHS << "\n");
+ DEBUG(dbgs() << "RHS: " << RHS << "\n");
CaseRange& Pivot = *(Begin + Mid);
- DEBUG(errs() << "Pivot ==> "
+ DEBUG(dbgs() << "Pivot ==> "
<< cast<ConstantInt>(Pivot.Low)->getValue() << " -"
<< cast<ConstantInt>(Pivot.High)->getValue() << "\n");
@@ -306,9 +306,9 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
CaseVector Cases;
unsigned numCmps = Clusterify(Cases, SI);
- DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size()
+ DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
<< ". Total compares: " << numCmps << "\n");
- DEBUG(errs() << "Cases: " << Cases << "\n");
+ DEBUG(dbgs() << "Cases: " << Cases << "\n");
(void)numCmps;
BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val,
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 846e432..baaa130 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -448,13 +448,13 @@ void PromoteMem2Reg::run() {
//
std::vector<RenamePassData> RenamePassWorkList;
RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
- while (!RenamePassWorkList.empty()) {
+ do {
RenamePassData RPD;
RPD.swap(RenamePassWorkList.back());
RenamePassWorkList.pop_back();
// RenamePass may add new worklist entries.
RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
- }
+ } while (!RenamePassWorkList.empty());
// The renamer uses the Visited set to avoid infinite loops. Clear it now.
Visited.clear();
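
The while -> do/while change is safe because the worklist is seeded with exactly one entry on the line above the loop, so the initial emptiness test can never fire; the do/while states that invariant directly and saves one branch. A stripped-down sketch of the same shape (hypothetical element type, not the pass's own):

    #include <vector>

    void drain(std::vector<int> Worklist) {
      Worklist.push_back(0);        // seeded: non-empty at loop entry
      do {
        int Item = Worklist.back();
        Worklist.pop_back();
        (void)Item;                 // processing may push further entries
      } while (!Worklist.empty());
    }
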
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 9881b3c..161bf21 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -191,7 +191,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// If the client wants to know about all new instructions, tell it.
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
- DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
return InsertedPHI;
}
@@ -352,7 +352,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
InsertedPHI->eraseFromParent();
InsertedVal = ConstVal;
} else {
- DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
// If the client wants to know about all new instructions, tell it.
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
diff --git a/lib/Transforms/Utils/SSI.cpp b/lib/Transforms/Utils/SSI.cpp
index 1c4afff..4e813dd 100644
--- a/lib/Transforms/Utils/SSI.cpp
+++ b/lib/Transforms/Utils/SSI.cpp
@@ -416,7 +416,7 @@ bool SSIEverything::runOnFunction(Function &F) {
for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B)
for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I)
- if (I->getType() != Type::getVoidTy(F.getContext()))
+ if (!I->getType()->isVoidTy())
Insts.push_back(I);
ssi.createSSI(Insts);
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index d7ca45e..cb53296 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -459,7 +459,7 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
// Remove PHI node entries for the dead edge.
ThisCases[0].second->removePredecessor(TI->getParent());
- DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator()
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
<< "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
EraseTerminatorInstAndDCECond(TI);
@@ -472,7 +472,7 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
DeadCases.insert(PredCases[i].first);
- DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator()
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
<< "Through successor TI: " << *TI);
for (unsigned i = SI->getNumCases()-1; i != 0; --i)
@@ -481,7 +481,7 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
SI->removeCase(i);
}
- DEBUG(errs() << "Leaving: " << *TI << "\n");
+ DEBUG(dbgs() << "Leaving: " << *TI << "\n");
return true;
}
}
@@ -524,7 +524,7 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
Instruction *NI = BranchInst::Create(TheRealDest, TI);
(void) NI;
- DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator()
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
<< "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
EraseTerminatorInstAndDCECond(TI);
@@ -753,7 +753,7 @@ HoistTerminator:
// Okay, it is safe to hoist the terminator.
Instruction *NT = I1->clone();
BIParent->getInstList().insert(BI, NT);
- if (NT->getType() != Type::getVoidTy(BB1->getContext())) {
+ if (!NT->getType()->isVoidTy()) {
I1->replaceAllUsesWith(NT);
I2->replaceAllUsesWith(NT);
NT->takeName(I1);
@@ -1011,7 +1011,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
ConstantInt *CB;
if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) &&
- CB->getType() == Type::getInt1Ty(BB->getContext())) {
+ CB->getType()->isInteger(1)) {
// Okay, we now know that all edges from PredBB should be revectored to
// branch to RealDest.
BasicBlock *PredBB = PN->getIncomingBlock(i);
@@ -1111,7 +1111,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN) {
if (NumPhis > 2)
return false;
- DEBUG(errs() << "FOUND IF CONDITION! " << *IfCond << " T: "
+ DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
<< IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
// Loop over the PHI's seeing if we can promote them all to select
@@ -1295,7 +1295,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
ReturnInst::Create(BI->getContext(), TrueValue, BI);
(void) RI;
- DEBUG(errs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
+ DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
<< "\n " << *BI << "NewRet = " << *RI
<< "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc);
@@ -1377,7 +1377,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
else
continue;
- DEBUG(errs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
+ DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
// If we need to invert the condition in the pred block to match, do so now.
if (InvertPredCond) {
@@ -1511,7 +1511,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
// Finally, if everything is ok, fold the branches to logical ops.
BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
- DEBUG(errs() << "FOLDING BRs:" << *PBI->getParent()
+ DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
<< "AND: " << *BI->getParent());
@@ -1531,7 +1531,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
OtherDest = InfLoopBlock;
}
- DEBUG(errs() << *PBI->getParent()->getParent());
+ DEBUG(dbgs() << *PBI->getParent()->getParent());
// BI may have other predecessors. Because of this, we leave
// it alone, but modify PBI.
@@ -1581,8 +1581,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
}
}
- DEBUG(errs() << "INTO: " << *PBI->getParent());
- DEBUG(errs() << *PBI->getParent()->getParent());
+ DEBUG(dbgs() << "INTO: " << *PBI->getParent());
+ DEBUG(dbgs() << *PBI->getParent()->getParent());
// This basic block is probably dead. We know it has at least
// one fewer predecessor.
@@ -1608,7 +1608,7 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {
// Remove basic blocks that have no predecessors... or that just have themselves
// as a predecessor. These are unreachable.
if (pred_begin(BB) == pred_end(BB) || BB->getSinglePredecessor() == BB) {
- DEBUG(errs() << "Removing BB: \n" << *BB);
+ DEBUG(dbgs() << "Removing BB: \n" << *BB);
DeleteDeadBlock(BB);
return true;
}
@@ -1651,20 +1651,13 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {
if (!UncondBranchPreds.empty()) {
while (!UncondBranchPreds.empty()) {
BasicBlock *Pred = UncondBranchPreds.pop_back_val();
- DEBUG(errs() << "FOLDING: " << *BB
+ DEBUG(dbgs() << "FOLDING: " << *BB
<< "INTO UNCOND BRANCH PRED: " << *Pred);
Instruction *UncondBranch = Pred->getTerminator();
// Clone the return and add it to the end of the predecessor.
Instruction *NewRet = RI->clone();
Pred->getInstList().push_back(NewRet);
- BasicBlock::iterator BBI = RI;
- if (BBI != BB->begin()) {
- // Move region end info into the predecessor.
- if (DbgRegionEndInst *DREI = dyn_cast<DbgRegionEndInst>(--BBI))
- DREI->moveBefore(NewRet);
- }
-
// If the return instruction returns a value, and if the value was a
// PHI node in "BB", propagate the right value into the return.
for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 30cb94d..3fa8b70 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -112,7 +112,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
"UnifiedReturnBlock", &F);
PHINode *PN = 0;
- if (F.getReturnType() == Type::getVoidTy(F.getContext())) {
+ if (F.getReturnType()->isVoidTy()) {
ReturnInst::Create(F.getContext(), NULL, NewRetBlock);
} else {
// If the function doesn't return void... add a PHI node to the block...
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index d3c9d77..eff2c77 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -30,6 +30,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -563,11 +564,14 @@ static SlotTracker *createSlotTracker(const Value *V) {
if (const Function *Func = dyn_cast<Function>(V))
return new SlotTracker(Func);
+ if (isa<MDNode>(V))
+ return new SlotTracker((Function *)0);
+
return 0;
}
#if 0
-#define ST_DEBUG(X) errs() << X
+#define ST_DEBUG(X) dbgs() << X
#else
#define ST_DEBUG(X)
#endif
@@ -614,8 +618,7 @@ void SlotTracker::processModule() {
E = TheModule->named_metadata_end(); I != E; ++I) {
const NamedMDNode *NMD = I;
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- // FIXME: Change accessor to be type safe.
- if (MDNode *MD = cast_or_null<MDNode>(NMD->getOperand(i)))
+ if (MDNode *MD = NMD->getOperand(i))
CreateMetadataSlot(MD);
}
}
@@ -832,7 +835,7 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
static void WriteConstantInt(raw_ostream &Out, const Constant *CV,
TypePrinting &TypePrinter, SlotTracker *Machine) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
- if (CI->getType() == Type::getInt1Ty(CV->getContext())) {
+ if (CI->getType()->isInteger(1)) {
Out << (CI->getZExtValue() ? "true" : "false");
return;
}
@@ -1136,6 +1139,8 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
return;
}
+ if (!Machine)
+ Machine = createSlotTracker(V);
Out << '!' << Machine->getMetadataSlot(N);
return;
}
@@ -1369,10 +1374,10 @@ void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
Out << "!" << NMD->getName() << " = !{";
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
if (i) Out << ", ";
- // FIXME: Change accessor to be typesafe.
- // FIXME: This doesn't handle null??
- MDNode *MD = cast_or_null<MDNode>(NMD->getOperand(i));
- Out << '!' << Machine.getMetadataSlot(MD);
+ if (MDNode *MD = NMD->getOperand(i))
+ Out << '!' << Machine.getMetadataSlot(MD);
+ else
+ Out << "null";
}
Out << "}\n";
}
@@ -2057,8 +2062,9 @@ void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
else
W.printAlias(cast<GlobalAlias>(GV));
} else if (const MDNode *N = dyn_cast<MDNode>(this)) {
- SlotTracker SlotTable((Function*)0);
- AssemblyWriter W(OS, SlotTable, 0, AAW);
+ Function *F = N->getFunction();
+ SlotTracker SlotTable(F);
+ AssemblyWriter W(OS, SlotTable, getModuleFromVal(F), AAW);
W.printMDNodeBody(N);
} else if (const NamedMDNode *N = dyn_cast<NamedMDNode>(this)) {
SlotTracker SlotTable(N->getParent());
@@ -2085,17 +2091,17 @@ void Value::printCustom(raw_ostream &OS) const {
}
// Value::dump - allow easy printing of Values from the debugger.
-void Value::dump() const { print(errs()); errs() << '\n'; }
+void Value::dump() const { print(dbgs()); dbgs() << '\n'; }
// Type::dump - allow easy printing of Types from the debugger.
// This one uses type names from the given context module
void Type::dump(const Module *Context) const {
- WriteTypeSymbolic(errs(), this, Context);
- errs() << '\n';
+ WriteTypeSymbolic(dbgs(), this, Context);
+ dbgs() << '\n';
}
// Type::dump - allow easy printing of Types from the debugger.
void Type::dump() const { dump(0); }
// Module::dump() - Allow printing of Modules from the debugger.
-void Module::dump() const { print(errs(), 0); }
+void Module::dump() const { print(dbgs(), 0); }
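
The AsmWriter changes tie together: createSlotTracker learns to build a tracker for a bare MDNode, WriteAsOperandInternal lazily creates one when the caller passed none, and Value::print uses the node's parent function for function-local metadata so slot numbers match the enclosing function. A usage sketch under those assumptions:

    #include "llvm/Metadata.h"
    #include "llvm/Support/Debug.h"

    // Dumping a free-standing metadata node now gets numbered operands
    // instead of requiring an enclosing module or function writer.
    void dumpNode(const llvm::MDNode *N) {
      N->print(llvm::dbgs());   // routes through printMDNodeBody
      llvm::dbgs() << '\n';
    }
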
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
index d68bba3..a371c6f 100644
--- a/lib/VMCore/Attributes.cpp
+++ b/lib/VMCore/Attributes.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/System/Atomic.h"
#include "llvm/System/Mutex.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -318,11 +319,11 @@ AttrListPtr AttrListPtr::removeAttr(unsigned Idx, Attributes Attrs) const {
}
void AttrListPtr::dump() const {
- errs() << "PAL[ ";
+ dbgs() << "PAL[ ";
for (unsigned i = 0; i < getNumSlots(); ++i) {
const AttributeWithIndex &PAWI = getSlot(i);
- errs() << "{" << PAWI.Index << "," << PAWI.Attrs << "} ";
+ dbgs() << "{" << PAWI.Index << "," << PAWI.Attrs << "} ";
}
- errs() << "]\n";
+ dbgs() << "]\n";
}
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index 77ab19f..2161841 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -480,61 +480,42 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) {
}
}
-/// This function checks debug info intrinsics. If an intrinsic is invalid
-/// then this function simply removes the intrinsic.
+/// This function strips all debug info intrinsics, except for llvm.dbg.declare.
+/// If an llvm.dbg.declare intrinsic is invalid, then this function simply
+/// strips that use.
void llvm::CheckDebugInfoIntrinsics(Module *M) {
if (Function *FuncStart = M->getFunction("llvm.dbg.func.start")) {
- if (!FuncStart->use_empty()) {
- DbgFuncStartInst *DFSI = cast<DbgFuncStartInst>(FuncStart->use_back());
- if (!isa<MDNode>(DFSI->getOperand(1))) {
- while (!FuncStart->use_empty()) {
- CallInst *CI = cast<CallInst>(FuncStart->use_back());
- CI->eraseFromParent();
- }
- FuncStart->eraseFromParent();
- }
+ while (!FuncStart->use_empty()) {
+ CallInst *CI = cast<CallInst>(FuncStart->use_back());
+ CI->eraseFromParent();
}
+ FuncStart->eraseFromParent();
}
-
+
if (Function *StopPoint = M->getFunction("llvm.dbg.stoppoint")) {
- if (!StopPoint->use_empty()) {
- DbgStopPointInst *DSPI = cast<DbgStopPointInst>(StopPoint->use_back());
- if (!isa<MDNode>(DSPI->getOperand(3))) {
- while (!StopPoint->use_empty()) {
- CallInst *CI = cast<CallInst>(StopPoint->use_back());
- CI->eraseFromParent();
- }
- StopPoint->eraseFromParent();
- }
+ while (!StopPoint->use_empty()) {
+ CallInst *CI = cast<CallInst>(StopPoint->use_back());
+ CI->eraseFromParent();
}
+ StopPoint->eraseFromParent();
}
if (Function *RegionStart = M->getFunction("llvm.dbg.region.start")) {
- if (!RegionStart->use_empty()) {
- DbgRegionStartInst *DRSI = cast<DbgRegionStartInst>(RegionStart->use_back());
- if (!isa<MDNode>(DRSI->getOperand(1))) {
- while (!RegionStart->use_empty()) {
- CallInst *CI = cast<CallInst>(RegionStart->use_back());
- CI->eraseFromParent();
- }
- RegionStart->eraseFromParent();
- }
+ while (!RegionStart->use_empty()) {
+ CallInst *CI = cast<CallInst>(RegionStart->use_back());
+ CI->eraseFromParent();
}
+ RegionStart->eraseFromParent();
}
if (Function *RegionEnd = M->getFunction("llvm.dbg.region.end")) {
- if (!RegionEnd->use_empty()) {
- DbgRegionEndInst *DREI = cast<DbgRegionEndInst>(RegionEnd->use_back());
- if (!isa<MDNode>(DREI->getOperand(1))) {
- while (!RegionEnd->use_empty()) {
- CallInst *CI = cast<CallInst>(RegionEnd->use_back());
- CI->eraseFromParent();
- }
- RegionEnd->eraseFromParent();
- }
+ while (!RegionEnd->use_empty()) {
+ CallInst *CI = cast<CallInst>(RegionEnd->use_back());
+ CI->eraseFromParent();
}
+ RegionEnd->eraseFromParent();
}
if (Function *Declare = M->getFunction("llvm.dbg.declare")) {
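
The four blocks above repeat one idiom: drop every (call) use of the obsolete intrinsic, then erase its declaration. A generic helper capturing it (hypothetical; not part of the patch) would read:

    #include "llvm/Function.h"
    #include "llvm/Instructions.h"

    static void removeAllUsesAndErase(llvm::Function *F) {
      while (!F->use_empty()) {
        // Uses of these debug intrinsics are always direct calls.
        llvm::CallInst *CI = llvm::cast<llvm::CallInst>(F->use_back());
        CI->eraseFromParent();
      }
      F->eraseFromParent();
    }
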
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 2449739..3a24389 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -1162,7 +1162,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(LLVMContext &Context,
}
// i1 can be simplified in many cases.
- if (C1->getType() == Type::getInt1Ty(Context)) {
+ if (C1->getType()->isInteger(1)) {
switch (Opcode) {
case Instruction::Add:
case Instruction::Sub:
@@ -1229,10 +1229,10 @@ static int IdxCompare(LLVMContext &Context, Constant *C1, Constant *C2,
// Ok, we have two differing integer indices. Sign extend them to be the same
// type. Long is always big enough, so we use it.
- if (C1->getType() != Type::getInt64Ty(Context))
+ if (!C1->getType()->isInteger(64))
C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(Context));
- if (C2->getType() != Type::getInt64Ty(Context))
+ if (!C2->getType()->isInteger(64))
C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(Context));
if (C1 == C2) return 0; // They are equal
@@ -1587,7 +1587,7 @@ Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context,
}
// If the comparison is a comparison between two i1's, simplify it.
- if (C1->getType() == Type::getInt1Ty(Context)) {
+ if (C1->getType()->isInteger(1)) {
switch(pred) {
case ICmpInst::ICMP_EQ:
if (isa<ConstantInt>(C2))
@@ -2042,10 +2042,10 @@ Constant *llvm::ConstantFoldGetElementPtr(LLVMContext &Context,
// Before adding, extend both operands to i64 to avoid
// overflow trouble.
- if (PrevIdx->getType() != Type::getInt64Ty(Context))
+ if (!PrevIdx->getType()->isInteger(64))
PrevIdx = ConstantExpr::getSExt(PrevIdx,
Type::getInt64Ty(Context));
- if (Div->getType() != Type::getInt64Ty(Context))
+ if (!Div->getType()->isInteger(64))
Div = ConstantExpr::getSExt(Div,
Type::getInt64Ty(Context));
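
The isInteger(N) rewrites scattered through this commit (i1 checks in SimplifyCFG and AsmWriter above, the i1/i64 checks here, i32 index checks in Instructions.cpp below) replace pointer comparisons against Type::getIntNTy singletons with a width-checking predicate. A plausible shape (assumption; the real definition is in llvm/Type.h):

    bool Type::isInteger(unsigned Bitwidth) const {
      return isInteger() &&
             llvm::cast<llvm::IntegerType>(this)->getBitWidth() == Bitwidth;
    }
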
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index e3c6144..cc8961f 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -110,7 +110,7 @@ void Constant::destroyConstantImpl() {
Value *V = use_back();
#ifndef NDEBUG // Only in -g mode...
if (!isa<Constant>(V)) {
- errs() << "While deleting: " << *this
+ dbgs() << "While deleting: " << *this
<< "\n\nUse still stuck around after Def is destroyed: "
<< *V << "\n\n";
}
@@ -197,6 +197,24 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const {
if (const BlockAddress *BA = dyn_cast<BlockAddress>(this))
return BA->getFunction()->getRelocationInfo();
+ // While raw uses of blockaddress need to be relocated, differences between
+ // two of them need no relocation when they are labels in the same function. This is a
+ // common idiom when creating a table for the indirect goto extension, so we
+ // handle it efficiently here.
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(this))
+ if (CE->getOpcode() == Instruction::Sub) {
+ ConstantExpr *LHS = dyn_cast<ConstantExpr>(CE->getOperand(0));
+ ConstantExpr *RHS = dyn_cast<ConstantExpr>(CE->getOperand(1));
+ if (LHS && RHS &&
+ LHS->getOpcode() == Instruction::PtrToInt &&
+ RHS->getOpcode() == Instruction::PtrToInt &&
+ isa<BlockAddress>(LHS->getOperand(0)) &&
+ isa<BlockAddress>(RHS->getOperand(0)) &&
+ cast<BlockAddress>(LHS->getOperand(0))->getFunction() ==
+ cast<BlockAddress>(RHS->getOperand(0))->getFunction())
+ return NoRelocation;
+ }
+
PossibleRelocationsTy Result = NoRelocation;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
Result = std::max(Result,
@@ -910,7 +928,7 @@ void ConstantArray::destroyConstant() {
/// if the elements of the array are all ConstantInt's.
bool ConstantArray::isString() const {
// Check the element type for i8...
- if (getType()->getElementType() != Type::getInt8Ty(getContext()))
+ if (!getType()->getElementType()->isInteger(8))
return false;
// Check the elements to make sure they are all integers, not constant
// expressions.
@@ -925,7 +943,7 @@ bool ConstantArray::isString() const {
/// null bytes except its terminator.
bool ConstantArray::isCString() const {
// Check the element type for i8...
- if (getType()->getElementType() != Type::getInt8Ty(getContext()))
+ if (!getType()->getElementType()->isInteger(8))
return false;
// Last element must be a null.
@@ -1671,7 +1689,7 @@ Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val,
Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
assert(isa<VectorType>(Val->getType()) &&
"Tried to create extractelement operation on non-vector type!");
- assert(Idx->getType() == Type::getInt32Ty(Val->getContext()) &&
+ assert(Idx->getType()->isInteger(32) &&
"Extractelement index must be i32 type!");
return getExtractElementTy(cast<VectorType>(Val->getType())->getElementType(),
Val, Idx);
@@ -1698,7 +1716,7 @@ Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
"Tried to create insertelement operation on non-vector type!");
assert(Elt->getType() == cast<VectorType>(Val->getType())->getElementType()
&& "Insertelement types must match!");
- assert(Idx->getType() == Type::getInt32Ty(Val->getContext()) &&
+ assert(Idx->getType()->isInteger(32) &&
"Insertelement index must be i32 type!");
return getInsertElementTy(Val->getType(), Val, Elt, Idx);
}
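
The new getRelocationInfo case recognizes label-difference tables: subtracting two blockaddresses of the same function folds to a link-time constant, so no relocation is needed. Illustrative source that produces the pattern (GNU label-as-value extension, accepted by GCC and Clang in C and C++; not from the patch):

    void dispatch(int i) {
      // Differences of label addresses within one function lower to
      // 'sub (ptrtoint blockaddress, ptrtoint blockaddress)' constants.
      static const long Offsets[] = { &&L0 - &&L0, &&L1 - &&L0, &&L2 - &&L0 };
      goto *(&&L0 + Offsets[i]);
    L0: return;
    L1: return;
    L2: return;
    }
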
diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h
index 268a660..08224e4 100644
--- a/lib/VMCore/ConstantsContext.h
+++ b/lib/VMCore/ConstantsContext.h
@@ -764,7 +764,7 @@ public:
}
void dump() const {
- DEBUG(errs() << "Constant.cpp: ConstantUniqueMap\n");
+ DEBUG(dbgs() << "Constant.cpp: ConstantUniqueMap\n");
}
};
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index 449e967..984d245 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -89,7 +89,7 @@ void LLVMSetTarget(LLVMModuleRef M, const char *Triple) {
}
/*--.. Type names ..........................................................--*/
-int LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty) {
+LLVMBool LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty) {
return unwrap(M)->addTypeName(Name, unwrap(Ty));
}
@@ -237,7 +237,7 @@ LLVMTypeRef LLVMPPCFP128Type(void) {
LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType,
LLVMTypeRef *ParamTypes, unsigned ParamCount,
- int IsVarArg) {
+ LLVMBool IsVarArg) {
std::vector<const Type*> Tys;
for (LLVMTypeRef *I = ParamTypes, *E = ParamTypes + ParamCount; I != E; ++I)
Tys.push_back(unwrap(*I));
@@ -245,7 +245,7 @@ LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType,
return wrap(FunctionType::get(unwrap(ReturnType), Tys, IsVarArg != 0));
}
-int LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy) {
+LLVMBool LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy) {
return unwrap<FunctionType>(FunctionTy)->isVarArg();
}
@@ -267,7 +267,7 @@ void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest) {
/*--.. Operations on struct types ..........................................--*/
LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
- unsigned ElementCount, int Packed) {
+ unsigned ElementCount, LLVMBool Packed) {
std::vector<const Type*> Tys;
for (LLVMTypeRef *I = ElementTypes,
*E = ElementTypes + ElementCount; I != E; ++I)
@@ -277,7 +277,7 @@ LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
}
LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes,
- unsigned ElementCount, int Packed) {
+ unsigned ElementCount, LLVMBool Packed) {
return LLVMStructTypeInContext(LLVMGetGlobalContext(), ElementTypes,
ElementCount, Packed);
}
@@ -294,7 +294,7 @@ void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest) {
*Dest++ = wrap(*I);
}
-int LLVMIsPackedStruct(LLVMTypeRef StructTy) {
+LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) {
return unwrap<StructType>(StructTy)->isPacked();
}
@@ -442,17 +442,17 @@ LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty) {
return wrap(UndefValue::get(unwrap(Ty)));
}
-int LLVMIsConstant(LLVMValueRef Ty) {
+LLVMBool LLVMIsConstant(LLVMValueRef Ty) {
return isa<Constant>(unwrap(Ty));
}
-int LLVMIsNull(LLVMValueRef Val) {
+LLVMBool LLVMIsNull(LLVMValueRef Val) {
if (Constant *C = dyn_cast<Constant>(unwrap(Val)))
return C->isNullValue();
return false;
}
-int LLVMIsUndef(LLVMValueRef Val) {
+LLVMBool LLVMIsUndef(LLVMValueRef Val) {
return isa<UndefValue>(unwrap(Val));
}
@@ -464,7 +464,7 @@ LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty) {
/*--.. Operations on scalar constants ......................................--*/
LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
- int SignExtend) {
+ LLVMBool SignExtend) {
return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), N, SignExtend != 0));
}
@@ -504,7 +504,8 @@ long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal) {
/*--.. Operations on composite constants ...................................--*/
LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
- unsigned Length, int DontNullTerminate) {
+ unsigned Length,
+ LLVMBool DontNullTerminate) {
/* Inverted the sense of AddNull because ', 0)' is a
better mnemonic for null termination than ', 1)'. */
return wrap(ConstantArray::get(*unwrap(C), std::string(Str, Length),
@@ -512,14 +513,14 @@ LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
}
LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
LLVMValueRef *ConstantVals,
- unsigned Count, int Packed) {
+ unsigned Count, LLVMBool Packed) {
return wrap(ConstantStruct::get(*unwrap(C),
unwrap<Constant>(ConstantVals, Count),
Count, Packed != 0));
}
LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
- int DontNullTerminate) {
+ LLVMBool DontNullTerminate) {
return LLVMConstStringInContext(LLVMGetGlobalContext(), Str, Length,
DontNullTerminate);
}
@@ -530,7 +531,7 @@ LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
Length));
}
LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
- int Packed) {
+ LLVMBool Packed) {
return LLVMConstStructInContext(LLVMGetGlobalContext(), ConstantVals, Count,
Packed);
}
@@ -820,7 +821,7 @@ LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal,
}
LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType,
- unsigned isSigned) {
+ LLVMBool isSigned) {
return wrap(ConstantExpr::getIntegerCast(
unwrap<Constant>(ConstantVal),
unwrap(ToType),
@@ -883,10 +884,11 @@ LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
IdxList, NumIdx));
}
-LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString,
- const char *Constraints, int HasSideEffects,
- int IsAlignStack) {
- return wrap(InlineAsm::get(dyn_cast<FunctionType>(unwrap(Ty)), AsmString,
+LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString,
+ const char *Constraints,
+ LLVMBool HasSideEffects,
+ LLVMBool IsAlignStack) {
+ return wrap(InlineAsm::get(dyn_cast<FunctionType>(unwrap(Ty)), AsmString,
Constraints, HasSideEffects, IsAlignStack));
}
@@ -896,7 +898,7 @@ LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global) {
return wrap(unwrap<GlobalValue>(Global)->getParent());
}
-int LLVMIsDeclaration(LLVMValueRef Global) {
+LLVMBool LLVMIsDeclaration(LLVMValueRef Global) {
return unwrap<GlobalValue>(Global)->isDeclaration();
}
@@ -1079,19 +1081,19 @@ void LLVMSetInitializer(LLVMValueRef GlobalVar, LLVMValueRef ConstantVal) {
->setInitializer(unwrap<Constant>(ConstantVal));
}
-int LLVMIsThreadLocal(LLVMValueRef GlobalVar) {
+LLVMBool LLVMIsThreadLocal(LLVMValueRef GlobalVar) {
return unwrap<GlobalVariable>(GlobalVar)->isThreadLocal();
}
-void LLVMSetThreadLocal(LLVMValueRef GlobalVar, int IsThreadLocal) {
+void LLVMSetThreadLocal(LLVMValueRef GlobalVar, LLVMBool IsThreadLocal) {
unwrap<GlobalVariable>(GlobalVar)->setThreadLocal(IsThreadLocal != 0);
}
-int LLVMIsGlobalConstant(LLVMValueRef GlobalVar) {
+LLVMBool LLVMIsGlobalConstant(LLVMValueRef GlobalVar) {
return unwrap<GlobalVariable>(GlobalVar)->isConstant();
}
-void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, int IsConstant) {
+void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant) {
unwrap<GlobalVariable>(GlobalVar)->setConstant(IsConstant != 0);
}
@@ -1285,7 +1287,7 @@ LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB) {
return wrap(static_cast<Value*>(unwrap(BB)));
}
-int LLVMValueIsBasicBlock(LLVMValueRef Val) {
+LLVMBool LLVMValueIsBasicBlock(LLVMValueRef Val) {
return isa<BasicBlock>(unwrap(Val));
}
@@ -1452,11 +1454,11 @@ void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
/*--.. Operations on call instructions (only) ..............................--*/
-int LLVMIsTailCall(LLVMValueRef Call) {
+LLVMBool LLVMIsTailCall(LLVMValueRef Call) {
return unwrap<CallInst>(Call)->isTailCall();
}
-void LLVMSetTailCall(LLVMValueRef Call, int isTailCall) {
+void LLVMSetTailCall(LLVMValueRef Call, LLVMBool isTailCall) {
unwrap<CallInst>(Call)->setTailCall(isTailCall);
}
@@ -1973,9 +1975,11 @@ void LLVMDisposeModuleProvider(LLVMModuleProviderRef MP) {
/*===-- Memory buffers ----------------------------------------------------===*/
-int LLVMCreateMemoryBufferWithContentsOfFile(const char *Path,
- LLVMMemoryBufferRef *OutMemBuf,
- char **OutMessage) {
+LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(
+ const char *Path,
+ LLVMMemoryBufferRef *OutMemBuf,
+ char **OutMessage) {
+
std::string Error;
if (MemoryBuffer *MB = MemoryBuffer::getFile(Path, &Error)) {
*OutMemBuf = wrap(MB);
@@ -1986,8 +1990,8 @@ int LLVMCreateMemoryBufferWithContentsOfFile(const char *Path,
return 1;
}
-int LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
- char **OutMessage) {
+LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
+ char **OutMessage) {
MemoryBuffer *MB = MemoryBuffer::getSTDIN();
if (!MB->getBufferSize()) {
delete MB;
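
The Core.cpp churn replaces bare int with LLVMBool throughout the C API, documenting intent without breaking callers (LLVMBool is presumably a typedef for int introduced alongside these changes; the typedef itself is not shown in these hunks). Existing code stays source-compatible:

    #include <llvm-c/Core.h>

    void example(void) {
      LLVMTypeRef I32 = LLVMInt32Type();
      LLVMTypeRef FnTy = LLVMFunctionType(I32, NULL, 0, /*IsVarArg=*/0);
      LLVMBool VarArg = LLVMIsFunctionVarArg(FnTy);  /* 0 here */
      (void)VarArg;
    }
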
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index e04b6d6..f00f6ee 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -189,7 +189,7 @@ void Function::BuildLazyArguments() const {
// Create the arguments vector, all arguments start out unnamed.
const FunctionType *FT = getFunctionType();
for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
- assert(FT->getParamType(i) != Type::getVoidTy(FT->getContext()) &&
+ assert(!FT->getParamType(i)->isVoidTy() &&
"Cannot have void typed arguments!");
ArgumentList.push_back(new Argument(FT->getParamType(i)));
}
diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp
index 16de1af..ec21773 100644
--- a/lib/VMCore/InlineAsm.cpp
+++ b/lib/VMCore/InlineAsm.cpp
@@ -217,7 +217,7 @@ bool InlineAsm::Verify(const FunctionType *Ty, StringRef ConstStr) {
switch (NumOutputs) {
case 0:
- if (Ty->getReturnType() != Type::getVoidTy(Ty->getContext())) return false;
+ if (!Ty->getReturnType()->isVoidTy()) return false;
break;
case 1:
if (isa<StructType>(Ty->getReturnType())) return false;
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index a5500e6..3fabfd0 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -374,37 +374,6 @@ bool Instruction::isCommutative(unsigned op) {
}
}
-// Code here matches isMalloc from MemoryBuiltins, which is not in VMCore.
-static bool isMalloc(const Value* I) {
- const CallInst *CI = dyn_cast<CallInst>(I);
- if (!CI) {
- const BitCastInst *BCI = dyn_cast<BitCastInst>(I);
- if (!BCI) return false;
-
- CI = dyn_cast<CallInst>(BCI->getOperand(0));
- }
-
- if (!CI)
- return false;
- Function *Callee = CI->getCalledFunction();
- if (Callee == 0 || !Callee->isDeclaration() || Callee->getName() != "malloc")
- return false;
-
- // Check malloc prototype.
- // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
- // attribute will exist.
- const FunctionType *FTy = Callee->getFunctionType();
- if (FTy->getNumParams() != 1)
- return false;
- if (IntegerType *ITy = dyn_cast<IntegerType>(FTy->param_begin()->get())) {
- if (ITy->getBitWidth() != 32 && ITy->getBitWidth() != 64)
- return false;
- return true;
- }
-
- return false;
-}
-
bool Instruction::isSafeToSpeculativelyExecute() const {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
if (Constant *C = dyn_cast<Constant>(getOperand(i)))
@@ -430,7 +399,9 @@ bool Instruction::isSafeToSpeculativelyExecute() const {
case Load: {
if (cast<LoadInst>(this)->isVolatile())
return false;
- if (isa<AllocaInst>(getOperand(0)) || isMalloc(getOperand(0)))
+ // Note that it is not safe to speculate into a malloc'd region because
+ // malloc may return null.
+ if (isa<AllocaInst>(getOperand(0)))
return true;
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(getOperand(0)))
return !GV->hasExternalWeakLinkage();
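
Dropping the malloc special case from isSafeToSpeculativelyExecute is a correctness fix, as the new comment says: malloc may return null, so a load from the returned pointer only executes safely behind the null check. Illustrative C++ (not from the patch):

    #include <cstdlib>

    int readFlag(bool Cond) {
      int *P = static_cast<int *>(std::malloc(sizeof(int)));
      int R = 0;
      if (P && Cond) {
        *P = 1;
        R = *P;   // hoisting this load above the 'P &&' test could fault
      }
      std::free(P);
      return R;
    }
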
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 3e9950e..2619047 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -523,8 +523,7 @@ static Instruction *createMalloc(Instruction *InsertBefore,
MCall->setCallingConv(F->getCallingConv());
if (!F->doesNotAlias(0)) F->setDoesNotAlias(0);
}
- assert(MCall->getType() != Type::getVoidTy(BB->getContext()) &&
- "Malloc has void return type");
+ assert(!MCall->getType()->isVoidTy() && "Malloc has void return type");
return Result;
}
@@ -788,7 +787,7 @@ BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const {
void BranchInst::AssertOK() {
if (isConditional())
- assert(getCondition()->getType() == Type::getInt1Ty(getContext()) &&
+ assert(getCondition()->getType()->isInteger(1) &&
"May only branch on boolean predicates!");
}
@@ -893,7 +892,7 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) {
else {
assert(!isa<BasicBlock>(Amt) &&
"Passed basic block into allocation size parameter! Use other ctor");
- assert(Amt->getType() == Type::getInt32Ty(Context) &&
+ assert(Amt->getType()->isInteger(32) &&
"Allocation array size is not a 32-bit integer!");
}
return Amt;
@@ -904,7 +903,7 @@ AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize,
: UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
getAISize(Ty->getContext(), ArraySize), InsertBefore) {
setAlignment(0);
- assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!");
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
setName(Name);
}
@@ -913,7 +912,7 @@ AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize,
: UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
setAlignment(0);
- assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!");
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
setName(Name);
}
@@ -922,7 +921,7 @@ AllocaInst::AllocaInst(const Type *Ty, const Twine &Name,
: UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
getAISize(Ty->getContext(), 0), InsertBefore) {
setAlignment(0);
- assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!");
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
setName(Name);
}
@@ -931,7 +930,7 @@ AllocaInst::AllocaInst(const Type *Ty, const Twine &Name,
: UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
getAISize(Ty->getContext(), 0), InsertAtEnd) {
setAlignment(0);
- assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!");
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
setName(Name);
}
@@ -940,7 +939,7 @@ AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align,
: UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
getAISize(Ty->getContext(), ArraySize), InsertBefore) {
setAlignment(Align);
- assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!");
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
setName(Name);
}
@@ -949,7 +948,7 @@ AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align,
: UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
setAlignment(Align);
- assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!");
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
setName(Name);
}
@@ -1392,8 +1391,7 @@ ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) {
- if (!isa<VectorType>(Val->getType()) ||
- Index->getType() != Type::getInt32Ty(Val->getContext()))
+ if (!isa<VectorType>(Val->getType()) || !Index->getType()->isInteger(32))
return false;
return true;
}
@@ -1440,7 +1438,7 @@ bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType())
return false;// Second operand of insertelement must be vector element type.
- if (Index->getType() != Type::getInt32Ty(Vec->getContext()))
+ if (!Index->getType()->isInteger(32))
return false; // Third operand of insertelement must be i32.
return true;
}
@@ -1492,7 +1490,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
const VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
if (!isa<Constant>(Mask) || MaskTy == 0 ||
- MaskTy->getElementType() != Type::getInt32Ty(V1->getContext()))
+ !MaskTy->getElementType()->isInteger(32))
return false;
return true;
}
@@ -2287,7 +2285,8 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
bool isSigned, const Twine &Name,
Instruction *InsertBefore) {
- assert(C->getType()->isInteger() && Ty->isInteger() && "Invalid cast");
+ assert(C->getType()->isIntOrIntVector() && Ty->isIntOrIntVector() &&
+ "Invalid integer cast");
unsigned SrcBits = C->getType()->getScalarSizeInBits();
unsigned DstBits = Ty->getScalarSizeInBits();
Instruction::CastOps opcode =
diff --git a/lib/VMCore/IntrinsicInst.cpp b/lib/VMCore/IntrinsicInst.cpp
index 5e0f42e..cb9252e 100644
--- a/lib/VMCore/IntrinsicInst.cpp
+++ b/lib/VMCore/IntrinsicInst.cpp
@@ -8,11 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This file implements methods that make it really easy to deal with intrinsic
-// functions with the isa/dyncast family of functions. In particular, this
-// allows you to do things like:
-//
-// if (DbgStopPointInst *SPI = dyn_cast<DbgStopPointInst>(Inst))
-// ... SPI->getFileName() ... SPI->getDirectory() ...
+// functions.
//
// All intrinsic function calls are instances of the call instruction, so these
// are all subclasses of the CallInst class. Note that none of these classes
@@ -55,25 +51,13 @@ Value *DbgInfoIntrinsic::StripCast(Value *C) {
}
//===----------------------------------------------------------------------===//
-/// DbgStopPointInst - This represents the llvm.dbg.stoppoint instruction.
+/// DbgValueInst - This represents the llvm.dbg.value instruction.
///
-Value *DbgStopPointInst::getFileName() const {
- // Once the operand indices are verified, update this assert
- assert(LLVMDebugVersion == (7 << 16) && "Verify operand indices");
- return getContext()->getOperand(3);
-}
-
-Value *DbgStopPointInst::getDirectory() const {
- // Once the operand indices are verified, update this assert
- assert(LLVMDebugVersion == (7 << 16) && "Verify operand indices");
- return getContext()->getOperand(4);
+const Value *DbgValueInst::getValue() const {
+ return cast<MDNode>(getOperand(1))->getOperand(0);
}
-//===----------------------------------------------------------------------===//
-/// DbgValueInst - This represents the llvm.dbg.value instruction.
-///
-
-Value *DbgValueInst::getValue() const {
+Value *DbgValueInst::getValue() {
return cast<MDNode>(getOperand(1))->getOperand(0);
}
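
With the stoppoint/func.start/region markers gone (see the AutoUpgrade hunks above), llvm.dbg.value and llvm.dbg.declare carry the remaining debug information. A usage sketch for the retained accessor, assuming era-appropriate headers:

    #include "llvm/IntrinsicInst.h"

    const llvm::Value *describedValue(const llvm::Instruction *I) {
      if (const llvm::DbgValueInst *DVI =
              llvm::dyn_cast<llvm::DbgValueInst>(I))
        return DVI->getValue();   // the SSA value this record describes
      return 0;
    }
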
diff --git a/lib/VMCore/Mangler.cpp b/lib/VMCore/Mangler.cpp
index 33eb044..7d9f330 100644
--- a/lib/VMCore/Mangler.cpp
+++ b/lib/VMCore/Mangler.cpp
@@ -16,7 +16,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -24,57 +24,57 @@ static char HexDigit(int V) {
return V < 10 ? V+'0' : V+'A'-10;
}
-static std::string MangleLetter(unsigned char C) {
- char Result[] = { '_', HexDigit(C >> 4), HexDigit(C & 15), '_', 0 };
- return Result;
+static void MangleLetter(SmallVectorImpl<char> &OutName, unsigned char C) {
+ OutName.push_back('_');
+ OutName.push_back(HexDigit(C >> 4));
+ OutName.push_back(HexDigit(C & 15));
+ OutName.push_back('_');
}
/// makeNameProper - We don't want identifier names with non-C-identifier
/// characters in them, so mangle them as appropriate.
///
-std::string Mangler::makeNameProper(const std::string &X,
- ManglerPrefixTy PrefixTy) {
+/// FIXME: This is deprecated, new code should use getNameWithPrefix and use
+/// MCSymbol printing to handle quotes or not etc.
+///
+void Mangler::makeNameProper(SmallVectorImpl<char> &OutName,
+ const Twine &TheName,
+ ManglerPrefixTy PrefixTy) {
+ SmallString<256> TmpData;
+ StringRef X = TheName.toStringRef(TmpData);
assert(!X.empty() && "Cannot mangle empty strings");
if (!UseQuotes) {
- std::string Result;
-
// If X does not start with (char)1, add the prefix.
- bool NeedPrefix = true;
- std::string::const_iterator I = X.begin();
+ StringRef::iterator I = X.begin();
if (*I == 1) {
- NeedPrefix = false;
- ++I; // Skip over the marker.
+ ++I; // Skip over the no-prefix marker.
+ } else {
+ if (PrefixTy == Mangler::Private)
+ OutName.append(PrivatePrefix, PrivatePrefix+strlen(PrivatePrefix));
+ else if (PrefixTy == Mangler::LinkerPrivate)
+ OutName.append(LinkerPrivatePrefix,
+ LinkerPrivatePrefix+strlen(LinkerPrivatePrefix));
+ OutName.append(Prefix, Prefix+strlen(Prefix));
}
// Mangle the first letter specially; don't allow numbers unless the target
// explicitly allows them.
if (!SymbolsCanStartWithDigit && *I >= '0' && *I <= '9')
- Result += MangleLetter(*I++);
+ MangleLetter(OutName, *I++);
- for (std::string::const_iterator E = X.end(); I != E; ++I) {
+ for (StringRef::iterator E = X.end(); I != E; ++I) {
if (!isCharAcceptable(*I))
- Result += MangleLetter(*I);
+ MangleLetter(OutName, *I);
else
- Result += *I;
- }
-
- if (NeedPrefix) {
- Result = Prefix + Result;
-
- if (PrefixTy == Mangler::Private)
- Result = PrivatePrefix + Result;
- else if (PrefixTy == Mangler::LinkerPrivate)
- Result = LinkerPrivatePrefix + Result;
+ OutName.push_back(*I);
}
-
- return Result;
+ return;
}
bool NeedPrefix = true;
bool NeedQuotes = false;
- std::string Result;
- std::string::const_iterator I = X.begin();
+ StringRef::iterator I = X.begin();
if (*I == 1) {
NeedPrefix = false;
++I; // Skip over the marker.
@@ -87,7 +87,7 @@ std::string Mangler::makeNameProper(const std::string &X,
// Do an initial scan of the string, checking to see if we need quotes or
// to escape a '"' or not.
if (!NeedQuotes)
- for (std::string::const_iterator E = X.end(); I != E; ++I)
+ for (StringRef::iterator E = X.end(); I != E; ++I)
if (!isCharAcceptable(*I)) {
NeedQuotes = true;
break;
@@ -95,43 +95,57 @@ std::string Mangler::makeNameProper(const std::string &X,
// In the common case, we don't need quotes. Handle this quickly.
if (!NeedQuotes) {
- if (!NeedPrefix)
- return X.substr(1); // Strip off the \001.
-
- Result = Prefix + X;
+ if (!NeedPrefix) {
+ OutName.append(X.begin()+1, X.end()); // Strip off the \001.
+ return;
+ }
if (PrefixTy == Mangler::Private)
- Result = PrivatePrefix + Result;
+ OutName.append(PrivatePrefix, PrivatePrefix+strlen(PrivatePrefix));
else if (PrefixTy == Mangler::LinkerPrivate)
- Result = LinkerPrivatePrefix + Result;
-
- return Result;
- }
-
- if (NeedPrefix)
- Result = X.substr(0, I-X.begin());
+ OutName.append(LinkerPrivatePrefix,
+ LinkerPrivatePrefix+strlen(LinkerPrivatePrefix));
- // Otherwise, construct the string the expensive way.
- for (std::string::const_iterator E = X.end(); I != E; ++I) {
- if (*I == '"')
- Result += "_QQ_";
- else if (*I == '\n')
- Result += "_NL_";
+ if (Prefix[0] == 0)
+ ; // Common noop, no prefix.
+ else if (Prefix[1] == 0)
+ OutName.push_back(Prefix[0]); // Common, one character prefix.
else
- Result += *I;
+ OutName.append(Prefix, Prefix+strlen(Prefix)); // Arbitrary prefix.
+ OutName.append(X.begin(), X.end());
+ return;
}
+ // Add leading quote.
+ OutName.push_back('"');
+
+ // Add prefixes unless disabled.
if (NeedPrefix) {
- Result = Prefix + Result;
-
if (PrefixTy == Mangler::Private)
- Result = PrivatePrefix + Result;
+ OutName.append(PrivatePrefix, PrivatePrefix+strlen(PrivatePrefix));
else if (PrefixTy == Mangler::LinkerPrivate)
- Result = LinkerPrivatePrefix + Result;
+ OutName.append(LinkerPrivatePrefix,
+ LinkerPrivatePrefix+strlen(LinkerPrivatePrefix));
+ OutName.append(Prefix, Prefix+strlen(Prefix));
+ }
+
+ // Add the piece that we already scanned through.
+ OutName.append(X.begin()+!NeedPrefix, I);
+
+ // Otherwise, construct the string the expensive way.
+ for (StringRef::iterator E = X.end(); I != E; ++I) {
+ if (*I == '"') {
+ const char *Quote = "_QQ_";
+ OutName.append(Quote, Quote+4);
+ } else if (*I == '\n') {
+ const char *Newline = "_NL_";
+ OutName.append(Newline, Newline+4);
+ } else
+ OutName.push_back(*I);
}
- Result = '"' + Result + '"';
- return Result;
+ // Add trailing quote.
+ OutName.push_back('"');
}
/// getMangledName - Returns the mangled name of V, an LLVM Value,
@@ -139,6 +153,9 @@ std::string Mangler::makeNameProper(const std::string &X,
/// specified suffix. If 'ForcePrivate' is specified, the label is specified
/// to have a private label prefix.
///
+/// FIXME: This is deprecated, new code should use getNameWithPrefix and use
+/// MCSymbol printing to handle quotes or not etc.
+///
std::string Mangler::getMangledName(const GlobalValue *GV, const char *Suffix,
bool ForcePrivate) {
assert((!isa<Function>(GV) || !cast<Function>(GV)->isIntrinsic()) &&
@@ -148,8 +165,11 @@ std::string Mangler::getMangledName(const GlobalValue *GV, const char *Suffix,
(GV->hasPrivateLinkage() || ForcePrivate) ? Mangler::Private :
GV->hasLinkerPrivateLinkage() ? Mangler::LinkerPrivate : Mangler::Default;
- if (GV->hasName())
- return makeNameProper(GV->getNameStr() + Suffix, PrefixTy);
+ SmallString<128> Result;
+ if (GV->hasName()) {
+ makeNameProper(Result, GV->getNameStr() + Suffix, PrefixTy);
+ return Result.str().str();
+ }
// Get the ID for the global, assigning a new one if we haven't got one
// already.
@@ -157,7 +177,38 @@ std::string Mangler::getMangledName(const GlobalValue *GV, const char *Suffix,
if (ID == 0) ID = NextAnonGlobalID++;
// Must mangle the global into a unique ID.
- return makeNameProper("__unnamed_" + utostr(ID) + Suffix, PrefixTy);
+ makeNameProper(Result, "__unnamed_" + utostr(ID) + Suffix, PrefixTy);
+ return Result.str().str();
+}
+
+/// getNameWithPrefix - Fill OutName with the appropriate prefix followed by
+/// the specified name, used as the global variable name. GVName must not be
+/// empty.
+void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
+ const Twine &GVName, ManglerPrefixTy PrefixTy) {
+ SmallString<256> TmpData;
+ StringRef Name = GVName.toStringRef(TmpData);
+ assert(!Name.empty() && "getNameWithPrefix requires non-empty name");
+
+ // If the global name does not begin with \1, add the appropriate prefixes.
+ if (Name[0] != '\1') {
+ if (PrefixTy == Mangler::Private)
+ OutName.append(PrivatePrefix, PrivatePrefix+strlen(PrivatePrefix));
+ else if (PrefixTy == Mangler::LinkerPrivate)
+ OutName.append(LinkerPrivatePrefix,
+ LinkerPrivatePrefix+strlen(LinkerPrivatePrefix));
+
+ if (Prefix[0] == 0)
+ ; // Common noop, no prefix.
+ else if (Prefix[1] == 0)
+ OutName.push_back(Prefix[0]); // Common, one character prefix.
+ else
+ OutName.append(Prefix, Prefix+strlen(Prefix)); // Arbitrary prefix.
+ } else {
+ Name = Name.substr(1);
+ }
+
+ OutName.append(Name.begin(), Name.end());
}
@@ -167,33 +218,28 @@ std::string Mangler::getMangledName(const GlobalValue *GV, const char *Suffix,
void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
const GlobalValue *GV,
bool isImplicitlyPrivate) {
-
- // If the global is anonymous or not led with \1, then add the appropriate
- // prefix.
- if (!GV->hasName() || GV->getName()[0] != '\1') {
+ // If this global has a name, handle it simply.
+ if (GV->hasName()) {
+ ManglerPrefixTy PrefixTy = Mangler::Default;
if (GV->hasPrivateLinkage() || isImplicitlyPrivate)
- OutName.append(PrivatePrefix, PrivatePrefix+strlen(PrivatePrefix));
+ PrefixTy = Mangler::Private;
else if (GV->hasLinkerPrivateLinkage())
- OutName.append(LinkerPrivatePrefix,
- LinkerPrivatePrefix+strlen(LinkerPrivatePrefix));;
- OutName.append(Prefix, Prefix+strlen(Prefix));
- }
-
- // If the global has a name, just append it now.
- if (GV->hasName()) {
- StringRef Name = GV->getName();
+ PrefixTy = Mangler::LinkerPrivate;
- // Strip off the prefix marker if present.
- if (Name[0] != '\1')
- OutName.append(Name.begin(), Name.end());
- else
- OutName.append(Name.begin()+1, Name.end());
- return;
+ return getNameWithPrefix(OutName, GV->getName(), PrefixTy);
}
// If the global variable doesn't have a name, return a unique name for the
// global based on a numbering.
+ // Anonymous names always get prefixes.
+ if (GV->hasPrivateLinkage() || isImplicitlyPrivate)
+ OutName.append(PrivatePrefix, PrivatePrefix+strlen(PrivatePrefix));
+ else if (GV->hasLinkerPrivateLinkage())
+ OutName.append(LinkerPrivatePrefix,
+ LinkerPrivatePrefix+strlen(LinkerPrivatePrefix));
+ OutName.append(Prefix, Prefix+strlen(Prefix));
+
// Get the ID for the global, assigning a new one if we haven't got one
// already.
unsigned &ID = AnonGlobalIDs[GV];
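
The Mangler rewrite threads a caller-supplied SmallVectorImpl<char> through instead of returning std::string, eliminating temporary heap strings on a hot path; getMangledName survives only as a deprecated wrapper. Usage sketch (Mang, GV, and OS stand in for objects assumed to be in scope):

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/Mangler.h"
    #include "llvm/Support/raw_ostream.h"

    void emitName(llvm::Mangler &Mang, const llvm::GlobalValue *GV,
                  llvm::raw_ostream &OS) {
      llvm::SmallString<128> Name;
      Mang.getNameWithPrefix(Name, GV, /*isImplicitlyPrivate=*/false);
      OS << Name.str();   // StringRef view of the buffer; no extra copy
    }
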
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index 8e9aab9..7988b44 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -18,6 +18,7 @@
#include "llvm/Instruction.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/SmallString.h"
#include "SymbolTableListTraitsImpl.h"
#include "llvm/Support/ValueHandle.h"
using namespace llvm;
@@ -31,7 +32,7 @@ MDString::MDString(LLVMContext &C, StringRef S)
MDString *MDString::get(LLVMContext &Context, StringRef Str) {
LLVMContextImpl *pImpl = Context.pImpl;
- StringMapEntry<MDString *> &Entry =
+ StringMapEntry<MDString *> &Entry =
pImpl->MDStringCache.GetOrCreateValue(Str);
MDString *&S = Entry.getValue();
if (!S) S = new MDString(Context, Entry.getKey());
@@ -40,7 +41,7 @@ MDString *MDString::get(LLVMContext &Context, StringRef Str) {
MDString *MDString::get(LLVMContext &Context, const char *Str) {
LLVMContextImpl *pImpl = Context.pImpl;
- StringMapEntry<MDString *> &Entry =
+ StringMapEntry<MDString *> &Entry =
pImpl->MDStringCache.GetOrCreateValue(Str ? StringRef(Str) : StringRef());
MDString *&S = Entry.getValue();
if (!S) S = new MDString(Context, Entry.getKey());
@@ -58,11 +59,11 @@ class MDNodeOperand : public CallbackVH {
public:
MDNodeOperand(Value *V, MDNode *P) : CallbackVH(V), Parent(P) {}
~MDNodeOperand() {}
-
+
void set(Value *V) {
setValPtr(V);
}
-
+
virtual void deleted();
virtual void allUsesReplacedWith(Value *NV);
};
@@ -94,7 +95,7 @@ MDNode::MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals,
bool isFunctionLocal)
: MetadataBase(Type::getMetadataTy(C), Value::MDNodeVal) {
NumOperands = NumVals;
-
+
if (isFunctionLocal)
setValueSubclassData(getSubclassDataFromValue() | FunctionLocalBit);
@@ -107,19 +108,82 @@ MDNode::MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals,
/// ~MDNode - Destroy MDNode.
MDNode::~MDNode() {
- assert((getSubclassDataFromValue() & DestroyFlag) != 0 &&
+ assert((getSubclassDataFromValue() & DestroyFlag) != 0 &&
"Not being destroyed through destroy()?");
if (!isNotUniqued()) {
LLVMContextImpl *pImpl = getType()->getContext().pImpl;
pImpl->MDNodeSet.RemoveNode(this);
}
-
+
// Destroy the operands.
for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands;
Op != E; ++Op)
Op->~MDNodeOperand();
}
+#ifndef NDEBUG
+static Function *assertLocalFunction(const MDNode *N,
+ SmallPtrSet<const MDNode *, 32> &Visited) {
+ Function *F = NULL;
+ // Only visit each MDNode once.
+ if (!Visited.insert(N)) return F;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ Value *V = N->getOperand(i);
+ Function *NewF = NULL;
+ if (!V) continue;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ NewF = I->getParent()->getParent();
+ else if (BasicBlock *BB = dyn_cast<BasicBlock>(V))
+ NewF = BB->getParent();
+ else if (Argument *A = dyn_cast<Argument>(V))
+ NewF = A->getParent();
+ else if (MDNode *MD = dyn_cast<MDNode>(V))
+ if (MD->isFunctionLocal())
+ NewF = assertLocalFunction(MD, Visited);
+ if (F && NewF) assert(F == NewF && "inconsistent function-local metadata");
+ if (!F) F = NewF;
+ }
+ return F;
+}
+#endif
+
+static Function *getFunctionHelper(const MDNode *N,
+ SmallPtrSet<const MDNode *, 32> &Visited) {
+ assert(N->isFunctionLocal() && "Should only be called on function-local MD");
+#ifndef NDEBUG
+ return assertLocalFunction(N, Visited);
+#endif
+ Function *F = NULL;
+ // Only visit each MDNode once.
+ if (!Visited.insert(N)) return F;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ Value *V = N->getOperand(i);
+ if (!V) continue;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ F = I->getParent()->getParent();
+ else if (BasicBlock *BB = dyn_cast<BasicBlock>(V))
+ F = BB->getParent();
+ else if (Argument *A = dyn_cast<Argument>(V))
+ F = A->getParent();
+ else if (MDNode *MD = dyn_cast<MDNode>(V))
+ if (MD->isFunctionLocal())
+ F = getFunctionHelper(MD, Visited);
+ if (F) break;
+ }
+ return F;
+}
+
+// getFunction - If this metadata is function-local and recursively has a
+// function-local operand, return the first such operand's parent function.
+// Otherwise, return null.
+Function *MDNode::getFunction() const {
+ if (!isFunctionLocal()) return NULL;
+ SmallPtrSet<const MDNode *, 32> Visited;
+ return getFunctionHelper(this, Visited);
+}
+
// destroy - Delete this node. Only when there are no uses.
void MDNode::destroy() {
setValueSubclassData(getSubclassDataFromValue() | DestroyFlag);
@@ -128,9 +192,8 @@ void MDNode::destroy() {
free(this);
}
-
-MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals,
- bool isFunctionLocal) {
+MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals,
+ unsigned NumVals, FunctionLocalness FL) {
LLVMContextImpl *pImpl = Context.pImpl;
FoldingSetNodeID ID;
for (unsigned i = 0; i != NumVals; ++i)
@@ -139,16 +202,46 @@ MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals,
void *InsertPoint;
MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint);
if (!N) {
+ bool isFunctionLocal = false;
+ switch (FL) {
+ case FL_Unknown:
+ for (unsigned i = 0; i != NumVals; ++i) {
+ Value *V = Vals[i];
+ if (!V) continue;
+ if (isa<Instruction>(V) || isa<Argument>(V) || isa<BasicBlock>(V) ||
+ (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal())) {
+ isFunctionLocal = true;
+ break;
+ }
+ }
+ break;
+ case FL_No:
+ isFunctionLocal = false;
+ break;
+ case FL_Yes:
+ isFunctionLocal = true;
+ break;
+ }
+
// Coallocate space for the node and Operands together, then placement new.
void *Ptr = malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand));
N = new (Ptr) MDNode(Context, Vals, NumVals, isFunctionLocal);
-
+
// InsertPoint will have been set by the FindNodeOrInsertPos call.
pImpl->MDNodeSet.InsertNode(N, InsertPoint);
}
return N;
}
+MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals) {
+ return getMDNode(Context, Vals, NumVals, FL_Unknown);
+}
+
+MDNode *MDNode::getWhenValsUnresolved(LLVMContext &Context, Value*const* Vals,
+ unsigned NumVals, bool isFunctionLocal) {
+ return getMDNode(Context, Vals, NumVals, isFunctionLocal ? FL_Yes : FL_No);
+}
+
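// Not part of this patch -- the two public entry points above, in use
// (Ctx is a hypothetical LLVMContext&):
//   Value *Ops[] = { ConstantInt::get(Type::getInt32Ty(Ctx), 42) };
//   MDNode *N = MDNode::get(Ctx, Ops, 1); // FL_Unknown: the operand scan finds
//                                         // nothing local, so N stays global
// getWhenValsUnresolved() skips that scan; the parsers use it while operands
// may still be unresolved forward references.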
/// getOperand - Return specified operand.
Value *MDNode::getOperand(unsigned i) const {
return *getOperandPtr(const_cast<MDNode*>(this), i);
@@ -163,7 +256,7 @@ void MDNode::Profile(FoldingSetNodeID &ID) const {
// Replace value from this node's operand list.
void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
Value *From = *Op;
-
+
if (From == To)
return;
@@ -173,7 +266,7 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
// If this node is already not being uniqued (because one of the operands
// already went to null), then there is nothing else to do here.
if (isNotUniqued()) return;
-
+
LLVMContextImpl *pImpl = getType()->getContext().pImpl;
// Remove "this" from the context map. FoldingSet doesn't have to reprofile
@@ -187,7 +280,7 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
setIsNotUniqued();
return;
}
-
+
// Now that the node is out of the folding set, get ready to reinsert it.
// First, check to see if another node with the same operands already exists
// in the set. If it doesn't exist, this returns the position to insert it.
@@ -210,21 +303,40 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
//===----------------------------------------------------------------------===//
// NamedMDNode implementation.
//
-static SmallVector<TrackingVH<MetadataBase>, 4> &getNMDOps(void *Operands) {
- return *(SmallVector<TrackingVH<MetadataBase>, 4>*)Operands;
+
+namespace llvm {
+// ilist_traits<NamedMDNode> specialization: keep the owning module's
+// MDSymbolTable in sync as named nodes enter and leave the list.
+void ilist_traits<NamedMDNode>::addNodeToList(NamedMDNode *N) {
+ assert(N->getParent() == 0 && "Value already in a container!");
+ Module *Owner = getListOwner();
+ N->setParent(Owner);
+ MDSymbolTable &ST = Owner->getMDSymbolTable();
+ ST.insert(N->getName(), N);
+}
+
+void ilist_traits<NamedMDNode>::removeNodeFromList(NamedMDNode *N) {
+ N->setParent(0);
+ Module *Owner = getListOwner();
+ MDSymbolTable &ST = Owner->getMDSymbolTable();
+ ST.remove(N->getName());
+}
+}
+
+static SmallVector<WeakVH, 4> &getNMDOps(void *Operands) {
+ return *(SmallVector<WeakVH, 4>*)Operands;
}
NamedMDNode::NamedMDNode(LLVMContext &C, const Twine &N,
- MetadataBase *const *MDs,
+ MDNode *const *MDs,
unsigned NumMDs, Module *ParentModule)
- : MetadataBase(Type::getMetadataTy(C), Value::NamedMDNodeVal), Parent(0) {
+ : Value(Type::getMetadataTy(C), Value::NamedMDNodeVal), Parent(0) {
setName(N);
-
- Operands = new SmallVector<TrackingVH<MetadataBase>, 4>();
-
- SmallVector<TrackingVH<MetadataBase>, 4> &Node = getNMDOps(Operands);
+ Operands = new SmallVector<WeakVH, 4>();
+
+ SmallVector<WeakVH, 4> &Node = getNMDOps(Operands);
for (unsigned i = 0; i != NumMDs; ++i)
- Node.push_back(TrackingVH<MetadataBase>(MDs[i]));
+ Node.push_back(WeakVH(MDs[i]));
if (ParentModule)
ParentModule->getNamedMDList().push_back(this);
@@ -232,9 +344,9 @@ NamedMDNode::NamedMDNode(LLVMContext &C, const Twine &N,
NamedMDNode *NamedMDNode::Create(const NamedMDNode *NMD, Module *M) {
assert(NMD && "Invalid source NamedMDNode!");
- SmallVector<MetadataBase *, 4> Elems;
+ SmallVector<MDNode *, 4> Elems;
Elems.reserve(NMD->getNumOperands());
-
+
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
Elems.push_back(NMD->getOperand(i));
return new NamedMDNode(NMD->getContext(), NMD->getName().data(),
@@ -252,14 +364,14 @@ unsigned NamedMDNode::getNumOperands() const {
}
/// getOperand - Return specified operand.
-MetadataBase *NamedMDNode::getOperand(unsigned i) const {
+MDNode *NamedMDNode::getOperand(unsigned i) const {
assert(i < getNumOperands() && "Invalid Operand number!");
- return getNMDOps(Operands)[i];
+ return dyn_cast_or_null<MDNode>(getNMDOps(Operands)[i]);
}
/// addOperand - Add metadata Operand.
-void NamedMDNode::addOperand(MetadataBase *M) {
- getNMDOps(Operands).push_back(TrackingVH<MetadataBase>(M));
+void NamedMDNode::addOperand(MDNode *M) {
+ getNMDOps(Operands).push_back(WeakVH(M));
}
/// eraseFromParent - Drop all references and remove the node from parent
@@ -273,6 +385,26 @@ void NamedMDNode::dropAllReferences() {
getNMDOps(Operands).clear();
}
+/// setName - Set the name of this named metadata.
+void NamedMDNode::setName(const Twine &NewName) {
+ assert(!NewName.isTriviallyEmpty() && "Invalid named metadata name!");
+
+ SmallString<256> NameData;
+ StringRef NameRef = NewName.toStringRef(NameData);
+
+ // Name isn't changing?
+ if (getName() == NameRef)
+ return;
+
+ // Remove the stale entry for the old name before re-keying the table.
+ if (Parent && !Name.empty())
+ Parent->getMDSymbolTable().remove(getName());
+ Name = NameRef.str();
+ if (Parent)
+ Parent->getMDSymbolTable().insert(NameRef, this);
+}
+
+/// getName - Return a constant reference to this named metadata's name.
+StringRef NamedMDNode::getName() const {
+ return StringRef(Name);
+}
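// Not part of this patch -- a sketch of the new registration path, assuming a
// Module *M is in scope; creation registers the node in the module-level
// MDSymbolTable, so lookup no longer goes through the value symbol table:
//   NamedMDNode *NMD = NamedMDNode::Create(M->getContext(), "sample", 0, 0, M);
//   assert(M->getNamedMetadata("sample") == NMD);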
//===----------------------------------------------------------------------===//
// LLVMContext MDKind naming implementation.
@@ -299,9 +431,9 @@ static bool isValidName(StringRef MDName) {
/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
unsigned LLVMContext::getMDKindID(StringRef Name) const {
assert(isValidName(Name) && "Invalid MDNode name");
-
+
unsigned &Entry = pImpl->CustomMDKindNames[Name];
-
+
// If this is new, assign it its ID.
if (Entry == 0) Entry = pImpl->CustomMDKindNames.size();
return Entry;
@@ -313,7 +445,7 @@ void LLVMContext::getMDKindNames(SmallVectorImpl<StringRef> &Names) const {
Names.resize(pImpl->CustomMDKindNames.size()+1);
Names[0] = "";
for (StringMap<unsigned>::const_iterator I = pImpl->CustomMDKindNames.begin(),
- E = pImpl->CustomMDKindNames.end(); I != E; ++I)
+ E = pImpl->CustomMDKindNames.end(); I != E; ++I)
// MD kinds are numbered from 1.
Names[I->second] = I->first();
}
@@ -336,7 +468,7 @@ MDNode *Instruction::getMetadataImpl(const char *Kind) const {
/// Node is null.
void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
if (Node == 0 && !hasMetadata()) return;
-
+
// Handle the case when we're adding/updating metadata on an instruction.
if (Node) {
LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
@@ -351,24 +483,24 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
return;
}
}
-
+
// No replacement, just add it to the list.
Info.push_back(std::make_pair(KindID, Node));
return;
}
-
+
// Otherwise, we're removing metadata from an instruction.
assert(hasMetadata() && getContext().pImpl->MetadataStore.count(this) &&
"HasMetadata bit out of date!");
LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
-
+
// Common case is removing the only entry.
if (Info.size() == 1 && Info[0].first == KindID) {
getContext().pImpl->MetadataStore.erase(this);
setHasMetadata(false);
return;
}
-
+
// Handle replacement of an existing value.
for (unsigned i = 0, e = Info.size(); i != e; ++i)
if (Info[i].first == KindID) {
@@ -383,7 +515,7 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
MDNode *Instruction::getMetadataImpl(unsigned KindID) const {
LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
assert(hasMetadata() && !Info.empty() && "Shouldn't have called this");
-
+
for (LLVMContextImpl::MDMapTy::iterator I = Info.begin(), E = Info.end();
I != E; ++I)
if (I->first == KindID)
@@ -398,10 +530,10 @@ void Instruction::getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned,
const LLVMContextImpl::MDMapTy &Info =
getContext().pImpl->MetadataStore.find(this)->second;
assert(!Info.empty() && "Shouldn't have called this");
-
+
Result.clear();
Result.append(Info.begin(), Info.end());
-
+
// Sort the resulting array so it is stable.
if (Result.size() > 1)
array_pod_sort(Result.begin(), Result.end());
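A minimal sketch, not from the patch, of the instruction-metadata API these
hunks maintain; it assumes an Instruction *I and an MDNode *N are already in
scope:

    unsigned KindID = I->getContext().getMDKindID("dbg"); // kind IDs start at 1
    I->setMetadata(KindID, N);           // attach, or replace an existing node
    MDNode *MD = I->getMetadata(KindID); // read back; null when absent
    I->setMetadata(KindID, 0);           // detach; clears the HasMetadata bit
                                         // when this was the only entry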
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index a7f503b..503e708 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -59,6 +59,7 @@ Module::Module(StringRef MID, LLVMContext& C)
: Context(C), ModuleID(MID), DataLayout("") {
ValSymTab = new ValueSymbolTable();
TypeSymTab = new TypeSymbolTable();
+ NamedMDSymTab = new MDSymbolTable();
}
Module::~Module() {
@@ -70,15 +71,17 @@ Module::~Module() {
NamedMDList.clear();
delete ValSymTab;
delete TypeSymTab;
+ delete NamedMDSymTab;
}
/// Target endian information...
Module::Endianness Module::getEndianness() const {
- std::string temp = DataLayout;
+ StringRef temp = DataLayout;
Module::Endianness ret = AnyEndianness;
while (!temp.empty()) {
- std::string token = getToken(temp, "-");
+ StringRef token;
+ tie(token, temp) = getToken(temp, "-");
if (token[0] == 'e') {
ret = LittleEndian;
@@ -92,15 +95,17 @@ Module::Endianness Module::getEndianness() const {
/// Target Pointer Size information...
Module::PointerSize Module::getPointerSize() const {
- std::string temp = DataLayout;
+ StringRef temp = DataLayout;
Module::PointerSize ret = AnyPointerSize;
while (!temp.empty()) {
- std::string token = getToken(temp, "-");
- char signal = getToken(token, ":")[0];
+ StringRef token, signalToken;
+ tie(token, temp) = getToken(temp, "-");
+ tie(signalToken, token) = getToken(token, ":");
- if (signal == 'p') {
- int size = atoi(getToken(token, ":").c_str());
+ if (signalToken[0] == 'p') {
+ int size = 0;
+ getToken(token, ":").first.getAsInteger(10, size);
if (size == 32)
ret = Pointer32;
else if (size == 64)
@@ -307,15 +312,14 @@ GlobalAlias *Module::getNamedAlias(StringRef Name) const {
/// specified name. This method returns null if a NamedMDNode with the
/// specified name is not found.
NamedMDNode *Module::getNamedMetadata(StringRef Name) const {
- return dyn_cast_or_null<NamedMDNode>(getValueSymbolTable().lookup(Name));
+ return NamedMDSymTab->lookup(Name);
}
/// getOrInsertNamedMetadata - Return the first named MDNode in the module
/// with the specified name. This method returns a new NamedMDNode if a
/// NamedMDNode with the specified name is not found.
NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) {
- NamedMDNode *NMD =
- dyn_cast_or_null<NamedMDNode>(getValueSymbolTable().lookup(Name));
+ NamedMDNode *NMD = NamedMDSymTab->lookup(Name);
if (!NMD)
NMD = NamedMDNode::Create(getContext(), Name, NULL, 0, this);
return NMD;
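Not from the patch -- how a client reaches named metadata after this change,
with hypothetical names (M is a Module*, N a global MDNode*):

    NamedMDNode *NMD = M->getOrInsertNamedMetadata("sample.md");
    NMD->addOperand(N); // operands are plain MDNode*s held through WeakVH now
    assert(M->getNamedMetadata("sample.md") == NMD); // hits the MDSymbolTable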
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index 6bea7a8..39da8fb 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -19,6 +19,7 @@
#include "llvm/ModuleProvider.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Atomic.h"
@@ -51,7 +52,7 @@ bool Pass::mustPreserveAnalysisID(const PassInfo *AnalysisID) const {
// dumpPassStructure - Implement the -debug-passes=Structure option
void Pass::dumpPassStructure(unsigned Offset) {
- errs().indent(Offset*2) << getPassName() << "\n";
+ dbgs().indent(Offset*2) << getPassName() << "\n";
}
/// getPassName - Return a nice clean name for a pass. This usually
@@ -95,7 +96,7 @@ void Pass::print(raw_ostream &O,const Module*) const {
// dump - call print(cerr);
void Pass::dump() const {
- print(errs(), 0);
+ print(dbgs(), 0);
}
//===----------------------------------------------------------------------===//
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index d688385..b37b2ae 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -15,6 +15,7 @@
#include "llvm/PassManagers.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include "llvm/Module.h"
#include "llvm/ModuleProvider.h"
@@ -132,7 +133,7 @@ public:
// Print passes managed by this manager
void dumpPassStructure(unsigned Offset) {
- llvm::errs() << std::string(Offset*2, ' ') << "BasicBlockPass Manager\n";
+ llvm::dbgs() << std::string(Offset*2, ' ') << "BasicBlockPass Manager\n";
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
BasicBlockPass *BP = getContainedPass(Index);
BP->dumpPassStructure(Offset + 1);
@@ -272,7 +273,7 @@ public:
// Print passes managed by this manager
void dumpPassStructure(unsigned Offset) {
- llvm::errs() << std::string(Offset*2, ' ') << "ModulePass Manager\n";
+ llvm::dbgs() << std::string(Offset*2, ' ') << "ModulePass Manager\n";
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
ModulePass *MP = getContainedPass(Index);
MP->dumpPassStructure(Offset + 1);
@@ -595,11 +596,11 @@ void PMTopLevelManager::dumpArguments() const {
if (PassDebugging < Arguments)
return;
- errs() << "Pass Arguments: ";
+ dbgs() << "Pass Arguments: ";
for (SmallVector<PMDataManager *, 8>::const_iterator I = PassManagers.begin(),
E = PassManagers.end(); I != E; ++I)
(*I)->dumpPassArguments();
- errs() << "\n";
+ dbgs() << "\n";
}
void PMTopLevelManager::initializeAllAnalysisInfo() {
@@ -718,8 +719,8 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
// Remove this analysis
if (PassDebugging >= Details) {
Pass *S = Info->second;
- errs() << " -- '" << P->getPassName() << "' is not preserving '";
- errs() << S->getPassName() << "'\n";
+ dbgs() << " -- '" << P->getPassName() << "' is not preserving '";
+ dbgs() << S->getPassName() << "'\n";
}
AvailableAnalysis.erase(Info);
}
@@ -742,8 +743,8 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
// Remove this analysis
if (PassDebugging >= Details) {
Pass *S = Info->second;
- errs() << " -- '" << P->getPassName() << "' is not preserving '";
- errs() << S->getPassName() << "'\n";
+ dbgs() << " -- '" << P->getPassName() << "' is not preserving '";
+ dbgs() << S->getPassName() << "'\n";
}
InheritedAnalysis[Index]->erase(Info);
}
@@ -764,9 +765,9 @@ void PMDataManager::removeDeadPasses(Pass *P, StringRef Msg,
TPM->collectLastUses(DeadPasses, P);
if (PassDebugging >= Details && !DeadPasses.empty()) {
- errs() << " -*- '" << P->getPassName();
- errs() << "' is the last user of following pass instances.";
- errs() << " Free these instances\n";
+ dbgs() << " -*- '" << P->getPassName();
+ dbgs() << "' is the last user of following pass instances.";
+ dbgs() << " Free these instances\n";
}
for (SmallVector<Pass *, 12>::iterator I = DeadPasses.begin(),
@@ -959,7 +960,7 @@ void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{
for (SmallVector<Pass *, 12>::iterator I = LUses.begin(),
E = LUses.end(); I != E; ++I) {
- llvm::errs() << "--" << std::string(Offset*2, ' ');
+ llvm::dbgs() << "--" << std::string(Offset*2, ' ');
(*I)->dumpPassStructure(0);
}
}
@@ -972,7 +973,7 @@ void PMDataManager::dumpPassArguments() const {
else
if (const PassInfo *PI = (*I)->getPassInfo())
if (!PI->isAnalysisGroup())
- errs() << " -" << PI->getPassArgument();
+ dbgs() << " -" << PI->getPassArgument();
}
}
@@ -981,35 +982,35 @@ void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
StringRef Msg) {
if (PassDebugging < Executions)
return;
- errs() << (void*)this << std::string(getDepth()*2+1, ' ');
+ dbgs() << (void*)this << std::string(getDepth()*2+1, ' ');
switch (S1) {
case EXECUTION_MSG:
- errs() << "Executing Pass '" << P->getPassName();
+ dbgs() << "Executing Pass '" << P->getPassName();
break;
case MODIFICATION_MSG:
- errs() << "Made Modification '" << P->getPassName();
+ dbgs() << "Made Modification '" << P->getPassName();
break;
case FREEING_MSG:
- errs() << " Freeing Pass '" << P->getPassName();
+ dbgs() << " Freeing Pass '" << P->getPassName();
break;
default:
break;
}
switch (S2) {
case ON_BASICBLOCK_MSG:
- errs() << "' on BasicBlock '" << Msg << "'...\n";
+ dbgs() << "' on BasicBlock '" << Msg << "'...\n";
break;
case ON_FUNCTION_MSG:
- errs() << "' on Function '" << Msg << "'...\n";
+ dbgs() << "' on Function '" << Msg << "'...\n";
break;
case ON_MODULE_MSG:
- errs() << "' on Module '" << Msg << "'...\n";
+ dbgs() << "' on Module '" << Msg << "'...\n";
break;
case ON_LOOP_MSG:
- errs() << "' on Loop '" << Msg << "'...\n";
+ dbgs() << "' on Loop '" << Msg << "'...\n";
break;
case ON_CG_MSG:
- errs() << "' on Call Graph Nodes '" << Msg << "'...\n";
+ dbgs() << "' on Call Graph Nodes '" << Msg << "'...\n";
break;
default:
break;
@@ -1039,12 +1040,12 @@ void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P,
assert(PassDebugging >= Details);
if (Set.empty())
return;
- errs() << (void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:";
+ dbgs() << (void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:";
for (unsigned i = 0; i != Set.size(); ++i) {
- if (i) errs() << ',';
- errs() << ' ' << Set[i]->getPassName();
+ if (i) dbgs() << ',';
+ dbgs() << ' ' << Set[i]->getPassName();
}
- errs() << '\n';
+ dbgs() << '\n';
}
/// Add RequiredPass into list of lower level passes required by pass P.
@@ -1067,8 +1068,8 @@ void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
// checks whether any lower level manager will be able to provide this
// analysis info on demand or not.
#ifndef NDEBUG
- errs() << "Unable to schedule '" << RequiredPass->getPassName();
- errs() << "' required by '" << P->getPassName() << "'\n";
+ dbgs() << "Unable to schedule '" << RequiredPass->getPassName();
+ dbgs() << "' required by '" << P->getPassName() << "'\n";
#endif
llvm_unreachable("Unable to schedule pass");
}
@@ -1300,7 +1301,7 @@ bool FunctionPassManagerImpl::run(Function &F) {
char FPPassManager::ID = 0;
/// Print passes managed by this manager
void FPPassManager::dumpPassStructure(unsigned Offset) {
- llvm::errs() << std::string(Offset*2, ' ') << "FunctionPass Manager\n";
+ llvm::dbgs() << std::string(Offset*2, ' ') << "FunctionPass Manager\n";
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
FunctionPass *FP = getContainedPass(Index);
FP->dumpPassStructure(Offset + 1);
@@ -1698,19 +1699,19 @@ LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) {
return wrap(new FunctionPassManager(unwrap(P)));
}
-int LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) {
+LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) {
return unwrap<PassManager>(PM)->run(*unwrap(M));
}
-int LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) {
+LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) {
return unwrap<FunctionPassManager>(FPM)->doInitialization();
}
-int LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) {
+LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) {
return unwrap<FunctionPassManager>(FPM)->run(*unwrap<Function>(F));
}
-int LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) {
+LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) {
return unwrap<FunctionPassManager>(FPM)->doFinalization();
}
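Not from the patch -- the LLVMBool rename above leaves call sites unchanged; a
C-API client still tests the result the same way (PM and Mod are hypothetical
handles):

    LLVMBool Changed = LLVMRunPassManager(PM, Mod);
    if (Changed) {
      /* at least one pass modified the module */
    }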
diff --git a/lib/VMCore/PrintModulePass.cpp b/lib/VMCore/PrintModulePass.cpp
index 3d4f19d..f0f6e7a 100644
--- a/lib/VMCore/PrintModulePass.cpp
+++ b/lib/VMCore/PrintModulePass.cpp
@@ -16,6 +16,7 @@
#include "llvm/Function.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -26,7 +27,7 @@ namespace {
bool DeleteStream; // Delete the ostream in our dtor?
public:
static char ID;
- PrintModulePass() : ModulePass(&ID), Out(&errs()),
+ PrintModulePass() : ModulePass(&ID), Out(&dbgs()),
DeleteStream(false) {}
PrintModulePass(raw_ostream *o, bool DS)
: ModulePass(&ID), Out(o), DeleteStream(DS) {}
@@ -51,7 +52,7 @@ namespace {
bool DeleteStream; // Delete the ostream in our dtor?
public:
static char ID;
- PrintFunctionPass() : FunctionPass(&ID), Banner(""), Out(&errs()),
+ PrintFunctionPass() : FunctionPass(&ID), Banner(""), Out(&dbgs()),
DeleteStream(false) {}
PrintFunctionPass(const std::string &B, raw_ostream *o, bool DS)
: FunctionPass(&ID), Banner(B), Out(o), DeleteStream(DS) {}
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index fd46aa1..044de4f 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -124,6 +124,11 @@ const Type *Type::getScalarType() const {
return this;
}
+/// isInteger - Return true if this is an IntegerType of the specified width.
+bool Type::isInteger(unsigned Bitwidth) const {
+ return isInteger() && cast<IntegerType>(this)->getBitWidth() == Bitwidth;
+}
+
/// isIntOrIntVector - Return true if this is an integer type or a vector of
/// integer types.
///
@@ -280,7 +285,7 @@ std::string Type::getDescription() const {
bool StructType::indexValid(const Value *V) const {
// Structure indexes require 32-bit integer constants.
- if (V->getType() == Type::getInt32Ty(V->getContext()))
+ if (V->getType()->isInteger(32))
if (const ConstantInt *CU = dyn_cast<ConstantInt>(V))
return indexValid(CU->getZExtValue());
return false;
@@ -487,7 +492,7 @@ PointerType::PointerType(const Type *E, unsigned AddrSpace)
OpaqueType::OpaqueType(LLVMContext &C) : DerivedType(C, OpaqueTyID) {
setAbstract(true);
#ifdef DEBUG_MERGE_TYPES
- DEBUG(errs() << "Derived new type: " << *this << "\n");
+ DEBUG(dbgs() << "Derived new type: " << *this << "\n");
#endif
}
@@ -782,7 +787,7 @@ const IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
pImpl->IntegerTypes.add(IVT, ITy);
}
#ifdef DEBUG_MERGE_TYPES
- DEBUG(errs() << "Derived new type: " << *ITy << "\n");
+ DEBUG(dbgs() << "Derived new type: " << *ITy << "\n");
#endif
return ITy;
}
@@ -825,7 +830,7 @@ FunctionType *FunctionType::get(const Type *ReturnType,
}
#ifdef DEBUG_MERGE_TYPES
- DEBUG(errs() << "Derived new type: " << FT << "\n");
+ DEBUG(dbgs() << "Derived new type: " << FT << "\n");
#endif
return FT;
}
@@ -846,7 +851,7 @@ ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) {
pImpl->ArrayTypes.add(AVT, AT = new ArrayType(ElementType, NumElements));
}
#ifdef DEBUG_MERGE_TYPES
- DEBUG(errs() << "Derived new type: " << *AT << "\n");
+ DEBUG(dbgs() << "Derived new type: " << *AT << "\n");
#endif
return AT;
}
@@ -870,7 +875,7 @@ VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) {
pImpl->VectorTypes.add(PVT, PT = new VectorType(ElementType, NumElements));
}
#ifdef DEBUG_MERGE_TYPES
- DEBUG(errs() << "Derived new type: " << *PT << "\n");
+ DEBUG(dbgs() << "Derived new type: " << *PT << "\n");
#endif
return PT;
}
@@ -902,7 +907,7 @@ StructType *StructType::get(LLVMContext &Context,
pImpl->StructTypes.add(STV, ST);
}
#ifdef DEBUG_MERGE_TYPES
- DEBUG(errs() << "Derived new type: " << *ST << "\n");
+ DEBUG(dbgs() << "Derived new type: " << *ST << "\n");
#endif
return ST;
}
@@ -946,7 +951,7 @@ PointerType *PointerType::get(const Type *ValueType, unsigned AddressSpace) {
pImpl->PointerTypes.add(PVT, PT = new PointerType(ValueType, AddressSpace));
}
#ifdef DEBUG_MERGE_TYPES
- DEBUG(errs() << "Derived new type: " << *PT << "\n");
+ DEBUG(dbgs() << "Derived new type: " << *PT << "\n");
#endif
return PT;
}
@@ -1009,13 +1014,13 @@ void Type::removeAbstractTypeUser(AbstractTypeUser *U) const {
AbstractTypeUsers.erase(AbstractTypeUsers.begin()+i);
#ifdef DEBUG_MERGE_TYPES
- DEBUG(errs() << " remAbstractTypeUser[" << (void*)this << ", "
+ DEBUG(dbgs() << " remAbstractTypeUser[" << (void*)this << ", "
<< *this << "][" << i << "] User = " << U << "\n");
#endif
if (AbstractTypeUsers.empty() && getRefCount() == 0 && isAbstract()) {
#ifdef DEBUG_MERGE_TYPES
- DEBUG(errs() << "DELETEing unused abstract type: <" << *this
+ DEBUG(dbgs() << "DELETEing unused abstract type: <" << *this
<< ">[" << (void*)this << "]" << "\n");
#endif
@@ -1041,7 +1046,7 @@ void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
pImpl->AbstractTypeDescriptions.clear();
#ifdef DEBUG_MERGE_TYPES
- DEBUG(errs() << "REFINING abstract type [" << (void*)this << " "
+ DEBUG(dbgs() << "REFINING abstract type [" << (void*)this << " "
<< *this << "] to [" << (void*)NewType << " "
<< *NewType << "]!\n");
#endif
@@ -1078,7 +1083,7 @@ void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
unsigned OldSize = AbstractTypeUsers.size(); OldSize=OldSize;
#ifdef DEBUG_MERGE_TYPES
- DEBUG(errs() << " REFINING user " << OldSize-1 << "[" << (void*)User
+ DEBUG(dbgs() << " REFINING user " << OldSize-1 << "[" << (void*)User
<< "] of abstract type [" << (void*)this << " "
<< *this << "] to [" << (void*)NewTy.get() << " "
<< *NewTy << "]!\n");
@@ -1109,7 +1114,7 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
//
void DerivedType::notifyUsesThatTypeBecameConcrete() {
#ifdef DEBUG_MERGE_TYPES
- DEBUG(errs() << "typeIsREFINED type: " << (void*)this << " " << *this <<"\n");
+ DEBUG(dbgs() << "typeIsREFINED type: " << (void*)this << " " << *this <<"\n");
#endif
unsigned OldSize = AbstractTypeUsers.size(); OldSize=OldSize;
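Not from the patch -- the new width predicate in use; unlike the pointer
comparison against Type::getInt32Ty(C) it replaces, it needs no LLVMContext at
the call site (V is a hypothetical Value*):

    if (V->getType()->isInteger(32)) {
      // V is exactly an i32
    }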
diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp
index 0d0cdf5..b4daf0f 100644
--- a/lib/VMCore/TypeSymbolTable.cpp
+++ b/lib/VMCore/TypeSymbolTable.cpp
@@ -15,6 +15,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -58,7 +59,7 @@ Type* TypeSymbolTable::remove(iterator Entry) {
#if DEBUG_SYMBOL_TABLE
dump();
- errs() << " Removing Value: " << Result->getDescription() << "\n";
+ dbgs() << " Removing Value: " << Result->getDescription() << "\n";
#endif
tmap.erase(Entry);
@@ -67,7 +68,7 @@ Type* TypeSymbolTable::remove(iterator Entry) {
// list...
if (Result->isAbstract()) {
#if DEBUG_ABSTYPE
- errs() << "Removing abstract type from symtab"
+ dbgs() << "Removing abstract type from symtab"
<< Result->getDescription()
<< "\n";
#endif
@@ -87,7 +88,7 @@ void TypeSymbolTable::insert(StringRef Name, const Type* T) {
#if DEBUG_SYMBOL_TABLE
dump();
- errs() << " Inserted type: " << Name << ": " << T->getDescription() << "\n";
+ dbgs() << " Inserted type: " << Name << ": " << T->getDescription() << "\n";
#endif
} else {
// If there is a name conflict...
@@ -99,7 +100,7 @@ void TypeSymbolTable::insert(StringRef Name, const Type* T) {
#if DEBUG_SYMBOL_TABLE
dump();
- errs() << " Inserting type: " << UniqueName << ": "
+ dbgs() << " Inserting type: " << UniqueName << ": "
<< T->getDescription() << "\n";
#endif
@@ -111,7 +112,7 @@ void TypeSymbolTable::insert(StringRef Name, const Type* T) {
if (T->isAbstract()) {
cast<DerivedType>(T)->addAbstractTypeUser(this);
#if DEBUG_ABSTYPE
- errs() << "Added abstract type to ST: " << T->getDescription() << "\n";
+ dbgs() << "Added abstract type to ST: " << T->getDescription() << "\n";
#endif
}
}
@@ -127,14 +128,14 @@ void TypeSymbolTable::refineAbstractType(const DerivedType *OldType,
for (iterator I = begin(), E = end(); I != E; ++I) {
if (I->second == (Type*)OldType) { // FIXME when Types aren't const.
#if DEBUG_ABSTYPE
- errs() << "Removing type " << OldType->getDescription() << "\n";
+ dbgs() << "Removing type " << OldType->getDescription() << "\n";
#endif
OldType->removeAbstractTypeUser(this);
I->second = (Type*)NewType; // TODO FIXME when types aren't const
if (NewType->isAbstract()) {
#if DEBUG_ABSTYPE
- errs() << "Added type " << NewType->getDescription() << "\n";
+ dbgs() << "Added type " << NewType->getDescription() << "\n";
#endif
cast<DerivedType>(NewType)->addAbstractTypeUser(this);
}
@@ -154,13 +155,13 @@ void TypeSymbolTable::typeBecameConcrete(const DerivedType *AbsTy) {
}
static void DumpTypes(const std::pair<const std::string, const Type*>& T ) {
- errs() << " '" << T.first << "' = ";
+ dbgs() << " '" << T.first << "' = ";
T.second->dump();
- errs() << "\n";
+ dbgs() << "\n";
}
void TypeSymbolTable::dump() const {
- errs() << "TypeSymbolPlane: ";
+ dbgs() << "TypeSymbolPlane: ";
for_each(tmap.begin(), tmap.end(), DumpTypes);
}
diff --git a/lib/VMCore/TypesContext.h b/lib/VMCore/TypesContext.h
index e7950bd..93a801b 100644
--- a/lib/VMCore/TypesContext.h
+++ b/lib/VMCore/TypesContext.h
@@ -302,7 +302,7 @@ public:
void RefineAbstractType(TypeClass *Ty, const DerivedType *OldType,
const Type *NewType) {
#ifdef DEBUG_MERGE_TYPES
- DEBUG(errs() << "RefineAbstractType(" << (void*)OldType << "[" << *OldType
+ DEBUG(dbgs() << "RefineAbstractType(" << (void*)OldType << "[" << *OldType
<< "], " << (void*)NewType << " [" << *NewType << "])\n");
#endif
@@ -408,11 +408,11 @@ public:
void print(const char *Arg) const {
#ifdef DEBUG_MERGE_TYPES
- DEBUG(errs() << "TypeMap<>::" << Arg << " table contents:\n");
+ DEBUG(dbgs() << "TypeMap<>::" << Arg << " table contents:\n");
unsigned i = 0;
for (typename std::map<ValType, PATypeHolder>::const_iterator I
= Map.begin(), E = Map.end(); I != E; ++I)
- DEBUG(errs() << " " << (++i) << ". " << (void*)I->second.get() << " "
+ DEBUG(dbgs() << " " << (++i) << ". " << (void*)I->second.get() << " "
<< *I->second.get() << "\n");
#endif
}
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index fe1219f..40679bf 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -44,14 +44,12 @@ Value::Value(const Type *ty, unsigned scid)
SubclassOptionalData(0), SubclassData(0), VTy(checkType(ty)),
UseList(0), Name(0) {
if (isa<CallInst>(this) || isa<InvokeInst>(this))
- assert((VTy->isFirstClassType() ||
- VTy == Type::getVoidTy(ty->getContext()) ||
+ assert((VTy->isFirstClassType() || VTy->isVoidTy() ||
isa<OpaqueType>(ty) || VTy->getTypeID() == Type::StructTyID) &&
"invalid CallInst type!");
else if (!isa<Constant>(this) && !isa<BasicBlock>(this))
- assert((VTy->isFirstClassType() ||
- VTy == Type::getVoidTy(ty->getContext()) ||
- isa<OpaqueType>(ty)) &&
+ assert((VTy->isFirstClassType() || VTy->isVoidTy() ||
+ isa<OpaqueType>(ty)) &&
"Cannot create non-first-class values except for constants!");
}
@@ -68,9 +66,9 @@ Value::~Value() {
// a <badref>
//
if (!use_empty()) {
- errs() << "While deleting: " << *VTy << " %" << getNameStr() << "\n";
+ dbgs() << "While deleting: " << *VTy << " %" << getNameStr() << "\n";
for (use_iterator I = use_begin(), E = use_end(); I != E; ++I)
- errs() << "Use still stuck around after Def is destroyed:"
+ dbgs() << "Use still stuck around after Def is destroyed:"
<< **I << "\n";
}
#endif
@@ -172,17 +170,13 @@ void Value::setName(const Twine &NewName) {
return;
SmallString<256> NameData;
- NewName.toVector(NameData);
-
- const char *NameStr = NameData.data();
- unsigned NameLen = NameData.size();
+ StringRef NameRef = NewName.toStringRef(NameData);
// Name isn't changing?
- if (getName() == StringRef(NameStr, NameLen))
+ if (getName() == NameRef)
return;
- assert(getType() != Type::getVoidTy(getContext()) &&
- "Cannot assign a name to void values!");
+ assert(!getType()->isVoidTy() && "Cannot assign a name to void values!");
// Get the symbol table to update for this object.
ValueSymbolTable *ST;
@@ -190,7 +184,7 @@ void Value::setName(const Twine &NewName) {
return; // Cannot set a name on this value (e.g. constant).
if (!ST) { // No symbol table to update? Just do the change.
- if (NameLen == 0) {
+ if (NameRef.empty()) {
// Free the name for this value.
Name->Destroy();
Name = 0;
@@ -204,7 +198,7 @@ void Value::setName(const Twine &NewName) {
// then reallocated.
// Create the new name.
- Name = ValueName::Create(NameStr, NameStr+NameLen);
+ Name = ValueName::Create(NameRef.begin(), NameRef.end());
Name->setValue(this);
return;
}
@@ -217,12 +211,12 @@ void Value::setName(const Twine &NewName) {
Name->Destroy();
Name = 0;
- if (NameLen == 0)
+ if (NameRef.empty())
return;
}
// Name is changing to something new.
- Name = ST->createValueName(StringRef(NameStr, NameLen), this);
+ Name = ST->createValueName(NameRef, this);
}
@@ -522,7 +516,7 @@ void ValueHandleBase::ValueIsDeleted(Value *V) {
// All callbacks, weak references, and assertingVHs should be dropped by now.
if (V->HasValueHandle) {
#ifndef NDEBUG // Only in +Asserts mode...
- errs() << "While deleting: " << *V->getType() << " %" << V->getNameStr()
+ dbgs() << "While deleting: " << *V->getType() << " %" << V->getNameStr()
<< "\n";
if (pImpl->ValueHandles[V]->getKind() == Assert)
llvm_unreachable("An asserting value handle still pointed to this"
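Not from the patch -- the Twine-to-StringRef pattern these Value.cpp hunks
adopt: render into a stack buffer only when the Twine isn't already one
contiguous string (SomeTwine is hypothetical):

    SmallString<256> Storage;
    StringRef S = SomeTwine.toStringRef(Storage); // Storage used only if needed
    if (S.empty()) return;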
diff --git a/lib/VMCore/ValueSymbolTable.cpp b/lib/VMCore/ValueSymbolTable.cpp
index 9d39a50..d30a9d6 100644
--- a/lib/VMCore/ValueSymbolTable.cpp
+++ b/lib/VMCore/ValueSymbolTable.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
ValueSymbolTable::~ValueSymbolTable() {
#ifndef NDEBUG // Only do this in -g mode...
for (iterator VI = vmap.begin(), VE = vmap.end(); VI != VE; ++VI)
- errs() << "Value still in symbol table! Type = '"
+ dbgs() << "Value still in symbol table! Type = '"
<< VI->getValue()->getType()->getDescription() << "' Name = '"
<< VI->getKeyData() << "'\n";
assert(vmap.empty() && "Values remain in symbol table!");
@@ -38,7 +38,7 @@ void ValueSymbolTable::reinsertValue(Value* V) {
// Try inserting the name, assuming it won't conflict.
if (vmap.insert(V->Name)) {
- //DEBUG(errs() << " Inserted value: " << V->Name << ": " << *V << "\n");
+ //DEBUG(dbgs() << " Inserted value: " << V->Name << ": " << *V << "\n");
return;
}
@@ -62,14 +62,14 @@ void ValueSymbolTable::reinsertValue(Value* V) {
// Newly inserted name. Success!
NewName.setValue(V);
V->Name = &NewName;
- //DEBUG(errs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
+ //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
return;
}
}
}
void ValueSymbolTable::removeValueName(ValueName *V) {
- //DEBUG(errs() << " Removing Value: " << V->getKeyData() << "\n");
+ //DEBUG(dbgs() << " Removing Value: " << V->getKeyData() << "\n");
// Remove the value from the symbol table.
vmap.remove(V);
}
@@ -82,7 +82,7 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
ValueName &Entry = vmap.GetOrCreateValue(Name);
if (Entry.getValue() == 0) {
Entry.setValue(V);
- //DEBUG(errs() << " Inserted value: " << Entry.getKeyData() << ": "
+ //DEBUG(dbgs() << " Inserted value: " << Entry.getKeyData() << ": "
// << *V << "\n");
return &Entry;
}
@@ -102,7 +102,7 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
if (NewName.getValue() == 0) {
// Newly inserted name. Success!
NewName.setValue(V);
- //DEBUG(errs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
+ //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
return &NewName;
}
}
@@ -112,10 +112,12 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
// dump - print out the symbol table
//
void ValueSymbolTable::dump() const {
- //DEBUG(errs() << "ValueSymbolTable:\n");
+ //DEBUG(dbgs() << "ValueSymbolTable:\n");
for (const_iterator I = begin(), E = end(); I != E; ++I) {
- //DEBUG(errs() << " '" << I->getKeyData() << "' = ");
+ //DEBUG(dbgs() << " '" << I->getKeyData() << "' = ");
I->getValue()->dump();
- //DEBUG(errs() << "\n");
+ //DEBUG(dbgs() << "\n");
}
}
+
+MDSymbolTable::~MDSymbolTable() { }
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 30528bf..ec475e4 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -56,6 +56,7 @@
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -85,9 +86,9 @@ namespace { // Anonymous namespace for class
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
if (I->empty() || !I->back().isTerminator()) {
- errs() << "Basic Block does not have terminator!\n";
- WriteAsOperand(errs(), I, true);
- errs() << "\n";
+ dbgs() << "Basic Block does not have terminator!\n";
+ WriteAsOperand(dbgs(), I, true);
+ dbgs() << "\n";
Broken = true;
}
}
@@ -262,12 +263,12 @@ namespace {
default: llvm_unreachable("Unknown action");
case AbortProcessAction:
MessagesStr << "compilation aborted!\n";
- errs() << MessagesStr.str();
+ dbgs() << MessagesStr.str();
// Client should choose different reaction if abort is not desired
abort();
case PrintMessageAction:
MessagesStr << "verification continues.\n";
- errs() << MessagesStr.str();
+ dbgs() << MessagesStr.str();
return false;
case ReturnStatusAction:
MessagesStr << "compilation terminated.\n";
@@ -1589,9 +1590,10 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
default:
break;
case Intrinsic::dbg_declare: // llvm.dbg.declare
- if (Constant *C = dyn_cast<Constant>(CI.getOperand(1)))
- Assert1(C && !isa<ConstantPointerNull>(C),
- "invalid llvm.dbg.declare intrinsic call", &CI);
+ if (MDNode *MD = dyn_cast<MDNode>(CI.getOperand(1)))
+ if (Constant *C = dyn_cast<Constant>(MD->getOperand(0)))
+ Assert1(C && !isa<ConstantPointerNull>(C),
+ "invalid llvm.dbg.declare intrinsic call", &CI);
break;
case Intrinsic::memcpy:
case Intrinsic::memmove:
diff --git a/runtime/libprofile/exported_symbols.lst b/runtime/libprofile/exported_symbols.lst
index 45c6d5e..aafafb6 100644
--- a/runtime/libprofile/exported_symbols.lst
+++ b/runtime/libprofile/exported_symbols.lst
@@ -1,6 +1,4 @@
-llvm_start_func_profiling
-llvm_start_block_profiling
llvm_start_edge_profiling
llvm_start_opt_edge_profiling
llvm_start_basic_block_tracing
diff --git a/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll b/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
index f0f1535..f699ba2 100644
--- a/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
+++ b/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
@@ -23,9 +23,9 @@ entry:
cond_true34: ; preds = %entry
%tmp631 = getelementptr %struct.usb_hcd* %hcd, i32 0, i32 2, i64
2305843009213693950 ; <i64*> [#uses=1]
- %tmp70 = bitcast i64* %tmp631 to %struct.device** ;
+ %tmp70 = bitcast i64* %tmp631 to %struct.device**
- %tmp71 = load %struct.device** %tmp70, align 8 ;
+ %tmp71 = load %struct.device** %tmp70, align 8
ret i32 undef
diff --git a/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll b/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll
index 7f82ea4..ba57662 100644
--- a/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll
+++ b/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -analyze -scalar-evolution -disable-output \
-; RUN: -scalar-evolution-max-iterations=0 | grep {Loop bb: backedge-taken count is 100}
+; RUN: -scalar-evolution-max-iterations=0 | grep {Loop %bb: backedge-taken count is 100}
; PR1533
@array = weak global [101 x i32] zeroinitializer, align 32 ; <[100 x i32]*> [#uses=1]
diff --git a/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll b/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll
index f623da1..ce8f725 100644
--- a/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll
+++ b/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop bb: backedge-taken count is (-1 + (-1 \\* %x) + %y)}
+; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop %bb: backedge-taken count is (-1 + (-1 \\* %x) + %y)}
; PR1597
define i32 @f(i32 %x, i32 %y) {
diff --git a/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll b/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll
index c8e483e..6685778 100644
--- a/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll
+++ b/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop header: backedge-taken count is (0 smax %n)}
+; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop %header: backedge-taken count is (0 smax %n)}
define void @foo(i32 %n) {
entry:
diff --git a/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll b/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll
index cb9a182..addf346 100644
--- a/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll
+++ b/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop loop: backedge-taken count is (100 + (-100 smax %n))}
+; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop %loop: backedge-taken count is (100 + (-100 smax %n))}
; PR2002
define void @foo(i8 %n) {
diff --git a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
index daeb26a..f9dd40f 100644
--- a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -analyze -scalar-evolution -disable-output |& \
-; RUN: grep {Loop bb: backedge-taken count is (7 + (-1 \\* %argc))}
+; RUN: grep {Loop %bb: backedge-taken count is (7 + (-1 \\* %argc))}
; XFAIL: *
define i32 @main(i32 %argc, i8** %argv) nounwind {
diff --git a/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll b/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll
index 9dda78b..9ee781f 100644
--- a/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -analyze -scalar-evolution -disable-output \
-; RUN: | grep {Loop bb: Unpredictable backedge-taken count\\.}
+; RUN: | grep {Loop %bb: Unpredictable backedge-taken count\\.}
; ScalarEvolution can't compute a trip count because it doesn't know if
; dividing by the stride will have a remainder. This could theoretically
diff --git a/test/Analysis/ScalarEvolution/avoid-smax-0.ll b/test/Analysis/ScalarEvolution/avoid-smax-0.ll
index b733d6a..55d3bd5 100644
--- a/test/Analysis/ScalarEvolution/avoid-smax-0.ll
+++ b/test/Analysis/ScalarEvolution/avoid-smax-0.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop bb3: backedge-taken count is (-1 + %n)}
+; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop %bb3: backedge-taken count is (-1 + %n)}
; We don't want to use a max in the trip count expression in
; this testcase.
diff --git a/test/Analysis/ScalarEvolution/max-trip-count.ll b/test/Analysis/ScalarEvolution/max-trip-count.ll
index 506401d..a4fdcd0 100644
--- a/test/Analysis/ScalarEvolution/max-trip-count.ll
+++ b/test/Analysis/ScalarEvolution/max-trip-count.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -analyze -scalar-evolution -disable-output \
-; RUN: | grep {\{%d,+,\[^\{\}\]\*\}<bb>}
+; RUN: | grep {\{%d,+,\[^\{\}\]\*\}<%bb>}
; ScalarEvolution should be able to understand the loop and eliminate the casts.
diff --git a/test/Analysis/ScalarEvolution/nsw-offset.ll b/test/Analysis/ScalarEvolution/nsw-offset.ll
index 1e165bf..fd0dfe6 100644
--- a/test/Analysis/ScalarEvolution/nsw-offset.ll
+++ b/test/Analysis/ScalarEvolution/nsw-offset.ll
@@ -18,11 +18,11 @@ bb: ; preds = %bb.nph, %bb1
%i.01 = phi i32 [ %16, %bb1 ], [ 0, %bb.nph ] ; <i32> [#uses=5]
; CHECK: %1 = sext i32 %i.01 to i64
-; CHECK: --> {0,+,2}<bb>
+; CHECK: --> {0,+,2}<%bb>
%1 = sext i32 %i.01 to i64 ; <i64> [#uses=1]
; CHECK: %2 = getelementptr inbounds double* %d, i64 %1
-; CHECK: --> {%d,+,16}<bb>
+; CHECK: --> {%d,+,16}<%bb>
%2 = getelementptr inbounds double* %d, i64 %1 ; <double*> [#uses=1]
%3 = load double* %2, align 8 ; <double> [#uses=1]
@@ -32,11 +32,11 @@ bb: ; preds = %bb.nph, %bb1
%7 = or i32 %i.01, 1 ; <i32> [#uses=1]
; CHECK: %8 = sext i32 %7 to i64
-; CHECK: --> {1,+,2}<bb>
+; CHECK: --> {1,+,2}<%bb>
%8 = sext i32 %7 to i64 ; <i64> [#uses=1]
; CHECK: %9 = getelementptr inbounds double* %q, i64 %8
-; CHECK: {(8 + %q),+,16}<bb>
+; CHECK: {(8 + %q),+,16}<%bb>
%9 = getelementptr inbounds double* %q, i64 %8 ; <double*> [#uses=1]
; Artificially repeat the above three instructions, this time using
@@ -44,11 +44,11 @@ bb: ; preds = %bb.nph, %bb1
%t7 = add nsw i32 %i.01, 1 ; <i32> [#uses=1]
; CHECK: %t8 = sext i32 %t7 to i64
-; CHECK: --> {1,+,2}<bb>
+; CHECK: --> {1,+,2}<%bb>
%t8 = sext i32 %t7 to i64 ; <i64> [#uses=1]
; CHECK: %t9 = getelementptr inbounds double* %q, i64 %t8
-; CHECK: {(8 + %q),+,16}<bb>
+; CHECK: {(8 + %q),+,16}<%bb>
%t9 = getelementptr inbounds double* %q, i64 %t8 ; <double*> [#uses=1]
%10 = load double* %9, align 8 ; <double> [#uses=1]
@@ -72,5 +72,5 @@ return: ; preds = %bb1.return_crit_edg
ret void
}
-; CHECK: Loop bb: backedge-taken count is ((-1 + %n) /u 2)
-; CHECK: Loop bb: max backedge-taken count is 1073741823
+; CHECK: Loop %bb: backedge-taken count is ((-1 + %n) /u 2)
+; CHECK: Loop %bb: max backedge-taken count is 1073741823
diff --git a/test/Analysis/ScalarEvolution/nsw.ll b/test/Analysis/ScalarEvolution/nsw.ll
index c31edab..e4f2b29 100644
--- a/test/Analysis/ScalarEvolution/nsw.ll
+++ b/test/Analysis/ScalarEvolution/nsw.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep { --> {.*,+,.*}<bb>} | count 8
+; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep { --> {.*,+,.*}<%bb>} | count 8
; The addrecs in this loop are analyzable only by using nsw information.
diff --git a/test/Analysis/ScalarEvolution/sext-inreg.ll b/test/Analysis/ScalarEvolution/sext-inreg.ll
index 1612835..44878225 100644
--- a/test/Analysis/ScalarEvolution/sext-inreg.ll
+++ b/test/Analysis/ScalarEvolution/sext-inreg.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -analyze -scalar-evolution -disable-output > %t
-; RUN: grep {sext i57 \{0,+,199\}<bb> to i64} %t | count 1
-; RUN: grep {sext i59 \{0,+,199\}<bb> to i64} %t | count 1
+; RUN: grep {sext i57 \{0,+,199\}<%bb> to i64} %t | count 1
+; RUN: grep {sext i59 \{0,+,199\}<%bb> to i64} %t | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.6"
diff --git a/test/Analysis/ScalarEvolution/sext-iv-0.ll b/test/Analysis/ScalarEvolution/sext-iv-0.ll
index 8f887c4..05983c1 100644
--- a/test/Analysis/ScalarEvolution/sext-iv-0.ll
+++ b/test/Analysis/ScalarEvolution/sext-iv-0.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -disable-output -scalar-evolution -analyze \
-; RUN: | grep { --> \{-128,+,1\}<bb1> Exits: 127} | count 5
+; RUN: | grep { --> \{-128,+,1\}<%bb1> Exits: 127} | count 5
; Convert (sext {-128,+,1}) to {sext(-128),+,sext(1)}, since the
; trip count is within range where this is safe.
diff --git a/test/Analysis/ScalarEvolution/sext-iv-1.ll b/test/Analysis/ScalarEvolution/sext-iv-1.ll
index 02c3206..0bf51d9 100644
--- a/test/Analysis/ScalarEvolution/sext-iv-1.ll
+++ b/test/Analysis/ScalarEvolution/sext-iv-1.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -disable-output -scalar-evolution -analyze \
-; RUN: | grep { --> (sext i. \{.\*,+,.\*\}<bb1> to i64)} | count 5
+; RUN: | grep { --> (sext i. \{.\*,+,.\*\}<%bb1> to i64)} | count 5
; Don't convert (sext {...,+,...}) to {sext(...),+,sext(...)} in cases
; where the trip count is not within range.
diff --git a/test/Analysis/ScalarEvolution/sext-iv-2.ll b/test/Analysis/ScalarEvolution/sext-iv-2.ll
index b25c237..fc39cae 100644
--- a/test/Analysis/ScalarEvolution/sext-iv-2.ll
+++ b/test/Analysis/ScalarEvolution/sext-iv-2.ll
@@ -1,9 +1,9 @@
; RUN: opt < %s -analyze -scalar-evolution -disable-output | FileCheck %s
; CHECK: %tmp3 = sext i8 %tmp2 to i32
-; CHECK: --> (sext i8 {0,+,1}<bb1> to i32) Exits: -1
+; CHECK: --> (sext i8 {0,+,1}<%bb1> to i32) Exits: -1
; CHECK: %tmp4 = mul i32 %tmp3, %i.02
-; CHECK: --> ((sext i8 {0,+,1}<bb1> to i32) * {0,+,1}<bb>) Exits: {0,+,-1}<bb>
+; CHECK: --> ((sext i8 {0,+,1}<%bb1> to i32) * {0,+,1}<%bb>) Exits: {0,+,-1}<%bb>
; These sexts are not foldable.
diff --git a/test/Analysis/ScalarEvolution/trip-count3.ll b/test/Analysis/ScalarEvolution/trip-count3.ll
index 2409831..7d8e0c6 100644
--- a/test/Analysis/ScalarEvolution/trip-count3.ll
+++ b/test/Analysis/ScalarEvolution/trip-count3.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -scalar-evolution -analyze -disable-output \
-; RUN: | grep {Loop bb3\\.i: Unpredictable backedge-taken count\\.}
+; RUN: | grep {Loop %bb3\\.i: Unpredictable backedge-taken count\\.}
; ScalarEvolution can't compute a trip count because it doesn't know if
; dividing by the stride will have a remainder. This could theoretically
diff --git a/test/Analysis/ScalarEvolution/trip-count7.ll b/test/Analysis/ScalarEvolution/trip-count7.ll
index 0cd8d7c..74c856f 100644
--- a/test/Analysis/ScalarEvolution/trip-count7.ll
+++ b/test/Analysis/ScalarEvolution/trip-count7.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -analyze -scalar-evolution -disable-output \
-; RUN: | grep {Loop bb7.i: Unpredictable backedge-taken count\\.}
+; RUN: | grep {Loop %bb7.i: Unpredictable backedge-taken count\\.}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/Analysis/ScalarEvolution/trip-count8.ll b/test/Analysis/ScalarEvolution/trip-count8.ll
index c49f5ce..5063342 100644
--- a/test/Analysis/ScalarEvolution/trip-count8.ll
+++ b/test/Analysis/ScalarEvolution/trip-count8.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -analyze -scalar-evolution -disable-output \
-; RUN: | grep {Loop for\\.body: backedge-taken count is (-1 + \[%\]ecx)}
+; RUN: | grep {Loop %for\\.body: backedge-taken count is (-1 + \[%\]ecx)}
; PR4599
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/Analysis/ScalarEvolution/zext-wrap.ll b/test/Analysis/ScalarEvolution/zext-wrap.ll
index 9ff99be..c4ac5de 100644
--- a/test/Analysis/ScalarEvolution/zext-wrap.ll
+++ b/test/Analysis/ScalarEvolution/zext-wrap.ll
@@ -11,7 +11,7 @@ bb.i: ; preds = %bb1.i, %bb.nph
; This cast shouldn't be folded into the addrec.
; CHECK: %tmp = zext i8 %l_95.0.i1 to i16
-; CHECK: --> (zext i8 {0,+,-1}<bb.i> to i16) Exits: 2
+; CHECK: --> (zext i8 {0,+,-1}<%bb.i> to i16) Exits: 2
%tmp = zext i8 %l_95.0.i1 to i16
diff --git a/test/Assembler/functionlocal-metadata.ll b/test/Assembler/functionlocal-metadata.ll
new file mode 100644
index 0000000..8265aa1
--- /dev/null
+++ b/test/Assembler/functionlocal-metadata.ll
@@ -0,0 +1,35 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+define void @Foo(i32 %a, i32 %b) {
+entry:
+ %0 = add i32 %a, 1 ; <i32> [#uses=1]
+ %two = add i32 %b, %0 ; <i32> [#uses=0]
+ %1 = alloca i32 ; <i32*> [#uses=1]
+
+ call void @llvm.dbg.declare(metadata !{i32* %1}, metadata !{i32* %1})
+; CHECK: metadata !{i32* %1}, metadata !{i32* %1}
+ call void @llvm.dbg.declare(metadata !{i32 %two}, metadata !{i32 %0})
+ call void @llvm.dbg.declare(metadata !{i32 %0}, metadata !{i32* %1, i32 %0})
+ call void @llvm.dbg.declare(metadata !{i32* %1}, metadata !{i32 %b, i32 %0})
+ call void @llvm.dbg.declare(metadata !{i32 %a}, metadata !{i32 %a, metadata !"foo"})
+; CHECK: metadata !{i32 %a, metadata !"foo"}
+ call void @llvm.dbg.declare(metadata !{i32 %b}, metadata !{metadata !0, i32 %two})
+
+ call void @llvm.dbg.value(metadata !{ i32 %a }, i64 0, metadata !1)
+ call void @llvm.dbg.value(metadata !{ i32 %0 }, i64 25, metadata !0)
+ call void @llvm.dbg.value(metadata !{ i32* %1 }, i64 16, metadata !"foo")
+; CHECK: call void @llvm.dbg.value(metadata !{i32* %1}, i64 16, metadata !"foo")
+ call void @llvm.dbg.value(metadata !"foo", i64 12, metadata !"bar")
+
+ ret void, !foo !0, !bar !1
+; CHECK: ret void, !foo !0, !bar !1
+}
+
+!0 = metadata !{i32 662302, i32 26, metadata !1, null}
+!1 = metadata !{i32 4, metadata !"foo"}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!foo = !{ !0 }
+!bar = !{ !1 }
diff --git a/test/Assembler/vector-cmp.ll b/test/Assembler/vector-cmp.ll
index e4d35d9..688369b 100644
--- a/test/Assembler/vector-cmp.ll
+++ b/test/Assembler/vector-cmp.ll
@@ -9,8 +9,8 @@ entry:
ret <4 x i1> %cmp
}
-global <4 x i1> icmp slt ( <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 1, i32 2, i32 1, i32 2> ) ;
+global <4 x i1> icmp slt ( <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 1, i32 2, i32 1, i32 2> )
-@B = external global i32;
+@B = external global i32
-global <4 x i1> icmp slt ( <4 x i32> <i32 ptrtoint (i32 * @B to i32), i32 1, i32 1, i32 1>, <4 x i32> <i32 1, i32 2, i32 1, i32 2> ) ;
+global <4 x i1> icmp slt ( <4 x i32> <i32 ptrtoint (i32 * @B to i32), i32 1, i32 1, i32 1>, <4 x i32> <i32 1, i32 2, i32 1, i32 2> )
diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll
index 8b56f13..cd16084 100644
--- a/test/CodeGen/ARM/indirectbr.ll
+++ b/test/CodeGen/ARM/indirectbr.ll
@@ -55,6 +55,6 @@ L1: ; preds = %L2, %bb2
store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4
ret i32 %res.3
}
-; ARM: .long LBA4__foo__L5-(LPC{{.*}}+8)
-; THUMB: .long LBA4__foo__L5-(LPC{{.*}}+4)
-; THUMB2: .long LBA4__foo__L5
+; ARM: .long L_BA4__foo_L5-(LPC{{.*}}+8)
+; THUMB: .long L_BA4__foo_L5-(LPC{{.*}}+4)
+; THUMB2: .long L_BA4__foo_L5
diff --git a/test/CodeGen/ARM/private.ll b/test/CodeGen/ARM/private.ll
index 03376a4..fba56b4 100644
--- a/test/CodeGen/ARM/private.ll
+++ b/test/CodeGen/ARM/private.ll
@@ -12,7 +12,7 @@ define private void @foo() {
ret void
}
-@baz = private global i32 4;
+@baz = private global i32 4
define i32 @bar() {
call void @foo()
diff --git a/test/CodeGen/ARM/tail-opts.ll b/test/CodeGen/ARM/tail-opts.ll
index 1a867a9..17c8bae 100644
--- a/test/CodeGen/ARM/tail-opts.ll
+++ b/test/CodeGen/ARM/tail-opts.ll
@@ -9,7 +9,7 @@ declare i1 @qux()
@GHJK = global i32 0
-declare i8* @choose(i8*, i8*);
+declare i8* @choose(i8*, i8*)
; BranchFolding should tail-duplicate the indirect jump to avoid
; redundant branching.
diff --git a/test/CodeGen/Alpha/private.ll b/test/CodeGen/Alpha/private.ll
index 96ab4eb..26076e0 100644
--- a/test/CodeGen/Alpha/private.ll
+++ b/test/CodeGen/Alpha/private.ll
@@ -12,7 +12,7 @@ define private void @foo() {
ret void
}
-@baz = private global i32 4;
+@baz = private global i32 4
define i32 @bar() {
call void @foo()
diff --git a/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll b/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
index f21da52..b6cd2d4 100644
--- a/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
+++ b/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
@@ -1,5 +1,4 @@
; RUN: llc < %s -march=bfin -verify-machineinstrs
-; XFAIL: *
; An undef argument causes a setugt node to escape instruction selection.
diff --git a/test/CodeGen/Blackfin/ct32.ll b/test/CodeGen/Blackfin/ct32.ll
index e9b66eb..363286d 100644
--- a/test/CodeGen/Blackfin/ct32.ll
+++ b/test/CodeGen/Blackfin/ct32.ll
@@ -6,15 +6,15 @@ declare i32 @llvm.ctpop.i32(i32)
define i32 @ctlztest(i32 %B) {
%b = call i32 @llvm.ctlz.i32( i32 %B )
- ret i32 %b;
+ ret i32 %b
}
define i32 @cttztest(i32 %B) {
%b = call i32 @llvm.cttz.i32( i32 %B )
- ret i32 %b;
+ ret i32 %b
}
define i32 @ctpoptest(i32 %B) {
%b = call i32 @llvm.ctpop.i32( i32 %B )
- ret i32 %b;
+ ret i32 %b
}
diff --git a/test/CodeGen/Blackfin/ct64.ll b/test/CodeGen/Blackfin/ct64.ll
index ac4bdcf..7502434 100644
--- a/test/CodeGen/Blackfin/ct64.ll
+++ b/test/CodeGen/Blackfin/ct64.ll
@@ -6,15 +6,15 @@ declare i64 @llvm.ctpop.i64(i64)
define i64 @ctlztest(i64 %B) {
%b = call i64 @llvm.ctlz.i64( i64 %B )
- ret i64 %b;
+ ret i64 %b
}
define i64 @cttztest(i64 %B) {
%b = call i64 @llvm.cttz.i64( i64 %B )
- ret i64 %b;
+ ret i64 %b
}
define i64 @ctpoptest(i64 %B) {
%b = call i64 @llvm.ctpop.i64( i64 %B )
- ret i64 %b;
+ ret i64 %b
}
diff --git a/test/CodeGen/Blackfin/ctlz16.ll b/test/CodeGen/Blackfin/ctlz16.ll
index 56a65c0..eb4af23 100644
--- a/test/CodeGen/Blackfin/ctlz16.ll
+++ b/test/CodeGen/Blackfin/ctlz16.ll
@@ -4,15 +4,15 @@ declare i16 @llvm.ctlz.i16(i16)
define i16 @ctlztest(i16 %B) {
%b = call i16 @llvm.ctlz.i16( i16 %B ) ; <i16> [#uses=1]
- ret i16 %b;
+ ret i16 %b
}
define i16 @ctlztest_z(i16 zeroext %B) {
%b = call i16 @llvm.ctlz.i16( i16 %B ) ; <i16> [#uses=1]
- ret i16 %b;
+ ret i16 %b
}
define i16 @ctlztest_s(i16 signext %B) {
%b = call i16 @llvm.ctlz.i16( i16 %B ) ; <i16> [#uses=1]
- ret i16 %b;
+ ret i16 %b
}
diff --git a/test/CodeGen/Blackfin/ctpop16.ll b/test/CodeGen/Blackfin/ctpop16.ll
index cbbb3d9..8b6c07e 100644
--- a/test/CodeGen/Blackfin/ctpop16.ll
+++ b/test/CodeGen/Blackfin/ctpop16.ll
@@ -4,15 +4,15 @@ declare i16 @llvm.ctpop.i16(i16)
define i16 @ctpoptest(i16 %B) {
%b = call i16 @llvm.ctpop.i16( i16 %B ) ; <i16> [#uses=1]
- ret i16 %b;
+ ret i16 %b
}
define i16 @ctpoptest_z(i16 zeroext %B) {
%b = call i16 @llvm.ctpop.i16( i16 %B ) ; <i16> [#uses=1]
- ret i16 %b;
+ ret i16 %b
}
define i16 @ctpoptest_s(i16 signext %B) {
%b = call i16 @llvm.ctpop.i16( i16 %B ) ; <i16> [#uses=1]
- ret i16 %b;
+ ret i16 %b
}
diff --git a/test/CodeGen/Blackfin/cttz16.ll b/test/CodeGen/Blackfin/cttz16.ll
index 05fe9bf..510882a 100644
--- a/test/CodeGen/Blackfin/cttz16.ll
+++ b/test/CodeGen/Blackfin/cttz16.ll
@@ -4,15 +4,15 @@ declare i16 @llvm.cttz.i16(i16)
define i16 @cttztest(i16 %B) {
%b = call i16 @llvm.cttz.i16( i16 %B ) ; <i16> [#uses=1]
- ret i16 %b;
+ ret i16 %b
}
define i16 @cttztest_z(i16 zeroext %B) {
%b = call i16 @llvm.cttz.i16( i16 %B ) ; <i16> [#uses=1]
- ret i16 %b;
+ ret i16 %b
}
define i16 @cttztest_s(i16 signext %B) {
%b = call i16 @llvm.cttz.i16( i16 %B ) ; <i16> [#uses=1]
- ret i16 %b;
+ ret i16 %b
}
diff --git a/test/CodeGen/Blackfin/promote-logic.ll b/test/CodeGen/Blackfin/promote-logic.ll
index c247aca..46da566 100644
--- a/test/CodeGen/Blackfin/promote-logic.ll
+++ b/test/CodeGen/Blackfin/promote-logic.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=bfin > %t
+; XFAIL: *
; DAGCombiner::SimplifyBinOpWithSameOpcodeHands can produce an illegal i16 OR
; operation after LegalizeOps.
diff --git a/test/CodeGen/CellSPU/dp_farith.ll b/test/CodeGen/CellSPU/dp_farith.ll
index b0a372b..66bff3e 100644
--- a/test/CodeGen/CellSPU/dp_farith.ll
+++ b/test/CodeGen/CellSPU/dp_farith.ll
@@ -83,7 +83,7 @@ define double @d_fnms_2(double %arg1, double %arg2, double %arg3) {
; FNMS: - (a * b - c) => c - (a * b)
define <2 x double> @d_fnms_vec_1(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
%A = fmul <2 x double> %arg1, %arg2
- %B = fsub <2 x double> %arg3, %A ;
+ %B = fsub <2 x double> %arg3, %A
ret <2 x double> %B
}
diff --git a/test/CodeGen/CellSPU/mul_ops.ll b/test/CodeGen/CellSPU/mul_ops.ll
index 031d6c3..1e28fc7 100644
--- a/test/CodeGen/CellSPU/mul_ops.ll
+++ b/test/CodeGen/CellSPU/mul_ops.ll
@@ -11,7 +11,6 @@
; RUN: grep shli %t1.s | count 4
; RUN: grep shlhi %t1.s | count 4
; RUN: grep ila %t1.s | count 2
-; RUN: grep xsbh %t1.s | count 4
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
diff --git a/test/CodeGen/CellSPU/private.ll b/test/CodeGen/CellSPU/private.ll
index 7452276..56f72e7 100644
--- a/test/CodeGen/CellSPU/private.ll
+++ b/test/CodeGen/CellSPU/private.ll
@@ -13,7 +13,7 @@ define private void @foo() {
ret void
}
-@baz = private global i32 4;
+@baz = private global i32 4
define i32 @bar() {
call void @foo()
diff --git a/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll b/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
index 9a9c1a1..45b561a 100644
--- a/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
+++ b/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -soft-float
; PR3899
-@m = external global <2 x double>;
+@m = external global <2 x double>
define double @vector_ex() nounwind {
%v = load <2 x double>* @m
diff --git a/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll b/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
index 577b547..b62f811 100644
--- a/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
+++ b/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
@@ -1,5 +1,7 @@
; RUN: llc < %s
; rdar://6836460
+; rdar://7516906
+; PR5963
define i32 @test(i128* %P) nounwind {
entry:
@@ -14,3 +16,17 @@ if.then50: ; preds = %if.then20
if.end61: ; preds = %if.then50, %if.then20, %entry
ret i32 123
}
+
+define i32 @test2(i320* %P) nounwind {
+entry:
+ %tmp48 = load i320* %P
+ %and49 = and i320 %tmp48, 25108406941546723055343157692830665664409421777856138051584
+ %tobool = icmp ne i320 %and49, 0 ; <i1> [#uses=1]
+ br i1 %tobool, label %if.then50, label %if.end61
+
+if.then50: ; preds = %if.then20
+ ret i32 1241
+
+if.end61: ; preds = %if.then50, %if.then20, %entry
+ ret i32 123
+}
diff --git a/test/CodeGen/MSP430/bit.ll b/test/CodeGen/MSP430/bit.ll
index 2c78366..0dc2158 100644
--- a/test/CodeGen/MSP430/bit.ll
+++ b/test/CodeGen/MSP430/bit.ll
@@ -1,9 +1,10 @@
; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s
+; XFAIL: *
target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:32"
target triple = "msp430-generic-generic"
-@foo8 = external global i8;
-@bar8 = external global i8;
+@foo8 = external global i8
+@bar8 = external global i8
define i8 @bitbrr(i8 %a, i8 %b) nounwind {
%t1 = and i8 %a, %b
@@ -83,8 +84,8 @@ define i8 @bitbmm() nounwind {
; CHECK: bitbmm:
; CHECK: bit.b &bar8, &foo8
-@foo16 = external global i16;
-@bar16 = external global i16;
+@foo16 = external global i16
+@bar16 = external global i16
define i16 @bitwrr(i16 %a, i16 %b) nounwind {
%t1 = and i16 %a, %b
diff --git a/test/CodeGen/MSP430/setcc.ll b/test/CodeGen/MSP430/setcc.ll
index 971d1b5..ecf0661 100644
--- a/test/CodeGen/MSP430/setcc.ll
+++ b/test/CodeGen/MSP430/setcc.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=msp430 < %s | FileCheck %s
+; XFAIL: *
target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:32"
target triple = "msp430-generic-generic"
diff --git a/test/CodeGen/MSP430/shifts.ll b/test/CodeGen/MSP430/shifts.ll
new file mode 100644
index 0000000..b5b3054
--- /dev/null
+++ b/test/CodeGen/MSP430/shifts.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-n8:16"
+target triple = "msp430-elf"
+
+define zeroext i8 @lshr8(i8 zeroext %a, i8 zeroext %cnt) nounwind readnone {
+entry:
+; CHECK: lshr8:
+; CHECK: rrc.b
+ %shr = lshr i8 %a, %cnt
+ ret i8 %shr
+}
+
+define signext i8 @ashr8(i8 signext %a, i8 zeroext %cnt) nounwind readnone {
+entry:
+; CHECK: ashr8:
+; CHECK: rra.b
+ %shr = ashr i8 %a, %cnt
+ ret i8 %shr
+}
+
+define zeroext i8 @shl8(i8 zeroext %a, i8 zeroext %cnt) nounwind readnone {
+entry:
+; CHECK: shl8
+; CHECK: rla.b
+ %shl = shl i8 %a, %cnt
+ ret i8 %shl
+}
+
+define zeroext i16 @lshr16(i16 zeroext %a, i16 zeroext %cnt) nounwind readnone {
+entry:
+; CHECK: lshr16:
+; CHECK: rrc.w
+ %shr = lshr i16 %a, %cnt
+ ret i16 %shr
+}
+
+define signext i16 @ashr16(i16 signext %a, i16 zeroext %cnt) nounwind readnone {
+entry:
+; CHECK: ashr16:
+; CHECK: rra.w
+ %shr = ashr i16 %a, %cnt
+ ret i16 %shr
+}
+
+define zeroext i16 @shl16(i16 zeroext %a, i16 zeroext %cnt) nounwind readnone {
+entry:
+; CHECK: shl16:
+; CHECK: rla.w
+ %shl = shl i16 %a, %cnt
+ ret i16 %shl
+}
diff --git a/test/CodeGen/Mips/private.ll b/test/CodeGen/Mips/private.ll
index a1b45c2..34b7547 100644
--- a/test/CodeGen/Mips/private.ll
+++ b/test/CodeGen/Mips/private.ll
@@ -12,7 +12,7 @@ define private void @foo() {
ret void
}
-@baz = private global i32 4;
+@baz = private global i32 4
define i32 @bar() {
call void @foo()
diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll
index 1b302e41..fbc7bd2 100644
--- a/test/CodeGen/PowerPC/indirectbr.ll
+++ b/test/CodeGen/PowerPC/indirectbr.ll
@@ -43,12 +43,12 @@ L2: ; preds = %L3, %bb2
L1: ; preds = %L2, %bb2
%res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; <i32> [#uses=1]
-; PIC: addis r4, r2, ha16(LBA4__foo__L5-"L1$pb")
-; PIC: li r5, lo16(LBA4__foo__L5-"L1$pb")
+; PIC: addis r4, r2, ha16(L_BA4__foo_L5-"L1$pb")
+; PIC: li r5, lo16(L_BA4__foo_L5-"L1$pb")
; PIC: add r4, r4, r5
; PIC: stw r4
-; STATIC: li r2, lo16(LBA4__foo__L5)
-; STATIC: addis r2, r2, ha16(LBA4__foo__L5)
+; STATIC: li r2, lo16(L_BA4__foo_L5)
+; STATIC: addis r2, r2, ha16(L_BA4__foo_L5)
; STATIC: stw r2
store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4
ret i32 %res.3
diff --git a/test/CodeGen/PowerPC/private.ll b/test/CodeGen/PowerPC/private.ll
index d6e6770..f9405f6 100644
--- a/test/CodeGen/PowerPC/private.ll
+++ b/test/CodeGen/PowerPC/private.ll
@@ -15,7 +15,7 @@ define private void @foo() nounwind {
ret void
}
-@baz = private global i32 4;
+@baz = private global i32 4
define i32 @bar() nounwind {
call void @foo()
diff --git a/test/CodeGen/SPARC/private.ll b/test/CodeGen/SPARC/private.ll
index 8fa3e7e..f091aa6 100644
--- a/test/CodeGen/SPARC/private.ll
+++ b/test/CodeGen/SPARC/private.ll
@@ -12,7 +12,7 @@ define private void @foo() {
ret void
}
-@baz = private global i32 4;
+@baz = private global i32 4
define i32 @bar() {
call void @foo()
diff --git a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
index 3317864..07a164d 100644
--- a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
+++ b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
@@ -5,8 +5,8 @@ target triple = "s390x-linux"
define i32 @rotl(i32 %x, i32 %y, i32 %z) nounwind readnone {
entry:
- %shl = shl i32 %x, 0 ; <i32> [#uses=1]
- %sub = sub i32 32, 0 ; <i32> [#uses=1]
+ %shl = shl i32 %x, 1 ; <i32> [#uses=1]
+ %sub = sub i32 32, 1 ; <i32> [#uses=1]
%shr = lshr i32 %x, %sub ; <i32> [#uses=1]
%or = or i32 %shr, %shl ; <i32> [#uses=1]
ret i32 %or
diff --git a/test/CodeGen/SystemZ/2010-01-04-DivMem.ll b/test/CodeGen/SystemZ/2010-01-04-DivMem.ll
new file mode 100644
index 0000000..d730bec
--- /dev/null
+++ b/test/CodeGen/SystemZ/2010-01-04-DivMem.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s
+target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16-n32:64"
+target triple = "s390x-elf"
+
+@REGISTER = external global [10 x i32] ; <[10 x i32]*> [#uses=2]
+
+define void @DIVR_P(i32 signext %PRINT_EFFECT) nounwind {
+entry:
+ %REG1 = alloca i32, align 4 ; <i32*> [#uses=2]
+ %REG2 = alloca i32, align 4 ; <i32*> [#uses=2]
+ %call = call signext i32 (...)* @FORMAT2(i32* %REG1, i32* %REG2) nounwind ; <i32> [#uses=0]
+ %tmp = load i32* %REG1 ; <i32> [#uses=1]
+ %idxprom = sext i32 %tmp to i64 ; <i64> [#uses=1]
+ %arrayidx = getelementptr inbounds [10 x i32]* @REGISTER, i64 0, i64 %idxprom ; <i32*> [#uses=2]
+ %tmp1 = load i32* %arrayidx ; <i32> [#uses=2]
+ %tmp2 = load i32* %REG2 ; <i32> [#uses=1]
+ %idxprom3 = sext i32 %tmp2 to i64 ; <i64> [#uses=1]
+ %arrayidx4 = getelementptr inbounds [10 x i32]* @REGISTER, i64 0, i64 %idxprom3 ; <i32*> [#uses=3]
+ %tmp5 = load i32* %arrayidx4 ; <i32> [#uses=3]
+ %cmp6 = icmp sgt i32 %tmp5, 8388607 ; <i1> [#uses=1]
+ %REG2_SIGN.0 = select i1 %cmp6, i32 -1, i32 1 ; <i32> [#uses=2]
+ %cmp10 = icmp eq i32 %REG2_SIGN.0, 1 ; <i1> [#uses=1]
+ %not.cmp = icmp slt i32 %tmp1, 8388608 ; <i1> [#uses=2]
+ %or.cond = and i1 %cmp10, %not.cmp ; <i1> [#uses=1]
+ br i1 %or.cond, label %if.then13, label %if.end25
+
+if.then13: ; preds = %entry
+ %div = sdiv i32 %tmp5, %tmp1 ; <i32> [#uses=2]
+ store i32 %div, i32* %arrayidx4
+ br label %if.end25
+
+if.end25: ; preds = %if.then13, %entry
+ %tmp35 = phi i32 [ %div, %if.then13 ], [ %tmp5, %entry ] ; <i32> [#uses=1]
+ %cmp27 = icmp eq i32 %REG2_SIGN.0, -1 ; <i1> [#uses=1]
+ %or.cond46 = and i1 %cmp27, %not.cmp ; <i1> [#uses=1]
+ br i1 %or.cond46, label %if.then31, label %if.end45
+
+if.then31: ; preds = %if.end25
+ %sub = sub i32 16777216, %tmp35 ; <i32> [#uses=1]
+ %tmp39 = load i32* %arrayidx ; <i32> [#uses=1]
+ %div40 = udiv i32 %sub, %tmp39 ; <i32> [#uses=1]
+ %sub41 = sub i32 16777216, %div40 ; <i32> [#uses=1]
+ store i32 %sub41, i32* %arrayidx4
+ ret void
+
+if.end45: ; preds = %if.end25
+ ret void
+}
+
+declare signext i32 @FORMAT2(...)
diff --git a/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll b/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll
new file mode 100644
index 0000000..6a05df1
--- /dev/null
+++ b/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll
@@ -0,0 +1,89 @@
+; RUN: llc -relocation-model=pic -pre-regalloc-taildup < %s | grep {:$} | sort | uniq -d | count 0
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+; This function produces a duplicate LPC label unless special care is taken when duplicating a t2LDRpci_pic instruction.
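+; Note: each t2LDRpci_pic expands to a PC-relative constant-pool load that
+; defines its own LPC label, so tail duplication must give the copy a fresh
+; label. The RUN line collects every label line and requires that
+; "sort | uniq -d" finds no duplicates.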
+
+%struct.PlatformMutex = type { i32, [40 x i8] }
+%struct.SpinLock = type { %struct.PlatformMutex }
+%"struct.WTF::TCMalloc_ThreadCache" = type { i32, %struct._opaque_pthread_t*, i8, [68 x %"struct.WTF::TCMalloc_ThreadCache_FreeList"], i32, i32, %"struct.WTF::TCMalloc_ThreadCache"*, %"struct.WTF::TCMalloc_ThreadCache"* }
+%"struct.WTF::TCMalloc_ThreadCache_FreeList" = type { i8*, i16, i16 }
+%struct.__darwin_pthread_handler_rec = type { void (i8*)*, i8*, %struct.__darwin_pthread_handler_rec* }
+%struct._opaque_pthread_t = type { i32, %struct.__darwin_pthread_handler_rec*, [596 x i8] }
+
+@_ZN3WTFL8heap_keyE = internal global i32 0 ; <i32*> [#uses=1]
+@_ZN3WTFL10tsd_initedE.b = internal global i1 false ; <i1*> [#uses=2]
+@_ZN3WTFL13pageheap_lockE = internal global %struct.SpinLock { %struct.PlatformMutex { i32 850045863, [40 x i8] zeroinitializer } } ; <%struct.SpinLock*> [#uses=1]
+@_ZN3WTFL12thread_heapsE = internal global %"struct.WTF::TCMalloc_ThreadCache"* null ; <%"struct.WTF::TCMalloc_ThreadCache"**> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (%"struct.WTF::TCMalloc_ThreadCache"* ()* @_ZN3WTF20TCMalloc_ThreadCache22CreateCacheIfNecessaryEv to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define arm_apcscc %"struct.WTF::TCMalloc_ThreadCache"* @_ZN3WTF20TCMalloc_ThreadCache22CreateCacheIfNecessaryEv() nounwind {
+entry:
+ %0 = tail call arm_apcscc i32 @pthread_mutex_lock(%struct.PlatformMutex* getelementptr inbounds (%struct.SpinLock* @_ZN3WTFL13pageheap_lockE, i32 0, i32 0)) nounwind
+ %.b24 = load i1* @_ZN3WTFL10tsd_initedE.b, align 4 ; <i1> [#uses=1]
+ br i1 %.b24, label %bb5, label %bb6
+
+bb5: ; preds = %entry
+ %1 = tail call arm_apcscc %struct._opaque_pthread_t* @pthread_self() nounwind
+ br label %bb6
+
+bb6: ; preds = %bb5, %entry
+ %me.0 = phi %struct._opaque_pthread_t* [ %1, %bb5 ], [ null, %entry ] ; <%struct._opaque_pthread_t*> [#uses=2]
+ br label %bb11
+
+bb7: ; preds = %bb11
+ %2 = getelementptr inbounds %"struct.WTF::TCMalloc_ThreadCache"* %h.0, i32 0, i32 1
+ %3 = load %struct._opaque_pthread_t** %2, align 4
+ %4 = tail call arm_apcscc i32 @pthread_equal(%struct._opaque_pthread_t* %3, %struct._opaque_pthread_t* %me.0) nounwind
+ %5 = icmp eq i32 %4, 0
+ br i1 %5, label %bb10, label %bb14
+
+bb10: ; preds = %bb7
+ %6 = getelementptr inbounds %"struct.WTF::TCMalloc_ThreadCache"* %h.0, i32 0, i32 6
+ br label %bb11
+
+bb11: ; preds = %bb10, %bb6
+ %h.0.in = phi %"struct.WTF::TCMalloc_ThreadCache"** [ @_ZN3WTFL12thread_heapsE, %bb6 ], [ %6, %bb10 ] ; <%"struct.WTF::TCMalloc_ThreadCache"**> [#uses=1]
+ %h.0 = load %"struct.WTF::TCMalloc_ThreadCache"** %h.0.in, align 4 ; <%"struct.WTF::TCMalloc_ThreadCache"*> [#uses=4]
+ %7 = icmp eq %"struct.WTF::TCMalloc_ThreadCache"* %h.0, null
+ br i1 %7, label %bb13, label %bb7
+
+bb13: ; preds = %bb11
+ %8 = tail call arm_apcscc %"struct.WTF::TCMalloc_ThreadCache"* @_ZN3WTF20TCMalloc_ThreadCache7NewHeapEP17_opaque_pthread_t(%struct._opaque_pthread_t* %me.0) nounwind
+ br label %bb14
+
+bb14: ; preds = %bb13, %bb7
+ %heap.1 = phi %"struct.WTF::TCMalloc_ThreadCache"* [ %8, %bb13 ], [ %h.0, %bb7 ] ; <%"struct.WTF::TCMalloc_ThreadCache"*> [#uses=4]
+ %9 = tail call arm_apcscc i32 @pthread_mutex_unlock(%struct.PlatformMutex* getelementptr inbounds (%struct.SpinLock* @_ZN3WTFL13pageheap_lockE, i32 0, i32 0)) nounwind
+ %10 = getelementptr inbounds %"struct.WTF::TCMalloc_ThreadCache"* %heap.1, i32 0, i32 2
+ %11 = load i8* %10, align 4
+ %toBool15not = icmp eq i8 %11, 0 ; <i1> [#uses=1]
+ br i1 %toBool15not, label %bb19, label %bb22
+
+bb19: ; preds = %bb14
+ %.b = load i1* @_ZN3WTFL10tsd_initedE.b, align 4 ; <i1> [#uses=1]
+ br i1 %.b, label %bb21, label %bb22
+
+bb21: ; preds = %bb19
+ store i8 1, i8* %10, align 4
+ %12 = load i32* @_ZN3WTFL8heap_keyE, align 4
+ %13 = bitcast %"struct.WTF::TCMalloc_ThreadCache"* %heap.1 to i8*
+ %14 = tail call arm_apcscc i32 @pthread_setspecific(i32 %12, i8* %13) nounwind
+ ret %"struct.WTF::TCMalloc_ThreadCache"* %heap.1
+
+bb22: ; preds = %bb19, %bb14
+ ret %"struct.WTF::TCMalloc_ThreadCache"* %heap.1
+}
+
+declare arm_apcscc i32 @pthread_mutex_lock(%struct.PlatformMutex*)
+
+declare arm_apcscc i32 @pthread_mutex_unlock(%struct.PlatformMutex*)
+
+declare hidden arm_apcscc %"struct.WTF::TCMalloc_ThreadCache"* @_ZN3WTF20TCMalloc_ThreadCache7NewHeapEP17_opaque_pthread_t(%struct._opaque_pthread_t*) nounwind
+
+declare arm_apcscc i32 @pthread_setspecific(i32, i8*)
+
+declare arm_apcscc %struct._opaque_pthread_t* @pthread_self()
+
+declare arm_apcscc i32 @pthread_equal(%struct._opaque_pthread_t*, %struct._opaque_pthread_t*)
+
diff --git a/test/CodeGen/Thumb2/thumb2-add.ll b/test/CodeGen/Thumb2/thumb2-add.ll
index d42ea71..5e25cf6 100644
--- a/test/CodeGen/Thumb2/thumb2-add.ll
+++ b/test/CodeGen/Thumb2/thumb2-add.ll
@@ -8,43 +8,43 @@
; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep lsl | grep #8
define i32 @t2ADDrc_255(i32 %lhs) {
- %Rd = add i32 %lhs, 255;
+ %Rd = add i32 %lhs, 255
ret i32 %Rd
}
define i32 @t2ADDrc_256(i32 %lhs) {
- %Rd = add i32 %lhs, 256;
+ %Rd = add i32 %lhs, 256
ret i32 %Rd
}
define i32 @t2ADDrc_257(i32 %lhs) {
- %Rd = add i32 %lhs, 257;
+ %Rd = add i32 %lhs, 257
ret i32 %Rd
}
define i32 @t2ADDrc_4094(i32 %lhs) {
- %Rd = add i32 %lhs, 4094;
+ %Rd = add i32 %lhs, 4094
ret i32 %Rd
}
define i32 @t2ADDrc_4095(i32 %lhs) {
- %Rd = add i32 %lhs, 4095;
+ %Rd = add i32 %lhs, 4095
ret i32 %Rd
}
define i32 @t2ADDrc_4096(i32 %lhs) {
- %Rd = add i32 %lhs, 4096;
+ %Rd = add i32 %lhs, 4096
ret i32 %Rd
}
define i32 @t2ADDrr(i32 %lhs, i32 %rhs) {
- %Rd = add i32 %lhs, %rhs;
+ %Rd = add i32 %lhs, %rhs
ret i32 %Rd
}
define i32 @t2ADDrs(i32 %lhs, i32 %rhs) {
%tmp = shl i32 %rhs, 8
- %Rd = add i32 %lhs, %tmp;
+ %Rd = add i32 %lhs, %tmp
ret i32 %Rd
}
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index 89b127c..bdbe713 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -10,9 +10,8 @@ entry:
cond_true: ; preds = %cond_true, %entry
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2]
%tmp. = shl i32 %indvar, 2 ; <i32> [#uses=1]
- %tmp.10 = add i32 %tmp., 1 ; <i32> [#uses=2]
- %k.0.0 = bitcast i32 %tmp.10 to i32 ; <i32> [#uses=2]
- %tmp31 = add i32 %k.0.0, -1 ; <i32> [#uses=4]
+ %tmp.10 = add nsw i32 %tmp., 1 ; <i32> [#uses=2]
+ %tmp31 = add nsw i32 %tmp.10, -1 ; <i32> [#uses=4]
%tmp32 = getelementptr i32* %mpp, i32 %tmp31 ; <i32*> [#uses=1]
%tmp34 = bitcast i32* %tmp32 to <16 x i8>* ; <i8*> [#uses=1]
%tmp = load <16 x i8>* %tmp34, align 1
@@ -37,14 +36,13 @@ cond_true: ; preds = %cond_true, %entry
%tmp111 = and <2 x i64> %tmp110, %tmp55.upgrd.2 ; <<2 x i64>> [#uses=1]
%tmp121 = and <2 x i64> %tmp99.upgrd.5, %tmp88.upgrd.4 ; <<2 x i64>> [#uses=1]
%tmp131 = or <2 x i64> %tmp121, %tmp111 ; <<2 x i64>> [#uses=1]
- %gep.upgrd.6 = zext i32 %tmp.10 to i64 ; <i64> [#uses=1]
- %tmp137 = getelementptr i32* %mc, i64 %gep.upgrd.6 ; <i32*> [#uses=1]
+ %tmp137 = getelementptr i32* %mc, i32 %tmp.10 ; <i32*> [#uses=1]
%tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7
- %tmp147 = add i32 %k.0.0, 8 ; <i32> [#uses=1]
- %tmp.upgrd.8 = icmp sgt i32 %tmp147, %M ; <i1> [#uses=1]
+ %tmp147 = add nsw i32 %tmp.10, 8 ; <i32> [#uses=1]
+ %tmp.upgrd.8 = icmp slt i32 %tmp147, %M ; <i1> [#uses=1]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
- br i1 %tmp.upgrd.8, label %return, label %cond_true
+ br i1 %tmp.upgrd.8, label %cond_true, label %return
return: ; preds = %cond_true, %entry
ret void
diff --git a/test/Transforms/IndVarSimplify/2007-01-08-X86-64-Pointer.ll b/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll
index de226a1..de226a1 100644
--- a/test/Transforms/IndVarSimplify/2007-01-08-X86-64-Pointer.ll
+++ b/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll
diff --git a/test/CodeGen/X86/2007-02-04-OrAddrMode.ll b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
index 93e8808..10bbe74 100644
--- a/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
+++ b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -march=x86 | grep {leal 3(,%eax,8)}
;; This example can't fold the or into an LEA.
-define i32 @test(float ** %tmp2, i32 %tmp12) {
+define i32 @test(float ** %tmp2, i32 %tmp12) nounwind {
%tmp3 = load float** %tmp2
%tmp132 = shl i32 %tmp12, 2 ; <i32> [#uses=1]
%tmp4 = bitcast float* %tmp3 to i8* ; <i8*> [#uses=1]
@@ -14,7 +14,7 @@ define i32 @test(float ** %tmp2, i32 %tmp12) {
;; This can!
-define i32 @test2(i32 %a, i32 %b) {
+define i32 @test2(i32 %a, i32 %b) nounwind {
%c = shl i32 %a, 3
%d = or i32 %c, 3
ret i32 %d
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index cb1b1ef..a4d642b 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -6,13 +6,13 @@ target triple = "x86_64-apple-darwin10.0"
%struct.__Rec = type opaque
%struct.__vv = type { }
-define %struct.__vv* @t(%struct.Key* %desc) nounwind ssp {
+define %struct.__vv* @t(%struct.Key* %desc, i64 %p) nounwind ssp {
entry:
br label %bb4
bb4: ; preds = %bb.i, %bb26, %bb4, %entry
%0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind ; <i32> [#uses=0]
- %ins = or i64 0, 0 ; <i64> [#uses=1]
+ %ins = or i64 %p, 2097152 ; <i64> [#uses=1]
%1 = call i32 (...)* @xxCalculateMidType(%struct.Key* %desc, i32 0) nounwind ; <i32> [#uses=1]
%cond = icmp eq i32 %1, 1 ; <i1> [#uses=1]
br i1 %cond, label %bb26, label %bb4
diff --git a/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
index 8a0b244..3cd5416 100644
--- a/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
+++ b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
@@ -1,12 +1,18 @@
-; RUN: llc < %s | grep -E {sar|shl|mov|or} | count 4
+; RUN: llc < %s | FileCheck %s
+
; Check that (shr (shl X, 56), 48) is not mistakenly turned into
; a shr (X, -8) that gets subsequently "optimized away" as undef
; PR4254
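+; Note: naively merging the shift pair would use the amount 48-56 = -8, and a
+; shift by a negative amount is undefined, so the combine must be rejected and
+; both shifts kept; the CHECK lines pin the expected shlq/sarq/leaq sequence.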
+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
define i64 @foo(i64 %b) nounwind readnone {
entry:
+; CHECK: foo:
+; CHECK: shlq $56, %rdi
+; CHECK: sarq $48, %rdi
+; CHECK: leaq 1(%rdi), %rax
%shl = shl i64 %b, 56 ; <i64> [#uses=1]
%shr = ashr i64 %shl, 48 ; <i64> [#uses=1]
%add5 = or i64 %shr, 1 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/2009-11-16-MachineLICM.ll b/test/CodeGen/X86/2009-11-16-MachineLICM.ll
index a7c2020..8f274df 100644
--- a/test/CodeGen/X86/2009-11-16-MachineLICM.ll
+++ b/test/CodeGen/X86/2009-11-16-MachineLICM.ll
@@ -10,7 +10,7 @@ entry:
br i1 %0, label %bb.nph, label %return
bb.nph: ; preds = %entry
-; CHECK: movq _g@GOTPCREL(%rip), %rcx
+; CHECK: movq _g@GOTPCREL(%rip), [[REG:%[a-z]+]]
%tmp = zext i32 %n to i64 ; <i64> [#uses=1]
br label %bb
diff --git a/test/CodeGen/X86/2010-01-05-ZExt-Shl.ll b/test/CodeGen/X86/2010-01-05-ZExt-Shl.ll
new file mode 100644
index 0000000..e7004e2
--- /dev/null
+++ b/test/CodeGen/X86/2010-01-05-ZExt-Shl.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86-64
+; <rdar://problem/7499313>
+target triple = "i686-apple-darwin8"
+
+declare void @func2(i16 zeroext)
+
+define void @func1() nounwind {
+entry:
+ %t1 = icmp ne i8 undef, 0
+ %t2 = icmp eq i8 undef, 14
+ %t3 = and i1 %t1, %t2
+ %t4 = select i1 %t3, i16 0, i16 128
+ call void @func2(i16 zeroext %t4) nounwind
+ ret void
+}
diff --git a/test/CodeGen/X86/2010-01-07-ISelBug.ll b/test/CodeGen/X86/2010-01-07-ISelBug.ll
new file mode 100644
index 0000000..081fab7
--- /dev/null
+++ b/test/CodeGen/X86/2010-01-07-ISelBug.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+; rdar://r7519827
+
+define i32 @t() nounwind ssp {
+entry:
+ br label %if.end.i11
+
+if.end.i11: ; preds = %lor.lhs.false.i10, %lor.lhs.false.i10, %lor.lhs.false.i10
+ br i1 undef, label %for.body161, label %for.end197
+
+for.body161: ; preds = %if.end.i11
+ br label %for.end197
+
+for.end197: ; preds = %for.body161, %if.end.i11
+ %mlucEntry.4 = phi i96 [ undef, %for.body161 ], [ undef, %if.end.i11 ] ; <i96> [#uses=2]
+ store i96 %mlucEntry.4, i96* undef, align 8
+ %tmp172 = lshr i96 %mlucEntry.4, 64 ; <i96> [#uses=1]
+ %tmp173 = trunc i96 %tmp172 to i32 ; <i32> [#uses=1]
+ %tmp1.i1.i = call i32 @llvm.bswap.i32(i32 %tmp173) nounwind ; <i32> [#uses=1]
+ store i32 %tmp1.i1.i, i32* undef, align 8
+ unreachable
+
+if.then283: ; preds = %lor.lhs.false.i10, %do.end105, %for.end
+ ret i32 undef
+}
+
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
diff --git a/test/CodeGen/X86/2010-01-07-UAMemFeature.ll b/test/CodeGen/X86/2010-01-07-UAMemFeature.ll
new file mode 100644
index 0000000..3728f15
--- /dev/null
+++ b/test/CodeGen/X86/2010-01-07-UAMemFeature.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mcpu=yonah -mattr=vector-unaligned-mem -march=x86 < %s | FileCheck %s
+; CHECK: addps (
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind {
+ %A = load <4 x float>* %P, align 4
+ %B = add <4 x float> %A, %In
+ ret <4 x float> %B
+}
diff --git a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
new file mode 100644
index 0000000..172e1c7
--- /dev/null
+++ b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+; rdar://r7512579
+
+; PHI defs in the atomic loop should be used by the add / adc
+; instructions. They should not be dead.
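+; Note: i386 has no native 64-bit atomic add, so it is lowered to a cmpxchg8b
+; loop in which EDX:EAX carry the current value around the loop as PHIs; the
+; addl/adcl pair must consume those PHI defs to form the new value.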
+
+define void @t(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: t:
+; CHECK: movl $1
+; CHECK: movl (%ebp), %eax
+; CHECK: movl 4(%ebp), %edx
+; CHECK: LBB1_1:
+; CHECK-NOT: movl $1
+; CHECK-NOT: movl $0
+; CHECK: addl
+; CHECK: adcl
+; CHECK: lock
+; CHECK: cmpxchg8b
+; CHECK: jne
+ tail call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
+ %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1) ; <i64> [#uses=0]
+ tail call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
+ ret void
+}
+
+declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind
+
+declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind
diff --git a/test/CodeGen/X86/2010-01-11-ExtraPHIArg.ll b/test/CodeGen/X86/2010-01-11-ExtraPHIArg.ll
new file mode 100644
index 0000000..db98eef
--- /dev/null
+++ b/test/CodeGen/X86/2010-01-11-ExtraPHIArg.ll
@@ -0,0 +1,97 @@
+; RUN: llc -verify-machineinstrs < %s
+;
+; The lowering of a switch combined with constant folding would leave spurious extra arguments on a PHI instruction.
+;
+target triple = "x86_64-apple-darwin10"
+
+define void @foo() {
+ br label %cond_true813.i
+
+cond_true813.i: ; preds = %0
+ br i1 false, label %cond_true818.i, label %cond_next1146.i
+
+cond_true818.i: ; preds = %cond_true813.i
+ br i1 false, label %recog_memoized.exit52, label %cond_next1146.i
+
+recog_memoized.exit52: ; preds = %cond_true818.i
+ switch i32 0, label %bb886.i.preheader [
+ i32 0, label %bb907.i
+ i32 44, label %bb866.i
+ i32 103, label %bb874.i
+ i32 114, label %bb874.i
+ ]
+
+bb857.i: ; preds = %bb886.i, %bb866.i
+ %tmp862.i494.24 = phi i8* [ null, %bb866.i ], [ %tmp862.i494.26, %bb886.i ] ; <i8*> [#uses=1]
+ switch i32 0, label %bb886.i.preheader [
+ i32 0, label %bb907.i
+ i32 44, label %bb866.i
+ i32 103, label %bb874.i
+ i32 114, label %bb874.i
+ ]
+
+bb866.i.loopexit: ; preds = %bb874.i
+ br label %bb866.i
+
+bb866.i.loopexit31: ; preds = %cond_true903.i
+ br label %bb866.i
+
+bb866.i: ; preds = %bb866.i.loopexit31, %bb866.i.loopexit, %bb857.i, %recog_memoized.exit52
+ br i1 false, label %bb907.i, label %bb857.i
+
+bb874.i.preheader.loopexit: ; preds = %cond_true903.i, %cond_true903.i
+ ret void
+
+bb874.i: ; preds = %bb857.i, %bb857.i, %recog_memoized.exit52, %recog_memoized.exit52
+ switch i32 0, label %bb886.i.preheader.loopexit [
+ i32 0, label %bb907.i
+ i32 44, label %bb866.i.loopexit
+ i32 103, label %bb874.i.backedge
+ i32 114, label %bb874.i.backedge
+ ]
+
+bb874.i.backedge: ; preds = %bb874.i, %bb874.i
+ ret void
+
+bb886.i.preheader.loopexit: ; preds = %bb874.i
+ ret void
+
+bb886.i.preheader: ; preds = %bb857.i, %recog_memoized.exit52
+ %tmp862.i494.26 = phi i8* [ undef, %recog_memoized.exit52 ], [ %tmp862.i494.24, %bb857.i ] ; <i8*> [#uses=1]
+ br label %bb886.i
+
+bb886.i: ; preds = %cond_true903.i, %bb886.i.preheader
+ br i1 false, label %bb857.i, label %cond_true903.i
+
+cond_true903.i: ; preds = %bb886.i
+ switch i32 0, label %bb886.i [
+ i32 0, label %bb907.i
+ i32 44, label %bb866.i.loopexit31
+ i32 103, label %bb874.i.preheader.loopexit
+ i32 114, label %bb874.i.preheader.loopexit
+ ]
+
+bb907.i: ; preds = %cond_true903.i, %bb874.i, %bb866.i, %bb857.i, %recog_memoized.exit52
+ br i1 false, label %cond_next1146.i, label %cond_true910.i
+
+cond_true910.i: ; preds = %bb907.i
+ ret void
+
+cond_next1146.i: ; preds = %bb907.i, %cond_true818.i, %cond_true813.i
+ ret void
+
+bb2060.i: ; No predecessors!
+ br i1 false, label %cond_true2064.i, label %bb2067.i
+
+cond_true2064.i: ; preds = %bb2060.i
+ unreachable
+
+bb2067.i: ; preds = %bb2060.i
+ ret void
+
+cond_next3473: ; No predecessors!
+ ret void
+
+cond_next3521: ; No predecessors!
+ ret void
+}
diff --git a/test/CodeGen/X86/2010-01-13-OptExtBug.ll b/test/CodeGen/X86/2010-01-13-OptExtBug.ll
new file mode 100644
index 0000000..d49e2a8
--- /dev/null
+++ b/test/CodeGen/X86/2010-01-13-OptExtBug.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu
+; PR6027
+
+%class.OlsonTimeZone = type { i16, i32*, i8*, i16 }
+
+define void @XX(%class.OlsonTimeZone* %this) align 2 {
+entry:
+ %call = tail call i8* @_Z15uprv_malloc_4_2v()
+ %0 = bitcast i8* %call to double*
+ %tmp = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 3
+ %tmp2 = load i16* %tmp
+ %tmp525 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 0
+ %tmp626 = load i16* %tmp525
+ %cmp27 = icmp slt i16 %tmp2, %tmp626
+ br i1 %cmp27, label %bb.nph, label %for.end
+
+for.cond:
+ %tmp6 = load i16* %tmp5
+ %cmp = icmp slt i16 %inc, %tmp6
+ %indvar.next = add i32 %indvar, 1
+ br i1 %cmp, label %for.body, label %for.end
+
+bb.nph:
+ %tmp10 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 2
+ %tmp17 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 1
+ %tmp5 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 0
+ %tmp29 = sext i16 %tmp2 to i32
+ %tmp31 = add i16 %tmp2, 1
+ %tmp32 = zext i16 %tmp31 to i32
+ br label %for.body
+
+for.body:
+ %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %for.cond ]
+ %tmp30 = add i32 %indvar, %tmp29
+ %tmp33 = add i32 %indvar, %tmp32
+ %inc = trunc i32 %tmp33 to i16
+ %tmp11 = load i8** %tmp10
+ %arrayidx = getelementptr i8* %tmp11, i32 %tmp30
+ %tmp12 = load i8* %arrayidx
+ br label %for.cond
+
+for.end:
+ ret void
+}
+
+declare i8* @_Z15uprv_malloc_4_2v()
diff --git a/test/CodeGen/X86/3addr-or.ll b/test/CodeGen/X86/3addr-or.ll
new file mode 100644
index 0000000..30a1f36
--- /dev/null
+++ b/test/CodeGen/X86/3addr-or.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; rdar://7527734
+
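+; Note: the shift leaves the low five bits of %x zero, so "or ..., 3" behaves
+; like an add of 3 and can be folded into the LEA displacement, giving a
+; three-address result with no extra copy.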
+define i32 @test(i32 %x) nounwind readnone ssp {
+entry:
+; CHECK: test:
+; CHECK: leal 3(%rdi), %eax
+ %0 = shl i32 %x, 5 ; <i32> [#uses=1]
+ %1 = or i32 %0, 3 ; <i32> [#uses=1]
+ ret i32 %1
+}
+
+define i64 @test2(i8 %A, i8 %B) nounwind {
+; CHECK: test2:
+; CHECK: shrq $4
+; CHECK-NOT: movq
+; CHECK-NOT: orq
+; CHECK: leaq
+; CHECK: ret
+ %C = zext i8 %A to i64 ; <i64> [#uses=1]
+ %D = shl i64 %C, 4 ; <i64> [#uses=1]
+ %E = and i64 %D, 48 ; <i64> [#uses=1]
+ %F = zext i8 %B to i64 ; <i64> [#uses=1]
+ %G = lshr i64 %F, 4 ; <i64> [#uses=1]
+ %H = or i64 %G, %E ; <i64> [#uses=1]
+ ret i64 %H
+}
diff --git a/test/CodeGen/X86/addr-label-difference.ll b/test/CodeGen/X86/addr-label-difference.ll
new file mode 100644
index 0000000..547d6b5
--- /dev/null
+++ b/test/CodeGen/X86/addr-label-difference.ll
@@ -0,0 +1,22 @@
+; RUN: llc %s -o - | grep {__TEXT,__const}
+; PR5929
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin10.0"
+
+; This array should go into the __TEXT,__const section, not into the
+; __DATA,__const section, because the elements don't need relocations.
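+; Note: each entry is the difference of two block addresses within a single
+; function, which the assembler can fold to a constant, so no relocation is
+; required at link time.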
+@test.array = internal constant [3 x i32] [i32 sub (i32 ptrtoint (i8* blockaddress(@test, %foo) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %bar) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %hack) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32))] ; <[3 x i32]*> [#uses=1]
+
+define void @test(i32 %i) nounwind ssp {
+entry:
+ br label %foo
+
+foo: ; preds = %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto
+ br label %bar
+
+bar: ; preds = %foo, %indirectgoto
+ br label %hack
+
+hack: ; preds = %bar, %indirectgoto
+ ret void
+}
diff --git a/test/CodeGen/X86/and-su.ll b/test/CodeGen/X86/and-su.ll
index b5ac23b..38db88a 100644
--- a/test/CodeGen/X86/and-su.ll
+++ b/test/CodeGen/X86/and-su.ll
@@ -1,16 +1,53 @@
-; RUN: llc < %s -march=x86 | grep {(%} | count 1
+; RUN: llc < %s -march=x86 | FileCheck %s
; Don't duplicate the load.
define fastcc i32 @foo(i32* %p) nounwind {
+; CHECK: foo:
+; CHECK: andl $10, %eax
+; CHECK: je
%t0 = load i32* %p
%t2 = and i32 %t0, 10
%t3 = icmp ne i32 %t2, 0
br i1 %t3, label %bb63, label %bb76
-
bb63:
ret i32 %t2
-
bb76:
ret i32 0
}
+
+define fastcc double @bar(i32 %hash, double %x, double %y) nounwind {
+entry:
+; CHECK: bar:
+ %0 = and i32 %hash, 15
+ %1 = icmp ult i32 %0, 8
+ br i1 %1, label %bb11, label %bb10
+
+bb10:
+; CHECK: bb10
+; CHECK: testb $1
+ %2 = and i32 %hash, 1
+ %3 = icmp eq i32 %2, 0
+ br i1 %3, label %bb13, label %bb11
+
+bb11:
+ %4 = fsub double -0.000000e+00, %x
+ br label %bb13
+
+bb13:
+; CHECK: bb13
+; CHECK: testb $2
+ %iftmp.9.0 = phi double [ %4, %bb11 ], [ %x, %bb10 ]
+ %5 = and i32 %hash, 2
+ %6 = icmp eq i32 %5, 0
+ br i1 %6, label %bb16, label %bb14
+
+bb14:
+ %7 = fsub double -0.000000e+00, %y
+ br label %bb16
+
+bb16:
+ %iftmp.10.0 = phi double [ %7, %bb14 ], [ %y, %bb13 ]
+ %8 = fadd double %iftmp.9.0, %iftmp.10.0
+ ret double %8
+}
diff --git a/test/CodeGen/X86/anyext-uses.ll b/test/CodeGen/X86/anyext-uses.ll
deleted file mode 100644
index 0cf169e..0000000
--- a/test/CodeGen/X86/anyext-uses.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: grep mov %t | count 8
-; RUN: not grep implicit %t
-
-; Avoid partial register updates; don't define an i8 register and read
-; the i32 super-register.
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-apple-darwin9.6"
- %struct.RC4_KEY = type { i8, i8, [256 x i8] }
-
-define void @foo(%struct.RC4_KEY* nocapture %key, i64 %len, i8* %indata, i8* %outdata) nounwind {
-entry:
- br label %bb24
-
-bb24: ; preds = %bb24, %entry
- %0 = load i8* null, align 1 ; <i8> [#uses=1]
- %1 = zext i8 %0 to i64 ; <i64> [#uses=1]
- %2 = shl i64 %1, 32 ; <i64> [#uses=1]
- %3 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 0 ; <i8*> [#uses=1]
- %4 = load i8* %3, align 1 ; <i8> [#uses=2]
- %5 = add i8 %4, 0 ; <i8> [#uses=2]
- %6 = zext i8 %5 to i64 ; <i64> [#uses=0]
- %7 = load i8* null, align 1 ; <i8> [#uses=1]
- %8 = zext i8 %4 to i32 ; <i32> [#uses=1]
- %9 = zext i8 %7 to i32 ; <i32> [#uses=1]
- %10 = add i32 %9, %8 ; <i32> [#uses=1]
- %11 = and i32 %10, 255 ; <i32> [#uses=1]
- %12 = zext i32 %11 to i64 ; <i64> [#uses=1]
- %13 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 %12 ; <i8*> [#uses=1]
- %14 = load i8* %13, align 1 ; <i8> [#uses=1]
- %15 = zext i8 %14 to i64 ; <i64> [#uses=1]
- %16 = shl i64 %15, 48 ; <i64> [#uses=1]
- %17 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 0 ; <i8*> [#uses=1]
- %18 = load i8* %17, align 1 ; <i8> [#uses=2]
- %19 = add i8 %18, %5 ; <i8> [#uses=1]
- %20 = zext i8 %19 to i64 ; <i64> [#uses=1]
- %21 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 %20 ; <i8*> [#uses=1]
- store i8 %18, i8* %21, align 1
- %22 = or i64 0, %2 ; <i64> [#uses=1]
- %23 = or i64 %22, 0 ; <i64> [#uses=1]
- %24 = or i64 %23, %16 ; <i64> [#uses=1]
- %25 = or i64 %24, 0 ; <i64> [#uses=1]
- %26 = xor i64 %25, 0 ; <i64> [#uses=1]
- store i64 %26, i64* null, align 8
- br label %bb24
-}
diff --git a/test/CodeGen/X86/br-fold.ll b/test/CodeGen/X86/br-fold.ll
new file mode 100644
index 0000000..8af3bd1
--- /dev/null
+++ b/test/CodeGen/X86/br-fold.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+; CHECK: orq
+; CHECK-NEXT: jne
+
+@_ZN11xercesc_2_513SchemaSymbols21fgURI_SCHEMAFORSCHEMAE = external constant [33 x i16], align 32 ; <[33 x i16]*> [#uses=1]
+@_ZN11xercesc_2_56XMLUni16fgNotationStringE = external constant [9 x i16], align 16 ; <[9 x i16]*> [#uses=1]
+
+define fastcc void @foo() {
+entry:
+ br i1 icmp eq (i64 or (i64 ptrtoint ([33 x i16]* @_ZN11xercesc_2_513SchemaSymbols21fgURI_SCHEMAFORSCHEMAE to i64),
+ i64 ptrtoint ([9 x i16]* @_ZN11xercesc_2_56XMLUni16fgNotationStringE to i64)), i64 0),
+ label %bb8.i329, label %bb4.i.i318.preheader
+
+bb4.i.i318.preheader: ; preds = %bb6
+ unreachable
+
+bb8.i329: ; preds = %bb6
+ unreachable
+}
diff --git a/test/CodeGen/X86/brcond.ll b/test/CodeGen/X86/brcond.ll
new file mode 100644
index 0000000..130483a
--- /dev/null
+++ b/test/CodeGen/X86/brcond.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s
+; rdar://7475489
+
+define i32 @test1(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: test1:
+; CHECK: xorb
+; CHECK-NOT: andb
+; CHECK-NOT: shrb
+; CHECK: testb $64
+ %0 = and i32 %a, 16384
+ %1 = icmp ne i32 %0, 0
+ %2 = and i32 %b, 16384
+ %3 = icmp ne i32 %2, 0
+ %4 = xor i1 %1, %3
+ br i1 %4, label %bb1, label %bb
+
+bb: ; preds = %entry
+ %5 = tail call i32 (...)* @foo() nounwind ; <i32> [#uses=1]
+ ret i32 %5
+
+bb1: ; preds = %entry
+ %6 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=1]
+ ret i32 %6
+}
+
+declare i32 @foo(...)
+
+declare i32 @bar(...)
+
+; PR3351 - (P == 0) & (Q == 0) -> (P|Q) == 0
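+; Note: both pointers are compared against zero, so OR-ing them and testing
+; the combined value once yields the same result with a single compare.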
+define i32 @test2(i32* %P, i32* %Q) nounwind ssp {
+entry:
+ %a = icmp eq i32* %P, null ; <i1> [#uses=1]
+ %b = icmp eq i32* %Q, null ; <i1> [#uses=1]
+ %c = and i1 %a, %b
+ br i1 %c, label %bb1, label %return
+
+bb1: ; preds = %entry
+ ret i32 4
+
+return: ; preds = %entry
+ ret i32 192
+; CHECK: test2:
+; CHECK: movl 4(%esp), %eax
+; CHECK-NEXT: orl 8(%esp), %eax
+; CHECK-NEXT: jne LBB2_2
+}
+
+; PR3351 - (P != 0) | (Q != 0) -> (P|Q) != 0
+define i32 @test3(i32* %P, i32* %Q) nounwind ssp {
+entry:
+ %a = icmp ne i32* %P, null ; <i1> [#uses=1]
+ %b = icmp ne i32* %Q, null ; <i1> [#uses=1]
+ %c = or i1 %a, %b
+ br i1 %c, label %bb1, label %return
+
+bb1: ; preds = %entry
+ ret i32 4
+
+return: ; preds = %entry
+ ret i32 192
+; CHECK: test3:
+; CHECK: movl 4(%esp), %eax
+; CHECK-NEXT: orl 8(%esp), %eax
+; CHECK-NEXT: je LBB3_2
+}
diff --git a/test/CodeGen/X86/darwin-bzero.ll b/test/CodeGen/X86/darwin-bzero.ll
index a3c1e6f..a9573cf 100644
--- a/test/CodeGen/X86/darwin-bzero.ll
+++ b/test/CodeGen/X86/darwin-bzero.ll
@@ -3,6 +3,6 @@
declare void @llvm.memset.i32(i8*, i8, i32, i32)
define void @foo(i8* %p, i32 %len) {
- call void @llvm.memset.i32(i8* %p, i8 0, i32 %len, i32 1);
+ call void @llvm.memset.i32(i8* %p, i8 0, i32 %len, i32 1)
ret void
}
diff --git a/test/CodeGen/X86/extractelement-shuffle.ll b/test/CodeGen/X86/extractelement-shuffle.ll
index 12a2ef3..d1ba9a8 100644
--- a/test/CodeGen/X86/extractelement-shuffle.ll
+++ b/test/CodeGen/X86/extractelement-shuffle.ll
@@ -6,8 +6,8 @@
; through the 3rd mask element, which doesn't exist.
define i32 @update(<2 x i64> %val1, <2 x i64> %val2) nounwind readnone {
entry:
- %shuf = shufflevector <2 x i64> %val1, <2 x i64> %val2, <2 x i32> <i32 0, i32 3>;
- %bit = bitcast <2 x i64> %shuf to <4 x i32>;
- %res = extractelement <4 x i32> %bit, i32 3;
- ret i32 %res;
-} \ No newline at end of file
+ %shuf = shufflevector <2 x i64> %val1, <2 x i64> %val2, <2 x i32> <i32 0, i32 3>
+ %bit = bitcast <2 x i64> %shuf to <4 x i32>
+ %res = extractelement <4 x i32> %bit, i32 3
+ ret i32 %res
+}
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
index 3dcd736..84b3fd7 100644
--- a/test/CodeGen/X86/fast-isel.ll
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -14,7 +14,7 @@ fast:
%t1 = mul i32 %t0, %s
%t2 = sub i32 %t1, %s
%t3 = and i32 %t2, %s
- %t4 = or i32 %t3, %s
+ %t4 = xor i32 %t3, 3
%t5 = xor i32 %t4, %s
%t6 = add i32 %t5, 2
%t7 = getelementptr i32* %y, i32 1
diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll
index eb182da..5525af2 100644
--- a/test/CodeGen/X86/fold-load.ll
+++ b/test/CodeGen/X86/fold-load.ll
@@ -1,11 +1,12 @@
-; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86 | FileCheck %s
%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
%struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (...)*, void (...)*, i8*, i8 }
@stmt_obstack = external global %struct.obstack ; <%struct.obstack*> [#uses=1]
-define void @expand_start_bindings() {
+; This should just not crash.
+define void @test1() nounwind {
entry:
- br i1 false, label %cond_true, label %cond_next
+ br i1 true, label %cond_true, label %cond_next
cond_true: ; preds = %entry
%new_size.0.i = select i1 false, i32 0, i32 0 ; <i32> [#uses=1]
@@ -25,3 +26,22 @@ cond_false30.i: ; preds = %cond_true
cond_next: ; preds = %entry
ret void
}
+
+define i32 @test2(i16* %P, i16* %Q) nounwind {
+ %A = load i16* %P, align 4 ; <i16> [#uses=11]
+ %C = zext i16 %A to i32 ; <i32> [#uses=1]
+ %D = and i32 %C, 255 ; <i32> [#uses=1]
+ br label %L
+L:
+
+ store i16 %A, i16* %Q
+ ret i32 %D
+
+; CHECK: test2:
+; CHECK: movl 4(%esp), %eax
+; CHECK-NEXT: movzwl (%eax), %ecx
+
+}
+
diff --git a/test/CodeGen/X86/lsr-sort.ll b/test/CodeGen/X86/lsr-sort.ll
index 4058989..1f3b59a 100644
--- a/test/CodeGen/X86/lsr-sort.ll
+++ b/test/CodeGen/X86/lsr-sort.ll
@@ -4,7 +4,7 @@
@X = common global i16 0 ; <i16*> [#uses=1]
-define void @foo(i32 %N) nounwind {
+define i32 @foo(i32 %N) nounwind {
entry:
%0 = icmp sgt i32 %N, 0 ; <i1> [#uses=1]
br i1 %0, label %bb, label %return
@@ -18,5 +18,6 @@ bb: ; preds = %bb, %entry
br i1 %exitcond, label %return, label %bb
return: ; preds = %bb, %entry
- ret void
+ %h = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]
+ ret i32 %h
}
diff --git a/test/CodeGen/X86/mul-legalize.ll b/test/CodeGen/X86/mul-legalize.ll
index eca9e6f..069737d 100644
--- a/test/CodeGen/X86/mul-legalize.ll
+++ b/test/CodeGen/X86/mul-legalize.ll
@@ -19,6 +19,6 @@ return:
ret void
}
-declare i1 @report__equal(i32 %x, i32 %y) nounwind;
+declare i1 @report__equal(i32 %x, i32 %y) nounwind
declare void @abort()
diff --git a/test/CodeGen/X86/private.ll b/test/CodeGen/X86/private.ll
index 22b6f35..f52f8c7 100644
--- a/test/CodeGen/X86/private.ll
+++ b/test/CodeGen/X86/private.ll
@@ -11,7 +11,7 @@ define private void @foo() {
ret void
}
-@baz = private global i32 4;
+@baz = private global i32 4
define i32 @bar() {
call void @foo()
diff --git a/test/CodeGen/X86/remat-mov-0.ll b/test/CodeGen/X86/remat-mov-0.ll
new file mode 100644
index 0000000..c4f768c
--- /dev/null
+++ b/test/CodeGen/X86/remat-mov-0.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86-64 | grep {xorl %edi, %edi} | count 4
+
+; CodeGen should remat the zero instead of spilling it.
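+; Note: "xorl %edi, %edi" is trivially rematerializable, so it should be
+; re-emitted before each call rather than kept live across the calls, which
+; would need a spill or a callee-saved copy; hence the grep count of 4.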
+
+declare void @foo(i64 %p)
+
+define void @bar() nounwind {
+ call void @foo(i64 0)
+ call void @foo(i64 0)
+ call void @foo(i64 0)
+ call void @foo(i64 0)
+ ret void
+}
diff --git a/test/CodeGen/X86/sext-subreg.ll b/test/CodeGen/X86/sext-subreg.ll
new file mode 100644
index 0000000..b2b9f81
--- /dev/null
+++ b/test/CodeGen/X86/sext-subreg.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; rdar://7529457
+
+define i64 @t(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
+; CHECK: t:
+; CHECK: movslq %e{{.*}}, %rax
+; CHECK: movq %rax
+; CHECK: movl %eax
+ %C = add i64 %A, %B
+ %D = trunc i64 %C to i32
+ volatile store i32 %D, i32* %P
+ %E = shl i64 %C, 32
+ %F = ashr i64 %E, 32
+ volatile store i64 %F, i64 *%P2
+ volatile store i32 %D, i32* %P
+ ret i64 undef
+}
diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll
index d762392..7d85818 100644
--- a/test/CodeGen/X86/stack-color-with-reg.ll
+++ b/test/CodeGen/X86/stack-color-with-reg.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
-; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 6
+; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 14
type { [62 x %struct.Bitvec*] } ; type %0
type { i8* } ; type %1
diff --git a/test/CodeGen/X86/stride-nine-with-base-reg.ll b/test/CodeGen/X86/stride-nine-with-base-reg.ll
index 7aae9eb..f4847a3 100644
--- a/test/CodeGen/X86/stride-nine-with-base-reg.ll
+++ b/test/CodeGen/X86/stride-nine-with-base-reg.ll
@@ -7,6 +7,7 @@
@B = external global [1000 x i8], align 32
@A = external global [1000 x i8], align 32
@P = external global [1000 x i8], align 32
+@Q = external global [1000 x i8], align 32
define void @foo(i32 %m, i32 %p) nounwind {
entry:
@@ -24,6 +25,8 @@ bb:
%tmp0 = add i32 %tmp8, %p
%tmp10 = getelementptr [1000 x i8]* @P, i32 0, i32 %tmp0
store i8 17, i8* %tmp10, align 4
+ %tmp11 = getelementptr [1000 x i8]* @Q, i32 0, i32 %tmp0
+ store i8 19, i8* %tmp11, align 4
%indvar.next = add i32 %i.019.0, 1
%exitcond = icmp eq i32 %indvar.next, %m
br i1 %exitcond, label %return, label %bb
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index 8c3cae9..c5dbb04 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -55,7 +55,7 @@ altret:
ret void
}
-declare i8* @choose(i8*, i8*);
+declare i8* @choose(i8*, i8*)
; BranchFolding should tail-duplicate the indirect jump to avoid
; redundant branching.
diff --git a/test/CodeGen/X86/tailcall-largecode.ll b/test/CodeGen/X86/tailcall-largecode.ll
new file mode 100644
index 0000000..8ddc405
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-largecode.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large | FileCheck %s
+
+declare fastcc i32 @callee(i32 %arg)
+define fastcc i32 @directcall(i32 %arg) {
+entry:
+; This is the large code model, so &callee may not fit into the jmp
+; instruction. Instead, stick it into a register.
+; CHECK: movabsq $callee, [[REGISTER:%r[a-z0-9]+]]
+; CHECK: jmpq *[[REGISTER]] # TAILCALL
+ %res = tail call fastcc i32 @callee(i32 %arg)
+ ret i32 %res
+}
+
+; Check that the register used for an indirect tail call doesn't
+; clobber any of the arguments.
+define fastcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target) {
+; Adjust the stack to enter the function. (The amount of the
+; adjustment may change in the future, in which case the location of
+; the stack argument and the return adjustment will change too.)
+; CHECK: subq $8, %rsp
+; Put the call target into R11, which won't be clobbered while restoring
+; callee-saved registers and won't be used for passing arguments.
+; CHECK: movq %rdi, %r11
+; Pass the stack argument.
+; CHECK: movl $7, 16(%rsp)
+; Pass the register arguments, in the right registers.
+; CHECK: movl $1, %edi
+; CHECK: movl $2, %esi
+; CHECK: movl $3, %edx
+; CHECK: movl $4, %ecx
+; CHECK: movl $5, %r8d
+; CHECK: movl $6, %r9d
+; Adjust the stack to "return".
+; CHECK: addq $8, %rsp
+; And tail-call to the target.
+; CHECK: jmpq *%r11 # TAILCALL
+ %res = tail call fastcc i32 %target(i32 1, i32 2, i32 3, i32 4, i32 5,
+ i32 6, i32 7)
+ ret i32 %res
+}
+
+; Check that the register used for a direct tail call doesn't clobber
+; any of the arguments.
+declare fastcc i32 @manyargs_callee(i32,i32,i32,i32,i32,i32,i32)
+define fastcc i32 @direct_manyargs() {
+; Adjust the stack to enter the function. (The amount of the
+; adjustment may change in the future, in which case the location of
+; the stack argument and the return adjustment will change too.)
+; CHECK: subq $8, %rsp
+; Pass the stack argument.
+; CHECK: movl $7, 16(%rsp)
+; Pass the register arguments, in the right registers.
+; CHECK: movl $1, %edi
+; CHECK: movl $2, %esi
+; CHECK: movl $3, %edx
+; CHECK: movl $4, %ecx
+; CHECK: movl $5, %r8d
+; CHECK: movl $6, %r9d
+; This is the large code model, so &manyargs_callee may not fit into
+; the jmp instruction. Put it into R11, which won't be clobbered
+; while restoring callee-saved registers and won't be used for passing
+; arguments.
+; CHECK: movabsq $manyargs_callee, %r11
+; Adjust the stack to "return".
+; CHECK: addq $8, %rsp
+; And tail-call to the target.
+; CHECK: jmpq *%r11 # TAILCALL
+ %res = tail call fastcc i32 @manyargs_callee(i32 1, i32 2, i32 3, i32 4,
+ i32 5, i32 6, i32 7)
+ ret i32 %res
+}
diff --git a/test/CodeGen/X86/test-nofold.ll b/test/CodeGen/X86/test-nofold.ll
index 772ff6c..f1063dc 100644
--- a/test/CodeGen/X86/test-nofold.ll
+++ b/test/CodeGen/X86/test-nofold.ll
@@ -1,22 +1,35 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | grep {testl.*%e.x.*%e.x}
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
; rdar://5752025
-; We don't want to fold the and into the test, because the and clobbers its
-; input forcing a copy. We want:
-; movl $15, %ecx
+; We want:
+; CHECK: movl 4(%esp), %ecx
+; CHECK-NEXT: andl $15, %ecx
+; CHECK-NEXT: movl $42, %eax
+; CHECK-NEXT: cmovel %ecx, %eax
+; CHECK-NEXT: ret
+;
+; We don't want:
+; movl 4(%esp), %eax
+; movl %eax, %ecx # bad: extra copy
+; andl $15, %ecx
+; testl $15, %eax # bad: peep obstructed
+; movl $42, %eax
+; cmovel %ecx, %eax
+; ret
+;
+; We also don't want:
+; movl $15, %ecx # bad: larger encoding
; andl 4(%esp), %ecx
-; testl %ecx, %ecx
; movl $42, %eax
-; cmove %ecx, %eax
+; cmovel %ecx, %eax
; ret
;
-; Not:
-; movl 4(%esp), %eax
-; movl %eax, %ecx
+; We also don't want:
+; movl 4(%esp), %ecx
; andl $15, %ecx
-; testl $15, %eax
+; testl %ecx, %ecx # bad: unnecessary test
; movl $42, %eax
-; cmove %ecx, %eax
+; cmovel %ecx, %eax
; ret
define i32 @t1(i32 %X) nounwind {
diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll
new file mode 100644
index 0000000..a245ed7
--- /dev/null
+++ b/test/CodeGen/X86/twoaddr-lea.ll
@@ -0,0 +1,24 @@
+;; X's live range extends beyond the shift, so the register allocator
+;; cannot coalesce it with Y. Because of this, a copy needs to be
+;; emitted before the shift to save the register value before it is
+;; clobbered. However, this copy is not needed if the register
+;; allocator turns the shift into an LEA. This also occurs for ADD.
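+;; Note: "addl $1, %eax" overwrites %eax in place (two-address form) and so
+;; forces a copy when %eax is still live afterwards, whereas
+;; "leal 1(%eax), %ecx" writes a separate destination and needs no copy.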
+
+; Check that the shift gets turned into an LEA.
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN: not grep {mov E.X, E.X}
+
+@G = external global i32 ; <i32*> [#uses=3]
+
+define i32 @test1(i32 %X, i32 %Y) {
+ %Z = add i32 %X, %Y ; <i32> [#uses=1]
+ volatile store i32 %Y, i32* @G
+ volatile store i32 %Z, i32* @G
+ ret i32 %X
+}
+
+define i32 @test2(i32 %X) {
+ %Z = add i32 %X, 1 ; <i32> [#uses=1]
+ volatile store i32 %Z, i32* @G
+ ret i32 %X
+}
diff --git a/test/CodeGen/X86/use-add-flags.ll b/test/CodeGen/X86/use-add-flags.ll
new file mode 100644
index 0000000..2dd2a4a
--- /dev/null
+++ b/test/CodeGen/X86/use-add-flags.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=x86-64 -o - | FileCheck %s
+
+; Reuse the flags value from the add instructions instead of emitting separate
+; testl instructions.
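+; Note: addl and andl already set ZF and SF as a side effect, so when the
+; arithmetic result is needed anyway, branching or cmov-ing on those flags
+; avoids a separate testl of the same value.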
+
+; Use the flags on the add.
+
+; CHECK: add_zf:
+; CHECK: addl (%rdi), %esi
+; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: cmovnsl %ecx, %eax
+; CHECK-NEXT: ret
+
+define i32 @add_zf(i32* %x, i32 %y, i32 %a, i32 %b) nounwind {
+ %tmp2 = load i32* %x, align 4 ; <i32> [#uses=1]
+ %tmp4 = add i32 %tmp2, %y ; <i32> [#uses=1]
+ %tmp5 = icmp slt i32 %tmp4, 0 ; <i1> [#uses=1]
+ %tmp.0 = select i1 %tmp5, i32 %a, i32 %b ; <i32> [#uses=1]
+ ret i32 %tmp.0
+}
+
+declare void @foo(i32)
+
+; Don't use the flags result of the and here, since the and has no
+; other use. A simple test is better.
+
+; CHECK: bar:
+; CHECK: testb $16, %dil
+
+define void @bar(i32 %x) nounwind {
+ %y = and i32 %x, 16
+ %t = icmp eq i32 %y, 0
+ br i1 %t, label %true, label %false
+true:
+ call void @foo(i32 %x)
+ ret void
+false:
+ ret void
+}
+
+; Do use the flags result of the and here, since the and has another use.
+
+; CHECK: qux:
+; CHECK: andl $16, %edi
+; CHECK-NEXT: jne
+
+define void @qux(i32 %x) nounwind {
+ %y = and i32 %x, 16
+ %t = icmp eq i32 %y, 0
+ br i1 %t, label %true, label %false
+true:
+ call void @foo(i32 %y)
+ ret void
+false:
+ ret void
+}
diff --git a/test/CodeGen/X86/vec_cast.ll b/test/CodeGen/X86/vec_cast.ll
new file mode 100644
index 0000000..1f899b3
--- /dev/null
+++ b/test/CodeGen/X86/vec_cast.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=x86-64
+; RUN: llc < %s -march=x86-64 -disable-mmx
+
+define <8 x i32> @a(<8 x i16> %a) nounwind {
+ %c = sext <8 x i16> %a to <8 x i32>
+ ret <8 x i32> %c
+}
+
+define <3 x i32> @b(<3 x i16> %a) nounwind {
+ %c = sext <3 x i16> %a to <3 x i32>
+ ret <3 x i32> %c
+}
+
+define <1 x i32> @c(<1 x i16> %a) nounwind {
+ %c = sext <1 x i16> %a to <1 x i32>
+ ret <1 x i32> %c
+}
+
+define <8 x i32> @d(<8 x i16> %a) nounwind {
+ %c = zext <8 x i16> %a to <8 x i32>
+ ret <8 x i32> %c
+}
+
+define <3 x i32> @e(<3 x i16> %a) nounwind {
+ %c = zext <3 x i16> %a to <3 x i32>
+ ret <3 x i32> %c
+}
+
+define <1 x i32> @f(<1 x i16> %a) nounwind {
+ %c = zext <1 x i16> %a to <1 x i32>
+ ret <1 x i32> %c
+}
+
+; TODO: Legalize doesn't yet handle this.
+;define <8 x i16> @g(<8 x i32> %a) nounwind {
+; %c = trunc <8 x i32> %a to <8 x i16>
+; ret <8 x i16> %c
+;}
+
+define <3 x i16> @h(<3 x i32> %a) nounwind {
+ %c = trunc <3 x i32> %a to <3 x i16>
+ ret <3 x i16> %c
+}
+
+define <1 x i16> @i(<1 x i32> %a) nounwind {
+ %c = trunc <1 x i32> %a to <1 x i16>
+ ret <1 x i16> %c
+}
diff --git a/test/CodeGen/X86/vec_ext_inreg.ll b/test/CodeGen/X86/vec_ext_inreg.ll
index 02b16a7..8d2a3c3 100644
--- a/test/CodeGen/X86/vec_ext_inreg.ll
+++ b/test/CodeGen/X86/vec_ext_inreg.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=x86-64
+; RUN: llc < %s -march=x86-64 -disable-mmx
define <8 x i32> @a(<8 x i32> %a) nounwind {
%b = trunc <8 x i32> %a to <8 x i16>
diff --git a/test/CodeGen/X86/vec_shuffle-22.ll b/test/CodeGen/X86/vec_shuffle-22.ll
index 1cf37d4..6807e4d 100644
--- a/test/CodeGen/X86/vec_shuffle-22.ll
+++ b/test/CodeGen/X86/vec_shuffle-22.ll
@@ -9,7 +9,7 @@ define <4 x float> @t1(<4 x float> %a) nounwind {
define <4 x i32> @t2(<4 x i32>* %a) nounwind {
; CHECK: pshufd
; CHECK: ret
- %tmp1 = load <4 x i32>* %a;
+ %tmp1 = load <4 x i32>* %a
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> < i32 0, i32 1, i32 0, i32 1 > ; <<4 x i32>> [#uses=1]
ret <4 x i32> %tmp2
}
diff --git a/test/CodeGen/X86/vec_shuffle-25.ll b/test/CodeGen/X86/vec_shuffle-25.ll
index 2aa2d25..d9b2388 100644
--- a/test/CodeGen/X86/vec_shuffle-25.ll
+++ b/test/CodeGen/X86/vec_shuffle-25.ll
@@ -19,16 +19,16 @@ entry:
%unpcklps8 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=2]
%unpckhps11 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=2]
%unpcklps14 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1]
- %unpcklps14a = shufflevector <4 x float> %unpcklps14, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>;
+ %unpcklps14a = shufflevector <4 x float> %unpcklps14, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%unpckhps17 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1]
- %unpckhps17a = shufflevector <4 x float> %unpckhps17, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>;
- %r1 = shufflevector <16 x float> %unpcklps14a, <16 x float> %unpckhps17a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>;
+ %unpckhps17a = shufflevector <4 x float> %unpckhps17, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %r1 = shufflevector <16 x float> %unpcklps14a, <16 x float> %unpckhps17a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%unpcklps20 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1]
- %unpcklps20a = shufflevector <4 x float> %unpcklps20, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>;
- %r2 = shufflevector <16 x float> %r1, <16 x float> %unpcklps20a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>;
+ %unpcklps20a = shufflevector <4 x float> %unpcklps20, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %r2 = shufflevector <16 x float> %r1, <16 x float> %unpcklps20a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
%unpckhps23 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1]
- %unpckhps23a = shufflevector <4 x float> %unpckhps23, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>;
- %r3 = shufflevector <16 x float> %r2, <16 x float> %unpckhps23a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>;
- %r4 = shufflevector <16 x float> %r3, <16 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>;
- ret <8 x float> %r4;
+ %unpckhps23a = shufflevector <4 x float> %unpckhps23, <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %r3 = shufflevector <16 x float> %r2, <16 x float> %unpckhps23a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+ %r4 = shufflevector <16 x float> %r3, <16 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x float> %r4
}
diff --git a/test/CodeGen/X86/vec_shuffle-26.ll b/test/CodeGen/X86/vec_shuffle-26.ll
index 8cc15d1..086af6b 100644
--- a/test/CodeGen/X86/vec_shuffle-26.ll
+++ b/test/CodeGen/X86/vec_shuffle-26.ll
@@ -20,10 +20,10 @@ entry:
%unpckhps11 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=2]
%unpcklps14 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1]
%unpckhps17 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1]
- %r1 = shufflevector <4 x float> %unpcklps14, <4 x float> %unpckhps17, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >;
+ %r1 = shufflevector <4 x float> %unpcklps14, <4 x float> %unpckhps17, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
%unpcklps20 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1]
%unpckhps23 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1]
- %r2 = shufflevector <4 x float> %unpcklps20, <4 x float> %unpckhps23, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >;
+ %r2 = shufflevector <4 x float> %unpcklps20, <4 x float> %unpckhps23, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
; %r3 = shufflevector <8 x float> %r1, <8 x float> %r2, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15 >;
- ret <8 x float> %r2;
+ ret <8 x float> %r2
}
diff --git a/test/CodeGen/X86/widen_select-1.ll b/test/CodeGen/X86/widen_select-1.ll
index 4154433..d9de892 100644
--- a/test/CodeGen/X86/widen_select-1.ll
+++ b/test/CodeGen/X86/widen_select-1.ll
@@ -6,7 +6,7 @@
define void @select(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwind {
entry:
%x = select i1 %c, <6 x i32> %src1, <6 x i32> %src2
- %val = sub <6 x i32> %x, < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >;
+ %val = sub <6 x i32> %x, < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
store <6 x i32> %val, <6 x i32>* %dst.addr
ret void
}
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index dd02241..47dba4b 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -7,7 +7,7 @@
define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
entry:
%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2>
- %val = fadd <3 x float> %x, %src2;
+ %val = fadd <3 x float> %x, %src2
store <3 x float> %val, <3 x float>* %dst.addr
ret void
}
diff --git a/test/CodeGen/X86/widen_shuffle-2.ll b/test/CodeGen/X86/widen_shuffle-2.ll
index d097e41..9374a02 100644
--- a/test/CodeGen/X86/widen_shuffle-2.ll
+++ b/test/CodeGen/X86/widen_shuffle-2.ll
@@ -7,7 +7,7 @@
define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
entry:
%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2>
- %val = fadd <3 x float> %x, %src2;
+ %val = fadd <3 x float> %x, %src2
store <3 x float> %val, <3 x float>* %dst.addr
ret void
}
diff --git a/test/CodeGen/X86/x86-64-and-mask.ll b/test/CodeGen/X86/x86-64-and-mask.ll
index 3c73891..2465f23 100644
--- a/test/CodeGen/X86/x86-64-and-mask.ll
+++ b/test/CodeGen/X86/x86-64-and-mask.ll
@@ -1,12 +1,49 @@
-; RUN: llc < %s | grep {movl.*%edi, %eax}
-; This should be a single mov, not a load of immediate + andq.
+; RUN: llc < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
-define i64 @test(i64 %x) nounwind {
+; This should be a single mov, not a load of immediate + andq.
+; CHECK: test:
+; CHECK: movl %edi, %eax
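+; 4294967295 is 0xFFFFFFFF, so the and just keeps the low 32 bits; a 32-bit
+; mov already zeroes the upper half of %rax, making the mask free.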
+
+define i64 @test(i64 %x) nounwind {
entry:
%tmp123 = and i64 %x, 4294967295 ; <i64> [#uses=1]
ret i64 %tmp123
}
+; This copy can't be coalesced away because it needs the implicit zero-extend.
+; CHECK: bbb:
+; CHECK: movl %edi, %edi
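+; movl %edi, %edi looks like a no-op, but it zeroes the upper 32 bits of %rdi.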
+
+define void @bbb(i64 %x) nounwind {
+ %t = and i64 %x, 4294967295
+ call void @foo(i64 %t)
+ ret void
+}
+
+; This should use a 32-bit and with implicit zero-extension, not a 64-bit and
+; with a separate mov to materialize the mask.
+; rdar://7527390
+; CHECK: ccc:
+; CHECK: andl $-1048593, %edi
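+; 4293918703 is 0x00000000FFEFFFEF: the upper 32 bits are zero and the low 32
+; bits equal -1048593 as a signed immediate, so a 32-bit andl plus the
+; implicit zero-extension gives the full 64-bit mask.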
+
+declare void @foo(i64 %x) nounwind
+
+define void @ccc(i64 %x) nounwind {
+ %t = and i64 %x, 4293918703
+ call void @foo(i64 %t)
+ ret void
+}
+
+; This requires a mov and a 64-bit and.
+; CHECK: ddd:
+; CHECK: movabsq $4294967296, %rax
+; CHECK: andq %rax, %rdi
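+; 4294967296 is 1 << 32, which does not fit in a sign-extended 32-bit
+; immediate, so the mask has to be materialized in a register first.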
+
+define void @ddd(i64 %x) nounwind {
+ %t = and i64 %x, 4294967296
+ call void @foo(i64 %t)
+ ret void
+}
diff --git a/test/CodeGen/X86/x86-64-jumps.ll b/test/CodeGen/X86/x86-64-jumps.ll
index 5ed6a23..11b40c8 100644
--- a/test/CodeGen/X86/x86-64-jumps.ll
+++ b/test/CodeGen/X86/x86-64-jumps.ll
@@ -14,3 +14,32 @@ bb6: ; preds = %entry
ret i8 2
}
+
+; PR5930 - Trunc of block address differences.
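+; Each array element is a label offset: the address of a block in @test2
+; minus the address of %foo, truncated from i64 to i32.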
+@test.array = internal constant [3 x i32] [i32 trunc (i64 sub (i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64), i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i8* blockaddress(@test2, %bar) to i64), i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i8* blockaddress(@test2, %hack) to i64), i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64)) to i32)] ; <[3 x i32]*> [#uses=1]
+
+define void @test2(i32 %i) nounwind ssp {
+entry:
+ %i.addr = alloca i32 ; <i32*> [#uses=2]
+ store i32 %i, i32* %i.addr
+ %tmp = load i32* %i.addr ; <i32> [#uses=1]
+ %idxprom = sext i32 %tmp to i64 ; <i64> [#uses=1]
+ %arrayidx = getelementptr inbounds i32* getelementptr inbounds ([3 x i32]* @test.array, i32 0, i32 0), i64 %idxprom ; <i32*> [#uses=1]
+ %tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
+ %idx.ext = sext i32 %tmp1 to i64 ; <i64> [#uses=1]
+ %add.ptr = getelementptr i8* blockaddress(@test2, %foo), i64 %idx.ext ; <i8*> [#uses=1]
+ br label %indirectgoto
+
+foo: ; preds = %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto
+ br label %bar
+
+bar: ; preds = %foo, %indirectgoto
+ br label %hack
+
+hack: ; preds = %bar, %indirectgoto
+ ret void
+
+indirectgoto: ; preds = %entry
+ %indirect.goto.dest = phi i8* [ %add.ptr, %entry ] ; <i8*> [#uses=1]
+ indirectbr i8* %indirect.goto.dest, [label %foo, label %foo, label %bar, label %foo, label %hack, label %foo, label %foo]
+}
diff --git a/test/CodeGen/X86/brcond-srl.ll b/test/CodeGen/X86/xor-icmp.ll
index 12674e9..a6bdb13 100644
--- a/test/CodeGen/X86/brcond-srl.ll
+++ b/test/CodeGen/X86/xor-icmp.ll
@@ -1,13 +1,20 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
-; rdar://7475489
+; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64
define i32 @t(i32 %a, i32 %b) nounwind ssp {
entry:
-; CHECK: t:
-; CHECK: xorb
-; CHECK-NOT: andb
-; CHECK-NOT: shrb
-; CHECK: testb $64
+; X32: t:
+; X32: xorb
+; X32-NOT: andb
+; X32-NOT: shrb
+; X32: testb $64
+; X32: jne
+
+; X64: t:
+; X64-NOT: setne
+; X64: xorl
+; X64: testb $64
+; X64: jne
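+; 16384 is bit 14, which lives in byte 1 as bit 6 (0x40); xoring %a and %b
+; first lets a single testb $64 replace the two compares.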
%0 = and i32 %a, 16384
%1 = icmp ne i32 %0, 0
%2 = and i32 %b, 16384
diff --git a/test/CodeGen/XCore/2009-03-27-v2f64-param.ll b/test/CodeGen/XCore/2009-03-27-v2f64-param.ll
index a6b9699..e35a36a 100644
--- a/test/CodeGen/XCore/2009-03-27-v2f64-param.ll
+++ b/test/CodeGen/XCore/2009-03-27-v2f64-param.ll
@@ -2,5 +2,5 @@
; PR3898
define i32 @vector_param(<2 x double> %x) nounwind {
- ret i32 1;
+ ret i32 1
}
diff --git a/test/CodeGen/XCore/private.ll b/test/CodeGen/XCore/private.ll
index 9a2f5b3..c595a6d 100644
--- a/test/CodeGen/XCore/private.ll
+++ b/test/CodeGen/XCore/private.ll
@@ -12,7 +12,7 @@ define private void @foo() {
ret void
}
-@baz = private global i32 4;
+@baz = private global i32 4
define i32 @bar() {
call void @foo()
diff --git a/test/DebugInfo/2009-10-16-Scope.ll b/test/DebugInfo/2009-10-16-Scope.ll
index ea43249..9f9fa65 100644
--- a/test/DebugInfo/2009-10-16-Scope.ll
+++ b/test/DebugInfo/2009-10-16-Scope.ll
@@ -9,8 +9,7 @@ entry:
br label %do.body, !dbg !0
do.body: ; preds = %entry
- %0 = bitcast i32* %count_ to { }* ; <{ }*> [#uses=1]
- call void @llvm.dbg.declare({ }* %0, metadata !4)
+ call void @llvm.dbg.declare(metadata !{i32* %count_}, metadata !4)
%conv = ptrtoint i32* %count_ to i32, !dbg !0 ; <i32> [#uses=1]
%call = call i32 @foo(i32 %conv) ssp, !dbg !0 ; <i32> [#uses=0]
br label %do.end, !dbg !0
@@ -19,7 +18,7 @@ do.end: ; preds = %do.body
ret void, !dbg !7
}
-declare void @llvm.dbg.declare({ }*, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
declare i32 @foo(i32) ssp
diff --git a/test/DebugInfo/2009-12-01-CurrentFn.ll b/test/DebugInfo/2009-12-01-CurrentFn.ll
deleted file mode 100644
index 6fc538e..0000000
--- a/test/DebugInfo/2009-12-01-CurrentFn.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s | grep "func_end1:" | count 1
-; XFAIL: powerpc-apple-darwin
-declare void @foo()
-
-define void @bar(i32 %i) nounwind ssp {
-entry:
- tail call void @foo() nounwind, !dbg !0
- ret void, !dbg !0
-}
-
-!0 = metadata !{i32 9, i32 0, metadata !1, null}
-!1 = metadata !{i32 458798, i32 0, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", metadata !2, i32 8, metadata !3, i1 true, i1 true}; [DW_TAG_subprogram ]
-!2 = metadata !{i32 458769, i32 0, i32 1, metadata !"2007-12-VarArrayDebug.c", metadata !"/Volumes/Data/ddunbar/llvm/test/FrontendC", metadata !"4.2.1 (Based on Apple Inc. build 5653) (LLVM build)", i1 true, i1 true, metadata !"", i32 0}; [DW_TAG_compile_unit ]
-!3 = metadata !{i32 458773, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0}; [DW_TAG_subroutine_type ]
-!4 = metadata !{null, metadata !5}
-!5 = metadata !{i32 458788, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
-
diff --git a/test/DebugInfo/2010-01-05-DbgScope.ll b/test/DebugInfo/2010-01-05-DbgScope.ll
new file mode 100644
index 0000000..8cf20e3
--- /dev/null
+++ b/test/DebugInfo/2010-01-05-DbgScope.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -o /dev/null
+; PR 5942
+define i8* @foo() nounwind {
+entry:
+ %0 = load i32* undef, align 4, !dbg !0 ; <i32> [#uses=1]
+ %1 = inttoptr i32 %0 to i8*, !dbg !0 ; <i8*> [#uses=1]
+ ret i8* %1, !dbg !10
+
+}
+
+!0 = metadata !{i32 571, i32 3, metadata !1, null}
+!1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
+!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", metadata !3, i32 561, metadata !4, i1 false, i1 true}; [DW_TAG_subprogram ]
+!3 = metadata !{i32 458769, i32 0, i32 12, metadata !"hashtab.c", metadata !"/usr/src/gnu/usr.bin/cc/cc_tools/../../../../contrib/gcclibs/libiberty", metadata !"clang 1.1", i1 true, i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
+!4 = metadata !{i32 458773, metadata !3, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0}; [DW_TAG_subroutine_type ]
+!5 = metadata !{metadata !6}
+!6 = metadata !{i32 458788, metadata !3, metadata !"char", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 588, i32 1, metadata !2, null}
diff --git a/test/DebugInfo/printdbginfo2.ll b/test/DebugInfo/printdbginfo2.ll
index c5fe7ad..e19395b 100644
--- a/test/DebugInfo/printdbginfo2.ll
+++ b/test/DebugInfo/printdbginfo2.ll
@@ -9,32 +9,26 @@
define i32 @main() nounwind {
entry:
-; CHECK:; (x.c:6:3)
%retval = alloca i32 ; <i32*> [#uses=3]
%b = alloca %struct.foo, align 4 ; <%struct.foo*> [#uses=2]
; CHECK:; %b is variable b of type foo declared at x.c:7
%a = alloca [4 x i32], align 4 ; <[4 x i32]*> [#uses=1]
; CHECK:; %a is variable a of type declared at x.c:8
call void @llvm.dbg.func.start(metadata !3)
-; CHECK:; fully qualified function name: main return type: int at line 5
store i32 0, i32* %retval
call void @llvm.dbg.stoppoint(i32 6, i32 3, metadata !1)
-; CHECK:; x.c:7:3
call void @llvm.dbg.stoppoint(i32 7, i32 3, metadata !1)
%0 = bitcast %struct.foo* %b to { }* ; <{ }*> [#uses=1]
- call void @llvm.dbg.declare({ }* %0, metadata !4)
+ call void @llvm.dbg.declare(metadata !{%struct.foo* %b}, metadata !4)
; CHECK:; %0 is variable b of type foo declared at x.c:7
call void @llvm.dbg.stoppoint(i32 8, i32 3, metadata !1)
-; CHECK:; x.c:8:3
%1 = bitcast [4 x i32]* %a to { }* ; <{ }*> [#uses=1]
- call void @llvm.dbg.declare({ }* %1, metadata !8)
+ call void @llvm.dbg.declare(metadata !{[4 x i32]* %a}, metadata !8)
; CHECK:; %1 is variable a of type declared at x.c:8
call void @llvm.dbg.stoppoint(i32 9, i32 3, metadata !1)
-; CHECK:; x.c:9:3
%tmp = getelementptr inbounds %struct.foo* %b, i32 0, i32 0 ; <i32*> [#uses=1]
; CHECK:; %tmp is variable b of type foo declared at x.c:7
store i32 5, i32* %tmp
-; CHECK:; x.c:10:3
call void @llvm.dbg.stoppoint(i32 10, i32 3, metadata !1)
%tmp1 = load i32* @main.c ; <i32> [#uses=1]
; CHECK:; @main.c is variable c of type int declared at x.c:6
@@ -43,7 +37,6 @@ entry:
; <label>:2 ; preds = %entry
call void @llvm.dbg.stoppoint(i32 11, i32 1, metadata !1)
-; CHECK:; (x.c:11:1)
call void @llvm.dbg.region.end(metadata !3)
%3 = load i32* %retval ; <i32> [#uses=1]
ret i32 %3
@@ -53,7 +46,7 @@ declare void @llvm.dbg.func.start(metadata) nounwind readnone
declare void @llvm.dbg.stoppoint(i32, i32, metadata) nounwind readnone
-declare void @llvm.dbg.declare({ }*, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
declare void @llvm.dbg.region.end(metadata) nounwind readnone
diff --git a/test/ExecutionEngine/2010-01-15-UndefValue.ll b/test/ExecutionEngine/2010-01-15-UndefValue.ll
new file mode 100644
index 0000000..7d646eb
--- /dev/null
+++ b/test/ExecutionEngine/2010-01-15-UndefValue.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: lli -force-interpreter=true %t.bc
+
+define i32 @main() {
+ %a = add i32 0, undef
+ %b = add float 0.0, undef
+ %c = add double 0.0, undef
+ ret i32 0
+}
diff --git a/test/Feature/NamedMDNode.ll b/test/Feature/NamedMDNode.ll
index 56fc349..02a79f8 100644
--- a/test/Feature/NamedMDNode.ll
+++ b/test/Feature/NamedMDNode.ll
@@ -3,4 +3,7 @@
;; Simple NamedMDNode
!0 = metadata !{i32 42}
!1 = metadata !{metadata !"foo"}
-!llvm.stuff = !{!0, !1}
+!llvm.stuff = !{!0, !1, null}
+
+!samename = !{!0, !1}
+declare void @samename()
diff --git a/test/FrontendC/2010-01-05-LinkageName.c b/test/FrontendC/2010-01-05-LinkageName.c
new file mode 100644
index 0000000..9c1a215
--- /dev/null
+++ b/test/FrontendC/2010-01-05-LinkageName.c
@@ -0,0 +1,15 @@
+// RUN: %llvmgcc -O2 -S -g %s -o - | llc -o 2010-01-05-LinkageName.s -O0
+// RUN: %compile_c 2010-01-05-LinkageName.s -o 2010-01-05-LinkageName.s
+
+struct tm {};
+long mktime(struct tm *) __asm("_mktime$UNIX2003");
+tzload(name, sp, doextend){}
+long mktime(tmp)
+ struct tm *const tmp;
+{
+ tzset();
+}
+timelocal(tmp) {
+ return mktime(tmp);
+}
+
diff --git a/test/FrontendC/2010-01-13-MemBarrier.c b/test/FrontendC/2010-01-13-MemBarrier.c
new file mode 100644
index 0000000..53d0081
--- /dev/null
+++ b/test/FrontendC/2010-01-13-MemBarrier.c
@@ -0,0 +1,11 @@
+// RUN: %llvmgcc %s -S -emit-llvm -o - | FileCheck %s
+// XFAIL: sparc,powerpc
+// rdar://7536390
+
+unsigned t(unsigned *ptr, unsigned val) {
+ // CHECK: @t
+ // CHECK: call void @llvm.memory.barrier
+ // CHECK-NEXT: call i32 @llvm.atomic.swap.i32
+ // CHECK-NEXT: call void @llvm.memory.barrier
+ return __sync_lock_test_and_set(ptr, val);
+}
diff --git a/test/FrontendC/2010-01-14-FnType-DebugInfo.c b/test/FrontendC/2010-01-14-FnType-DebugInfo.c
new file mode 100644
index 0000000..beaad91
--- /dev/null
+++ b/test/FrontendC/2010-01-14-FnType-DebugInfo.c
@@ -0,0 +1,4 @@
+// RUN: %llvmgcc %s -S -g -o /dev/null
+typedef void (*sigcatch_t)( struct sigcontext *);
+sigcatch_t sigcatch[50] = {(sigcatch_t) 0};
+
diff --git a/test/FrontendC/2010-01-14-StaticVariable.c b/test/FrontendC/2010-01-14-StaticVariable.c
new file mode 100644
index 0000000..80dd4d4
--- /dev/null
+++ b/test/FrontendC/2010-01-14-StaticVariable.c
@@ -0,0 +1,12 @@
+// This is a regression test on debug info to make sure that LLVM-emitted
+// debug info does not crash gdb.
+// RUN: %llvmgcc -S -O0 -g %s -o - | \
+// RUN: llc --disable-fp-elim -o %t.s -O0 -relocation-model=pic
+// RUN: %compile_c %t.s -o %t.o
+// RUN: echo {quit\n} > %t.in
+// RUN: gdb -q -batch -n -x %t.in %t.o > /dev/null
+
+int foo() {
+ static int i = 42;
+ return i;
+}
diff --git a/test/FrontendC/cstring-align.c b/test/FrontendC/cstring-align.c
index 2e3fc8b..715d0f3 100644
--- a/test/FrontendC/cstring-align.c
+++ b/test/FrontendC/cstring-align.c
@@ -1,5 +1,7 @@
// RUN: %llvmgcc %s -c -Os -m32 -emit-llvm -o - | llc -march=x86 -mtriple=i386-apple-darwin10 | FileCheck %s -check-prefix=DARWIN32
// RUN: %llvmgcc %s -c -Os -m64 -emit-llvm -o - | llc -march=x86-64 -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=DARWIN64
+// XFAIL: *
+// XTARGET: darwin
extern void func(const char *, const char *);
diff --git a/test/Integer/BitPacked.ll b/test/Integer/BitPacked.ll
index d363a81..e6e453a 100644
--- a/test/Integer/BitPacked.ll
+++ b/test/Integer/BitPacked.ll
@@ -2,8 +2,8 @@
; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
; RUN: diff %t1.ll %t2.ll
-@foo1 = external global <4 x float>;
-@foo2 = external global <2 x i10>;
+@foo1 = external global <4 x float>
+@foo2 = external global <2 x i10>
define void @main()
diff --git a/test/Integer/packed_bt.ll b/test/Integer/packed_bt.ll
index 5a2045d..f6ea87c 100644
--- a/test/Integer/packed_bt.ll
+++ b/test/Integer/packed_bt.ll
@@ -2,8 +2,8 @@
; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
; RUN: diff %t1.ll %t2.ll
-@foo1 = external global <4 x float>;
-@foo2 = external global <2 x i10>;
+@foo1 = external global <4 x float>
+@foo2 = external global <2 x i10>
define void @main()
diff --git a/test/Integer/testvarargs_bt.ll b/test/Integer/testvarargs_bt.ll
index a645c84..3227d14 100644
--- a/test/Integer/testvarargs_bt.ll
+++ b/test/Integer/testvarargs_bt.ll
@@ -7,7 +7,7 @@ declare i31 @"printf"(i8*, ...) ;; Prototype for: i32 __builtin_printf(const c
define i31 @"testvarar"()
begin
- call i31(i8*, ...) *@printf(i8 * null, i31 12, i8 42);
+ call i31(i8*, ...) *@printf(i8 * null, i31 12, i8 42)
ret i31 %1
end
diff --git a/test/Other/2007-06-28-PassManager.ll b/test/Other/2007-06-28-PassManager.ll
index f097f59..f162a40 100644
--- a/test/Other/2007-06-28-PassManager.ll
+++ b/test/Other/2007-06-28-PassManager.ll
@@ -3,5 +3,5 @@
; RUN: opt < %s -analyze -indvars -disable-output
; PR1539
define i32 @test1() {
- ret i32 0;
+ ret i32 0
}
diff --git a/test/Other/2008-02-14-PassManager.ll b/test/Other/2008-02-14-PassManager.ll
index 6b51edb..bdaf933 100644
--- a/test/Other/2008-02-14-PassManager.ll
+++ b/test/Other/2008-02-14-PassManager.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -loop-unroll -loop-rotate -simplifycfg -disable-output
; PR2028
define i32 @test1() {
- ret i32 0;
+ ret i32 0
}
diff --git a/test/Other/2008-08-14-PassManager.ll b/test/Other/2008-08-14-PassManager.ll
index 22a421d..8d6a6d8 100644
--- a/test/Other/2008-08-14-PassManager.ll
+++ b/test/Other/2008-08-14-PassManager.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -loop-deletion -loop-index-split -disable-output
; PR2640
define i32 @test1() {
- ret i32 0;
+ ret i32 0
}
diff --git a/test/Other/2009-06-05-no-implicit-float.ll b/test/Other/2009-06-05-no-implicit-float.ll
index 0d02e3c..3f07170 100644
--- a/test/Other/2009-06-05-no-implicit-float.ll
+++ b/test/Other/2009-06-05-no-implicit-float.ll
@@ -1,4 +1,4 @@
; RUN: opt < %s -verify -S | grep noimplicitfloat
-define void @f() noimplicitfloat {
-}
+declare void @f() noimplicitfloat
+
diff --git a/test/TableGen/eq.td b/test/TableGen/eq.td
new file mode 100644
index 0000000..8ba6d7e
--- /dev/null
+++ b/test/TableGen/eq.td
@@ -0,0 +1,13 @@
+// RUN: tblgen %s | FileCheck %s
+// CHECK: Value = 0
+// CHECK: Value = 1
+
+class Base<int V> {
+ int Value = V;
+}
+
+class Derived<string Truth> :
+ Base<!if(!eq(Truth, "true"), 1, 0)>;
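+// !eq compares the strings when the record is instantiated, so TRUE resolves
+// to Base<1> and FALSE to Base<0>.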
+
+def TRUE : Derived<"true">;
+def FALSE : Derived<"false">;
diff --git a/test/Transforms/ConstProp/loads.ll b/test/Transforms/ConstProp/loads.ll
index 9151d25..9fbba2b 100644
--- a/test/Transforms/ConstProp/loads.ll
+++ b/test/Transforms/ConstProp/loads.ll
@@ -110,3 +110,13 @@ define i16 @test12() {
; CHECK: @test12
; CHECK: ret i16 98
}
+
+
+; PR5978
+@g5 = constant i8 4
+define i1 @test13() {
+ %A = load i1* bitcast (i8* @g5 to i1*)
+ ret i1 %A
+; CHECK: @test13
+; CHECK: ret i1 false
+}
diff --git a/test/Transforms/DeadArgElim/canon.ll b/test/Transforms/DeadArgElim/canon.ll
index 025a46a..11cd482 100644
--- a/test/Transforms/DeadArgElim/canon.ll
+++ b/test/Transforms/DeadArgElim/canon.ll
@@ -10,12 +10,12 @@ define internal {} @test() {
}
define internal {i32} @test2() {
- ret {i32} undef;
+ ret {i32} undef
}
define void @caller() {
call {} @test()
- %X = call {i32} @test2();
+ %X = call {i32} @test2()
%Y = extractvalue {i32} %X, 0
call void @user(i32 %Y, {i32} %X)
ret void
diff --git a/test/Transforms/DeadStoreElimination/const-pointers.ll b/test/Transforms/DeadStoreElimination/const-pointers.ll
index ce3b24c..728a118 100644
--- a/test/Transforms/DeadStoreElimination/const-pointers.ll
+++ b/test/Transforms/DeadStoreElimination/const-pointers.ll
@@ -2,7 +2,7 @@
%t = type { i32 }
-@g = global i32 42;
+@g = global i32 42
define void @test1(%t* noalias %pp) {
%p = getelementptr inbounds %t* %pp, i32 0, i32 0
diff --git a/test/Transforms/DeadStoreElimination/no-targetdata.ll b/test/Transforms/DeadStoreElimination/no-targetdata.ll
index 42c4e1b..7e8f52a 100644
--- a/test/Transforms/DeadStoreElimination/no-targetdata.ll
+++ b/test/Transforms/DeadStoreElimination/no-targetdata.ll
@@ -3,7 +3,7 @@
declare void @test1f()
define void @test1(i32* noalias %p) {
- store i32 1, i32* %p;
+ store i32 1, i32* %p
call void @test1f()
store i32 2, i32 *%p
ret void
@@ -12,4 +12,4 @@ define void @test1(i32* noalias %p) {
; CHECK-NEXT: call void
; CHECK-NEXT: store i32 2
; CHECK-NEXT: ret void
-}
\ No newline at end of file
+}
diff --git a/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll b/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll
index 09eb468..c6c2e13 100644
--- a/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll
+++ b/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll
@@ -1,10 +1,64 @@
-; RUN: opt < %s -functionattrs -S | grep readnone | count 2
+; RUN: opt < %s -functionattrs -S | FileCheck %s
+
+%struct.X = type { i32*, i32* }
declare i32 @g(i32*) readnone
define i32 @f() {
+; CHECK: @f() readnone
%x = alloca i32 ; <i32*> [#uses=2]
store i32 0, i32* %x
%y = call i32 @g(i32* %x) ; <i32> [#uses=1]
ret i32 %y
}
+
+define i32 @foo() nounwind {
+; CHECK: @foo() nounwind readonly
+entry:
+ %y = alloca %struct.X ; <%struct.X*> [#uses=2]
+ %x = alloca %struct.X ; <%struct.X*> [#uses=2]
+ %j = alloca i32 ; <i32*> [#uses=2]
+ %i = alloca i32 ; <i32*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store i32 0, i32* %i, align 4
+ store i32 1, i32* %j, align 4
+ %0 = getelementptr inbounds %struct.X* %y, i32 0, i32 0 ; <i32**> [#uses=1]
+ store i32* %i, i32** %0, align 8
+ %1 = getelementptr inbounds %struct.X* %x, i32 0, i32 1 ; <i32**> [#uses=1]
+ store i32* %j, i32** %1, align 8
+ %x1 = bitcast %struct.X* %x to i8* ; <i8*> [#uses=2]
+ %y2 = bitcast %struct.X* %y to i8* ; <i8*> [#uses=1]
+ call void @llvm.memcpy.i64(i8* %x1, i8* %y2, i64 8, i32 1)
+ %2 = bitcast i8* %x1 to i32** ; <i32**> [#uses=1]
+ %3 = load i32** %2, align 8 ; <i32*> [#uses=1]
+ %4 = load i32* %3, align 4 ; <i32> [#uses=1]
+ br label %return
+
+return: ; preds = %entry
+ ret i32 %4
+}
+
+define i32 @t(i32 %a, i32 %b, i32 %c) nounwind {
+; CHECK: @t(i32 %a, i32 %b, i32 %c) nounwind readnone
+entry:
+ %a.addr = alloca i32 ; <i32*> [#uses=3]
+ %c.addr = alloca i32 ; <i32*> [#uses=2]
+ store i32 %a, i32* %a.addr
+ store i32 %c, i32* %c.addr
+ %tmp = load i32* %a.addr ; <i32> [#uses=1]
+ %tobool = icmp ne i32 %tmp, 0 ; <i1> [#uses=1]
+ br i1 %tobool, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ br label %if.end
+
+if.else: ; preds = %entry
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %p.0 = phi i32* [ %a.addr, %if.then ], [ %c.addr, %if.else ] ; <i32*> [#uses=1]
+ %tmp2 = load i32* %p.0 ; <i32> [#uses=1]
+ ret i32 %tmp2
+}
+
+declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
diff --git a/test/Transforms/GVN/null-aliases-nothing.ll b/test/Transforms/GVN/null-aliases-nothing.ll
index bc5c850..4d533bb 100644
--- a/test/Transforms/GVN/null-aliases-nothing.ll
+++ b/test/Transforms/GVN/null-aliases-nothing.ll
@@ -10,7 +10,7 @@ define void @test1(%t* noalias %stuff ) {
call void @test1f(i8* null)
%after = load i32* %p ; <--- This should be a dead load
- %sum = add i32 %before, %after;
+ %sum = add i32 %before, %after
store i32 %sum, i32* %p
ret void
diff --git a/test/Transforms/GlobalOpt/crash.ll b/test/Transforms/GlobalOpt/crash.ll
new file mode 100644
index 0000000..a45cbe9
--- /dev/null
+++ b/test/Transforms/GlobalOpt/crash.ll
@@ -0,0 +1,16 @@
+; RUN: opt -globalopt -disable-output %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin9.8"
+
+%0 = type { i32, void ()* }
+%struct.btSimdScalar = type { %"union.btSimdScalar::$_14" }
+%"union.btSimdScalar::$_14" = type { <4 x float> }
+
+@_ZL6vTwist = global %struct.btSimdScalar zeroinitializer ; <%struct.btSimdScalar*> [#uses=1]
+@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @_GLOBAL__I__ZN21btConeTwistConstraintC2Ev }] ; <[12 x %0]*> [#uses=0]
+
+define internal void @_GLOBAL__I__ZN21btConeTwistConstraintC2Ev() nounwind section "__TEXT,__StaticInit,regular,pure_instructions" {
+entry:
+ store float 1.0, float* getelementptr inbounds (%struct.btSimdScalar* @_ZL6vTwist, i32 0, i32 0, i32 0, i32 3), align 4
+ ret void
+}
diff --git a/test/Transforms/GlobalOpt/ctor-list-opt-dbg.ll b/test/Transforms/GlobalOpt/ctor-list-opt-dbg.ll
index 5fe89ee..f794e9f 100644
--- a/test/Transforms/GlobalOpt/ctor-list-opt-dbg.ll
+++ b/test/Transforms/GlobalOpt/ctor-list-opt-dbg.ll
@@ -12,7 +12,7 @@
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ;
+@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
diff --git a/test/Transforms/IPConstantProp/return-argument.ll b/test/Transforms/IPConstantProp/return-argument.ll
index 0223453..6d6eb24 100644
--- a/test/Transforms/IPConstantProp/return-argument.ll
+++ b/test/Transforms/IPConstantProp/return-argument.ll
@@ -34,9 +34,9 @@ define void @caller(i1 %C) {
;; Call @foo twice, to prevent the arguments from propagating into the
;; function (so we can check the returned argument is properly
;; propagated per-caller).
- %S1 = call { i32, i32 } @foo(i32 1, i32 2);
+ %S1 = call { i32, i32 } @foo(i32 1, i32 2)
%X1 = extractvalue { i32, i32 } %S1, 0
- %S2 = invoke { i32, i32 } @foo(i32 3, i32 4) to label %OK unwind label %RET;
+ %S2 = invoke { i32, i32 } @foo(i32 3, i32 4) to label %OK unwind label %RET
OK:
%X2 = extractvalue { i32, i32 } %S2, 0
;; Do some stuff with the returned values which we can grep for
diff --git a/test/Transforms/IndVarSimplify/shrunk-constant.ll b/test/Transforms/IndVarSimplify/shrunk-constant.ll
index 623c528..8003fd3 100644
--- a/test/Transforms/IndVarSimplify/shrunk-constant.ll
+++ b/test/Transforms/IndVarSimplify/shrunk-constant.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -scalar-evolution -analyze -disable-output \
-; RUN: | grep {\\--> (zext i4 {-7,+,-8}<loop> to i32)}
+; RUN: | grep {\\--> (zext i4 {-7,+,-8}<%loop> to i32)}
define fastcc void @foo() nounwind {
entry:
diff --git a/test/Transforms/Inline/2007-12-19-InlineNoUnwind.ll b/test/Transforms/Inline/2007-12-19-InlineNoUnwind.ll
index 08d4dc6..979157e 100644
--- a/test/Transforms/Inline/2007-12-19-InlineNoUnwind.ll
+++ b/test/Transforms/Inline/2007-12-19-InlineNoUnwind.ll
@@ -8,7 +8,7 @@ entry:
%n = call i1 @extern( )
br i1 %n, label %r, label %u
r:
- ret i32 0;
+ ret i32 0
u:
unwind
}
diff --git a/test/Transforms/InstCombine/apint-shift.ll b/test/Transforms/InstCombine/apint-shift.ll
index 6573b5b..55243a6 100644
--- a/test/Transforms/InstCombine/apint-shift.ll
+++ b/test/Transforms/InstCombine/apint-shift.ll
@@ -168,13 +168,6 @@ define i11 @test23(i44 %A) {
ret i11 %D
}
-define i17 @test24(i17 %X) {
- %Y = and i17 %X, -5 ; <i17> [#uses=1]
- %Z = shl i17 %Y, 9 ; <i17> [#uses=1]
- %Q = ashr i17 %Z, 9 ; <i17> [#uses=1]
- ret i17 %Q
-}
-
define i37 @test25(i37 %tmp.2, i37 %AA) {
%x = lshr i37 %AA, 17 ; <i37> [#uses=1]
%tmp.3 = lshr i37 %tmp.2, 17 ; <i37> [#uses=1]
diff --git a/test/Transforms/InstCombine/bswap-fold.ll b/test/Transforms/InstCombine/bswap-fold.ll
index 3e56951..034c70e 100644
--- a/test/Transforms/InstCombine/bswap-fold.ll
+++ b/test/Transforms/InstCombine/bswap-fold.ll
@@ -1,23 +1,22 @@
-; RUN: opt < %s -instcombine -S | grep ret | count 6
; RUN: opt < %s -instcombine -S | not grep call.*bswap
define i1 @test1(i16 %tmp2) {
- %tmp10 = call i16 @llvm.bswap.i16( i16 %tmp2 ) ; <i16> [#uses=1]
- %tmp = icmp eq i16 %tmp10, 1 ; <i1> [#uses=1]
+ %tmp10 = call i16 @llvm.bswap.i16( i16 %tmp2 )
+ %tmp = icmp eq i16 %tmp10, 1
ret i1 %tmp
}
define i1 @test2(i32 %tmp) {
- %tmp34 = tail call i32 @llvm.bswap.i32( i32 %tmp ) ; <i32> [#uses=1]
- %tmp.upgrd.1 = icmp eq i32 %tmp34, 1 ; <i1> [#uses=1]
+ %tmp34 = tail call i32 @llvm.bswap.i32( i32 %tmp )
+ %tmp.upgrd.1 = icmp eq i32 %tmp34, 1
ret i1 %tmp.upgrd.1
}
declare i32 @llvm.bswap.i32(i32)
define i1 @test3(i64 %tmp) {
- %tmp34 = tail call i64 @llvm.bswap.i64( i64 %tmp ) ; <i64> [#uses=1]
- %tmp.upgrd.2 = icmp eq i64 %tmp34, 1 ; <i1> [#uses=1]
+ %tmp34 = tail call i64 @llvm.bswap.i64( i64 %tmp )
+ %tmp.upgrd.2 = icmp eq i64 %tmp34, 1
ret i1 %tmp.upgrd.2
}
@@ -50,3 +49,21 @@ entry:
ret i32 %tmp4
}
+; PR5284
+declare i64 @llvm.bswap.i64(i64)
+declare i32 @llvm.bswap.i32(i32)
+declare i16 @llvm.bswap.i16(i16)
+
+define i16 @test7(i32 %A) {
+ %B = tail call i32 @llvm.bswap.i32(i32 %A) nounwind
+ %C = trunc i32 %B to i16
+ %D = tail call i16 @llvm.bswap.i16(i16 %C) nounwind
+ ret i16 %D
+}
+
+define i16 @test8(i64 %A) {
+ %B = tail call i64 @llvm.bswap.i64(i64 %A) nounwind
+ %C = trunc i64 %B to i16
+ %D = tail call i16 @llvm.bswap.i16(i16 %C) nounwind
+ ret i16 %D
+}
diff --git a/test/Transforms/InstCombine/bswap.ll b/test/Transforms/InstCombine/bswap.ll
index c5aa8be..168b3e8 100644
--- a/test/Transforms/InstCombine/bswap.ll
+++ b/test/Transforms/InstCombine/bswap.ll
@@ -1,3 +1,5 @@
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
; RUN: opt < %s -instcombine -S | \
; RUN: grep {call.*llvm.bswap} | count 6
diff --git a/test/Transforms/InstCombine/cast-and-cast.ll b/test/Transforms/InstCombine/cast-and-cast.ll
deleted file mode 100644
index eda9d99..0000000
--- a/test/Transforms/InstCombine/cast-and-cast.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN: not grep bitcast
-
-define i1 @test1(i32 %val) {
- %t1 = bitcast i32 %val to i32 ; <i32> [#uses=1]
- %t2 = and i32 %t1, 1 ; <i32> [#uses=1]
- %t3 = trunc i32 %t2 to i1 ; <i1> [#uses=1]
- ret i1 %t3
-}
-
-define i16 @test1.upgrd.1(i32 %val) {
- %t1 = bitcast i32 %val to i32 ; <i32> [#uses=1]
- %t2 = and i32 %t1, 1 ; <i32> [#uses=1]
- %t3 = trunc i32 %t2 to i16 ; <i16> [#uses=1]
- ret i16 %t3
-}
-
diff --git a/test/Transforms/InstCombine/cast-cast-to-and.ll b/test/Transforms/InstCombine/cast-cast-to-and.ll
deleted file mode 100644
index 1e591cc..0000000
--- a/test/Transforms/InstCombine/cast-cast-to-and.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN: not grep i8
-
-define i32 @test1(i32 %X) {
- %Y = trunc i32 %X to i8 ; <i8> [#uses=1]
- %Z = zext i8 %Y to i32 ; <i32> [#uses=1]
- ret i32 %Z
-}
-
diff --git a/test/Transforms/InstCombine/cast-load-gep.ll b/test/Transforms/InstCombine/cast-load-gep.ll
deleted file mode 100644
index 271c737..0000000
--- a/test/Transforms/InstCombine/cast-load-gep.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: opt < %s -instcombine -globaldce -S | \
-; RUN: not grep Array
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
-
-; Pulling the cast out of the load allows us to eliminate the load, and then
-; the whole array.
-
- %op = type { float }
- %unop = type { i32 }
-@Array = internal constant [1 x %op* (%op*)*] [ %op* (%op*)* @foo ] ; <[1 x %op* (%op*)*]*> [#uses=1]
-
-define %op* @foo(%op* %X) {
- ret %op* %X
-}
-
-define %unop* @caller(%op* %O) {
- %tmp = load %unop* (%op*)** bitcast ([1 x %op* (%op*)*]* @Array to %unop* (%op*)**); <%unop* (%op*)*> [#uses=1]
- %tmp.2 = call %unop* %tmp( %op* %O ) ; <%unop*> [#uses=1]
- ret %unop* %tmp.2
-}
-
diff --git a/test/Transforms/InstCombine/cast-propagate.ll b/test/Transforms/InstCombine/cast-propagate.ll
deleted file mode 100644
index 95c040b..0000000
--- a/test/Transforms/InstCombine/cast-propagate.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: opt < %s -instcombine -mem2reg -S | \
-; RUN: not grep load
-
-define i32 @test1(i32* %P) {
- %A = alloca i32 ; <i32*> [#uses=2]
- store i32 123, i32* %A
- ; Cast the result of the load not the source
- %Q = bitcast i32* %A to i32* ; <i32*> [#uses=1]
- %V = load i32* %Q ; <i32> [#uses=1]
- ret i32 %V
-}
diff --git a/test/Transforms/InstCombine/cast-sext-zext.ll b/test/Transforms/InstCombine/cast-sext-zext.ll
deleted file mode 100644
index 0fecc1c..0000000
--- a/test/Transforms/InstCombine/cast-sext-zext.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: opt < %s -instcombine -S | not grep sext
-; XFAIL: *
-
-define zeroext i16 @t(i8 zeroext %on_off, i16* nocapture %puls) nounwind readonly {
-entry:
- %0 = zext i8 %on_off to i32
- %1 = add i32 %0, -1
- %2 = sext i32 %1 to i64
- %3 = getelementptr i16* %puls, i64 %2
- %4 = load i16* %3, align 2
- ret i16 %4
-}
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 5f75cd0..878da68 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -55,8 +55,8 @@ define i32 @test6(i64 %A) {
%c1 = trunc i64 %A to i32 ; <i32> [#uses=1]
%res = bitcast i32 %c1 to i32 ; <i32> [#uses=1]
ret i32 %res
-; CHECK: %res = trunc i64 %A to i32
-; CHECK: ret i32 %res
+; CHECK: trunc i64 %A to i32
+; CHECK-NEXT: ret i32
}
define i64 @test7(i1 %A) {
@@ -71,8 +71,8 @@ define i64 @test8(i8 %A) {
%c1 = sext i8 %A to i64 ; <i64> [#uses=1]
%res = bitcast i64 %c1 to i64 ; <i64> [#uses=1]
ret i64 %res
-; CHECK: %res = sext i8 %A to i64
-; CHECK: ret i64 %res
+; CHECK: = sext i8 %A to i64
+; CHECK-NEXT: ret i64
}
define i16 @test9(i16 %A) {
@@ -185,8 +185,8 @@ define i32 @test22(i32 %X) {
%c2 = sext i8 %c1 to i32 ; <i32> [#uses=1]
%RV = shl i32 %c2, 24 ; <i32> [#uses=1]
ret i32 %RV
-; CHECK: %RV = shl i32 %X, 24
-; CHECK: ret i32 %RV
+; CHECK: shl i32 %X, 24
+; CHECK-NEXT: ret i32
}
define i32 @test23(i32 %X) {
@@ -337,3 +337,271 @@ define i64 @test38(i32 %a) {
; CHECK: %2 = zext i1 %1 to i64
; CHECK: ret i64 %2
}
+
+define i16 @test39(i16 %a) {
+ %tmp = zext i16 %a to i32
+ %tmp21 = lshr i32 %tmp, 8
+ %tmp5 = shl i32 %tmp, 8
+ %tmp.upgrd.32 = or i32 %tmp21, %tmp5
+ %tmp.upgrd.3 = trunc i32 %tmp.upgrd.32 to i16
+ ret i16 %tmp.upgrd.3
+; CHECK: @test39
+; CHECK: %tmp.upgrd.32 = call i16 @llvm.bswap.i16(i16 %a)
+; CHECK: ret i16 %tmp.upgrd.32
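+; lshr by 8 and shl by 8 of the zero-extended i16 exchange its two bytes,
+; which is exactly llvm.bswap.i16.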
+}
+
+define i16 @test40(i16 %a) {
+ %tmp = zext i16 %a to i32
+ %tmp21 = lshr i32 %tmp, 9
+ %tmp5 = shl i32 %tmp, 8
+ %tmp.upgrd.32 = or i32 %tmp21, %tmp5
+ %tmp.upgrd.3 = trunc i32 %tmp.upgrd.32 to i16
+ ret i16 %tmp.upgrd.3
+; CHECK: @test40
+; CHECK: %tmp21 = lshr i16 %a, 9
+; CHECK: %tmp5 = shl i16 %a, 8
+; CHECK: %tmp.upgrd.32 = or i16 %tmp21, %tmp5
+; CHECK: ret i16 %tmp.upgrd.32
+}
+
+; PR1263
+define i32* @test41(i32* %tmp1) {
+ %tmp64 = bitcast i32* %tmp1 to { i32 }*
+ %tmp65 = getelementptr { i32 }* %tmp64, i32 0, i32 0
+ ret i32* %tmp65
+; CHECK: @test41
+; CHECK: ret i32* %tmp1
+}
+
+define i32 @test42(i32 %X) {
+ %Y = trunc i32 %X to i8 ; <i8> [#uses=1]
+ %Z = zext i8 %Y to i32 ; <i32> [#uses=1]
+ ret i32 %Z
+; CHECK: @test42
+; CHECK: %Z = and i32 %X, 255
+}
+
+; rdar://6598839
+define zeroext i64 @test43(i8 zeroext %on_off) nounwind readonly {
+ %A = zext i8 %on_off to i32
+ %B = add i32 %A, -1
+ %C = sext i32 %B to i64
+ ret i64 %C ;; Should be (add (zext i8 -> i64), -1)
+; CHECK: @test43
+; CHECK-NEXT: %A = zext i8 %on_off to i64
+; CHECK-NEXT: %B = add i64 %A, -1
+; CHECK-NEXT: ret i64 %B
+}
+
+define i64 @test44(i8 %T) {
+ %A = zext i8 %T to i16
+ %B = or i16 %A, 1234
+ %C = zext i16 %B to i64
+ ret i64 %C
+; CHECK: @test44
+; CHECK-NEXT: %A = zext i8 %T to i64
+; CHECK-NEXT: %B = or i64 %A, 1234
+; CHECK-NEXT: ret i64 %B
+}
+
+define i64 @test45(i8 %A, i64 %Q) {
+ %D = trunc i64 %Q to i32 ;; should be removed
+ %B = sext i8 %A to i32
+ %C = or i32 %B, %D
+ %E = zext i32 %C to i64
+ ret i64 %E
+; CHECK: @test45
+; CHECK-NEXT: %B = sext i8 %A to i64
+; CHECK-NEXT: %C = or i64 %B, %Q
+; CHECK-NEXT: %E = and i64 %C, 4294967295
+; CHECK-NEXT: ret i64 %E
+}
+
+
+define i64 @test46(i64 %A) {
+ %B = trunc i64 %A to i32
+ %C = and i32 %B, 42
+ %D = shl i32 %C, 8
+ %E = zext i32 %D to i64
+ ret i64 %E
+; CHECK: @test46
+; CHECK-NEXT: %C = shl i64 %A, 8
+; CHECK-NEXT: %D = and i64 %C, 10752
+; CHECK-NEXT: ret i64 %D
+}
+
+define i64 @test47(i8 %A) {
+ %B = sext i8 %A to i32
+ %C = or i32 %B, 42
+ %E = zext i32 %C to i64
+ ret i64 %E
+; CHECK: @test47
+; CHECK-NEXT: %B = sext i8 %A to i64
+; CHECK-NEXT: %C = or i64 %B, 42
+; CHECK-NEXT: %E = and i64 %C, 4294967295
+; CHECK-NEXT: ret i64 %E
+}
+
+define i64 @test48(i8 %A, i8 %a) {
+ %b = zext i8 %a to i32
+ %B = zext i8 %A to i32
+ %C = shl i32 %B, 8
+ %D = or i32 %C, %b
+ %E = zext i32 %D to i64
+ ret i64 %E
+; CHECK: @test48
+; CHECK-NEXT: %b = zext i8 %a to i64
+; CHECK-NEXT: %B = zext i8 %A to i64
+; CHECK-NEXT: %C = shl i64 %B, 8
+; CHECK-NEXT: %D = or i64 %C, %b
+; CHECK-NEXT: ret i64 %D
+}
+
+define i64 @test49(i64 %A) {
+ %B = trunc i64 %A to i32
+ %C = or i32 %B, 1
+ %D = sext i32 %C to i64
+ ret i64 %D
+; CHECK: @test49
+; CHECK-NEXT: %C = shl i64 %A, 32
+; CHECK-NEXT: ashr i64 %C, 32
+; CHECK-NEXT: %D = or i64 {{.*}}, 1
+; CHECK-NEXT: ret i64 %D
+}
+
+define i64 @test50(i64 %A) {
+ %a = lshr i64 %A, 2
+ %B = trunc i64 %a to i32
+ %D = add i32 %B, -1
+ %E = sext i32 %D to i64
+ ret i64 %E
+; CHECK: @test50
+; CHECK-NEXT: shl i64 %A, 30
+; CHECK-NEXT: add i64 {{.*}}, -4294967296
+; CHECK-NEXT: %E = ashr i64 {{.*}}, 32
+; CHECK-NEXT: ret i64 %E
+}
+
+define i64 @test51(i64 %A, i1 %cond) {
+ %B = trunc i64 %A to i32
+ %C = and i32 %B, -2
+ %D = or i32 %B, 1
+ %E = select i1 %cond, i32 %C, i32 %D
+ %F = sext i32 %E to i64
+ ret i64 %F
+; CHECK: @test51
+
+; FIXME: disabled, see PR5997
+; HECK-NEXT: %C = and i64 %A, 4294967294
+; HECK-NEXT: %D = or i64 %A, 1
+; HECK-NEXT: %E = select i1 %cond, i64 %C, i64 %D
+; HECK-NEXT: %sext = shl i64 %E, 32
+; HECK-NEXT: %F = ashr i64 %sext, 32
+; HECK-NEXT: ret i64 %F
+}
+
+define i32 @test52(i64 %A) {
+ %B = trunc i64 %A to i16
+ %C = or i16 %B, -32574
+ %D = and i16 %C, -25350
+ %E = zext i16 %D to i32
+ ret i32 %E
+; CHECK: @test52
+; CHECK-NEXT: %B = trunc i64 %A to i32
+; CHECK-NEXT: %C = or i32 %B, 32962
+; CHECK-NEXT: %D = and i32 %C, 40186
+; CHECK-NEXT: ret i32 %D
+}
+
+define i64 @test53(i32 %A) {
+ %B = trunc i32 %A to i16
+ %C = or i16 %B, -32574
+ %D = and i16 %C, -25350
+ %E = zext i16 %D to i64
+ ret i64 %E
+; CHECK: @test53
+; CHECK-NEXT: %B = zext i32 %A to i64
+; CHECK-NEXT: %C = or i64 %B, 32962
+; CHECK-NEXT: %D = and i64 %C, 40186
+; CHECK-NEXT: ret i64 %D
+}
+
+define i32 @test54(i64 %A) {
+ %B = trunc i64 %A to i16
+ %C = or i16 %B, -32574
+ %D = and i16 %C, -25350
+ %E = sext i16 %D to i32
+ ret i32 %E
+; CHECK: @test54
+; CHECK-NEXT: %B = trunc i64 %A to i32
+; CHECK-NEXT: %C = or i32 %B, -32574
+; CHECK-NEXT: %D = and i32 %C, -25350
+; CHECK-NEXT: ret i32 %D
+}
+
+define i64 @test55(i32 %A) {
+ %B = trunc i32 %A to i16
+ %C = or i16 %B, -32574
+ %D = and i16 %C, -25350
+ %E = sext i16 %D to i64
+ ret i64 %E
+; CHECK: @test55
+; CHECK-NEXT: %B = zext i32 %A to i64
+; CHECK-NEXT: %C = or i64 %B, -32574
+; CHECK-NEXT: %D = and i64 %C, -25350
+; CHECK-NEXT: ret i64 %D
+}
+
+define i64 @test56(i16 %A) nounwind {
+ %tmp353 = sext i16 %A to i32
+ %tmp354 = lshr i32 %tmp353, 5
+ %tmp355 = zext i32 %tmp354 to i64
+ ret i64 %tmp355
+; CHECK: @test56
+; CHECK-NEXT: %tmp353 = sext i16 %A to i64
+; CHECK-NEXT: %tmp354 = lshr i64 %tmp353, 5
+; CHECK-NEXT: %tmp355 = and i64 %tmp354, 134217727
+; CHECK-NEXT: ret i64 %tmp355
+}
+
+define i64 @test57(i64 %A) nounwind {
+ %B = trunc i64 %A to i32
+ %C = lshr i32 %B, 8
+ %E = zext i32 %C to i64
+ ret i64 %E
+; CHECK: @test57
+; CHECK-NEXT: %C = lshr i64 %A, 8
+; CHECK-NEXT: %E = and i64 %C, 16777215
+; CHECK-NEXT: ret i64 %E
+}
+
+define i64 @test58(i64 %A) nounwind {
+ %B = trunc i64 %A to i32
+ %C = lshr i32 %B, 8
+ %D = or i32 %C, 128
+ %E = zext i32 %D to i64
+ ret i64 %E
+
+; CHECK: @test58
+; CHECK-NEXT: %C = lshr i64 %A, 8
+; CHECK-NEXT: %D = or i64 %C, 128
+; CHECK-NEXT: %E = and i64 %D, 16777215
+; CHECK-NEXT: ret i64 %E
+}
+
+define i64 @test59(i8 %A, i8 %B) nounwind {
+ %C = zext i8 %A to i32
+ %D = shl i32 %C, 4
+ %E = and i32 %D, 48
+ %F = zext i8 %B to i32
+ %G = lshr i32 %F, 4
+ %H = or i32 %G, %E
+ %I = zext i32 %H to i64
+ ret i64 %I
+; CHECK: @test59
+; CHECK-NEXT: %C = zext i8 %A to i64
+; CHECK-NOT: i32
+; CHECK: %F = zext i8 %B to i64
+; CHECK-NOT: i32
+; CHECK: ret i64 %H
+}
diff --git a/test/Transforms/InstCombine/cast2.ll b/test/Transforms/InstCombine/cast2.ll
deleted file mode 100644
index 2941ee0..0000000
--- a/test/Transforms/InstCombine/cast2.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; Tests to make sure elimination of casts is working correctly
-; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
-
-define i16 @test1(i16 %a) {
- %tmp = zext i16 %a to i32 ; <i32> [#uses=2]
- %tmp21 = lshr i32 %tmp, 8 ; <i32> [#uses=1]
- %tmp5 = shl i32 %tmp, 8 ; <i32> [#uses=1]
- %tmp.upgrd.32 = or i32 %tmp21, %tmp5 ; <i32> [#uses=1]
- %tmp.upgrd.3 = trunc i32 %tmp.upgrd.32 to i16 ; <i16> [#uses=1]
- ret i16 %tmp.upgrd.3
-; CHECK: %tmp.upgrd.32 = call i16 @llvm.bswap.i16(i16 %a)
-; CHECK: ret i16 %tmp.upgrd.32
-}
-
-define i16 @test2(i16 %a) {
- %tmp = zext i16 %a to i32 ; <i32> [#uses=2]
- %tmp21 = lshr i32 %tmp, 9 ; <i32> [#uses=1]
- %tmp5 = shl i32 %tmp, 8 ; <i32> [#uses=1]
- %tmp.upgrd.32 = or i32 %tmp21, %tmp5 ; <i32> [#uses=1]
- %tmp.upgrd.3 = trunc i32 %tmp.upgrd.32 to i16 ; <i16> [#uses=1]
- ret i16 %tmp.upgrd.3
-; CHECK: %tmp21 = lshr i16 %a, 9
-; CHECK: %tmp5 = shl i16 %a, 8
-; CHECK: %tmp.upgrd.32 = or i16 %tmp21, %tmp5
-; CHECK: ret i16 %tmp.upgrd.32
-}
-
-; PR1263
-define i32* @test3(i32* %tmp1) {
- %tmp64 = bitcast i32* %tmp1 to { i32 }* ; <{ i32 }*> [#uses=1]
- %tmp65 = getelementptr { i32 }* %tmp64, i32 0, i32 0 ; <i32*> [#uses=1]
- ret i32* %tmp65
-; CHECK: ret i32* %tmp1
-}
-
-
diff --git a/test/Transforms/InstCombine/cast3.ll b/test/Transforms/InstCombine/cast3.ll
deleted file mode 100644
index bc60f55..0000000
--- a/test/Transforms/InstCombine/cast3.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: opt < %s -instcombine -S | not grep getelementptr
-; PR2831
-
-; Don't raise arbitrary inttoptr+arithmetic+ptrtoint to getelementptr.
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-
-define i32 @main(i32 %argc, i8** %argv) nounwind {
-entry:
- %0 = ptrtoint i8** %argv to i32 ; <i32> [#uses=1]
- %1 = add i32 %0, 1 ; <i32> [#uses=1]
- ret i32 %1
-}
-
-; This testcase could theoretically be optimized down to return zero,
-; but for now being conservative with ptrtoint/inttoptr is fine.
-define i32 @a() nounwind {
-entry:
- %b = alloca i32 ; <i32*> [#uses=3]
- %a = alloca i32 ; <i32*> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store i32 1, i32* %b, align 4
- %a1 = ptrtoint i32* %a to i32 ; <i32> [#uses=1]
- %b4 = ptrtoint i32* %b to i32 ; <i32> [#uses=1]
- %a7 = ptrtoint i32* %a to i32 ; <i32> [#uses=1]
- %0 = sub i32 %b4, %a7 ; <i32> [#uses=1]
- %1 = add i32 %a1, %0 ; <i32> [#uses=1]
- %2 = inttoptr i32 %1 to i32* ; <i32*> [#uses=1]
- store i32 0, i32* %2, align 4
- %3 = load i32* %b, align 4 ; <i32> [#uses=1]
- br label %return
-
-return: ; preds = %entry
- ret i32 %3
-}
diff --git a/test/Transforms/InstCombine/cast_ld_addr_space.ll b/test/Transforms/InstCombine/cast_ld_addr_space.ll
deleted file mode 100644
index e94dce7..0000000
--- a/test/Transforms/InstCombine/cast_ld_addr_space.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: opt < %s -instcombine -S | grep bitcast | count 1
-
-; InstCombine can not 'load (cast P)' -> cast (load P)' if the cast changes
-; the address space.
-
-
-define void @test2(i8 addrspace(1)* %source, <2 x i8> addrspace(1)* %dest) {
-entry:
- %arrayidx1 = bitcast <2 x i8> addrspace(1)* %dest to <2 x i8> addrspace(1)*
- %conv = bitcast i8 addrspace(1)* %source to <16 x i8>*
- %arrayidx22 = bitcast <16 x i8>* %conv to <16 x i8>*
- %tmp3 = load <16 x i8>* %arrayidx22
- %arrayidx223 = bitcast i8 addrspace(1)* %source to i8*
- %tmp4 = load i8* %arrayidx223
- %tmp5 = insertelement <2 x i8> undef, i8 %tmp4, i32 0
- %splat = shufflevector <2 x i8> %tmp5, <2 x i8> undef, <2 x i32> zeroinitializer
- store <2 x i8> %splat, <2 x i8> addrspace(1)* %arrayidx1
- ret void
-}
\ No newline at end of file
diff --git a/test/Transforms/InstCombine/cast_ptr.ll b/test/Transforms/InstCombine/cast_ptr.ll
index 6a00e83..09910fb 100644
--- a/test/Transforms/InstCombine/cast_ptr.ll
+++ b/test/Transforms/InstCombine/cast_ptr.ll
@@ -36,3 +36,44 @@ define i1 @test3(i8* %a) {
%r = icmp eq i32 %tmpa, ptrtoint (i8* @global to i32)
ret i1 %r
}
+
+define i1 @test4(i32 %A) {
+ %B = inttoptr i32 %A to i8*
+ %C = icmp eq i8* %B, null
+ ret i1 %C
+; CHECK: @test4
+; CHECK-NEXT: %C = icmp eq i32 %A, 0
+; CHECK-NEXT: ret i1 %C
+}
+
+
+; Pulling the cast out of the load allows us to eliminate the load, and then
+; the whole array.
+
+ %op = type { float }
+ %unop = type { i32 }
+@Array = internal constant [1 x %op* (%op*)*] [ %op* (%op*)* @foo ] ; <[1 x %op* (%op*)*]*> [#uses=1]
+
+declare %op* @foo(%op* %X)
+
+define %unop* @test5(%op* %O) {
+ %tmp = load %unop* (%op*)** bitcast ([1 x %op* (%op*)*]* @Array to %unop* (%op*)**); <%unop* (%op*)*> [#uses=1]
+ %tmp.2 = call %unop* %tmp( %op* %O ) ; <%unop*> [#uses=1]
+ ret %unop* %tmp.2
+; CHECK: @test5
+; CHECK: call %op* @foo(%op* %O)
+}
+
+
+
+; InstCombine cannot fold 'load (cast P)' -> 'cast (load P)' if the cast changes
+; the address space.
+
+define i8 @test6(i8 addrspace(1)* %source) {
+entry:
+ %arrayidx223 = bitcast i8 addrspace(1)* %source to i8*
+ %tmp4 = load i8* %arrayidx223
+ ret i8 %tmp4
+; CHECK: @test6
+; CHECK: load i8* %arrayidx223
+}
diff --git a/test/Transforms/InstCombine/fsub-fadd.ll b/test/Transforms/InstCombine/fsub-fadd.ll
new file mode 100644
index 0000000..f4cff88
--- /dev/null
+++ b/test/Transforms/InstCombine/fsub-fadd.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; <rdar://problem/7530098>
+
+define void @func(double* %rhi, double* %rlo, double %xh, double %xl, double %yh, double %yl) nounwind ssp {
+entry:
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %tmp = fmul double %xh, 0x41A0000002000000 ; <double> [#uses=2]
+ %tmp1 = fsub double %xh, %tmp ; <double> [#uses=1]
+ %tmp2 = fadd double %tmp1, %tmp ; <double> [#uses=3]
+ %tmp3 = fsub double %xh, %tmp2 ; <double> [#uses=2]
+ %tmp4 = fmul double %yh, 0x41A0000002000000 ; <double> [#uses=2]
+ %tmp5 = fsub double %yh, %tmp4 ; <double> [#uses=1]
+ %tmp6 = fadd double %tmp5, %tmp4 ; <double> [#uses=3]
+ %tmp7 = fsub double %yh, %tmp6 ; <double> [#uses=2]
+ %tmp8 = fmul double %xh, %yh ; <double> [#uses=3]
+ %tmp9 = fmul double %tmp2, %tmp6 ; <double> [#uses=1]
+ %tmp10 = fsub double %tmp9, %tmp8 ; <double> [#uses=1]
+ %tmp11 = fmul double %tmp2, %tmp7 ; <double> [#uses=1]
+ %tmp12 = fadd double %tmp10, %tmp11 ; <double> [#uses=1]
+ %tmp13 = fmul double %tmp3, %tmp6 ; <double> [#uses=1]
+ %tmp14 = fadd double %tmp12, %tmp13 ; <double> [#uses=1]
+ %tmp15 = fmul double %tmp3, %tmp7 ; <double> [#uses=1]
+ %tmp16 = fadd double %tmp14, %tmp15 ; <double> [#uses=1]
+ %tmp17 = fmul double %xh, %yl ; <double> [#uses=1]
+ %tmp18 = fmul double %xl, %yh ; <double> [#uses=1]
+ %tmp19 = fadd double %tmp17, %tmp18 ; <double> [#uses=1]
+ %tmp20 = fadd double %tmp19, %tmp16 ; <double> [#uses=2]
+ %tmp21 = fadd double %tmp8, %tmp20 ; <double> [#uses=1]
+ store double %tmp21, double* %rhi, align 8
+ %tmp22 = load double* %rhi, align 8 ; <double> [#uses=1]
+ %tmp23 = fsub double %tmp8, %tmp22 ; <double> [#uses=1]
+ %tmp24 = fadd double %tmp23, %tmp20 ; <double> [#uses=1]
+
+; CHECK: %tmp23 = fsub double %tmp8, %tmp21
+; CHECK: %tmp24 = fadd double %tmp23, %tmp20
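+; The load of %rhi is forwarded from the store immediately above it, so the
+; fsub uses %tmp21 directly instead of %tmp22.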
+
+ store double %tmp24, double* %rlo, align 8
+ ret void
+}
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 135e777..c63475c 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -5,6 +5,10 @@
declare %overflow.result @llvm.uadd.with.overflow.i8(i8, i8)
declare %overflow.result @llvm.umul.with.overflow.i8(i8, i8)
declare double @llvm.powi.f64(double, i32) nounwind readonly
+declare i32 @llvm.cttz.i32(i32) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
+declare i8 @llvm.ctlz.i8(i8) nounwind readnone
define i8 @test1(i8 %A, i8 %B) {
%x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %A, i8 %B)
@@ -79,7 +83,6 @@ define i8 @test6(i8 %A, i1* %overflowPtr) {
; CHECK-NEXT: ret i8 %A
}
-
define void @powi(double %V, double *%P) {
entry:
%A = tail call double @llvm.powi.f64(double %V, i32 -1) nounwind
@@ -98,4 +101,46 @@ entry:
; CHECK: volatile store double %V
}
+define i32 @cttz(i32 %a) {
+entry:
+ %or = or i32 %a, 8
+ %and = and i32 %or, -8
+ %count = tail call i32 @llvm.cttz.i32(i32 %and) nounwind readnone
+ ret i32 %count
+; CHECK: @cttz
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret i32 3
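+; The or sets bit 3 and the and with -8 clears bits 0-2, so the operand
+; always has exactly 3 trailing zeros.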
+}
+define i8 @ctlz(i8 %a) {
+entry:
+ %or = or i8 %a, 32
+ %and = and i8 %or, 63
+ %count = tail call i8 @llvm.ctlz.i8(i8 %and) nounwind readnone
+ ret i8 %count
+; CHECK: @ctlz
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret i8 2
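+; The or sets bit 5 and the and with 63 clears bits 6-7, so bit 5 is always
+; the highest set bit, leaving 2 leading zeros in an i8.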
+}
+
+define void @cmp.simplify(i32 %a, i32 %b, i1* %c) {
+entry:
+ %lz = tail call i32 @llvm.ctlz.i32(i32 %a) nounwind readnone
+ %lz.cmp = icmp eq i32 %lz, 32
+ volatile store i1 %lz.cmp, i1* %c
+ %tz = tail call i32 @llvm.cttz.i32(i32 %a) nounwind readnone
+ %tz.cmp = icmp ne i32 %tz, 32
+ volatile store i1 %tz.cmp, i1* %c
+ %pop = tail call i32 @llvm.ctpop.i32(i32 %b) nounwind readnone
+ %pop.cmp = icmp eq i32 %pop, 0
+ volatile store i1 %pop.cmp, i1* %c
+ ret void
+; CHECK: @cmp.simplify
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %lz.cmp = icmp eq i32 %a, 0
+; CHECK-NEXT: volatile store i1 %lz.cmp, i1* %c
+; CHECK-NEXT: %tz.cmp = icmp ne i32 %a, 0
+; CHECK-NEXT: volatile store i1 %tz.cmp, i1* %c
+; CHECK-NEXT: %pop.cmp = icmp eq i32 %b, 0
+; CHECK-NEXT: volatile store i1 %pop.cmp, i1* %c
+}
diff --git a/test/Transforms/InstCombine/load-cmp.ll b/test/Transforms/InstCombine/load-cmp.ll
new file mode 100644
index 0000000..fe5df92
--- /dev/null
+++ b/test/Transforms/InstCombine/load-cmp.ll
@@ -0,0 +1,112 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
+ i16 73, i16 82, i16 69, i16 68, i16 0]
+@GD = internal constant [6 x double]
+ [double -10.0, double 1.0, double 4.0, double 2.0, double -20.0, double -40.0]
+
+define i1 @test1(i32 %X) {
+ %P = getelementptr inbounds [10 x i16]* @G16, i32 0, i32 %X
+ %Q = load i16* %P
+ %R = icmp eq i16 %Q, 0
+ ret i1 %R
+; CHECK: @test1
+; CHECK-NEXT: %R = icmp eq i32 %X, 9
+; CHECK-NEXT: ret i1 %R
+}
+
+define i1 @test2(i32 %X) {
+ %P = getelementptr inbounds [10 x i16]* @G16, i32 0, i32 %X
+ %Q = load i16* %P
+ %R = icmp slt i16 %Q, 85
+ ret i1 %R
+; CHECK: @test2
+; CHECK-NEXT: %R = icmp ne i32 %X, 4
+; CHECK-NEXT: ret i1 %R
+}
+
+define i1 @test3(i32 %X) {
+ %P = getelementptr inbounds [6 x double]* @GD, i32 0, i32 %X
+ %Q = load double* %P
+ %R = fcmp oeq double %Q, 1.0
+ ret i1 %R
+; CHECK: @test3
+; CHECK-NEXT: %R = icmp eq i32 %X, 1
+; CHECK-NEXT: ret i1 %R
+}
+
+define i1 @test4(i32 %X) {
+ %P = getelementptr inbounds [10 x i16]* @G16, i32 0, i32 %X
+ %Q = load i16* %P
+ %R = icmp sle i16 %Q, 73
+ ret i1 %R
+; CHECK: @test4
+; CHECK-NEXT: lshr i32 933, %X
+; CHECK-NEXT: and i32 {{.*}}, 1
+; CHECK-NEXT: %R = icmp ne i32 {{.*}}, 0
+; CHECK-NEXT: ret i1 %R
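+; 933 is 0b1110100101: bit i is set exactly when G16[i] <= 73 (indices 0, 2,
+; 5, 7, 8 and 9), so the compare becomes a bit-test against this constant.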
+}
+
+define i1 @test5(i32 %X) {
+ %P = getelementptr inbounds [10 x i16]* @G16, i32 0, i32 %X
+ %Q = load i16* %P
+ %R = icmp eq i16 %Q, 69
+ ret i1 %R
+; CHECK: @test5
+; CHECK-NEXT: icmp eq i32 %X, 2
+; CHECK-NEXT: icmp eq i32 %X, 7
+; CHECK-NEXT: %R = or i1
+; CHECK-NEXT: ret i1 %R
+}
+
+define i1 @test6(i32 %X) {
+ %P = getelementptr inbounds [6 x double]* @GD, i32 0, i32 %X
+ %Q = load double* %P
+ %R = fcmp ogt double %Q, 0.0
+ ret i1 %R
+; CHECK: @test6
+; CHECK-NEXT: add i32 %X, -1
+; CHECK-NEXT: %R = icmp ult i32 {{.*}}, 3
+; CHECK-NEXT: ret i1 %R
+}
+
+define i1 @test7(i32 %X) {
+ %P = getelementptr inbounds [6 x double]* @GD, i32 0, i32 %X
+ %Q = load double* %P
+ %R = fcmp olt double %Q, 0.0
+ ret i1 %R
+; CHECK: @test7
+; CHECK-NEXT: add i32 %X, -1
+; CHECK-NEXT: %R = icmp ugt i32 {{.*}}, 2
+; CHECK-NEXT: ret i1 %R
+}
+
+define i1 @test8(i32 %X) {
+ %P = getelementptr inbounds [10 x i16]* @G16, i32 0, i32 %X
+ %Q = load i16* %P
+ %R = and i16 %Q, 3
+ %S = icmp eq i16 %R, 0
+ ret i1 %S
+; CHECK: @test8
+; CHECK-NEXT: add i32 %X, -8
+; CHECK-NEXT: %S = icmp ult i32 {{.*}}, 2
+; CHECK-NEXT: ret i1 %S
+}
+
+@GA = internal constant [4 x { i32, i32 } ] [
+ { i32, i32 } { i32 1, i32 0 },
+ { i32, i32 } { i32 2, i32 1 },
+ { i32, i32 } { i32 3, i32 1 },
+ { i32, i32 } { i32 4, i32 0 }
+]
+
+define i1 @test9(i32 %X) {
+ %P = getelementptr inbounds [4 x { i32, i32 } ]* @GA, i32 0, i32 %X, i32 1
+ %Q = load i32* %P
+ %R = icmp eq i32 %Q, 1
+ ret i1 %R
+; CHECK: @test9
+; CHECK-NEXT: add i32 %X, -1
+; CHECK-NEXT: %R = icmp ult i32 {{.*}}, 2
+; CHECK-NEXT: ret i1 %R
+}
diff --git a/test/Transforms/InstCombine/load.ll b/test/Transforms/InstCombine/load.ll
index 6d068f5..75c62a8 100644
--- a/test/Transforms/InstCombine/load.ll
+++ b/test/Transforms/InstCombine/load.ll
@@ -76,3 +76,12 @@ define double @test11(double* %p) {
%x = load double* %t1
ret double %x
}
+
+define i32 @test12(i32* %P) {
+ %A = alloca i32
+ store i32 123, i32* %A
+ ; Cast the result of the load, not the source
+ %Q = bitcast i32* %A to i32*
+ %V = load i32* %Q
+ ret i32 %V
+}
diff --git a/test/Transforms/InstCombine/loadstore-alignment.ll b/test/Transforms/InstCombine/loadstore-alignment.ll
index 9fbe683..1d932d2 100644
--- a/test/Transforms/InstCombine/loadstore-alignment.ll
+++ b/test/Transforms/InstCombine/loadstore-alignment.ll
@@ -29,7 +29,7 @@ define <2 x i64> @foo() {
define <2 x i64> @bar() {
%t = alloca <2 x i64>
- call void @kip(<2 x i64>* %t);
+ call void @kip(<2 x i64>* %t)
%tmp1 = load <2 x i64>* %t, align 1
ret <2 x i64> %tmp1
}
@@ -59,7 +59,7 @@ define void @foo_store(<2 x i64> %y) {
define void @bar_store(<2 x i64> %y) {
%t = alloca <2 x i64>
- call void @kip(<2 x i64>* %t);
+ call void @kip(<2 x i64>* %t)
store <2 x i64> %y, <2 x i64>* %t, align 1
ret void
}
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index 4a14081..2040f3d 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -268,19 +268,17 @@ define i1 @test26(i32 %A, i32 %B) {
; CHECK: ret i1
}
-; PR5634
define i1 @test27(i32* %A, i32* %B) {
- %C1 = icmp eq i32* %A, null
- %C2 = icmp eq i32* %B, null
- ; (A == 0) & (B == 0) --> (A|B) == 0
- %D = and i1 %C1, %C2
- ret i1 %D
+ %C1 = ptrtoint i32* %A to i32
+ %C2 = ptrtoint i32* %B to i32
+ %D = or i32 %C1, %C2
+ %E = icmp eq i32 %D, 0
+ ret i1 %E
; CHECK: @test27
-; CHECK: ptrtoint i32* %A
-; CHECK: ptrtoint i32* %B
-; CHECK: or i32
-; CHECK: icmp eq i32 {{.*}}, 0
-; CHECK: ret i1
+; CHECK: icmp eq i32* %A, null
+; CHECK: icmp eq i32* %B, null
+; CHECK: and i1
+; CHECK: ret i1
}
; PR5634
@@ -295,3 +293,46 @@ define i1 @test28(i32 %A, i32 %B) {
; CHECK: icmp ne i32 {{.*}}, 0
; CHECK: ret i1
}
+
+define i1 @test29(i32* %A, i32* %B) {
+ %C1 = ptrtoint i32* %A to i32
+ %C2 = ptrtoint i32* %B to i32
+ %D = or i32 %C1, %C2
+ %E = icmp ne i32 %D, 0
+ ret i1 %E
+; CHECK: @test29
+; CHECK: icmp ne i32* %A, null
+; CHECK: icmp ne i32* %B, null
+; CHECK: or i1
+; CHECK: ret i1
+}
+
+; PR4216
+define i32 @test30(i32 %A) {
+entry:
+ %B = or i32 %A, 32962
+ %C = and i32 %A, -65536
+ %D = and i32 %B, 40186
+ %E = or i32 %D, %C
+ ret i32 %E
+; CHECK: @test30
+; CHECK: %B = or i32 %A, 32962
+; CHECK: %E = and i32 %B, -25350
+; CHECK: ret i32 %E
+}
+
+; PR4216
+define i64 @test31(i64 %A) nounwind readnone ssp noredzone {
+ %B = or i64 %A, 194
+ %D = and i64 %B, 250
+
+ %C = or i64 %A, 32768
+ %E = and i64 %C, 4294941696
+
+ %F = or i64 %D, %E
+ ret i64 %F
+; CHECK: @test31
+; CHECK-NEXT: %bitfield = or i64 %A, 32962
+; CHECK-NEXT: %F = and i64 %bitfield, 4294941946
+; CHECK-NEXT: ret i64 %F
+}
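[Editor's note: worked check of test31's constants. The two masks are disjoint (250 = 0xFA, 4294941696 = 0xFFFF9C00) and each or-constant lies inside its own mask (194 within 250, 32768 within 0xFFFF9C00), so ((A|194) & 250) | ((A|32768) & 4294941696) folds to (A | (194|32768)) & (250|4294941696) = (A | 32962) & 4294941946. test30's expected output uses the same 0x9CFA/0xFFFF0000 mask pattern in 32 bits, where 0xFFFF9CFA prints as -25350.]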
diff --git a/test/Transforms/InstCombine/setcc-cast-cast.ll b/test/Transforms/InstCombine/setcc-cast-cast.ll
deleted file mode 100644
index b2681ea..0000000
--- a/test/Transforms/InstCombine/setcc-cast-cast.ll
+++ /dev/null
@@ -1,46 +0,0 @@
-; This test case was reduced from MultiSource/Applications/hbd. It makes sure
-; that folding doesn't happen in case a zext is applied where a sext should have
-; been when a setcc is used with two casts.
-; RUN: opt < %s -instcombine -S | \
-; RUN: not grep {br i1 false}
-; END.
-
-define i32 @bug(i8 %inbuff) {
-entry:
- %tmp = bitcast i8 %inbuff to i8 ; <i8> [#uses=1]
- %tmp.upgrd.1 = sext i8 %tmp to i32 ; <i32> [#uses=3]
- %tmp.upgrd.2 = icmp eq i32 %tmp.upgrd.1, 1 ; <i1> [#uses=1]
- br i1 %tmp.upgrd.2, label %cond_true, label %cond_next
-
-cond_true: ; preds = %entry
- br label %bb
-
-cond_next: ; preds = %entry
- %tmp3 = icmp eq i32 %tmp.upgrd.1, -1 ; <i1> [#uses=1]
- br i1 %tmp3, label %cond_true4, label %cond_next5
-
-cond_true4: ; preds = %cond_next
- br label %bb
-
-cond_next5: ; preds = %cond_next
- %tmp7 = icmp sgt i32 %tmp.upgrd.1, 1 ; <i1> [#uses=1]
- br i1 %tmp7, label %cond_true8, label %cond_false
-
-cond_true8: ; preds = %cond_next5
- br label %cond_next9
-
-cond_false: ; preds = %cond_next5
- br label %cond_next9
-
-cond_next9: ; preds = %cond_false, %cond_true8
- %iftmp.1.0 = phi i32 [ 42, %cond_true8 ], [ 23, %cond_false ] ; <i32> [#uses=1]
- br label %return
-
-bb: ; preds = %cond_true4, %cond_true
- br label %return
-
-return: ; preds = %bb, %cond_next9
- %retval.0 = phi i32 [ 17, %bb ], [ %iftmp.1.0, %cond_next9 ] ; <i32> [#uses=1]
- ret i32 %retval.0
-}
-
diff --git a/test/Transforms/InstCombine/shift-sra.ll b/test/Transforms/InstCombine/shift-sra.ll
index 4492785..58f3226 100644
--- a/test/Transforms/InstCombine/shift-sra.ll
+++ b/test/Transforms/InstCombine/shift-sra.ll
@@ -1,6 +1,4 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN: grep {lshr i32} | count 2
-; RUN: opt < %s -instcombine -S | not grep ashr
+; RUN: opt < %s -instcombine -S | FileCheck %s
define i32 @test1(i32 %X, i8 %A) {
@@ -9,6 +7,8 @@ define i32 @test1(i32 %X, i8 %A) {
%Y = ashr i32 %X, %shift.upgrd.1 ; <i32> [#uses=1]
%Z = and i32 %Y, 1 ; <i32> [#uses=1]
ret i32 %Z
+; CHECK: @test1
+; CHECK: lshr i32 %X, %shift.upgrd.1
}
define i32 @test2(i8 %tmp) {
@@ -16,4 +16,43 @@ define i32 @test2(i8 %tmp) {
%tmp4 = add i32 %tmp3, 7 ; <i32> [#uses=1]
%tmp5 = ashr i32 %tmp4, 3 ; <i32> [#uses=1]
ret i32 %tmp5
+; CHECK: @test2
+; CHECK: lshr i32 %tmp4, 3
+}
+
+define i64 @test3(i1 %X, i64 %Y, i1 %Cond) {
+ br i1 %Cond, label %T, label %F
+T:
+ %X2 = sext i1 %X to i64
+ br label %C
+F:
+ %Y2 = ashr i64 %Y, 63
+ br label %C
+C:
+ %P = phi i64 [%X2, %T], [%Y2, %F]
+ %S = ashr i64 %P, 12
+ ret i64 %S
+
+; CHECK: @test3
+; CHECK: %P = phi i64
+; CHECK-NEXT: ret i64 %P
+}
+
+define i64 @test4(i1 %X, i64 %Y, i1 %Cond) {
+ br i1 %Cond, label %T, label %F
+T:
+ %X2 = sext i1 %X to i64
+ br label %C
+F:
+ %Y2 = ashr i64 %Y, 63
+ br label %C
+C:
+ %P = phi i64 [%X2, %T], [%Y2, %F]
+ %R = shl i64 %P, 12
+ %S = ashr i64 %R, 12
+ ret i64 %S
+
+; CHECK: @test4
+; CHECK: %P = phi i64
+; CHECK-NEXT: ret i64 %P
}
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index ba28910..fa0322a 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -223,8 +223,8 @@ define i32 @test23(i8* %P, i64 %A){
%G = sub i32 %D, %F
ret i32 %G
; CHECK: @test23
-; CHECK: %A1 = trunc i64 %A to i32
-; CHECK: ret i32 %A1
+; CHECK-NEXT: = trunc i64 %A to i32
+; CHECK-NEXT: ret i32
}
define i64 @test24(i8* %P, i64 %A){
@@ -248,3 +248,28 @@ define i64 @test24a(i8* %P, i64 %A){
; CHECK-NEXT: ret i64
}
+@Arr = external global [42 x i16]
+
+define i64 @test24b(i8* %P, i64 %A){
+ %B = getelementptr inbounds [42 x i16]* @Arr, i64 0, i64 %A
+ %C = ptrtoint i16* %B to i64
+ %G = sub i64 %C, ptrtoint ([42 x i16]* @Arr to i64)
+ ret i64 %G
+; CHECK: @test24b
+; CHECK-NEXT: shl i64 %A, 1
+; CHECK-NEXT: ret i64
+}
+
+
+define i64 @test25(i8* %P, i64 %A){
+ %B = getelementptr inbounds [42 x i16]* @Arr, i64 0, i64 %A
+ %C = ptrtoint i16* %B to i64
+ %G = sub i64 %C, ptrtoint (i16* getelementptr ([42 x i16]* @Arr, i64 1, i64 0) to i64)
+ ret i64 %G
+; CHECK: @test25
+; CHECK-NEXT: shl i64 %A, 1
+; CHECK-NEXT: add i64 {{.*}}, -84
+; CHECK-NEXT: ret i64
+}
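[Editor's note: worked arithmetic for the two folds above. @Arr is [42 x i16], so element %A sits at byte offset 2*%A (hence the `shl i64 %A, 1`), and the one-past-the-end pointer used in test25 sits at byte offset 42*2 = 84 (hence the trailing `add i64 {{.*}}, -84`).]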
+
+
diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll
index ac31cdb..503d301 100644
--- a/test/Transforms/JumpThreading/basic.ll
+++ b/test/Transforms/JumpThreading/basic.ll
@@ -383,11 +383,11 @@ return:
}
-;;; Duplicate condition to avoid xor of cond.
-;;; TODO: Make this happen.
-define i32 @testXX(i1 %cond, i1 %cond2) {
+;; Duplicate condition to avoid xor of cond.
+;; rdar://7391699
+define i32 @test13(i1 %cond, i1 %cond2) {
Entry:
-; CHECK: @testXX
+; CHECK: @test13
%v1 = call i32 @f1()
br i1 %cond, label %Merge, label %F1
@@ -396,7 +396,8 @@ F1:
Merge:
%B = phi i1 [true, %Entry], [%cond2, %F1]
- %M = icmp eq i32 %v1, 192
+ %C = phi i32 [192, %Entry], [%v1, %F1]
+ %M = icmp eq i32 %C, 192
%N = xor i1 %B, %M
br i1 %N, label %T2, label %F2
@@ -405,6 +406,13 @@ T2:
F2:
ret i32 %v1
+
+; CHECK: br i1 %cond, label %F2, label %Merge
+
+; CHECK: Merge:
+; CHECK-NEXT: %M = icmp eq i32 %v1, 192
+; CHECK-NEXT: %N = xor i1 %cond2, %M
+; CHECK-NEXT: br i1 %N, label %T2, label %F2
}
diff --git a/test/Transforms/LICM/licm_preserve_dbginfo.ll b/test/Transforms/LICM/licm_preserve_dbginfo.ll
deleted file mode 100644
index e013c27..0000000
--- a/test/Transforms/LICM/licm_preserve_dbginfo.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: opt -licm -S <%s | FileCheck %s
-; Test that licm doesn't sink/delete debug info.
-define i32 @foo(i32 %a, i32 %j) nounwind {
-entry:
-;CHECK: entry:
- call void @llvm.dbg.func.start(metadata !0)
- call void @llvm.dbg.stoppoint(i32 3, i32 5, metadata !1)
-;CHECK: %mul = mul i32 %j, %j
- br label %for.cond
-
-for.cond:
-;CHECK: for.cond:
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
- %s.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
- call void @llvm.dbg.stoppoint(i32 4, i32 5, metadata !1)
-; CHECK: call void @llvm.dbg.stoppoint(i32 4, i32 5, metadata !1)
- %cmp = icmp slt i32 %i.0, %a
- br i1 %cmp, label %for.body, label %for.end
-
-for.body:
-;CHECK: for.body:
- call void @llvm.dbg.stoppoint(i32 5, i32 2, metadata !1)
-;CHECK: call void @llvm.dbg.stoppoint(i32 5, i32 2, metadata !1)
- %mul = mul i32 %j, %j
- %add = add nsw i32 %s.0, %mul
- br label %for.inc
-
-for.inc:
-;CHECK: for.inc:
- call void @llvm.dbg.stoppoint(i32 4, i32 18, metadata !1)
-;CHECK: call void @llvm.dbg.stoppoint(i32 4, i32 18, metadata !1)
- %inc = add nsw i32 %i.0, 1
- br label %for.cond
-
-for.end:
- call void @llvm.dbg.stoppoint(i32 7, i32 5, metadata !1)
- br label %0
-
-; <label>:0 ; preds = %for.end
- call void @llvm.dbg.stoppoint(i32 8, i32 1, metadata !1)
- call void @llvm.dbg.region.end(metadata !0)
- ret i32 %s.0
-}
-
-declare void @llvm.dbg.func.start(metadata) nounwind readnone
-
-declare void @llvm.dbg.declare({ }*, metadata) nounwind readnone
-
-declare void @llvm.dbg.stoppoint(i32, i32, metadata) nounwind readnone
-
-declare void @llvm.dbg.region.end(metadata) nounwind readnone
-
-!0 = metadata !{i32 458798, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 2, metadata !2, i1 false, i1 true}; [DW_TAG_subprogram ]
-!1 = metadata !{i32 458769, i32 0, i32 12, metadata !"licm.c", metadata !"/home/edwin", metadata !"clang 1.1", i1 true, i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
-!2 = metadata !{i32 458788, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
diff --git a/test/Transforms/LoopIndexSplit/SplitValue-2007-08-24-dbg.ll b/test/Transforms/LoopIndexSplit/SplitValue-2007-08-24-dbg.ll
index 09a6423..4ab95fc 100644
--- a/test/Transforms/LoopIndexSplit/SplitValue-2007-08-24-dbg.ll
+++ b/test/Transforms/LoopIndexSplit/SplitValue-2007-08-24-dbg.ll
@@ -5,7 +5,7 @@
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ;
+@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
diff --git a/test/Transforms/LoopRotate/PhiRename-1.ll b/test/Transforms/LoopRotate/PhiRename-1.ll
index a7326fa..74426a8 100644
--- a/test/Transforms/LoopRotate/PhiRename-1.ll
+++ b/test/Transforms/LoopRotate/PhiRename-1.ll
@@ -73,7 +73,7 @@ cond_next: ; preds = %cond_true, %bb
br label %bb21
bb21: ; preds = %cond_next, %entry
- %l.in = phi %struct.list** [ @operators, %entry ], [ %tmp19, %cond_next ] ;
+ %l.in = phi %struct.list** [ @operators, %entry ], [ %tmp19, %cond_next ]
%tmp22 = load %struct.list** %l.in ; <%struct.list*> [#uses=1]
icmp ne %struct.list* %tmp22, null ; <i1>:3 [#uses=1]
zext i1 %3 to i8 ; <i8>:4 [#uses=1]
diff --git a/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll b/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll
index 3a7496e..002a878 100644
--- a/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll
+++ b/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll
@@ -1,8 +1,12 @@
-; RUN: opt < %s -loop-reduce -S \
-; RUN: | grep {getelementptr.*%lsr.iv.*%lsr.iv.*<i32\\*>}
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
; The multiply in bb2 must not be reduced to an add, as the sext causes the
; %1 argument to become negative after a while.
-; ModuleID = '<stdin>'
+
+; CHECK: sext i8
+; CHECK: mul i32
+; CHECK: store i32
+
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.6"
@table = common global [32 x [256 x i32]] zeroinitializer, align 32 ; <[32 x [256 x i32]]*> [#uses=2]
diff --git a/test/Transforms/LoopStrengthReduce/dont_reverse.ll b/test/Transforms/LoopStrengthReduce/dont_reverse.ll
index 214f15a..4c5db04 100644
--- a/test/Transforms/LoopStrengthReduce/dont_reverse.ll
+++ b/test/Transforms/LoopStrengthReduce/dont_reverse.ll
@@ -4,14 +4,14 @@
; Don't reverse the iteration if the rhs of the compare is defined
; inside the loop.
-define void @Fill_Buffer() nounwind {
+define void @Fill_Buffer(i2* %p) nounwind {
entry:
br label %bb8
bb8:
%indvar34 = phi i32 [ 0, %entry ], [ %indvar.next35, %bb8 ]
%indvar3451 = trunc i32 %indvar34 to i2
- %xmp4344 = xor i2 0, -1
+ %xmp4344 = load i2* %p
%xmp104 = icmp eq i2 %indvar3451, %xmp4344
%indvar.next35 = add i32 %indvar34, 1
br i1 %xmp104, label %bb10, label %bb8
diff --git a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
index 2302dba..b829b47 100644
--- a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
+++ b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -iv-users -disable-output | grep {Stride i64 {3,+,2}<loop>:}
+; RUN: opt < %s -analyze -iv-users -disable-output | grep {Stride i64 {3,+,2}<%loop>:}
; The value of %r is dependent on a polynomial iteration expression.
diff --git a/test/Transforms/PruneEH/simplenoreturntest.ll b/test/Transforms/PruneEH/simplenoreturntest.ll
index 6cdd42f..61e2f15 100644
--- a/test/Transforms/PruneEH/simplenoreturntest.ll
+++ b/test/Transforms/PruneEH/simplenoreturntest.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -prune-eh -S | not grep {ret i32}
-declare void @noreturn() noreturn;
+declare void @noreturn() noreturn
define i32 @caller() {
call void @noreturn( )
diff --git a/test/Transforms/Reassociate/crash.ll b/test/Transforms/Reassociate/crash.ll
new file mode 100644
index 0000000..060018d
--- /dev/null
+++ b/test/Transforms/Reassociate/crash.ll
@@ -0,0 +1,33 @@
+; RUN: opt -reassociate -disable-output %s
+
+
+; rdar://7507855
+define fastcc i32 @test1() nounwind {
+entry:
+ %cond = select i1 undef, i32 1, i32 -1 ; <i32> [#uses=2]
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %sub889 = sub i32 undef, undef ; <i32> [#uses=1]
+ %sub891 = sub i32 %sub889, %cond ; <i32> [#uses=0]
+ %add896 = sub i32 0, %cond ; <i32> [#uses=0]
+ ret i32 undef
+}
+
+; PR5981
+define i32 @test2() nounwind ssp {
+entry:
+ %0 = load i32* undef, align 4
+ %1 = mul nsw i32 undef, %0
+ %2 = mul nsw i32 undef, %0
+ %3 = add nsw i32 undef, %1
+ %4 = add nsw i32 %3, %2
+ %5 = add nsw i32 %4, 4
+ %6 = shl i32 %0, 3 ; <i32> [#uses=1]
+ %7 = add nsw i32 %5, %6
+ br label %bb4.i9
+
+bb4.i9: ; preds = %bb3.i7, %bb1.i25.i
+ %8 = add nsw i32 undef, %1
+ ret i32 0
+}
diff --git a/test/Transforms/ScalarRepl/2009-01-09-scalarrepl-empty.ll b/test/Transforms/ScalarRepl/2009-01-09-scalarrepl-empty.ll
index 725a9b6..62f7d19 100644
--- a/test/Transforms/ScalarRepl/2009-01-09-scalarrepl-empty.ll
+++ b/test/Transforms/ScalarRepl/2009-01-09-scalarrepl-empty.ll
@@ -8,7 +8,7 @@
define void @_ada_c37304a() {
entry:
- %v = alloca %struct.c37304a__vrec ;
+ %v = alloca %struct.c37304a__vrec
%0 = getelementptr %struct.c37304a__vrec* %v, i32 0, i32 0
store i8 8, i8* %0, align 1
unreachable
diff --git a/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll b/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll
index 577d650..af59ba0 100644
--- a/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll
+++ b/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll
@@ -5,7 +5,7 @@
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ;
+@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/2009-03-05-Speculative-Hoist-Dbg.ll b/test/Transforms/SimplifyCFG/2009-03-05-Speculative-Hoist-Dbg.ll
index 9caa9a1..db56fdb 100644
--- a/test/Transforms/SimplifyCFG/2009-03-05-Speculative-Hoist-Dbg.ll
+++ b/test/Transforms/SimplifyCFG/2009-03-05-Speculative-Hoist-Dbg.ll
@@ -2,7 +2,7 @@
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ;
+@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/branch_fold_dbg.ll b/test/Transforms/SimplifyCFG/branch_fold_dbg.ll
index c91a87ef..6a500de 100644
--- a/test/Transforms/SimplifyCFG/branch_fold_dbg.ll
+++ b/test/Transforms/SimplifyCFG/branch_fold_dbg.ll
@@ -4,7 +4,7 @@
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ;
+@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll b/test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll
index 2e7ef7a..6fbbb1b 100644
--- a/test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll
+++ b/test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll
@@ -4,7 +4,7 @@
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ;
+@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll b/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
index f5f4c93..f1c820e 100644
--- a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
+++ b/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
@@ -4,7 +4,7 @@
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ;
+@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll b/test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll
index 3a6c2ed..01041eb 100644
--- a/test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll
+++ b/test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll
@@ -3,7 +3,7 @@
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ;
+@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
diff --git a/test/Transforms/TailCallElim/dont_reorder_load.ll b/test/Transforms/TailCallElim/dont_reorder_load.ll
index 7b3b232..cc273c3 100644
--- a/test/Transforms/TailCallElim/dont_reorder_load.ll
+++ b/test/Transforms/TailCallElim/dont_reorder_load.ll
@@ -34,8 +34,8 @@ entry:
br i1 %tmp2, label %if, label %else
if: ; preds = %entry
- store i32 1, i32* %a_arg;
- ret i32 0;
+ store i32 1, i32* %a_arg
+ ret i32 0
else: ; preds = %entry
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
@@ -53,7 +53,7 @@ entry:
br i1 %tmp2, label %if, label %else
if: ; preds = %entry
- ret i32 0;
+ ret i32 0
else: ; preds = %entry
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
diff --git a/test/Verifier/2006-10-15-AddrLabel.ll b/test/Verifier/2006-10-15-AddrLabel.ll
index 73b6902..0b73b47 100644
--- a/test/Verifier/2006-10-15-AddrLabel.ll
+++ b/test/Verifier/2006-10-15-AddrLabel.ll
@@ -4,5 +4,5 @@ define i32 @main() {
%foo = call i8* %llvm.stacksave()
%foop = bitcast i8* %foo to label*
%nret = load label* %foop
- br label %nret;
+ br label %nret
}
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index 84e6867..930de26 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -28,6 +28,7 @@
#include "llvm/Config/config.h"
#include "llvm/LinkAllVMCore.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/ManagedStatic.h"
@@ -210,6 +211,10 @@ static formatted_raw_ostream *GetOutputStream(const char *TargetName,
int main(int argc, char **argv) {
sys::PrintStackTraceOnErrorSignal();
PrettyStackTraceProgram X(argc, argv);
+
+ // Enable debug stream buffering.
+ EnableDebugBuffering = true;
+
LLVMContext &Context = getGlobalContext();
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
diff --git a/tools/llvm-mc/AsmLexer.cpp b/tools/llvm-mc/AsmLexer.cpp
index 99055c6..ba0d247 100644
--- a/tools/llvm-mc/AsmLexer.cpp
+++ b/tools/llvm-mc/AsmLexer.cpp
@@ -44,7 +44,7 @@ void AsmLexer::PrintMessage(SMLoc Loc, const std::string &Msg,
/// ReturnError - Set the error to the specified string at the specified
/// location. This is defined to always return AsmToken::Error.
AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
- SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error");
+ PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error");
return AsmToken(AsmToken::Error, StringRef(Loc, 0));
}
diff --git a/tools/llvm-mc/AsmParser.cpp b/tools/llvm-mc/AsmParser.cpp
index 1204a00..2eb75a7 100644
--- a/tools/llvm-mc/AsmParser.cpp
+++ b/tools/llvm-mc/AsmParser.cpp
@@ -18,6 +18,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParsedAsmOperand.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -28,6 +29,11 @@
#include "llvm/Target/TargetAsmParser.h"
using namespace llvm;
+/// getStartLoc - Get the location of the first token of this operand.
+SMLoc MCParsedAsmOperand::getStartLoc() const { return SMLoc(); }
+SMLoc MCParsedAsmOperand::getEndLoc() const { return SMLoc(); }
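[Editor's note: for orientation, a minimal sketch of the interface these defaults complete, reconstructed from the new MCParsedAsmOperand.h in this patch and the uses below; any member beyond the two accessors is an assumption:

    class MCParsedAsmOperand {
    public:
      virtual ~MCParsedAsmOperand() {}   // operands are deleted through base*
      /// getStartLoc - Get the location of the first token of this operand.
      virtual SMLoc getStartLoc() const; // the defaults above return SMLoc()
      /// getEndLoc - Get the location of the last token of this operand.
      virtual SMLoc getEndLoc() const;
    };
]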
+
+
// Mach-O section uniquing.
//
// FIXME: Figure out where this should live, it should be shared by
@@ -710,16 +716,34 @@ bool AsmParser::ParseStatement() {
return false;
}
- MCInst Inst;
- if (getTargetParser().ParseInstruction(IDVal, Inst))
+
+ SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
+ if (getTargetParser().ParseInstruction(IDVal, IDLoc, ParsedOperands))
+ // FIXME: Leaking ParsedOperands on failure.
return true;
if (Lexer.isNot(AsmToken::EndOfStatement))
+ // FIXME: Leaking ParsedOperands on failure.
return TokError("unexpected token in argument list");
// Eat the end of statement marker.
Lexer.Lex();
+
+ MCInst Inst;
+
+ bool MatchFail = getTargetParser().MatchInstruction(ParsedOperands, Inst);
+
+ // Free any parsed operands.
+ for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
+ delete ParsedOperands[i];
+
+ if (MatchFail) {
+ // FIXME: We should give nicer diagnostics about the exact failure.
+ Error(IDLoc, "unrecognized instruction");
+ return true;
+ }
+
// Instruction is good, process it.
Out.EmitInstruction(Inst);
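[Editor's note: a minimal sketch of the two target hooks the rewritten flow above relies on, with signatures inferred from the call sites; the exact declarations live in the TargetAsmParser.h change in this patch, so treat the parameter spellings as assumptions:

    // Parse the mnemonic Name plus its operands into Operands.
    virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
                      SmallVectorImpl<MCParsedAsmOperand*> &Operands) = 0;
    // Map the parsed operands onto an MCInst; returns true on match failure.
    virtual bool MatchInstruction(
                      const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
                      MCInst &Inst) = 0;
]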
diff --git a/tools/llvmc/plugins/Base/Base.td.in b/tools/llvmc/plugins/Base/Base.td.in
index c7ba3be..cf0ff68 100644
--- a/tools/llvmc/plugins/Base/Base.td.in
+++ b/tools/llvmc/plugins/Base/Base.td.in
@@ -56,7 +56,7 @@ def OptList : OptionList<[
(help "A deprecated synonym for -mtune"), (hidden)),
(parameter_option "MF",
(help "Specify a file to write dependencies to"), (hidden)),
- (parameter_option "MT",
+ (parameter_list_option "MT",
(help "Change the name of the rule emitted by dependency generation"),
(hidden)),
(parameter_list_option "include",
@@ -139,6 +139,7 @@ class llvm_gcc_based <string cmd_prefix, string in_lang, string E_ext> : Tool<
(not_empty "m"), (forward "m"),
(switch_on "m32"), (forward "m32"),
(switch_on "m64"), (forward "m64"),
+ (switch_on "O0"), (forward "O0"),
(switch_on "O1"), (forward "O1"),
(switch_on "O2"), (forward "O2"),
(switch_on "O3"), (forward "O3"),
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index 060fc4f..93eb0a5 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -443,12 +443,12 @@ bool LTOCodeGenerator::generateAssemblyCode(formatted_raw_ostream& out,
 /// setCodeGenDebugOptions - Tokenize the given options and pass them on to
 /// the code generator.
void LTOCodeGenerator::setCodeGenDebugOptions(const char* options)
{
- std::string ops(options);
- for (std::string o = getToken(ops); !o.empty(); o = getToken(ops)) {
+ for (std::pair<StringRef, StringRef> o = getToken(options);
+ !o.first.empty(); o = getToken(o.second)) {
// ParseCommandLineOptions() expects argv[0] to be program name.
// Lazily add that.
if ( _codegenOptions.empty() )
_codegenOptions.push_back("libLTO");
- _codegenOptions.push_back(strdup(o.c_str()));
+ _codegenOptions.push_back(strdup(o.first.str().c_str()));
}
}
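[Editor's note: for reference, a minimal sketch of the pair-returning getToken idiom adopted above (per the StringExtras.h change in this diffstat); the loop shape mirrors the new code, while the surrounding names are illustrative:

    // .first is the next token, .second the unparsed remainder; an
    // empty token signals the end of the input string.
    for (std::pair<StringRef, StringRef> Tok = getToken(Input);
         !Tok.first.empty(); Tok = getToken(Tok.second))
      Args.push_back(Tok.first.str());
]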
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index 50a2e39..a636bd9 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -26,6 +26,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/PassNameParser.h"
#include "llvm/System/Signals.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/IRReader.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -344,6 +345,9 @@ int main(int argc, char **argv) {
sys::PrintStackTraceOnErrorSignal();
llvm::PrettyStackTraceProgram X(argc, argv);
+ // Enable debug stream buffering.
+ EnableDebugBuffering = true;
+
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
LLVMContext &Context = getGlobalContext();
diff --git a/unittests/ADT/BitVectorTest.cpp b/unittests/ADT/BitVectorTest.cpp
new file mode 100644
index 0000000..5348281
--- /dev/null
+++ b/unittests/ADT/BitVectorTest.cpp
@@ -0,0 +1,140 @@
+//===- llvm/unittest/ADT/BitVectorTest.cpp - BitVector tests --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+TEST(BitVectorTest, TrivialOperation) {
+ BitVector Vec;
+ EXPECT_EQ(0U, Vec.count());
+ EXPECT_EQ(0U, Vec.size());
+ EXPECT_FALSE(Vec.any());
+ EXPECT_TRUE(Vec.none());
+ EXPECT_TRUE(Vec.empty());
+
+ Vec.resize(5, true);
+ EXPECT_EQ(5U, Vec.count());
+ EXPECT_EQ(5U, Vec.size());
+ EXPECT_TRUE(Vec.any());
+ EXPECT_FALSE(Vec.none());
+ EXPECT_FALSE(Vec.empty());
+
+ Vec.resize(11);
+ EXPECT_EQ(5U, Vec.count());
+ EXPECT_EQ(11U, Vec.size());
+ EXPECT_TRUE(Vec.any());
+ EXPECT_FALSE(Vec.none());
+ EXPECT_FALSE(Vec.empty());
+
+ BitVector Inv = ~Vec;
+ EXPECT_EQ(6U, Inv.count());
+ EXPECT_EQ(11U, Inv.size());
+ EXPECT_TRUE(Inv.any());
+ EXPECT_FALSE(Inv.none());
+ EXPECT_FALSE(Inv.empty());
+
+ EXPECT_FALSE(Inv == Vec);
+ EXPECT_TRUE(Inv != Vec);
+ Vec = ~Vec;
+ EXPECT_TRUE(Inv == Vec);
+ EXPECT_FALSE(Inv != Vec);
+
+ // Add some "interesting" data to Vec.
+ Vec.resize(23, true);
+ Vec.resize(25, false);
+ Vec.resize(26, true);
+ Vec.resize(29, false);
+ Vec.resize(33, true);
+ Vec.resize(61, false);
+ unsigned Count = 0;
+ for (unsigned i = Vec.find_first(); i != -1u; i = Vec.find_next(i)) {
+ ++Count;
+ EXPECT_TRUE(Vec[i]);
+ EXPECT_TRUE(Vec.test(i));
+ }
+ EXPECT_EQ(Count, Vec.count());
+ EXPECT_EQ(Count, 23u);
+ EXPECT_FALSE(Vec[0]);
+ EXPECT_TRUE(Vec[32]);
+ EXPECT_FALSE(Vec[60]);
+
+ BitVector Copy = Vec;
+ BitVector Alt(3, false);
+ Alt.resize(6, true);
+ std::swap(Alt, Vec);
+ EXPECT_TRUE(Copy == Alt);
+ EXPECT_TRUE(Vec.size() == 6);
+ EXPECT_TRUE(Vec.count() == 3);
+ EXPECT_TRUE(Vec.find_first() == 3);
+ std::swap(Copy, Vec);
+
+ // Add some more "interesting" data.
+ Vec.resize(68, true);
+ Vec.resize(78, false);
+ Vec.resize(89, true);
+ Vec.resize(90, false);
+ Vec.resize(91, true);
+ Vec.resize(130, false);
+ Count = 0;
+ for (unsigned i = Vec.find_first(); i != -1u; i = Vec.find_next(i)) {
+ ++Count;
+ EXPECT_TRUE(Vec[i]);
+ EXPECT_TRUE(Vec.test(i));
+ }
+ EXPECT_EQ(Count, Vec.count());
+ EXPECT_EQ(Count, 42u);
+ EXPECT_FALSE(Vec[0]);
+ EXPECT_TRUE(Vec[32]);
+ EXPECT_FALSE(Vec[60]);
+ EXPECT_FALSE(Vec[129]);
+
+ Vec.flip(60);
+ EXPECT_TRUE(Vec[60]);
+ EXPECT_EQ(Count + 1, Vec.count());
+ Vec.flip(60);
+ EXPECT_FALSE(Vec[60]);
+ EXPECT_EQ(Count, Vec.count());
+
+ Vec.reset(32);
+ EXPECT_FALSE(Vec[32]);
+ EXPECT_EQ(Count - 1, Vec.count());
+ Vec.set(32);
+ EXPECT_TRUE(Vec[32]);
+ EXPECT_EQ(Count, Vec.count());
+
+ Vec.flip();
+ EXPECT_EQ(Vec.size() - Count, Vec.count());
+
+ Vec.reset();
+ EXPECT_EQ(0U, Vec.count());
+ EXPECT_EQ(130U, Vec.size());
+ EXPECT_FALSE(Vec.any());
+ EXPECT_TRUE(Vec.none());
+ EXPECT_FALSE(Vec.empty());
+
+ Inv = ~BitVector();
+ EXPECT_EQ(0U, Inv.count());
+ EXPECT_EQ(0U, Inv.size());
+ EXPECT_FALSE(Inv.any());
+ EXPECT_TRUE(Inv.none());
+ EXPECT_TRUE(Inv.empty());
+
+ Vec.clear();
+ EXPECT_EQ(0U, Vec.count());
+ EXPECT_EQ(0U, Vec.size());
+ EXPECT_FALSE(Vec.any());
+ EXPECT_TRUE(Vec.none());
+ EXPECT_TRUE(Vec.empty());
+}
+
+}
diff --git a/unittests/ADT/SmallBitVectorTest.cpp b/unittests/ADT/SmallBitVectorTest.cpp
new file mode 100644
index 0000000..a5c60de
--- /dev/null
+++ b/unittests/ADT/SmallBitVectorTest.cpp
@@ -0,0 +1,140 @@
+//===- llvm/unittest/ADT/SmallBitVectorTest.cpp - SmallBitVector tests ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallBitVector.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+TEST(SmallBitVectorTest, TrivialOperation) {
+ SmallBitVector Vec;
+ EXPECT_EQ(0U, Vec.count());
+ EXPECT_EQ(0U, Vec.size());
+ EXPECT_FALSE(Vec.any());
+ EXPECT_TRUE(Vec.none());
+ EXPECT_TRUE(Vec.empty());
+
+ Vec.resize(5, true);
+ EXPECT_EQ(5U, Vec.count());
+ EXPECT_EQ(5U, Vec.size());
+ EXPECT_TRUE(Vec.any());
+ EXPECT_FALSE(Vec.none());
+ EXPECT_FALSE(Vec.empty());
+
+ Vec.resize(11);
+ EXPECT_EQ(5U, Vec.count());
+ EXPECT_EQ(11U, Vec.size());
+ EXPECT_TRUE(Vec.any());
+ EXPECT_FALSE(Vec.none());
+ EXPECT_FALSE(Vec.empty());
+
+ SmallBitVector Inv = ~Vec;
+ EXPECT_EQ(6U, Inv.count());
+ EXPECT_EQ(11U, Inv.size());
+ EXPECT_TRUE(Inv.any());
+ EXPECT_FALSE(Inv.none());
+ EXPECT_FALSE(Inv.empty());
+
+ EXPECT_FALSE(Inv == Vec);
+ EXPECT_TRUE(Inv != Vec);
+ Vec = ~Vec;
+ EXPECT_TRUE(Inv == Vec);
+ EXPECT_FALSE(Inv != Vec);
+
+ // Add some "interesting" data to Vec.
+ Vec.resize(23, true);
+ Vec.resize(25, false);
+ Vec.resize(26, true);
+ Vec.resize(29, false);
+ Vec.resize(33, true);
+ Vec.resize(61, false);
+ unsigned Count = 0;
+ for (unsigned i = Vec.find_first(); i != -1u; i = Vec.find_next(i)) {
+ ++Count;
+ EXPECT_TRUE(Vec[i]);
+ EXPECT_TRUE(Vec.test(i));
+ }
+ EXPECT_EQ(Count, Vec.count());
+ EXPECT_EQ(Count, 23u);
+ EXPECT_FALSE(Vec[0]);
+ EXPECT_TRUE(Vec[32]);
+ EXPECT_FALSE(Vec[60]);
+
+ SmallBitVector Copy = Vec;
+ SmallBitVector Alt(3, false);
+ Alt.resize(6, true);
+ std::swap(Alt, Vec);
+ EXPECT_TRUE(Copy == Alt);
+ EXPECT_TRUE(Vec.size() == 6);
+ EXPECT_TRUE(Vec.count() == 3);
+ EXPECT_TRUE(Vec.find_first() == 3);
+ std::swap(Copy, Vec);
+
+ // Add some more "interesting" data.
+ Vec.resize(68, true);
+ Vec.resize(78, false);
+ Vec.resize(89, true);
+ Vec.resize(90, false);
+ Vec.resize(91, true);
+ Vec.resize(130, false);
+ Count = 0;
+ for (unsigned i = Vec.find_first(); i != -1u; i = Vec.find_next(i)) {
+ ++Count;
+ EXPECT_TRUE(Vec[i]);
+ EXPECT_TRUE(Vec.test(i));
+ }
+ EXPECT_EQ(Count, Vec.count());
+ EXPECT_EQ(Count, 42u);
+ EXPECT_FALSE(Vec[0]);
+ EXPECT_TRUE(Vec[32]);
+ EXPECT_FALSE(Vec[60]);
+ EXPECT_FALSE(Vec[129]);
+
+ Vec.flip(60);
+ EXPECT_TRUE(Vec[60]);
+ EXPECT_EQ(Count + 1, Vec.count());
+ Vec.flip(60);
+ EXPECT_FALSE(Vec[60]);
+ EXPECT_EQ(Count, Vec.count());
+
+ Vec.reset(32);
+ EXPECT_FALSE(Vec[32]);
+ EXPECT_EQ(Count - 1, Vec.count());
+ Vec.set(32);
+ EXPECT_TRUE(Vec[32]);
+ EXPECT_EQ(Count, Vec.count());
+
+ Vec.flip();
+ EXPECT_EQ(Vec.size() - Count, Vec.count());
+
+ Vec.reset();
+ EXPECT_EQ(0U, Vec.count());
+ EXPECT_EQ(130U, Vec.size());
+ EXPECT_FALSE(Vec.any());
+ EXPECT_TRUE(Vec.none());
+ EXPECT_FALSE(Vec.empty());
+
+ Inv = ~SmallBitVector();
+ EXPECT_EQ(0U, Inv.count());
+ EXPECT_EQ(0U, Inv.size());
+ EXPECT_FALSE(Inv.any());
+ EXPECT_TRUE(Inv.none());
+ EXPECT_TRUE(Inv.empty());
+
+ Vec.clear();
+ EXPECT_EQ(0U, Vec.count());
+ EXPECT_EQ(0U, Vec.size());
+ EXPECT_FALSE(Vec.any());
+ EXPECT_TRUE(Vec.none());
+ EXPECT_TRUE(Vec.empty());
+}
+
+}
diff --git a/unittests/VMCore/MetadataTest.cpp b/unittests/VMCore/MetadataTest.cpp
index e118568..e374789 100644
--- a/unittests/VMCore/MetadataTest.cpp
+++ b/unittests/VMCore/MetadataTest.cpp
@@ -123,7 +123,7 @@ TEST(NamedMDNodeTest, Search) {
MDNode *n = MDNode::get(Context, &V, 1);
MDNode *n2 = MDNode::get(Context, &V2, 1);
- MetadataBase *Nodes[2] = { n, n2 };
+ MDNode *Nodes[2] = { n, n2 };
Module *M = new Module("MyModule", getGlobalContext());
const char *Name = "llvm.NMD1";
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index 3eac9d2..019908b 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -961,8 +961,8 @@ static void EmitConvertToMCInst(CodeGenTarget &Target,
CvtOS << "static bool ConvertToMCInst(ConversionKind Kind, MCInst &Inst, "
<< "unsigned Opcode,\n"
- << " SmallVectorImpl<"
- << Target.getName() << "Operand> &Operands) {\n";
+ << " const SmallVectorImpl<MCParsedAsmOperand*"
+ << "> &Operands) {\n";
CvtOS << " Inst.setOpcode(Opcode);\n";
CvtOS << " switch (Kind) {\n";
CvtOS << " default:\n";
@@ -972,6 +972,9 @@ static void EmitConvertToMCInst(CodeGenTarget &Target,
OS << "// Unified function for converting operants to MCInst instances.\n\n";
OS << "enum ConversionKind {\n";
+ // TargetOperandClass - This is the target's operand class, like X86Operand.
+ std::string TargetOperandClass = Target.getName() + "Operand";
+
for (std::vector<InstructionInfo*>::const_iterator it = Infos.begin(),
ie = Infos.end(); it != ie; ++it) {
InstructionInfo &II = **it;
@@ -1050,8 +1053,9 @@ static void EmitConvertToMCInst(CodeGenTarget &Target,
for (; CurIndex != Op.OperandInfo->MIOperandNo; ++CurIndex)
CvtOS << " Inst.addOperand(MCOperand::CreateReg(0));\n";
- CvtOS << " Operands[" << MIOperandList[i].second
- << "]." << Op.Class->RenderMethod
+ CvtOS << " ((" << TargetOperandClass << "*)Operands["
+ << MIOperandList[i].second
+ << "])->" << Op.Class->RenderMethod
<< "(Inst, " << Op.OperandInfo->MINumOperands << ");\n";
CurIndex += Op.OperandInfo->MINumOperands;
}
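[Editor's note: pieced together from the stream writes above, the emitted function now looks roughly like this; X86Operand and the addRegOperands/addImmOperands render methods are illustrative stand-ins for a target's operand class:

    static bool ConvertToMCInst(ConversionKind Kind, MCInst &Inst,
                                unsigned Opcode,
                                const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
      Inst.setOpcode(Opcode);
      switch (Kind) {
      default: break;
      case CVT_...:   // one enumerator per distinct conversion
        ((X86Operand*)Operands[1])->addRegOperands(Inst, 1);
        ((X86Operand*)Operands[2])->addImmOperands(Inst, 1);
        break;
      }
      ...
    }
]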
@@ -1111,8 +1115,9 @@ static void EmitMatchClassEnumeration(CodeGenTarget &Target,
static void EmitClassifyOperand(CodeGenTarget &Target,
AsmMatcherInfo &Info,
raw_ostream &OS) {
- OS << "static MatchClassKind ClassifyOperand("
- << Target.getName() << "Operand &Operand) {\n";
+ OS << "static MatchClassKind ClassifyOperand(MCParsedAsmOperand *GOp) {\n"
+ << " " << Target.getName() << "Operand &Operand = *("
+ << Target.getName() << "Operand*)GOp;\n";
// Classify tokens.
OS << " if (Operand.isToken())\n";
@@ -1467,9 +1472,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
MaxNumOperands = std::max(MaxNumOperands, (*it)->Operands.size());
OS << "bool " << Target.getName() << ClassName
- << "::MatchInstruction("
- << "SmallVectorImpl<" << Target.getName() << "Operand> &Operands, "
- << "MCInst &Inst) {\n";
+ << "::\nMatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> "
+ "&Operands,\n MCInst &Inst) {\n";
 // Emit the static match table; unused classes get initialized to 0 which is
// guaranteed to be InvalidMatchClass.
diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp
index 7e6c769..714a39c 100644
--- a/utils/TableGen/CodeEmitterGen.cpp
+++ b/utils/TableGen/CodeEmitterGen.cpp
@@ -34,7 +34,8 @@ void CodeEmitterGen::reverseBits(std::vector<Record*> &Insts) {
R->getName() == "INSERT_SUBREG" ||
R->getName() == "IMPLICIT_DEF" ||
R->getName() == "SUBREG_TO_REG" ||
- R->getName() == "COPY_TO_REGCLASS") continue;
+ R->getName() == "COPY_TO_REGCLASS" ||
+ R->getName() == "DEBUG_VALUE") continue;
BitsInit *BI = R->getValueAsBitsInit("Inst");
@@ -111,7 +112,8 @@ void CodeEmitterGen::run(raw_ostream &o) {
R->getName() == "INSERT_SUBREG" ||
R->getName() == "IMPLICIT_DEF" ||
R->getName() == "SUBREG_TO_REG" ||
- R->getName() == "COPY_TO_REGCLASS") {
+ R->getName() == "COPY_TO_REGCLASS" ||
+ R->getName() == "DEBUG_VALUE") {
o << " 0U,\n";
continue;
}
@@ -149,7 +151,8 @@ void CodeEmitterGen::run(raw_ostream &o) {
InstName == "INSERT_SUBREG" ||
InstName == "IMPLICIT_DEF" ||
InstName == "SUBREG_TO_REG" ||
- InstName == "COPY_TO_REGCLASS") continue;
+ InstName == "COPY_TO_REGCLASS" ||
+ InstName == "DEBUG_VALUE") continue;
BitsInit *BI = R->getValueAsBitsInit("Inst");
const std::vector<RecordVal> &Vals = R->getValues();
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index c69ce96..684431a 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -14,6 +14,7 @@
#include "CodeGenInstruction.h"
#include "Record.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
#include <set>
using namespace llvm;
@@ -224,7 +225,8 @@ CodeGenInstruction::CodeGenInstruction(Record *R, const std::string &AsmStr)
// Parse the DisableEncoding field.
std::string DisableEncoding = R->getValueAsString("DisableEncoding");
while (1) {
- std::string OpName = getToken(DisableEncoding, " ,\t");
+ std::string OpName;
+ tie(OpName, DisableEncoding) = getToken(DisableEncoding, " ,\t");
if (OpName.empty()) break;
// Figure out which operand this is.
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index 0edca73..c9af5f7 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -337,6 +337,11 @@ getInstructionsByEnumValue(std::vector<const CodeGenInstruction*>
throw "Could not find 'COPY_TO_REGCLASS' instruction!";
const CodeGenInstruction *COPY_TO_REGCLASS = &I->second;
+ I = getInstructions().find("DEBUG_VALUE");
+ if (I == Instructions.end())
+ throw "Could not find 'DEBUG_VALUE' instruction!";
+ const CodeGenInstruction *DEBUG_VALUE = &I->second;
+
// Print out the rest of the instructions now.
NumberedInstructions.push_back(PHI);
NumberedInstructions.push_back(INLINEASM);
@@ -349,6 +354,7 @@ getInstructionsByEnumValue(std::vector<const CodeGenInstruction*>
NumberedInstructions.push_back(IMPLICIT_DEF);
NumberedInstructions.push_back(SUBREG_TO_REG);
NumberedInstructions.push_back(COPY_TO_REGCLASS);
+ NumberedInstructions.push_back(DEBUG_VALUE);
for (inst_iterator II = inst_begin(), E = inst_end(); II != E; ++II)
if (&II->second != PHI &&
&II->second != INLINEASM &&
@@ -360,7 +366,8 @@ getInstructionsByEnumValue(std::vector<const CodeGenInstruction*>
&II->second != INSERT_SUBREG &&
&II->second != IMPLICIT_DEF &&
&II->second != SUBREG_TO_REG &&
- &II->second != COPY_TO_REGCLASS)
+ &II->second != COPY_TO_REGCLASS &&
+ &II->second != DEBUG_VALUE)
NumberedInstructions.push_back(&II->second);
}
@@ -402,18 +409,6 @@ ComplexPattern::ComplexPattern(Record *R) {
<< "' on ComplexPattern '" << R->getName() << "'!\n";
exit(1);
}
-
- // Parse the attributes.
- Attributes = 0;
- PropList = R->getValueAsListOfDefs("Attributes");
- for (unsigned i = 0, e = PropList.size(); i != e; ++i)
- if (PropList[i]->getName() == "CPAttrParentAsRoot") {
- Attributes |= 1 << CPAttrParentAsRoot;
- } else {
- errs() << "Unsupported pattern attribute '" << PropList[i]->getName()
- << "' on ComplexPattern '" << R->getName() << "'!\n";
- exit(1);
- }
}
//===----------------------------------------------------------------------===//
diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h
index da4b1cc..07bc54d 100644
--- a/utils/TableGen/CodeGenTarget.h
+++ b/utils/TableGen/CodeGenTarget.h
@@ -46,9 +46,6 @@ enum SDNP {
SDNPMemOperand
};
-// ComplexPattern attributes.
-enum CPAttr { CPAttrParentAsRoot };
-
/// getValueType - Return the MVT::SimpleValueType that the specified TableGen
/// record corresponds to.
MVT::SimpleValueType getValueType(Record *Rec);
@@ -227,7 +224,6 @@ class ComplexPattern {
std::string SelectFunc;
std::vector<Record*> RootNodes;
unsigned Properties; // Node properties
- unsigned Attributes; // Pattern attributes
public:
ComplexPattern() : NumOperands(0) {}
ComplexPattern(Record *R);
@@ -239,7 +235,6 @@ public:
return RootNodes;
}
bool hasProperty(enum SDNP Prop) const { return Properties & (1 << Prop); }
- bool hasAttribute(enum CPAttr Attr) const { return Attributes & (1 << Attr); }
};
} // End llvm namespace
diff --git a/utils/TableGen/DAGISelEmitter.cpp b/utils/TableGen/DAGISelEmitter.cpp
index a901fd0..c97582b 100644
--- a/utils/TableGen/DAGISelEmitter.cpp
+++ b/utils/TableGen/DAGISelEmitter.cpp
@@ -30,6 +30,22 @@ GenDebug("gen-debug", cl::desc("Generate debug code"), cl::init(false));
// DAGISelEmitter Helper methods
//
+/// getNodeName - The top level Select_* functions have an "SDNode* N"
+/// argument. When expanding the pattern-matching code, the intermediate
+/// variables have type SDValue. This function provides a uniform way to
+/// reference the underlying "SDNode *" for both cases.
+static std::string getNodeName(const std::string &S) {
+ if (S == "N") return S;
+ return S + ".getNode()";
+}
+
+/// getValueName - Similar to getNodeName, except it provides a uniform
+/// way to access the SDValue for both cases.
+static std::string getValueName(const std::string &S) {
+ if (S == "N") return "SDValue(N, 0)";
+ return S;
+}
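[Editor's note: concretely, from the two definitions above:

    getNodeName("N")   -> "N"              // the root is already an SDNode*
    getNodeName("N1")  -> "N1.getNode()"   // intermediates are SDValues
    getValueName("N")  -> "SDValue(N, 0)"
    getValueName("N1") -> "N1"
]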
+
/// NodeIsComplexPattern - return true if N is a leaf node and a subclass of
/// ComplexPattern.
static bool NodeIsComplexPattern(TreePatternNode *N) {
@@ -452,7 +468,7 @@ public:
// Save loads/stores matched by a pattern.
if (!N->isLeaf() && N->getName().empty()) {
if (NodeHasProperty(N, SDNPMemOperand, CGP))
- LSI.push_back(RootName);
+ LSI.push_back(getNodeName(RootName));
}
bool isRoot = (P == NULL);
@@ -469,7 +485,7 @@ public:
if (N->isLeaf()) {
if (IntInit *II = dynamic_cast<IntInit*>(N->getLeafValue())) {
- emitCheck("cast<ConstantSDNode>(" + RootName +
+ emitCheck("cast<ConstantSDNode>(" + getNodeName(RootName) +
")->getSExtValue() == INT64_C(" +
itostr(II->getValue()) + ")");
return;
@@ -509,7 +525,7 @@ public:
OpNo = 1;
if (!isRoot) {
// Multiple uses of actual result?
- emitCheck(RootName + ".hasOneUse()");
+ emitCheck(getValueName(RootName) + ".hasOneUse()");
EmittedUseCheck = true;
if (NodeHasChain) {
// If the immediate use can somehow reach this node through another
@@ -540,23 +556,25 @@ public:
if (NeedCheck) {
std::string ParentName(RootName.begin(), RootName.end()-1);
- emitCheck("IsLegalAndProfitableToFold(" + RootName +
- ".getNode(), " + ParentName + ".getNode(), N.getNode())");
+ emitCheck("IsLegalAndProfitableToFold(" + getNodeName(RootName) +
+ ", " + getNodeName(ParentName) + ", N)");
}
}
}
if (NodeHasChain) {
if (FoundChain) {
- emitCheck("(" + ChainName + ".getNode() == " + RootName + ".getNode() || "
+ emitCheck("(" + ChainName + ".getNode() == " +
+ getNodeName(RootName) + " || "
"IsChainCompatible(" + ChainName + ".getNode(), " +
- RootName + ".getNode()))");
- OrigChains.push_back(std::make_pair(ChainName, RootName));
+ getNodeName(RootName) + "))");
+ OrigChains.push_back(std::make_pair(ChainName,
+ getValueName(RootName)));
} else
FoundChain = true;
ChainName = "Chain" + ChainSuffix;
- emitInit("SDValue " + ChainName + " = " + RootName +
- ".getOperand(0);");
+ emitInit("SDValue " + ChainName + " = " + getNodeName(RootName) +
+ "->getOperand(0);");
}
}
@@ -571,13 +589,13 @@ public:
PatternHasProperty(N, SDNPOutFlag, CGP))) {
if (!EmittedUseCheck) {
// Multiple uses of actual result?
- emitCheck(RootName + ".hasOneUse()");
+ emitCheck(getValueName(RootName) + ".hasOneUse()");
}
}
// If there are node predicates for this, emit the calls.
for (unsigned i = 0, e = N->getPredicateFns().size(); i != e; ++i)
- emitCheck(N->getPredicateFns()[i] + "(" + RootName + ".getNode())");
+ emitCheck(N->getPredicateFns()[i] + "(" + getNodeName(RootName) + ")");
// If this is an 'and R, 1234' where the operation is AND/OR and the RHS is
// a constant without a predicate fn that has more that one bit set, handle
@@ -597,20 +615,22 @@ public:
if (IntInit *II = dynamic_cast<IntInit*>(N->getChild(1)->getLeafValue())) {
if (!isPowerOf2_32(II->getValue())) { // Don't bother with single bits.
emitInit("SDValue " + RootName + "0" + " = " +
- RootName + ".getOperand(" + utostr(0) + ");");
+ getNodeName(RootName) + "->getOperand(" + utostr(0) + ");");
emitInit("SDValue " + RootName + "1" + " = " +
- RootName + ".getOperand(" + utostr(1) + ");");
+ getNodeName(RootName) + "->getOperand(" + utostr(1) + ");");
unsigned NTmp = TmpNo++;
emitCode("ConstantSDNode *Tmp" + utostr(NTmp) +
- " = dyn_cast<ConstantSDNode>(" + RootName + "1);");
+ " = dyn_cast<ConstantSDNode>(" +
+ getNodeName(RootName + "1") + ");");
emitCheck("Tmp" + utostr(NTmp));
const char *MaskPredicate = N->getOperator()->getName() == "or"
? "CheckOrMask(" : "CheckAndMask(";
- emitCheck(MaskPredicate + RootName + "0, Tmp" + utostr(NTmp) +
+ emitCheck(MaskPredicate + getValueName(RootName + "0") +
+ ", Tmp" + utostr(NTmp) +
", INT64_C(" + itostr(II->getValue()) + "))");
- EmitChildMatchCode(N->getChild(0), N, RootName + utostr(0), RootName,
+ EmitChildMatchCode(N->getChild(0), N, RootName + utostr(0),
ChainSuffix + utostr(0), FoundChain);
return;
}
@@ -618,10 +638,10 @@ public:
}
for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i, ++OpNo) {
- emitInit("SDValue " + RootName + utostr(OpNo) + " = " +
- RootName + ".getOperand(" +utostr(OpNo) + ");");
+ emitInit("SDValue " + getValueName(RootName + utostr(OpNo)) + " = " +
+ getNodeName(RootName) + "->getOperand(" + utostr(OpNo) + ");");
- EmitChildMatchCode(N->getChild(i), N, RootName + utostr(OpNo), RootName,
+ EmitChildMatchCode(N->getChild(i), N, RootName + utostr(OpNo),
ChainSuffix + utostr(OpNo), FoundChain);
}
@@ -641,7 +661,9 @@ public:
emitCode("SDValue Chain" + ChainSuffix + ";");
}
- std::string Code = Fn + "(" + RootName + ", " + RootName;
+ std::string Code = Fn + "(" +
+ getNodeName(RootName) + ", " +
+ getValueName(RootName);
for (unsigned i = 0; i < NumOps; i++)
Code += ", CPTmp" + RootName + "_" + utostr(i);
if (CP->hasProperty(SDNPHasChain)) {
@@ -654,23 +676,23 @@ public:
void EmitChildMatchCode(TreePatternNode *Child, TreePatternNode *Parent,
const std::string &RootName,
- const std::string &ParentRootName,
const std::string &ChainSuffix, bool &FoundChain) {
if (!Child->isLeaf()) {
// If it's not a leaf, recursively match.
const SDNodeInfo &CInfo = CGP.getSDNodeInfo(Child->getOperator());
- emitCheck(RootName + ".getOpcode() == " +
+ emitCheck(getNodeName(RootName) + "->getOpcode() == " +
CInfo.getEnumName());
EmitMatchCode(Child, Parent, RootName, ChainSuffix, FoundChain);
bool HasChain = false;
if (NodeHasProperty(Child, SDNPHasChain, CGP)) {
HasChain = true;
- FoldedChains.push_back(std::make_pair(RootName, CInfo.getNumResults()));
+ FoldedChains.push_back(std::make_pair(getValueName(RootName),
+ CInfo.getNumResults()));
}
if (NodeHasProperty(Child, SDNPOutFlag, CGP)) {
assert(FoldedFlag.first == "" && FoldedFlag.second == 0 &&
"Pattern folded multiple nodes which produce flags?");
- FoldedFlag = std::make_pair(RootName,
+ FoldedFlag = std::make_pair(getValueName(RootName),
CInfo.getNumResults() + (unsigned)HasChain);
}
} else {
@@ -679,14 +701,14 @@ public:
if (!Child->getName().empty()) {
std::string &VarMapEntry = VariableMap[Child->getName()];
if (VarMapEntry.empty()) {
- VarMapEntry = RootName;
+ VarMapEntry = getValueName(RootName);
} else {
// If we get here, this is a second reference to a specific name.
// Since we already have checked that the first reference is valid,
// we don't have to recursively match it, just check that it's the
// same as the previously named thing.
- emitCheck(VarMapEntry + " == " + RootName);
- Duplicates.insert(RootName);
+ emitCheck(VarMapEntry + " == " + getValueName(RootName));
+ Duplicates.insert(getValueName(RootName));
return;
}
}
@@ -719,17 +741,12 @@ public:
emitCode("SDValue " + ChainName + ";");
}
- std::string Code = Fn + "(";
- if (CP->hasAttribute(CPAttrParentAsRoot)) {
- Code += ParentRootName + ", ";
- } else {
- Code += "N, ";
- }
+ std::string Code = Fn + "(N, ";
if (CP->hasProperty(SDNPHasChain)) {
std::string ParentName(RootName.begin(), RootName.end()-1);
- Code += ParentName + ", ";
+ Code += getValueName(ParentName) + ", ";
}
- Code += RootName;
+ Code += getValueName(RootName);
for (unsigned i = 0; i < NumOps; i++)
Code += ", CPTmp" + RootName + "_" + utostr(i);
if (CP->hasProperty(SDNPHasChain))
@@ -739,11 +756,11 @@ public:
// Place holder for SRCVALUE nodes. Nothing to do here.
} else if (LeafRec->isSubClassOf("ValueType")) {
// Make sure this is the specified value type.
- emitCheck("cast<VTSDNode>(" + RootName +
+ emitCheck("cast<VTSDNode>(" + getNodeName(RootName) +
")->getVT() == MVT::" + LeafRec->getName());
} else if (LeafRec->isSubClassOf("CondCode")) {
// Make sure this is the specified cond code.
- emitCheck("cast<CondCodeSDNode>(" + RootName +
+ emitCheck("cast<CondCodeSDNode>(" + getNodeName(RootName) +
")->get() == ISD::" + LeafRec->getName());
} else {
#ifndef NDEBUG
@@ -755,14 +772,14 @@ public:
// If there are node predicates for this, emit the calls.
for (unsigned i = 0, e = Child->getPredicateFns().size(); i != e; ++i)
- emitCheck(Child->getPredicateFns()[i] + "(" + RootName +
- ".getNode())");
+ emitCheck(Child->getPredicateFns()[i] + "(" + getNodeName(RootName) +
+ ")");
} else if (IntInit *II =
dynamic_cast<IntInit*>(Child->getLeafValue())) {
unsigned NTmp = TmpNo++;
emitCode("ConstantSDNode *Tmp"+ utostr(NTmp) +
" = dyn_cast<ConstantSDNode>("+
- RootName + ");");
+ getNodeName(RootName) + ");");
emitCheck("Tmp" + utostr(NTmp));
unsigned CTmp = TmpNo++;
emitCode("int64_t CN"+ utostr(CTmp) +
@@ -798,7 +815,7 @@ public:
}
if (Val[0] == 'T' && Val[1] == 'm' && Val[2] == 'p') {
// Already selected this operand, just return the tmpval.
- NodeOps.push_back(Val);
+ NodeOps.push_back(getValueName(Val));
return NodeOps;
}
@@ -827,7 +844,7 @@ public:
// value if used multiple times by this pattern result.
Val = TmpVar;
ModifiedVal = true;
- NodeOps.push_back(Val);
+ NodeOps.push_back(getValueName(Val));
} else if (!N->isLeaf() && N->getOperator()->getName() == "fpimm") {
assert(N->getExtTypes().size() == 1 && "Multiple types not handled!");
std::string TmpVar = "Tmp" + utostr(ResNo);
@@ -839,7 +856,7 @@ public:
// value if used multiple times by this pattern result.
Val = TmpVar;
ModifiedVal = true;
- NodeOps.push_back(Val);
+ NodeOps.push_back(getValueName(Val));
} else if (!N->isLeaf() && N->getOperator()->getName() == "texternalsym"){
Record *Op = OperatorMap[N->getName()];
// Transform ExternalSymbol to TargetExternalSymbol
@@ -854,7 +871,7 @@ public:
Val = TmpVar;
ModifiedVal = true;
}
- NodeOps.push_back(Val);
+ NodeOps.push_back(getValueName(Val));
} else if (!N->isLeaf() && (N->getOperator()->getName() == "tglobaladdr"
|| N->getOperator()->getName() == "tglobaltlsaddr")) {
Record *Op = OperatorMap[N->getName()];
@@ -871,27 +888,27 @@ public:
Val = TmpVar;
ModifiedVal = true;
}
- NodeOps.push_back(Val);
+ NodeOps.push_back(getValueName(Val));
} else if (!N->isLeaf()
&& (N->getOperator()->getName() == "texternalsym"
|| N->getOperator()->getName() == "tconstpool")) {
// Do not rewrite the variable name, since we don't generate a new
// temporary.
- NodeOps.push_back(Val);
+ NodeOps.push_back(getValueName(Val));
} else if (N->isLeaf() && (CP = NodeGetComplexPattern(N, CGP))) {
for (unsigned i = 0; i < CP->getNumOperands(); ++i) {
- NodeOps.push_back("CPTmp" + Val + "_" + utostr(i));
+ NodeOps.push_back(getValueName("CPTmp" + Val + "_" + utostr(i)));
}
} else {
// This node, probably wrapped in a SDNodeXForm, behaves like a leaf
// node even if it isn't one. Don't select it.
if (!LikeLeaf) {
if (isRoot && N->isLeaf()) {
- emitCode("ReplaceUses(N, " + Val + ");");
+ emitCode("ReplaceUses(SDValue(N, 0), " + Val + ");");
emitCode("return NULL;");
}
}
- NodeOps.push_back(Val);
+ NodeOps.push_back(getValueName(Val));
}
if (ModifiedVal) {
@@ -907,13 +924,13 @@ public:
emitCode("SDValue Tmp" + utostr(ResNo) + " = CurDAG->getRegister(" +
getQualifiedName(DI->getDef()) + ", " +
getEnumName(N->getTypeNum(0)) + ");");
- NodeOps.push_back("Tmp" + utostr(ResNo));
+ NodeOps.push_back(getValueName("Tmp" + utostr(ResNo)));
return NodeOps;
} else if (DI->getDef()->getName() == "zero_reg") {
emitCode("SDValue Tmp" + utostr(ResNo) +
" = CurDAG->getRegister(0, " +
getEnumName(N->getTypeNum(0)) + ");");
- NodeOps.push_back("Tmp" + utostr(ResNo));
+ NodeOps.push_back(getValueName("Tmp" + utostr(ResNo)));
return NodeOps;
} else if (DI->getDef()->isSubClassOf("RegisterClass")) {
// Handle a reference to a register class. This is used
@@ -922,7 +939,7 @@ public:
" = CurDAG->getTargetConstant(" +
getQualifiedName(DI->getDef()) + "RegClassID, " +
"MVT::i32);");
- NodeOps.push_back("Tmp" + utostr(ResNo));
+ NodeOps.push_back(getValueName("Tmp" + utostr(ResNo)));
return NodeOps;
}
} else if (IntInit *II = dynamic_cast<IntInit*>(N->getLeafValue())) {
@@ -932,7 +949,7 @@ public:
" = CurDAG->getTargetConstant(0x" +
utohexstr((uint64_t) II->getValue()) +
"ULL, " + getEnumName(N->getTypeNum(0)) + ");");
- NodeOps.push_back("Tmp" + utostr(ResNo));
+ NodeOps.push_back(getValueName("Tmp" + utostr(ResNo)));
return NodeOps;
}
@@ -979,7 +996,8 @@ public:
if (NodeHasOptInFlag) {
emitCode("bool HasInFlag = "
- "(N.getOperand(N.getNumOperands()-1).getValueType() == MVT::Flag);");
+ "(N->getOperand(N->getNumOperands()-1).getValueType() == "
+ "MVT::Flag);");
}
if (IsVariadic)
emitCode("SmallVector<SDValue, 8> Ops" + utostr(OpcNo) + ";");
@@ -1007,7 +1025,7 @@ public:
}
emitCode("InChains.push_back(" + ChainName + ");");
emitCode(ChainName + " = CurDAG->getNode(ISD::TokenFactor, "
- "N.getDebugLoc(), MVT::Other, "
+ "N->getDebugLoc(), MVT::Other, "
"&InChains[0], InChains.size());");
if (GenDebug) {
emitCode("CurDAG->setSubgraphColor(" + ChainName +".getNode(), \"yellow\");");
@@ -1062,7 +1080,7 @@ public:
}
if (NodeHasOptInFlag) {
emitCode("if (HasInFlag) {");
- emitCode(" InFlag = N.getOperand(N.getNumOperands()-1);");
+ emitCode(" InFlag = N->getOperand(N->getNumOperands()-1);");
emitCode("}");
}
}
@@ -1090,7 +1108,7 @@ public:
if (!isRoot || (InputHasChain && !NodeHasChain))
// For call to "getMachineNode()".
- Code += ", N.getDebugLoc()";
+ Code += ", N->getDebugLoc()";
emitOpcode(II.Namespace + "::" + II.TheDef->getName());
@@ -1129,9 +1147,9 @@ public:
EndAdjust = "-(HasInFlag?1:0)"; // May have a flag.
emitCode("for (unsigned i = NumInputRootOps + " + utostr(NodeHasChain) +
- ", e = N.getNumOperands()" + EndAdjust + "; i != e; ++i) {");
+ ", e = N->getNumOperands()" + EndAdjust + "; i != e; ++i) {");
- emitCode(" Ops" + utostr(OpsNo) + ".push_back(N.getOperand(i));");
+ emitCode(" Ops" + utostr(OpsNo) + ".push_back(N->getOperand(i));");
emitCode("}");
}
@@ -1227,7 +1245,7 @@ public:
ReplaceTos.push_back("InFlag");
} else {
assert(NodeHasProperty(Pattern, SDNPOutFlag, CGP));
- ReplaceFroms.push_back("SDValue(N.getNode(), " +
+ ReplaceFroms.push_back("SDValue(N, " +
utostr(NumPatResults + (unsigned)InputHasChain)
+ ")");
ReplaceTos.push_back("InFlag");
@@ -1235,7 +1253,7 @@ public:
}
if (!ReplaceFroms.empty() && InputHasChain) {
- ReplaceFroms.push_back("SDValue(N.getNode(), " +
+ ReplaceFroms.push_back("SDValue(N, " +
utostr(NumPatResults) + ")");
ReplaceTos.push_back("SDValue(" + ChainName + ".getNode(), " +
ChainName + ".getResNo()" + ")");
@@ -1247,13 +1265,8 @@ public:
;
} else if (InputHasChain && !NodeHasChain) {
// One of the inner nodes produces a chain.
- if (NodeHasOutFlag) {
- ReplaceFroms.push_back("SDValue(N.getNode(), " +
- utostr(NumPatResults+1) +
- ")");
- ReplaceTos.push_back("SDValue(ResNode, N.getResNo()-1)");
- }
- ReplaceFroms.push_back("SDValue(N.getNode(), " +
+ assert(!NodeHasOutFlag && "Node has flag but not chain!");
+ ReplaceFroms.push_back("SDValue(N, " +
utostr(NumPatResults) + ")");
ReplaceTos.push_back(ChainName);
}
@@ -1299,7 +1312,7 @@ public:
if (!isRoot || (InputHasChain && !NodeHasChain)) {
Code = "CurDAG->getMachineNode(" + Code;
} else {
- Code = "CurDAG->SelectNodeTo(N.getNode(), " + Code;
+ Code = "CurDAG->SelectNodeTo(N, " + Code;
}
if (isRoot) {
if (After.empty())
@@ -1402,10 +1415,12 @@ private:
MVT::SimpleValueType RVT = getRegisterValueType(RR, T);
if (RVT == MVT::Flag) {
if (!InFlagDecled) {
- emitCode("SDValue InFlag = " + RootName + utostr(OpNo) + ";");
+ emitCode("SDValue InFlag = " +
+ getValueName(RootName + utostr(OpNo)) + ";");
InFlagDecled = true;
} else
- emitCode("InFlag = " + RootName + utostr(OpNo) + ";");
+ emitCode("InFlag = " +
+ getValueName(RootName + utostr(OpNo)) + ";");
} else {
if (!ChainEmitted) {
emitCode("SDValue Chain = CurDAG->getEntryNode();");
@@ -1418,9 +1433,10 @@ private:
}
std::string Decl = (!ResNodeDecled) ? "SDNode *" : "";
emitCode(Decl + "ResNode = CurDAG->getCopyToReg(" + ChainName +
- ", " + RootName + ".getDebugLoc()" +
+ ", " + getNodeName(RootName) + "->getDebugLoc()" +
", " + getQualifiedName(RR) +
- ", " + RootName + utostr(OpNo) + ", InFlag).getNode();");
+ ", " + getValueName(RootName + utostr(OpNo)) +
+ ", InFlag).getNode();");
ResNodeDecled = true;
emitCode(ChainName + " = SDValue(ResNode, 0);");
emitCode("InFlag = SDValue(ResNode, 1);");
@@ -1432,12 +1448,12 @@ private:
if (HasInFlag) {
if (!InFlagDecled) {
- emitCode("SDValue InFlag = " + RootName +
- ".getOperand(" + utostr(OpNo) + ");");
+ emitCode("SDValue InFlag = " + getNodeName(RootName) +
+ "->getOperand(" + utostr(OpNo) + ");");
InFlagDecled = true;
} else
- emitCode("InFlag = " + RootName +
- ".getOperand(" + utostr(OpNo) + ");");
+ emitCode("InFlag = " + getNodeName(RootName) +
+ "->getOperand(" + utostr(OpNo) + ");");
}
}
};
@@ -1763,7 +1779,7 @@ void DAGISelEmitter::EmitInstructionSelector(raw_ostream &OS) {
AddedInits.push_back(GeneratedCode[j].second);
}
- std::string CalleeCode = "(const SDValue &N";
+ std::string CalleeCode = "(SDNode *N";
std::string CallerCode = "(N";
for (unsigned j = 0, e = TargetOpcodes.size(); j != e; ++j) {
CalleeCode += ", unsigned Opc" + utostr(j);
@@ -1816,7 +1832,7 @@ void DAGISelEmitter::EmitInstructionSelector(raw_ostream &OS) {
// Replace the emission code within selection routines with calls to the
// emission functions.
if (GenDebug) {
- GeneratedCode.push_back(std::make_pair(0, "CurDAG->setSubgraphColor(N.getNode(), \"red\");"));
+ GeneratedCode.push_back(std::make_pair(0, "CurDAG->setSubgraphColor(N, \"red\");"));
}
CallerCode = "SDNode *Result = Emit_" + utostr(EmitFuncNum) + CallerCode;
GeneratedCode.push_back(std::make_pair(3, CallerCode));
@@ -1825,7 +1841,7 @@ void DAGISelEmitter::EmitInstructionSelector(raw_ostream &OS) {
GeneratedCode.push_back(std::make_pair(0, " CurDAG->setSubgraphColor(Result, \"yellow\");"));
GeneratedCode.push_back(std::make_pair(0, " CurDAG->setSubgraphColor(Result, \"black\");"));
GeneratedCode.push_back(std::make_pair(0, "}"));
- //GeneratedCode.push_back(std::make_pair(0, "CurDAG->setSubgraphColor(N.getNode(), \"black\");"));
+ //GeneratedCode.push_back(std::make_pair(0, "CurDAG->setSubgraphColor(N, \"black\");"));
}
GeneratedCode.push_back(std::make_pair(0, "return Result;"));
}
@@ -1894,7 +1910,7 @@ void DAGISelEmitter::EmitInstructionSelector(raw_ostream &OS) {
std::reverse(CodeForPatterns.begin(), CodeForPatterns.end());
OS << "SDNode *Select_" << getLegalCName(OpName)
- << OpVTStr << "(const SDValue &N) {\n";
+ << OpVTStr << "(SDNode *N) {\n";
// Emit all of the patterns now, grouped together to share code.
EmitPatterns(CodeForPatterns, 2, OS);
@@ -1917,11 +1933,11 @@ void DAGISelEmitter::EmitInstructionSelector(raw_ostream &OS) {
}
OS << "// The main instruction selector code.\n"
- << "SDNode *SelectCode(SDValue N) {\n"
- << " MVT::SimpleValueType NVT = N.getNode()->getValueType(0).getSimpleVT().SimpleTy;\n"
- << " switch (N.getOpcode()) {\n"
+ << "SDNode *SelectCode(SDNode *N) {\n"
+ << " MVT::SimpleValueType NVT = N->getValueType(0).getSimpleVT().SimpleTy;\n"
+ << " switch (N->getOpcode()) {\n"
<< " default:\n"
- << " assert(!N.isMachineOpcode() && \"Node already selected!\");\n"
+ << " assert(!N->isMachineOpcode() && \"Node already selected!\");\n"
<< " break;\n"
<< " case ISD::EntryToken: // These nodes remain the same.\n"
<< " case ISD::BasicBlock:\n"
@@ -1943,7 +1959,7 @@ void DAGISelEmitter::EmitInstructionSelector(raw_ostream &OS) {
<< " }\n"
<< " case ISD::AssertSext:\n"
<< " case ISD::AssertZext: {\n"
- << " ReplaceUses(N, N.getOperand(0));\n"
+ << " ReplaceUses(SDValue(N, 0), N->getOperand(0));\n"
<< " return NULL;\n"
<< " }\n"
<< " case ISD::INLINEASM: return Select_INLINEASM(N);\n"
@@ -2010,9 +2026,9 @@ void DAGISelEmitter::EmitInstructionSelector(raw_ostream &OS) {
}
OS << " } // end of big switch.\n\n"
- << " if (N.getOpcode() != ISD::INTRINSIC_W_CHAIN &&\n"
- << " N.getOpcode() != ISD::INTRINSIC_WO_CHAIN &&\n"
- << " N.getOpcode() != ISD::INTRINSIC_VOID) {\n"
+ << " if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN &&\n"
+ << " N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&\n"
+ << " N->getOpcode() != ISD::INTRINSIC_VOID) {\n"
<< " CannotYetSelect(N);\n"
<< " } else {\n"
<< " CannotYetSelectIntrinsic(N);\n"
diff --git a/utils/TableGen/FastISelEmitter.cpp b/utils/TableGen/FastISelEmitter.cpp
index 277640d..f589bcc 100644
--- a/utils/TableGen/FastISelEmitter.cpp
+++ b/utils/TableGen/FastISelEmitter.cpp
@@ -586,7 +586,7 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
// on opcode and type.
OS << "unsigned FastEmit_";
Operands.PrintManglingSuffix(OS);
- OS << "(MVT VT, MVT RetVT, ISD::NodeType Opcode";
+ OS << "(MVT VT, MVT RetVT, unsigned Opcode";
if (!Operands.empty())
OS << ", ";
Operands.PrintParameters(OS);
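
The FastISelEmitter hunk loosens the generated FastEmit_* signature from ISD::NodeType to plain unsigned. The diff does not state the motivation; a plausible reading (assumption, not confirmed by the source) is that opcode values outside the ISD enum can now flow through the same entry point, while existing callers keep compiling via the implicit enum-to-unsigned conversion:

  // Illustrative only: MyISD and FastEmit_r are stand-ins, not LLVM API.
  namespace MyISD { enum NodeType { ADD = 1, SUB = 2 }; }

  unsigned FastEmit_r(unsigned VT, unsigned RetVT, unsigned Opcode) {
    return Opcode; // the real generated code dispatches on Opcode
  }

  int main() {
    unsigned A = FastEmit_r(0, 0, MyISD::ADD); // enum converts implicitly
    unsigned B = FastEmit_r(0, 0, 1234u);      // raw opcodes now accepted
    return (A == 1 && B == 1234u) ? 0 : 1;
  }
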
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index adb98fb9..cf40c78 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -345,7 +345,8 @@ void InstrInfoEmitter::emitShiftedValue(Record *R, StringInit *Val,
R->getName() != "INSERT_SUBREG" &&
R->getName() != "IMPLICIT_DEF" &&
R->getName() != "SUBREG_TO_REG" &&
- R->getName() != "COPY_TO_REGCLASS")
+ R->getName() != "COPY_TO_REGCLASS" &&
+ R->getName() != "DEBUG_VALUE")
throw R->getName() + " doesn't have a field named '" +
Val->getValue() + "'!";
return;
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index 23919d9..c5df9e4 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -522,7 +522,7 @@ void IntrinsicEmitter::
EmitModRefBehavior(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS){
OS << "// Determine intrinsic alias analysis mod/ref behavior.\n";
OS << "#ifdef GET_INTRINSIC_MODREF_BEHAVIOR\n";
- OS << "switch (id) {\n";
+ OS << "switch (iid) {\n";
OS << "default:\n return UnknownModRefBehavior;\n";
for (unsigned i = 0, e = Ints.size(); i != e; ++i) {
if (Ints[i].ModRef == CodeGenIntrinsic::WriteMem)
diff --git a/utils/TableGen/OptParserEmitter.cpp b/utils/TableGen/OptParserEmitter.cpp
index 3cd5784..6892912 100644
--- a/utils/TableGen/OptParserEmitter.cpp
+++ b/utils/TableGen/OptParserEmitter.cpp
@@ -75,26 +75,10 @@ void OptParserEmitter::run(raw_ostream &OS) {
Records.getAllDerivedDefinitions("OptionGroup");
std::vector<Record*> Opts = Records.getAllDerivedDefinitions("Option");
- if (GenDefs) {
- OS << "\
-//=== TableGen'erated File - Option Parsing Definitions ---------*- C++ -*-===//\n \
-//\n\
-// Option Parsing Definitions\n\
-//\n\
-// Automatically generated file, do not edit!\n\
-//\n\
-//===----------------------------------------------------------------------===//\n";
- } else {
- OS << "\
-//=== TableGen'erated File - Option Parsing Table ---------------*- C++ -*-===//\n \
-//\n\
-// Option Parsing Definitions\n\
-//\n\
-// Automatically generated file, do not edit!\n\
-//\n\
-//===----------------------------------------------------------------------===//\n";
- }
- OS << "\n";
+ if (GenDefs)
+ EmitSourceFileHeader("Option Parsing Definitions", OS);
+ else
+ EmitSourceFileHeader("Option Parsing Table", OS);
array_pod_sort(Opts.begin(), Opts.end(), CompareOptionRecords);
if (GenDefs) {
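
The OptParserEmitter hunk replaces two nearly identical banner literals with the shared EmitSourceFileHeader helper; only the description string differs between the definitions and table modes. A rough sketch of what that helper prints, reconstructed from the literals deleted above (assumption: the actual implementation lives with the TableGen backend sources and may differ in exact padding):

  #include <ostream>
  #include <string>

  // Assumed shape of EmitSourceFileHeader, inferred from the deleted
  // banner strings; not the verbatim LLVM implementation.
  void EmitSourceFileHeader(const std::string &Desc, std::ostream &OS) {
    OS << "//===- TableGen'erated file "
       << "---------------------------------------*- C++ -*-===//\n"
       << "//\n// " << Desc << "\n//\n"
       << "// Automatically generated file, do not edit!\n//\n"
       << "//===" << std::string(70, '-') << "===//\n\n";
  }
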
diff --git a/utils/TableGen/Record.cpp b/utils/TableGen/Record.cpp
index 542735e..f9e2fe8 100644
--- a/utils/TableGen/Record.cpp
+++ b/utils/TableGen/Record.cpp
@@ -730,6 +730,15 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) {
}
break;
}
+ case EQ: {
+    // Make sure both operands have resolved to strings before folding.
+ StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
+ StringInit *RHSs = dynamic_cast<StringInit*>(RHS);
+ if (LHSs && RHSs)
+ return new IntInit(LHSs->getValue() == RHSs->getValue());
+
+ break;
+ }
case SHL:
case SRA:
case SRL: {
@@ -768,6 +777,7 @@ std::string BinOpInit::getAsString() const {
case SHL: Result = "!shl"; break;
case SRA: Result = "!sra"; break;
case SRL: Result = "!srl"; break;
+ case EQ: Result = "!eq"; break;
case STRCONCAT: Result = "!strconcat"; break;
case NAMECONCAT:
Result = "!nameconcat<" + getType()->getAsString() + ">"; break;
diff --git a/utils/TableGen/Record.h b/utils/TableGen/Record.h
index 278c349..45f3072 100644
--- a/utils/TableGen/Record.h
+++ b/utils/TableGen/Record.h
@@ -842,7 +842,7 @@ public:
///
class BinOpInit : public OpInit {
public:
- enum BinaryOp { SHL, SRA, SRL, STRCONCAT, CONCAT, NAMECONCAT };
+ enum BinaryOp { SHL, SRA, SRL, STRCONCAT, CONCAT, NAMECONCAT, EQ };
private:
BinaryOp Opc;
Init *LHS, *RHS;
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index 0dbfcbe..9ac652f 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -519,8 +519,8 @@ void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS) {
OS << Target;
OS << "Subtarget::ParseSubtargetFeatures(const std::string &FS,\n"
<< " const std::string &CPU) {\n"
- << " DEBUG(errs() << \"\\nFeatures:\" << FS);\n"
- << " DEBUG(errs() << \"\\nCPU:\" << CPU);\n"
+ << " DEBUG(dbgs() << \"\\nFeatures:\" << FS);\n"
+ << " DEBUG(dbgs() << \"\\nCPU:\" << CPU);\n"
<< " SubtargetFeatures Features(FS);\n"
<< " Features.setCPUIfNone(CPU);\n"
<< " uint32_t Bits = Features.getBits(SubTypeKV, SubTypeKVSize,\n"
diff --git a/utils/TableGen/TGLexer.cpp b/utils/TableGen/TGLexer.cpp
index 4498e30..2c7becc 100644
--- a/utils/TableGen/TGLexer.cpp
+++ b/utils/TableGen/TGLexer.cpp
@@ -434,6 +434,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
if (Len == 3 && !memcmp(Start, "sra", 3)) return tgtok::XSRA;
if (Len == 3 && !memcmp(Start, "srl", 3)) return tgtok::XSRL;
if (Len == 3 && !memcmp(Start, "shl", 3)) return tgtok::XSHL;
+ if (Len == 2 && !memcmp(Start, "eq", 2)) return tgtok::XEq;
if (Len == 9 && !memcmp(Start, "strconcat", 9)) return tgtok::XStrConcat;
if (Len == 10 && !memcmp(Start, "nameconcat", 10)) return tgtok::XNameConcat;
if (Len == 5 && !memcmp(Start, "subst", 5)) return tgtok::XSubst;
diff --git a/utils/TableGen/TGLexer.h b/utils/TableGen/TGLexer.h
index 6790208..835f351 100644
--- a/utils/TableGen/TGLexer.h
+++ b/utils/TableGen/TGLexer.h
@@ -45,7 +45,7 @@ namespace tgtok {
// !keywords.
XConcat, XSRA, XSRL, XSHL, XStrConcat, XNameConcat, XCast, XSubst,
- XForEach, XCar, XCdr, XNull, XIf,
+ XForEach, XCar, XCdr, XNull, XIf, XEq,
// Integer value.
IntVal,
diff --git a/utils/TableGen/TGParser.cpp b/utils/TableGen/TGParser.cpp
index b095a6c..8c158e0 100644
--- a/utils/TableGen/TGParser.cpp
+++ b/utils/TableGen/TGParser.cpp
@@ -792,6 +792,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
case tgtok::XSRA:
case tgtok::XSRL:
case tgtok::XSHL:
+ case tgtok::XEq:
case tgtok::XStrConcat:
case tgtok::XNameConcat: { // Value ::= !binop '(' Value ',' Value ')'
BinOpInit::BinaryOp Code;
@@ -820,6 +821,11 @@ Init *TGParser::ParseOperation(Record *CurRec) {
Code = BinOpInit::SHL;
Type = new IntRecTy();
break;
+ case tgtok::XEq:
+ Lex.Lex(); // eat the operation
+ Code = BinOpInit::EQ;
+ Type = new IntRecTy();
+ break;
case tgtok::XStrConcat:
Lex.Lex(); // eat the operation
Code = BinOpInit::STRCONCAT;
@@ -1257,6 +1263,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType) {
case tgtok::XSRA:
case tgtok::XSRL:
case tgtok::XSHL:
+ case tgtok::XEq:
case tgtok::XStrConcat:
case tgtok::XNameConcat: // Value ::= !binop '(' Value ',' Value ')'
case tgtok::XIf:
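
Taken together, the Record.cpp/Record.h, TGLexer, and TGParser hunks add a new TableGen binary operator: !eq(a, b) folds to int 1 when both operands resolve to equal strings and 0 when they differ, and per the Fold code it stays unevaluated until both sides are concrete StringInits. A minimal .td usage sketch (class and field names are illustrative):

  class Flag<string mode> {
    // Folds to 1 or 0 once 'mode' is a concrete string.
    int IsFast = !eq(mode, "fast");
  }
  def FastPath : Flag<"fast">;  // IsFast = 1
  def SlowPath : Flag<"slow">;  // IsFast = 0
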
diff --git a/utils/buildit/GNUmakefile b/utils/buildit/GNUmakefile
index 9971883..57aac43 100644
--- a/utils/buildit/GNUmakefile
+++ b/utils/buildit/GNUmakefile
@@ -34,11 +34,9 @@ DSTROOT = $(OBJROOT)/../dst
PREFIX = /usr/local
-# Unless assertions are forced on in the GMAKE command line, disable them.
-ifdef ENABLE_ASSERTIONS
-LLVM_ASSERTIONS := yes
-else
-LLVM_ASSERTIONS := no
+# Unless overridden on the GMAKE command line, enable assertions.
+ifndef ENABLE_ASSERTIONS
+ENABLE_ASSERTIONS := yes
endif
# Default is optimized build.
@@ -61,7 +59,7 @@ install: $(OBJROOT) $(SYMROOT) $(DSTROOT)
cd $(OBJROOT) && \
$(SRC)/utils/buildit/build_llvm "$(RC_ARCHS)" "$(TARGETS)" \
$(SRC) $(PREFIX) $(DSTROOT) $(SYMROOT) \
- $(LLVM_ASSERTIONS) $(LLVM_OPTIMIZED) \
+ $(ENABLE_ASSERTIONS) $(LLVM_OPTIMIZED) \
$(RC_ProjectSourceVersion) $(RC_ProjectSourceSubversion)
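
With the hunks above, the buildit wrapper turns assertions on by default and forwards ENABLE_ASSERTIONS straight to build_llvm, so an assertion-free build now has to be requested explicitly. An illustrative invocation (the install target and variable name come from this makefile; other arguments are site-specific):

  # Assertions default to yes; override on the command line to disable.
  make -f utils/buildit/GNUmakefile install ENABLE_ASSERTIONS=no
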
diff --git a/utils/vim/llvm.vim b/utils/vim/llvm.vim
index 451013e..6e4a207 100644
--- a/utils/vim/llvm.vim
+++ b/utils/vim/llvm.vim
@@ -71,7 +71,8 @@ syn keyword llvmBoolean true false
syn keyword llvmConstant zeroinitializer undef null
syn match llvmComment /;.*$/
syn region llvmString start=/"/ skip=/\\"/ end=/"/
-syn match llvmLabel /[\-a-zA-Z\$._0-9]*:/
+syn match llvmLabel /[-a-zA-Z$._][-a-zA-Z$._0-9]*:/
+syn match llvmIdentifier /[%@][-a-zA-Z$._][-a-zA-Z$._0-9]*/
" Syntax-highlight dejagnu test commands.
syn match llvmSpecialComment /;\s*RUN:.*$/
@@ -101,6 +102,7 @@ if version >= 508 || !exists("did_c_syn_inits")
HiLink llvmConstant Constant
HiLink llvmSpecialComment SpecialComment
HiLink llvmError Error
+ HiLink llvmIdentifier Identifier
delcommand HiLink
endif
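
The llvm.vim changes tighten llvmLabel (the old pattern's bare "*" let it match a lone ":" or a digit-leading name; the new one requires a leading non-digit identifier character) and add an llvmIdentifier group for %- and @-prefixed names, linked to Vim's standard Identifier highlight. A small IR fragment showing what each group now matches (illustrative):

  define i32 @inc(i32 %x) {   ; "@inc" and "%x" match llvmIdentifier
  entry:                      ; "entry:" matches llvmLabel
    %tmp.1 = add i32 %x, 1
    ret i32 %tmp.1
  }
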
diff --git a/utils/vim/vimrc b/utils/vim/vimrc
index 7b1fd87..4909f60 100644
--- a/utils/vim/vimrc
+++ b/utils/vim/vimrc
@@ -35,7 +35,7 @@ augroup END
" Set a few indentation parameters. See the VIM help for cinoptions-values for
" details. These aren't absolute rules; they're just an approximation of
" common style in LLVM source.
-set cinoptions=:0,g0,(0,Ws
+set cinoptions=:0,g0,(0,Ws,l1
" Add and delete spaces in increments of `shiftwidth' for tabs
set smarttab
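
The extra l1 flag makes a block opened on a case-label line align with the case label instead of with the statement that follows it on the same line; see :help cinoptions-values. Roughly (illustrative, adapted from the Vim help):

  " With cinoptions+=l1, the block under a case label indents like this:
  "   switch (a) {
  "     case 1: {
  "       break;
  "     }
  "   }
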