Import LLVM r73340.

author: ed <ed@FreeBSD.org> 2009-06-14 09:23:33 +0000
committer: ed <ed@FreeBSD.org> 2009-06-14 09:23:33 +0000
commit: db89e312d968c258aba3c79c1c398f5fb19267a3 (patch)
tree: 49817b316c4fdaa56d9d16ebf2555303d1a990e0
parent: de000e339094f8c6e06a635dac9a803861416ec6 (diff)
download: FreeBSD-src-db89e312d968c258aba3c79c1c398f5fb19267a3.zip
FreeBSD-src-db89e312d968c258aba3c79c1c398f5fb19267a3.tar.gz
115 files changed, 3934 insertions, 1118 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8133398..2e2cf35 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,6 +26,7 @@ set(LLVM_MAIN_INCLUDE_DIR ${LLVM_MAIN_SRC_DIR}/include)
 set(LLVM_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
 set(LLVM_TOOLS_BINARY_DIR ${LLVM_BINARY_DIR}/bin)
 set(LLVM_EXAMPLES_BINARY_DIR ${LLVM_BINARY_DIR}/examples)
+set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" )
 
 set(LLVM_ALL_TARGETS
   Alpha
@@ -186,11 +187,26 @@ if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
 endif( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
 
 if( MSVC )
+  # List of valid CRTs for MSVC
+  set(MSVC_CRT
+    MD
+    MDd)
+
+  set(LLVM_USE_CRT "" CACHE STRING "Specify VC++ CRT to use for debug/release configurations.")
   add_llvm_definitions( -D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS )
   add_llvm_definitions( -D_SCL_SECURE_NO_WARNINGS -DCRT_NONSTDC_NO_WARNINGS )
   add_llvm_definitions( -D_SCL_SECURE_NO_DEPRECATE )
   add_llvm_definitions( -wd4146 -wd4503 -wd4996 -wd4800 -wd4244 -wd4624 )
   add_llvm_definitions( -wd4355 -wd4715 -wd4180 -wd4345 -wd4224 )
+  # Validate LLVM_USE_CRT; quoted so an empty value stays a real if() operand.
+  if (NOT "${LLVM_USE_CRT}" STREQUAL "")
+    list(FIND MSVC_CRT "${LLVM_USE_CRT}" idx)
+    if (idx LESS 0)
+      message(FATAL_ERROR "Invalid value for LLVM_USE_CRT: ${LLVM_USE_CRT}. Valid options are one of: ${MSVC_CRT}")
+    endif (idx LESS 0)
+    add_llvm_definitions("/${LLVM_USE_CRT}")
+    message(STATUS "Using VC++ CRT: ${LLVM_USE_CRT}")
+  endif (NOT "${LLVM_USE_CRT}" STREQUAL "")
 endif( MSVC )
 
 include_directories( ${LLVM_BINARY_DIR}/include ${LLVM_MAIN_INCLUDE_DIR})
@@ -207,6 +223,8 @@ set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} ${LLVM_LIBS} )
 
 set(LLVM_TABLEGEN "tblgen" CACHE
   STRING "Native TableGen executable. Saves building one when cross-compiling.")
+# Effective tblgen executable to be used:
+set(LLVM_TABLEGEN_EXE ${LLVM_TABLEGEN})
 
 add_subdirectory(utils/TableGen)
 
diff --git a/CREDITS.TXT b/CREDITS.TXT
index 0beee20..76b7296 100644
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@@ -3,7 +3,7 @@ project.  If you have contributed a patch or made some other contribution to
 LLVM, please submit a patch to this file to add yourself, and it will be
 done!
 
-The list is sorted by name and formatted to allow easy grepping and
+The list is sorted by surname and formatted to allow easy grepping and
 beautification by scripts.  The fields are: name (N), email (E), web-address
 (W), PGP key ID and fingerprint (P), description (D), and snail-mail address
 (S).
@@ -148,10 +148,6 @@ N: Patrick Jenkins
 E: patjenk@wam.umd.edu
 D: Nightly Tester
 
-N: Brad Jones
-E: kungfoomaster@nondot.org
-D: Support for packed types
-
 N: Dale Johannesen
 E: dalej@apple.com
 D: ARM constant islands improvements
@@ -160,6 +156,10 @@ D: Rewrite X87 back end
 D: Use APFloat for floating point constants widely throughout compiler
 D: Implement X87 long double
 
+N: Brad Jones
+E: kungfoomaster@nondot.org
+D: Support for packed types
+
 N: Eric Kidd
 W: http://randomhacks.net/
 D: llvm-config script
@@ -231,6 +231,13 @@ N: Scott Michel
 E: scottm@aero.org
 D: Added STI Cell SPU backend.
 
+N: Edward O'Callaghan
+E: eocallaghan@auroraux.org
+W: http://www.auroraux.org
+D: Add Clang support with various other improvements to utils/NewNightlyTest.pl
+D: Fix and maintain Solaris & AuroraUX support for llvm, various build warnings
+D: and error clean ups.
+
 N: Morten Ofstad
 E: morten@hue.no
 D: Visual C++ compatibility fixes
@@ -266,6 +273,10 @@ N: Arnold Schwaighofer
 E: arnold.schwaighofer@gmail.com
 D: Tail call optimization for the x86 backend
 
+N: Shantonu Sen
+E: ssen@apple.com
+D: Miscellaneous bug fixes
+
 N: Anand Shukla
 E: ashukla@cs.uiuc.edu
 D: The `paths' pass
@@ -290,8 +301,4 @@ D: Thread Local Storage implementation
 
 N: Bill Wendling
 E: isanbard@gmail.com
-D: Machine LICM
-D: Darwin exception handling
-D: MMX & SSSE3 instructions
-D: SPEC2006 support
-
+D: Bunches of stuff
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index a21ed20..e525674 100755
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -10,8 +10,8 @@ macro(add_llvm_library name)
     add_dependencies( ${name} ${LLVM_COMMON_DEPENDS} )
   endif( LLVM_COMMON_DEPENDS )
   install(TARGETS ${name}
-    LIBRARY DESTINATION lib
-    ARCHIVE DESTINATION lib)
+    LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX}
+    ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX})
 endmacro(add_llvm_library name)
 
 
diff --git a/cmake/modules/AddPartiallyLinkedObject.cmake b/cmake/modules/AddPartiallyLinkedObject.cmake
index d20666d..0f92455 100755
--- a/cmake/modules/AddPartiallyLinkedObject.cmake
+++ b/cmake/modules/AddPartiallyLinkedObject.cmake
@@ -38,5 +38,5 @@ macro(add_partially_linked_object lib)
     set( llvm_lib_targets ${llvm_lib_targets} ${tnplo} PARENT_SCOPE )
   endif( )
   install(FILES ${pll}
-    DESTINATION lib)
+    DESTINATION lib${LLVM_LIBDIR_SUFFIX})
 endmacro(add_partially_linked_object lib)
diff --git a/cmake/modules/CrossCompileLLVM.cmake b/cmake/modules/CrossCompileLLVM.cmake
index f638d67..138ff0e 100644
--- a/cmake/modules/CrossCompileLLVM.cmake
+++ b/cmake/modules/CrossCompileLLVM.cmake
@@ -1,7 +1,7 @@
 
 if( ${LLVM_TABLEGEN} STREQUAL "tblgen" )
   set(CX_NATIVE_TG_DIR "${CMAKE_BINARY_DIR}/native")
-  set(LLVM_TABLEGEN "${CX_NATIVE_TG_DIR}/bin/tblgen")
+  set(LLVM_TABLEGEN_EXE "${CX_NATIVE_TG_DIR}/bin/tblgen")
 
   add_custom_command(OUTPUT ${CX_NATIVE_TG_DIR}
     COMMAND ${CMAKE_COMMAND} -E make_directory ${CX_NATIVE_TG_DIR}
@@ -13,12 +13,12 @@ if( ${LLVM_TABLEGEN} STREQUAL "tblgen" )
     DEPENDS ${CX_NATIVE_TG_DIR}
     COMMENT "Configuring native TableGen...")
 
-  add_custom_command(OUTPUT ${LLVM_TABLEGEN}
+  add_custom_command(OUTPUT ${LLVM_TABLEGEN_EXE}
     COMMAND ${CMAKE_BUILD_TOOL}
     DEPENDS ${CX_NATIVE_TG_DIR}/CMakeCache.txt
     WORKING_DIRECTORY ${CX_NATIVE_TG_DIR}/utils/TableGen
     COMMENT "Building native TableGen...")
-  add_custom_target(NativeTableGen DEPENDS ${LLVM_TABLEGEN})
+  add_custom_target(NativeTableGen DEPENDS ${LLVM_TABLEGEN_EXE})
 
   add_dependencies(tblgen NativeTableGen)
 
diff --git a/cmake/modules/TableGen.cmake b/cmake/modules/TableGen.cmake
index adb22c7..16c732b 100644
--- a/cmake/modules/TableGen.cmake
+++ b/cmake/modules/TableGen.cmake
@@ -6,11 +6,11 @@ macro(tablegen ofn)
   file(GLOB all_tds "*.td")
 
   add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
-    COMMAND ${LLVM_TABLEGEN} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR}
+    COMMAND ${LLVM_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR}
     -I ${LLVM_MAIN_SRC_DIR}/lib/Target -I ${LLVM_MAIN_INCLUDE_DIR}
     ${CMAKE_CURRENT_SOURCE_DIR}/${LLVM_TARGET_DEFINITIONS} 
     -o ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
-    DEPENDS ${LLVM_TABLEGEN} ${all_tds}
+    DEPENDS tblgen ${all_tds}
     COMMENT "Building ${ofn}.tmp..."
     )
   add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}
diff --git a/docs/CMake.html b/docs/CMake.html
index ac3b57a..1f50d39 100644
--- a/docs/CMake.html
+++ b/docs/CMake.html
@@ -217,6 +217,11 @@
   <dd>Path where LLVM will be installed if "make install" is invoked
     or the "INSTALL" target is built.</dd>
 
+  <dt><b>LLVM_LIBDIR_SUFFIX</b>:STRING</dt>
+  <dd>Extra suffix to append to the directory where libraries are to
+    be installed. On a 64-bit architecture, one could use
+    -DLLVM_LIBDIR_SUFFIX=64 to install libraries to /usr/lib64.</dd>
+
   <dt><b>CMAKE_C_FLAGS</b>:STRING</dt>
   <dd>Extra flags to use when compiling C source files.</dd>
 
@@ -296,7 +301,13 @@
 
 <div class="doc_text">
 
-<p>TODO</p>
+<p>See <a href="http://www.vtk.org/Wiki/CMake_Cross_Compiling">this
+    wiki page</a> for generic instructions on how to cross-compile
+    with CMake. It goes into detailed explanations and may seem
+    daunting, but it is not. On the wiki page there are several
+    examples including toolchain files. Go directly to
+    <a href="http://www.vtk.org/Wiki/CMake_Cross_Compiling#Information_how_to_set_up_various_cross_compiling_toolchains">this
+    section</a> for a quick solution.</p>
 
 </div>
 
diff --git a/docs/LangRef.html b/docs/LangRef.html
index 89d4f93..897654d 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -1091,19 +1091,27 @@ stack before the local variables that's checked upon return from the function to
 see if it has been overwritten. A heuristic is used to determine if a function
 needs stack protectors or not.
 
-<p>If a function that has an <tt>ssp</tt> attribute is inlined into a function
+<br><br>If a function that has an <tt>ssp</tt> attribute is inlined into a function
 that doesn't have an <tt>ssp</tt> attribute, then the resulting function will
-have an <tt>ssp</tt> attribute.</p></dd>
+have an <tt>ssp</tt> attribute.</dd>
 
 <dt><tt>sspreq</tt></dt>
 <dd>This attribute indicates that the function should <em>always</em> emit a
 stack smashing protector. This overrides the <tt><a href="#ssp">ssp</a></tt>
 function attribute.
 
-<p>If a function that has an <tt>sspreq</tt> attribute is inlined into a
+If a function that has an <tt>sspreq</tt> attribute is inlined into a
 function that doesn't have an <tt>sspreq</tt> attribute or which has
 an <tt>ssp</tt> attribute, then the resulting function will have
-an <tt>sspreq</tt> attribute.</p></dd>
+an <tt>sspreq</tt> attribute.</dd>
+
+<dt><tt>noredzone</tt></dt>
+<dd>This attribute indicates that the code generator should not use a red zone,
+even if the target-specific ABI normally permits one.</dd>
+
+<dt><tt>noimplicitfloat</tt></dt>
+<dd>This attribute disables implicit floating point instructions.</dd>
+
 </dl>
 
 </div>
@@ -1177,6 +1185,9 @@ aspect of the data layout.  The specifications accepted are as follows: </p>
   <dt><tt>a<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
   <dd>This specifies the alignment for an aggregate type of a given bit
   <i>size</i>.</dd>
+  <dt><tt>s<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
+  <dd>This specifies the alignment for a stack object of a given bit
+  <i>size</i>.</dd>
 </dl>
 <p>When constructing the data layout for a given target, LLVM starts with a
 default set of specifications which are then (possibly) overriden by the
@@ -1196,6 +1207,7 @@ are given in this list:</p>
   <li><tt>v64:64:64</tt> - 64-bit vector is 64-bit aligned</li>
   <li><tt>v128:128:128</tt> - 128-bit vector is 128-bit aligned</li>
   <li><tt>a0:0:1</tt> - aggregates are 8-bit aligned</li>
+  <li><tt>s0:64:64</tt> - stack objects are 64-bit aligned</li>
 </ul>
 <p>When LLVM is determining the alignment for a given type, it uses the 
 following rules:</p>
@@ -7209,7 +7221,7 @@ declare void @llvm.stackprotector( i8* &lt;guard&gt;, i8** &lt;slot&gt; )
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2009-06-05 00:49:04 +0200 (Fri, 05 Jun 2009) $
+  Last modified: $Date: 2009-06-12 21:45:19 +0200 (Fri, 12 Jun 2009) $
 </address>
 
 </body>
diff --git a/docs/TableGenFundamentals.html b/docs/TableGenFundamentals.html
index f477b47..48c60b9 100644
--- a/docs/TableGenFundamentals.html
+++ b/docs/TableGenFundamentals.html
@@ -371,8 +371,11 @@ supported include:</p>
   <dd>string value</dd>
 <dt><tt>[{ ... }]</tt></dt>
   <dd>code fragment</dd>
-<dt><tt>[ X, Y, Z ]</tt></dt>
-  <dd>list value.</dd>
+<dt><tt>[ X, Y, Z ]&lt;type&gt;</tt></dt>
+  <dd>list value.  &lt;type&gt; is the type of the list
+element and is usually optional.  In rare cases,
+TableGen is unable to deduce the element type in
+which case the user must specify it explicitly.</dd>
 <dt><tt>{ a, b, c }</tt></dt>
   <dd>initializer for a "bits&lt;3&gt;" value</dd>
 <dt><tt>value</tt></dt>
@@ -778,7 +781,7 @@ This should highlight the APIs in <tt>TableGen/Record.h</tt>.</p>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2009-05-19 00:14:45 +0200 (Tue, 19 May 2009) $
+  Last modified: $Date: 2009-06-09 20:31:17 +0200 (Tue, 09 Jun 2009) $
 </address>
 
 </body>
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index e3d4299..41725be 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -393,6 +393,7 @@ namespace llvm {
     SCEVHandle getTruncateExpr(const SCEVHandle &Op, const Type *Ty);
     SCEVHandle getZeroExtendExpr(const SCEVHandle &Op, const Type *Ty);
     SCEVHandle getSignExtendExpr(const SCEVHandle &Op, const Type *Ty);
+    SCEVHandle getAnyExtendExpr(const SCEVHandle &Op, const Type *Ty);
     SCEVHandle getAddExpr(std::vector<SCEVHandle> &Ops);
     SCEVHandle getAddExpr(const SCEVHandle &LHS, const SCEVHandle &RHS) {
       std::vector<SCEVHandle> Ops;
@@ -465,6 +466,12 @@ namespace llvm {
     /// it is sign extended.  The conversion must not be narrowing.
     SCEVHandle getNoopOrSignExtend(const SCEVHandle &V, const Type *Ty);
 
+    /// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
+    /// the input value to the specified type. If the type must be extended,
+    /// it is extended with unspecified bits. The conversion must not be
+    /// narrowing.
+    SCEVHandle getNoopOrAnyExtend(const SCEVHandle &V, const Type *Ty);
+
     /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
     /// input value to the specified type.  The conversion must not be
     /// widening.
diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h
index a594e32..134e350 100644
--- a/include/llvm/Attributes.h
+++ b/include/llvm/Attributes.h
@@ -61,7 +61,8 @@ const Attributes NoImplicitFloat = 1<<23; /// disable implicit floating point
 /// @brief Attributes that only apply to function parameters.
 const Attributes ParameterOnly = ByVal | Nest | StructRet | NoCapture;
 
-/// @brief Attributes that only apply to function.
+/// @brief Attributes that may be applied to the function itself.  These cannot
+/// be used on return values or function parameters.
 const Attributes FunctionOnly = NoReturn | NoUnwind | ReadNone | ReadOnly | 
   NoInline | AlwaysInline | OptimizeForSize | StackProtect | StackProtectReq |
   NoRedZone | NoImplicitFloat;
@@ -186,7 +187,7 @@ public:
 
   /// getFnAttributes - The function attributes are returned.
   Attributes getFnAttributes() const {
-    return getAttributes(~0);
+    return getAttributes(~0U);
   }
   
   /// paramHasAttr - Return true if the specified parameter index has the
diff --git a/include/llvm/CodeGen/BinaryObject.h b/include/llvm/CodeGen/BinaryObject.h
new file mode 100644
index 0000000..0780cd6
--- /dev/null
+++ b/include/llvm/CodeGen/BinaryObject.h
@@ -0,0 +1,325 @@
+//===-- llvm/CodeGen/BinaryObject.h - Binary Object. -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a Binary Object, a.k.a. "blob", for holding data from code
+// generators, ready to be handed to the object module code writers.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_BINARYOBJECT_H
+#define LLVM_CODEGEN_BINARYOBJECT_H
+
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class MachineRelocation;
+typedef std::vector<uint8_t> BinaryData;
+
+class BinaryObject {
+protected:
+  std::string Name;
+  bool IsLittleEndian;
+  bool Is64Bit;
+  BinaryData Data;
+  std::vector<MachineRelocation> Relocations;
+
+public:
+  /// Constructors and destructor. The default constructor clears both flags
+  /// (big-endian, 32-bit words) rather than leaving them indeterminate.
+  BinaryObject() : IsLittleEndian(false), Is64Bit(false) {}
+  BinaryObject(bool isLittleEndian, bool is64Bit)
+    : IsLittleEndian(isLittleEndian), Is64Bit(is64Bit) {}
+
+  BinaryObject(const std::string &name, bool isLittleEndian, bool is64Bit)
+    : Name(name), IsLittleEndian(isLittleEndian), Is64Bit(is64Bit) {}
+
+  ~BinaryObject() {}
+
+  /// getName - get name of BinaryObject
+  inline std::string getName() const { return Name; }
+
+  /// get size of binary data
+  size_t size() const {
+    return Data.size();
+  }
+
+  /// get binary data
+  BinaryData& getData() {
+    return Data;
+  }
+
+  /// get machine relocations
+  const std::vector<MachineRelocation>& getRelocations() const {
+    return Relocations;
+  }
+
+  /// emitByte - This callback is invoked when a byte needs to be
+  /// written to the data stream.
+  inline void emitByte(uint8_t B) {
+    Data.push_back(B);
+  }
+
+  /// emitWord16 - This callback is invoked when a 16-bit word needs to be
+  /// written to the data stream in correct endian format and correct size.
+  inline void emitWord16(uint16_t W) {
+    if (IsLittleEndian)
+      emitWord16LE(W);
+    else
+      emitWord16BE(W);
+  }
+
+  /// emitWord16LE - This callback is invoked when a 16-bit word needs to be
+  /// written to the data stream in little-endian format.
+  inline void emitWord16LE(uint16_t W) {
+    Data.push_back((W >> 0) & 255);
+    Data.push_back((W >> 8) & 255);
+  }
+
+  /// emitWord16BE - This callback is invoked when a 16-bit word needs to be
+  /// written to the data stream in big-endian format.
+  inline void emitWord16BE(uint16_t W) {
+    Data.push_back((W >> 8) & 255);
+    Data.push_back((W >> 0) & 255);
+  }
+
+  /// emitWord - This callback is invoked when a word needs to be
+  /// written to the data stream in correct endian format and correct size.
+  inline void emitWord(uint64_t W) {
+    if (!Is64Bit)
+      emitWord32(W);
+    else
+      emitWord64(W);
+  }
+
+  /// emitWord32 - This callback is invoked when a 32-bit word needs to be
+  /// written to the data stream in correct endian format.
+  inline void emitWord32(uint32_t W) {
+    if (IsLittleEndian)
+      emitWordLE(W);
+    else
+      emitWordBE(W);
+  }
+
+  /// emitWord64 - This callback is invoked when a 64-bit word needs to be
+  /// written to the data stream in correct endian format.
+  inline void emitWord64(uint64_t W) {
+    if (IsLittleEndian)
+      emitDWordLE(W);
+    else
+      emitDWordBE(W);
+  }
+
+  /// emitWordLE - This callback is invoked when a 32-bit word needs to be
+  /// written to the data stream in little-endian format.
+  inline void emitWordLE(uint32_t W) {
+    Data.push_back((W >>  0) & 255);
+    Data.push_back((W >>  8) & 255);
+    Data.push_back((W >> 16) & 255);
+    Data.push_back((W >> 24) & 255);
+  }
+
+  /// emitWordBE - This callback is invoked when a 32-bit word needs to be
+  /// written to the data stream in big-endian format.
+  ///
+  inline void emitWordBE(uint32_t W) {
+    Data.push_back((W >> 24) & 255);
+    Data.push_back((W >> 16) & 255);
+    Data.push_back((W >>  8) & 255);
+    Data.push_back((W >>  0) & 255);
+  }
+
+  /// emitDWordLE - This callback is invoked when a 64-bit word needs to be
+  /// written to the data stream in little-endian format.
+  inline void emitDWordLE(uint64_t W) {
+    Data.push_back(unsigned(W >>  0) & 255);
+    Data.push_back(unsigned(W >>  8) & 255);
+    Data.push_back(unsigned(W >> 16) & 255);
+    Data.push_back(unsigned(W >> 24) & 255);
+    Data.push_back(unsigned(W >> 32) & 255);
+    Data.push_back(unsigned(W >> 40) & 255);
+    Data.push_back(unsigned(W >> 48) & 255);
+    Data.push_back(unsigned(W >> 56) & 255);
+  }
+
+  /// emitDWordBE - This callback is invoked when a 64-bit word needs to be
+  /// written to the data stream in big-endian format.
+  inline void emitDWordBE(uint64_t W) {
+    Data.push_back(unsigned(W >> 56) & 255);
+    Data.push_back(unsigned(W >> 48) & 255);
+    Data.push_back(unsigned(W >> 40) & 255);
+    Data.push_back(unsigned(W >> 32) & 255);
+    Data.push_back(unsigned(W >> 24) & 255);
+    Data.push_back(unsigned(W >> 16) & 255);
+    Data.push_back(unsigned(W >>  8) & 255);
+    Data.push_back(unsigned(W >>  0) & 255);
+  }
+
+  /// fixByte - This callback is invoked when a byte already in the buffer
+  /// needs to be overwritten at the given offset.
+  inline void fixByte(uint8_t B, uint32_t offset) {
+    Data[offset] = B;
+  }
+
+  /// fixWord16 - This callback is invoked when a 16-bit word needs to
+  /// fixup the data stream in correct endian format.
+  inline void fixWord16(uint16_t W, uint32_t offset) {
+    if (IsLittleEndian)
+      fixWord16LE(W, offset);
+    else
+      fixWord16BE(W, offset);
+  }
+
+  /// fixWord16LE - This callback is invoked when a 16-bit word needs to
+  /// fixup the data stream in little endian format.
+  inline void fixWord16LE(uint16_t W, uint32_t offset) {
+    Data[offset++] = W & 255;
+    Data[offset] = (W >> 8) & 255;
+  }
+
+  /// fixWord16BE - This callback is invoked when a 16-bit word needs to
+  /// fixup data stream in big endian format.
+  inline void fixWord16BE(uint16_t W, uint32_t offset) {
+    Data[offset++] = (W >> 8) & 255;
+    Data[offset] = W & 255;
+  }
+
+  /// fixWord - This callback is invoked when a word needs to
+  /// fixup the data in correct endian format and correct size.
+  inline void fixWord(uint64_t W, uint32_t offset) {
+    if (!Is64Bit)
+      fixWord32(W, offset);
+    else
+      fixWord64(W, offset);
+  }
+
+  /// fixWord32 - This callback is invoked when a 32-bit word needs to
+  /// fixup the data in correct endian format.
+  inline void fixWord32(uint32_t W, uint32_t offset) {
+    if (IsLittleEndian)
+      fixWord32LE(W, offset);
+    else
+      fixWord32BE(W, offset);
+  }
+
+  /// fixWord32LE - This callback is invoked when a 32-bit word needs to
+  /// fixup the data in little endian format.
+  inline void fixWord32LE(uint32_t W, uint32_t offset) {
+    Data[offset++] = W & 255;
+    Data[offset++] = (W >> 8) & 255;
+    Data[offset++] = (W >> 16) & 255;
+    Data[offset] = (W >> 24) & 255;
+  }
+
+  /// fixWord32BE - This callback is invoked when a 32-bit word needs to
+  /// fixup the data in big endian format.
+  inline void fixWord32BE(uint32_t W, uint32_t offset) {
+    Data[offset++] = (W >> 24) & 255;
+    Data[offset++] = (W >> 16) & 255;
+    Data[offset++] = (W >> 8) & 255;
+    Data[offset] = W & 255;
+  }
+
+  /// fixWord64 - This callback is invoked when a 64-bit word needs to
+  /// fixup the data in correct endian format.
+  inline void fixWord64(uint64_t W, uint32_t offset) {
+    if (IsLittleEndian)
+      fixWord64LE(W, offset);
+    else
+      fixWord64BE(W, offset);
+  }
+
+  /// fixWord64LE - This callback is invoked when a 64-bit word needs to
+  /// fixup the data in little endian format.
+  inline void fixWord64LE(uint64_t W, uint32_t offset) {
+    Data[offset++] = W & 255;
+    Data[offset++] = (W >> 8) & 255;
+    Data[offset++] = (W >> 16) & 255;
+    Data[offset++] = (W >> 24) & 255;
+    Data[offset++] = (W >> 32) & 255;
+    Data[offset++] = (W >> 40) & 255;
+    Data[offset++] = (W >> 48) & 255;
+    Data[offset] = (W >> 56) & 255;
+  }
+
+  /// fixWord64BE - This callback is invoked when a 64-bit word needs to
+  /// fixup the data in big endian format.
+  inline void fixWord64BE(uint64_t W, uint32_t offset) {
+    Data[offset++] = (W >> 56) & 255;
+    Data[offset++] = (W >> 48) & 255;
+    Data[offset++] = (W >> 40) & 255;
+    Data[offset++] = (W >> 32) & 255;
+    Data[offset++] = (W >> 24) & 255;
+    Data[offset++] = (W >> 16) & 255;
+    Data[offset++] = (W >> 8) & 255;
+    Data[offset] = W & 255;
+  }
+
+  /// emitAlignment - Pad the data to the specified alignment.
+  void emitAlignment(unsigned Alignment) {
+    if (Alignment <= 1) return;
+    unsigned PadSize = -Data.size() & (Alignment-1);
+    for (unsigned i = 0; i<PadSize; ++i)
+      Data.push_back(0);
+  }
+
+  /// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
+  /// written to the data stream.
+  void emitULEB128Bytes(uint64_t Value) {
+    do {
+      unsigned char Byte = Value & 0x7f;
+      Value >>= 7;
+      if (Value) Byte |= 0x80;
+      emitByte(Byte);
+    } while (Value);
+  }
+
+  /// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
+  /// written to the data stream.
+  void emitSLEB128Bytes(int64_t Value) {
+    int Sign = Value >> (8 * sizeof(Value) - 1);
+    bool IsMore;
+
+    do {
+      unsigned char Byte = Value & 0x7f;
+      Value >>= 7;
+      IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+      if (IsMore) Byte |= 0x80;
+      emitByte(Byte);
+    } while (IsMore);
+  }
+
+  /// emitString - This callback is invoked when a String needs to be
+  /// written to the data stream.
+  void emitString(const std::string &String) {
+    for (unsigned i = 0, N = static_cast<unsigned>(String.size()); i<N; ++i) {
+      unsigned char C = String[i];
+      emitByte(C);
+    }
+    emitByte(0);
+  }
+
+  /// getCurrentPCOffset - Return the offset from the start of the emitted
+  /// buffer that we are currently writing to.
+  uintptr_t getCurrentPCOffset() const {
+    return Data.size();
+  }
+
+  /// addRelocation - Whenever a relocatable address is needed, it should be
+  /// noted with this interface.
+  void addRelocation(const MachineRelocation& relocation) {
+    Relocations.push_back(relocation);
+  }
+};
+
+} // end namespace llvm
+
+#endif
+
diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h
index bf6b76e..73197af 100644
--- a/include/llvm/CodeGen/JITCodeEmitter.h
+++ b/include/llvm/CodeGen/JITCodeEmitter.h
@@ -97,7 +97,7 @@ public:
   /// emitWordLE - This callback is invoked when a 32-bit word needs to be
   /// written to the output stream in little-endian format.
   ///
-  void emitWordLE(unsigned W) {
+  void emitWordLE(uint32_t W) {
     if (4 <= BufferEnd-CurBufferPtr) {
       *CurBufferPtr++ = (uint8_t)(W >>  0);
       *CurBufferPtr++ = (uint8_t)(W >>  8);
@@ -111,7 +111,7 @@ public:
   /// emitWordBE - This callback is invoked when a 32-bit word needs to be
   /// written to the output stream in big-endian format.
   ///
-  void emitWordBE(unsigned W) {
+  void emitWordBE(uint32_t W) {
     if (4 <= BufferEnd-CurBufferPtr) {
       *CurBufferPtr++ = (uint8_t)(W >> 24);
       *CurBufferPtr++ = (uint8_t)(W >> 16);
@@ -176,7 +176,7 @@ public:
 
   /// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
   /// written to the output stream.
-  void emitULEB128Bytes(unsigned Value) {
+  void emitULEB128Bytes(uint64_t Value) {
     do {
       uint8_t Byte = Value & 0x7f;
       Value >>= 7;
@@ -187,7 +187,7 @@ public:
   
   /// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
   /// written to the output stream.
-  void emitSLEB128Bytes(int32_t Value) {
+  void emitSLEB128Bytes(int64_t Value) {
     int32_t Sign = Value >> (8 * sizeof(Value) - 1);
     bool IsMore;
   
@@ -212,7 +212,7 @@ public:
   }
   
   /// emitInt32 - Emit a int32 directive.
-  void emitInt32(int32_t Value) {
+  void emitInt32(uint32_t Value) {
     if (4 <= BufferEnd-CurBufferPtr) {
       *((uint32_t*)CurBufferPtr) = Value;
       CurBufferPtr += 4;
diff --git a/include/llvm/CodeGen/LazyLiveness.h b/include/llvm/CodeGen/LazyLiveness.h
new file mode 100644
index 0000000..82e4a15
--- /dev/null
+++ b/include/llvm/CodeGen/LazyLiveness.h
@@ -0,0 +1,63 @@
+//===- LazyLiveness.h - Lazy, CFG-invariant liveness information ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements a lazy liveness analysis as per "Fast Liveness Checking
+// for SSA-form Programs," by Boissinot, et al.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LAZYLIVENESS_H
+#define LLVM_CODEGEN_LAZYLIVENESS_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include <vector>
+
+namespace llvm {
+
+class MachineRegisterInfo;
+
+class LazyLiveness : public MachineFunctionPass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  LazyLiveness() : MachineFunctionPass(&ID) { }
+  
+  void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll(); // Declares every analysis as preserved.
+    AU.addRequired<MachineDominatorTree>(); // Needs the dominator tree.
+  }
+  
+  bool runOnMachineFunction(MachineFunction &mf);
+
+  bool vregLiveIntoMBB(unsigned vreg, MachineBasicBlock* MBB);
+  
+private:
+  void computeBackedgeChain(MachineFunction& mf, MachineBasicBlock* MBB);
+  
+  typedef std::pair<MachineBasicBlock*, MachineBasicBlock*> edge_t; // Block pair.
+  
+  MachineRegisterInfo* MRI;
+  
+  DenseMap<MachineBasicBlock*, unsigned> preorder;
+  std::vector<MachineBasicBlock*> rev_preorder;
+  // NOTE(review): rv/tv look like the R_v/T_v sets of the cited paper - confirm.
+  DenseMap<MachineBasicBlock*, SparseBitVector<128> > rv;
+  DenseMap<MachineBasicBlock*, SparseBitVector<128> > tv;
+  DenseSet<edge_t> backedges;
+  SparseBitVector<128> backedge_source;
+  SparseBitVector<128> backedge_target;
+  SparseBitVector<128> calculated;
+};
+
+}
+
+#endif
+
diff --git a/include/llvm/CodeGen/MachineCodeEmitter.h b/include/llvm/CodeGen/MachineCodeEmitter.h
index aaa41a4..eb1ea2d 100644
--- a/include/llvm/CodeGen/MachineCodeEmitter.h
+++ b/include/llvm/CodeGen/MachineCodeEmitter.h
@@ -104,7 +104,7 @@ public:
   /// emitWordLE - This callback is invoked when a 32-bit word needs to be
   /// written to the output stream in little-endian format.
   ///
-  void emitWordLE(unsigned W) {
+  void emitWordLE(uint32_t W) {
     if (4 <= BufferEnd-CurBufferPtr) {
       *CurBufferPtr++ = (uint8_t)(W >>  0);
       *CurBufferPtr++ = (uint8_t)(W >>  8);
@@ -118,7 +118,7 @@ public:
   /// emitWordBE - This callback is invoked when a 32-bit word needs to be
   /// written to the output stream in big-endian format.
   ///
-  void emitWordBE(unsigned W) {
+  void emitWordBE(uint32_t W) {
     if (4 <= BufferEnd-CurBufferPtr) {
       *CurBufferPtr++ = (uint8_t)(W >> 24);
       *CurBufferPtr++ = (uint8_t)(W >> 16);
@@ -183,7 +183,7 @@ public:
 
   /// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
   /// written to the output stream.
-  void emitULEB128Bytes(unsigned Value) {
+  void emitULEB128Bytes(uint64_t Value) {
     do {
       uint8_t Byte = Value & 0x7f;
       Value >>= 7;
@@ -194,8 +194,8 @@ public:
   
   /// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
   /// written to the output stream.
-  void emitSLEB128Bytes(int32_t Value) {
-    int32_t Sign = Value >> (8 * sizeof(Value) - 1);
+  void emitSLEB128Bytes(uint64_t Value) {
+    uint64_t Sign = Value >> (8 * sizeof(Value) - 1);
     bool IsMore;
   
     do {
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h
index 0d171f6..170e184 100644
--- a/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -243,7 +243,7 @@ public:
   }
 
   // The JIT overrides a version that actually does this.
-  virtual void runJITOnFunction(Function *F, MachineCodeInfo *MCI = 0) { }
+  virtual void runJITOnFunction(Function *, MachineCodeInfo * = 0) { }
 
   /// getGlobalValueAtAddress - Return the LLVM global value object that starts
   /// at the specified address.
diff --git a/include/llvm/Function.h b/include/llvm/Function.h
index ccc006c..228ef94 100644
--- a/include/llvm/Function.h
+++ b/include/llvm/Function.h
@@ -395,6 +395,10 @@ public:
   /// including any contained basic blocks.
   ///
   void dropAllReferences();
+
+  /// hasAddressTaken - returns true if there are any uses of this function
+  /// other than direct calls or invokes to it.
+  bool hasAddressTaken() const;
 };
 
 inline ValueSymbolTable *
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index edd27223..0fb837d 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -127,6 +127,7 @@ namespace {
       (void) llvm::createPrintModulePass(0);
       (void) llvm::createPrintFunctionPass("", 0);
       (void) llvm::createDbgInfoPrinterPass();
+      (void) llvm::createPartialInliningPass();
 
       (void)new llvm::IntervalPartition();
       (void)new llvm::FindUsedTypes();
diff --git a/include/llvm/Target/TargetELFWriterInfo.h b/include/llvm/Target/TargetELFWriterInfo.h
index e266a71..f7e3392 100644
--- a/include/llvm/Target/TargetELFWriterInfo.h
+++ b/include/llvm/Target/TargetELFWriterInfo.h
@@ -15,15 +15,21 @@
 #define LLVM_TARGET_TARGETELFWRITERINFO_H
 
 namespace llvm {
+  class Function;
+  class TargetData;
+  class TargetMachine;
 
   //===--------------------------------------------------------------------===//
   //                          TargetELFWriterInfo
   //===--------------------------------------------------------------------===//
 
   class TargetELFWriterInfo {
+  protected:
     // EMachine - This field is the target specific value to emit as the
     // e_machine member of the ELF header.
     unsigned short EMachine;
+    TargetMachine &TM;
+    bool is64Bit, isLittleEndian;
   public:
 
     // Machine architectures
@@ -44,10 +50,39 @@ namespace llvm {
       EM_X86_64 = 62   // AMD64
     };
 
-    explicit TargetELFWriterInfo(MachineType machine) : EMachine(machine) {}
-    virtual ~TargetELFWriterInfo() {}
+    // ELF File classes
+    enum {
+      ELFCLASS32 = 1, // 32-bit object file
+      ELFCLASS64 = 2  // 64-bit object file
+    };
+
+    // ELF Endianness
+    enum {
+      ELFDATA2LSB = 1, // Little-endian object file
+      ELFDATA2MSB = 2  // Big-endian object file
+    };
+
+    explicit TargetELFWriterInfo(TargetMachine &tm);
+    virtual ~TargetELFWriterInfo();
 
     unsigned short getEMachine() const { return EMachine; }
+    unsigned getEFlags() const { return 0; }
+    unsigned getEIClass() const { return is64Bit ? ELFCLASS64 : ELFCLASS32; }
+    unsigned getEIData() const {
+      return isLittleEndian ? ELFDATA2LSB : ELFDATA2MSB;
+    }
+
+    /// ELF Header and ELF Section Header Info
+    unsigned getHdrSize() const { return is64Bit ? 64 : 52; }
+    unsigned getSHdrSize() const { return is64Bit ? 64 : 40; }
+
+    /// Symbol Table Info
+    unsigned getSymTabEntrySize() const { return is64Bit ? 24 : 16; }
+    unsigned getSymTabAlignment() const { return is64Bit ? 8 : 4; }
+
+    /// getFunctionAlignment - Returns the alignment for function 'F'. Targets
+    /// with different alignment constraints should override this method.
+    virtual unsigned getFunctionAlignment(const Function *F) const;
   };
 
 } // end llvm namespace
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 327af27..47dcc6c 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -350,7 +350,7 @@ public:
   LegalizeAction getOperationAction(unsigned Op, MVT VT) const {
     if (VT.isExtended()) return Expand;
     assert(Op < array_lengthof(OpActions) &&
-           (unsigned)VT.getSimpleVT() < sizeof(OpActions[0])*4 &&
+           (unsigned)VT.getSimpleVT() < sizeof(OpActions[0])*8 &&
            "Table isn't big enough!");
     return (LegalizeAction)((OpActions[Op] >> (2*VT.getSimpleVT())) & 3);
   }
@@ -417,11 +417,10 @@ public:
   /// for it.
   LegalizeAction
   getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
-    assert(IdxMode < array_lengthof(IndexedModeActions[0]) &&
-           (unsigned)VT.getSimpleVT() < sizeof(IndexedModeActions[0][0])*4 &&
+    assert( IdxMode < array_lengthof(IndexedModeActions[0][0]) &&
+           ((unsigned)VT.getSimpleVT()) < MVT::LAST_VALUETYPE &&
            "Table isn't big enough!");
-    return (LegalizeAction)((IndexedModeActions[0][IdxMode] >>
-                             (2*VT.getSimpleVT())) & 3);
+    return (LegalizeAction)((IndexedModeActions[(unsigned)VT.getSimpleVT()][0][IdxMode]));
   }
 
   /// isIndexedLoadLegal - Return true if the specified indexed load is legal
@@ -438,11 +437,10 @@ public:
   /// for it.
   LegalizeAction
   getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
-    assert(IdxMode < array_lengthof(IndexedModeActions[1]) &&
-           (unsigned)VT.getSimpleVT() < sizeof(IndexedModeActions[1][0])*4 &&
+    assert(IdxMode < array_lengthof(IndexedModeActions[0][1]) &&
+           (unsigned)VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
            "Table isn't big enough!");
-    return (LegalizeAction)((IndexedModeActions[1][IdxMode] >>
-                             (2*VT.getSimpleVT())) & 3);
+    return (LegalizeAction)((IndexedModeActions[(unsigned)VT.getSimpleVT()][1][IdxMode]));
   }  
 
   /// isIndexedStoreLegal - Return true if the specified indexed load is legal
@@ -942,7 +940,7 @@ protected:
   /// with the specified type and indicate what to do about it.
   void setOperationAction(unsigned Op, MVT VT,
                           LegalizeAction Action) {
-    assert((unsigned)VT.getSimpleVT() < sizeof(OpActions[0])*4 &&
+    assert((unsigned)VT.getSimpleVT() < sizeof(OpActions[0])*8 &&
            Op < array_lengthof(OpActions) && "Table isn't big enough!");
     OpActions[Op] &= ~(uint64_t(3UL) << VT.getSimpleVT()*2);
     OpActions[Op] |= (uint64_t)Action << VT.getSimpleVT()*2;
@@ -978,11 +976,10 @@ protected:
   /// TargetLowering.cpp
   void setIndexedLoadAction(unsigned IdxMode, MVT VT,
                             LegalizeAction Action) {
-    assert((unsigned)VT.getSimpleVT() < sizeof(IndexedModeActions[0])*4 &&
-           IdxMode < array_lengthof(IndexedModeActions[0]) &&
+    assert((unsigned)VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+           IdxMode < array_lengthof(IndexedModeActions[0][0]) &&
            "Table isn't big enough!");
-    IndexedModeActions[0][IdxMode] &= ~(uint64_t(3UL) << VT.getSimpleVT()*2);
-    IndexedModeActions[0][IdxMode] |= (uint64_t)Action << VT.getSimpleVT()*2;
+    IndexedModeActions[(unsigned)VT.getSimpleVT()][0][IdxMode] = (uint8_t)Action;
   }
   
   /// setIndexedStoreAction - Indicate that the specified indexed store does or
@@ -991,11 +988,10 @@ protected:
   /// TargetLowering.cpp
   void setIndexedStoreAction(unsigned IdxMode, MVT VT,
                              LegalizeAction Action) {
-    assert((unsigned)VT.getSimpleVT() < sizeof(IndexedModeActions[1][0])*4 &&
-           IdxMode < array_lengthof(IndexedModeActions[1]) &&
+    assert((unsigned)VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+           IdxMode < array_lengthof(IndexedModeActions[0][1] ) &&
            "Table isn't big enough!");
-    IndexedModeActions[1][IdxMode] &= ~(uint64_t(3UL) << VT.getSimpleVT()*2);
-    IndexedModeActions[1][IdxMode] |= (uint64_t)Action << VT.getSimpleVT()*2;
+    IndexedModeActions[(unsigned)VT.getSimpleVT()][1][IdxMode] = (uint8_t)Action;
   }
   
   /// setConvertAction - Indicate that the specified conversion does or does
@@ -1581,10 +1577,13 @@ private:
   /// indicates how instruction selection should deal with the store.
   uint64_t TruncStoreActions[MVT::LAST_VALUETYPE];
 
-  /// IndexedModeActions - For each indexed mode and each value type, keep a
-  /// pair of LegalizeAction that indicates how instruction selection should
-  /// deal with the load / store.
-  uint64_t IndexedModeActions[2][ISD::LAST_INDEXED_MODE];
+  /// IndexedModeActions - For each value type and each indexed mode, keep a
+  /// pair of LegalizeActions that indicates how instruction selection should
+  /// deal with the load / store.  The first dimension is the value type of
+  /// the memory reference.  The second dimension selects load [0] vs.
+  /// store [1].  The third dimension is the indexed mode, bounded by
+  /// ISD::LAST_INDEXED_MODE.
+  uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][2][ISD::LAST_INDEXED_MODE];
   
   /// ConvertActions - For each conversion from source type to destination type,
   /// keep a LegalizeAction that indicates how instruction selection should
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index bdcc4ef..a8db68c 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -78,6 +78,14 @@ namespace CodeGenOpt {
   };
 }
 
+namespace FloatABI {
+  enum ABIType {
+    Default,
+    Soft,
+    Hard
+  };
+}
+
 //===----------------------------------------------------------------------===//
 ///
 /// TargetMachine - Primary interface to the complete machine description for
@@ -88,7 +96,7 @@ class TargetMachine {
   TargetMachine(const TargetMachine &);   // DO NOT IMPLEMENT
   void operator=(const TargetMachine &);  // DO NOT IMPLEMENT
 protected: // Can only create subclasses.
-  TargetMachine() : AsmInfo(0) { }
+  TargetMachine();
 
   /// getSubtargetImpl - virtual method implemented by subclasses that returns
   /// a reference to that target's TargetSubtarget-derived member variable.
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index 0c74fa1..377e03f 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -73,6 +73,14 @@ namespace llvm {
   /// target FP instructions.
   extern bool UseSoftFloat;
 
+  /// FloatABIType - This setting is set when the -float-abi=xxx option is
+  /// specified on the command line. It may be one of Default, Soft, or Hard.
+  /// Default selects the target's default behavior. Soft selects the ABI for
+  /// UseSoftFloat, but does not indicate that FP hardware may not be used.
+  /// Such a combination is unfortunately popular (e.g. arm-apple-darwin).
+  /// Hard presumes that the normal FP ABI is used.
+  extern FloatABI::ABIType FloatABIType;
+
   /// NoZerosInBSS - By default some codegens place zero-initialized data to
   /// .bss section. This flag disables such behaviour (necessary, e.g. for
   /// crt*.o compiling).
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index 2cd2967..364d4d0 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -228,6 +228,7 @@ class SDNode<string opcode, SDTypeProfile typeprof,
   SDTypeProfile TypeProfile = typeprof;
 }
 
+// Special TableGen-recognized dag nodes
 def set;
 def implicit;
 def parallel;
diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h
index 4372ea0..750969b 100644
--- a/include/llvm/Transforms/IPO.h
+++ b/include/llvm/Transforms/IPO.h
@@ -214,6 +214,11 @@ Pass *createFunctionAttrsPass();
 ///
 ModulePass *createMergeFunctionsPass();
 
+//===----------------------------------------------------------------------===//
+/// createPartialInliningPass - This pass inlines parts of functions.
+///
+ModulePass *createPartialInliningPass();
+
 } // End llvm namespace
 
 #endif
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index fd97db8..98ab6f4 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -937,6 +937,48 @@ SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op,
   return Result;
 }
 
+/// getAnyExtendExpr - Return a SCEV for the given operand extended with
+/// unspecified bits out to the given type.
+///
+SCEVHandle ScalarEvolution::getAnyExtendExpr(const SCEVHandle &Op,
+                                             const Type *Ty) {
+  assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
+         "This is not an extending conversion!");
+  assert(isSCEVable(Ty) &&
+         "This is not a conversion to a SCEVable type!");
+  Ty = getEffectiveSCEVType(Ty);
+
+  // Sign-extend negative constants.
+  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+    if (SC->getValue()->getValue().isNegative())
+      return getSignExtendExpr(Op, Ty);
+
+  // Peel off a truncate cast.
+  if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
+    SCEVHandle NewOp = T->getOperand();
+    if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
+      return getAnyExtendExpr(NewOp, Ty);
+    return getTruncateOrNoop(NewOp, Ty);
+  }
+
+  // Next try a zext cast. If the cast is folded, use it.
+  SCEVHandle ZExt = getZeroExtendExpr(Op, Ty);
+  if (!isa<SCEVZeroExtendExpr>(ZExt))
+    return ZExt;
+
+  // Next try a sext cast. If the cast is folded, use it.
+  SCEVHandle SExt = getSignExtendExpr(Op, Ty);
+  if (!isa<SCEVSignExtendExpr>(SExt))
+    return SExt;
+
+  // If the expression is obviously signed, use the sext cast value.
+  if (isa<SCEVSMaxExpr>(Op))
+    return SExt;
+
+  // Absent any other information, use the zext cast value.
+  return ZExt;
+}
+
 /// getAddExpr - Get a canonical add expression, or something simpler if
 /// possible.
 SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
@@ -1903,6 +1945,23 @@ ScalarEvolution::getNoopOrSignExtend(const SCEVHandle &V, const Type *Ty) {
   return getSignExtendExpr(V, Ty);
 }
 
+/// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
+/// the input value to the specified type. If the type must be extended,
+/// it is extended with unspecified bits. The conversion must not be
+/// narrowing.
+SCEVHandle
+ScalarEvolution::getNoopOrAnyExtend(const SCEVHandle &V, const Type *Ty) {
+  const Type *SrcTy = V->getType();
+  assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
+         (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+         "Cannot noop or any extend with non-integer arguments!");
+  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
+         "getNoopOrAnyExtend cannot truncate!");
+  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+    return V;  // No conversion
+  return getAnyExtendExpr(V, Ty);
+}
+
 /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
 /// input value to the specified type.  The conversion must not be widening.
 SCEVHandle
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index ef77e46..e1f8fa4 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/ADT/STLExtras.h"
 using namespace llvm;
 
 /// InsertCastOfTo - Insert a cast of V to the specified type, doing what
@@ -319,8 +320,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEVHandle *op_begin,
   if (!AnyNonZeroIndices) {
     V = InsertNoopCastOfTo(V,
                            Type::Int8Ty->getPointerTo(PTy->getAddressSpace()));
-    Value *Idx = expand(SE.getAddExpr(Ops));
-    Idx = InsertNoopCastOfTo(Idx, Ty);
+    Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
 
     // Fold a GEP with constant operands.
     if (Constant *CLHS = dyn_cast<Constant>(V))
@@ -374,8 +374,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
 
   // Emit a bunch of add instructions
   for (int i = S->getNumOperands()-2; i >= 0; --i) {
-    Value *W = expand(S->getOperand(i));
-    W = InsertNoopCastOfTo(W, Ty);
+    Value *W = expandCodeFor(S->getOperand(i), Ty);
     V = InsertBinop(Instruction::Add, V, W, InsertPt);
   }
   return V;
@@ -389,13 +388,11 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
       FirstOp = 1;
 
   int i = S->getNumOperands()-2;
-  Value *V = expand(S->getOperand(i+1));
-  V = InsertNoopCastOfTo(V, Ty);
+  Value *V = expandCodeFor(S->getOperand(i+1), Ty);
 
   // Emit a bunch of multiply instructions
   for (; i >= FirstOp; --i) {
-    Value *W = expand(S->getOperand(i));
-    W = InsertNoopCastOfTo(W, Ty);
+    Value *W = expandCodeFor(S->getOperand(i), Ty);
     V = InsertBinop(Instruction::Mul, V, W, InsertPt);
   }
 
@@ -408,8 +405,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
 Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
   const Type *Ty = SE.getEffectiveSCEVType(S->getType());
 
-  Value *LHS = expand(S->getLHS());
-  LHS = InsertNoopCastOfTo(LHS, Ty);
+  Value *LHS = expandCodeFor(S->getLHS(), Ty);
   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
     const APInt &RHS = SC->getValue()->getValue();
     if (RHS.isPowerOf2())
@@ -418,8 +414,7 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
                          InsertPt);
   }
 
-  Value *RHS = expand(S->getRHS());
-  RHS = InsertNoopCastOfTo(RHS, Ty);
+  Value *RHS = expandCodeFor(S->getRHS(), Ty);
   return InsertBinop(Instruction::UDiv, LHS, RHS, InsertPt);
 }
 
@@ -448,6 +443,34 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
   const Type *Ty = SE.getEffectiveSCEVType(S->getType());
   const Loop *L = S->getLoop();
 
+  // First check for an existing canonical IV in a suitable type.
+  PHINode *CanonicalIV = 0;
+  if (PHINode *PN = L->getCanonicalInductionVariable())
+    if (SE.isSCEVable(PN->getType()) &&
+        isa<IntegerType>(SE.getEffectiveSCEVType(PN->getType())) &&
+        SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
+      CanonicalIV = PN;
+
+  // Rewrite an AddRec in terms of the canonical induction variable, if
+  // the AddRec's type is narrower than the canonical IV's type.
+  if (CanonicalIV &&
+      SE.getTypeSizeInBits(CanonicalIV->getType()) >
+      SE.getTypeSizeInBits(Ty)) {
+    SCEVHandle Start = SE.getAnyExtendExpr(S->getStart(),
+                                           CanonicalIV->getType());
+    SCEVHandle Step = SE.getAnyExtendExpr(S->getStepRecurrence(SE),
+                                          CanonicalIV->getType());
+    Value *V = expand(SE.getAddRecExpr(Start, Step, S->getLoop()));
+    BasicBlock::iterator SaveInsertPt = getInsertionPoint();
+    BasicBlock::iterator NewInsertPt =
+      next(BasicBlock::iterator(cast<Instruction>(V)));
+    while (isa<PHINode>(NewInsertPt)) ++NewInsertPt;
+    V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
+                      NewInsertPt);
+    setInsertionPoint(SaveInsertPt);
+    return V;
+  }
+
   // {X,+,F} --> X + {0,+,F}
   if (!S->getStart()->isZero()) {
     std::vector<SCEVHandle> NewOps(S->getOperands());
@@ -481,6 +504,14 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
   // {0,+,1} --> Insert a canonical induction variable into the loop!
   if (S->isAffine() &&
       S->getOperand(1) == SE.getIntegerSCEV(1, Ty)) {
+    // If there's a canonical IV, just use it.
+    if (CanonicalIV) {
+      assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
+             "IVs with types different from the canonical IV should "
+             "already have been handled!");
+      return CanonicalIV;
+    }
+
     // Create and insert the PHI node for the induction variable in the
     // specified loop.
     BasicBlock *Header = L->getHeader();
@@ -508,19 +539,16 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
     return PN;
   }
 
+  // {0,+,F} --> {0,+,1} * F
   // Get the canonical induction variable I for this loop.
-  Value *I = getOrInsertCanonicalInductionVariable(L, Ty);
+  Value *I = CanonicalIV ?
+             CanonicalIV :
+             getOrInsertCanonicalInductionVariable(L, Ty);
 
   // If this is a simple linear addrec, emit it now as a special case.
   if (S->isAffine()) {   // {0,+,F} --> i*F
-    Value *F = expand(S->getOperand(1));
-    F = InsertNoopCastOfTo(F, Ty);
-    
-    // IF the step is by one, just return the inserted IV.
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(F))
-      if (CI->getValue() == 1)
-        return I;
-    
+    Value *F = expandCodeFor(S->getOperand(1), Ty);
+
     // If the insert point is directly inside of the loop, emit the multiply at
     // the insert point.  Otherwise, L is a loop that is a parent of the insert
     // point loop.  If we can, move the multiply to the outer most loop that it
@@ -555,16 +583,24 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
   // into this folder.
   SCEVHandle IH = SE.getUnknown(I);   // Get I as a "symbolic" SCEV.
 
-  SCEVHandle V = S->evaluateAtIteration(IH, SE);
+  // Promote S up to the canonical IV type, if the cast is foldable.
+  SCEVHandle NewS = S;
+  SCEVHandle Ext = SE.getNoopOrAnyExtend(S, I->getType());
+  if (isa<SCEVAddRecExpr>(Ext))
+    NewS = Ext;
+
+  SCEVHandle V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
   //cerr << "Evaluated: " << *this << "\n     to: " << *V << "\n";
 
+  // Truncate the result down to the original type, if needed.
+  SCEVHandle T = SE.getTruncateOrNoop(V, Ty);
   return expand(V);
 }
 
 Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
   const Type *Ty = SE.getEffectiveSCEVType(S->getType());
-  Value *V = expand(S->getOperand());
-  V = InsertNoopCastOfTo(V, SE.getEffectiveSCEVType(V->getType()));
+  Value *V = expandCodeFor(S->getOperand(),
+                           SE.getEffectiveSCEVType(S->getOperand()->getType()));
   Instruction *I = new TruncInst(V, Ty, "tmp.", InsertPt);
   InsertedValues.insert(I);
   return I;
@@ -572,8 +608,8 @@ Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
 
 Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
   const Type *Ty = SE.getEffectiveSCEVType(S->getType());
-  Value *V = expand(S->getOperand());
-  V = InsertNoopCastOfTo(V, SE.getEffectiveSCEVType(V->getType()));
+  Value *V = expandCodeFor(S->getOperand(),
+                           SE.getEffectiveSCEVType(S->getOperand()->getType()));
   Instruction *I = new ZExtInst(V, Ty, "tmp.", InsertPt);
   InsertedValues.insert(I);
   return I;
@@ -581,8 +617,8 @@ Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
 
 Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
   const Type *Ty = SE.getEffectiveSCEVType(S->getType());
-  Value *V = expand(S->getOperand());
-  V = InsertNoopCastOfTo(V, SE.getEffectiveSCEVType(V->getType()));
+  Value *V = expandCodeFor(S->getOperand(),
+                           SE.getEffectiveSCEVType(S->getOperand()->getType()));
   Instruction *I = new SExtInst(V, Ty, "tmp.", InsertPt);
   InsertedValues.insert(I);
   return I;
@@ -590,11 +626,9 @@ Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
 
 Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
   const Type *Ty = SE.getEffectiveSCEVType(S->getType());
-  Value *LHS = expand(S->getOperand(0));
-  LHS = InsertNoopCastOfTo(LHS, Ty);
+  Value *LHS = expandCodeFor(S->getOperand(0), Ty);
   for (unsigned i = 1; i < S->getNumOperands(); ++i) {
-    Value *RHS = expand(S->getOperand(i));
-    RHS = InsertNoopCastOfTo(RHS, Ty);
+    Value *RHS = expandCodeFor(S->getOperand(i), Ty);
     Instruction *ICmp =
       new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS, "tmp", InsertPt);
     InsertedValues.insert(ICmp);
@@ -607,11 +641,9 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
 
 Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
   const Type *Ty = SE.getEffectiveSCEVType(S->getType());
-  Value *LHS = expand(S->getOperand(0));
-  LHS = InsertNoopCastOfTo(LHS, Ty);
+  Value *LHS = expandCodeFor(S->getOperand(0), Ty);
   for (unsigned i = 1; i < S->getNumOperands(); ++i) {
-    Value *RHS = expand(S->getOperand(i));
-    RHS = InsertNoopCastOfTo(RHS, Ty);
+    Value *RHS = expandCodeFor(S->getOperand(i), Ty);
     Instruction *ICmp =
       new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS, "tmp", InsertPt);
     InsertedValues.insert(ICmp);
diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp
index 336a2bd..cebb087 100644
--- a/lib/Archive/ArchiveWriter.cpp
+++ b/lib/Archive/ArchiveWriter.cpp
@@ -167,10 +167,11 @@ Archive::addFileBefore(const sys::Path& filePath, iterator where,
   mbr->data = 0;
   mbr->path = filePath;
   const sys::FileStatus *FSInfo = mbr->path.getFileStatus(false, ErrMsg);
-  if (FSInfo)
-    mbr->info = *FSInfo;
-  else
+  if (!FSInfo) {
+    delete mbr;
     return true;
+  }
+  mbr->info = *FSInfo;
 
   unsigned flags = 0;
   bool hasSlash = filePath.toString().find('/') != std::string::npos;
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 9f16728..6dcdded 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1308,16 +1308,6 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
   // Emit constants.
   WriteModuleConstants(VE, Stream);
   
-  // If we have any aggregate values in the value table, purge them - these can
-  // only be used to initialize global variables.  Doing so makes the value
-  // namespace smaller for code in functions.
-  int NumNonAggregates = VE.PurgeAggregateValues();
-  if (NumNonAggregates != -1) {
-    SmallVector<unsigned, 1> Vals;
-    Vals.push_back(NumNonAggregates);
-    Stream.EmitRecord(bitc::MODULE_CODE_PURGEVALS, Vals);
-  }
-  
   // Emit function bodies.
   for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
     if (!I->isDeclaration())
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 8002a36..32b2819 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -277,22 +277,6 @@ void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) {
 }
 
 
-/// PurgeAggregateValues - If there are any aggregate values at the end of the
-/// value list, remove them and return the count of the remaining values.  If
-/// there are none, return -1.
-int ValueEnumerator::PurgeAggregateValues() {
-  // If there are no aggregate values at the end of the list, return -1.
-  if (Values.empty() || Values.back().first->getType()->isSingleValueType())
-    return -1;
-  
-  // Otherwise, remove aggregate values...
-  while (!Values.empty() && !Values.back().first->getType()->isSingleValueType())
-    Values.pop_back();
-  
-  // ... and return the new size.
-  return Values.size();
-}
-
 void ValueEnumerator::incorporateFunction(const Function &F) {
   NumModuleValues = Values.size();
   
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index bb0324b..40eeabb 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -99,11 +99,6 @@ public:
     return Attributes;
   }
 
-  /// PurgeAggregateValues - If there are any aggregate values at the end of the
-  /// value list, remove them and return the count of the remaining values.  If
-  /// there are none, return -1.
-  int PurgeAggregateValues();
-  
   /// incorporateFunction/purgeFunction - If you'd like to deal with a function,
   /// use these two methods to get its data into the ValueEnumerator!
   ///
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 5a66f4b..c773378 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1581,6 +1581,7 @@ void DwarfDebug::EndFunction(MachineFunction *MF) {
     FunctionDbgScope = NULL;
     LexicalScopeStack.clear();
     AbstractInstanceRootList.clear();
+    AbstractInstanceRootMap.clear();
   }
 
   Lines.clear();
@@ -1669,7 +1670,11 @@ unsigned DwarfDebug::RecordRegionEnd(GlobalVariable *V) {
   DbgScope *Scope = getOrCreateScope(V);
   unsigned ID = MMI->NextLabelID();
   Scope->setEndLabelID(ID);
-  if (LexicalScopeStack.size() != 0)
+  // FIXME: region.end() may not be in the last basic block.
+  // For now, do not pop the last lexical scope, because the next basic
+  // block may start a new inlined function's body.
+  unsigned LSSize = LexicalScopeStack.size();
+  if (LSSize != 0 && LSSize != 1)
     LexicalScopeStack.pop_back();
 
   if (TimePassesIsEnabled)
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index ff917a7..5ba8b3c 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -12,6 +12,7 @@ add_llvm_library(LLVMCodeGen
   IntrinsicLowering.cpp
   LLVMTargetMachine.cpp
   LatencyPriorityQueue.cpp
+  LazyLiveness.cpp
   LiveInterval.cpp
   LiveIntervalAnalysis.cpp
   LiveStackAnalysis.cpp
diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h
index c22f6ed..796bc2c 100644
--- a/lib/CodeGen/ELF.h
+++ b/lib/CodeGen/ELF.h
@@ -10,23 +10,24 @@
 // This header contains common, non-processor-specific data structures and
 // constants for the ELF file format.
 //
-// The details of the ELF32 bits in this file are largely based on
-// the Tool Interface Standard (TIS) Executable and Linking Format
-// (ELF) Specification Version 1.2, May 1995. The ELF64 stuff is not
-// standardized, as far as I can tell. It was largely based on information
-// I found in OpenBSD header files.
+// The details of the ELF32 bits in this file are largely based on the Tool
+// Interface Standard (TIS) Executable and Linking Format (ELF) Specification
+// Version 1.2, May 1995. The ELF64 bits are based on the HP/Intel ELF-64
+// Object File Format document, Version 1.5 Draft 2, May 27, 1998.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef CODEGEN_ELF_H
 #define CODEGEN_ELF_H
 
+#include "llvm/GlobalVariable.h"
+#include "llvm/CodeGen/BinaryObject.h"
 #include "llvm/CodeGen/MachineRelocation.h"
 #include "llvm/Support/DataTypes.h"
 #include <cstring>
 
 namespace llvm {
-  class GlobalVariable;
+  class BinaryObject;
 
   // Identification Indexes
   enum {
@@ -47,71 +48,28 @@ namespace llvm {
     ET_HIPROC = 0xffff  // Processor-specific
   };
 
-  // Object file classes.
-  enum {
-    ELFCLASS32 = 1, // 32-bit object file
-    ELFCLASS64 = 2  // 64-bit object file
-  };
-
-  // Object file byte orderings.
-  enum {
-    ELFDATA2LSB = 1, // Little-endian object file
-    ELFDATA2MSB = 2  // Big-endian object file
-  };
-
   // Versioning
   enum {
     EV_NONE = 0,
     EV_CURRENT = 1
   };
 
-  struct ELFHeader {
-    // e_machine - This field is the target specific value to emit as the
-    // e_machine member of the ELF header.
-    unsigned short e_machine;
-
-    // e_flags - The machine flags for the target.  This defaults to zero.
-    unsigned e_flags;
-
-    // e_size - Holds the ELF header's size in bytes
-    unsigned e_ehsize;
-
-    // Endianess and ELF Class (64 or 32 bits)
-    unsigned ByteOrder;
-    unsigned ElfClass;
-
-    unsigned getByteOrder() const { return ByteOrder; }
-    unsigned getElfClass() const { return ElfClass; }
-    unsigned getSize() const { return e_ehsize; }
-    unsigned getMachine() const { return e_machine; }
-    unsigned getFlags() const { return e_flags; }
-
-    ELFHeader(unsigned short machine, unsigned flags,
-              bool is64Bit, bool isLittleEndian)
-      : e_machine(machine), e_flags(flags) {
-        ElfClass  = is64Bit ? ELFCLASS64 : ELFCLASS32;
-        ByteOrder = isLittleEndian ? ELFDATA2LSB : ELFDATA2MSB;
-        e_ehsize  = is64Bit ? 64 : 52;
-      }
-  };
-
   /// ELFSection - This struct contains information about each section that is
   /// emitted to the file.  This is eventually turned into the section header
   /// table at the end of the file.
-  struct ELFSection {
-
+  class ELFSection : public BinaryObject {
+    public:
     // ELF specific fields
-    std::string Name;       // Name of the section.
-    unsigned NameIdx;       // Index in .shstrtab of name, once emitted.
-    unsigned Type;
-    unsigned Flags;
-    uint64_t Addr;
-    unsigned Offset;
-    unsigned Size;
-    unsigned Link;
-    unsigned Info;
-    unsigned Align;
-    unsigned EntSize;
+    unsigned NameIdx;   // sh_name - .shstrtab idx of name, once emitted.
+    unsigned Type;      // sh_type - Section contents & semantics.
+    unsigned Flags;     // sh_flags - Section flags.
+    uint64_t Addr;      // sh_addr - The mem addr this section is in.
+    unsigned Offset;    // sh_offset - Offset from the file start.
+    unsigned Size;      // sh_size - The section size.
+    unsigned Link;      // sh_link - Section header table index link.
+    unsigned Info;      // sh_info - Auxiliary information.
+    unsigned Align;     // sh_addralign - Alignment of section.
+    unsigned EntSize;   // sh_entsize - Size of entries in the section.
 
     // Section Header Flags
     enum {
@@ -141,8 +99,8 @@ namespace llvm {
       SHT_REL      = 9,  // Relocation entries; no explicit addends.
       SHT_SHLIB    = 10, // Reserved.
       SHT_DYNSYM   = 11, // Symbol table.
-      SHT_LOPROC   = 0x70000000, // Lowest processor architecture-specific type.
-      SHT_HIPROC   = 0x7fffffff, // Highest processor architecture-specific type.
+      SHT_LOPROC   = 0x70000000, // Lowest processor arch-specific type.
+      SHT_HIPROC   = 0x7fffffff, // Highest processor arch-specific type.
       SHT_LOUSER   = 0x80000000, // Lowest type reserved for applications.
       SHT_HIUSER   = 0xffffffff  // Highest type reserved for applications.
     };
@@ -161,22 +119,9 @@ namespace llvm {
     /// SectionIdx - The number of the section in the Section Table.
     unsigned short SectionIdx;
 
-    /// SectionData - The actual data for this section which we are building
-    /// up for emission to the file.
-    std::vector<unsigned char> SectionData;
-
-    /// Relocations - The relocations that we have encountered so far in this 
-    /// section that we will need to convert to Elf relocation entries when
-    /// the file is written.
-    std::vector<MachineRelocation> Relocations;
-
-    /// Section Header Size 
-    static unsigned getSectionHdrSize(bool is64Bit)
-      { return is64Bit ? 64 : 40; }
-
-    ELFSection(const std::string &name)
-      : Name(name), Type(0), Flags(0), Addr(0), Offset(0), Size(0),
-        Link(0), Info(0), Align(0), EntSize(0) {}
+    ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit)
+      : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0),
+        Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0) {}
   };
 
   /// ELFSym - This struct contains information about each symbol that is
@@ -207,9 +152,33 @@ namespace llvm {
       STT_FILE = 4 
     };
 
+    enum {
+      STV_DEFAULT = 0,  // Visibility is specified by binding type
+      STV_INTERNAL = 1, // Defined by processor supplements
+      STV_HIDDEN = 2,   // Not visible to other components
+      STV_PROTECTED = 3 // Visible in other components but not preemptable
+    };
+
     ELFSym(const GlobalValue *gv) : GV(gv), NameIdx(0), Value(0),
                                     Size(0), Info(0), Other(0),
-                                    SectionIdx(ELFSection::SHN_UNDEF) {}
+                                    SectionIdx(ELFSection::SHN_UNDEF) {
+      if (!GV)
+        return;
+
+      switch (GV->getVisibility()) {
+      default:
+        assert(0 && "unknown visibility type");
+      case GlobalValue::DefaultVisibility:
+        Other = STV_DEFAULT;
+        break;
+      case GlobalValue::HiddenVisibility:
+        Other = STV_HIDDEN;
+        break;
+      case GlobalValue::ProtectedVisibility:
+        Other = STV_PROTECTED;
+        break;
+      }
+    }
 
     void SetBind(unsigned X) {
       assert(X == (X & 0xF) && "Bind value out of range!");
diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp
index c7bd873..ca68396 100644
--- a/lib/CodeGen/ELFCodeEmitter.cpp
+++ b/lib/CodeGen/ELFCodeEmitter.cpp
@@ -13,9 +13,9 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
+#include "llvm/CodeGen/BinaryObject.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/Debug.h"
 
@@ -28,27 +28,22 @@ namespace llvm {
 /// startFunction - This callback is invoked when a new machine function is
 /// about to be emitted.
 void ELFCodeEmitter::startFunction(MachineFunction &MF) {
-  const TargetData *TD = TM.getTargetData();
-  const Function *F = MF.getFunction();
-
-  // Align the output buffer to the appropriate alignment, power of 2.
-  unsigned FnAlign = F->getAlignment();
-  unsigned TDAlign = TD->getPrefTypeAlignment(F->getType());
-  unsigned Align = std::max(FnAlign, TDAlign);
-  assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
-
   // Get the ELF Section that this function belongs in.
   ES = &EW.getTextSection();
 
-  // FIXME: better memory management, this will be replaced by BinaryObjects
-  ES->SectionData.reserve(4096);
-  BufferBegin = &ES->SectionData[0];
-  BufferEnd = BufferBegin + ES->SectionData.capacity();
+  DOUT << "processing function: " << MF.getFunction()->getName() << "\n";
 
-  // Upgrade the section alignment if required.
+  // FIXME: better memory management, this will be replaced by BinaryObjects
+  BinaryData &BD = ES->getData();
+  BD.reserve(4096);
+  BufferBegin = &BD[0];
+  BufferEnd = BufferBegin + BD.capacity();
+
+  // Align the output buffer with function alignment, and
+  // upgrade the section alignment if required
+  unsigned Align =
+    TM.getELFWriterInfo()->getFunctionAlignment(MF.getFunction());
   if (ES->Align < Align) ES->Align = Align;
-
-  // Round the size up to the correct alignment for starting the new function.
   ES->Size = (ES->Size + (Align-1)) & (-Align);
 
   // Snaity check on allocated space for text section
@@ -107,7 +102,7 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
   FnSym.Value = FnStartPtr-BufferBegin;
 
   // Finally, add it to the symtab.
-  EW.SymbolTable.push_back(FnSym);
+  EW.SymbolList.push_back(FnSym);
 
   // Relocations
   // -----------
@@ -128,7 +123,7 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
     } else {
       assert(0 && "Unhandled relocation type");
     }
-    ES->Relocations.push_back(MR);
+    ES->addRelocation(MR);
   }
   Relocations.clear();
 
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index 3859ea3..aeccefb 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -26,9 +26,6 @@
 //  ...
 //  #N. ".shstrtab" entry - String table for the section names.
 //
-// NOTE: This code should eventually be extended to support 64-bit ELF (this
-// won't be hard), but we haven't done so yet!
-//
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "elfwriter"
@@ -36,18 +33,18 @@
 #include "ELFWriter.h"
 #include "ELFCodeEmitter.h"
 #include "ELF.h"
+#include "llvm/Constants.h"
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/BinaryObject.h"
 #include "llvm/CodeGen/FileWriters.h"
 #include "llvm/CodeGen/MachineCodeEmitter.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetELFWriterInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/Mangler.h"
-#include "llvm/Support/OutputBuffer.h"
 #include "llvm/Support/Streams.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Debug.h"
@@ -70,21 +67,23 @@ MachineCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM,
 //===----------------------------------------------------------------------===//
 
 ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
-  : MachineFunctionPass(&ID), O(o), TM(tm), ElfHdr() {
-  is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
-  isLittleEndian = TM.getTargetData()->isLittleEndian();
+  : MachineFunctionPass(&ID), O(o), TM(tm),
+    is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
+    isLittleEndian(TM.getTargetData()->isLittleEndian()),
+    ElfHdr(isLittleEndian, is64Bit) {
 
-  ElfHdr = new ELFHeader(TM.getELFWriterInfo()->getEMachine(), 0,
-                         is64Bit, isLittleEndian);
+  TAI = TM.getTargetAsmInfo();
+  TEW = TM.getELFWriterInfo();
 
   // Create the machine code emitter object for this target.
   MCE = new ELFCodeEmitter(*this);
+
+  // Initial number of sections
   NumSections = 0;
 }
 
 ELFWriter::~ELFWriter() {
   delete MCE;
-  delete ElfHdr;
 }
 
 // doInitialization - Emit the file header and all of the global variables for
@@ -92,10 +91,6 @@ ELFWriter::~ELFWriter() {
 bool ELFWriter::doInitialization(Module &M) {
   Mang = new Mangler(M);
 
-  // Local alias to shortenify coming code.
-  std::vector<unsigned char> &FH = FileHeader;
-  OutputBuffer FHOut(FH, is64Bit, isLittleEndian);
-
   // ELF Header
   // ----------
   // Fields e_shnum e_shstrndx are only known after all section have
@@ -104,54 +99,58 @@ bool ELFWriter::doInitialization(Module &M) {
   //
   // Note
   // ----
-  // FHOut.outaddr method behaves differently for ELF32 and ELF64 writing
+  // emitWord method behaves differently for ELF32 and ELF64, writing
   // 4 bytes in the former and 8 in the last for *_off and *_addr elf types
 
-  FHOut.outbyte(0x7f); // e_ident[EI_MAG0]
-  FHOut.outbyte('E');  // e_ident[EI_MAG1]
-  FHOut.outbyte('L');  // e_ident[EI_MAG2]
-  FHOut.outbyte('F');  // e_ident[EI_MAG3]
-
-  FHOut.outbyte(ElfHdr->getElfClass());   // e_ident[EI_CLASS]
-  FHOut.outbyte(ElfHdr->getByteOrder());  // e_ident[EI_DATA]
-  FHOut.outbyte(EV_CURRENT);  // e_ident[EI_VERSION]
-
-  FH.resize(16);  // e_ident[EI_NIDENT-EI_PAD]
-
-  FHOut.outhalf(ET_REL);               // e_type
-  FHOut.outhalf(ElfHdr->getMachine()); // e_machine = target
-  FHOut.outword(EV_CURRENT);           // e_version
-  FHOut.outaddr(0);                    // e_entry = 0, no entry point in .o file
-  FHOut.outaddr(0);                    // e_phoff = 0, no program header for .o
-  ELFHdr_e_shoff_Offset = FH.size();
-  FHOut.outaddr(0);                    // e_shoff = sec hdr table off in bytes
-  FHOut.outword(ElfHdr->getFlags());   // e_flags = whatever the target wants
-  FHOut.outhalf(ElfHdr->getSize());    // e_ehsize = ELF header size
-  FHOut.outhalf(0);                    // e_phentsize = prog header entry size
-  FHOut.outhalf(0);                    // e_phnum = # prog header entries = 0
+  ElfHdr.emitByte(0x7f); // e_ident[EI_MAG0]
+  ElfHdr.emitByte('E');  // e_ident[EI_MAG1]
+  ElfHdr.emitByte('L');  // e_ident[EI_MAG2]
+  ElfHdr.emitByte('F');  // e_ident[EI_MAG3]
+
+  ElfHdr.emitByte(TEW->getEIClass()); // e_ident[EI_CLASS]
+  ElfHdr.emitByte(TEW->getEIData());  // e_ident[EI_DATA]
+  ElfHdr.emitByte(EV_CURRENT);        // e_ident[EI_VERSION]
+  ElfHdr.emitAlignment(16);           // e_ident[EI_NIDENT-EI_PAD]
+
+  ElfHdr.emitWord16(ET_REL);             // e_type
+  ElfHdr.emitWord16(TEW->getEMachine()); // e_machine = target
+  ElfHdr.emitWord32(EV_CURRENT);         // e_version
+  ElfHdr.emitWord(0);                    // e_entry, no entry point in .o file
+  ElfHdr.emitWord(0);                    // e_phoff, no program header for .o
+  ELFHdr_e_shoff_Offset = ElfHdr.size();
+  ElfHdr.emitWord(0);                    // e_shoff = sec hdr table off in bytes
+  ElfHdr.emitWord32(TEW->getEFlags());   // e_flags = whatever the target wants
+  ElfHdr.emitWord16(TEW->getHdrSize());  // e_ehsize = ELF header size
+  ElfHdr.emitWord16(0);                  // e_phentsize = prog header entry size
+  ElfHdr.emitWord16(0);                  // e_phnum = # prog header entries = 0
 
   // e_shentsize = Section header entry size
-  FHOut.outhalf(ELFSection::getSectionHdrSize(is64Bit));
+  ElfHdr.emitWord16(TEW->getSHdrSize());
 
   // e_shnum     = # of section header ents
-  ELFHdr_e_shnum_Offset = FH.size();
-  FHOut.outhalf(0);
+  ELFHdr_e_shnum_Offset = ElfHdr.size();
+  ElfHdr.emitWord16(0); // Placeholder
 
   // e_shstrndx  = Section # of '.shstrtab'
-  ELFHdr_e_shstrndx_Offset = FH.size();
-  FHOut.outhalf(0);
+  ELFHdr_e_shstrndx_Offset = ElfHdr.size();
+  ElfHdr.emitWord16(0); // Placeholder
 
   // Add the null section, which is required to be first in the file.
   getSection("", ELFSection::SHT_NULL, 0);
 
-  // Start up the symbol table.  The first entry in the symtab is the null
+  // Start up the symbol table.  The first entry in the symtab is the null 
   // entry.
-  SymbolTable.push_back(ELFSym(0));
+  SymbolList.push_back(ELFSym(0));
 
   return false;
 }
 
 void ELFWriter::EmitGlobal(GlobalVariable *GV) {
+
+  // XXX: put local symbols *before* global ones!
+  const Section *S = TAI->SectionForGlobal(GV);
+  DOUT << "Section " << S->getName() << " for global " << GV->getName() << "\n";
+
   // If this is an external global, emit it now.  TODO: Note that it would be
   // better to ignore the symbol here and only add it to the symbol table if
   // referenced.
@@ -160,17 +159,17 @@ void ELFWriter::EmitGlobal(GlobalVariable *GV) {
     ExternalSym.SetBind(ELFSym::STB_GLOBAL);
     ExternalSym.SetType(ELFSym::STT_NOTYPE);
     ExternalSym.SectionIdx = ELFSection::SHN_UNDEF;
-    SymbolTable.push_back(ExternalSym);
+    SymbolList.push_back(ExternalSym);
     return;
   }
 
-  unsigned Align = TM.getTargetData()->getPreferredAlignment(GV);
-  unsigned Size  =
-    TM.getTargetData()->getTypeAllocSize(GV->getType()->getElementType());
+  const TargetData *TD = TM.getTargetData();
+  unsigned Align = TD->getPreferredAlignment(GV);
+  Constant *CV = GV->getInitializer();
+  unsigned Size = TD->getTypeAllocSize(CV->getType());
 
-  // If this global has a zero initializer, it is part of the .bss or common
-  // section.
-  if (GV->getInitializer()->isNullValue()) {
+  // If this global has a zero initializer, go to .bss or common section.
+  if (CV->isNullValue() || isa<UndefValue>(CV)) {
     // If this global is part of the common block, add it now.  Variables are
     // part of the common block if they are zero initialized and allowed to be
     // merged with other symbols.
@@ -182,14 +181,14 @@ void ELFWriter::EmitGlobal(GlobalVariable *GV) {
       CommonSym.Size  = Size;
       CommonSym.SetBind(ELFSym::STB_GLOBAL);
       CommonSym.SetType(ELFSym::STT_OBJECT);
-      // TODO SOMEDAY: add ELF visibility.
       CommonSym.SectionIdx = ELFSection::SHN_COMMON;
-      SymbolTable.push_back(CommonSym);
+      SymbolList.push_back(CommonSym);
+      getSection(S->getName(), ELFSection::SHT_NOBITS,
+        ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC, 1);
       return;
     }
 
     // Otherwise, this symbol is part of the .bss section.  Emit it now.
-
     // Handle alignment.  Ensure section is aligned at least as much as required
     // by this symbol.
     ELFSection &BSSSection = getBSSSection();
@@ -220,18 +219,128 @@ void ELFWriter::EmitGlobal(GlobalVariable *GV) {
     // Set the idx of the .bss section
     BSSSym.SectionIdx = BSSSection.SectionIdx;
     if (!GV->hasPrivateLinkage())
-      SymbolTable.push_back(BSSSym);
+      SymbolList.push_back(BSSSym);
 
     // Reserve space in the .bss section for this symbol.
     BSSSection.Size += Size;
     return;
   }
 
-  // FIXME: handle .rodata
-  //assert(!GV->isConstant() && "unimp");
+  /// Emit the Global symbol to the right ELF section
+  ELFSym GblSym(GV);
+  GblSym.Size = Size;
+  GblSym.SetType(ELFSym::STT_OBJECT);
+  GblSym.SetBind(ELFSym::STB_GLOBAL);
+  unsigned Flags = S->getFlags();
+  unsigned SectType = ELFSection::SHT_PROGBITS;
+  unsigned SHdrFlags = ELFSection::SHF_ALLOC;
+
+  if (Flags & SectionFlags::Code)
+    SHdrFlags |= ELFSection::SHF_EXECINSTR;
+  if (Flags & SectionFlags::Writeable)
+    SHdrFlags |= ELFSection::SHF_WRITE;
+  if (Flags & SectionFlags::Mergeable)
+    SHdrFlags |= ELFSection::SHF_MERGE;
+  if (Flags & SectionFlags::TLS)
+    SHdrFlags |= ELFSection::SHF_TLS;
+  if (Flags & SectionFlags::Strings)
+    SHdrFlags |= ELFSection::SHF_STRINGS;
+
+  // Remove tab from section name prefix
+  std::string SectionName(S->getName());
+  size_t Pos = SectionName.find("\t");
+  if (Pos != std::string::npos)
+    SectionName.erase(Pos, 1);
+
+  // The section alignment should be bound to the element with
+  // the largest alignment
+  ELFSection &ElfS = getSection(SectionName, SectType, SHdrFlags);
+  GblSym.SectionIdx = ElfS.SectionIdx;
+  if (Align > ElfS.Align)
+    ElfS.Align = Align;
+
+  // GblSym.Value should contain the symbol offset inside the section,
+  // and all symbols should start on their required alignment boundary
+  GblSym.Value = (ElfS.size() + (Align-1)) & (-Align);
+  ElfS.emitAlignment(Align);
+
+  // Emit the constant symbol to its section
+  EmitGlobalConstant(CV, ElfS);
+  SymbolList.push_back(GblSym);
+}
 
-  // FIXME: handle .data
-  //assert(0 && "unimp");
+void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS,
+                                         ELFSection &GblS) {
+  // Walk the fields in declaration order, emitting each one followed by the
+  // zero bytes needed to reach the offset at which the next field starts.
+  const TargetData *TD = TM.getTargetData();
+  const StructLayout *Layout = TD->getStructLayout(CVS->getType());
+  unsigned StructSize = TD->getTypeAllocSize(CVS->getType());
+  unsigned NumFields = CVS->getNumOperands();
+  uint64_t EmittedSize = 0;
+  for (unsigned Idx = 0; Idx != NumFields; ++Idx) {
+    const Constant *Field = CVS->getOperand(Idx);
+
+    // The end of this field's slot is the start of the next field or, for
+    // the last field, the allocation size of the whole struct.
+    uint64_t SlotEnd = StructSize;
+    if (Idx != NumFields-1)
+      SlotEnd = Layout->getElementOffset(Idx+1);
+    uint64_t FieldSize = TD->getTypeAllocSize(Field->getType());
+    uint64_t PadSize = (SlotEnd - Layout->getElementOffset(Idx)) - FieldSize;
+
+    // Emit the field value, then zero-fill the remainder of its slot.
+    EmitGlobalConstant(Field, GblS);
+    for (unsigned P = 0; P < PadSize; ++P)
+      GblS.emitByte(0);
+    EmittedSize += FieldSize + PadSize;
+  }
+  assert(EmittedSize == Layout->getSizeInBytes() &&
+         "Layout of constant struct may be incorrect!");
+}
+
+/// EmitGlobalConstant - Append the bytes of constant 'CV' to section 'GblS'.
+void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) {
+  unsigned Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+
+  if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
+    // Emit the elements one by one, strings included: emitString() appears to
+    // append a NUL, which would make the data larger than the array type.
+    for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i)
+      EmitGlobalConstant(CVA->getOperand(i), GblS);
+    return;
+  } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
+    EmitGlobalConstantStruct(CVS, GblS);
+    return;
+  } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+    uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+    if (CFP->getType() == Type::DoubleTy)
+      GblS.emitWord64(Val);
+    else if (CFP->getType() == Type::FloatTy)
+      GblS.emitWord32(Val);
+    else if (CFP->getType() == Type::X86_FP80Ty) {
+      assert(0 && "X86_FP80Ty global emission not implemented");
+    } else if (CFP->getType() == Type::PPC_FP128Ty)
+      assert(0 && "PPC_FP128Ty global emission not implemented");
+    return;
+  } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+    if (Size == 1)
+      GblS.emitByte(CI->getZExtValue());
+    else if (Size == 2)
+      GblS.emitWord16(CI->getZExtValue());
+    else if (Size == 4)
+      GblS.emitWord32(CI->getZExtValue());
+    else if (Size == 8)
+      GblS.emitWord64(CI->getZExtValue());
+    else
+      assert(0 && "LargeInt global emission not implemented");
+    return;
+  } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+    for (unsigned I = 0, E = CP->getType()->getNumElements(); I < E; ++I)
+      EmitGlobalConstant(CP->getOperand(I), GblS);
+    return;
+  }
+  assert(0 && "unknown global constant");
 }
 
 
@@ -243,22 +352,41 @@ bool ELFWriter::runOnMachineFunction(MachineFunction &MF) {
 /// doFinalization - Now that the module has been completely processed, emit
 /// the ELF file to 'O'.
 bool ELFWriter::doFinalization(Module &M) {
-  // Okay, the ELF header and .text sections have been completed, build the
-  // .data, .bss, and "common" sections next.
+  /// FIXME: This should be removed when moving to ObjectCodeEmitter. Since the
+  /// current ELFCodeEmitter uses CurrBuff, ... it doesn't update S.Data
+  /// vector size for .text sections, so this is a quick dirty fix
+  ELFSection &TS = getTextSection();
+  if (TS.Size) {
+    BinaryData &BD = TS.getData();
+    for (unsigned e=0; e<TS.Size; ++e)
+      BD.push_back(BD[e]);
+  }
+
+  // Emit .data section placeholder
+  getDataSection();
+
+  // Emit .bss section placeholder
+  getBSSSection();
+
+  // Build and emit data, bss and "common" sections.
   for (Module::global_iterator I = M.global_begin(), E = M.global_end();
        I != E; ++I)
     EmitGlobal(I);
 
+  // Emit non-executable stack note
+  if (TAI->getNonexecutableStackDirective())
+    getNonExecStackSection();
+
   // Emit the symbol table now, if non-empty.
   EmitSymbolTable();
 
   // Emit the relocation sections.
   EmitRelocations();
 
-  // Emit the string table for the sections in the ELF file.
+  // Emit the sections string table.
   EmitSectionTableStringTable();
 
-  // Emit the sections to the .o file, and emit the section table for the file.
+  // Dump the sections and section table to the .o file.
   OutputSectionsAndSectionTable();
 
   // We are done with the abstract symbols.
@@ -274,78 +402,97 @@ bool ELFWriter::doFinalization(Module &M) {
 void ELFWriter::EmitRelocations() {
 }
 
+/// EmitSymbol - Append the symbol table entry for 'Sym' to 'SymbolTable'.
+void ELFWriter::EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym) {
+  // The name index leads both layouts; the other fields differ in order.
+  SymbolTable.emitWord32(Sym.NameIdx);
+  if (!is64Bit) {
+    SymbolTable.emitWord32(Sym.Value);
+    SymbolTable.emitWord32(Sym.Size);
+    SymbolTable.emitByte(Sym.Info);
+    SymbolTable.emitByte(Sym.Other);
+    SymbolTable.emitWord16(Sym.SectionIdx);
+  } else {
+    SymbolTable.emitByte(Sym.Info);
+    SymbolTable.emitByte(Sym.Other);
+    SymbolTable.emitWord16(Sym.SectionIdx);
+    SymbolTable.emitWord64(Sym.Value);
+    SymbolTable.emitWord64(Sym.Size);
+  }
+}
+
+/// EmitSectionHeader - Append the section header entry describing 'SHdr' to
+/// the section header table being built in 'SHdrTab'.
+void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab,
+                                  const ELFSection &SHdr) {
+  SHdrTab.emitWord32(SHdr.NameIdx);    // sh_name
+  SHdrTab.emitWord32(SHdr.Type);       // sh_type
+  if (!is64Bit) {
+    SHdrTab.emitWord32(SHdr.Flags);    // sh_flags
+    SHdrTab.emitWord(SHdr.Addr);       // sh_addr
+    SHdrTab.emitWord(SHdr.Offset);     // sh_offset
+    SHdrTab.emitWord32(SHdr.Size);     // sh_size
+    SHdrTab.emitWord32(SHdr.Link);     // sh_link
+    SHdrTab.emitWord32(SHdr.Info);     // sh_info
+    SHdrTab.emitWord32(SHdr.Align);    // sh_addralign
+    SHdrTab.emitWord32(SHdr.EntSize);  // sh_entsize
+  } else {
+    SHdrTab.emitWord64(SHdr.Flags);    // sh_flags
+    SHdrTab.emitWord(SHdr.Addr);       // sh_addr
+    SHdrTab.emitWord(SHdr.Offset);     // sh_offset
+    SHdrTab.emitWord64(SHdr.Size);     // sh_size
+    SHdrTab.emitWord32(SHdr.Link);     // sh_link
+    SHdrTab.emitWord32(SHdr.Info);     // sh_info
+    SHdrTab.emitWord64(SHdr.Align);    // sh_addralign
+    SHdrTab.emitWord64(SHdr.EntSize);  // sh_entsize
+  }
+}
+
 /// EmitSymbolTable - If the current symbol table is non-empty, emit the string
 /// table for it and then the symbol table itself.
 void ELFWriter::EmitSymbolTable() {
-  if (SymbolTable.size() == 1) return;  // Only the null entry.
+  if (SymbolList.size() == 1) return;  // Only the null entry.
 
   // FIXME: compact all local symbols to the start of the symtab.
   unsigned FirstNonLocalSymbol = 1;
 
-  ELFSection &StrTab = getSection(".strtab", ELFSection::SHT_STRTAB, 0);
-  StrTab.Align = 1;
-
-  DataBuffer &StrTabBuf = StrTab.SectionData;
-  OutputBuffer StrTabOut(StrTabBuf, is64Bit, isLittleEndian);
+  ELFSection &StrTab = getStringTableSection();
 
   // Set the zero'th symbol to a null byte, as required.
-  StrTabOut.outbyte(0);
+  StrTab.emitByte(0);
+
   unsigned Index = 1;
-  for (unsigned i = 1, e = SymbolTable.size(); i != e; ++i) {
+  for (unsigned i = 1, e = SymbolList.size(); i != e; ++i) {
     // Use the name mangler to uniquify the LLVM symbol.
-    std::string Name = Mang->getValueName(SymbolTable[i].GV);
+    std::string Name = Mang->getValueName(SymbolList[i].GV);
 
     if (Name.empty()) {
-      SymbolTable[i].NameIdx = 0;
+      SymbolList[i].NameIdx = 0;
     } else {
-      SymbolTable[i].NameIdx = Index;
-
-      // Add the name to the output buffer, including the null terminator.
-      StrTabBuf.insert(StrTabBuf.end(), Name.begin(), Name.end());
-
-      // Add a null terminator.
-      StrTabBuf.push_back(0);
+      SymbolList[i].NameIdx = Index;
+      StrTab.emitString(Name);
 
       // Keep track of the number of bytes emitted to this section.
       Index += Name.size()+1;
     }
   }
-  assert(Index == StrTabBuf.size());
+  assert(Index == StrTab.size());
   StrTab.Size = Index;
 
   // Now that we have emitted the string table and know the offset into the
   // string table of each symbol, emit the symbol table itself.
-  ELFSection &SymTab = getSection(".symtab", ELFSection::SHT_SYMTAB, 0);
-  SymTab.Align = is64Bit ? 8 : 4;
-  SymTab.Link = StrTab.SectionIdx;      // Section Index of .strtab.
-  SymTab.Info = FirstNonLocalSymbol;    // First non-STB_LOCAL symbol.
-  SymTab.EntSize = is64Bit ? 24 : 16;   // Size of each symtab entry. 
-  DataBuffer &SymTabBuf = SymTab.SectionData;
-  OutputBuffer SymTabOut(SymTabBuf, is64Bit, isLittleEndian);
-
-  if (!is64Bit) {   // 32-bit and 64-bit formats are shuffled a bit.
-    for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) {
-      ELFSym &Sym = SymbolTable[i];
-      SymTabOut.outword(Sym.NameIdx);
-      SymTabOut.outaddr32(Sym.Value);
-      SymTabOut.outword(Sym.Size);
-      SymTabOut.outbyte(Sym.Info);
-      SymTabOut.outbyte(Sym.Other);
-      SymTabOut.outhalf(Sym.SectionIdx);
-    }
-  } else {
-    for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) {
-      ELFSym &Sym = SymbolTable[i];
-      SymTabOut.outword(Sym.NameIdx);
-      SymTabOut.outbyte(Sym.Info);
-      SymTabOut.outbyte(Sym.Other);
-      SymTabOut.outhalf(Sym.SectionIdx);
-      SymTabOut.outaddr64(Sym.Value);
-      SymTabOut.outxword(Sym.Size);
-    }
-  }
+  ELFSection &SymTab = getSymbolTableSection();
+  SymTab.Align = TEW->getSymTabAlignment();
+  SymTab.Link  = StrTab.SectionIdx;      // Section Index of .strtab.
+  SymTab.Info  = FirstNonLocalSymbol;    // First non-STB_LOCAL symbol.
+
+  // Size of each symtab entry.
+  SymTab.EntSize = TEW->getSymTabEntrySize();
+
+  for (unsigned i = 0, e = SymbolList.size(); i != e; ++i)
+    EmitSymbol(SymTab, SymbolList[i]);
 
-  SymTab.Size = SymTabBuf.size();
+  SymTab.Size = SymTab.size();
 }
 
 /// EmitSectionTableStringTable - This method adds and emits a section for the
@@ -357,32 +504,25 @@ void ELFWriter::EmitSectionTableStringTable() {
 
   // Now that we know which section number is the .shstrtab section, update the
   // e_shstrndx entry in the ELF header.
-  OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian);
-  FHOut.fixhalf(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset);
+  ElfHdr.fixWord16(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset);
 
   // Set the NameIdx of each section in the string table and emit the bytes for
   // the string table.
   unsigned Index = 0;
-  DataBuffer &Buf = SHStrTab.SectionData;
 
   for (std::list<ELFSection>::iterator I = SectionList.begin(),
          E = SectionList.end(); I != E; ++I) {
     // Set the index into the table.  Note if we have lots of entries with
     // common suffixes, we could memoize them here if we cared.
     I->NameIdx = Index;
-
-    // Add the name to the output buffer, including the null terminator.
-    Buf.insert(Buf.end(), I->Name.begin(), I->Name.end());
-
-    // Add a null terminator.
-    Buf.push_back(0);
+    SHStrTab.emitString(I->getName());
 
     // Keep track of the number of bytes emitted to this section.
-    Index += I->Name.size()+1;
+    Index += I->getName().size()+1;
   }
 
   // Set the size of .shstrtab now that we know what it is.
-  assert(Index == Buf.size());
+  assert(Index == SHStrTab.size());
   SHStrTab.Size = Index;
 }
 
@@ -391,9 +531,9 @@ void ELFWriter::EmitSectionTableStringTable() {
 /// SectionTable.
 void ELFWriter::OutputSectionsAndSectionTable() {
   // Pass #1: Compute the file offset for each section.
-  size_t FileOff = FileHeader.size();   // File header first.
+  size_t FileOff = ElfHdr.size();   // File header first.
 
-  // Emit all of the section data in order.
+  // Adjust alignment of all sections if needed.
   for (std::list<ELFSection>::iterator I = SectionList.begin(),
          E = SectionList.end(); I != E; ++I) {
 
@@ -401,9 +541,14 @@ void ELFWriter::OutputSectionsAndSectionTable() {
     if (!I->SectionIdx)
       continue;
 
+    if (!I->size()) {
+      I->Offset = FileOff;
+      continue;
+    }
+
     // Update Section size
     if (!I->Size)
-      I->Size = I->SectionData.size();
+      I->Size = I->size();
 
     // Align FileOff to whatever the alignment restrictions of the section are.
     if (I->Align)
@@ -419,49 +564,40 @@ void ELFWriter::OutputSectionsAndSectionTable() {
 
   // Now that we know where all of the sections will be emitted, set the e_shnum
   // entry in the ELF header.
-  OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian);
-  FHOut.fixhalf(NumSections, ELFHdr_e_shnum_Offset);
+  ElfHdr.fixWord16(NumSections, ELFHdr_e_shnum_Offset);
 
   // Now that we know the offset in the file of the section table, update the
   // e_shoff address in the ELF header.
-  FHOut.fixaddr(FileOff, ELFHdr_e_shoff_Offset);
+  ElfHdr.fixWord(FileOff, ELFHdr_e_shoff_Offset);
 
   // Now that we know all of the data in the file header, emit it and all of the
   // sections!
-  O.write((char*)&FileHeader[0], FileHeader.size());
-  FileOff = FileHeader.size();
-  DataBuffer().swap(FileHeader);
+  O.write((char *)&ElfHdr.getData()[0], ElfHdr.size());
+  FileOff = ElfHdr.size();
 
-  DataBuffer Table;
-  OutputBuffer TableOut(Table, is64Bit, isLittleEndian);
+  // Section Header Table blob
+  BinaryObject SHdrTable(isLittleEndian, is64Bit);
 
-  // Emit all of the section data and build the section table itself.
+  // Emit all of the sections to the file and build the section header table.
   while (!SectionList.empty()) {
-    const ELFSection &S = *SectionList.begin();
+    ELFSection &S = *SectionList.begin();
+    DOUT << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName()
+         << ", Size: " << S.Size << ", Offset: " << S.Offset
+         << ", SectionData Size: " << S.size() << "\n";
 
     // Align FileOff to whatever the alignment restrictions of the section are.
-    if (S.Align)
+    if (S.Align && S.size()) { // Pass #1 adds no padding for empty sections
       for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1);
-           FileOff != NewFileOff; ++FileOff)
+        FileOff != NewFileOff; ++FileOff)
         O << (char)0xAB;
-    O.write((char*)&S.SectionData[0], S.Size);
-
-    DOUT << "SectionIdx: " << S.SectionIdx << ", Name: " << S.Name
-         << ", Size: " << S.Size << ", Offset: " << S.Offset << "\n";
-
-    FileOff += S.Size;
+    }
 
-    TableOut.outword(S.NameIdx);  // sh_name - Symbol table name idx
-    TableOut.outword(S.Type);     // sh_type - Section contents & semantics
-    TableOut.outaddr(S.Flags);    // sh_flags - Section flags.
-    TableOut.outaddr(S.Addr);     // sh_addr - The mem addr this section is in.
-    TableOut.outaddr(S.Offset);   // sh_offset - Offset from the file start.
-    TableOut.outaddr(S.Size);     // sh_size - The section size.
-    TableOut.outword(S.Link);     // sh_link - Section header table index link.
-    TableOut.outword(S.Info);     // sh_info - Auxillary information.
-    TableOut.outaddr(S.Align);    // sh_addralign - Alignment of section.
-    TableOut.outaddr(S.EntSize);  // sh_entsize - Size of entries in the section
+    if (S.size()) {
+      O.write((char *)&S.getData()[0], S.Size);
+      FileOff += S.Size;
+    }
 
+    EmitSectionHeader(SHdrTable, S);
     SectionList.pop_front();
   }
 
@@ -471,5 +607,5 @@ void ELFWriter::OutputSectionsAndSectionTable() {
     O << (char)0xAB;
 
   // Emit the section table itself.
-  O.write((char*)&Table[0], Table.size());
+  O.write((char *)&SHdrTable.getData()[0], SHdrTable.size());
 }
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
index 14a44f0..8a380f0 100644
--- a/lib/CodeGen/ELFWriter.h
+++ b/lib/CodeGen/ELFWriter.h
@@ -16,15 +16,20 @@
 
 #include "llvm/ADT/SetVector.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/OutputBuffer.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
 #include "ELF.h"
 #include <list>
 #include <map>
 
 namespace llvm {
+  class BinaryObject;
+  class ConstantStruct;
+  class ELFCodeEmitter;
   class GlobalVariable;
   class Mangler;
   class MachineCodeEmitter;
-  class ELFCodeEmitter;
   class raw_ostream;
 
   /// ELFWriter - This class implements the common target-independent code for
@@ -52,6 +57,9 @@ namespace llvm {
     /// Target machine description.
     TargetMachine &TM;
 
+    /// Target Elf Writer description.
+    const TargetELFWriterInfo *TEW;
+
     /// Mang - The object used to perform name mangling for this module.
     Mangler *Mang;
 
@@ -59,6 +67,10 @@ namespace llvm {
     /// code for functions to the .o file.
     ELFCodeEmitter *MCE;
 
+    /// TAI - Target Asm Info, provide information about section names for
+    /// globals and other target specific stuff.
+    const TargetAsmInfo *TAI;
+
     //===------------------------------------------------------------------===//
     // Properties inferred automatically from the target machine.
     //===------------------------------------------------------------------===//
@@ -77,13 +89,8 @@ namespace llvm {
     bool doFinalization(Module &M);
 
   private:
-    // The buffer we accumulate the file header into.  Note that this should be
-    // changed into something much more efficient later (and the bitcode writer
-    // as well!).
-    DataBuffer FileHeader;
-
-    /// ElfHdr - Hold information about the ELF Header
-    ELFHeader *ElfHdr;
+    // Blob containing the Elf header
+    BinaryObject ElfHdr;
 
     /// SectionList - This is the list of sections that we have emitted to the
     /// file.  Once the file has been completely built, the section header table
@@ -97,17 +104,18 @@ namespace llvm {
 
     /// getSection - Return the section with the specified name, creating a new
     /// section if one does not already exist.
-    ELFSection &getSection(const std::string &Name,
-                           unsigned Type, unsigned Flags = 0) {
+    ELFSection &getSection(const std::string &Name, unsigned Type, 
+                           unsigned Flags = 0, unsigned Align = 0) {
       ELFSection *&SN = SectionLookup[Name];
       if (SN) return *SN;
 
-      SectionList.push_back(Name);
+      SectionList.push_back(ELFSection(Name, isLittleEndian, is64Bit));
       SN = &SectionList.back();
       SN->SectionIdx = NumSections++;
       SN->Type = Type;
       SN->Flags = Flags;
       SN->Link = ELFSection::SHN_UNDEF;
+      SN->Align = Align;
       return *SN;
     }
 
@@ -116,23 +124,36 @@ namespace llvm {
                         ELFSection::SHF_EXECINSTR | ELFSection::SHF_ALLOC);
     }
 
+    ELFSection &getNonExecStackSection() {
+      return getSection(".note.GNU-stack", ELFSection::SHT_PROGBITS, 0, 1);
+    }
+
+    ELFSection &getSymbolTableSection() {
+      return getSection(".symtab", ELFSection::SHT_SYMTAB, 0);
+    }
+
+    ELFSection &getStringTableSection() {
+      return getSection(".strtab", ELFSection::SHT_STRTAB, 0, 1);
+    }
+
     ELFSection &getDataSection() {
       return getSection(".data", ELFSection::SHT_PROGBITS,
                         ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC);
     }
+
     ELFSection &getBSSSection() {
       return getSection(".bss", ELFSection::SHT_NOBITS,
                         ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC);
     }
 
-    /// SymbolTable - This is the list of symbols we have emitted to the file.
+    /// SymbolList - This is the list of symbols we have emitted to the file.
     /// This actually gets rearranged before emission to the file (to put the
     /// local symbols first in the list).
-    std::vector<ELFSym> SymbolTable;
+    std::vector<ELFSym> SymbolList;
 
-    /// PendingSyms - This is a list of externally defined symbols that we have
-    /// been asked to emit, but have not seen a reference to.  When a reference
-    /// is seen, the symbol will move from this list to the SymbolTable.
+    /// PendingGlobals - List of externally defined symbols that we have been
+    /// asked to emit, but have not seen a reference to.  When a reference
+    /// is seen, the symbol will move from this list to the SymbolList.
     SetVector<GlobalValue*> PendingGlobals;
 
     // As we complete the ELF file, we need to update fields in the ELF header
@@ -142,11 +163,17 @@ namespace llvm {
     unsigned ELFHdr_e_shoff_Offset;     // e_shoff    in ELF header.
     unsigned ELFHdr_e_shstrndx_Offset;  // e_shstrndx in ELF header.
     unsigned ELFHdr_e_shnum_Offset;     // e_shnum    in ELF header.
+
   private:
     void EmitGlobal(GlobalVariable *GV);
-    void EmitSymbolTable();
+    void EmitGlobalConstant(const Constant *C, ELFSection &GblS);
+    void EmitGlobalConstantStruct(const ConstantStruct *CVS,
+                                  ELFSection &GblS);
     void EmitRelocations();
+    void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr);
     void EmitSectionTableStringTable();
+    void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym);
+    void EmitSymbolTable();
     void OutputSectionsAndSectionTable();
   };
 }
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index b3c60e6..a163cac 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -240,7 +240,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
   if (OptLevel != CodeGenOpt::None) {
     PM.add(createMachineLICMPass());
     PM.add(createMachineSinkingPass());
-    printAndVerify(PM, /* allowDoubleDefs= */ true);
+    printAndVerify(PM, /* allowDoubleDefs= */ false);
   }
 
   // Run pre-ra passes.
diff --git a/lib/CodeGen/LazyLiveness.cpp b/lib/CodeGen/LazyLiveness.cpp
new file mode 100644
index 0000000..6fb35d2
--- /dev/null
+++ b/lib/CodeGen/LazyLiveness.cpp
@@ -0,0 +1,192 @@
+//===- LazyLiveness.cpp - Lazy, CFG-invariant liveness information --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements a lazy liveness analysis as per "Fast Liveness Checking
+// for SSA-form Programs," by Boissinot, et al.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lazyliveness"
+#include "llvm/CodeGen/LazyLiveness.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+using namespace llvm;
+
+char LazyLiveness::ID = 0;
+static RegisterPass<LazyLiveness> X("lazy-liveness", "Lazy Liveness Analysis");
+
+/// computeBackedgeChain - Compute tv[MBB].  For every backedge source that is
+/// MBB itself or in rv[MBB], each backedge target not already in rv[MBB] is
+/// added to tv[MBB] together with that target's own tv set; targets that have
+/// not been processed yet are computed recursively first.
+void LazyLiveness::computeBackedgeChain(MachineFunction& mf,
+                                        MachineBasicBlock* MBB) {
+  SparseBitVector<128> tmp = rv[MBB];
+  tmp.set(preorder[MBB]);
+  tmp &= backedge_source;
+  calculated.set(preorder[MBB]);
+
+  for (SparseBitVector<128>::iterator I = tmp.begin(); I != tmp.end(); ++I) {
+    MachineBasicBlock* SrcMBB = rev_preorder[*I];
+
+    for (MachineBasicBlock::succ_iterator SI = SrcMBB->succ_begin();
+         SI != SrcMBB->succ_end(); ++SI) {
+      MachineBasicBlock* TgtMBB = *SI;
+
+      if (backedges.count(std::make_pair(SrcMBB, TgtMBB)) &&
+          !rv[MBB].test(preorder[TgtMBB])) {
+        if (!calculated.test(preorder[TgtMBB]))
+          computeBackedgeChain(mf, TgtMBB);
+
+        // Take a copy of the target's set before asking for tv[MBB].  If tv is
+        // a DenseMap like rv (its declaration is not in this file), operator[]
+        // may insert and rehash, so two live references into it are unsafe.
+        SparseBitVector<128> TgtTV = tv[TgtMBB];
+        SparseBitVector<128>& MBBTV = tv[MBB];
+        MBBTV.set(preorder[TgtMBB]);
+        MBBTV |= TgtTV;
+      }
+    }
+
+    tv[MBB].reset(preorder[MBB]);
+  }
+}
+
+/// runOnMachineFunction - Precompute the preorder numbering and the rv / tv
+/// bit sets that vregLiveIntoMBB consults.  Always returns false.
+bool LazyLiveness::runOnMachineFunction(MachineFunction &mf) {
+  // Discard the state of the previously analyzed function.  rev_preorder has
+  // to be emptied as well: Step 0 appends to it, so leftover entries would
+  // shift every block away from the index given by its preorder number.
+  rv.clear();
+  tv.clear();
+  backedges.clear();
+  backedge_source.clear();
+  backedge_target.clear();
+  calculated.clear();
+  preorder.clear();
+  rev_preorder.clear();
+
+  MRI = &mf.getRegInfo();
+  MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
+
+  // Step 0: Compute preorder numbering for all MBBs.
+  unsigned num = 0;
+  for (df_iterator<MachineDomTreeNode*> DI = df_begin(MDT.getRootNode()),
+       DE = df_end(MDT.getRootNode()); DI != DE; ++DI) {
+    preorder[(*DI)->getBlock()] = num++;
+    rev_preorder.push_back((*DI)->getBlock());
+  }
+
+  // Step 1: Compute the transitive closure of the CFG, ignoring backedges.
+  for (po_iterator<MachineBasicBlock*> POI = po_begin(&*mf.begin()),
+       POE = po_end(&*mf.begin()); POI != POE; ++POI) {
+    MachineBasicBlock* MBB = *POI;
+    SparseBitVector<128>& entry = rv[MBB];
+    entry.set(preorder[MBB]);
+
+    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+         SE = MBB->succ_end(); SI != SE; ++SI) {
+      DenseMap<MachineBasicBlock*, SparseBitVector<128> >::iterator SII =
+                                                         rv.find(*SI);
+
+      // Because we're iterating in postorder, any successor that does not yet
+      // have an rv entry must be on a backedge.
+      if (SII != rv.end()) {
+        entry |= SII->second;
+      } else {
+        backedges.insert(std::make_pair(MBB, *SI));
+        backedge_source.set(preorder[MBB]);
+        backedge_target.set(preorder[*SI]);
+      }
+    }
+  }
+
+  // Step 2: Compute tv for every block that is the source of a backedge.
+  for (SparseBitVector<128>::iterator I = backedge_source.begin();
+       I != backedge_source.end(); ++I)
+    computeBackedgeChain(mf, rev_preorder[*I]);
+
+  // Step 3: A block that is not a backedge target takes over the tv set of a
+  // successor reached through a non-backedge edge, if that successor has one.
+  // NOTE(review): this assigns rather than unions, so with several such
+  // successors only the last one visited is kept -- confirm this is intended.
+  for (po_iterator<MachineBasicBlock*> POI = po_begin(&*mf.begin()),
+       POE = po_end(&*mf.begin()); POI != POE; ++POI)
+    if (!backedge_target.test(preorder[*POI]))
+      for (MachineBasicBlock::succ_iterator SI = (*POI)->succ_begin(),
+           SE = (*POI)->succ_end(); SI != SE; ++SI)
+        if (!backedges.count(std::make_pair(*POI, *SI)) && tv.count(*SI)) {
+          SparseBitVector<128>& PBV = tv[*POI];
+          PBV = tv[*SI];
+        }
+
+  // Step 4: Every block is a member of its own tv set.
+  for (po_iterator<MachineBasicBlock*> POI = po_begin(&*mf.begin()),
+       POE = po_end(&*mf.begin()); POI != POE; ++POI)
+    tv[*POI].set(preorder[*POI]);
+
+  return false;
+}
+
+/// vregLiveIntoMBB - Return true if vreg is live on entry to MBB according to
+/// the sets computed by runOnMachineFunction.  The block holding the first
+/// definition returned by MRI->def_begin(vreg) is taken as the defining block.
+bool LazyLiveness::vregLiveIntoMBB(unsigned vreg, MachineBasicBlock* MBB) {
+  MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
+
+  MachineBasicBlock* DefMBB = MRI->def_begin(vreg)->getParent();
+  unsigned def = preorder[DefMBB];
+  // max_dom - Largest preorder number in DefMBB's dominator subtree.
+  unsigned max_dom = 0;
+  for (df_iterator<MachineDomTreeNode*> DI = df_begin(MDT[DefMBB]),
+       DE = df_end(MDT[DefMBB]); DI != DE; ++DI) {
+    if (preorder[(*DI)->getBlock()] > max_dom) {
+      max_dom = preorder[(*DI)->getBlock()];
+    }
+  }
+
+  // Only blocks numbered in (def, max_dom] can have the value live-in.
+  if (preorder[MBB] <= def || max_dom < preorder[MBB])
+    return false;
+
+  // Walk the members of tv[MBB] numbered in (def, max_dom].
+  SparseBitVector<128>& Candidates = tv[MBB];
+  SparseBitVector<128>::iterator I = Candidates.begin();
+  while (I != Candidates.end() && *I <= def) ++I;
+  while (I != Candidates.end() && *I <= max_dom) {
+    MachineBasicBlock* CandMBB = rev_preorder[*I];
+
+    // vreg is live-in if some use sits in a block that rv records as reachable
+    // from the candidate.
+    for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(vreg),
+         UE = MachineRegisterInfo::use_end(); UI != UE; ++UI) {
+      MachineBasicBlock* UseMBB = UI->getParent();
+      if (rv[CandMBB].test(preorder[UseMBB]))
+        return true;
+    }
+
+    // t_dom - Largest preorder number in the candidate's dominator subtree.
+    // Starting from the candidate's own number guarantees forward progress.
+    unsigned t_dom = *I;
+    for (df_iterator<MachineDomTreeNode*> DI = df_begin(MDT[CandMBB]),
+         DE = df_end(MDT[CandMBB]); DI != DE; ++DI)
+      if (preorder[(*DI)->getBlock()] > t_dom)
+        t_dom = preorder[(*DI)->getBlock()];
+
+    // Skip the remaining candidates inside that subtree and move on to the
+    // next one outside of it.
+    while (I != Candidates.end() && *I <= t_dom) ++I;
+  }
+
+  return false;
+}
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 944468e..3feb92f 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -214,26 +214,33 @@ void RegScavenger::forward() {
   }
 
   // Process uses first.
-  BitVector UseRegs(NumPhysRegs);
+  BitVector KillRegs(NumPhysRegs);
   for (unsigned i = 0, e = UseMOs.size(); i != e; ++i) {
     const MachineOperand MO = *UseMOs[i].first;
     unsigned Reg = MO.getReg();
 
     assert(isUsed(Reg) && "Using an undefined register!");
 
-    if (MO.isKill() && !isReserved(Reg)) {
-      UseRegs.set(Reg);
+    // Kill of implicit_def defined registers are ignored. e.g.
+    // entry: 0x2029ab8, LLVM BB @0x1b06080, ID#0:
+    // Live Ins: %R0
+    //  %R0<def> = IMPLICIT_DEF
+    //  %R0<def> = IMPLICIT_DEF
+    //  STR %R0<kill>, %R0, %reg0, 0, 14, %reg0, Mem:ST(4,4) [0x1b06510 + 0]
+    //  %R1<def> = LDR %R0, %reg0, 24, 14, %reg0, Mem:LD(4,4) [0x1b065bc + 0]
+    if (MO.isKill() && !isReserved(Reg) && !isImplicitlyDefined(Reg)) {
+      KillRegs.set(Reg);
 
       // Mark sub-registers as used.
       for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
            unsigned SubReg = *SubRegs; ++SubRegs)
-        UseRegs.set(SubReg);
+        KillRegs.set(SubReg);
     }
   }
 
   // Change states of all registers after all the uses are processed to guard
   // against multiple uses.
-  setUnused(UseRegs);
+  setUnused(KillRegs);
 
   // Process early clobber defs then process defs. We can have a early clobber
   // that is dead, it should not conflict with a def that happens one "slot"
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index f8ae884..43995cb 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -563,6 +563,11 @@ void *JIT::getPointerToFunction(Function *F) {
     return Addr;   // Check if function already code gen'd
 
   MutexGuard locked(lock);
+  
+  // Now that this thread owns the lock, check if another thread has already
+  // code gen'd the function.
+  if (void *Addr = getPointerToGlobalIfAvailable(F))
+    return Addr;  
 
   // Make sure we read in the function if it exists in this Module.
   if (F->hasNotBeenReadFromBitcode()) {
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index ac7de91..7edd118 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -98,12 +98,12 @@ FunctionPass *createARMCodePrinterPass(raw_ostream &O,
 FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
                                        MachineCodeEmitter &MCE);
 
-FunctionPass *createARMCodeEmitterPass( ARMTargetMachine &TM,
-                                        MachineCodeEmitter &MCE);
-FunctionPass *createARMJITCodeEmitterPass( ARMTargetMachine &TM, 
-                                           JITCodeEmitter &JCE);
+FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
+                                       MachineCodeEmitter &MCE);
+FunctionPass *createARMJITCodeEmitterPass(ARMTargetMachine &TM, 
+                                          JITCodeEmitter &JCE);
 
-FunctionPass *createARMLoadStoreOptimizationPass();
+FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
 FunctionPass *createARMConstantIslandPass();
 
 } // end namespace llvm;
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 4ac6857..594811d 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -28,6 +28,8 @@ def ArchV5TE    : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
                                    "ARM v5TE, v5TEj, v5TExp">;
 def ArchV6      : SubtargetFeature<"v6", "ARMArchVersion", "V6",
                                    "ARM v6">;
+def ArchV6T2    : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
+                                   "ARM v6t2">;
 def ArchV7A     : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
                                    "ARM v7A">;
 def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
@@ -92,9 +94,11 @@ def : Proc<"arm1176jzf-s",    [ArchV6, FeatureVFP2]>;
 def : Proc<"mpcorenovfp",     [ArchV6]>;
 def : Proc<"mpcore",          [ArchV6, FeatureVFP2]>;
 
-def : Proc<"arm1156t2-s",     [ArchV6, FeatureThumb2]>;
-def : Proc<"arm1156t2f-s",    [ArchV6, FeatureThumb2, FeatureVFP2]>;
+// V6T2 Processors.
+def : Proc<"arm1156t2-s",     [ArchV6T2, FeatureThumb2]>;
+def : Proc<"arm1156t2f-s",    [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
 
+// V7 Processors.
 def : Proc<"cortex-a8",       [ArchV7A, FeatureThumb2, FeatureNEON]>;
 def : Proc<"cortex-a9",       [ArchV7A, FeatureThumb2, FeatureNEON]>;
 
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 6cd786e..f126760 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -17,6 +17,11 @@ class CCIfSubtarget<string F, CCAction A>:
 class CCIfAlign<string Align, CCAction A>:
   CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
 
+/// CCIfFloatABI - If the float ABI in effect (llvm::FloatABIType) equals
+///                ABIType, apply the action. ABIType may be "Hard" or "Soft".
+class CCIfFloatABI<string ABIType, CCAction A>:
+  CCIf<!strconcat("llvm::FloatABIType == llvm::FloatABI::", ABIType), A>;
+
 //===----------------------------------------------------------------------===//
 // ARM APCS Calling Convention
 //===----------------------------------------------------------------------===//
@@ -43,9 +48,10 @@ def RetCC_ARM_APCS : CallingConv<[
 ]>;
 
 //===----------------------------------------------------------------------===//
-// ARM AAPCS (EABI) Calling Convention
+// ARM AAPCS (EABI) Calling Convention, common parts
 //===----------------------------------------------------------------------===//
-def CC_ARM_AAPCS : CallingConv<[
+
+def CC_ARM_AAPCS_Common : CallingConv<[
 
   CCIfType<[i8, i16], CCPromoteToType<i32>>,
 
@@ -53,23 +59,51 @@ def CC_ARM_AAPCS : CallingConv<[
   // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
   // (and the same is true for f64 if VFP is not enabled)
   CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
-  CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
-
-  CCIfType<[f32], CCBitConvertToType<i32>>,
   CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&"
                        "ArgFlags.getOrigAlign() != 8",
                        CCAssignToReg<[R0, R1, R2, R3]>>>,
 
-  CCIfType<[i32], CCAssignToStack<4, 4>>,
+  CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
   CCIfType<[f64], CCAssignToStack<8, 8>>
 ]>;
 
-def RetCC_ARM_AAPCS : CallingConv<[
+def RetCC_ARM_AAPCS_Common : CallingConv<[
+  CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+  CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS : CallingConv<[
+  CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
   CCIfType<[f32], CCBitConvertToType<i32>>,
+  CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS : CallingConv<[
   CCIfType<[f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
+  CCIfType<[f32], CCBitConvertToType<i32>>,
+  CCDelegateTo<RetCC_ARM_AAPCS_Common>
+]>;
 
-  CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
-  CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+//===----------------------------------------------------------------------===//
+// ARM AAPCS-VFP (EABI) Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS_VFP : CallingConv<[
+  CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+                                 S9, S10, S11, S12, S13, S14, S15]>>,
+  CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS_VFP : CallingConv<[
+  CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+                                 S9, S10, S11, S12, S13, S14, S15]>>,
+  CCDelegateTo<RetCC_ARM_AAPCS_Common>
 ]>;
 
 //===----------------------------------------------------------------------===//
@@ -77,11 +111,19 @@ def RetCC_ARM_AAPCS : CallingConv<[
 //===----------------------------------------------------------------------===//
 
 def CC_ARM : CallingConv<[
+  CCIfSubtarget<"isAAPCS_ABI()",
+                CCIfSubtarget<"hasVFP2()",
+                              CCIfFloatABI<"Hard",
+                                           CCDelegateTo<CC_ARM_AAPCS_VFP>>>>,
   CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<CC_ARM_AAPCS>>,
   CCDelegateTo<CC_ARM_APCS>
 ]>;
 
 def RetCC_ARM : CallingConv<[
+  CCIfSubtarget<"isAAPCS_ABI()",
+                CCIfSubtarget<"hasVFP2()",
+                              CCIfFloatABI<"Hard",
+                                           CCDelegateTo<RetCC_ARM_AAPCS_VFP>>>>,
   CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<RetCC_ARM_AAPCS>>,
   CCDelegateTo<RetCC_ARM_APCS>
 ]>;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index c0fd9dc..ec8bd1f 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1101,7 +1101,12 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
       else
         RC = ARM::GPRRegisterClass;
 
-      if (RegVT == MVT::f64) {
+      if (FloatABIType == FloatABI::Hard) {
+        if (RegVT == MVT::f32)
+          RC = ARM::SPRRegisterClass;
+        else if (RegVT == MVT::f64)
+          RC = ARM::DPRRegisterClass;
+      } else if (RegVT == MVT::f64) {
         // f64 is passed in pairs of GPRs and must be combined.
         RegVT = MVT::i32;
       } else if (!((RegVT == MVT::i32) || (RegVT == MVT::f32)))
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 680e772..cc9f1a5 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -451,7 +451,7 @@ multiclass AsXI1_bin_c_irs<bits<4> opcod, string opc, PatFrag opnode> {
 /// the function.  The first operand is the ID# for this instruction, the second
 /// is the index into the MachineConstantPool that this is, the third is the
 /// size in bytes of this constant pool entry.
-let isNotDuplicable = 1 in
+let neverHasSideEffects = 1, isNotDuplicable = 1 in
 def CONSTPOOL_ENTRY :
 PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
                     i32imm:$size),
@@ -771,6 +771,7 @@ def STM : AXI4st<(outs),
 //  Move Instructions.
 //
 
+let neverHasSideEffects = 1 in
 def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm,
                  "mov", " $dst, $src", []>, UnaryDP;
 def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
@@ -946,6 +947,7 @@ def MLA   : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
                    [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
 
 // Extra precision multiplies with low / high results
+let neverHasSideEffects = 1 in {
 def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
                                (ins GPR:$a, GPR:$b),
                     "smull", " $ldst, $hdst, $a, $b", []>;
@@ -967,6 +969,7 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst),
                                (ins GPR:$a, GPR:$b),
                     "umaal", " $ldst, $hdst, $a, $b", []>,
                     Requires<[IsARM, HasV6]>;
+} // neverHasSideEffects
 
 // Most significant word multiply
 def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index ffb83a8..54232f6 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -298,6 +298,7 @@ def tADDrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                 "add $dst, $lhs, $rhs",
                 [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>;
 
+let neverHasSideEffects = 1 in
 def tADDhirr : TIt<(outs tGPR:$dst), (ins GPR:$lhs, GPR:$rhs),
                    "add $dst, $rhs @ addhirr", []>;
 
@@ -387,6 +388,7 @@ def tMOVi8 : TI<(outs tGPR:$dst), (ins i32imm:$src),
 
 // Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy',
 // which is MOV(3).  This also supports high registers.
+let neverHasSideEffects = 1 in {
 def tMOVr       : TI<(outs tGPR:$dst), (ins tGPR:$src),
                       "cpy $dst, $src", []>;
 def tMOVhir2lor : TI<(outs tGPR:$dst), (ins GPR:$src),
@@ -395,6 +397,7 @@ def tMOVlor2hir : TI<(outs GPR:$dst), (ins tGPR:$src),
                       "cpy $dst, $src\t@ lor2hir", []>;
 def tMOVhir2hir : TI<(outs GPR:$dst), (ins GPR:$src),
                       "cpy $dst, $src\t@ hir2hir", []>;
+} // neverHasSideEffects
 
 def tMUL : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                "mul $dst, $rhs",
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 0247daf..9104c77 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -192,11 +192,13 @@ def FCVTSD : AI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
   let Inst{7-4}   = 0b1100;
 }
 
+let neverHasSideEffects = 1 in {
 def FCPYD  : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a),
                  "fcpyd", " $dst, $a", []>;
 
 def FCPYS  : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a),
                  "fcpys", " $dst, $a", []>;
+} // neverHasSideEffects
 
 def FNEGD  : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a),
                  "fnegd", " $dst, $a",
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 963ff0d..684ecb4 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -17,24 +17,31 @@
 #include "ARMAddressingModes.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMRegisterInfo.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
 STATISTIC(NumSTMGened , "Number of stm instructions generated");
 STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
 STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
+STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
+
+/// ARMLoadStoreOpt - Post- register allocation pass that combines
+/// load / store instructions to form ldm / stm instructions.
 
 namespace {
   struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
@@ -81,12 +88,6 @@ namespace {
   char ARMLoadStoreOpt::ID = 0;
 }
 
-/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
-/// optimization pass.
-FunctionPass *llvm::createARMLoadStoreOptimizationPass() {
-  return new ARMLoadStoreOpt();
-}
-
 static int getLoadStoreMultipleOpcode(int Opcode) {
   switch (Opcode) {
   case ARM::LDR:
@@ -582,6 +583,23 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
     RS->forward(prior(Loc));
 }
 
+static int getMemoryOpOffset(const MachineInstr *MI) {
+  // The offset field is the operand at index getNumOperands() - 3.
+  const unsigned OffIdx = MI->getDesc().getNumOperands() - 3;
+  const unsigned OffField = MI->getOperand(OffIdx).getImm();
+
+  // LDR / STR are decoded with the AM2 helpers.
+  const int Opcode = MI->getOpcode();
+  if (Opcode == ARM::LDR || Opcode == ARM::STR) {
+    const int Offset = ARM_AM::getAM2Offset(OffField);
+    return ARM_AM::getAM2Op(OffField) == ARM_AM::sub ? -Offset : Offset;
+  }
+
+  // Everything else goes through the AM5 helpers, whose offset is scaled by 4.
+  const int Offset = ARM_AM::getAM5Offset(OffField) * 4;
+  return ARM_AM::getAM5Op(OffField) == ARM_AM::sub ? -Offset : Offset;
+}
+
 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
 /// ops of the same base and incrementing offset into LDM / STM ops.
 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
@@ -606,22 +624,11 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
     bool isMemOp = isMemoryOp(MBBI);
     if (isMemOp) {
       int Opcode = MBBI->getOpcode();
-      bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
       unsigned Size = getLSMultipleTransferSize(MBBI);
       unsigned Base = MBBI->getOperand(1).getReg();
       unsigned PredReg = 0;
       ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
-      unsigned NumOperands = MBBI->getDesc().getNumOperands();
-      unsigned OffField = MBBI->getOperand(NumOperands-3).getImm();
-      int Offset = isAM2
-        ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
-      if (isAM2) {
-        if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
-          Offset = -Offset;
-      } else {
-        if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
-          Offset = -Offset;
-      }
+      int Offset = getMemoryOpOffset(MBBI);
       // Watch out for:
       // r4 := ldr [r5]
       // r5 := ldr [r5, #4]
@@ -744,6 +751,17 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
   return NumMerges > 0;
 }
 
+namespace {
+  /// OffsetCompare - Orders memory instructions by descending offset.
+  struct OffsetCompare {
+    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+      int LOffset = getMemoryOpOffset(LHS), ROffset = getMemoryOpOffset(RHS);
+      assert(LHS == RHS || LOffset != ROffset);
+      return ROffset < LOffset;
+    }
+  };
+}
+
 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return op
 /// (bx lr) into the preceeding stack restore so it directly restore the value
 /// of LR into pc.
@@ -788,3 +806,277 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   delete RS;
   return Modified;
 }
+
+
+/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that moves
+/// loads / stores of consecutive locations close together to make it more
+/// likely they will be combined later.
+
+namespace {
+  struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
+    static char ID;
+    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
+    // Per-function state; not set by the constructor, see runOnMachineFunction.
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    MachineRegisterInfo *MRI;
+    // Runs RescheduleLoadStoreInstrs on each block; true if any block changed.
+    virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+    virtual const char *getPassName() const {
+      return "ARM pre- register allocation load / store optimization pass";
+    }
+
+  private:
+    bool RescheduleOps(MachineBasicBlock *MBB,
+                       SmallVector<MachineInstr*, 4> &Ops,
+                       unsigned Base, bool isLd,
+                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
+    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
+  };
+  char ARMPreAllocLoadStoreOpt::ID = 0;
+}
+
+/// runOnMachineFunction - Cache the target hooks and register info of Fn,
+/// then try to reschedule the loads / stores of each of its basic blocks.
+bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+  const TargetMachine &Target = Fn.getTarget();
+  TII = Target.getInstrInfo();
+  TRI = Target.getRegisterInfo();
+  MRI = &Fn.getRegInfo();
+  bool Changed = false;
+  for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+    Changed |= RescheduleLoadStoreInstrs(I);
+  return Changed;
+}
+
+/// IsSafeToMove - Return true if no instruction strictly between I and E
+/// prevents the loads (isLd) or stores in MoveOps from being moved together:
+/// calls, terminators, side effects, conflicting memory ops or a def of Base.
+static bool IsSafeToMove(bool isLd, unsigned Base,
+                         MachineBasicBlock::iterator I,
+                         MachineBasicBlock::iterator E,
+                         const SmallPtrSet<MachineInstr*, 4> &MoveOps,
+                         const TargetRegisterInfo *TRI) {
+  // FIXME: This is overly conservative. We should make use of alias information
+  // some day.
+  while (++I != E) {
+    const TargetInstrDesc &TID = I->getDesc();
+    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
+      return false;
+    // A load may not cross a store, and a store may not cross a load.
+    if (isLd ? TID.mayStore() : TID.mayLoad())
+      return false;
+    // It's not safe to move the first 'str' down.
+    // str r1, [r0]
+    // strh r5, [r0]
+    // str r4, [r0, #+4]
+    if (!isLd && TID.mayStore() && !MoveOps.count(&*I))
+      return false;
+    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
+      const MachineOperand &MO = I->getOperand(j);
+      // A def of a register overlapping Base in between blocks the move.
+      if (MO.isReg() && MO.isDef() && TRI->regsOverlap(MO.getReg(), Base))
+        return false;
+    }
+  }
+  return true;
+}
+
+/// RescheduleOps - Ops holds the loads (isLd) or stores off base reg Base.
+/// Find runs of up to four ops accessing consecutive locations with the same
+/// transfer size and, if they are close enough and it is safe, move each run
+/// together. Ops is popped as it is processed. Returns true if ops were moved.
+bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
+                                 SmallVector<MachineInstr*, 4> &Ops,
+                                 unsigned Base, bool isLd,
+                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
+  bool RetVal = false;
+
+  // Sort by offset (in reverse order).
+  std::sort(Ops.begin(), Ops.end(), OffsetCompare());
+
+  // The loads / stores of the same base are in order. Scan them from first to
+  // last and check for the following:
+  // 1. Any def of base.
+  // 2. Any gaps.
+  while (Ops.size() > 1) {
+    unsigned FirstLoc = ~0U, LastLoc = 0;
+    MachineInstr *FirstOp = 0, *LastOp = 0;
+    int LastOffset = 0;
+    unsigned LastBytes = 0, NumMove = 0;
+    for (int i = Ops.size() - 1; i >= 0; --i) {
+      MachineInstr *Op = Ops[i];
+      int Offset = getMemoryOpOffset(Op);
+      unsigned Bytes = getLSMultipleTransferSize(Op);
+      // An op breaking the run is not moved, so it must not set First/LastOp.
+      if (LastBytes &&
+          (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)))
+        break;
+      LastOffset = Offset;
+      LastBytes = Bytes;
+      unsigned Loc = MI2LocMap[Op];
+      if (Loc <= FirstLoc) {
+        FirstLoc = Loc;
+        FirstOp = Op;
+      }
+      if (Loc >= LastLoc) {
+        LastLoc = Loc;
+        LastOp = Op;
+      }
+      if (++NumMove == 4)
+        break;
+    }
+
+    if (NumMove <= 1)
+      Ops.pop_back();
+    else {
+      // The run is the last NumMove entries of Ops, the ones popped below.
+      SmallPtrSet<MachineInstr*, 4> MoveOps;
+      for (unsigned i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i)
+        MoveOps.insert(Ops[i]);
+
+      // Be conservative, if the instructions are too far apart, don't
+      // move them. We want to limit the increase of register pressure.
+      bool DoMove = (LastLoc - FirstLoc) < NumMove*4;
+      if (DoMove)
+        DoMove = IsSafeToMove(isLd, Base, FirstOp, LastOp, MoveOps, TRI);
+      if (!DoMove) {
+        for (unsigned i = 0; i != NumMove; ++i)
+          Ops.pop_back();
+      } else {
+        // This is the new location for the loads / stores.
+        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
+        while (InsertPos != MBB->end() && MoveOps.count(InsertPos))
+          ++InsertPos;
+        for (unsigned i = 0; i != NumMove; ++i) {
+          MachineInstr *Op = Ops.back();
+          Ops.pop_back();
+          MBB->splice(InsertPos, MBB, Op);
+        }
+        NumLdStMoved += NumMove;
+        RetVal = true;
+      }
+    }
+  }
+
+  return RetVal;
+}
+
+/// RescheduleLoadStoreInstrs - Walk MBB one region at a time. A region ends
+/// at a call or terminator, or right before a load / store whose base and
+/// offset combination was already seen in the region. Within a region the
+/// unpredicated memory ops are grouped by base register, loads and stores
+/// separately, and every group of more than one op is handed to
+/// RescheduleOps. Returns true if any instruction was moved.
+bool
+ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
+  typedef SmallVector<MachineInstr*, 4> OpListTy;
+  typedef DenseMap<unsigned, OpListTy> Base2OpsMapTy;
+
+  bool RetVal = false;
+
+  // Position of each visited instruction; used to tell how far apart ops are.
+  DenseMap<MachineInstr*, unsigned> MI2LocMap;
+  // Loads / stores of the current region, keyed by base register.
+  Base2OpsMapTy Base2LdsMap;
+  Base2OpsMapTy Base2StsMap;
+  // Base registers in the order they were first seen in the current region.
+  SmallVector<unsigned, 4> LdBases;
+  SmallVector<unsigned, 4> StBases;
+
+  // Loc is the position handed out to the next visited instruction.
+  unsigned Loc = 0;
+  MachineBasicBlock::iterator MBBI = MBB->begin();
+  MachineBasicBlock::iterator E = MBB->end();
+  while (MBBI != E) {
+    // Collect the memory ops of one region.
+    for (; MBBI != E; ++MBBI) {
+      MachineInstr *MI = MBBI;
+      const TargetInstrDesc &TID = MI->getDesc();
+      if (TID.isCall() || TID.isTerminator()) {
+        // Stop at barriers.
+        ++MBBI;
+        break;
+      }
+
+      MI2LocMap[MI] = Loc++;
+      if (!isMemoryOp(MI))
+        continue;
+      // Predicated loads / stores are left alone.
+      unsigned PredReg = 0;
+      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
+        continue;
+
+      int Opcode = MI->getOpcode();
+      bool isLd = Opcode == ARM::LDR ||
+        Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+      unsigned Base = MI->getOperand(1).getReg();
+      int Offset = getMemoryOpOffset(MI);
+
+      // Loads and stores are tracked the same way, in separate containers.
+      Base2OpsMapTy &Base2Ops = isLd ? Base2LdsMap : Base2StsMap;
+      SmallVector<unsigned, 4> &Bases = isLd ? LdBases : StBases;
+
+      Base2OpsMapTy::iterator BI = Base2Ops.find(Base);
+      if (BI == Base2Ops.end()) {
+        // First op off this base in the region: start a new list for it.
+        OpListTy MIs;
+        MIs.push_back(MI);
+        Base2Ops[Base] = MIs;
+        Bases.push_back(Base);
+        continue;
+      }
+
+      // Look for an earlier op off the same base with the same offset.
+      OpListTy &Seen = BI->second;
+      bool StopHere = false;
+      for (unsigned i = 0, e = Seen.size(); i != e && !StopHere; ++i)
+        StopHere = Offset == getMemoryOpOffset(Seen[i]);
+
+      if (StopHere) {
+        // Found a duplicate (a base+offset combination that's seen earlier).
+        // Backtrack: MBBI stays on MI, which is visited again as the first
+        // instruction of the next region.
+        --Loc;
+        break;
+      }
+      Seen.push_back(MI);
+    }
+
+    // Re-schedule loads.
+    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
+      unsigned LdBase = LdBases[i];
+      OpListTy &Lds = Base2LdsMap[LdBase];
+      if (Lds.size() > 1)
+        RetVal |= RescheduleOps(MBB, Lds, LdBase, true, MI2LocMap);
+    }
+
+    // Re-schedule stores.
+    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
+      unsigned StBase = StBases[i];
+      OpListTy &Sts = Base2StsMap[StBase];
+      if (Sts.size() > 1)
+        RetVal |= RescheduleOps(MBB, Sts, StBase, false, MI2LocMap);
+    }
+
+    // More instructions to look at: start the next region from scratch.
+    if (MBBI != E) {
+      Base2LdsMap.clear();
+      Base2StsMap.clear();
+      LdBases.clear();
+      StBases.clear();
+    }
+  }
+
+  return RetVal;
+}
+
+
+/// createARMLoadStoreOptimizationPass - Returns a new load / store optimization
+/// pass: the pre- register allocation one if PreAlloc, else the regular one.
+FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
+  if (!PreAlloc)
+    return new ARMLoadStoreOpt();
+  return new ARMPreAllocLoadStoreOpt();
+}
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index b95d1f9..ebe7d58 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -219,3 +219,18 @@ def DPR : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8,
 
 // Condition code registers.
 def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>;
+
+//===----------------------------------------------------------------------===//
+// Subregister Set Definitions... now that we have all of the pieces, define the
+// sub registers for each register.
+//
+
+def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7,
+                    D8, D9, D10, D11, D12, D13, D14, D15],
+                   [S0, S2, S4, S6, S8, S10, S12, S14,
+                    S16, S18, S20, S22, S24, S26, S28, S30]>;
+
+def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7,
+                    D8, D9, D10, D11, D12, D13, D14, D15],
+                   [S1, S3, S5, S7, S9, S11, S13, S15,
+                    S17, S19, S21, S23, S25, S27, S29, S31]>;
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index ef78cd5..a978380 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -14,6 +14,8 @@
 #include "ARMSubtarget.h"
 #include "ARMGenSubtarget.inc"
 #include "llvm/Module.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
 ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
@@ -28,6 +30,10 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
   , CPUString("generic")
   , TargetType(isELF) // Default to ELF unless otherwise specified.
   , TargetABI(ARM_ABI_APCS) {
+  // default to soft float ABI
+  if (FloatABIType == FloatABI::Default)
+    FloatABIType = FloatABI::Soft;
+
   // Determine default and user specified characteristics
 
   // Parse features string.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 8b469cf..0704055 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -23,7 +23,7 @@ class Module;
 class ARMSubtarget : public TargetSubtarget {
 protected:
   enum ARMArchEnum {
-    V4T, V5T, V5TE, V6, V7A
+    V4T, V5T, V5TE, V6, V6T2, V7A
   };
 
   enum ARMFPEnum {
@@ -92,6 +92,7 @@ protected:
   bool hasV5TOps()  const { return ARMArchVersion >= V5T;  }
   bool hasV5TEOps() const { return ARMArchVersion >= V5TE; }
   bool hasV6Ops()   const { return ARMArchVersion >= V6;   }
+  bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; }
   bool hasV7Ops()   const { return ARMArchVersion >= V7A;  }
 
   bool hasVFP2() const { return ARMFPUType >= VFPv2; }
@@ -105,6 +106,7 @@ protected:
   bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
 
   bool isThumb() const { return IsThumb; }
+  bool isThumb1() const { return IsThumb && (ThumbMode == Thumb1); }
   bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); }
 
   bool useThumbBacktraces() const { return UseThumbBacktraces; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 1dc7d19..7033907 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -23,6 +23,9 @@
 #include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
+static cl::opt<bool>
+EnablePreLdStOpti("arm-pre-alloc-loadstore-opti", cl::Hidden,
+                  cl::desc("Enable pre-regalloc load store optimization pass"));
 static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden,
                               cl::desc("Disable load store optimization pass"));
 static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden,
@@ -144,6 +147,16 @@ bool ARMTargetMachine::addInstSelector(PassManagerBase &PM,
   return false;
 }
 
+/// addPreRegAlloc - If enabled, add the pre-regalloc load / store opti pass.
+bool ARMTargetMachine::addPreRegAlloc(PassManagerBase &PM,
+                                      CodeGenOpt::Level OptLevel) {
+  if (!EnablePreLdStOpti) return false;
+  // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
+  if (!Subtarget.isThumb() && !DisableLdStOpti && OptLevel != CodeGenOpt::None)
+    PM.add(createARMLoadStoreOptimizationPass(true));
+  return true;
+}
+
 bool ARMTargetMachine::addPreEmitPass(PassManagerBase &PM,
                                       CodeGenOpt::Level OptLevel) {
   // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 916a8aa..7192c1b 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -71,6 +71,7 @@ public:
 
   // Pass Pipeline Configuration
   virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+  virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addAssemblyEmitter(PassManagerBase &PM,
                                   CodeGenOpt::Level OptLevel,
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 1cf0a91..7cffd0e 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_library(LLVMTarget
   Target.cpp
   TargetAsmInfo.cpp
   TargetData.cpp
+  TargetELFWriterInfo.cpp
   TargetFrameInfo.cpp
   TargetInstrInfo.cpp
   TargetMachOWriterInfo.cpp
@@ -14,4 +15,4 @@ add_llvm_library(LLVMTarget
   TargetSubtarget.cpp
   )
 
-# TODO: Support other targets besides X86. See Makefile.
-\ No newline at end of file
+# TODO: Support other targets besides X86. See Makefile.
diff --git a/lib/Target/PIC16/PIC16AsmPrinter.cpp b/lib/Target/PIC16/PIC16AsmPrinter.cpp
index b42ee45..f9a8801 100644
--- a/lib/Target/PIC16/PIC16AsmPrinter.cpp
+++ b/lib/Target/PIC16/PIC16AsmPrinter.cpp
@@ -33,8 +33,9 @@ bool PIC16AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
   return true;
 }
 
-/// runOnMachineFunction - This uses the printInstruction()
-/// method to print assembly for each instruction.
+/// runOnMachineFunction - This emits the frame section, autos section and 
+/// assembly for each instruction. Also takes care of function begin debug
+/// directive and file begin debug directive (if required) for the function.
 ///
 bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   this->MF = &MF;
@@ -47,20 +48,38 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   const Function *F = MF.getFunction();
   CurrentFnName = Mang->getValueName(F);
 
-  DbgInfo.EmitFileDirective(F);
-  // Emit the function variables.
+  // Iterate over the first basic block instructions to find if it has a
+  // DebugLoc. If so emit .file directive. Instructions such as movlw do not
+  // have valid DebugLoc, so need to iterate over instructions.
+  MachineFunction::const_iterator I = MF.begin();
+  for (MachineBasicBlock::const_iterator MBBI = I->begin(), E = I->end();
+       MBBI != E; MBBI++) {
+    const DebugLoc DLoc = MBBI->getDebugLoc();
+    if (!DLoc.isUnknown()) {
+      GlobalVariable *CU = MF.getDebugLocTuple(DLoc).CompileUnit;
+      unsigned line = MF.getDebugLocTuple(DLoc).Line;
+      DbgInfo.EmitFileDirective(CU);
+      DbgInfo.SetFunctBeginLine(line);
+      break;
+    }
+  }
+
+  // Emit the function frame (args and temps).
   EmitFunctionFrame(MF);
 
-  // Emit function begin debug directives
+  // Emit function begin debug directive.
   DbgInfo.EmitFunctBeginDI(F);
 
+  // Emit the autos section of function.
   EmitAutos(CurrentFnName);
+
+  // Now emit the instructions of function in its code section.
   const char *codeSection = PAN::getCodeSectionName(CurrentFnName).c_str();
  
   const Section *fCodeSection = TAI->getNamedSection(codeSection,
                                                      SectionFlags::Code);
-  O <<  "\n";
   // Start the Code Section.
+  O <<  "\n";
   SwitchToSection (fCodeSection);
 
   // Emit the frame address of the function at the beginning of code.
@@ -77,14 +96,17 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   // Print out code for the function.
   for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
        I != E; ++I) {
+
     // Print a label for the basic block.
     if (I != MF.begin()) {
       printBasicBlockLabel(I, true);
       O << '\n';
     }
     
+    // Print a basic block.
     for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
          II != E; ++II) {
+
       // Emit the line directive if source line changed.
       const DebugLoc DL = II->getDebugLoc();
       if (!DL.isUnknown()) {
@@ -102,6 +124,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   
   // Emit function end debug directives.
   DbgInfo.EmitFunctEndDI(F, CurLine);
+
   return false;  // we didn't modify anything.
 }
 
@@ -158,11 +181,16 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
   }
 }
 
+/// printCCOperand - Print the cond code operand.
+///
 void PIC16AsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) {
   int CC = (int)MI->getOperand(opNum).getImm();
   O << PIC16CondCodeToString((PIC16CC::CondCodes)CC);
 }
 
+/// printLibcallDecls - print the extern declarations for compiler 
+/// intrinsics.
+///
 void PIC16AsmPrinter::printLibcallDecls(void) {
   // If no libcalls used, return.
   if (LibcallDecls.empty()) return;
@@ -180,6 +208,10 @@ void PIC16AsmPrinter::printLibcallDecls(void) {
   O << TAI->getCommentString() << "External decls for libcalls - END." <<"\n";
 }
 
+/// doInitialization - Perform Module level initializations here.
+/// One task that we do here is to sectionize all global variables.
+/// The MemSelOptimizer pass depends on the sectionizing.
+///
 bool PIC16AsmPrinter::doInitialization (Module &M) {
   bool Result = AsmPrinter::doInitialization(M);
 
@@ -194,23 +226,23 @@ bool PIC16AsmPrinter::doInitialization (Module &M) {
     I->setSection(TAI->SectionForGlobal(I)->getName());
   }
 
-  DbgInfo.EmitFileDirective(M);
+  DbgInfo.Init(M);
   EmitFunctionDecls(M);
   EmitUndefinedVars(M);
   EmitDefinedVars(M);
   EmitIData(M);
   EmitUData(M);
   EmitRomData(M);
-  DbgInfo.PopulateFunctsDI(M);
   return Result;
 }
 
-// Emit extern decls for functions imported from other modules, and emit
-// global declarations for function defined in this module and which are
-// available to other modules.
+/// Emit extern decls for functions imported from other modules, and emit
+/// global declarations for function defined in this module and which are
+/// available to other modules.
+///
 void PIC16AsmPrinter::EmitFunctionDecls (Module &M) {
  // Emit declarations for external functions.
-  O << TAI->getCommentString() << "Function Declarations - BEGIN." <<"\n";
+  O <<"\n"<<TAI->getCommentString() << "Function Declarations - BEGIN." <<"\n";
   for (Module::iterator I = M.begin(), E = M.end(); I != E; I++) {
     std::string Name = Mang->getValueName(I);
     if (Name.compare("@abort") == 0)
@@ -280,6 +312,7 @@ void PIC16AsmPrinter::EmitRomData (Module &M)
 
 bool PIC16AsmPrinter::doFinalization(Module &M) {
   printLibcallDecls();
+  EmitRemainingAutos();
   DbgInfo.EmitVarDebugInfo(M);
   DbgInfo.EmitEOF();
   O << "\n\t" << "END\n";
@@ -383,6 +416,8 @@ void PIC16AsmPrinter::EmitAutos (std::string FunctName)
   for (unsigned i = 0; i < AutosSections.size(); i++) {
     O << "\n";
     if (AutosSections[i]->S_->getName() == SectionName) { 
+      // Set the printing status to true
+      AutosSections[i]->setPrintedStatus(true);
       SwitchToSection(AutosSections[i]->S_);
       std::vector<const GlobalVariable*> Items = AutosSections[i]->Items;
       for (unsigned j = 0; j < Items.size(); j++) {
@@ -398,3 +433,34 @@ void PIC16AsmPrinter::EmitAutos (std::string FunctName)
   }
 }
 
+/// EmitRemainingAutos - Print the autos sections that were not printed (and
+/// flagged as printed) while emitting the code of the functions, for instance
+/// because the function itself got deleted by the optimizer.
+void PIC16AsmPrinter::EmitRemainingAutos()
+{
+  const TargetData *TD = TM.getTargetData();
+
+  // The list is only read here, so bind it by reference instead of copying.
+  const std::vector<PIC16Section *> &AutosSections = PTAI->AutosSections;
+  for (unsigned i = 0; i < AutosSections.size(); i++) {
+    PIC16Section *Sec = AutosSections[i];
+    // If the section is already printed then don't print it again.
+    if (Sec->isPrinted())
+      continue;
+
+    // Set status as printed.
+    Sec->setPrintedStatus(true);
+
+    O << "\n";
+    SwitchToSection(Sec->S_);
+    const std::vector<const GlobalVariable*> &Items = Sec->Items;
+    for (unsigned j = 0; j < Items.size(); j++) {
+      std::string VarName = Mang->getValueName(Items[j]);
+      const Type *Ty = Items[j]->getInitializer()->getType();
+      unsigned Size = TD->getTypeAllocSize(Ty);
+      // Emit memory reserve directive.
+      O << VarName << "  RES  " << Size << "\n";
+    }
+  }
+}
+
diff --git a/lib/Target/PIC16/PIC16AsmPrinter.h b/lib/Target/PIC16/PIC16AsmPrinter.h
index 2545dfd..8bdcf72 100644
--- a/lib/Target/PIC16/PIC16AsmPrinter.h
+++ b/lib/Target/PIC16/PIC16AsmPrinter.h
@@ -52,6 +52,7 @@ namespace llvm {
     void EmitIData (Module &M);
     void EmitUData (Module &M);
     void EmitAutos (std::string FunctName);
+    void EmitRemainingAutos ();
     void EmitRomData (Module &M);
     void EmitFunctionFrame(MachineFunction &MF);
     void printLibcallDecls(void);
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
index faf4590..d7ebea7 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@@ -18,13 +18,6 @@
 
 using namespace llvm;
 
-PIC16DbgInfo::~PIC16DbgInfo() {
-  for(std::map<std::string, DISubprogram *>::iterator i = FunctNameMap.begin();
-      i!=FunctNameMap.end(); i++) 
-    delete i->second;
-  FunctNameMap.clear();
-}
-
 void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo,
                                      bool &HasAux, int Aux[], 
                                      std::string &TypeName) {
@@ -70,7 +63,7 @@ void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo,
         }
         HasAux = true;
         // In auxillary entry for array, 7th and 8th byte represent array size.
-        Aux[6] = size;
+        Aux[6] = size & 0xff;
         Aux[7] = size >> 8;
         DIType BaseType = CTy.getTypeDerivedFrom();
         PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName);
@@ -86,10 +79,14 @@ void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo,
         else
           TypeNo = TypeNo | PIC16Dbg::T_UNION;
         CTy.getName(TypeName);
-        unsigned size = CTy.getSizeInBits()/8;
+        // UniqueSuffix is .number where number is obtained from 
+        // llvm.dbg.composite<number>.
+        std::string UniqueSuffix = "." + Ty.getGV()->getName().substr(18);
+        TypeName += UniqueSuffix;
+        unsigned short size = CTy.getSizeInBits()/8;
         // 7th and 8th byte represent size.   
         HasAux = true;
-        Aux[6] = size;
+        Aux[6] = size & 0xff;
         Aux[7] = size >> 8;
         break;
       }
@@ -145,37 +142,84 @@ short PIC16DbgInfo::getClass(DIGlobalVariable DIGV) {
   return ClassNo;
 }
 
-void PIC16DbgInfo::PopulateFunctsDI(Module &M) {
-  GlobalVariable *Root = M.getGlobalVariable("llvm.dbg.subprograms");
-  if (!Root)
-    return;
-  Constant *RootC = cast<Constant>(*Root->use_begin());
-
-  for (Value::use_iterator UI = RootC->use_begin(), UE = Root->use_end();
-       UI != UE; ++UI)
-    for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
-         UUI != UUE; ++UUI) {
-      GlobalVariable *GVSP = cast<GlobalVariable>(*UUI);
-      DISubprogram *SP = new DISubprogram(GVSP);
-      std::string Name;
-      SP->getLinkageName(Name);
-      FunctNameMap[Name] = SP; 
-    }
-  return;
+void PIC16DbgInfo::Init(Module &M) {
+  // Do all debug related initializations here.
+  EmitFileDirective(M);
+  EmitCompositeTypeDecls(M);
 }
 
-DISubprogram* PIC16DbgInfo::getFunctDI(std::string FunctName) {
-  return FunctNameMap[FunctName];
+/// EmitCompositeTypeDecls - For every structure or union type described by a
+/// global with llvm.dbg.composite in its name, emit the tag symbol, one symbol
+/// per member and the end-of-structure symbol, with their auxiliary entries.
+void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
+  for(iplist<GlobalVariable>::iterator I = M.getGlobalList().begin(),
+      E = M.getGlobalList().end(); I != E; I++) {
+    // Structures and union declaration's debug info has llvm.dbg.composite
+    // in its name.
+    std::string DIVar = I->getName();
+    if (DIVar.find("llvm.dbg.composite") == std::string::npos)
+      continue;
+
+    DICompositeType CTy(&*I);
+    const bool IsUnion = CTy.getTag() == dwarf::DW_TAG_union_type;
+    if (!IsUnion && CTy.getTag() != dwarf::DW_TAG_structure_type)
+      continue;
+
+    std::string name;
+    CTy.getName(name);
+    // Get the number after llvm.dbg.composite and make UniqueSuffix from it.
+    std::string UniqueSuffix = "." + DIVar.substr(18);
+    std::string MangledCTyName = name + UniqueSuffix;
+    unsigned short size = CTy.getSizeInBits()/8;
+    int Aux[PIC16Dbg::AuxSize] = {0};
+    // 7th and 8th byte represent size of structure/union.
+    Aux[6] = size & 0xff;
+    Aux[7] = size >> 8;
+    // Emit .def for structure/union tag.
+    if (IsUnion)
+      EmitSymbol(MangledCTyName, PIC16Dbg::C_UNTAG);
+    else
+      EmitSymbol(MangledCTyName, PIC16Dbg::C_STRTAG);
+    // Emit auxiliary debug information for structure/union tag.
+    EmitAuxEntry(MangledCTyName, Aux, PIC16Dbg::AuxSize);
+
+    // Value stays 0 for union members; for a structure it accumulates the
+    // sizes of the members already emitted.
+    const short MemberClass = IsUnion ? PIC16Dbg::C_MOU : PIC16Dbg::C_MOS;
+    unsigned long Value = 0;
+    DIArray Elements = CTy.getTypeArray();
+    for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) {
+      DIDerivedType DITy(Elements.getElement(i).getGV());
+      std::string ElementName;
+      DITy.getName(ElementName);
+      unsigned short ElementSize = DITy.getSizeInBits()/8;
+      // Get mangled name for this structure/union element.
+      std::string MangMemName = ElementName + UniqueSuffix;
+
+      unsigned short TypeNo = 0;
+      bool HasAux = false;
+      int ElementAux[PIC16Dbg::AuxSize] = { 0 };
+      std::string TypeName = "";
+      PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TypeName);
+      EmitSymbol(MangMemName, MemberClass, TypeNo, Value);
+      if (!IsUnion)
+        Value += ElementSize;
+      if (HasAux)
+        EmitAuxEntry(MangMemName, ElementAux, PIC16Dbg::AuxSize, TypeName);
+    }
+    // Emit mangled Symbol for end of structure/union.
+    std::string EOSSymbol = ".eos" + UniqueSuffix;
+    EmitSymbol(EOSSymbol, PIC16Dbg::C_EOS);
+    EmitAuxEntry(EOSSymbol, Aux, PIC16Dbg::AuxSize, MangledCTyName);
+  }
 }
 
 void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) {
   std::string FunctName = F->getName();
-  DISubprogram *SP = getFunctDI(FunctName);
-  if (SP) {
+  if (EmitDebugDirectives) {
     std::string FunctBeginSym = ".bf." + FunctName;
     std::string BlockBeginSym = ".bb." + FunctName;
 
-    int FunctBeginLine = SP->getLineNumber();
     int BFAux[PIC16Dbg::AuxSize] = {0};
     BFAux[4] = FunctBeginLine;
     BFAux[5] = FunctBeginLine >> 8;
@@ -189,8 +233,7 @@ void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) {
 
 void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) {
   std::string FunctName = F->getName();
-  DISubprogram *SP = getFunctDI(FunctName);
-  if (SP) {
+  if (EmitDebugDirectives) {
     std::string FunctEndSym = ".ef." + FunctName;
     std::string BlockEndSym = ".eb." + FunctName;
 
@@ -208,14 +251,21 @@ void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) {
 
 /// EmitAuxEntry - Emit Auxiliary debug information.
 ///
-void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int num) {
+void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int num,
+                                std::string tag) {
   O << "\n\t.dim " << VarName << ", 1" ;
+  if (tag != "")
+    O << ", " << tag;
   for (int i = 0; i<num; i++)
     O << "," << Aux[i];
 }
 
-void PIC16DbgInfo::EmitSymbol(std::string Name, int Class) {
-  O << "\n\t" << ".def "<< Name << ", debug, class = " << Class;
+void PIC16DbgInfo::EmitSymbol(std::string Name, short Class, unsigned short
+                              Type, unsigned long Value) {
+  O << "\n\t" << ".def "<< Name << ", type = " << Type << ", class = " 
+    << Class;
+  if (Value > 0)
+    O  << ", value = " << Value;
 }
 
 void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
@@ -241,18 +291,8 @@ void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
         O << "\n\t.type " << VarName << ", " << TypeNo;
         short ClassNo = getClass(DIGV);
         O << "\n\t.class " << VarName << ", " << ClassNo;
-        if (HasAux) {
-          if (TypeName != "") {
-           // Emit debug info for structure and union objects after
-           // .dim directive supports structure/union tag name in aux entry.
-           /* O << "\n\t.dim " << VarName << ", 1," << TypeName;
-            for (int i = 0; i<PIC16Dbg::AuxSize; i++)
-              O << "," << Aux[i];*/
-         }
-          else {
-            EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize);
-          }
-        }
+        if (HasAux) 
+          EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TypeName);
       }
     }
   }
@@ -262,26 +302,20 @@ void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
 void PIC16DbgInfo::EmitFileDirective(Module &M) {
   GlobalVariable *CU = M.getNamedGlobal("llvm.dbg.compile_unit");
   if (CU) {
-    DICompileUnit DIUnit(CU);
-    std::string Dir, FN;
-    std::string File = DIUnit.getDirectory(Dir) + "/" + DIUnit.getFilename(FN);
-    O << "\n\t.file\t\"" << File << "\"\n" ;
-    CurFile = File;
+    EmitDebugDirectives = true;
+    EmitFileDirective(CU, false);
   }
 }
 
-void PIC16DbgInfo::EmitFileDirective(const Function *F) {
-  std::string FunctName = F->getName();
-  DISubprogram *SP = getFunctDI(FunctName);
-  if (SP) {
-    std::string Dir, FN;
-    DICompileUnit CU = SP->getCompileUnit();
-    std::string File = CU.getDirectory(Dir) + "/" + CU.getFilename(FN);
-    if ( File != CurFile) {
+void PIC16DbgInfo::EmitFileDirective(GlobalVariable *CU, bool EmitEof) {
+  std::string Dir, FN;
+  DICompileUnit DIUnit(CU);
+  std::string File = DIUnit.getDirectory(Dir) + "/" + DIUnit.getFilename(FN);
+  if ( File != CurFile ) {
+    if (EmitEof)
       EmitEOF();
-      O << "\n\t.file\t\"" << File << "\"\n" ;
-      CurFile = File;
-    }
+    O << "\n\t.file\t\"" << File << "\"\n" ;
+    CurFile = File;
   }
 }
 
@@ -290,3 +324,6 @@ void PIC16DbgInfo::EmitEOF() {
     O << "\n\t.EOF";
 }
 
+void PIC16DbgInfo::SetFunctBeginLine(unsigned line) {
+  FunctBeginLine = line;
+}
diff --git a/lib/Target/PIC16/PIC16DebugInfo.h b/lib/Target/PIC16/PIC16DebugInfo.h
index be39393..9d50380 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.h
+++ b/lib/Target/PIC16/PIC16DebugInfo.h
@@ -91,29 +91,36 @@ namespace llvm {
   class raw_ostream;
 
   class PIC16DbgInfo {
-    std::map <std::string, DISubprogram *> FunctNameMap;
     raw_ostream &O;
     const TargetAsmInfo *TAI;
     std::string CurFile;
+    // EmitDebugDirectives is set if debug information is available. Default
+    // value for it is false.
+    bool EmitDebugDirectives;
+    unsigned FunctBeginLine;
   public:
     PIC16DbgInfo(raw_ostream &o, const TargetAsmInfo *T) : O(o), TAI(T) {
-      CurFile = "";  
+      CurFile = ""; 
+      EmitDebugDirectives = false; 
     }
-    ~PIC16DbgInfo();
     void PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, bool &HasAux,
                            int Aux[], std::string &TypeName);
     unsigned GetTypeDebugNumber(std::string &type);
     short getClass(DIGlobalVariable DIGV);
-    void PopulateFunctsDI(Module &M);
-    DISubprogram *getFunctDI(std::string FunctName);
     void EmitFunctBeginDI(const Function *F);
+    void Init(Module &M);
+    void EmitCompositeTypeDecls(Module &M);
     void EmitFunctEndDI(const Function *F, unsigned Line);
-    void EmitAuxEntry(const std::string VarName, int Aux[], int num);
-    inline void EmitSymbol(std::string Name, int Class);
+    void EmitAuxEntry(const std::string VarName, int Aux[], 
+                      int num = PIC16Dbg::AuxSize, std::string tag = "");
+    inline void EmitSymbol(std::string Name, short Class, 
+                           unsigned short Type = PIC16Dbg::T_NULL, 
+                           unsigned long Value = 0);
     void EmitVarDebugInfo(Module &M);
     void EmitFileDirective(Module &M);
-    void EmitFileDirective(const Function *F);
+    void EmitFileDirective(GlobalVariable *CU, bool EmitEof = true);
     void EmitEOF();
+    void SetFunctBeginLine(unsigned line);
   };
 } // end namespace llvm;
 #endif
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index ac9a143..ba465f3 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -56,6 +56,17 @@ static const char *getIntrinsicName(unsigned opcode) {
   case RTLIB::SREM_I32: Basename = "srem.i32"; break;
   case RTLIB::UREM_I16: Basename = "urem.i16"; break;
   case RTLIB::UREM_I32: Basename = "urem.i32"; break;
+
+  case RTLIB::FPTOSINT_F32_I32:
+               Basename = "f32_to_si32"; break;
+  case RTLIB::SINTTOFP_I32_F32:
+               Basename = "si32_to_f32"; break;
+               
+  case RTLIB::ADD_F32: Basename = "add.f32"; break;
+  case RTLIB::SUB_F32: Basename = "sub.f32"; break;
+  case RTLIB::MUL_F32: Basename = "mul.f32"; break;
+  case RTLIB::DIV_F32: Basename = "div.f32"; break;
+  
   }
   
   std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
@@ -113,7 +124,17 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
   // Unsigned remainder lib call names
   setLibcallName(RTLIB::UREM_I16, getIntrinsicName(RTLIB::UREM_I16));
   setLibcallName(RTLIB::UREM_I32, getIntrinsicName(RTLIB::UREM_I32));
-  
+ 
+  // Floating point operations
+  setLibcallName(RTLIB::FPTOSINT_F32_I32, 
+                 getIntrinsicName(RTLIB::FPTOSINT_F32_I32));
+  setLibcallName(RTLIB::SINTTOFP_I32_F32, 
+                 getIntrinsicName(RTLIB::SINTTOFP_I32_F32));
+  setLibcallName(RTLIB::ADD_F32, getIntrinsicName(RTLIB::ADD_F32));
+  setLibcallName(RTLIB::SUB_F32, getIntrinsicName(RTLIB::SUB_F32));
+  setLibcallName(RTLIB::MUL_F32, getIntrinsicName(RTLIB::MUL_F32));
+  setLibcallName(RTLIB::DIV_F32, getIntrinsicName(RTLIB::DIV_F32));
+
   setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
   setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);
 
diff --git a/lib/Target/PIC16/PIC16TargetAsmInfo.h b/lib/Target/PIC16/PIC16TargetAsmInfo.h
index e464e36..b7292b8 100644
--- a/lib/Target/PIC16/PIC16TargetAsmInfo.h
+++ b/lib/Target/PIC16/PIC16TargetAsmInfo.h
@@ -33,9 +33,13 @@ namespace llvm {
   struct PIC16Section {
       const Section *S_; // Connection to actual Section.
       unsigned Size;  // Total size of the objects contained.
+      bool SectionPrinted;
       std::vector<const GlobalVariable*> Items;
      
-      PIC16Section (const Section *s) { S_ = s; Size = 0; }
+      PIC16Section (const Section *s) { S_ = s; Size = 0; 
+                                        SectionPrinted = false;}
+      bool isPrinted() { return SectionPrinted ; }
+      void setPrintedStatus(bool status) { SectionPrinted = status ;} 
   };
       
   struct PIC16TargetAsmInfo : public TargetAsmInfo {
diff --git a/lib/Target/TargetELFWriterInfo.cpp b/lib/Target/TargetELFWriterInfo.cpp
new file mode 100644
index 0000000..9651e65
--- /dev/null
+++ b/lib/Target/TargetELFWriterInfo.cpp
@@ -0,0 +1,36 @@
+//===-- lib/Target/TargetELFWriterInfo.cpp - ELF Writer Info ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetELFWriterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+TargetELFWriterInfo::TargetELFWriterInfo(TargetMachine &tm) : TM(tm) {
+  is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+  isLittleEndian = TM.getTargetData()->isLittleEndian();
+}
+
+TargetELFWriterInfo::~TargetELFWriterInfo() {}
+
+/// getFunctionAlignment - Returns the alignment for function 'F'. Targets
+/// with different alignment constraints should override this method.
+unsigned TargetELFWriterInfo::getFunctionAlignment(const Function *F) const {
+  const TargetData *TD = TM.getTargetData();
+  unsigned FnAlign = F->getAlignment();
+  unsigned TDAlign = TD->getPointerABIAlignment();
+  unsigned Align = std::max(FnAlign, TDAlign);
+  assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
+  return Align;
+}
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index dea293b..c487cb8 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -30,6 +30,7 @@ namespace llvm {
   bool FiniteOnlyFPMathOption;
   bool HonorSignDependentRoundingFPMathOption;
   bool UseSoftFloat;
+  FloatABI::ABIType FloatABIType;
   bool NoImplicitFloat;
   bool NoZerosInBSS;
   bool ExceptionHandling;
@@ -84,6 +85,19 @@ GenerateSoftFloatCalls("soft-float",
   cl::desc("Generate software floating point library calls"),
   cl::location(UseSoftFloat),
   cl::init(false));
+static cl::opt<llvm::FloatABI::ABIType, true>
+FloatABIForCalls("float-abi",
+  cl::desc("Choose float ABI type"),
+  cl::location(FloatABIType),
+  cl::init(FloatABI::Default),
+  cl::values(
+    clEnumValN(FloatABI::Default, "default",
+               "Target default float ABI type"),
+    clEnumValN(FloatABI::Soft, "soft",
+               "Soft float ABI (implied by -soft-float)"),
+    clEnumValN(FloatABI::Hard, "hard",
+               "Hard float ABI (uses FP registers)"),
+    clEnumValEnd));
 static cl::opt<bool, true>
 DontPlaceZerosInBSS("nozero-initialized-in-bss",
   cl::desc("Don't place zero-initialized symbols into bss section"),
@@ -162,6 +176,14 @@ EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
 // TargetMachine Class
 //
 
+TargetMachine::TargetMachine() 
+  : AsmInfo(0) {
+  // Typically it will be subtargets that will adjust FloatABIType from Default
+  // to Soft or Hard.
+  if (UseSoftFloat)
+    FloatABIType = FloatABI::Soft;
+}
+
 TargetMachine::~TargetMachine() {
   delete AsmInfo;
 }
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 710bd03..3796aac 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -482,35 +482,6 @@ _usesbb:
 
 //===---------------------------------------------------------------------===//
 
-Currently we don't have elimination of redundant stack manipulations. Consider
-the code:
-
-int %main() {
-entry:
-	call fastcc void %test1( )
-	call fastcc void %test2( sbyte* cast (void ()* %test1 to sbyte*) )
-	ret int 0
-}
-
-declare fastcc void %test1()
-
-declare fastcc void %test2(sbyte*)
-
-
-This currently compiles to:
-
-	subl $16, %esp
-	call _test5
-	addl $12, %esp
-	subl $16, %esp
-	movl $_test5, (%esp)
-	call _test6
-	addl $12, %esp
-
-The add\sub pair is really unneeded here.
-
-//===---------------------------------------------------------------------===//
-
 Consider the expansion of:
 
 define i32 @test3(i32 %X) {
@@ -902,34 +873,6 @@ condition register is dead. xor reg reg is shorter than mov reg, #0.
 
 //===---------------------------------------------------------------------===//
 
-We aren't matching RMW instructions aggressively
-enough.  Here's a reduced testcase (more in PR1160):
-
-define void @test(i32* %huge_ptr, i32* %target_ptr) {
-        %A = load i32* %huge_ptr                ; <i32> [#uses=1]
-        %B = load i32* %target_ptr              ; <i32> [#uses=1]
-        %C = or i32 %A, %B              ; <i32> [#uses=1]
-        store i32 %C, i32* %target_ptr
-        ret void
-}
-
-$ llvm-as < t.ll | llc -march=x86-64
-
-_test:
-        movl (%rdi), %eax
-        orl (%rsi), %eax
-        movl %eax, (%rsi)
-        ret
-
-That should be something like:
-
-_test:
-        movl (%rdi), %eax
-        orl %eax, (%rsi)
-        ret
-
-//===---------------------------------------------------------------------===//
-
 The following code:
 
 bb114.preheader:		; preds = %cond_next94
@@ -1897,3 +1840,60 @@ The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona,
   Core 2, and "Generic"
 
 //===---------------------------------------------------------------------===//
+
+Testcase:
+int a(int x) { return (x & 127) > 31; }
+
+Current output:
+	movl	4(%esp), %eax
+	andl	$127, %eax
+	cmpl	$31, %eax
+	seta	%al
+	movzbl	%al, %eax
+	ret
+
+Ideal output:
+	xorl	%eax, %eax
+	testl	$96, 4(%esp)
+	setne	%al
+	ret
+
+We could do this transformation in instcombine, but it's only clearly
+beneficial on platforms with a test instruction.
+
+//===---------------------------------------------------------------------===//
+Testcase:
+int x(int a) { return (a&0xf0)>>4; }
+
+Current output:
+	movl	4(%esp), %eax
+	shrl	$4, %eax
+	andl	$15, %eax
+	ret
+
+Ideal output:
+	movzbl	4(%esp), %eax
+	shrl	$4, %eax
+	ret
+
+//===---------------------------------------------------------------------===//
+
+Testcase:
+int x(int a) { return (a & 0x80) ? 0x100 : 0; }
+
+Current output:
+	testl	$128, 4(%esp)
+	setne	%al
+	movzbl	%al, %eax
+	shll	$8, %eax
+	ret
+
+Ideal output:
+	movl	4(%esp), %eax
+	addl	%eax, %eax
+	andl	$256, %eax
+	ret
+
+We generally want to fold shifted tests of a single bit into a shift+and on x86.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 7f99203..e9fcbd5 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -215,50 +215,6 @@ def CC_X86_Win64_C : CallingConv<[
   CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>>
 ]>;
 
-// Tail call convention (fast): One register is reserved for target address,
-// namely R9
-def CC_X86_64_TailCall : CallingConv<[
-  // Handles byval parameters.
-  CCIfByVal<CCPassByVal<8, 8>>,
-
-  // Promote i8/i16 arguments to i32.
-  CCIfType<[i8, i16], CCPromoteToType<i32>>,
-
-  // The 'nest' parameter, if any, is passed in R10.
-  CCIfNest<CCAssignToReg<[R10]>>,
-
-  // The first 6 integer arguments are passed in integer registers.
-  CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D]>>,
-  CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>,
-  
-  // The first 8 FP/Vector arguments are passed in XMM registers.
-  CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
-            CCIfSubtarget<"hasSSE1()",
-            CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
-
-  // The first 8 MMX (except for v1i64) vector arguments are passed in XMM
-  // registers on Darwin.
-  CCIfType<[v8i8, v4i16, v2i32, v2f32],
-            CCIfSubtarget<"isTargetDarwin()",
-            CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
- 
-  // The first 8 v1i64 vector arguments are passed in GPRs on Darwin.
-  CCIfType<[v1i64],
-            CCIfSubtarget<"isTargetDarwin()",
-            CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
- 
-  // Integer/FP values get stored in stack slots that are 8 bytes in size and
-  // 8-byte aligned if there are no more registers to hold them.
-  CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
-  
-  // Vectors get 16-byte stack slots that are 16-byte aligned.
-  CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
-
-  // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
-]>;
-
-
 //===----------------------------------------------------------------------===//
 // X86 C Calling Convention
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
index 2604741..d84034b 100644
--- a/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -12,8 +12,27 @@
 //===----------------------------------------------------------------------===//
 
 #include "X86ELFWriterInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
-X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit) :
-  TargetELFWriterInfo(is64Bit ? EM_X86_64 : EM_386) {}
+X86ELFWriterInfo::X86ELFWriterInfo(TargetMachine &TM)
+  : TargetELFWriterInfo(TM) {
+    bool is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+    EMachine = is64Bit ? EM_X86_64 : EM_386;
+  }
+
 X86ELFWriterInfo::~X86ELFWriterInfo() {}
+
+unsigned X86ELFWriterInfo::getFunctionAlignment(const Function *F) const {
+  unsigned FnAlign = 4;
+
+  if (F->hasFnAttr(Attribute::OptimizeForSize))
+    FnAlign = 1;
+
+  if (F->getAlignment())
+    FnAlign = Log2_32(F->getAlignment());
+
+  return (1 << FnAlign);
+}
diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h
index acfa501..e9c5bc4 100644
--- a/lib/Target/X86/X86ELFWriterInfo.h
+++ b/lib/Target/X86/X86ELFWriterInfo.h
@@ -20,8 +20,10 @@ namespace llvm {
 
   class X86ELFWriterInfo : public TargetELFWriterInfo {
   public:
-    X86ELFWriterInfo(bool is64Bit);
+    X86ELFWriterInfo(TargetMachine &TM);
     virtual ~X86ELFWriterInfo();
+
+    virtual unsigned getFunctionAlignment(const Function *F) const;
   };
 
 } // end llvm namespace
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 33332e4..2bcfd76 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -171,8 +171,6 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTaillCall) {
   if (Subtarget->is64Bit()) {
     if (Subtarget->isTargetWin64())
       return CC_X86_Win64_C;
-    else if (CC == CallingConv::Fast && isTaillCall)
-      return CC_X86_64_TailCall;
     else
       return CC_X86_64_C;
   }
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9e15a54..36e3ab2 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -944,7 +944,7 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
     SDValue StackAdjustment = TailCall.getOperand(2);
     assert(((TargetAddress.getOpcode() == ISD::Register &&
                (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX ||
-                cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
+                cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R11)) ||
               TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
               TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
              "Expecting an global address, external symbol, or register");
@@ -1171,8 +1171,6 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
   if (Subtarget->is64Bit()) {
     if (Subtarget->isTargetWin64())
       return CC_X86_Win64_C;
-    else if (CC == CallingConv::Fast && PerformTailCallOpt)
-      return CC_X86_64_TailCall;
     else
       return CC_X86_64_C;
   }
@@ -1799,7 +1797,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
   } else if (IsTailCall) {
-    unsigned Opc = Is64Bit ? X86::R9 : X86::EAX;
+    unsigned Opc = Is64Bit ? X86::R11 : X86::EAX;
 
     Chain = DAG.getCopyToReg(Chain,  dl,
                              DAG.getRegister(Opc, getPointerTy()),
@@ -7696,7 +7694,7 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
                                      SelectionDAG &DAG, MachineFrameInfo *MFI,
                                      const TargetLowering &TLI) {
   LDBase = NULL;
-  LastLoadedElt = -1;
+  LastLoadedElt = -1U;
   for (unsigned i = 0; i < NumElems; ++i) {
     if (N->getMaskElt(i) < 0) {
       if (!LDBase)
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index c733f26..6c0074e 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -822,6 +822,13 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
     NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
   }
 
+  unsigned ReadyLabelId = 0;
+  if (needsFrameMoves) {
+    // Mark effective beginning of when frame pointer is ready.
+    ReadyLabelId = MMI->NextLabelID();
+    BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
+  }
+
   // Skip the callee-saved push instructions.
   while (MBBI != MBB.end() &&
          (MBBI->getOpcode() == X86::PUSH32r ||
@@ -831,67 +838,61 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
   if (MBBI != MBB.end())
     DL = MBBI->getDebugLoc();
 
-  if (NumBytes) {   // Adjust stack pointer: ESP -= numbytes.
-    if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
-      // Check, whether EAX is livein for this function.
-      bool isEAXAlive = false;
-      for (MachineRegisterInfo::livein_iterator
+  // Adjust stack pointer: ESP -= numbytes.
+  if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
+    // Check whether EAX is live-in for this function.
+    bool isEAXAlive = false;
+    for (MachineRegisterInfo::livein_iterator
            II = MF.getRegInfo().livein_begin(),
            EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
-        unsigned Reg = II->first;
-        isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
-                      Reg == X86::AH  || Reg == X86::AL);
-      }
+      unsigned Reg = II->first;
+      isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
+                    Reg == X86::AH || Reg == X86::AL);
+    }
 
-      // Function prologue calls _alloca to probe the stack when allocating more
-      // than 4k bytes in one go. Touching the stack at 4K increments is
-      // necessary to ensure that the guard pages used by the OS virtual memory
-      // manager are allocated in correct sequence.
-      if (!isEAXAlive) {
-        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
-          .addImm(NumBytes);
-        BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
-          .addExternalSymbol("_alloca");
-      } else {
-        // Save EAX
-        BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
-          .addReg(X86::EAX, RegState::Kill);
-
-        // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
-        // allocated bytes for EAX.
-        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
-          .addImm(NumBytes-4);
-        BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
-          .addExternalSymbol("_alloca");
-
-        // Restore EAX
-        MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
-                                                X86::EAX),
-                                        StackPtr, false, NumBytes-4);
-        MBB.insert(MBBI, MI);
-      }
+    // Function prologue calls _alloca to probe the stack when allocating more
+    // than 4k bytes in one go. Touching the stack at 4K increments is necessary
+    // to ensure that the guard pages used by the OS virtual memory manager are
+    // allocated in correct sequence.
+    if (!isEAXAlive) {
+      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+        .addImm(NumBytes);
+      BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
+        .addExternalSymbol("_alloca");
     } else {
-      // If there is an SUB32ri of ESP immediately before this instruction,
-      // merge the two. This can be the case when tail call elimination is
-      // enabled and the callee has more arguments then the caller.
-      NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+      // Save EAX
+      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
+        .addReg(X86::EAX, RegState::Kill);
+
+      // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
+      // allocated bytes for EAX.
+      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+        .addImm(NumBytes - 4);
+      BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
+        .addExternalSymbol("_alloca");
+
+      // Restore EAX
+      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
+                                              X86::EAX),
+                                      StackPtr, false, NumBytes - 4);
+      MBB.insert(MBBI, MI);
+    }
+  } else if (NumBytes) {
+    // If there is an SUB32ri of ESP immediately before this instruction, merge
+    // the two. This can be the case when tail call elimination is enabled and
+    // the callee has more arguments than the caller.
+    NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
 
-      // If there is an ADD32ri or SUB32ri of ESP immediately after this
-      // instruction, merge the two instructions.
-      mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
+    // If there is an ADD32ri or SUB32ri of ESP immediately after this
+    // instruction, merge the two instructions.
+    mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
 
-      if (NumBytes)
-        emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
-    }
+    if (NumBytes)
+      emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
   }
 
-  if (needsFrameMoves) {
-    // Mark effective beginning of when frame pointer is ready.
-    unsigned ReadyLabelId = 0;
-    ReadyLabelId = MMI->NextLabelID();
-    BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
+  if (needsFrameMoves)
     emitFrameMoves(MF, FrameLabelId, ReadyLabelId);
-  }
 }
 
 void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 03ce1ae..56983ce 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -350,6 +350,10 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
   , MaxInlineSizeThreshold(128)
   , Is64Bit(is64Bit)
   , TargetType(isELF) { // Default to ELF unless otherwise specified.
+
+  // default to hard float ABI
+  if (FloatABIType == FloatABI::Default)
+    FloatABIType = FloatABI::Hard;
     
   // Determine default and user specified characteristics
   if (!FS.empty()) {
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 88ab247..dfb055f 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -133,8 +133,7 @@ X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS,
     DataLayout(Subtarget.getDataLayout()),
     FrameInfo(TargetFrameInfo::StackGrowsDown,
               Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
-    InstrInfo(*this), JITInfo(*this), TLInfo(*this),
-    ELFWriterInfo(Subtarget.is64Bit()) {
+    InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) {
   DefRelocModel = getRelocationModel();
   // FIXME: Correctly select PIC model for Win64 stuff
   if (getRelocationModel() == Reloc::Default) {
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 2bb6428..a612634 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -127,17 +127,8 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
 
   // Second check: make sure that all callers are direct callers.  We can't
   // transform functions that have indirect callers.
-  for (Value::use_iterator UI = F->use_begin(), E = F->use_end();
-       UI != E; ++UI) {
-    CallSite CS = CallSite::get(*UI);
-    if (!CS.getInstruction())       // "Taking the address" of the function
-      return false;
-
-    // Ensure that this call site is CALLING the function, not passing it as
-    // an argument.
-    if (!CS.isCallee(UI))
-      return false;
-  }
+  if (F->hasAddressTaken())
+    return false;
 
   // Check to see which arguments are promotable.  If an argument is promotable,
   // add it to ArgsToPromote.
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 666db7e..e480dad 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -175,15 +175,8 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
   if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false;
 
   // Ensure that the function is only directly called.
-  for (Value::use_iterator I = Fn.use_begin(), E = Fn.use_end(); I != E; ++I) {
-    // If this use is anything other than a call site, give up.
-    CallSite CS = CallSite::get(*I);
-    Instruction *TheCall = CS.getInstruction();
-    if (!TheCall) return false;   // Not a direct call site?
-
-    // The addr of this function is passed to the call.
-    if (!CS.isCallee(I)) return false;
-  }
+  if (Fn.hasAddressTaken())
+    return false;
 
   // Okay, we know we can transform this function if safe.  Scan its body
   // looking for calls to llvm.vastart.
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index db378b0..9c652b9 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -47,7 +47,6 @@ namespace {
     void GlobalIsNeeded(GlobalValue *GV);
     void MarkUsedGlobalsAsNeeded(Constant *C);
 
-    bool SafeToDestroyConstant(Constant* C);
     bool RemoveUnusedGlobalValue(GlobalValue &GV);
   };
 }
@@ -211,17 +210,3 @@ bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) {
   GV.removeDeadConstantUsers();
   return GV.use_empty();
 }
-
-// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
-// by constants itself.  Note that constants cannot be cyclic, so this test is
-// pretty easy to implement recursively.
-//
-bool GlobalDCE::SafeToDestroyConstant(Constant *C) {
-  for (Value::use_iterator I = C->use_begin(), E = C->use_end(); I != E; ++I)
-    if (Constant *User = dyn_cast<Constant>(*I)) {
-      if (!SafeToDestroyConstant(User)) return false;
-    } else {
-      return false;
-    }
-  return true;
-}
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 5f12825..9a1b294 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -136,16 +136,16 @@ struct VISIBILITY_HIDDEN GlobalStatus {
 
 }
 
-/// ConstantIsDead - Return true if the specified constant is (transitively)
-/// dead.  The constant may be used by other constants (e.g. constant arrays and
-/// constant exprs) as long as they are dead, but it cannot be used by anything
-/// else.
-static bool ConstantIsDead(Constant *C) {
+// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
+// by other constants.  Note that constants cannot be cyclic, so this test is
+// pretty easy to implement recursively.
+//
+static bool SafeToDestroyConstant(Constant *C) {
   if (isa<GlobalValue>(C)) return false;
 
   for (Value::use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI)
     if (Constant *CU = dyn_cast<Constant>(*UI)) {
-      if (!ConstantIsDead(CU)) return false;
+      if (!SafeToDestroyConstant(CU)) return false;
     } else
       return false;
   return true;
@@ -233,7 +233,7 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS,
     } else if (Constant *C = dyn_cast<Constant>(*UI)) {
       GS.HasNonInstructionUser = true;
       // We might have a dead and dangling constant hanging off of here.
-      if (!ConstantIsDead(C))
+      if (!SafeToDestroyConstant(C))
         return true;
     } else {
       GS.HasNonInstructionUser = true;
@@ -338,7 +338,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
     } else if (Constant *C = dyn_cast<Constant>(U)) {
       // If we have a chain of dead constantexprs or other things dangling from
       // us, and if they are all dead, nuke them without remorse.
-      if (ConstantIsDead(C)) {
+      if (SafeToDestroyConstant(C)) {
         C->destroyConstant();
         // This could have invalidated UI, start over from scratch.
         CleanupConstantGlobalUsers(V, Init);
@@ -354,7 +354,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
 static bool isSafeSROAElementUse(Value *V) {
   // We might have a dead and dangling constant hanging off of here.
   if (Constant *C = dyn_cast<Constant>(V))
-    return ConstantIsDead(C);
+    return SafeToDestroyConstant(C);
   
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I) return false;
@@ -1769,22 +1769,6 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
   return false;
 }
 
-/// OnlyCalledDirectly - Return true if the specified function is only called
-/// directly.  In other words, its address is never taken.
-static bool OnlyCalledDirectly(Function *F) {
-  for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
-    Instruction *User = dyn_cast<Instruction>(*UI);
-    if (!User) return false;
-    if (!isa<CallInst>(User) && !isa<InvokeInst>(User)) return false;
-
-    // See if the function address is passed as an argument.
-    for (User::op_iterator i = User->op_begin() + 1, e = User->op_end();
-         i != e; ++i)
-      if (*i == F) return false;
-  }
-  return true;
-}
-
 /// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
 /// function, changing them to FastCC.
 static void ChangeCalleesToFastCall(Function *F) {
@@ -1830,7 +1814,7 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
       ++NumFnDeleted;
     } else if (F->hasLocalLinkage()) {
       if (F->getCallingConv() == CallingConv::C && !F->isVarArg() &&
-          OnlyCalledDirectly(F)) {
+          !F->hasAddressTaken()) {
         // If this function has C calling conventions, is not a varargs
         // function, and is only called directly, promote it to use the Fast
         // calling convention.
@@ -1841,7 +1825,7 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
       }
 
       if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
-          OnlyCalledDirectly(F)) {
+          !F->hasAddressTaken()) {
         // The function is not used by a trampoline intrinsic, so it is safe
         // to remove the 'nest' attribute.
         RemoveNestAttribute(F);
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 17bc2d4..5693cc0 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -9,10 +9,6 @@
 //
 // This pass looks for equivalent functions that are mergable and folds them.
 //
-// A Function will not be analyzed if:
-// * it is overridable at runtime (except for weak linkage), or
-// * it is used by anything other than the callee parameter of a call/invoke
-//
 // A hash is computed from the function, based on its type and number of
 // basic blocks.
 //
@@ -24,8 +20,6 @@
 // When a match is found, the functions are folded. We can only fold two
 // functions when we know that the definition of one of them is not
 // overridable.
-// * fold a function marked internal by replacing all of its users.
-// * fold extern or weak functions by replacing them with a global alias
 //
 //===----------------------------------------------------------------------===//
 //
@@ -48,6 +42,7 @@
 #define DEBUG_TYPE "mergefunc"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Constants.h"
 #include "llvm/InlineAsm.h"
@@ -62,7 +57,6 @@
 using namespace llvm;
 
 STATISTIC(NumFunctionsMerged, "Number of functions merged");
-STATISTIC(NumMergeFails, "Number of identical function pairings not merged");
 
 namespace {
   struct VISIBILITY_HIDDEN MergeFunctions : public ModulePass {
@@ -81,16 +75,169 @@ ModulePass *llvm::createMergeFunctionsPass() {
   return new MergeFunctions();
 }
 
+// ===----------------------------------------------------------------------===
+// Comparison of functions
+// ===----------------------------------------------------------------------===
+
 static unsigned long hash(const Function *F) {
-  return F->size() ^ reinterpret_cast<unsigned long>(F->getType());
-  //return F->size() ^ F->arg_size() ^ F->getReturnType();
+  const FunctionType *FTy = F->getFunctionType();
+
+  FoldingSetNodeID ID;
+  ID.AddInteger(F->size());
+  ID.AddInteger(F->getCallingConv());
+  ID.AddBoolean(F->hasGC());
+  ID.AddBoolean(FTy->isVarArg());
+  ID.AddInteger(FTy->getReturnType()->getTypeID());
+  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+    ID.AddInteger(FTy->getParamType(i)->getTypeID());
+  return ID.ComputeHash();
+}
+
+/// IgnoreBitcasts - given a bitcast, returns the first non-bitcast found by
+/// walking the chain of cast operands. Otherwise, returns the argument.
+static Value* IgnoreBitcasts(Value *V) {
+  while (BitCastInst *BC = dyn_cast<BitCastInst>(V))
+    V = BC->getOperand(0);
+
+  return V;
+}
+
+/// isEquivalentType - any two pointers in the same address space are
+/// equivalent. Otherwise, standard type equivalence rules apply.
+static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
+  if (Ty1 == Ty2)
+    return true;
+  if (Ty1->getTypeID() != Ty2->getTypeID())
+    return false;
+
+  switch(Ty1->getTypeID()) {
+  case Type::VoidTyID:
+  case Type::FloatTyID:
+  case Type::DoubleTyID:
+  case Type::X86_FP80TyID:
+  case Type::FP128TyID:
+  case Type::PPC_FP128TyID:
+  case Type::LabelTyID:
+  case Type::MetadataTyID:
+    return true;
+
+  case Type::IntegerTyID:
+  case Type::OpaqueTyID:
+    // Ty1 == Ty2 would have returned true earlier.
+    return false;
+
+  default:
+    assert(0 && "Unknown type!");
+    return false;
+
+  case Type::PointerTyID: {
+    const PointerType *PTy1 = cast<PointerType>(Ty1);
+    const PointerType *PTy2 = cast<PointerType>(Ty2);
+    return PTy1->getAddressSpace() == PTy2->getAddressSpace();
+  }
+
+  case Type::StructTyID: {
+    const StructType *STy1 = cast<StructType>(Ty1);
+    const StructType *STy2 = cast<StructType>(Ty2);
+    if (STy1->getNumElements() != STy2->getNumElements() ||
+        STy1->isPacked() != STy2->isPacked())
+      return false;
+    for (unsigned i = 0, e = STy1->getNumElements(); i != e; ++i)
+      if (!isEquivalentType(STy1->getElementType(i), STy2->getElementType(i)))
+        return false;
+    return true;
+  }
+
+  case Type::FunctionTyID: {
+    const FunctionType *FTy1 = cast<FunctionType>(Ty1);
+    const FunctionType *FTy2 = cast<FunctionType>(Ty2);
+    if (FTy1->getNumParams() != FTy2->getNumParams() ||
+        FTy1->isVarArg() != FTy2->isVarArg())
+      return false;
+    if (!isEquivalentType(FTy1->getReturnType(), FTy2->getReturnType()))
+      return false;
+    for (unsigned i = 0, e = FTy1->getNumParams(); i != e; ++i)
+      if (!isEquivalentType(FTy1->getParamType(i), FTy2->getParamType(i)))
+        return false;
+    return true;
+  }
+
+  // Arrays and vectors carry their length in the type, so it must match too.
+  case Type::ArrayTyID: {
+    const ArrayType *ATy1 = cast<ArrayType>(Ty1);
+    const ArrayType *ATy2 = cast<ArrayType>(Ty2);
+    return ATy1->getNumElements() == ATy2->getNumElements() &&
+           isEquivalentType(ATy1->getElementType(), ATy2->getElementType());
+  }
+  case Type::VectorTyID: {
+    const VectorType *VTy1 = cast<VectorType>(Ty1);
+    const VectorType *VTy2 = cast<VectorType>(Ty2);
+    return VTy1->getNumElements() == VTy2->getNumElements() &&
+           isEquivalentType(VTy1->getElementType(), VTy2->getElementType());
+  }
+  }
+}
+
+/// isEquivalentOperation - determine whether the two operations are the same
+/// except that pointer-to-A and pointer-to-B are equivalent. This should be
+/// kept in sync with Instruction::isSameOperationAs.
+static bool
+isEquivalentOperation(const Instruction *I1, const Instruction *I2) {
+  if (I1->getOpcode() != I2->getOpcode() ||
+      I1->getNumOperands() != I2->getNumOperands() ||
+      !isEquivalentType(I1->getType(), I2->getType()))
+    return false;
+
+  // We have two instructions of identical opcode and #operands.  Check to see
+  // if all operands are the same type
+  for (unsigned i = 0, e = I1->getNumOperands(); i != e; ++i)
+    if (!isEquivalentType(I1->getOperand(i)->getType(),
+                          I2->getOperand(i)->getType()))
+      return false;
+
+  // Check special state that is a part of some instructions.
+  if (const LoadInst *LI = dyn_cast<LoadInst>(I1))
+    return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() &&
+           LI->getAlignment() == cast<LoadInst>(I2)->getAlignment();
+  if (const StoreInst *SI = dyn_cast<StoreInst>(I1))
+    return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() &&
+           SI->getAlignment() == cast<StoreInst>(I2)->getAlignment();
+  if (const CmpInst *CI = dyn_cast<CmpInst>(I1))
+    return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate();
+  if (const CallInst *CI = dyn_cast<CallInst>(I1))
+    return CI->isTailCall() == cast<CallInst>(I2)->isTailCall() &&
+           CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() &&
+           CI->getAttributes().getRawPointer() ==
+             cast<CallInst>(I2)->getAttributes().getRawPointer();
+  if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1))
+    return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() &&
+           CI->getAttributes().getRawPointer() ==
+             cast<InvokeInst>(I2)->getAttributes().getRawPointer();
+  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) {
+    if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices())
+      return false;
+    for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i)
+      if (IVI->idx_begin()[i] != cast<InsertValueInst>(I2)->idx_begin()[i])
+        return false;
+    return true;
+  }
+  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1)) {
+    if (EVI->getNumIndices() != cast<ExtractValueInst>(I2)->getNumIndices())
+      return false;
+    for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i)
+      if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I2)->idx_begin()[i])
+        return false;
+    return true;
+  }
+
+  return true;
 }
 
 static bool compare(const Value *V, const Value *U) {
   assert(!isa<BasicBlock>(V) && !isa<BasicBlock>(U) &&
          "Must not compare basic blocks.");
 
-  assert(V->getType() == U->getType() &&
+  assert(isEquivalentType(V->getType(), U->getType()) &&
         "Two of the same operation have operands of different type.");
 
   // TODO: If the constant is an expression of F, we should accept that it's
@@ -117,20 +264,40 @@ static bool compare(const Value *V, const Value *U) {
 static bool equals(const BasicBlock *BB1, const BasicBlock *BB2,
                    DenseMap<const Value *, const Value *> &ValueMap,
                    DenseMap<const Value *, const Value *> &SpeculationMap) {
-  // Specutively add it anyways. If it's false, we'll notice a difference later, and
-  // this won't matter.
+  // Speculatively add it anyways. If it's false, we'll notice a difference
+  // later, and this won't matter.
   ValueMap[BB1] = BB2;
 
   BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end();
   BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end();
 
   do {
-    if (!FI->isSameOperationAs(const_cast<Instruction *>(&*GI)))
-      return false;
+    if (isa<BitCastInst>(FI)) {
+      ++FI;
+      continue;
+    }
+    if (isa<BitCastInst>(GI)) {
+      ++GI;
+      continue;
+    }
 
-    if (FI->getNumOperands() != GI->getNumOperands())
+    if (!isEquivalentOperation(FI, GI))
       return false;
 
+    if (isa<GetElementPtrInst>(FI)) {
+      const GetElementPtrInst *GEPF = cast<GetElementPtrInst>(FI);
+      const GetElementPtrInst *GEPG = cast<GetElementPtrInst>(GI);
+      if (GEPF->hasAllZeroIndices() && GEPG->hasAllZeroIndices()) {
+        // It's effectively a bitcast.
+        ++FI, ++GI;
+        continue;
+      }
+
+      // TODO: we only really care about the elements before the index
+      if (FI->getOperand(0)->getType() != GI->getOperand(0)->getType())
+        return false;
+    }
+
     if (ValueMap[FI] == GI) {
       ++FI, ++GI;
       continue;
@@ -140,8 +307,8 @@ static bool equals(const BasicBlock *BB1, const BasicBlock *BB2,
       return false;
 
     for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
-      Value *OpF = FI->getOperand(i);
-      Value *OpG = GI->getOperand(i);
+      Value *OpF = IgnoreBitcasts(FI->getOperand(i));
+      Value *OpG = IgnoreBitcasts(GI->getOperand(i));
 
       if (ValueMap[OpF] == OpG)
         continue;
@@ -149,10 +316,8 @@ static bool equals(const BasicBlock *BB1, const BasicBlock *BB2,
       if (ValueMap[OpF] != NULL)
         return false;
 
-      assert(OpF->getType() == OpG->getType() &&
-             "Two of the same operation has operands of different type.");
-
-      if (OpF->getValueID() != OpG->getValueID())
+      if (OpF->getValueID() != OpG->getValueID() ||
+          !isEquivalentType(OpF->getType(), OpG->getType()))
         return false;
 
       if (isa<PHINode>(FI)) {
@@ -203,14 +368,15 @@ static bool equals(const Function *F, const Function *G) {
   if (F->hasSection() && F->getSection() != G->getSection())
     return false;
 
+  if (F->isVarArg() != G->isVarArg())
+    return false;
+
   // TODO: if it's internal and only used in direct calls, we could handle this
   // case too.
   if (F->getCallingConv() != G->getCallingConv())
     return false;
 
-  // TODO: We want to permit cases where two functions take T* and S* but
-  // only load or store them into T** and S**.
-  if (F->getType() != G->getType())
+  if (!isEquivalentType(F->getFunctionType(), G->getFunctionType()))
     return false;
 
   DenseMap<const Value *, const Value *> ValueMap;
@@ -237,89 +403,213 @@ static bool equals(const Function *F, const Function *G) {
   return true;
 }
 
-static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
-  if (FnVec[i]->mayBeOverridden() && !FnVec[j]->mayBeOverridden())
-    std::swap(FnVec[i], FnVec[j]);
-
-  Function *F = FnVec[i];
-  Function *G = FnVec[j];
+// ===----------------------------------------------------------------------===
+// Folding of functions
+// ===----------------------------------------------------------------------===
+
+// Cases:
+// * F is external strong, G is external strong:
+//   turn G into a thunk to F    (1)
+// * F is external strong, G is external weak:
+//   turn G into a thunk to F    (1)
+// * F is external weak, G is external weak:
+//   turn both into thunks to a shared internal function
+// * F is external strong, G is internal:
+//   address of G taken:
+//     turn G into a thunk to F  (1)
+//   address of G not taken:
+//     make G an alias to F      (2)
+// * F is internal, G is external weak
+//   address of F is taken:
+//     turn G into a thunk to F  (1)
+//   address of F is not taken:
+//     make G an alias of F      (2)
+// * F is internal, G is internal:
+//   address of F and G are taken:
+//     turn G into a thunk to F  (1)
+//   address of G is not taken:
+//     make G an alias to F      (2)
+//
+// alias requires linkage == (external,local,weak); else fall back to a thunk
+// external means 'externally visible' linkage != (internal,private)
+// internal means linkage == (internal,private)
+// weak means linkage mayBeOverridable
+// being external implies that the address is taken
+//
+// 1. turn G into a thunk to F
+// 2. make G an alias to F
+
+enum LinkageCategory {
+  ExternalStrong,
+  ExternalWeak,
+  Internal
+};
+
+static LinkageCategory categorize(const Function *F) {
+  switch (F->getLinkage()) {
+  case GlobalValue::InternalLinkage:
+  case GlobalValue::PrivateLinkage:
+    return Internal;
+
+  case GlobalValue::WeakAnyLinkage:
+  case GlobalValue::WeakODRLinkage:
+  case GlobalValue::ExternalWeakLinkage:
+    return ExternalWeak;
+
+  case GlobalValue::ExternalLinkage:
+  case GlobalValue::AvailableExternallyLinkage:
+  case GlobalValue::LinkOnceAnyLinkage:
+  case GlobalValue::LinkOnceODRLinkage:
+  case GlobalValue::AppendingLinkage:
+  case GlobalValue::DLLImportLinkage:
+  case GlobalValue::DLLExportLinkage:
+  case GlobalValue::GhostLinkage:
+  case GlobalValue::CommonLinkage:
+    return ExternalStrong;
+  }
 
-  if (!F->mayBeOverridden()) {
-    if (G->hasLocalLinkage()) {
-      F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
-      G->replaceAllUsesWith(F);
-      G->eraseFromParent();
-      ++NumFunctionsMerged;
-      return true;
-    }
+  assert(0 && "Unknown LinkageType.");
+  return ExternalWeak;
+}
 
-    if (G->hasExternalLinkage() || G->hasWeakLinkage()) {
-      GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "",
-                                        F, G->getParent());
-      F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
-      GA->takeName(G);
-      GA->setVisibility(G->getVisibility());
-      G->replaceAllUsesWith(GA);
-      G->eraseFromParent();
-      ++NumFunctionsMerged;
-      return true;
+static void ThunkGToF(Function *F, Function *G) {
+  Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
+                                    G->getParent());
+  BasicBlock *BB = BasicBlock::Create("", NewG);
+
+  std::vector<Value *> Args;
+  unsigned i = 0;
+  const FunctionType *FFTy = F->getFunctionType();
+  for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
+       AI != AE; ++AI) {
+    if (FFTy->getParamType(i) == AI->getType())
+      Args.push_back(AI);
+    else {
+      Value *BCI = new BitCastInst(AI, FFTy->getParamType(i), "", BB);
+      Args.push_back(BCI);
     }
+    ++i;
   }
 
-  if (F->hasWeakLinkage() && G->hasWeakLinkage()) {
-    GlobalAlias *GA_F = new GlobalAlias(F->getType(), F->getLinkage(), "",
-                                        0, F->getParent());
-    GA_F->takeName(F);
-    GA_F->setVisibility(F->getVisibility());
-    F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
-    F->replaceAllUsesWith(GA_F);
-    F->setName("folded." + GA_F->getName());
-    F->setLinkage(GlobalValue::ExternalLinkage);
-    GA_F->setAliasee(F);
-
-    GlobalAlias *GA_G = new GlobalAlias(G->getType(), G->getLinkage(), "",
-                                        F, G->getParent());
-    GA_G->takeName(G);
-    GA_G->setVisibility(G->getVisibility());
-    G->replaceAllUsesWith(GA_G);
-    G->eraseFromParent();
-
-    ++NumFunctionsMerged;
-    return true;
+  CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB);
+  CI->setTailCall();
+  CI->setCallingConv(F->getCallingConv());
+  if (NewG->getReturnType() == Type::VoidTy) {
+    ReturnInst::Create(BB);
+  } else if (CI->getType() != NewG->getReturnType()) {
+    Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB);
+    ReturnInst::Create(BCI, BB);
+  } else {
+    ReturnInst::Create(CI, BB);
   }
 
-  DOUT << "Failed on " << F->getName() << " and " << G->getName() << "\n";
+  NewG->copyAttributesFrom(G);
+  NewG->takeName(G);
+  G->replaceAllUsesWith(NewG);
+  G->eraseFromParent();
 
-  ++NumMergeFails;
-  return false;
+  // TODO: look at direct callers to G and make them all direct callers to F.
 }
 
-static bool hasAddressTaken(User *U) {
-  for (User::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I) {
-    User *Use = *I;
+static void AliasGToF(Function *F, Function *G) {
+  if (!G->hasExternalLinkage() && !G->hasLocalLinkage() && !G->hasWeakLinkage())
+    return ThunkGToF(F, G);
+
+  GlobalAlias *GA = new GlobalAlias(
+    G->getType(), G->getLinkage(), "",
+    ConstantExpr::getBitCast(F, G->getType()), G->getParent());
+  F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+  GA->takeName(G);
+  GA->setVisibility(G->getVisibility());
+  G->replaceAllUsesWith(GA);
+  G->eraseFromParent();
+}
 
-    // 'call (bitcast @F to ...)' happens a lot.
-    while (isa<ConstantExpr>(Use) && Use->hasOneUse()) {
-      Use = *Use->use_begin();
-    }
+static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
+  Function *F = FnVec[i];
+  Function *G = FnVec[j];
 
-    if (isa<ConstantExpr>(Use)) {
-      if (hasAddressTaken(Use))
-        return true;
-    }
+  LinkageCategory catF = categorize(F);
+  LinkageCategory catG = categorize(G);
 
-    if (!isa<CallInst>(Use) && !isa<InvokeInst>(Use))
-      return true;
+  if (catF == ExternalWeak || (catF == Internal && catG == ExternalStrong)) {
+    std::swap(FnVec[i], FnVec[j]);
+    std::swap(F, G);
+    std::swap(catF, catG);
+  }
 
-    // Make sure we aren't passing U as a parameter to call instead of the
-    // callee.
-    if (CallSite(cast<Instruction>(Use)).hasArgument(U))
-      return true;
+  switch (catF) {
+    case ExternalStrong:
+      switch (catG) {
+        case ExternalStrong:
+        case ExternalWeak:
+          ThunkGToF(F, G);
+          break;
+        case Internal:
+          if (G->hasAddressTaken())
+            ThunkGToF(F, G);
+          else
+            AliasGToF(F, G);
+          break;
+      }
+      break;
+
+    case ExternalWeak: {
+      assert(catG == ExternalWeak);
+
+      // Make them both thunks to the same internal function.
+      F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+      Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
+                                     F->getParent());
+      H->copyAttributesFrom(F);
+      H->takeName(F);
+      F->replaceAllUsesWith(H);
+
+      ThunkGToF(F, G);
+      ThunkGToF(F, H);
+
+      F->setLinkage(GlobalValue::InternalLinkage);
+    } break;
+
+    case Internal:
+      switch (catG) {
+        case ExternalStrong:
+          assert(0);
+          // fall-through
+        case ExternalWeak:
+	  if (F->hasAddressTaken())
+            ThunkGToF(F, G);
+          else
+            AliasGToF(F, G);
+	  break;
+        case Internal: {
+          bool addrTakenF = F->hasAddressTaken();
+          bool addrTakenG = G->hasAddressTaken();
+          if (!addrTakenF && addrTakenG) {
+            std::swap(FnVec[i], FnVec[j]);
+            std::swap(F, G);
+	    std::swap(addrTakenF, addrTakenG);
+	  }
+
+          if (addrTakenF && addrTakenG) {
+            ThunkGToF(F, G);
+          } else {
+            assert(!addrTakenG);
+            AliasGToF(F, G);
+          }
+	} break;
+      }
+      break;
   }
 
-  return false;
+  ++NumFunctionsMerged;
+  return true;
 }
 
+// ===----------------------------------------------------------------------===
+// Pass definition
+// ===----------------------------------------------------------------------===
+
 bool MergeFunctions::runOnModule(Module &M) {
   bool Changed = false;
 
@@ -329,25 +619,19 @@ bool MergeFunctions::runOnModule(Module &M) {
     if (F->isDeclaration() || F->isIntrinsic())
       continue;
 
-    if (!F->hasLocalLinkage() && !F->hasExternalLinkage() &&
-        !F->hasWeakLinkage())
-      continue;
-
-    if (hasAddressTaken(F))
-      continue;
-
     FnMap[hash(F)].push_back(F);
   }
 
-  // TODO: instead of running in a loop, we could also fold functions in callgraph
-  // order. Constructing the CFG probably isn't cheaper than just running in a loop.
+  // TODO: instead of running in a loop, we could also fold functions in
+  // callgraph order. Constructing the CFG probably isn't cheaper than just
+  // running in a loop, unless it happened to already be available.
 
   bool LocalChanged;
   do {
     LocalChanged = false;
+    DOUT << "size: " << FnMap.size() << "\n";
     for (std::map<unsigned long, std::vector<Function *> >::iterator
          I = FnMap.begin(), E = FnMap.end(); I != E; ++I) {
-      DOUT << "size: " << FnMap.size() << "\n";
       std::vector<Function *> &FnVec = I->second;
       DOUT << "hash (" << I->first << "): " << FnVec.size() << "\n";
 
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
new file mode 100644
index 0000000..b3a25540
--- /dev/null
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -0,0 +1,171 @@
+//===- PartialInlining.cpp - Inline parts of functions --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs partial inlining, typically by inlining an if statement
+// that surrounds the body of the function.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "partialinlining"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CFG.h"
+using namespace llvm;
+
+namespace {
+  struct VISIBILITY_HIDDEN PartialInliner : public ModulePass {
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
+    static char ID; // Pass identification, replacement for typeid
+    PartialInliner() : ModulePass(&ID) {}
+    
+    bool runOnModule(Module& M);
+    
+  private:
+    Function* unswitchFunction(Function* F);
+  };
+}
+
+char PartialInliner::ID = 0;
+static RegisterPass<PartialInliner> X("partial-inliner", "Partial Inliner");
+
+ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
+
+/// Extract F's body (all but the entry test and its return block) into a new
+/// function, inline the remainder into callers; returns the new function or 0.
+Function* PartialInliner::unswitchFunction(Function* F) {
+  // First, verify that this function is an unswitching candidate...
+  BasicBlock* entryBlock = F->begin();
+  if (!isa<BranchInst>(entryBlock->getTerminator()))
+    return 0;
+  
+  BasicBlock* returnBlock = 0;
+  BasicBlock* nonReturnBlock = 0;
+  unsigned returnCount = 0;
+  for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock);
+       SI != SE; ++SI)
+    if (isa<ReturnInst>((*SI)->getTerminator())) {
+      returnBlock = *SI;
+      returnCount++;
+    } else
+      nonReturnBlock = *SI;
+  
+  // Also bail if there is no non-returning successor: nothing to extract.
+  if (returnCount != 1 || !nonReturnBlock)
+    return 0;
+  
+  // Clone the function, so that we can hack away on it.
+  DenseMap<const Value*, Value*> ValueMap;
+  Function* duplicateFunction = CloneFunction(F, ValueMap);
+  duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
+  F->getParent()->getFunctionList().push_back(duplicateFunction);
+  BasicBlock* newEntryBlock = cast<BasicBlock>(ValueMap[entryBlock]);
+  BasicBlock* newReturnBlock = cast<BasicBlock>(ValueMap[returnBlock]);
+  BasicBlock* newNonReturnBlock = cast<BasicBlock>(ValueMap[nonReturnBlock]);
+  
+  // Go ahead and update all uses to the duplicate, so that we can just
+  // use the inliner functionality when we're done hacking.
+  F->replaceAllUsesWith(duplicateFunction);
+  
+  // Special hackery is needed with PHI nodes that have inputs from more than
+  // one extracted block.  For simplicity, just split the PHIs into a two-level
+  // sequence of PHIs, some of which will go in the extracted region, and some
+  // of which will go outside.
+  BasicBlock* preReturn = newReturnBlock;
+  newReturnBlock = newReturnBlock->splitBasicBlock(
+                                              newReturnBlock->getFirstNonPHI());
+  BasicBlock::iterator I = preReturn->begin();
+  BasicBlock::iterator Ins = newReturnBlock->begin();
+  while (I != preReturn->end()) {
+    PHINode* OldPhi = dyn_cast<PHINode>(I);
+    if (!OldPhi) break;
+    PHINode* retPhi = PHINode::Create(OldPhi->getType(), "", Ins);
+    OldPhi->replaceAllUsesWith(retPhi);
+    Ins = newReturnBlock->getFirstNonPHI();
+    retPhi->addIncoming(I, preReturn);
+    retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock),
+                        newEntryBlock);
+    OldPhi->removeIncomingValue(newEntryBlock);
+    ++I;
+  }
+  newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock);
+  
+  // Gather up the blocks that we're going to extract.
+  std::vector<BasicBlock*> toExtract;
+  toExtract.push_back(newNonReturnBlock);
+  for (Function::iterator FI = duplicateFunction->begin(),
+       FE = duplicateFunction->end(); FI != FE; ++FI)
+    if (&*FI != newEntryBlock && &*FI != newReturnBlock &&
+        &*FI != newNonReturnBlock)
+      toExtract.push_back(FI);
+      
+  // The CodeExtractor needs a dominator tree.
+  DominatorTree DT;
+  DT.runOnFunction(*duplicateFunction);
+  
+  // Extract the body of the if.
+  Function* extractedFunction = ExtractCodeRegion(DT, toExtract);
+  
+  // Inline the top-level if test into all callers.
+  std::vector<User*> Users(duplicateFunction->use_begin(), 
+                           duplicateFunction->use_end());
+  for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end();
+       UI != UE; ++UI)
+    if (CallInst* CI = dyn_cast<CallInst>(*UI))
+      InlineFunction(CI);
+    else if (InvokeInst* II = dyn_cast<InvokeInst>(*UI))
+      InlineFunction(II);
+  
+  // Ditch the duplicate, since we're done with it, and rewrite all remaining
+  // users (function pointers, etc.) back to the original function.
+  duplicateFunction->replaceAllUsesWith(F);
+  duplicateFunction->eraseFromParent();
+  
+  return extractedFunction;
+}
+
+bool PartialInliner::runOnModule(Module& M) {
+  std::vector<Function*> worklist;
+  worklist.reserve(M.size());
+  for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
+    if (!FI->use_empty() && !FI->isDeclaration())
+    worklist.push_back(&*FI);
+    
+  bool changed = false;
+  while (!worklist.empty()) {
+    Function* currFunc = worklist.back();
+    worklist.pop_back();
+  
+    if (currFunc->use_empty()) continue;
+    
+    bool recursive = false;
+    for (Function::use_iterator UI = currFunc->use_begin(),
+         UE = currFunc->use_end(); UI != UE; ++UI)
+      if (Instruction* I = dyn_cast<Instruction>(UI))
+        if (I->getParent()->getParent() == currFunc) {
+          recursive = true;
+          break;
+        }
+    if (recursive) continue;
+          
+    
+    if (Function* newFunc = unswitchFunction(currFunc)) {
+      worklist.push_back(newFunc);
+      changed = true;
+    }
+    
+  }
+  
+  return changed;
+}
+\ No newline at end of file
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 83503fd..38b1198 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -168,7 +168,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
 
   // Expand the code for the iteration count into the preheader of the loop.
   BasicBlock *Preheader = L->getLoopPreheader();
-  Value *ExitCnt = Rewriter.expandCodeFor(RHS, CmpIndVar->getType(),
+  Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(),
                                           Preheader->getTerminator());
 
   // Insert a new icmp_ne or icmp_eq instruction before the branch.
@@ -392,10 +392,31 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   // in this loop, insert a canonical induction variable of the largest size.
   Value *IndVar = 0;
   if (NeedCannIV) {
+    // Check to see if the loop already has a canonical-looking induction
+    // variable. If one is present and it's wider than the planned canonical
+    // induction variable, temporarily remove it, so that the Rewriter
+    // doesn't attempt to reuse it.
+    PHINode *OldCannIV = L->getCanonicalInductionVariable();
+    if (OldCannIV) {
+      if (SE->getTypeSizeInBits(OldCannIV->getType()) >
+          SE->getTypeSizeInBits(LargestType))
+        OldCannIV->removeFromParent();
+      else
+        OldCannIV = 0;
+    }
+
     IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType);
+
     ++NumInserted;
     Changed = true;
     DOUT << "INDVARS: New CanIV: " << *IndVar;
+
+    // Now that the official induction variable is established, reinsert
+    // the old canonical-looking variable after it so that the IR remains
+    // consistent. It will be deleted as part of the dead-PHI deletion at
+    // the end of the pass.
+    if (OldCannIV)
+      OldCannIV->insertAfter(cast<Instruction>(IndVar));
   }
 
   // If we have a trip count expression, rewrite the loop's exit condition
@@ -459,8 +480,8 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
          E = List.end(); UI != E; ++UI) {
       SCEVHandle Offset = UI->getOffset();
       Value *Op = UI->getOperandValToReplace();
+      const Type *UseTy = Op->getType();
       Instruction *User = UI->getUser();
-      bool isSigned = UI->isSigned();
 
       // Compute the final addrec to expand into code.
       SCEVHandle AR = IU->getReplacementExpr(*UI);
@@ -471,7 +492,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
         // Expand loop-invariant values in the loop preheader. They will
         // be sunk to the exit block later, if possible.
         NewVal =
-          Rewriter.expandCodeFor(AR, LargestType,
+          Rewriter.expandCodeFor(AR, UseTy,
                                  L->getLoopPreheader()->getTerminator());
         Rewriter.setInsertionPoint(I);
         ++NumReplaced;
@@ -485,74 +506,6 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
         if (!Stride->isLoopInvariant(L))
           continue;
 
-        const Type *IVTy = Offset->getType();
-        const Type *UseTy = Op->getType();
-
-        // Promote the Offset and Stride up to the canonical induction
-        // variable's bit width.
-        SCEVHandle PromotedOffset = Offset;
-        SCEVHandle PromotedStride = Stride;
-        if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType)) {
-          // It doesn't matter for correctness whether zero or sign extension
-          // is used here, since the value is truncated away below, but if the
-          // value is signed, sign extension is more likely to be folded.
-          if (isSigned) {
-            PromotedOffset = SE->getSignExtendExpr(PromotedOffset, LargestType);
-            PromotedStride = SE->getSignExtendExpr(PromotedStride, LargestType);
-          } else {
-            PromotedOffset = SE->getZeroExtendExpr(PromotedOffset, LargestType);
-            // If the stride is obviously negative, use sign extension to
-            // produce things like x-1 instead of x+255.
-            if (isa<SCEVConstant>(PromotedStride) &&
-                cast<SCEVConstant>(PromotedStride)
-                  ->getValue()->getValue().isNegative())
-              PromotedStride = SE->getSignExtendExpr(PromotedStride,
-                                                     LargestType);
-            else
-              PromotedStride = SE->getZeroExtendExpr(PromotedStride,
-                                                     LargestType);
-          }
-        }
-
-        // Create the SCEV representing the offset from the canonical
-        // induction variable, still in the canonical induction variable's
-        // type, so that all expanded arithmetic is done in the same type.
-        SCEVHandle NewAR = SE->getAddRecExpr(SE->getIntegerSCEV(0, LargestType),
-                                             PromotedStride, L);
-        // Add the PromotedOffset as a separate step, because it may not be
-        // loop-invariant.
-        NewAR = SE->getAddExpr(NewAR, PromotedOffset);
-
-        // Expand the addrec into instructions.
-        Value *V = Rewriter.expandCodeFor(NewAR);
-
-        // Insert an explicit cast if necessary to truncate the value
-        // down to the original stride type. This is done outside of
-        // SCEVExpander because in SCEV expressions, a truncate of an
-        // addrec is always folded.
-        if (LargestType != IVTy) {
-          if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType))
-            NewAR = SE->getTruncateExpr(NewAR, IVTy);
-          if (Rewriter.isInsertedExpression(NewAR))
-            V = Rewriter.expandCodeFor(NewAR);
-          else {
-            V = Rewriter.InsertCastOfTo(CastInst::getCastOpcode(V, false,
-                                                                IVTy, false),
-                                        V, IVTy);
-            assert(!isa<SExtInst>(V) && !isa<ZExtInst>(V) &&
-                   "LargestType wasn't actually the largest type!");
-            // Force the rewriter to use this trunc whenever this addrec
-            // appears so that it doesn't insert new phi nodes or
-            // arithmetic in a different type.
-            Rewriter.addInsertedValue(V, NewAR);
-          }
-        }
-
-        DOUT << "INDVARS: Made offset-and-trunc IV for offset "
-             << *IVTy << " " << *Offset << ": ";
-        DEBUG(WriteAsOperand(*DOUT, V, false));
-        DOUT << "\n";
-
         // Now expand it into actual Instructions and patch it into place.
         NewVal = Rewriter.expandCodeFor(AR, UseTy);
       }
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 6d2ff0e..5465e4a 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -2608,21 +2608,6 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
       else if (Op1I->getOperand(1) == Op0)         // X-(Y+X) == -Y
         return BinaryOperator::CreateFNeg(Op1I->getOperand(0), I.getName());
     }
-
-    if (Op1I->hasOneUse()) {
-      // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
-      // is not used by anyone else...
-      //
-      if (Op1I->getOpcode() == Instruction::FSub) {
-        // Swap the two operands of the subexpr...
-        Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
-        Op1I->setOperand(0, IIOp1);
-        Op1I->setOperand(1, IIOp0);
-
-        // Create the new top level fadd instruction...
-        return BinaryOperator::CreateFAdd(Op0, Op1);
-      }
-    }
   }
 
   return 0;
@@ -11824,7 +11809,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
   if (SI.isVolatile()) return 0;  // Don't hack volatile stores.
 
   // store X, null    -> turns into 'unreachable' in SimplifyCFG
-  if (isa<ConstantPointerNull>(Ptr)) {
+  if (isa<ConstantPointerNull>(Ptr) &&
+      cast<PointerType>(Ptr->getType())->getAddressSpace() == 0) {
     if (!isa<UndefValue>(Val)) {
       SI.setOperand(0, UndefValue::get(Val->getType()));
       if (Instruction *U = dyn_cast<Instruction>(Val))
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index b499279..5a85a04 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -125,13 +125,17 @@ static bool MarkAliveBlocks(BasicBlock *BB,
         }
       }
       
-      if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
-        if (isa<ConstantPointerNull>(SI->getOperand(1)) ||
-            isa<UndefValue>(SI->getOperand(1))) {
+      if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+        Value *Ptr = SI->getOperand(1);
+        
+        if (isa<UndefValue>(Ptr) ||
+            (isa<ConstantPointerNull>(Ptr) &&
+             cast<PointerType>(Ptr->getType())->getAddressSpace() == 0)) {
           ChangeToUnreachable(SI);
           Changed = true;
           break;
         }
+      }
     }
 
     // Turn invokes that call 'nounwind' functions into ordinary calls.
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index 3a991f6..54bd895 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -364,4 +364,15 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, const Type **Tys,
 #include "llvm/Intrinsics.gen"
 #undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
 
+/// hasAddressTaken - Return true if there are any uses of this function other
+/// than direct calls or invokes to it, i.e. any use that is not operand 0 of a
+bool Function::hasAddressTaken() const {  // ...CallInst or an InvokeInst.
+  for (Value::use_const_iterator I = use_begin(), E = use_end(); I != E; ++I) {
+    if (I.getOperandNo() != 0 ||  // used in an operand slot other than 0
+        (!isa<CallInst>(*I) && !isa<InvokeInst>(*I)))  // not a call/invoke
+      return true;
+  }
+  return false;  // every use is operand 0 of a call or invoke
+}
+
 // vim: sw=2 ai
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index 7556b8e..e0764e4 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -218,9 +218,12 @@ bool Instruction::isIdenticalTo(const Instruction *I) const {
 }
 
 // isSameOperationAs
+// This should be kept in sync with isEquivalentOperation in
+// lib/Transforms/IPO/MergeFunctions.cpp.
 bool Instruction::isSameOperationAs(const Instruction *I) const {
-  if (getOpcode() != I->getOpcode() || getType() != I->getType() ||
-      getNumOperands() != I->getNumOperands())
+  if (getOpcode() != I->getOpcode() ||
+      getNumOperands() != I->getNumOperands() ||
+      getType() != I->getType())
     return false;
 
   // We have two instructions of identical opcode and #operands.  Check to see
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index b1297ff..e9f2acd 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -276,8 +276,8 @@ namespace {
                           int VT, unsigned ArgNo, std::string &Suffix);
     void VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
                                   unsigned RetNum, unsigned ParamNum, ...);
-    void VerifyAttrs(Attributes Attrs, const Type *Ty,
-                     bool isReturnValue, const Value *V);
+    void VerifyParameterAttrs(Attributes Attrs, const Type *Ty,
+                              bool isReturnValue, const Value *V);
     void VerifyFunctionAttrs(const FunctionType *FT, const AttrListPtr &Attrs,
                              const Value *V);
 
@@ -437,22 +437,23 @@ void Verifier::visitGlobalAlias(GlobalAlias &GA) {
 void Verifier::verifyTypeSymbolTable(TypeSymbolTable &ST) {
 }
 
-// VerifyAttrs - Check the given parameter attributes for an argument or return
+// VerifyParameterAttrs - Check the given attributes for an argument or return
 // value of the specified type.  The value V is printed in error messages.
-void Verifier::VerifyAttrs(Attributes Attrs, const Type *Ty, 
-                           bool isReturnValue, const Value *V) {
+void Verifier::VerifyParameterAttrs(Attributes Attrs, const Type *Ty,
+                                    bool isReturnValue, const Value *V) {
   if (Attrs == Attribute::None)
     return;
 
+  Attributes FnCheckAttr = Attrs & Attribute::FunctionOnly;
+  Assert1(!FnCheckAttr, "Attribute " + Attribute::getAsString(FnCheckAttr) +
+          " only applies to the function!", V);
+
   if (isReturnValue) {
     Attributes RetI = Attrs & Attribute::ParameterOnly;
     Assert1(!RetI, "Attribute " + Attribute::getAsString(RetI) +
             " does not apply to return values!", V);
   }
-  Attributes FnCheckAttr = Attrs & Attribute::FunctionOnly;
-  Assert1(!FnCheckAttr, "Attribute " + Attribute::getAsString(FnCheckAttr) +
-          " only applies to functions!", V);
-  
+
   for (unsigned i = 0;
        i < array_lengthof(Attribute::MutuallyIncompatible); ++i) {
     Attributes MutI = Attrs & Attribute::MutuallyIncompatible[i];
@@ -495,9 +496,9 @@ void Verifier::VerifyFunctionAttrs(const FunctionType *FT,
     else if (Attr.Index-1 < FT->getNumParams())
       Ty = FT->getParamType(Attr.Index-1);
     else
-      break;  // VarArgs attributes, don't verify.
-    
-    VerifyAttrs(Attr.Attrs, Ty, Attr.Index == 0, V);
+      break;  // VarArgs attributes, verified elsewhere.
+
+    VerifyParameterAttrs(Attr.Attrs, Ty, Attr.Index == 0, V);
 
     if (Attr.Attrs & Attribute::Nest) {
       Assert1(!SawNest, "More than one parameter has attribute nest!", V);
@@ -509,10 +510,10 @@ void Verifier::VerifyFunctionAttrs(const FunctionType *FT,
   }
 
   Attributes FAttrs = Attrs.getFnAttributes();
-  Assert1(!(FAttrs & (~Attribute::FunctionOnly)),
-          "Attribute " + Attribute::getAsString(FAttrs) +
-          " does not apply to function!", V);
-      
+  Attributes NotFn = FAttrs & (~Attribute::FunctionOnly);
+  Assert1(!NotFn, "Attribute " + Attribute::getAsString(NotFn) +
+          " does not apply to the function!", V);
+
   for (unsigned i = 0;
        i < array_lengthof(Attribute::MutuallyIncompatible); ++i) {
     Attributes MutI = FAttrs & Attribute::MutuallyIncompatible[i];
@@ -1025,7 +1026,7 @@ void Verifier::VerifyCallSite(CallSite CS) {
     for (unsigned Idx = 1 + FTy->getNumParams(); Idx <= CS.arg_size(); ++Idx) {
       Attributes Attr = Attrs.getParamAttributes(Idx);
 
-      VerifyAttrs(Attr, CS.getArgument(Idx-1)->getType(), false, I);
+      VerifyParameterAttrs(Attr, CS.getArgument(Idx-1)->getType(), false, I);
 
       Attributes VArgI = Attr & Attribute::VarArgsIncompatible;
       Assert1(!VArgI, "Attribute " + Attribute::getAsString(VArgI) +
diff --git a/test/Bitcode/2009-06-11-FirstClassAggregateConstant.ll b/test/Bitcode/2009-06-11-FirstClassAggregateConstant.ll
new file mode 100644
index 0000000..415f88e
--- /dev/null
+++ b/test/Bitcode/2009-06-11-FirstClassAggregateConstant.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llvm-dis -disable-output
+; PR4373
+
+@foo = weak global { i32 } zeroinitializer              
+@bar = weak global i32 0                
+
+define void @test() {    ; PR4373 input: stores one aggregate and one scalar constant
+entry:
+        store { i32 } zeroinitializer, { i32 }* @foo    ; first-class aggregate constant as the stored value
+        store i32 1, i32* @bar                          ; plain i32 constant store
+        ret void
+}
diff --git a/test/BugPoint/misopt-basictest.ll b/test/BugPoint/misopt-basictest.ll
index 11cc2ae..462a637 100644
--- a/test/BugPoint/misopt-basictest.ll
+++ b/test/BugPoint/misopt-basictest.ll
@@ -1,4 +1,7 @@
 ; RUN: bugpoint %s -dce -bugpoint-deletecalls -simplifycfg -silence-passes %bugpoint_topts
+; XFAIL: powerpc-.*-linux
+; END.
+; Failure on PPC Linux is due to PR4293.
 
 @.LC0 = internal global [13 x i8] c"Hello World\0A\00"          ; <[13 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
new file mode 100644
index 0000000..45b4bd4
--- /dev/null
+++ b/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
@@ -0,0 +1,77 @@
+; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin
+
+	type { i32, i32, %struct.D_Sym**, [3 x %struct.D_Sym*] }		; type %0
+	type { i32, %struct.D_Reduction** }		; type %1
+	type { i32, %struct.D_RightEpsilonHint* }		; type %2
+	type { i32, %struct.D_ErrorRecoveryHint* }		; type %3
+	type { i32, i32, %struct.D_Reduction**, [3 x %struct.D_Reduction*] }		; type %4
+	%struct.D_ErrorRecoveryHint = type { i16, i16, i8* }
+	%struct.D_ParseNode = type { i32, %struct.d_loc_t, i8*, i8*, %struct.D_Scope*, void (%struct.D_Parser*, %struct.d_loc_t*, i8**)*, i8*, i8* }
+	%struct.D_Parser = type { i8*, void (%struct.D_Parser*, %struct.d_loc_t*, i8**)*, %struct.D_Scope*, void (%struct.D_Parser*)*, %struct.D_ParseNode* (%struct.D_Parser*, i32, %struct.D_ParseNode**)*, void (%struct.D_ParseNode*)*, %struct.d_loc_t, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+	%struct.D_ParserTables = type { i32, %struct.D_State*, i16*, i32, i32, %struct.D_Symbol*, void (%struct.D_Parser*, %struct.d_loc_t*, i8**)*, i32, %struct.D_Pass*, i32 }
+	%struct.D_Pass = type { i8*, i32, i32, i32 }
+	%struct.D_Reduction = type { i16, i16, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)*, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)*, i16, i16, i32, i32, i32, i32, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)** }
+	%struct.D_RightEpsilonHint = type { i16, i16, %struct.D_Reduction* }
+	%struct.D_Scope = type { i8, %struct.D_Sym*, %struct.D_SymHash*, %struct.D_Sym*, %struct.D_Scope*, %struct.D_Scope*, %struct.D_Scope*, %struct.D_Scope*, %struct.D_Scope* }
+	%struct.D_Shift = type { i16, i8, i8, i32, i32, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)* }
+	%struct.D_State = type { i8*, i32, %1, %2, %3, %struct.D_Shift**, i32 (i8**, i32*, i32*, i16*, i32*, i8*, i32*)*, i8*, i8, i8, i8, i8*, %struct.D_Shift***, i32 }
+	%struct.D_Sym = type { i8*, i32, i32, %struct.D_Sym*, %struct.D_Sym*, i32 }
+	%struct.D_SymHash = type { i32, i32, %0 }
+	%struct.D_Symbol = type { i32, i8*, i32 }
+	%struct.PNode = type { i32, i32, i32, i32, %struct.D_Reduction*, %struct.D_Shift*, i32, %struct.VecPNode, i32, i8, i8, %struct.PNode*, %struct.PNode*, %struct.PNode*, %struct.PNode*, i8*, i8*, %struct.D_Scope*, i8*, %struct.D_ParseNode }
+	%struct.PNodeHash = type { %struct.PNode**, i32, i32, i32, %struct.PNode* }
+	%struct.Parser = type { %struct.D_Parser, i8*, i8*, %struct.D_ParserTables*, i32, i32, i32, i32, i32, i32, i32, %struct.PNodeHash, %struct.SNodeHash, %struct.Reduction*, %struct.Shift*, %struct.D_Scope*, %struct.SNode*, i32, %struct.Reduction*, %struct.Shift*, i32, %struct.PNode*, %struct.SNode*, %struct.ZNode*, %4, %struct.ShiftResult*, %struct.D_Shift, %struct.Parser*, i8* }
+	%struct.Reduction = type { %struct.ZNode*, %struct.SNode*, %struct.D_Reduction*, %struct.SNode*, i32, %struct.Reduction* }
+	%struct.SNode = type { %struct.D_State*, %struct.D_Scope*, i8*, %struct.d_loc_t, i32, %struct.PNode*, %struct.VecZNode, i32, %struct.SNode*, %struct.SNode* }
+	%struct.SNodeHash = type { %struct.SNode**, i32, i32, i32, %struct.SNode*, %struct.SNode* }
+	%struct.Shift = type { %struct.SNode*, %struct.Shift* }
+	%struct.ShiftResult = type { %struct.D_Shift*, %struct.d_loc_t }
+	%struct.VecPNode = type { i32, i32, %struct.PNode**, [3 x %struct.PNode*] }
+	%struct.VecSNode = type { i32, i32, %struct.SNode**, [3 x %struct.SNode*] }
+	%struct.VecZNode = type { i32, i32, %struct.ZNode**, [3 x %struct.ZNode*] }
+	%struct.ZNode = type { %struct.PNode*, %struct.VecSNode }
+	%struct.d_loc_t = type { i8*, i8*, i32, i32, i32 }
+
+declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+
+define fastcc i32 @exhaustive_parse(%struct.Parser* %p, i32 %state) nounwind {		; reduced CFG; the RUN line checks no output, it only needs llc to finish
+entry:
+	store i8* undef, i8** undef, align 4
+	%0 = getelementptr %struct.Parser* %p, i32 0, i32 0, i32 6		; <%struct.d_loc_t*> [#uses=1]
+	%1 = bitcast %struct.d_loc_t* %0 to i8*		; <i8*> [#uses=1]
+	call void @llvm.memcpy.i32(i8* undef, i8* %1, i32 20, i32 4)
+	br label %bb10
+
+bb10:		; preds = %bb30, %bb29, %bb26, %entry
+	br i1 undef, label %bb18, label %bb20
+
+bb18:		; preds = %bb10
+	br i1 undef, label %bb20, label %bb19
+
+bb19:		; preds = %bb18
+	br label %bb20
+
+bb20:		; preds = %bb19, %bb18, %bb10
+	br i1 undef, label %bb21, label %bb22
+
+bb21:		; preds = %bb20
+	unreachable
+
+bb22:		; preds = %bb20
+	br i1 undef, label %bb24, label %bb26
+
+bb24:		; preds = %bb22
+	unreachable
+
+bb26:		; preds = %bb22
+	br i1 undef, label %bb10, label %bb29
+
+bb29:		; preds = %bb26
+	br i1 undef, label %bb10, label %bb30
+
+bb30:		; preds = %bb29
+	br i1 undef, label %bb31, label %bb10
+
+bb31:		; preds = %bb30
+	unreachable
+}
diff --git a/test/CodeGen/ARM/arguments_f64_backfill.ll b/test/CodeGen/ARM/arguments_f64_backfill.ll
new file mode 100644
index 0000000..07d928a
--- /dev/null
+++ b/test/CodeGen/ARM/arguments_f64_backfill.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi -mattr=+vfp2 -float-abi=hard | grep {fcpys s0, s1}
+
+define float @f(float %z, double %a, float %b) {    ; %z and %a are unused; only %b is forwarded
+        %tmp = call float @g(float %b)              ; third argument of @f becomes the only argument of @g
+        ret float %tmp
+}
+
+declare float @g(float)
diff --git a/test/CodeGen/ARM/lsr-code-insertion.ll b/test/CodeGen/ARM/lsr-code-insertion.ll
index 0a92279..3881e91 100644
--- a/test/CodeGen/ARM/lsr-code-insertion.ll
+++ b/test/CodeGen/ARM/lsr-code-insertion.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -stats |& grep {40.*Number of machine instrs printed}
+; RUN: llvm-as < %s | llc -stats |& grep {39.*Number of machine instrs printed}
 ; RUN: llvm-as < %s | llc -stats |& grep {.*Number of re-materialization}
 ; This test really wants to check that the resultant "cond_true" block only 
 ; has a single store in it, and that cond_true55 only has code to materialize 
diff --git a/test/CodeGen/ARM/stm.ll b/test/CodeGen/ARM/stm.ll
new file mode 100644
index 0000000..585645b
--- /dev/null
+++ b/test/CodeGen/ARM/stm.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 -arm-pre-alloc-loadstore-opti | grep stm | count 2
+
+@"\01LC" = internal constant [32 x i8] c"Boolean Not: %d %d %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals"		; <[32 x i8]*> [#uses=1]
+@"\01LC1" = internal constant [26 x i8] c"Bitwise Not: %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals"		; <[26 x i8]*> [#uses=1]
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+define i32 @main() nounwind {		; two variadic printf calls; the RUN line expects exactly 2 output lines matching "stm"
+entry:
+	%0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @"\01LC1", i32 0, i32 0), i32 -2, i32 -3, i32 2, i32 -6) nounwind		; <i32> [#uses=0]
+	%1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([32 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 1, i32 0, i32 1, i32 0, i32 1) nounwind		; <i32> [#uses=0]
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll b/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll
new file mode 100644
index 0000000..001b7fc
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep fstpt
+; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep xmm
+
+; Check that x86-64 tail calls support x86_fp80 and v2f32 types. (Tail call
+; calling convention out of sync with standard c calling convention on x86_64)
+; Bug 4278.
+
+declare fastcc double @tailcallee(x86_fp80, <2 x float>) 
+	
+define fastcc double @tailcall() {
+entry:
+  %tmp = fpext float 1.000000e+00 to x86_fp80    ; builds the x86_fp80 argument value
+	%tmp2 = tail call fastcc double @tailcallee( x86_fp80 %tmp,  <2 x float> <float 1.000000e+00, float 1.000000e+00>)    ; passes one x86_fp80 and one <2 x float> argument
+	ret double %tmp2
+}
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index d734065..c81327e 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -3,19 +3,19 @@
 ; Move param %in1 to temp register (%eax).
 ; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl	40(%rsp), %eax}
-; Add %in1 %p1 to another temporary register (%r9d).
+; Add %in1 %p1 to another temporary register (%r10d).
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl	%edi, %r9d}
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {addl	32(%rsp), %r9d}
+; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl	%edi, %r10d}
+; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {addl	32(%rsp), %r10d}
 ; Move result of addition to stack.
-; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl	%r9d, 40(%rsp)}
+; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl	%r10d, 40(%rsp)}
 ; Move param %in2 to stack.
 ; RUN: llvm-as < %s | llc -tailcallopt -march=x86-64 -x86-asm-syntax=att | grep {movl	%eax, 32(%rsp)}
 
-declare fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %a, i32 %b)
+declare fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %a, i32 %b)
 
-define fastcc i32 @tailcaller(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %in1, i32 %in2) {
+define fastcc i32 @tailcaller(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in1, i32 %in2) {
 entry:
         %tmp = add i32 %in1, %p1
-        %retval = tail call fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %in2,i32 %tmp)
+        %retval = tail call fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in2,i32 %tmp)
         ret i32 %retval
 }
 
diff --git a/test/DebugInfo/2009-06-12-Inline.ll b/test/DebugInfo/2009-06-12-Inline.ll
new file mode 100644
index 0000000..87c42d5
--- /dev/null
+++ b/test/DebugInfo/2009-06-12-Inline.ll
@@ -0,0 +1,94 @@
+; RUN: llvm-as < %s | llc -f -o /dev/null 
+	%llvm.dbg.anchor.type = type { i32, i32 }
+	%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
+	%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8*, i32 }
+	%llvm.dbg.composite.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }*, { }*, i32 }
+	%llvm.dbg.derivedtype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }* }
+	%llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
+	%llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }* }
+	%struct._objc_cache = type opaque
+	%struct._objc_category = type { i8*, i8*, %struct._objc_method_list*, %struct._objc_method_list*, %struct._objc_protocol_list*, i32, %struct._prop_list_t* }
+	%struct._objc_class = type { %struct._objc_class*, %struct._objc_class*, i8*, i32, i32, i32, %struct._objc_ivar_list*, %struct._objc_method_list*, %struct._objc_cache*, %struct._objc_protocol_list*, i8*, %struct._objc_class_extension* }
+	%struct._objc_class_extension = type { i32, i8*, %struct._prop_list_t* }
+	%struct._objc_exception_data = type { [18 x i32], [4 x i8*] }
+	%struct._objc_ivar = type { i8*, i8*, i32 }
+	%struct._objc_ivar_list = type opaque
+	%struct._objc_method = type { %struct.objc_selector*, i8*, i8* }
+	%struct._objc_method_description = type { %struct.objc_selector*, i8* }
+	%struct._objc_method_description_list = type { i32, [0 x %struct._objc_method_description] }
+	%struct._objc_method_list = type opaque
+	%struct._objc_module = type { i32, i32, i8*, %struct._objc_symtab* }
+	%struct._objc_protocol = type { %struct._objc_protocol_extension*, i8*, %struct._objc_protocol_list*, %struct._objc_method_description_list*, %struct._objc_method_description_list* }
+	%struct._objc_protocol_extension = type { i32, %struct._objc_method_description_list*, %struct._objc_method_description_list*, %struct._prop_list_t* }
+	%struct._objc_protocol_list = type { %struct._objc_protocol_list*, i32, [0 x %struct._objc_protocol] }
+	%struct._objc_super = type <{ %struct.objc_object*, %struct.objc_class* }>
+	%struct._objc_symtab = type { i32, %struct.objc_selector*, i16, i16, [0 x i8*] }
+	%struct._prop_list_t = type { i32, i32, [0 x %struct._prop_t] }
+	%struct._prop_t = type { i8*, i8* }
+	%struct.objc_class = type opaque
+	%struct.objc_object = type opaque
+	%struct.objc_selector = type opaque
+@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] [i32 0, i32 16], section "__OBJC, __image_info,regular"		; <[2 x i32]*> [#uses=1]
+@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
+@.str = internal constant [4 x i8] c"t.m\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
+@.str1 = internal constant [20 x i8] c"/Volumes/work/Radar\00", section "llvm.metadata"		; <[20 x i8]*> [#uses=1]
+@.str2 = internal constant [10 x i8] c"clang 1.0\00", section "llvm.metadata"		; <[10 x i8]*> [#uses=1]
+@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 16, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([20 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([10 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null, i32 1 }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
+@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
+@.str3 = internal constant [3 x i8] c"f1\00", section "llvm.metadata"		; <[3 x i8]*> [#uses=1]
+@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([3 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([3 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([3 x i8]* @.str3, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 3, { }* null, i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
+@.str4 = internal constant [4 x i8] c"int\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
+@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
+@llvm.dbg.array = internal constant [2 x { }*] [{ }* null, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*)], section "llvm.metadata"		; <[2 x { }*]*> [#uses=1]
+@llvm.dbg.composite = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array to { }*), i32 0 }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
+@llvm.dbg.derivedtype = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
+@.str5 = internal constant [3 x i8] c"l0\00", section "llvm.metadata"		; <[3 x i8]*> [#uses=1]
+@llvm.dbg.variable = internal constant %llvm.dbg.variable.type { i32 459008, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([3 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 5, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=1]
+@.str6 = internal constant [3 x i8] c"f0\00", section "llvm.metadata"		; <[3 x i8]*> [#uses=1]
+@llvm.dbg.subprogram7 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([3 x i8]* @.str6, i32 0, i32 0), i8* getelementptr ([3 x i8]* @.str6, i32 0, i32 0), i8* getelementptr ([3 x i8]* @.str6, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, { }* null, i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
+@.str8 = internal constant [2 x i8] c"x\00", section "llvm.metadata"		; <[2 x i8]*> [#uses=1]
+@llvm.dbg.variable9 = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram7 to { }*), i8* getelementptr ([2 x i8]* @.str8, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=1]
+@"\01L_OBJC_CLASS_NAME_" = internal global [1 x i8] zeroinitializer, section "__TEXT,__cstring,cstring_literals", align 1		; <[1 x i8]*> [#uses=1]
+@"\01L_OBJC_MODULES" = internal global %struct._objc_module { i32 7, i32 16, i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), %struct._objc_symtab* null }, section "__OBJC,__module_info,regular,no_dead_strip", align 4		; <%struct._objc_module*> [#uses=1]
+@llvm.used = appending global [3 x i8*] [i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*), i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), i8* bitcast (%struct._objc_module* @"\01L_OBJC_MODULES" to i8*)], section "llvm.metadata"		; <[3 x i8*]*> [#uses=0]
+
+define void @f1() nounwind {
+entry:
+	%x.addr.i = alloca i32		; <i32*> [#uses=2]
+	%l0 = alloca void (i32)*, align 4		; <void (i32)**> [#uses=2]
+	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))		; opens @llvm.dbg.subprogram, whose name string is "f1"
+	call void @llvm.dbg.stoppoint(i32 4, i32 3, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram7 to { }*))		; opens @llvm.dbg.subprogram7 (name string "f0") while f1's region is still open
+	store i32 1, i32* %x.addr.i		; mirrors @f0's store to %x.addr with the constant 1; appears to be the inlined copy of @f0
+	%0 = bitcast i32* %x.addr.i to { }*		; <{ }*> [#uses=1]
+	call void @llvm.dbg.declare({ }* %0, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable9 to { }*))
+	call void @llvm.dbg.stoppoint(i32 2, i32 66, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+	call void @llvm.dbg.stoppoint(i32 5, i32 3, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram7 to { }*))		; closes the nested subprogram7 region
+	%1 = bitcast void (i32)** %l0 to { }*		; <{ }*> [#uses=1]
+	call void @llvm.dbg.declare({ }* %1, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable to { }*))
+	store void (i32)* @f0, void (i32)** %l0		; @f0 is referenced by address here, not called
+	call void @llvm.dbg.stoppoint(i32 6, i32 1, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))		; closes f1's own region
+	ret void
+}
+
+declare void @llvm.dbg.func.start({ }*) nounwind readnone
+
+declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind readnone
+
+define internal void @f0(i32 %x) nounwind alwaysinline {		; alwaysinline function described by @llvm.dbg.subprogram7
+entry:
+	%x.addr = alloca i32		; <i32*> [#uses=2]
+	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram7 to { }*))
+	store i32 %x, i32* %x.addr		; spills the parameter so it has an address for dbg.declare below
+	%0 = bitcast i32* %x.addr to { }*		; <{ }*> [#uses=1]
+	call void @llvm.dbg.declare({ }* %0, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable9 to { }*))
+	call void @llvm.dbg.stoppoint(i32 2, i32 66, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram7 to { }*))
+	ret void
+}
+
+declare void @llvm.dbg.declare({ }*, { }*) nounwind readnone
+
+declare void @llvm.dbg.region.end({ }*) nounwind readnone
diff --git a/test/DebugInfo/2009-06-12-InlineFuncStart.ll b/test/DebugInfo/2009-06-12-InlineFuncStart.ll
new file mode 100644
index 0000000..32e20ff
--- /dev/null
+++ b/test/DebugInfo/2009-06-12-InlineFuncStart.ll
@@ -0,0 +1,75 @@
+; RUN: llvm-as < %s | llc
+; RUN: llvm-as < %s | llc -O0
+	%llvm.dbg.anchor.type = type { i32, i32 }
+	%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
+	%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8* }
+	%llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
+@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 1, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
+@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 46 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
+@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 393233, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([52 x i8]* @.str2, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
+@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 17 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
+@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
+@.str1 = internal constant [5 x i8] c"/tmp\00", section "llvm.metadata"		; <[5 x i8]*> [#uses=1]
+@.str2 = internal constant [52 x i8] c"4.2.1 (Based on Apple Inc. build 5627) (LLVM build)\00", section "llvm.metadata"		; <[52 x i8]*> [#uses=1]
+@.str3 = internal constant [4 x i8] c"foo\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
+@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 393252, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), { }* null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
+@.str4 = internal constant [4 x i8] c"int\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
+@llvm.dbg.subprogram5 = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([5 x i8]* @.str6, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str6, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
+@.str6 = internal constant [5 x i8] c"main\00", section "llvm.metadata"		; <[5 x i8]*> [#uses=1]
+
+define i32 @foo() nounwind alwaysinline {
+entry:
+	%retval = alloca i32		; <i32*> [#uses=2]
+	%0 = alloca i32		; <i32*> [#uses=2]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
+	call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+	store i32 42, i32* %0, align 4
+	%1 = load i32* %0, align 4		; <i32> [#uses=1]
+	store i32 %1, i32* %retval, align 4
+	br label %return
+
+return:		; preds = %entry
+	%retval1 = load i32* %retval		; <i32> [#uses=1]
+	call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
+	ret i32 %retval1
+}
+
+declare void @llvm.dbg.func.start({ }*) nounwind
+
+declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
+
+declare void @llvm.dbg.region.end({ }*) nounwind
+
+define i32 @main() nounwind {
+entry:
+	%retval.i = alloca i32		; <i32*> [#uses=2]
+	%0 = alloca i32		; <i32*> [#uses=2]
+	%retval = alloca i32		; <i32*> [#uses=2]
+	%1 = alloca i32		; <i32*> [#uses=2]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram5 to { }*))
+        br label %bb1
+
+return:		; preds = %entry
+	%retval1 = load i32* %retval		; <i32> [#uses=1]
+	call void @llvm.dbg.stoppoint(i32 2, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram5 to { }*))
+	ret i32 %retval1
+
+bb1:
+	call void @llvm.dbg.stoppoint(i32 2, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*)) nounwind
+	call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) nounwind
+	store i32 42, i32* %0, align 4
+	%2 = load i32* %0, align 4		; <i32> [#uses=1]
+	store i32 %2, i32* %retval.i, align 4
+	%retval1.i = load i32* %retval.i		; <i32> [#uses=1]
+	call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) nounwind
+	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*)) nounwind
+	store i32 %retval1.i, i32* %1, align 4
+	%3 = load i32* %1, align 4		; <i32> [#uses=1]
+	store i32 %3, i32* %retval, align 4
+	br label %return
+}
diff --git a/test/FrontendC/pr3518.c b/test/FrontendC/pr3518.c
new file mode 100644
index 0000000..4c193c7
--- /dev/null
+++ b/test/FrontendC/pr3518.c
@@ -0,0 +1,24 @@
+// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep {= internal global} | count 4
+// PR 3518
+// Some of the objects were coming out as uninitialized (external) before 3518
+// was fixed.  Internal names are different between llvm-gcc and clang so they
+// are not tested.
+
+extern void abort (void);
+
+struct A { int i; int j; };
+struct B { struct A *a; struct A *b; };
+struct C { struct B *c; struct A *d; };
+struct C e = { &(struct B) { &(struct A) { 1, 2 }, &(struct A) { 3, 4 } }, &(struct A) { 5, 6 } };
+
+int
+main (void)
+{
+  if (e.c->a->i != 1 || e.c->a->j != 2)
+    abort ();
+  if (e.c->b->i != 3 || e.c->b->j != 4)
+    abort ();
+  if (e.d->i != 5 || e.d->j != 6)
+    abort ();
+  return 0;
+}
diff --git a/test/FrontendC/pr4349.c b/test/FrontendC/pr4349.c
new file mode 100644
index 0000000..890482e
--- /dev/null
+++ b/test/FrontendC/pr4349.c
@@ -0,0 +1,39 @@
+// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep svars2 | grep {\\\[2 x \\\[2 x i8\\\]\\\]}
+// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep svars2 | grep {, i\[\[:digit:\]\]\\+ 1)} | count 1
+// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep svars3 | grep {\\\[2 x i16\\\]}
+// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep svars3 | grep {, i\[\[:digit:\]\]\\+ 1)} | count 1
+// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep svars4 | grep {\\\[2 x \\\[2 x i8\\\]\\\]} | count 1
+// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep svars4 | grep {, i\[\[:digit:\]\]\\+ 1, i\[\[:digit:\]\]\\+ 1)} | count 1
+// PR 4349
+
+union reg
+{
+    unsigned char b[2][2];
+    unsigned short w[2];
+    unsigned int d;
+};
+struct cpu
+{
+    union reg pc;
+};
+extern struct cpu cpu;
+struct svar
+{
+    void *ptr;
+};
+struct svar svars1[] =
+{
+    { &((cpu.pc).w[0]) }
+};
+struct svar svars2[] =
+{
+    { &((cpu.pc).b[0][1]) }
+};
+struct svar svars3[] =
+{
+    { &((cpu.pc).w[1]) }
+};
+struct svar svars4[] =
+{
+    { &((cpu.pc).b[1][1]) }
+};
diff --git a/test/TableGen/Slice.td b/test/TableGen/Slice.td
new file mode 100644
index 0000000..cd9c6da
--- /dev/null
+++ b/test/TableGen/Slice.td
@@ -0,0 +1,87 @@
+// RUN: tblgen %s | grep {\\\[(set} | count 2
+// RUN: tblgen %s | grep {\\\[\\\]} | count 2
+
+class ValueType<int size, int value> {
+  int Size = size;
+  int Value = value;
+}
+
+def f32  : ValueType<32, 1>;   //  32-bit floating point value
+
+class Intrinsic<string name> {
+  string Name = name;
+}
+
+class Inst<bits<8> opcode, dag oopnds, dag iopnds, string asmstr, 
+           list<dag> pattern> {
+  bits<8> Opcode = opcode;
+  dag OutOperands = oopnds;
+  dag InOperands = iopnds;
+  string AssemblyString = asmstr;
+  list<dag> Pattern = pattern;
+}
+
+def ops;
+def outs;
+def ins;
+
+def set;
+
+// Define registers
+class Register<string n> {
+  string Name = n;
+}
+
+class RegisterClass<list<ValueType> regTypes, list<Register> regList> {
+  list<ValueType> RegTypes = regTypes;
+  list<Register> MemberList = regList;
+}
+
+def XMM0: Register<"xmm0">;
+def XMM1: Register<"xmm1">;
+def XMM2: Register<"xmm2">;
+def XMM3: Register<"xmm3">;
+def XMM4: Register<"xmm4">;
+def XMM5: Register<"xmm5">;
+def XMM6: Register<"xmm6">;
+def XMM7: Register<"xmm7">;
+def XMM8:  Register<"xmm8">;
+def XMM9:  Register<"xmm9">;
+def XMM10: Register<"xmm10">;
+def XMM11: Register<"xmm11">;
+def XMM12: Register<"xmm12">;
+def XMM13: Register<"xmm13">;
+def XMM14: Register<"xmm14">;
+def XMM15: Register<"xmm15">;
+
+def FR32 : RegisterClass<[f32],
+                         [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+                          XMM8, XMM9, XMM10, XMM11,
+                          XMM12, XMM13, XMM14, XMM15]>;
+
+class SDNode {}
+def not : SDNode;
+
+multiclass scalar<bits<8> opcode, string asmstr = "", list<list<dag>> patterns = []> {
+  def SSrr : Inst<opcode, (outs FR32:$dst), (ins FR32:$src),
+                  !strconcat(asmstr, "\t$dst, $src"),
+                  !if(!null(patterns),[]<dag>,patterns[0])>;
+  def SSrm : Inst<opcode, (outs FR32:$dst), (ins FR32:$src),
+                  !strconcat(asmstr, "\t$dst, $src"),
+                  !if(!null(patterns),[]<dag>,!if(!null(!cdr(patterns)),patterns[0],patterns[1]))>;
+}
+
+multiclass vscalar<bits<8> opcode, string asmstr = "", list<list<dag>> patterns = []> {
+  def V#NAME#SSrr : Inst<opcode, (outs FR32:$dst), (ins FR32:$src),
+                  !strconcat(asmstr, "\t$dst, $src"),
+                  !if(!null(patterns),[]<dag>,patterns[0])>;
+  def V#NAME#SSrm : Inst<opcode, (outs FR32:$dst), (ins FR32:$src),
+                  !strconcat(asmstr, "\t$dst, $src"),
+                  !if(!null(patterns),[]<dag>,!if(!null(!cdr(patterns)),patterns[0],patterns[1]))>;
+}
+
+multiclass myscalar<bits<8> opcode, string asmstr = "", list<list<dag>> patterns = []> :
+  scalar<opcode, asmstr, patterns>,
+  vscalar<opcode, asmstr, patterns>;
+
+defm NOT : myscalar<0x10, "not", [[], [(set FR32:$dst, (f32 (not FR32:$src)))]]>;
diff --git a/test/TableGen/if.td b/test/TableGen/if.td
index 3c45d95..9b24382 100644
--- a/test/TableGen/if.td
+++ b/test/TableGen/if.td
@@ -1,5 +1,5 @@
-// RUN: tblgen %s | grep {1, 2, 3} | count 4
-// RUN: tblgen %s | grep {4, 5, 6} | count 2
+// RUN: tblgen %s | grep {\\\[1, 2, 3\\\]} | count 4
+// RUN: tblgen %s | grep {\\\[4, 5, 6\\\]} | count 2
 
 class A<list<list<int>> vals> {
   list<int> first = vals[0];
diff --git a/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll b/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll
index d70e604..b4a2c50 100644
--- a/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll
+++ b/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars  | llvm-dis | %prcontext Loop: 1 | grep %indvar
+; RUN: llvm-as < %s | opt -indvars | llvm-dis | %prcontext ^Loop: 1 | grep %Canonical
 
 ; The indvar simplification code should ensure that the first PHI in the block 
 ; is the canonical one!
diff --git a/test/Transforms/IndVarSimplify/masked-iv.ll b/test/Transforms/IndVarSimplify/masked-iv.ll
new file mode 100644
index 0000000..c7583c9
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/masked-iv.ll
@@ -0,0 +1,24 @@
+; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep trunc | count 1
+
+; Indvars should do the IV arithmetic in the canonical IV type (i64),
+; and only use one truncation.
+
+define void @foo(i64* %A, i64* %B, i64 %n, i64 %a, i64 %s) nounwind {
+entry:
+	%t0 = icmp sgt i64 %n, 0		; <i1> [#uses=1]
+	br i1 %t0, label %bb.preheader, label %return
+
+bb.preheader:		; preds = %entry
+	br label %bb
+
+bb:		; preds = %bb, %bb.preheader
+	%i.01 = phi i64 [ %t6, %bb ], [ %a, %bb.preheader ]		; <i64> [#uses=3]
+	%t1 = and i64 %i.01, 255		; <i64> [#uses=1]
+	%t2 = getelementptr i64* %A, i64 %t1		; <i64*> [#uses=1]
+	store i64 %i.01, i64* %t2, align 8
+	%t6 = add i64 %i.01, %s		; <i64> [#uses=1]
+	br label %bb
+
+return:		; preds = %entry
+	ret void
+}
diff --git a/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll b/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll
new file mode 100644
index 0000000..e86fed3
--- /dev/null
+++ b/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep store
+; PR4366
+
+define void @a() {
+  store i32 0, i32 addrspace(1)* null
+  ret void
+}
diff --git a/test/Transforms/InstCombine/fsub-fsub.ll b/test/Transforms/InstCombine/fsub-fsub.ll
new file mode 100644
index 0000000..ab70479
--- /dev/null
+++ b/test/Transforms/InstCombine/fsub-fsub.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep fsub | count 2
+; PR4374
+
+define float @func(float %a, float %b) nounwind {
+        %tmp3 = fsub float %a, %b
+        %tmp4 = fsub float -0.000000e+00, %tmp3
+        ret float %tmp4
+}
diff --git a/test/Transforms/MergeFunc/fold-weak.ll b/test/Transforms/MergeFunc/fold-weak.ll
index d98fde0..cea49fb 100644
--- a/test/Transforms/MergeFunc/fold-weak.ll
+++ b/test/Transforms/MergeFunc/fold-weak.ll
@@ -1,4 +1,6 @@
-; RUN: llvm-as < %s | opt -mergefunc | llvm-dis | grep {alias weak} | count 2
+; RUN: llvm-as < %s | opt -mergefunc | llvm-dis > %t
+; RUN: grep {define weak} %t | count 2
+; RUN: grep {call} %t | count 2
 
 define weak i32 @sum(i32 %x, i32 %y) {
   %sum = add i32 %x, %y
diff --git a/test/lib/llvm.exp b/test/lib/llvm.exp
index d2ddb8f..3e2632f 100644
--- a/test/lib/llvm.exp
+++ b/test/lib/llvm.exp
@@ -184,7 +184,7 @@ proc RunLLVMTests { test_source_files } {
 
         #split up target if more then 1 specified
         foreach target [split $targets ,] {
-          if { [regexp {\*} $target match] } {
+          if { $target == "*" } {
               if {$targetPASS != 1} {
                  set outcome XFAIL
               }
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index 0bd2abe..6f8a028 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -248,7 +248,7 @@ bool LTOCodeGenerator::assemble(const std::string& asmPath,
     std::vector<const char*> args;
     std::string targetTriple = _linker.getModule()->getTargetTriple();
     args.push_back(tool.c_str());
-    if ( targetTriple.find("darwin") != targetTriple.size() ) {
+    if ( targetTriple.find("darwin") != std::string::npos ) {
         // darwin specific command line options
         if (strncmp(targetTriple.c_str(), "i386-apple-", 11) == 0) {
             args.push_back("-arch");
diff --git a/unittests/ADT/TripleTest.cpp b/unittests/ADT/TripleTest.cpp
index 771d1c7..dee09f4 100644
--- a/unittests/ADT/TripleTest.cpp
+++ b/unittests/ADT/TripleTest.cpp
@@ -18,120 +18,120 @@ TEST(TripleTest, BasicParsing) {
   Triple T;
 
   T = Triple("");
-  EXPECT_EQ(T.getArchName(), "");
-  EXPECT_EQ(T.getVendorName(), "");
-  EXPECT_EQ(T.getOSName(), "");
-  EXPECT_EQ(T.getEnvironmentName(), "");
+  EXPECT_EQ("", T.getArchName());
+  EXPECT_EQ("", T.getVendorName());
+  EXPECT_EQ("", T.getOSName());
+  EXPECT_EQ("", T.getEnvironmentName());
 
   T = Triple("-");
-  EXPECT_EQ(T.getArchName(), "");
-  EXPECT_EQ(T.getVendorName(), "");
-  EXPECT_EQ(T.getOSName(), "");
-  EXPECT_EQ(T.getEnvironmentName(), "");
+  EXPECT_EQ("", T.getArchName());
+  EXPECT_EQ("", T.getVendorName());
+  EXPECT_EQ("", T.getOSName());
+  EXPECT_EQ("", T.getEnvironmentName());
 
   T = Triple("--");
-  EXPECT_EQ(T.getArchName(), "");
-  EXPECT_EQ(T.getVendorName(), "");
-  EXPECT_EQ(T.getOSName(), "");
-  EXPECT_EQ(T.getEnvironmentName(), "");
+  EXPECT_EQ("", T.getArchName());
+  EXPECT_EQ("", T.getVendorName());
+  EXPECT_EQ("", T.getOSName());
+  EXPECT_EQ("", T.getEnvironmentName());
 
   T = Triple("---");
-  EXPECT_EQ(T.getArchName(), "");
-  EXPECT_EQ(T.getVendorName(), "");
-  EXPECT_EQ(T.getOSName(), "");
-  EXPECT_EQ(T.getEnvironmentName(), "");
+  EXPECT_EQ("", T.getArchName());
+  EXPECT_EQ("", T.getVendorName());
+  EXPECT_EQ("", T.getOSName());
+  EXPECT_EQ("", T.getEnvironmentName());
 
   T = Triple("----");
-  EXPECT_EQ(T.getArchName(), "");
-  EXPECT_EQ(T.getVendorName(), "");
-  EXPECT_EQ(T.getOSName(), "");
-  EXPECT_EQ(T.getEnvironmentName(), "-");
+  EXPECT_EQ("", T.getArchName());
+  EXPECT_EQ("", T.getVendorName());
+  EXPECT_EQ("", T.getOSName());
+  EXPECT_EQ("-", T.getEnvironmentName());
 
   T = Triple("a");
-  EXPECT_EQ(T.getArchName(), "a");
-  EXPECT_EQ(T.getVendorName(), "");
-  EXPECT_EQ(T.getOSName(), "");
-  EXPECT_EQ(T.getEnvironmentName(), "");
+  EXPECT_EQ("a", T.getArchName());
+  EXPECT_EQ("", T.getVendorName());
+  EXPECT_EQ("", T.getOSName());
+  EXPECT_EQ("", T.getEnvironmentName());
 
   T = Triple("a-b");
-  EXPECT_EQ(T.getArchName(), "a");
-  EXPECT_EQ(T.getVendorName(), "b");
-  EXPECT_EQ(T.getOSName(), "");
-  EXPECT_EQ(T.getEnvironmentName(), "");
+  EXPECT_EQ("a", T.getArchName());
+  EXPECT_EQ("b", T.getVendorName());
+  EXPECT_EQ("", T.getOSName());
+  EXPECT_EQ("", T.getEnvironmentName());
 
   T = Triple("a-b-c");
-  EXPECT_EQ(T.getArchName(), "a");
-  EXPECT_EQ(T.getVendorName(), "b");
-  EXPECT_EQ(T.getOSName(), "c");
-  EXPECT_EQ(T.getEnvironmentName(), "");
+  EXPECT_EQ("a", T.getArchName());
+  EXPECT_EQ("b", T.getVendorName());
+  EXPECT_EQ("c", T.getOSName());
+  EXPECT_EQ("", T.getEnvironmentName());
 
   T = Triple("a-b-c-d");
-  EXPECT_EQ(T.getArchName(), "a");
-  EXPECT_EQ(T.getVendorName(), "b");
-  EXPECT_EQ(T.getOSName(), "c");
-  EXPECT_EQ(T.getEnvironmentName(), "d");
+  EXPECT_EQ("a", T.getArchName());
+  EXPECT_EQ("b", T.getVendorName());
+  EXPECT_EQ("c", T.getOSName());
+  EXPECT_EQ("d", T.getEnvironmentName());
 }
 
 TEST(TripleTest, ParsedIDs) {
   Triple T;
 
   T = Triple("i386-apple-darwin");
-  EXPECT_EQ(T.getArch(), Triple::x86);
-  EXPECT_EQ(T.getVendor(), Triple::Apple);
-  EXPECT_EQ(T.getOS(), Triple::Darwin);
+  EXPECT_EQ(Triple::x86, T.getArch());
+  EXPECT_EQ(Triple::Apple, T.getVendor());
+  EXPECT_EQ(Triple::Darwin, T.getOS());
 
   T = Triple("x86_64-pc-linux-gnu");
-  EXPECT_EQ(T.getArch(), Triple::x86_64);
-  EXPECT_EQ(T.getVendor(), Triple::PC);
-  EXPECT_EQ(T.getOS(), Triple::Linux);
+  EXPECT_EQ(Triple::x86_64, T.getArch());
+  EXPECT_EQ(Triple::PC, T.getVendor());
+  EXPECT_EQ(Triple::Linux, T.getOS());
 
   T = Triple("powerpc-dunno-notsure");
-  EXPECT_EQ(T.getArch(), Triple::ppc);
-  EXPECT_EQ(T.getVendor(), Triple::UnknownVendor);
-  EXPECT_EQ(T.getOS(), Triple::UnknownOS);
+  EXPECT_EQ(Triple::ppc, T.getArch());
+  EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
+  EXPECT_EQ(Triple::UnknownOS, T.getOS());
 
   T = Triple("huh");
-  EXPECT_EQ(T.getArch(), Triple::UnknownArch);
+  EXPECT_EQ(Triple::UnknownArch, T.getArch());
 }
 
 TEST(TripleTest, MutateName) {
   Triple T;
-  EXPECT_EQ(T.getArch(), Triple::UnknownArch);
-  EXPECT_EQ(T.getVendor(), Triple::UnknownVendor);
-  EXPECT_EQ(T.getOS(), Triple::UnknownOS);
+  EXPECT_EQ(Triple::UnknownArch, T.getArch());
+  EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
+  EXPECT_EQ(Triple::UnknownOS, T.getOS());
 
   T.setArchName("i386");
-  EXPECT_EQ(T.getArch(), Triple::x86);
-  EXPECT_EQ(T.getTriple(), "i386--");
+  EXPECT_EQ(Triple::x86, T.getArch());
+  EXPECT_EQ("i386--", T.getTriple());
 
   T.setVendorName("pc");
-  EXPECT_EQ(T.getArch(), Triple::x86);
-  EXPECT_EQ(T.getVendor(), Triple::PC);
-  EXPECT_EQ(T.getTriple(), "i386-pc-");
+  EXPECT_EQ(Triple::x86, T.getArch());
+  EXPECT_EQ(Triple::PC, T.getVendor());
+  EXPECT_EQ("i386-pc-", T.getTriple());
 
   T.setOSName("linux");
-  EXPECT_EQ(T.getArch(), Triple::x86);
-  EXPECT_EQ(T.getVendor(), Triple::PC);
-  EXPECT_EQ(T.getOS(), Triple::Linux);
-  EXPECT_EQ(T.getTriple(), "i386-pc-linux");
+  EXPECT_EQ(Triple::x86, T.getArch());
+  EXPECT_EQ(Triple::PC, T.getVendor());
+  EXPECT_EQ(Triple::Linux, T.getOS());
+  EXPECT_EQ("i386-pc-linux", T.getTriple());
 
   T.setEnvironmentName("gnu");
-  EXPECT_EQ(T.getArch(), Triple::x86);
-  EXPECT_EQ(T.getVendor(), Triple::PC);
-  EXPECT_EQ(T.getOS(), Triple::Linux);
-  EXPECT_EQ(T.getTriple(), "i386-pc-linux-gnu");
+  EXPECT_EQ(Triple::x86, T.getArch());
+  EXPECT_EQ(Triple::PC, T.getVendor());
+  EXPECT_EQ(Triple::Linux, T.getOS());
+  EXPECT_EQ("i386-pc-linux-gnu", T.getTriple());
 
   T.setOSName("freebsd");
-  EXPECT_EQ(T.getArch(), Triple::x86);
-  EXPECT_EQ(T.getVendor(), Triple::PC);
-  EXPECT_EQ(T.getOS(), Triple::FreeBSD);
-  EXPECT_EQ(T.getTriple(), "i386-pc-freebsd-gnu");
+  EXPECT_EQ(Triple::x86, T.getArch());
+  EXPECT_EQ(Triple::PC, T.getVendor());
+  EXPECT_EQ(Triple::FreeBSD, T.getOS());
+  EXPECT_EQ("i386-pc-freebsd-gnu", T.getTriple());
 
   T.setOSAndEnvironmentName("darwin");
-  EXPECT_EQ(T.getArch(), Triple::x86);
-  EXPECT_EQ(T.getVendor(), Triple::PC);
-  EXPECT_EQ(T.getOS(), Triple::Darwin);
-  EXPECT_EQ(T.getTriple(), "i386-pc-darwin");
+  EXPECT_EQ(Triple::x86, T.getArch());
+  EXPECT_EQ(Triple::PC, T.getVendor());
+  EXPECT_EQ(Triple::Darwin, T.getOS());
+  EXPECT_EQ("i386-pc-darwin", T.getTriple());
 }
 
 }
diff --git a/utils/GenLibDeps.pl b/utils/GenLibDeps.pl
index 73f3e71..6d0b13e 100755
--- a/utils/GenLibDeps.pl
+++ b/utils/GenLibDeps.pl
@@ -38,6 +38,10 @@ if (!$FLAT) {
   die "Can't find 'dot'" if (! -x "$DotPath");
 }
 
+if (defined($ENV{NM})) {
+  chomp($nmPath=$ENV{NM});
+}
+
 if (!defined($nmPath) || $nmPath eq "") {
   chomp($nmPath=`which nm`);
   die "Can't find 'nm'" if (! -x "$nmPath");
@@ -96,7 +100,7 @@ sub gen_one_entry {
     print "  <dt><b>$lib</b</dt><dd><ul>\n";
   }
   open UNDEFS, 
-    "$nmPath -g -u $Directory/$lib | sed -e 's/^[ 0]* U //' | sort | uniq |";
+    "$nmPath -u $Directory/$lib | sed -e 's/^[ 0]* U //' | sort | uniq |";
   my %DepLibs;
   while (<UNDEFS>) {
     chomp;
@@ -116,7 +120,7 @@ sub gen_one_entry {
   close UNDEFS or die "nm failed";
   unless(keys %DepLibs) {
     # above failed
-    open UNDEFS, "$nmPath -g -u $Directory/$lib |";
+    open UNDEFS, "$nmPath -u $Directory/$lib |";
     while (<UNDEFS>) {
       # to bypass non-working sed
       if ('  ' eq substr($_,0,2) and index($_,'U ')) {
diff --git a/utils/NewNightlyTest.pl b/utils/NewNightlyTest.pl
index a40b3f1..feac974 100755
--- a/utils/NewNightlyTest.pl
+++ b/utils/NewNightlyTest.pl
@@ -11,8 +11,6 @@ use Socket;
 #           regressions and performance changes. Submits this information
 #           to llvm.org where it is placed into the nightlytestresults database.
 #
-# Modified heavily by Patrick Jenkins, July 2006
-#
 # Syntax:   NightlyTest.pl [OPTIONS] [CVSROOT BUILDDIR WEBDIR]
 #   where
 # OPTIONS may include one or more of the following:
@@ -26,10 +24,12 @@ use Socket;
 #  -nodejagnu       Do not run feature or regression tests
 #  -parallel        Run parallel jobs with GNU Make (see -parallel-jobs).
 #  -parallel-jobs   The number of parallel Make jobs to use (default is two).
+#  -with-clang      Checkout Clang source into tools/clang.
 #  -release         Build an LLVM Release version
 #  -release-asserts Build an LLVM ReleaseAsserts version
 #  -enable-llcbeta  Enable testing of beta features in llc.
 #  -enable-lli      Enable testing of lli (interpreter) features, default is off
+#  -disable-pic     Disable building with Position Independent Code.
 #  -disable-llc     Disable LLC tests in the nightly tester.
 #  -disable-jit     Disable JIT tests in the nightly tester.
 #  -disable-cbe     Disable C backend tests in the nightly tester.
@@ -98,7 +98,7 @@ use Socket;
 ##############################################################
 my $HOME       = $ENV{'HOME'};
 my $SVNURL     = $ENV{"SVNURL"};
-$SVNURL        = 'https://llvm.org/svn/llvm-project' unless $SVNURL;
+$SVNURL        = 'http://llvm.org/svn/llvm-project' unless $SVNURL;
 my $CVSRootDir = $ENV{'CVSROOT'};
 $CVSRootDir    = "/home/vadve/shared/PublicCVS" unless $CVSRootDir;
 my $BuildDir   = $ENV{'BUILDDIR'};
@@ -145,6 +145,7 @@ while (scalar(@ARGV) and ($_ = $ARGV[0], /^[-+]/)) {
   if (/^-norunningtests$/) { next; } # Backward compatibility, ignored.
   if (/^-parallel-jobs$/)  { $PARALLELJOBS = "$ARGV[0]"; shift; next;}
   if (/^-parallel$/)       { $MAKEOPTS = "$MAKEOPTS -j$PARALLELJOBS -l3.0"; next; }
+  if (/^-with-clang$/)     { $WITHCLANG = 1; next; }
   if (/^-release$/)        { $MAKEOPTS = "$MAKEOPTS ENABLE_OPTIMIZED=1 ".
                              "OPTIMIZE_OPTION=-O2"; $BUILDTYPE="release"; next;}
   if (/^-release-asserts$/){ $MAKEOPTS = "$MAKEOPTS ENABLE_OPTIMIZED=1 ".
@@ -152,6 +153,7 @@ while (scalar(@ARGV) and ($_ = $ARGV[0], /^[-+]/)) {
                              "OPTIMIZE_OPTION=-O2";
                              $BUILDTYPE="release-asserts"; next;}
   if (/^-enable-llcbeta$/) { $PROGTESTOPTS .= " ENABLE_LLCBETA=1"; next; }
+  if (/^-disable-pic$/)    { $CONFIGUREARGS .= " --enable-pic=no"; next; }
   if (/^-enable-lli$/)     { $PROGTESTOPTS .= " ENABLE_LLI=1";
                              $CONFIGUREARGS .= " --enable-lli"; next; }
   if (/^-disable-llc$/)    { $PROGTESTOPTS .= " DISABLE_LLC=1";
@@ -534,13 +536,20 @@ ChangeDir( $BuildDir, "checkout directory" );
 if (!$NOCHECKOUT) {
   if ( $VERBOSE ) { print "CHECKOUT STAGE:\n"; }
   if ($USESVN) {
-    my $SVNCMD = "$NICE svn co $SVNURL";
-    if ($VERBOSE) {
-      print "( time -p $SVNCMD/llvm/trunk llvm; cd llvm/projects ; " .
+      my $SVNCMD = "$NICE svn co --non-interactive $SVNURL";
+      if ($VERBOSE) {
+        print "( time -p $SVNCMD/llvm/trunk llvm; cd llvm/projects ; " .
+              "$SVNCMD/test-suite/trunk llvm-test ) > $COLog 2>&1\n";
+      }
+      system "( time -p $SVNCMD/llvm/trunk llvm; cd llvm/projects ; " .
             "$SVNCMD/test-suite/trunk llvm-test ) > $COLog 2>&1\n";
-    }
-    system "( time -p $SVNCMD/llvm/trunk llvm; cd llvm/projects ; " .
-          "$SVNCMD/test-suite/trunk llvm-test ) > $COLog 2>&1\n";
+      if ($WITHCLANG) {  # append (>>): keep the llvm/test-suite checkout log
+        my $SVNCMD = "$NICE svn co --non-interactive $SVNURL/cfe/trunk";
+        if ($VERBOSE) {
+          print "( time -p cd llvm/tools ; $SVNCMD clang ) >> $COLog 2>&1\n";
+        }
+        system "( time -p cd llvm/tools ; $SVNCMD clang ) >> $COLog 2>&1\n";
+      }
   } else {
     my $CVSOPT = "";
     $CVSOPT = "-z3" # Use compression if going over ssh.
@@ -611,7 +620,7 @@ if (!$NOCVSSTATS) {
   if ($VERBOSE) { print "CHANGE HISTORY ANALYSIS STAGE\n"; }
 
   if ($USESVN) {
-    @SVNHistory = split /<logentry/, `svn log --xml --verbose -r{$DATE}:HEAD`;
+    @SVNHistory = split /<logentry/, `svn log --non-interactive --xml --verbose -r{$DATE}:HEAD`;
     # Skip very first entry because it is the XML header cruft
     shift @SVNHistory;
     my $Now = time();
@@ -717,9 +726,11 @@ if (!$NOCHECKOUT && !$NOBUILD) {
          "> $BuildLog 2>&1";
   if ( $VERBOSE ) {
     print "BUILD STAGE:\n";
+    print "(time -p $NICE $MAKECMD clean) >> $BuildLog 2>&1\n";
     print "(time -p $NICE $MAKECMD $MAKEOPTS) >> $BuildLog 2>&1\n";
   }
   # Build the entire tree, capturing the output into $BuildLog
+  system "(time -p $NICE $MAKECMD clean) >> $BuildLog 2>&1";
   system "(time -p $NICE $MAKECMD $MAKEOPTS) >> $BuildLog 2>&1";
 }
 
diff --git a/utils/TableGen/ClangDiagnosticsEmitter.cpp b/utils/TableGen/ClangDiagnosticsEmitter.cpp
index 919ae9b..a4a5698 100644
--- a/utils/TableGen/ClangDiagnosticsEmitter.cpp
+++ b/utils/TableGen/ClangDiagnosticsEmitter.cpp
@@ -65,6 +65,12 @@ void ClangDiagsDefsEmitter::run(std::ostream &OS) {
     } else {
       OS << ", 0";
     }
+
+    // SFINAE bit
+    if (R.getValueAsBit("SFINAE"))
+      OS << ", true";
+    else
+      OS << ", false";
     OS << ")\n";
   }
 }
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index db76dab..e668468 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -2007,9 +2007,28 @@ void CodeGenDAGPatterns::ParsePatterns() {
       Pattern = new TreePattern(Patterns[i], Tree, true, *this);
     else {
       std::vector<Init*> Values;
-      for (unsigned j = 0, ee = Tree->getNumArgs(); j != ee; ++j)
+      RecTy *ListTy = 0;
+      for (unsigned j = 0, ee = Tree->getNumArgs(); j != ee; ++j) {
         Values.push_back(Tree->getArg(j));
-      ListInit *LI = new ListInit(Values);
+        TypedInit *TArg = dynamic_cast<TypedInit*>(Tree->getArg(j));
+        if (TArg == 0) {
+          cerr << "In dag: " << Tree->getAsString();
+          cerr << " --  Untyped argument in pattern\n";
+          assert(0 && "Untyped argument in pattern");
+        }
+        if (ListTy != 0) {
+          ListTy = resolveTypes(ListTy, TArg->getType());
+          if (ListTy == 0) {
+            cerr << "In dag: " << Tree->getAsString();
+            cerr << " --  Incompatible types in pattern arguments\n";
+            assert(0 && "Incompatible types in pattern arguments");
+          }
+        }
+        else {
+          ListTy = TArg->getType();
+        }
+      }
+      ListInit *LI = new ListInit(Values, new ListRecTy(ListTy));
       Pattern = new TreePattern(Patterns[i], LI, true, *this);
     }
 
diff --git a/utils/TableGen/Record.cpp b/utils/TableGen/Record.cpp
index 45804b9..c62e21b 100644
--- a/utils/TableGen/Record.cpp
+++ b/utils/TableGen/Record.cpp
@@ -189,7 +189,12 @@ Init *ListRecTy::convertValue(ListInit *LI) {
     else
       return 0;
 
-  return new ListInit(Elements);
+  ListRecTy *LType = dynamic_cast<ListRecTy*>(LI->getType());
+  if (LType == 0) {
+    return 0;
+  }
+
+  return new ListInit(Elements, new ListRecTy(Ty));
 }
 
 Init *ListRecTy::convertValue(TypedInit *TI) {
@@ -270,6 +275,57 @@ bool RecordRecTy::baseClassOf(const RecordRecTy *RHS) const {
 }
 
 
+/// resolveTypes - Find a common type that T1 and T2 both convert to.
+/// Returns T1 if T1->typeIsConvertibleTo(T2), else T2 if the reverse holds;
+/// otherwise the superclasses of each record-typed argument are tried in
+/// turn.  Returns 0 if no such type exists.  Temporary RecordRecTy probes
+/// built for the superclass search are freed unless they are the result.
+///
+RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) {
+  if (!T1->typeIsConvertibleTo(T2)) {
+    if (!T2->typeIsConvertibleTo(T1)) {
+      // If one is a Record type, check superclasses
+      RecordRecTy *RecTy1 = dynamic_cast<RecordRecTy*>(T1);
+      if (RecTy1) {
+        // See if T2 inherits from a type T1 also inherits from
+        const std::vector<Record *> &T1SuperClasses =
+          RecTy1->getRecord()->getSuperClasses();
+        for (std::vector<Record *>::const_iterator i = T1SuperClasses.begin(),
+               iend = T1SuperClasses.end(); i != iend; ++i) {
+          RecordRecTy *SuperRecTy1 = new RecordRecTy(*i);
+          RecTy *NewType1 = resolveTypes(SuperRecTy1, T2);
+          // Release the probe on failure too, not only on a different match.
+          if (NewType1 != SuperRecTy1)
+            delete SuperRecTy1;
+          if (NewType1 != 0)
+            return NewType1;
+        }
+      }
+      RecordRecTy *RecTy2 = dynamic_cast<RecordRecTy*>(T2);
+      if (RecTy2) {
+        // See if T1 inherits from a type T2 also inherits from
+        const std::vector<Record *> &T2SuperClasses =
+          RecTy2->getRecord()->getSuperClasses();
+        for (std::vector<Record *>::const_iterator i = T2SuperClasses.begin(),
+               iend = T2SuperClasses.end(); i != iend; ++i) {
+          RecordRecTy *SuperRecTy2 = new RecordRecTy(*i);
+          RecTy *NewType2 = resolveTypes(T1, SuperRecTy2);
+          // Release the probe on failure too, not only on a different match.
+          if (NewType2 != SuperRecTy2)
+            delete SuperRecTy2;
+          if (NewType2 != 0)
+            return NewType2;
+        }
+      }
+      // Neither argument nor any tried superclass yielded a common type.
+      return 0;
+    }
+    return T2;
+  }
+  return T1;
+}
+
+
 //===----------------------------------------------------------------------===//
 //    Initializer implementations
 //===----------------------------------------------------------------------===//
@@ -398,7 +454,7 @@ Init *ListInit::convertInitListSlice(const std::vector<unsigned> &Elements) {
       return 0;
     Vals.push_back(getElement(Elements[i]));
   }
-  return new ListInit(Vals);
+  return new ListInit(Vals, getType());
 }
 
 Record *ListInit::getElementAsRecord(unsigned i) const {
@@ -426,10 +482,20 @@ Init *ListInit::resolveReferences(Record &R, const RecordVal *RV) {
   }
 
   if (Changed)
-    return new ListInit(Resolved);
+    return new ListInit(Resolved, getType());
   return this;
 }
 
+/// Return element Elt if it is in range and set; otherwise return null.
+Init *ListInit::resolveListElementReference(Record &R, const RecordVal *IRV,
+                                           unsigned Elt) {
+  if (Elt >= getSize())
+    return 0;  // Out of range reference.
+  Init *Element = getElement(Elt);
+  // A set element replaces the VarListElementInit; an unset one does not.
+  return dynamic_cast<UnsetInit*>(Element) ? 0 : Element;
+}
+
 std::string ListInit::getAsString() const {
   std::string Result = "[";
   for (unsigned i = 0, e = Values.size(); i != e; ++i) {
@@ -538,7 +604,7 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) {
         assert(0 && "Empty list in cdr");
         return 0;
       }
-      ListInit *Result = new ListInit(LHSl->begin()+1, LHSl->end());
+      ListInit *Result = new ListInit(LHSl->begin()+1, LHSl->end(), LHSl->getType());
       return Result;
     }
     break;
@@ -553,6 +619,16 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) {
         return new IntInit(0);
       }
     }
+    StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
+    if (LHSs) {
+      if (LHSs->getValue().empty()) {
+        return new IntInit(1);
+      }
+      else {
+        return new IntInit(0);
+      }
+    }
+    
     break;
   }
   }
@@ -665,8 +741,8 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) {
       if (Record *D = Records.getDef(Name))
         return new DefInit(D);
 
-      cerr << "Variable not defined: '" + Name + "'\n";
-      assert(0 && "Variable not found");
+      cerr << "Variable not defined in !nameconcat: '" + Name + "'\n";
+      assert(0 && "Variable not found in !nameconcat");
       return 0;
     }
     break;
@@ -848,7 +924,7 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
           delete NewOp;
         }
       }
-      return new ListInit(NewList);
+      return new ListInit(NewList, MHSl->getType());
     }
   }
   return 0;
@@ -932,9 +1008,25 @@ Init *TernOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) {
 
 Init *TernOpInit::resolveReferences(Record &R, const RecordVal *RV) {
   Init *lhs = LHS->resolveReferences(R, RV);
+
+  if (Opc == IF && lhs != LHS) {
+    IntInit *Value = dynamic_cast<IntInit*>(lhs);
+    if (Value != 0) {
+      // Short-circuit
+      if (Value->getValue()) {
+        Init *mhs = MHS->resolveReferences(R, RV);
+        return (new TernOpInit(getOpcode(), lhs, mhs, RHS, getType()))->Fold(&R, 0);
+      }
+      else {
+        Init *rhs = RHS->resolveReferences(R, RV);
+        return (new TernOpInit(getOpcode(), lhs, MHS, rhs, getType()))->Fold(&R, 0);
+      }
+    }
+  }
+  
   Init *mhs = MHS->resolveReferences(R, RV);
   Init *rhs = RHS->resolveReferences(R, RV);
-  
+
   if (LHS != lhs || MHS != mhs || RHS != rhs)
     return (new TernOpInit(getOpcode(), lhs, mhs, rhs, getType()))->Fold(&R, 0);
   return Fold(&R, 0);
@@ -978,7 +1070,7 @@ Init *TypedInit::convertInitListSlice(const std::vector<unsigned> &Elements) {
   ListInits.reserve(Elements.size());
   for (unsigned i = 0, e = Elements.size(); i != e; ++i)
     ListInits.push_back(new VarListElementInit(this, Elements[i]));
-  return new ListInit(ListInits);
+  return new ListInit(ListInits, T);
 }
 
 
diff --git a/utils/TableGen/Record.h b/utils/TableGen/Record.h
index 4284cab..ac06cae 100644
--- a/utils/TableGen/Record.h
+++ b/utils/TableGen/Record.h
@@ -442,7 +442,10 @@ public:
   virtual bool baseClassOf(const RecordRecTy *RHS) const;
 };
 
-
+/// resolveTypes - Find a common type that T1 and T2 convert to.  
+/// Return 0 if no such type exists.
+///
+RecTy *resolveTypes(RecTy *T1, RecTy *T2);
 
 //===----------------------------------------------------------------------===//
 //  Initializer Classes
@@ -618,10 +621,10 @@ public:
 
 /// IntInit - 7 - Represent an initalization by a literal integer value.
 ///
-class IntInit : public Init {
+class IntInit : public TypedInit {
   int64_t Value;
 public:
-  explicit IntInit(int64_t V) : Value(V) {}
+  explicit IntInit(int64_t V) : TypedInit(new IntRecTy), Value(V) {}
 
   int64_t getValue() const { return Value; }
 
@@ -631,6 +634,25 @@ public:
   virtual Init *convertInitializerBitRange(const std::vector<unsigned> &Bits);
 
   virtual std::string getAsString() const;
+
+  /// resolveBitReference - This method is used to implement
+  /// VarBitInit::resolveReferences.  A bit reference off an int is treated as
+  /// illegal here: this override asserts and then returns null.
+  ///
+  virtual Init *resolveBitReference(Record &R, const RecordVal *RV,
+                                    unsigned Bit) {
+    assert(0 && "Illegal bit reference off int");
+    return 0;
+  }
+
+  /// resolveListElementReference - This method is used to implement
+  /// VarListElementInit::resolveReferences.  An element reference off an int
+  /// is treated as illegal here: this override asserts and then returns null.
+  virtual Init *resolveListElementReference(Record &R, const RecordVal *RV,
+                                            unsigned Elt) {
+    assert(0 && "Illegal element reference off int");
+    return 0;
+  }
 };
 
 
@@ -688,17 +710,18 @@ public:
 
 /// ListInit - [AL, AH, CL] - Represent a list of defs
 ///
-class ListInit : public Init {
+class ListInit : public TypedInit {
   std::vector<Init*> Values;
 public:
   typedef std::vector<Init*>::iterator       iterator;
   typedef std::vector<Init*>::const_iterator const_iterator;
 
-  explicit ListInit(std::vector<Init*> &Vs) {
+  explicit ListInit(std::vector<Init*> &Vs, RecTy *EltTy)
+    : TypedInit(new ListRecTy(EltTy)) {
     Values.swap(Vs);
   }
-  explicit ListInit(iterator Start, iterator End)
-    : Values(Start, End) {}
+  explicit ListInit(iterator Start, iterator End, RecTy *EltTy)
+      : TypedInit(new ListRecTy(EltTy)), Values(Start, End) {}
 
   unsigned getSize() const { return Values.size(); }
   Init *getElement(unsigned i) const {
@@ -730,6 +753,22 @@ public:
 
   inline size_t         size () const { return Values.size();  }
   inline bool           empty() const { return Values.empty(); }
+
+  /// resolveBitReference - This method is used to implement
+  /// VarBitInit::resolveReferences.  A bit reference off a list is treated as
+  /// illegal here: this override asserts and then returns null.
+  ///
+  virtual Init *resolveBitReference(Record &R, const RecordVal *RV,
+                                    unsigned Bit) {
+    assert(0 && "Illegal bit reference off list");
+    return 0;
+  }
+
+  /// resolveListElementReference - This method is used to implement
+  /// VarListElementInit::resolveReferences.  If the list element is resolvable
+  /// now, we return the resolved value, otherwise we return null.
+  virtual Init *resolveListElementReference(Record &R, const RecordVal *RV,
+                                            unsigned Elt);
 };
 
 
diff --git a/utils/TableGen/TGParser.cpp b/utils/TableGen/TGParser.cpp
index fc6f29f..cdd2857 100644
--- a/utils/TableGen/TGParser.cpp
+++ b/utils/TableGen/TGParser.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include <algorithm>
+#include <sstream>
 
 #include "TGParser.h"
 #include "Record.h"
@@ -396,7 +397,7 @@ ParseSubClassReference(Record *CurRec, bool isDefm) {
     return Result;
   }
   
-  Result.TemplateArgs = ParseValueList(CurRec);
+  Result.TemplateArgs = ParseValueList(CurRec, Result.Rec);
   if (Result.TemplateArgs.empty()) {
     Result.Rec = 0;   // Error parsing value list.
     return Result;
@@ -438,7 +439,7 @@ ParseSubMultiClassReference(MultiClass *CurMC) {
     return Result;
   }
 
-  Result.TemplateArgs = ParseValueList(&CurMC->Rec);
+  Result.TemplateArgs = ParseValueList(&CurMC->Rec, &Result.MC->Rec);
   if (Result.TemplateArgs.empty()) {
     Result.MC = 0;   // Error parsing value list.
     return Result;
@@ -728,21 +729,28 @@ Init *TGParser::ParseOperation(Record *CurRec) {
         || Code == UnOpInit::CDR
         || Code == UnOpInit::LNULL) {
       ListInit *LHSl = dynamic_cast<ListInit*>(LHS);
+      StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
       TypedInit *LHSt = dynamic_cast<TypedInit*>(LHS);
-      if (LHSl == 0 && LHSt == 0) {
-        TokError("expected list type argument in unary operator");
+      if (LHSl == 0 && LHSs == 0 && LHSt == 0) {
+        TokError("expected list or string type argument in unary operator");
         return 0;
       }
       if (LHSt) {
         ListRecTy *LType = dynamic_cast<ListRecTy*>(LHSt->getType());
-        if (LType == 0) {
-          TokError("expected list type argumnet in unary operator");
+        StringRecTy *SType = dynamic_cast<StringRecTy*>(LHSt->getType());
+        if (LType == 0 && SType == 0) {
+          TokError("expected list or string type argumnet in unary operator");
           return 0;
         }
       }
 
       if (Code == UnOpInit::CAR
           || Code == UnOpInit::CDR) {
+        if (LHSl == 0 && LHSt == 0) {
+          TokError("expected list type argumnet in unary operator");
+          return 0;
+        }
+        
         if (LHSl && LHSl->getSize() == 0) {
           TokError("empty list argument in unary operator");
           return 0;
@@ -1011,7 +1019,7 @@ RecTy *TGParser::ParseOperatorType(void) {
 ///   SimpleValue ::= SRLTOK '(' Value ',' Value ')'
 ///   SimpleValue ::= STRCONCATTOK '(' Value ',' Value ')'
 ///
-Init *TGParser::ParseSimpleValue(Record *CurRec) {
+Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType) {
   Init *R = 0;
   switch (Lex.getCode()) {
   default: TokError("Unknown token when parsing a value"); break;
@@ -1043,15 +1051,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec) {
       TokError("expected non-empty value list");
       return 0;
     }
-    std::vector<Init*> ValueList = ParseValueList(CurRec);
-    if (ValueList.empty()) return 0;
-    
-    if (Lex.getCode() != tgtok::greater) {
-      TokError("expected '>' at end of value list");
-      return 0;
-    }
-    Lex.Lex();  // eat the '>'
-    
+
     // This is a CLASS<initvalslist> expression.  This is supposed to synthesize
     // a new anonymous definition, deriving from CLASS<initvalslist> with no
     // body.
@@ -1060,6 +1060,15 @@ Init *TGParser::ParseSimpleValue(Record *CurRec) {
       Error(NameLoc, "Expected a class name, got '" + Name + "'");
       return 0;
     }
+
+    std::vector<Init*> ValueList = ParseValueList(CurRec, Class);
+    if (ValueList.empty()) return 0;
+    
+    if (Lex.getCode() != tgtok::greater) {
+      TokError("expected '>' at end of value list");
+      return 0;
+    }
+    Lex.Lex();  // eat the '>'
     
     // Create the new record, set it as CurRec temporarily.
     static unsigned AnonCounter = 0;
@@ -1108,8 +1117,22 @@ Init *TGParser::ParseSimpleValue(Record *CurRec) {
     Lex.Lex(); // eat the '['
     std::vector<Init*> Vals;
     
+    RecTy *DeducedEltTy = 0;
+    ListRecTy *GivenListTy = 0;
+    
+    if (ItemType != 0) {
+      ListRecTy *ListType = dynamic_cast<ListRecTy*>(ItemType);
+      if (ListType == 0) {
+        std::stringstream s;
+        s << "Type mismatch for list, expected list type, got " 
+          << ItemType->getAsString();
+        TokError(s.str());
+      }
+      GivenListTy = ListType;
+    }    
+
     if (Lex.getCode() != tgtok::r_square) {
-      Vals = ParseValueList(CurRec);
+      Vals = ParseValueList(CurRec, 0, GivenListTy ? GivenListTy->getElementType() : 0);
       if (Vals.empty()) return 0;
     }
     if (Lex.getCode() != tgtok::r_square) {
@@ -1117,7 +1140,77 @@ Init *TGParser::ParseSimpleValue(Record *CurRec) {
       return 0;
     }
     Lex.Lex();  // eat the ']'
-    return new ListInit(Vals);
+
+    RecTy *GivenEltTy = 0;
+    if (Lex.getCode() == tgtok::less) {
+      // Optional list element type
+      Lex.Lex();  // eat the '<'
+
+      GivenEltTy = ParseType();
+      if (GivenEltTy == 0) {
+        // Couldn't parse element type
+        return 0;
+      }
+
+      if (Lex.getCode() != tgtok::greater) {
+        TokError("expected '>' at end of list element type");
+        return 0;
+      }
+      Lex.Lex();  // eat the '>'
+    }
+
+    // Check elements
+    RecTy *EltTy = 0;
+    for (std::vector<Init *>::iterator i = Vals.begin(), ie = Vals.end();
+         i != ie;
+         ++i) {
+      TypedInit *TArg = dynamic_cast<TypedInit*>(*i);
+      if (TArg == 0) {
+        TokError("Untyped list element");
+        return 0;
+      }
+      if (EltTy != 0) {
+        EltTy = resolveTypes(EltTy, TArg->getType());
+        if (EltTy == 0) {
+          TokError("Incompatible types in list elements");
+          return 0;
+        }
+      }
+      else {
+        EltTy = TArg->getType();
+      }
+    }
+
+    if (GivenEltTy != 0) {
+      if (EltTy != 0) {
+        // Verify consistency
+        if (!EltTy->typeIsConvertibleTo(GivenEltTy)) {
+          TokError("Incompatible types in list elements");
+          return 0;
+        }
+      }
+      EltTy = GivenEltTy;
+    }
+
+    if (EltTy == 0) {
+      if (ItemType == 0) {
+        TokError("No type for list");
+        return 0;
+      }
+      DeducedEltTy = GivenListTy->getElementType();
+   }
+    else {
+      // Make sure the deduced type is compatible with the given type
+      if (GivenListTy) {
+        if (!EltTy->typeIsConvertibleTo(GivenListTy->getElementType())) {
+          TokError("Element type mismatch for list");
+          return 0;
+        }
+      }
+      DeducedEltTy = EltTy;
+    }
+    
+    return new ListInit(Vals, DeducedEltTy);
   }
   case tgtok::l_paren: {         // Value ::= '(' IDValue DagArgList ')'
     Lex.Lex();   // eat the '('
@@ -1193,8 +1286,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec) {
 ///   ValueSuffix ::= '[' BitList ']'
 ///   ValueSuffix ::= '.' ID
 ///
-Init *TGParser::ParseValue(Record *CurRec) {
-  Init *Result = ParseSimpleValue(CurRec);
+Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType) {
+  Init *Result = ParseSimpleValue(CurRec, ItemType);
   if (Result == 0) return 0;
   
   // Parse the suffixes now if present.
@@ -1299,15 +1392,31 @@ TGParser::ParseDagArgList(Record *CurRec) {
 ///
 ///   ValueList ::= Value (',' Value)
 ///
-std::vector<Init*> TGParser::ParseValueList(Record *CurRec) {
+std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec, RecTy *EltTy) {
   std::vector<Init*> Result;
-  Result.push_back(ParseValue(CurRec));
+  RecTy *ItemType = EltTy;
+  int ArgN = 0;
+  if (ArgsRec != 0 && EltTy == 0) {
+    const std::vector<std::string> &TArgs = ArgsRec->getTemplateArgs();
+    const RecordVal *RV = ArgsRec->getValue(TArgs[ArgN]);
+    assert(RV && "Template argument record not found??");
+    ItemType = RV->getType();
+    ++ArgN;
+  }
+  Result.push_back(ParseValue(CurRec, ItemType));
   if (Result.back() == 0) return std::vector<Init*>();
   
   while (Lex.getCode() == tgtok::comma) {
     Lex.Lex();  // Eat the comma
     
-    Result.push_back(ParseValue(CurRec));
+    if (ArgsRec != 0 && EltTy == 0) {
+      const std::vector<std::string> &TArgs = ArgsRec->getTemplateArgs();
+      const RecordVal *RV = ArgsRec->getValue(TArgs[ArgN]);
+      assert(RV && "Template argument record not found??");
+      ItemType = RV->getType();
+      ++ArgN;
+    }
+    Result.push_back(ParseValue(CurRec, ItemType));
     if (Result.back() == 0) return std::vector<Init*>();
   }
   
@@ -1362,7 +1471,7 @@ std::string TGParser::ParseDeclaration(Record *CurRec,
   if (Lex.getCode() == tgtok::equal) {
     Lex.Lex();
     TGLoc ValLoc = Lex.getLoc();
-    Init *Val = ParseValue(CurRec);
+    Init *Val = ParseValue(CurRec, Type);
     if (Val == 0 ||
         SetValue(CurRec, ValLoc, DeclName, std::vector<unsigned>(), Val))
       return "";
@@ -1440,7 +1549,13 @@ bool TGParser::ParseBodyItem(Record *CurRec) {
     return TokError("expected '=' in let expression");
   Lex.Lex();  // eat the '='.
   
-  Init *Val = ParseValue(CurRec);
+  RecordVal *Field = CurRec->getValue(FieldName);
+  if (Field == 0)
+    return TokError("Value '" + FieldName + "' unknown!");
+
+  RecTy *Type = Field->getType();
+  
+  Init *Val = ParseValue(CurRec, Type);
   if (Val == 0) return true;
   
   if (Lex.getCode() != tgtok::semi)
diff --git a/utils/TableGen/TGParser.h b/utils/TableGen/TGParser.h
index f03052e..3af467d 100644
--- a/utils/TableGen/TGParser.h
+++ b/utils/TableGen/TGParser.h
@@ -93,9 +93,9 @@ private:  // Parser methods.
 
   Init *ParseIDValue(Record *CurRec);
   Init *ParseIDValue(Record *CurRec, const std::string &Name, TGLoc NameLoc);
-  Init *ParseSimpleValue(Record *CurRec);
-  Init *ParseValue(Record *CurRec);
-  std::vector<Init*> ParseValueList(Record *CurRec);
+  Init *ParseSimpleValue(Record *CurRec, RecTy *ItemType = 0);
+  Init *ParseValue(Record *CurRec, RecTy *ItemType = 0);
+  std::vector<Init*> ParseValueList(Record *CurRec, Record *ArgsRec = 0, RecTy *EltTy = 0);
   std::vector<std::pair<llvm::Init*, std::string> > ParseDagArgList(Record *);
   bool ParseOptionalRangeList(std::vector<unsigned> &Ranges);
   bool ParseOptionalBitList(std::vector<unsigned> &Ranges);
diff --git a/utils/crosstool/ARM/build-install-linux.sh b/utils/crosstool/ARM/build-install-linux.sh
new file mode 100755
index 0000000..33833b5
--- /dev/null
+++ b/utils/crosstool/ARM/build-install-linux.sh
@@ -0,0 +1,203 @@
+#!/bin/bash
+#
+# Compiles and installs a Linux/x86_64 -> Linux/ARM crosstool based on LLVM and
+# LLVM-GCC-4.2 using SVN snapshots in provided tarballs.
+
+set -o nounset -o errexit
+
+echo -n "Welcome to LLVM Linux/X86_64 -> Linux/ARM crosstool "
+echo "builder/installer; some steps will require sudo privileges."
+
+readonly INSTALL_ROOT="${INSTALL_ROOT:-/usr/local}"
+# Both $USER and root *must* have read/write access to this dir.
+SCRATCH_ROOT=$(mktemp -d "${TMPDIR:-/tmp}/llvm-project.XXXXXX")
+readonly SCRATCH_ROOT  # Kept separate so that a failing mktemp trips errexit.
+readonly SRC_ROOT="${SCRATCH_ROOT}/src"
+readonly OBJ_ROOT="${SCRATCH_ROOT}/obj"
+
+readonly CROSS_HOST="x86_64-unknown-linux-gnu"
+readonly CROSS_TARGET="arm-none-linux-gnueabi"
+
+readonly CODE_SOURCERY="${INSTALL_ROOT}/codesourcery"
+readonly CODE_SOURCERY_PKG_PATH="${CODE_SOURCERY_PKG_PATH:-${HOME}/codesourcery}"
+readonly CODE_SOURCERY_HTTP="http://www.codesourcery.com/sgpp/lite/arm/portal/package1787/public"
+readonly CODE_SOURCERY_PKG="arm-2007q3-51-arm-none-linux-gnueabi-i686-pc-linux-gnu.tar.bz2"
+readonly CODE_SOURCERY_ROOT="${CODE_SOURCERY}/arm-2007q3"
+readonly CODE_SOURCERY_BIN="${CODE_SOURCERY_ROOT}/bin"
+# Prepend the CodeSourcery bin dir so ${CROSS_TARGET}-* binutils are on PATH.
+export PATH="${CODE_SOURCERY_BIN}:${PATH}"
+
+readonly CROSS_TARGET_AS="${CODE_SOURCERY_BIN}/${CROSS_TARGET}-as"
+readonly CROSS_TARGET_LD="${CODE_SOURCERY_BIN}/${CROSS_TARGET}-ld"
+
+readonly SYSROOT="${CODE_SOURCERY_ROOT}/${CROSS_TARGET}/libc"
+
+readonly LLVM_PROJECT="${INSTALL_ROOT}/llvm-project"
+readonly LLVM_INSTALL_ROOT="${LLVM_PROJECT}/${CROSS_HOST}/${CROSS_TARGET}"
+readonly LLVM_PKG_PATH="${LLVM_PKG_PATH:-${HOME}/llvm-project/snapshots}"
+
+# Latest SVN revision known to work; LLVM_SVN_REV/LLVMGCC_SVN_REV override it.
+readonly LLVM_DEFAULT_REV="70786"
+
+readonly LLVM_PKG="llvm-${LLVM_SVN_REV:-${LLVM_DEFAULT_REV}}.tar.bz2"
+readonly LLVM_SRC_DIR="${SRC_ROOT}/llvm"
+readonly LLVM_OBJ_DIR="${OBJ_ROOT}/llvm"
+readonly LLVM_INSTALL_DIR="${LLVM_INSTALL_ROOT}/llvm"
+
+readonly LLVMGCC_PKG="llvm-gcc-4.2-${LLVMGCC_SVN_REV:-${LLVM_DEFAULT_REV}}.tar.bz2"
+readonly LLVMGCC_SRC_DIR="${SRC_ROOT}/llvm-gcc-4.2"
+readonly LLVMGCC_OBJ_DIR="${OBJ_ROOT}/llvm-gcc-4.2"
+readonly LLVMGCC_INSTALL_DIR="${LLVM_INSTALL_ROOT}/llvm-gcc-4.2"
+
+readonly MAKE_OPTS="-j2"  # Extra flags for the LLVM make invocations.
+
+# Abort with a non-zero status if the install directory $1 already exists, as
+# installing over an existing tree would not give a clean install.
+verifyNotDir() {
+  if [[ -d "$1" ]]; then
+    echo "Install dir $1 already exists; remove it to continue."
+    exit 1  # A bare `exit` would report the status of echo, i.e. success.
+  fi
+}
+
+# Creates directory $1 (with parents) unless it already exists.  Params:
+#   $1: directory to be created
+#   $2: optional mkdir command prefix, e.g. "sudo"
+createDir() {
+  if [[ ! -e "$1" ]]; then
+    ${2:-} mkdir -p "$1"  # $2 stays unquoted so an empty prefix vanishes.
+  elif [[ ! -d "$1" ]]; then
+    echo "$1 exists but is not a directory; exiting."
+    exit 3
+  fi
+}
+
+sudoCreateDir() {  # $1: directory to create via sudo (see createDir).
+  createDir "$1" sudo
+  sudo chown "${USER}" "$1"  # Let the invoking user write to it.
+}
+
+# Prints a message and the command line, then runs the command without
+# logging.  Intended for lightweight commands whose output is not worth
+# parsing, e.g. mkdir, tar.  $1: message; remaining args: command to run.
+runCommand() {
+  local message="$1"
+  shift
+  echo "=> $message"
+  echo "==> Running: $*"
+  "$@"  # Unlike bare $*, keeps arguments containing whitespace intact.
+}
+
+runAndLog() {  # $1: message; $2: log file; remaining args: command to run.
+  local message="$1"
+  local log_file="$2"
+  shift 2
+  echo "=> $message; log in $log_file"
+  echo "==> Running: $*"
+  # Run the command inside the `if` so that errexit does not abort the script
+  # before the failure is reported; stdout and stderr both go to the log.
+  # Idea: pop up a terminal showing the output, e.g. via xterm and tee.
+  if ! "$@" &> "$log_file"; then
+    echo "Error occurred: see most recent log file for details."
+    exit 1
+  fi
+}
+
+installCodeSourcery() {
+  # Refuse to reuse an existing install dir, then create it owned by ${USER}.
+  verifyNotDir ${CODE_SOURCERY}
+  sudoCreateDir ${CODE_SOURCERY}
+
+  # Unpack the tarball; a missing tarball is fatal (exit status 1).
+  if [[ ! -d ${CODE_SOURCERY_ROOT} ]]; then
+    cd ${CODE_SOURCERY}
+    if [[ -e ${CODE_SOURCERY_PKG_PATH}/${CODE_SOURCERY_PKG} ]]; then
+      runCommand "Unpacking CodeSourcery in ${CODE_SOURCERY}" \
+          tar jxf ${CODE_SOURCERY_PKG_PATH}/${CODE_SOURCERY_PKG}
+    else
+      echo -n "CodeSourcery tarball not found in "
+      echo "${CODE_SOURCERY_PKG_PATH}/${CODE_SOURCERY_PKG}"
+      echo -n "Fix the path or download it from "
+      echo "${CODE_SOURCERY_HTTP}/${CROSS_TARGET}/${CODE_SOURCERY_PKG}"
+      exit 1
+    fi
+  else
+    echo "CodeSourcery install dir already exists."
+  fi
+
+  # Verify our CodeSourcery toolchain installation: sysroot libc and binutils.
+  if [[ ! -d "${SYSROOT}" ]]; then
+    echo -n "Error: CodeSourcery does not contain libc for ${CROSS_TARGET}: "
+    echo "${SYSROOT} not found."
+    exit 1
+  fi
+
+  for tool in ${CROSS_TARGET_AS} ${CROSS_TARGET_LD}; do
+    if [[ ! -e $tool ]]; then
+      echo "${tool} not found; exiting."
+      exit 1
+    fi
+  done
+}
+
+installLLVM() {  # Builds LLVM from its tarball; installs to ${LLVM_INSTALL_DIR}.
+  verifyNotDir ${LLVM_INSTALL_DIR}
+  sudoCreateDir ${LLVM_INSTALL_DIR}
+
+  # Unpack LLVM tarball in ${SRC_ROOT}; should create the directory "llvm".
+  cd ${SRC_ROOT}
+  runCommand "Unpacking LLVM" tar jxf ${LLVM_PKG_PATH}/${LLVM_PKG}
+
+  # Configure, build, and install LLVM from ${LLVM_OBJ_DIR}; each step logs there.
+  createDir ${LLVM_OBJ_DIR}
+  cd ${LLVM_OBJ_DIR}
+  runAndLog "Configuring LLVM" ${LLVM_OBJ_DIR}/llvm-configure.log \
+      ${LLVM_SRC_DIR}/configure \
+      --disable-jit \
+      --enable-optimized \
+      --prefix=${LLVM_INSTALL_DIR} \
+      --target=${CROSS_TARGET} \
+      --with-llvmgccdir=${LLVMGCC_INSTALL_DIR}
+  runAndLog "Building LLVM" ${LLVM_OBJ_DIR}/llvm-build.log \
+      make ${MAKE_OPTS}
+  runAndLog "Installing LLVM" ${LLVM_OBJ_DIR}/llvm-install.log \
+      make ${MAKE_OPTS} install
+}
+
+installLLVMGCC() {  # Builds LLVM-GCC; installs to ${LLVMGCC_INSTALL_DIR}.
+  verifyNotDir ${LLVMGCC_INSTALL_DIR}
+  sudoCreateDir ${LLVMGCC_INSTALL_DIR}
+
+  # Unpack LLVM-GCC tarball; should create the directory "llvm-gcc-4.2".
+  cd ${SRC_ROOT}
+  runCommand "Unpacking LLVM-GCC" tar jxf ${LLVM_PKG_PATH}/${LLVMGCC_PKG}
+
+  # Configure, build, and install LLVM-GCC from ${LLVMGCC_OBJ_DIR}, pointing it
+  # at the LLVM install and the CodeSourcery assembler, linker and sysroot.
+  createDir ${LLVMGCC_OBJ_DIR}
+  cd ${LLVMGCC_OBJ_DIR}
+  runAndLog "Configuring LLVM-GCC" ${LLVMGCC_OBJ_DIR}/llvmgcc-configure.log \
+      ${LLVMGCC_SRC_DIR}/configure \
+      --enable-languages=c,c++ \
+      --enable-llvm=${LLVM_INSTALL_DIR} \
+      --prefix=${LLVMGCC_INSTALL_DIR} \
+      --program-prefix=llvm- \
+      --target=${CROSS_TARGET} \
+      --with-gnu-as=${CROSS_TARGET_AS} \
+      --with-gnu-ld=${CROSS_TARGET_LD} \
+      --with-sysroot=${SYSROOT}
+  runAndLog "Building LLVM-GCC" ${LLVMGCC_OBJ_DIR}/llvmgcc-build.log \
+      make
+  runAndLog "Installing LLVM-GCC" ${LLVMGCC_OBJ_DIR}/llvmgcc-install.log \
+      make install
+}
+
+echo "Building in ${SCRATCH_ROOT}; installing in ${INSTALL_ROOT}"
+
+createDir ${SRC_ROOT}  # Scratch tree the tarballs are unpacked into.
+createDir ${OBJ_ROOT}  # Scratch tree holding the per-project build dirs.
+
+installCodeSourcery
+installLLVM
+installLLVMGCC  # Configured with --enable-llvm=${LLVM_INSTALL_DIR}, so runs last.
+
+echo "Done."
diff --git a/utils/crosstool/create-snapshots.sh b/utils/crosstool/create-snapshots.sh
new file mode 100755
index 0000000..7c640bc
--- /dev/null
+++ b/utils/crosstool/create-snapshots.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# Creates LLVM SVN snapshots: llvm-$REV.tar.bz2 and llvm-gcc-4.2-$REV.tar.bz2,
+# where $REV is an SVN revision of LLVM.  This is used for creating stable
+# tarballs which can be used to build known-to-work crosstools.
+#
+# Syntax:
+#   $0 [REV] -- grabs the revision $REV from SVN; if not specified, grabs the
+#   latest SVN revision.
+
+set -o nounset
+set -o errexit
+
+readonly REV="${1:-HEAD}"  # SVN revision to snapshot; defaults to HEAD.
+
+runOnModule() {  # $1: SVN module to snapshot, e.g. "llvm".
+  local module=$1
+  local log="${module}.log"
+  echo "Running: svn co -r ${REV} ${module}; log in ${log}"
+  svn co -r ${REV} http://llvm.org/svn/llvm-project/${module}/trunk ${module} \
+      > ${log} 2>&1
+
+  # Delete all the ".svn" dirs; they take quite a lot of space.  -prune keeps
+  # find from descending into directories that are about to be removed.
+  echo "Cleaning up .svn dirs"
+  find ${module} -type d -name .svn -prune -print0 | xargs -0 /bin/rm -rf
+  # Create "module-revision.tar.bz2" packages from the SVN checkout dirs.
+  local revision=$(grep "Checked out revision" ${log} | \
+                   sed 's/[^0-9]\+\([0-9]\+\)[^0-9]\+/\1/')
+  local tarball="${module}-${revision}.tar.bz2"
+  echo "Creating tarball: ${tarball}"
+  tar cjf ${tarball} ${module}
+
+  echo "Cleaning SVN checkout dir ${module}"
+  rm -rf ${module} ${log}
+}
+
+for module in "llvm" "llvm-gcc-4.2"; do  # One tarball per module.
+  runOnModule ${module}
+done
+
author	ed <ed@FreeBSD.org>	2009-06-14 09:23:33 +0000
committer	ed <ed@FreeBSD.org>	2009-06-14 09:23:33 +0000
commit	db89e312d968c258aba3c79c1c398f5fb19267a3 (patch)
tree	49817b316c4fdaa56d9d16ebf2555303d1a990e0
parent	de000e339094f8c6e06a635dac9a803861416ec6 (diff)
download	FreeBSD-src-db89e312d968c258aba3c79c1c398f5fb19267a3.zip FreeBSD-src-db89e312d968c258aba3c79c1c398f5fb19267a3.tar.gz