Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
-rw-r--r-- | contrib/llvm/lib/Target/ARM/AsmParser/CMakeLists.txt | 7
-rw-r--r-- | contrib/llvm/lib/Target/ARM/AsmParser/Makefile | 15
-rw-r--r-- | contrib/llvm/lib/Target/ARM/AsmPrinter/CMakeLists.txt | 6
-rw-r--r-- | contrib/llvm/lib/Target/ARM/AsmPrinter/Makefile | 15
-rw-r--r-- | contrib/llvm/lib/Target/ARM/CMakeLists.txt | 50
-rw-r--r-- | contrib/llvm/lib/Target/ARM/Disassembler/Makefile | 16
-rw-r--r-- | contrib/llvm/lib/Target/ARM/Makefile | 25
-rw-r--r-- | contrib/llvm/lib/Target/ARM/README-Thumb.txt | 248
-rw-r--r-- | contrib/llvm/lib/Target/ARM/README-Thumb2.txt | 6
-rw-r--r-- | contrib/llvm/lib/Target/ARM/README.txt | 659
-rw-r--r-- | contrib/llvm/lib/Target/ARM/TargetInfo/CMakeLists.txt | 7
-rw-r--r-- | contrib/llvm/lib/Target/ARM/TargetInfo/Makefile | 15
12 files changed, 0 insertions, 1069 deletions
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/AsmParser/CMakeLists.txt
deleted file mode 100644
index 9ba7c01..0000000
--- a/contrib/llvm/lib/Target/ARM/AsmParser/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMARMAsmParser
-  ARMAsmLexer.cpp
-  ARMAsmParser.cpp
-  )
-
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/Makefile b/contrib/llvm/lib/Target/ARM/AsmParser/Makefile
deleted file mode 100644
index 841516f..0000000
--- a/contrib/llvm/lib/Target/ARM/AsmParser/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/ARM/AsmParser/Makefile -------------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMARMAsmParser
-
-# Hack: we need to include 'main' ARM target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/AsmPrinter/CMakeLists.txt
deleted file mode 100644
index 18645c0..0000000
--- a/contrib/llvm/lib/Target/ARM/AsmPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMARMAsmPrinter
-  ARMInstPrinter.cpp
-  )
-add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/ARM/AsmPrinter/Makefile b/contrib/llvm/lib/Target/ARM/AsmPrinter/Makefile
deleted file mode 100644
index 65d372e..0000000
--- a/contrib/llvm/lib/Target/ARM/AsmPrinter/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/ARM/AsmPrinter/Makefile ------------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMARMAsmPrinter
-
-# Hack: we need to include 'main' arm target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/ARM/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/CMakeLists.txt
deleted file mode 100644
index 6b4dee5..0000000
--- a/contrib/llvm/lib/Target/ARM/CMakeLists.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-set(LLVM_TARGET_DEFINITIONS ARM.td)
-
-tablegen(ARMGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(ARMGenRegisterNames.inc -gen-register-enums)
-tablegen(ARMGenRegisterInfo.inc -gen-register-desc)
-tablegen(ARMGenInstrNames.inc -gen-instr-enums)
-tablegen(ARMGenInstrInfo.inc -gen-instr-desc)
-tablegen(ARMGenCodeEmitter.inc -gen-emitter)
-tablegen(ARMGenAsmWriter.inc -gen-asm-writer)
-tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher)
-tablegen(ARMGenDAGISel.inc -gen-dag-isel)
-tablegen(ARMGenFastISel.inc -gen-fast-isel)
-tablegen(ARMGenCallingConv.inc -gen-callingconv)
-tablegen(ARMGenSubtarget.inc -gen-subtarget)
-tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
-
-add_llvm_target(ARMCodeGen
-  ARMAsmPrinter.cpp
-  ARMBaseInstrInfo.cpp
-  ARMBaseRegisterInfo.cpp
-  ARMCodeEmitter.cpp
-  ARMConstantIslandPass.cpp
-  ARMConstantPoolValue.cpp
-  ARMExpandPseudoInsts.cpp
-  ARMFastISel.cpp
-  ARMGlobalMerge.cpp
-  ARMISelDAGToDAG.cpp
-  ARMISelLowering.cpp
-  ARMInstrInfo.cpp
-  ARMJITInfo.cpp
-  ARMLoadStoreOptimizer.cpp
-  ARMMCAsmInfo.cpp
-  ARMMCInstLower.cpp
-  ARMRegisterInfo.cpp
-  ARMSelectionDAGInfo.cpp
-  ARMSubtarget.cpp
-  ARMTargetMachine.cpp
-  ARMTargetObjectFile.cpp
-  NEONMoveFix.cpp
-  NEONPreAllocPass.cpp
-  Thumb1InstrInfo.cpp
-  Thumb1RegisterInfo.cpp
-  Thumb2HazardRecognizer.cpp
-  Thumb2ITBlockPass.cpp
-  Thumb2InstrInfo.cpp
-  Thumb2RegisterInfo.cpp
-  Thumb2SizeReduction.cpp
-  )
-
-target_link_libraries (LLVMARMCodeGen LLVMARMAsmPrinter LLVMSelectionDAG)
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/Makefile b/contrib/llvm/lib/Target/ARM/Disassembler/Makefile
deleted file mode 100644
index 031b6ac..0000000
--- a/contrib/llvm/lib/Target/ARM/Disassembler/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/ARM/Disassembler/Makefile ----------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMARMDisassembler
-
-# Hack: we need to include 'main' arm target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/ARM/Makefile b/contrib/llvm/lib/Target/ARM/Makefile
deleted file mode 100644
index b3fcfaf6..0000000
--- a/contrib/llvm/lib/Target/ARM/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-##===- lib/Target/ARM/Makefile -----------------------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMARMCodeGen
-TARGET = ARM
-
-# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \
-                ARMGenRegisterInfo.inc ARMGenInstrNames.inc \
-                ARMGenInstrInfo.inc ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \
-                ARMGenDAGISel.inc ARMGenSubtarget.inc \
-                ARMGenCodeEmitter.inc ARMGenCallingConv.inc \
-                ARMGenDecoderTables.inc ARMGenEDInfo.inc \
-                ARMGenFastISel.inc
-
-DIRS = AsmPrinter AsmParser Disassembler TargetInfo
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/ARM/README-Thumb.txt b/contrib/llvm/lib/Target/ARM/README-Thumb.txt
deleted file mode 100644
index 6b605bb..0000000
--- a/contrib/llvm/lib/Target/ARM/README-Thumb.txt
+++ /dev/null
@@ -1,248 +0,0 @@
-//===---------------------------------------------------------------------===//
-// Random ideas for the ARM backend (Thumb specific).
-//===---------------------------------------------------------------------===//
-
-* Add support for compiling functions in both ARM and Thumb mode, then taking
-  the smallest.
-
-* Add support for compiling individual basic blocks in thumb mode, when in a
-  larger ARM function. This can be used for presumed cold code, like paths
-  to abort (failure path of asserts), EH handling code, etc.
-
-* Thumb doesn't have normal pre/post increment addressing modes, but you can
-  load/store 32-bit integers with pre/postinc by using load/store multiple
-  instrs with a single register.
-
-* Make better use of high registers r8, r10, r11, r12 (ip). Some variants of
-  add and cmp instructions can use high registers. Also, we can use them as
-  temporaries to spill values into.
-
-* In thumb mode, short, byte, and bool preferred alignments are currently set
-  to 4 to accommodate ISA restriction (i.e. add sp, #imm, imm must be multiple
-  of 4).
-
-//===---------------------------------------------------------------------===//
-
-Potential jumptable improvements:
-
-* If we know function size is less than (1 << 16) * 2 bytes, we can use 16-bit
-  jumptable entries (e.g. (L1 - L2) >> 1). Or even smaller entries if the
-  function is even smaller. This also applies to ARM.
-
-* Thumb jumptable codegen can improve given some help from the assembler. This
-  is what we generate right now:
-
-	.set PCRELV0, (LJTI1_0_0-(LPCRELL0+4))
-LPCRELL0:
-	mov r1, #PCRELV0
-	add r1, pc
-	ldr r0, [r0, r1]
-	mov pc, r0
-	.align 2
-LJTI1_0_0:
-	.long	LBB1_3
-	...
-
-Note there is another pc relative add that we can take advantage of.
-	add r1, pc, #imm_8 * 4
-
-We should be able to generate:
-
-LPCRELL0:
-	add r1, LJTI1_0_0
-	ldr r0, [r0, r1]
-	mov pc, r0
-	.align 2
-LJTI1_0_0:
-	.long	LBB1_3
-
-if the assembler can translate the add to:
-	add r1, pc, #((LJTI1_0_0-(LPCRELL0+4))&0xfffffffc)
-
-Note the assembler also does something similar to constpool load:
-LPCRELL0:
-	ldr r0, LCPI1_0
-=>
-	ldr r0, pc, #((LCPI1_0-(LPCRELL0+4))&0xfffffffc)
-
-
-//===---------------------------------------------------------------------===//
-
-We compile the following:
-
-define i16 @func_entry_2E_ce(i32 %i) {
-	switch i32 %i, label %bb12.exitStub [
-		 i32 0, label %bb4.exitStub
-		 i32 1, label %bb9.exitStub
-		 i32 2, label %bb4.exitStub
-		 i32 3, label %bb4.exitStub
-		 i32 7, label %bb9.exitStub
-		 i32 8, label %bb.exitStub
-		 i32 9, label %bb9.exitStub
-	]
-
-bb12.exitStub:
-	ret i16 0
-
-bb4.exitStub:
-	ret i16 1
-
-bb9.exitStub:
-	ret i16 2
-
-bb.exitStub:
-	ret i16 3
-}
-
-into:
-
-_func_entry_2E_ce:
-	mov r2, #1
-	lsl r2, r0
-	cmp r0, #9
-	bhi LBB1_4	@bb12.exitStub
-LBB1_1:	@newFuncRoot
-	mov r1, #13
-	tst r2, r1
-	bne LBB1_5	@bb4.exitStub
-LBB1_2:	@newFuncRoot
-	ldr r1, LCPI1_0
-	tst r2, r1
-	bne LBB1_6	@bb9.exitStub
-LBB1_3:	@newFuncRoot
-	mov r1, #1
-	lsl r1, r1, #8
-	tst r2, r1
-	bne LBB1_7	@bb.exitStub
-LBB1_4:	@bb12.exitStub
-	mov r0, #0
-	bx lr
-LBB1_5:	@bb4.exitStub
-	mov r0, #1
-	bx lr
-LBB1_6:	@bb9.exitStub
-	mov r0, #2
-	bx lr
-LBB1_7:	@bb.exitStub
-	mov r0, #3
-	bx lr
-LBB1_8:
-	.align 2
-LCPI1_0:
-	.long 642
-
-
-gcc compiles to:
-
-	cmp	r0, #9
-	@ lr needed for prologue
-	bhi	L2
-	ldr	r3, L11
-	mov	r2, #1
-	mov	r1, r2, asl r0
-	ands	r0, r3, r2, asl r0
-	movne	r0, #2
-	bxne	lr
-	tst	r1, #13
-	beq	L9
-L3:
-	mov	r0, r2
-	bx	lr
-L9:
-	tst	r1, #256
-	movne	r0, #3
-	bxne	lr
-L2:
-	mov	r0, #0
-	bx	lr
-L12:
-	.align 2
-L11:
-	.long	642
-
-
-GCC is doing a couple of clever things here:
-  1. It is predicating one of the returns. This isn't a clear win though: in
-     cases where that return isn't taken, it is replacing one condbranch with
-     two 'ne' predicated instructions.
-  2. It is sinking the shift of "1 << i" into the tst, and using ands instead
-     of tst. This will probably require whole function isel.
-  3. GCC emits:
-	tst	r1, #256
-     we emit:
-	mov r1, #1
-	lsl r1, r1, #8
-	tst r2, r1
-
-//===---------------------------------------------------------------------===//
-
-When spilling in thumb mode and the sp offset is too large to fit in the ldr /
-str offset field, we load the offset from a constpool entry and add it to sp:
-
-ldr r2, LCPI
-add r2, sp
-ldr r2, [r2]
-
-These instructions preserve the condition code which is important if the spill
-is between a cmp and a bcc instruction. However, we can use the (potentially)
-cheaper sequence if we know it's ok to clobber the condition register.
-
-add r2, sp, #255 * 4
-add r2, #132
-ldr r2, [r2, #7 * 4]
-
-This is especially bad when dynamic alloca is used. All fixed size stack
-objects are referenced off the frame pointer with negative offsets. See
-oggenc for an example.
-
-//===---------------------------------------------------------------------===//
-
-Poor codegen test/CodeGen/ARM/select.ll f7:
-
-	ldr r5, LCPI1_0
-LPC0:
-	add r5, pc
-	ldr r6, LCPI1_1
-	ldr r2, LCPI1_2
-	mov r3, r6
-	mov lr, pc
-	bx r5
-
-//===---------------------------------------------------------------------===//
-
-Make register allocator / spiller smarter so we can re-materialize "mov r, imm",
-etc. Almost all Thumb instructions clobber condition code.
-
-//===---------------------------------------------------------------------===//
-
-Add ldmia, stmia support.
-
-//===---------------------------------------------------------------------===//
-
-Thumb load / store address mode offsets are scaled. The values kept in the
-instruction operands are pre-scale values. This probably ought to be changed
-to avoid extra work when we convert Thumb2 instructions to Thumb1 instructions.
-
-//===---------------------------------------------------------------------===//
-
-We need to make (some of the) Thumb1 instructions predicable. That will allow
-shrinking of predicated Thumb2 instructions. To allow this, we need to be able
-to toggle the 's' bit since they do not set CPSR when they are inside IT blocks.
-
-//===---------------------------------------------------------------------===//
-
-Make use of hi register variants of cmp: tCMPhir / tCMPZhir.
-
-//===---------------------------------------------------------------------===//
-
-Thumb1 immediate fields sometimes keep pre-scaled values. See
-Thumb1RegisterInfo::eliminateFrameIndex. This is inconsistent with ARM and
-Thumb2.
-
-//===---------------------------------------------------------------------===//
-
-Rather than having tBR_JTr print a ".align 2" and constant island pass pad it,
-add a target specific ALIGN instruction instead. That way, GetInstSizeInBytes
-won't have to over-estimate. It can also be used for loop alignment pass.
diff --git a/contrib/llvm/lib/Target/ARM/README-Thumb2.txt b/contrib/llvm/lib/Target/ARM/README-Thumb2.txt
deleted file mode 100644
index e7c2552..0000000
--- a/contrib/llvm/lib/Target/ARM/README-Thumb2.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-//===---------------------------------------------------------------------===//
-// Random ideas for the ARM backend (Thumb2 specific).
-//===---------------------------------------------------------------------===//
-
-Make sure jumptable destinations are below the jumptable in order to make use
-of tbb / tbh.
diff --git a/contrib/llvm/lib/Target/ARM/README.txt b/contrib/llvm/lib/Target/ARM/README.txt
deleted file mode 100644
index 9fc3fb9..0000000
--- a/contrib/llvm/lib/Target/ARM/README.txt
+++ /dev/null
@@ -1,659 +0,0 @@
-//===---------------------------------------------------------------------===//
-// Random ideas for the ARM backend.
-//===---------------------------------------------------------------------===//
-
-Reimplement 'select' in terms of 'SEL'.
-
-* We would really like to support UXTAB16, but we need to prove that the
-  add doesn't need to overflow between the two 16-bit chunks.
-
-* Implement pre/post increment support. (e.g. PR935)
-* Implement smarter constant generation for binops with large immediates.
-
-A few ARMv6T2 ops should be pattern matched: BFI, SBFX, and UBFX
-
-Interesting optimization for PIC codegen on arm-linux:
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43129
-
-//===---------------------------------------------------------------------===//
-
-Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the
-time regalloc happens, these values are now in a 32-bit register, usually with
-the top-bits known to be sign or zero extended. If spilled, we should be able
-to spill these to an 8-bit or 16-bit stack slot, zero or sign extending as
-part of the reload.
-
-Doing this reduces the size of the stack frame (important for thumb etc), and
-also increases the likelihood that we will be able to reload multiple values
-from the stack with a single load.
-
-//===---------------------------------------------------------------------===//
-
-The constant island pass is in good shape. Some cleanups might be desirable,
-but there is unlikely to be much improvement in the generated code.
-
-1. There may be some advantage to trying to be smarter about the initial
-placement, rather than putting everything at the end.
-
-2. There might be some compile-time efficiency to be had by representing
-consecutive islands as a single block rather than multiple blocks.
-
-3. Use a priority queue to sort constant pool users in inverse order of
-   position so we always process the one closest to the end of functions
-   first. This may simplify CreateNewWater.
-
-//===---------------------------------------------------------------------===//
-
-Eliminate copysign custom expansion. We are still generating crappy code with
-default expansion + if-conversion.
-
-//===---------------------------------------------------------------------===//
-
-Eliminate one instruction from:
-
-define i32 @_Z6slow4bii(i32 %x, i32 %y) {
-	%tmp = icmp sgt i32 %x, %y
-	%retval = select i1 %tmp, i32 %x, i32 %y
-	ret i32 %retval
-}
-
-__Z6slow4bii:
-	cmp r0, r1
-	movgt r1, r0
-	mov r0, r1
-	bx lr
-=>
-
-__Z6slow4bii:
-	cmp r0, r1
-	movle r0, r1
-	bx lr
-
-//===---------------------------------------------------------------------===//
-
-Implement long long "X-3" with instructions that fold the immediate in. These
-were disabled due to badness with the ARM carry flag on subtracts.
-
-//===---------------------------------------------------------------------===//
-
-More load / store optimizations:
-1) Better representation for block transfer? This is from Olden/power:
-
-	fldd d0, [r4]
-	fstd d0, [r4, #+32]
-	fldd d0, [r4, #+8]
-	fstd d0, [r4, #+40]
-	fldd d0, [r4, #+16]
-	fstd d0, [r4, #+48]
-	fldd d0, [r4, #+24]
-	fstd d0, [r4, #+56]
-
-If we can spare the registers, it would be better to use fldm and fstm here.
-Need major register allocator enhancement though.
-
-2) Can we recognize the relative position of constantpool entries? i.e. Treat
-
-	ldr r0, LCPI17_3
-	ldr r1, LCPI17_4
-	ldr r2, LCPI17_5
-
-   as
-	ldr r0, LCPI17
-	ldr r1, LCPI17+4
-	ldr r2, LCPI17+8
-
-   Then the ldr's can be combined into a single ldm. See Olden/power.
-
-Note for ARM v4 gcc uses ldmia to load a pair of 32-bit values to represent a
-double 64-bit FP constant:
-
-	adr	r0, L6
-	ldmia	r0, {r0-r1}
-
-	.align 2
-L6:
-	.long	-858993459
-	.long	1074318540
-
-3) struct copies appear to be done field by field
-instead of by words, at least sometimes:
-
-struct foo { int x; short s; char c1; char c2; };
-void cpy(struct foo*a, struct foo*b) { *a = *b; }
-
-llvm code (-O2)
-	ldrb r3, [r1, #+6]
-	ldr r2, [r1]
-	ldrb r12, [r1, #+7]
-	ldrh r1, [r1, #+4]
-	str r2, [r0]
-	strh r1, [r0, #+4]
-	strb r3, [r0, #+6]
-	strb r12, [r0, #+7]
-gcc code (-O2)
-	ldmia	r1, {r1-r2}
-	stmia	r0, {r1-r2}
-
-In this benchmark poor handling of aggregate copies has shown up as
-having a large effect on size, and possibly speed as well (we don't have
-a good way to measure on ARM).
-
-//===---------------------------------------------------------------------===//
-
-* Consider this silly example:
-
-double bar(double x) {
-  double r = foo(3.1);
-  return x+r;
-}
-
-_bar:
-	stmfd sp!, {r4, r5, r7, lr}
-	add r7, sp, #8
-	mov r4, r0
-	mov r5, r1
-	fldd d0, LCPI1_0
-	fmrrd r0, r1, d0
-	bl _foo
-	fmdrr d0, r4, r5
-	fmsr s2, r0
-	fsitod d1, s2
-	faddd d0, d1, d0
-	fmrrd r0, r1, d0
-	ldmfd sp!, {r4, r5, r7, pc}
-
-Ignore the prologue and epilogue stuff for a second. Note
-	mov r4, r0
-	mov r5, r1
-the copies to callee-save registers and the fact they are only being used by
-the fmdrr instruction. It would have been better had the fmdrr been scheduled
-before the call and placed the result in a callee-save DPR register. The two
-mov ops would not have been necessary.
-
-//===---------------------------------------------------------------------===//
-
-Calling convention related stuff:
-
-* gcc's parameter passing implementation is terrible and we suffer as a result:
-
-e.g.
-struct s {
-  double d1;
-  int s1;
-};
-
-void foo(struct s S) {
-  printf("%g, %d\n", S.d1, S.s1);
-}
-
-'S' is passed via registers r0, r1, r2. But gcc stores them to the stack, and
-then reloads them to r1, r2, and r3 before issuing the call (r0 contains the
-address of the format string):
-
-	stmfd	sp!, {r7, lr}
-	add	r7, sp, #0
-	sub	sp, sp, #12
-	stmia	sp, {r0, r1, r2}
-	ldmia	sp, {r1-r2}
-	ldr	r0, L5
-	ldr	r3, [sp, #8]
-L2:
-	add	r0, pc, r0
-	bl	L_printf$stub
-
-Instead of a stmia, ldmia, and a ldr, wouldn't it be better to do three moves?
-
-* Returning an aggregate type is even worse:
-
-e.g.
-struct s foo(void) {
-  struct s S = {1.1, 2};
-  return S;
-}
-
-	mov	ip, r0
-	ldr	r0, L5
-	sub	sp, sp, #12
-L2:
-	add	r0, pc, r0
-	@ lr needed for prologue
-	ldmia	r0, {r0, r1, r2}
-	stmia	sp, {r0, r1, r2}
-	stmia	ip, {r0, r1, r2}
-	mov	r0, ip
-	add	sp, sp, #12
-	bx	lr
-
-r0 (and later ip) is the hidden parameter from the caller to store the value
-in. The first ldmia loads the constants into r0, r1, r2. The last stmia stores
-r0, r1, r2 into the address passed in. However, there is one additional stmia
-that stores r0, r1, and r2 to some stack location. The store is dead.
-
-The llvm-gcc generated code looks like this:
-
-csretcc void %foo(%struct.s* %agg.result) {
-entry:
-	%S = alloca %struct.s, align 4		; <%struct.s*> [#uses=1]
-	%memtmp = alloca %struct.s		; <%struct.s*> [#uses=1]
-	cast %struct.s* %S to sbyte*		; <sbyte*>:0 [#uses=2]
-	call void %llvm.memcpy.i32( sbyte* %0, sbyte* cast ({ double, int }* %C.0.904 to sbyte*), uint 12, uint 4 )
-	cast %struct.s* %agg.result to sbyte*		; <sbyte*>:1 [#uses=2]
-	call void %llvm.memcpy.i32( sbyte* %1, sbyte* %0, uint 12, uint 0 )
-	cast %struct.s* %memtmp to sbyte*		; <sbyte*>:2 [#uses=1]
-	call void %llvm.memcpy.i32( sbyte* %2, sbyte* %1, uint 12, uint 0 )
-	ret void
-}
-
-llc ends up issuing two memcpy's (the first memcpy becomes 3 loads from
-constantpool). Perhaps we should 1) fix llvm-gcc so the memcpy is translated
-into a number of load and stores, or 2) custom lower memcpy (of small size) to
-be ldmia / stmia. I think option 2 is better but the current register
-allocator cannot allocate a chunk of registers at a time.
-
-A feasible temporary solution is to use specific physical registers at the
-lowering time for small (<= 4 words?) transfer size.
-
-* ARM CSRet calling convention requires the hidden argument to be returned by
-the callee.
-
-//===---------------------------------------------------------------------===//
-
-We can definitely do a better job on BB placements to eliminate some branches.
-It's very common to see llvm generated assembly code that looks like this:
-
-LBB3:
-	...
-LBB4:
-...
-	beq LBB3
-	b LBB2
-
-If BB4 is the only predecessor of BB3, then we can emit BB3 after BB4. We can
-then eliminate the beq and turn the unconditional branch to LBB2 into a bne.
-
-See McCat/18-imp/ComputeBoundingBoxes for an example.
-
-//===---------------------------------------------------------------------===//
-
-Pre-/post- indexed load / stores:
-
-1) We should not make the pre/post- indexed load/store transform if the base
-ptr is guaranteed to be live beyond the load/store. This can happen if the
-base ptr is live out of the block we are performing the optimization. e.g.
-
-mov r1, r2
-ldr r3, [r1], #4
-...
-
-vs.
-
-ldr r3, [r2]
-add r1, r2, #4
-...
-
-In most cases, this is just a wasted optimization. However, sometimes it can
-negatively impact the performance because two-address code is more restrictive
-when it comes to scheduling.
-
-Unfortunately, liveout information is currently unavailable during DAG combine
-time.
-
-2) Consider splitting an indexed load / store into a pair of add/sub +
-   load/store to solve #1 (in TwoAddressInstructionPass.cpp).
-
-3) Enhance LSR to generate more opportunities for indexed ops.
-
-4) Once we've added support for multiple result patterns, write indexed loads
-   patterns instead of C++ instruction selection code.
-
-5) Use VLDM / VSTM to emulate indexed FP load / store.
-
-//===---------------------------------------------------------------------===//
-
-Implement support for some more tricky ways to materialize immediates. For
-example, to get 0xffff8000, we can use:
-
-mov r9, #&3f8000
-sub r9, r9, #&400000
-
-//===---------------------------------------------------------------------===//
-
-We sometimes generate multiple add / sub instructions to update sp in prologue
-and epilogue if the inc / dec value is too large to fit in a single immediate
-operand. In some cases, perhaps it might be better to load the value from a
-constantpool instead.
-
-//===---------------------------------------------------------------------===//
-
-GCC generates significantly better code for this function.
-
-int foo(int StackPtr, unsigned char *Line, unsigned char *Stack, int LineLen) {
-    int i = 0;
-
-    if (StackPtr != 0) {
-       while (StackPtr != 0 && i < (((LineLen) < (32768))? (LineLen) : (32768)))
-          Line[i++] = Stack[--StackPtr];
-        if (LineLen > 32768)
-        {
-            while (StackPtr != 0 && i < LineLen)
-            {
-                i++;
-                --StackPtr;
-            }
-        }
-    }
-    return StackPtr;
-}
-
-//===---------------------------------------------------------------------===//
-
-This should compile to the mlas instruction:
-int mlas(int x, int y, int z) { return ((x * y + z) < 0) ? 7 : 13; }
-
-//===---------------------------------------------------------------------===//
-
-At some point, we should triage these to see if they still apply to us:
-
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19598
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18560
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27016
-
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11831
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11826
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11825
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11824
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11823
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11820
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10982
-
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10242
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9831
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9760
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9759
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9703
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9702
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9663
-
-http://www.inf.u-szeged.hu/gcc-arm/
-http://citeseer.ist.psu.edu/debus04linktime.html
-
-//===---------------------------------------------------------------------===//
-
-gcc generates smaller code for this function at -O2 or -Os:
-
-void foo(signed char* p) {
-  if (*p == 3)
-    bar();
-  else if (*p == 4)
-    baz();
-  else if (*p == 5)
-    quux();
-}
-
-llvm decides it's a good idea to turn the repeated if...else into a
-binary tree, as if it were a switch; the resulting code requires -1
-compare-and-branches when *p<=2 or *p==5, the same number if *p==4
-or *p>6, and +1 if *p==3. So it should be a speed win
-(on balance). However, the revised code is larger, with 4 conditional
-branches instead of 3.
-
-More seriously, there is a byte->word extend before
-each comparison, where there should be only one, and the condition codes
-are not remembered when the same two values are compared twice.
-
-//===---------------------------------------------------------------------===//
-
-More LSR enhancements possible:
-
-1. Teach LSR about pre- and post- indexed ops to allow the iv increment to be
-   merged into a load / store.
-2. Allow iv reuse even when a type conversion is required. For example, i8
-   and i32 load / store addressing modes are identical.
-
-
-//===---------------------------------------------------------------------===//
-
-This:
-
-int foo(int a, int b, int c, int d) {
-  long long acc = (long long)a * (long long)b;
-  acc += (long long)c * (long long)d;
-  return (int)(acc >> 32);
-}
-
-Should compile to use SMLAL (Signed Multiply Accumulate Long) which multiplies
-two signed 32-bit values to produce a 64-bit value, and accumulates this with
-a 64-bit value.
-
-We currently get this with both v4 and v6:
-
-_foo:
-	smull r1, r0, r1, r0
-	smull r3, r2, r3, r2
-	adds r3, r3, r1
-	adc r0, r2, r0
-	bx lr
-
-//===---------------------------------------------------------------------===//
-
-This:
-	#include <algorithm>
-	std::pair<unsigned, bool> full_add(unsigned a, unsigned b)
-	{ return std::make_pair(a + b, a + b < a); }
-	bool no_overflow(unsigned a, unsigned b)
-	{ return !full_add(a, b).second; }
-
-Should compile to:
-
-_Z8full_addjj:
-	adds	r2, r1, r2
-	movcc	r1, #0
-	movcs	r1, #1
-	str	r2, [r0, #0]
-	strb	r1, [r0, #4]
-	mov	pc, lr
-
-_Z11no_overflowjj:
-	cmn	r0, r1
-	movcs	r0, #0
-	movcc	r0, #1
-	mov	pc, lr
-
-not:
-
-__Z8full_addjj:
-	add r3, r2, r1
-	str r3, [r0]
-	mov r2, #1
-	mov r12, #0
-	cmp r3, r1
-	movlo r12, r2
-	str r12, [r0, #+4]
-	bx lr
-__Z11no_overflowjj:
-	add r3, r1, r0
-	mov r2, #1
-	mov r1, #0
-	cmp r3, r0
-	movhs r1, r2
-	mov r0, r1
-	bx lr
-
-//===---------------------------------------------------------------------===//
-
-Some of the NEON intrinsics may be appropriate for more general use, either
-as target-independent intrinsics or perhaps elsewhere in the ARM backend.
-Some of them may also be lowered to target-independent SDNodes, and perhaps
-some new SDNodes could be added.
-
-For example, maximum, minimum, and absolute value operations are well-defined
-and standard operations, both for vector and scalar types.
-
-The current NEON-specific intrinsics for count leading zeros and count one
-bits could perhaps be replaced by the target-independent ctlz and ctpop
-intrinsics. It may also make sense to add a target-independent "ctls"
-intrinsic for "count leading sign bits". Likewise, the backend could use
-the target-independent SDNodes for these operations.
-
-ARMv6 has scalar saturating and halving adds and subtracts. The same
-intrinsics could possibly be used for both NEON's vector implementations of
-those operations and the ARMv6 scalar versions.
-
-//===---------------------------------------------------------------------===//
-
-ARM::MOVCCr is commutable (by flipping the condition). But we need to implement
-ARMInstrInfo::commuteInstruction() to support it.
-
-//===---------------------------------------------------------------------===//
-
-Split out LDR (literal) from normal ARM LDR instruction. Also consider
-splitting LDR into imm12 and so_reg forms. This allows us to clean up some
-code. e.g. ARMLoadStoreOptimizer does not need to look at LDR (literal) and
-LDR (so_reg) while ARMConstantIslandPass only needs to worry about LDR
-(literal).
-
-//===---------------------------------------------------------------------===//
-
-Constant island pass should make use of full range SoImm values for LEApcrel.
-Be careful though as the last attempt caused infinite looping on lencod.
-
-//===---------------------------------------------------------------------===//
-
-Predication issue. This function:
-
-extern unsigned array[ 128 ];
-int foo( int x ) {
-  int y;
-  y = array[ x & 127 ];
-  if ( x & 128 )
-    y = 123456789 & ( y >> 2 );
-  else
-    y = 123456789 & y;
-  return y;
-}
-
-compiles to:
-
-_foo:
-	and r1, r0, #127
-	ldr r2, LCPI1_0
-	ldr r2, [r2]
-	ldr r1, [r2, +r1, lsl #2]
-	mov r2, r1, lsr #2
-	tst r0, #128
-	moveq r2, r1
-	ldr r0, LCPI1_1
-	and r0, r2, r0
-	bx lr
-
-It would be better to do something like this, to fold the shift into the
-conditional move:
-
-	and r1, r0, #127
-	ldr r2, LCPI1_0
-	ldr r2, [r2]
-	ldr r1, [r2, +r1, lsl #2]
-	tst r0, #128
-	movne r1, r1, lsr #2
-	ldr r0, LCPI1_1
-	and r0, r1, r0
-	bx lr
-
-it saves an instruction and a register.
-
-//===---------------------------------------------------------------------===//
-
-It might be profitable to cse MOVi16 if there are lots of 32-bit immediates
-with the same bottom half.
-
-//===---------------------------------------------------------------------===//
-
-Robert Muth started working on an alternate jump table implementation that
-does not put the tables in-line in the text. This is more like the llvm
-default jump table implementation. This might be useful sometime. Several
-revisions of patches are on the mailing list, beginning at:
-http://lists.cs.uiuc.edu/pipermail/llvmdev/2009-June/022763.html
-
-//===---------------------------------------------------------------------===//
-
-Make use of the "rbit" instruction.
-
-//===---------------------------------------------------------------------===//
-
-Take a look at test/CodeGen/Thumb2/machine-licm.ll. ARM should be taught how
-to licm and cse the unnecessary load from cp#1.
-
-//===---------------------------------------------------------------------===//
-
-The CMN instruction sets the flags like an ADD instruction, while CMP sets
-them like a subtract. Therefore to be able to use CMN for comparisons other
-than the Z bit, we'll need additional logic to reverse the conditionals
-associated with the comparison. Perhaps a pseudo-instruction for the
-comparison, with a post-codegen pass to clean up and handle the condition
-codes? See PR5694 for testcase.
-
-//===---------------------------------------------------------------------===//
-
-Given the following on armv5:
-int test1(int A, int B) {
-  return (A&-8388481)|(B&8388480);
-}
-
-We currently generate:
-	ldr	r2, .LCPI0_0
-	and	r0, r0, r2
-	ldr	r2, .LCPI0_1
-	and	r1, r1, r2
-	orr	r0, r1, r0
-	bx	lr
-
-We should be able to replace the second ldr+and with a bic (i.e. reuse the
-constant which was already loaded). Not sure what's necessary to do that.
-
-//===---------------------------------------------------------------------===//
-
-The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal:
-
-int a(int x) { return __builtin_bswap32(x); }
-
-a:
-	mov	r1, #255, 24
-	mov	r2, #255, 16
-	and	r1, r1, r0, lsr #8
-	and	r2, r2, r0, lsl #8
-	orr	r1, r1, r0, lsr #24
-	orr	r0, r2, r0, lsl #24
-	orr	r0, r0, r1
-	bx	lr
-
-Something like the following would be better (fewer instructions/registers):
-	eor	r1, r0, r0, ror #16
-	bic	r1, r1, #0xff0000
-	mov	r1, r1, lsr #8
-	eor	r0, r1, r0, ror #8
-	bx	lr
-
-A custom Thumb version would also be a slight improvement over the generic
-version.
-
-//===---------------------------------------------------------------------===//
-
-Consider the following simple C code:
-
-void foo(unsigned char *a, unsigned char *b, int *c) {
-  if ((*a | *b) == 0) *c = 0;
-}
-
-currently llvm-gcc generates something like this (nice branchless code I'd say):
-
-	ldrb	r0, [r0]
-	ldrb	r1, [r1]
-	orr	r0, r1, r0
-	tst	r0, #255
-	moveq	r0, #0
-	streq	r0, [r2]
-	bx	lr
-
-Note that both "tst" and "moveq" are redundant.
-
-//===---------------------------------------------------------------------===//
-
diff --git a/contrib/llvm/lib/Target/ARM/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/TargetInfo/CMakeLists.txt
deleted file mode 100644
index 3910bb0..0000000
--- a/contrib/llvm/lib/Target/ARM/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMARMInfo
-  ARMTargetInfo.cpp
-  )
-
-add_dependencies(LLVMARMInfo ARMCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/ARM/TargetInfo/Makefile b/contrib/llvm/lib/Target/ARM/TargetInfo/Makefile
deleted file mode 100644
index 6292ab1..0000000
--- a/contrib/llvm/lib/Target/ARM/TargetInfo/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/ARM/TargetInfo/Makefile ------------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMARMInfo
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common