diff options
Diffstat (limited to 'contrib/llvm/lib')
177 files changed, 0 insertions, 10797 deletions
diff --git a/contrib/llvm/lib/Analysis/CMakeLists.txt b/contrib/llvm/lib/Analysis/CMakeLists.txt deleted file mode 100644 index 6a2ab68..0000000 --- a/contrib/llvm/lib/Analysis/CMakeLists.txt +++ /dev/null @@ -1,53 +0,0 @@ -add_llvm_library(LLVMAnalysis - AliasAnalysis.cpp - AliasAnalysisCounter.cpp - AliasAnalysisEvaluator.cpp - AliasDebugger.cpp - AliasSetTracker.cpp - Analysis.cpp - BasicAliasAnalysis.cpp - CFGPrinter.cpp - CaptureTracking.cpp - ConstantFolding.cpp - DbgInfoPrinter.cpp - DebugInfo.cpp - DomPrinter.cpp - IVUsers.cpp - InlineCost.cpp - InstCount.cpp - InstructionSimplify.cpp - Interval.cpp - IntervalPartition.cpp - LazyValueInfo.cpp - LibCallAliasAnalysis.cpp - LibCallSemantics.cpp - Lint.cpp - LiveValues.cpp - Loads.cpp - LoopDependenceAnalysis.cpp - LoopInfo.cpp - LoopPass.cpp - MemoryBuiltins.cpp - MemoryDependenceAnalysis.cpp - ModuleDebugInfoPrinter.cpp - PHITransAddr.cpp - PointerTracking.cpp - PostDominators.cpp - ProfileEstimatorPass.cpp - ProfileInfo.cpp - ProfileInfoLoader.cpp - ProfileInfoLoaderPass.cpp - ProfileVerifierPass.cpp - RegionInfo.cpp - RegionPrinter.cpp - ScalarEvolution.cpp - ScalarEvolutionAliasAnalysis.cpp - ScalarEvolutionExpander.cpp - ScalarEvolutionNormalization.cpp - SparsePropagation.cpp - Trace.cpp - TypeBasedAliasAnalysis.cpp - ValueTracking.cpp - ) - -target_link_libraries (LLVMAnalysis LLVMSupport) diff --git a/contrib/llvm/lib/Analysis/IPA/CMakeLists.txt b/contrib/llvm/lib/Analysis/IPA/CMakeLists.txt deleted file mode 100644 index 007ad22..0000000 --- a/contrib/llvm/lib/Analysis/IPA/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -add_llvm_library(LLVMipa - CallGraph.cpp - CallGraphSCCPass.cpp - FindUsedTypes.cpp - GlobalsModRef.cpp - ) diff --git a/contrib/llvm/lib/Analysis/IPA/Makefile b/contrib/llvm/lib/Analysis/IPA/Makefile deleted file mode 100644 index b850c9f..0000000 --- a/contrib/llvm/lib/Analysis/IPA/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Analysis/IPA/Makefile ---------------------------*- 
Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMipa -BUILD_ARCHIVE = 1 - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Analysis/Makefile b/contrib/llvm/lib/Analysis/Makefile deleted file mode 100644 index 4af6d35..0000000 --- a/contrib/llvm/lib/Analysis/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Analysis/Makefile -------------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../.. -LIBRARYNAME = LLVMAnalysis -DIRS = IPA -BUILD_ARCHIVE = 1 - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Analysis/README.txt b/contrib/llvm/lib/Analysis/README.txt deleted file mode 100644 index 0e96e4c..0000000 --- a/contrib/llvm/lib/Analysis/README.txt +++ /dev/null @@ -1,30 +0,0 @@ -Analysis Opportunities: - -//===---------------------------------------------------------------------===// - -In test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll, the -ScalarEvolution expression for %r is this: - - {1,+,3,+,2}<loop> - -Outside the loop, this could be evaluated simply as (%n * %n), however -ScalarEvolution currently evaluates it as - - (-2 + (2 * (trunc i65 (((zext i64 (-2 + %n) to i65) * (zext i64 (-1 + %n) to i65)) /u 2) to i64)) + (3 * %n)) - -In addition to being much more complicated, it involves i65 arithmetic, -which is very inefficient when expanded into code. 
- -//===---------------------------------------------------------------------===// - -In formatValue in test/CodeGen/X86/lsr-delayed-fold.ll, - -ScalarEvolution is forming this expression: - -((trunc i64 (-1 * %arg5) to i32) + (trunc i64 %arg5 to i32) + (-1 * (trunc i64 undef to i32))) - -This could be folded to - -(-1 * (trunc i64 undef to i32)) - -//===---------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Archive/CMakeLists.txt b/contrib/llvm/lib/Archive/CMakeLists.txt deleted file mode 100644 index 7ff478a..0000000 --- a/contrib/llvm/lib/Archive/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_llvm_library(LLVMArchive - Archive.cpp - ArchiveReader.cpp - ArchiveWriter.cpp - ) diff --git a/contrib/llvm/lib/Archive/Makefile b/contrib/llvm/lib/Archive/Makefile deleted file mode 100644 index da97804..0000000 --- a/contrib/llvm/lib/Archive/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -##===- lib/Archive/Makefile --------------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../.. -LIBRARYNAME = LLVMArchive - -# We only want an archive so only those modules actually used by a tool are -# included. 
-BUILD_ARCHIVE := 1 - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/AsmParser/CMakeLists.txt b/contrib/llvm/lib/AsmParser/CMakeLists.txt deleted file mode 100644 index 985ebe2..0000000 --- a/contrib/llvm/lib/AsmParser/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -# AsmParser -add_llvm_library(LLVMAsmParser - LLLexer.cpp - LLParser.cpp - Parser.cpp - ) diff --git a/contrib/llvm/lib/AsmParser/Makefile b/contrib/llvm/lib/AsmParser/Makefile deleted file mode 100644 index 995bb0e..0000000 --- a/contrib/llvm/lib/AsmParser/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -##===- lib/AsmParser/Makefile ------------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../.. -LIBRARYNAME := LLVMAsmParser -BUILD_ARCHIVE = 1 - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Bitcode/Makefile b/contrib/llvm/lib/Bitcode/Makefile deleted file mode 100644 index 2d6b5ad..0000000 --- a/contrib/llvm/lib/Bitcode/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -##===- lib/Bitcode/Makefile --------------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../.. 
-PARALLEL_DIRS = Reader Writer - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Bitcode/Reader/CMakeLists.txt b/contrib/llvm/lib/Bitcode/Reader/CMakeLists.txt deleted file mode 100644 index 693d431..0000000 --- a/contrib/llvm/lib/Bitcode/Reader/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMBitReader - BitReader.cpp - BitcodeReader.cpp - ) diff --git a/contrib/llvm/lib/Bitcode/Reader/Makefile b/contrib/llvm/lib/Bitcode/Reader/Makefile deleted file mode 100644 index 59af8d53..0000000 --- a/contrib/llvm/lib/Bitcode/Reader/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Bitcode/Reader/Makefile -------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMBitReader -BUILD_ARCHIVE = 1 - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Bitcode/Writer/CMakeLists.txt b/contrib/llvm/lib/Bitcode/Writer/CMakeLists.txt deleted file mode 100644 index f097b09..0000000 --- a/contrib/llvm/lib/Bitcode/Writer/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -add_llvm_library(LLVMBitWriter - BitWriter.cpp - BitcodeWriter.cpp - BitcodeWriterPass.cpp - ValueEnumerator.cpp - ) diff --git a/contrib/llvm/lib/Bitcode/Writer/Makefile b/contrib/llvm/lib/Bitcode/Writer/Makefile deleted file mode 100644 index 7b0bd72..0000000 --- a/contrib/llvm/lib/Bitcode/Writer/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Bitcode/Reader/Makefile -------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. 
-LIBRARYNAME = LLVMBitWriter -BUILD_ARCHIVE = 1 - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt b/contrib/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt deleted file mode 100644 index ca8b843..0000000 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -add_llvm_library(LLVMAsmPrinter - AsmPrinter.cpp - AsmPrinterDwarf.cpp - AsmPrinterInlineAsm.cpp - DIE.cpp - DwarfDebug.cpp - DwarfException.cpp - OcamlGCPrinter.cpp - ) - -target_link_libraries (LLVMAsmPrinter LLVMMCParser) diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/Makefile b/contrib/llvm/lib/CodeGen/AsmPrinter/Makefile deleted file mode 100644 index 60aa6cb..0000000 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -##===- lib/CodeGen/AsmPrinter/Makefile ---------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. 
-LIBRARYNAME = LLVMAsmPrinter - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/CodeGen/CMakeLists.txt b/contrib/llvm/lib/CodeGen/CMakeLists.txt deleted file mode 100644 index 2ef115d..0000000 --- a/contrib/llvm/lib/CodeGen/CMakeLists.txt +++ /dev/null @@ -1,86 +0,0 @@ -add_llvm_library(LLVMCodeGen - AggressiveAntiDepBreaker.cpp - Analysis.cpp - BranchFolding.cpp - CalcSpillWeights.cpp - CallingConvLower.cpp - CodePlacementOpt.cpp - CriticalAntiDepBreaker.cpp - DeadMachineInstructionElim.cpp - DwarfEHPrepare.cpp - ELFCodeEmitter.cpp - ELFWriter.cpp - GCMetadata.cpp - GCMetadataPrinter.cpp - GCStrategy.cpp - IfConversion.cpp - InlineSpiller.cpp - IntrinsicLowering.cpp - LLVMTargetMachine.cpp - LatencyPriorityQueue.cpp - LiveInterval.cpp - LiveIntervalAnalysis.cpp - LiveStackAnalysis.cpp - LiveVariables.cpp - LocalStackSlotAllocation.cpp - LowerSubregs.cpp - MachineBasicBlock.cpp - MachineCSE.cpp - MachineDominators.cpp - MachineFunction.cpp - MachineFunctionAnalysis.cpp - MachineFunctionPass.cpp - MachineFunctionPrinterPass.cpp - MachineInstr.cpp - MachineLICM.cpp - MachineLoopInfo.cpp - MachineModuleInfo.cpp - MachineModuleInfoImpls.cpp - MachinePassRegistry.cpp - MachineRegisterInfo.cpp - MachineSSAUpdater.cpp - MachineSink.cpp - MachineVerifier.cpp - ObjectCodeEmitter.cpp - OcamlGC.cpp - OptimizePHIs.cpp - PHIElimination.cpp - Passes.cpp - PeepholeOptimizer.cpp - PostRAHazardRecognizer.cpp - PostRASchedulerList.cpp - PreAllocSplitting.cpp - ProcessImplicitDefs.cpp - PrologEpilogInserter.cpp - PseudoSourceValue.cpp - RegAllocFast.cpp - RegAllocLinearScan.cpp - RegAllocPBQP.cpp - RegisterCoalescer.cpp - RegisterScavenging.cpp - RenderMachineFunction.cpp - ScheduleDAG.cpp - ScheduleDAGEmit.cpp - ScheduleDAGInstrs.cpp - ScheduleDAGPrinter.cpp - ShadowStackGC.cpp - ShrinkWrapping.cpp - SimpleRegisterCoalescing.cpp - SjLjEHPrepare.cpp - SlotIndexes.cpp - Spiller.cpp - SplitKit.cpp - Splitter.cpp - StackProtector.cpp - StackSlotColoring.cpp - 
StrongPHIElimination.cpp - TailDuplication.cpp - TargetInstrInfoImpl.cpp - TargetLoweringObjectFileImpl.cpp - TwoAddressInstructionPass.cpp - UnreachableBlockElim.cpp - VirtRegMap.cpp - VirtRegRewriter.cpp - ) - -target_link_libraries (LLVMCodeGen LLVMCore LLVMScalarOpts) diff --git a/contrib/llvm/lib/CodeGen/Makefile b/contrib/llvm/lib/CodeGen/Makefile deleted file mode 100644 index 4ab3e3c..0000000 --- a/contrib/llvm/lib/CodeGen/Makefile +++ /dev/null @@ -1,22 +0,0 @@ -##===- lib/CodeGen/Makefile --------------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../.. -LIBRARYNAME = LLVMCodeGen -PARALLEL_DIRS = SelectionDAG AsmPrinter -BUILD_ARCHIVE = 1 - -include $(LEVEL)/Makefile.common - -# Xcode prior to 2.4 generates an error in -pedantic mode with use of HUGE_VAL -# in this directory. Disable -pedantic for this broken compiler. -ifneq ($(HUGE_VAL_SANITY),yes) -CompileCommonOpts := $(filter-out -pedantic, $(CompileCommonOpts)) -endif - diff --git a/contrib/llvm/lib/CodeGen/README.txt b/contrib/llvm/lib/CodeGen/README.txt deleted file mode 100644 index b655dda4..0000000 --- a/contrib/llvm/lib/CodeGen/README.txt +++ /dev/null @@ -1,199 +0,0 @@ -//===---------------------------------------------------------------------===// - -Common register allocation / spilling problem: - - mul lr, r4, lr - str lr, [sp, #+52] - ldr lr, [r1, #+32] - sxth r3, r3 - ldr r4, [sp, #+52] - mla r4, r3, lr, r4 - -can be: - - mul lr, r4, lr - mov r4, lr - str lr, [sp, #+52] - ldr lr, [r1, #+32] - sxth r3, r3 - mla r4, r3, lr, r4 - -and then "merge" mul and mov: - - mul r4, r4, lr - str lr, [sp, #+52] - ldr lr, [r1, #+32] - sxth r3, r3 - mla r4, r3, lr, r4 - -It also increase the likelyhood the store may become dead. 
- -//===---------------------------------------------------------------------===// - -bb27 ... - ... - %reg1037 = ADDri %reg1039, 1 - %reg1038 = ADDrs %reg1032, %reg1039, %NOREG, 10 - Successors according to CFG: 0x8b03bf0 (#5) - -bb76 (0x8b03bf0, LLVM BB @0x8b032d0, ID#5): - Predecessors according to CFG: 0x8b0c5f0 (#3) 0x8b0a7c0 (#4) - %reg1039 = PHI %reg1070, mbb<bb76.outer,0x8b0c5f0>, %reg1037, mbb<bb27,0x8b0a7c0> - -Note ADDri is not a two-address instruction. However, its result %reg1037 is an -operand of the PHI node in bb76 and its operand %reg1039 is the result of the -PHI node. We should treat it as a two-address code and make sure the ADDri is -scheduled after any node that reads %reg1039. - -//===---------------------------------------------------------------------===// - -Use local info (i.e. register scavenger) to assign it a free register to allow -reuse: - ldr r3, [sp, #+4] - add r3, r3, #3 - ldr r2, [sp, #+8] - add r2, r2, #2 - ldr r1, [sp, #+4] <== - add r1, r1, #1 - ldr r0, [sp, #+4] - add r0, r0, #2 - -//===---------------------------------------------------------------------===// - -LLVM aggressively lift CSE out of loop. Sometimes this can be negative side- -effects: - -R1 = X + 4 -R2 = X + 7 -R3 = X + 15 - -loop: -load [i + R1] -... -load [i + R2] -... -load [i + R3] - -Suppose there is high register pressure, R1, R2, R3, can be spilled. We need -to implement proper re-materialization to handle this: - -R1 = X + 4 -R2 = X + 7 -R3 = X + 15 - -loop: -R1 = X + 4 @ re-materialized -load [i + R1] -... -R2 = X + 7 @ re-materialized -load [i + R2] -... -R3 = X + 15 @ re-materialized -load [i + R3] - -Furthermore, with re-association, we can enable sharing: - -R1 = X + 4 -R2 = X + 7 -R3 = X + 15 - -loop: -T = i + X -load [T + 4] -... -load [T + 7] -... -load [T + 15] -//===---------------------------------------------------------------------===// - -It's not always a good idea to choose rematerialization over spilling. 
If all -the load / store instructions would be folded then spilling is cheaper because -it won't require new live intervals / registers. See 2003-05-31-LongShifts for -an example. - -//===---------------------------------------------------------------------===// - -With a copying garbage collector, derived pointers must not be retained across -collector safe points; the collector could move the objects and invalidate the -derived pointer. This is bad enough in the first place, but safe points can -crop up unpredictably. Consider: - - %array = load { i32, [0 x %obj] }** %array_addr - %nth_el = getelementptr { i32, [0 x %obj] }* %array, i32 0, i32 %n - %old = load %obj** %nth_el - %z = div i64 %x, %y - store %obj* %new, %obj** %nth_el - -If the i64 division is lowered to a libcall, then a safe point will (must) -appear for the call site. If a collection occurs, %array and %nth_el no longer -point into the correct object. - -The fix for this is to copy address calculations so that dependent pointers -are never live across safe point boundaries. But the loads cannot be copied -like this if there was an intervening store, so may be hard to get right. - -Only a concurrent mutator can trigger a collection at the libcall safe point. -So single-threaded programs do not have this requirement, even with a copying -collector. Still, LLVM optimizations would probably undo a front-end's careful -work. - -//===---------------------------------------------------------------------===// - -The ocaml frametable structure supports liveness information. It would be good -to support it. - -//===---------------------------------------------------------------------===// - -The FIXME in ComputeCommonTailLength in BranchFolding.cpp needs to be -revisited. The check is there to work around a misuse of directives in inline -assembly. 
- -//===---------------------------------------------------------------------===// - -It would be good to detect collector/target compatibility instead of silently -doing the wrong thing. - -//===---------------------------------------------------------------------===// - -It would be really nice to be able to write patterns in .td files for copies, -which would eliminate a bunch of explicit predicates on them (e.g. no side -effects). Once this is in place, it would be even better to have tblgen -synthesize the various copy insertion/inspection methods in TargetInstrInfo. - -//===---------------------------------------------------------------------===// - -Stack coloring improvments: - -1. Do proper LiveStackAnalysis on all stack objects including those which are - not spill slots. -2. Reorder objects to fill in gaps between objects. - e.g. 4, 1, <gap>, 4, 1, 1, 1, <gap>, 4 => 4, 1, 1, 1, 1, 4, 4 - -//===---------------------------------------------------------------------===// - -The scheduler should be able to sort nearby instructions by their address. For -example, in an expanded memset sequence it's not uncommon to see code like this: - - movl $0, 4(%rdi) - movl $0, 8(%rdi) - movl $0, 12(%rdi) - movl $0, 0(%rdi) - -Each of the stores is independent, and the scheduler is currently making an -arbitrary decision about the order. - -//===---------------------------------------------------------------------===// - -Another opportunitiy in this code is that the $0 could be moved to a register: - - movl $0, 4(%rdi) - movl $0, 8(%rdi) - movl $0, 12(%rdi) - movl $0, 0(%rdi) - -This would save substantial code size, especially for longer sequences like -this. It would be easy to have a rule telling isel to avoid matching MOV32mi -if the immediate has more than some fixed number of uses. It's more involved -to teach the register allocator how to do late folding to recover from -excessive register pressure. 
- diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/CMakeLists.txt b/contrib/llvm/lib/CodeGen/SelectionDAG/CMakeLists.txt deleted file mode 100644 index 799988a..0000000 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ -add_llvm_library(LLVMSelectionDAG - DAGCombiner.cpp - FastISel.cpp - FunctionLoweringInfo.cpp - InstrEmitter.cpp - LegalizeDAG.cpp - LegalizeFloatTypes.cpp - LegalizeIntegerTypes.cpp - LegalizeTypes.cpp - LegalizeTypesGeneric.cpp - LegalizeVectorOps.cpp - LegalizeVectorTypes.cpp - ScheduleDAGFast.cpp - ScheduleDAGList.cpp - ScheduleDAGRRList.cpp - ScheduleDAGSDNodes.cpp - SelectionDAG.cpp - SelectionDAGBuilder.cpp - SelectionDAGISel.cpp - SelectionDAGPrinter.cpp - TargetLowering.cpp - TargetSelectionDAGInfo.cpp - ) - -target_link_libraries (LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/Makefile b/contrib/llvm/lib/CodeGen/SelectionDAG/Makefile deleted file mode 100644 index ea716fd..0000000 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -##===- lib/CodeGen/SelectionDAG/Makefile -------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. 
-LIBRARYNAME = LLVMSelectionDAG - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/CompilerDriver/CMakeLists.txt b/contrib/llvm/lib/CompilerDriver/CMakeLists.txt deleted file mode 100644 index 153dd44..0000000 --- a/contrib/llvm/lib/CompilerDriver/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -set(LLVM_LINK_COMPONENTS support system) -set(LLVM_REQUIRES_EH 1) - -add_llvm_tool(llvmc - Action.cpp - CompilationGraph.cpp - llvmc.cpp - Plugin.cpp - Tool.cpp - ) diff --git a/contrib/llvm/lib/CompilerDriver/Makefile b/contrib/llvm/lib/CompilerDriver/Makefile deleted file mode 100644 index 8e8b73c..0000000 --- a/contrib/llvm/lib/CompilerDriver/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -##===- lib/CompilerDriver/Makefile -------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open -# Source License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../.. - -# We don't want this library to appear in `llvm-config --libs` output, so its -# name doesn't start with "LLVM" and NO_LLVM_CONFIG is set. 
- -LIBRARYNAME = CompilerDriver -LINK_COMPONENTS = support system -NO_LLVM_CONFIG = 1 - - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/ExecutionEngine/CMakeLists.txt b/contrib/llvm/lib/ExecutionEngine/CMakeLists.txt deleted file mode 100644 index 0e118cc..0000000 --- a/contrib/llvm/lib/ExecutionEngine/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMExecutionEngine - ExecutionEngine.cpp - ExecutionEngineBindings.cpp - ) diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/CMakeLists.txt b/contrib/llvm/lib/ExecutionEngine/Interpreter/CMakeLists.txt deleted file mode 100644 index dff97fa..0000000 --- a/contrib/llvm/lib/ExecutionEngine/Interpreter/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_llvm_library(LLVMInterpreter - Execution.cpp - ExternalFunctions.cpp - Interpreter.cpp - ) diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Makefile b/contrib/llvm/lib/ExecutionEngine/Interpreter/Makefile deleted file mode 100644 index 5def136..0000000 --- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -##===- lib/ExecutionEngine/Interpreter/Makefile ------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMInterpreter - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/CMakeLists.txt b/contrib/llvm/lib/ExecutionEngine/JIT/CMakeLists.txt deleted file mode 100644 index 42020d6..0000000 --- a/contrib/llvm/lib/ExecutionEngine/JIT/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -# TODO: Support other architectures. See Makefile. 
-add_definitions(-DENABLE_X86_JIT) - -add_llvm_library(LLVMJIT - Intercept.cpp - JIT.cpp - JITDebugRegisterer.cpp - JITDwarfEmitter.cpp - JITEmitter.cpp - JITMemoryManager.cpp - OProfileJITEventListener.cpp - TargetSelect.cpp - ) diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/Makefile b/contrib/llvm/lib/ExecutionEngine/JIT/Makefile deleted file mode 100644 index aafa3d9..0000000 --- a/contrib/llvm/lib/ExecutionEngine/JIT/Makefile +++ /dev/null @@ -1,38 +0,0 @@ -##===- lib/ExecutionEngine/JIT/Makefile --------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMJIT - -# Get the $(ARCH) setting -include $(LEVEL)/Makefile.config - -# Enable the X86 JIT if compiling on X86 -ifeq ($(ARCH), x86) - ENABLE_X86_JIT = 1 -endif - -# This flag can also be used on the command line to force inclusion -# of the X86 JIT on non-X86 hosts -ifdef ENABLE_X86_JIT - CPPFLAGS += -DENABLE_X86_JIT -endif - -# Enable the Sparc JIT if compiling on Sparc -ifeq ($(ARCH), Sparc) - ENABLE_SPARC_JIT = 1 -endif - -# This flag can also be used on the command line to force inclusion -# of the Sparc JIT on non-Sparc hosts -ifdef ENABLE_SPARC_JIT - CPPFLAGS += -DENABLE_SPARC_JIT -endif - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/ExecutionEngine/Makefile b/contrib/llvm/lib/ExecutionEngine/Makefile deleted file mode 100644 index e0e050e..0000000 --- a/contrib/llvm/lib/ExecutionEngine/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -##===- lib/ExecutionEngine/Makefile ------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. 
-# -##===----------------------------------------------------------------------===## -LEVEL = ../.. -LIBRARYNAME = LLVMExecutionEngine -PARALLEL_DIRS = Interpreter JIT - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Linker/CMakeLists.txt b/contrib/llvm/lib/Linker/CMakeLists.txt deleted file mode 100644 index 0b6d2f4..0000000 --- a/contrib/llvm/lib/Linker/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -add_llvm_library(LLVMLinker - LinkArchives.cpp - LinkItems.cpp - LinkModules.cpp - Linker.cpp - ) diff --git a/contrib/llvm/lib/Linker/Makefile b/contrib/llvm/lib/Linker/Makefile deleted file mode 100644 index 19e646b..0000000 --- a/contrib/llvm/lib/Linker/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Linker/Makefile ---------------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../.. 
-LIBRARYNAME = LLVMLinker -BUILD_ARCHIVE := 1 - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/MC/CMakeLists.txt b/contrib/llvm/lib/MC/CMakeLists.txt deleted file mode 100644 index 60a3a3e..0000000 --- a/contrib/llvm/lib/MC/CMakeLists.txt +++ /dev/null @@ -1,33 +0,0 @@ -add_llvm_library(LLVMMC - ELFObjectWriter.cpp - MCAsmInfo.cpp - MCAsmInfoCOFF.cpp - MCAsmInfoDarwin.cpp - MCAsmStreamer.cpp - MCAssembler.cpp - MCCodeEmitter.cpp - MCContext.cpp - MCDisassembler.cpp - MCELFStreamer.cpp - MCExpr.cpp - MCInst.cpp - MCInstPrinter.cpp - MCLabel.cpp - MCDwarf.cpp - MCLoggingStreamer.cpp - MCMachOStreamer.cpp - MCNullStreamer.cpp - MCObjectStreamer.cpp - MCObjectWriter.cpp - MCSection.cpp - MCSectionCOFF.cpp - MCSectionELF.cpp - MCSectionMachO.cpp - MCStreamer.cpp - MCSymbol.cpp - MCValue.cpp - MachObjectWriter.cpp - WinCOFFStreamer.cpp - WinCOFFObjectWriter.cpp - TargetAsmBackend.cpp - ) diff --git a/contrib/llvm/lib/MC/MCDisassembler/CMakeLists.txt b/contrib/llvm/lib/MC/MCDisassembler/CMakeLists.txt deleted file mode 100644 index 5fa7b70..0000000 --- a/contrib/llvm/lib/MC/MCDisassembler/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ - -add_llvm_library(LLVMMCDisassembler - EDDisassembler.cpp - EDOperand.cpp - EDInst.cpp - EDToken.cpp - ) diff --git a/contrib/llvm/lib/MC/MCDisassembler/Makefile b/contrib/llvm/lib/MC/MCDisassembler/Makefile deleted file mode 100644 index 7d71cd3..0000000 --- a/contrib/llvm/lib/MC/MCDisassembler/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -##===- lib/MC/MCDisassembler/Makefile ----------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. 
-LIBRARYNAME = LLVMMCDisassembler - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/MC/MCParser/CMakeLists.txt b/contrib/llvm/lib/MC/MCParser/CMakeLists.txt deleted file mode 100644 index 25a7bf4..0000000 --- a/contrib/llvm/lib/MC/MCParser/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -add_llvm_library(LLVMMCParser - AsmLexer.cpp - AsmParser.cpp - DarwinAsmParser.cpp - ELFAsmParser.cpp - MCAsmLexer.cpp - MCAsmParser.cpp - MCAsmParserExtension.cpp - TargetAsmParser.cpp - ) diff --git a/contrib/llvm/lib/MC/MCParser/Makefile b/contrib/llvm/lib/MC/MCParser/Makefile deleted file mode 100644 index 4477757..0000000 --- a/contrib/llvm/lib/MC/MCParser/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/MC/MCParser/Makefile ----------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMMCParser -BUILD_ARCHIVE := 1 - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/MC/Makefile b/contrib/llvm/lib/MC/Makefile deleted file mode 100644 index bf8b7c0..0000000 --- a/contrib/llvm/lib/MC/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/MC/Makefile -------------------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../.. 
-LIBRARYNAME = LLVMMC -BUILD_ARCHIVE := 1 -PARALLEL_DIRS := MCParser MCDisassembler - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Makefile b/contrib/llvm/lib/Makefile deleted file mode 100644 index 3807f31..0000000 --- a/contrib/llvm/lib/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -##===- lib/Makefile ----------------------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = .. - -include $(LEVEL)/Makefile.config - -PARALLEL_DIRS := VMCore AsmParser Bitcode Archive Analysis Transforms CodeGen \ - Target ExecutionEngine Linker MC CompilerDriver - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Support/CMakeLists.txt b/contrib/llvm/lib/Support/CMakeLists.txt deleted file mode 100644 index 0c70a40..0000000 --- a/contrib/llvm/lib/Support/CMakeLists.txt +++ /dev/null @@ -1,49 +0,0 @@ -add_llvm_library(LLVMSupport - APFloat.cpp - APInt.cpp - APSInt.cpp - Allocator.cpp - circular_raw_ostream.cpp - CommandLine.cpp - ConstantRange.cpp - CrashRecoveryContext.cpp - Debug.cpp - DeltaAlgorithm.cpp - DAGDeltaAlgorithm.cpp - Dwarf.cpp - ErrorHandling.cpp - FileUtilities.cpp - FoldingSet.cpp - FormattedStream.cpp - GraphWriter.cpp - IsInf.cpp - IsNAN.cpp - ManagedStatic.cpp - MemoryBuffer.cpp - MemoryObject.cpp - PluginLoader.cpp - PrettyStackTrace.cpp - Regex.cpp - SmallPtrSet.cpp - SmallVector.cpp - SourceMgr.cpp - Statistic.cpp - StringExtras.cpp - StringMap.cpp - StringPool.cpp - StringRef.cpp - SystemUtils.cpp - TargetRegistry.cpp - Timer.cpp - Triple.cpp - Twine.cpp - raw_os_ostream.cpp - raw_ostream.cpp - regcomp.c - regerror.c - regexec.c - regfree.c - regstrlcpy.c - ) - -target_link_libraries (LLVMSupport LLVMSystem) diff --git a/contrib/llvm/lib/Support/COPYRIGHT.regex 
b/contrib/llvm/lib/Support/COPYRIGHT.regex deleted file mode 100644 index a6392fd..0000000 --- a/contrib/llvm/lib/Support/COPYRIGHT.regex +++ /dev/null @@ -1,54 +0,0 @@ -$OpenBSD: COPYRIGHT,v 1.3 2003/06/02 20:18:36 millert Exp $ - -Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved. -This software is not subject to any license of the American Telephone -and Telegraph Company or of the Regents of the University of California. - -Permission is granted to anyone to use this software for any purpose on -any computer system, and to alter it and redistribute it, subject -to the following restrictions: - -1. The author is not responsible for the consequences of use of this - software, no matter how awful, even if they arise from flaws in it. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. Since few users ever read sources, - credits must appear in the documentation. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. Since few users - ever read sources, credits must appear in the documentation. - -4. This notice may not be removed or altered. - -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -/*- - * Copyright (c) 1994 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)COPYRIGHT 8.1 (Berkeley) 3/16/94 - */ diff --git a/contrib/llvm/lib/Support/Makefile b/contrib/llvm/lib/Support/Makefile deleted file mode 100644 index 48c21f4..0000000 --- a/contrib/llvm/lib/Support/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -##===- lib/Support/Makefile --------------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../.. -LIBRARYNAME = LLVMSupport -BUILD_ARCHIVE = 1 - -## FIXME: This only requires RTTI because tblgen uses it. Fix that. 
-REQUIRES_RTTI = 1 - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/System/CMakeLists.txt b/contrib/llvm/lib/System/CMakeLists.txt deleted file mode 100644 index b43c3af..0000000 --- a/contrib/llvm/lib/System/CMakeLists.txt +++ /dev/null @@ -1,48 +0,0 @@ -add_llvm_library(LLVMSystem - Alarm.cpp - Atomic.cpp - Disassembler.cpp - DynamicLibrary.cpp - Errno.cpp - Host.cpp - IncludeFile.cpp - Memory.cpp - Mutex.cpp - Path.cpp - Process.cpp - Program.cpp - RWMutex.cpp - SearchForAddressOfSpecialSymbol.cpp - Signals.cpp - ThreadLocal.cpp - Threading.cpp - TimeValue.cpp - Valgrind.cpp - Unix/Alarm.inc - Unix/Host.inc - Unix/Memory.inc - Unix/Mutex.inc - Unix/Path.inc - Unix/Process.inc - Unix/Program.inc - Unix/RWMutex.inc - Unix/Signals.inc - Unix/ThreadLocal.inc - Unix/TimeValue.inc - Win32/Alarm.inc - Win32/DynamicLibrary.inc - Win32/Host.inc - Win32/Memory.inc - Win32/Mutex.inc - Win32/Path.inc - Win32/Process.inc - Win32/Program.inc - Win32/RWMutex.inc - Win32/Signals.inc - Win32/ThreadLocal.inc - Win32/TimeValue.inc - ) - -if( BUILD_SHARED_LIBS AND NOT WIN32 ) - target_link_libraries(LLVMSystem ${CMAKE_DL_LIBS}) -endif() diff --git a/contrib/llvm/lib/System/Makefile b/contrib/llvm/lib/System/Makefile deleted file mode 100644 index bb013b9..0000000 --- a/contrib/llvm/lib/System/Makefile +++ /dev/null @@ -1,25 +0,0 @@ -##===- lib/System/Makefile ---------------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../.. 
-LIBRARYNAME = LLVMSystem -BUILD_ARCHIVE = 1 -REQUIRES_RTTI = 1 -include $(LEVEL)/Makefile.config - -ifeq ($(HOST_OS),MingW) - REQUIRES_EH := 1 -endif - -EXTRA_DIST = Unix Win32 README.txt - -include $(LEVEL)/Makefile.common - -CompileCommonOpts := $(filter-out -pedantic,$(CompileCommonOpts)) -CompileCommonOpts := $(filter-out -Wno-long-long,$(CompileCommonOpts)) diff --git a/contrib/llvm/lib/System/README.txt b/contrib/llvm/lib/System/README.txt deleted file mode 100644 index eacb200..0000000 --- a/contrib/llvm/lib/System/README.txt +++ /dev/null @@ -1,43 +0,0 @@ -Design Of lib/System -==================== - -The software in this directory is designed to completely shield LLVM from any -and all operating system specific functionality. It is not intended to be a -complete operating system wrapper (such as ACE), but only to provide the -functionality necessary to support LLVM. - -The software located here, of necessity, has very specific and stringent design -rules. Violation of these rules means that cracks in the shield could form and -the primary goal of the library is defeated. By consistently using this library, -LLVM becomes more easily ported to new platforms since the only thing requiring -porting is this library. - -Complete documentation for the library can be found in the file: - llvm/docs/SystemLibrary.html -or at this URL: - http://llvm.org/docs/SystemLibrary.html - -While we recommend that you read the more detailed documentation, for the -impatient, here's a high level summary of the library's requirements. - - 1. No system header files are to be exposed through the interface. - 2. Std C++ and Std C header files are okay to be exposed through the interface. - 3. No exposed system-specific functions. - 4. No exposed system-specific data. - 5. Data in lib/System classes must use only simple C++ intrinsic types. - 6. Errors are handled by returning "true" and setting an optional std::string - 7. Library must not throw any exceptions, period. - 8. 
Interface functions must not have throw() specifications. - 9. No duplicate function implementations are permitted within an operating - system class. - -To accomplish these requirements, the library has numerous design criteria that -must be satisfied. Here's a high level summary of the library's design criteria: - - 1. No unused functionality (only what LLVM needs) - 2. High-Level Interfaces - 3. Use Opaque Classes - 4. Common Implementations - 5. Multiple Implementations - 6. Minimize Memory Allocation - 7. No Virtual Methods diff --git a/contrib/llvm/lib/System/Unix/README.txt b/contrib/llvm/lib/System/Unix/README.txt deleted file mode 100644 index b3bace4..0000000 --- a/contrib/llvm/lib/System/Unix/README.txt +++ /dev/null @@ -1,16 +0,0 @@ -llvm/lib/System/Unix README -=========================== - -This directory provides implementations of the lib/System classes that -are common to two or more variants of UNIX. For example, the directory -structure underneath this directory could look like this: - -Unix - only code that is truly generic to all UNIX platforms - Posix - code that is specific to Posix variants of UNIX - SUS - code that is specific to the Single Unix Specification - SysV - code that is specific to System V variants of UNIX - -As a rule, only those directories actually needing to be created should be -created. Also, further subdirectories could be created to reflect versions of -the various standards. For example, under SUS there could be v1, v2, and v3 -subdirectories to reflect the three major versions of SUS. diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/AsmParser/CMakeLists.txt deleted file mode 100644 index 9ba7c01..0000000 --- a/contrib/llvm/lib/Target/ARM/AsmParser/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMARMAsmParser - ARMAsmLexer.cpp - ARMAsmParser.cpp - ) - diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/Makefile b/contrib/llvm/lib/Target/ARM/AsmParser/Makefile deleted file mode 100644 index 841516f..0000000 --- a/contrib/llvm/lib/Target/ARM/AsmParser/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/ARM/AsmParser/Makefile -------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMARMAsmParser - -# Hack: we need to include 'main' ARM target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/AsmPrinter/CMakeLists.txt deleted file mode 100644 index 18645c0..0000000 --- a/contrib/llvm/lib/Target/ARM/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMARMAsmPrinter - ARMInstPrinter.cpp - ) -add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/ARM/AsmPrinter/Makefile b/contrib/llvm/lib/Target/ARM/AsmPrinter/Makefile deleted file mode 100644 index 65d372e..0000000 --- a/contrib/llvm/lib/Target/ARM/AsmPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/ARM/AsmPrinter/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. 
-LIBRARYNAME = LLVMARMAsmPrinter - -# Hack: we need to include 'main' arm target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/ARM/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/CMakeLists.txt deleted file mode 100644 index 6b4dee5..0000000 --- a/contrib/llvm/lib/Target/ARM/CMakeLists.txt +++ /dev/null @@ -1,50 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS ARM.td) - -tablegen(ARMGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(ARMGenRegisterNames.inc -gen-register-enums) -tablegen(ARMGenRegisterInfo.inc -gen-register-desc) -tablegen(ARMGenInstrNames.inc -gen-instr-enums) -tablegen(ARMGenInstrInfo.inc -gen-instr-desc) -tablegen(ARMGenCodeEmitter.inc -gen-emitter) -tablegen(ARMGenAsmWriter.inc -gen-asm-writer) -tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher) -tablegen(ARMGenDAGISel.inc -gen-dag-isel) -tablegen(ARMGenFastISel.inc -gen-fast-isel) -tablegen(ARMGenCallingConv.inc -gen-callingconv) -tablegen(ARMGenSubtarget.inc -gen-subtarget) -tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info) - -add_llvm_target(ARMCodeGen - ARMAsmPrinter.cpp - ARMBaseInstrInfo.cpp - ARMBaseRegisterInfo.cpp - ARMCodeEmitter.cpp - ARMConstantIslandPass.cpp - ARMConstantPoolValue.cpp - ARMExpandPseudoInsts.cpp - ARMFastISel.cpp - ARMGlobalMerge.cpp - ARMISelDAGToDAG.cpp - ARMISelLowering.cpp - ARMInstrInfo.cpp - ARMJITInfo.cpp - ARMLoadStoreOptimizer.cpp - ARMMCAsmInfo.cpp - ARMMCInstLower.cpp - ARMRegisterInfo.cpp - ARMSelectionDAGInfo.cpp - ARMSubtarget.cpp - ARMTargetMachine.cpp - ARMTargetObjectFile.cpp - NEONMoveFix.cpp - NEONPreAllocPass.cpp - Thumb1InstrInfo.cpp - Thumb1RegisterInfo.cpp - Thumb2HazardRecognizer.cpp - Thumb2ITBlockPass.cpp - Thumb2InstrInfo.cpp - Thumb2RegisterInfo.cpp - Thumb2SizeReduction.cpp - ) - -target_link_libraries (LLVMARMCodeGen LLVMARMAsmPrinter LLVMSelectionDAG) diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/Makefile 
b/contrib/llvm/lib/Target/ARM/Disassembler/Makefile deleted file mode 100644 index 031b6ac..0000000 --- a/contrib/llvm/lib/Target/ARM/Disassembler/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/ARM/Disassembler/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMARMDisassembler - -# Hack: we need to include 'main' arm target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/ARM/Makefile b/contrib/llvm/lib/Target/ARM/Makefile deleted file mode 100644 index b3fcfaf6..0000000 --- a/contrib/llvm/lib/Target/ARM/Makefile +++ /dev/null @@ -1,25 +0,0 @@ -##===- lib/Target/ARM/Makefile -----------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMARMCodeGen -TARGET = ARM - -# Make sure that tblgen is run, first thing. 
-BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \ - ARMGenRegisterInfo.inc ARMGenInstrNames.inc \ - ARMGenInstrInfo.inc ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \ - ARMGenDAGISel.inc ARMGenSubtarget.inc \ - ARMGenCodeEmitter.inc ARMGenCallingConv.inc \ - ARMGenDecoderTables.inc ARMGenEDInfo.inc \ - ARMGenFastISel.inc - -DIRS = AsmPrinter AsmParser Disassembler TargetInfo - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/ARM/README-Thumb.txt b/contrib/llvm/lib/Target/ARM/README-Thumb.txt deleted file mode 100644 index 6b605bb..0000000 --- a/contrib/llvm/lib/Target/ARM/README-Thumb.txt +++ /dev/null @@ -1,248 +0,0 @@ -//===---------------------------------------------------------------------===// -// Random ideas for the ARM backend (Thumb specific). -//===---------------------------------------------------------------------===// - -* Add support for compiling functions in both ARM and Thumb mode, then taking - the smallest. - -* Add support for compiling individual basic blocks in thumb mode, when in a - larger ARM function. This can be used for presumed cold code, like paths - to abort (failure path of asserts), EH handling code, etc. - -* Thumb doesn't have normal pre/post increment addressing modes, but you can - load/store 32-bit integers with pre/postinc by using load/store multiple - instrs with a single register. - -* Make better use of high registers r8, r10, r11, r12 (ip). Some variants of add - and cmp instructions can use high registers. Also, we can use them as - temporaries to spill values into. - -* In thumb mode, short, byte, and bool preferred alignments are currently set - to 4 to accommodate ISA restriction (i.e. add sp, #imm, imm must be multiple - of 4). - -//===---------------------------------------------------------------------===// - -Potential jumptable improvements: - -* If we know function size is less than (1 << 16) * 2 bytes, we can use 16-bit - jumptable entries (e.g. (L1 - L2) >> 1). 
Or even smaller entries if the - function is even smaller. This also applies to ARM. - -* Thumb jumptable codegen can improve given some help from the assembler. This - is what we generate right now: - - .set PCRELV0, (LJTI1_0_0-(LPCRELL0+4)) -LPCRELL0: - mov r1, #PCRELV0 - add r1, pc - ldr r0, [r0, r1] - mov pc, r0 - .align 2 -LJTI1_0_0: - .long LBB1_3 - ... - -Note there is another pc relative add that we can take advantage of. - add r1, pc, #imm_8 * 4 - -We should be able to generate: - -LPCRELL0: - add r1, LJTI1_0_0 - ldr r0, [r0, r1] - mov pc, r0 - .align 2 -LJTI1_0_0: - .long LBB1_3 - -if the assembler can translate the add to: - add r1, pc, #((LJTI1_0_0-(LPCRELL0+4))&0xfffffffc) - -Note the assembler also does something similar to constpool load: -LPCRELL0: - ldr r0, LCPI1_0 -=> - ldr r0, pc, #((LCPI1_0-(LPCRELL0+4))&0xfffffffc) - - -//===---------------------------------------------------------------------===// - -We compiles the following: - -define i16 @func_entry_2E_ce(i32 %i) { - switch i32 %i, label %bb12.exitStub [ - i32 0, label %bb4.exitStub - i32 1, label %bb9.exitStub - i32 2, label %bb4.exitStub - i32 3, label %bb4.exitStub - i32 7, label %bb9.exitStub - i32 8, label %bb.exitStub - i32 9, label %bb9.exitStub - ] - -bb12.exitStub: - ret i16 0 - -bb4.exitStub: - ret i16 1 - -bb9.exitStub: - ret i16 2 - -bb.exitStub: - ret i16 3 -} - -into: - -_func_entry_2E_ce: - mov r2, #1 - lsl r2, r0 - cmp r0, #9 - bhi LBB1_4 @bb12.exitStub -LBB1_1: @newFuncRoot - mov r1, #13 - tst r2, r1 - bne LBB1_5 @bb4.exitStub -LBB1_2: @newFuncRoot - ldr r1, LCPI1_0 - tst r2, r1 - bne LBB1_6 @bb9.exitStub -LBB1_3: @newFuncRoot - mov r1, #1 - lsl r1, r1, #8 - tst r2, r1 - bne LBB1_7 @bb.exitStub -LBB1_4: @bb12.exitStub - mov r0, #0 - bx lr -LBB1_5: @bb4.exitStub - mov r0, #1 - bx lr -LBB1_6: @bb9.exitStub - mov r0, #2 - bx lr -LBB1_7: @bb.exitStub - mov r0, #3 - bx lr -LBB1_8: - .align 2 -LCPI1_0: - .long 642 - - -gcc compiles to: - - cmp r0, #9 - @ lr needed for prologue - 
bhi L2 - ldr r3, L11 - mov r2, #1 - mov r1, r2, asl r0 - ands r0, r3, r2, asl r0 - movne r0, #2 - bxne lr - tst r1, #13 - beq L9 -L3: - mov r0, r2 - bx lr -L9: - tst r1, #256 - movne r0, #3 - bxne lr -L2: - mov r0, #0 - bx lr -L12: - .align 2 -L11: - .long 642 - - -GCC is doing a couple of clever things here: - 1. It is predicating one of the returns. This isn't a clear win though: in - cases where that return isn't taken, it is replacing one condbranch with - two 'ne' predicated instructions. - 2. It is sinking the shift of "1 << i" into the tst, and using ands instead of - tst. This will probably require whole function isel. - 3. GCC emits: - tst r1, #256 - we emit: - mov r1, #1 - lsl r1, r1, #8 - tst r2, r1 - - -//===---------------------------------------------------------------------===// - -When spilling in thumb mode and the sp offset is too large to fit in the ldr / -str offset field, we load the offset from a constpool entry and add it to sp: - -ldr r2, LCPI -add r2, sp -ldr r2, [r2] - -These instructions preserve the condition code which is important if the spill -is between a cmp and a bcc instruction. However, we can use the (potentially) -cheaper sequnce if we know it's ok to clobber the condition register. - -add r2, sp, #255 * 4 -add r2, #132 -ldr r2, [r2, #7 * 4] - -This is especially bad when dynamic alloca is used. The all fixed size stack -objects are referenced off the frame pointer with negative offsets. See -oggenc for an example. - - -//===---------------------------------------------------------------------===// - -Poor codegen test/CodeGen/ARM/select.ll f7: - - ldr r5, LCPI1_0 -LPC0: - add r5, pc - ldr r6, LCPI1_1 - ldr r2, LCPI1_2 - mov r3, r6 - mov lr, pc - bx r5 - -//===---------------------------------------------------------------------===// - -Make register allocator / spiller smarter so we can re-materialize "mov r, imm", -etc. Almost all Thumb instructions clobber condition code. 
- -//===---------------------------------------------------------------------===// - -Add ldmia, stmia support. - -//===---------------------------------------------------------------------===// - -Thumb load / store address mode offsets are scaled. The values kept in the -instruction operands are pre-scale values. This probably ought to be changed -to avoid extra work when we convert Thumb2 instructions to Thumb1 instructions. - -//===---------------------------------------------------------------------===// - -We need to make (some of the) Thumb1 instructions predicable. That will allow -shrinking of predicated Thumb2 instructions. To allow this, we need to be able -to toggle the 's' bit since they do not set CPSR when they are inside IT blocks. - -//===---------------------------------------------------------------------===// - -Make use of hi register variants of cmp: tCMPhir / tCMPZhir. - -//===---------------------------------------------------------------------===// - -Thumb1 immediate fields sometimes keep pre-scaled values. See -Thumb1RegisterInfo::eliminateFrameIndex. This is inconsistent with ARM and -Thumb2. - -//===---------------------------------------------------------------------===// - -Rather than having tBR_JTr print a ".align 2" and constant island pass pad it, -add a target specific ALIGN instruction instead. That way, GetInstSizeInBytes -won't have to over-estimate. It can also be used for loop alignment pass. diff --git a/contrib/llvm/lib/Target/ARM/README-Thumb2.txt b/contrib/llvm/lib/Target/ARM/README-Thumb2.txt deleted file mode 100644 index e7c2552..0000000 --- a/contrib/llvm/lib/Target/ARM/README-Thumb2.txt +++ /dev/null @@ -1,6 +0,0 @@ -//===---------------------------------------------------------------------===// -// Random ideas for the ARM backend (Thumb2 specific). 
-//===---------------------------------------------------------------------===// - -Make sure jumptable destinations are below the jumptable in order to make use -of tbb / tbh. diff --git a/contrib/llvm/lib/Target/ARM/README.txt b/contrib/llvm/lib/Target/ARM/README.txt deleted file mode 100644 index 9fc3fb9..0000000 --- a/contrib/llvm/lib/Target/ARM/README.txt +++ /dev/null @@ -1,659 +0,0 @@ -//===---------------------------------------------------------------------===// -// Random ideas for the ARM backend. -//===---------------------------------------------------------------------===// - -Reimplement 'select' in terms of 'SEL'. - -* We would really like to support UXTAB16, but we need to prove that the - add doesn't need to overflow between the two 16-bit chunks. - -* Implement pre/post increment support. (e.g. PR935) -* Implement smarter constant generation for binops with large immediates. - -A few ARMv6T2 ops should be pattern matched: BFI, SBFX, and UBFX - -Interesting optimization for PIC codegen on arm-linux: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43129 - -//===---------------------------------------------------------------------===// - -Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the -time regalloc happens, these values are now in a 32-bit register, usually with -the top-bits known to be sign or zero extended. If spilled, we should be able -to spill these to a 8-bit or 16-bit stack slot, zero or sign extending as part -of the reload. - -Doing this reduces the size of the stack frame (important for thumb etc), and -also increases the likelihood that we will be able to reload multiple values -from the stack with a single load. - -//===---------------------------------------------------------------------===// - -The constant island pass is in good shape. Some cleanups might be desirable, -but there is unlikely to be much improvement in the generated code. - -1. 
There may be some advantage to trying to be smarter about the initial -placement, rather than putting everything at the end. - -2. There might be some compile-time efficiency to be had by representing -consecutive islands as a single block rather than multiple blocks. - -3. Use a priority queue to sort constant pool users in inverse order of - position so we always process the one closest to the end of functions - first. This may simplify CreateNewWater. - -//===---------------------------------------------------------------------===// - -Eliminate copysign custom expansion. We are still generating crappy code with -default expansion + if-conversion. - -//===---------------------------------------------------------------------===// - -Eliminate one instruction from: - -define i32 @_Z6slow4bii(i32 %x, i32 %y) { - %tmp = icmp sgt i32 %x, %y - %retval = select i1 %tmp, i32 %x, i32 %y - ret i32 %retval -} - -__Z6slow4bii: - cmp r0, r1 - movgt r1, r0 - mov r0, r1 - bx lr -=> - -__Z6slow4bii: - cmp r0, r1 - movle r0, r1 - bx lr - -//===---------------------------------------------------------------------===// - -Implement long long "X-3" with instructions that fold the immediate in. These -were disabled due to badness with the ARM carry flag on subtracts. - -//===---------------------------------------------------------------------===// - -More load / store optimizations: -1) Better representation for block transfer? This is from Olden/power: - - fldd d0, [r4] - fstd d0, [r4, #+32] - fldd d0, [r4, #+8] - fstd d0, [r4, #+40] - fldd d0, [r4, #+16] - fstd d0, [r4, #+48] - fldd d0, [r4, #+24] - fstd d0, [r4, #+56] - -If we can spare the registers, it would be better to use fldm and fstm here. -Need major register allocator enhancement though. - -2) Can we recognize the relative position of constantpool entries? i.e. 
Treat - - ldr r0, LCPI17_3 - ldr r1, LCPI17_4 - ldr r2, LCPI17_5 - - as - ldr r0, LCPI17 - ldr r1, LCPI17+4 - ldr r2, LCPI17+8 - - Then the ldr's can be combined into a single ldm. See Olden/power. - -Note for ARM v4 gcc uses ldmia to load a pair of 32-bit values to represent a -double 64-bit FP constant: - - adr r0, L6 - ldmia r0, {r0-r1} - - .align 2 -L6: - .long -858993459 - .long 1074318540 - -3) struct copies appear to be done field by field -instead of by words, at least sometimes: - -struct foo { int x; short s; char c1; char c2; }; -void cpy(struct foo*a, struct foo*b) { *a = *b; } - -llvm code (-O2) - ldrb r3, [r1, #+6] - ldr r2, [r1] - ldrb r12, [r1, #+7] - ldrh r1, [r1, #+4] - str r2, [r0] - strh r1, [r0, #+4] - strb r3, [r0, #+6] - strb r12, [r0, #+7] -gcc code (-O2) - ldmia r1, {r1-r2} - stmia r0, {r1-r2} - -In this benchmark poor handling of aggregate copies has shown up as -having a large effect on size, and possibly speed as well (we don't have -a good way to measure on ARM). - -//===---------------------------------------------------------------------===// - -* Consider this silly example: - -double bar(double x) { - double r = foo(3.1); - return x+r; -} - -_bar: - stmfd sp!, {r4, r5, r7, lr} - add r7, sp, #8 - mov r4, r0 - mov r5, r1 - fldd d0, LCPI1_0 - fmrrd r0, r1, d0 - bl _foo - fmdrr d0, r4, r5 - fmsr s2, r0 - fsitod d1, s2 - faddd d0, d1, d0 - fmrrd r0, r1, d0 - ldmfd sp!, {r4, r5, r7, pc} - -Ignore the prologue and epilogue stuff for a second. Note - mov r4, r0 - mov r5, r1 -the copys to callee-save registers and the fact they are only being used by the -fmdrr instruction. It would have been better had the fmdrr been scheduled -before the call and place the result in a callee-save DPR register. The two -mov ops would not have been necessary. 
- -//===---------------------------------------------------------------------===// - -Calling convention related stuff: - -* gcc's parameter passing implementation is terrible and we suffer as a result: - -e.g. -struct s { - double d1; - int s1; -}; - -void foo(struct s S) { - printf("%g, %d\n", S.d1, S.s1); -} - -'S' is passed via registers r0, r1, r2. But gcc stores them to the stack, and -then reload them to r1, r2, and r3 before issuing the call (r0 contains the -address of the format string): - - stmfd sp!, {r7, lr} - add r7, sp, #0 - sub sp, sp, #12 - stmia sp, {r0, r1, r2} - ldmia sp, {r1-r2} - ldr r0, L5 - ldr r3, [sp, #8] -L2: - add r0, pc, r0 - bl L_printf$stub - -Instead of a stmia, ldmia, and a ldr, wouldn't it be better to do three moves? - -* Return an aggregate type is even worse: - -e.g. -struct s foo(void) { - struct s S = {1.1, 2}; - return S; -} - - mov ip, r0 - ldr r0, L5 - sub sp, sp, #12 -L2: - add r0, pc, r0 - @ lr needed for prologue - ldmia r0, {r0, r1, r2} - stmia sp, {r0, r1, r2} - stmia ip, {r0, r1, r2} - mov r0, ip - add sp, sp, #12 - bx lr - -r0 (and later ip) is the hidden parameter from caller to store the value in. The -first ldmia loads the constants into r0, r1, r2. The last stmia stores r0, r1, -r2 into the address passed in. However, there is one additional stmia that -stores r0, r1, and r2 to some stack location. The store is dead. 
- -The llvm-gcc generated code looks like this: - -csretcc void %foo(%struct.s* %agg.result) { -entry: - %S = alloca %struct.s, align 4 ; <%struct.s*> [#uses=1] - %memtmp = alloca %struct.s ; <%struct.s*> [#uses=1] - cast %struct.s* %S to sbyte* ; <sbyte*>:0 [#uses=2] - call void %llvm.memcpy.i32( sbyte* %0, sbyte* cast ({ double, int }* %C.0.904 to sbyte*), uint 12, uint 4 ) - cast %struct.s* %agg.result to sbyte* ; <sbyte*>:1 [#uses=2] - call void %llvm.memcpy.i32( sbyte* %1, sbyte* %0, uint 12, uint 0 ) - cast %struct.s* %memtmp to sbyte* ; <sbyte*>:2 [#uses=1] - call void %llvm.memcpy.i32( sbyte* %2, sbyte* %1, uint 12, uint 0 ) - ret void -} - -llc ends up issuing two memcpy's (the first memcpy becomes 3 loads from -constantpool). Perhaps we should 1) fix llvm-gcc so the memcpy is translated -into a number of load and stores, or 2) custom lower memcpy (of small size) to -be ldmia / stmia. I think option 2 is better but the current register -allocator cannot allocate a chunk of registers at a time. - -A feasible temporary solution is to use specific physical registers at the -lowering time for small (<= 4 words?) transfer size. - -* ARM CSRet calling convention requires the hidden argument to be returned by -the callee. - -//===---------------------------------------------------------------------===// - -We can definitely do a better job on BB placements to eliminate some branches. -It's very common to see llvm generated assembly code that looks like this: - -LBB3: - ... -LBB4: -... - beq LBB3 - b LBB2 - -If BB4 is the only predecessor of BB3, then we can emit BB3 after BB4. We can -then eliminate beq and turn the unconditional branch to LBB2 into a bne. - -See McCat/18-imp/ComputeBoundingBoxes for an example. - -//===---------------------------------------------------------------------===// - -Pre-/post- indexed load / stores: - -1) We should not make the pre/post- indexed load/store transform if the base ptr -is guaranteed to be live beyond the load/store. 
This can happen if the base -ptr is live out of the block we are performing the optimization. e.g. - -mov r1, r2 -ldr r3, [r1], #4 -... - -vs. - -ldr r3, [r2] -add r1, r2, #4 -... - -In most cases, this is just a wasted optimization. However, sometimes it can -negatively impact the performance because two-address code is more restrictive -when it comes to scheduling. - -Unfortunately, liveout information is currently unavailable during DAG combine -time. - -2) Consider splitting an indexed load / store into a pair of add/sub + load/store - to solve #1 (in TwoAddressInstructionPass.cpp). - -3) Enhance LSR to generate more opportunities for indexed ops. - -4) Once we added support for multiple result patterns, write indexed loads - patterns instead of C++ instruction selection code. - -5) Use VLDM / VSTM to emulate indexed FP load / store. - -//===---------------------------------------------------------------------===// - -Implement support for some more tricky ways to materialize immediates. For -example, to get 0xffff8000, we can use: - -mov r9, #&3f8000 -sub r9, r9, #&400000 - -//===---------------------------------------------------------------------===// - -We sometimes generate multiple add / sub instructions to update sp in prologue -and epilogue if the inc / dec value is too large to fit in a single immediate -operand. In some cases, perhaps it might be better to load the value from a -constantpool instead. - -//===---------------------------------------------------------------------===// - -GCC generates significantly better code for this function. - -int foo(int StackPtr, unsigned char *Line, unsigned char *Stack, int LineLen) { - int i = 0; - - if (StackPtr != 0) { - while (StackPtr != 0 && i < (((LineLen) < (32768))? 
(LineLen) : (32768))) - Line[i++] = Stack[--StackPtr]; - if (LineLen > 32768) - { - while (StackPtr != 0 && i < LineLen) - { - i++; - --StackPtr; - } - } - } - return StackPtr; -} - -//===---------------------------------------------------------------------===// - -This should compile to the mlas instruction: -int mlas(int x, int y, int z) { return ((x * y + z) < 0) ? 7 : 13; } - -//===---------------------------------------------------------------------===// - -At some point, we should triage these to see if they still apply to us: - -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19598 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18560 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27016 - -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11831 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11826 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11825 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11824 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11823 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11820 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10982 - -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10242 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9831 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9760 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9759 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9703 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9702 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9663 - -http://www.inf.u-szeged.hu/gcc-arm/ -http://citeseer.ist.psu.edu/debus04linktime.html - -//===---------------------------------------------------------------------===// - -gcc generates smaller code for this function at -O2 or -Os: - -void foo(signed char* p) { - if (*p == 3) - bar(); - else if (*p == 4) - baz(); - else if (*p == 5) - quux(); -} - -llvm decides it's a good idea to turn the repeated if...else into a -binary tree, as if it were a switch; the resulting code requires -1 -compare-and-branches when *p<=2 or *p==5, the same number if *p==4 -or *p>6, and +1 if *p==3. 
So it should be a speed win -(on balance). However, the revised code is larger, with 4 conditional -branches instead of 3. - -More seriously, there is a byte->word extend before -each comparison, where there should be only one, and the condition codes -are not remembered when the same two values are compared twice. - -//===---------------------------------------------------------------------===// - -More LSR enhancements possible: - -1. Teach LSR about pre- and post- indexed ops to allow iv increment to be merged - in a load / store. -2. Allow iv reuse even when a type conversion is required. For example, i8 - and i32 load / store addressing modes are identical. - - -//===---------------------------------------------------------------------===// - -This: - -int foo(int a, int b, int c, int d) { - long long acc = (long long)a * (long long)b; - acc += (long long)c * (long long)d; - return (int)(acc >> 32); -} - -Should compile to use SMLAL (Signed Multiply Accumulate Long) which multiplies -two signed 32-bit values to produce a 64-bit value, and accumulates this with -a 64-bit value. 
- -We currently get this with both v4 and v6: - -_foo: - smull r1, r0, r1, r0 - smull r3, r2, r3, r2 - adds r3, r3, r1 - adc r0, r2, r0 - bx lr - -//===---------------------------------------------------------------------===// - -This: - #include <algorithm> - std::pair<unsigned, bool> full_add(unsigned a, unsigned b) - { return std::make_pair(a + b, a + b < a); } - bool no_overflow(unsigned a, unsigned b) - { return !full_add(a, b).second; } - -Should compile to: - -_Z8full_addjj: - adds r2, r1, r2 - movcc r1, #0 - movcs r1, #1 - str r2, [r0, #0] - strb r1, [r0, #4] - mov pc, lr - -_Z11no_overflowjj: - cmn r0, r1 - movcs r0, #0 - movcc r0, #1 - mov pc, lr - -not: - -__Z8full_addjj: - add r3, r2, r1 - str r3, [r0] - mov r2, #1 - mov r12, #0 - cmp r3, r1 - movlo r12, r2 - str r12, [r0, #+4] - bx lr -__Z11no_overflowjj: - add r3, r1, r0 - mov r2, #1 - mov r1, #0 - cmp r3, r0 - movhs r1, r2 - mov r0, r1 - bx lr - -//===---------------------------------------------------------------------===// - -Some of the NEON intrinsics may be appropriate for more general use, either -as target-independent intrinsics or perhaps elsewhere in the ARM backend. -Some of them may also be lowered to target-independent SDNodes, and perhaps -some new SDNodes could be added. - -For example, maximum, minimum, and absolute value operations are well-defined -and standard operations, both for vector and scalar types. - -The current NEON-specific intrinsics for count leading zeros and count one -bits could perhaps be replaced by the target-independent ctlz and ctpop -intrinsics. It may also make sense to add a target-independent "ctls" -intrinsic for "count leading sign bits". Likewise, the backend could use -the target-independent SDNodes for these operations. - -ARMv6 has scalar saturating and halving adds and subtracts. The same -intrinsics could possibly be used for both NEON's vector implementations of -those operations and the ARMv6 scalar versions. 
- -//===---------------------------------------------------------------------===// - -ARM::MOVCCr is commutable (by flipping the condition). But we need to implement -ARMInstrInfo::commuteInstruction() to support it. - -//===---------------------------------------------------------------------===// - -Split out LDR (literal) from normal ARM LDR instruction. Also consider splitting -LDR into imm12 and so_reg forms. This allows us to clean up some code. e.g. -ARMLoadStoreOptimizer does not need to look at LDR (literal) and LDR (so_reg) -while ARMConstantIslandPass only needs to worry about LDR (literal). - -//===---------------------------------------------------------------------===// - -Constant island pass should make use of full range SoImm values for LEApcrel. -Be careful though as the last attempt caused infinite looping on lencod. - -//===---------------------------------------------------------------------===// - -Predication issue. This function: - -extern unsigned array[ 128 ]; -int foo( int x ) { - int y; - y = array[ x & 127 ]; - if ( x & 128 ) - y = 123456789 & ( y >> 2 ); - else - y = 123456789 & y; - return y; -} - -compiles to: - -_foo: - and r1, r0, #127 - ldr r2, LCPI1_0 - ldr r2, [r2] - ldr r1, [r2, +r1, lsl #2] - mov r2, r1, lsr #2 - tst r0, #128 - moveq r2, r1 - ldr r0, LCPI1_1 - and r0, r2, r0 - bx lr - -It would be better to do something like this, to fold the shift into the -conditional move: - - and r1, r0, #127 - ldr r2, LCPI1_0 - ldr r2, [r2] - ldr r1, [r2, +r1, lsl #2] - tst r0, #128 - movne r1, r1, lsr #2 - ldr r0, LCPI1_1 - and r0, r1, r0 - bx lr - -it saves an instruction and a register. - -//===---------------------------------------------------------------------===// - -It might be profitable to cse MOVi16 if there are lots of 32-bit immediates -with the same bottom half. 
- -//===---------------------------------------------------------------------===// - -Robert Muth started working on an alternate jump table implementation that -does not put the tables in-line in the text. This is more like the llvm -default jump table implementation. This might be useful sometime. Several -revisions of patches are on the mailing list, beginning at: -http://lists.cs.uiuc.edu/pipermail/llvmdev/2009-June/022763.html - -//===---------------------------------------------------------------------===// - -Make use of the "rbit" instruction. - -//===---------------------------------------------------------------------===// - -Take a look at test/CodeGen/Thumb2/machine-licm.ll. ARM should be taught how -to licm and cse the unnecessary load from cp#1. - -//===---------------------------------------------------------------------===// - -The CMN instruction sets the flags like an ADD instruction, while CMP sets -them like a subtract. Therefore to be able to use CMN for comparisons other -than the Z bit, we'll need additional logic to reverse the conditionals -associated with the comparison. Perhaps a pseudo-instruction for the comparison, -with a post-codegen pass to clean up and handle the condition codes? -See PR5694 for testcase. - -//===---------------------------------------------------------------------===// - -Given the following on armv5: -int test1(int A, int B) { - return (A&-8388481)|(B&8388480); -} - -We currently generate: - ldr r2, .LCPI0_0 - and r0, r0, r2 - ldr r2, .LCPI0_1 - and r1, r1, r2 - orr r0, r1, r0 - bx lr - -We should be able to replace the second ldr+and with a bic (i.e. reuse the -constant which was already loaded). Not sure what's necessary to do that. 
- -//===---------------------------------------------------------------------===// - -The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal: - -int a(int x) { return __builtin_bswap32(x); } - -a: - mov r1, #255, 24 - mov r2, #255, 16 - and r1, r1, r0, lsr #8 - and r2, r2, r0, lsl #8 - orr r1, r1, r0, lsr #24 - orr r0, r2, r0, lsl #24 - orr r0, r0, r1 - bx lr - -Something like the following would be better (fewer instructions/registers): - eor r1, r0, r0, ror #16 - bic r1, r1, #0xff0000 - mov r1, r1, lsr #8 - eor r0, r1, r0, ror #8 - bx lr - -A custom Thumb version would also be a slight improvement over the generic -version. - -//===---------------------------------------------------------------------===// - -Consider the following simple C code: - -void foo(unsigned char *a, unsigned char *b, int *c) { - if ((*a | *b) == 0) *c = 0; -} - -currently llvm-gcc generates something like this (nice branchless code I'd say): - - ldrb r0, [r0] - ldrb r1, [r1] - orr r0, r1, r0 - tst r0, #255 - moveq r0, #0 - streq r0, [r2] - bx lr - -Note that both "tst" and "moveq" are redundant. - -//===---------------------------------------------------------------------===// - diff --git a/contrib/llvm/lib/Target/ARM/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/TargetInfo/CMakeLists.txt deleted file mode 100644 index 3910bb0..0000000 --- a/contrib/llvm/lib/Target/ARM/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMARMInfo - ARMTargetInfo.cpp - ) - -add_dependencies(LLVMARMInfo ARMCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/ARM/TargetInfo/Makefile b/contrib/llvm/lib/Target/ARM/TargetInfo/Makefile deleted file mode 100644 index 6292ab1..0000000 --- a/contrib/llvm/lib/Target/ARM/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/ARM/TargetInfo/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMARMInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/Alpha/AsmPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/Alpha/AsmPrinter/CMakeLists.txt deleted file mode 100644 index 992c218..0000000 --- a/contrib/llvm/lib/Target/Alpha/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMAlphaAsmPrinter - AlphaAsmPrinter.cpp - ) -add_dependencies(LLVMAlphaAsmPrinter AlphaCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/Alpha/AsmPrinter/Makefile b/contrib/llvm/lib/Target/Alpha/AsmPrinter/Makefile deleted file mode 100644 index ea13c38..0000000 --- a/contrib/llvm/lib/Target/Alpha/AsmPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/Alpha/AsmPrinter/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. 
-LIBRARYNAME = LLVMAlphaAsmPrinter - -# Hack: we need to include 'main' alpha target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/Alpha/CMakeLists.txt b/contrib/llvm/lib/Target/Alpha/CMakeLists.txt deleted file mode 100644 index fbf7f3a..0000000 --- a/contrib/llvm/lib/Target/Alpha/CMakeLists.txt +++ /dev/null @@ -1,29 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS Alpha.td) - -tablegen(AlphaGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(AlphaGenRegisterNames.inc -gen-register-enums) -tablegen(AlphaGenRegisterInfo.inc -gen-register-desc) -tablegen(AlphaGenInstrNames.inc -gen-instr-enums) -tablegen(AlphaGenInstrInfo.inc -gen-instr-desc) -tablegen(AlphaGenCodeEmitter.inc -gen-emitter) -tablegen(AlphaGenAsmWriter.inc -gen-asm-writer) -tablegen(AlphaGenDAGISel.inc -gen-dag-isel) -tablegen(AlphaGenCallingConv.inc -gen-callingconv) -tablegen(AlphaGenSubtarget.inc -gen-subtarget) - -add_llvm_target(AlphaCodeGen - AlphaBranchSelector.cpp - AlphaCodeEmitter.cpp - AlphaInstrInfo.cpp - AlphaISelDAGToDAG.cpp - AlphaISelLowering.cpp - AlphaJITInfo.cpp - AlphaLLRP.cpp - AlphaMCAsmInfo.cpp - AlphaRegisterInfo.cpp - AlphaSubtarget.cpp - AlphaTargetMachine.cpp - AlphaSelectionDAGInfo.cpp - ) - -target_link_libraries (LLVMAlphaCodeGen LLVMSelectionDAG) diff --git a/contrib/llvm/lib/Target/Alpha/Makefile b/contrib/llvm/lib/Target/Alpha/Makefile deleted file mode 100644 index 54d53ab..0000000 --- a/contrib/llvm/lib/Target/Alpha/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -##===- lib/Target/Alpha/Makefile -------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. 
-LIBRARYNAME = LLVMAlphaCodeGen -TARGET = Alpha - -# Make sure that tblgen is run, first thing. -BUILT_SOURCES = AlphaGenRegisterInfo.h.inc AlphaGenRegisterNames.inc \ - AlphaGenRegisterInfo.inc AlphaGenInstrNames.inc \ - AlphaGenInstrInfo.inc AlphaGenCodeEmitter.inc \ - AlphaGenAsmWriter.inc AlphaGenDAGISel.inc \ - AlphaGenCallingConv.inc AlphaGenSubtarget.inc - -DIRS = AsmPrinter TargetInfo - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/Alpha/README.txt b/contrib/llvm/lib/Target/Alpha/README.txt deleted file mode 100644 index 9ae1517..0000000 --- a/contrib/llvm/lib/Target/Alpha/README.txt +++ /dev/null @@ -1,42 +0,0 @@ -*** - -add gcc builtins for alpha instructions - - -*** - -custom expand byteswap into nifty -extract/insert/mask byte/word/longword/quadword low/high -sequences - -*** - -see if any of the extract/insert/mask operations can be added - -*** - -match more interesting things for cmovlbc cmovlbs (move if low bit clear/set) - -*** - -lower srem and urem - -remq(i,j): i - (j * divq(i,j)) if j != 0 -remqu(i,j): i - (j * divqu(i,j)) if j != 0 -reml(i,j): i - (j * divl(i,j)) if j != 0 -remlu(i,j): i - (j * divlu(i,j)) if j != 0 - -*** - -add crazy vector instructions (MVI): - -(MIN|MAX)(U|S)(B8|W4) min and max, signed and unsigned, byte and word -PKWB, UNPKBW pack/unpack word to byte -PKLB UNPKBL pack/unpack long to byte -PERR pixel error (sum across bytes of bytewise abs(i8v8 a - i8v8 b)) - -cmpbytes bytewise cmpeq of i8v8 a and i8v8 b (not part of MVI extensions) - -this has some good examples for other operations that can be synthesised well -from these rather meager vector ops (such as saturating add). 
-http://www.alphalinux.org/docs/MVI-full.html diff --git a/contrib/llvm/lib/Target/Alpha/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/Alpha/TargetInfo/CMakeLists.txt deleted file mode 100644 index 2a7291b..0000000 --- a/contrib/llvm/lib/Target/Alpha/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMAlphaInfo - AlphaTargetInfo.cpp - ) - -add_dependencies(LLVMAlphaInfo AlphaCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/Alpha/TargetInfo/Makefile b/contrib/llvm/lib/Target/Alpha/TargetInfo/Makefile deleted file mode 100644 index de01d7f..0000000 --- a/contrib/llvm/lib/Target/Alpha/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/Alpha/TargetInfo/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMAlphaInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/Blackfin/AsmPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/Blackfin/AsmPrinter/CMakeLists.txt deleted file mode 100644 index 795aebf..0000000 --- a/contrib/llvm/lib/Target/Blackfin/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMBlackfinAsmPrinter - BlackfinAsmPrinter.cpp - ) -add_dependencies(LLVMBlackfinAsmPrinter BlackfinCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/Blackfin/AsmPrinter/Makefile b/contrib/llvm/lib/Target/Blackfin/AsmPrinter/Makefile deleted file mode 100644 index a106a23..0000000 --- a/contrib/llvm/lib/Target/Blackfin/AsmPrinter/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/Blackfin/AsmPrinter/Makefile -------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMBlackfinAsmPrinter - -# Hack: we need to include 'main' Blackfin target directory to grab private -# headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/Blackfin/CMakeLists.txt b/contrib/llvm/lib/Target/Blackfin/CMakeLists.txt deleted file mode 100644 index f8847d0..0000000 --- a/contrib/llvm/lib/Target/Blackfin/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS Blackfin.td) - -tablegen(BlackfinGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(BlackfinGenRegisterNames.inc -gen-register-enums) -tablegen(BlackfinGenRegisterInfo.inc -gen-register-desc) -tablegen(BlackfinGenInstrNames.inc -gen-instr-enums) -tablegen(BlackfinGenInstrInfo.inc -gen-instr-desc) -tablegen(BlackfinGenAsmWriter.inc -gen-asm-writer) -tablegen(BlackfinGenDAGISel.inc -gen-dag-isel) -tablegen(BlackfinGenSubtarget.inc -gen-subtarget) -tablegen(BlackfinGenCallingConv.inc -gen-callingconv) -tablegen(BlackfinGenIntrinsics.inc -gen-tgt-intrinsic) - -add_llvm_target(BlackfinCodeGen - BlackfinInstrInfo.cpp - BlackfinIntrinsicInfo.cpp - BlackfinISelDAGToDAG.cpp - BlackfinISelLowering.cpp - BlackfinMCAsmInfo.cpp - BlackfinRegisterInfo.cpp - 
BlackfinSubtarget.cpp - BlackfinTargetMachine.cpp - BlackfinSelectionDAGInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/Blackfin/Makefile b/contrib/llvm/lib/Target/Blackfin/Makefile deleted file mode 100644 index 339bef9..0000000 --- a/contrib/llvm/lib/Target/Blackfin/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -##===- lib/Target/Blackfin/Makefile ------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMBlackfinCodeGen -TARGET = Blackfin - -# Make sure that tblgen is run, first thing. -BUILT_SOURCES = BlackfinGenRegisterInfo.h.inc BlackfinGenRegisterNames.inc \ - BlackfinGenRegisterInfo.inc BlackfinGenInstrNames.inc \ - BlackfinGenInstrInfo.inc BlackfinGenAsmWriter.inc \ - BlackfinGenDAGISel.inc BlackfinGenSubtarget.inc \ - BlackfinGenCallingConv.inc BlackfinGenIntrinsics.inc - -DIRS = AsmPrinter TargetInfo - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Target/Blackfin/README.txt b/contrib/llvm/lib/Target/Blackfin/README.txt deleted file mode 100644 index b4c8227..0000000 --- a/contrib/llvm/lib/Target/Blackfin/README.txt +++ /dev/null @@ -1,244 +0,0 @@ -//===-- README.txt - Notes for Blackfin Target ------------------*- org -*-===// - -* Condition codes -** DONE Problem with asymmetric SETCC operations -The instruction - - CC = R0 < 2 - -is not symmetric - there is no R0 > 2 instruction. On the other hand, IF CC -JUMP can take both CC and !CC as a condition. We cannot pattern-match (brcond -(not cc), target), the DAG optimizer removes that kind of thing. - -This is handled by creating a pseudo-register NCC that aliases CC. Register -classes JustCC and NotCC are used to control the inversion of CC. - -** DONE CC as an i32 register -The AnyCC register class pretends to hold i32 values. 
It can only represent the -values 0 and 1, but we can copy to and from the D class. This hack makes it -possible to represent the setcc instruction without having i1 as a legal type. - -In most cases, the CC register is set by a "CC = .." or BITTST instruction, and -then used in a conditional branch or move. The code generator thinks it is -moving 32 bits, but the value stays in CC. In other cases, the result of a -comparison is actually used as an i32 number, and CC will be copied to a D -register. - -* Stack frames -** TODO Use Push/Pop instructions -We should use the push/pop instructions when saving callee-saved -registers. They are smaller, and we may even use push multiple instructions. - -** TODO requiresRegisterScavenging -We need more intelligence in determining when the scavenger is needed. We -should keep track of: -- Spilling D16 registers -- Spilling AnyCC registers - -* Assembler -** TODO Implement PrintGlobalVariable -** TODO Remove LOAD32sym -It's a hack combining two instructions by concatenation. 
- -* Inline Assembly - -These are the GCC constraints from bfin/constraints.md: - -| Code | Register class | LLVM | -|-------+-------------------------------------------+------| -| a | P | C | -| d | D | C | -| z | Call clobbered P (P0, P1, P2) | X | -| D | EvenD | X | -| W | OddD | X | -| e | Accu | C | -| A | A0 | S | -| B | A1 | S | -| b | I | C | -| v | B | C | -| f | M | C | -| c | Circular I, B, L | X | -| C | JustCC | S | -| t | LoopTop | X | -| u | LoopBottom | X | -| k | LoopCount | X | -| x | GR | C | -| y | RET*, ASTAT, SEQSTAT, USP | X | -| w | ALL | C | -| Z | The FD-PIC GOT pointer (P3) | S | -| Y | The FD-PIC function pointer register (P1) | S | -| q0-q7 | R0-R7 individually | | -| qA | P0 | | -|-------+-------------------------------------------+------| -| Code | Constant | | -|-------+-------------------------------------------+------| -| J | 1<<N, N<32 | | -| Ks3 | imm3 | | -| Ku3 | uimm3 | | -| Ks4 | imm4 | | -| Ku4 | uimm4 | | -| Ks5 | imm5 | | -| Ku5 | uimm5 | | -| Ks7 | imm7 | | -| KN7 | -imm7 | | -| Ksh | imm16 | | -| Kuh | uimm16 | | -| L | ~(1<<N) | | -| M1 | 0xff | | -| M2 | 0xffff | | -| P0-P4 | 0-4 | | -| PA | Macflag, not M | | -| PB | Macflag, only M | | -| Q | Symbol | | - -** TODO Support all register classes -* DAG combiner -** Create test case for each Illegal SETCC case -The DAG combiner may sometimes produce illegal i16 SETCC instructions. - -*** TODO SETCC (ctlz x), 5) == const -*** TODO SETCC (and load, const) == const -*** DONE SETCC (zext x) == const -*** TODO SETCC (sext x) == const - -* Instruction selection -** TODO Better immediate constants -Like ARM, build constants as small imm + shift. - -** TODO Implement cycle counter -We have CYCLES and CYCLES2 registers, but the readcyclecounter intrinsic wants -to return i64, and the code generator doesn't know how to legalize that. 
- -** TODO Instruction alternatives -Some instructions come in different variants for example: - - D = D + D - P = P + P - -Cross combinations are not allowed: - - P = D + D (bad) - -Similarly for the subreg pseudo-instructions: - - D16L = EXTRACT_SUBREG D16, bfin_subreg_lo16 - P16L = EXTRACT_SUBREG P16, bfin_subreg_lo16 - -We want to take advantage of the alternative instructions. This could be done by -changing the DAG after instruction selection. - - -** Multipatterns for load/store -We should try to identify multipatterns for load and store instructions. The -available instruction matrix is a bit irregular. - -Loads: - -| Addr | D | P | D 16z | D 16s | D16 | D 8z | D 8s | -|------------+---+---+-------+-------+-----+------+------| -| P | * | * | * | * | * | * | * | -| P++ | * | * | * | * | | * | * | -| P-- | * | * | * | * | | * | * | -| P+uimm5m2 | | | * | * | | | | -| P+uimm6m4 | * | * | | | | | | -| P+imm16 | | | | | | * | * | -| P+imm17m2 | | | * | * | | | | -| P+imm18m4 | * | * | | | | | | -| P++P | * | | * | * | * | | | -| FP-uimm7m4 | * | * | | | | | | -| I | * | | | | * | | | -| I++ | * | | | | * | | | -| I-- | * | | | | * | | | -| I++M | * | | | | | | | - -Stores: - -| Addr | D | P | D16H | D16L | D 8 | -|------------+---+---+------+------+-----| -| P | * | * | * | * | * | -| P++ | * | * | | * | * | -| P-- | * | * | | * | * | -| P+uimm5m2 | | | | * | | -| P+uimm6m4 | * | * | | | | -| P+imm16 | | | | | * | -| P+imm17m2 | | | | * | | -| P+imm18m4 | * | * | | | | -| P++P | * | | * | * | | -| FP-uimm7m4 | * | * | | | | -| I | * | | * | * | | -| I++ | * | | * | * | | -| I-- | * | | * | * | | -| I++M | * | | | | | - -* Workarounds and features -Blackfin CPUs have bugs. Each model comes in a number of silicon revisions with -different bugs. We learn about the CPU model from the -mcpu switch. - -** Interpretation of -mcpu value -- -mcpu=bf527 refers to the latest known BF527 revision -- -mcpu=bf527-0.2 refers to silicon rev. 
0.2 -- -mcpu=bf527-any refers to all known revisions -- -mcpu=bf527-none disables all workarounds - -The -mcpu setting affects the __SILICON_REVISION__ macro and enabled workarounds: - -| -mcpu | __SILICON_REVISION__ | Workarounds | -|------------+----------------------+--------------------| -| bf527 | Def Latest | Specific to latest | -| bf527-1.3 | Def 0x0103 | Specific to 1.3 | -| bf527-any | Def 0xffff | All bf527-x.y | -| bf527-none | Undefined | None | - -These are the known cores and revisions: - -| Core | Silicon | Processors | -|-------------+--------------------+-------------------------| -| Edinburgh | 0.3, 0.4, 0.5, 0.6 | BF531 BF532 BF533 | -| Braemar | 0.2, 0.3 | BF534 BF536 BF537 | -| Stirling | 0.3, 0.4, 0.5 | BF538 BF539 | -| Moab | 0.0, 0.1, 0.2 | BF542 BF544 BF548 BF549 | -| Teton | 0.3, 0.5 | BF561 | -| Kookaburra | 0.0, 0.1, 0.2 | BF523 BF525 BF527 | -| Mockingbird | 0.0, 0.1 | BF522 BF524 BF526 | -| Brodie | 0.0, 0.1 | BF512 BF514 BF516 BF518 | - - -** Compiler implemented workarounds -Most workarounds are implemented in header files and source code using the -__ADSPBF527__ macros. A few workarounds require compiler support. 
- -| Anomaly | Macro | GCC Switch | -|----------+--------------------------------+------------------| -| Any | __WORKAROUNDS_ENABLED | | -| 05000074 | WA_05000074 | | -| 05000244 | __WORKAROUND_SPECULATIVE_SYNCS | -mcsync-anomaly | -| 05000245 | __WORKAROUND_SPECULATIVE_LOADS | -mspecld-anomaly | -| 05000257 | WA_05000257 | | -| 05000283 | WA_05000283 | | -| 05000312 | WA_LOAD_LCREGS | | -| 05000315 | WA_05000315 | | -| 05000371 | __WORKAROUND_RETS | | -| 05000426 | __WORKAROUND_INDIRECT_CALLS | Not -micplb | - -** GCC feature switches -| Switch | Description | -|---------------------------+----------------------------------------| -| -msim | Use simulator runtime | -| -momit-leaf-frame-pointer | Omit frame pointer for leaf functions | -| -mlow64k | | -| -mcsync-anomaly | | -| -mspecld-anomaly | | -| -mid-shared-library | | -| -mleaf-id-shared-library | | -| -mshared-library-id= | | -| -msep-data | Enable separate data segment | -| -mlong-calls | Use indirect calls | -| -mfast-fp | | -| -mfdpic | | -| -minline-plt | | -| -mstack-check-l1 | Do stack checking in L1 scratch memory | -| -mmulticore | Enable multicore support | -| -mcorea | Build for Core A | -| -mcoreb | Build for Core B | -| -msdram | Build for SDRAM | -| -micplb | Assume ICPLBs are enabled at runtime. | diff --git a/contrib/llvm/lib/Target/Blackfin/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/Blackfin/TargetInfo/CMakeLists.txt deleted file mode 100644 index 5ca8060..0000000 --- a/contrib/llvm/lib/Target/Blackfin/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMBlackfinInfo - BlackfinTargetInfo.cpp - ) - -add_dependencies(LLVMBlackfinInfo BlackfinCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/Blackfin/TargetInfo/Makefile b/contrib/llvm/lib/Target/Blackfin/TargetInfo/Makefile deleted file mode 100644 index c49cfbe..0000000 --- a/contrib/llvm/lib/Target/Blackfin/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/Blackfin/TargetInfo/Makefile -------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMBlackfinInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/CBackend/CMakeLists.txt b/contrib/llvm/lib/Target/CBackend/CMakeLists.txt deleted file mode 100644 index be24336..0000000 --- a/contrib/llvm/lib/Target/CBackend/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_target(CBackend - CBackend.cpp - ) diff --git a/contrib/llvm/lib/Target/CBackend/Makefile b/contrib/llvm/lib/Target/CBackend/Makefile deleted file mode 100644 index 621948a..0000000 --- a/contrib/llvm/lib/Target/CBackend/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/CBackend/Makefile ------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. 
-LIBRARYNAME = LLVMCBackend -DIRS = TargetInfo - -include $(LEVEL)/Makefile.common - -CompileCommonOpts += -Wno-format diff --git a/contrib/llvm/lib/Target/CBackend/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/CBackend/TargetInfo/CMakeLists.txt deleted file mode 100644 index 5b35fa7..0000000 --- a/contrib/llvm/lib/Target/CBackend/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMCBackendInfo - CBackendTargetInfo.cpp - ) - diff --git a/contrib/llvm/lib/Target/CBackend/TargetInfo/Makefile b/contrib/llvm/lib/Target/CBackend/TargetInfo/Makefile deleted file mode 100644 index d4d5e15..0000000 --- a/contrib/llvm/lib/Target/CBackend/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/CBackend/TargetInfo/Makefile -------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMCBackendInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/CMakeLists.txt b/contrib/llvm/lib/Target/CMakeLists.txt deleted file mode 100644 index 43ebdac..0000000 --- a/contrib/llvm/lib/Target/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -add_llvm_library(LLVMTarget - Mangler.cpp - SubtargetFeature.cpp - Target.cpp - TargetAsmLexer.cpp - TargetData.cpp - TargetELFWriterInfo.cpp - TargetFrameInfo.cpp - TargetInstrInfo.cpp - TargetIntrinsicInfo.cpp - TargetLoweringObjectFile.cpp - TargetMachine.cpp - TargetRegisterInfo.cpp - TargetSubtarget.cpp - ) diff --git a/contrib/llvm/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt deleted file mode 100644 index 8a2b59a..0000000 --- a/contrib/llvm/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -include_directories( - ${CMAKE_CURRENT_BINARY_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}/.. - ) - -add_llvm_library(LLVMCellSPUAsmPrinter - SPUAsmPrinter.cpp - ) -add_dependencies(LLVMCellSPUAsmPrinter CellSPUCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/CellSPU/AsmPrinter/Makefile b/contrib/llvm/lib/Target/CellSPU/AsmPrinter/Makefile deleted file mode 100644 index 4ec9d04..0000000 --- a/contrib/llvm/lib/Target/CellSPU/AsmPrinter/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -##===- lib/Target/CellSPU/AsmPrinter/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMCellSPUAsmPrinter - -# Hack: we need to include 'main' CellSPU target directory to grab -# private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/CellSPU/CMakeLists.txt b/contrib/llvm/lib/Target/CellSPU/CMakeLists.txt deleted file mode 100644 index ddfca37..0000000 --- a/contrib/llvm/lib/Target/CellSPU/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS SPU.td) - -tablegen(SPUGenInstrNames.inc -gen-instr-enums) -tablegen(SPUGenRegisterNames.inc -gen-register-enums) -tablegen(SPUGenAsmWriter.inc -gen-asm-writer) -tablegen(SPUGenCodeEmitter.inc -gen-emitter) -tablegen(SPUGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(SPUGenRegisterInfo.inc -gen-register-desc) -tablegen(SPUGenInstrInfo.inc -gen-instr-desc) -tablegen(SPUGenDAGISel.inc -gen-dag-isel) -tablegen(SPUGenSubtarget.inc -gen-subtarget) -tablegen(SPUGenCallingConv.inc -gen-callingconv) - -add_llvm_target(CellSPUCodeGen - SPUFrameInfo.cpp - SPUHazardRecognizers.cpp - SPUInstrInfo.cpp - SPUISelDAGToDAG.cpp - SPUISelLowering.cpp - SPUMCAsmInfo.cpp - SPURegisterInfo.cpp - SPUSubtarget.cpp - SPUTargetMachine.cpp - SPUSelectionDAGInfo.cpp - ) - -target_link_libraries (LLVMCellSPUCodeGen LLVMSelectionDAG) diff --git a/contrib/llvm/lib/Target/CellSPU/Makefile b/contrib/llvm/lib/Target/CellSPU/Makefile deleted file mode 100644 index cbdbd3c..0000000 --- a/contrib/llvm/lib/Target/CellSPU/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -##===- lib/Target/CellSPU/Makefile -------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. 
-LIBRARYNAME = LLVMCellSPUCodeGen -TARGET = SPU -BUILT_SOURCES = SPUGenInstrNames.inc SPUGenRegisterNames.inc \ - SPUGenAsmWriter.inc SPUGenCodeEmitter.inc \ - SPUGenRegisterInfo.h.inc SPUGenRegisterInfo.inc \ - SPUGenInstrInfo.inc SPUGenDAGISel.inc \ - SPUGenSubtarget.inc SPUGenCallingConv.inc - -DIRS = AsmPrinter TargetInfo - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/CellSPU/README.txt b/contrib/llvm/lib/Target/CellSPU/README.txt deleted file mode 100644 index 0e7ad35..0000000 --- a/contrib/llvm/lib/Target/CellSPU/README.txt +++ /dev/null @@ -1,92 +0,0 @@ -//===- README.txt - Notes for improving CellSPU-specific code gen ---------===// - -This code was contributed by a team from the Computer Systems Research -Department in The Aerospace Corporation: - -- Scott Michel (head bottle washer and much of the non-floating point - instructions) -- Mark Thomas (floating point instructions) -- Michael AuYeung (intrinsics) -- Chandler Carruth (LLVM expertise) -- Nehal Desai (debugging, i32 operations, RoadRunner SPU expertise) - -Some minor fixes added by Kalle Raiskila. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR -OTHERWISE. IN NO EVENT SHALL THE AEROSPACE CORPORATION BE LIABLE FOR DAMAGES -OF ANY KIND OR NATURE WHETHER BASED IN CONTRACT, TORT, OR OTHERWISE ARISING -OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE INCLUDING, WITHOUT -LIMITATION, DAMAGES RESULTING FROM LOST OR CONTAMINATED DATA, LOST PROFITS OR -REVENUE, COMPUTER MALFUNCTION, OR FOR ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL, -OR PUNITIVE DAMAGES, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES OR -SUCH DAMAGES ARE FORESEEABLE. - ---------------------------------------------------------------------------- ---WARNING--: ---WARNING--: The CellSPU work is work-in-progress and "alpha" quality code. 
---WARNING--: - -If you are brave enough to try this code or help to hack on it, be sure -to add 'spu' to configure's --enable-targets option, e.g.: - - ./configure <your_configure_flags_here> \ - --enable-targets=x86,x86_64,powerpc,spu - ---------------------------------------------------------------------------- - -TODO: -* Create a machine pass for performing dual-pipeline scheduling specifically - for CellSPU, and insert branch prediction instructions as needed. - -* i32 instructions: - - * i32 division (work-in-progress) - -* i64 support (see i64operations.c test harness): - - * shifts and comparison operators: done - * sign and zero extension: done - * addition: done - * subtraction: needed - * multiplication: done - -* i128 support: - - * zero extension, any extension: done - * sign extension: needed - * arithmetic operators (add, sub, mul, div): needed - * logical operations (and, or, shl, srl, sra, xor, nor, nand): needed - - * or: done - -* f64 support - - * Comparison operators: - SETOEQ unimplemented - SETOGT unimplemented - SETOGE unimplemented - SETOLT unimplemented - SETOLE unimplemented - SETONE unimplemented - SETO done (lowered) - SETUO done (lowered) - SETUEQ unimplemented - SETUGT unimplemented - SETUGE unimplemented - SETULT unimplemented - SETULE unimplemented - SETUNE unimplemented - -* LLVM vector suport - - * VSETCC needs to be implemented. It's pretty straightforward to code, but - needs implementation. - -* Intrinsics - - * spu.h instrinsics added but not tested. Need to have an operational - llvm-spu-gcc in order to write a unit test harness. 
- -===-------------------------------------------------------------------------=== diff --git a/contrib/llvm/lib/Target/CellSPU/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/CellSPU/TargetInfo/CMakeLists.txt deleted file mode 100644 index 928d0fe..0000000 --- a/contrib/llvm/lib/Target/CellSPU/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMCellSPUInfo - CellSPUTargetInfo.cpp - ) - -add_dependencies(LLVMCellSPUInfo CellSPUCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/CellSPU/TargetInfo/Makefile b/contrib/llvm/lib/Target/CellSPU/TargetInfo/Makefile deleted file mode 100644 index 9cb6827..0000000 --- a/contrib/llvm/lib/Target/CellSPU/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/CellSPU/TargetInfo/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMCellSPUInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/CppBackend/CMakeLists.txt b/contrib/llvm/lib/Target/CppBackend/CMakeLists.txt deleted file mode 100644 index f8182b8..0000000 --- a/contrib/llvm/lib/Target/CppBackend/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_target(CppBackend - CPPBackend.cpp - ) diff --git a/contrib/llvm/lib/Target/CppBackend/Makefile b/contrib/llvm/lib/Target/CppBackend/Makefile deleted file mode 100644 index d75f4e8..0000000 --- a/contrib/llvm/lib/Target/CppBackend/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/CppBackend/Makefile --- ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMCppBackend -DIRS = TargetInfo - -include $(LEVEL)/Makefile.common - -CompileCommonOpts += -Wno-format diff --git a/contrib/llvm/lib/Target/CppBackend/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/CppBackend/TargetInfo/CMakeLists.txt deleted file mode 100644 index edaf5d3..0000000 --- a/contrib/llvm/lib/Target/CppBackend/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMCppBackendInfo - CppBackendTargetInfo.cpp - ) - diff --git a/contrib/llvm/lib/Target/CppBackend/TargetInfo/Makefile b/contrib/llvm/lib/Target/CppBackend/TargetInfo/Makefile deleted file mode 100644 index 6e68283..0000000 --- a/contrib/llvm/lib/Target/CppBackend/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/CppBackend/TargetInfo/Makefile -----------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. 
-# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMCppBackendInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt deleted file mode 100644 index fac2c19..0000000 --- a/contrib/llvm/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -include_directories( - ${CMAKE_CURRENT_BINARY_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}/.. - ) - -add_llvm_library(LLVMMBlazeAsmPrinter - MBlazeAsmPrinter.cpp - ) -add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/MBlaze/AsmPrinter/Makefile b/contrib/llvm/lib/Target/MBlaze/AsmPrinter/Makefile deleted file mode 100644 index c44651c..0000000 --- a/contrib/llvm/lib/Target/MBlaze/AsmPrinter/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -##===- lib/Target/MBlaze/AsmPrinter/Makefile ---------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMMBlazeAsmPrinter - -# Hack: we need to include 'main' MBlaze target directory to grab -# private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/MBlaze/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/CMakeLists.txt deleted file mode 100644 index 7f85bf8..0000000 --- a/contrib/llvm/lib/Target/MBlaze/CMakeLists.txt +++ /dev/null @@ -1,28 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS MBlaze.td) - -tablegen(MBlazeGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(MBlazeGenRegisterNames.inc -gen-register-enums) -tablegen(MBlazeGenRegisterInfo.inc -gen-register-desc) -tablegen(MBlazeGenInstrNames.inc -gen-instr-enums) -tablegen(MBlazeGenInstrInfo.inc -gen-instr-desc) -tablegen(MBlazeGenAsmWriter.inc -gen-asm-writer) -tablegen(MBlazeGenDAGISel.inc -gen-dag-isel) -tablegen(MBlazeGenCallingConv.inc -gen-callingconv) -tablegen(MBlazeGenSubtarget.inc -gen-subtarget) -tablegen(MBlazeGenIntrinsics.inc -gen-tgt-intrinsic) - -add_llvm_target(MBlazeCodeGen - MBlazeDelaySlotFiller.cpp - MBlazeInstrInfo.cpp - MBlazeISelDAGToDAG.cpp - MBlazeISelLowering.cpp - MBlazeMCAsmInfo.cpp - MBlazeRegisterInfo.cpp - MBlazeSubtarget.cpp - MBlazeTargetMachine.cpp - MBlazeTargetObjectFile.cpp - MBlazeIntrinsicInfo.cpp - MBlazeSelectionDAGInfo.cpp - ) - -target_link_libraries (LLVMMBlazeCodeGen LLVMSelectionDAG) diff --git a/contrib/llvm/lib/Target/MBlaze/Makefile b/contrib/llvm/lib/Target/MBlaze/Makefile deleted file mode 100644 index 19e508c..0000000 --- a/contrib/llvm/lib/Target/MBlaze/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -##===- lib/Target/MBlaze/Makefile --------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../.. -LIBRARYNAME = LLVMMBlazeCodeGen -TARGET = MBlaze - -# Make sure that tblgen is run, first thing. 
-BUILT_SOURCES = MBlazeGenRegisterInfo.h.inc MBlazeGenRegisterNames.inc \ - MBlazeGenRegisterInfo.inc MBlazeGenInstrNames.inc \ - MBlazeGenInstrInfo.inc MBlazeGenAsmWriter.inc \ - MBlazeGenDAGISel.inc MBlazeGenCallingConv.inc \ - MBlazeGenSubtarget.inc MBlazeGenIntrinsics.inc - -DIRS = AsmPrinter TargetInfo - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Target/MBlaze/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/TargetInfo/CMakeLists.txt deleted file mode 100644 index 5afb14d..0000000 --- a/contrib/llvm/lib/Target/MBlaze/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMMBlazeInfo - MBlazeTargetInfo.cpp - ) - -add_dependencies(LLVMMBlazeInfo MBlazeCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/MBlaze/TargetInfo/Makefile b/contrib/llvm/lib/Target/MBlaze/TargetInfo/Makefile deleted file mode 100644 index fb7ea11..0000000 --- a/contrib/llvm/lib/Target/MBlaze/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/MBlaze/TargetInfo/Makefile ---------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMMBlazeInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/MSP430/AsmPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/MSP430/AsmPrinter/CMakeLists.txt deleted file mode 100644 index 4b1f4e6..0000000 --- a/contrib/llvm/lib/Target/MSP430/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMMSP430AsmPrinter - MSP430AsmPrinter.cpp - MSP430InstPrinter.cpp - MSP430MCInstLower.cpp - ) -add_dependencies(LLVMMSP430AsmPrinter MSP430CodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/MSP430/AsmPrinter/Makefile b/contrib/llvm/lib/Target/MSP430/AsmPrinter/Makefile deleted file mode 100644 index a5293ab..0000000 --- a/contrib/llvm/lib/Target/MSP430/AsmPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/MSP430/AsmPrinter/Makefile ---------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMMSP430AsmPrinter - -# Hack: we need to include 'main' MSP430 target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/MSP430/CMakeLists.txt b/contrib/llvm/lib/Target/MSP430/CMakeLists.txt deleted file mode 100644 index a3f60d2..0000000 --- a/contrib/llvm/lib/Target/MSP430/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS MSP430.td) - -tablegen(MSP430GenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(MSP430GenRegisterNames.inc -gen-register-enums) -tablegen(MSP430GenRegisterInfo.inc -gen-register-desc) -tablegen(MSP430GenInstrNames.inc -gen-instr-enums) -tablegen(MSP430GenInstrInfo.inc -gen-instr-desc) -tablegen(MSP430GenAsmWriter.inc -gen-asm-writer) -tablegen(MSP430GenDAGISel.inc -gen-dag-isel) -tablegen(MSP430GenCallingConv.inc -gen-callingconv) -tablegen(MSP430GenSubtarget.inc -gen-subtarget) - -add_llvm_target(MSP430CodeGen - MSP430BranchSelector.cpp - MSP430ISelDAGToDAG.cpp - MSP430ISelLowering.cpp - MSP430InstrInfo.cpp - MSP430MCAsmInfo.cpp - MSP430RegisterInfo.cpp - MSP430Subtarget.cpp - MSP430TargetMachine.cpp - 
MSP430SelectionDAGInfo.cpp - ) - -target_link_libraries (LLVMMSP430CodeGen LLVMSelectionDAG) diff --git a/contrib/llvm/lib/Target/MSP430/Makefile b/contrib/llvm/lib/Target/MSP430/Makefile deleted file mode 100644 index b1f33d6..0000000 --- a/contrib/llvm/lib/Target/MSP430/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -##===- lib/Target/MSP430/Makefile --------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMMSP430CodeGen -TARGET = MSP430 - -# Make sure that tblgen is run, first thing. -BUILT_SOURCES = MSP430GenRegisterInfo.h.inc MSP430GenRegisterNames.inc \ - MSP430GenRegisterInfo.inc MSP430GenInstrNames.inc \ - MSP430GenInstrInfo.inc MSP430GenAsmWriter.inc \ - MSP430GenDAGISel.inc MSP430GenCallingConv.inc \ - MSP430GenSubtarget.inc - -DIRS = AsmPrinter TargetInfo - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Target/MSP430/README.txt b/contrib/llvm/lib/Target/MSP430/README.txt deleted file mode 100644 index 5b9634b..0000000 --- a/contrib/llvm/lib/Target/MSP430/README.txt +++ /dev/null @@ -1,40 +0,0 @@ -//===---------------------------------------------------------------------===// -// MSP430 backend. -//===---------------------------------------------------------------------===// - -DISCLAIMER: Thid backend should be considered as highly experimental. I never -seen nor worked with this MCU, all information was gathered from datasheet -only. The original intention of making this backend was to write documentation -of form "How to write backend for dummies" :) Thes notes hopefully will be -available pretty soon. - -Some things are incomplete / not implemented yet (this list surely is not -complete as well): - -1. 
Verify, how stuff is handling implicit zext with 8 bit operands (this might -be modelled currently in improper way - should we need to mark the superreg as -def for every 8 bit instruction?). - -2. Libcalls: multiplication, division, remainder. Note, that calling convention -for libcalls is incomptible with calling convention of libcalls of msp430-gcc -(these cannot be used though due to license restriction). - -3. Implement multiplication / division by constant (dag combiner hook?). - -4. Implement non-constant shifts. - -5. Implement varargs stuff. - -6. Verify and fix (if needed) how's stuff playing with i32 / i64. - -7. Implement floating point stuff (softfp?) - -8. Implement instruction encoding for (possible) direct code emission in the -future. - -9. Since almost all instructions set flags - implement brcond / select in better -way (currently they emit explicit comparison). - -10. Handle imm in comparisons in better way (see comment in MSP430InstrInfo.td) - -11. Implement hooks for better memory op folding, etc. diff --git a/contrib/llvm/lib/Target/MSP430/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/MSP430/TargetInfo/CMakeLists.txt deleted file mode 100644 index 1d408d0..0000000 --- a/contrib/llvm/lib/Target/MSP430/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMMSP430Info - MSP430TargetInfo.cpp - ) - -add_dependencies(LLVMMSP430Info MSP430Table_gen) diff --git a/contrib/llvm/lib/Target/MSP430/TargetInfo/Makefile b/contrib/llvm/lib/Target/MSP430/TargetInfo/Makefile deleted file mode 100644 index abb08f2..0000000 --- a/contrib/llvm/lib/Target/MSP430/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/MSP430/TargetInfo/Makefile ---------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. 
-# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMMSP430Info - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/Makefile b/contrib/llvm/lib/Target/Makefile deleted file mode 100644 index 50a360f..0000000 --- a/contrib/llvm/lib/Target/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -#===- lib/Target/Makefile ----------------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../.. -LIBRARYNAME = LLVMTarget -BUILD_ARCHIVE = 1 - -# We include this early so we can access the value of TARGETS_TO_BUILD as the -# value for PARALLEL_DIRS which must be set before Makefile.rules is included -include $(LEVEL)/Makefile.config - -PARALLEL_DIRS := $(TARGETS_TO_BUILD) - -include $(LLVM_SRC_ROOT)/Makefile.rules diff --git a/contrib/llvm/lib/Target/Mips/AsmPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/Mips/AsmPrinter/CMakeLists.txt deleted file mode 100644 index d3099d2..0000000 --- a/contrib/llvm/lib/Target/Mips/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -include_directories( - ${CMAKE_CURRENT_BINARY_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}/.. 
- ) - -add_llvm_library(LLVMMipsAsmPrinter - MipsAsmPrinter.cpp - ) -add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/Mips/AsmPrinter/Makefile b/contrib/llvm/lib/Target/Mips/AsmPrinter/Makefile deleted file mode 100644 index b1efe9b..0000000 --- a/contrib/llvm/lib/Target/Mips/AsmPrinter/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -##===- lib/Target/Mips/AsmPrinter/Makefile -----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMMipsAsmPrinter - -# Hack: we need to include 'main' Mips target directory to grab -# private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/Mips/CMakeLists.txt b/contrib/llvm/lib/Target/Mips/CMakeLists.txt deleted file mode 100644 index a77802a..0000000 --- a/contrib/llvm/lib/Target/Mips/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS Mips.td) - -tablegen(MipsGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(MipsGenRegisterNames.inc -gen-register-enums) -tablegen(MipsGenRegisterInfo.inc -gen-register-desc) -tablegen(MipsGenInstrNames.inc -gen-instr-enums) -tablegen(MipsGenInstrInfo.inc -gen-instr-desc) -tablegen(MipsGenAsmWriter.inc -gen-asm-writer) -tablegen(MipsGenDAGISel.inc -gen-dag-isel) -tablegen(MipsGenCallingConv.inc -gen-callingconv) -tablegen(MipsGenSubtarget.inc -gen-subtarget) - -add_llvm_target(MipsCodeGen - MipsDelaySlotFiller.cpp - MipsInstrInfo.cpp - MipsISelDAGToDAG.cpp - MipsISelLowering.cpp - MipsMCAsmInfo.cpp - MipsRegisterInfo.cpp - MipsSubtarget.cpp - MipsTargetMachine.cpp - MipsTargetObjectFile.cpp - MipsSelectionDAGInfo.cpp - ) - -target_link_libraries (LLVMMipsCodeGen LLVMSelectionDAG) diff 
--git a/contrib/llvm/lib/Target/Mips/Makefile b/contrib/llvm/lib/Target/Mips/Makefile deleted file mode 100644 index 2ed8d77..0000000 --- a/contrib/llvm/lib/Target/Mips/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -##===- lib/Target/Mips/Makefile ----------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMMipsCodeGen -TARGET = Mips - -# Make sure that tblgen is run, first thing. -BUILT_SOURCES = MipsGenRegisterInfo.h.inc MipsGenRegisterNames.inc \ - MipsGenRegisterInfo.inc MipsGenInstrNames.inc \ - MipsGenInstrInfo.inc MipsGenAsmWriter.inc \ - MipsGenDAGISel.inc MipsGenCallingConv.inc \ - MipsGenSubtarget.inc - -DIRS = AsmPrinter TargetInfo - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Target/Mips/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/Mips/TargetInfo/CMakeLists.txt deleted file mode 100644 index 6e5d56b..0000000 --- a/contrib/llvm/lib/Target/Mips/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMMipsInfo - MipsTargetInfo.cpp - ) - -add_dependencies(LLVMMipsInfo MipsCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/Mips/TargetInfo/Makefile b/contrib/llvm/lib/Target/Mips/TargetInfo/Makefile deleted file mode 100644 index 32f4e16..0000000 --- a/contrib/llvm/lib/Target/Mips/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/Mips/TargetInfo/Makefile -----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. 
-# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMMipsInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/PIC16/AsmPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/PIC16/AsmPrinter/CMakeLists.txt deleted file mode 100644 index d36bb8e..0000000 --- a/contrib/llvm/lib/Target/PIC16/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -include_directories( - ${CMAKE_CURRENT_BINARY_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}/.. - ) - -add_llvm_library(LLVMPIC16AsmPrinter - PIC16AsmPrinter.cpp - ) -add_dependencies(LLVMPIC16AsmPrinter PIC16CodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/PIC16/AsmPrinter/Makefile b/contrib/llvm/lib/Target/PIC16/AsmPrinter/Makefile deleted file mode 100644 index e3c0684..0000000 --- a/contrib/llvm/lib/Target/PIC16/AsmPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/PIC16/AsmPrinter/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMPIC16AsmPrinter - -# Hack: we need to include 'main' pic16 target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/PIC16/CMakeLists.txt b/contrib/llvm/lib/Target/PIC16/CMakeLists.txt deleted file mode 100644 index 2b6cb9e..0000000 --- a/contrib/llvm/lib/Target/PIC16/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS PIC16.td) - -tablegen(PIC16GenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(PIC16GenRegisterNames.inc -gen-register-enums) -tablegen(PIC16GenRegisterInfo.inc -gen-register-desc) -tablegen(PIC16GenInstrNames.inc -gen-instr-enums) -tablegen(PIC16GenInstrInfo.inc -gen-instr-desc) -tablegen(PIC16GenAsmWriter.inc -gen-asm-writer) -tablegen(PIC16GenDAGISel.inc -gen-dag-isel) -tablegen(PIC16GenCallingConv.inc -gen-callingconv) -tablegen(PIC16GenSubtarget.inc -gen-subtarget) - -add_llvm_target(PIC16CodeGen - PIC16DebugInfo.cpp - PIC16InstrInfo.cpp - PIC16ISelDAGToDAG.cpp - PIC16ISelLowering.cpp - PIC16MemSelOpt.cpp - PIC16MCAsmInfo.cpp - PIC16RegisterInfo.cpp - PIC16Section.cpp - PIC16Subtarget.cpp - PIC16TargetMachine.cpp - PIC16TargetObjectFile.cpp - PIC16SelectionDAGInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/PIC16/Makefile b/contrib/llvm/lib/Target/PIC16/Makefile deleted file mode 100644 index 9e784d1..0000000 --- a/contrib/llvm/lib/Target/PIC16/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -##===- lib/Target/PIC16/Makefile ---------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMPIC16CodeGen -TARGET = PIC16 - -# Make sure that tblgen is run, first thing. 
-BUILT_SOURCES = PIC16GenRegisterInfo.h.inc PIC16GenRegisterNames.inc \ - PIC16GenRegisterInfo.inc PIC16GenInstrNames.inc \ - PIC16GenInstrInfo.inc PIC16GenAsmWriter.inc \ - PIC16GenDAGISel.inc PIC16GenCallingConv.inc \ - PIC16GenSubtarget.inc - -DIRS = AsmPrinter TargetInfo PIC16Passes - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Target/PIC16/PIC16Passes/Makefile b/contrib/llvm/lib/Target/PIC16/PIC16Passes/Makefile deleted file mode 100644 index 9684b8d..0000000 --- a/contrib/llvm/lib/Target/PIC16/PIC16Passes/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/PIC16/PIC16Passes/Makefile -----------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -TARGET = PIC16 -LIBRARYNAME = LLVMpic16passes -BUILD_ARCHIVE = 1 - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Target/PIC16/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/PIC16/TargetInfo/CMakeLists.txt deleted file mode 100644 index bfc6ff4..0000000 --- a/contrib/llvm/lib/Target/PIC16/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMPIC16Info - PIC16TargetInfo.cpp - ) - -add_dependencies(LLVMPIC16Info PIC16Table_gen) diff --git a/contrib/llvm/lib/Target/PIC16/TargetInfo/Makefile b/contrib/llvm/lib/Target/PIC16/TargetInfo/Makefile deleted file mode 100644 index 76609f6..0000000 --- a/contrib/llvm/lib/Target/PIC16/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/PIC16/TargetInfo/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. 
-# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMPIC16Info - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt deleted file mode 100644 index 42cd486..0000000 --- a/contrib/llvm/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMPowerPCAsmPrinter - PPCAsmPrinter.cpp - ) -add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/PowerPC/AsmPrinter/Makefile b/contrib/llvm/lib/Target/PowerPC/AsmPrinter/Makefile deleted file mode 100644 index bd5dce1..0000000 --- a/contrib/llvm/lib/Target/PowerPC/AsmPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/PowerPC/AsmPrinter/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMPowerPCAsmPrinter - -# Hack: we need to include 'main' PowerPC target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/PowerPC/CMakeLists.txt b/contrib/llvm/lib/Target/PowerPC/CMakeLists.txt deleted file mode 100644 index 7ffc5eb..0000000 --- a/contrib/llvm/lib/Target/PowerPC/CMakeLists.txt +++ /dev/null @@ -1,30 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS PPC.td) - -tablegen(PPCGenInstrNames.inc -gen-instr-enums) -tablegen(PPCGenRegisterNames.inc -gen-register-enums) -tablegen(PPCGenAsmWriter.inc -gen-asm-writer) -tablegen(PPCGenCodeEmitter.inc -gen-emitter) -tablegen(PPCGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(PPCGenRegisterInfo.inc -gen-register-desc) -tablegen(PPCGenInstrInfo.inc -gen-instr-desc) -tablegen(PPCGenDAGISel.inc -gen-dag-isel) -tablegen(PPCGenCallingConv.inc -gen-callingconv) -tablegen(PPCGenSubtarget.inc -gen-subtarget) - -add_llvm_target(PowerPCCodeGen - PPCBranchSelector.cpp - PPCCodeEmitter.cpp - PPCHazardRecognizers.cpp - PPCInstrInfo.cpp - PPCISelDAGToDAG.cpp - PPCISelLowering.cpp - PPCJITInfo.cpp - PPCMCAsmInfo.cpp - PPCPredicates.cpp - PPCRegisterInfo.cpp - PPCSubtarget.cpp - PPCTargetMachine.cpp - PPCSelectionDAGInfo.cpp - ) - -target_link_libraries (LLVMPowerPCCodeGen LLVMSelectionDAG) diff --git a/contrib/llvm/lib/Target/PowerPC/Makefile b/contrib/llvm/lib/Target/PowerPC/Makefile deleted file mode 100644 index 1265f1d..0000000 --- a/contrib/llvm/lib/Target/PowerPC/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -##===- lib/Target/PowerPC/Makefile -------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMPowerPCCodeGen -TARGET = PPC - -# Make sure that tblgen is run, first thing. 
-BUILT_SOURCES = PPCGenInstrNames.inc PPCGenRegisterNames.inc \ - PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \ - PPCGenRegisterInfo.h.inc PPCGenRegisterInfo.inc \ - PPCGenInstrInfo.inc PPCGenDAGISel.inc \ - PPCGenSubtarget.inc PPCGenCallingConv.inc - -DIRS = AsmPrinter TargetInfo - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/PowerPC/README.txt b/contrib/llvm/lib/Target/PowerPC/README.txt deleted file mode 100644 index 3465779..0000000 --- a/contrib/llvm/lib/Target/PowerPC/README.txt +++ /dev/null @@ -1,914 +0,0 @@ -//===- README.txt - Notes for improving PowerPC-specific code gen ---------===// - -TODO: -* gpr0 allocation -* implement do-loop -> bdnz transform -* lmw/stmw pass a la arm load store optimizer for prolog/epilog - -===-------------------------------------------------------------------------=== - -On PPC64, this: - -long f2 (long x) { return 0xfffffff000000000UL; } -long f3 (long x) { return 0x1ffffffffUL; } - -could compile into: - -_f2: - li r3,-1 - rldicr r3,r3,0,27 - blr -_f3: - li r3,-1 - rldicl r3,r3,0,31 - blr - -we produce: - -_f2: - lis r2, 4095 - ori r2, r2, 65535 - sldi r3, r2, 36 - blr -_f3: - li r2, 1 - sldi r2, r2, 32 - oris r2, r2, 65535 - ori r3, r2, 65535 - blr - - -===-------------------------------------------------------------------------=== - -Support 'update' load/store instructions. These are cracked on the G5, but are -still a codesize win. 
- -With preinc enabled, this: - -long *%test4(long *%X, long *%dest) { - %Y = getelementptr long* %X, int 4 - %A = load long* %Y - store long %A, long* %dest - ret long* %Y -} - -compiles to: - -_test4: - mr r2, r3 - lwzu r5, 32(r2) - lwz r3, 36(r3) - stw r5, 0(r4) - stw r3, 4(r4) - mr r3, r2 - blr - -with -sched=list-burr, I get: - -_test4: - lwz r2, 36(r3) - lwzu r5, 32(r3) - stw r2, 4(r4) - stw r5, 0(r4) - blr - -===-------------------------------------------------------------------------=== - -We compile the hottest inner loop of viterbi to: - - li r6, 0 - b LBB1_84 ;bb432.i -LBB1_83: ;bb420.i - lbzx r8, r5, r7 - addi r6, r7, 1 - stbx r8, r4, r7 -LBB1_84: ;bb432.i - mr r7, r6 - cmplwi cr0, r7, 143 - bne cr0, LBB1_83 ;bb420.i - -The CBE manages to produce: - - li r0, 143 - mtctr r0 -loop: - lbzx r2, r2, r11 - stbx r0, r2, r9 - addi r2, r2, 1 - bdz later - b loop - -This could be much better (bdnz instead of bdz) but it still beats us. If we -produced this with bdnz, the loop would be a single dispatch group. - -===-------------------------------------------------------------------------=== - -Compile: - -void foo(int *P) { - if (P) *P = 0; -} - -into: - -_foo: - cmpwi cr0,r3,0 - beqlr cr0 - li r0,0 - stw r0,0(r3) - blr - -This is effectively a simple form of predication. - -===-------------------------------------------------------------------------=== - -Lump the constant pool for each function into ONE pic object, and reference -pieces of it as offsets from the start. 
For functions like this (contrived -to have lots of constants obviously): - -double X(double Y) { return (Y*1.23 + 4.512)*2.34 + 14.38; } - -We generate: - -_X: - lis r2, ha16(.CPI_X_0) - lfd f0, lo16(.CPI_X_0)(r2) - lis r2, ha16(.CPI_X_1) - lfd f2, lo16(.CPI_X_1)(r2) - fmadd f0, f1, f0, f2 - lis r2, ha16(.CPI_X_2) - lfd f1, lo16(.CPI_X_2)(r2) - lis r2, ha16(.CPI_X_3) - lfd f2, lo16(.CPI_X_3)(r2) - fmadd f1, f0, f1, f2 - blr - -It would be better to materialize .CPI_X into a register, then use immediates -off of the register to avoid the lis's. This is even more important in PIC -mode. - -Note that this (and the static variable version) is discussed here for GCC: -http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html - -Here's another example (the sgn function): -double testf(double a) { - return a == 0.0 ? 0.0 : (a > 0.0 ? 1.0 : -1.0); -} - -it produces a BB like this: -LBB1_1: ; cond_true - lis r2, ha16(LCPI1_0) - lfs f0, lo16(LCPI1_0)(r2) - lis r2, ha16(LCPI1_1) - lis r3, ha16(LCPI1_2) - lfs f2, lo16(LCPI1_2)(r3) - lfs f3, lo16(LCPI1_1)(r2) - fsub f0, f0, f1 - fsel f1, f0, f2, f3 - blr - -===-------------------------------------------------------------------------=== - -PIC Code Gen IPO optimization: - -Squish small scalar globals together into a single global struct, allowing the -address of the struct to be CSE'd, avoiding PIC accesses (also reduces the size -of the GOT on targets with one). - -Note that this is discussed here for GCC: -http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html - -===-------------------------------------------------------------------------=== - -Implement Newton-Rhapson method for improving estimate instructions to the -correct accuracy, and implementing divide as multiply by reciprocal when it has -more than one use. Itanium would want this too. 
- -===-------------------------------------------------------------------------=== - -Compile offsets from allocas: - -int *%test() { - %X = alloca { int, int } - %Y = getelementptr {int,int}* %X, int 0, uint 1 - ret int* %Y -} - -into a single add, not two: - -_test: - addi r2, r1, -8 - addi r3, r2, 4 - blr - ---> important for C++. - -===-------------------------------------------------------------------------=== - -No loads or stores of the constants should be needed: - -struct foo { double X, Y; }; -void xxx(struct foo F); -void bar() { struct foo R = { 1.0, 2.0 }; xxx(R); } - -===-------------------------------------------------------------------------=== - -Darwin Stub removal: - -We still generate calls to foo$stub, and stubs, on Darwin. This is not -necessary when building with the Leopard (10.5) or later linker, as stubs are -generated by ld when necessary. Parameterizing this based on the deployment -target (-mmacosx-version-min) is probably enough. x86-32 does this right, see -its logic. - -===-------------------------------------------------------------------------=== - -Darwin Stub LICM optimization: - -Loops like this: - - for (...) bar(); - -Have to go through an indirect stub if bar is external or linkonce. It would -be better to compile it as: - - fp = &bar; - for (...) fp(); - -which only computes the address of bar once (instead of each time through the -stub). This is Darwin specific and would have to be done in the code generator. -Probably not a win on x86. - -===-------------------------------------------------------------------------=== - -Simple IPO for argument passing, change: - void foo(int X, double Y, int Z) -> void foo(int X, int Z, double Y) - -the Darwin ABI specifies that any integer arguments in the first 32 bytes worth -of arguments get assigned to r3 through r10. That is, if you have a function -foo(int, double, int) you get r3, f1, r6, since the 64 bit double ate up the -argument bytes for r4 and r5. 
The trick then would be to shuffle the argument -order for functions we can internalize so that the maximum number of -integers/pointers get passed in regs before you see any of the fp arguments. - -Instead of implementing this, it would actually probably be easier to just -implement a PPC fastcc, where we could do whatever we wanted to the CC, -including having this work sanely. - -===-------------------------------------------------------------------------=== - -Fix Darwin FP-In-Integer Registers ABI - -Darwin passes doubles in structures in integer registers, which is very very -bad. Add something like a BIT_CONVERT to LLVM, then do an i-p transformation -that percolates these things out of functions. - -Check out how horrible this is: -http://gcc.gnu.org/ml/gcc/2005-10/msg01036.html - -This is an extension of "interprocedural CC unmunging" that can't be done with -just fastcc. - -===-------------------------------------------------------------------------=== - -Compile this: - -int foo(int a) { - int b = (a < 8); - if (b) { - return b * 3; // ignore the fact that this is always 3. - } else { - return 2; - } -} - -into something not this: - -_foo: -1) cmpwi cr7, r3, 8 - mfcr r2, 1 - rlwinm r2, r2, 29, 31, 31 -1) cmpwi cr0, r3, 7 - bgt cr0, LBB1_2 ; UnifiedReturnBlock -LBB1_1: ; then - rlwinm r2, r2, 0, 31, 31 - mulli r3, r2, 3 - blr -LBB1_2: ; UnifiedReturnBlock - li r3, 2 - blr - -In particular, the two compares (marked 1) could be shared by reversing one. -This could be done in the dag combiner, by swapping a BR_CC when a SETCC of the -same operands (but backwards) exists. In this case, this wouldn't save us -anything though, because the compares still wouldn't be shared. - -===-------------------------------------------------------------------------=== - -We should custom expand setcc instead of pretending that we have it. That -would allow us to expose the access of the crbit after the mfcr, allowing -that access to be trivially folded into other ops. 
A simple example: - -int foo(int a, int b) { return (a < b) << 4; } - -compiles into: - -_foo: - cmpw cr7, r3, r4 - mfcr r2, 1 - rlwinm r2, r2, 29, 31, 31 - slwi r3, r2, 4 - blr - -===-------------------------------------------------------------------------=== - -Fold add and sub with constant into non-extern, non-weak addresses so this: - -static int a; -void bar(int b) { a = b; } -void foo(unsigned char *c) { - *c = a; -} - -So that - -_foo: - lis r2, ha16(_a) - la r2, lo16(_a)(r2) - lbz r2, 3(r2) - stb r2, 0(r3) - blr - -Becomes - -_foo: - lis r2, ha16(_a+3) - lbz r2, lo16(_a+3)(r2) - stb r2, 0(r3) - blr - -===-------------------------------------------------------------------------=== - -We generate really bad code for this: - -int f(signed char *a, _Bool b, _Bool c) { - signed char t = 0; - if (b) t = *a; - if (c) *a = t; -} - -===-------------------------------------------------------------------------=== - -This: -int test(unsigned *P) { return *P >> 24; } - -Should compile to: - -_test: - lbz r3,0(r3) - blr - -not: - -_test: - lwz r2, 0(r3) - srwi r3, r2, 24 - blr - -===-------------------------------------------------------------------------=== - -On the G5, logical CR operations are more expensive in their three -address form: ops that read/write the same register are half as expensive as -those that read from two registers that are different from their destination. - -We should model this with two separate instructions. The isel should generate -the "two address" form of the instructions. When the register allocator -detects that it needs to insert a copy due to the two-addresness of the CR -logical op, it will invoke PPCInstrInfo::convertToThreeAddress. At this point -we can convert to the "three address" instruction, to save code space. - -This only matters when we start generating cr logical ops. 
- -===-------------------------------------------------------------------------=== - -We should compile these two functions to the same thing: - -#include <stdlib.h> -void f(int a, int b, int *P) { - *P = (a-b)>=0?(a-b):(b-a); -} -void g(int a, int b, int *P) { - *P = abs(a-b); -} - -Further, they should compile to something better than: - -_g: - subf r2, r4, r3 - subfic r3, r2, 0 - cmpwi cr0, r2, -1 - bgt cr0, LBB2_2 ; entry -LBB2_1: ; entry - mr r2, r3 -LBB2_2: ; entry - stw r2, 0(r5) - blr - -GCC produces: - -_g: - subf r4,r4,r3 - srawi r2,r4,31 - xor r0,r2,r4 - subf r0,r2,r0 - stw r0,0(r5) - blr - -... which is much nicer. - -This theoretically may help improve twolf slightly (used in dimbox.c:142?). - -===-------------------------------------------------------------------------=== - -PR5945: This: -define i32 @clamp0g(i32 %a) { -entry: - %cmp = icmp slt i32 %a, 0 - %sel = select i1 %cmp, i32 0, i32 %a - ret i32 %sel -} - -Is compiled to this with the PowerPC (32-bit) backend: - -_clamp0g: - cmpwi cr0, r3, 0 - li r2, 0 - blt cr0, LBB1_2 -; BB#1: ; %entry - mr r2, r3 -LBB1_2: ; %entry - mr r3, r2 - blr - -This could be reduced to the much simpler: - -_clamp0g: - srawi r2, r3, 31 - andc r3, r3, r2 - blr - -===-------------------------------------------------------------------------=== - -int foo(int N, int ***W, int **TK, int X) { - int t, i; - - for (t = 0; t < N; ++t) - for (i = 0; i < 4; ++i) - W[t / X][i][t % X] = TK[i][t]; - - return 5; -} - -We generate relatively atrocious code for this loop compared to gcc. 
- -We could also strength reduce the rem and the div: -http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-TM-600.pdf - -===-------------------------------------------------------------------------=== - -float foo(float X) { return (int)(X); } - -Currently produces: - -_foo: - fctiwz f0, f1 - stfd f0, -8(r1) - lwz r2, -4(r1) - extsw r2, r2 - std r2, -16(r1) - lfd f0, -16(r1) - fcfid f0, f0 - frsp f1, f0 - blr - -We could use a target dag combine to turn the lwz/extsw into an lwa when the -lwz has a single use. Since LWA is cracked anyway, this would be a codesize -win only. - -===-------------------------------------------------------------------------=== - -We generate ugly code for this: - -void func(unsigned int *ret, float dx, float dy, float dz, float dw) { - unsigned code = 0; - if(dx < -dw) code |= 1; - if(dx > dw) code |= 2; - if(dy < -dw) code |= 4; - if(dy > dw) code |= 8; - if(dz < -dw) code |= 16; - if(dz > dw) code |= 32; - *ret = code; -} - -===-------------------------------------------------------------------------=== - -Complete the signed i32 to FP conversion code using 64-bit registers -transformation, good for PI. See PPCISelLowering.cpp, this comment: - - // FIXME: disable this lowered code. This generates 64-bit register values, - // and we don't model the fact that the top part is clobbered by calls. We - // need to flag these together so that the value isn't live across a call. - //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); - -Also, if the registers are spilled to the stack, we have to ensure that all -64-bits of them are save/restored, otherwise we will miscompile the code. It -sounds like we need to get the 64-bit register classes going. 
- -===-------------------------------------------------------------------------=== - -%struct.B = type { i8, [3 x i8] } - -define void @bar(%struct.B* %b) { -entry: - %tmp = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1] - %tmp = load i32* %tmp ; <uint> [#uses=1] - %tmp3 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1] - %tmp4 = load i32* %tmp3 ; <uint> [#uses=1] - %tmp8 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=2] - %tmp9 = load i32* %tmp8 ; <uint> [#uses=1] - %tmp4.mask17 = shl i32 %tmp4, i8 1 ; <uint> [#uses=1] - %tmp1415 = and i32 %tmp4.mask17, 2147483648 ; <uint> [#uses=1] - %tmp.masked = and i32 %tmp, 2147483648 ; <uint> [#uses=1] - %tmp11 = or i32 %tmp1415, %tmp.masked ; <uint> [#uses=1] - %tmp12 = and i32 %tmp9, 2147483647 ; <uint> [#uses=1] - %tmp13 = or i32 %tmp12, %tmp11 ; <uint> [#uses=1] - store i32 %tmp13, i32* %tmp8 - ret void -} - -We emit: - -_foo: - lwz r2, 0(r3) - slwi r4, r2, 1 - or r4, r4, r2 - rlwimi r2, r4, 0, 0, 0 - stw r2, 0(r3) - blr - -We could collapse a bunch of those ORs and ANDs and generate the following -equivalent code: - -_foo: - lwz r2, 0(r3) - rlwinm r4, r2, 1, 0, 0 - or r2, r2, r4 - stw r2, 0(r3) - blr - -===-------------------------------------------------------------------------=== - -We compile: - -unsigned test6(unsigned x) { - return ((x & 0x00FF0000) >> 16) | ((x & 0x000000FF) << 16); -} - -into: - -_test6: - lis r2, 255 - rlwinm r3, r3, 16, 0, 31 - ori r2, r2, 255 - and r3, r3, r2 - blr - -GCC gets it down to: - -_test6: - rlwinm r0,r3,16,8,15 - rlwinm r3,r3,16,24,31 - or r3,r3,r0 - blr - - -===-------------------------------------------------------------------------=== - -Consider a function like this: - -float foo(float X) { return X + 1234.4123f; } - -The FP constant ends up in the constant pool, so we need to get the LR register. 
- This ends up producing code like this: - -_foo: -.LBB_foo_0: ; entry - mflr r11 -*** stw r11, 8(r1) - bl "L00000$pb" -"L00000$pb": - mflr r2 - addis r2, r2, ha16(.CPI_foo_0-"L00000$pb") - lfs f0, lo16(.CPI_foo_0-"L00000$pb")(r2) - fadds f1, f1, f0 -*** lwz r11, 8(r1) - mtlr r11 - blr - -This is functional, but there is no reason to spill the LR register all the way -to the stack (the two marked instrs): spilling it to a GPR is quite enough. - -Implementing this will require some codegen improvements. Nate writes: - -"So basically what we need to support the "no stack frame save and restore" is a -generalization of the LR optimization to "callee-save regs". - -Currently, we have LR marked as a callee-save reg. The register allocator sees -that it's callee save, and spills it directly to the stack. - -Ideally, something like this would happen: - -LR would be in a separate register class from the GPRs. The class of LR would be -marked "unspillable". When the register allocator came across an unspillable -reg, it would ask "what is the best class to copy this into that I *can* spill" -If it gets a class back, which it will in this case (the gprs), it grabs a free -register of that class. If it is then later necessary to spill that reg, so be -it. - -===-------------------------------------------------------------------------=== - -We compile this: -int test(_Bool X) { - return X ? 524288 : 0; -} - -to: -_test: - cmplwi cr0, r3, 0 - lis r2, 8 - li r3, 0 - beq cr0, LBB1_2 ;entry -LBB1_1: ;entry - mr r3, r2 -LBB1_2: ;entry - blr - -instead of: -_test: - addic r2,r3,-1 - subfe r0,r2,r3 - slwi r3,r0,19 - blr - -This sort of thing occurs a lot due to globalopt. 
- -===-------------------------------------------------------------------------=== - -We compile: - -define i32 @bar(i32 %x) nounwind readnone ssp { -entry: - %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1] - %neg = sext i1 %0 to i32 ; <i32> [#uses=1] - ret i32 %neg -} - -to: - -_bar: - cntlzw r2, r3 - slwi r2, r2, 26 - srawi r3, r2, 31 - blr - -it would be better to produce: - -_bar: - addic r3,r3,-1 - subfe r3,r3,r3 - blr - -===-------------------------------------------------------------------------=== - -We currently compile 32-bit bswap: - -declare i32 @llvm.bswap.i32(i32 %A) -define i32 @test(i32 %A) { - %B = call i32 @llvm.bswap.i32(i32 %A) - ret i32 %B -} - -to: - -_test: - rlwinm r2, r3, 24, 16, 23 - slwi r4, r3, 24 - rlwimi r2, r3, 8, 24, 31 - rlwimi r4, r3, 8, 8, 15 - rlwimi r4, r2, 0, 16, 31 - mr r3, r4 - blr - -it would be more efficient to produce: - -_foo: mr r0,r3 - rlwinm r3,r3,8,0xffffffff - rlwimi r3,r0,24,0,7 - rlwimi r3,r0,24,16,23 - blr - -===-------------------------------------------------------------------------=== - -test/CodeGen/PowerPC/2007-03-24-cntlzd.ll compiles to: - -__ZNK4llvm5APInt17countLeadingZerosEv: - ld r2, 0(r3) - cntlzd r2, r2 - or r2, r2, r2 <<-- silly. - addi r3, r2, -64 - blr - -The dead or is a 'truncate' from 64- to 32-bits. - -===-------------------------------------------------------------------------=== - -We generate horrible ppc code for this: - -#define N 2000000 -double a[N],c[N]; -void simpleloop() { - int j; - for (j=0; j<N; j++) - c[j] = a[j]; -} - -LBB1_1: ;bb - lfdx f0, r3, r4 - addi r5, r5, 1 ;; Extra IV for the exit value compare. - stfdx f0, r2, r4 - addi r4, r4, 8 - - xoris r6, r5, 30 ;; This is due to a large immediate. 
- cmplwi cr0, r6, 33920 - bne cr0, LBB1_1 - -//===---------------------------------------------------------------------===// - -This: - #include <algorithm> - inline std::pair<unsigned, bool> full_add(unsigned a, unsigned b) - { return std::make_pair(a + b, a + b < a); } - bool no_overflow(unsigned a, unsigned b) - { return !full_add(a, b).second; } - -Should compile to: - -__Z11no_overflowjj: - add r4,r3,r4 - subfc r3,r3,r4 - li r3,0 - adde r3,r3,r3 - blr - -(or better) not: - -__Z11no_overflowjj: - add r2, r4, r3 - cmplw cr7, r2, r3 - mfcr r2 - rlwinm r2, r2, 29, 31, 31 - xori r3, r2, 1 - blr - -//===---------------------------------------------------------------------===// - -We compile some FP comparisons into an mfcr with two rlwinms and an or. For -example: -#include <math.h> -int test(double x, double y) { return islessequal(x, y);} -int test2(double x, double y) { return islessgreater(x, y);} -int test3(double x, double y) { return !islessequal(x, y);} - -Compiles into (all three are similar, but the bits differ): - -_test: - fcmpu cr7, f1, f2 - mfcr r2 - rlwinm r3, r2, 29, 31, 31 - rlwinm r2, r2, 31, 31, 31 - or r3, r2, r3 - blr - -GCC compiles this into: - - _test: - fcmpu cr7,f1,f2 - cror 30,28,30 - mfcr r3 - rlwinm r3,r3,31,1 - blr - -which is more efficient and can use mfocr. See PR642 for some more context. - -//===---------------------------------------------------------------------===// - -void foo(float *data, float d) { - long i; - for (i = 0; i < 8000; i++) - data[i] = d; -} -void foo2(float *data, float d) { - long i; - data--; - for (i = 0; i < 8000; i++) { - data[1] = d; - data++; - } -} - -These compile to: - -_foo: - li r2, 0 -LBB1_1: ; bb - addi r4, r2, 4 - stfsx f1, r3, r2 - cmplwi cr0, r4, 32000 - mr r2, r4 - bne cr0, LBB1_1 ; bb - blr -_foo2: - li r2, 0 -LBB2_1: ; bb - addi r4, r2, 4 - stfsx f1, r3, r2 - cmplwi cr0, r4, 32000 - mr r2, r4 - bne cr0, LBB2_1 ; bb - blr - -The 'mr' could be eliminated to folding the add into the cmp better. 
- -//===---------------------------------------------------------------------===// -Codegen for the following (low-probability) case deteriorated considerably -when the correctness fixes for unordered comparisons went in (PR 642, 58871). -It should be possible to recover the code quality described in the comments. - -; RUN: llvm-as < %s | llc -march=ppc32 | grep or | count 3 -; This should produce one 'or' or 'cror' instruction per function. - -; RUN: llvm-as < %s | llc -march=ppc32 | grep mfcr | count 3 -; PR2964 - -define i32 @test(double %x, double %y) nounwind { -entry: - %tmp3 = fcmp ole double %x, %y ; <i1> [#uses=1] - %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] - ret i32 %tmp345 -} - -define i32 @test2(double %x, double %y) nounwind { -entry: - %tmp3 = fcmp one double %x, %y ; <i1> [#uses=1] - %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] - ret i32 %tmp345 -} - -define i32 @test3(double %x, double %y) nounwind { -entry: - %tmp3 = fcmp ugt double %x, %y ; <i1> [#uses=1] - %tmp34 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] - ret i32 %tmp34 -} -//===----------------------------------------------------------------------===// -; RUN: llvm-as < %s | llc -march=ppc32 | not grep fneg - -; This could generate FSEL with appropriate flags (FSEL is not IEEE-safe, and -; should not be generated except with -enable-finite-only-fp-math or the like). -; With the correctness fixes for PR642 (58871) LowerSELECT_CC would need to -; recognize a more elaborate tree than a simple SETxx. - -define double @test_FNEG_sel(double %A, double %B, double %C) { - %D = fsub double -0.000000e+00, %A ; <double> [#uses=1] - %Cond = fcmp ugt double %D, -0.000000e+00 ; <i1> [#uses=1] - %E = select i1 %Cond, double %B, double %C ; <double> [#uses=1] - ret double %E -} - -//===----------------------------------------------------------------------===// -The save/restore sequence for CR in prolog/epilog is terrible: -- Each CR subreg is saved individually, rather than doing one save as a unit. 
-- On Darwin, the save is done after the decrement of SP, which means the offset -from SP of the save slot can be too big for a store instruction, which means we -need an additional register (currently hacked in 96015+96020; the solution there -is correct, but poor). -- On SVR4 the same thing can happen, and I don't think saving before the SP -decrement is safe on that target, as there is no red zone. This is currently -broken AFAIK, although it's not a target I can exercise. -The following demonstrates the problem: -extern void bar(char *p); -void foo() { - char x[100000]; - bar(x); - __asm__("" ::: "cr2"); -} diff --git a/contrib/llvm/lib/Target/PowerPC/README_ALTIVEC.txt b/contrib/llvm/lib/Target/PowerPC/README_ALTIVEC.txt deleted file mode 100644 index 1e4c6fb..0000000 --- a/contrib/llvm/lib/Target/PowerPC/README_ALTIVEC.txt +++ /dev/null @@ -1,211 +0,0 @@ -//===- README_ALTIVEC.txt - Notes for improving Altivec code gen ----------===// - -Implement PPCInstrInfo::isLoadFromStackSlot/isStoreToStackSlot for vector -registers, to generate better spill code. - -//===----------------------------------------------------------------------===// - -The first should be a single lvx from the constant pool, the second should be -a xor/stvx: - -void foo(void) { - int x[8] __attribute__((aligned(128))) = { 1, 1, 1, 17, 1, 1, 1, 1 }; - bar (x); -} - -#include <string.h> -void foo(void) { - int x[8] __attribute__((aligned(128))); - memset (x, 0, sizeof (x)); - bar (x); -} - -//===----------------------------------------------------------------------===// - -Altivec: Codegen'ing MUL with vector FMADD should add -0.0, not 0.0: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8763 - -When -ffast-math is on, we can use 0.0. 
- -//===----------------------------------------------------------------------===// - - Consider this: - v4f32 Vector; - v4f32 Vector2 = { Vector.X, Vector.X, Vector.X, Vector.X }; - -Since we know that "Vector" is 16-byte aligned and we know the element offset -of ".X", we should change the load into a lve*x instruction, instead of doing -a load/store/lve*x sequence. - -//===----------------------------------------------------------------------===// - -For functions that use altivec AND have calls, we are VRSAVE'ing all call -clobbered regs. - -//===----------------------------------------------------------------------===// - -Implement passing vectors by value into calls and receiving them as arguments. - -//===----------------------------------------------------------------------===// - -GCC apparently tries to codegen { C1, C2, Variable, C3 } as a constant pool load -of C1/C2/C3, then a load and vperm of Variable. - -//===----------------------------------------------------------------------===// - -We need a way to teach tblgen that some operands of an intrinsic are required to -be constants. The verifier should enforce this constraint. - -//===----------------------------------------------------------------------===// - -We currently codegen SCALAR_TO_VECTOR as a store of the scalar to a 16-byte -aligned stack slot, followed by a load/vperm. We should probably just store it -to a scalar stack slot, then use lvsl/vperm to load it. If the value is already -in memory this is a big win. - -//===----------------------------------------------------------------------===// - -extract_vector_elt of an arbitrary constant vector can be done with the -following instructions: - -vTemp = vec_splat(v0,2); // 2 is the element the src is in. -vec_ste(&destloc,0,vTemp); - -We can do an arbitrary non-constant value by using lvsr/perm/ste. 
- -//===----------------------------------------------------------------------===// - -If we want to tie instruction selection into the scheduler, we can do some -constant formation with different instructions. For example, we can generate -"vsplti -1" with "vcmpequw R,R" and 1,1,1,1 with "vsubcuw R,R", and 0,0,0,0 with -"vsplti 0" or "vxor", each of which use different execution units, thus could -help scheduling. - -This is probably only reasonable for a post-pass scheduler. - -//===----------------------------------------------------------------------===// - -For this function: - -void test(vector float *A, vector float *B) { - vector float C = (vector float)vec_cmpeq(*A, *B); - if (!vec_any_eq(*A, *B)) - *B = (vector float){0,0,0,0}; - *A = C; -} - -we get the following basic block: - - ... - lvx v2, 0, r4 - lvx v3, 0, r3 - vcmpeqfp v4, v3, v2 - vcmpeqfp. v2, v3, v2 - bne cr6, LBB1_2 ; cond_next - -The vcmpeqfp/vcmpeqfp. instructions currently cannot be merged when the -vcmpeqfp. result is used by a branch. This can be improved. - -//===----------------------------------------------------------------------===// - -The code generated for this is truly aweful: - -vector float test(float a, float b) { - return (vector float){ 0.0, a, 0.0, 0.0}; -} - -LCPI1_0: ; float - .space 4 - .text - .globl _test - .align 4 -_test: - mfspr r2, 256 - oris r3, r2, 4096 - mtspr 256, r3 - lis r3, ha16(LCPI1_0) - addi r4, r1, -32 - stfs f1, -16(r1) - addi r5, r1, -16 - lfs f0, lo16(LCPI1_0)(r3) - stfs f0, -32(r1) - lvx v2, 0, r4 - lvx v3, 0, r5 - vmrghw v3, v3, v2 - vspltw v2, v2, 0 - vmrghw v2, v2, v3 - mtspr 256, r2 - blr - -//===----------------------------------------------------------------------===// - -int foo(vector float *x, vector float *y) { - if (vec_all_eq(*x,*y)) return 3245; - else return 12; -} - -A predicate compare being used in a select_cc should have the same peephole -applied to it as a predicate compare used by a br_cc. 
There should be no -mfcr here: - -_foo: - mfspr r2, 256 - oris r5, r2, 12288 - mtspr 256, r5 - li r5, 12 - li r6, 3245 - lvx v2, 0, r4 - lvx v3, 0, r3 - vcmpeqfp. v2, v3, v2 - mfcr r3, 2 - rlwinm r3, r3, 25, 31, 31 - cmpwi cr0, r3, 0 - bne cr0, LBB1_2 ; entry -LBB1_1: ; entry - mr r6, r5 -LBB1_2: ; entry - mr r3, r6 - mtspr 256, r2 - blr - -//===----------------------------------------------------------------------===// - -CodeGen/PowerPC/vec_constants.ll has an and operation that should be -codegen'd to andc. The issue is that the 'all ones' build vector is -SelectNodeTo'd a VSPLTISB instruction node before the and/xor is selected -which prevents the vnot pattern from matching. - - -//===----------------------------------------------------------------------===// - -An alternative to the store/store/load approach for illegal insert element -lowering would be: - -1. store element to any ol' slot -2. lvx the slot -3. lvsl 0; splat index; vcmpeq to generate a select mask -4. lvsl slot + x; vperm to rotate result into correct slot -5. vsel result together. - -//===----------------------------------------------------------------------===// - -Should codegen branches on vec_any/vec_all to avoid mfcr. Two examples: - -#include <altivec.h> - int f(vector float a, vector float b) - { - int aa = 0; - if (vec_all_ge(a, b)) - aa |= 0x1; - if (vec_any_ge(a,b)) - aa |= 0x2; - return aa; -} - -vector float f(vector float a, vector float b) { - if (vec_any_eq(a, b)) - return a; - else - return b; -} - diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/PowerPC/TargetInfo/CMakeLists.txt deleted file mode 100644 index 058d599..0000000 --- a/contrib/llvm/lib/Target/PowerPC/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMPowerPCInfo - PowerPCTargetInfo.cpp - ) - -add_dependencies(LLVMPowerPCInfo PowerPCCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/Makefile b/contrib/llvm/lib/Target/PowerPC/TargetInfo/Makefile deleted file mode 100644 index a101aa4..0000000 --- a/contrib/llvm/lib/Target/PowerPC/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/PowerPC/TargetInfo/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMPowerPCInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/README.txt b/contrib/llvm/lib/Target/README.txt deleted file mode 100644 index 4faf8bc..0000000 --- a/contrib/llvm/lib/Target/README.txt +++ /dev/null @@ -1,1939 +0,0 @@ -Target Independent Opportunities: - -//===---------------------------------------------------------------------===// - -Dead argument elimination should be enhanced to handle cases when an argument is -dead to an externally visible function. Though the argument can't be removed -from the externally visible function, the caller doesn't need to pass it in. -For example in this testcase: - - void foo(int X) __attribute__((noinline)); - void foo(int X) { sideeffect(); } - void bar(int A) { foo(A+1); } - -We compile bar to: - -define void @bar(i32 %A) nounwind ssp { - %0 = add nsw i32 %A, 1 ; <i32> [#uses=1] - tail call void @foo(i32 %0) nounwind noinline ssp - ret void -} - -The add is dead, we could pass in 'i32 undef' instead. This occurs for C++ -templates etc, which usually have linkonce_odr/weak_odr linkage, not internal -linkage. 
- -//===---------------------------------------------------------------------===// - -With the recent changes to make the implicit def/use set explicit in -machineinstrs, we should change the target descriptions for 'call' instructions -so that the .td files don't list all the call-clobbered registers as implicit -defs. Instead, these should be added by the code generator (e.g. on the dag). - -This has a number of uses: - -1. PPC32/64 and X86 32/64 can avoid having multiple copies of call instructions - for their different impdef sets. -2. Targets with multiple calling convs (e.g. x86) which have different clobber - sets don't need copies of call instructions. -3. 'Interprocedural register allocation' can be done to reduce the clobber sets - of calls. - -//===---------------------------------------------------------------------===// - -Make the PPC branch selector target independent - -//===---------------------------------------------------------------------===// - -Get the C front-end to expand hypot(x,y) -> llvm.sqrt(x*x+y*y) when errno and -precision don't matter (ffastmath). Misc/mandel will like this. :) This isn't -safe in general, even on darwin. See the libm implementation of hypot for -examples (which special case when x/y are exactly zero to get signed zeros etc -right). - -//===---------------------------------------------------------------------===// - -Solve this DAG isel folding deficiency: - -int X, Y; - -void fn1(void) -{ - X = X | (Y << 3); -} - -compiles to - -fn1: - movl Y, %eax - shll $3, %eax - orl X, %eax - movl %eax, X - ret - -The problem is the store's chain operand is not the load X but rather -a TokenFactor of the load X and load Y, which prevents the folding. - -There are two ways to fix this: - -1. The dag combiner can start using alias analysis to realize that y/x - don't alias, making the store to X not dependent on the load from Y. -2. The generated isel could be made smarter in the case it can't - disambiguate the pointers. 
- -Number 1 is the preferred solution. - -This has been "fixed" by a TableGen hack. But that is a short term workaround -which will be removed once the proper fix is made. - -//===---------------------------------------------------------------------===// - -On targets with expensive 64-bit multiply, we could LSR this: - -for (i = ...; ++i) { - x = 1ULL << i; - -into: - long long tmp = 1; - for (i = ...; ++i, tmp+=tmp) - x = tmp; - -This would be a win on ppc32, but not x86 or ppc64. - -//===---------------------------------------------------------------------===// - -Shrink: (setlt (loadi32 P), 0) -> (setlt (loadi8 Phi), 0) - -//===---------------------------------------------------------------------===// - -Reassociate should turn things like: - -int factorial(int X) { - return X*X*X*X*X*X*X*X; -} - -into llvm.powi calls, allowing the code generator to produce balanced -multiplication trees. - -First, the intrinsic needs to be extended to support integers, and second the -code generator needs to be enhanced to lower these to multiplication trees. - -//===---------------------------------------------------------------------===// - -Interesting? testcase for add/shift/mul reassoc: - -int bar(int x, int y) { - return x*x*x+y+x*x*x*x*x*y*y*y*y; -} -int foo(int z, int n) { - return bar(z, n) + bar(2*z, 2*n); -} - -This is blocked on not handling X*X*X -> powi(X, 3) (see note above). The issue -is that we end up getting t = 2*X s = t*t and don't turn this into 4*X*X, -which is the same number of multiplies and is canonical, because the 2*X has -multiple uses. Here's a simple example: - -define i32 @test15(i32 %X1) { - %B = mul i32 %X1, 47 ; X1*47 - %C = mul i32 %B, %B - ret i32 %C -} - - -//===---------------------------------------------------------------------===// - -Reassociate should handle the example in GCC PR16157: - -extern int a0, a1, a2, a3, a4; extern int b0, b1, b2, b3, b4; -void f () { /* this can be optimized to four additions... 
*/ - b4 = a4 + a3 + a2 + a1 + a0; - b3 = a3 + a2 + a1 + a0; - b2 = a2 + a1 + a0; - b1 = a1 + a0; -} - -This requires reassociating to forms of expressions that are already available, -something that reassoc doesn't think about yet. - - -//===---------------------------------------------------------------------===// - -This function: (derived from GCC PR19988) -double foo(double x, double y) { - return ((x + 0.1234 * y) * (x + -0.1234 * y)); -} - -compiles to: -_foo: - movapd %xmm1, %xmm2 - mulsd LCPI1_1(%rip), %xmm1 - mulsd LCPI1_0(%rip), %xmm2 - addsd %xmm0, %xmm1 - addsd %xmm0, %xmm2 - movapd %xmm1, %xmm0 - mulsd %xmm2, %xmm0 - ret - -Reassociate should be able to turn it into: - -double foo(double x, double y) { - return ((x + 0.1234 * y) * (x - 0.1234 * y)); -} - -Which allows the multiply by constant to be CSE'd, producing: - -_foo: - mulsd LCPI1_0(%rip), %xmm1 - movapd %xmm1, %xmm2 - addsd %xmm0, %xmm2 - subsd %xmm1, %xmm0 - mulsd %xmm2, %xmm0 - ret - -This doesn't need -ffast-math support at all. This is particularly bad because -the llvm-gcc frontend is canonicalizing the latter into the former, but clang -doesn't have this problem. - -//===---------------------------------------------------------------------===// - -These two functions should generate the same code on big-endian systems: - -int g(int *j,int *l) { return memcmp(j,l,4); } -int h(int *j, int *l) { return *j - *l; } - -this could be done in SelectionDAGISel.cpp, along with other special cases, -for 1,2,4,8 bytes. - -//===---------------------------------------------------------------------===// - -It would be nice to revert this patch: -http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20060213/031986.html - -And teach the dag combiner enough to simplify the code expanded before -legalize. It seems plausible that this knowledge would let it simplify other -stuff too. 
- -//===---------------------------------------------------------------------===// - -For vector types, TargetData.cpp::getTypeInfo() returns alignment that is equal -to the type size. It works but can be overly conservative as the alignment of -specific vector types are target dependent. - -//===---------------------------------------------------------------------===// - -We should produce an unaligned load from code like this: - -v4sf example(float *P) { - return (v4sf){P[0], P[1], P[2], P[3] }; -} - -//===---------------------------------------------------------------------===// - -Add support for conditional increments, and other related patterns. Instead -of: - - movl 136(%esp), %eax - cmpl $0, %eax - je LBB16_2 #cond_next -LBB16_1: #cond_true - incl _foo -LBB16_2: #cond_next - -emit: - movl _foo, %eax - cmpl $1, %edi - sbbl $-1, %eax - movl %eax, _foo - -//===---------------------------------------------------------------------===// - -Combine: a = sin(x), b = cos(x) into a,b = sincos(x). - -Expand these to calls of sin/cos and stores: - double sincos(double x, double *sin, double *cos); - float sincosf(float x, float *sin, float *cos); - long double sincosl(long double x, long double *sin, long double *cos); - -Doing so could allow SROA of the destination pointers. See also: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17687 - -This is now easily doable with MRVs. We could even make an intrinsic for this -if anyone cared enough about sincos. - -//===---------------------------------------------------------------------===// - -quantum_sigma_x in 462.libquantum contains the following loop: - - for(i=0; i<reg->size; i++) - { - /* Flip the target bit of each basis state */ - reg->node[i].state ^= ((MAX_UNSIGNED) 1 << target); - } - -Where MAX_UNSIGNED/state is a 64-bit int. 
On a 32-bit platform it would be just -so cool to turn it into something like: - - long long Res = ((MAX_UNSIGNED) 1 << target); - if (target < 32) { - for(i=0; i<reg->size; i++) - reg->node[i].state ^= Res & 0xFFFFFFFFULL; - } else { - for(i=0; i<reg->size; i++) - reg->node[i].state ^= Res & 0xFFFFFFFF00000000ULL - } - -... which would only do one 32-bit XOR per loop iteration instead of two. - -It would also be nice to recognize the reg->size doesn't alias reg->node[i], but -this requires TBAA. - -//===---------------------------------------------------------------------===// - -This isn't recognized as bswap by instcombine (yes, it really is bswap): - -unsigned long reverse(unsigned v) { - unsigned t; - t = v ^ ((v << 16) | (v >> 16)); - t &= ~0xff0000; - v = (v << 24) | (v >> 8); - return v ^ (t >> 8); -} - -Neither is this (very standard idiom): - -int f(int n) -{ - return (((n) << 24) | (((n) & 0xff00) << 8) - | (((n) >> 8) & 0xff00) | ((n) >> 24)); -} - -//===---------------------------------------------------------------------===// - -[LOOP RECOGNITION] - -These idioms should be recognized as popcount (see PR1488): - -unsigned countbits_slow(unsigned v) { - unsigned c; - for (c = 0; v; v >>= 1) - c += v & 1; - return c; -} -unsigned countbits_fast(unsigned v){ - unsigned c; - for (c = 0; v; c++) - v &= v - 1; // clear the least significant bit set - return c; -} - -BITBOARD = unsigned long long -int PopCnt(register BITBOARD a) { - register int c=0; - while(a) { - c++; - a &= a - 1; - } - return c; -} -unsigned int popcount(unsigned int input) { - unsigned int count = 0; - for (unsigned int i = 0; i < 4 * 8; i++) - count += (input >> i) & i; - return count; -} - -This is a form of idiom recognition for loops, the same thing that could be -useful for recognizing memset/memcpy. - -//===---------------------------------------------------------------------===// - -These should turn into single 16-bit (unaligned?) loads on little/big endian -processors. 
- -unsigned short read_16_le(const unsigned char *adr) { - return adr[0] | (adr[1] << 8); -} -unsigned short read_16_be(const unsigned char *adr) { - return (adr[0] << 8) | adr[1]; -} - -//===---------------------------------------------------------------------===// - --instcombine should handle this transform: - icmp pred (sdiv X / C1 ), C2 -when X, C1, and C2 are unsigned. Similarly for udiv and signed operands. - -Currently InstCombine avoids this transform but will do it when the signs of -the operands and the sign of the divide match. See the FIXME in -InstructionCombining.cpp in the visitSetCondInst method after the switch case -for Instruction::UDiv (around line 4447) for more details. - -The SingleSource/Benchmarks/Shootout-C++/hash and hash2 tests have examples of -this construct. - -//===---------------------------------------------------------------------===// - -[LOOP RECOGNITION] - -viterbi speeds up *significantly* if the various "history" related copy loops -are turned into memcpy calls at the source level. We need a "loops to memcpy" -pass. - -//===---------------------------------------------------------------------===// - -[LOOP OPTIMIZATION] - -SingleSource/Benchmarks/Misc/dt.c shows several interesting optimization -opportunities in its double_array_divs_variable function: it needs loop -interchange, memory promotion (which LICM already does), vectorization and -variable trip count loop unrolling (since it has a constant trip count). ICC -apparently produces this very nice code with -ffast-math: - -..B1.70: # Preds ..B1.70 ..B1.69 - mulpd %xmm0, %xmm1 #108.2 - mulpd %xmm0, %xmm1 #108.2 - mulpd %xmm0, %xmm1 #108.2 - mulpd %xmm0, %xmm1 #108.2 - addl $8, %edx # - cmpl $131072, %edx #108.2 - jb ..B1.70 # Prob 99% #108.2 - -It would be better to count down to zero, but this is a lot better than what we -do. 
- -//===---------------------------------------------------------------------===// - -Consider: - -typedef unsigned U32; -typedef unsigned long long U64; -int test (U32 *inst, U64 *regs) { - U64 effective_addr2; - U32 temp = *inst; - int r1 = (temp >> 20) & 0xf; - int b2 = (temp >> 16) & 0xf; - effective_addr2 = temp & 0xfff; - if (b2) effective_addr2 += regs[b2]; - b2 = (temp >> 12) & 0xf; - if (b2) effective_addr2 += regs[b2]; - effective_addr2 &= regs[4]; - if ((effective_addr2 & 3) == 0) - return 1; - return 0; -} - -Note that only the low 2 bits of effective_addr2 are used. On 32-bit systems, -we don't eliminate the computation of the top half of effective_addr2 because -we don't have whole-function selection dags. On x86, this means we use one -extra register for the function when effective_addr2 is declared as U64 than -when it is declared U32. - -PHI Slicing could be extended to do this. - -//===---------------------------------------------------------------------===// - -LSR should know what GPR types a target has from TargetData. This code: - -volatile short X, Y; // globals - -void foo(int N) { - int i; - for (i = 0; i < N; i++) { X = i; Y = i*4; } -} - -produces two near identical IV's (after promotion) on PPC/ARM: - -LBB1_2: - ldr r3, LCPI1_0 - ldr r3, [r3] - strh r2, [r3] - ldr r3, LCPI1_1 - ldr r3, [r3] - strh r1, [r3] - add r1, r1, #4 - add r2, r2, #1 <- [0,+,1] - sub r0, r0, #1 <- [0,-,1] - cmp r0, #0 - bne LBB1_2 - -LSR should reuse the "+" IV for the exit test. - -//===---------------------------------------------------------------------===// - -Tail call elim should be more aggressive, checking to see if the call is -followed by an uncond branch to an exit block. - -; This testcase is due to tail-duplication not wanting to copy the return -; instruction into the terminating blocks because there was other code -; optimized out of the function after the taildup happened. 
-; RUN: llvm-as < %s | opt -tailcallelim | llvm-dis | not grep call - -define i32 @t4(i32 %a) { -entry: - %tmp.1 = and i32 %a, 1 ; <i32> [#uses=1] - %tmp.2 = icmp ne i32 %tmp.1, 0 ; <i1> [#uses=1] - br i1 %tmp.2, label %then.0, label %else.0 - -then.0: ; preds = %entry - %tmp.5 = add i32 %a, -1 ; <i32> [#uses=1] - %tmp.3 = call i32 @t4( i32 %tmp.5 ) ; <i32> [#uses=1] - br label %return - -else.0: ; preds = %entry - %tmp.7 = icmp ne i32 %a, 0 ; <i1> [#uses=1] - br i1 %tmp.7, label %then.1, label %return - -then.1: ; preds = %else.0 - %tmp.11 = add i32 %a, -2 ; <i32> [#uses=1] - %tmp.9 = call i32 @t4( i32 %tmp.11 ) ; <i32> [#uses=1] - br label %return - -return: ; preds = %then.1, %else.0, %then.0 - %result.0 = phi i32 [ 0, %else.0 ], [ %tmp.3, %then.0 ], - [ %tmp.9, %then.1 ] - ret i32 %result.0 -} - -//===---------------------------------------------------------------------===// - -Tail recursion elimination should handle: - -int pow2m1(int n) { - if (n == 0) - return 0; - return 2 * pow2m1 (n - 1) + 1; -} - -Also, multiplies can be turned into SHL's, so they should be handled as if -they were associative. "return foo() << 1" can be tail recursion eliminated. - -//===---------------------------------------------------------------------===// - -Argument promotion should promote arguments for recursive functions, like -this: - -; RUN: llvm-as < %s | opt -argpromotion | llvm-dis | grep x.val - -define internal i32 @foo(i32* %x) { -entry: - %tmp = load i32* %x ; <i32> [#uses=0] - %tmp.foo = call i32 @foo( i32* %x ) ; <i32> [#uses=1] - ret i32 %tmp.foo -} - -define i32 @bar(i32* %x) { -entry: - %tmp3 = call i32 @foo( i32* %x ) ; <i32> [#uses=1] - ret i32 %tmp3 -} - -//===---------------------------------------------------------------------===// - -We should investigate an instruction sinking pass. Consider this silly -example in pic mode: - -#include <assert.h> -void foo(int x) { - assert(x); - //... 
-} - -we compile this to: -_foo: - subl $28, %esp - call "L1$pb" -"L1$pb": - popl %eax - cmpl $0, 32(%esp) - je LBB1_2 # cond_true -LBB1_1: # return - # ... - addl $28, %esp - ret -LBB1_2: # cond_true -... - -The PIC base computation (call+popl) is only used on one path through the -code, but is currently always computed in the entry block. It would be -better to sink the picbase computation down into the block for the -assertion, as it is the only one that uses it. This happens for a lot of -code with early outs. - -Another example is loads of arguments, which are usually emitted into the -entry block on targets like x86. If not used in all paths through a -function, they should be sunk into the ones that do. - -In this case, whole-function-isel would also handle this. - -//===---------------------------------------------------------------------===// - -Investigate lowering of sparse switch statements into perfect hash tables: -http://burtleburtle.net/bob/hash/perfect.html - -//===---------------------------------------------------------------------===// - -We should turn things like "load+fabs+store" and "load+fneg+store" into the -corresponding integer operations. On a yonah, this loop: - -double a[256]; -void foo() { - int i, b; - for (b = 0; b < 10000000; b++) - for (i = 0; i < 256; i++) - a[i] = -a[i]; -} - -is twice as slow as this loop: - -long long a[256]; -void foo() { - int i, b; - for (b = 0; b < 10000000; b++) - for (i = 0; i < 256; i++) - a[i] ^= (1ULL << 63); -} - -and I suspect other processors are similar. On X86 in particular this is a -big win because doing this with integers allows the use of read/modify/write -instructions. - -//===---------------------------------------------------------------------===// - -DAG Combiner should try to combine small loads into larger loads when -profitable. 
For example, we compile this C++ example: - -struct THotKey { short Key; bool Control; bool Shift; bool Alt; }; -extern THotKey m_HotKey; -THotKey GetHotKey () { return m_HotKey; } - -into (-O3 -fno-exceptions -static -fomit-frame-pointer): - -__Z9GetHotKeyv: - pushl %esi - movl 8(%esp), %eax - movb _m_HotKey+3, %cl - movb _m_HotKey+4, %dl - movb _m_HotKey+2, %ch - movw _m_HotKey, %si - movw %si, (%eax) - movb %ch, 2(%eax) - movb %cl, 3(%eax) - movb %dl, 4(%eax) - popl %esi - ret $4 - -GCC produces: - -__Z9GetHotKeyv: - movl _m_HotKey, %edx - movl 4(%esp), %eax - movl %edx, (%eax) - movzwl _m_HotKey+4, %edx - movw %dx, 4(%eax) - ret $4 - -The LLVM IR contains the needed alignment info, so we should be able to -merge the loads and stores into 4-byte loads: - - %struct.THotKey = type { i16, i8, i8, i8 } -define void @_Z9GetHotKeyv(%struct.THotKey* sret %agg.result) nounwind { -... - %tmp2 = load i16* getelementptr (@m_HotKey, i32 0, i32 0), align 8 - %tmp5 = load i8* getelementptr (@m_HotKey, i32 0, i32 1), align 2 - %tmp8 = load i8* getelementptr (@m_HotKey, i32 0, i32 2), align 1 - %tmp11 = load i8* getelementptr (@m_HotKey, i32 0, i32 3), align 2 - -Alternatively, we should use a small amount of base-offset alias analysis -to make it so the scheduler doesn't need to hold all the loads in regs at -once. - -//===---------------------------------------------------------------------===// - -We should add an FRINT node to the DAG to model targets that have legal -implementations of ceil/floor/rint. - -//===---------------------------------------------------------------------===// - -Consider: - -int test() { - long long input[8] = {1,1,1,1,1,1,1,1}; - foo(input); -} - -We currently compile this into a memcpy from a global array since the -initializer is fairly large and not memset'able. This is good, but the memcpy -gets lowered to load/stores in the code generator. 
This is also ok, except -that the codegen lowering for memcpy doesn't handle the case when the source -is a constant global. This gives us atrocious code like this: - - call "L1$pb" -"L1$pb": - popl %eax - movl _C.0.1444-"L1$pb"+32(%eax), %ecx - movl %ecx, 40(%esp) - movl _C.0.1444-"L1$pb"+20(%eax), %ecx - movl %ecx, 28(%esp) - movl _C.0.1444-"L1$pb"+36(%eax), %ecx - movl %ecx, 44(%esp) - movl _C.0.1444-"L1$pb"+44(%eax), %ecx - movl %ecx, 52(%esp) - movl _C.0.1444-"L1$pb"+40(%eax), %ecx - movl %ecx, 48(%esp) - movl _C.0.1444-"L1$pb"+12(%eax), %ecx - movl %ecx, 20(%esp) - movl _C.0.1444-"L1$pb"+4(%eax), %ecx -... - -instead of: - movl $1, 16(%esp) - movl $0, 20(%esp) - movl $1, 24(%esp) - movl $0, 28(%esp) - movl $1, 32(%esp) - movl $0, 36(%esp) - ... - -//===---------------------------------------------------------------------===// - -http://llvm.org/PR717: - -The following code should compile into "ret int undef". Instead, LLVM -produces "ret int 0": - -int f() { - int x = 4; - int y; - if (x == 3) y = 0; - return y; -} - -//===---------------------------------------------------------------------===// - -The loop unroller should partially unroll loops (instead of peeling them) -when code growth isn't too bad and when an unroll count allows simplification -of some code within the loop. One trivial example is: - -#include <stdio.h> -int main() { - int nRet = 17; - int nLoop; - for ( nLoop = 0; nLoop < 1000; nLoop++ ) { - if ( nLoop & 1 ) - nRet += 2; - else - nRet -= 1; - } - return nRet; -} - -Unrolling by 2 would eliminate the '&1' in both copies, leading to a net -reduction in code size. The resultant code would then also be suitable for -exit value computation. - -//===---------------------------------------------------------------------===// - -We miss a bunch of rotate opportunities on various targets, including ppc, x86, -etc. 
On X86, we miss a bunch of 'rotate by variable' cases because the rotate -matching code in dag combine doesn't look through truncates aggressively -enough. Here are some testcases reduced from GCC PR17886: - -unsigned long long f(unsigned long long x, int y) { - return (x << y) | (x >> 64-y); -} -unsigned f2(unsigned x, int y){ - return (x << y) | (x >> 32-y); -} -unsigned long long f3(unsigned long long x){ - int y = 9; - return (x << y) | (x >> 64-y); -} -unsigned f4(unsigned x){ - int y = 10; - return (x << y) | (x >> 32-y); -} -unsigned long long f5(unsigned long long x, unsigned long long y) { - return (x << 8) | ((y >> 48) & 0xffull); -} -unsigned long long f6(unsigned long long x, unsigned long long y, int z) { - switch(z) { - case 1: - return (x << 8) | ((y >> 48) & 0xffull); - case 2: - return (x << 16) | ((y >> 40) & 0xffffull); - case 3: - return (x << 24) | ((y >> 32) & 0xffffffull); - case 4: - return (x << 32) | ((y >> 24) & 0xffffffffull); - default: - return (x << 40) | ((y >> 16) & 0xffffffffffull); - } -} - -On X86-64, we only handle f2/f3/f4 right. On x86-32, a few of these -generate truly horrible code, instead of using shld and friends. On -ARM, we end up with calls to L___lshrdi3/L___ashldi3 in f, which is -badness. PPC64 misses f, f5 and f6. CellSPU aborts in isel. - -//===---------------------------------------------------------------------===// - -We do a number of simplifications in simplify libcalls to strength reduce -standard library functions, but we don't currently merge them together. For -example, it is useful to merge memcpy(a,b,strlen(b)) -> strcpy. This can only -be done safely if "b" isn't modified between the strlen and memcpy of course. 
- -//===---------------------------------------------------------------------===// - -We compile this program: (from GCC PR11680) -http://gcc.gnu.org/bugzilla/attachment.cgi?id=4487 - -Into code that runs the same speed in fast/slow modes, but both modes run 2x -slower than when compiled with GCC (either 4.0 or 4.2): - -$ llvm-g++ perf.cpp -O3 -fno-exceptions -$ time ./a.out fast -1.821u 0.003s 0:01.82 100.0% 0+0k 0+0io 0pf+0w - -$ g++ perf.cpp -O3 -fno-exceptions -$ time ./a.out fast -0.821u 0.001s 0:00.82 100.0% 0+0k 0+0io 0pf+0w - -It looks like we are making the same inlining decisions, so this may be raw -codegen badness or something else (haven't investigated). - -//===---------------------------------------------------------------------===// - -We miss some instcombines for stuff like this: -void bar (void); -void foo (unsigned int a) { - /* This one is equivalent to a >= (3 << 2). */ - if ((a >> 2) >= 3) - bar (); -} - -A few other related ones are in GCC PR14753. - -//===---------------------------------------------------------------------===// - -Divisibility by constant can be simplified (according to GCC PR12849) from -being a mulhi to being a mul lo (cheaper). Testcase: - -void bar(unsigned n) { - if (n % 3 == 0) - true(); -} - -This is equivalent to the following, where 2863311531 is the multiplicative -inverse of 3, and 1431655766 is ((2^32)-1)/3+1: -void bar(unsigned n) { - if (n * 2863311531U < 1431655766U) - true(); -} - -The same transformation can work with an even modulo with the addition of a -rotate: rotate the result of the multiply to the right by the number of bits -which need to be zero for the condition to be true, and shrink the compare RHS -by the same amount. Unless the target supports rotates, though, that -transformation probably isn't worthwhile. - -The transformation can also easily be made to work with non-zero equality -comparisons: just transform, for example, "n % 3 == 1" to "(n-1) % 3 == 0". 
- -//===---------------------------------------------------------------------===// - -Better mod/ref analysis for scanf would allow us to eliminate the vtable and a -bunch of other stuff from this example (see PR1604): - -#include <cstdio> -struct test { - int val; - virtual ~test() {} -}; - -int main() { - test t; - std::scanf("%d", &t.val); - std::printf("%d\n", t.val); -} - -//===---------------------------------------------------------------------===// - -These functions perform the same computation, but produce different assembly. - -define i8 @select(i8 %x) readnone nounwind { - %A = icmp ult i8 %x, 250 - %B = select i1 %A, i8 0, i8 1 - ret i8 %B -} - -define i8 @addshr(i8 %x) readnone nounwind { - %A = zext i8 %x to i9 - %B = add i9 %A, 6 ;; 256 - 250 == 6 - %C = lshr i9 %B, 8 - %D = trunc i9 %C to i8 - ret i8 %D -} - -//===---------------------------------------------------------------------===// - -From gcc bug 24696: -int -f (unsigned long a, unsigned long b, unsigned long c) -{ - return ((a & (c - 1)) != 0) || ((b & (c - 1)) != 0); -} -int -f (unsigned long a, unsigned long b, unsigned long c) -{ - return ((a & (c - 1)) != 0) | ((b & (c - 1)) != 0); -} -Both should combine to ((a|b) & (c-1)) != 0. Currently not optimized with -"clang -emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - -From GCC Bug 20192: -#define PMD_MASK (~((1UL << 23) - 1)) -void clear_pmd_range(unsigned long start, unsigned long end) -{ - if (!(start & ~PMD_MASK) && !(end & ~PMD_MASK)) - f(); -} -The expression should optimize to something like -"!((start|end)&~PMD_MASK)". Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - -void a(int variable) -{ - if (variable == 4 || variable == 6) - bar(); -} -This should optimize to "if ((variable | 2) == 6)". 
Currently not -optimized with "clang -emit-llvm-bc | opt -std-compile-opts | llc". - -//===---------------------------------------------------------------------===// - -unsigned int f(unsigned int i, unsigned int n) {++i; if (i == n) ++i; return -i;} -unsigned int f2(unsigned int i, unsigned int n) {++i; i += i == n; return i;} -These should combine to the same thing. Currently, the first function -produces better code on X86. - -//===---------------------------------------------------------------------===// - -From GCC Bug 15784: -#define abs(x) x>0?x:-x -int f(int x, int y) -{ - return (abs(x)) >= 0; -} -This should optimize to x == INT_MIN. (With -fwrapv.) Currently not -optimized with "clang -emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - -From GCC Bug 14753: -void -rotate_cst (unsigned int a) -{ - a = (a << 10) | (a >> 22); - if (a == 123) - bar (); -} -void -minus_cst (unsigned int a) -{ - unsigned int tem; - - tem = 20 - a; - if (tem == 5) - bar (); -} -void -mask_gt (unsigned int a) -{ - /* This is equivalent to a > 15. */ - if ((a & ~7) > 8) - bar (); -} -void -rshift_gt (unsigned int a) -{ - /* This is equivalent to a > 23. */ - if ((a >> 2) > 5) - bar (); -} -All should simplify to a single comparison. All of these are -currently not optimized with "clang -emit-llvm-bc | opt --std-compile-opts". - -//===---------------------------------------------------------------------===// - -From GCC Bug 32605: -int c(int* x) {return (char*)x+2 == (char*)x;} -Should combine to 0. Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts" (although llc can optimize it). - -//===---------------------------------------------------------------------===// - -int a(unsigned b) {return ((b << 31) | (b << 30)) >> 31;} -Should be combined to "((b >> 1) | b) & 1". Currently not optimized -with "clang -emit-llvm-bc | opt -std-compile-opts". 
- -//===---------------------------------------------------------------------===// - -unsigned a(unsigned x, unsigned y) { return x | (y & 1) | (y & 2);} -Should combine to "x | (y & 3)". Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - -int a(int a, int b, int c) {return (~a & c) | ((c|a) & b);} -Should fold to "(~a & c) | (a & b)". Currently not optimized with -"clang -emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - -int a(int a,int b) {return (~(a|b))|a;} -Should fold to "a|~b". Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - -int a(int a, int b) {return (a&&b) || (a&&!b);} -Should fold to "a". Currently not optimized with "clang -emit-llvm-bc -| opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - -int a(int a, int b, int c) {return (a&&b) || (!a&&c);} -Should fold to "a ? b : c", or at least something sane. Currently not -optimized with "clang -emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - -int a(int a, int b, int c) {return (a&&b) || (a&&c) || (a&&b&&c);} -Should fold to a && (b || c). Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - -int a(int x) {return x | ((x & 8) ^ 8);} -Should combine to x | 8. Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - -int a(int x) {return x ^ ((x & 8) ^ 8);} -Should also combine to x | 8. Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts". 
- -//===---------------------------------------------------------------------===// - -int a(int x) {return (x & 8) == 0 ? -1 : -9;} -Should combine to (x | -9) ^ 8. Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - -int a(int x) {return (x & 8) == 0 ? -9 : -1;} -Should combine to x | -9. Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - -int a(int x) {return ((x | -9) ^ 8) & x;} -Should combine to x & -9. Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - -unsigned a(unsigned a) {return a * 0x11111111 >> 28 & 1;} -Should combine to "a * 0x88888888 >> 31". Currently not optimized -with "clang -emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - -unsigned a(char* x) {if ((*x & 32) == 0) return b();} -There's an unnecessary zext in the generated code with "clang --emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - -unsigned a(unsigned long long x) {return 40 * (x >> 1);} -Should combine to "20 * (((unsigned)x) & -2)". Currently not -optimized with "clang -emit-llvm-bc | opt -std-compile-opts". 
- -//===---------------------------------------------------------------------===// - -This was noticed in the entryblock for grokdeclarator in 403.gcc: - - %tmp = icmp eq i32 %decl_context, 4 - %decl_context_addr.0 = select i1 %tmp, i32 3, i32 %decl_context - %tmp1 = icmp eq i32 %decl_context_addr.0, 1 - %decl_context_addr.1 = select i1 %tmp1, i32 0, i32 %decl_context_addr.0 - -tmp1 should be simplified to something like: - (!tmp || decl_context == 1) - -This allows recursive simplifications, tmp1 is used all over the place in -the function, e.g. by: - - %tmp23 = icmp eq i32 %decl_context_addr.1, 0 ; <i1> [#uses=1] - %tmp24 = xor i1 %tmp1, true ; <i1> [#uses=1] - %or.cond8 = and i1 %tmp23, %tmp24 ; <i1> [#uses=1] - -later. - -//===---------------------------------------------------------------------===// - -[STORE SINKING] - -Store sinking: This code: - -void f (int n, int *cond, int *res) { - int i; - *res = 0; - for (i = 0; i < n; i++) - if (*cond) - *res ^= 234; /* (*) */ -} - -On this function GVN hoists the fully redundant value of *res, but nothing -moves the store out. This gives us this code: - -bb: ; preds = %bb2, %entry - %.rle = phi i32 [ 0, %entry ], [ %.rle6, %bb2 ] - %i.05 = phi i32 [ 0, %entry ], [ %indvar.next, %bb2 ] - %1 = load i32* %cond, align 4 - %2 = icmp eq i32 %1, 0 - br i1 %2, label %bb2, label %bb1 - -bb1: ; preds = %bb - %3 = xor i32 %.rle, 234 - store i32 %3, i32* %res, align 4 - br label %bb2 - -bb2: ; preds = %bb, %bb1 - %.rle6 = phi i32 [ %3, %bb1 ], [ %.rle, %bb ] - %indvar.next = add i32 %i.05, 1 - %exitcond = icmp eq i32 %indvar.next, %n - br i1 %exitcond, label %return, label %bb - -DSE should sink partially dead stores to get the store out of the loop. 
- -Here's another partial dead case: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12395 - -//===---------------------------------------------------------------------===// - -Scalar PRE hoists the mul in the common block up to the else: - -int test (int a, int b, int c, int g) { - int d, e; - if (a) - d = b * c; - else - d = b - c; - e = b * c + g; - return d + e; -} - -It would be better to do the mul once to reduce codesize above the if. -This is GCC PR38204. - -//===---------------------------------------------------------------------===// - -[STORE SINKING] - -GCC PR37810 is an interesting case where we should sink load/store reload -into the if block and outside the loop, so we don't reload/store it on the -non-call path. - -for () { - *P += 1; - if () - call(); - else - ... --> -tmp = *P -for () { - tmp += 1; - if () { - *P = tmp; - call(); - tmp = *P; - } else ... -} -*P = tmp; - -We now hoist the reload after the call (Transforms/GVN/lpre-call-wrap.ll), but -we don't sink the store. We need partially dead store sinking. - -//===---------------------------------------------------------------------===// - -[LOAD PRE CRIT EDGE SPLITTING] - -GCC PR37166: Sinking of loads prevents SROA'ing the "g" struct on the stack -leading to excess stack traffic. This could be handled by GVN with some crazy -symbolic phi translation. The code we get looks like (g is on the stack): - -bb2: ; preds = %bb1 -.. - %9 = getelementptr %struct.f* %g, i32 0, i32 0 - store i32 %8, i32* %9, align 4 - br label %bb3 - -bb3: ; preds = %bb1, %bb2, %bb - %c_addr.0 = phi %struct.f* [ %g, %bb2 ], [ %c, %bb ], [ %c, %bb1 ] - %b_addr.0 = phi %struct.f* [ %b, %bb2 ], [ %g, %bb ], [ %b, %bb1 ] - %10 = getelementptr %struct.f* %c_addr.0, i32 0, i32 0 - %11 = load i32* %10, align 4 - -%11 is partially redundant, and in BB2 it should have the value %8. - -GCC PR33344 and PR35287 are similar cases.
- - -//===---------------------------------------------------------------------===// - -[LOAD PRE] - -There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the -GCC testsuite, ones we don't get yet are (checked through loadpre25): - -[CRIT EDGE BREAKING] -loadpre3.c predcom-4.c - -[PRE OF READONLY CALL] -loadpre5.c - -[TURN SELECT INTO BRANCH] -loadpre14.c loadpre15.c - -actually a conditional increment: loadpre18.c loadpre19.c - - -//===---------------------------------------------------------------------===// - -[SCALAR PRE] -There are many PRE testcases in testsuite/gcc.dg/tree-ssa/ssa-pre-*.c in the -GCC testsuite. - -//===---------------------------------------------------------------------===// - -There are some interesting cases in testsuite/gcc.dg/tree-ssa/pred-comm* in the -GCC testsuite. For example, we get the first example in predcom-1.c, but -miss the second one: - -unsigned fib[1000]; -unsigned avg[1000]; - -__attribute__ ((noinline)) -void count_averages(int n) { - int i; - for (i = 1; i < n; i++) - avg[i] = (((unsigned long) fib[i - 1] + fib[i] + fib[i + 1]) / 3) & 0xffff; -} - -which compiles into two loads instead of one in the loop. - -predcom-2.c is the same as predcom-1.c - -predcom-3.c is very similar but needs loads feeding each other instead of -store->load. - - -//===---------------------------------------------------------------------===// - -[ALIAS ANALYSIS] - -Type based alias analysis: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14705 - -We should do better analysis of posix_memalign. At the least it should -no-capture its pointer argument, at best, we should know that the out-value -result doesn't point to anything (like malloc). One example of this is in -SingleSource/Benchmarks/Misc/dt.c - -//===---------------------------------------------------------------------===// - -A/B get pinned to the stack because we turn an if/then into a select instead -of PRE'ing the load/store. 
This may be fixable in instcombine: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37892 - -struct X { int i; }; -int foo (int x) { - struct X a; - struct X b; - struct X *p; - a.i = 1; - b.i = 2; - if (x) - p = &a; - else - p = &b; - return p->i; -} - -//===---------------------------------------------------------------------===// - -Interesting missed case because of control flow flattening (should be 2 loads): -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26629 -With: llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | - opt -mem2reg -gvn -instcombine | llvm-dis -we miss it because we need 1) CRIT EDGE 2) MULTIPLE DIFFERENT -VALS PRODUCED BY ONE BLOCK OVER DIFFERENT PATHS - -//===---------------------------------------------------------------------===// - -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19633 -We could eliminate the branch condition here, loading from null is undefined: - -struct S { int w, x, y, z; }; -struct T { int r; struct S s; }; -void bar (struct S, int); -void foo (int a, struct T b) -{ - struct S *c = 0; - if (a) - c = &b.s; - bar (*c, a); -} - -//===---------------------------------------------------------------------===// - -simplifylibcalls should do several optimizations for strspn/strcspn: - -strcspn(x, "") -> strlen(x) -strcspn("", x) -> 0 -strspn("", x) -> 0 -strspn(x, "") -> strlen(x) -strspn(x, "a") -> strchr(x, 'a')-x - -strcspn(x, "a") -> inlined loop for up to 3 letters (similarly for strspn): - -size_t __strcspn_c3 (__const char *__s, int __reject1, int __reject2, - int __reject3) { - register size_t __result = 0; - while (__s[__result] != '\0' && __s[__result] != __reject1 && - __s[__result] != __reject2 && __s[__result] != __reject3) - ++__result; - return __result; -} - -This should turn into a switch on the character. See PR3253 for some notes on -codegen. - -456.hmmer apparently uses strcspn and strspn a lot. 471.omnetpp uses strspn. 
- -//===---------------------------------------------------------------------===// - -"gas" uses this idiom: - else if (strchr ("+-/*%|&^:[]()~", *intel_parser.op_string)) -.. - else if (strchr ("<>", *intel_parser.op_string) - -Those should be turned into a switch. - -//===---------------------------------------------------------------------===// - -252.eon contains this interesting code: - - %3072 = getelementptr [100 x i8]* %tempString, i32 0, i32 0 - %3073 = call i8* @strcpy(i8* %3072, i8* %3071) nounwind - %strlen = call i32 @strlen(i8* %3072) ; uses = 1 - %endptr = getelementptr [100 x i8]* %tempString, i32 0, i32 %strlen - call void @llvm.memcpy.i32(i8* %endptr, - i8* getelementptr ([5 x i8]* @"\01LC42", i32 0, i32 0), i32 5, i32 1) - %3074 = call i32 @strlen(i8* %endptr) nounwind readonly - -This is interesting for a couple reasons. First, in this: - - %3073 = call i8* @strcpy(i8* %3072, i8* %3071) nounwind - %strlen = call i32 @strlen(i8* %3072) - -The strlen could be replaced with: %strlen = sub %3072, %3073, because the -strcpy call returns a pointer to the end of the string. Based on that, the -endptr GEP just becomes equal to 3073, which eliminates a strlen call and GEP. - -Second, the memcpy+strlen strlen can be replaced with: - - %3074 = call i32 @strlen([5 x i8]* @"\01LC42") nounwind readonly - -Because the destination was just copied into the specified memory buffer. This, -in turn, can be constant folded to "4". - -In other code, it contains: - - %endptr6978 = bitcast i8* %endptr69 to i32* - store i32 7107374, i32* %endptr6978, align 1 - %3167 = call i32 @strlen(i8* %endptr69) nounwind readonly - -Which could also be constant folded. Whatever is producing this should probably -be fixed to leave this as a memcpy from a string. 
- -Further, eon also has an interesting partially redundant strlen call: - -bb8: ; preds = %_ZN18eonImageCalculatorC1Ev.exit - %682 = getelementptr i8** %argv, i32 6 ; <i8**> [#uses=2] - %683 = load i8** %682, align 4 ; <i8*> [#uses=4] - %684 = load i8* %683, align 1 ; <i8> [#uses=1] - %685 = icmp eq i8 %684, 0 ; <i1> [#uses=1] - br i1 %685, label %bb10, label %bb9 - -bb9: ; preds = %bb8 - %686 = call i32 @strlen(i8* %683) nounwind readonly - %687 = icmp ugt i32 %686, 254 ; <i1> [#uses=1] - br i1 %687, label %bb10, label %bb11 - -bb10: ; preds = %bb9, %bb8 - %688 = call i32 @strlen(i8* %683) nounwind readonly - -This could be eliminated by doing the strlen once in bb8, saving code size and -improving perf on the bb8->9->10 path. - -//===---------------------------------------------------------------------===// - -I see an interesting fully redundant call to strlen left in 186.crafty:InputMove -which looks like: - %movetext11 = getelementptr [128 x i8]* %movetext, i32 0, i32 0 - - -bb62: ; preds = %bb55, %bb53 - %promote.0 = phi i32 [ %169, %bb55 ], [ 0, %bb53 ] - %171 = call i32 @strlen(i8* %movetext11) nounwind readonly align 1 - %172 = add i32 %171, -1 ; <i32> [#uses=1] - %173 = getelementptr [128 x i8]* %movetext, i32 0, i32 %172 - -... no stores ... - br i1 %or.cond, label %bb65, label %bb72 - -bb65: ; preds = %bb62 - store i8 0, i8* %173, align 1 - br label %bb72 - -bb72: ; preds = %bb65, %bb62 - %trank.1 = phi i32 [ %176, %bb65 ], [ -1, %bb62 ] - %177 = call i32 @strlen(i8* %movetext11) nounwind readonly align 1 - -Note that on the bb62->bb72 path, that the %177 strlen call is partially -redundant with the %171 call. At worst, we could shove the %177 strlen call -up into the bb65 block moving it out of the bb62->bb72 path. However, note -that bb65 stores to the string, zeroing out the last byte. This means that on -that path the value of %177 is actually just %171-1. A sub is cheaper than a -strlen! 
- -This pattern repeats several times, basically doing: - - A = strlen(P); - P[A-1] = 0; - B = strlen(P); - where it is "obvious" that B = A-1. - -//===---------------------------------------------------------------------===// - -186.crafty also contains this code: - -%1906 = call i32 @strlen(i8* getelementptr ([32 x i8]* @pgn_event, i32 0,i32 0)) -%1907 = getelementptr [32 x i8]* @pgn_event, i32 0, i32 %1906 -%1908 = call i8* @strcpy(i8* %1907, i8* %1905) nounwind align 1 -%1909 = call i32 @strlen(i8* getelementptr ([32 x i8]* @pgn_event, i32 0,i32 0)) -%1910 = getelementptr [32 x i8]* @pgn_event, i32 0, i32 %1909 - -The last strlen is computable as 1908-@pgn_event, which means 1910=1908. - -//===---------------------------------------------------------------------===// - -186.crafty has this interesting pattern with the "out.4543" variable: - -call void @llvm.memcpy.i32( - i8* getelementptr ([10 x i8]* @out.4543, i32 0, i32 0), - i8* getelementptr ([7 x i8]* @"\01LC28700", i32 0, i32 0), i32 7, i32 1) -%101 = call@printf(i8* ... @out.4543, i32 0, i32 0)) nounwind - -It is basically doing: - - memcpy(globalarray, "string"); - printf(..., globalarray); - -Anyway, by knowing that printf just reads the memory and forward substituting -the string directly into the printf, this eliminates reads from globalarray. -Since this pattern occurs frequently in crafty (due to the "DisplayTime" and -other similar functions) there are many stores to "out". Once all the printfs -stop using "out", all that is left is the memcpy's into it. This should allow -globalopt to remove the "stored only" global. - -//===---------------------------------------------------------------------===// - -This code: - -define inreg i32 @foo(i8* inreg %p) nounwind { - %tmp0 = load i8* %p - %tmp1 = ashr i8 %tmp0, 5 - %tmp2 = sext i8 %tmp1 to i32 - ret i32 %tmp2 -} - -could be dagcombine'd to a sign-extending load with a shift. 
-For example, on x86 this currently gets this: - - movb (%eax), %al - sarb $5, %al - movsbl %al, %eax - -while it could get this: - - movsbl (%eax), %eax - sarl $5, %eax - -//===---------------------------------------------------------------------===// - -GCC PR31029: - -int test(int x) { return 1-x == x; } // --> return false -int test2(int x) { return 2-x == x; } // --> return x == 1 ? - -Always foldable for odd constants, what is the rule for even? - -//===---------------------------------------------------------------------===// - -PR 3381: GEP to field of size 0 inside a struct could be turned into GEP -for next field in struct (which is at same address). - -For example: store of float into { {{}}, float } could be turned into a store to -the float directly. - -//===---------------------------------------------------------------------===// - -#include <math.h> -double foo(double a) { return sin(a); } - -This compiles into this on x86-64 Linux: -foo: - subq $8, %rsp - call sin - addq $8, %rsp - ret -vs: - -foo: - jmp sin - -//===---------------------------------------------------------------------===// - -The arg promotion pass should make use of nocapture to make its alias analysis -stuff much more precise. 
- -//===---------------------------------------------------------------------===// - -The following functions should be optimized to use a select instead of a -branch (from gcc PR40072): - -char char_int(int m) {if(m>7) return 0; return m;} -int int_char(char m) {if(m>7) return 0; return m;} - -//===---------------------------------------------------------------------===// - -int func(int a, int b) { if (a & 0x80) b |= 0x80; else b &= ~0x80; return b; } - -Generates this: - -define i32 @func(i32 %a, i32 %b) nounwind readnone ssp { -entry: - %0 = and i32 %a, 128 ; <i32> [#uses=1] - %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] - %2 = or i32 %b, 128 ; <i32> [#uses=1] - %3 = and i32 %b, -129 ; <i32> [#uses=1] - %b_addr.0 = select i1 %1, i32 %3, i32 %2 ; <i32> [#uses=1] - ret i32 %b_addr.0 -} - -However, it's functionally equivalent to: - - b = (b & ~0x80) | (a & 0x80); - -Which generates this: - -define i32 @func(i32 %a, i32 %b) nounwind readnone ssp { -entry: - %0 = and i32 %b, -129 ; <i32> [#uses=1] - %1 = and i32 %a, 128 ; <i32> [#uses=1] - %2 = or i32 %0, %1 ; <i32> [#uses=1] - ret i32 %2 -} - -This can be generalized for other forms: - - b = (b & ~0x80) | (a & 0x40) << 1; - -//===---------------------------------------------------------------------===// - -These two functions produce different code. 
They shouldn't: - -#include <stdint.h> - -uint8_t p1(uint8_t b, uint8_t a) { - b = (b & ~0xc0) | (a & 0xc0); - return (b); -} - -uint8_t p2(uint8_t b, uint8_t a) { - b = (b & ~0x40) | (a & 0x40); - b = (b & ~0x80) | (a & 0x80); - return (b); -} - -define zeroext i8 @p1(i8 zeroext %b, i8 zeroext %a) nounwind readnone ssp { -entry: - %0 = and i8 %b, 63 ; <i8> [#uses=1] - %1 = and i8 %a, -64 ; <i8> [#uses=1] - %2 = or i8 %1, %0 ; <i8> [#uses=1] - ret i8 %2 -} - -define zeroext i8 @p2(i8 zeroext %b, i8 zeroext %a) nounwind readnone ssp { -entry: - %0 = and i8 %b, 63 ; <i8> [#uses=1] - %.masked = and i8 %a, 64 ; <i8> [#uses=1] - %1 = and i8 %a, -128 ; <i8> [#uses=1] - %2 = or i8 %1, %0 ; <i8> [#uses=1] - %3 = or i8 %2, %.masked ; <i8> [#uses=1] - ret i8 %3 -} - -//===---------------------------------------------------------------------===// - -IPSCCP does not currently propagate argument dependent constants through -functions where it does not know all of the callers. This includes functions -with normal external linkage as well as templates, C99 inline functions etc. -Specifically, it does nothing to: - -define i32 @test(i32 %x, i32 %y, i32 %z) nounwind { -entry: - %0 = add nsw i32 %y, %z - %1 = mul i32 %0, %x - %2 = mul i32 %y, %z - %3 = add nsw i32 %1, %2 - ret i32 %3 -} - -define i32 @test2() nounwind { -entry: - %0 = call i32 @test(i32 1, i32 2, i32 4) nounwind - ret i32 %0 -} - -It would be interesting to extend IPSCCP to be able to handle simple cases like -this, where all of the arguments to a call are constant. Because IPSCCP runs -before inlining, trivial templates and inline functions are not yet inlined. -The results for a function + set of constant arguments should be memoized in a -map. - -//===---------------------------------------------------------------------===// - -The libcall constant folding stuff should be moved out of SimplifyLibcalls into -libanalysis' constantfolding logic.
This would allow IPSCCP to be able to -handle simple things like this: - -static int foo(const char *X) { return strlen(X); } -int bar() { return foo("abcd"); } - -//===---------------------------------------------------------------------===// - -InstCombine should use SimplifyDemandedBits to remove the or instruction: - -define i1 @test(i8 %x, i8 %y) { - %A = or i8 %x, 1 - %B = icmp ugt i8 %A, 3 - ret i1 %B -} - -Currently instcombine calls SimplifyDemandedBits with either all bits or just -the sign bit, if the comparison is obviously a sign test. In this case, we only -need all but the bottom two bits from %A, and if we gave that mask to SDB it -would delete the or instruction for us. - -//===---------------------------------------------------------------------===// - -functionattrs doesn't know much about memcpy/memset. This function should be -marked readnone rather than readonly, since it only twiddles local memory, but -functionattrs doesn't handle memset/memcpy/memmove aggressively: - -struct X { int *p; int *q; }; -int foo() { - int i = 0, j = 1; - struct X x, y; - int **p; - y.p = &i; - x.q = &j; - p = __builtin_memcpy (&x, &y, sizeof (int *)); - return **p; -} - -//===---------------------------------------------------------------------===// - -Missed instcombine transformation: -define i1 @a(i32 %x) nounwind readnone { -entry: - %cmp = icmp eq i32 %x, 30 - %sub = add i32 %x, -30 - %cmp2 = icmp ugt i32 %sub, 9 - %or = or i1 %cmp, %cmp2 - ret i1 %or -} -This should be optimized to a single compare. Testcase derived from gcc. - -//===---------------------------------------------------------------------===// - -Missed instcombine transformation: -void b(); -void a(int x) { if (((1<<x)&8)==0) b(); } - -The shift should be optimized out. Testcase derived from gcc. 
- -//===---------------------------------------------------------------------===// - -Missed instcombine or reassociate transformation: -int a(int a, int b) { return (a==12)&(b>47)&(b<58); } - -The sgt and slt should be combined into a single comparison. Testcase derived -from gcc. - -//===---------------------------------------------------------------------===// - -Missed instcombine transformation: -define i32 @a(i32 %x) nounwind readnone { -entry: - %rem = srem i32 %x, 32 - %shl = shl i32 1, %rem - ret i32 %shl -} - -The srem can be transformed to an and because if x is negative, the shift is -undefined. Testcase derived from gcc. - -//===---------------------------------------------------------------------===// - -Missed instcombine/dagcombine transformation: -define i32 @a(i32 %x, i32 %y) nounwind readnone { -entry: - %mul = mul i32 %y, -8 - %sub = sub i32 %x, %mul - ret i32 %sub -} - -Should compile to something like x+y*8, but currently compiles to an -inefficient result. Testcase derived from gcc. - -//===---------------------------------------------------------------------===// - -Missed instcombine/dagcombine transformation: -define void @lshift_lt(i8 zeroext %a) nounwind { -entry: - %conv = zext i8 %a to i32 - %shl = shl i32 %conv, 3 - %cmp = icmp ult i32 %shl, 33 - br i1 %cmp, label %if.then, label %if.end - -if.then: - tail call void @bar() nounwind - ret void - -if.end: - ret void -} -declare void @bar() nounwind - -The shift should be eliminated. Testcase derived from gcc. 
- -//===---------------------------------------------------------------------===// - -These compile into different code, one gets recognized as a switch and the -other doesn't due to phase ordering issues (PR6212): - -int test1(int mainType, int subType) { - if (mainType == 7) - subType = 4; - else if (mainType == 9) - subType = 6; - else if (mainType == 11) - subType = 9; - return subType; -} - -int test2(int mainType, int subType) { - if (mainType == 7) - subType = 4; - if (mainType == 9) - subType = 6; - if (mainType == 11) - subType = 9; - return subType; -} - -//===---------------------------------------------------------------------===// - -The following test case (from PR6576): - -define i32 @mul(i32 %a, i32 %b) nounwind readnone { -entry: - %cond1 = icmp eq i32 %b, 0 ; <i1> [#uses=1] - br i1 %cond1, label %exit, label %bb.nph -bb.nph: ; preds = %entry - %tmp = mul i32 %b, %a ; <i32> [#uses=1] - ret i32 %tmp -exit: ; preds = %entry - ret i32 0 -} - -could be reduced to: - -define i32 @mul(i32 %a, i32 %b) nounwind readnone { -entry: - %tmp = mul i32 %b, %a - ret i32 %tmp -} - -//===---------------------------------------------------------------------===// - -We should use DSE + llvm.lifetime.end to delete dead vtable pointer updates. -See GCC PR34949 - -Another interesting case is that something related could be used for variables -that go const after their ctor has finished. In these cases, globalopt (which -can statically run the constructor) could mark the global const (so it gets put -in the readonly section). A testcase would be: - -#include <complex> -using namespace std; -const complex<char> should_be_in_rodata (42,-42); -complex<char> should_be_in_data (42,-42); -complex<char> should_be_in_bss; - -Where we currently evaluate the ctors but the globals don't become const because -the optimizer doesn't know they "become const" after the ctor is done. See -GCC PR4131 for more examples. 
- -//===---------------------------------------------------------------------===// - -In this code: - -long foo(long x) { - return x > 1 ? x : 1; -} - -LLVM emits a comparison with 1 instead of 0. 0 would be equivalent -and cheaper on most targets. - -LLVM prefers comparisons with zero over non-zero in general, but in this -case it chooses instead to keep the max operation obvious. - -//===---------------------------------------------------------------------===// - -Take the following testcase on x86-64 (similar testcases exist for all targets -with addc/adde): - -define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b, -i64 %c) nounwind { -entry: - %0 = zext i64 %a to i128 ; <i128> [#uses=1] - %1 = zext i64 %b to i128 ; <i128> [#uses=1] - %2 = add i128 %1, %0 ; <i128> [#uses=2] - %3 = zext i64 %c to i128 ; <i128> [#uses=1] - %4 = shl i128 %3, 64 ; <i128> [#uses=1] - %5 = add i128 %4, %2 ; <i128> [#uses=1] - %6 = lshr i128 %5, 64 ; <i128> [#uses=1] - %7 = trunc i128 %6 to i64 ; <i64> [#uses=1] - store i64 %7, i64* %s, align 8 - %8 = trunc i128 %2 to i64 ; <i64> [#uses=1] - store i64 %8, i64* %t, align 8 - ret void -} - -Generated code: - addq %rcx, %rdx - movl $0, %eax - adcq $0, %rax - addq %r8, %rax - movq %rax, (%rdi) - movq %rdx, (%rsi) - ret - -Expected code: - addq %rcx, %rdx - adcq $0, %r8 - movq %r8, (%rdi) - movq %rdx, (%rsi) - ret - -The generated SelectionDAG has an ADD of an ADDE, where both operands of the -ADDE are zero. Replacing one of the operands of the ADDE with the other operand -of the ADD, and replacing the ADD with the ADDE, should give the desired result. - -(That said, we are doing a lot better than gcc on this testcase.
:) ) - -//===---------------------------------------------------------------------===// - -Switch lowering generates less than ideal code for the following switch: -define void @a(i32 %x) nounwind { -entry: - switch i32 %x, label %if.end [ - i32 0, label %if.then - i32 1, label %if.then - i32 2, label %if.then - i32 3, label %if.then - i32 5, label %if.then - ] -if.then: - tail call void @foo() nounwind - ret void -if.end: - ret void -} -declare void @foo() - -Generated code on x86-64 (other platforms give similar results): -a: - cmpl $5, %edi - ja .LBB0_2 - movl %edi, %eax - movl $47, %ecx - btq %rax, %rcx - jb .LBB0_3 -.LBB0_2: - ret -.LBB0_3: - jmp foo # TAILCALL - -The movl+movl+btq+jb could be simplified to a cmpl+jne. - -Or, if we wanted to be really clever, we could simplify the whole thing to -something like the following, which eliminates a branch: - xorl $1, %edi - cmpl $4, %edi - ja .LBB0_2 - ret -.LBB0_2: - jmp foo # TAILCALL -//===---------------------------------------------------------------------===// -Given a branch where the two target blocks are identical ("ret i32 %b" in -both), simplifycfg will simplify them away. But not so for a switch statement: - -define i32 @f(i32 %a, i32 %b) nounwind readnone { -entry: - switch i32 %a, label %bb3 [ - i32 4, label %bb - i32 6, label %bb - ] - -bb: ; preds = %entry, %entry - ret i32 %b - -bb3: ; preds = %entry - ret i32 %b -} -//===---------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Sparc/AsmPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/Sparc/AsmPrinter/CMakeLists.txt deleted file mode 100644 index da629f6..0000000 --- a/contrib/llvm/lib/Target/Sparc/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMSparcAsmPrinter - SparcAsmPrinter.cpp - ) -add_dependencies(LLVMSparcAsmPrinter SparcCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/Sparc/AsmPrinter/Makefile b/contrib/llvm/lib/Target/Sparc/AsmPrinter/Makefile deleted file mode 100644 index fe47538..0000000 --- a/contrib/llvm/lib/Target/Sparc/AsmPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/Sparc/AsmPrinter/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMSparcAsmPrinter - -# Hack: we need to include 'main' Sparc target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/Sparc/CMakeLists.txt b/contrib/llvm/lib/Target/Sparc/CMakeLists.txt deleted file mode 100644 index 684cadf..0000000 --- a/contrib/llvm/lib/Target/Sparc/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS Sparc.td) - -tablegen(SparcGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(SparcGenRegisterNames.inc -gen-register-enums) -tablegen(SparcGenRegisterInfo.inc -gen-register-desc) -tablegen(SparcGenInstrNames.inc -gen-instr-enums) -tablegen(SparcGenInstrInfo.inc -gen-instr-desc) -tablegen(SparcGenAsmWriter.inc -gen-asm-writer) -tablegen(SparcGenDAGISel.inc -gen-dag-isel) -tablegen(SparcGenSubtarget.inc -gen-subtarget) -tablegen(SparcGenCallingConv.inc -gen-callingconv) - -add_llvm_target(SparcCodeGen - DelaySlotFiller.cpp - FPMover.cpp - SparcInstrInfo.cpp - SparcISelDAGToDAG.cpp - SparcISelLowering.cpp - SparcMCAsmInfo.cpp - SparcRegisterInfo.cpp - SparcSubtarget.cpp - SparcTargetMachine.cpp - SparcSelectionDAGInfo.cpp - ) - -target_link_libraries (LLVMSparcCodeGen 
LLVMSelectionDAG) diff --git a/contrib/llvm/lib/Target/Sparc/Makefile b/contrib/llvm/lib/Target/Sparc/Makefile deleted file mode 100644 index e407848..0000000 --- a/contrib/llvm/lib/Target/Sparc/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -##===- lib/Target/Sparc/Makefile ---------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMSparcCodeGen -TARGET = Sparc - -# Make sure that tblgen is run, first thing. -BUILT_SOURCES = SparcGenRegisterInfo.h.inc SparcGenRegisterNames.inc \ - SparcGenRegisterInfo.inc SparcGenInstrNames.inc \ - SparcGenInstrInfo.inc SparcGenAsmWriter.inc \ - SparcGenDAGISel.inc SparcGenSubtarget.inc SparcGenCallingConv.inc - -DIRS = AsmPrinter TargetInfo - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Target/Sparc/README.txt b/contrib/llvm/lib/Target/Sparc/README.txt deleted file mode 100644 index b4991fe..0000000 --- a/contrib/llvm/lib/Target/Sparc/README.txt +++ /dev/null @@ -1,59 +0,0 @@ - -To-do ------ - -* Keep the address of the constant pool in a register instead of forming its - address all of the time. -* We can fold small constant offsets into the %hi/%lo references to constant - pool addresses as well. -* When in V9 mode, register allocate %icc[0-3]. -* Add support for isel'ing UMUL_LOHI instead of marking it as Expand. -* Emit the 'Branch on Integer Register with Prediction' instructions. It's - not clear how to write a pattern for this though: - -float %t1(int %a, int* %p) { - %C = seteq int %a, 0 - br bool %C, label %T, label %F -T: - store int 123, int* %p - br label %F -F: - ret float undef -} - -codegens to this: - -t1: - save -96, %o6, %o6 -1) subcc %i0, 0, %l0 -1) bne .LBBt1_2 ! F - nop -.LBBt1_1: ! T - or %g0, 123, %l0 - st %l0, [%i1] -.LBBt1_2: ! 
F - restore %g0, %g0, %g0 - retl - nop - -1) should be replaced with a brz in V9 mode. - -* Same as above, but emit conditional move on register zero (p192) in V9 - mode. Testcase: - -int %t1(int %a, int %b) { - %C = seteq int %a, 0 - %D = select bool %C, int %a, int %b - ret int %D -} - -* Emit MULX/[SU]DIVX instructions in V9 mode instead of fiddling - with the Y register, if they are faster. - -* Codegen bswap(load)/store(bswap) -> load/store ASI - -* Implement frame pointer elimination, e.g. eliminate save/restore for - leaf fns. -* Fill delay slots - -* Implement JIT support diff --git a/contrib/llvm/lib/Target/Sparc/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/Sparc/TargetInfo/CMakeLists.txt deleted file mode 100644 index 870b56a..0000000 --- a/contrib/llvm/lib/Target/Sparc/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMSparcInfo - SparcTargetInfo.cpp - ) - -add_dependencies(LLVMSparcInfo SparcCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/Sparc/TargetInfo/Makefile b/contrib/llvm/lib/Target/Sparc/TargetInfo/Makefile deleted file mode 100644 index 641ed87..0000000 --- a/contrib/llvm/lib/Target/Sparc/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/Sparc/TargetInfo/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMSparcInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/SystemZ/AsmPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/SystemZ/AsmPrinter/CMakeLists.txt deleted file mode 100644 index c6be83a..0000000 --- a/contrib/llvm/lib/Target/SystemZ/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMSystemZAsmPrinter - SystemZAsmPrinter.cpp - ) -add_dependencies(LLVMSystemZAsmPrinter SystemZCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/SystemZ/AsmPrinter/Makefile b/contrib/llvm/lib/Target/SystemZ/AsmPrinter/Makefile deleted file mode 100644 index 0f90ed3..0000000 --- a/contrib/llvm/lib/Target/SystemZ/AsmPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/SystemZ/AsmPrinter/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMSystemZAsmPrinter - -# Hack: we need to include 'main' SystemZ target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/SystemZ/CMakeLists.txt b/contrib/llvm/lib/Target/SystemZ/CMakeLists.txt deleted file mode 100644 index 880e56f..0000000 --- a/contrib/llvm/lib/Target/SystemZ/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS SystemZ.td) - -tablegen(SystemZGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(SystemZGenRegisterNames.inc -gen-register-enums) -tablegen(SystemZGenRegisterInfo.inc -gen-register-desc) -tablegen(SystemZGenInstrNames.inc -gen-instr-enums) -tablegen(SystemZGenInstrInfo.inc -gen-instr-desc) -tablegen(SystemZGenAsmWriter.inc -gen-asm-writer) -tablegen(SystemZGenDAGISel.inc -gen-dag-isel) -tablegen(SystemZGenCallingConv.inc -gen-callingconv) -tablegen(SystemZGenSubtarget.inc -gen-subtarget) - -add_llvm_target(SystemZCodeGen - SystemZISelDAGToDAG.cpp - SystemZISelLowering.cpp - SystemZInstrInfo.cpp - SystemZMCAsmInfo.cpp - SystemZRegisterInfo.cpp - SystemZSubtarget.cpp - SystemZTargetMachine.cpp - SystemZSelectionDAGInfo.cpp - ) - -target_link_libraries (LLVMSystemZCodeGen LLVMSelectionDAG) diff --git a/contrib/llvm/lib/Target/SystemZ/Makefile b/contrib/llvm/lib/Target/SystemZ/Makefile deleted file mode 100644 index 5b44090..0000000 --- a/contrib/llvm/lib/Target/SystemZ/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -##===- lib/Target/SystemZ/Makefile ---------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMSystemZCodeGen -TARGET = SystemZ - -# Make sure that tblgen is run, first thing. 
-BUILT_SOURCES = SystemZGenRegisterInfo.h.inc SystemZGenRegisterNames.inc \ - SystemZGenRegisterInfo.inc SystemZGenInstrNames.inc \ - SystemZGenInstrInfo.inc SystemZGenAsmWriter.inc \ - SystemZGenDAGISel.inc SystemZGenSubtarget.inc SystemZGenCallingConv.inc - -DIRS = AsmPrinter TargetInfo - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Target/SystemZ/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/SystemZ/TargetInfo/CMakeLists.txt deleted file mode 100644 index 743d8d3..0000000 --- a/contrib/llvm/lib/Target/SystemZ/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMSystemZInfo - SystemZTargetInfo.cpp - ) - -add_dependencies(LLVMSystemZInfo SystemZCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/SystemZ/TargetInfo/Makefile b/contrib/llvm/lib/Target/SystemZ/TargetInfo/Makefile deleted file mode 100644 index 0be80eb..0000000 --- a/contrib/llvm/lib/Target/SystemZ/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/SystemZ/TargetInfo/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMSystemZInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/X86/AsmParser/CMakeLists.txt b/contrib/llvm/lib/Target/X86/AsmParser/CMakeLists.txt deleted file mode 100644 index 40dbdd7..0000000 --- a/contrib/llvm/lib/Target/X86/AsmParser/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMX86AsmParser - X86AsmLexer.cpp - X86AsmParser.cpp - ) -add_dependencies(LLVMX86AsmParser X86CodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/X86/AsmParser/Makefile b/contrib/llvm/lib/Target/X86/AsmParser/Makefile deleted file mode 100644 index fb97607..0000000 --- a/contrib/llvm/lib/Target/X86/AsmParser/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/X86/AsmParser/Makefile -------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMX86AsmParser - -# Hack: we need to include 'main' x86 target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/X86/AsmPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/X86/AsmPrinter/CMakeLists.txt deleted file mode 100644 index 033973e..0000000 --- a/contrib/llvm/lib/Target/X86/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMX86AsmPrinter - X86ATTInstPrinter.cpp - X86IntelInstPrinter.cpp - X86InstComments.cpp - ) -add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/X86/AsmPrinter/Makefile b/contrib/llvm/lib/Target/X86/AsmPrinter/Makefile deleted file mode 100644 index c82aa33..0000000 --- a/contrib/llvm/lib/Target/X86/AsmPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/X86/AsmPrinter/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. 
-# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMX86AsmPrinter - -# Hack: we need to include 'main' x86 target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/X86/CMakeLists.txt b/contrib/llvm/lib/Target/X86/CMakeLists.txt deleted file mode 100644 index e9399f5..0000000 --- a/contrib/llvm/lib/Target/X86/CMakeLists.txt +++ /dev/null @@ -1,52 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS X86.td) - -tablegen(X86GenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(X86GenRegisterNames.inc -gen-register-enums) -tablegen(X86GenRegisterInfo.inc -gen-register-desc) -tablegen(X86GenDisassemblerTables.inc -gen-disassembler) -tablegen(X86GenInstrNames.inc -gen-instr-enums) -tablegen(X86GenInstrInfo.inc -gen-instr-desc) -tablegen(X86GenAsmWriter.inc -gen-asm-writer) -tablegen(X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1) -tablegen(X86GenAsmMatcher.inc -gen-asm-matcher) -tablegen(X86GenDAGISel.inc -gen-dag-isel) -tablegen(X86GenFastISel.inc -gen-fast-isel) -tablegen(X86GenCallingConv.inc -gen-callingconv) -tablegen(X86GenSubtarget.inc -gen-subtarget) -tablegen(X86GenEDInfo.inc -gen-enhanced-disassembly-info) - -set(sources - SSEDomainFix.cpp - X86AsmBackend.cpp - X86AsmPrinter.cpp - X86COFFMachineModuleInfo.cpp - X86CodeEmitter.cpp - X86ELFWriterInfo.cpp - X86FastISel.cpp - X86FloatingPoint.cpp - X86ISelDAGToDAG.cpp - X86ISelLowering.cpp - X86InstrInfo.cpp - X86JITInfo.cpp - X86MCAsmInfo.cpp - X86MCCodeEmitter.cpp - X86MCInstLower.cpp - X86RegisterInfo.cpp - X86SelectionDAGInfo.cpp - X86Subtarget.cpp - X86TargetMachine.cpp - X86TargetObjectFile.cpp - ) - -if( CMAKE_CL_64 ) - enable_language(ASM_MASM) - ADD_CUSTOM_COMMAND( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj - COMMAND ${CMAKE_ASM_MASM_COMPILER} /Fo 
${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj /c ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm - DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm - ) - set(sources ${sources} ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj) -endif() - -add_llvm_target(X86CodeGen ${sources}) - diff --git a/contrib/llvm/lib/Target/X86/Disassembler/CMakeLists.txt b/contrib/llvm/lib/Target/X86/Disassembler/CMakeLists.txt deleted file mode 100644 index 97589c0..0000000 --- a/contrib/llvm/lib/Target/X86/Disassembler/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMX86Disassembler - X86Disassembler.cpp - X86DisassemblerDecoder.c - ) -# workaround for hanging compilation on MSVC9 and 10 -if( MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) -set_property( - SOURCE X86Disassembler.cpp - PROPERTY COMPILE_FLAGS "/Od" - ) -endif() -add_dependencies(LLVMX86Disassembler X86CodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/X86/Disassembler/Makefile b/contrib/llvm/lib/Target/X86/Disassembler/Makefile deleted file mode 100644 index 8669fd8..0000000 --- a/contrib/llvm/lib/Target/X86/Disassembler/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/X86/Disassembler/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMX86Disassembler - -# Hack: we need to include 'main' x86 target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/X86/Makefile b/contrib/llvm/lib/Target/X86/Makefile deleted file mode 100644 index f4ff894..0000000 --- a/contrib/llvm/lib/Target/X86/Makefile +++ /dev/null @@ -1,25 +0,0 @@ -##===- lib/Target/X86/Makefile -----------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMX86CodeGen -TARGET = X86 - -# Make sure that tblgen is run, first thing. -BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \ - X86GenRegisterInfo.inc X86GenInstrNames.inc \ - X86GenInstrInfo.inc X86GenAsmWriter.inc X86GenAsmMatcher.inc \ - X86GenAsmWriter1.inc X86GenDAGISel.inc \ - X86GenDisassemblerTables.inc X86GenFastISel.inc \ - X86GenCallingConv.inc X86GenSubtarget.inc \ - X86GenEDInfo.inc - -DIRS = AsmPrinter AsmParser Disassembler TargetInfo - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/X86/README-FPStack.txt b/contrib/llvm/lib/Target/X86/README-FPStack.txt deleted file mode 100644 index 39efd2d..0000000 --- a/contrib/llvm/lib/Target/X86/README-FPStack.txt +++ /dev/null @@ -1,85 +0,0 @@ -//===---------------------------------------------------------------------===// -// Random ideas for the X86 backend: FP stack related stuff -//===---------------------------------------------------------------------===// - -//===---------------------------------------------------------------------===// - -Some targets (e.g. 
athlons) prefer freep to fstp ST(0): -http://gcc.gnu.org/ml/gcc-patches/2004-04/msg00659.html - -//===---------------------------------------------------------------------===// - -This should use fiadd on chips where it is profitable: -double foo(double P, int *I) { return P+*I; } - -We have fiadd patterns now but the followings have the same cost and -complexity. We need a way to specify the later is more profitable. - -def FpADD32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fadd RFP:$src1, - (extloadf64f32 addr:$src2)))]>; - // ST(0) = ST(0) + [mem32] - -def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fadd RFP:$src1, - (X86fild addr:$src2, i32)))]>; - // ST(0) = ST(0) + [mem32int] - -//===---------------------------------------------------------------------===// - -The FP stackifier should handle simple permutates to reduce number of shuffle -instructions, e.g. turning: - -fld P -> fld Q -fld Q fld P -fxch - -or: - -fxch -> fucomi -fucomi jl X -jg X - -Ideas: -http://gcc.gnu.org/ml/gcc-patches/2004-11/msg02410.html - - -//===---------------------------------------------------------------------===// - -Add a target specific hook to DAG combiner to handle SINT_TO_FP and -FP_TO_SINT when the source operand is already in memory. - -//===---------------------------------------------------------------------===// - -Open code rint,floor,ceil,trunc: -http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02006.html -http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02011.html - -Opencode the sincos[f] libcall. - -//===---------------------------------------------------------------------===// - -None of the FPStack instructions are handled in -X86RegisterInfo::foldMemoryOperand, which prevents the spiller from -folding spill code into the instructions. 
- -//===---------------------------------------------------------------------===// - -Currently the x86 codegen isn't very good at mixing SSE and FPStack -code: - -unsigned int foo(double x) { return x; } - -foo: - subl $20, %esp - movsd 24(%esp), %xmm0 - movsd %xmm0, 8(%esp) - fldl 8(%esp) - fisttpll (%esp) - movl (%esp), %eax - addl $20, %esp - ret - -This just requires being smarter when custom expanding fptoui. - -//===---------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/X86/README-MMX.txt b/contrib/llvm/lib/Target/X86/README-MMX.txt deleted file mode 100644 index a6c8616..0000000 --- a/contrib/llvm/lib/Target/X86/README-MMX.txt +++ /dev/null @@ -1,71 +0,0 @@ -//===---------------------------------------------------------------------===// -// Random ideas for the X86 backend: MMX-specific stuff. -//===---------------------------------------------------------------------===// - -//===---------------------------------------------------------------------===// - -This: - -#include <mmintrin.h> - -__v2si qux(int A) { - return (__v2si){ 0, A }; -} - -is compiled into: - -_qux: - subl $28, %esp - movl 32(%esp), %eax - movd %eax, %mm0 - movq %mm0, (%esp) - movl (%esp), %eax - movl %eax, 20(%esp) - movq %mm0, 8(%esp) - movl 12(%esp), %eax - movl %eax, 16(%esp) - movq 16(%esp), %mm0 - addl $28, %esp - ret - -Yuck! - -GCC gives us: - -_qux: - subl $12, %esp - movl 16(%esp), %eax - movl 20(%esp), %edx - movl $0, (%eax) - movl %edx, 4(%eax) - addl $12, %esp - ret $4 - -//===---------------------------------------------------------------------===// - -We generate crappy code for this: - -__m64 t() { - return _mm_cvtsi32_si64(1); -} - -_t: - subl $12, %esp - movl $1, %eax - movd %eax, %mm0 - movq %mm0, (%esp) - movl (%esp), %eax - movl 4(%esp), %edx - addl $12, %esp - ret - -The extra stack traffic is covered in the previous entry. 
But the other reason -is we are not smart about materializing constants in MMX registers. With -m64 - - movl $1, %eax - movd %eax, %mm0 - movd %mm0, %rax - ret - -We should be using a constantpool load instead: - movq LC0(%rip), %rax diff --git a/contrib/llvm/lib/Target/X86/README-SSE.txt b/contrib/llvm/lib/Target/X86/README-SSE.txt deleted file mode 100644 index f96b22f..0000000 --- a/contrib/llvm/lib/Target/X86/README-SSE.txt +++ /dev/null @@ -1,907 +0,0 @@ -//===---------------------------------------------------------------------===// -// Random ideas for the X86 backend: SSE-specific stuff. -//===---------------------------------------------------------------------===// - -//===---------------------------------------------------------------------===// - -SSE Variable shift can be custom lowered to something like this, which uses a -small table + unaligned load + shuffle instead of going through memory. - -__m128i_shift_right: - .byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 - .byte -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 - -... -__m128i shift_right(__m128i value, unsigned long offset) { - return _mm_shuffle_epi8(value, - _mm_loadu_si128((__m128 *) (___m128i_shift_right + offset))); -} - -//===---------------------------------------------------------------------===// - -SSE has instructions for doing operations on complex numbers, we should pattern -match them. Compiling this: - -_Complex float f32(_Complex float A, _Complex float B) { - return A+B; -} - -into: - -_f32: - movdqa %xmm0, %xmm2 - addss %xmm1, %xmm2 - pshufd $16, %xmm2, %xmm2 - pshufd $1, %xmm1, %xmm1 - pshufd $1, %xmm0, %xmm0 - addss %xmm1, %xmm0 - pshufd $16, %xmm0, %xmm1 - movdqa %xmm2, %xmm0 - unpcklps %xmm1, %xmm0 - ret - -seems silly. - - -//===---------------------------------------------------------------------===// - -Expand libm rounding functions inline: Significant speedups possible. 
-http://gcc.gnu.org/ml/gcc-patches/2006-10/msg00909.html - -//===---------------------------------------------------------------------===// - -When compiled with unsafemath enabled, "main" should enable SSE DAZ mode and -other fast SSE modes. - -//===---------------------------------------------------------------------===// - -Think about doing i64 math in SSE regs on x86-32. - -//===---------------------------------------------------------------------===// - -This testcase should have no SSE instructions in it, and only one load from -a constant pool: - -double %test3(bool %B) { - %C = select bool %B, double 123.412, double 523.01123123 - ret double %C -} - -Currently, the select is being lowered, which prevents the dag combiner from -turning 'select (load CPI1), (load CPI2)' -> 'load (select CPI1, CPI2)' - -The pattern isel got this one right. - -//===---------------------------------------------------------------------===// - -SSE should implement 'select_cc' using 'emulated conditional moves' that use -pcmp/pand/pandn/por to do a selection instead of a conditional branch: - -double %X(double %Y, double %Z, double %A, double %B) { - %C = setlt double %A, %B - %z = fadd double %Z, 0.0 ;; select operand is not a load - %D = select bool %C, double %Y, double %z - ret double %D -} - -We currently emit: - -_X: - subl $12, %esp - xorpd %xmm0, %xmm0 - addsd 24(%esp), %xmm0 - movsd 32(%esp), %xmm1 - movsd 16(%esp), %xmm2 - ucomisd 40(%esp), %xmm1 - jb LBB_X_2 -LBB_X_1: - movsd %xmm0, %xmm2 -LBB_X_2: - movsd %xmm2, (%esp) - fldl (%esp) - addl $12, %esp - ret - -//===---------------------------------------------------------------------===// - -Lower memcpy / memset to a series of SSE 128 bit move instructions when it's -feasible. - -//===---------------------------------------------------------------------===// - -Codegen: - if (copysign(1.0, x) == copysign(1.0, y)) -into: - if (x^y & mask) -when using SSE. 
- -//===---------------------------------------------------------------------===// - -Use movhps to update upper 64-bits of a v4sf value. Also movlps on lower half -of a v4sf value. - -//===---------------------------------------------------------------------===// - -Better codegen for vector_shuffles like this { x, 0, 0, 0 } or { x, 0, x, 0}. -Perhaps use pxor / xorp* to clear a XMM register first? - -//===---------------------------------------------------------------------===// - -External test Nurbs exposed some problems. Look for -__ZN15Nurbs_SSE_Cubic17TessellateSurfaceE, bb cond_next140. This is what icc -emits: - - movaps (%edx), %xmm2 #59.21 - movaps (%edx), %xmm5 #60.21 - movaps (%edx), %xmm4 #61.21 - movaps (%edx), %xmm3 #62.21 - movl 40(%ecx), %ebp #69.49 - shufps $0, %xmm2, %xmm5 #60.21 - movl 100(%esp), %ebx #69.20 - movl (%ebx), %edi #69.20 - imull %ebp, %edi #69.49 - addl (%eax), %edi #70.33 - shufps $85, %xmm2, %xmm4 #61.21 - shufps $170, %xmm2, %xmm3 #62.21 - shufps $255, %xmm2, %xmm2 #63.21 - lea (%ebp,%ebp,2), %ebx #69.49 - negl %ebx #69.49 - lea -3(%edi,%ebx), %ebx #70.33 - shll $4, %ebx #68.37 - addl 32(%ecx), %ebx #68.37 - testb $15, %bl #91.13 - jne L_B1.24 # Prob 5% #91.13 - -This is the llvm code after instruction scheduling: - -cond_next140 (0xa910740, LLVM BB @0xa90beb0): - %reg1078 = MOV32ri -3 - %reg1079 = ADD32rm %reg1078, %reg1068, 1, %NOREG, 0 - %reg1037 = MOV32rm %reg1024, 1, %NOREG, 40 - %reg1080 = IMUL32rr %reg1079, %reg1037 - %reg1081 = MOV32rm %reg1058, 1, %NOREG, 0 - %reg1038 = LEA32r %reg1081, 1, %reg1080, -3 - %reg1036 = MOV32rm %reg1024, 1, %NOREG, 32 - %reg1082 = SHL32ri %reg1038, 4 - %reg1039 = ADD32rr %reg1036, %reg1082 - %reg1083 = MOVAPSrm %reg1059, 1, %NOREG, 0 - %reg1034 = SHUFPSrr %reg1083, %reg1083, 170 - %reg1032 = SHUFPSrr %reg1083, %reg1083, 0 - %reg1035 = SHUFPSrr %reg1083, %reg1083, 255 - %reg1033 = SHUFPSrr %reg1083, %reg1083, 85 - %reg1040 = MOV32rr %reg1039 - %reg1084 = AND32ri8 %reg1039, 15 - CMP32ri8 
%reg1084, 0 - JE mbb<cond_next204,0xa914d30> - -Still ok. After register allocation: - -cond_next140 (0xa910740, LLVM BB @0xa90beb0): - %EAX = MOV32ri -3 - %EDX = MOV32rm <fi#3>, 1, %NOREG, 0 - ADD32rm %EAX<def&use>, %EDX, 1, %NOREG, 0 - %EDX = MOV32rm <fi#7>, 1, %NOREG, 0 - %EDX = MOV32rm %EDX, 1, %NOREG, 40 - IMUL32rr %EAX<def&use>, %EDX - %ESI = MOV32rm <fi#5>, 1, %NOREG, 0 - %ESI = MOV32rm %ESI, 1, %NOREG, 0 - MOV32mr <fi#4>, 1, %NOREG, 0, %ESI - %EAX = LEA32r %ESI, 1, %EAX, -3 - %ESI = MOV32rm <fi#7>, 1, %NOREG, 0 - %ESI = MOV32rm %ESI, 1, %NOREG, 32 - %EDI = MOV32rr %EAX - SHL32ri %EDI<def&use>, 4 - ADD32rr %EDI<def&use>, %ESI - %XMM0 = MOVAPSrm %ECX, 1, %NOREG, 0 - %XMM1 = MOVAPSrr %XMM0 - SHUFPSrr %XMM1<def&use>, %XMM1, 170 - %XMM2 = MOVAPSrr %XMM0 - SHUFPSrr %XMM2<def&use>, %XMM2, 0 - %XMM3 = MOVAPSrr %XMM0 - SHUFPSrr %XMM3<def&use>, %XMM3, 255 - SHUFPSrr %XMM0<def&use>, %XMM0, 85 - %EBX = MOV32rr %EDI - AND32ri8 %EBX<def&use>, 15 - CMP32ri8 %EBX, 0 - JE mbb<cond_next204,0xa914d30> - -This looks really bad. The problem is shufps is a destructive opcode. Since it -appears as operand two in more than one shufps ops. It resulted in a number of -copies. Note icc also suffers from the same problem. Either the instruction -selector should select pshufd or The register allocator can made the two-address -to three-address transformation. - -It also exposes some other problems. See MOV32ri -3 and the spills. - -//===---------------------------------------------------------------------===// - -Consider: - -__m128 test(float a) { - return _mm_set_ps(0.0, 0.0, 0.0, a*a); -} - -This compiles into: - -movss 4(%esp), %xmm1 -mulss %xmm1, %xmm1 -xorps %xmm0, %xmm0 -movss %xmm1, %xmm0 -ret - -Because mulss doesn't modify the top 3 elements, the top elements of -xmm1 are already zero'd. 
We could compile this to: - -movss 4(%esp), %xmm0 -mulss %xmm0, %xmm0 -ret - -//===---------------------------------------------------------------------===// - -Here's a sick and twisted idea. Consider code like this: - -__m128 test(__m128 a) { - float b = *(float*)&A; - ... - return _mm_set_ps(0.0, 0.0, 0.0, b); -} - -This might compile to this code: - -movaps c(%esp), %xmm1 -xorps %xmm0, %xmm0 -movss %xmm1, %xmm0 -ret - -Now consider if the ... code caused xmm1 to get spilled. This might produce -this code: - -movaps c(%esp), %xmm1 -movaps %xmm1, c2(%esp) -... - -xorps %xmm0, %xmm0 -movaps c2(%esp), %xmm1 -movss %xmm1, %xmm0 -ret - -However, since the reload is only used by these instructions, we could -"fold" it into the uses, producing something like this: - -movaps c(%esp), %xmm1 -movaps %xmm1, c2(%esp) -... - -movss c2(%esp), %xmm0 -ret - -... saving two instructions. - -The basic idea is that a reload from a spill slot, can, if only one 4-byte -chunk is used, bring in 3 zeros the one element instead of 4 elements. -This can be used to simplify a variety of shuffle operations, where the -elements are fixed zeros. - -//===---------------------------------------------------------------------===// - -This code generates ugly code, probably due to costs being off or something: - -define void @test(float* %P, <4 x float>* %P2 ) { - %xFloat0.688 = load float* %P - %tmp = load <4 x float>* %P2 - %inFloat3.713 = insertelement <4 x float> %tmp, float 0.0, i32 3 - store <4 x float> %inFloat3.713, <4 x float>* %P2 - ret void -} - -Generates: - -_test: - movl 8(%esp), %eax - movaps (%eax), %xmm0 - pxor %xmm1, %xmm1 - movaps %xmm0, %xmm2 - shufps $50, %xmm1, %xmm2 - shufps $132, %xmm2, %xmm0 - movaps %xmm0, (%eax) - ret - -Would it be better to generate: - -_test: - movl 8(%esp), %ecx - movaps (%ecx), %xmm0 - xor %eax, %eax - pinsrw $6, %eax, %xmm0 - pinsrw $7, %eax, %xmm0 - movaps %xmm0, (%ecx) - ret - -? 
- -//===---------------------------------------------------------------------===// - -Some useful information in the Apple Altivec / SSE Migration Guide: - -http://developer.apple.com/documentation/Performance/Conceptual/ -Accelerate_sse_migration/index.html - -e.g. SSE select using and, andnot, or. Various SSE compare translations. - -//===---------------------------------------------------------------------===// - -Add hooks to commute some CMPP operations. - -//===---------------------------------------------------------------------===// - -Apply the same transformation that merged four float into a single 128-bit load -to loads from constant pool. - -//===---------------------------------------------------------------------===// - -Floating point max / min are commutable when -enable-unsafe-fp-path is -specified. We should turn int_x86_sse_max_ss and X86ISD::FMIN etc. into other -nodes which are selected to max / min instructions that are marked commutable. - -//===---------------------------------------------------------------------===// - -We should materialize vector constants like "all ones" and "signbit" with -code like: - - cmpeqps xmm1, xmm1 ; xmm1 = all-ones - -and: - cmpeqps xmm1, xmm1 ; xmm1 = all-ones - psrlq xmm1, 31 ; xmm1 = all 100000000000... - -instead of using a load from the constant pool. The later is important for -ABS/NEG/copysign etc. - -//===---------------------------------------------------------------------===// - -These functions: - -#include <xmmintrin.h> -__m128i a; -void x(unsigned short n) { - a = _mm_slli_epi32 (a, n); -} -void y(unsigned n) { - a = _mm_slli_epi32 (a, n); -} - -compile to ( -O3 -static -fomit-frame-pointer): -_x: - movzwl 4(%esp), %eax - movd %eax, %xmm0 - movaps _a, %xmm1 - pslld %xmm0, %xmm1 - movaps %xmm1, _a - ret -_y: - movd 4(%esp), %xmm0 - movaps _a, %xmm1 - pslld %xmm0, %xmm1 - movaps %xmm1, _a - ret - -"y" looks good, but "x" does silly movzwl stuff around into a GPR. 
It seems -like movd would be sufficient in both cases as the value is already zero -extended in the 32-bit stack slot IIRC. For signed short, it should also be -save, as a really-signed value would be undefined for pslld. - - -//===---------------------------------------------------------------------===// - -#include <math.h> -int t1(double d) { return signbit(d); } - -This currently compiles to: - subl $12, %esp - movsd 16(%esp), %xmm0 - movsd %xmm0, (%esp) - movl 4(%esp), %eax - shrl $31, %eax - addl $12, %esp - ret - -We should use movmskp{s|d} instead. - -//===---------------------------------------------------------------------===// - -CodeGen/X86/vec_align.ll tests whether we can turn 4 scalar loads into a single -(aligned) vector load. This functionality has a couple of problems. - -1. The code to infer alignment from loads of globals is in the X86 backend, - not the dag combiner. This is because dagcombine2 needs to be able to see - through the X86ISD::Wrapper node, which DAGCombine can't really do. -2. The code for turning 4 x load into a single vector load is target - independent and should be moved to the dag combiner. -3. The code for turning 4 x load into a vector load can only handle a direct - load from a global or a direct load from the stack. It should be generalized - to handle any load from P, P+4, P+8, P+12, where P can be anything. -4. The alignment inference code cannot handle loads from globals in non-static - mode because it doesn't look through the extra dyld stub load. If you try - vec_align.ll without -relocation-model=static, you'll see what I mean. - -//===---------------------------------------------------------------------===// - -We should lower store(fneg(load p), q) into an integer load+xor+store, which -eliminates a constant pool load. 
For example, consider: - -define i64 @ccosf(float %z.0, float %z.1) nounwind readonly { -entry: - %tmp6 = fsub float -0.000000e+00, %z.1 ; <float> [#uses=1] - %tmp20 = tail call i64 @ccoshf( float %tmp6, float %z.0 ) nounwind readonly - ret i64 %tmp20 -} -declare i64 @ccoshf(float %z.0, float %z.1) nounwind readonly - -This currently compiles to: - -LCPI1_0: # <4 x float> - .long 2147483648 # float -0 - .long 2147483648 # float -0 - .long 2147483648 # float -0 - .long 2147483648 # float -0 -_ccosf: - subl $12, %esp - movss 16(%esp), %xmm0 - movss %xmm0, 4(%esp) - movss 20(%esp), %xmm0 - xorps LCPI1_0, %xmm0 - movss %xmm0, (%esp) - call L_ccoshf$stub - addl $12, %esp - ret - -Note the load into xmm0, then xor (to negate), then store. In PIC mode, -this code computes the pic base and does two loads to do the constant pool -load, so the improvement is much bigger. - -The tricky part about this xform is that the argument load/store isn't exposed -until post-legalize, and at that point, the fneg has been custom expanded into -an X86 fxor. This means that we need to handle this case in the x86 backend -instead of in target independent code. - -//===---------------------------------------------------------------------===// - -Non-SSE4 insert into 16 x i8 is atrociously bad. - -//===---------------------------------------------------------------------===// - -<2 x i64> extract is substantially worse than <2 x f64>, even if the destination -is memory. - -//===---------------------------------------------------------------------===// - -SSE4 extract-to-mem ops aren't being pattern matched because of the AssertZext -sitting between the truncate and the extract. - -//===---------------------------------------------------------------------===// - -INSERTPS can match any insert (extract, imm1), imm2 for 4 x float, and insert -any number of 0.0 simultaneously. Currently we only use it for simple -insertions. - -See comments in LowerINSERT_VECTOR_ELT_SSE4. 
- -//===---------------------------------------------------------------------===// - -On a random note, SSE2 should declare insert/extract of 2 x f64 as legal, not -Custom. All combinations of insert/extract reg-reg, reg-mem, and mem-reg are -legal, it'll just take a few extra patterns written in the .td file. - -Note: this is not a code quality issue; the custom lowered code happens to be -right, but we shouldn't have to custom lower anything. This is probably related -to <2 x i64> ops being so bad. - -//===---------------------------------------------------------------------===// - -'select' on vectors and scalars could be a whole lot better. We currently -lower them to conditional branches. On x86-64 for example, we compile this: - -double test(double a, double b, double c, double d) { return a<b ? c : d; } - -to: - -_test: - ucomisd %xmm0, %xmm1 - ja LBB1_2 # entry -LBB1_1: # entry - movapd %xmm3, %xmm2 -LBB1_2: # entry - movapd %xmm2, %xmm0 - ret - -instead of: - -_test: - cmpltsd %xmm1, %xmm0 - andpd %xmm0, %xmm2 - andnpd %xmm3, %xmm0 - orpd %xmm2, %xmm0 - ret - -For unpredictable branches, the later is much more efficient. This should -just be a matter of having scalar sse map to SELECT_CC and custom expanding -or iseling it. - -//===---------------------------------------------------------------------===// - -LLVM currently generates stack realignment code, when it is not necessary -needed. The problem is that we need to know about stack alignment too early, -before RA runs. - -At that point we don't know, whether there will be vector spill, or not. -Stack realignment logic is overly conservative here, but otherwise we can -produce unaligned loads/stores. - -Fixing this will require some huge RA changes. 
- -Testcase: -#include <emmintrin.h> - -typedef short vSInt16 __attribute__ ((__vector_size__ (16))); - -static const vSInt16 a = {- 22725, - 12873, - 22725, - 12873, - 22725, - 12873, -- 22725, - 12873};; - -vSInt16 madd(vSInt16 b) -{ - return _mm_madd_epi16(a, b); -} - -Generated code (x86-32, linux): -madd: - pushl %ebp - movl %esp, %ebp - andl $-16, %esp - movaps .LCPI1_0, %xmm1 - pmaddwd %xmm1, %xmm0 - movl %ebp, %esp - popl %ebp - ret - -//===---------------------------------------------------------------------===// - -Consider: -#include <emmintrin.h> -__m128 foo2 (float x) { - return _mm_set_ps (0, 0, x, 0); -} - -In x86-32 mode, we generate this spiffy code: - -_foo2: - movss 4(%esp), %xmm0 - pshufd $81, %xmm0, %xmm0 - ret - -in x86-64 mode, we generate this code, which could be better: - -_foo2: - xorps %xmm1, %xmm1 - movss %xmm0, %xmm1 - pshufd $81, %xmm1, %xmm0 - ret - -In sse4 mode, we could use insertps to make both better. - -Here's another testcase that could use insertps [mem]: - -#include <xmmintrin.h> -extern float x2, x3; -__m128 foo1 (float x1, float x4) { - return _mm_set_ps (x2, x1, x3, x4); -} - -gcc mainline compiles it to: - -foo1: - insertps $0x10, x2(%rip), %xmm0 - insertps $0x10, x3(%rip), %xmm1 - movaps %xmm1, %xmm2 - movlhps %xmm0, %xmm2 - movaps %xmm2, %xmm0 - ret - -//===---------------------------------------------------------------------===// - -We compile vector multiply-by-constant into poor code: - -define <4 x i32> @f(<4 x i32> %i) nounwind { - %A = mul <4 x i32> %i, < i32 10, i32 10, i32 10, i32 10 > - ret <4 x i32> %A -} - -On targets without SSE4.1, this compiles into: - -LCPI1_0: ## <4 x i32> - .long 10 - .long 10 - .long 10 - .long 10 - .text - .align 4,0x90 - .globl _f -_f: - pshufd $3, %xmm0, %xmm1 - movd %xmm1, %eax - imull LCPI1_0+12, %eax - movd %eax, %xmm1 - pshufd $1, %xmm0, %xmm2 - movd %xmm2, %eax - imull LCPI1_0+4, %eax - movd %eax, %xmm2 - punpckldq %xmm1, %xmm2 - movd %xmm0, %eax - imull LCPI1_0, %eax - movd 
%eax, %xmm1 - movhlps %xmm0, %xmm0 - movd %xmm0, %eax - imull LCPI1_0+8, %eax - movd %eax, %xmm0 - punpckldq %xmm0, %xmm1 - movaps %xmm1, %xmm0 - punpckldq %xmm2, %xmm0 - ret - -It would be better to synthesize integer vector multiplication by constants -using shifts and adds, pslld and paddd here. And even on targets with SSE4.1, -simple cases such as multiplication by powers of two would be better as -vector shifts than as multiplications. - -//===---------------------------------------------------------------------===// - -We compile this: - -__m128i -foo2 (char x) -{ - return _mm_set_epi8 (1, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 1, 0, 0, 0, 0); -} - -into: - movl $1, %eax - xorps %xmm0, %xmm0 - pinsrw $2, %eax, %xmm0 - movzbl 4(%esp), %eax - pinsrw $3, %eax, %xmm0 - movl $256, %eax - pinsrw $7, %eax, %xmm0 - ret - - -gcc-4.2: - subl $12, %esp - movzbl 16(%esp), %eax - movdqa LC0, %xmm0 - pinsrw $3, %eax, %xmm0 - addl $12, %esp - ret - .const - .align 4 -LC0: - .word 0 - .word 0 - .word 1 - .word 0 - .word 0 - .word 0 - .word 0 - .word 256 - -With SSE4, it should be - movdqa .LC0(%rip), %xmm0 - pinsrb $6, %edi, %xmm0 - -//===---------------------------------------------------------------------===// - -We should transform a shuffle of two vectors of constants into a single vector -of constants. Also, insertelement of a constant into a vector of constants -should also result in a vector of constants. e.g. 2008-06-25-VecISelBug.ll. 
- -We compiled it to something horrible: - - .align 4 -LCPI1_1: ## float - .long 1065353216 ## float 1 - .const - - .align 4 -LCPI1_0: ## <4 x float> - .space 4 - .long 1065353216 ## float 1 - .space 4 - .long 1065353216 ## float 1 - .text - .align 4,0x90 - .globl _t -_t: - xorps %xmm0, %xmm0 - movhps LCPI1_0, %xmm0 - movss LCPI1_1, %xmm1 - movaps %xmm0, %xmm2 - shufps $2, %xmm1, %xmm2 - shufps $132, %xmm2, %xmm0 - movaps %xmm0, 0 - -//===---------------------------------------------------------------------===// -rdar://5907648 - -This function: - -float foo(unsigned char x) { - return x; -} - -compiles to (x86-32): - -define float @foo(i8 zeroext %x) nounwind { - %tmp12 = uitofp i8 %x to float ; <float> [#uses=1] - ret float %tmp12 -} - -compiles to: - -_foo: - subl $4, %esp - movzbl 8(%esp), %eax - cvtsi2ss %eax, %xmm0 - movss %xmm0, (%esp) - flds (%esp) - addl $4, %esp - ret - -We should be able to use: - cvtsi2ss 8(%esp), %xmm0 -since we know the stack slot is already zext'd. - -//===---------------------------------------------------------------------===// - -Consider using movlps instead of movsd to implement (scalar_to_vector (loadf64)) -when code size is critical. movlps is slower than movsd on core2 but it's one -byte shorter. - -//===---------------------------------------------------------------------===// - -We should use a dynamic programming based approach to tell when using FPStack -operations is cheaper than SSE. 
SciMark montecarlo contains code like this -for example: - -double MonteCarlo_num_flops(int Num_samples) { - return ((double) Num_samples)* 4.0; -} - -In fpstack mode, this compiles into: - -LCPI1_0: - .long 1082130432 ## float 4.000000e+00 -_MonteCarlo_num_flops: - subl $4, %esp - movl 8(%esp), %eax - movl %eax, (%esp) - fildl (%esp) - fmuls LCPI1_0 - addl $4, %esp - ret - -in SSE mode, it compiles into significantly slower code: - -_MonteCarlo_num_flops: - subl $12, %esp - cvtsi2sd 16(%esp), %xmm0 - mulsd LCPI1_0, %xmm0 - movsd %xmm0, (%esp) - fldl (%esp) - addl $12, %esp - ret - -There are also other cases in scimark where using fpstack is better, it is -cheaper to do fld1 than load from a constant pool for example, so -"load, add 1.0, store" is better done in the fp stack, etc. - -//===---------------------------------------------------------------------===// - -The X86 backend should be able to if-convert SSE comparisons like "ucomisd" to -"cmpsd". For example, this code: - -double d1(double x) { return x == x ? x : x + x; } - -Compiles into: - -_d1: - ucomisd %xmm0, %xmm0 - jnp LBB1_2 - addsd %xmm0, %xmm0 - ret -LBB1_2: - ret - -Also, the 'ret's should be shared. This is PR6032. - -//===---------------------------------------------------------------------===// - -These should compile into the same code (PR6214): Perhaps instcombine should -canonicalize the former into the latter? 
- -define float @foo(float %x) nounwind { - %t = bitcast float %x to i32 - %s = and i32 %t, 2147483647 - %d = bitcast i32 %s to float - ret float %d -} - -declare float @fabsf(float %n) -define float @bar(float %x) nounwind { - %d = call float @fabsf(float %x) - ret float %d -} - -//===---------------------------------------------------------------------===// - -This IR (from PR6194): - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-darwin10.0.0" - -%0 = type { double, double } -%struct.float3 = type { float, float, float } - -define void @test(%0, %struct.float3* nocapture %res) nounwind noinline ssp { -entry: - %tmp18 = extractvalue %0 %0, 0 ; <double> [#uses=1] - %tmp19 = bitcast double %tmp18 to i64 ; <i64> [#uses=1] - %tmp20 = zext i64 %tmp19 to i128 ; <i128> [#uses=1] - %tmp10 = lshr i128 %tmp20, 32 ; <i128> [#uses=1] - %tmp11 = trunc i128 %tmp10 to i32 ; <i32> [#uses=1] - %tmp12 = bitcast i32 %tmp11 to float ; <float> [#uses=1] - %tmp5 = getelementptr inbounds %struct.float3* %res, i64 0, i32 1 ; <float*> [#uses=1] - store float %tmp12, float* %tmp5 - ret void -} - -Compiles to: - -_test: ## @test - movd %xmm0, %rax - shrq $32, %rax - movl %eax, 4(%rdi) - ret - -This would be better kept in the SSE unit by treating XMM0 as a 4xfloat and -doing a shuffle from v[1] to v[0] then a float store. 
- -//===---------------------------------------------------------------------===// - -On SSE4 machines, we compile this code: - -define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, - <2 x float> *%P) nounwind { - %Z = fadd <2 x float> %Q, %R - - store <2 x float> %Z, <2 x float> *%P - ret <2 x float> %Z -} - -into: - -_test2: ## @test2 -## BB#0: - insertps $0, %xmm2, %xmm2 - insertps $16, %xmm3, %xmm2 - insertps $0, %xmm0, %xmm3 - insertps $16, %xmm1, %xmm3 - addps %xmm2, %xmm3 - movq %xmm3, (%rdi) - movaps %xmm3, %xmm0 - pshufd $1, %xmm3, %xmm1 - ## kill: XMM1<def> XMM1<kill> - ret - -The insertps's of $0 are pointless complex copies. - -//===---------------------------------------------------------------------===// - - diff --git a/contrib/llvm/lib/Target/X86/README-UNIMPLEMENTED.txt b/contrib/llvm/lib/Target/X86/README-UNIMPLEMENTED.txt deleted file mode 100644 index c26c75a..0000000 --- a/contrib/llvm/lib/Target/X86/README-UNIMPLEMENTED.txt +++ /dev/null @@ -1,14 +0,0 @@ -//===---------------------------------------------------------------------===// -// Testcases that crash the X86 backend because they aren't implemented -//===---------------------------------------------------------------------===// - -These are cases we know the X86 backend doesn't handle. Patches are welcome -and appreciated, because no one has signed up to implement these yet. -Implementing these would allow elimination of the corresponding intrinsics, -which would be great. 
- -1) vector shifts -2) vector comparisons -3) vector fp<->int conversions: PR2683, PR2684, PR2685, PR2686, PR2688 -4) bitcasts from vectors to scalars: PR2804 -5) llvm.atomic.cmp.swap.i128.p0i128: PR3462 diff --git a/contrib/llvm/lib/Target/X86/README-X86-64.txt b/contrib/llvm/lib/Target/X86/README-X86-64.txt deleted file mode 100644 index 78c4dc0..0000000 --- a/contrib/llvm/lib/Target/X86/README-X86-64.txt +++ /dev/null @@ -1,273 +0,0 @@ -//===- README_X86_64.txt - Notes for X86-64 code gen ----------------------===// - -AMD64 Optimization Manual 8.2 has some nice information about optimizing integer -multiplication by a constant. How much of it applies to Intel's X86-64 -implementation? There are definite trade-offs to consider: latency vs. register -pressure vs. code size. - -//===---------------------------------------------------------------------===// - -Are we better off using branches instead of cmove to implement FP to -unsigned i64? - -_conv: - ucomiss LC0(%rip), %xmm0 - cvttss2siq %xmm0, %rdx - jb L3 - subss LC0(%rip), %xmm0 - movabsq $-9223372036854775808, %rax - cvttss2siq %xmm0, %rdx - xorq %rax, %rdx -L3: - movq %rdx, %rax - ret - -instead of - -_conv: - movss LCPI1_0(%rip), %xmm1 - cvttss2siq %xmm0, %rcx - movaps %xmm0, %xmm2 - subss %xmm1, %xmm2 - cvttss2siq %xmm2, %rax - movabsq $-9223372036854775808, %rdx - xorq %rdx, %rax - ucomiss %xmm1, %xmm0 - cmovb %rcx, %rax - ret - -Seems like the jb branch has high likelihood of being taken. It would have -saved a few instructions. 
- -//===---------------------------------------------------------------------===// - -Poor codegen: - -int X[2]; -int b; -void test(void) { - memset(X, b, 2*sizeof(X[0])); -} - -llc: - movq _b@GOTPCREL(%rip), %rax - movzbq (%rax), %rax - movq %rax, %rcx - shlq $8, %rcx - orq %rax, %rcx - movq %rcx, %rax - shlq $16, %rax - orq %rcx, %rax - movq %rax, %rcx - shlq $32, %rcx - movq _X@GOTPCREL(%rip), %rdx - orq %rax, %rcx - movq %rcx, (%rdx) - ret - -gcc: - movq _b@GOTPCREL(%rip), %rax - movabsq $72340172838076673, %rdx - movzbq (%rax), %rax - imulq %rdx, %rax - movq _X@GOTPCREL(%rip), %rdx - movq %rax, (%rdx) - ret - -And the codegen is even worse for the following -(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103): - void fill1(char *s, int a) - { - __builtin_memset(s, a, 15); - } - -For this version, we duplicate the computation of the constant to store. - -//===---------------------------------------------------------------------===// - -It's not possible to reference AH, BH, CH, and DH registers in an instruction -requiring REX prefix. However, divb and mulb both produce results in AH. If isel -emits a CopyFromReg which gets turned into a movb and that can be allocated a -r8b - r15b. - -To get around this, isel emits a CopyFromReg from AX and then right shift it -down by 8 and truncate it. It's not pretty but it works. We need some register -allocation magic to make the hack go away (e.g. putting additional constraints -on the result of the movb). - -//===---------------------------------------------------------------------===// - -The x86-64 ABI for hidden-argument struct returns requires that the -incoming value of %rdi be copied into %rax by the callee upon return. - -The idea is that it saves callers from having to remember this value, -which would often require a callee-saved register. Callees usually -need to keep this value live for most of their body anyway, so it -doesn't add a significant burden on them. 
- -We currently implement this in codegen, however this is suboptimal -because it means that it would be quite awkward to implement the -optimization for callers. - -A better implementation would be to relax the LLVM IR rules for sret -arguments to allow a function with an sret argument to have a non-void -return type, and to have the front-end to set up the sret argument value -as the return value of the function. The front-end could more easily -emit uses of the returned struct value to be in terms of the function's -lowered return value, and it would free non-C frontends from a -complication only required by a C-based ABI. - -//===---------------------------------------------------------------------===// - -We get a redundant zero extension for code like this: - -int mask[1000]; -int foo(unsigned x) { - if (x < 10) - x = x * 45; - else - x = x * 78; - return mask[x]; -} - -_foo: -LBB1_0: ## entry - cmpl $9, %edi - jbe LBB1_3 ## bb -LBB1_1: ## bb1 - imull $78, %edi, %eax -LBB1_2: ## bb2 - movl %eax, %eax <---- - movq _mask@GOTPCREL(%rip), %rcx - movl (%rcx,%rax,4), %eax - ret -LBB1_3: ## bb - imull $45, %edi, %eax - jmp LBB1_2 ## bb2 - -Before regalloc, we have: - - %reg1025<def> = IMUL32rri8 %reg1024, 45, %EFLAGS<imp-def> - JMP mbb<bb2,0x203afb0> - Successors according to CFG: 0x203afb0 (#3) - -bb1: 0x203af60, LLVM BB @0x1e02310, ID#2: - Predecessors according to CFG: 0x203aec0 (#0) - %reg1026<def> = IMUL32rri8 %reg1024, 78, %EFLAGS<imp-def> - Successors according to CFG: 0x203afb0 (#3) - -bb2: 0x203afb0, LLVM BB @0x1e02340, ID#3: - Predecessors according to CFG: 0x203af10 (#1) 0x203af60 (#2) - %reg1027<def> = PHI %reg1025, mbb<bb,0x203af10>, - %reg1026, mbb<bb1,0x203af60> - %reg1029<def> = MOVZX64rr32 %reg1027 - -so we'd have to know that IMUL32rri8 leaves the high word zero extended and to -be able to recognize the zero extend. This could also presumably be implemented -if we have whole-function selectiondags. 
- -//===---------------------------------------------------------------------===// - -Take the following C code -(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43640): - -struct u1 -{ - float x; - float y; -}; - -float foo(struct u1 u) -{ - return u.x + u.y; -} - -Optimizes to the following IR: -define float @foo(double %u.0) nounwind readnone { -entry: - %tmp8 = bitcast double %u.0 to i64 ; <i64> [#uses=2] - %tmp6 = trunc i64 %tmp8 to i32 ; <i32> [#uses=1] - %tmp7 = bitcast i32 %tmp6 to float ; <float> [#uses=1] - %tmp2 = lshr i64 %tmp8, 32 ; <i64> [#uses=1] - %tmp3 = trunc i64 %tmp2 to i32 ; <i32> [#uses=1] - %tmp4 = bitcast i32 %tmp3 to float ; <float> [#uses=1] - %0 = fadd float %tmp7, %tmp4 ; <float> [#uses=1] - ret float %0 -} - -And current llvm-gcc/clang output: - movd %xmm0, %rax - movd %eax, %xmm1 - shrq $32, %rax - movd %eax, %xmm0 - addss %xmm1, %xmm0 - ret - -We really shouldn't move the floats to RAX, only to immediately move them -straight back to the XMM registers. - -There really isn't any good way to handle this purely in IR optimizers; it -could possibly be handled by changing the output of the frontend, though. It -would also be feasible to add an x86-specific DAGCombine to optimize the -bitcast+trunc+(lshr+)bitcast combination. - -//===---------------------------------------------------------------------===// - -Take the following code -(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34653): -extern unsigned long table[]; -unsigned long foo(unsigned char *p) { - unsigned long tag = *p; - return table[tag >> 4] + table[tag & 0xf]; -} - -Current code generated: - movzbl (%rdi), %eax - movq %rax, %rcx - andq $240, %rcx - shrq %rcx - andq $15, %rax - movq table(,%rax,8), %rax - addq table(%rcx), %rax - ret - -Issues: -1. First movq should be movl; saves a byte. -2. Both andq's should be andl; saves another two bytes. I think this was - implemented at one point, but subsequently regressed. -3. shrq should be shrl; saves another byte. -4. 
The first andq can be completely eliminated by using a slightly more - expensive addressing mode. - -//===---------------------------------------------------------------------===// - -Consider the following (contrived testcase, but contains common factors): - -#include <stdarg.h> -int test(int x, ...) { - int sum, i; - va_list l; - va_start(l, x); - for (i = 0; i < x; i++) - sum += va_arg(l, int); - va_end(l); - return sum; -} - -Testcase given in C because fixing it will likely involve changing the IR -generated for it. The primary issue with the result is that it doesn't do any -of the optimizations which are possible if we know the address of a va_list -in the current function is never taken: -1. We shouldn't spill the XMM registers because we only call va_arg with "int". -2. It would be nice if we could scalarrepl the va_list. -3. Probably overkill, but it'd be cool if we could peel off the first five -iterations of the loop. - -Other optimizations involving functions which use va_arg on floats which don't -have the address of a va_list taken: -1. Conversely to the above, we shouldn't spill general registers if we only - call va_arg on "double". -2. If we know nothing more than 64 bits wide is read from the XMM registers, - we can change the spilling code to reduce the amount of stack used by half. - -//===---------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/X86/README.txt b/contrib/llvm/lib/Target/X86/README.txt deleted file mode 100644 index a305ae6..0000000 --- a/contrib/llvm/lib/Target/X86/README.txt +++ /dev/null @@ -1,1962 +0,0 @@ -//===---------------------------------------------------------------------===// -// Random ideas for the X86 backend. -//===---------------------------------------------------------------------===// - -We should add support for the "movbe" instruction, which does a byte-swapping -copy (3-addr bswap + memory support?) This is available on Atom processors. 
- -//===---------------------------------------------------------------------===// - -CodeGen/X86/lea-3.ll:test3 should be a single LEA, not a shift/move. The X86 -backend knows how to three-addressify this shift, but it appears the register -allocator isn't even asking it to do so in this case. We should investigate -why this isn't happening, it could have significant impact on other important -cases for X86 as well. - -//===---------------------------------------------------------------------===// - -This should be one DIV/IDIV instruction, not a libcall: - -unsigned test(unsigned long long X, unsigned Y) { - return X/Y; -} - -This can be done trivially with a custom legalizer. What about overflow -though? http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14224 - -//===---------------------------------------------------------------------===// - -Improvements to the multiply -> shift/add algorithm: -http://gcc.gnu.org/ml/gcc-patches/2004-08/msg01590.html - -//===---------------------------------------------------------------------===// - -Improve code like this (occurs fairly frequently, e.g. in LLVM): -long long foo(int x) { return 1LL << x; } - -http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01109.html -http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01128.html -http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01136.html - -Another useful one would be ~0ULL >> X and ~0ULL << X. - -One better solution for 1LL << x is: - xorl %eax, %eax - xorl %edx, %edx - testb $32, %cl - sete %al - setne %dl - sall %cl, %eax - sall %cl, %edx - -But that requires good 8-bit subreg support. - -Also, this might be better. It's an extra shift, but it's one instruction -shorter, and doesn't stress 8-bit subreg support. -(From http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01148.html, -but without the unnecessary and.) - movl %ecx, %eax - shrl $5, %eax - movl %eax, %edx - xorl $1, %edx - sall %cl, %eax - sall %cl, %edx - -64-bit shifts (in general) expand to really bad code. 
Instead of using -cmovs, we should expand to a conditional branch like GCC produces. - -//===---------------------------------------------------------------------===// - -Compile this: -_Bool f(_Bool a) { return a!=1; } - -into: - movzbl %dil, %eax - xorl $1, %eax - ret - -(Although note that this isn't a legal way to express the code that llvm-gcc -currently generates for that function.) - -//===---------------------------------------------------------------------===// - -Some isel ideas: - -1. Dynamic programming based approach when compile time is not an - issue. -2. Code duplication (addressing mode) during isel. -3. Other ideas from "Register-Sensitive Selection, Duplication, and - Sequencing of Instructions". -4. Scheduling for reduced register pressure. E.g. "Minimum Register - Instruction Sequence Problem: Revisiting Optimal Code Generation for DAGs" - and other related papers. - http://citeseer.ist.psu.edu/govindarajan01minimum.html - -//===---------------------------------------------------------------------===// - -Should we promote i16 to i32 to avoid partial register update stalls? - -//===---------------------------------------------------------------------===// - -Leave any_extend as pseudo instruction and hint to register -allocator. Delay codegen until post register allocation. -Note. any_extend is now turned into an INSERT_SUBREG. We still need to teach -the coalescer how to deal with it though. - -//===---------------------------------------------------------------------===// - -It appears icc uses push for parameter passing. Need to investigate. - -//===---------------------------------------------------------------------===// - -Only use inc/neg/not instructions on processors where they are faster than -add/sub/xor. They are slower on the P4 due to only updating some processor -flags. - -//===---------------------------------------------------------------------===// - -The instruction selector sometimes misses folding a load into a compare. 
The -pattern is written as (cmp reg, (load p)). Because the compare isn't -commutative, it is not matched with the load on both sides. The dag combiner -should be made smart enough to canonicalize the load into the RHS of a compare -when it can invert the result of the compare for free. - -//===---------------------------------------------------------------------===// - -In many cases, LLVM generates code like this: - -_test: - movl 8(%esp), %eax - cmpl %eax, 4(%esp) - setl %al - movzbl %al, %eax - ret - -on some processors (which ones?), it is more efficient to do this: - -_test: - movl 8(%esp), %ebx - xor %eax, %eax - cmpl %ebx, 4(%esp) - setl %al - ret - -Doing this correctly is tricky though, as the xor clobbers the flags. - -//===---------------------------------------------------------------------===// - -We should generate bts/btr/etc instructions on targets where they are cheap or -when codesize is important. e.g., for: - -void setbit(int *target, int bit) { - *target |= (1 << bit); -} -void clearbit(int *target, int bit) { - *target &= ~(1 << bit); -} - -//===---------------------------------------------------------------------===// - -Instead of the following for memset char*, 1, 10: - - movl $16843009, 4(%edx) - movl $16843009, (%edx) - movw $257, 8(%edx) - -It might be better to generate - - movl $16843009, %eax - movl %eax, 4(%edx) - movl %eax, (%edx) - movw %ax, 8(%edx) - -when we can spare a register. It reduces code size. - -//===---------------------------------------------------------------------===// - -Evaluate what the best way to codegen sdiv X, (2^C) is. 
For X/8, we currently -get this: - -define i32 @test1(i32 %X) { - %Y = sdiv i32 %X, 8 - ret i32 %Y -} - -_test1: - movl 4(%esp), %eax - movl %eax, %ecx - sarl $31, %ecx - shrl $29, %ecx - addl %ecx, %eax - sarl $3, %eax - ret - -GCC knows several different ways to codegen it, one of which is this: - -_test1: - movl 4(%esp), %eax - cmpl $-1, %eax - leal 7(%eax), %ecx - cmovle %ecx, %eax - sarl $3, %eax - ret - -which is probably slower, but it's interesting at least :) - -//===---------------------------------------------------------------------===// - -We are currently lowering large (1MB+) memmove/memcpy to rep/stosl and rep/movsl -We should leave these as libcalls for everything over a much lower threshold, -since libc is hand tuned for medium and large mem ops (avoiding RFO for large -stores, TLB preheating, etc) - -//===---------------------------------------------------------------------===// - -Optimize this into something reasonable: - x * copysign(1.0, y) * copysign(1.0, z) - -//===---------------------------------------------------------------------===// - -Optimize copysign(x, *y) to use an integer load from y. - -//===---------------------------------------------------------------------===// - -The following tests perform worse with LSR: - -lambda, siod, optimizer-eval, ackermann, hash2, nestedloop, strcat, and Treesor. - -//===---------------------------------------------------------------------===// - -Adding to the list of cmp / test poor codegen issues: - -int test(__m128 *A, __m128 *B) { - if (_mm_comige_ss(*A, *B)) - return 3; - else - return 4; -} - -_test: - movl 8(%esp), %eax - movaps (%eax), %xmm0 - movl 4(%esp), %eax - movaps (%eax), %xmm1 - comiss %xmm0, %xmm1 - setae %al - movzbl %al, %ecx - movl $3, %eax - movl $4, %edx - cmpl $0, %ecx - cmove %edx, %eax - ret - -Note the setae, movzbl, cmpl, cmove can be replaced with a single cmovae. There -are a number of issues. 
1) We are introducing a setcc between the result of the -intrinsic call and select. 2) The intrinsic is expected to produce an i32 value -so an any extend (which becomes a zero extend) is added. - -We probably need some kind of target DAG combine hook to fix this. - -//===---------------------------------------------------------------------===// - -We generate significantly worse code for this than GCC: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21150 -http://gcc.gnu.org/bugzilla/attachment.cgi?id=8701 - -There is also one case we do worse on PPC. - -//===---------------------------------------------------------------------===// - -For this: - -int test(int a) -{ - return a * 3; -} - -We currently emit - imull $3, 4(%esp), %eax - -Perhaps this is what we really should generate? Is imull three or four -cycles? Note: ICC generates this: - movl 4(%esp), %eax - leal (%eax,%eax,2), %eax - -The current instruction priority is based on pattern complexity. The former is -more "complex" because it folds a load so the latter will not be emitted. - -Perhaps we should use AddedComplexity to give LEA32r a higher priority? We -should always try to match LEA first since the LEA matching code does some -estimate to determine whether the match is profitable. - -However, if we care more about code size, then imull is better. It's two bytes -shorter than movl + leal. - -On a Pentium M, both variants have the same characteristics with regard -to throughput; however, the multiplication has a latency of four cycles, as -opposed to two cycles for the movl+lea variant. - -//===---------------------------------------------------------------------===// - -__builtin_ffs codegen is messy. 
- -int ffs_(unsigned X) { return __builtin_ffs(X); } - -llvm produces: -ffs_: - movl 4(%esp), %ecx - bsfl %ecx, %eax - movl $32, %edx - cmove %edx, %eax - incl %eax - xorl %edx, %edx - testl %ecx, %ecx - cmove %edx, %eax - ret - -vs gcc: - -_ffs_: - movl $-1, %edx - bsfl 4(%esp), %eax - cmove %edx, %eax - addl $1, %eax - ret - -Another example of __builtin_ffs (use predsimplify to eliminate a select): - -int foo (unsigned long j) { - if (j) - return __builtin_ffs (j) - 1; - else - return 0; -} - -//===---------------------------------------------------------------------===// - -It appears gcc place string data with linkonce linkage in -.section __TEXT,__const_coal,coalesced instead of -.section __DATA,__const_coal,coalesced. -Take a look at darwin.h, there are other Darwin assembler directives that we -do not make use of. - -//===---------------------------------------------------------------------===// - -define i32 @foo(i32* %a, i32 %t) { -entry: - br label %cond_true - -cond_true: ; preds = %cond_true, %entry - %x.0.0 = phi i32 [ 0, %entry ], [ %tmp9, %cond_true ] ; <i32> [#uses=3] - %t_addr.0.0 = phi i32 [ %t, %entry ], [ %tmp7, %cond_true ] ; <i32> [#uses=1] - %tmp2 = getelementptr i32* %a, i32 %x.0.0 ; <i32*> [#uses=1] - %tmp3 = load i32* %tmp2 ; <i32> [#uses=1] - %tmp5 = add i32 %t_addr.0.0, %x.0.0 ; <i32> [#uses=1] - %tmp7 = add i32 %tmp5, %tmp3 ; <i32> [#uses=2] - %tmp9 = add i32 %x.0.0, 1 ; <i32> [#uses=2] - %tmp = icmp sgt i32 %tmp9, 39 ; <i1> [#uses=1] - br i1 %tmp, label %bb12, label %cond_true - -bb12: ; preds = %cond_true - ret i32 %tmp7 -} -is pessimized by -loop-reduce and -indvars - -//===---------------------------------------------------------------------===// - -u32 to float conversion improvement: - -float uint32_2_float( unsigned u ) { - float fl = (int) (u & 0xffff); - float fh = (int) (u >> 16); - fh *= 0x1.0p16f; - return fh + fl; -} - -00000000 subl $0x04,%esp -00000003 movl 0x08(%esp,1),%eax -00000007 movl %eax,%ecx -00000009 shrl 
$0x10,%ecx -0000000c cvtsi2ss %ecx,%xmm0 -00000010 andl $0x0000ffff,%eax -00000015 cvtsi2ss %eax,%xmm1 -00000019 mulss 0x00000078,%xmm0 -00000021 addss %xmm1,%xmm0 -00000025 movss %xmm0,(%esp,1) -0000002a flds (%esp,1) -0000002d addl $0x04,%esp -00000030 ret - -//===---------------------------------------------------------------------===// - -When using fastcc abi, align stack slot of argument of type double on 8 byte -boundary to improve performance. - -//===---------------------------------------------------------------------===// - -Codegen: - -int f(int a, int b) { - if (a == 4 || a == 6) - b++; - return b; -} - - -as: - -or eax, 2 -cmp eax, 6 -jz label - -//===---------------------------------------------------------------------===// - -GCC's ix86_expand_int_movcc function (in i386.c) has a ton of interesting -simplifications for integer "x cmp y ? a : b". For example, instead of: - -int G; -void f(int X, int Y) { - G = X < 0 ? 14 : 13; -} - -compiling to: - -_f: - movl $14, %eax - movl $13, %ecx - movl 4(%esp), %edx - testl %edx, %edx - cmovl %eax, %ecx - movl %ecx, _G - ret - -it could be: -_f: - movl 4(%esp), %eax - sarl $31, %eax - notl %eax - addl $14, %eax - movl %eax, _G - ret - -etc. - -Another is: -int usesbb(unsigned int a, unsigned int b) { - return (a < b ? -1 : 0); -} -to: -_usesbb: - movl 8(%esp), %eax - cmpl %eax, 4(%esp) - sbbl %eax, %eax - ret - -instead of: -_usesbb: - xorl %eax, %eax - movl 8(%esp), %ecx - cmpl %ecx, 4(%esp) - movl $4294967295, %ecx - cmovb %ecx, %eax - ret - -//===---------------------------------------------------------------------===// - -Consider the expansion of: - -define i32 @test3(i32 %X) { - %tmp1 = urem i32 %X, 255 - ret i32 %tmp1 -} - -Currently it compiles to: - -... - movl $2155905153, %ecx - movl 8(%esp), %esi - movl %esi, %eax - mull %ecx -... - -This could be "reassociated" into: - - movl $2155905153, %eax - movl 8(%esp), %ecx - mull %ecx - -to avoid the copy. 
In fact, the existing two-address stuff would do this -except that mul isn't a commutative 2-addr instruction. I guess this has -to be done at isel time based on the #uses to mul? - -//===---------------------------------------------------------------------===// - -Make sure the instruction which starts a loop does not cross a cacheline -boundary. This requires knowing the exact length of each machine instruction. -That is somewhat complicated, but doable. Example 256.bzip2: - -In the new trace, the hot loop has an instruction which crosses a cacheline -boundary. In addition to potential cache misses, this can't help decoding as I -imagine there has to be some kind of complicated decoder reset and realignment -to grab the bytes from the next cacheline. - -532 532 0x3cfc movb (1809(%esp, %esi), %bl <<<--- spans 2 64 byte lines -942 942 0x3d03 movl %dh, (1809(%esp, %esi) -937 937 0x3d0a incl %esi -3 3 0x3d0b cmpb %bl, %dl -27 27 0x3d0d jnz 0x000062db <main+11707> - -//===---------------------------------------------------------------------===// - -In c99 mode, the preprocessor doesn't like assembly comments like #TRUNCATE. - -//===---------------------------------------------------------------------===// - -This could be a single 16-bit load. - -int f(char *p) { - if ((p[0] == 1) & (p[1] == 2)) return 1; - return 0; -} - -//===---------------------------------------------------------------------===// - -We should inline lrintf and probably other libc functions. - -//===---------------------------------------------------------------------===// - -Use the FLAGS values from arithmetic instructions more. 
For example, compile: - -int add_zf(int *x, int y, int a, int b) { - if ((*x += y) == 0) - return a; - else - return b; -} - -to: - addl %esi, (%rdi) - movl %edx, %eax - cmovne %ecx, %eax - ret -instead of: - -_add_zf: - addl (%rdi), %esi - movl %esi, (%rdi) - testl %esi, %esi - cmove %edx, %ecx - movl %ecx, %eax - ret - -As another example, compile function f2 in test/CodeGen/X86/cmp-test.ll -without a test instruction. - -//===---------------------------------------------------------------------===// - -These two functions have identical effects: - -unsigned int f(unsigned int i, unsigned int n) {++i; if (i == n) ++i; return i;} -unsigned int f2(unsigned int i, unsigned int n) {++i; i += i == n; return i;} - -We currently compile them to: - -_f: - movl 4(%esp), %eax - movl %eax, %ecx - incl %ecx - movl 8(%esp), %edx - cmpl %edx, %ecx - jne LBB1_2 #UnifiedReturnBlock -LBB1_1: #cond_true - addl $2, %eax - ret -LBB1_2: #UnifiedReturnBlock - movl %ecx, %eax - ret -_f2: - movl 4(%esp), %eax - movl %eax, %ecx - incl %ecx - cmpl 8(%esp), %ecx - sete %cl - movzbl %cl, %ecx - leal 1(%ecx,%eax), %eax - ret - -both of which are inferior to GCC's: - -_f: - movl 4(%esp), %edx - leal 1(%edx), %eax - addl $2, %edx - cmpl 8(%esp), %eax - cmove %edx, %eax - ret -_f2: - movl 4(%esp), %eax - addl $1, %eax - xorl %edx, %edx - cmpl 8(%esp), %eax - sete %dl - addl %edx, %eax - ret - -//===---------------------------------------------------------------------===// - -This code: - -void test(int X) { - if (X) abort(); -} - -is currently compiled to: - -_test: - subl $12, %esp - cmpl $0, 16(%esp) - jne LBB1_1 - addl $12, %esp - ret -LBB1_1: - call L_abort$stub - -It would be better to produce: - -_test: - subl $12, %esp - cmpl $0, 16(%esp) - jne L_abort$stub - addl $12, %esp - ret - -This can be applied to any no-return function call that takes no arguments etc. 
-Alternatively, the stack save/restore logic could be shrink-wrapped, producing -something like this: - -_test: - cmpl $0, 4(%esp) - jne LBB1_1 - ret -LBB1_1: - subl $12, %esp - call L_abort$stub - -Both are useful in different situations. Finally, it could be shrink-wrapped -and tail called, like this: - -_test: - cmpl $0, 4(%esp) - jne LBB1_1 - ret -LBB1_1: - pop %eax # realign stack. - call L_abort$stub - -Though this probably isn't worth it. - -//===---------------------------------------------------------------------===// - -Sometimes it is better to codegen subtractions from a constant (e.g. 7-x) with -a neg instead of a sub instruction. Consider: - -int test(char X) { return 7-X; } - -we currently produce: -_test: - movl $7, %eax - movsbl 4(%esp), %ecx - subl %ecx, %eax - ret - -We would use one fewer register if codegen'd as: - - movsbl 4(%esp), %eax - neg %eax - add $7, %eax - ret - -Note that this isn't beneficial if the load can be folded into the sub. In -this case, we want a sub: - -int test(int X) { return 7-X; } -_test: - movl $7, %eax - subl 4(%esp), %eax - ret - -//===---------------------------------------------------------------------===// - -Leaf functions that require one 4-byte spill slot have a prolog like this: - -_foo: - pushl %esi - subl $4, %esp -... -and an epilog like this: - addl $4, %esp - popl %esi - ret - -It would be smaller, and potentially faster, to push eax on entry and to -pop into a dummy register instead of using addl/subl of esp. Just don't pop -into any return registers :) - -//===---------------------------------------------------------------------===// - -The X86 backend should fold (branch (or (setcc, setcc))) into multiple -branches. We generate really poor code for: - -double testf(double a) { - return a == 0.0 ? 0.0 : (a > 0.0 ? 
1.0 : -1.0); -} - -For example, the entry BB is: - -_testf: - subl $20, %esp - pxor %xmm0, %xmm0 - movsd 24(%esp), %xmm1 - ucomisd %xmm0, %xmm1 - setnp %al - sete %cl - testb %cl, %al - jne LBB1_5 # UnifiedReturnBlock -LBB1_1: # cond_true - - -it would be better to replace the last four instructions with: - - jp LBB1_1 - je LBB1_5 -LBB1_1: - -We also codegen the inner ?: into a diamond: - - cvtss2sd LCPI1_0(%rip), %xmm2 - cvtss2sd LCPI1_1(%rip), %xmm3 - ucomisd %xmm1, %xmm0 - ja LBB1_3 # cond_true -LBB1_2: # cond_true - movapd %xmm3, %xmm2 -LBB1_3: # cond_true - movapd %xmm2, %xmm0 - ret - -We should sink the load into xmm3 into the LBB1_2 block. This should -be pretty easy, and will nuke all the copies. - -//===---------------------------------------------------------------------===// - -This: - #include <algorithm> - inline std::pair<unsigned, bool> full_add(unsigned a, unsigned b) - { return std::make_pair(a + b, a + b < a); } - bool no_overflow(unsigned a, unsigned b) - { return !full_add(a, b).second; } - -Should compile to: - - - _Z11no_overflowjj: - addl %edi, %esi - setae %al - ret - -FIXME: That code looks wrong; bool return is normally defined as zext. 
- -on x86-64, not: - -__Z11no_overflowjj: - addl %edi, %esi - cmpl %edi, %esi - setae %al - movzbl %al, %eax - ret - - -//===---------------------------------------------------------------------===// - -The following code: - -bb114.preheader: ; preds = %cond_next94 - %tmp231232 = sext i16 %tmp62 to i32 ; <i32> [#uses=1] - %tmp233 = sub i32 32, %tmp231232 ; <i32> [#uses=1] - %tmp245246 = sext i16 %tmp65 to i32 ; <i32> [#uses=1] - %tmp252253 = sext i16 %tmp68 to i32 ; <i32> [#uses=1] - %tmp254 = sub i32 32, %tmp252253 ; <i32> [#uses=1] - %tmp553554 = bitcast i16* %tmp37 to i8* ; <i8*> [#uses=2] - %tmp583584 = sext i16 %tmp98 to i32 ; <i32> [#uses=1] - %tmp585 = sub i32 32, %tmp583584 ; <i32> [#uses=1] - %tmp614615 = sext i16 %tmp101 to i32 ; <i32> [#uses=1] - %tmp621622 = sext i16 %tmp104 to i32 ; <i32> [#uses=1] - %tmp623 = sub i32 32, %tmp621622 ; <i32> [#uses=1] - br label %bb114 - -produces: - -LBB3_5: # bb114.preheader - movswl -68(%ebp), %eax - movl $32, %ecx - movl %ecx, -80(%ebp) - subl %eax, -80(%ebp) - movswl -52(%ebp), %eax - movl %ecx, -84(%ebp) - subl %eax, -84(%ebp) - movswl -70(%ebp), %eax - movl %ecx, -88(%ebp) - subl %eax, -88(%ebp) - movswl -50(%ebp), %eax - subl %eax, %ecx - movl %ecx, -76(%ebp) - movswl -42(%ebp), %eax - movl %eax, -92(%ebp) - movswl -66(%ebp), %eax - movl %eax, -96(%ebp) - movw $0, -98(%ebp) - -This appears to be bad because the RA is not folding the store to the stack -slot into the movl. The above instructions could be: - movl $32, -80(%ebp) -... - movl $32, -84(%ebp) -... -This seems like a cross between remat and spill folding. - -This has redundant subtractions of %eax from a stack slot. However, %ecx doesn't -change, so we could simply subtract %eax from %ecx first and then use %ecx (or -vice-versa). 
- -//===---------------------------------------------------------------------===// - -This code: - - %tmp659 = icmp slt i16 %tmp654, 0 ; <i1> [#uses=1] - br i1 %tmp659, label %cond_true662, label %cond_next715 - -produces this: - - testw %cx, %cx - movswl %cx, %esi - jns LBB4_109 # cond_next715 - -Shark tells us that using %cx in the testw instruction is sub-optimal. It -suggests using the 32-bit register (which is what ICC uses). - -//===---------------------------------------------------------------------===// - -We compile this: - -void compare (long long foo) { - if (foo < 4294967297LL) - abort(); -} - -to: - -compare: - subl $4, %esp - cmpl $0, 8(%esp) - setne %al - movzbw %al, %ax - cmpl $1, 12(%esp) - setg %cl - movzbw %cl, %cx - cmove %ax, %cx - testb $1, %cl - jne .LBB1_2 # UnifiedReturnBlock -.LBB1_1: # ifthen - call abort -.LBB1_2: # UnifiedReturnBlock - addl $4, %esp - ret - -(also really horrible code on ppc). This is due to the expand code for 64-bit -compares. GCC produces multiple branches, which is much nicer: - -compare: - subl $12, %esp - movl 20(%esp), %edx - movl 16(%esp), %eax - decl %edx - jle .L7 -.L5: - addl $12, %esp - ret - .p2align 4,,7 -.L7: - jl .L4 - cmpl $0, %eax - .p2align 4,,8 - ja .L5 -.L4: - .p2align 4,,9 - call abort - -//===---------------------------------------------------------------------===// - -Tail call optimization improvements: Tail call optimization currently -pushes all arguments on the top of the stack (their normal place for -non-tail call optimized calls) that source from the callers arguments -or that source from a virtual register (also possibly sourcing from -callers arguments). -This is done to prevent overwriting of parameters (see example -below) that might be used later. 
- -example: - -int callee(int32, int64); -int caller(int32 arg1, int32 arg2) { - int64 local = arg2 * 2; - return callee(arg2, (int64)local); -} - -[arg1] [!arg2 no longer valid since we moved local onto it] -[arg2] -> [(int64) -[RETADDR] local ] - -Moving arg1 onto the stack slot of callee function would overwrite -arg2 of the caller. - -Possible optimizations: - - - - Analyse the actual parameters of the callee to see which would - overwrite a caller parameter which is used by the callee and only - push them onto the top of the stack. - - int callee (int32 arg1, int32 arg2); - int caller (int32 arg1, int32 arg2) { - return callee(arg1,arg2); - } - - Here we don't need to write any variables to the top of the stack - since they don't overwrite each other. - - int callee (int32 arg1, int32 arg2); - int caller (int32 arg1, int32 arg2) { - return callee(arg2,arg1); - } - - Here we need to push the arguments because they overwrite each - other. - -//===---------------------------------------------------------------------===// - -main () -{ - int i = 0; - unsigned long int z = 0; - - do { - z -= 0x00004000; - i++; - if (i > 0x00040000) - abort (); - } while (z > 0); - exit (0); -} - -gcc compiles this to: - -_main: - subl $28, %esp - xorl %eax, %eax - jmp L2 -L3: - cmpl $262144, %eax - je L10 -L2: - addl $1, %eax - cmpl $262145, %eax - jne L3 - call L_abort$stub -L10: - movl $0, (%esp) - call L_exit$stub - -llvm: - -_main: - subl $12, %esp - movl $1, %eax - movl $16384, %ecx -LBB1_1: # bb - cmpl $262145, %eax - jge LBB1_4 # cond_true -LBB1_2: # cond_next - incl %eax - addl $4294950912, %ecx - cmpl $16384, %ecx - jne LBB1_1 # bb -LBB1_3: # bb11 - xorl %eax, %eax - addl $12, %esp - ret -LBB1_4: # cond_true - call L_abort$stub - -1. LSR should rewrite the first cmp with induction variable %ecx. -2. 
DAG combiner should fold - leal 1(%eax), %edx - cmpl $262145, %edx - => - cmpl $262144, %eax - -//===---------------------------------------------------------------------===// - -define i64 @test(double %X) { - %Y = fptosi double %X to i64 - ret i64 %Y -} - -compiles to: - -_test: - subl $20, %esp - movsd 24(%esp), %xmm0 - movsd %xmm0, 8(%esp) - fldl 8(%esp) - fisttpll (%esp) - movl 4(%esp), %edx - movl (%esp), %eax - addl $20, %esp - #FP_REG_KILL - ret - -This should just fldl directly from the input stack slot. - -//===---------------------------------------------------------------------===// - -This code: -int foo (int x) { return (x & 65535) | 255; } - -Should compile into: - -_foo: - movzwl 4(%esp), %eax - orl $255, %eax - ret - -instead of: -_foo: - movl $255, %eax - orl 4(%esp), %eax - andl $65535, %eax - ret - -//===---------------------------------------------------------------------===// - -We're codegen'ing multiply of long longs inefficiently: - -unsigned long long LLM(unsigned long long arg1, unsigned long long arg2) { - return arg1 * arg2; -} - -We compile to (fomit-frame-pointer): - -_LLM: - pushl %esi - movl 8(%esp), %ecx - movl 16(%esp), %esi - movl %esi, %eax - mull %ecx - imull 12(%esp), %esi - addl %edx, %esi - imull 20(%esp), %ecx - movl %esi, %edx - addl %ecx, %edx - popl %esi - ret - -This looks like a scheduling deficiency and lack of remat of the load from -the argument area. ICC apparently produces: - - movl 8(%esp), %ecx - imull 12(%esp), %ecx - movl 16(%esp), %eax - imull 4(%esp), %eax - addl %eax, %ecx - movl 4(%esp), %eax - mull 12(%esp) - addl %ecx, %edx - ret - -Note that it remat'd loads from 4(esp) and 12(esp). See this GCC PR: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17236 - -//===---------------------------------------------------------------------===// - -We can fold a store into "zeroing a reg". Instead of: - -xorl %eax, %eax -movl %eax, 124(%esp) - -we should get: - -movl $0, 124(%esp) - -if the flags of the xor are dead. 
- -Likewise, we isel "x<<1" into "add reg,reg". If reg is spilled, this should -be folded into: shl [mem], 1 - -//===---------------------------------------------------------------------===// - -In SSE mode, we turn abs and neg into a load from the constant pool plus a xor -or and instruction, for example: - - xorpd LCPI1_0, %xmm2 - -However, if xmm2 gets spilled, we end up with really ugly code like this: - - movsd (%esp), %xmm0 - xorpd LCPI1_0, %xmm0 - movsd %xmm0, (%esp) - -Since we 'know' that this is a 'neg', we can actually "fold" the spill into -the neg/abs instruction, turning it into an *integer* operation, like this: - - xorl 2147483648, [mem+4] ## 2147483648 = (1 << 31) - -you could also use xorb, but xorl is less likely to lead to a partial register -stall. Here is a contrived testcase: - -double a, b, c; -void test(double *P) { - double X = *P; - a = X; - bar(); - X = -X; - b = X; - bar(); - c = X; -} - -//===---------------------------------------------------------------------===// - -The generated code on x86 for checking for signed overflow on a multiply the -obvious way is much longer than it needs to be. - -int x(int a, int b) { - long long prod = (long long)a*b; - return prod > 0x7FFFFFFF || prod < (-0x7FFFFFFF-1); -} - -See PR2053 for more details. - -//===---------------------------------------------------------------------===// - -We should investigate using cdq/cltd (effect: edx = sar eax, 31) -more aggressively; it should cost the same as a move+shift on any modern -processor, but it's a lot shorter. Downside is that it puts more -pressure on register allocation because it has fixed operands. - -Example: -int abs(int x) {return x < 0 ?
-x : x;} - -gcc compiles this to the following when using march/mtune=pentium2/3/4/m/etc.: -abs: - movl 4(%esp), %eax - cltd - xorl %edx, %eax - subl %edx, %eax - ret - -//===---------------------------------------------------------------------===// - -Consider: -int test(unsigned long a, unsigned long b) { return -(a < b); } - -We currently compile this to: - -define i32 @test(i32 %a, i32 %b) nounwind { - %tmp3 = icmp ult i32 %a, %b ; <i1> [#uses=1] - %tmp34 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] - %tmp5 = sub i32 0, %tmp34 ; <i32> [#uses=1] - ret i32 %tmp5 -} - -and - -_test: - movl 8(%esp), %eax - cmpl %eax, 4(%esp) - setb %al - movzbl %al, %eax - negl %eax - ret - -Several deficiencies here. First, we should instcombine zext+neg into sext: - -define i32 @test2(i32 %a, i32 %b) nounwind { - %tmp3 = icmp ult i32 %a, %b ; <i1> [#uses=1] - %tmp34 = sext i1 %tmp3 to i32 ; <i32> [#uses=1] - ret i32 %tmp34 -} - -However, before we can do that, we have to fix the bad codegen that we get for -sext from bool: - -_test2: - movl 8(%esp), %eax - cmpl %eax, 4(%esp) - setb %al - movzbl %al, %eax - shll $31, %eax - sarl $31, %eax - ret - -This code should be at least as good as the code above. 
Once this is fixed, we -can optimize this specific case even more to: - - movl 8(%esp), %eax - xorl %ecx, %ecx - cmpl %eax, 4(%esp) - sbbl %ecx, %ecx - -//===---------------------------------------------------------------------===// - -Take the following code (from -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16541): - -extern unsigned char first_one[65536]; -int FirstOnet(unsigned long long arg1) -{ - if (arg1 >> 48) - return (first_one[arg1 >> 48]); - return 0; -} - - -The following code is currently generated: -FirstOnet: - movl 8(%esp), %eax - cmpl $65536, %eax - movl 4(%esp), %ecx - jb .LBB1_2 # UnifiedReturnBlock -.LBB1_1: # ifthen - shrl $16, %eax - movzbl first_one(%eax), %eax - ret -.LBB1_2: # UnifiedReturnBlock - xorl %eax, %eax - ret - -We could change the "movl 8(%esp), %eax" into "movzwl 10(%esp), %eax"; this -lets us change the cmpl into a testl, which is shorter, and eliminate the shift. - -//===---------------------------------------------------------------------===// - -We compile this function: - -define i32 @foo(i32 %a, i32 %b, i32 %c, i8 zeroext %d) nounwind { -entry: - %tmp2 = icmp eq i8 %d, 0 ; <i1> [#uses=1] - br i1 %tmp2, label %bb7, label %bb - -bb: ; preds = %entry - %tmp6 = add i32 %b, %a ; <i32> [#uses=1] - ret i32 %tmp6 - -bb7: ; preds = %entry - %tmp10 = sub i32 %a, %c ; <i32> [#uses=1] - ret i32 %tmp10 -} - -to: - -foo: # @foo -# BB#0: # %entry - movl 4(%esp), %ecx - cmpb $0, 16(%esp) - je .LBB0_2 -# BB#1: # %bb - movl 8(%esp), %eax - addl %ecx, %eax - ret -.LBB0_2: # %bb7 - movl 12(%esp), %edx - movl %ecx, %eax - subl %edx, %eax - ret - -There's an obviously unnecessary movl in .LBB0_2, and we could eliminate a -couple more movls by putting 4(%esp) into %eax instead of %ecx. - -//===---------------------------------------------------------------------===// - -See rdar://4653682. 
- -From flops: - -LBB1_15: # bb310 - cvtss2sd LCPI1_0, %xmm1 - addsd %xmm1, %xmm0 - movsd 176(%esp), %xmm2 - mulsd %xmm0, %xmm2 - movapd %xmm2, %xmm3 - mulsd %xmm3, %xmm3 - movapd %xmm3, %xmm4 - mulsd LCPI1_23, %xmm4 - addsd LCPI1_24, %xmm4 - mulsd %xmm3, %xmm4 - addsd LCPI1_25, %xmm4 - mulsd %xmm3, %xmm4 - addsd LCPI1_26, %xmm4 - mulsd %xmm3, %xmm4 - addsd LCPI1_27, %xmm4 - mulsd %xmm3, %xmm4 - addsd LCPI1_28, %xmm4 - mulsd %xmm3, %xmm4 - addsd %xmm1, %xmm4 - mulsd %xmm2, %xmm4 - movsd 152(%esp), %xmm1 - addsd %xmm4, %xmm1 - movsd %xmm1, 152(%esp) - incl %eax - cmpl %eax, %esi - jge LBB1_15 # bb310 -LBB1_16: # bb358.loopexit - movsd 152(%esp), %xmm0 - addsd %xmm0, %xmm0 - addsd LCPI1_22, %xmm0 - movsd %xmm0, 152(%esp) - -Rather than spilling the result of the last addsd in the loop, we should have -inserted a copy to split the interval (one for the duration of the loop, one -extending to the fall through). The register pressure in the loop isn't high -enough to warrant the spill. - -Also check why xmm7 is not used at all in the function.
- -//===---------------------------------------------------------------------===// - -Take the following: - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin8" -@in_exit.4870.b = internal global i1 false ; <i1*> [#uses=2] -define fastcc void @abort_gzip() noreturn nounwind { -entry: - %tmp.b.i = load i1* @in_exit.4870.b ; <i1> [#uses=1] - br i1 %tmp.b.i, label %bb.i, label %bb4.i -bb.i: ; preds = %entry - tail call void @exit( i32 1 ) noreturn nounwind - unreachable -bb4.i: ; preds = %entry - store i1 true, i1* @in_exit.4870.b - tail call void @exit( i32 1 ) noreturn nounwind - unreachable -} -declare void @exit(i32) noreturn nounwind - -This compiles into: -_abort_gzip: ## @abort_gzip -## BB#0: ## %entry - subl $12, %esp - movb _in_exit.4870.b, %al - cmpb $1, %al - jne LBB0_2 - -We somehow miss folding the movb into the cmpb. - -//===---------------------------------------------------------------------===// - -We compile: - -int test(int x, int y) { - return x-y-1; -} - -into (-m64): - -_test: - decl %edi - movl %edi, %eax - subl %esi, %eax - ret - -it would be better to codegen as: x+~y (notl+addl) - -//===---------------------------------------------------------------------===// - -This code: - -int foo(const char *str,...) 
-{ - __builtin_va_list a; int x; - __builtin_va_start(a,str); x = __builtin_va_arg(a,int); __builtin_va_end(a); - return x; -} - -gets compiled into this on x86-64: - subq $200, %rsp - movaps %xmm7, 160(%rsp) - movaps %xmm6, 144(%rsp) - movaps %xmm5, 128(%rsp) - movaps %xmm4, 112(%rsp) - movaps %xmm3, 96(%rsp) - movaps %xmm2, 80(%rsp) - movaps %xmm1, 64(%rsp) - movaps %xmm0, 48(%rsp) - movq %r9, 40(%rsp) - movq %r8, 32(%rsp) - movq %rcx, 24(%rsp) - movq %rdx, 16(%rsp) - movq %rsi, 8(%rsp) - leaq (%rsp), %rax - movq %rax, 192(%rsp) - leaq 208(%rsp), %rax - movq %rax, 184(%rsp) - movl $48, 180(%rsp) - movl $8, 176(%rsp) - movl 176(%rsp), %eax - cmpl $47, %eax - jbe .LBB1_3 # bb -.LBB1_1: # bb3 - movq 184(%rsp), %rcx - leaq 8(%rcx), %rax - movq %rax, 184(%rsp) -.LBB1_2: # bb4 - movl (%rcx), %eax - addq $200, %rsp - ret -.LBB1_3: # bb - movl %eax, %ecx - addl $8, %eax - addq 192(%rsp), %rcx - movl %eax, 176(%rsp) - jmp .LBB1_2 # bb4 - -gcc 4.3 generates: - subq $96, %rsp -.LCFI0: - leaq 104(%rsp), %rax - movq %rsi, -80(%rsp) - movl $8, -120(%rsp) - movq %rax, -112(%rsp) - leaq -88(%rsp), %rax - movq %rax, -104(%rsp) - movl $8, %eax - cmpl $48, %eax - jb .L6 - movq -112(%rsp), %rdx - movl (%rdx), %eax - addq $96, %rsp - ret - .p2align 4,,10 - .p2align 3 -.L6: - mov %eax, %edx - addq -104(%rsp), %rdx - addl $8, %eax - movl %eax, -120(%rsp) - movl (%rdx), %eax - addq $96, %rsp - ret - -and it gets compiled into this on x86: - pushl %ebp - movl %esp, %ebp - subl $4, %esp - leal 12(%ebp), %eax - movl %eax, -4(%ebp) - leal 16(%ebp), %eax - movl %eax, -4(%ebp) - movl 12(%ebp), %eax - addl $4, %esp - popl %ebp - ret - -gcc 4.3 generates: - pushl %ebp - movl %esp, %ebp - movl 12(%ebp), %eax - popl %ebp - ret - -//===---------------------------------------------------------------------===// - -Teach tblgen not to check bitconvert source type in some cases. 
This allows us -to consolidate the following patterns in X86InstrMMX.td: - -def : Pat<(v2i32 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))))), - (v2i32 (MMX_MOVDQ2Qrr VR128:$src))>; -def : Pat<(v4i16 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))))), - (v4i16 (MMX_MOVDQ2Qrr VR128:$src))>; -def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))))), - (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>; - -There are other cases in various td files. - -//===---------------------------------------------------------------------===// - -Take something like the following on x86-32: -unsigned a(unsigned long long x, unsigned y) {return x % y;} - -We currently generate a libcall, but we really shouldn't: the expansion is -shorter and likely faster than the libcall. The expected code is something -like the following: - - movl 12(%ebp), %eax - movl 16(%ebp), %ecx - xorl %edx, %edx - divl %ecx - movl 8(%ebp), %eax - divl %ecx - movl %edx, %eax - ret - -A similar code sequence works for division. - -//===---------------------------------------------------------------------===// - -These should compile to the same code, but the latter codegen's to useless -instructions on X86.
This may be a trivial dag combine (GCC PR7061): - -struct s1 { unsigned char a, b; }; -unsigned long f1(struct s1 x) { - return x.a + x.b; -} -struct s2 { unsigned a: 8, b: 8; }; -unsigned long f2(struct s2 x) { - return x.a + x.b; -} - -//===---------------------------------------------------------------------===// - -We currently compile this: - -define i32 @func1(i32 %v1, i32 %v2) nounwind { -entry: - %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) - %sum = extractvalue {i32, i1} %t, 0 - %obit = extractvalue {i32, i1} %t, 1 - br i1 %obit, label %overflow, label %normal -normal: - ret i32 %sum -overflow: - call void @llvm.trap() - unreachable -} -declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) -declare void @llvm.trap() - -to: - -_func1: - movl 4(%esp), %eax - addl 8(%esp), %eax - jo LBB1_2 ## overflow -LBB1_1: ## normal - ret -LBB1_2: ## overflow - ud2 - -it would be nice to produce "into" someday. - -//===---------------------------------------------------------------------===// - -This code: - -void vec_mpys1(int y[], const int x[], int scaler) { -int i; -for (i = 0; i < 150; i++) - y[i] += (((long long)scaler * (long long)x[i]) >> 31); -} - -Compiles to this loop with GCC 3.x: - -.L5: - movl %ebx, %eax - imull (%edi,%ecx,4) - shrdl $31, %edx, %eax - addl %eax, (%esi,%ecx,4) - incl %ecx - cmpl $149, %ecx - jle .L5 - -llvm-gcc compiles it to the much uglier: - -LBB1_1: ## bb1 - movl 24(%esp), %eax - movl (%eax,%edi,4), %ebx - movl %ebx, %ebp - imull %esi, %ebp - movl %ebx, %eax - mull %ecx - addl %ebp, %edx - sarl $31, %ebx - imull %ecx, %ebx - addl %edx, %ebx - shldl $1, %eax, %ebx - movl 20(%esp), %eax - addl %ebx, (%eax,%edi,4) - incl %edi - cmpl $150, %edi - jne LBB1_1 ## bb1 - -The issue is that we hoist the cast of "scaler" to long long outside of the -loop, the value comes into the loop as two values, and -RegsForValue::getCopyFromRegs doesn't know how to put an AssertSext on the -constructed BUILD_PAIR which represents the 
cast value. - -//===---------------------------------------------------------------------===// - -Test instructions can be eliminated by using EFLAGS values from arithmetic -instructions. This is currently not done for mul, and, or, xor, neg, shl, -sra, srl, shld, shrd, atomic ops, and others. It is also currently not done -for read-modify-write instructions. It is also currently not done if the -OF or CF flags are needed. - -The shift operators have the complication that when the shift count is -zero, EFLAGS is not set, so they can only subsume a test instruction if -the shift count is known to be non-zero. Also, using the EFLAGS value -from a shift is apparently very slow on some x86 implementations. - -In read-modify-write instructions, the root node in the isel match is -the store, and isel has no way for the use of the EFLAGS result of the -arithmetic to be remapped to the new node. - -Add and subtract instructions set OF on signed overflow and CF on unsigned -overflow, while test instructions always clear OF and CF. In order to -replace a test with an add or subtract in a situation where OF or CF is -needed, codegen must be able to prove that the operation cannot see -signed or unsigned overflow, respectively. - -//===---------------------------------------------------------------------===// - -memcpy/memmove do not lower to SSE copies when possible.
A silly example is: -define <16 x float> @foo(<16 x float> %A) nounwind { - %tmp = alloca <16 x float>, align 16 - %tmp2 = alloca <16 x float>, align 16 - store <16 x float> %A, <16 x float>* %tmp - %s = bitcast <16 x float>* %tmp to i8* - %s2 = bitcast <16 x float>* %tmp2 to i8* - call void @llvm.memcpy.i64(i8* %s, i8* %s2, i64 64, i32 16) - %R = load <16 x float>* %tmp2 - ret <16 x float> %R -} - -declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind - -which compiles to: - -_foo: - subl $140, %esp - movaps %xmm3, 112(%esp) - movaps %xmm2, 96(%esp) - movaps %xmm1, 80(%esp) - movaps %xmm0, 64(%esp) - movl 60(%esp), %eax - movl %eax, 124(%esp) - movl 56(%esp), %eax - movl %eax, 120(%esp) - movl 52(%esp), %eax - <many many more 32-bit copies> - movaps (%esp), %xmm0 - movaps 16(%esp), %xmm1 - movaps 32(%esp), %xmm2 - movaps 48(%esp), %xmm3 - addl $140, %esp - ret - -On Nehalem, it may even be cheaper to just use movups when unaligned than to -fall back to lower-granularity chunks. - -//===---------------------------------------------------------------------===// - -Implement processor-specific optimizations for parity with GCC on these -processors. GCC does two optimizations: - -1. ix86_pad_returns inserts a noop before ret instructions if immediately - preceded by a conditional branch or is the target of a jump. -2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of - code contains more than 3 branches.
- -The first one is done for all AMDs, Core2, and "Generic" -The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, - Core 2, and "Generic" - -//===---------------------------------------------------------------------===// - -Testcase: -int a(int x) { return (x & 127) > 31; } - -Current output: - movl 4(%esp), %eax - andl $127, %eax - cmpl $31, %eax - seta %al - movzbl %al, %eax - ret - -Ideal output: - xorl %eax, %eax - testl $96, 4(%esp) - setne %al - ret - -This should definitely be done in instcombine, canonicalizing the range -condition into a != condition. We get this IR: - -define i32 @a(i32 %x) nounwind readnone { -entry: - %0 = and i32 %x, 127 ; <i32> [#uses=1] - %1 = icmp ugt i32 %0, 31 ; <i1> [#uses=1] - %2 = zext i1 %1 to i32 ; <i32> [#uses=1] - ret i32 %2 -} - -Instcombine prefers to strength reduce relational comparisons to equality -comparisons when possible, this should be another case of that. This could -be handled pretty easily in InstCombiner::visitICmpInstWithInstAndIntCst, but it -looks like InstCombiner::visitICmpInstWithInstAndIntCst should really already -be redesigned to use ComputeMaskedBits and friends. - - -//===---------------------------------------------------------------------===// -Testcase: -int x(int a) { return (a&0xf0)>>4; } - -Current output: - movl 4(%esp), %eax - shrl $4, %eax - andl $15, %eax - ret - -Ideal output: - movzbl 4(%esp), %eax - shrl $4, %eax - ret - -//===---------------------------------------------------------------------===// - -Testcase: -int x(int a) { return (a & 0x80) ? 0x100 : 0; } -int y(int a) { return (a & 0x80) *2; } - -Current: - testl $128, 4(%esp) - setne %al - movzbl %al, %eax - shll $8, %eax - ret - -Better: - movl 4(%esp), %eax - addl %eax, %eax - andl $256, %eax - ret - -This is another general instcombine transformation that is profitable on all -targets. 
In LLVM IR, these functions look like this: - -define i32 @x(i32 %a) nounwind readnone { -entry: - %0 = and i32 %a, 128 - %1 = icmp eq i32 %0, 0 - %iftmp.0.0 = select i1 %1, i32 0, i32 256 - ret i32 %iftmp.0.0 -} - -define i32 @y(i32 %a) nounwind readnone { -entry: - %0 = shl i32 %a, 1 - %1 = and i32 %0, 256 - ret i32 %1 -} - -Replacing an icmp+select with a shift should always be considered profitable in -instcombine. - -//===---------------------------------------------------------------------===// - -Re-implement atomic builtins __sync_add_and_fetch() and __sync_sub_and_fetch -properly. - -When the return value is not used (i.e. only care about the value in the -memory), x86 does not have to use xadd to implement these. Instead, it can use -add, sub, inc, dec instructions with the "lock" prefix. - -This is currently implemented using a bit of instruction selection trick. The -issue is the target independent pattern produces one output and a chain and we -want to map it into one that just outputs a chain. The current trick is to select -it into a MERGE_VALUES with the first definition being an implicit_def. The -proper solution is to add new ISD opcodes for the no-output variant. DAG -combiner can then transform the node before it gets to target node selection. - -Problem #2 is we are adding a whole bunch of x86 atomic instructions when in -fact these instructions are identical to the non-lock versions. We need a way to -add target specific information to target nodes and have this information -carried over to machine instructions. Asm printer (or JIT) can use this -information to add the "lock" prefix.
- -//===---------------------------------------------------------------------===// - -_Bool bar(int *x) { return *x & 1; } - -define zeroext i1 @bar(i32* nocapture %x) nounwind readonly { -entry: - %tmp1 = load i32* %x ; <i32> [#uses=1] - %and = and i32 %tmp1, 1 ; <i32> [#uses=1] - %tobool = icmp ne i32 %and, 0 ; <i1> [#uses=1] - ret i1 %tobool -} - -bar: # @bar -# BB#0: # %entry - movl 4(%esp), %eax - movb (%eax), %al - andb $1, %al - movzbl %al, %eax - ret - -Missed optimization: should be movl+andl. - -//===---------------------------------------------------------------------===// - -Consider the following two functions compiled with clang: -_Bool foo(int *x) { return !(*x & 4); } -unsigned bar(int *x) { return !(*x & 4); } - -foo: - movl 4(%esp), %eax - testb $4, (%eax) - sete %al - movzbl %al, %eax - ret - -bar: - movl 4(%esp), %eax - movl (%eax), %eax - shrl $2, %eax - andl $1, %eax - xorl $1, %eax - ret - -The second function generates more code even though the two functions are -functionally identical. - -//===---------------------------------------------------------------------===// - -Take the following C code: -int x(int y) { return (y & 63) << 14; } - -Code produced by gcc: - andl $63, %edi - sall $14, %edi - movl %edi, %eax - ret - -Code produced by clang: - shll $14, %edi - movl %edi, %eax - andl $1032192, %eax - ret - -The code produced by gcc is 3 bytes shorter. This sort of construct often -shows up with bitfields.
- -//===---------------------------------------------------------------------===// - -Take the following C code: -int f(int a, int b) { return (unsigned char)a == (unsigned char)b; } - -We generate the following IR with clang: -define i32 @f(i32 %a, i32 %b) nounwind readnone { -entry: - %tmp = xor i32 %b, %a ; <i32> [#uses=1] - %tmp6 = and i32 %tmp, 255 ; <i32> [#uses=1] - %cmp = icmp eq i32 %tmp6, 0 ; <i1> [#uses=1] - %conv5 = zext i1 %cmp to i32 ; <i32> [#uses=1] - ret i32 %conv5 -} - -And the following x86 code: - xorl %esi, %edi - testb $-1, %dil - sete %al - movzbl %al, %eax - ret - -A cmpb instead of the xorl+testb would be one instruction shorter. - -//===---------------------------------------------------------------------===// - -Given the following C code: -int f(int a, int b) { return (signed char)a == (signed char)b; } - -We generate the following IR with clang: -define i32 @f(i32 %a, i32 %b) nounwind readnone { -entry: - %sext = shl i32 %a, 24 ; <i32> [#uses=1] - %conv1 = ashr i32 %sext, 24 ; <i32> [#uses=1] - %sext6 = shl i32 %b, 24 ; <i32> [#uses=1] - %conv4 = ashr i32 %sext6, 24 ; <i32> [#uses=1] - %cmp = icmp eq i32 %conv1, %conv4 ; <i1> [#uses=1] - %conv5 = zext i1 %cmp to i32 ; <i32> [#uses=1] - ret i32 %conv5 -} - -And the following x86 code: - movsbl %sil, %eax - movsbl %dil, %ecx - cmpl %eax, %ecx - sete %al - movzbl %al, %eax - ret - - -It should be possible to eliminate the sign extensions. 
- -//===---------------------------------------------------------------------===// - -LLVM misses a load+store narrowing opportunity in this code: - -%struct.bf = type { i64, i16, i16, i32 } - -@bfi = external global %struct.bf* ; <%struct.bf**> [#uses=2] - -define void @t1() nounwind ssp { -entry: - %0 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1] - %1 = getelementptr %struct.bf* %0, i64 0, i32 1 ; <i16*> [#uses=1] - %2 = bitcast i16* %1 to i32* ; <i32*> [#uses=2] - %3 = load i32* %2, align 1 ; <i32> [#uses=1] - %4 = and i32 %3, -65537 ; <i32> [#uses=1] - store i32 %4, i32* %2, align 1 - %5 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1] - %6 = getelementptr %struct.bf* %5, i64 0, i32 1 ; <i16*> [#uses=1] - %7 = bitcast i16* %6 to i32* ; <i32*> [#uses=2] - %8 = load i32* %7, align 1 ; <i32> [#uses=1] - %9 = and i32 %8, -131073 ; <i32> [#uses=1] - store i32 %9, i32* %7, align 1 - ret void -} - -LLVM currently emits this: - - movq bfi(%rip), %rax - andl $-65537, 8(%rax) - movq bfi(%rip), %rax - andl $-131073, 8(%rax) - ret - -It could narrow the loads and stores to emit this: - - movq bfi(%rip), %rax - andb $-2, 10(%rax) - movq bfi(%rip), %rax - andb $-3, 10(%rax) - ret - -The trouble is that there is a TokenFactor between the store and the -load, making it non-trivial to determine if there's anything between -the load and the store which would prohibit narrowing. - -//===---------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/X86/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/X86/TargetInfo/CMakeLists.txt deleted file mode 100644 index 90be9f5..0000000 --- a/contrib/llvm/lib/Target/X86/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMX86Info - X86TargetInfo.cpp - ) - -add_dependencies(LLVMX86Info X86CodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/X86/TargetInfo/Makefile b/contrib/llvm/lib/Target/X86/TargetInfo/Makefile deleted file mode 100644 index ee91982..0000000 --- a/contrib/llvm/lib/Target/X86/TargetInfo/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/X86/TargetInfo/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMX86Info - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/X86/X86CompilationCallback_Win64.asm b/contrib/llvm/lib/Target/X86/X86CompilationCallback_Win64.asm deleted file mode 100644 index f321778..0000000 --- a/contrib/llvm/lib/Target/X86/X86CompilationCallback_Win64.asm +++ /dev/null @@ -1,68 +0,0 @@ -;;===-- X86CompilationCallback_Win64.asm - Implement Win64 JIT callback ---=== -;; -;; The LLVM Compiler Infrastructure -;; -;; This file is distributed under the University of Illinois Open Source -;; License. See LICENSE.TXT for details. -;; -;;===----------------------------------------------------------------------=== -;; -;; This file implements the JIT interfaces for the X86 target. -;; -;;===----------------------------------------------------------------------=== - -extrn X86CompilationCallback2: PROC - -.code -X86CompilationCallback proc - push rbp - - ; Save RSP. - mov rbp, rsp - - ; Save all int arg registers - ; WARNING: We cannot use register spill area - we're generating stubs by hands! - push rcx - push rdx - push r8 - push r9 - - ; Align stack on 16-byte boundary. 
- and rsp, -16 - - ; Save all XMM arg registers. Also allocate reg spill area. - sub rsp, 96 - movaps [rsp +32], xmm0 - movaps [rsp+16+32], xmm1 - movaps [rsp+32+32], xmm2 - movaps [rsp+48+32], xmm3 - - ; JIT callee - - ; Pass prev frame and return address. - mov rcx, rbp - mov rdx, qword ptr [rbp+8] - call X86CompilationCallback2 - - ; Restore all XMM arg registers. - movaps xmm3, [rsp+48+32] - movaps xmm2, [rsp+32+32] - movaps xmm1, [rsp+16+32] - movaps xmm0, [rsp +32] - - ; Restore RSP. - mov rsp, rbp - - ; Restore all int arg registers - sub rsp, 32 - pop r9 - pop r8 - pop rdx - pop rcx - - ; Restore RBP. - pop rbp - ret -X86CompilationCallback endp - -End diff --git a/contrib/llvm/lib/Target/XCore/AsmPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/XCore/AsmPrinter/CMakeLists.txt deleted file mode 100644 index 7c7c2f4..0000000 --- a/contrib/llvm/lib/Target/XCore/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMXCoreAsmPrinter - XCoreAsmPrinter.cpp - ) -add_dependencies(LLVMXCoreAsmPrinter XCoreCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/XCore/AsmPrinter/Makefile b/contrib/llvm/lib/Target/XCore/AsmPrinter/Makefile deleted file mode 100644 index 581f736..0000000 --- a/contrib/llvm/lib/Target/XCore/AsmPrinter/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/XCore/AsmPrinter/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMXCoreAsmPrinter - -# Hack: we need to include 'main' XCore target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/XCore/CMakeLists.txt b/contrib/llvm/lib/Target/XCore/CMakeLists.txt deleted file mode 100644 index 38b35d7..0000000 --- a/contrib/llvm/lib/Target/XCore/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS XCore.td) - -tablegen(XCoreGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(XCoreGenRegisterNames.inc -gen-register-enums) -tablegen(XCoreGenRegisterInfo.inc -gen-register-desc) -tablegen(XCoreGenInstrNames.inc -gen-instr-enums) -tablegen(XCoreGenInstrInfo.inc -gen-instr-desc) -tablegen(XCoreGenAsmWriter.inc -gen-asm-writer) -tablegen(XCoreGenDAGISel.inc -gen-dag-isel) -tablegen(XCoreGenCallingConv.inc -gen-callingconv) -tablegen(XCoreGenSubtarget.inc -gen-subtarget) - -add_llvm_target(XCoreCodeGen - XCoreFrameInfo.cpp - XCoreInstrInfo.cpp - XCoreISelDAGToDAG.cpp - XCoreISelLowering.cpp - XCoreMCAsmInfo.cpp - XCoreRegisterInfo.cpp - XCoreSubtarget.cpp - XCoreTargetMachine.cpp - XCoreTargetObjectFile.cpp - XCoreSelectionDAGInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/XCore/Makefile b/contrib/llvm/lib/Target/XCore/Makefile deleted file mode 100644 index 1b70974..0000000 --- a/contrib/llvm/lib/Target/XCore/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -##===- lib/Target/XCore/Makefile ---------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMXCoreCodeGen -TARGET = XCore - -# Make sure that tblgen is run, first thing. 
-BUILT_SOURCES = XCoreGenRegisterInfo.h.inc XCoreGenRegisterNames.inc \ - XCoreGenRegisterInfo.inc XCoreGenInstrNames.inc \ - XCoreGenInstrInfo.inc XCoreGenAsmWriter.inc \ - XCoreGenDAGISel.inc XCoreGenCallingConv.inc \ - XCoreGenSubtarget.inc - -DIRS = AsmPrinter TargetInfo - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Target/XCore/README.txt b/contrib/llvm/lib/Target/XCore/README.txt deleted file mode 100644 index b69205b..0000000 --- a/contrib/llvm/lib/Target/XCore/README.txt +++ /dev/null @@ -1,7 +0,0 @@ -To-do ------ - -* Instruction encodings -* Tailcalls -* Investigate loop alignment -* Add builtins diff --git a/contrib/llvm/lib/Target/XCore/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/XCore/TargetInfo/CMakeLists.txt deleted file mode 100644 index 0a568de..0000000 --- a/contrib/llvm/lib/Target/XCore/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMXCoreInfo - XCoreTargetInfo.cpp - ) - -add_dependencies(LLVMXCoreInfo XCoreTable_gen) diff --git a/contrib/llvm/lib/Target/XCore/TargetInfo/Makefile b/contrib/llvm/lib/Target/XCore/TargetInfo/Makefile deleted file mode 100644 index f8a4095..0000000 --- a/contrib/llvm/lib/Target/XCore/TargetInfo/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/XCore/TargetInfo/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMXCoreInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Transforms/Hello/CMakeLists.txt b/contrib/llvm/lib/Transforms/Hello/CMakeLists.txt deleted file mode 100644 index 917b745..0000000 --- a/contrib/llvm/lib/Transforms/Hello/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_loadable_module( LLVMHello - Hello.cpp - ) diff --git a/contrib/llvm/lib/Transforms/Hello/Hello.cpp b/contrib/llvm/lib/Transforms/Hello/Hello.cpp deleted file mode 100644 index 838d550..0000000 --- a/contrib/llvm/lib/Transforms/Hello/Hello.cpp +++ /dev/null @@ -1,65 +0,0 @@ -//===- Hello.cpp - Example code from "Writing an LLVM Pass" ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements two versions of the LLVM "Hello World" pass described -// in docs/WritingAnLLVMPass.html -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "hello" -#include "llvm/Pass.h" -#include "llvm/Function.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -STATISTIC(HelloCounter, "Counts number of functions greeted"); - -namespace { - // Hello - The first implementation, without getAnalysisUsage. - struct Hello : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - Hello() : FunctionPass(ID) {} - - virtual bool runOnFunction(Function &F) { - ++HelloCounter; - errs() << "Hello: "; - errs().write_escaped(F.getName()) << '\n'; - return false; - } - }; -} - -char Hello::ID = 0; -INITIALIZE_PASS(Hello, "hello", "Hello World Pass", false, false); - -namespace { - // Hello2 - The second implementation with getAnalysisUsage implemented. 
- struct Hello2 : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - Hello2() : FunctionPass(ID) {} - - virtual bool runOnFunction(Function &F) { - ++HelloCounter; - errs() << "Hello: "; - errs().write_escaped(F.getName()) << '\n'; - return false; - } - - // We don't modify the program, so we preserve all analyses - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - } - }; -} - -char Hello2::ID = 0; -INITIALIZE_PASS(Hello2, "hello2", - "Hello World Pass (with getAnalysisUsage implemented)", - false, false); diff --git a/contrib/llvm/lib/Transforms/Hello/Hello.exports b/contrib/llvm/lib/Transforms/Hello/Hello.exports deleted file mode 100644 index e69de29..0000000 --- a/contrib/llvm/lib/Transforms/Hello/Hello.exports +++ /dev/null diff --git a/contrib/llvm/lib/Transforms/Hello/Makefile b/contrib/llvm/lib/Transforms/Hello/Makefile deleted file mode 100644 index f1e3148..0000000 --- a/contrib/llvm/lib/Transforms/Hello/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -##===- lib/Transforms/Hello/Makefile -----------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMHello -LOADABLE_MODULE = 1 -USEDLIBS = - -# If we don't need RTTI or EH, there's no reason to export anything -# from the hello plugin. 
-ifneq ($(REQUIRES_RTTI), 1) -ifneq ($(REQUIRES_EH), 1) -EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/Hello.exports -endif -endif - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Transforms/IPO/CMakeLists.txt b/contrib/llvm/lib/Transforms/IPO/CMakeLists.txt deleted file mode 100644 index 65483e8..0000000 --- a/contrib/llvm/lib/Transforms/IPO/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -add_llvm_library(LLVMipo - ArgumentPromotion.cpp - ConstantMerge.cpp - DeadArgumentElimination.cpp - DeadTypeElimination.cpp - ExtractGV.cpp - FunctionAttrs.cpp - GlobalDCE.cpp - GlobalOpt.cpp - IPConstantPropagation.cpp - IPO.cpp - InlineAlways.cpp - InlineSimple.cpp - Inliner.cpp - Internalize.cpp - LoopExtractor.cpp - LowerSetJmp.cpp - MergeFunctions.cpp - PartialInlining.cpp - PartialSpecialization.cpp - PruneEH.cpp - StripDeadPrototypes.cpp - StripSymbols.cpp - StructRetPromotion.cpp - ) - -target_link_libraries (LLVMipo LLVMScalarOpts LLVMInstCombine) diff --git a/contrib/llvm/lib/Transforms/IPO/Makefile b/contrib/llvm/lib/Transforms/IPO/Makefile deleted file mode 100644 index 5c42374..0000000 --- a/contrib/llvm/lib/Transforms/IPO/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Transforms/IPO/Makefile -------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. 
-LIBRARYNAME = LLVMipo -BUILD_ARCHIVE = 1 - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Transforms/InstCombine/CMakeLists.txt b/contrib/llvm/lib/Transforms/InstCombine/CMakeLists.txt deleted file mode 100644 index 5b1ff3e..0000000 --- a/contrib/llvm/lib/Transforms/InstCombine/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -add_llvm_library(LLVMInstCombine - InstructionCombining.cpp - InstCombineAddSub.cpp - InstCombineAndOrXor.cpp - InstCombineCalls.cpp - InstCombineCasts.cpp - InstCombineCompares.cpp - InstCombineLoadStoreAlloca.cpp - InstCombineMulDivRem.cpp - InstCombinePHI.cpp - InstCombineSelect.cpp - InstCombineShifts.cpp - InstCombineSimplifyDemanded.cpp - InstCombineVectorOps.cpp - ) - -target_link_libraries (LLVMInstCombine LLVMTransformUtils) diff --git a/contrib/llvm/lib/Transforms/InstCombine/Makefile b/contrib/llvm/lib/Transforms/InstCombine/Makefile deleted file mode 100644 index 0c488e78..0000000 --- a/contrib/llvm/lib/Transforms/InstCombine/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Transforms/InstCombine/Makefile -----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. 
-LIBRARYNAME = LLVMInstCombine -BUILD_ARCHIVE = 1 - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/contrib/llvm/lib/Transforms/Instrumentation/CMakeLists.txt deleted file mode 100644 index 128bf48..0000000 --- a/contrib/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_llvm_library(LLVMInstrumentation - EdgeProfiling.cpp - OptimalEdgeProfiling.cpp - ProfilingUtils.cpp - ) diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Makefile b/contrib/llvm/lib/Transforms/Instrumentation/Makefile deleted file mode 100644 index 6cbc7a9..0000000 --- a/contrib/llvm/lib/Transforms/Instrumentation/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Transforms/Instrumentation/Makefile -------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMInstrumentation -BUILD_ARCHIVE = 1 - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Transforms/Makefile b/contrib/llvm/lib/Transforms/Makefile deleted file mode 100644 index e527be2..0000000 --- a/contrib/llvm/lib/Transforms/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -##===- lib/Transforms/Makefile -----------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../.. 
-PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Hello - -include $(LEVEL)/Makefile.config - -# No support for plugins on windows targets -ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW Minix)) - PARALLEL_DIRS := $(filter-out Hello, $(PARALLEL_DIRS)) -endif - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Transforms/Scalar/CMakeLists.txt b/contrib/llvm/lib/Transforms/Scalar/CMakeLists.txt deleted file mode 100644 index b7598ea..0000000 --- a/contrib/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ /dev/null @@ -1,35 +0,0 @@ -add_llvm_library(LLVMScalarOpts - ADCE.cpp - BasicBlockPlacement.cpp - CodeGenPrepare.cpp - ConstantProp.cpp - CorrelatedValuePropagation.cpp - DCE.cpp - DeadStoreElimination.cpp - GEPSplitter.cpp - GVN.cpp - IndVarSimplify.cpp - JumpThreading.cpp - LICM.cpp - LoopDeletion.cpp - LoopIndexSplit.cpp - LoopRotation.cpp - LoopStrengthReduce.cpp - LoopUnrollPass.cpp - LoopUnswitch.cpp - LowerAtomic.cpp - MemCpyOptimizer.cpp - Reassociate.cpp - Reg2Mem.cpp - SCCP.cpp - Scalar.cpp - ScalarReplAggregates.cpp - SimplifyCFGPass.cpp - SimplifyHalfPowrLibCalls.cpp - SimplifyLibCalls.cpp - Sink.cpp - TailDuplication.cpp - TailRecursionElimination.cpp - ) - -target_link_libraries (LLVMScalarOpts LLVMTransformUtils) diff --git a/contrib/llvm/lib/Transforms/Scalar/Makefile b/contrib/llvm/lib/Transforms/Scalar/Makefile deleted file mode 100644 index cc42fd0..0000000 --- a/contrib/llvm/lib/Transforms/Scalar/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Transforms/Scalar/Makefile ----------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. 
-LIBRARYNAME = LLVMScalarOpts -BUILD_ARCHIVE = 1 - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/Transforms/Utils/CMakeLists.txt b/contrib/llvm/lib/Transforms/Utils/CMakeLists.txt deleted file mode 100644 index 61cbeb2..0000000 --- a/contrib/llvm/lib/Transforms/Utils/CMakeLists.txt +++ /dev/null @@ -1,28 +0,0 @@ -add_llvm_library(LLVMTransformUtils - AddrModeMatcher.cpp - BasicBlockUtils.cpp - BasicInliner.cpp - BreakCriticalEdges.cpp - BuildLibCalls.cpp - CloneFunction.cpp - CloneLoop.cpp - CloneModule.cpp - CodeExtractor.cpp - DemoteRegToStack.cpp - InlineFunction.cpp - InstructionNamer.cpp - LCSSA.cpp - Local.cpp - LoopSimplify.cpp - LoopUnroll.cpp - LowerInvoke.cpp - LowerSwitch.cpp - Mem2Reg.cpp - PromoteMemoryToRegister.cpp - SSAUpdater.cpp - SimplifyCFG.cpp - UnifyFunctionExitNodes.cpp - ValueMapper.cpp - ) - -target_link_libraries (LLVMTransformUtils LLVMSupport) diff --git a/contrib/llvm/lib/Transforms/Utils/Makefile b/contrib/llvm/lib/Transforms/Utils/Makefile deleted file mode 100644 index d1e9336..0000000 --- a/contrib/llvm/lib/Transforms/Utils/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Transforms/Utils/Makefile -----------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. 
-LIBRARYNAME = LLVMTransformUtils -BUILD_ARCHIVE = 1 - -include $(LEVEL)/Makefile.common - diff --git a/contrib/llvm/lib/VMCore/CMakeLists.txt b/contrib/llvm/lib/VMCore/CMakeLists.txt deleted file mode 100644 index 1388c93..0000000 --- a/contrib/llvm/lib/VMCore/CMakeLists.txt +++ /dev/null @@ -1,35 +0,0 @@ -add_llvm_library(LLVMCore - AsmWriter.cpp - Attributes.cpp - AutoUpgrade.cpp - BasicBlock.cpp - ConstantFold.cpp - Constants.cpp - Core.cpp - DebugLoc.cpp - Dominators.cpp - Function.cpp - GVMaterializer.cpp - Globals.cpp - IRBuilder.cpp - InlineAsm.cpp - Instruction.cpp - Instructions.cpp - IntrinsicInst.cpp - LLVMContext.cpp - LLVMContextImpl.cpp - LeakDetector.cpp - Metadata.cpp - Module.cpp - Pass.cpp - PassManager.cpp - PassRegistry.cpp - PrintModulePass.cpp - Type.cpp - TypeSymbolTable.cpp - Use.cpp - Value.cpp - ValueSymbolTable.cpp - ValueTypes.cpp - Verifier.cpp - ) diff --git a/contrib/llvm/lib/VMCore/Makefile b/contrib/llvm/lib/VMCore/Makefile deleted file mode 100644 index 03a4fc7..0000000 --- a/contrib/llvm/lib/VMCore/Makefile +++ /dev/null @@ -1,34 +0,0 @@ -##===- lib/VMCore/Makefile ---------------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../.. 
-LIBRARYNAME = LLVMCore -BUILD_ARCHIVE = 1 -REQUIRES_RTTI = 1 - -BUILT_SOURCES = $(PROJ_OBJ_ROOT)/include/llvm/Intrinsics.gen - -include $(LEVEL)/Makefile.common - -GENFILE:=$(PROJ_OBJ_ROOT)/include/llvm/Intrinsics.gen - -INTRINSICTD := $(PROJ_SRC_ROOT)/include/llvm/Intrinsics.td -INTRINSICTDS := $(wildcard $(PROJ_SRC_ROOT)/include/llvm/Intrinsics*.td) - -$(ObjDir)/Intrinsics.gen.tmp: $(ObjDir)/.dir $(INTRINSICTDS) $(TBLGEN) - $(Echo) Building Intrinsics.gen.tmp from Intrinsics.td - $(Verb) $(TableGen) $(call SYSPATH, $(INTRINSICTD)) -o $(call SYSPATH, $@) -gen-intrinsic - -$(GENFILE): $(ObjDir)/Intrinsics.gen.tmp - $(Verb) $(CMP) -s $@ $< || ( $(CP) $< $@ && \ - $(EchoCmd) Updated Intrinsics.gen because Intrinsics.gen.tmp \ - changed significantly. ) - -install-local:: $(GENFILE) - $(Echo) Installing $(DESTDIR)$(PROJ_includedir)/llvm/Intrinsics.gen - $(Verb) $(DataInstall) $(GENFILE) $(DESTDIR)$(PROJ_includedir)/llvm/Intrinsics.gen |