Diffstat (limited to 'lib/Target/README.txt')
-rw-r--r--  lib/Target/README.txt | 110
1 file changed, 88 insertions, 22 deletions
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index a345d3d..aad621f 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -339,6 +339,8 @@ we don't have whole-function selection dags.
 On x86, this means we use one extra register for the function when
 effective_addr2 is declared as U64 than when it is declared U32.
 
+PHI Slicing could be extended to do this.
+
 //===---------------------------------------------------------------------===//
 
 LSR should know what GPR types a target has.  This code:
@@ -406,22 +408,6 @@ return:        ; preds = %then.1, %else.0, %then.0
 
 //===---------------------------------------------------------------------===//
 
-Tail recursion elimination is not transforming this function, because it is
-returning n, which fails the isDynamicConstant check in the accumulator
-recursion checks.
-
-long long fib(const long long n) {
-  switch(n) {
-    case 0:
-    case 1:
-      return n;
-    default:
-      return fib(n-1) + fib(n-2);
-  }
-}
-
-//===---------------------------------------------------------------------===//
-
 Tail recursion elimination should handle:
 
 int pow2m1(int n) {
@@ -1229,6 +1215,40 @@ GCC PR33344 is a similar case.
 
 //===---------------------------------------------------------------------===//
 
+[PHI TRANSLATE INDEXED GEPs] PR5313
+
+Load redundancy elimination for a simple loop.  This loop:
+
+void append_text(const char* text,unsigned char * const io) {
+  while(*text)
+    *io=*text++;
+}
+
+Compiles to have a fully redundant load in the loop (%2):
+
+define void @append_text(i8* nocapture %text, i8* nocapture %io) nounwind {
+entry:
+  %0 = load i8* %text, align 1                    ; <i8> [#uses=1]
+  %1 = icmp eq i8 %0, 0                           ; <i1> [#uses=1]
+  br i1 %1, label %return, label %bb
+
+bb:                                               ; preds = %bb, %entry
+  %indvar = phi i32 [ 0, %entry ], [ %tmp, %bb ]  ; <i32> [#uses=2]
+  %text_addr.04 = getelementptr i8* %text, i32 %indvar ; <i8*> [#uses=1]
+  %2 = load i8* %text_addr.04, align 1            ; <i8> [#uses=1]
+  store i8 %2, i8* %io, align 1
+  %tmp = add i32 %indvar, 1                       ; <i32> [#uses=2]
+  %scevgep = getelementptr i8* %text, i32 %tmp    ; <i8*> [#uses=1]
+  %3 = load i8* %scevgep, align 1                 ; <i8> [#uses=1]
+  %4 = icmp eq i8 %3, 0                           ; <i1> [#uses=1]
+  br i1 %4, label %return, label %bb
+
+return:                                           ; preds = %bb, %entry
+  ret void
+}
+
+//===---------------------------------------------------------------------===//
+
 There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the
 GCC testsuite.  There are many pre testcases as ssa-pre-*.c
 
@@ -1594,12 +1614,6 @@ int int_char(char m) {if(m>7) return 0; return m;}
 
 //===---------------------------------------------------------------------===//
 
-IPSCCP is propagating elements of first class aggregates, but is not propagating
-the entire aggregate itself.  This leads it to miss opportunities, for example
-in test/Transforms/SCCP/ipsccp-basic.ll:test5b.
-
-//===---------------------------------------------------------------------===//
-
 int func(int a, int b) { if (a & 0x80) b |= 0x80; else b &= ~0x80; return b; }
 
 Generates this:
@@ -1668,3 +1682,55 @@ entry:
 }
 
 //===---------------------------------------------------------------------===//
+
+IPSCCP does not currently propagate argument-dependent constants through
+functions where it does not know all of the callers.  This includes functions
+with normal external linkage as well as templates, C99 inline functions, etc.
+Specifically, it does nothing to:
+
+define i32 @test(i32 %x, i32 %y, i32 %z) nounwind {
+entry:
+  %0 = add nsw i32 %y, %z
+  %1 = mul i32 %0, %x
+  %2 = mul i32 %y, %z
+  %3 = add nsw i32 %1, %2
+  ret i32 %3
+}
+
+define i32 @test2() nounwind {
+entry:
+  %0 = call i32 @test(i32 1, i32 2, i32 4) nounwind
+  ret i32 %0
+}
+
+It would be interesting to extend IPSCCP to be able to handle simple cases like
+this, where all of the arguments to a call are constant.  Because IPSCCP runs
+before inlining, trivial templates and inline functions are not yet inlined.
+The results for a function + set of constant arguments should be memoized in a
+map.
+
+//===---------------------------------------------------------------------===//
+
+The libcall constant folding stuff should be moved out of SimplifyLibcalls into
+libanalysis' constantfolding logic.  This would allow IPSCCP to be able to
+handle simple things like this:
+
+static int foo(const char *X) { return strlen(X); }
+int bar() { return foo("abcd"); }
+
+//===---------------------------------------------------------------------===//
+
+InstCombine should use SimplifyDemandedBits to remove the or instruction:
+
+define i1 @test(i8 %x, i8 %y) {
+  %A = or i8 %x, 1
+  %B = icmp ugt i8 %A, 3
+  ret i1 %B
+}
+
+Currently instcombine calls SimplifyDemandedBits with either all bits or just
+the sign bit, if the comparison is obviously a sign test.  In this case, we only
+need all but the bottom two bits from %A, and if we gave that mask to SDB it
+would delete the or instruction for us.
+
+//===---------------------------------------------------------------------===//
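
A few hand-written sketches follow for the entries added above; none of them is
actual compiler output.  For the PR5313 load-redundancy entry, PHI-translating
the fully redundant load %2 through the backedge makes its value available as
%0 from the entry block and as the previous iteration's load from the backedge,
so the loop could end up roughly like this (the names %cur, %next and %cond are
invented for readability; %text_addr.04 and %2 become dead):

define void @append_text(i8* nocapture %text, i8* nocapture %io) nounwind {
entry:
  %0 = load i8* %text, align 1
  %1 = icmp eq i8 %0, 0
  br i1 %1, label %return, label %bb

bb:                                               ; preds = %bb, %entry
  ; The redundant load is replaced by a phi of the already-available values:
  ; %0 coming from entry, %next coming from the previous iteration.
  %indvar = phi i32 [ 0, %entry ], [ %tmp, %bb ]
  %cur = phi i8 [ %0, %entry ], [ %next, %bb ]
  store i8 %cur, i8* %io, align 1
  %tmp = add i32 %indvar, 1
  %scevgep = getelementptr i8* %text, i32 %tmp
  %next = load i8* %scevgep, align 1
  %cond = icmp eq i8 %next, 0
  br i1 %cond, label %return, label %bb

return:                                           ; preds = %bb, %entry
  ret void
}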
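
For the IPSCCP entry, every argument of the call in @test2 is constant, so the
callee body folds completely: (2 + 4) * 1 + 2 * 4 = 14.  An IPSCCP extended as
described could therefore reduce @test2 to something like:

define i32 @test2() nounwind {
entry:
  ; @test(1, 2, 4) = (2 + 4) * 1 + 2 * 4 = 14
  ret i32 14
}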
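
Similarly, if the libcall constant folding lived in libanalysis, strlen("abcd")
would fold to 4, and bar() in the example above could plausibly become the
following (assuming the then-unused static foo is also deleted):

define i32 @bar() nounwind {
entry:
  ; foo("abcd") folds to strlen("abcd") == 4.
  ret i32 4
}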
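
For the last entry, %B only demands the top six bits of %A (an unsigned compare
against 3 just asks whether any bit above bit 1 is set), and the or only touches
bit 0.  Handing SimplifyDemandedBits the mask 0xFC should therefore let it strip
the or, leaving roughly:

define i1 @test(i8 %x, i8 %y) {
  ; (%x | 1) ugt 3 depends only on bits 2..7, which the or does not change,
  ; so the comparison can be done on %x directly.
  %B = icmp ugt i8 %x, 3
  ret i1 %B
}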