Diffstat (limited to 'test/Analysis')
19 files changed, 1580 insertions, 70 deletions
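Most of the deletions in the diffs below drop scalar-format !tbaa tags, together with the trailing metadata triples that defined them, from tests whose checks never depended on TBAA. For orientation, a minimal sketch of the old scalar-format shape being removed; the function and node numbering here are illustrative, not taken from any one test:

    ; Scalar (old-style) TBAA: the access tag names a type ("int") whose
    ; parent chain runs through "omnipotent char" to the TBAA root node.
    define void @tbaa_scalar_sketch(i32* %p) {
    entry:
      store i32 0, i32* %p, align 4, !tbaa !0
      ret void
    }

    !0 = metadata !{metadata !"int", metadata !1}
    !1 = metadata !{metadata !"omnipotent char", metadata !2}
    !2 = metadata !{metadata !"Simple C/C++ TBAA"}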
diff --git a/test/Analysis/BasicAA/invariant_load.ll b/test/Analysis/BasicAA/invariant_load.ll index cd6ddb9..09b5401 100644 --- a/test/Analysis/BasicAA/invariant_load.ll +++ b/test/Analysis/BasicAA/invariant_load.ll @@ -10,10 +10,10 @@ define i32 @foo(i32* nocapture %p, i8* nocapture %q) { entry: - %0 = load i32* %p, align 4, !tbaa !0, !invariant.load !3 + %0 = load i32* %p, align 4, !invariant.load !3 %conv = trunc i32 %0 to i8 - store i8 %conv, i8* %q, align 1, !tbaa !1 - %1 = load i32* %p, align 4, !tbaa !0, !invariant.load !3 + store i8 %conv, i8* %q, align 1 + %1 = load i32* %p, align 4, !invariant.load !3 %add = add nsw i32 %1, 1 ret i32 %add @@ -23,7 +23,4 @@ entry: ; CHECK: %add = add nsw i32 %0, 1 } -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} !3 = metadata !{} diff --git a/test/Analysis/BasicAA/phi-spec-order.ll b/test/Analysis/BasicAA/phi-spec-order.ll index 27d47bc..4172d09 100644 --- a/test/Analysis/BasicAA/phi-spec-order.ll +++ b/test/Analysis/BasicAA/phi-spec-order.ll @@ -24,23 +24,23 @@ for.body4: ; preds = %for.body4, %for.con %lsr.iv46 = bitcast [16000 x double]* %lsr.iv4 to <4 x double>* %lsr.iv12 = bitcast [16000 x double]* %lsr.iv1 to <4 x double>* %scevgep11 = getelementptr <4 x double>* %lsr.iv46, i64 -2 - %i6 = load <4 x double>* %scevgep11, align 32, !tbaa !0 + %i6 = load <4 x double>* %scevgep11, align 32 %add = fadd <4 x double> %i6, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00> - store <4 x double> %add, <4 x double>* %lsr.iv12, align 32, !tbaa !0 + store <4 x double> %add, <4 x double>* %lsr.iv12, align 32 %scevgep10 = getelementptr <4 x double>* %lsr.iv46, i64 -1 - %i7 = load <4 x double>* %scevgep10, align 32, !tbaa !0 + %i7 = load <4 x double>* %scevgep10, align 32 %add.4 = fadd <4 x double> %i7, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00> %scevgep9 = getelementptr <4 x double>* %lsr.iv12, i64 1 - store <4 x double> %add.4, <4 x double>* %scevgep9, align 32, !tbaa !0 - %i8 = load <4 x double>* %lsr.iv46, align 32, !tbaa !0 + store <4 x double> %add.4, <4 x double>* %scevgep9, align 32 + %i8 = load <4 x double>* %lsr.iv46, align 32 %add.8 = fadd <4 x double> %i8, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00> %scevgep8 = getelementptr <4 x double>* %lsr.iv12, i64 2 - store <4 x double> %add.8, <4 x double>* %scevgep8, align 32, !tbaa !0 + store <4 x double> %add.8, <4 x double>* %scevgep8, align 32 %scevgep7 = getelementptr <4 x double>* %lsr.iv46, i64 1 - %i9 = load <4 x double>* %scevgep7, align 32, !tbaa !0 + %i9 = load <4 x double>* %scevgep7, align 32 %add.12 = fadd <4 x double> %i9, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00> %scevgep3 = getelementptr <4 x double>* %lsr.iv12, i64 3 - store <4 x double> %add.12, <4 x double>* %scevgep3, align 32, !tbaa !0 + store <4 x double> %add.12, <4 x double>* %scevgep3, align 32 ; CHECK: NoAlias:{{[ \t]+}}<4 x double>* %scevgep11, <4 x double>* %scevgep7 ; CHECK: NoAlias:{{[ \t]+}}<4 x double>* %scevgep10, <4 x double>* %scevgep7 @@ -65,7 +65,3 @@ for.end: ; preds = %for.body4 for.end10: ; preds = %for.end ret i32 0 } - -!0 = metadata !{metadata !"double", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Analysis/CostModel/ARM/cast.ll 
b/test/Analysis/CostModel/ARM/cast.ll index ba9d84c..0cdd61c 100644 --- a/test/Analysis/CostModel/ARM/cast.ll +++ b/test/Analysis/CostModel/ARM/cast.ll @@ -175,9 +175,9 @@ define i32 @casts() { %rext_5 = zext <4 x i16> undef to <4 x i64> ; Vector cast cost of instructions lowering the cast to the stack. - ; CHECK: cost of 19 {{.*}} trunc + ; CHECK: cost of 3 {{.*}} trunc %r74 = trunc <8 x i32> undef to <8 x i8> - ; CHECK: cost of 38 {{.*}} trunc + ; CHECK: cost of 6 {{.*}} trunc %r75 = trunc <16 x i32> undef to <16 x i8> ; Floating point truncation costs. diff --git a/test/Analysis/CostModel/ARM/divrem.ll b/test/Analysis/CostModel/ARM/divrem.ll new file mode 100644 index 0000000..c4ac59b --- /dev/null +++ b/test/Analysis/CostModel/ARM/divrem.ll @@ -0,0 +1,450 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a9 | FileCheck %s + +define <2 x i8> @sdiv_v2_i8(<2 x i8> %a, <2 x i8> %b) { + ; CHECK: sdiv_v2_i8 + ; CHECK: cost of 40 {{.*}} sdiv + + %1 = sdiv <2 x i8> %a, %b + ret <2 x i8> %1 +} +define <2 x i16> @sdiv_v2_i16(<2 x i16> %a, <2 x i16> %b) { + ; CHECK: sdiv_v2_i16 + ; CHECK: cost of 40 {{.*}} sdiv + + %1 = sdiv <2 x i16> %a, %b + ret <2 x i16> %1 +} +define <2 x i32> @sdiv_v2_i32(<2 x i32> %a, <2 x i32> %b) { + ; CHECK: sdiv_v2_i32 + ; CHECK: cost of 40 {{.*}} sdiv + + %1 = sdiv <2 x i32> %a, %b + ret <2 x i32> %1 +} +define <2 x i64> @sdiv_v2_i64(<2 x i64> %a, <2 x i64> %b) { + ; CHECK: sdiv_v2_i64 + ; CHECK: cost of 40 {{.*}} sdiv + + %1 = sdiv <2 x i64> %a, %b + ret <2 x i64> %1 +} +define <4 x i8> @sdiv_v4_i8(<4 x i8> %a, <4 x i8> %b) { + ; CHECK: sdiv_v4_i8 + ; CHECK: cost of 10 {{.*}} sdiv + + %1 = sdiv <4 x i8> %a, %b + ret <4 x i8> %1 +} +define <4 x i16> @sdiv_v4_i16(<4 x i16> %a, <4 x i16> %b) { + ; CHECK: sdiv_v4_i16 + ; CHECK: cost of 10 {{.*}} sdiv + + %1 = sdiv <4 x i16> %a, %b + ret <4 x i16> %1 +} +define <4 x i32> @sdiv_v4_i32(<4 x i32> %a, <4 x i32> %b) { + ; CHECK: sdiv_v4_i32 + ; CHECK: cost of 80 {{.*}} sdiv + + %1 = sdiv <4 x i32> %a, %b + ret <4 x i32> %1 +} +define <4 x i64> @sdiv_v4_i64(<4 x i64> %a, <4 x i64> %b) { + ; CHECK: sdiv_v4_i64 + ; CHECK: cost of 80 {{.*}} sdiv + + %1 = sdiv <4 x i64> %a, %b + ret <4 x i64> %1 +} +define <8 x i8> @sdiv_v8_i8(<8 x i8> %a, <8 x i8> %b) { + ; CHECK: sdiv_v8_i8 + ; CHECK: cost of 10 {{.*}} sdiv + + %1 = sdiv <8 x i8> %a, %b + ret <8 x i8> %1 +} +define <8 x i16> @sdiv_v8_i16(<8 x i16> %a, <8 x i16> %b) { + ; CHECK: sdiv_v8_i16 + ; CHECK: cost of 160 {{.*}} sdiv + + %1 = sdiv <8 x i16> %a, %b + ret <8 x i16> %1 +} +define <8 x i32> @sdiv_v8_i32(<8 x i32> %a, <8 x i32> %b) { + ; CHECK: sdiv_v8_i32 + ; CHECK: cost of 160 {{.*}} sdiv + + %1 = sdiv <8 x i32> %a, %b + ret <8 x i32> %1 +} +define <8 x i64> @sdiv_v8_i64(<8 x i64> %a, <8 x i64> %b) { + ; CHECK: sdiv_v8_i64 + ; CHECK: cost of 160 {{.*}} sdiv + + %1 = sdiv <8 x i64> %a, %b + ret <8 x i64> %1 +} +define <16 x i8> @sdiv_v16_i8(<16 x i8> %a, <16 x i8> %b) { + ; CHECK: sdiv_v16_i8 + ; CHECK: cost of 320 {{.*}} sdiv + + %1 = sdiv <16 x i8> %a, %b + ret <16 x i8> %1 +} +define <16 x i16> @sdiv_v16_i16(<16 x i16> %a, <16 x i16> %b) { + ; CHECK: sdiv_v16_i16 + ; CHECK: cost of 320 {{.*}} sdiv + + %1 = sdiv <16 x i16> %a, %b + ret <16 x i16> %1 +} +define <16 x i32> @sdiv_v16_i32(<16 x i32> %a, <16 x i32> %b) { + ; CHECK: sdiv_v16_i32 + ; CHECK: cost of 320 {{.*}} sdiv + + %1 = sdiv <16 x i32> %a, %b + ret <16 x i32> %1 +} +define <16 x i64> @sdiv_v16_i64(<16 x i64> %a, <16 x i64> %b) { + ; CHECK: sdiv_v16_i64 + ; CHECK: cost of 320 
{{.*}} sdiv + + %1 = sdiv <16 x i64> %a, %b + ret <16 x i64> %1 +} +define <2 x i8> @udiv_v2_i8(<2 x i8> %a, <2 x i8> %b) { + ; CHECK: udiv_v2_i8 + ; CHECK: cost of 40 {{.*}} udiv + + %1 = udiv <2 x i8> %a, %b + ret <2 x i8> %1 +} +define <2 x i16> @udiv_v2_i16(<2 x i16> %a, <2 x i16> %b) { + ; CHECK: udiv_v2_i16 + ; CHECK: cost of 40 {{.*}} udiv + + %1 = udiv <2 x i16> %a, %b + ret <2 x i16> %1 +} +define <2 x i32> @udiv_v2_i32(<2 x i32> %a, <2 x i32> %b) { + ; CHECK: udiv_v2_i32 + ; CHECK: cost of 40 {{.*}} udiv + + %1 = udiv <2 x i32> %a, %b + ret <2 x i32> %1 +} +define <2 x i64> @udiv_v2_i64(<2 x i64> %a, <2 x i64> %b) { + ; CHECK: udiv_v2_i64 + ; CHECK: cost of 40 {{.*}} udiv + + %1 = udiv <2 x i64> %a, %b + ret <2 x i64> %1 +} +define <4 x i8> @udiv_v4_i8(<4 x i8> %a, <4 x i8> %b) { + ; CHECK: udiv_v4_i8 + ; CHECK: cost of 10 {{.*}} udiv + + %1 = udiv <4 x i8> %a, %b + ret <4 x i8> %1 +} +define <4 x i16> @udiv_v4_i16(<4 x i16> %a, <4 x i16> %b) { + ; CHECK: udiv_v4_i16 + ; CHECK: cost of 10 {{.*}} udiv + + %1 = udiv <4 x i16> %a, %b + ret <4 x i16> %1 +} +define <4 x i32> @udiv_v4_i32(<4 x i32> %a, <4 x i32> %b) { + ; CHECK: udiv_v4_i32 + ; CHECK: cost of 80 {{.*}} udiv + + %1 = udiv <4 x i32> %a, %b + ret <4 x i32> %1 +} +define <4 x i64> @udiv_v4_i64(<4 x i64> %a, <4 x i64> %b) { + ; CHECK: udiv_v4_i64 + ; CHECK: cost of 80 {{.*}} udiv + + %1 = udiv <4 x i64> %a, %b + ret <4 x i64> %1 +} +define <8 x i8> @udiv_v8_i8(<8 x i8> %a, <8 x i8> %b) { + ; CHECK: udiv_v8_i8 + ; CHECK: cost of 10 {{.*}} udiv + + %1 = udiv <8 x i8> %a, %b + ret <8 x i8> %1 +} +define <8 x i16> @udiv_v8_i16(<8 x i16> %a, <8 x i16> %b) { + ; CHECK: udiv_v8_i16 + ; CHECK: cost of 160 {{.*}} udiv + + %1 = udiv <8 x i16> %a, %b + ret <8 x i16> %1 +} +define <8 x i32> @udiv_v8_i32(<8 x i32> %a, <8 x i32> %b) { + ; CHECK: udiv_v8_i32 + ; CHECK: cost of 160 {{.*}} udiv + + %1 = udiv <8 x i32> %a, %b + ret <8 x i32> %1 +} +define <8 x i64> @udiv_v8_i64(<8 x i64> %a, <8 x i64> %b) { + ; CHECK: udiv_v8_i64 + ; CHECK: cost of 160 {{.*}} udiv + + %1 = udiv <8 x i64> %a, %b + ret <8 x i64> %1 +} +define <16 x i8> @udiv_v16_i8(<16 x i8> %a, <16 x i8> %b) { + ; CHECK: udiv_v16_i8 + ; CHECK: cost of 320 {{.*}} udiv + + %1 = udiv <16 x i8> %a, %b + ret <16 x i8> %1 +} +define <16 x i16> @udiv_v16_i16(<16 x i16> %a, <16 x i16> %b) { + ; CHECK: udiv_v16_i16 + ; CHECK: cost of 320 {{.*}} udiv + + %1 = udiv <16 x i16> %a, %b + ret <16 x i16> %1 +} +define <16 x i32> @udiv_v16_i32(<16 x i32> %a, <16 x i32> %b) { + ; CHECK: udiv_v16_i32 + ; CHECK: cost of 320 {{.*}} udiv + + %1 = udiv <16 x i32> %a, %b + ret <16 x i32> %1 +} +define <16 x i64> @udiv_v16_i64(<16 x i64> %a, <16 x i64> %b) { + ; CHECK: udiv_v16_i64 + ; CHECK: cost of 320 {{.*}} udiv + + %1 = udiv <16 x i64> %a, %b + ret <16 x i64> %1 +} +define <2 x i8> @srem_v2_i8(<2 x i8> %a, <2 x i8> %b) { + ; CHECK: srem_v2_i8 + ; CHECK: cost of 40 {{.*}} srem + + %1 = srem <2 x i8> %a, %b + ret <2 x i8> %1 +} +define <2 x i16> @srem_v2_i16(<2 x i16> %a, <2 x i16> %b) { + ; CHECK: srem_v2_i16 + ; CHECK: cost of 40 {{.*}} srem + + %1 = srem <2 x i16> %a, %b + ret <2 x i16> %1 +} +define <2 x i32> @srem_v2_i32(<2 x i32> %a, <2 x i32> %b) { + ; CHECK: srem_v2_i32 + ; CHECK: cost of 40 {{.*}} srem + + %1 = srem <2 x i32> %a, %b + ret <2 x i32> %1 +} +define <2 x i64> @srem_v2_i64(<2 x i64> %a, <2 x i64> %b) { + ; CHECK: srem_v2_i64 + ; CHECK: cost of 40 {{.*}} srem + + %1 = srem <2 x i64> %a, %b + ret <2 x i64> %1 +} +define <4 x i8> @srem_v4_i8(<4 x i8> %a, <4 x i8> %b) { + ; CHECK: 
srem_v4_i8 + ; CHECK: cost of 80 {{.*}} srem + + %1 = srem <4 x i8> %a, %b + ret <4 x i8> %1 +} +define <4 x i16> @srem_v4_i16(<4 x i16> %a, <4 x i16> %b) { + ; CHECK: srem_v4_i16 + ; CHECK: cost of 80 {{.*}} srem + + %1 = srem <4 x i16> %a, %b + ret <4 x i16> %1 +} +define <4 x i32> @srem_v4_i32(<4 x i32> %a, <4 x i32> %b) { + ; CHECK: srem_v4_i32 + ; CHECK: cost of 80 {{.*}} srem + + %1 = srem <4 x i32> %a, %b + ret <4 x i32> %1 +} +define <4 x i64> @srem_v4_i64(<4 x i64> %a, <4 x i64> %b) { + ; CHECK: srem_v4_i64 + ; CHECK: cost of 80 {{.*}} srem + + %1 = srem <4 x i64> %a, %b + ret <4 x i64> %1 +} +define <8 x i8> @srem_v8_i8(<8 x i8> %a, <8 x i8> %b) { + ; CHECK: srem_v8_i8 + ; CHECK: cost of 160 {{.*}} srem + + %1 = srem <8 x i8> %a, %b + ret <8 x i8> %1 +} +define <8 x i16> @srem_v8_i16(<8 x i16> %a, <8 x i16> %b) { + ; CHECK: srem_v8_i16 + ; CHECK: cost of 160 {{.*}} srem + + %1 = srem <8 x i16> %a, %b + ret <8 x i16> %1 +} +define <8 x i32> @srem_v8_i32(<8 x i32> %a, <8 x i32> %b) { + ; CHECK: srem_v8_i32 + ; CHECK: cost of 160 {{.*}} srem + + %1 = srem <8 x i32> %a, %b + ret <8 x i32> %1 +} +define <8 x i64> @srem_v8_i64(<8 x i64> %a, <8 x i64> %b) { + ; CHECK: srem_v8_i64 + ; CHECK: cost of 160 {{.*}} srem + + %1 = srem <8 x i64> %a, %b + ret <8 x i64> %1 +} +define <16 x i8> @srem_v16_i8(<16 x i8> %a, <16 x i8> %b) { + ; CHECK: srem_v16_i8 + ; CHECK: cost of 320 {{.*}} srem + + %1 = srem <16 x i8> %a, %b + ret <16 x i8> %1 +} +define <16 x i16> @srem_v16_i16(<16 x i16> %a, <16 x i16> %b) { + ; CHECK: srem_v16_i16 + ; CHECK: cost of 320 {{.*}} srem + + %1 = srem <16 x i16> %a, %b + ret <16 x i16> %1 +} +define <16 x i32> @srem_v16_i32(<16 x i32> %a, <16 x i32> %b) { + ; CHECK: srem_v16_i32 + ; CHECK: cost of 320 {{.*}} srem + + %1 = srem <16 x i32> %a, %b + ret <16 x i32> %1 +} +define <16 x i64> @srem_v16_i64(<16 x i64> %a, <16 x i64> %b) { + ; CHECK: srem_v16_i64 + ; CHECK: cost of 320 {{.*}} srem + + %1 = srem <16 x i64> %a, %b + ret <16 x i64> %1 +} +define <2 x i8> @urem_v2_i8(<2 x i8> %a, <2 x i8> %b) { + ; CHECK: urem_v2_i8 + ; CHECK: cost of 40 {{.*}} urem + + %1 = urem <2 x i8> %a, %b + ret <2 x i8> %1 +} +define <2 x i16> @urem_v2_i16(<2 x i16> %a, <2 x i16> %b) { + ; CHECK: urem_v2_i16 + ; CHECK: cost of 40 {{.*}} urem + + %1 = urem <2 x i16> %a, %b + ret <2 x i16> %1 +} +define <2 x i32> @urem_v2_i32(<2 x i32> %a, <2 x i32> %b) { + ; CHECK: urem_v2_i32 + ; CHECK: cost of 40 {{.*}} urem + + %1 = urem <2 x i32> %a, %b + ret <2 x i32> %1 +} +define <2 x i64> @urem_v2_i64(<2 x i64> %a, <2 x i64> %b) { + ; CHECK: urem_v2_i64 + ; CHECK: cost of 40 {{.*}} urem + + %1 = urem <2 x i64> %a, %b + ret <2 x i64> %1 +} +define <4 x i8> @urem_v4_i8(<4 x i8> %a, <4 x i8> %b) { + ; CHECK: urem_v4_i8 + ; CHECK: cost of 80 {{.*}} urem + + %1 = urem <4 x i8> %a, %b + ret <4 x i8> %1 +} +define <4 x i16> @urem_v4_i16(<4 x i16> %a, <4 x i16> %b) { + ; CHECK: urem_v4_i16 + ; CHECK: cost of 80 {{.*}} urem + + %1 = urem <4 x i16> %a, %b + ret <4 x i16> %1 +} +define <4 x i32> @urem_v4_i32(<4 x i32> %a, <4 x i32> %b) { + ; CHECK: urem_v4_i32 + ; CHECK: cost of 80 {{.*}} urem + + %1 = urem <4 x i32> %a, %b + ret <4 x i32> %1 +} +define <4 x i64> @urem_v4_i64(<4 x i64> %a, <4 x i64> %b) { + ; CHECK: urem_v4_i64 + ; CHECK: cost of 80 {{.*}} urem + + %1 = urem <4 x i64> %a, %b + ret <4 x i64> %1 +} +define <8 x i8> @urem_v8_i8(<8 x i8> %a, <8 x i8> %b) { + ; CHECK: urem_v8_i8 + ; CHECK: cost of 160 {{.*}} urem + + %1 = urem <8 x i8> %a, %b + ret <8 x i8> %1 +} +define <8 x i16> @urem_v8_i16(<8 x 
i16> %a, <8 x i16> %b) { + ; CHECK: urem_v8_i16 + ; CHECK: cost of 160 {{.*}} urem + + %1 = urem <8 x i16> %a, %b + ret <8 x i16> %1 +} +define <8 x i32> @urem_v8_i32(<8 x i32> %a, <8 x i32> %b) { + ; CHECK: urem_v8_i32 + ; CHECK: cost of 160 {{.*}} urem + + %1 = urem <8 x i32> %a, %b + ret <8 x i32> %1 +} +define <8 x i64> @urem_v8_i64(<8 x i64> %a, <8 x i64> %b) { + ; CHECK: urem_v8_i64 + ; CHECK: cost of 160 {{.*}} urem + + %1 = urem <8 x i64> %a, %b + ret <8 x i64> %1 +} +define <16 x i8> @urem_v16_i8(<16 x i8> %a, <16 x i8> %b) { + ; CHECK: urem_v16_i8 + ; CHECK: cost of 320 {{.*}} urem + + %1 = urem <16 x i8> %a, %b + ret <16 x i8> %1 +} +define <16 x i16> @urem_v16_i16(<16 x i16> %a, <16 x i16> %b) { + ; CHECK: urem_v16_i16 + ; CHECK: cost of 320 {{.*}} urem + + %1 = urem <16 x i16> %a, %b + ret <16 x i16> %1 +} +define <16 x i32> @urem_v16_i32(<16 x i32> %a, <16 x i32> %b) { + ; CHECK: urem_v16_i32 + ; CHECK: cost of 320 {{.*}} urem + + %1 = urem <16 x i32> %a, %b + ret <16 x i32> %1 +} +define <16 x i64> @urem_v16_i64(<16 x i64> %a, <16 x i64> %b) { + ; CHECK: urem_v16_i64 + ; CHECK: cost of 320 {{.*}} urem + + %1 = urem <16 x i64> %a, %b + ret <16 x i64> %1 +} diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll index 85b4425..92f5a1e 100644 --- a/test/Analysis/CostModel/X86/arith.ll +++ b/test/Analysis/CostModel/X86/arith.ll @@ -66,9 +66,9 @@ define void @avx2mull() { ; CHECK: fmul define i32 @fmul(i32 %arg) { - ;CHECK: cost of 1 {{.*}} fmul + ;CHECK: cost of 2 {{.*}} fmul %A = fmul <4 x float> undef, undef - ;CHECK: cost of 1 {{.*}} fmul + ;CHECK: cost of 2 {{.*}} fmul %B = fmul <8 x float> undef, undef ret i32 undef } diff --git a/test/Analysis/CostModel/X86/loop_v2.ll b/test/Analysis/CostModel/X86/loop_v2.ll index 260a606..348444e 100644 --- a/test/Analysis/CostModel/X86/loop_v2.ll +++ b/test/Analysis/CostModel/X86/loop_v2.ll @@ -20,10 +20,10 @@ vector.body: ; preds = %vector.body, %vecto ;CHECK: cost of 1 {{.*}} extract %6 = extractelement <2 x i64> %3, i32 1 %7 = getelementptr inbounds i32* %A, i64 %6 - %8 = load i32* %5, align 4, !tbaa !0 + %8 = load i32* %5, align 4 ;CHECK: cost of 1 {{.*}} insert %9 = insertelement <2 x i32> undef, i32 %8, i32 0 - %10 = load i32* %7, align 4, !tbaa !0 + %10 = load i32* %7, align 4 ;CHECK: cost of 1 {{.*}} insert %11 = insertelement <2 x i32> %9, i32 %10, i32 1 %12 = add nsw <2 x i32> %11, %vec.phi @@ -37,7 +37,3 @@ for.end: ; preds = %vector.body %16 = add i32 %14, %15 ret i32 %16 } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Analysis/CostModel/X86/sitofp.ll b/test/Analysis/CostModel/X86/sitofp.ll new file mode 100644 index 0000000..338d974 --- /dev/null +++ b/test/Analysis/CostModel/X86/sitofp.ll @@ -0,0 +1,281 @@ +; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s + +define <2 x double> @sitofpv2i8v2double(<2 x i8> %a) { + ; SSE2: sitofpv2i8v2double + ; SSE2: cost of 20 {{.*}} sitofp + %1 = sitofp <2 x i8> %a to <2 x double> + ret <2 x double> %1 +} + +define <4 x double> @sitofpv4i8v4double(<4 x i8> %a) { + ; SSE2: sitofpv4i8v4double + ; SSE2: cost of 40 {{.*}} sitofp + %1 = sitofp <4 x i8> %a to <4 x double> + ret <4 x double> %1 +} + +define <8 x double> @sitofpv8i8v8double(<8 x i8> %a) { + ; SSE2: sitofpv8i8v8double + ; SSE2: cost of 80 {{.*}} sitofp +%1 = sitofp <8 x i8> %a to <8 x double> + ret <8 x 
double> %1 +} + +define <16 x double> @sitofpv16i8v16double(<16 x i8> %a) { + ; SSE2: sitofpv16i8v16double + ; SSE2: cost of 160 {{.*}} sitofp + %1 = sitofp <16 x i8> %a to <16 x double> + ret <16 x double> %1 +} + +define <32 x double> @sitofpv32i8v32double(<32 x i8> %a) { + ; SSE2: sitofpv32i8v32double + ; SSE2: cost of 320 {{.*}} sitofp + %1 = sitofp <32 x i8> %a to <32 x double> + ret <32 x double> %1 +} + +define <2 x double> @sitofpv2i16v2double(<2 x i16> %a) { + ; SSE2: sitofpv2i16v2double + ; SSE2: cost of 20 {{.*}} sitofp + %1 = sitofp <2 x i16> %a to <2 x double> + ret <2 x double> %1 +} + +define <4 x double> @sitofpv4i16v4double(<4 x i16> %a) { + ; SSE2: sitofpv4i16v4double + ; SSE2: cost of 40 {{.*}} sitofp + %1 = sitofp <4 x i16> %a to <4 x double> + ret <4 x double> %1 +} + +define <8 x double> @sitofpv8i16v8double(<8 x i16> %a) { + ; SSE2: sitofpv8i16v8double + ; SSE2: cost of 80 {{.*}} sitofp + %1 = sitofp <8 x i16> %a to <8 x double> + ret <8 x double> %1 +} + +define <16 x double> @sitofpv16i16v16double(<16 x i16> %a) { + ; SSE2: sitofpv16i16v16double + ; SSE2: cost of 160 {{.*}} sitofp + %1 = sitofp <16 x i16> %a to <16 x double> + ret <16 x double> %1 +} + +define <32 x double> @sitofpv32i16v32double(<32 x i16> %a) { + ; SSE2: sitofpv32i16v32double + ; SSE2: cost of 320 {{.*}} sitofp + %1 = sitofp <32 x i16> %a to <32 x double> + ret <32 x double> %1 +} + +define <2 x double> @sitofpv2i32v2double(<2 x i32> %a) { + ; SSE2: sitofpv2i32v2double + ; SSE2: cost of 20 {{.*}} sitofp + %1 = sitofp <2 x i32> %a to <2 x double> + ret <2 x double> %1 +} + +define <4 x double> @sitofpv4i32v4double(<4 x i32> %a) { + ; SSE2: sitofpv4i32v4double + ; SSE2: cost of 40 {{.*}} sitofp + %1 = sitofp <4 x i32> %a to <4 x double> + ret <4 x double> %1 +} + +define <8 x double> @sitofpv8i32v8double(<8 x i32> %a) { + ; SSE2: sitofpv8i32v8double + ; SSE2: cost of 80 {{.*}} sitofp + %1 = sitofp <8 x i32> %a to <8 x double> + ret <8 x double> %1 +} + +define <16 x double> @sitofpv16i32v16double(<16 x i32> %a) { + ; SSE2: sitofpv16i32v16double + ; SSE2: cost of 160 {{.*}} sitofp + %1 = sitofp <16 x i32> %a to <16 x double> + ret <16 x double> %1 +} + +define <32 x double> @sitofpv32i32v32double(<32 x i32> %a) { + ; SSE2: sitofpv32i32v32double + ; SSE2: cost of 320 {{.*}} sitofp + %1 = sitofp <32 x i32> %a to <32 x double> + ret <32 x double> %1 +} + +define <2 x double> @sitofpv2i64v2double(<2 x i64> %a) { + ; SSE2: sitofpv2i64v2double + ; SSE2: cost of 20 {{.*}} sitofp + %1 = sitofp <2 x i64> %a to <2 x double> + ret <2 x double> %1 +} + +define <4 x double> @sitofpv4i64v4double(<4 x i64> %a) { + ; SSE2: sitofpv4i64v4double + ; SSE2: cost of 40 {{.*}} sitofp + %1 = sitofp <4 x i64> %a to <4 x double> + ret <4 x double> %1 +} + +define <8 x double> @sitofpv8i64v8double(<8 x i64> %a) { + %1 = sitofp <8 x i64> %a to <8 x double> + ; SSE2: sitofpv8i64v8double + ; SSE2: cost of 80 {{.*}} sitofp + ret <8 x double> %1 +} + +define <16 x double> @sitofpv16i64v16double(<16 x i64> %a) { + ; SSE2: sitofpv16i64v16double + ; SSE2: cost of 160 {{.*}} sitofp + %1 = sitofp <16 x i64> %a to <16 x double> + ret <16 x double> %1 +} + +define <32 x double> @sitofpv32i64v32double(<32 x i64> %a) { + ; SSE2: sitofpv32i64v32double + ; SSE2: cost of 320 {{.*}} sitofp + %1 = sitofp <32 x i64> %a to <32 x double> + ret <32 x double> %1 +} + +define <2 x float> @sitofpv2i8v2float(<2 x i8> %a) { + ; SSE2: sitofpv2i8v2float + ; SSE2: cost of 15 {{.*}} sitofp + %1 = sitofp <2 x i8> %a to <2 x float> + ret <2 x float> %1 +} + 
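Every function in this new sitofp.ll (and in the uitofp.ll test added further below) follows the same pattern: the SSE2 prefix first matches the function name in the analysis banner, then the estimated cost printed for the conversion instruction. Roughly the output shape those CHECK lines match, shown for one case; this is an approximate sketch, not verbatim tool output:

    ; Printing analysis 'Cost Model Analysis' for function 'sitofpv2i8v2double':
    ; Cost Model: Found an estimated cost of 20 for instruction:   %1 = sitofp <2 x i8> %a to <2 x double>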
+define <4 x float> @sitofpv4i8v4float(<4 x i8> %a) { + ; SSE2: sitofpv4i8v4float + ; SSE2: cost of 15 {{.*}} sitofp + %1 = sitofp <4 x i8> %a to <4 x float> + ret <4 x float> %1 +} + +define <8 x float> @sitofpv8i8v8float(<8 x i8> %a) { + ; SSE2: sitofpv8i8v8float + ; SSE2: cost of 15 {{.*}} sitofp + %1 = sitofp <8 x i8> %a to <8 x float> + ret <8 x float> %1 +} + +define <16 x float> @sitofpv16i8v16float(<16 x i8> %a) { + ; SSE2: sitofpv16i8v16float + ; SSE2: cost of 8 {{.*}} sitofp + %1 = sitofp <16 x i8> %a to <16 x float> + ret <16 x float> %1 +} + +define <32 x float> @sitofpv32i8v32float(<32 x i8> %a) { + ; SSE2: sitofpv32i8v32float + ; SSE2: cost of 16 {{.*}} sitofp + %1 = sitofp <32 x i8> %a to <32 x float> + ret <32 x float> %1 +} + +define <2 x float> @sitofpv2i16v2float(<2 x i16> %a) { + ; SSE2: sitofpv2i16v2float + ; SSE2: cost of 15 {{.*}} sitofp + %1 = sitofp <2 x i16> %a to <2 x float> + ret <2 x float> %1 +} + +define <4 x float> @sitofpv4i16v4float(<4 x i16> %a) { + ; SSE2: sitofpv4i16v4float + ; SSE2: cost of 15 {{.*}} sitofp + %1 = sitofp <4 x i16> %a to <4 x float> + ret <4 x float> %1 +} + +define <8 x float> @sitofpv8i16v8float(<8 x i16> %a) { + ; SSE2: sitofpv8i16v8float + ; SSE2: cost of 15 {{.*}} sitofp + %1 = sitofp <8 x i16> %a to <8 x float> + ret <8 x float> %1 +} + +define <16 x float> @sitofpv16i16v16float(<16 x i16> %a) { + ; SSE2: sitofpv16i16v16float + ; SSE2: cost of 30 {{.*}} sitofp + %1 = sitofp <16 x i16> %a to <16 x float> + ret <16 x float> %1 +} + +define <32 x float> @sitofpv32i16v32float(<32 x i16> %a) { + ; SSE2: sitofpv32i16v32float + ; SSE2: cost of 60 {{.*}} sitofp + %1 = sitofp <32 x i16> %a to <32 x float> + ret <32 x float> %1 +} + +define <2 x float> @sitofpv2i32v2float(<2 x i32> %a) { + ; SSE2: sitofpv2i32v2float + ; SSE2: cost of 15 {{.*}} sitofp + %1 = sitofp <2 x i32> %a to <2 x float> + ret <2 x float> %1 +} + +define <4 x float> @sitofpv4i32v4float(<4 x i32> %a) { + ; SSE2: sitofpv4i32v4float + ; SSE2: cost of 15 {{.*}} sitofp + %1 = sitofp <4 x i32> %a to <4 x float> + ret <4 x float> %1 +} + +define <8 x float> @sitofpv8i32v8float(<8 x i32> %a) { + ; SSE2: sitofpv8i32v8float + ; SSE2: cost of 30 {{.*}} sitofp + %1 = sitofp <8 x i32> %a to <8 x float> + ret <8 x float> %1 +} + +define <16 x float> @sitofpv16i32v16float(<16 x i32> %a) { + ; SSE2: sitofpv16i32v16float + ; SSE2: cost of 60 {{.*}} sitofp + %1 = sitofp <16 x i32> %a to <16 x float> + ret <16 x float> %1 +} + +define <32 x float> @sitofpv32i32v32float(<32 x i32> %a) { + ; SSE2: sitofpv32i32v32float + ; SSE2: cost of 120 {{.*}} sitofp + %1 = sitofp <32 x i32> %a to <32 x float> + ret <32 x float> %1 +} + +define <2 x float> @sitofpv2i64v2float(<2 x i64> %a) { + ; SSE2: sitofpv2i64v2float + ; SSE2: cost of 15 {{.*}} sitofp + %1 = sitofp <2 x i64> %a to <2 x float> + ret <2 x float> %1 +} + +define <4 x float> @sitofpv4i64v4float(<4 x i64> %a) { + ; SSE2: sitofpv4i64v4float + ; SSE2: cost of 30 {{.*}} sitofp + %1 = sitofp <4 x i64> %a to <4 x float> + ret <4 x float> %1 +} + +define <8 x float> @sitofpv8i64v8float(<8 x i64> %a) { + ; SSE2: sitofpv8i64v8float + ; SSE2: cost of 60 {{.*}} sitofp + %1 = sitofp <8 x i64> %a to <8 x float> + ret <8 x float> %1 +} + +define <16 x float> @sitofpv16i64v16float(<16 x i64> %a) { + ; SSE2: sitofpv16i64v16float + ; SSE2: cost of 120 {{.*}} sitofp + %1 = sitofp <16 x i64> %a to <16 x float> + ret <16 x float> %1 +} + +define <32 x float> @sitofpv32i64v32float(<32 x i64> %a) { + ; SSE2: sitofpv32i64v32float + ; SSE2: cost of 240 {{.*}} 
sitofp + %1 = sitofp <32 x i64> %a to <32 x float> + ret <32 x float> %1 +} diff --git a/test/Analysis/CostModel/X86/testshiftashr.ll b/test/Analysis/CostModel/X86/testshiftashr.ll index f35eea8..d96a92f 100644 --- a/test/Analysis/CostModel/X86/testshiftashr.ll +++ b/test/Analysis/CostModel/X86/testshiftashr.ll @@ -113,7 +113,7 @@ entry: define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) { entry: ; SSE2: shift32i32 - ; SSE2: cost of 256 {{.*}} ashr + ; SSE2: cost of 320 {{.*}} ashr ; SSE2-CODEGEN: shift32i32 ; SSE2-CODEGEN: sarl %cl @@ -173,7 +173,7 @@ entry: define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) { entry: ; SSE2: shift32i64 - ; SSE2: cost of 256 {{.*}} ashr + ; SSE2: cost of 320 {{.*}} ashr ; SSE2-CODEGEN: shift32i64 ; SSE2-CODEGEN: sarq %cl @@ -373,7 +373,7 @@ define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) { entry: ; SSE2: shift32i32c ; getTypeConversion fails here and promotes this to a i64. - ; SSE2: cost of 256 {{.*}} ashr + ; SSE2: cost of 8 {{.*}} ashr ; SSE2-CODEGEN: shift32i32c ; SSE2-CODEGEN: psrad $3 %0 = ashr %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3, @@ -443,7 +443,7 @@ entry: define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) { entry: ; SSE2: shift32i64c - ; SSE2: cost of 256 {{.*}} ashr + ; SSE2: cost of 320 {{.*}} ashr ; SSE2-CODEGEN: shift32i64c ; SSE2-CODEGEN: sarq $3 diff --git a/test/Analysis/CostModel/X86/testshiftlshr.ll b/test/Analysis/CostModel/X86/testshiftlshr.ll index 8d6ef38..7bc8d89 100644 --- a/test/Analysis/CostModel/X86/testshiftlshr.ll +++ b/test/Analysis/CostModel/X86/testshiftlshr.ll @@ -113,7 +113,7 @@ entry: define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) { entry: ; SSE2: shift32i32 - ; SSE2: cost of 256 {{.*}} lshr + ; SSE2: cost of 320 {{.*}} lshr ; SSE2-CODEGEN: shift32i32 ; SSE2-CODEGEN: shrl %cl @@ -173,7 +173,7 @@ entry: define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) { entry: ; SSE2: shift32i64 - ; SSE2: cost of 256 {{.*}} lshr + ; SSE2: cost of 320 {{.*}} lshr ; SSE2-CODEGEN: shift32i64 ; SSE2-CODEGEN: shrq %cl @@ -372,8 +372,7 @@ entry: define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) { entry: ; SSE2: shift32i32c - ; getTypeConversion fails here and promotes this to a i64. 
- ; SSE2: cost of 256 {{.*}} lshr + ; SSE2: cost of 8 {{.*}} lshr ; SSE2-CODEGEN: shift32i32c ; SSE2-CODEGEN: psrld $3 %0 = lshr %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3, @@ -443,7 +442,7 @@ entry: define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) { entry: ; SSE2: shift32i64c - ; SSE2: cost of 256 {{.*}} lshr + ; SSE2: cost of 16 {{.*}} lshr ; SSE2-CODEGEN: shift32i64c ; SSE2-CODEGEN: psrlq $3 diff --git a/test/Analysis/CostModel/X86/testshiftshl.ll b/test/Analysis/CostModel/X86/testshiftshl.ll index f45a698..40effd0 100644 --- a/test/Analysis/CostModel/X86/testshiftshl.ll +++ b/test/Analysis/CostModel/X86/testshiftshl.ll @@ -113,7 +113,7 @@ entry: define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) { entry: ; SSE2: shift32i32 - ; SSE2: cost of 256 {{.*}} shl + ; SSE2: cost of 80 {{.*}} shl ; SSE2-CODEGEN: shift32i32 ; SSE2-CODEGEN: pmuludq @@ -173,7 +173,7 @@ entry: define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) { entry: ; SSE2: shift32i64 - ; SSE2: cost of 256 {{.*}} shl + ; SSE2: cost of 320 {{.*}} shl ; SSE2-CODEGEN: shift32i64 ; SSE2-CODEGEN: shlq %cl @@ -372,8 +372,7 @@ entry: define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) { entry: ; SSE2: shift32i32c - ; getTypeConversion fails here and promotes this to a i64. - ; SSE2: cost of 256 {{.*}} shl + ; SSE2: cost of 8 {{.*}} shl ; SSE2-CODEGEN: shift32i32c ; SSE2-CODEGEN: pslld $3 %0 = shl %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3, @@ -443,7 +442,7 @@ entry: define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) { entry: ; SSE2: shift32i64c - ; SSE2: cost of 256 {{.*}} shl + ; SSE2: cost of 16 {{.*}} shl ; SSE2-CODEGEN: shift32i64c ; SSE2-CODEGEN: psllq $3 diff --git a/test/Analysis/CostModel/X86/uitofp.ll b/test/Analysis/CostModel/X86/uitofp.ll new file mode 100644 index 0000000..a41a04d --- /dev/null +++ b/test/Analysis/CostModel/X86/uitofp.ll @@ -0,0 +1,368 @@ +; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s +; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s + +; In X86TargetTransformInfo::getCastInstrCost we have code that depends on +; getSimpleVT on a value type. On AVX2 we execute this code. Make sure we exit +; early if the type is not a simple value type before we call this function. 
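The comment above motivates the third RUN line that follows: it deliberately pipes to no FileCheck, since finishing without a crash is the entire test. Wide types in this file, such as <32 x i64>, have no simple machine value type, which is what used to trip getSimpleVT on AVX2. A hedged sketch of another non-simple trigger, with a deliberately odd element count; this function is illustrative and not part of the committed test:

    define <3 x double> @uitofp_no_simple_vt(<3 x i64> %a) {
      ; <3 x i64> has no simple MVT equivalent, so the cost code must take
      ; the early exit instead of calling getSimpleVT on it.
      %1 = uitofp <3 x i64> %a to <3 x double>
      ret <3 x double> %1
    }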
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -cost-model -analyze < %s + +define <2 x double> @uitofpv2i8v2double(<2 x i8> %a) { + ; SSE2: uitofpv2i8v2double + ; SSE2: cost of 20 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv2i8v2double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd + %1 = uitofp <2 x i8> %a to <2 x double> + ret <2 x double> %1 +} + +define <4 x double> @uitofpv4i8v4double(<4 x i8> %a) { + ; SSE2: uitofpv4i8v4double + ; SSE2: cost of 40 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv4i8v4double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd + %1 = uitofp <4 x i8> %a to <4 x double> + ret <4 x double> %1 +} + +define <8 x double> @uitofpv8i8v8double(<8 x i8> %a) { + ; SSE2: uitofpv8i8v8double + ; SSE2: cost of 80 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv8i8v8double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd +%1 = uitofp <8 x i8> %a to <8 x double> + ret <8 x double> %1 +} + +define <16 x double> @uitofpv16i8v16double(<16 x i8> %a) { + ; SSE2: uitofpv16i8v16double + ; SSE2: cost of 160 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv16i8v16double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd + %1 = uitofp <16 x i8> %a to <16 x double> + ret <16 x double> %1 +} + +define <32 x double> @uitofpv32i8v32double(<32 x i8> %a) { + ; SSE2: uitofpv32i8v32double + ; SSE2: cost of 320 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv32i8v32double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd + %1 = uitofp <32 x i8> %a to <32 x double> + ret <32 x double> %1 +} + +define <2 x double> @uitofpv2i16v2double(<2 x i16> %a) { + ; SSE2: uitofpv2i16v2double + ; SSE2: cost of 20 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv2i16v2double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd + %1 = uitofp <2 x i16> %a to <2 x double> + ret <2 x double> %1 +} + +define <4 x double> @uitofpv4i16v4double(<4 x i16> %a) { + ; SSE2: uitofpv4i16v4double + ; SSE2: cost of 40 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv4i16v4double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd + %1 = uitofp <4 x i16> %a to <4 x double> + ret <4 x double> %1 +} + +define <8 x double> @uitofpv8i16v8double(<8 x i16> %a) { + ; SSE2: uitofpv8i16v8double + ; SSE2: cost of 80 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv8i16v8double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd + %1 = uitofp <8 x i16> %a to <8 x double> + ret <8 x double> %1 +} + +define <16 x double> @uitofpv16i16v16double(<16 x i16> %a) { + ; SSE2: uitofpv16i16v16double + ; SSE2: cost of 160 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv16i16v16double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd + %1 = uitofp <16 x i16> %a to <16 x double> + ret <16 x double> %1 +} + +define <32 x double> @uitofpv32i16v32double(<32 x i16> %a) { + ; SSE2: uitofpv32i16v32double + ; SSE2: cost of 320 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv32i16v32double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd + %1 = uitofp <32 x i16> %a to <32 x double> + ret <32 x double> %1 +} + +define <2 x double> @uitofpv2i32v2double(<2 x i32> %a) { + ; SSE2: uitofpv2i32v2double + ; SSE2: cost of 20 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv2i32v2double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd + %1 = uitofp <2 x i32> %a to <2 x double> + ret <2 x double> %1 +} + +define <4 x double> @uitofpv4i32v4double(<4 x 
i32> %a) { + ; SSE2: uitofpv4i32v4double + ; SSE2: cost of 40 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv4i32v4double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd + %1 = uitofp <4 x i32> %a to <4 x double> + ret <4 x double> %1 +} + +define <8 x double> @uitofpv8i32v8double(<8 x i32> %a) { + ; SSE2: uitofpv8i32v8double + ; SSE2: cost of 80 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv8i32v8double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd + %1 = uitofp <8 x i32> %a to <8 x double> + ret <8 x double> %1 +} + +define <16 x double> @uitofpv16i32v16double(<16 x i32> %a) { + ; SSE2: uitofpv16i32v16double + ; SSE2: cost of 160 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv16i32v16double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd + %1 = uitofp <16 x i32> %a to <16 x double> + ret <16 x double> %1 +} + +define <32 x double> @uitofpv32i32v32double(<32 x i32> %a) { + ; SSE2: uitofpv32i32v32double + ; SSE2: cost of 320 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv32i32v32double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd + %1 = uitofp <32 x i32> %a to <32 x double> + ret <32 x double> %1 +} + +define <2 x double> @uitofpv2i64v2double(<2 x i64> %a) { + ; SSE2: uitofpv2i64v2double + ; SSE2: cost of 20 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv2i64v2double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd + %1 = uitofp <2 x i64> %a to <2 x double> + ret <2 x double> %1 +} + +define <4 x double> @uitofpv4i64v4double(<4 x i64> %a) { + ; SSE2: uitofpv4i64v4double + ; SSE2: cost of 40 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv4i64v4double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd + %1 = uitofp <4 x i64> %a to <4 x double> + ret <4 x double> %1 +} + +define <8 x double> @uitofpv8i64v8double(<8 x i64> %a) { + %1 = uitofp <8 x i64> %a to <8 x double> + ; SSE2: uitofpv8i64v8double + ; SSE2: cost of 80 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv8i64v8double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd + ret <8 x double> %1 +} + +define <16 x double> @uitofpv16i64v16double(<16 x i64> %a) { + ; SSE2: uitofpv16i64v16double + ; SSE2: cost of 160 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv16i64v16double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd + %1 = uitofp <16 x i64> %a to <16 x double> + ret <16 x double> %1 +} + +define <32 x double> @uitofpv32i64v32double(<32 x i64> %a) { + ; SSE2: uitofpv32i64v32double + ; SSE2: cost of 320 {{.*}} uitofp + ; SSE2-CODEGEN: uitofpv32i64v32double + ; SSE2-CODEGEN: movapd LCPI + ; SSE2-CODEGEN: subpd + ; SSE2-CODEGEN: addpd + %1 = uitofp <32 x i64> %a to <32 x double> + ret <32 x double> %1 +} + +define <2 x float> @uitofpv2i8v2float(<2 x i8> %a) { + ; SSE2: uitofpv2i8v2float + ; SSE2: cost of 15 {{.*}} uitofp + %1 = uitofp <2 x i8> %a to <2 x float> + ret <2 x float> %1 +} + +define <4 x float> @uitofpv4i8v4float(<4 x i8> %a) { + ; SSE2: uitofpv4i8v4float + ; SSE2: cost of 15 {{.*}} uitofp + %1 = uitofp <4 x i8> %a to <4 x float> + ret <4 x float> %1 +} + +define <8 x float> @uitofpv8i8v8float(<8 x i8> %a) { + ; SSE2: uitofpv8i8v8float + ; SSE2: cost of 15 {{.*}} uitofp + %1 = uitofp <8 x i8> %a to <8 x float> + ret <8 x float> %1 +} + +define <16 x float> @uitofpv16i8v16float(<16 x i8> %a) { + ; SSE2: uitofpv16i8v16float + ; SSE2: cost of 8 {{.*}} uitofp + %1 = uitofp <16 x i8> %a to <16 x float> + ret <16 x float> %1 +} + +define <32 x float> 
@uitofpv32i8v32float(<32 x i8> %a) { + ; SSE2: uitofpv32i8v32float + ; SSE2: cost of 16 {{.*}} uitofp + %1 = uitofp <32 x i8> %a to <32 x float> + ret <32 x float> %1 +} + +define <2 x float> @uitofpv2i16v2float(<2 x i16> %a) { + ; SSE2: uitofpv2i16v2float + ; SSE2: cost of 15 {{.*}} uitofp + %1 = uitofp <2 x i16> %a to <2 x float> + ret <2 x float> %1 +} + +define <4 x float> @uitofpv4i16v4float(<4 x i16> %a) { + ; SSE2: uitofpv4i16v4float + ; SSE2: cost of 15 {{.*}} uitofp + %1 = uitofp <4 x i16> %a to <4 x float> + ret <4 x float> %1 +} + +define <8 x float> @uitofpv8i16v8float(<8 x i16> %a) { + ; SSE2: uitofpv8i16v8float + ; SSE2: cost of 15 {{.*}} uitofp + %1 = uitofp <8 x i16> %a to <8 x float> + ret <8 x float> %1 +} + +define <16 x float> @uitofpv16i16v16float(<16 x i16> %a) { + ; SSE2: uitofpv16i16v16float + ; SSE2: cost of 30 {{.*}} uitofp + %1 = uitofp <16 x i16> %a to <16 x float> + ret <16 x float> %1 +} + +define <32 x float> @uitofpv32i16v32float(<32 x i16> %a) { + ; SSE2: uitofpv32i16v32float + ; SSE2: cost of 60 {{.*}} uitofp + %1 = uitofp <32 x i16> %a to <32 x float> + ret <32 x float> %1 +} + +define <2 x float> @uitofpv2i32v2float(<2 x i32> %a) { + ; SSE2: uitofpv2i32v2float + ; SSE2: cost of 15 {{.*}} uitofp + %1 = uitofp <2 x i32> %a to <2 x float> + ret <2 x float> %1 +} + +define <4 x float> @uitofpv4i32v4float(<4 x i32> %a) { + ; SSE2: uitofpv4i32v4float + ; SSE2: cost of 15 {{.*}} uitofp + %1 = uitofp <4 x i32> %a to <4 x float> + ret <4 x float> %1 +} + +define <8 x float> @uitofpv8i32v8float(<8 x i32> %a) { + ; SSE2: uitofpv8i32v8float + ; SSE2: cost of 30 {{.*}} uitofp + %1 = uitofp <8 x i32> %a to <8 x float> + ret <8 x float> %1 +} + +define <16 x float> @uitofpv16i32v16float(<16 x i32> %a) { + ; SSE2: uitofpv16i32v16float + ; SSE2: cost of 60 {{.*}} uitofp + %1 = uitofp <16 x i32> %a to <16 x float> + ret <16 x float> %1 +} + +define <32 x float> @uitofpv32i32v32float(<32 x i32> %a) { + ; SSE2: uitofpv32i32v32float + ; SSE2: cost of 120 {{.*}} uitofp + %1 = uitofp <32 x i32> %a to <32 x float> + ret <32 x float> %1 +} + +define <2 x float> @uitofpv2i64v2float(<2 x i64> %a) { + ; SSE2: uitofpv2i64v2float + ; SSE2: cost of 15 {{.*}} uitofp + %1 = uitofp <2 x i64> %a to <2 x float> + ret <2 x float> %1 +} + +define <4 x float> @uitofpv4i64v4float(<4 x i64> %a) { + ; SSE2: uitofpv4i64v4float + ; SSE2: cost of 30 {{.*}} uitofp + %1 = uitofp <4 x i64> %a to <4 x float> + ret <4 x float> %1 +} + +define <8 x float> @uitofpv8i64v8float(<8 x i64> %a) { + ; SSE2: uitofpv8i64v8float + ; SSE2: cost of 60 {{.*}} uitofp + %1 = uitofp <8 x i64> %a to <8 x float> + ret <8 x float> %1 +} + +define <16 x float> @uitofpv16i64v16float(<16 x i64> %a) { + ; SSE2: uitofpv16i64v16float + ; SSE2: cost of 120 {{.*}} uitofp + %1 = uitofp <16 x i64> %a to <16 x float> + ret <16 x float> %1 +} + +define <32 x float> @uitofpv32i64v32float(<32 x i64> %a) { + ; SSE2: uitofpv32i64v32float + ; SSE2: cost of 240 {{.*}} uitofp + %1 = uitofp <32 x i64> %a to <32 x float> + ret <32 x float> %1 +} + diff --git a/test/Analysis/CostModel/X86/vectorized-loop.ll b/test/Analysis/CostModel/X86/vectorized-loop.ll index 25b1114..af7d1df 100644 --- a/test/Analysis/CostModel/X86/vectorized-loop.ll +++ b/test/Analysis/CostModel/X86/vectorized-loop.ll @@ -54,14 +54,14 @@ for.body: ; preds = %middle.block, %for. 
%13 = add nsw i64 %indvars.iv, 2 %arrayidx = getelementptr inbounds i32* %B, i64 %13 ;CHECK: cost of 1 {{.*}} load - %14 = load i32* %arrayidx, align 4, !tbaa !0 + %14 = load i32* %arrayidx, align 4 ;CHECK: cost of 1 {{.*}} mul %mul = mul nsw i32 %14, 5 %arrayidx2 = getelementptr inbounds i32* %A, i64 %indvars.iv ;CHECK: cost of 1 {{.*}} load - %15 = load i32* %arrayidx2, align 4, !tbaa !0 + %15 = load i32* %arrayidx2, align 4 %add3 = add nsw i32 %15, %mul - store i32 %add3, i32* %arrayidx2, align 4, !tbaa !0 + store i32 %add3, i32* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 ;CHECK: cost of 0 {{.*}} trunc %16 = trunc i64 %indvars.iv.next to i32 @@ -73,7 +73,3 @@ for.end: ; preds = %middle.block, %for. ;CHECK: cost of 0 {{.*}} ret ret i32 undef } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Analysis/GlobalsModRef/volatile-instrs.ll b/test/Analysis/GlobalsModRef/volatile-instrs.ll index 49bce67..46d3d76 100644 --- a/test/Analysis/GlobalsModRef/volatile-instrs.ll +++ b/test/Analysis/GlobalsModRef/volatile-instrs.ll @@ -22,13 +22,9 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, define i32 @main() nounwind uwtable ssp { main_entry: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false) - %0 = load volatile i32* getelementptr inbounds (%struct.anon* @b, i64 0, i32 0), align 4, !tbaa !0 - store i32 %0, i32* @c, align 4, !tbaa !0 + %0 = load volatile i32* getelementptr inbounds (%struct.anon* @b, i64 0, i32 0), align 4 + store i32 %0, i32* @c, align 4 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false) nounwind %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %0) nounwind ret i32 0 } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Analysis/MemoryDependenceAnalysis/lit.local.cfg b/test/Analysis/MemoryDependenceAnalysis/lit.local.cfg new file mode 100644 index 0000000..c6106e4 --- /dev/null +++ b/test/Analysis/MemoryDependenceAnalysis/lit.local.cfg @@ -0,0 +1 @@ +config.suffixes = ['.ll'] diff --git a/test/Analysis/MemoryDependenceAnalysis/memdep_requires_dominator_tree.ll b/test/Analysis/MemoryDependenceAnalysis/memdep_requires_dominator_tree.ll new file mode 100644 index 0000000..3c95770 --- /dev/null +++ b/test/Analysis/MemoryDependenceAnalysis/memdep_requires_dominator_tree.ll @@ -0,0 +1,19 @@ +; RUN: opt -memdep -gvn < %s + +define void @__memdep_requires_dominator_tree(i32* nocapture %bufUInt, i32* nocapture %pattern) nounwind { +entry: + br label %for.body + +for.exit: ; preds = %for.body + ret void + +for.body: ; preds = %for.body, %entry + %i.01 = phi i32 [ 0, %entry ], [ %tmp8.7, %for.body ] + %arrayidx = getelementptr i32* %bufUInt, i32 %i.01 + %arrayidx5 = getelementptr i32* %pattern, i32 %i.01 + %tmp6 = load i32* %arrayidx5, align 4 + store i32 %tmp6, i32* %arrayidx, align 4 + %tmp8.7 = add i32 %i.01, 8 + %cmp.7 = icmp ult i32 %tmp8.7, 1024 + br i1 %cmp.7, label %for.body, label %for.exit +} diff --git a/test/Analysis/Profiling/lit.local.cfg b/test/Analysis/Profiling/lit.local.cfg index 444b7dc..d40fa4f 100644 --- 
a/test/Analysis/Profiling/lit.local.cfg +++ b/test/Analysis/Profiling/lit.local.cfg @@ -7,10 +7,5 @@ def getRoot(config): root = getRoot(config) -# Most profiling tests rely on a JIT being present to gather their data; AArch64 -# doesn't have any JIT at present so they will fail when run there. -if root.host_arch in ['AArch64']: - config.unsupported = True - if 'hexagon' in root.target_triple: config.unsupported = True diff --git a/test/Analysis/RegionInfo/unreachable_bb.ll b/test/Analysis/RegionInfo/unreachable_bb.ll new file mode 100644 index 0000000..626ccbe --- /dev/null +++ b/test/Analysis/RegionInfo/unreachable_bb.ll @@ -0,0 +1,29 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s + +; We should not crash if there are some bbs that are not reachable. +define void @f() { +entry: + br label %for.pre + +notintree: ; No predecessors! + br label %ret + +for.pre: ; preds = %entry + br label %for + +for: ; preds = %for.inc, %for.pre + %indvar = phi i64 [ 0, %for.pre ], [ %indvar.next, %for.inc ] + %exitcond = icmp ne i64 %indvar, 200 + br i1 %exitcond, label %for.inc, label %ret + +for.inc: ; preds = %for + %indvar.next = add i64 %indvar, 1 + br label %for + +ret: ; preds = %for, %notintree + ret void +} + +; CHECK: [0] entry => <Function Return> +; CHECK: [1] for => ret + diff --git a/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll b/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll index 138c015..b88e33f 100644 --- a/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll +++ b/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll @@ -15,24 +15,24 @@ entry: lbl_818: ; preds = %for.end, %entry call void (...)* @func_27() - store i32 0, i32* @g_814, align 4, !tbaa !0 + store i32 0, i32* @g_814, align 4 br label %for.cond for.cond: ; preds = %for.body, %lbl_818 - %0 = load i32* @g_814, align 4, !tbaa !0 + %0 = load i32* @g_814, align 4 %cmp = icmp sle i32 %0, 0 br i1 %cmp, label %for.body, label %for.end for.body: ; preds = %for.cond %idxprom = sext i32 %0 to i64 %arrayidx = getelementptr inbounds [0 x i32]* getelementptr inbounds ([1 x [0 x i32]]* @g_244, i32 0, i64 0), i32 0, i64 %idxprom - %1 = load i32* %arrayidx, align 1, !tbaa !0 - store i32 %1, i32* @func_21_l_773, align 4, !tbaa !0 - store i32 1, i32* @g_814, align 4, !tbaa !0 + %1 = load i32* %arrayidx, align 1 + store i32 %1, i32* @func_21_l_773, align 4 + store i32 1, i32* @g_814, align 4 br label %for.cond for.end: ; preds = %for.cond - %2 = load i32* @func_21_l_773, align 4, !tbaa !0 + %2 = load i32* @func_21_l_773, align 4 %tobool = icmp ne i32 %2, 0 br i1 %tobool, label %lbl_818, label %if.end @@ -41,7 +41,3 @@ if.end: ; preds = %for.end } declare void @func_27(...) - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll b/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll new file mode 100644 index 0000000..ee52763 --- /dev/null +++ b/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll @@ -0,0 +1,392 @@ +; RUN: opt < %s -tbaa -basicaa -struct-path-tbaa -aa-eval -evaluate-tbaa -print-no-aliases -print-may-aliases -disable-output 2>&1 | FileCheck %s +; RUN: opt < %s -tbaa -basicaa -struct-path-tbaa -gvn -S | FileCheck %s --check-prefix=OPT +; Generated from clang/test/CodeGen/tbaa.cpp with "-O1 -struct-path-tbaa -disable-llvm-optzns". 
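The access tags throughout this file use the struct-path form, !{base type, access type, offset}, rather than the scalar triples removed elsewhere in this commit. As a reading aid, here is how one of the stores below decomposes; the node numbers refer to the metadata block at the end of the file:

    ; store i32 4, i32* %f321, align 4, !tbaa !17
    ; !17 = metadata !{metadata !18, metadata !7, i64 4}   ; base = StructS, access = "int", offset 4
    ; !18 = metadata !{metadata !"_ZTS7StructS", metadata !10, i64 0, metadata !7, i64 4}
    ; !7  = metadata !{metadata !"int", metadata !2}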
+
+%struct.StructA = type { i16, i32, i16, i32 }
+%struct.StructB = type { i16, %struct.StructA, i32 }
+%struct.StructS = type { i16, i32 }
+%struct.StructS2 = type { i16, i32 }
+%struct.StructC = type { i16, %struct.StructB, i32 }
+%struct.StructD = type { i16, %struct.StructB, i32, i8 }
+
+define i32 @_Z1gPjP7StructAy(i32* %s, %struct.StructA* %A, i64 %count) #0 {
+entry:
+; Access to i32* and &(A->f32).
+; CHECK: Function
+; CHECK: MayAlias: store i32 4, i32* %f32, align 4, !tbaa !8 <-> store i32 1, i32* %0, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; OPT: %[[RET:.*]] = load i32*
+; OPT: ret i32 %[[RET]]
+  %s.addr = alloca i32*, align 8
+  %A.addr = alloca %struct.StructA*, align 8
+  %count.addr = alloca i64, align 8
+  store i32* %s, i32** %s.addr, align 8, !tbaa !0
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load i32** %s.addr, align 8, !tbaa !0
+  store i32 1, i32* %0, align 4, !tbaa !6
+  %1 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %1, i32 0, i32 1
+  store i32 4, i32* %f32, align 4, !tbaa !8
+  %2 = load i32** %s.addr, align 8, !tbaa !0
+  %3 = load i32* %2, align 4, !tbaa !6
+  ret i32 %3
+}
+
+define i32 @_Z2g2PjP7StructAy(i32* %s, %struct.StructA* %A, i64 %count) #0 {
+entry:
+; Access to i32* and &(A->f16).
+; CHECK: Function
+; CHECK: NoAlias: store i16 4, i16* %f16, align 2, !tbaa !8 <-> store i32 1, i32* %0, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i16 4
+; Remove a load and propagate the value from store.
+; OPT: ret i32 1
+  %s.addr = alloca i32*, align 8
+  %A.addr = alloca %struct.StructA*, align 8
+  %count.addr = alloca i64, align 8
+  store i32* %s, i32** %s.addr, align 8, !tbaa !0
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load i32** %s.addr, align 8, !tbaa !0
+  store i32 1, i32* %0, align 4, !tbaa !6
+  %1 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f16 = getelementptr inbounds %struct.StructA* %1, i32 0, i32 0
+  store i16 4, i16* %f16, align 2, !tbaa !11
+  %2 = load i32** %s.addr, align 8, !tbaa !0
+  %3 = load i32* %2, align 4, !tbaa !6
+  ret i32 %3
+}
+
+define i32 @_Z2g3P7StructAP7StructBy(%struct.StructA* %A, %struct.StructB* %B, i64 %count) #0 {
+entry:
+; Access to &(A->f32) and &(B->a.f32).
+; CHECK: Function
+; CHECK: MayAlias: store i32 4, i32* %f321, align 4, !tbaa !10 <-> store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; OPT: %[[RET:.*]] = load i32*
+; OPT: ret i32 %[[RET]]
+  %A.addr = alloca %struct.StructA*, align 8
+  %B.addr = alloca %struct.StructB*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store %struct.StructB* %B, %struct.StructB** %B.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !8
+  %1 = load %struct.StructB** %B.addr, align 8, !tbaa !0
+  %a = getelementptr inbounds %struct.StructB* %1, i32 0, i32 1
+  %f321 = getelementptr inbounds %struct.StructA* %a, i32 0, i32 1
+  store i32 4, i32* %f321, align 4, !tbaa !12
+  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f322 = getelementptr inbounds %struct.StructA* %2, i32 0, i32 1
+  %3 = load i32* %f322, align 4, !tbaa !8
+  ret i32 %3
+}
+
+define i32 @_Z2g4P7StructAP7StructBy(%struct.StructA* %A, %struct.StructB* %B, i64 %count) #0 {
+entry:
+; Access to &(A->f32) and &(B->a.f16).
+; CHECK: Function
+; CHECK: NoAlias: store i16 4, i16* %f16, align 2, !tbaa !10 <-> store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i16 4
+; Remove a load and propagate the value from store.
+; OPT: ret i32 1
+  %A.addr = alloca %struct.StructA*, align 8
+  %B.addr = alloca %struct.StructB*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store %struct.StructB* %B, %struct.StructB** %B.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !8
+  %1 = load %struct.StructB** %B.addr, align 8, !tbaa !0
+  %a = getelementptr inbounds %struct.StructB* %1, i32 0, i32 1
+  %f16 = getelementptr inbounds %struct.StructA* %a, i32 0, i32 0
+  store i16 4, i16* %f16, align 2, !tbaa !14
+  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructA* %2, i32 0, i32 1
+  %3 = load i32* %f321, align 4, !tbaa !8
+  ret i32 %3
+}
+
+define i32 @_Z2g5P7StructAP7StructBy(%struct.StructA* %A, %struct.StructB* %B, i64 %count) #0 {
+entry:
+; Access to &(A->f32) and &(B->f32).
+; CHECK: Function
+; CHECK: NoAlias: store i32 4, i32* %f321, align 4, !tbaa !10 <-> store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; Remove a load and propagate the value from store.
+; OPT: ret i32 1
+  %A.addr = alloca %struct.StructA*, align 8
+  %B.addr = alloca %struct.StructB*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store %struct.StructB* %B, %struct.StructB** %B.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !8
+  %1 = load %struct.StructB** %B.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructB* %1, i32 0, i32 2
+  store i32 4, i32* %f321, align 4, !tbaa !15
+  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f322 = getelementptr inbounds %struct.StructA* %2, i32 0, i32 1
+  %3 = load i32* %f322, align 4, !tbaa !8
+  ret i32 %3
+}
+
+define i32 @_Z2g6P7StructAP7StructBy(%struct.StructA* %A, %struct.StructB* %B, i64 %count) #0 {
+entry:
+; Access to &(A->f32) and &(B->a.f32_2).
+; CHECK: Function
+; CHECK: NoAlias: store i32 4, i32* %f32_2, align 4, !tbaa !10 <-> store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; Remove a load and propagate the value from store.
+; OPT: ret i32 1
+  %A.addr = alloca %struct.StructA*, align 8
+  %B.addr = alloca %struct.StructB*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store %struct.StructB* %B, %struct.StructB** %B.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !8
+  %1 = load %struct.StructB** %B.addr, align 8, !tbaa !0
+  %a = getelementptr inbounds %struct.StructB* %1, i32 0, i32 1
+  %f32_2 = getelementptr inbounds %struct.StructA* %a, i32 0, i32 3
+  store i32 4, i32* %f32_2, align 4, !tbaa !16
+  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructA* %2, i32 0, i32 1
+  %3 = load i32* %f321, align 4, !tbaa !8
+  ret i32 %3
+}
+
+define i32 @_Z2g7P7StructAP7StructSy(%struct.StructA* %A, %struct.StructS* %S, i64 %count) #0 {
+entry:
+; Access to &(A->f32) and &(S->f32).
+; CHECK: Function
+; CHECK: NoAlias: store i32 4, i32* %f321, align 4, !tbaa !10 <-> store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; Remove a load and propagate the value from store.
+; OPT: ret i32 1
+  %A.addr = alloca %struct.StructA*, align 8
+  %S.addr = alloca %struct.StructS*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store %struct.StructS* %S, %struct.StructS** %S.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !8
+  %1 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructS* %1, i32 0, i32 1
+  store i32 4, i32* %f321, align 4, !tbaa !17
+  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f322 = getelementptr inbounds %struct.StructA* %2, i32 0, i32 1
+  %3 = load i32* %f322, align 4, !tbaa !8
+  ret i32 %3
+}
+
+define i32 @_Z2g8P7StructAP7StructSy(%struct.StructA* %A, %struct.StructS* %S, i64 %count) #0 {
+entry:
+; Access to &(A->f32) and &(S->f16).
+; CHECK: Function
+; CHECK: NoAlias: store i16 4, i16* %f16, align 2, !tbaa !10 <-> store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i16 4
+; Remove a load and propagate the value from store.
+; OPT: ret i32 1
+  %A.addr = alloca %struct.StructA*, align 8
+  %S.addr = alloca %struct.StructS*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store %struct.StructS* %S, %struct.StructS** %S.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !8
+  %1 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %f16 = getelementptr inbounds %struct.StructS* %1, i32 0, i32 0
+  store i16 4, i16* %f16, align 2, !tbaa !19
+  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructA* %2, i32 0, i32 1
+  %3 = load i32* %f321, align 4, !tbaa !8
+  ret i32 %3
+}
+
+define i32 @_Z2g9P7StructSP8StructS2y(%struct.StructS* %S, %struct.StructS2* %S2, i64 %count) #0 {
+entry:
+; Access to &(S->f32) and &(S2->f32).
+; CHECK: Function
+; CHECK: NoAlias: store i32 4, i32* %f321, align 4, !tbaa !10 <-> store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; Remove a load and propagate the value from store.
+; OPT: ret i32 1
+  %S.addr = alloca %struct.StructS*, align 8
+  %S2.addr = alloca %struct.StructS2*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructS* %S, %struct.StructS** %S.addr, align 8, !tbaa !0
+  store %struct.StructS2* %S2, %struct.StructS2** %S2.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructS* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !17
+  %1 = load %struct.StructS2** %S2.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructS2* %1, i32 0, i32 1
+  store i32 4, i32* %f321, align 4, !tbaa !20
+  %2 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %f322 = getelementptr inbounds %struct.StructS* %2, i32 0, i32 1
+  %3 = load i32* %f322, align 4, !tbaa !17
+  ret i32 %3
+}
+
+define i32 @_Z3g10P7StructSP8StructS2y(%struct.StructS* %S, %struct.StructS2* %S2, i64 %count) #0 {
+entry:
+; Access to &(S->f32) and &(S2->f16).
+; CHECK: Function
+; CHECK: NoAlias: store i16 4, i16* %f16, align 2, !tbaa !10 <-> store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i16 4
+; Remove a load and propagate the value from store.
+; OPT: ret i32 1
+  %S.addr = alloca %struct.StructS*, align 8
+  %S2.addr = alloca %struct.StructS2*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructS* %S, %struct.StructS** %S.addr, align 8, !tbaa !0
+  store %struct.StructS2* %S2, %struct.StructS2** %S2.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructS* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !17
+  %1 = load %struct.StructS2** %S2.addr, align 8, !tbaa !0
+  %f16 = getelementptr inbounds %struct.StructS2* %1, i32 0, i32 0
+  store i16 4, i16* %f16, align 2, !tbaa !22
+  %2 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructS* %2, i32 0, i32 1
+  %3 = load i32* %f321, align 4, !tbaa !17
+  ret i32 %3
+}
+
+define i32 @_Z3g11P7StructCP7StructDy(%struct.StructC* %C, %struct.StructD* %D, i64 %count) #0 {
+entry:
+; Access to &(C->b.a.f32) and &(D->b.a.f32).
+; CHECK: Function
+; CHECK: NoAlias: store i32 4, i32* %f323, align 4, !tbaa !12 <-> store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; Remove a load and propagate the value from store.
+; OPT: ret i32 1
+  %C.addr = alloca %struct.StructC*, align 8
+  %D.addr = alloca %struct.StructD*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructC* %C, %struct.StructC** %C.addr, align 8, !tbaa !0
+  store %struct.StructD* %D, %struct.StructD** %D.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructC** %C.addr, align 8, !tbaa !0
+  %b = getelementptr inbounds %struct.StructC* %0, i32 0, i32 1
+  %a = getelementptr inbounds %struct.StructB* %b, i32 0, i32 1
+  %f32 = getelementptr inbounds %struct.StructA* %a, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !23
+  %1 = load %struct.StructD** %D.addr, align 8, !tbaa !0
+  %b1 = getelementptr inbounds %struct.StructD* %1, i32 0, i32 1
+  %a2 = getelementptr inbounds %struct.StructB* %b1, i32 0, i32 1
+  %f323 = getelementptr inbounds %struct.StructA* %a2, i32 0, i32 1
+  store i32 4, i32* %f323, align 4, !tbaa !25
+  %2 = load %struct.StructC** %C.addr, align 8, !tbaa !0
+  %b4 = getelementptr inbounds %struct.StructC* %2, i32 0, i32 1
+  %a5 = getelementptr inbounds %struct.StructB* %b4, i32 0, i32 1
+  %f326 = getelementptr inbounds %struct.StructA* %a5, i32 0, i32 1
+  %3 = load i32* %f326, align 4, !tbaa !23
+  ret i32 %3
+}
+
+define i32 @_Z3g12P7StructCP7StructDy(%struct.StructC* %C, %struct.StructD* %D, i64 %count) #0 {
+entry:
+; Access to &(b1->a.f32) and &(b2->a.f32).
+; CHECK: Function
+; CHECK: MayAlias: store i32 4, i32* %f325, align 4, !tbaa !6 <-> store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; OPT: %[[RET:.*]] = load i32*
+; OPT: ret i32 %[[RET]]
+  %C.addr = alloca %struct.StructC*, align 8
+  %D.addr = alloca %struct.StructD*, align 8
+  %count.addr = alloca i64, align 8
+  %b1 = alloca %struct.StructB*, align 8
+  %b2 = alloca %struct.StructB*, align 8
+  store %struct.StructC* %C, %struct.StructC** %C.addr, align 8, !tbaa !0
+  store %struct.StructD* %D, %struct.StructD** %D.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructC** %C.addr, align 8, !tbaa !0
+  %b = getelementptr inbounds %struct.StructC* %0, i32 0, i32 1
+  store %struct.StructB* %b, %struct.StructB** %b1, align 8, !tbaa !0
+  %1 = load %struct.StructD** %D.addr, align 8, !tbaa !0
+  %b3 = getelementptr inbounds %struct.StructD* %1, i32 0, i32 1
+  store %struct.StructB* %b3, %struct.StructB** %b2, align 8, !tbaa !0
+  %2 = load %struct.StructB** %b1, align 8, !tbaa !0
+  %a = getelementptr inbounds %struct.StructB* %2, i32 0, i32 1
+  %f32 = getelementptr inbounds %struct.StructA* %a, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !12
+  %3 = load %struct.StructB** %b2, align 8, !tbaa !0
+  %a4 = getelementptr inbounds %struct.StructB* %3, i32 0, i32 1
+  %f325 = getelementptr inbounds %struct.StructA* %a4, i32 0, i32 1
+  store i32 4, i32* %f325, align 4, !tbaa !12
+  %4 = load %struct.StructB** %b1, align 8, !tbaa !0
+  %a6 = getelementptr inbounds %struct.StructB* %4, i32 0, i32 1
+  %f327 = getelementptr inbounds %struct.StructA* %a6, i32 0, i32 1
+  %5 = load i32* %f327, align 4, !tbaa !12
+  ret i32 %5
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !1, metadata !1, i64 0}
+!1 = metadata !{metadata !"any pointer", metadata !2}
+!2 = metadata !{metadata !"omnipotent char", metadata !3}
+!3 = metadata !{metadata !"Simple C/C++ TBAA"}
+!4 = metadata !{metadata !5, metadata !5, i64 0}
+!5 = metadata !{metadata !"long long", metadata !2}
+!6 = metadata !{metadata !7, metadata !7, i64 0}
+!7 = metadata !{metadata !"int", metadata !2}
+!8 = metadata !{metadata !9, metadata !7, i64 4}
+!9 = metadata !{metadata !"_ZTS7StructA", metadata !10, i64 0, metadata !7, i64 4, metadata !10, i64 8, metadata !7, i64 12}
+!10 = metadata !{metadata !"short", metadata !2}
+!11 = metadata !{metadata !9, metadata !10, i64 0}
+!12 = metadata !{metadata !13, metadata !7, i64 8}
+!13 = metadata !{metadata !"_ZTS7StructB", metadata !10, i64 0, metadata !9, i64 4, metadata !7, i64 20}
+!14 = metadata !{metadata !13, metadata !10, i64 4}
+!15 = metadata !{metadata !13, metadata !7, i64 20}
+!16 = metadata !{metadata !13, metadata !7, i64 16}
+!17 = metadata !{metadata !18, metadata !7, i64 4}
+!18 = metadata !{metadata !"_ZTS7StructS", metadata !10, i64 0, metadata !7, i64 4}
+!19 = metadata !{metadata !18, metadata !10, i64 0}
+!20 = metadata !{metadata !21, metadata !7, i64 4}
+!21 = metadata !{metadata !"_ZTS8StructS2", metadata !10, i64 0, metadata !7, i64 4}
+!22 = metadata !{metadata !21, metadata !10, i64 0}
+!23 = metadata !{metadata !24, metadata !7, i64 12}
+!24 = metadata !{metadata !"_ZTS7StructC", metadata !10, i64 0, metadata !13, i64 4, metadata !7, i64 28}
+!25 = metadata !{metadata
!26, metadata !7, i64 12} +!26 = metadata !{metadata !"_ZTS7StructD", metadata !10, i64 0, metadata !13, i64 4, metadata !7, i64 28, metadata !2, i64 32} |
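
The struct-path access tags in this test are easier to read against the C++ that plausibly produced them. The sketch below is reconstructed from the mangled function names (e.g. @_Z2g7P7StructAP7StructSy) and the field offsets in the type descriptors !9, !13, !18, !21, !24 and !26; the struct names come from the test itself, but the field names and exact declarations are assumptions inferred from the GEP result names (%f16, %f32, ...).

  // Hypothetical reconstruction; an illustrative sketch, not the test's
  // confirmed source. Offsets match the struct-path descriptors above.
  struct StructA  { short f16; int f32; short f16_2; int f32_2; };   // !9
  struct StructB  { short f16; StructA a; int f32_2; };              // !13
  struct StructC  { short f16; StructB b; int f32; };                // !24
  struct StructD  { short f16; StructB b; int f32; char f8; };       // !26
  struct StructS  { short f16; int f32; };                           // !18
  struct StructS2 { short f16; int f32; };                           // !21

  // g7 in C++ terms: distinct struct types with an int at the same offset.
  // Struct-path TBAA proves the two stores cannot alias, so the final load
  // folds to the stored constant 1 (the "OPT: ret i32 1" check).
  unsigned g7(StructA *A, StructS *S, unsigned long long count) {
    A->f32 = 1;
    S->f32 = 4;
    return A->f32;
  }

  // g12 in C++ terms: both stores carry the same access tag (!12, an int at
  // offset 8 inside _ZTS7StructB), and b1/b2 may point at the same StructB,
  // so the answer is MayAlias and the final load must stay.
  unsigned g12(StructC *C, StructD *D, unsigned long long count) {
    StructB *b1 = &C->b;
    StructB *b2 = &D->b;
    b1->a.f32 = 1;
    b2->a.f32 = 4;
    return b1->a.f32;
  }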
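
The NoAlias/MayAlias verdicts the CHECK lines expect all follow from one rule: an access tag is a (base type, access type, offset) triple, and two tags may alias only if walking down from one tag's base type at its offset passes through the other tag's base at a matching offset. The following is a minimal model of that rule, under the assumption of sorted, non-overlapping fields and with scalar nodes treated as leaves; LLVM's actual TypeBasedAliasAnalysis handles more than this sketch.

  #include <cassert>
  #include <cstdint>
  #include <string>
  #include <utility>
  #include <vector>

  // A struct-path TBAA type node. Scalars ("short", "int") are leaves;
  // struct descriptors like !9 or !13 list (offset, field type) pairs,
  // sorted by offset.
  struct TypeNode {
    std::string name;
    std::vector<std::pair<uint64_t, const TypeNode*>> fields;
  };

  // An access tag such as !8 or !12: base type, access type, offset.
  struct AccessTag {
    const TypeNode* base;
    const TypeNode* access;
    uint64_t offset;
  };

  // Walk down from tag's base, descending into the field that contains the
  // running offset; report whether we land on target's base at its offset.
  static bool reaches(const AccessTag& tag, const AccessTag& target) {
    const TypeNode* node = tag.base;
    uint64_t off = tag.offset;
    while (node) {
      if (node == target.base)
        return off == target.offset;
      if (node->fields.empty())
        return false;               // scalar leaf: the path ends here
      const TypeNode* next = nullptr;
      uint64_t fieldOff = 0;
      for (const auto& f : node->fields)
        if (f.first <= off) { fieldOff = f.first; next = f.second; }
      off -= fieldOff;              // re-base the offset inside the field
      node = next;
    }
    return false;
  }

  // Two accesses may alias only if one tag's path passes through the
  // other's base at a matching offset, in either direction.
  bool mayAlias(const AccessTag& a, const AccessTag& b) {
    return reaches(a, b) || reaches(b, a);
  }

  int main() {
    TypeNode i16{"short", {}}, i32{"int", {}};
    TypeNode A{"_ZTS7StructA", {{0, &i16}, {4, &i32}, {8, &i16}, {12, &i32}}}; // !9
    TypeNode S{"_ZTS7StructS", {{0, &i16}, {4, &i32}}};                        // !18
    TypeNode B{"_ZTS7StructB", {{0, &i16}, {4, &A}, {20, &i32}}};              // !13

    assert(!mayAlias({&A, &i32, 4}, {&S, &i32, 4})); // g7: NoAlias
    assert( mayAlias({&B, &i32, 8}, {&B, &i32, 8})); // g12: MayAlias
    assert( mayAlias({&B, &i32, 8}, {&A, &i32, 4})); // B->a.f32 vs A->f32
    return 0;
  }

Treating scalar nodes as field-less leaves is the simplification here: in the real metadata every scalar chains up to the "omnipotent char" root (!2), which is how char-typed accesses alias accesses of every other type.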