From e46b225a3137e62c975c49aaae7bb5f9583cc428 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Tue, 3 Sep 2013 08:27:38 +0200
Subject: tcg/optimize: fix known-zero bits for right shift ops

32-bit versions of the sar and shr ops should not propagate known-zero
bits from the unused 32 high bits. For sar it could even lead to wrong
code being generated.

Cc: qemu-stable@nongnu.org
Reviewed-by: Paolo Bonzini
Signed-off-by: Aurelien Jarno
Signed-off-by: Richard Henderson
---
 tcg/optimize.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 89e2d6a..c5cdde2 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -726,16 +726,25 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             mask = temps[args[1]].mask & mask;
             break;
 
-        CASE_OP_32_64(sar):
+        case INDEX_op_sar_i32:
+            if (temps[args[2]].state == TCG_TEMP_CONST) {
+                mask = (int32_t)temps[args[1]].mask >> temps[args[2]].val;
+            }
+            break;
+        case INDEX_op_sar_i64:
             if (temps[args[2]].state == TCG_TEMP_CONST) {
-                mask = ((tcg_target_long)temps[args[1]].mask
-                        >> temps[args[2]].val);
+                mask = (int64_t)temps[args[1]].mask >> temps[args[2]].val;
             }
             break;
 
-        CASE_OP_32_64(shr):
+        case INDEX_op_shr_i32:
+            if (temps[args[2]].state == TCG_TEMP_CONST) {
+                mask = (uint32_t)temps[args[1]].mask >> temps[args[2]].val;
+            }
+            break;
+        case INDEX_op_shr_i64:
             if (temps[args[2]].state == TCG_TEMP_CONST) {
-                mask = temps[args[1]].mask >> temps[args[2]].val;
+                mask = (uint64_t)temps[args[1]].mask >> temps[args[2]].val;
             }
             break;
 

From 3031244b01492528fd7b5e46b23eeb2124dc780a Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Tue, 3 Sep 2013 08:27:38 +0200
Subject: tcg/optimize: fix known-zero bits optimization

Known-zero bits optimization is a great idea that helps to generate
more optimized code. However, the current implementation only works
in very few cases, as the computed mask is not saved.

Fix this by saving the computed mask so that the optimization really
takes effect.

Reviewed-by: Paolo Bonzini
Signed-off-by: Aurelien Jarno
Signed-off-by: Richard Henderson
---
 tcg/optimize.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index c5cdde2..7838be2 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -691,7 +691,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             break;
         }
 
-        /* Simplify using known-zero bits */
+        /* Simplify using known-zero bits. Currently only ops with a single
+           output argument is supported. */
         mask = -1;
         affected = -1;
         switch (op) {
@@ -1149,6 +1150,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         } else {
             for (i = 0; i < def->nb_oargs; i++) {
                 reset_temp(args[i]);
+                /* Save the corresponding known-zero bits mask for the
+                   first output argument (only one supported so far). */
+                if (i == 0) {
+                    temps[args[i]].mask = mask;
+                }
             }
         }
         for (i = 0; i < def->nb_args; i++) {
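
A note on the right-shift fix above: the known-zero mask has to be shifted
with exactly the width and signedness of the value it describes. The
standalone C sketch below is illustrative only, not QEMU code (the helper
names are invented), and it assumes, as QEMU does, that the compiler
implements right shift of a negative integer as an arithmetic shift:

    #include <stdint.h>
    #include <stdio.h>

    /* 'mask' has a 1 wherever the described value MAY have a set bit. */

    static uint32_t sar_i32_mask(uint32_t mask, int sh)
    {
        /* Arithmetic shift: if bit 31 may be set, the sign bit smears
           right, so the vacated high bits may all be set too. */
        return (uint32_t)((int32_t)mask >> sh);
    }

    static uint32_t shr_i32_mask(uint32_t mask, int sh)
    {
        /* Logical shift: the vacated high bits are known zero. */
        return mask >> sh;
    }

    int main(void)
    {
        uint32_t m = 0x80000000u;   /* only bit 31 may be set */

        printf("sar by 4: %08x\n", sar_i32_mask(m, 4));   /* f8000000 */
        printf("shr by 4: %08x\n", shr_i32_mask(m, 4));   /* 08000000 */
        return 0;
    }

Shifting the mask as a 64-bit quantity, as the old CASE_OP_32_64(sar) code
did, would let stale high bits leak into the 32-bit result, which is
precisely what the split into _i32 and _i64 cases prevents.
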
From f096dc96188378bc2bcd80683490ca386b0c1683 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Tue, 3 Sep 2013 08:27:38 +0200
Subject: tcg/optimize: improve known-zero bits for 32-bit ops

The shl_i32 op might set some of the unused 32 high bits of the mask.
Fix that by clearing the unused 32 high bits for all 32-bit ops except
load/store, which operate on tl values.

Reviewed-by: Paolo Bonzini
Signed-off-by: Aurelien Jarno
Signed-off-by: Richard Henderson
---
 tcg/optimize.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 7838be2..1cf017a 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -783,6 +783,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             break;
         }
 
+        /* 32-bit ops (non 64-bit ops and non load/store ops) generate 32-bit
+           results */
+        if (!(tcg_op_defs[op].flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_64BIT))) {
+            mask &= 0xffffffffu;
+        }
+
         if (mask == 0) {
             assert(def->nb_oargs == 1);
             s->gen_opc_buf[op_index] = op_to_movi(op);

From c8d70272535b84ccd3cd1a3dcad65aed34be6bb4 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno
Date: Tue, 3 Sep 2013 08:27:39 +0200
Subject: tcg/optimize: add known-zero bits compute for load ops

Reviewed-by: Paolo Bonzini
Signed-off-by: Aurelien Jarno
Signed-off-by: Richard Henderson
---
 tcg/optimize.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 1cf017a..1b9fea5 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -779,13 +779,37 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             mask = temps[args[3]].mask | temps[args[4]].mask;
             break;
 
+        CASE_OP_32_64(ld8u):
+        case INDEX_op_qemu_ld8u:
+            mask = 0xff;
+            break;
+        CASE_OP_32_64(ld16u):
+        case INDEX_op_qemu_ld16u:
+            mask = 0xffff;
+            break;
+        case INDEX_op_ld32u_i64:
+#if TCG_TARGET_REG_BITS == 64
+        case INDEX_op_qemu_ld32u:
+#endif
+            mask = 0xffffffffu;
+            break;
+
+        CASE_OP_32_64(qemu_ld):
+            {
+                TCGMemOp mop = args[def->nb_oargs + def->nb_iargs];
+                if (!(mop & MO_SIGN)) {
+                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
+                }
+            }
+            break;
+
         default:
             break;
         }
 
         /* 32-bit ops (non 64-bit ops and non load/store ops) generate 32-bit
            results */
-        if (!(tcg_op_defs[op].flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_64BIT))) {
+        if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_64BIT))) {
             mask &= 0xffffffffu;
         }
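
One detail of the load-ops patch above is worth spelling out: for an
unsigned load of 1 << (mop & MO_SIZE) bytes, every bit of the loaded width
may be set and all higher bits are known zero. The mask is written as
(2ULL << (bits - 1)) - 1 rather than (1ULL << bits) - 1 because a shift by
64 is undefined behavior in C. A minimal standalone sketch of the same
computation (the function name is invented for illustration):

    #include <stdint.h>
    #include <stdio.h>

    /* All-ones mask for an unsigned load of (1 << size_log2) bytes.
       2ULL << (bits - 1) is well defined even for bits == 64: the
       shift count is 63, the unsigned result wraps to 0, and
       subtracting 1 then yields the all-ones 64-bit mask. */
    static uint64_t unsigned_load_mask(unsigned size_log2)
    {
        unsigned bits = 8u << size_log2;    /* 8, 16, 32 or 64 */

        return (2ULL << (bits - 1)) - 1;
    }

    int main(void)
    {
        unsigned s;

        for (s = 0; s < 4; s++) {
            printf("size_log2=%u -> mask %#llx\n", s,
                   (unsigned long long)unsigned_load_mask(s));
        }
        return 0;
    }

This prints 0xff, 0xffff, 0xffffffff and 0xffffffffffffffff, matching the
explicit ld8u/ld16u/ld32u cases added in the same patch.
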
From 23ec69ed3759fe5d8374cb22795ade1305c331c4 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Tue, 28 Jan 2014 12:03:24 -0800
Subject: tcg/optimize: Handle known-zeros masks for ANDC

Reviewed-by: Paolo Bonzini
Reviewed-by: Aurelien Jarno
Signed-off-by: Richard Henderson
---
 tcg/optimize.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 1b9fea5..4bea8a5 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -727,6 +727,17 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             mask = temps[args[1]].mask & mask;
             break;
 
+        CASE_OP_32_64(andc):
+            /* Known-zeros does not imply known-ones. Therefore unless
+               args[2] is constant, we can't infer anything from it. */
+            if (temps[args[2]].state == TCG_TEMP_CONST) {
+                mask = ~temps[args[2]].mask;
+                goto and_const;
+            }
+            /* But we certainly know nothing outside args[1] may be set. */
+            mask = temps[args[1]].mask;
+            break;
+
         case INDEX_op_sar_i32:
             if (temps[args[2]].state == TCG_TEMP_CONST) {
                 mask = (int32_t)temps[args[1]].mask >> temps[args[2]].val;

From e201b56418a5bb6afadc42df16f94880c091fad4 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Tue, 28 Jan 2014 13:15:38 -0800
Subject: tcg/optimize: Simplify some logical ops to NOT

Given, of course, an appropriate constant. These could be generated
from the "canonical" operation for inversion on the guest, or via
other optimizations.

Reviewed-by: Paolo Bonzini
Reviewed-by: Aurelien Jarno
Signed-off-by: Richard Henderson
---
 tcg/optimize.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 4bea8a5..0b1dd13 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -655,6 +655,63 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
         }
         break;
+        CASE_OP_32_64(xor):
+        CASE_OP_32_64(nand):
+            if (temps[args[1]].state != TCG_TEMP_CONST
+                && temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[2]].val == -1) {
+                i = 1;
+                goto try_not;
+            }
+            break;
+        CASE_OP_32_64(nor):
+            if (temps[args[1]].state != TCG_TEMP_CONST
+                && temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[2]].val == 0) {
+                i = 1;
+                goto try_not;
+            }
+            break;
+        CASE_OP_32_64(andc):
+            if (temps[args[2]].state != TCG_TEMP_CONST
+                && temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[1]].val == -1) {
+                i = 2;
+                goto try_not;
+            }
+            break;
+        CASE_OP_32_64(orc):
+        CASE_OP_32_64(eqv):
+            if (temps[args[2]].state != TCG_TEMP_CONST
+                && temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[1]].val == 0) {
+                i = 2;
+                goto try_not;
+            }
+            break;
+        try_not:
+            {
+                TCGOpcode not_op;
+                bool have_not;
+
+                if (def->flags & TCG_OPF_64BIT) {
+                    not_op = INDEX_op_not_i64;
+                    have_not = TCG_TARGET_HAS_not_i64;
+                } else {
+                    not_op = INDEX_op_not_i32;
+                    have_not = TCG_TARGET_HAS_not_i32;
+                }
+                if (!have_not) {
+                    break;
+                }
+                s->gen_opc_buf[op_index] = not_op;
+                reset_temp(args[0]);
+                gen_args[0] = args[0];
+                gen_args[1] = args[i];
+                args += 3;
+                gen_args += 2;
+                continue;
+            }
         default:
             break;
         }

From e64e958e202c563730159c52f7c9116c80ceca52 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Tue, 28 Jan 2014 13:26:17 -0800
Subject: tcg/optimize: Optimize ANDC X,Y,Y to MOV X,0

Like we already do for SUB and XOR.

Reviewed-by: Paolo Bonzini
Reviewed-by: Aurelien Jarno
Signed-off-by: Richard Henderson
---
 tcg/optimize.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 0b1dd13..c1b9284 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -947,6 +947,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
 
         /* Simplify expression for "op r, a, a => movi r, 0" cases */
         switch (op) {
+        CASE_OP_32_64(andc):
         CASE_OP_32_64(sub):
         CASE_OP_32_64(xor):
             if (temps_are_copies(args[1], args[2])) {
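
The two patches above rest on plain Boolean identities. Using TCG's operand
conventions (andc(a, b) = a & ~b, orc(a, b) = a | ~b, eqv(a, b) = ~(a ^ b),
and nand/nor as the negated and/or), each op matched in the NOT patch
collapses to a NOT of its non-constant operand, and andc of a value with
itself is always zero. A quick standalone check of the 64-bit cases,
illustrative only:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t a = 0x1234567890abcdefULL;

        /* Ops that simplify to NOT, per the constants matched above. */
        assert((a ^ ~0ULL) == ~a);      /* xor  r,a,-1 -> not r,a */
        assert(~(a & ~0ULL) == ~a);     /* nand r,a,-1 -> not r,a */
        assert(~(a | 0ULL) == ~a);      /* nor  r,a,0  -> not r,a */
        assert((~0ULL & ~a) == ~a);     /* andc r,-1,a -> not r,a */
        assert((0ULL | ~a) == ~a);      /* orc  r,0,a  -> not r,a */
        assert(~(0ULL ^ a) == ~a);      /* eqv  r,0,a  -> not r,a */

        /* ANDC X,Y,Y is always 0, like SUB and XOR. */
        assert((a & ~a) == 0);          /* andc r,a,a -> movi r,0 */

        puts("all identities hold");
        return 0;
    }
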
From 464a1441c138b4f29cff26d406298661e588235b Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Fri, 31 Jan 2014 07:42:11 -0600
Subject: tcg/optimize: Add more identity simplifications

Recognize 0 operand to andc, and -1 operands to and, orc, eqv.

Reviewed-by: Paolo Bonzini
Reviewed-by: Aurelien Jarno
Signed-off-by: Richard Henderson
---
 tcg/optimize.c | 39 ++++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index c1b9284..7777743 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -716,7 +716,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             break;
         }
 
-        /* Simplify expression for "op r, a, 0 => mov r, a" cases */
+        /* Simplify expression for "op r, a, const => mov r, a" cases */
         switch (op) {
         CASE_OP_32_64(add):
         CASE_OP_32_64(sub):
@@ -727,23 +727,32 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(rotr):
         CASE_OP_32_64(or):
         CASE_OP_32_64(xor):
-            if (temps[args[1]].state == TCG_TEMP_CONST) {
-                /* Proceed with possible constant folding. */
-                break;
-            }
-            if (temps[args[2]].state == TCG_TEMP_CONST
+        CASE_OP_32_64(andc):
+            if (temps[args[1]].state != TCG_TEMP_CONST
+                && temps[args[2]].state == TCG_TEMP_CONST
                 && temps[args[2]].val == 0) {
-                if (temps_are_copies(args[0], args[1])) {
-                    s->gen_opc_buf[op_index] = INDEX_op_nop;
-                } else {
-                    s->gen_opc_buf[op_index] = op_to_mov(op);
-                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
-                    gen_args += 2;
-                }
-                args += 3;
-                continue;
+                goto do_mov3;
             }
             break;
+        CASE_OP_32_64(and):
+        CASE_OP_32_64(orc):
+        CASE_OP_32_64(eqv):
+            if (temps[args[1]].state != TCG_TEMP_CONST
+                && temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[2]].val == -1) {
+                goto do_mov3;
+            }
+            break;
+        do_mov3:
+            if (temps_are_copies(args[0], args[1])) {
+                s->gen_opc_buf[op_index] = INDEX_op_nop;
+            } else {
+                s->gen_opc_buf[op_index] = op_to_mov(op);
+                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
+                gen_args += 2;
+            }
+            args += 3;
+            continue;
         default:
             break;
         }
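
For completeness, the identity operands recognized above follow from the
same operand conventions: with these constants each op reduces to a plain
move of its first source, which is why the patch routes them all to the
shared do_mov3 path. A standalone check of the 64-bit cases, again
illustrative only:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t a = 0xfedcba9876543210ULL;

        assert((a & -1ULL) == a);       /* and  r,a,-1 -> mov r,a */
        assert((a & ~0ULL) == a);       /* andc r,a,0  -> mov r,a */
        assert((a | ~(-1ULL)) == a);    /* orc  r,a,-1 -> mov r,a */
        assert(~(a ^ -1ULL) == a);      /* eqv  r,a,-1 -> mov r,a */

        puts("all identity operands hold");
        return 0;
    }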