summaryrefslogtreecommitdiffstats
path: root/tcg
diff options
context:
space:
mode:
authorEmilio G. Cota <cota@braap.org>2015-04-02 20:07:53 -0400
committerRichard Henderson <rth@twiddle.net>2015-05-05 08:44:46 -0700
commit00c8fa9ffeee7458e5ed62c962faf638156c18da (patch)
tree060a79c226263cd5ade656d266f139917d4f2842 /tcg
parent874e9aeeeb74c5459639a93439a502d262847e68 (diff)
downloadhqemu-00c8fa9ffeee7458e5ed62c962faf638156c18da.zip
hqemu-00c8fa9ffeee7458e5ed62c962faf638156c18da.tar.gz
tcg: optimise memory layout of TCGTemp
This brings down the size of the struct from 56 to 32 bytes on 64-bit, and to 20 bytes on 32-bit. This leads to memory savings: Before: $ find . -name 'tcg.o' | xargs size text data bss dec hex filename 41131 29800 88 71019 1156b ./aarch64-softmmu/tcg/tcg.o 37969 29416 96 67481 10799 ./x86_64-linux-user/tcg/tcg.o 39354 28816 96 68266 10aaa ./arm-linux-user/tcg/tcg.o 40802 29096 88 69986 11162 ./arm-softmmu/tcg/tcg.o 39417 29672 88 69177 10e39 ./x86_64-softmmu/tcg/tcg.o After: $ find . -name 'tcg.o' | xargs size text data bss dec hex filename 40883 29800 88 70771 11473 ./aarch64-softmmu/tcg/tcg.o 37473 29416 96 66985 105a9 ./x86_64-linux-user/tcg/tcg.o 38858 28816 96 67770 108ba ./arm-linux-user/tcg/tcg.o 40554 29096 88 69738 1106a ./arm-softmmu/tcg/tcg.o 39169 29672 88 68929 10d41 ./x86_64-softmmu/tcg/tcg.o Note that using an entire byte for some enums that need less than that wastes a few bits (noticeable in 32 bits, where we use 20 bytes instead of 16) but avoids extraction code, which overall is a win--I've tested several variations of the patch, and the appended is the best performer for OpenSSL's bntest by a very small margin: Before: $ taskset -c 0 perf stat -r 15 -- x86_64-linux-user/qemu-x86_64 img/bntest-x86_64 >/dev/null [...] Performance counter stats for 'x86_64-linux-user/qemu-x86_64 img/bntest-x86_64' (15 runs): 10538.479833 task-clock (msec) # 0.999 CPUs utilized ( +- 0.38% ) 772 context-switches # 0.073 K/sec ( +- 2.03% ) 0 cpu-migrations # 0.000 K/sec ( +-100.00% ) 2,207 page-faults # 0.209 K/sec ( +- 0.08% ) 10.552871687 seconds time elapsed ( +- 0.39% ) After: $ taskset -c 0 perf stat -r 15 -- x86_64-linux-user/qemu-x86_64 img/bntest-x86_64 >/dev/null Performance counter stats for 'x86_64-linux-user/qemu-x86_64 img/bntest-x86_64' (15 runs): 10459.968847 task-clock (msec) # 0.999 CPUs utilized ( +- 0.30% ) 739 context-switches # 0.071 K/sec ( +- 1.71% ) 0 cpu-migrations # 0.000 K/sec ( +- 68.14% ) 2,204 page-faults # 0.211 K/sec ( +- 0.10% ) 10.473900411 seconds time elapsed ( +- 0.30% ) Suggested-by: Stefan Weil <sw@weilnetz.de> Suggested-by: Richard Henderson <rth@twiddle.net> Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Emilio G. Cota <cota@braap.org> Signed-off-by: Richard Henderson <rth@twiddle.net>
Diffstat (limited to 'tcg')
-rw-r--r--tcg/tcg.h26
1 files changed, 14 insertions, 12 deletions
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 3d004ba..fbb3daf 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -417,20 +417,19 @@ static inline TCGCond tcg_high_cond(TCGCond c)
}
}
-#define TEMP_VAL_DEAD 0
-#define TEMP_VAL_REG 1
-#define TEMP_VAL_MEM 2
-#define TEMP_VAL_CONST 3
+typedef enum TCGTempVal {
+ TEMP_VAL_DEAD,
+ TEMP_VAL_REG,
+ TEMP_VAL_MEM,
+ TEMP_VAL_CONST,
+} TCGTempVal;
-/* XXX: optimize memory layout */
typedef struct TCGTemp {
- TCGType base_type;
- TCGType type;
- int val_type;
- int reg;
- tcg_target_long val;
- int mem_reg;
- intptr_t mem_offset;
+ unsigned int reg:8;
+ unsigned int mem_reg:8;
+ TCGTempVal val_type:8;
+ TCGType base_type:8;
+ TCGType type:8;
unsigned int fixed_reg:1;
unsigned int mem_coherent:1;
unsigned int mem_allocated:1;
@@ -438,6 +437,9 @@ typedef struct TCGTemp {
basic blocks. Otherwise, it is not
preserved across basic blocks. */
unsigned int temp_allocated:1; /* never used for code gen */
+
+ tcg_target_long val;
+ intptr_t mem_offset;
const char *name;
} TCGTemp;
OpenPOWER on IntegriCloud