diff options
author | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2014-11-17 12:15:41 +0200 |
---|---|---|
committer | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2014-11-17 12:15:41 +0200 |
commit | d77be00aaadd7772b364c10bff41a38e0112f59d (patch) | |
tree | a0d80bcb62d2e51a41742543512d809d3e4dc416 /src/codegen.c | |
parent | 3a44c5ebd57ea10d69b883b212ebd7a773fe7a43 (diff) | |
download | ffts-d77be00aaadd7772b364c10bff41a38e0112f59d.zip ffts-d77be00aaadd7772b364c10bff41a38e0112f59d.tar.gz |
Don't use long NOPs; instead, add an extra prefix to extend opcodes so that branch targets are aligned
Diffstat (limited to 'src/codegen.c')
-rw-r--r-- | src/codegen.c | 12 |
1 file changed, 7 insertions, 5 deletions
diff --git a/src/codegen.c b/src/codegen.c index e1ed11f..9f2921a 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -194,23 +194,26 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N loop_count = 4 * p->i0; generate_leaf_init(&fp, loop_count); - generate_leaf_ee(&fp, offsets); if (ffts_ctzl(N) & 1) { + generate_leaf_ee(&fp, offsets, p->i1 ? 6 : 0); + if (p->i1) { loop_count += 4 * p->i1; - generate_leaf_oo(&fp, loop_count, offsets_o); + generate_leaf_oo(&fp, loop_count, offsets_o, 7); } loop_count += 4; generate_leaf_oe(&fp, offsets_o); } else { + generate_leaf_ee(&fp, offsets, N >= 256 ? 2 : 8); + loop_count += 4; generate_leaf_eo(&fp, offsets); if (p->i1) { loop_count += 4 * p->i1; - generate_leaf_oo(&fp, loop_count, offsets_o); + generate_leaf_oo(&fp, loop_count, offsets_o, N >= 256 ? 4 : 7); } } @@ -222,13 +225,12 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N /* align loop/jump destination */ #ifdef _M_X64 x86_mov_reg_imm(fp, X86_EBX, loop_count); - ffts_align_mem16(&fp, 8); #else x86_mov_reg_imm(fp, X86_ECX, loop_count); ffts_align_mem16(&fp, 9); #endif - generate_leaf_ee(&fp, offsets_oe); + generate_leaf_ee(&fp, offsets_oe, 0); } generate_transform_init(&fp); |