summary | refs | log | tree | commit | diff | stats
path: root/src/codegen.c
diff options
context:
space:
mode:
author: Jukka Ojanen <jukka.ojanen@linkotec.net> 2014-11-17 12:15:41 +0200
committer: Jukka Ojanen <jukka.ojanen@linkotec.net> 2014-11-17 12:15:41 +0200
commit: d77be00aaadd7772b364c10bff41a38e0112f59d (patch)
tree: a0d80bcb62d2e51a41742543512d809d3e4dc416 /src/codegen.c
parent: 3a44c5ebd57ea10d69b883b212ebd7a773fe7a43 (diff)
download: ffts-d77be00aaadd7772b364c10bff41a38e0112f59d.zip
          ffts-d77be00aaadd7772b364c10bff41a38e0112f59d.tar.gz
Don't use long NOPs; instead, add an extra prefix to extend opcodes so that branch targets are aligned
Diffstat (limited to 'src/codegen.c')
-rw-r--r-- src/codegen.c | 12
1 files changed, 7 insertions, 5 deletions
diff --git a/src/codegen.c b/src/codegen.c
index e1ed11f..9f2921a 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -194,23 +194,26 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
loop_count = 4 * p->i0;
generate_leaf_init(&fp, loop_count);
- generate_leaf_ee(&fp, offsets);
if (ffts_ctzl(N) & 1) {
+ generate_leaf_ee(&fp, offsets, p->i1 ? 6 : 0);
+
if (p->i1) {
loop_count += 4 * p->i1;
- generate_leaf_oo(&fp, loop_count, offsets_o);
+ generate_leaf_oo(&fp, loop_count, offsets_o, 7);
}
loop_count += 4;
generate_leaf_oe(&fp, offsets_o);
} else {
+ generate_leaf_ee(&fp, offsets, N >= 256 ? 2 : 8);
+
loop_count += 4;
generate_leaf_eo(&fp, offsets);
if (p->i1) {
loop_count += 4 * p->i1;
- generate_leaf_oo(&fp, loop_count, offsets_o);
+ generate_leaf_oo(&fp, loop_count, offsets_o, N >= 256 ? 4 : 7);
}
}
@@ -222,13 +225,12 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
/* align loop/jump destination */
#ifdef _M_X64
x86_mov_reg_imm(fp, X86_EBX, loop_count);
- ffts_align_mem16(&fp, 8);
#else
x86_mov_reg_imm(fp, X86_ECX, loop_count);
ffts_align_mem16(&fp, 9);
#endif
- generate_leaf_ee(&fp, offsets_oe);
+ generate_leaf_ee(&fp, offsets_oe, 0);
}
generate_transform_init(&fp);
OpenPOWER on IntegriCloud