diff options
author | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2014-11-06 11:38:55 +0200 |
---|---|---|
committer | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2014-11-06 11:38:55 +0200 |
commit | 18d19a9f8b4e409b4db46338c9040a61555f9c58 (patch) | |
tree | d086566753530cd86bafc68f03887a1eceb844fd | |
parent | a0db4af6fe8f68a62cbf993871137d4cd341dfc5 (diff) | |
download | ffts-18d19a9f8b4e409b4db46338c9040a61555f9c58.zip ffts-18d19a9f8b4e409b4db46338c9040a61555f9c58.tar.gz |
Win64 actually "generate_size8_base_case" instead of copying
-rw-r--r-- | src/codegen.c | 74 | ||||
-rw-r--r-- | src/codegen_sse.h | 1059 | ||||
-rw-r--r-- | src/sequitur.h | 630 |
3 files changed, 1090 insertions, 673 deletions
diff --git a/src/codegen.c b/src/codegen.c index 9d95519..4e70cb1 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -144,21 +144,21 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N fp = (insns_t*) p->transform_base; - /* generate base cases */ - x_4_addr = generate_size4_base_case(&fp, sign); - x_8_addr = generate_size8_base_case(&fp, sign); + /* generate base cases */ + x_4_addr = generate_size4_base_case(&fp, sign); + x_8_addr = generate_size8_base_case(&fp, sign); #ifdef __arm__ - start = generate_prologue(&fp, p); + start = generate_prologue(&fp, p); #else - start = generate_prologue(&fp, p); + start = generate_prologue(&fp, p); /* assign loop counter register */ loop_count = 4 * p->i0; #ifdef _M_X64 - MOVI(&fp, EBX, loop_count); + MOV_I(&fp, EBX, loop_count); #else - MOVI(&fp, ECX, loop_count); + MOV_I(&fp, ECX, loop_count); #endif #endif @@ -204,18 +204,18 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N //fprintf(stderr, "Body start address = %016p\n", start); #ifdef _M_X64 - /* generate function */ + /* generate function */ - /* clear */ - XOR2(&fp, EAX, EAX); - - /* set "pointer" to offsets */ - MOV(&fp, RDI, RCX, 0, 0); + /* clear */ + XOR2(&fp, EAX, EAX); - /* set "pointer" to constants */ - MOV(&fp, RSI, RCX, 0xE0, 0); + /* set "pointer" to offsets */ + MOV_D(&fp, RDI, RCX, 0, 0); - /* align loop/jump destination */ + /* set "pointer" to constants */ + MOV_D(&fp, RSI, RCX, 0xE0, 0); + + /* align loop/jump destination */ ffts_align_mem16(&fp, 8); #else /* copy function */ @@ -245,10 +245,10 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N /* align loop/jump destination */ #ifdef _M_X64 - MOVI(&fp, EBX, loop_count); + MOV_I(&fp, EBX, loop_count); ffts_align_mem16(&fp, 3); #else - MOVI(&fp, ECX, loop_count); + MOV_I(&fp, ECX, loop_count); ffts_align_mem16(&fp, 4); #endif @@ -298,10 +298,10 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N /* align loop/jump destination */ #ifdef _M_X64 - MOVI(&fp, EBX, loop_count); + MOV_I(&fp, EBX, loop_count); ffts_align_mem16(&fp, 3); #else - MOVI(&fp, ECX, loop_count); + MOV_I(&fp, ECX, loop_count); ffts_align_mem16(&fp, 4); #endif @@ -325,10 +325,10 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N /* align loop/jump destination */ #ifdef _M_X64 - MOVI(&fp, EBX, loop_count); + MOV_I(&fp, EBX, loop_count); ffts_align_mem16(&fp, 8); #else - MOVI(&fp, ECX, loop_count); + MOV_I(&fp, ECX, loop_count); ffts_align_mem16(&fp, 9); #endif @@ -343,38 +343,26 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N fp += len; } -#ifdef _M_X64 - /* generate function */ - MOVAPS2(&fp, XMM3, RSI); - - /* set "pointer" to twiddle factors */ - MOV(&fp, RDI, RCX, 0x20, 0); -#else - /* copy function */ - assert((char*) x4 > (char*) x_init); - len = (char*) x4 - (char*) x_init; - memcpy(fp, x_init, len); - fp += len; -#endif + generate_transform_init(&fp); - /* generate subtransform calls */ + /* generate subtransform calls */ count = 2; while (pps[0]) { size_t ws_is; if (!pN) { #ifdef _M_X64 - MOVI(&fp, EBX, pps[0]); + MOV_I(&fp, EBX, pps[0]); #else - MOVI(&fp, ECX, pps[0] / 4); + MOV_I(&fp, ECX, pps[0] / 4); #endif } else { int offset = (4 * pps[1]) - pAddr; if (offset) { #ifdef _M_X64 - ADDI(&fp, R8, offset); + ADD_I(&fp, R8, offset); #else - ADDI(&fp, RDX, offset); + ADD_I(&fp, RDX, offset); #endif } @@ -394,9 +382,9 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N int offset = (int) (ws_is - pLUT); #ifdef _M_X64 - ADDI(&fp, RDI, offset); + ADD_I(&fp, RDI, offset); #else - ADDI(&fp, R8, offset); + ADD_I(&fp, R8, offset); #endif } @@ -701,7 +689,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N *fp++ = POP_LR(); count++; #else - generate_epilogue(&fp); + generate_epilogue(&fp); #endif // *fp++ = B(14); count++; diff --git a/src/codegen_sse.h b/src/codegen_sse.h index abb5008..fa67a32 100644 --- a/src/codegen_sse.h +++ b/src/codegen_sse.h @@ -35,6 +35,7 @@ #define FFTS_CODEGEN_SSE_H #include <assert.h> +#include <string.h> void neon_x4(float *, size_t, float *); void neon_x8(float *, size_t, float *); @@ -90,8 +91,8 @@ extern const uint32_t sse_leaf_oe_offsets[8]; #define XMM0 (XMM_REG | 0x0) #define XMM1 (XMM_REG | 0x1) -#define XMM2 (XMM_REG | 0x2) -#define XMM3 (XMM_REG | 0x3) +#define XMM2 (XMM_REG | 0x2) +#define XMM3 (XMM_REG | 0x3) #define XMM4 (XMM_REG | 0x4) #define XMM5 (XMM_REG | 0x5) #define XMM6 (XMM_REG | 0x6) @@ -111,7 +112,26 @@ extern const uint32_t sse_leaf_oe_offsets[8]; static void IMM8(uint8_t **p, int32_t imm); static void IMM32(uint8_t **p, int32_t imm); -static void ADDI(uint8_t **p, uint8_t dst, int32_t imm) +static FFTS_INLINE void ADDPS(uint8_t **p, uint8_t reg2, uint8_t reg1) +{ + uint8_t r1 = (reg1 & 7); + uint8_t r2 = (reg2 & 7); + + /* REX prefix */ + if ((reg1 & 8) || (reg2 & 8)) { + *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1); + } + + /* esacape opcode */ + *(*p)++ = 0x0F; + + /* opcode */ + *(*p)++ = 0x58; + *(*p)++ = 0xC0 | r1 | (r2 << 3); +} + +/* Immediate */ +static void ADD_I(uint8_t **p, uint8_t dst, int32_t imm) { if (dst >= 8) { *(*p)++ = 0x49; @@ -201,297 +221,268 @@ static void LEA(uint8_t **p, uint8_t dst, uint8_t base, int32_t disp) ADDRMODE(p, dst, base, disp); } -static FFTS_INLINE void MOV(uint8_t **p, uint8_t reg1, uint8_t reg2, int32_t disp, int is_store) +static FFTS_INLINE void MOVAPS(uint8_t **p, uint8_t reg1, uint8_t reg2, int32_t disp, int is_store) { - uint8_t r1 = (reg1 & 7); - uint8_t r2 = (reg2 & 7); - - if ((reg1 & 8) || (reg2 & 8)) { - *(*p)++ = 0x49; - } else { - *(*p)++ = 0x48; - } - - if (is_store) { - *(*p)++ = 0x89; - } else { - *(*p)++ = 0x8B; - } - - if (disp == 0) { - *(*p)++ = r2 | (r1 << 3); + uint8_t r1 = (reg1 & 7); + uint8_t r2 = (reg2 & 7); + uint8_t r; - if (r2 == 4) { - *(*p)++ = 0x24; - } - } else if (disp <= 127 && disp >= -128) { - *(*p)++ = 0x40 | r2 | (r1 << 3); + /* REX prefix */ + if ((reg1 & 8) || (reg2 & 8)) { + *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1); + } - if (r2 == 4) { - *(*p)++ = 0x24; - } + /* esacape opcode */ + *(*p)++ = 0x0F; - IMM8(p, disp); + /* opcode */ + if (is_store) { + *(*p)++ = 0x29; } else { - *(*p)++ = 0x80 | r2 | (r1 << 3) | (r1 << 11); - - if (r2 == 4) { - *(*p)++ = 0x24; - } - - IMM32(p, disp); + *(*p)++ = 0x28; } -} - -static FFTS_INLINE void MOVAPS(uint8_t **p, uint8_t reg1, uint8_t reg2, int32_t disp, int is_store) -{ - uint8_t r1 = (reg1 & 7); - uint8_t r2 = (reg2 & 7); - uint8_t r; - - /* REX prefix */ - if ((reg1 & 8) || (reg2 & 8)) { - *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1); - } - - /* esacape opcode */ - *(*p)++ = 0x0F; - /* opcode */ - if (is_store) { - *(*p)++ = 0x29; - } else { - *(*p)++ = 0x28; - } + r = r1 | (r2 << 3); - r = r1 | (r2 << 3); - - if ((reg1 & XMM_REG) && (reg2 & XMM_REG)) { - assert(disp == 0); - *(*p)++ = 0xC0 | r; - } else { - assert((reg1 & XMM_REG) || (reg2 & XMM_REG)); + if ((reg1 & XMM_REG) && (reg2 & XMM_REG)) { + assert(disp == 0); + *(*p)++ = 0xC0 | r; + } else { + assert((reg1 & XMM_REG) || (reg2 & XMM_REG)); - if (disp == 0 && r1 != 5) { - *(*p)++ = r; + if (disp == 0 && r1 != 5) { + *(*p)++ = r; - if (r1 == 4) { - *(*p)++ = 0x24; - } - } else { - if (disp <= 127 && disp >= -128) { - *(*p)++ = 0x40 | r; + if (r1 == 4) { + *(*p)++ = 0x24; + } + } else { + if (disp <= 127 && disp >= -128) { + *(*p)++ = 0x40 | r; - if (r1 == 4) { - *(*p)++ = 0x24; - } + if (r1 == 4) { + *(*p)++ = 0x24; + } - IMM8(p, disp); - } else { - *(*p)++ = 0x80 | r; + IMM8(p, disp); + } else { + *(*p)++ = 0x80 | r; - if (r1 == 4) { - *(*p)++ = 0x24; - } + if (r1 == 4) { + *(*p)++ = 0x24; + } - IMM32(p, disp); - } - } - } + IMM32(p, disp); + } + } + } } static FFTS_INLINE void MOVAPS2(uint8_t **p, uint8_t reg1, uint8_t reg2) { - if (reg1 & XMM_REG) { - MOVAPS(p, reg2, reg1, 0, 0); - } else { - MOVAPS(p, reg1, reg2, 0, 1); - } + if (reg1 & XMM_REG) { + MOVAPS(p, reg2, reg1, 0, 0); + } else { + MOVAPS(p, reg1, reg2, 0, 1); + } } static FFTS_INLINE void MOVAPS3(uint8_t **p, uint8_t reg1, int32_t op2, int32_t op3) { - if (reg1 & XMM_REG) { - MOVAPS(p, (uint8_t) op2, reg1, op3, 0); - } else { - MOVAPS(p, reg1, (uint8_t) op3, op2, 1); - } + if (reg1 & XMM_REG) { + MOVAPS(p, (uint8_t) op2, reg1, op3, 0); + } else { + MOVAPS(p, reg1, (uint8_t) op3, op2, 1); + } } static FFTS_INLINE void MOVDQA(uint8_t **p, uint8_t reg1, uint8_t reg2, int32_t disp, int is_store) { - uint8_t r1 = (reg1 & 7); - uint8_t r2 = (reg2 & 7); - uint8_t r; + uint8_t r1 = (reg1 & 7); + uint8_t r2 = (reg2 & 7); + uint8_t r; - /* mandatory prefix */ - *(*p)++ = 0x66; + /* mandatory prefix */ + *(*p)++ = 0x66; - /* REX prefix */ - if ((reg1 & 8) || (reg2 & 8)) { - *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1); - } + /* REX prefix */ + if ((reg1 & 8) || (reg2 & 8)) { + *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1); + } - /* esacape opcode */ - *(*p)++ = 0x0F; + /* esacape opcode */ + *(*p)++ = 0x0F; - /* opcode */ - if (is_store) { - *(*p)++ = 0x7F; - } else { - *(*p)++ = 0x6F; - } + /* opcode */ + if (is_store) { + *(*p)++ = 0x7F; + } else { + *(*p)++ = 0x6F; + } - r = r1 | (r2 << 3); + r = r1 | (r2 << 3); - if ((reg1 & XMM_REG) && (reg2 & XMM_REG)) { - assert(disp == 0); - *(*p)++ = 0xC0 | r; - } else { - assert((reg1 & XMM_REG) || (reg2 & XMM_REG)); + if ((reg1 & XMM_REG) && (reg2 & XMM_REG)) { + assert(disp == 0); + *(*p)++ = 0xC0 | r; + } else { + assert((reg1 & XMM_REG) || (reg2 & XMM_REG)); - if (disp == 0 && r1 != 5) { - *(*p)++ = r; + if (disp == 0 && r1 != 5) { + *(*p)++ = r; - if (r1 == 4) { - *(*p)++ = 0x24; - } - } else { - if (disp <= 127 && disp >= -128) { - *(*p)++ = 0x40 | r; + if (r1 == 4) { + *(*p)++ = 0x24; + } + } else { + if (disp <= 127 && disp >= -128) { + *(*p)++ = 0x40 | r; - if (r1 == 4) { - *(*p)++ = 0x24; - } + if (r1 == 4) { + *(*p)++ = 0x24; + } - IMM8(p, disp); - } else { - *(*p)++ = 0x80 | r; + IMM8(p, disp); + } else { + *(*p)++ = 0x80 | r; - if (r1 == 4) { - *(*p)++ = 0x24; - } + if (r1 == 4) { + *(*p)++ = 0x24; + } - IMM32(p, disp); - } - } - } + IMM32(p, disp); + } + } + } } static FFTS_INLINE void MOVDQA2(uint8_t **p, uint8_t reg1, uint8_t reg2) { - if (reg1 & XMM_REG) { - MOVDQA(p, reg2, reg1, 0, 0); - } else { - MOVDQA(p, reg1, reg2, 0, 1); - } + if (reg1 & XMM_REG) { + MOVDQA(p, reg2, reg1, 0, 0); + } else { + MOVDQA(p, reg1, reg2, 0, 1); + } } static FFTS_INLINE void MOVDQA3(uint8_t **p, uint8_t reg1, int32_t op2, int32_t op3) { - if (reg1 & XMM_REG) { - MOVDQA(p, (uint8_t) op2, reg1, op3, 0); - } else { - MOVDQA(p, reg1, (uint8_t) op3, op2, 1); - } + if (reg1 & XMM_REG) { + MOVDQA(p, (uint8_t) op2, reg1, op3, 0); + } else { + MOVDQA(p, reg1, (uint8_t) op3, op2, 1); + } } -static void MOVI(uint8_t **p, uint8_t dst, uint64_t imm) +static FFTS_INLINE void MOV_D(uint8_t **p, uint8_t reg1, uint8_t reg2, int32_t disp, int is_store) { - /* REX prefix */ - if (dst >= 8 || imm > UINT32_MAX) { - uint8_t val = 0x40; - - if (dst >= 8) { - val |= 1; - } + uint8_t r1 = (reg1 & 7); + uint8_t r2 = (reg2 & 7); - if (imm > UINT32_MAX) { - val |= 8; - } + if ((reg1 & 8) || (reg2 & 8)) { + *(*p)++ = 0x49; + } else { + *(*p)++ = 0x48; + } - *(*p)++ = val; + if (is_store) { + *(*p)++ = 0x89; + } else { + *(*p)++ = 0x8B; } - /* opcode */ - *(*p)++ = 0xb8 | (dst & 0x7); - - if (imm > UINT32_MAX) { - IMM64(p, imm); - } else { - IMM32(p, imm); - } -} + if (disp == 0) { + *(*p)++ = r2 | (r1 << 3); -static FFTS_INLINE void MULPS(uint8_t **p, uint8_t reg1, uint8_t reg2, int32_t disp, int is_store) -{ - uint8_t r1 = (reg1 & 7); - uint8_t r2 = (reg2 & 7); - uint8_t r; + if (r2 == 4) { + *(*p)++ = 0x24; + } + } else if (disp <= 127 && disp >= -128) { + *(*p)++ = 0x40 | r2 | (r1 << 3); - /* REX prefix */ - if ((reg1 & 8) || (reg2 & 8)) { - *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1); - } + if (r2 == 4) { + *(*p)++ = 0x24; + } - /* esacape opcode */ - *(*p)++ = 0x0F; - - /* opcode */ - *(*p)++ = 0x59; - - r = r1 | (r2 << 3); + IMM8(p, disp); + } else { + *(*p)++ = 0x80 | r2 | (r1 << 3) | (r1 << 11); - if ((reg1 & XMM_REG) && (reg2 & XMM_REG)) { - assert(disp == 0); - *(*p)++ = 0xC0 | r; - } else { - assert((reg1 & XMM_REG) || (reg2 & XMM_REG)); + if (r2 == 4) { + *(*p)++ = 0x24; + } - if (disp == 0 && r1 != 5) { - *(*p)++ = r; + IMM32(p, disp); + } +} - if (r1 == 4) { - *(*p)++ = 0x24; - } - } else { - if (disp <= 127 && disp >= -128) { - *(*p)++ = 0x40 | r; +static void MOV_I(uint8_t **p, uint8_t dst, uint64_t imm) +{ + /* REX prefix */ + if (dst >= 8 || imm > UINT32_MAX) { + uint8_t val = 0x40; - if (r1 == 4) { - *(*p)++ = 0x24; - } + if (dst >= 8) { + val |= 1; + } - IMM8(p, disp); - } else { - *(*p)++ = 0x80 | r; + if (imm > UINT32_MAX) { + val |= 8; + } - if (r1 == 4) { - *(*p)++ = 0x24; - } + *(*p)++ = val; + } - IMM32(p, disp); - } - } - } + /* opcode */ + *(*p)++ = 0xb8 | (dst & 0x7); + + if (imm > UINT32_MAX) { + IMM64(p, imm); + } else { + IMM32(p, imm); + } } -static FFTS_INLINE void MULPS2(uint8_t **p, uint8_t reg1, uint8_t reg2) +static FFTS_INLINE void MOV_R(uint8_t **p, uint8_t reg1, uint8_t reg2, int is_store) { - if (reg1 & XMM_REG) { - MULPS(p, reg2, reg1, 0, 0); - } else { - MULPS(p, reg1, reg2, 0, 1); - } + uint8_t r1 = (reg1 & 7); + uint8_t r2 = (reg2 & 7); + + if ((reg1 & 8) || (reg2 & 8)) { + *(*p)++ = 0x48 | ((reg2 & 8) >> 3) | ((reg1 & 8) >> 1); + } else { + *(*p)++ = 0x48; + } + + if (is_store) { + *(*p)++ = 0x89; + } else { + *(*p)++ = 0x8B; + } + + *(*p)++ = 0xC0 | r2 | (r1 << 3); + + if (r2 == 4) { + *(*p)++ = 0x24; + } } -static FFTS_INLINE void MULPS3(uint8_t **p, uint8_t reg1, int32_t op2, int32_t op3) +static FFTS_INLINE void MULPS(uint8_t **p, uint8_t reg2, uint8_t reg1) { - if (reg1 & XMM_REG) { - MULPS(p, (uint8_t) op2, reg1, op3, 0); - } else { - MULPS(p, reg1, (uint8_t) op3, op2, 1); - } + uint8_t r1 = (reg1 & 7); + uint8_t r2 = (reg2 & 7); + + /* REX prefix */ + if ((reg1 & 8) || (reg2 & 8)) { + *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1); + } + + /* esacape opcode */ + *(*p)++ = 0x0F; + + /* opcode */ + *(*p)++ = 0x59; + *(*p)++ = 0xC0 | r1 | (r2 << 3); } static void POP(uint8_t **p, uint8_t reg) @@ -546,7 +537,48 @@ static void SHIFT(uint8_t **p, uint8_t reg, int shift) } } -static void SUBI(uint8_t **p, uint8_t dst, int32_t imm) +static FFTS_INLINE void SHUFPS(uint8_t **p, uint8_t reg2, uint8_t reg1, const int select) +{ + uint8_t r1 = (reg1 & 7); + uint8_t r2 = (reg2 & 7); + uint8_t r; + + /* REX prefix */ + if ((reg1 & 8) || (reg2 & 8)) { + *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1); + } + + /* esacape opcode */ + *(*p)++ = 0x0F; + + /* opcode */ + *(*p)++ = 0xC6; + + r = r1 | (r2 << 3); + + *(*p)++ = 0xC0 | r; + *(*p)++ = (select & 0xFF); +} + +static FFTS_INLINE void SUBPS(uint8_t **p, uint8_t reg2, uint8_t reg1) +{ + uint8_t r1 = (reg1 & 7); + uint8_t r2 = (reg2 & 7); + + /* REX prefix */ + if ((reg1 & 8) || (reg2 & 8)) { + *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1); + } + + /* esacape opcode */ + *(*p)++ = 0x0F; + + /* opcode */ + *(*p)++ = 0x5C; + *(*p)++ = 0xC0 | r1 | (r2 << 3); +} + +static void SUB_I(uint8_t **p, uint8_t dst, int32_t imm) { if (dst >= 8) { *(*p)++ = 0x49; @@ -571,12 +603,34 @@ static void SUBI(uint8_t **p, uint8_t dst, int32_t imm) static FFTS_INLINE void XOR2(uint8_t **p, uint8_t reg1, uint8_t reg2) { - if ((reg1 & 8) || (reg2 & 8)) { - *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1); - } + uint8_t r1 = (reg1 & 7); + uint8_t r2 = (reg2 & 7); + + /* REX prefix */ + if ((reg1 & 8) || (reg2 & 8)) { + *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1); + } + + *(*p)++ = 0x31; + *(*p)++ = 0xC0 | r2 | (r1 << 3); +} + +static FFTS_INLINE void XORPS(uint8_t **p, uint8_t reg2, uint8_t reg1) +{ + uint8_t r1 = (reg1 & 7); + uint8_t r2 = (reg2 & 7); + + /* REX prefix */ + if ((reg1 & 8) || (reg2 & 8)) { + *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1); + } + + /* esacape opcode */ + *(*p)++ = 0x0F; - *(*p)++ = 0x31; - *(*p)++ = 0xC0 | (reg2 & 7) | ((reg1 & 7) << 3); + /* opcode */ + *(*p)++ = 0x57; + *(*p)++ = 0xC0 | r1 | (r2 << 3); } static FFTS_INLINE void ffts_insert_nops(uint8_t **p, uint32_t count) @@ -662,78 +716,71 @@ static FFTS_INLINE void ffts_align_mem16(uint8_t **p, uint32_t offset) ffts_insert_nops(p, r); } -static FFTS_INLINE insns_t* generate_size4_base_case(insns_t **fp, int sign) -{ - insns_t *x_4_addr; - size_t len; - - /* align call destination */ - ffts_align_mem16(fp, 0); - x_4_addr = *fp; - - /* copy function */ - assert((char*) x8_soft > (char*) x4); - len = (char*) x8_soft - (char*) x4; - memcpy(*fp, x4, len); - *fp += len; - - return x_4_addr; -} - -static FFTS_INLINE insns_t* generate_size8_base_case(insns_t **fp, int sign) +static FFTS_INLINE void generate_epilogue(insns_t **fp) { - insns_t *x_8_addr; - size_t len; - - /* align call destination */ - ffts_align_mem16(fp, 0); - x_8_addr = *fp; - - /* align loop/jump destination */ #ifdef _M_X64 - ffts_align_mem16(fp, 6); + /* restore nonvolatile registers */ + MOVDQA3(fp, XMM6, RSP, 0); + MOVDQA3(fp, XMM7, RSP, 16); + MOVDQA3(fp, XMM8, RSP, 32); + MOVDQA3(fp, XMM9, RSP, 48); + MOVDQA3(fp, XMM10, RSP, 64); + MOVDQA3(fp, XMM11, RSP, 80); + MOVDQA3(fp, XMM12, RSP, 96); + MOVDQA3(fp, XMM13, RSP, 112); + MOVDQA3(fp, XMM14, RSP, 128); + MOVDQA3(fp, XMM15, RSP, 144); + + /* restore stack */ + ADD_I(fp, RSP, 168); + + /* restore the last 3 registers from the shadow space */ + MOV_D(fp, RBX, RSP, 8, 0); + MOV_D(fp, RSI, RSP, 16, 0); + MOV_D(fp, RDI, RSP, 24, 0); #else - ffts_align_mem16(fp, 5); + POP(fp, R15); + POP(fp, R14); + POP(fp, R13); + POP(fp, R12); + POP(fp, R11); + POP(fp, R10); + POP(fp, RBX); + POP(fp, RBP); #endif - /* copy function */ - assert((char*) x8_soft_end > (char*) x8_soft); - len = (char*) x8_soft_end - (char*) x8_soft; - memcpy(*fp, x8_soft, len); - *fp += len; - - return x_8_addr; + RET(fp); } static FFTS_INLINE insns_t* generate_prologue(insns_t **fp, ffts_plan_t *p) { - insns_t *start; + insns_t *start; - /* align call destination */ - ffts_align_mem16(fp, 0); - start = *fp; + /* align call destination */ + ffts_align_mem16(fp, 0); + start = *fp; /* save nonvolatile registers */ #ifdef _M_X64 /* use the shadow space to save first 3 registers */ - MOV(fp, RBX, RSP, 8, 1); - MOV(fp, RSI, RSP, 16, 1); - MOV(fp, RDI, RSP, 24, 1); + MOV_D(fp, RBX, RSP, 8, 1); + MOV_D(fp, RSI, RSP, 16, 1); + MOV_D(fp, RDI, RSP, 24, 1); /* reserve space.. */ - SUBI(fp, RSP, 168); - - /* to save XMM6-XMM15 registers */ - MOVDQA3(fp, RSP, 0, XMM6); - MOVDQA3(fp, RSP, 16, XMM7); - MOVDQA3(fp, RSP, 32, XMM8); - MOVDQA3(fp, RSP, 48, XMM9); - MOVDQA3(fp, RSP, 64, XMM10); - MOVDQA3(fp, RSP, 80, XMM11); - MOVDQA3(fp, RSP, 96, XMM12); - MOVDQA3(fp, RSP, 112, XMM13); - MOVDQA3(fp, RSP, 128, XMM14); - MOVDQA3(fp, RSP, 144, XMM15); + SUB_I(fp, RSP, 168); + + /* to save XMM6-XMM15 registers */ + MOVDQA3(fp, RSP, 0, XMM6); + MOVDQA3(fp, RSP, 16, XMM7); + MOVDQA3(fp, RSP, 32, XMM8); + MOVDQA3(fp, RSP, 48, XMM9); + MOVDQA3(fp, RSP, 64, XMM10); + MOVDQA3(fp, RSP, 80, XMM11); + MOVDQA3(fp, RSP, 96, XMM12); + MOVDQA3(fp, RSP, 112, XMM13); + MOVDQA3(fp, RSP, 128, XMM14); + MOVDQA3(fp, RSP, 144, XMM15); #else PUSH(fp, RBP); PUSH(fp, RBX); @@ -745,43 +792,425 @@ static FFTS_INLINE insns_t* generate_prologue(insns_t **fp, ffts_plan_t *p) PUSH(fp, R15); #endif - return start; + return start; } -static FFTS_INLINE void generate_epilogue(insns_t **fp) +static FFTS_INLINE void generate_transform_init(insns_t **fp) { #ifdef _M_X64 - /* restore nonvolatile registers */ - MOVDQA3(fp, XMM6, RSP, 0); - MOVDQA3(fp, XMM7, RSP, 16); - MOVDQA3(fp, XMM8, RSP, 32); - MOVDQA3(fp, XMM9, RSP, 48); - MOVDQA3(fp, XMM10, RSP, 64); - MOVDQA3(fp, XMM11, RSP, 80); - MOVDQA3(fp, XMM12, RSP, 96); - MOVDQA3(fp, XMM13, RSP, 112); - MOVDQA3(fp, XMM14, RSP, 128); - MOVDQA3(fp, XMM15, RSP, 144); - - /* restore stack */ - ADDI(fp, RSP, 168); + /* generate function */ + MOVAPS2(fp, XMM3, RSI); - /* restore the last 3 registers from the shadow space */ - MOV(fp, RBX, RSP, 8, 0); - MOV(fp, RSI, RSP, 16, 0); - MOV(fp, RDI, RSP, 24, 0); + /* set "pointer" to twiddle factors */ + MOV_D(fp, RDI, RCX, 0x20, 0); #else - POP(fp, R15); - POP(fp, R14); - POP(fp, R13); - POP(fp, R12); - POP(fp, R11); - POP(fp, R10); - POP(fp, RBX); - POP(fp, RBP); + size_t len; + + /* copy function */ + assert((char*) x4 > (char*) x_init); + len = (char*) x4 - (char*) x_init; + memcpy(*fp, x_init, len); + *fp += len; #endif +} + +static FFTS_INLINE insns_t* generate_size4_base_case(insns_t **fp, int sign) +{ + insns_t *x_4_addr; + size_t len; + + /* align call destination */ + ffts_align_mem16(fp, 0); + x_4_addr = *fp; + /* copy function */ + assert((char*) x8_soft > (char*) x4); + len = (char*) x8_soft - (char*) x4; + memcpy(*fp, x4, len); + *fp += len; + + return x_4_addr; +} + +static FFTS_INLINE insns_t* generate_size8_base_case(insns_t **fp, int sign) +{ + insns_t *x_8_addr; +#ifdef _M_X64 + insns_t *x8_soft_loop; +#else + size_t len; +#endif + + /* align call destination */ + ffts_align_mem16(fp, 0); + x_8_addr = *fp; + + /* align loop/jump destination */ +#ifdef _M_X64 + ffts_align_mem16(fp, 6); +#else + ffts_align_mem16(fp, 5); +#endif + +#ifdef _M_X64 + /* input */ + MOV_R(fp, RDI, RAX, 1); + + /* output */ + MOV_R(fp, R8, RCX, 1); + + /* lea rdx, [r8 + rbx] */ + /* loop stop (output + output_stride) */ + *(*fp)++ = 0x49; + *(*fp)++ = 0x8D; + *(*fp)++ = 0x14; + *(*fp)++ = 0x18; + + /* lea rsi, [rbx + rbx*2] */ + /* 3 * output_stride */ + *(*fp)++ = 0x48; + *(*fp)++ = 0x8D; + *(*fp)++ = 0x34; + *(*fp)++ = 0x5B; + + /* lea r10, [rbx + rbx*4] */ + /* 5 * output_stride */ + *(*fp)++ = 0x4C; + *(*fp)++ = 0x8D; + *(*fp)++ = 0x14; + *(*fp)++ = 0x9B; + + /* lea r11, [rsi + rbx*4] */ + /* 7 * output_stride */ + *(*fp)++ = 0x4C; + *(*fp)++ = 0x8D; + *(*fp)++ = 0x1C; + *(*fp)++ = 0x9E; + + x8_soft_loop = *fp; + assert(!(((uintptr_t) x8_soft_loop) & 0xF)); + + /* movaps xmm9, [rax] */ + /* input + 0 * input_stride */ + *(*fp)++ = 0x44; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0x08; + + /* movaps xmm6, [rcx + rbx*2] */ + /* output + 2 * output_stride */ + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0x34; + *(*fp)++ = 0x59; + + /* movaps xmm11, xmm9 */ + *(*fp)++ = 0x45; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0xD9; + + /* movaps xmm7, [rcx + rsi] */ + /* output + 3 * output_stride */ + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0x3C; + *(*fp)++ = 0x31; + + /* movaps xmm8, [rax + 0x10] */ + /* input + 1 * input_stride */ + *(*fp)++ = 0x44; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0x40; + *(*fp)++ = 0x10; + + MULPS(fp, XMM11, XMM6); + MULPS(fp, XMM9, XMM7); + SHUFPS(fp, XMM6, XMM6, 0xB1); + MULPS(fp, XMM6, XMM8); + SHUFPS(fp, XMM7, XMM7, 0xB1); + SUBPS(fp, XMM11, XMM6); + MULPS(fp, XMM8, XMM7); + + /* movaps xmm10, xmm11 */ + *(*fp)++ = 0x45; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0xD3; + + ADDPS(fp, XMM9, XMM8); + + /* movaps xmm15, [rax + 0x20] */ + /* input + 2 * input_stride */ + *(*fp)++ = 0x44; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0x78; + *(*fp)++ = 0x20; + + ADDPS(fp, XMM10, XMM9); + SUBPS(fp, XMM11, XMM9); + + /* movaps xmm5, [rcx] */ + /* output + 0 * output_stride */ + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0x29; + + /* movaps xmm6,xmm15 */ + *(*fp)++ = 0x41; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0xF7; + + /* movaps xmm12, [rcx + rbx*4] */ + /* output + 4 * output_stride */ + *(*fp)++ = 0x44; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0x24; + *(*fp)++ = 0x99; + + /* movaps xmm2, xmm5 */ + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0xD5; + + /* movaps xmm13, [rcx + rsi*2] */ + /* output + 6 * output_stride */ + *(*fp)++ = 0x44; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0x2C; + *(*fp)++ = 0x71; + + XORPS(fp, XMM11, XMM3); + + /* movaps xmm14, [rax + 0x30] */ + /* input + 3 * input_stride */ + *(*fp)++ = 0x44; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0x70; + *(*fp)++ = 0x30; + + SUBPS(fp, XMM2, XMM10); + MULPS(fp, XMM6, XMM12); + ADDPS(fp, XMM5, XMM10); + MULPS(fp, XMM15, XMM13); + + /* movaps xmm10, [rax + 0x40] */ + *(*fp)++ = 0x44; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0x50; + *(*fp)++ = 0x40; + + /* movaps xmm0, xmm5 */ + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0xC5; + + SHUFPS(fp, XMM12, XMM12, 0xB1); + SHUFPS(fp, XMM13, XMM13, 0xB1); + MULPS(fp, XMM12, XMM14); + MULPS(fp, XMM14, XMM13); + SUBPS(fp, XMM6, XMM12); + ADDPS(fp, XMM15, XMM14); + + /* movaps xmm7, [rcx + r10] */ + *(*fp)++ = 0x42; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0x3C; + *(*fp)++ = 0x11; + + /* movaps xmm13, xmm10 */ + *(*fp)++ = 0x45; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0xEA; + + /* movaps xmm8, [rcx + r11] */ + *(*fp)++ = 0x46; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0x04; + *(*fp)++ = 0x19; + + /* movaps xmm12, xmm6 */ + *(*fp)++ = 0x44; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0xE6; + + /* movaps xmm9, [rax + 0x50] */ + *(*fp)++ = 0x44; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0x48; + *(*fp)++ = 0x50; + + /* input + 6 * input_stride */ + ADD_I(fp, RAX, 0x60); + + MULPS(fp, XMM13, XMM7); + SUBPS(fp, XMM6, XMM15); + ADDPS(fp, XMM12, XMM15); + MULPS(fp, XMM10, XMM8); + SUBPS(fp, XMM0, XMM12); + ADDPS(fp, XMM5, XMM12); + SHUFPS(fp, XMM7, XMM7, 0xB1); + XORPS(fp, XMM6, XMM3); + SHUFPS(fp, XMM8, XMM8, 0xB1); + + /* movaps xmm12, xmm2 */ + *(*fp)++ = 0x44; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0xE2; + + MULPS(fp, XMM7, XMM9); + MULPS(fp, XMM9, XMM8); + SUBPS(fp, XMM13, XMM7); + ADDPS(fp, XMM10, XMM9); + + /* movaps xmm4, [rcx + rbx] */ + /* output + 1 * output_stride */ + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0x24; + *(*fp)++ = 0x19; + + SHUFPS(fp, XMM11, XMM11, 0xB1); + + /* movaps xmm1, xmm4 */ + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0xCC; + + SHUFPS(fp, XMM6, XMM6, 0xB1); + ADDPS(fp, XMM1, XMM11); + SUBPS(fp, XMM4, XMM11); + ADDPS(fp, XMM12, XMM6); + SUBPS(fp, XMM2, XMM6); + + /* movaps xmm11, xmm13 */ + *(*fp)++ = 0x45; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0xDD; + + /* movaps xmm14, xmm4 */ + *(*fp)++ = 0x44; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0xF4; + + /* movaps xmm6, xmm1 */ + *(*fp)++ = 0x0F; + *(*fp)++ = 0x28; + *(*fp)++ = 0xF1; + + SUBPS(fp, XMM13, XMM10); + ADDPS(fp, XMM11, XMM10); + XORPS(fp, XMM13, XMM3); + ADDPS(fp, XMM4, XMM11); + SUBPS(fp, XMM14, XMM11); + SHUFPS(fp, XMM13, XMM13, 0xB1); + + /* movaps [rcx], xmm5 */ + /* output + 0 * output_stride */ + *(*fp)++ = 0x0F; + *(*fp)++ = 0x29; + *(*fp)++ = 0x29; + + /* movaps [rcx + rbx], xmm4 */ + /* output + 1 * output_stride */ + *(*fp)++ = 0x0F; + *(*fp)++ = 0x29; + *(*fp)++ = 0x24; + *(*fp)++ = 0x19; + + /* movaps [rcx + rbx*2], xmm2 */ + /* output + 2 * output_stride */ + *(*fp)++ = 0x0F; + *(*fp)++ = 0x29; + *(*fp)++ = 0x14; + *(*fp)++ = 0x59; + + SUBPS(fp, XMM1, XMM13); + ADDPS(fp, XMM6, XMM13); + + /* movaps [rcx + rsi], xmm1 */ + /* output + 3 * output_stride */ + *(*fp)++ = 0x0F; + *(*fp)++ = 0x29; + *(*fp)++ = 0x0C; + *(*fp)++ = 0x31; + + /* movaps [rcx + rbx*4], xmm0 */ + /* output + 4 * output_stride */ + *(*fp)++ = 0x0F; + *(*fp)++ = 0x29; + *(*fp)++ = 0x04; + *(*fp)++ = 0x99; + + /* movaps [rcx + r10], xmm14 */ + /* output + 5 * output_stride */ + *(*fp)++ = 0x46; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x29; + *(*fp)++ = 0x34; + *(*fp)++ = 0x11; + + /* movaps [rcx + rsi*2], xmm12 */ + /* output + 6 * output_stride */ + *(*fp)++ = 0x44; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x29; + *(*fp)++ = 0x24; + *(*fp)++ = 0x71; + + /* movaps [rcx + r11], xmm6 */ + /* output + 7 * output_stride */ + *(*fp)++ = 0x42; + *(*fp)++ = 0x0F; + *(*fp)++ = 0x29; + *(*fp)++ = 0x34; + *(*fp)++ = 0x19; + + /* add rcx, 0x10 */ + *(*fp)++ = 0x48; + *(*fp)++ = 0x83; + *(*fp)++ = 0xC1; + *(*fp)++ = 0x10; + + /* cmp rcx, rdx */ + *(*fp)++ = 0x48; + *(*fp)++ = 0x39; + *(*fp)++ = 0xD1; + + /* jne [x8_soft_loop] */ + *(*fp)++ = 0x0F; + *(*fp)++ = 0x85; + *(*fp)++ = 0x9E; + *(*fp)++ = 0xFE; + *(*fp)++ = 0xFF; + *(*fp)++ = 0xFF; + + /* ret */ RET(fp); +#else + /* copy function */ + assert((char*) x8_soft_end >= (char*) x8_soft); + len = (char*) x8_soft_end - (char*) x8_soft; + memcpy(*fp, x8_soft, len); + *fp += len; +#endif + + return x_8_addr; } #endif /* FFTS_CODEGEN_SSE_H */
\ No newline at end of file diff --git a/src/sequitur.h b/src/sequitur.h index 7429115..d459c56 100644 --- a/src/sequitur.h +++ b/src/sequitur.h @@ -32,417 +32,417 @@ */ typedef struct _sym_t { - int c; - struct _sym_t *pPrev, *pNext; - struct _seq_rule_t *r; - int offset; + int c; + struct _sym_t *pPrev, *pNext; + struct _seq_rule_t *r; + int offset; } sym_t; typedef struct _seq_rule_t { - int c; - sym_t *ss; - struct _seq_rule_t *pPrev, *pNext; - int count; - int length; + int c; + sym_t *ss; + struct _seq_rule_t *pPrev, *pNext; + int count; + int length; } seq_rule_t; void sym_tail_insert(sym_t **ss, sym_t *s) { - if (!*ss) { - *ss = s; - s->pPrev = s->pNext = NULL; - } else { - while (*ss) { - s->pPrev = *ss; - ss = &(*ss)->pNext; - } - - *ss = s; - } + if (!*ss) { + *ss = s; + s->pPrev = s->pNext = NULL; + } else { + while (*ss) { + s->pPrev = *ss; + ss = &(*ss)->pNext; + } + + *ss = s; + } } sym_t* sym_init(int c) { - sym_t *s; - - s = (sym_t*) malloc(sizeof(*s)); - if (!s) { - return NULL; - } - - s->c = c; - s->pPrev = s->pNext = NULL; - s->r = NULL; - - return s; + sym_t *s; + + s = (sym_t*) malloc(sizeof(*s)); + if (!s) { + return NULL; + } + + s->c = c; + s->pPrev = s->pNext = NULL; + s->r = NULL; + + return s; } sym_t* sym_init_from_sym(sym_t *s2) { - sym_t *s; + sym_t *s; - s = (sym_t*) malloc(sizeof(*s)); - if (!s) { - return NULL; - } + s = (sym_t*) malloc(sizeof(*s)); + if (!s) { + return NULL; + } - s->c = s2->c; - s->pPrev = s->pNext = NULL; - s->r = s2->r; - s->offset = s2->offset; + s->c = s2->c; + s->pPrev = s->pNext = NULL; + s->r = s2->r; + s->offset = s2->offset; - return s; + return s; } seq_rule_t* seq_init_rule(int c) { - seq_rule_t *G; - - G = (seq_rule_t *)malloc(sizeof(*G)); - if (!G) { - return NULL; - } - - G->c = c; - G->count = 2; - G->ss = NULL; - G->pPrev = NULL; - G->pNext = NULL; - - return G; + seq_rule_t *G; + + G = (seq_rule_t *)malloc(sizeof(*G)); + if (!G) { + return NULL; + } + + G->c = c; + G->count = 2; + G->ss = NULL; + G->pPrev = NULL; + G->pNext = NULL; + + return G; } seq_rule_t* seq_grammer_insert_new_rule(seq_rule_t *G, char r, sym_t *a, sym_t *b) { - sym_t *sa, *sb; + sym_t *sa, *sb; - while (G->pNext) { - G = G->pNext; - } + while (G->pNext) { + G = G->pNext; + } - G->pNext = seq_init_rule(r); - if (!G->pNext) { - return NULL; - } + G->pNext = seq_init_rule(r); + if (!G->pNext) { + return NULL; + } - sa = sym_init_from_sym(a); - if (!sa) { - goto cleanup_pnext; - } + sa = sym_init_from_sym(a); + if (!sa) { + goto cleanup_pnext; + } - sb = sym_init_from_sym(b); - if (!sb) { - goto cleanup_sa; - } + sb = sym_init_from_sym(b); + if (!sb) { + goto cleanup_sa; + } - sb->offset = sb->offset - sa->offset; - sa->offset = 0; - sym_tail_insert(&G->pNext->ss, sa); - sym_tail_insert(&G->pNext->ss, sb); - return G->pNext; + sb->offset = sb->offset - sa->offset; + sa->offset = 0; + sym_tail_insert(&G->pNext->ss, sa); + sym_tail_insert(&G->pNext->ss, sb); + return G->pNext; cleanup_sa: - free(sa); + free(sa); cleanup_pnext: - free(G->pNext); - G->pNext = NULL; + free(G->pNext); + G->pNext = NULL; - return NULL; + return NULL; } sym_t* sym_match_digram(sym_t *s, sym_t *term, sym_t *a, sym_t *b) { - while (s != term) { - if (s->c == a->c && s->pNext->c == b->c && - s->pNext->offset - s->offset == b->offset-a->offset) { - return s; - } + while (s != term) { + if (s->c == a->c && s->pNext->c == b->c && + s->pNext->offset - s->offset == b->offset-a->offset) { + return s; + } - s = s->pNext; - } + s = s->pNext; + } - return NULL; + return NULL; } seq_rule_t* seq_match_digram(seq_rule_t *R, sym_t *a, sym_t *b) { - while (R) { - if (R->ss->c == a->c && R->ss->pNext->c == b->c && - R->ss->pNext->offset - R->ss->offset == b->offset - a->offset) { - return R; - } + while (R) { + if (R->ss->c == a->c && R->ss->pNext->c == b->c && + R->ss->pNext->offset - R->ss->offset == b->offset - a->offset) { + return R; + } - R = R->pNext; - } + R = R->pNext; + } - return NULL; + return NULL; } sym_t* sym_tail(sym_t *s) { - while (s->pNext) { - s = s->pNext; - } + while (s->pNext) { + s = s->pNext; + } - return s; + return s; } int sym_count(sym_t *s) { - int count = 0; + int count = 0; - while (s) { - count++; - s = s->pNext; - } + while (s) { + count++; + s = s->pNext; + } - return count; + return count; } sym_t* sym_copylist(sym_t *s) { - sym_t *head = NULL; - sym_t *prev = head; - - while (s) { - sym_t *copy = sym_init_from_sym(s); - if (!copy) { - return NULL; - } - - copy->pPrev = prev; - - if (prev) { - prev->pNext = copy; - } - - if (!head) { - head = copy; - } - - prev = copy; - s = s->pNext; - } - - return head; + sym_t *head = NULL; + sym_t *prev = head; + + while (s) { + sym_t *copy = sym_init_from_sym(s); + if (!copy) { + return NULL; + } + + copy->pPrev = prev; + + if (prev) { + prev->pNext = copy; + } + + if (!head) { + head = copy; + } + + prev = copy; + s = s->pNext; + } + + return head; } void seq_enforce_uniqueness(seq_rule_t *G) { - seq_rule_t *R = G;//->pNext; - seq_rule_t **ppr = &G->pNext; + seq_rule_t *R = G;//->pNext; + seq_rule_t **ppr = &G->pNext; - while (R) { - if (R == G || R->count > 1) { - sym_t *s = R->ss; - sym_t **pp = &R->ss; + while (R) { + if (R == G || R->count > 1) { + sym_t *s = R->ss; + sym_t **pp = &R->ss; - while (s) { - if (s->r && s->r->count == 1) { - sym_t *temp_itr; + while (s) { + if (s->r && s->r->count == 1) { + sym_t *temp_itr; + + *pp = s->r->ss; - *pp = s->r->ss; - temp_itr = s->r->ss; while (temp_itr) { temp_itr->offset += s->offset; temp_itr = temp_itr->pNext; } - - s->r->ss->pPrev = s->pPrev; - if (s->pNext) { - s->pNext->pPrev = sym_tail(s->r->ss); - } - - sym_tail(s->r->ss)->pNext = s->pNext; - s = s->r->ss; - continue; - } - - pp = &s->pNext; - s = s->pNext; - } - - ppr = &R->pNext; - } else { - *ppr = R->pNext; - } - - R = R->pNext; - } + + s->r->ss->pPrev = s->pPrev; + if (s->pNext) { + s->pNext->pPrev = sym_tail(s->r->ss); + } + + sym_tail(s->r->ss)->pNext = s->pNext; + s = s->r->ss; + continue; + } + + pp = &s->pNext; + s = s->pNext; + } + + ppr = &R->pNext; + } else { + *ppr = R->pNext; + } + + R = R->pNext; + } } void seq_merge_small_rules(seq_rule_t *G, int thresh) { - seq_rule_t *R = G; + seq_rule_t *R = G; - while (R) { - if (sym_count(R->ss) <= thresh) { + while (R) { + if (sym_count(R->ss) <= thresh) { //printf("count %d > %d for %d\n", sym_count(R->ss), thresh, R->c); - sym_t *s = R->ss; - sym_t **pp = &R->ss; - - while (s) { - if (s->r) { - sym_t *copylist; - sym_t *copylist_itr; - - s->r->count--; - - copylist = sym_copylist(s->r->ss); - if (!copylist) { - return; - } + sym_t *s = R->ss; + sym_t **pp = &R->ss; + + while (s) { + if (s->r) { + sym_t *copylist; + sym_t *copylist_itr; + + s->r->count--; + + copylist = sym_copylist(s->r->ss); + if (!copylist) { + return; + } copylist_itr = copylist; while (copylist_itr) { copylist_itr->offset += s->offset; copylist_itr = copylist_itr->pNext; } - - *pp = copylist; - copylist->pPrev = s->pPrev; - if (s->pNext) { - s->pNext->pPrev = sym_tail(copylist); - } - - sym_tail(copylist)->pNext = s->pNext; - pp = &(sym_tail(copylist)->pNext); - s = sym_tail(copylist)->pNext; - continue; - } - - pp = &s->pNext; - s = s->pNext; - } - } - - R = R->pNext; - } + + *pp = copylist; + copylist->pPrev = s->pPrev; + if (s->pNext) { + s->pNext->pPrev = sym_tail(copylist); + } + + sym_tail(copylist)->pNext = s->pNext; + pp = &(sym_tail(copylist)->pNext); + s = sym_tail(copylist)->pNext; + continue; + } + + pp = &s->pNext; + s = s->pNext; + } + } + + R = R->pNext; + } seq_enforce_uniqueness(G); } void seq_extract_hierarchy(seq_rule_t *G) { - int next_rule = -2; - sym_t *cursym = G->ss; - - while (cursym) { - sym_t *m = NULL; - seq_rule_t *mr = NULL; - - if (cursym->pPrev && cursym->pPrev->pPrev) { - mr = seq_match_digram(G->pNext, cursym->pPrev, cursym); - if (mr) { - if (cursym->pPrev->r) { - cursym->pPrev->r->count--; - } - - if(cursym->r) { - cursym->r->count--; - } - - mr->count++; - - cursym->pPrev->r = mr; - cursym->pPrev->c = mr->c; - cursym->pPrev->pNext = cursym->pNext; - cursym->pNext->pPrev = cursym->pPrev; - cursym = cursym->pPrev; - } - - m = sym_match_digram(G->ss, cursym->pPrev->pPrev, cursym->pPrev, cursym); - if (m) { - seq_rule_t *newr; - - if (cursym->pPrev->r) { - cursym->pPrev->r->count--; - } - - if (cursym->r) { - cursym->r->count--; - } - - newr = seq_grammer_insert_new_rule(G, next_rule, m, m->pNext); - if (!newr) { - return; - } - - m->r = newr; - m->c = next_rule; - m->pNext = m->pNext->pNext; - m->pNext->pPrev = m; - - cursym->pPrev->r = newr; - cursym->pPrev->c = next_rule; - cursym->pPrev->pNext = cursym->pNext; - cursym->pNext->pPrev = cursym->pPrev; - cursym = cursym->pPrev; - - next_rule--; - } - } - - if (!m && !mr) { - cursym = cursym->pNext; - } - } - - seq_enforce_uniqueness(G); - seq_merge_small_rules(G, 2); + int next_rule = -2; + sym_t *cursym = G->ss; + + while (cursym) { + sym_t *m = NULL; + seq_rule_t *mr = NULL; + + if (cursym->pPrev && cursym->pPrev->pPrev) { + mr = seq_match_digram(G->pNext, cursym->pPrev, cursym); + if (mr) { + if (cursym->pPrev->r) { + cursym->pPrev->r->count--; + } + + if(cursym->r) { + cursym->r->count--; + } + + mr->count++; + + cursym->pPrev->r = mr; + cursym->pPrev->c = mr->c; + cursym->pPrev->pNext = cursym->pNext; + cursym->pNext->pPrev = cursym->pPrev; + cursym = cursym->pPrev; + } + + m = sym_match_digram(G->ss, cursym->pPrev->pPrev, cursym->pPrev, cursym); + if (m) { + seq_rule_t *newr; + + if (cursym->pPrev->r) { + cursym->pPrev->r->count--; + } + + if (cursym->r) { + cursym->r->count--; + } + + newr = seq_grammer_insert_new_rule(G, next_rule, m, m->pNext); + if (!newr) { + return; + } + + m->r = newr; + m->c = next_rule; + m->pNext = m->pNext->pNext; + m->pNext->pPrev = m; + + cursym->pPrev->r = newr; + cursym->pPrev->c = next_rule; + cursym->pPrev->pNext = cursym->pNext; + cursym->pNext->pPrev = cursym->pPrev; + cursym = cursym->pPrev; + + next_rule--; + } + } + + if (!m && !mr) { + cursym = cursym->pNext; + } + } + + seq_enforce_uniqueness(G); + seq_merge_small_rules(G, 2); // seq_enforce_uniqueness(G); } void seq_compute_lengths(seq_rule_t *G) { - seq_rule_t *R = G->pNext; - sym_t *s; - int sum; - - while (R) { - sum = 0; - s = R->ss; - - while (s) { - if (s->c >= 0) { - if (s->offset + s->c > sum) { - sum = s->offset + s->c; - } - } - - if (s->c < 0) { - if (s->offset + s->r->length > sum) { - sum = s->offset + s->r->length; - } - } - - s = s->pNext; - } - - R->length = sum; - R = R->pNext; - } - - sum = 0; - s = G->ss; - - while (s) { - if (s->c >= 0) { - if (s->offset + s->c > sum) { - sum = s->offset + s->c; - } - } - - if (s->c < 0) { - if (s->offset + s->r->length > sum) { - sum = s->offset + s->r->length; - } - } - - s = s->pNext; - } - - G->length = sum; + seq_rule_t *R = G->pNext; + sym_t *s; + int sum; + + while (R) { + sum = 0; + s = R->ss; + + while (s) { + if (s->c >= 0) { + if (s->offset + s->c > sum) { + sum = s->offset + s->c; + } + } + + if (s->c < 0) { + if (s->offset + s->r->length > sum) { + sum = s->offset + s->r->length; + } + } + + s = s->pNext; + } + + R->length = sum; + R = R->pNext; + } + + sum = 0; + s = G->ss; + + while (s) { + if (s->c >= 0) { + if (s->offset + s->c > sum) { + sum = s->offset + s->c; + } + } + + if (s->c < 0) { + if (s->offset + s->r->length > sum) { + sum = s->offset + s->r->length; + } + } + + s = s->pNext; + } + + G->length = sum; }
\ No newline at end of file |