#!/bin/awk -f
# gen-insn-attr-x86.awk: Instruction attribute table generator
# Written by Masami Hiramatsu
#
# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
#
# The script reads an opcode map made of sections that start with a
# "Table:" or "GrpTable:" line, may carry "Referrer:" and "AVXcode:"
# lines, contain "<hex>: ..." opcode lines and finish with "EndTable".
# For every section it prints C initializers of type insn_attr_t, and in
# END it prints three arrays that index the generated tables by
# escape id / group id / AVX id and by last-prefix variant (0..3).
#
# Global working state (reset by clear_vars() after each section):
#   table     - attributes for opcodes with no last-prefix superscript
#   lptable1  - attributes for opcodes matching lprefix1_expr
#   lptable2  - attributes for opcodes matching lprefix2_expr
#   lptable3  - attributes for opcodes matching lprefix3_expr
#   eid, gid, aid - escape / group / AVX id of the current section (-1: none)
#   tname     - C identifier used for the current section's tables

# Awk implementation sanity check
# Returns "" when sprintf("%x") behaves as expected, otherwise an error text.
function check_awk_implement() {
	if (sprintf("%x", 0) != "0")
		return "Your awk has a printf-format problem."
	return ""
}

# Clear working vars
function clear_vars() {
	delete table
	delete lptable2
	delete lptable1
	delete lptable3
	eid = -1 # escape id
	gid = -1 # group id
	aid = -1 # AVX id
	tname = ""
}

BEGIN {
	# Implementation error checking
	awkchecked = check_awk_implement()
	if (awkchecked != "") {
		print "Error: " awkchecked > "/dev/stderr"
		print "Please try to use gawk." > "/dev/stderr"
		exit 1
	}

	# Setup generating tables
	print "/* x86 opcode map generated from x86-opcode-map.txt */"
	print "/* Do not change this code. */\n"
	ggid = 1	# next group id to hand out
	geid = 1	# next escape id to hand out
	gaid = 0	# one past the highest AVX id seen
	delete etable
	delete gtable
	delete atable

	# Patterns used to classify the whitespace-separated fields of an
	# opcode line: operand list, "(...)" superscript, "|" separator and
	# "GrpNN" group reference.
	opnd_expr = "^[A-Za-z/]"
	ext_expr = "^\\("
	sep_expr = "^\\|$"
	group_expr = "^Grp[0-9A-Za-z]+"

	# Immediate-like operands and the attribute each one maps to.
	imm_expr = "^[IJAOL][a-z]"
	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
	imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
	imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
	imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
	imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
	imm_flag["Ob"] = "INAT_MOFFSET"
	imm_flag["Ov"] = "INAT_MOFFSET"
	imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"

	modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
	force64_expr = "\\([df]64\\)"
	rex_expr = "^REX(\\.[XRWB]+)*"
	fpu_expr = "^ESC" # TODO

	# Last-prefix superscripts. '&' is an ordinary character in a
	# regexp, so it is written without a backslash ("\&" is not a
	# regexp operator and makes newer gawk emit a warning).
	lprefix1_expr = "\\((66|!F3)\\)"
	lprefix2_expr = "\\(F3\\)"
	lprefix3_expr = "\\((F2|!F3|66&F2)\\)"
	lprefix_expr = "\\((66|F2|F3)\\)"
	max_lprefix = 4

	# All opcodes starting with lower-case 'v' or with (v1) superscript
	# accepts VEX prefix
	vexok_opcode_expr = "^v.*"
	vexok_expr = "\\(v1\\)"
	# All opcodes with (v) superscript supports *only* VEX prefix
	vexonly_expr = "\\(v\\)"

	# Prefix "opcodes" (marked with a (Prefix) superscript) and the
	# prefix attribute each name maps to.
	prefix_expr = "\\(Prefix\\)"
	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
	prefix_num["REPNE"] = "INAT_PFX_REPNE"
	prefix_num["REP/REPE"] = "INAT_PFX_REPE"
	prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
	prefix_num["XRELEASE"] = "INAT_PFX_REPE"
	prefix_num["LOCK"] = "INAT_PFX_LOCK"
	prefix_num["SEG=CS"] = "INAT_PFX_CS"
	prefix_num["SEG=DS"] = "INAT_PFX_DS"
	prefix_num["SEG=ES"] = "INAT_PFX_ES"
	prefix_num["SEG=FS"] = "INAT_PFX_FS"
	prefix_num["SEG=GS"] = "INAT_PFX_GS"
	prefix_num["SEG=SS"] = "INAT_PFX_SS"
	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"

	clear_vars()
}

# Report a fatal problem in the input (with the current record number)
# and terminate with status 1.
# "exit" still runs the END rule, so a flag is recorded here to let END
# skip printing the index arrays after a failure.
function semantic_error(msg) {
	print "Semantic error at " NR ": " msg > "/dev/stderr"
	semantic_failed = 1
	exit 1
}

# Debugging aid. Written to stderr because stdout is the generated C file.
function debug(msg) {
	print "DEBUG: " msg > "/dev/stderr"
}

# Return the number of elements in arr (i and c are locals).
function array_size(arr,   i,c) {
	c = 0
	for (i in arr)
		c++
	return c
}

# "Table:" starts a new section; the previous one must have been closed
# by EndTable (which resets tname).
/^Table:/ {
	print "/* " $0 " */"
	if (tname != "")
		semantic_error("Hit Table: before EndTable:.");
}

# "Referrer: <name>" names the escape opcode that leads to this table.
# The name is the concatenation of the remaining fields, built the same
# way as when the escape was registered on its opcode line.
/^Referrer:/ {
	if (NF != 1) {
		# escape opcode table
		ref = ""
		for (i = 2; i <= NF; i++)
			ref = ref $i
		eid = escape[ref]
		tname = sprintf("inat_escape_table_%d", eid)
	}
}

# "AVXcode: <n>" assigns an AVX id to the table. A section with neither
# an AVX id nor an escape id is the primary opcode table.
/^AVXcode:/ {
	if (NF != 1) {
		# AVX/escape opcode table
		aid = $2
		if (gaid <= aid)
			gaid = aid + 1
		if (tname == "")	# AVX only opcode table
			tname = sprintf("inat_avx_table_%d", $2)
	}
	if (aid == -1 && eid == -1)	# primary opcode table
		tname = "inat_primary_table"
}

# "GrpTable: GrpNN" starts a group table; the group must already have
# been referenced by an opcode line.
/^GrpTable:/ {
	print "/* " $0 " */"
	if (!($2 in group))
		semantic_error("No group: " $2 )
	gid = group[$2]
	tname = "inat_group_table_" gid
}

# Print one C table.
#   tbl  - array of attribute strings keyed by formatted index
#   name - declarator text, e.g. "foo[SIZE]"
#   fmt  - sprintf format that turns 0..n-1 into tbl's key format
#   n    - number of indices to scan
# Only non-empty entries are printed. i and id are locals so that this
# function does not disturb the field cursor "i" used by the rules.
function print_table(tbl,name,fmt,n,   i,id)
{
	print "const insn_attr_t " name " = {"
	for (i = 0; i < n; i++) {
		id = sprintf(fmt, i)
		if (tbl[id])
			print " [" id "] = " tbl[id] ","
	}
	print "};"
}

# "EndTable" closes the section: print every non-empty table collected
# for it, remember the table names for the index arrays printed in END,
# then reset the working state.
/^EndTable/ {
	if (gid != -1) {
		# print group tables
		if (array_size(table) != 0) {
			print_table(table, tname "[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,0] = tname
		}
		if (array_size(lptable1) != 0) {
			print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,1] = tname "_1"
		}
		if (array_size(lptable2) != 0) {
			print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,2] = tname "_2"
		}
		if (array_size(lptable3) != 0) {
			print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,3] = tname "_3"
		}
	} else {
		# print primary/escaped tables
		if (array_size(table) != 0) {
			print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,0] = tname
			if (aid >= 0)
				atable[aid,0] = tname
		}
		if (array_size(lptable1) != 0) {
			print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,1] = tname "_1"
			if (aid >= 0)
				atable[aid,1] = tname "_1"
		}
		if (array_size(lptable2) != 0) {
			print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,2] = tname "_2"
			if (aid >= 0)
				atable[aid,2] = tname "_2"
		}
		if (array_size(lptable3) != 0) {
			print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,3] = tname "_3"
			if (aid >= 0)
				atable[aid,3] = tname "_3"
		}
	}
	print ""
	clear_vars()
}

# Join two attribute expressions with " | "; either one may be empty.
function add_flags(old,new) {
	if (old && new)
		return old " | " new
	else if (old)
		return old
	else
		return new
}

# convert operands to flags.
#   count - number of entries in opnd
#   opnd  - operand names, indexed 1..count
# Returns the immediate attribute (if any) OR-ed with INAT_MODRM when
# one of the operands matches modrm_expr. A second immediate is only
# accepted when it is "Ib" and is recorded as INAT_SCNDIMM.
function convert_operands(count,opnd,       i,j,imm,mod)
{
	imm = null
	mod = null
	for (j = 1; j <= count; j++) {
		i = opnd[j]
		if (match(i, imm_expr) == 1) {
			if (!imm_flag[i])
				semantic_error("Unknown imm opnd: " i)
			if (imm) {
				if (i != "Ib")
					semantic_error("Second IMM error")
				imm = add_flags(imm, "INAT_SCNDIMM")
			} else
				imm = imm_flag[i]
		} else if (match(i, modrm_expr))
			mod = "INAT_MODRM"
	}
	return add_flags(imm, mod)
}

# Opcode line: "<hex>: opcode [operands] [(superscript)] [| opcode ...]".
# ':' is an ordinary character in a regexp, so it is not escaped.
/^[0-9a-f]+:/ {
	if (NR == 1)
		next
	# get index
	idx = "0x" substr($1, 1, index($1,":") - 1)
	if (idx in table)
		semantic_error("Redefine " idx " in " tname)

	# check if escaped opcode
	if ("escape" == $2) {
		if ($3 != "#")
			semantic_error("No escaped name")
		ref = ""
		for (i = 4; i <= NF; i++)
			ref = ref $i
		if (ref in escape)
			semantic_error("Redefine escape (" ref ")")
		escape[ref] = geid
		geid++
		table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
		next
	}

	variant = null
	# converts
	i = 2
	while (i <= NF) {
		opcode = $(i++)
		delete opnds
		ext = null
		flags = null
		opnd = null
		# parse one opcode
		if (match($i, opnd_expr)) {
			opnd = $i
			count = split($(i++), opnds, ",")
			flags = convert_operands(count, opnds)
		}
		if (match($i, ext_expr))
			ext = $(i++)
		if (match($i, sep_expr))
			i++
		else if (i < NF)
			semantic_error($i " is not a separator")

		# check if group opcode
		if (match(opcode, group_expr)) {
			if (!(opcode in group)) {
				group[opcode] = ggid
				ggid++
			}
			flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
		}
		# check force(or default) 64bit
		if (match(ext, force64_expr))
			flags = add_flags(flags, "INAT_FORCE64")

		# check REX prefix
		if (match(opcode, rex_expr))
			flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")

		# check coprocessor escape : TODO
		if (match(opcode, fpu_expr))
			flags = add_flags(flags, "INAT_MODRM")

		# check VEX codes
		if (match(ext, vexonly_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
		else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
			flags = add_flags(flags, "INAT_VEXOK")

		# check prefixes
		if (match(ext, prefix_expr)) {
			if (!prefix_num[opcode])
				semantic_error("Unknown prefix: " opcode)
			flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
		}
		if (length(flags) == 0)
			continue
		# check if last prefix
		if (match(ext, lprefix1_expr)) {
			lptable1[idx] = add_flags(lptable1[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix2_expr)) {
			lptable2[idx] = add_flags(lptable2[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix3_expr)) {
			lptable3[idx] = add_flags(lptable3[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (!match(ext, lprefix_expr)){
			table[idx] = add_flags(table[idx],flags)
		}
	}
	# Mark the base entry when any last-prefix variant exists for idx.
	if (variant)
		table[idx] = add_flags(table[idx],variant)
}

END {
	# "exit" in BEGIN or in a rule still runs END; do not print the
	# index arrays after a failure.
	if (awkchecked != "" || semantic_failed)
		exit 1
	# print escape opcode map's array
	print "/* Escape opcode map array */"
	print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < geid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (etable[i,j])
				print " ["i"]["j"] = "etable[i,j]","
	print "};\n"
	# print group opcode map's array
	print "/* Group opcode map array */"
	print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < ggid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (gtable[i,j])
				print " ["i"]["j"] = "gtable[i,j]","
	print "};\n"
	# print AVX opcode map's array
	print "/* AVX opcode map array */"
	print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < gaid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (atable[i,j])
				print " ["i"]["j"] = "atable[i,j]","
	print "};"
}