From 055dc8e12114d91b9d472f6f1f094db4fc8470dc Mon Sep 17 00:00:00 2001
From: jkim <jkim@FreeBSD.org>
Date: Tue, 6 Dec 2005 02:58:12 +0000
Subject: Add experimental BPF Just-In-Time compiler for amd64 and i386.

Use the following kernel configuration option to enable:

	options BPF_JITTER

If you want to use bpf_filter() instead (e. g., debugging), do:

	sysctl net.bpf.jitter.enable=0

to turn it off.

Currently BIOCSETWF and bpf_mtap2() are unsupported, and bpf_mtap() is
partially supported because 1) no need, 2) avoid expensive m_copydata(9).

Obtained from:	WinPcap 3.1 (for i386)
---
 sys/amd64/amd64/bpf_jit_machdep.c | 490 ++++++++++++++++++++++++++++++++++++++
 sys/amd64/amd64/bpf_jit_machdep.h | 431 +++++++++++++++++++++++++++++++++
 2 files changed, 921 insertions(+)
 create mode 100644 sys/amd64/amd64/bpf_jit_machdep.c
 create mode 100644 sys/amd64/amd64/bpf_jit_machdep.h

(limited to 'sys/amd64')

diff --git a/sys/amd64/amd64/bpf_jit_machdep.c b/sys/amd64/amd64/bpf_jit_machdep.c
new file mode 100644
index 0000000..2f9dd21
--- /dev/null
+++ b/sys/amd64/amd64/bpf_jit_machdep.c
@@ -0,0 +1,490 @@
+/*-
+ * Copyright (c) 2002 - 2003 NetGroup, Politecnico di Torino (Italy)
+ * Copyright (c) 2005 Jung-uk Kim <jkim@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the Politecnico di Torino nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_bpf.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/malloc.h>
+
+#include <net/if.h>
+#include <net/bpf.h>
+#include <net/bpf_jitter.h>
+
+#include <amd64/amd64/bpf_jit_machdep.h>
+
+bpf_filter_func	bpf_jit_compile(struct bpf_insn *, u_int, int *);
+
+/*
+ * Sizing emitter: adds len to the current BPF slot; value is not used.
+ */
+static void
+emit_length(bpf_bin_stream *stream, u_int value, u_int len)
+{
+
+	stream->cur_ip += len;
+	stream->refs[stream->bpf_pc] += len;
+}
+
+/*
+ * Emit routine for the second pass: writes real machine code.
+ *
+ * The low len bytes of value are stored at offset cur_ip of the
+ * instruction buffer in host byte order, then cur_ip advances by len.
+ * Lengths other than 1, 2 and 4 are ignored and leave the stream
+ * untouched.  No bounds check is made here: the buffer is expected to
+ * have been sized from a prior emit_length() pass over the same
+ * program.
+ */
+static void
+emit_code(bpf_bin_stream *stream, u_int value, u_int len)
+{
+	char *dst = stream->ibuf + stream->cur_ip;
+
+	if (len == 1)
+		*dst = (u_char)value;
+	else if (len == 2)
+		*(u_short *)dst = (u_short)value;
+	else if (len == 4)
+		*(u_int *)dst = value;
+	else
+		return;		/* unsupported width: emit nothing */
+
+	stream->cur_ip += len;
+}
+
+/*
+ * Translate the BPF program prog (nins instructions) into amd64 code:
+ * pass 0 sizes every instruction, pass 1 emits into a malloc'ed buffer.
+ * mem is the word array addressed by the BPF_MEM, BPF_ST and BPF_STX
+ * cases.  Returns the code as a bpf_filter_func, or NULL when an opcode
+ * is not supported.  NOTE(review): load bounds checks use signed JLE.
+ */
+bpf_filter_func
+bpf_jit_compile(struct bpf_insn *prog, u_int nins, int *mem)
+{
+	struct bpf_insn *ins;
+	u_int i, pass;
+	bpf_bin_stream stream;
+	/* The emit macros use "stream" and "emitm" by name; keep them. */
+	emit_func emitm;
+
+	/* Zeroed table of native sizes per BPF slot, offsets later on. */
+	stream.refs = (u_int *)malloc((nins + 1) * sizeof(u_int),
+	    M_BPFJIT, M_WAITOK | M_ZERO);
+	if (stream.refs == NULL)
+		return NULL;
+
+	stream.ibuf = NULL;
+	stream.cur_ip = 0;
+	stream.bpf_pc = 0;
+
+	/* Pass 0 only measures the code; nothing is written yet. */
+	emitm = emit_length;
+
+	pass = 0;
+	for (;;) {
+		ins = prog;
+
+		/*
+		 * Procedure header.  PUSH(RSI) reserves [rbp-8, rbp): the
+		 * low half keeps ESI (read back by BPF_LEN), the high half
+		 * gets EDX (the load bound).  RBX is pushed last so that
+		 * POP(RBX) finds it on top of the stack.
+		 */
+		PUSH(RBP);
+		MOVrq(RBP, RSP);
+		PUSH(RSI);
+		MOVoqd(RBP, -4, EDX);
+		PUSH(RBX);
+		MOVrq(RBX, RDI);
+
+		for (i = 0; i < nins; i++) {
+			stream.bpf_pc++;
+			switch (ins->code) {
+			default:
+				/* Unsupported opcode: release what we hold. */
+				free(stream.ibuf, M_BPFJIT);
+				free(stream.refs, M_BPFJIT);
+				return NULL;
+
+			case BPF_RET|BPF_K:
+				MOVid(EAX, ins->k);
+				POP(RBX);
+				LEAVE_RET();
+				break;
+
+			case BPF_RET|BPF_A:
+				POP(RBX);
+				LEAVE_RET();
+				break;
+
+			case BPF_LD|BPF_W|BPF_ABS:
+				MOVid(ECX, ins->k);
+				MOVrd(ESI, ECX);
+				ADDib(ECX, sizeof(int));
+				CMPodd(ECX, RBP, -4);
+				JLEb(5);
+				ZERO_EAX();
+				POP(RBX);
+				LEAVE_RET();
+				MOVobd(EAX, RBX, RSI);
+				BSWAP(EAX);
+				break;
+
+			case BPF_LD|BPF_H|BPF_ABS:
+				ZERO_EAX();
+				MOVid(ECX, ins->k);
+				MOVrd(ESI, ECX);
+				ADDib(ECX, sizeof(short));
+				CMPodd(ECX, RBP, -4);
+				JLEb(3);
+				POP(RBX);
+				LEAVE_RET();
+				MOVobw(AX, RBX, RSI);
+				SWAP_AX();
+				break;
+
+			case BPF_LD|BPF_B|BPF_ABS:
+				ZERO_EAX();
+				MOVid(ECX, ins->k);
+				MOVrd(ESI, ECX);
+				ADDib(ECX, sizeof(char));
+				CMPodd(ECX, RBP, -4);
+				JLEb(3);
+				POP(RBX);
+				LEAVE_RET();
+				MOVobb(AL, RBX, RSI);
+				break;
+
+			case BPF_LD|BPF_W|BPF_LEN:
+				MOVodd(EAX, RBP, -8);
+				break;
+
+			case BPF_LDX|BPF_W|BPF_LEN:
+				MOVodd(EDX, RBP, -8);
+				break;
+
+			case BPF_LD|BPF_W|BPF_IND:
+				MOVid(ECX, ins->k);
+				ADDrd(ECX, EDX);
+				MOVrd(ESI, ECX);
+				ADDib(ECX, sizeof(int));
+				CMPodd(ECX, RBP, -4);
+				JLEb(5);
+				ZERO_EAX();
+				POP(RBX);
+				LEAVE_RET();
+				MOVobd(EAX, RBX, RSI);
+				BSWAP(EAX);
+				break;
+
+			case BPF_LD|BPF_H|BPF_IND:
+				ZERO_EAX();
+				MOVid(ECX, ins->k);
+				ADDrd(ECX, EDX);
+				MOVrd(ESI, ECX);
+				ADDib(ECX, sizeof(short));
+				CMPodd(ECX, RBP, -4);
+				JLEb(3);
+				POP(RBX);
+				LEAVE_RET();
+				MOVobw(AX, RBX, RSI);
+				SWAP_AX();
+				break;
+
+			case BPF_LD|BPF_B|BPF_IND:
+				ZERO_EAX();
+				MOVid(ECX, ins->k);
+				ADDrd(ECX, EDX);
+				MOVrd(ESI, ECX);
+				ADDib(ECX, sizeof(char));
+				CMPodd(ECX, RBP, -4);
+				JLEb(3);
+				POP(RBX);
+				LEAVE_RET();
+				MOVobb(AL, RBX, RSI);
+				break;
+
+			case BPF_LDX|BPF_MSH|BPF_B:
+				MOVid(ECX, ins->k);
+				MOVrd(ESI, ECX);
+				ADDib(ECX, sizeof(char));
+				CMPodd(ECX, RBP, -4);
+				JLEb(5);
+				ZERO_EAX();
+				POP(RBX);
+				LEAVE_RET();
+				MOVid(EDX, 0);
+				MOVobb(DL, RBX, RSI);
+				ANDib(DL, 0xf);
+				SHLib(EDX, 2);
+				break;
+
+			case BPF_LD|BPF_IMM:
+				MOVid(EAX, ins->k);
+				break;
+
+			case BPF_LDX|BPF_IMM:
+				MOVid(EDX, ins->k);
+				break;
+
+			case BPF_LD|BPF_MEM:
+				MOViq(RCX, (uintptr_t)mem);
+				MOVid(ESI, ins->k * 4);
+				MOVobd(EAX, RCX, RSI);
+				break;
+
+			case BPF_LDX|BPF_MEM:
+				MOViq(RCX, (uintptr_t)mem);
+				MOVid(ESI, ins->k * 4);
+				MOVobd(EDX, RCX, RSI);
+				break;
+
+			case BPF_ST:
+				/*
+				 * XXX this command and the following could
+				 * be optimized if the previous instruction
+				 * was already of this type
+				 */
+				MOViq(RCX, (uintptr_t)mem);
+				MOVid(ESI, ins->k * 4);
+				MOVomd(RCX, RSI, EAX);
+				break;
+
+			case BPF_STX:
+				MOViq(RCX, (uintptr_t)mem);
+				MOVid(ESI, ins->k * 4);
+				MOVomd(RCX, RSI, EDX);
+				break;
+
+			case BPF_JMP|BPF_JA:
+				JMP(stream.refs[stream.bpf_pc + ins->k] -
+				    stream.refs[stream.bpf_pc]);
+				break;
+
+			case BPF_JMP|BPF_JGT|BPF_K:
+				CMPid(EAX, ins->k);
+				/* BPF compares unsigned; 5 = size of the JMP. */
+				JA(stream.refs[stream.bpf_pc + ins->jt] -
+				    stream.refs[stream.bpf_pc] + 5);
+				JMP(stream.refs[stream.bpf_pc + ins->jf] -
+				    stream.refs[stream.bpf_pc]);
+				break;
+
+			case BPF_JMP|BPF_JGE|BPF_K:
+				CMPid(EAX, ins->k);
+				JAE(stream.refs[stream.bpf_pc + ins->jt] -
+				    stream.refs[stream.bpf_pc] + 5);
+				JMP(stream.refs[stream.bpf_pc + ins->jf] -
+				    stream.refs[stream.bpf_pc]);
+				break;
+
+			case BPF_JMP|BPF_JEQ|BPF_K:
+				CMPid(EAX, ins->k);
+				JE(stream.refs[stream.bpf_pc + ins->jt] -
+				    stream.refs[stream.bpf_pc] + 5);
+				JMP(stream.refs[stream.bpf_pc + ins->jf] -
+				    stream.refs[stream.bpf_pc]);
+				break;
+
+			case BPF_JMP|BPF_JSET|BPF_K:
+				MOVrd(ECX, EAX);
+				ANDid(ECX, ins->k);
+				JE(stream.refs[stream.bpf_pc + ins->jf] -
+				    stream.refs[stream.bpf_pc] + 5);
+				JMP(stream.refs[stream.bpf_pc + ins->jt] -
+				    stream.refs[stream.bpf_pc]);
+				break;
+
+			case BPF_JMP|BPF_JGT|BPF_X:
+				CMPrd(EAX, EDX);
+				JA(stream.refs[stream.bpf_pc + ins->jt] -
+				    stream.refs[stream.bpf_pc] + 5);
+				JMP(stream.refs[stream.bpf_pc + ins->jf] -
+				    stream.refs[stream.bpf_pc]);
+				break;
+
+			case BPF_JMP|BPF_JGE|BPF_X:
+				CMPrd(EAX, EDX);
+				JAE(stream.refs[stream.bpf_pc + ins->jt] -
+				    stream.refs[stream.bpf_pc] + 5);
+				JMP(stream.refs[stream.bpf_pc + ins->jf] -
+				    stream.refs[stream.bpf_pc]);
+				break;
+
+			case BPF_JMP|BPF_JEQ|BPF_X:
+				CMPrd(EAX, EDX);
+				JE(stream.refs[stream.bpf_pc + ins->jt] -
+				    stream.refs[stream.bpf_pc] + 5);
+				JMP(stream.refs[stream.bpf_pc + ins->jf] -
+				    stream.refs[stream.bpf_pc]);
+				break;
+
+			case BPF_JMP|BPF_JSET|BPF_X:
+				MOVrd(ECX, EAX);
+				ANDrd(ECX, EDX);
+				JE(stream.refs[stream.bpf_pc + ins->jf] -
+				    stream.refs[stream.bpf_pc] + 5);
+				JMP(stream.refs[stream.bpf_pc + ins->jt] -
+				    stream.refs[stream.bpf_pc]);
+				break;
+
+			case BPF_ALU|BPF_ADD|BPF_X:
+				ADDrd(EAX, EDX);
+				break;
+
+			case BPF_ALU|BPF_SUB|BPF_X:
+				SUBrd(EAX, EDX);
+				break;
+
+			case BPF_ALU|BPF_MUL|BPF_X:
+				MOVrd(ECX, EDX);
+				MULrd(EDX);
+				MOVrd(EDX, ECX);
+				break;
+
+			case BPF_ALU|BPF_DIV|BPF_X:
+				CMPid(EDX, 0);
+				JNEb(5);
+				ZERO_EAX();
+				POP(RBX);
+				LEAVE_RET();
+				MOVrd(ECX, EDX);
+				MOVid(EDX, 0);
+				DIVrd(ECX);
+				MOVrd(EDX, ECX);
+				break;
+
+			case BPF_ALU|BPF_AND|BPF_X:
+				ANDrd(EAX, EDX);
+				break;
+
+			case BPF_ALU|BPF_OR|BPF_X:
+				ORrd(EAX, EDX);
+				break;
+
+			case BPF_ALU|BPF_LSH|BPF_X:
+				MOVrd(ECX, EDX);
+				SHL_CLrb(EAX);
+				break;
+
+			case BPF_ALU|BPF_RSH|BPF_X:
+				MOVrd(ECX, EDX);
+				SHR_CLrb(EAX);
+				break;
+
+			case BPF_ALU|BPF_ADD|BPF_K:
+				ADD_EAXi(ins->k);
+				break;
+
+			case BPF_ALU|BPF_SUB|BPF_K:
+				SUB_EAXi(ins->k);
+				break;
+
+			case BPF_ALU|BPF_MUL|BPF_K:
+				MOVrd(ECX, EDX);
+				MOVid(EDX, ins->k);
+				MULrd(EDX);
+				MOVrd(EDX, ECX);
+				break;
+
+			case BPF_ALU|BPF_DIV|BPF_K:
+				MOVrd(ECX, EDX);
+				MOVid(EDX, 0);
+				MOVid(ESI, ins->k);
+				DIVrd(ESI);
+				MOVrd(EDX, ECX);
+				break;
+
+			case BPF_ALU|BPF_AND|BPF_K:
+				ANDid(EAX, ins->k);
+				break;
+
+			case BPF_ALU|BPF_OR|BPF_K:
+				ORid(EAX, ins->k);
+				break;
+
+			case BPF_ALU|BPF_LSH|BPF_K:
+				SHLib(EAX, (ins->k) & 255);
+				break;
+
+			case BPF_ALU|BPF_RSH|BPF_K:
+				SHRib(EAX, (ins->k) & 255);
+				break;
+
+			case BPF_ALU|BPF_NEG:
+				NEGd(EAX);
+				break;
+
+			case BPF_MISC|BPF_TAX:
+				MOVrd(EDX, EAX);
+				break;
+
+			case BPF_MISC|BPF_TXA:
+				MOVrd(EAX, EDX);
+				break;
+			}
+			ins++;
+		}
+
+		if (++pass == 2)
+			break;
+
+		stream.ibuf = (char *)malloc(stream.cur_ip, M_BPFJIT, M_WAITOK);
+		if (stream.ibuf == NULL) {
+			free(stream.refs, M_BPFJIT);
+			return NULL;
+		}
+
+		/* Prefix-sum the sizes: refs[n] = native offset of insn n. */
+		for (i = 1; i < nins + 1; i++)
+			stream.refs[i] += stream.refs[i - 1];
+
+		/* Reset the counters */
+		stream.cur_ip = 0;
+		stream.bpf_pc = 0;
+
+		/* the second pass creates the actual code */
+		emitm = emit_code;
+	}
+
+	/* The reference table is only needed while compiling. */
+	free(stream.refs, M_BPFJIT);
+
+	return (bpf_filter_func)stream.ibuf;
+}
diff --git a/sys/amd64/amd64/bpf_jit_machdep.h b/sys/amd64/amd64/bpf_jit_machdep.h
new file mode 100644
index 0000000..028d961
--- /dev/null
+++ b/sys/amd64/amd64/bpf_jit_machdep.h
@@ -0,0 +1,431 @@
+/*-
+ * Copyright (c) 2002 - 2003 NetGroup, Politecnico di Torino (Italy)
+ * Copyright (c) 2005 Jung-uk Kim <jkim@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the Politecnico di Torino nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _BPF_JIT_MACHDEP_H_
+#define _BPF_JIT_MACHDEP_H_
+
+/*
+ * Register numbers; the macros below mask them with 0x7 when encoding.
+ */
+#define RAX	0
+#define RCX	1
+#define RDX	2
+#define RBX	3
+#define RSP	4
+#define RBP	5
+#define RSI	6
+#define RDI	7
+
+#define EAX	0
+#define ECX	1
+#define EDX	2
+#define EBX	3
+#define ESP	4
+#define EBP	5
+#define ESI	6
+#define EDI	7
+
+#define AX	0
+#define CX	1
+#define DX	2
+#define BX	3
+#define SP	4
+#define BP	5
+#define SI	6
+#define DI	7
+
+#define AL	0
+#define CL	1
+#define DL	2
+#define BL	3
+
+/* Compilation state shared by the emit routines and the macros. */
+typedef struct bpf_bin_stream {
+	/* Offset, in bytes, of the next native instruction in ibuf. */
+	int		cur_ip;
+
+	/*
+	 * Current BPF instruction pointer, i.e. position in
+	 * the BPF program reached by the jitter.
+	 */
+	int		bpf_pc;
+
+	/* Instruction buffer, contains the generated native code. */
+	char		*ibuf;
+
+	/* Jump reference table, indexed by BPF instruction pointer. */
+	u_int		*refs;
+} bpf_bin_stream;
+
+/*
+ * Prototype of the emit functions.
+ *
+ * Different emit functions are used to create the reference table and
+ * to generate the actual filtering code, which keeps the instruction
+ * macros simple: they always call through a variable named emitm.
+ * The first parameter is the stream that will receive the data.
+ * The second one is the value to emit (its low n bytes are used).
+ * The third one is the length n, which can be 1, 2, or 4 since it is
+ * possible to emit a byte, a short, or a word at a time.
+ */
+typedef void (*emit_func)(bpf_bin_stream *stream, u_int value, u_int n);
+
+/*
+ * Native instruction macros; each expects "emitm" and "stream" in scope.
+ */
+
+/* mov r32,i32 (opcode 0xb8+r32, imm32) */
+#define MOVid(r32, i32) do {						\
+	emitm(&stream, (11 << 4) | (1 << 3) | (r32 & 0x7), 1);		\
+	emitm(&stream, i32, 4);						\
+} while (0)
+
+/* mov r64,i64 (0x48 prefix, 0xb8+r64, imm64 as two 32-bit halves) */
+#define MOViq(r64, i64) do {						\
+	emitm(&stream, 0x48, 1);					\
+	emitm(&stream, (11 << 4) | (1 << 3) | (r64 & 0x7), 1);		\
+	emitm(&stream, i64, 4);						\
+	emitm(&stream, (i64 >> 32), 4);					\
+} while (0)
+
+/* mov dr32,sr32 (opcode 0x8b, register-to-register) */
+#define MOVrd(dr32, sr32) do {						\
+	emitm(&stream, (8 << 4) | 3 | (1 << 3), 1);			\
+	emitm(&stream,							\
+	    (3 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1);		\
+} while (0)
+
+/* mov dr64,sr64 (0x48 prefix + opcode 0x8b) */
+#define MOVrq(dr64, sr64) do {						\
+	emitm(&stream, 0x48, 1);					\
+	emitm(&stream, (8 << 4) | 3 | (1 << 3), 1);			\
+	emitm(&stream,							\
+	    (3 << 6) | ((dr64 & 0x7) << 3) | (sr64 & 0x7), 1);		\
+} while (0)
+
+/* mov dr32,sr32[off] (opcode 0x8b; off is emitted as one byte) */
+#define MOVodd(dr32, sr32, off) do {					\
+	emitm(&stream, (8 << 4) | 3 | (1 << 3), 1);			\
+	emitm(&stream,							\
+	    (1 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1);		\
+	emitm(&stream, off, 1);						\
+} while (0)
+
+/* mov dr64[off],sr32 (opcode 0x89; off is emitted as one byte) */
+#define MOVoqd(dr64, off, sr32) do {					\
+	emitm(&stream, (8 << 4) | 1 | (1 << 3), 1);			\
+	emitm(&stream,							\
+	    (1 << 6) | ((sr32 & 0x7) << 3) | (dr64 & 0x7), 1);		\
+	emitm(&stream, off, 1);						\
+} while (0)
+
+/* mov dr32,sr32[or32] (opcode 0x8b; base sr32, index or32) */
+#define MOVobd(dr32, sr32, or32) do {					\
+	emitm(&stream, (8 << 4) | 3 | (1 << 3), 1);			\
+	emitm(&stream, ((dr32 & 0x7) << 3) | 4, 1);			\
+	emitm(&stream, ((or32 & 0x7) << 3) | (sr32 & 0x7), 1);		\
+} while (0)
+
+/* mov dr16,sr32[or32] (0x66 prefix + opcode 0x8b) */
+#define MOVobw(dr32, sr32, or32) do {					\
+	emitm(&stream, 0x66, 1);					\
+	emitm(&stream, (8 << 4) | 3 | (1 << 3), 1);			\
+	emitm(&stream, ((dr32 & 0x7) << 3) | 4, 1);			\
+	emitm(&stream, ((or32 & 0x7) << 3) | (sr32 & 0x7), 1);		\
+} while (0)
+
+/* mov dr8,sr32[or32] (opcode 0x8a; base sr32, index or32) */
+#define MOVobb(dr8, sr32, or32) do {					\
+	emitm(&stream, 0x8a, 1);					\
+	emitm(&stream, ((dr8 & 0x7) << 3) | 4, 1);			\
+	emitm(&stream, ((or32 & 0x7) << 3) | (sr32 & 0x7), 1);		\
+} while (0)
+
+/* mov [dr32][or32],sr32 (opcode 0x89; base dr32, index or32) */
+#define MOVomd(dr32, or32, sr32) do {					\
+	emitm(&stream, 0x89, 1);					\
+	emitm(&stream, ((sr32 & 0x7) << 3) | 4, 1);			\
+	emitm(&stream, ((or32 & 0x7) << 3) | (dr32 & 0x7), 1);		\
+} while (0)
+
+/* bswap dr32 (0x0f, 0xc8+dr32) */
+#define BSWAP(dr32) do {						\
+	emitm(&stream, 0xf, 1);						\
+	emitm(&stream, (0x19 << 3) | dr32, 1);				\
+} while (0)
+
+/* xchg al,ah (0x86 0xc4): swaps the two bytes of ax */
+#define SWAP_AX() do {							\
+	emitm(&stream, 0x86, 1);					\
+	emitm(&stream, 0xc4, 1);					\
+} while (0)
+
+/* push r64 (opcode 0x50+r64) */
+#define PUSH(r64) do {							\
+	emitm(&stream, (5 << 4) | (0 << 3) | (r64 & 0x7), 1);		\
+} while (0)
+
+/* pop r64 (opcode 0x58+r64) */
+#define POP(r64) do {							\
+	emitm(&stream, (5 << 4) | (1 << 3) | (r64 & 0x7), 1);		\
+} while (0)
+
+/* leave; ret (0xc9, 0xc3) */
+#define LEAVE_RET() do {						\
+	emitm(&stream, 0xc9, 1);					\
+	emitm(&stream, 0xc3, 1);					\
+} while (0)
+
+/* add dr32,sr32 (opcode 0x03) */
+#define ADDrd(dr32, sr32) do {						\
+	emitm(&stream, 0x03, 1);					\
+	emitm(&stream,							\
+	    (3 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1);	\
+} while (0)
+
+/* add eax,i32 (opcode 0x05, imm32) */
+#define ADD_EAXi(i32) do {						\
+	emitm(&stream, 0x05, 1);					\
+	emitm(&stream, i32, 4);						\
+} while (0)
+
+/* add r32,i32 (opcode 0x81 /0, imm32) */
+#define ADDid(r32, i32) do {						\
+	emitm(&stream, 0x81, 1);					\
+	emitm(&stream, (24 << 3) | r32, 1);				\
+	emitm(&stream, i32, 4);						\
+} while (0)
+
+/* add r32,i8 (opcode 0x83 /0, imm8) */
+#define ADDib(r32, i8) do {						\
+	emitm(&stream, 0x83, 1);					\
+	emitm(&stream, (24 << 3) | r32, 1);				\
+	emitm(&stream, i8, 1);						\
+} while (0)
+
+/* sub dr32,sr32 (opcode 0x2b) */
+#define SUBrd(dr32, sr32) do {						\
+	emitm(&stream, 0x2b, 1);					\
+	emitm(&stream,							\
+	    (3 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1);		\
+} while (0)
+
+/* sub eax,i32 (opcode 0x2d, imm32) */
+#define SUB_EAXi(i32) do {						\
+	emitm(&stream, 0x2d, 1);					\
+	emitm(&stream, i32, 4);						\
+} while (0)
+
+/* mul r32 (opcode 0xf7 /4): edx:eax = eax * r32 */
+#define MULrd(r32) do {							\
+	emitm(&stream, 0xf7, 1);					\
+	emitm(&stream, (7 << 5) | (r32 & 0x7), 1);			\
+} while (0)
+
+/* div r32 (opcode 0xf7 /6): eax = edx:eax / r32, edx = remainder */
+#define DIVrd(r32) do {							\
+	emitm(&stream, 0xf7, 1);					\
+	emitm(&stream, (15 << 4) | (r32 & 0x7), 1);			\
+} while (0)
+
+/* and r8,i8 (opcode 0x80 /4, imm8) */
+#define ANDib(r8, i8) do {						\
+	emitm(&stream, 0x80, 1);					\
+	emitm(&stream, (7 << 5) | r8, 1);				\
+	emitm(&stream, i8, 1);						\
+} while (0)
+
+/* and r32,i32 (short form 0x25 for eax, otherwise 0x81 /4) */
+#define ANDid(r32, i32) do {						\
+	if (r32 == EAX) {						\
+		emitm(&stream, 0x25, 1);				\
+		emitm(&stream, i32, 4);					\
+	} else {							\
+		emitm(&stream, 0x81, 1);				\
+		emitm(&stream, (7 << 5) | r32, 1);			\
+		emitm(&stream, i32, 4);					\
+	}								\
+} while (0)
+
+/* and dr32,sr32 (opcode 0x23) */
+#define ANDrd(dr32, sr32) do {						\
+	emitm(&stream, 0x23, 1);					\
+	emitm(&stream,							\
+	    (3 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1);		\
+} while (0)
+
+/* or dr32,sr32 (opcode 0x0b) */
+#define ORrd(dr32, sr32) do {						\
+	emitm(&stream, 0x0b, 1);					\
+	emitm(&stream,							\
+	    (3 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1);		\
+} while (0)
+
+/* or r32,i32 (short form 0x0d for eax, otherwise 0x81 /1) */
+#define ORid(r32, i32) do {						\
+	if (r32 == EAX) {						\
+		emitm(&stream, 0x0d, 1);				\
+		emitm(&stream, i32, 4);					\
+	} else {							\
+		emitm(&stream, 0x81, 1);				\
+		emitm(&stream, (25 << 3) | r32, 1);			\
+		emitm(&stream, i32, 4);					\
+	}								\
+} while (0)
+
+/* shl r32,i8 (opcode 0xc1 /4, imm8) */
+#define SHLib(r32, i8) do {						\
+	emitm(&stream, 0xc1, 1);					\
+	emitm(&stream, (7 << 5) | (r32 & 0x7), 1);			\
+	emitm(&stream, i8, 1);						\
+} while (0)
+
+/* shl dr32,cl (opcode 0xd3 /4) */
+#define SHL_CLrb(dr32) do {						\
+	emitm(&stream, 0xd3, 1);					\
+	emitm(&stream, (7 << 5) | (dr32 & 0x7), 1);			\
+} while (0)
+
+/* shr r32,i8 (opcode 0xc1 /5, imm8) */
+#define SHRib(r32, i8) do {						\
+	emitm(&stream, 0xc1, 1);					\
+	emitm(&stream, (29 << 3) | (r32 & 0x7), 1);			\
+	emitm(&stream, i8, 1);						\
+} while (0)
+
+/* shr dr32,cl (opcode 0xd3 /5) */
+#define SHR_CLrb(dr32) do {						\
+	emitm(&stream, 0xd3, 1);					\
+	emitm(&stream, (29 << 3) | (dr32 & 0x7), 1);			\
+} while (0)
+
+/* neg r32 (opcode 0xf7 /3) */
+#define NEGd(r32) do {							\
+	emitm(&stream, 0xf7, 1);					\
+	emitm(&stream, (27 << 3) | (r32 & 0x7), 1);			\
+} while (0)
+
+/* cmp dr32,sr32[off] (opcode 0x3b; off is emitted as one byte) */
+#define CMPodd(dr32, sr32, off) do {					\
+	emitm(&stream, (3 << 4) | 3 | (1 << 3), 1);			\
+	emitm(&stream,							\
+	    (1 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1);		\
+	emitm(&stream, off, 1);						\
+} while (0)
+
+/* cmp dr32,sr32 (opcode 0x3b) */
+#define CMPrd(dr32, sr32) do {						\
+	emitm(&stream, 0x3b, 1);					\
+	emitm(&stream,							\
+	    (3 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1);		\
+} while (0)
+
+/* cmp dr32,i32 (short form 0x3d for eax, otherwise 0x81 /7) */
+#define CMPid(dr32, i32) do {						\
+	if (dr32 == EAX){						\
+		emitm(&stream, 0x3d, 1);				\
+		emitm(&stream, i32, 4);					\
+	} else {							\
+		emitm(&stream, 0x81, 1);				\
+		emitm(&stream, (0x1f << 3) | (dr32 & 0x7), 1);		\
+		emitm(&stream, i32, 4);					\
+	}								\
+} while (0)
+
+/* jne off8 (opcode 0x75, 8-bit displacement) */
+#define JNEb(off8) do {							\
+	emitm(&stream, 0x75, 1);					\
+	emitm(&stream, off8, 1);					\
+} while (0)
+
+/* je off32 (0x0f 0x84) */
+#define JE(off32) do {							\
+	emitm(&stream, 0x0f, 1);					\
+	emitm(&stream, 0x84, 1);					\
+	emitm(&stream, off32, 4);					\
+} while (0)
+
+/* jle off32 (0x0f 0x8e): signed less-or-equal */
+#define JLE(off32) do {							\
+	emitm(&stream, 0x0f, 1);					\
+	emitm(&stream, 0x8e, 1);					\
+	emitm(&stream, off32, 4);					\
+} while (0)
+
+/* jle off8 (0x7e): signed less-or-equal, 8-bit displacement */
+#define JLEb(off8) do {							\
+	emitm(&stream, 0x7e, 1);					\
+	emitm(&stream, off8, 1);					\
+} while (0)
+
+/* ja off32 (0x0f 0x87): unsigned greater-than */
+#define JA(off32) do {							\
+	emitm(&stream, 0x0f, 1);					\
+	emitm(&stream, 0x87, 1);					\
+	emitm(&stream, off32, 4);					\
+} while (0)
+
+/* jae off32 (0x0f 0x83): unsigned greater-or-equal */
+#define JAE(off32) do {							\
+	emitm(&stream, 0x0f, 1);					\
+	emitm(&stream, 0x83, 1);					\
+	emitm(&stream, off32, 4);					\
+} while (0)
+
+/* jg off32 (0x0f 0x8f): signed greater-than */
+#define JG(off32) do {							\
+	emitm(&stream, 0x0f, 1);					\
+	emitm(&stream, 0x8f, 1);					\
+	emitm(&stream, off32, 4);					\
+} while (0)
+
+/* jge off32 (0x0f 0x8d): signed greater-or-equal */
+#define JGE(off32) do {							\
+	emitm(&stream, 0x0f, 1);					\
+	emitm(&stream, 0x8d, 1);					\
+	emitm(&stream, off32, 4);					\
+} while (0)
+
+/* jmp off32 (opcode 0xe9, 32-bit displacement) */
+#define JMP(off32) do {							\
+	emitm(&stream, 0xe9, 1);					\
+	emitm(&stream, off32, 4);					\
+} while (0)
+
+/* xor eax,eax (0x31 0xc0): clears eax */
+#define ZERO_EAX() do {							\
+	emitm(&stream, 0x31, 1);					\
+	emitm(&stream, 0xc0, 1);					\
+} while (0)
+
+#endif	/* _BPF_JIT_MACHDEP_H_ */
-- 
cgit v1.1