summaryrefslogtreecommitdiffstats
path: root/sys/amd64
diff options
context:
space:
mode:
authorkib <kib@FreeBSD.org>2010-05-23 18:32:02 +0000
committerkib <kib@FreeBSD.org>2010-05-23 18:32:02 +0000
commit4208ccbe79fd8e0ec189a25823cc3bdbe848155d (patch)
tree56fad7d2ceb7c32121f981b707c967936cda94bf /sys/amd64
parent2b2103d8ae2c5f46d069ef8b5d67cc0061230612 (diff)
downloadFreeBSD-src-4208ccbe79fd8e0ec189a25823cc3bdbe848155d.zip
FreeBSD-src-4208ccbe79fd8e0ec189a25823cc3bdbe848155d.tar.gz
Reorganize syscall entry and leave handling.
Extend struct sysvec with three new elements: sv_fetch_syscall_args - the method to fetch syscall arguments from usermode into struct syscall_args. The structure is machine-depended (this might be reconsidered after all architectures are converted). sv_set_syscall_retval - the method to set a return value for usermode from the syscall. It is a generalization of cpu_set_syscall_retval(9) to allow ABIs to override the way to set a return value. sv_syscallnames - the table of syscall names. Use sv_set_syscall_retval in kern_sigsuspend() instead of hardcoding the call to cpu_set_syscall_retval(). The new functions syscallenter(9) and syscallret(9) are provided that use sv_*syscall* pointers and contain the common repeated code from the syscall() implementations for the architecture-specific syscall trap handlers. Syscallenter() fetches arguments, calls syscall implementation from ABI sysent table, and set up return frame. The end of syscall bookkeeping is done by syscallret(). Take advantage of single place for MI syscall handling code and implement ptrace_lwpinfo pl_flags PL_FLAG_SCE, PL_FLAG_SCX and PL_FLAG_EXEC. The SCE and SCX flags notify the debugger that the thread is stopped at syscall entry or return point respectively. The EXEC flag augments SCX and notifies debugger that the process address space was changed by one of exec(2)-family syscalls. The i386, amd64, sparc64, sun4v, powerpc and ia64 syscall()s are changed to use syscallenter()/syscallret(). MIPS and arm are not converted and use the mostly unchanged syscall() implementation. Reviewed by: jhb, marcel, marius, nwhitehorn, stas Tested by: marcel (ia64), marius (sparc64), nwhitehorn (powerpc), stas (mips) MFC after: 1 month
Diffstat (limited to 'sys/amd64')
-rw-r--r--sys/amd64/amd64/elf_machdep.c6
-rw-r--r--sys/amd64/amd64/trap.c157
-rw-r--r--sys/amd64/ia32/ia32_syscall.c160
-rw-r--r--sys/amd64/include/proc.h8
-rw-r--r--sys/amd64/linux32/linux32_sysvec.c47
5 files changed, 101 insertions, 277 deletions
diff --git a/sys/amd64/amd64/elf_machdep.c b/sys/amd64/amd64/elf_machdep.c
index dc7c8b9..6472d55 100644
--- a/sys/amd64/amd64/elf_machdep.c
+++ b/sys/amd64/amd64/elf_machdep.c
@@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$");
#include <sys/exec.h>
#include <sys/imgact.h>
#include <sys/linker.h>
+#include <sys/proc.h>
#include <sys/sysent.h>
#include <sys/imgact_elf.h>
#include <sys/syscall.h>
@@ -74,7 +75,10 @@ struct sysentvec elf64_freebsd_sysvec = {
.sv_setregs = exec_setregs,
.sv_fixlimit = NULL,
.sv_maxssiz = NULL,
- .sv_flags = SV_ABI_FREEBSD | SV_LP64
+ .sv_flags = SV_ABI_FREEBSD | SV_LP64,
+ .sv_set_syscall_retval = cpu_set_syscall_retval,
+ .sv_fetch_syscall_args = cpu_fetch_syscall_args,
+ .sv_syscallnames = syscallnames,
};
static Elf64_Brandinfo freebsd_brand_info = {
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index d5ddc59..2deb931 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -76,7 +76,6 @@ __FBSDID("$FreeBSD$");
#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif
-#include <security/audit/audit.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -170,8 +169,6 @@ static int prot_fault_translation = 0;
SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW,
&prot_fault_translation, 0, "Select signal to deliver on protection fault");
-extern char *syscallnames[];
-
/*
* Exception, fault, and trap interface to the FreeBSD kernel.
* This common code is called from assembly language IDT gate entry
@@ -805,19 +802,12 @@ dblfault_handler(struct trapframe *frame)
panic("double fault");
}
-struct syscall_args {
- u_int code;
- struct sysent *callp;
- register_t args[8];
- register_t *argp;
- int narg;
-};
-
-static int
-fetch_syscall_args(struct thread *td, struct syscall_args *sa)
+int
+cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
{
struct proc *p;
struct trapframe *frame;
+ register_t *argp;
caddr_t params;
int reg, regcnt, error;
@@ -829,15 +819,10 @@ fetch_syscall_args(struct thread *td, struct syscall_args *sa)
params = (caddr_t)frame->tf_rsp + sizeof(register_t);
sa->code = frame->tf_rax;
- if (p->p_sysent->sv_prepsyscall) {
- (*p->p_sysent->sv_prepsyscall)(frame, (int *)sa->args,
- &sa->code, &params);
- } else {
- if (sa->code == SYS_syscall || sa->code == SYS___syscall) {
- sa->code = frame->tf_rdi;
- reg++;
- regcnt--;
- }
+ if (sa->code == SYS_syscall || sa->code == SYS___syscall) {
+ sa->code = frame->tf_rdi;
+ reg++;
+ regcnt--;
}
if (p->p_sysent->sv_mask)
sa->code &= p->p_sysent->sv_mask;
@@ -851,24 +836,20 @@ fetch_syscall_args(struct thread *td, struct syscall_args *sa)
KASSERT(sa->narg <= sizeof(sa->args) / sizeof(sa->args[0]),
("Too many syscall arguments!"));
error = 0;
- sa->argp = &frame->tf_rdi;
- sa->argp += reg;
- bcopy(sa->argp, sa->args, sizeof(sa->args[0]) * regcnt);
+ argp = &frame->tf_rdi;
+ argp += reg;
+ bcopy(argp, sa->args, sizeof(sa->args[0]) * regcnt);
if (sa->narg > regcnt) {
KASSERT(params != NULL, ("copyin args with no params!"));
error = copyin(params, &sa->args[regcnt],
(sa->narg - regcnt) * sizeof(sa->args[0]));
}
- sa->argp = &sa->args[0];
- /*
- * This may result in two records if debugger modified
- * registers or memory during sleep at stop/ptrace point.
- */
-#ifdef KTRACE
- if (KTRPOINT(td, KTR_SYSCALL))
- ktrsyscall(sa->code, sa->narg, sa->argp);
-#endif
+ if (error == 0) {
+ td->td_retval[0] = 0;
+ td->td_retval[1] = frame->tf_rdx;
+ }
+
return (error);
}
@@ -881,87 +862,22 @@ void
syscall(struct trapframe *frame)
{
struct thread *td;
- struct proc *p;
struct syscall_args sa;
register_t orig_tf_rflags;
int error;
ksiginfo_t ksi;
- PCPU_INC(cnt.v_syscall);
- td = curthread;
- p = td->td_proc;
- td->td_syscalls++;
-
#ifdef DIAGNOSTIC
if (ISPL(frame->tf_cs) != SEL_UPL) {
panic("syscall");
/* NOT REACHED */
}
#endif
-
- td->td_pticks = 0;
- td->td_frame = frame;
- if (td->td_ucred != p->p_ucred)
- cred_update_thread(td);
orig_tf_rflags = frame->tf_rflags;
- if (p->p_flag & P_TRACED) {
- PROC_LOCK(p);
- td->td_dbgflags &= ~TDB_USERWR;
- PROC_UNLOCK(p);
- }
- error = fetch_syscall_args(td, &sa);
-
- CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td,
- td->td_proc->p_pid, td->td_name, sa.code);
-
- if (error == 0) {
- td->td_retval[0] = 0;
- td->td_retval[1] = frame->tf_rdx;
-
- STOPEVENT(p, S_SCE, sa.narg);
- PTRACESTOP_SC(p, td, S_PT_SCE);
- if (td->td_dbgflags & TDB_USERWR) {
- /*
- * Reread syscall number and arguments if
- * debugger modified registers or memory.
- */
- error = fetch_syscall_args(td, &sa);
- if (error != 0)
- goto retval;
- td->td_retval[1] = frame->tf_rdx;
- }
-
-#ifdef KDTRACE_HOOKS
- /*
- * If the systrace module has registered it's probe
- * callback and if there is a probe active for the
- * syscall 'entry', process the probe.
- */
- if (systrace_probe_func != NULL && sa.callp->sy_entry != 0)
- (*systrace_probe_func)(sa.callp->sy_entry, sa.code,
- sa.callp, sa.args);
-#endif
-
- AUDIT_SYSCALL_ENTER(sa.code, td);
- error = (*sa.callp->sy_call)(td, sa.argp);
- AUDIT_SYSCALL_EXIT(error, td);
+ td = curthread;
+ td->td_frame = frame;
- /* Save the latest error return value. */
- td->td_errno = error;
-
-#ifdef KDTRACE_HOOKS
- /*
- * If the systrace module has registered it's probe
- * callback and if there is a probe active for the
- * syscall 'return', process the probe.
- */
- if (systrace_probe_func != NULL && sa.callp->sy_return != 0)
- (*systrace_probe_func)(sa.callp->sy_return, sa.code,
- sa.callp, sa.args);
-#endif
- }
- retval:
- cpu_set_syscall_retval(td, error);
+ error = syscallenter(td, &sa);
/*
* Traced syscall.
@@ -975,40 +891,5 @@ syscall(struct trapframe *frame)
trapsignal(td, &ksi);
}
- /*
- * Check for misbehavior.
- */
- WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
- (sa.code >= 0 && sa.code < SYS_MAXSYSCALL) ?
- syscallnames[sa.code] : "???");
- KASSERT(td->td_critnest == 0,
- ("System call %s returning in a critical section",
- (sa.code >= 0 && sa.code < SYS_MAXSYSCALL) ?
- syscallnames[sa.code] : "???"));
- KASSERT(td->td_locks == 0,
- ("System call %s returning with %d locks held",
- (sa.code >= 0 && sa.code < SYS_MAXSYSCALL) ?
- syscallnames[sa.code] : "???", td->td_locks));
-
- /*
- * Handle reschedule and other end-of-syscall issues
- */
- userret(td, frame);
-
- CTR4(KTR_SYSC, "syscall exit thread %p pid %d proc %s code %d", td,
- td->td_proc->p_pid, td->td_name, sa.code);
-
-#ifdef KTRACE
- if (KTRPOINT(td, KTR_SYSRET))
- ktrsysret(sa.code, error, td->td_retval[0]);
-#endif
-
- /*
- * This works because errno is findable through the
- * register set. If we ever support an emulation where this
- * is not the case, this code will need to be revisited.
- */
- STOPEVENT(p, S_SCX, sa.code);
-
- PTRACESTOP_SC(p, td, S_PT_SCX);
+ syscallret(td, error, &sa);
}
diff --git a/sys/amd64/ia32/ia32_syscall.c b/sys/amd64/ia32/ia32_syscall.c
index aa1ae6c..6a649ae 100644
--- a/sys/amd64/ia32/ia32_syscall.c
+++ b/sys/amd64/ia32/ia32_syscall.c
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
+#include <sys/proc.h>
#include <sys/ptrace.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
@@ -81,62 +82,54 @@ __FBSDID("$FreeBSD$");
#include <machine/intr_machdep.h>
#include <machine/md_var.h>
+#include <compat/freebsd32/freebsd32_util.h>
+
#define IDTVEC(name) __CONCAT(X,name)
extern inthand_t IDTVEC(int0x80_syscall), IDTVEC(rsvd);
-extern const char *freebsd32_syscallnames[];
void ia32_syscall(struct trapframe *frame); /* Called from asm code */
-struct ia32_syscall_args {
- u_int code;
- caddr_t params;
- struct sysent *callp;
- u_int64_t args64[8];
- int narg;
-};
+void
+ia32_set_syscall_retval(struct thread *td, int error)
+{
-static int
-fetch_ia32_syscall_args(struct thread *td, struct ia32_syscall_args *sa)
+ cpu_set_syscall_retval(td, error);
+}
+
+int
+ia32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
{
struct proc *p;
struct trapframe *frame;
+ caddr_t params;
u_int32_t args[8];
int error, i;
p = td->td_proc;
frame = td->td_frame;
- sa->params = (caddr_t)frame->tf_rsp + sizeof(u_int32_t);
+ params = (caddr_t)frame->tf_rsp + sizeof(u_int32_t);
sa->code = frame->tf_rax;
- if (p->p_sysent->sv_prepsyscall) {
+ /*
+ * Need to check if this is a 32 bit or 64 bit syscall.
+ */
+ if (sa->code == SYS_syscall) {
/*
- * The prep code is MP aware.
+ * Code is first argument, followed by actual args.
*/
- (*p->p_sysent->sv_prepsyscall)(frame, args, &sa->code,
- &sa->params);
- } else {
+ sa->code = fuword32(params);
+ params += sizeof(int);
+ } else if (sa->code == SYS___syscall) {
/*
- * Need to check if this is a 32 bit or 64 bit syscall.
- * fuword is MP aware.
+ * Like syscall, but code is a quad, so as to maintain
+ * quad alignment for the rest of the arguments.
+ * We use a 32-bit fetch in case params is not
+ * aligned.
*/
- if (sa->code == SYS_syscall) {
- /*
- * Code is first argument, followed by actual args.
- */
- sa->code = fuword32(sa->params);
- sa->params += sizeof(int);
- } else if (sa->code == SYS___syscall) {
- /*
- * Like syscall, but code is a quad, so as to maintain
- * quad alignment for the rest of the arguments.
- * We use a 32-bit fetch in case params is not
- * aligned.
- */
- sa->code = fuword32(sa->params);
- sa->params += sizeof(quad_t);
- }
+ sa->code = fuword32(params);
+ params += sizeof(quad_t);
}
if (p->p_sysent->sv_mask)
sa->code &= p->p_sysent->sv_mask;
@@ -146,19 +139,19 @@ fetch_ia32_syscall_args(struct thread *td, struct ia32_syscall_args *sa)
sa->callp = &p->p_sysent->sv_table[sa->code];
sa->narg = sa->callp->sy_narg;
- if (sa->params != NULL && sa->narg != 0)
- error = copyin(sa->params, (caddr_t)args,
+ if (params != NULL && sa->narg != 0)
+ error = copyin(params, (caddr_t)args,
(u_int)(sa->narg * sizeof(int)));
else
error = 0;
for (i = 0; i < sa->narg; i++)
- sa->args64[i] = args[i];
+ sa->args[i] = args[i];
-#ifdef KTRACE
- if (KTRPOINT(td, KTR_SYSCALL))
- ktrsyscall(sa->code, sa->narg, sa->args64);
-#endif
+ if (error == 0) {
+ td->td_retval[0] = 0;
+ td->td_retval[1] = frame->tf_rdx;
+ }
return (error);
}
@@ -167,58 +160,16 @@ void
ia32_syscall(struct trapframe *frame)
{
struct thread *td;
- struct proc *p;
- struct ia32_syscall_args sa;
+ struct syscall_args sa;
register_t orig_tf_rflags;
int error;
ksiginfo_t ksi;
- PCPU_INC(cnt.v_syscall);
+ orig_tf_rflags = frame->tf_rflags;
td = curthread;
- p = td->td_proc;
- td->td_syscalls++;
-
- td->td_pticks = 0;
td->td_frame = frame;
- if (td->td_ucred != p->p_ucred)
- cred_update_thread(td);
- orig_tf_rflags = frame->tf_rflags;
- if (p->p_flag & P_TRACED) {
- PROC_LOCK(p);
- td->td_dbgflags &= ~TDB_USERWR;
- PROC_UNLOCK(p);
- }
- error = fetch_ia32_syscall_args(td, &sa);
-
- CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td,
- td->td_proc->p_pid, td->td_name, sa.code);
-
- if (error == 0) {
- td->td_retval[0] = 0;
- td->td_retval[1] = frame->tf_rdx;
- STOPEVENT(p, S_SCE, sa.narg);
- PTRACESTOP_SC(p, td, S_PT_SCE);
- if (td->td_dbgflags & TDB_USERWR) {
- /*
- * Reread syscall number and arguments if
- * debugger modified registers or memory.
- */
- error = fetch_ia32_syscall_args(td, &sa);
- if (error != 0)
- goto retval;
- td->td_retval[1] = frame->tf_rdx;
- }
-
- AUDIT_SYSCALL_ENTER(sa.code, td);
- error = (*sa.callp->sy_call)(td, sa.args64);
- AUDIT_SYSCALL_EXIT(error, td);
-
- /* Save the latest error return value. */
- td->td_errno = error;
- }
- retval:
- cpu_set_syscall_retval(td, error);
+ error = syscallenter(td, &sa);
/*
* Traced syscall.
@@ -232,44 +183,9 @@ ia32_syscall(struct trapframe *frame)
trapsignal(td, &ksi);
}
- /*
- * Check for misbehavior.
- */
- WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
- (sa.code >= 0 && sa.code < SYS_MAXSYSCALL) ?
- freebsd32_syscallnames[sa.code] : "???");
- KASSERT(td->td_critnest == 0,
- ("System call %s returning in a critical section",
- (sa.code >= 0 && sa.code < SYS_MAXSYSCALL) ?
- freebsd32_syscallnames[sa.code] : "???"));
- KASSERT(td->td_locks == 0,
- ("System call %s returning with %d locks held",
- (sa.code >= 0 && sa.code < SYS_MAXSYSCALL) ?
- freebsd32_syscallnames[sa.code] : "???", td->td_locks));
-
- /*
- * Handle reschedule and other end-of-syscall issues
- */
- userret(td, frame);
-
- CTR4(KTR_SYSC, "syscall exit thread %p pid %d proc %s code %d", td,
- td->td_proc->p_pid, td->td_proc->p_comm, sa.code);
-#ifdef KTRACE
- if (KTRPOINT(td, KTR_SYSRET))
- ktrsysret(sa.code, error, td->td_retval[0]);
-#endif
-
- /*
- * This works because errno is findable through the
- * register set. If we ever support an emulation where this
- * is not the case, this code will need to be revisited.
- */
- STOPEVENT(p, S_SCX, sa.code);
-
- PTRACESTOP_SC(p, td, S_PT_SCX);
+ syscallret(td, error, &sa);
}
-
static void
ia32_syscall_enable(void *dummy)
{
diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h
index acea4c0..2de7a9e 100644
--- a/sys/amd64/include/proc.h
+++ b/sys/amd64/include/proc.h
@@ -79,6 +79,14 @@ int amd64_set_ldt_data(struct thread *td, int start, int num,
extern struct mtx dt_lock;
extern int max_ldt_segment;
+struct syscall_args {
+ u_int code;
+ struct sysent *callp;
+ register_t args[8];
+ int narg;
+};
+#define HAVE_SYSCALL_ARGS_DEF 1
+
#endif /* _KERNEL */
#endif /* !_MACHINE_PROC_H_ */
diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c
index 06f1e97..010e1d6 100644
--- a/sys/amd64/linux32/linux32_sysvec.c
+++ b/sys/amd64/linux32/linux32_sysvec.c
@@ -121,8 +121,6 @@ SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
static int elf_linux_fixup(register_t **stack_base,
struct image_params *iparams);
static register_t *linux_copyout_strings(struct image_params *imgp);
-static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
- caddr_t *params);
static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void exec_linux_setregs(struct thread *td,
struct image_params *imgp, u_long stack);
@@ -764,19 +762,33 @@ linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
return (EJUSTRETURN);
}
-/*
- * MPSAFE
- */
-static void
-linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
+static int
+linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
{
- args[0] = tf->tf_rbx;
- args[1] = tf->tf_rcx;
- args[2] = tf->tf_rdx;
- args[3] = tf->tf_rsi;
- args[4] = tf->tf_rdi;
- args[5] = tf->tf_rbp; /* Unconfirmed */
- *params = NULL; /* no copyin */
+ struct proc *p;
+ struct trapframe *frame;
+
+ p = td->td_proc;
+ frame = td->td_frame;
+
+ sa->args[0] = frame->tf_rbx;
+ sa->args[1] = frame->tf_rcx;
+ sa->args[2] = frame->tf_rdx;
+ sa->args[3] = frame->tf_rsi;
+ sa->args[4] = frame->tf_rdi;
+ sa->args[5] = frame->tf_rbp; /* Unconfirmed */
+ sa->code = frame->tf_rax;
+
+ if (sa->code >= p->p_sysent->sv_size)
+ sa->callp = &p->p_sysent->sv_table[0];
+ else
+ sa->callp = &p->p_sysent->sv_table[sa->code];
+ sa->narg = sa->callp->sy_narg;
+
+ td->td_retval[0] = 0;
+ td->td_retval[1] = frame->tf_rdx;
+
+ return (0);
}
/*
@@ -1039,7 +1051,7 @@ struct sysentvec elf_linux_sysvec = {
.sv_sendsig = linux_sendsig,
.sv_sigcode = linux_sigcode,
.sv_szsigcode = &linux_szsigcode,
- .sv_prepsyscall = linux_prepsyscall,
+ .sv_prepsyscall = NULL,
.sv_name = "Linux ELF32",
.sv_coredump = elf32_coredump,
.sv_imgact_try = exec_linux_imgact_try,
@@ -1054,7 +1066,10 @@ struct sysentvec elf_linux_sysvec = {
.sv_setregs = exec_linux_setregs,
.sv_fixlimit = linux32_fixlimit,
.sv_maxssiz = &linux32_maxssiz,
- .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32
+ .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32,
+ .sv_set_syscall_retval = cpu_set_syscall_retval,
+ .sv_fetch_syscall_args = linux32_fetch_syscall_args,
+ .sv_syscallnames = NULL,
};
static char GNU_ABI_VENDOR[] = "GNU";
OpenPOWER on IntegriCloud