From ed08737097eb396742e2825609c2449687d3a944 Mon Sep 17 00:00:00 2001 From: dchagin Date: Sat, 16 Jan 2016 07:56:49 +0000 Subject: MFC r293613: Implement vsyscall hack. Prior to 2.13 glibc uses vsyscall instead of vdso. An upcoming linux_base-c6 needs it. --- sys/kern/imgact_aout.c | 1 + sys/kern/init_main.c | 1 + 2 files changed, 2 insertions(+) (limited to 'sys/kern') diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c index edd5f5f..553dc04 100644 --- a/sys/kern/imgact_aout.c +++ b/sys/kern/imgact_aout.c @@ -100,6 +100,7 @@ struct sysentvec aout_sysvec = { .sv_syscallnames = syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; #elif defined(__amd64__) diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 6cb5017..201680a 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -415,6 +415,7 @@ struct sysentvec null_sysvec = { .sv_syscallnames = NULL, .sv_schedtail = NULL, .sv_thread_detach = NULL, + .sv_trap = NULL, }; /* -- cgit v1.1 From e6d9c6386f7459b113c9c9586b3364d2ffd472b3 Mon Sep 17 00:00:00 2001 From: jhb Date: Mon, 18 Jan 2016 18:27:21 +0000 Subject: MFC 290728: Export various helper variables describing the layout and size of certain kernel structures for use by debuggers. This mostly aids in examining cores from a kernel without debug symbols as a debugger can infer these values if debug symbols are available. One set of variables describes the layout of 'struct linker_file' to walk the list of loaded kernel modules. A second set of variables describes the layout of 'struct proc' and 'struct thread' to walk the list of processes in the kernel and the threads in each process. The 'pcb_size' variable is used to index into the stoppcbs[] array. The 'vm_maxuser_address' is used to distinguish kernel virtual addresses from user addresses. This doesn't have to be perfect, and 'vm_maxuser_address' is a cheap and simple way to differentiate kernel pointers from simple values like TIDs and PIDs. 
While here, annotate the fields in struct pcb used by kgdb on amd64 and i386 to note that their ABI should be preserved. Annotations for other platforms will be added in the future. --- sys/kern/kern_linker.c | 6 ++++++ sys/kern/kern_mib.c | 5 +++++ sys/kern/kern_proc.c | 15 +++++++++++++++ 3 files changed, 26 insertions(+) (limited to 'sys/kern') diff --git a/sys/kern/kern_linker.c b/sys/kern/kern_linker.c index e379f5f..78eab87 100644 --- a/sys/kern/kern_linker.c +++ b/sys/kern/kern_linker.c @@ -71,6 +71,12 @@ SYSCTL_INT(_debug, OID_AUTO, kld_debug, CTLFLAG_RW | CTLFLAG_TUN, TUNABLE_INT("debug.kld_debug", &kld_debug); #endif +/* These variables are used by kernel debuggers to enumerate loaded files. */ +const int kld_off_address = offsetof(struct linker_file, address); +const int kld_off_filename = offsetof(struct linker_file, filename); +const int kld_off_pathname = offsetof(struct linker_file, pathname); +const int kld_off_next = offsetof(struct linker_file, link.tqe_next); + /* * static char *linker_search_path(const char *name, struct mod_depend * *verinfo); diff --git a/sys/kern/kern_mib.c b/sys/kern/kern_mib.c index 0307791..ccecbc9 100644 --- a/sys/kern/kern_mib.c +++ b/sys/kern/kern_mib.c @@ -574,6 +574,11 @@ SYSCTL_INT(_debug_sizeof, OID_AUTO, buf, CTLFLAG_RD, SYSCTL_INT(_debug_sizeof, OID_AUTO, kinfo_proc, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, sizeof(struct kinfo_proc), "sizeof(struct kinfo_proc)"); +/* Used by kernel debuggers. */ +const int pcb_size = sizeof(struct pcb); +SYSCTL_INT(_debug_sizeof, OID_AUTO, pcb, CTLFLAG_RD, + SYSCTL_NULL_INT_PTR, sizeof(struct pcb), "sizeof(struct pcb)"); + /* XXX compatibility, remove for 6.0 */ #include #include diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index c9b7ca3..6b60840 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -139,6 +139,21 @@ struct sx proctree_lock; struct mtx ppeers_lock; uma_zone_t proc_zone; +/* + * The offset of various fields in struct proc and struct thread. 
+ * These are used by kernel debuggers to enumerate kernel threads and + * processes. + */ +const int proc_off_p_pid = offsetof(struct proc, p_pid); +const int proc_off_p_comm = offsetof(struct proc, p_comm); +const int proc_off_p_list = offsetof(struct proc, p_list); +const int proc_off_p_threads = offsetof(struct proc, p_threads); +const int thread_off_td_tid = offsetof(struct thread, td_tid); +const int thread_off_td_name = offsetof(struct thread, td_name); +const int thread_off_td_oncpu = offsetof(struct thread, td_oncpu); +const int thread_off_td_pcb = offsetof(struct thread, td_pcb); +const int thread_off_td_plist = offsetof(struct thread, td_plist); + int kstack_pages = KSTACK_PAGES; SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0, "Kernel stack size in pages"); -- cgit v1.1 From 77733541d454c3b987e985b85df345fc8c7f205d Mon Sep 17 00:00:00 2001 From: jhb Date: Wed, 20 Jan 2016 01:09:53 +0000 Subject: MFC 289769,289822,290143,290144: Rename remaining linux32 symbols from linux_* to linux32_*. 289769: Rename remaining linux32 symbols such as linux_sysent[] and linux_syscallnames[] from linux_* to linux32_* to avoid conflicts with linux64.ko. While here, add support for linux64 binaries to systrace. - Update NOPROTO entries in amd64/linux/syscalls.master to match the main table to fix systrace build. - Add a special case for union l_semun arguments to the systrace generation. - The systrace_linux32 module now only builds the systrace_linux32.ko. module on amd64. - Add a new systrace_linux module that builds on both i386 and amd64. For i386 it builds the existing systrace_linux.ko. For amd64 it builds a systrace_linux.ko for 64-bit binaries. 289822: Fix build for the KTR-enabled kernels. 290143: Fix build with DEBUG defined. 290144: Update for LINUX32 rename. The assembler didn't complain about undefined symbols but just used 0 after the rename. 
--- sys/kern/makesyscalls.sh | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'sys/kern') diff --git a/sys/kern/makesyscalls.sh b/sys/kern/makesyscalls.sh index 8be4896..1d3af65 100644 --- a/sys/kern/makesyscalls.sh +++ b/sys/kern/makesyscalls.sh @@ -410,6 +410,10 @@ s/\$//g printf("\t\tuarg[%d] = (intptr_t) p->%s; /* %s */\n", \ i - 1, \ argname[i], arg) > systrace + else if (arg == "union l_semun") + printf("\t\tuarg[%d] = p->%s.buf; /* %s */\n", \ + i - 1, \ + argname[i], arg) > systrace else if (substr(arg, 1, 1) == "u" || arg == "size_t") printf("\t\tuarg[%d] = p->%s; /* %s */\n", \ i - 1, \ -- cgit v1.1 From 51a396136ecde08b5b9e4fbf449c3b167483ee0c Mon Sep 17 00:00:00 2001 From: jhb Date: Sat, 23 Jan 2016 01:21:11 +0000 Subject: MFC 292892: Call kern_thr_exit() instead of duplicating it. This code is missing the racct_subr() call from kern_thr_exit() and would require further code duplication in future changes. --- sys/kern/kern_thread.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'sys/kern') diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index 96f68609ca..c85813b 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -885,7 +886,6 @@ thread_suspend_check(int return_instead) */ if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) { PROC_UNLOCK(p); - tidhash_remove(td); /* * Allow Linux emulation layer to do some work @@ -893,13 +893,8 @@ thread_suspend_check(int return_instead) */ if (__predict_false(p->p_sysent->sv_thread_detach != NULL)) (p->p_sysent->sv_thread_detach)(td); - - PROC_LOCK(p); - tdsigcleanup(td); - umtx_thread_exit(td); - PROC_SLOCK(p); - thread_stopped(p); - thread_exit(); + kern_thr_exit(td); + panic("stopped thread did not exit"); } PROC_SLOCK(p); -- cgit v1.1 From 6e826779c72ba605eb445362d8558a6233a9a7c8 Mon Sep 17 00:00:00 2001 From: ian Date: Sun, 24 Jan 2016 
19:21:53 +0000 Subject: MFC r289618, r290316: Fix printf format to allow for bus_size_t not being u_long on all platforms. Fix an alignment check that is wrong in half the busdma implementations. This will enable the elimination of a workaround in the USB driver that artificially allocates buffers twice as big as they need to be (which actually saves memory for very small buffers on the buggy platforms). When deciding how to allocate a dma buffer, armv4, armv6, mips, and x86/iommu all correctly check for the tag alignment <= maxsize as enabling simple uma/malloc based allocation. Powerpc, sparc64, x86/bounce, and arm64/bounce were all checking for alignment < maxsize; on those platforms when alignment was equal to the max size it would fall back to page-based allocators even for very small buffers. This change makes all platforms use the <= check. It should be noted that on all platforms other than arm[v6] and mips, this check is relying on undocumented behavior in malloc(9) that if you allocate a block of a given size it will be aligned to the next larger power-of-2 boundary. There is nothing in the malloc(9) man page that makes that explicit promise (but the busdma code has been relying on this behavior all along so I guess it works). Arm and mips code uses the allocator in kern/subr_busdma_bufalloc.c, which does explicitly implement this promise about size and alignment. Other platforms probably should switch to the aligned allocator. 
--- sys/kern/subr_busdma_bufalloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'sys/kern') diff --git a/sys/kern/subr_busdma_bufalloc.c b/sys/kern/subr_busdma_bufalloc.c index b0b1ba8..c8980e1 100644 --- a/sys/kern/subr_busdma_bufalloc.c +++ b/sys/kern/subr_busdma_bufalloc.c @@ -94,8 +94,8 @@ busdma_bufalloc_create(const char *name, bus_size_t minimum_alignment, for (i = 0, bz = ba->buf_zones, cursize = ba->min_size; i < nitems(ba->buf_zones) && cursize <= MAX_ZONE_BUFSIZE; ++i, ++bz, cursize <<= 1) { - snprintf(bz->name, sizeof(bz->name), "dma %.10s %lu", - name, cursize); + snprintf(bz->name, sizeof(bz->name), "dma %.10s %ju", + name, (uintmax_t)cursize); bz->size = cursize; bz->umazone = uma_zcreate(bz->name, bz->size, NULL, NULL, NULL, NULL, bz->size - 1, zcreate_flags); -- cgit v1.1 From 33902405d5bbd7b2931c254ec445012ce3f55107 Mon Sep 17 00:00:00 2001 From: ian Date: Sun, 24 Jan 2016 21:04:06 +0000 Subject: MFC r293045, r293046: Make the 'env' directive described in config(5) work on all architectures, providing compiled-in static environment data that is used instead of any data passed in from a boot loader. Previously 'env' worked only on i386 and arm xscale systems, because it required the MD startup code to examine the global envmode variable and decide whether to use static_env or an environment obtained from the boot loader, and set the global kern_envp accordingly. Most startup code wasn't doing so. Making things even more complex, some mips startup code uses an alternate scheme that involves calling init_static_kenv() to pass an empty buffer and its size, then uses a series of kern_setenv() calls to populate that buffer. Now all MD startup code calls init_static_kenv(), and that routine provides a single point where envmode is checked and the decision is made whether to use the compiled-in static_kenv or the values provided by the MD code. 
The routine also continues to serve its original purpose for mips; if a non-zero buffer size is passed the routine installs the empty buffer ready to accept kern_setenv() values. Now if the size is zero, the provided buffer full of existing env data is installed. A NULL pointer can be passed if the boot loader provides no env data; this allows the static env to be installed if envmode is set to do so. Most of the work here is a near-mechanical change to call the init function instead of directly setting kern_envp. A notable exception is in xen/pv.c; that code was originally installing a buffer full of preformatted env data along with its non-zero size (like mips code does), which would have allowed kern_setenv() calls to wipe out the preformatted data. Now it passes a zero for the size so that the buffer of data it installs is treated as non-writeable. Also, revert accidental change that snuck into r293045. --- sys/kern/kern_environment.c | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) (limited to 'sys/kern') diff --git a/sys/kern/kern_environment.c b/sys/kern/kern_environment.c index ff453cb..05c4f62 100644 --- a/sys/kern/kern_environment.c +++ b/sys/kern/kern_environment.c @@ -210,12 +210,44 @@ done: return (error); } +/* + * Populate the initial kernel environment. + * + * This is called very early in MD startup, either to provide a copy of the + * environment obtained from a boot loader, or to provide an empty buffer into + * which MD code can store an initial environment using kern_setenv() calls. + * + * If the global envmode is 1, the environment is initialized from the global + * static_env[], regardless of the arguments passed. This implements the env + * keyword described in config(5). In this case env_pos is set to env_len, + * causing kern_setenv() to return -1 (if len > 0) or panic (if len == 0) until + * the dynamic environment is available. 
The envmode and static_env variables + * are defined in env.c which is generated by config(8). + * + * If len is non-zero, the caller is providing an empty buffer. The caller will + * subsequently use kern_setenv() to add up to len bytes of initial environment + * before the dynamic environment is available. + * + * If len is zero, the caller is providing a pre-loaded buffer containing + * environment strings. Additional strings cannot be added until the dynamic + * environment is available. The memory pointed to must remain stable at least + * until sysinit runs init_dynamic_kenv(). If no initial environment is + * available from the boot loader, passing a NULL pointer allows the static_env + * to be installed if it is configured. + */ void init_static_kenv(char *buf, size_t len) { - kern_envp = buf; - env_len = len; - env_pos = 0; + + if (envmode == 1) { + kern_envp = static_env; + env_len = len; + env_pos = len; + } else { + kern_envp = buf; + env_len = len; + env_pos = 0; + } } /* -- cgit v1.1