summaryrefslogtreecommitdiffstats
path: root/sys/cddl
diff options
context:
space:
mode:
authorwma <wma@FreeBSD.org>2016-04-06 05:13:36 +0000
committerwma <wma@FreeBSD.org>2016-04-06 05:13:36 +0000
commitf5a4347e1c8f2e48ca0d0185b8c27fb25c428c27 (patch)
treed41f6613424d9e8705a5da0daa7e48fd56125426 /sys/cddl
parentb4cec75b570705bdb63d9d3d22ad0ca5269dd739 (diff)
downloadFreeBSD-src-f5a4347e1c8f2e48ca0d0185b8c27fb25c428c27.zip
FreeBSD-src-f5a4347e1c8f2e48ca0d0185b8c27fb25c428c27.tar.gz
Implement dtrace_getupcstack in ARM64
Allow using DTRACE for performance analysis of userspace applications - the function call stack can be captured. This is almost an exact copy of AMD64 solution. Obtained from: Semihalf Sponsored by: Cavium Reviewed by: emaste, gnn, jhibbits Differential Revision: https://reviews.freebsd.org/D5779
Diffstat (limited to 'sys/cddl')
-rw-r--r--sys/cddl/dev/dtrace/aarch64/dtrace_isa.c119
1 files changed, 118 insertions, 1 deletions
diff --git a/sys/cddl/dev/dtrace/aarch64/dtrace_isa.c b/sys/cddl/dev/dtrace/aarch64/dtrace_isa.c
index 3d7e044..442367f 100644
--- a/sys/cddl/dev/dtrace/aarch64/dtrace_isa.c
+++ b/sys/cddl/dev/dtrace/aarch64/dtrace_isa.c
@@ -57,6 +57,7 @@
*/
#define MAX_FUNCTION_SIZE 0x10000
#define MAX_PROLOGUE_SIZE 0x100
+#define MAX_USTACK_DEPTH 2048
uint8_t dtrace_fuword8_nocheck(void *);
uint16_t dtrace_fuword16_nocheck(void *);
@@ -111,11 +112,127 @@ dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
}
}
+static int
+dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc,
+ uintptr_t fp)
+{
+ volatile uint16_t *flags =
+ (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
+ int ret = 0;
+ uintptr_t oldfp;
+
+ ASSERT(pcstack == NULL || pcstack_limit > 0);
+
+ while (pc != 0) {
+ /*
+ * We limit the number of times we can go around this
+ * loop to account for a circular stack.
+ */
+ if (ret++ >= MAX_USTACK_DEPTH) {
+ *flags |= CPU_DTRACE_BADSTACK;
+ cpu_core[curcpu].cpuc_dtrace_illval = fp;
+ break;
+ }
+
+ if (pcstack != NULL) {
+ *pcstack++ = (uint64_t)pc;
+ pcstack_limit--;
+ if (pcstack_limit <= 0)
+ break;
+ }
+
+ if (fp == 0)
+ break;
+
+ pc = dtrace_fuword64((void *)(fp +
+ offsetof(struct arm64_frame, f_retaddr)));
+ fp = dtrace_fuword64((void *)fp);
+
+ if (fp == oldfp) {
+ *flags |= CPU_DTRACE_BADSTACK;
+ cpu_core[curcpu].cpuc_dtrace_illval = fp;
+ break;
+ }
+
+ /*
+ * ARM64TODO:
+ * This workaround might not be necessary. It needs to be
+ * revised and removed from all architectures if found
+ * unwanted. Leaving the original x86 comment for reference.
+ *
+ * This is totally bogus: if we faulted, we're going to clear
+ * the fault and break. This is to deal with the apparently
+ * broken Java stacks on x86.
+ */
+ if (*flags & CPU_DTRACE_FAULT) {
+ *flags &= ~CPU_DTRACE_FAULT;
+ break;
+ }
+ }
+
+ return (ret);
+}
+
void
dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
{
+ proc_t *p = curproc;
+ struct trapframe *tf;
+ uintptr_t pc, sp, fp;
+ volatile uint16_t *flags =
+ (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
+ int n;
- printf("IMPLEMENT ME: %s\n", __func__);
+ if (*flags & CPU_DTRACE_FAULT)
+ return;
+
+ if (pcstack_limit <= 0)
+ return;
+
+ /*
+ * If there's no user context we still need to zero the stack.
+ */
+ if (p == NULL || (tf = curthread->td_frame) == NULL)
+ goto zero;
+
+ *pcstack++ = (uint64_t)p->p_pid;
+ pcstack_limit--;
+
+ if (pcstack_limit <= 0)
+ return;
+
+ pc = tf->tf_elr;
+ sp = tf->tf_sp;
+ fp = tf->tf_x[29];
+
+ if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
+ /*
+ * In an entry probe. The frame pointer has not yet been
+ * pushed (that happens in the function prologue). The
+ * best approach is to add the current pc as a missing top
+ * of stack and back the pc up to the caller, which is stored
+ * at the current stack pointer address since the call
+ * instruction puts it there right before the branch.
+ */
+
+ *pcstack++ = (uint64_t)pc;
+ pcstack_limit--;
+ if (pcstack_limit <= 0)
+ return;
+
+ pc = tf->tf_lr;
+ }
+
+ n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp);
+ ASSERT(n >= 0);
+ ASSERT(n <= pcstack_limit);
+
+ pcstack += n;
+ pcstack_limit -= n;
+
+zero:
+ while (pcstack_limit-- > 0)
+ *pcstack++ = 0;
}
int
OpenPOWER on IntegriCloud