Diffstat (limited to 'contrib/compiler-rt/lib/xray')
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_AArch64.cc | 43
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_always_instrument.txt | 6
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_arm.cc | 50
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_buffer_queue.cc | 68
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_buffer_queue.h | 92
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_emulate_tsc.h | 40
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_fdr_log_records.h | 66
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_fdr_logging.cc | 300
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_fdr_logging.h | 38
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_fdr_logging_impl.h | 694
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_flags.cc | 36
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_flags.h | 4
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_flags.inc | 7
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_init.cc | 29
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_inmemory_log.cc | 112
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_interface.cc | 248
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_interface_internal.h | 14
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_log_interface.cc | 69
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_mips.cc | 165
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_mips64.cc | 173
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_never_instrument.txt | 6
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_powerpc64.cc | 106
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_powerpc64.inc | 37
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_trampoline_AArch64.S | 55
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_trampoline_arm.S | 37
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_trampoline_mips.S | 110
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_trampoline_mips64.S | 136
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_trampoline_powerpc64.cc | 15
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_trampoline_powerpc64_asm.S | 235
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_trampoline_x86_64.S | 185
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_tsc.h | 68
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_utils.cc | 125
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_utils.h | 41
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_x86_64.cc | 88
-rw-r--r--  contrib/compiler-rt/lib/xray/xray_x86_64.inc (renamed from contrib/compiler-rt/lib/xray/xray_x86_64.h) | 11
35 files changed, 3151 insertions(+), 358 deletions(-)
diff --git a/contrib/compiler-rt/lib/xray/xray_AArch64.cc b/contrib/compiler-rt/lib/xray/xray_AArch64.cc
index 0c1df22..f26e77d 100644
--- a/contrib/compiler-rt/lib/xray/xray_AArch64.cc
+++ b/contrib/compiler-rt/lib/xray/xray_AArch64.cc
@@ -14,29 +14,14 @@
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_common.h"
#include "xray_defs.h"
-#include "xray_emulate_tsc.h"
#include "xray_interface_internal.h"
#include <atomic>
#include <cassert>
-
-extern "C" void __clear_cache(void* start, void* end);
+extern "C" void __clear_cache(void *start, void *end);
namespace __xray {
-uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT {
- // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
- // not have a constant frequency like TSC on x86[_64]; it may go faster or
- // slower depending on CPU's turbo or power saving modes. Furthermore, to
- // read from CP15 on ARM a kernel modification or a driver is needed.
- // We can not require this from users of compiler-rt.
- // So on ARM we use clock_gettime(2) which gives the result in nanoseconds.
- // To get the measurements per second, we scale this by the number of
- // nanoseconds per second, pretending that the TSC frequency is 1GHz and
- // one TSC tick is 1 nanosecond.
- return NanosecondsPerSecond;
-}
-
// The machine codes for some instructions used in runtime patching.
enum class PatchOpcodes : uint32_t {
PO_StpX0X30SP_m16e = 0xA9BF7BE0, // STP X0, X30, [SP, #-16]!
@@ -100,14 +85,15 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
uint32_t(PatchOpcodes::PO_B32), std::memory_order_release);
}
- __clear_cache(reinterpret_cast<char*>(FirstAddress),
- reinterpret_cast<char*>(CurAddress));
+ __clear_cache(reinterpret_cast<char *>(FirstAddress),
+ reinterpret_cast<char *>(CurAddress));
return true;
}
bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
- const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
- return patchSled(Enable, FuncId, Sled, __xray_FunctionEntry);
+ const XRaySledEntry &Sled,
+ void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, Trampoline);
}
bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
@@ -117,9 +103,20 @@ bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
- // FIXME: In the future we'd need to distinguish between non-tail exits and
- // tail exits for better information preservation.
- return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit);
}
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled)
+ XRAY_NEVER_INSTRUMENT { // FIXME: Implement in aarch64?
+ return false;
+}
+
+// FIXME: Maybe implement this better?
+bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
+
} // namespace __xray
+
+extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
+ // FIXME: this will have to be implemented in the trampoline assembly file
+}
diff --git a/contrib/compiler-rt/lib/xray/xray_always_instrument.txt b/contrib/compiler-rt/lib/xray/xray_always_instrument.txt
new file mode 100644
index 0000000..151ed70
--- /dev/null
+++ b/contrib/compiler-rt/lib/xray/xray_always_instrument.txt
@@ -0,0 +1,6 @@
+# List of function matchers common to C/C++ applications that make sense to
+# always instrument. You can use this as an argument to
+# -fxray-always-instrument=<path> along with your project-specific lists.
+
+# Always instrument the main function.
+fun:main
diff --git a/contrib/compiler-rt/lib/xray/xray_arm.cc b/contrib/compiler-rt/lib/xray/xray_arm.cc
index f5e2cd2..da4efcd 100644
--- a/contrib/compiler-rt/lib/xray/xray_arm.cc
+++ b/contrib/compiler-rt/lib/xray/xray_arm.cc
@@ -14,28 +14,14 @@
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_common.h"
#include "xray_defs.h"
-#include "xray_emulate_tsc.h"
#include "xray_interface_internal.h"
#include <atomic>
#include <cassert>
-extern "C" void __clear_cache(void* start, void* end);
+extern "C" void __clear_cache(void *start, void *end);
namespace __xray {
-uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT {
- // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
- // not have a constant frequency like TSC on x86[_64]; it may go faster or
- // slower depending on CPU's turbo or power saving modes. Furthermore, to
- // read from CP15 on ARM a kernel modification or a driver is needed.
- // We can not require this from users of compiler-rt.
- // So on ARM we use clock_gettime(2) which gives the result in nanoseconds.
- // To get the measurements per second, we scale this by the number of
- // nanoseconds per second, pretending that the TSC frequency is 1GHz and
- // one TSC tick is 1 nanosecond.
- return NanosecondsPerSecond;
-}
-
// The machine codes for some instructions used in runtime patching.
enum class PatchOpcodes : uint32_t {
PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr}
@@ -74,7 +60,7 @@ write32bitLoadReg(uint8_t regNo, uint32_t *Address,
// MOVW r0, #<lower 16 bits of the |Value|>
// MOVT r0, #<higher 16 bits of the |Value|>
inline static uint32_t *
-Write32bitLoadR0(uint32_t *Address,
+write32bitLoadR0(uint32_t *Address,
const uint32_t Value) XRAY_NEVER_INSTRUMENT {
return write32bitLoadReg(0, Address, Value);
}
@@ -83,7 +69,7 @@ Write32bitLoadR0(uint32_t *Address,
// MOVW ip, #<lower 16 bits of the |Value|>
// MOVT ip, #<higher 16 bits of the |Value|>
inline static uint32_t *
-Write32bitLoadIP(uint32_t *Address,
+write32bitLoadIP(uint32_t *Address,
const uint32_t Value) XRAY_NEVER_INSTRUMENT {
return write32bitLoadReg(12, Address, Value);
}
@@ -121,9 +107,9 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
uint32_t *CurAddress = FirstAddress + 1;
if (Enable) {
CurAddress =
- Write32bitLoadR0(CurAddress, reinterpret_cast<uint32_t>(FuncId));
+ write32bitLoadR0(CurAddress, reinterpret_cast<uint32_t>(FuncId));
CurAddress =
- Write32bitLoadIP(CurAddress, reinterpret_cast<uint32_t>(TracingHook));
+ write32bitLoadIP(CurAddress, reinterpret_cast<uint32_t>(TracingHook));
*CurAddress = uint32_t(PatchOpcodes::PO_BlxIp);
CurAddress++;
*CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr);
@@ -136,14 +122,15 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
uint32_t(PatchOpcodes::PO_B20), std::memory_order_release);
}
- __clear_cache(reinterpret_cast<char*>(FirstAddress),
- reinterpret_cast<char*>(CurAddress));
+ __clear_cache(reinterpret_cast<char *>(FirstAddress),
+ reinterpret_cast<char *>(CurAddress));
return true;
}
bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
- const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
- return patchSled(Enable, FuncId, Sled, __xray_FunctionEntry);
+ const XRaySledEntry &Sled,
+ void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, Trampoline);
}
bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
@@ -153,9 +140,20 @@ bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
- // FIXME: In the future we'd need to distinguish between non-tail exits and
- // tail exits for better information preservation.
- return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit);
+}
+
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled)
+ XRAY_NEVER_INSTRUMENT { // FIXME: Implement in arm?
+ return false;
}
+// FIXME: Maybe implement this better?
+bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
+
} // namespace __xray
+
+extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
+ // FIXME: this will have to be implemented in the trampoline assembly file
+}
diff --git a/contrib/compiler-rt/lib/xray/xray_buffer_queue.cc b/contrib/compiler-rt/lib/xray/xray_buffer_queue.cc
index 7e5462f..7ba755a 100644
--- a/contrib/compiler-rt/lib/xray/xray_buffer_queue.cc
+++ b/contrib/compiler-rt/lib/xray/xray_buffer_queue.cc
@@ -13,53 +13,69 @@
//
//===----------------------------------------------------------------------===//
#include "xray_buffer_queue.h"
-#include <cassert>
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_libc.h"
+
#include <cstdlib>
+#include <tuple>
using namespace __xray;
+using namespace __sanitizer;
-BufferQueue::BufferQueue(std::size_t B, std::size_t N)
- : BufferSize(B), Buffers(N), Mutex(), OwnedBuffers(), Finalizing(false) {
- for (auto &Buf : Buffers) {
+BufferQueue::BufferQueue(std::size_t B, std::size_t N, bool &Success)
+ : BufferSize(B), Buffers(N), Mutex(), OwnedBuffers(), Finalizing{0} {
+ for (auto &T : Buffers) {
void *Tmp = malloc(BufferSize);
+ if (Tmp == nullptr) {
+ Success = false;
+ return;
+ }
+
+ auto &Buf = std::get<0>(T);
Buf.Buffer = Tmp;
Buf.Size = B;
- if (Tmp != 0)
- OwnedBuffers.insert(Tmp);
+ OwnedBuffers.emplace(Tmp);
}
+ Success = true;
}
-std::error_code BufferQueue::getBuffer(Buffer &Buf) {
- if (Finalizing.load(std::memory_order_acquire))
- return std::make_error_code(std::errc::state_not_recoverable);
- std::lock_guard<std::mutex> Guard(Mutex);
+BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) {
+ if (__sanitizer::atomic_load(&Finalizing, __sanitizer::memory_order_acquire))
+ return ErrorCode::QueueFinalizing;
+ __sanitizer::BlockingMutexLock Guard(&Mutex);
if (Buffers.empty())
- return std::make_error_code(std::errc::not_enough_memory);
- Buf = Buffers.front();
+ return ErrorCode::NotEnoughMemory;
+ auto &T = Buffers.front();
+ auto &B = std::get<0>(T);
+ Buf = B;
+ B.Buffer = nullptr;
+ B.Size = 0;
Buffers.pop_front();
- return {};
+ return ErrorCode::Ok;
}
-std::error_code BufferQueue::releaseBuffer(Buffer &Buf) {
+BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) {
if (OwnedBuffers.count(Buf.Buffer) == 0)
- return std::make_error_code(std::errc::argument_out_of_domain);
- std::lock_guard<std::mutex> Guard(Mutex);
- Buffers.push_back(Buf);
+ return ErrorCode::UnrecognizedBuffer;
+ __sanitizer::BlockingMutexLock Guard(&Mutex);
+
+ // Now that the buffer has been released, we mark it as "used".
+ Buffers.emplace(Buffers.end(), Buf, true /* used */);
Buf.Buffer = nullptr;
- Buf.Size = BufferSize;
- return {};
+ Buf.Size = 0;
+ return ErrorCode::Ok;
}
-std::error_code BufferQueue::finalize() {
- if (Finalizing.exchange(true, std::memory_order_acq_rel))
- return std::make_error_code(std::errc::state_not_recoverable);
- return {};
+BufferQueue::ErrorCode BufferQueue::finalize() {
+ if (__sanitizer::atomic_exchange(&Finalizing, 1,
+ __sanitizer::memory_order_acq_rel))
+ return ErrorCode::QueueFinalizing;
+ return ErrorCode::Ok;
}
BufferQueue::~BufferQueue() {
- for (auto &Buf : Buffers) {
+ for (auto &T : Buffers) {
+ auto &Buf = std::get<0>(T);
free(Buf.Buffer);
- Buf.Buffer = nullptr;
- Buf.Size = 0;
}
}
diff --git a/contrib/compiler-rt/lib/xray/xray_buffer_queue.h b/contrib/compiler-rt/lib/xray/xray_buffer_queue.h
index bf0b7af..e051695 100644
--- a/contrib/compiler-rt/lib/xray/xray_buffer_queue.h
+++ b/contrib/compiler-rt/lib/xray/xray_buffer_queue.h
@@ -15,12 +15,11 @@
#ifndef XRAY_BUFFER_QUEUE_H
#define XRAY_BUFFER_QUEUE_H
-#include <atomic>
-#include <cstdint>
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_mutex.h"
#include <deque>
-#include <mutex>
-#include <system_error>
#include <unordered_set>
+#include <utility>
namespace __xray {
@@ -33,19 +32,47 @@ class BufferQueue {
public:
struct Buffer {
void *Buffer = nullptr;
- std::size_t Size = 0;
+ size_t Size = 0;
};
private:
- std::size_t BufferSize;
- std::deque<Buffer> Buffers;
- std::mutex Mutex;
+ size_t BufferSize;
+
+ // We use a bool to indicate whether the Buffer has been used in this
+ // freelist implementation.
+ std::deque<std::tuple<Buffer, bool>> Buffers;
+ __sanitizer::BlockingMutex Mutex;
std::unordered_set<void *> OwnedBuffers;
- std::atomic<bool> Finalizing;
+ __sanitizer::atomic_uint8_t Finalizing;
public:
- /// Initialise a queue of size |N| with buffers of size |B|.
- BufferQueue(std::size_t B, std::size_t N);
+ enum class ErrorCode : unsigned {
+ Ok,
+ NotEnoughMemory,
+ QueueFinalizing,
+ UnrecognizedBuffer,
+ AlreadyFinalized,
+ };
+
+ static const char *getErrorString(ErrorCode E) {
+ switch (E) {
+ case ErrorCode::Ok:
+ return "(none)";
+ case ErrorCode::NotEnoughMemory:
+ return "no available buffers in the queue";
+ case ErrorCode::QueueFinalizing:
+ return "queue already finalizing";
+ case ErrorCode::UnrecognizedBuffer:
+ return "buffer being returned not owned by buffer queue";
+ case ErrorCode::AlreadyFinalized:
+ return "queue already finalized";
+ }
+ return "unknown error";
+ }
+
+ /// Initialise a queue of size |N| with buffers of size |B|. We report success
+ /// through |Success|.
+ BufferQueue(size_t B, size_t N, bool &Success);
/// Updates |Buf| to contain the pointer to an appropriate buffer. Returns an
/// error in case there are no available buffers to return when we will run
@@ -58,24 +85,41 @@ public:
/// - std::errc::not_enough_memory on exceeding MaxSize.
/// - no error when we find a Buffer.
/// - std::errc::state_not_recoverable on finalising BufferQueue.
- std::error_code getBuffer(Buffer &Buf);
+ ErrorCode getBuffer(Buffer &Buf);
/// Updates |Buf| to point to nullptr, with size 0.
///
/// Returns:
/// - ...
- std::error_code releaseBuffer(Buffer &Buf);
-
- bool finalizing() const { return Finalizing.load(std::memory_order_acquire); }
-
- // Sets the state of the BufferQueue to finalizing, which ensures that:
- //
- // - All subsequent attempts to retrieve a Buffer will fail.
- // - All releaseBuffer operations will not fail.
- //
- // After a call to finalize succeeds, all subsequent calls to finalize will
- // fail with std::errc::state_not_recoverable.
- std::error_code finalize();
+ ErrorCode releaseBuffer(Buffer &Buf);
+
+ bool finalizing() const {
+ return __sanitizer::atomic_load(&Finalizing,
+ __sanitizer::memory_order_acquire);
+ }
+
+ /// Returns the configured size of the buffers in the buffer queue.
+ size_t ConfiguredBufferSize() const { return BufferSize; }
+
+ /// Sets the state of the BufferQueue to finalizing, which ensures that:
+ ///
+ /// - All subsequent attempts to retrieve a Buffer will fail.
+ /// - All releaseBuffer operations will not fail.
+ ///
+ /// After a call to finalize succeeds, all subsequent calls to finalize will
+ /// fail with ErrorCode::QueueFinalizing.
+ ErrorCode finalize();
+
+ /// Applies the provided function F to each Buffer in the queue, but only to
+ /// Buffers marked 'used' (i.e. those that have been the result of a
+ /// getBuffer(...) and a releaseBuffer(...) operation).
+ template <class F> void apply(F Fn) {
+ __sanitizer::BlockingMutexLock G(&Mutex);
+ for (const auto &T : Buffers) {
+ if (std::get<1>(T))
+ Fn(std::get<0>(T));
+ }
+ }
// Cleans up allocated buffers.
~BufferQueue();
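
As a reading aid for the change above, here is a minimal caller-side sketch of the new ErrorCode-based API. The exerciseBufferQueue driver and its parameters are hypothetical; the BufferQueue members are the ones declared in the header in this diff.

#include "xray_buffer_queue.h"
#include <cstdio>

// Hypothetical driver, not part of this change: exercises the ErrorCode-based
// flow that replaced std::error_code above.
void exerciseBufferQueue() {
  bool Success = false;
  __xray::BufferQueue Queue(/*B=*/4096, /*N=*/8, Success);
  if (!Success)
    return;

  __xray::BufferQueue::Buffer Buf;
  auto EC = Queue.getBuffer(Buf);
  if (EC != __xray::BufferQueue::ErrorCode::Ok) {
    std::printf("getBuffer failed: %s\n",
                __xray::BufferQueue::getErrorString(EC));
    return;
  }

  // ... fill Buf.Buffer with up to Buf.Size bytes of records ...

  // Returning the buffer marks it "used", so apply() will visit it later.
  Queue.releaseBuffer(Buf);
  Queue.finalize();
  Queue.apply([](const __xray::BufferQueue::Buffer &B) {
    // e.g. flush B.Buffer (B.Size bytes) out to a file descriptor.
  });
}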
diff --git a/contrib/compiler-rt/lib/xray/xray_emulate_tsc.h b/contrib/compiler-rt/lib/xray/xray_emulate_tsc.h
deleted file mode 100644
index a3e8b1c..0000000
--- a/contrib/compiler-rt/lib/xray/xray_emulate_tsc.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//===-- xray_emulate_tsc.h --------------------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of XRay, a dynamic runtime instrumentation system.
-//
-//===----------------------------------------------------------------------===//
-#ifndef XRAY_EMULATE_TSC_H
-#define XRAY_EMULATE_TSC_H
-
-#include "sanitizer_common/sanitizer_common.h"
-#include "sanitizer_common/sanitizer_internal_defs.h"
-#include "xray_defs.h"
-#include <cerrno>
-#include <cstdint>
-#include <time.h>
-
-namespace __xray {
-
-static constexpr uint64_t NanosecondsPerSecond = 1000ULL * 1000 * 1000;
-
-ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
- timespec TS;
- int result = clock_gettime(CLOCK_REALTIME, &TS);
- if (result != 0) {
- Report("clock_gettime(2) returned %d, errno=%d.", result, int(errno));
- TS.tv_sec = 0;
- TS.tv_nsec = 0;
- }
- CPU = 0;
- return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
-}
-}
-
-#endif // XRAY_EMULATE_TSC_H
diff --git a/contrib/compiler-rt/lib/xray/xray_fdr_log_records.h b/contrib/compiler-rt/lib/xray/xray_fdr_log_records.h
new file mode 100644
index 0000000..3d6d388
--- /dev/null
+++ b/contrib/compiler-rt/lib/xray/xray_fdr_log_records.h
@@ -0,0 +1,66 @@
+//===-- xray_fdr_log_records.h -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_FDR_LOG_RECORDS_H
+#define XRAY_XRAY_FDR_LOG_RECORDS_H
+
+enum class RecordType : uint8_t { Function, Metadata };
+
+// A MetadataRecord encodes the kind of record in its first byte, and has 15
+// additional bytes at the end to hold free-form data.
+struct alignas(16) MetadataRecord {
+ // A MetadataRecord must always have a type of 1.
+ /* RecordType */ uint8_t Type : 1;
+
+ // Each kind of record is represented as a 7-bit value (even though we use an
+ // unsigned 8-bit enum class to do so).
+ enum class RecordKinds : uint8_t {
+ NewBuffer,
+ EndOfBuffer,
+ NewCPUId,
+ TSCWrap,
+ WalltimeMarker,
+ CustomEventMarker,
+ };
+ // Use 7 bits to identify this record type.
+ /* RecordKinds */ uint8_t RecordKind : 7;
+ char Data[15];
+} __attribute__((packed));
+
+static_assert(sizeof(MetadataRecord) == 16, "Wrong size for MetadataRecord.");
+
+struct alignas(8) FunctionRecord {
+ // A FunctionRecord must always have a type of 0.
+ /* RecordType */ uint8_t Type : 1;
+ enum class RecordKinds {
+ FunctionEnter = 0x00,
+ FunctionExit = 0x01,
+ FunctionTailExit = 0x02,
+ };
+ /* RecordKinds */ uint8_t RecordKind : 3;
+
+ // We only use 28 bits of the function ID, so that we can use as few bytes as
+ // possible. This means we only support 2^28 (268,435,456) unique function ids
+ // in a single binary.
+ int FuncId : 28;
+
+ // We use another 4 bytes to hold the delta from the previous entry's TSC. If
+ // the distance is greater than the allowable 32 bits (for instance because we
+ // are running on a different CPU whose TSC may differ), we should write a
+ // MetadataRecord containing the full TSC for that CPU before this
+ // FunctionRecord, and keep this field at 0.
+ uint32_t TSCDelta;
+} __attribute__((packed));
+
+static_assert(sizeof(FunctionRecord) == 8, "Wrong size for FunctionRecord.");
+
+#endif // XRAY_XRAY_FDR_LOG_RECORDS_H
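
To make the packing above concrete, here is a hypothetical helper that fills one of the packed records and copies it into a buffer, the way the logging code does. writeFunctionEnter is an invented name for illustration; the field names come from the structs in this header.

#include <cstdint>
#include <cstring>
#include "xray_fdr_log_records.h"

// Illustrative only: the layouts are fixed by the structs above.
static_assert(sizeof(FunctionRecord) == 8, "FunctionRecord must stay 8 bytes");
static_assert(sizeof(MetadataRecord) == 16, "MetadataRecord must stay 16 bytes");

// Writing a record amounts to filling the packed struct and memcpy'ing it.
inline char *writeFunctionEnter(char *MemPtr, int FuncId, uint32_t TSCDelta) {
  FunctionRecord FR{};
  FR.Type = uint8_t(RecordType::Function);  // function records use type 0
  FR.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
  FR.FuncId = FuncId;                       // only the low 28 bits are stored
  FR.TSCDelta = TSCDelta;                   // ticks since the previous record
  std::memcpy(MemPtr, &FR, sizeof(FR));
  return MemPtr + sizeof(FR);
}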
diff --git a/contrib/compiler-rt/lib/xray/xray_fdr_logging.cc b/contrib/compiler-rt/lib/xray/xray_fdr_logging.cc
new file mode 100644
index 0000000..a7e1382
--- /dev/null
+++ b/contrib/compiler-rt/lib/xray/xray_fdr_logging.cc
@@ -0,0 +1,300 @@
+//===-- xray_fdr_logging.cc ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Here we implement the Flight Data Recorder mode for XRay, where we use
+// compact structures to store records in memory as well as when writing out the
+// data to files.
+//
+//===----------------------------------------------------------------------===//
+#include "xray_fdr_logging.h"
+#include <algorithm>
+#include <bitset>
+#include <cerrno>
+#include <cstring>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+#include <unordered_map>
+
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray/xray_interface.h"
+#include "xray/xray_records.h"
+#include "xray_buffer_queue.h"
+#include "xray_defs.h"
+#include "xray_fdr_logging_impl.h"
+#include "xray_flags.h"
+#include "xray_tsc.h"
+#include "xray_utils.h"
+
+namespace __xray {
+
+// Global BufferQueue.
+std::shared_ptr<BufferQueue> BQ;
+
+__sanitizer::atomic_sint32_t LogFlushStatus = {
+ XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
+
+FDRLoggingOptions FDROptions;
+
+__sanitizer::SpinMutex FDROptionsMutex;
+
+// Must finalize before flushing.
+XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
+ if (__sanitizer::atomic_load(&LoggingStatus,
+ __sanitizer::memory_order_acquire) !=
+ XRayLogInitStatus::XRAY_LOG_FINALIZED)
+ return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+
+ s32 Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+ if (!__sanitizer::atomic_compare_exchange_strong(
+ &LogFlushStatus, &Result, XRayLogFlushStatus::XRAY_LOG_FLUSHING,
+ __sanitizer::memory_order_release))
+ return static_cast<XRayLogFlushStatus>(Result);
+
+ // Make a copy of the BufferQueue pointer to prevent other threads that may be
+ // resetting it from blowing away the queue prematurely while we're dealing
+ // with it.
+ auto LocalBQ = BQ;
+
+ // We write out the file in the following format:
+ //
+ // 1) We write down the XRay file header with version 1, type FDR_LOG.
+ // 2) Then we use the 'apply' member of the BufferQueue that's live, to
+ // ensure that at this point in time we write down the buffers that have
+ // been released (and marked "used") -- we dump the full buffer for now
+ // (fixed-sized) and let the tools reading the buffers deal with the data
+ // afterwards.
+ //
+ int Fd = -1;
+ {
+ __sanitizer::SpinMutexLock Guard(&FDROptionsMutex);
+ Fd = FDROptions.Fd;
+ }
+ if (Fd == -1)
+ Fd = getLogFD();
+ if (Fd == -1) {
+ auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+ __sanitizer::atomic_store(&LogFlushStatus, Result,
+ __sanitizer::memory_order_release);
+ return Result;
+ }
+
+ // Test for required CPU features and cache the cycle frequency
+ static bool TSCSupported = probeRequiredCPUFeatures();
+ static uint64_t CycleFrequency =
+ TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond;
+
+ XRayFileHeader Header;
+ Header.Version = 1;
+ Header.Type = FileTypes::FDR_LOG;
+ Header.CycleFrequency = CycleFrequency;
+ // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
+ // before setting the values in the header.
+ Header.ConstantTSC = 1;
+ Header.NonstopTSC = 1;
+ Header.FdrData = FdrAdditionalHeaderData{LocalBQ->ConfiguredBufferSize()};
+ retryingWriteAll(Fd, reinterpret_cast<char *>(&Header),
+ reinterpret_cast<char *>(&Header) + sizeof(Header));
+
+ LocalBQ->apply([&](const BufferQueue::Buffer &B) {
+ uint64_t BufferSize = B.Size;
+ if (BufferSize > 0) {
+ retryingWriteAll(Fd, reinterpret_cast<char *>(B.Buffer),
+ reinterpret_cast<char *>(B.Buffer) + B.Size);
+ }
+ });
+ __sanitizer::atomic_store(&LogFlushStatus,
+ XRayLogFlushStatus::XRAY_LOG_FLUSHED,
+ __sanitizer::memory_order_release);
+ return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
+}
+
+XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT {
+ s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
+ if (!__sanitizer::atomic_compare_exchange_strong(
+ &LoggingStatus, &CurrentStatus,
+ XRayLogInitStatus::XRAY_LOG_FINALIZING,
+ __sanitizer::memory_order_release))
+ return static_cast<XRayLogInitStatus>(CurrentStatus);
+
+ // Do special things to make the log finalize itself, and not allow any more
+ // operations to be performed until re-initialized.
+ BQ->finalize();
+
+ __sanitizer::atomic_store(&LoggingStatus,
+ XRayLogInitStatus::XRAY_LOG_FINALIZED,
+ __sanitizer::memory_order_release);
+ return XRayLogInitStatus::XRAY_LOG_FINALIZED;
+}
+
+XRayLogInitStatus fdrLoggingReset() XRAY_NEVER_INSTRUMENT {
+ s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_FINALIZED;
+ if (__sanitizer::atomic_compare_exchange_strong(
+ &LoggingStatus, &CurrentStatus,
+ XRayLogInitStatus::XRAY_LOG_INITIALIZED,
+ __sanitizer::memory_order_release))
+ return static_cast<XRayLogInitStatus>(CurrentStatus);
+
+ // Release the in-memory buffer queue.
+ BQ.reset();
+
+ // Spin until the flushing status is flushed.
+ s32 CurrentFlushingStatus = XRayLogFlushStatus::XRAY_LOG_FLUSHED;
+ while (__sanitizer::atomic_compare_exchange_weak(
+ &LogFlushStatus, &CurrentFlushingStatus,
+ XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
+ __sanitizer::memory_order_release)) {
+ if (CurrentFlushingStatus == XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING)
+ break;
+ CurrentFlushingStatus = XRayLogFlushStatus::XRAY_LOG_FLUSHED;
+ }
+
+ // At this point, we know that the status is flushed, and we can report the
+ // log as uninitialized again.
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+}
+
+static std::tuple<uint64_t, unsigned char>
+getTimestamp() XRAY_NEVER_INSTRUMENT {
+ // We want to get the TSC as early as possible, so that we can check whether
+ // we've seen this CPU before. We also do it before we load anything else, to
+ // allow for forward progress with the scheduling.
+ unsigned char CPU;
+ uint64_t TSC;
+
+ // Test once for required CPU features
+ static bool TSCSupported = probeRequiredCPUFeatures();
+
+ if (TSCSupported) {
+ TSC = __xray::readTSC(CPU);
+ } else {
+ // FIXME: This code needs refactoring as it appears in multiple locations
+ timespec TS;
+ int result = clock_gettime(CLOCK_REALTIME, &TS);
+ if (result != 0) {
+ Report("clock_gettime(2) return %d, errno=%d", result, int(errno));
+ TS = {0, 0};
+ }
+ CPU = 0;
+ TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
+ }
+ return std::make_tuple(TSC, CPU);
+}
+
+void fdrLoggingHandleArg0(int32_t FuncId,
+ XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
+ auto TSC_CPU = getTimestamp();
+ __xray_fdr_internal::processFunctionHook(FuncId, Entry, std::get<0>(TSC_CPU),
+ std::get<1>(TSC_CPU), clock_gettime,
+ LoggingStatus, BQ);
+}
+
+void fdrLoggingHandleCustomEvent(void *Event,
+ std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
+ using namespace __xray_fdr_internal;
+ auto TSC_CPU = getTimestamp();
+ auto &TSC = std::get<0>(TSC_CPU);
+ auto &CPU = std::get<1>(TSC_CPU);
+ thread_local bool Running = false;
+ RecursionGuard Guard{Running};
+ if (!Guard) {
+ assert(Running && "RecursionGuard is buggy!");
+ return;
+ }
+ if (EventSize > std::numeric_limits<int32_t>::max()) {
+ using Empty = struct {};
+ static Empty Once = [&] {
+ Report("Event size too large = %zu ; > max = %d\n", EventSize,
+ std::numeric_limits<int32_t>::max());
+ return Empty();
+ }();
+ (void)Once;
+ }
+ int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
+ if (!isLogInitializedAndReady(LocalBQ, TSC, CPU, clock_gettime))
+ return;
+
+ // Here we need to prepare the log to handle:
+ // - The metadata record we're going to write. (16 bytes)
+ // - The additional data we're going to write. Currently, that's the size of
+ // the event we're going to dump into the log as free-form bytes.
+ if (!prepareBuffer(clock_gettime, MetadataRecSize + EventSize)) {
+ LocalBQ = nullptr;
+ return;
+ }
+
+ // Write the custom event metadata record, which consists of the following
+ // information:
+ // - 8 bytes (64-bits) for the full TSC when the event started.
+ // - 4 bytes (32-bits) for the length of the data.
+ MetadataRecord CustomEvent;
+ CustomEvent.Type = uint8_t(RecordType::Metadata);
+ CustomEvent.RecordKind =
+ uint8_t(MetadataRecord::RecordKinds::CustomEventMarker);
+ constexpr auto TSCSize = sizeof(std::get<0>(TSC_CPU));
+ std::memcpy(&CustomEvent.Data, &ReducedEventSize, sizeof(int32_t));
+ std::memcpy(&CustomEvent.Data[sizeof(int32_t)], &TSC, TSCSize);
+ std::memcpy(RecordPtr, &CustomEvent, sizeof(CustomEvent));
+ RecordPtr += sizeof(CustomEvent);
+ std::memcpy(RecordPtr, Event, ReducedEventSize);
+ endBufferIfFull();
+}
+
+XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax,
+ void *Options,
+ size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
+ if (OptionsSize != sizeof(FDRLoggingOptions))
+ return static_cast<XRayLogInitStatus>(__sanitizer::atomic_load(
+ &LoggingStatus, __sanitizer::memory_order_acquire));
+ s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+ if (!__sanitizer::atomic_compare_exchange_strong(
+ &LoggingStatus, &CurrentStatus,
+ XRayLogInitStatus::XRAY_LOG_INITIALIZING,
+ __sanitizer::memory_order_release))
+ return static_cast<XRayLogInitStatus>(CurrentStatus);
+
+ {
+ __sanitizer::SpinMutexLock Guard(&FDROptionsMutex);
+ memcpy(&FDROptions, Options, OptionsSize);
+ }
+
+ bool Success = false;
+ BQ = std::make_shared<BufferQueue>(BufferSize, BufferMax, Success);
+ if (!Success) {
+ Report("BufferQueue init failed.\n");
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+ }
+
+ // Install the actual handleArg0 handler after initialising the buffers.
+ __xray_set_handler(fdrLoggingHandleArg0);
+ __xray_set_customevent_handler(fdrLoggingHandleCustomEvent);
+
+ __sanitizer::atomic_store(&LoggingStatus,
+ XRayLogInitStatus::XRAY_LOG_INITIALIZED,
+ __sanitizer::memory_order_release);
+ Report("XRay FDR init successful.\n");
+ return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
+}
+
+} // namespace __xray
+
+static auto UNUSED Unused = [] {
+ using namespace __xray;
+ if (flags()->xray_fdr_log) {
+ XRayLogImpl Impl{
+ fdrLoggingInit, fdrLoggingFinalize, fdrLoggingHandleArg0,
+ fdrLoggingFlush,
+ };
+ __xray_set_log_impl(Impl);
+ }
+ return true;
+}();
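
For orientation, here is a hypothetical application-side sequence that drives this implementation. It assumes the __xray_log_init / __xray_log_finalize / __xray_log_flushLog and __xray_patch / __xray_unpatch entry points declared in the public xray/ headers, which this diff does not show; fdrLoggingInit() above is reached through the registered XRayLogImpl.

#include "xray/xray_interface.h"
#include "xray/xray_log_interface.h"

// Hypothetical usage sketch, not part of the patch.
void traceWorkloadWithFdr() {
  FDRLoggingOptions Options;
  Options.Fd = -1;  // let fdrLoggingFlush() fall back to getLogFD()

  __xray_log_init(/*BufferSize=*/16384, /*MaxBuffers=*/16, &Options,
                  sizeof(Options));
  __xray_patch();            // enable the instrumented sleds

  // ... run the instrumented workload ...

  __xray_unpatch();
  __xray_log_finalize();     // FDR mode must finalize before flushing
  __xray_log_flushLog();     // writes the file header plus all "used" buffers
}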
diff --git a/contrib/compiler-rt/lib/xray/xray_fdr_logging.h b/contrib/compiler-rt/lib/xray/xray_fdr_logging.h
new file mode 100644
index 0000000..426b54d
--- /dev/null
+++ b/contrib/compiler-rt/lib/xray/xray_fdr_logging.h
@@ -0,0 +1,38 @@
+//===-- xray_fdr_logging.h ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_FDR_LOGGING_H
+#define XRAY_XRAY_FDR_LOGGING_H
+
+#include "xray/xray_log_interface.h"
+#include "xray_fdr_log_records.h"
+
+// FDR (Flight Data Recorder) Mode
+// ===============================
+//
+// The XRay whitepaper describes a mode of operation for function call trace
+// logging that involves writing small records into an in-memory circular
+// buffer, which is then written to disk on demand. To do this efficiently and
+// capture as much data as we can, we use smaller records compared to the
+// default mode of always writing fixed-size records.
+
+namespace __xray {
+XRayLogInitStatus fdrLoggingInit(size_t BufferSize, size_t BufferMax,
+ void *Options, size_t OptionsSize);
+XRayLogInitStatus fdrLoggingFinalize();
+void fdrLoggingHandleArg0(int32_t FuncId, XRayEntryType Entry);
+XRayLogFlushStatus fdrLoggingFlush();
+XRayLogInitStatus fdrLoggingReset();
+
+} // namespace __xray
+
+#endif // XRAY_XRAY_FDR_LOGGING_H
diff --git a/contrib/compiler-rt/lib/xray/xray_fdr_logging_impl.h b/contrib/compiler-rt/lib/xray/xray_fdr_logging_impl.h
new file mode 100644
index 0000000..4a1d80f
--- /dev/null
+++ b/contrib/compiler-rt/lib/xray/xray_fdr_logging_impl.h
@@ -0,0 +1,694 @@
+//===-- xray_fdr_logging_impl.h ---------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Here we implement the thread local state management and record i/o for Flight
+// Data Recorder mode for XRay, where we use compact structures to store records
+// in memory as well as when writing out the data to files.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_FDR_LOGGING_IMPL_H
+#define XRAY_XRAY_FDR_LOGGING_IMPL_H
+
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <memory>
+#include <string>
+#include <sys/syscall.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray/xray_log_interface.h"
+#include "xray_buffer_queue.h"
+#include "xray_defs.h"
+#include "xray_fdr_log_records.h"
+#include "xray_flags.h"
+#include "xray_tsc.h"
+
+namespace __xray {
+
+__sanitizer::atomic_sint32_t LoggingStatus = {
+ XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
+
+/// We expose some of the state transitions of FDR logging mode so that we can
+/// simulate a series of log events deterministically in tests, without
+/// worrying about the real CPU time.
+///
+/// Because the code uses thread_local allocation extensively as part of its
+/// design, callers that wish to test events occurring on different threads
+/// will actually have to run them on different threads.
+///
+/// This also means that it is possible to break invariants maintained in
+/// cooperation with the xray_fdr_logging class, so be careful and think twice.
+namespace __xray_fdr_internal {
+
+/// Writes the new buffer record and wallclock time that begin a buffer for a
+/// thread to MemPtr and increments MemPtr. Bypasses the thread local state
+/// machine and writes directly to memory without checks.
+static void writeNewBufferPreamble(pid_t Tid, timespec TS, char *&MemPtr);
+
+/// Write a metadata record to switch to a new CPU to MemPtr and increments
+/// MemPtr. Bypasses the thread local state machine and writes directly to
+/// memory without checks.
+static void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC, char *&MemPtr);
+
+/// Writes an EOB metadata record to MemPtr and increments MemPtr. Bypasses the
+/// thread local state machine and writes directly to memory without checks.
+static void writeEOBMetadata(char *&MemPtr);
+
+/// Writes a TSC Wrap metadata record to MemPtr and increments MemPtr. Bypasses
+/// the thread local state machine and directly writes to memory without checks.
+static void writeTSCWrapMetadata(uint64_t TSC, char *&MemPtr);
+
+/// Writes a Function Record to MemPtr and increments MemPtr. Bypasses the
+/// thread local state machine and writes the function record directly to
+/// memory.
+static void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
+ XRayEntryType EntryType, char *&MemPtr);
+
+/// Sets up a new buffer in thread_local storage and writes a preamble. The
+/// wall_clock_reader function is used to populate the WallTimeRecord entry.
+static void setupNewBuffer(int (*wall_clock_reader)(clockid_t,
+ struct timespec *));
+
+/// Called to record CPU time for a new CPU within the current thread.
+static void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC);
+
+/// Called to close the buffer when the thread exhausts the buffer or when the
+/// thread exits (via a thread local variable destructor).
+static void writeEOBMetadata();
+
+/// TSC Wrap records are written when a TSC delta encoding scheme overflows.
+static void writeTSCWrapMetadata(uint64_t TSC);
+
+/// Here's where the meat of the processing happens. The writer captures
+/// function entry, exit and tail exit points with a time and will create
+/// TSCWrap, NewCPUId and Function records as necessary. The writer might
+/// walk backward through its buffer and erase trivial functions to avoid
+/// polluting the log and may use the buffer queue to obtain or release a
+/// buffer.
+static void processFunctionHook(int32_t FuncId, XRayEntryType Entry,
+ uint64_t TSC, unsigned char CPU,
+ int (*wall_clock_reader)(clockid_t,
+ struct timespec *),
+ __sanitizer::atomic_sint32_t &LoggingStatus,
+ const std::shared_ptr<BufferQueue> &BQ);
+
+//-----------------------------------------------------------------------------|
+// The rest of the file is implementation. |
+//-----------------------------------------------------------------------------|
+// Functions are implemented in the header for inlining since we don't want |
+// to grow the stack when we've hijacked the binary for logging. |
+//-----------------------------------------------------------------------------|
+
+namespace {
+
+thread_local BufferQueue::Buffer Buffer;
+thread_local char *RecordPtr = nullptr;
+
+// The number of FunctionEntry records immediately preceding RecordPtr.
+thread_local uint8_t NumConsecutiveFnEnters = 0;
+
+// The number of adjacent, consecutive pairs of FunctionEntry, Tail Exit
+// records preceding RecordPtr.
+thread_local uint8_t NumTailCalls = 0;
+
+constexpr auto MetadataRecSize = sizeof(MetadataRecord);
+constexpr auto FunctionRecSize = sizeof(FunctionRecord);
+
+// We use a thread_local variable to keep track of which CPUs we've already
+// run on, and the TSC times for these CPUs. This allows us to stop repeating
+// the CPU field in the function records.
+//
+// We assume that we'll support only 65536 CPUs for x86_64.
+thread_local uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
+thread_local uint64_t LastTSC = 0;
+thread_local uint64_t LastFunctionEntryTSC = 0;
+
+class ThreadExitBufferCleanup {
+ std::shared_ptr<BufferQueue> &Buffers;
+ BufferQueue::Buffer &Buffer;
+
+public:
+ explicit ThreadExitBufferCleanup(std::shared_ptr<BufferQueue> &BQ,
+ BufferQueue::Buffer &Buffer)
+ XRAY_NEVER_INSTRUMENT : Buffers(BQ),
+ Buffer(Buffer) {}
+
+ ~ThreadExitBufferCleanup() noexcept XRAY_NEVER_INSTRUMENT {
+ if (RecordPtr == nullptr)
+ return;
+
+ // We make sure that upon exit, a thread will write out the EOB
+ // MetadataRecord in the thread-local log, and also release the buffer to
+ // the queue.
+ assert((RecordPtr + MetadataRecSize) - static_cast<char *>(Buffer.Buffer) >=
+ static_cast<ptrdiff_t>(MetadataRecSize));
+ if (Buffers) {
+ writeEOBMetadata();
+ auto EC = Buffers->releaseBuffer(Buffer);
+ if (EC != BufferQueue::ErrorCode::Ok)
+ Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
+ BufferQueue::getErrorString(EC));
+ Buffers = nullptr;
+ return;
+ }
+ }
+};
+
+// Make sure a thread that's ever called handleArg0 has a thread-local
+// live reference to the buffer queue for this particular instance of
+// FDRLogging, and that we're going to clean it up when the thread exits.
+thread_local std::shared_ptr<BufferQueue> LocalBQ = nullptr;
+thread_local ThreadExitBufferCleanup Cleanup(LocalBQ, Buffer);
+
+class RecursionGuard {
+ bool &Running;
+ const bool Valid;
+
+public:
+ explicit RecursionGuard(bool &R) : Running(R), Valid(!R) {
+ if (Valid)
+ Running = true;
+ }
+
+ RecursionGuard(const RecursionGuard &) = delete;
+ RecursionGuard(RecursionGuard &&) = delete;
+ RecursionGuard &operator=(const RecursionGuard &) = delete;
+ RecursionGuard &operator=(RecursionGuard &&) = delete;
+
+ explicit operator bool() const { return Valid; }
+
+ ~RecursionGuard() noexcept {
+ if (Valid)
+ Running = false;
+ }
+};
+
+inline bool loggingInitialized(
+ const __sanitizer::atomic_sint32_t &LoggingStatus) XRAY_NEVER_INSTRUMENT {
+ return __sanitizer::atomic_load(&LoggingStatus,
+ __sanitizer::memory_order_acquire) ==
+ XRayLogInitStatus::XRAY_LOG_INITIALIZED;
+}
+
+} // namespace
+
+inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
+ char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+ static constexpr int InitRecordsCount = 2;
+ std::aligned_storage<sizeof(MetadataRecord)>::type Records[InitRecordsCount];
+ {
+ // Write out a MetadataRecord to signify that this is the start of a new
+ // buffer, associated with a particular thread, with a new CPU. For the
+ // data, we have 15 bytes to squeeze as much information as we can. At this
+ // point we only write down the following bytes:
+ // - Thread ID (pid_t, 4 bytes)
+ auto &NewBuffer = *reinterpret_cast<MetadataRecord *>(&Records[0]);
+ NewBuffer.Type = uint8_t(RecordType::Metadata);
+ NewBuffer.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewBuffer);
+ std::memcpy(&NewBuffer.Data, &Tid, sizeof(pid_t));
+ }
+ // Also write the WalltimeMarker record.
+ {
+ static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes");
+ auto &WalltimeMarker = *reinterpret_cast<MetadataRecord *>(&Records[1]);
+ WalltimeMarker.Type = uint8_t(RecordType::Metadata);
+ WalltimeMarker.RecordKind =
+ uint8_t(MetadataRecord::RecordKinds::WalltimeMarker);
+
+ // We only really need microsecond precision here, and enforce across
+ // platforms that we need 64-bit seconds and 32-bit microseconds encoded in
+ // the Metadata record.
+ int32_t Micros = TS.tv_nsec / 1000;
+ int64_t Seconds = TS.tv_sec;
+ std::memcpy(WalltimeMarker.Data, &Seconds, sizeof(Seconds));
+ std::memcpy(WalltimeMarker.Data + sizeof(Seconds), &Micros, sizeof(Micros));
+ }
+ std::memcpy(MemPtr, Records, sizeof(MetadataRecord) * InitRecordsCount);
+ MemPtr += sizeof(MetadataRecord) * InitRecordsCount;
+ NumConsecutiveFnEnters = 0;
+ NumTailCalls = 0;
+}
+
+inline void setupNewBuffer(int (*wall_clock_reader)(
+ clockid_t, struct timespec *)) XRAY_NEVER_INSTRUMENT {
+ RecordPtr = static_cast<char *>(Buffer.Buffer);
+ pid_t Tid = syscall(SYS_gettid);
+ timespec TS{0, 0};
+ // This is typically clock_gettime, but callers have injection ability.
+ wall_clock_reader(CLOCK_MONOTONIC, &TS);
+ writeNewBufferPreamble(Tid, TS, RecordPtr);
+ NumConsecutiveFnEnters = 0;
+ NumTailCalls = 0;
+}
+
+inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
+ char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+ MetadataRecord NewCPUId;
+ NewCPUId.Type = uint8_t(RecordType::Metadata);
+ NewCPUId.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewCPUId);
+
+ // The data for the New CPU will contain the following bytes:
+ // - CPU ID (uint16_t, 2 bytes)
+ // - Full TSC (uint64_t, 8 bytes)
+ // Total = 10 bytes.
+ std::memcpy(&NewCPUId.Data, &CPU, sizeof(CPU));
+ std::memcpy(&NewCPUId.Data[sizeof(CPU)], &TSC, sizeof(TSC));
+ std::memcpy(MemPtr, &NewCPUId, sizeof(MetadataRecord));
+ MemPtr += sizeof(MetadataRecord);
+ NumConsecutiveFnEnters = 0;
+ NumTailCalls = 0;
+}
+
+inline void writeNewCPUIdMetadata(uint16_t CPU,
+ uint64_t TSC) XRAY_NEVER_INSTRUMENT {
+ writeNewCPUIdMetadata(CPU, TSC, RecordPtr);
+}
+
+inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+ MetadataRecord EOBMeta;
+ EOBMeta.Type = uint8_t(RecordType::Metadata);
+ EOBMeta.RecordKind = uint8_t(MetadataRecord::RecordKinds::EndOfBuffer);
+ // For now we don't write any bytes into the Data field.
+ std::memcpy(MemPtr, &EOBMeta, sizeof(MetadataRecord));
+ MemPtr += sizeof(MetadataRecord);
+ NumConsecutiveFnEnters = 0;
+ NumTailCalls = 0;
+}
+
+inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
+ writeEOBMetadata(RecordPtr);
+}
+
+inline void writeTSCWrapMetadata(uint64_t TSC,
+ char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+ MetadataRecord TSCWrap;
+ TSCWrap.Type = uint8_t(RecordType::Metadata);
+ TSCWrap.RecordKind = uint8_t(MetadataRecord::RecordKinds::TSCWrap);
+
+ // The data for the TSCWrap record contains the following bytes:
+ // - Full TSC (uint64_t, 8 bytes)
+ // Total = 8 bytes.
+ std::memcpy(&TSCWrap.Data, &TSC, sizeof(TSC));
+ std::memcpy(MemPtr, &TSCWrap, sizeof(MetadataRecord));
+ MemPtr += sizeof(MetadataRecord);
+ NumConsecutiveFnEnters = 0;
+ NumTailCalls = 0;
+}
+
+inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
+ writeTSCWrapMetadata(TSC, RecordPtr);
+}
+
+inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
+ XRayEntryType EntryType,
+ char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+ std::aligned_storage<sizeof(FunctionRecord), alignof(FunctionRecord)>::type
+ AlignedFuncRecordBuffer;
+ auto &FuncRecord =
+ *reinterpret_cast<FunctionRecord *>(&AlignedFuncRecordBuffer);
+ FuncRecord.Type = uint8_t(RecordType::Function);
+ // Only take 28 bits of the function id.
+ FuncRecord.FuncId = FuncId & ~(0x0F << 28);
+ FuncRecord.TSCDelta = TSCDelta;
+
+ switch (EntryType) {
+ case XRayEntryType::ENTRY:
+ ++NumConsecutiveFnEnters;
+ FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
+ break;
+ case XRayEntryType::LOG_ARGS_ENTRY:
+ // We should not rewind functions with logged args.
+ NumConsecutiveFnEnters = 0;
+ NumTailCalls = 0;
+ FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
+ break;
+ case XRayEntryType::EXIT:
+ // If we've decided to log the function exit, we will never erase the log
+ // before it.
+ NumConsecutiveFnEnters = 0;
+ NumTailCalls = 0;
+ FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionExit);
+ break;
+ case XRayEntryType::TAIL:
+ // If we just entered the function we're tail exiting from or erased every
+ // invocation since then, this function entry tail pair is a candidate to
+ // be erased when the child function exits.
+ if (NumConsecutiveFnEnters > 0) {
+ ++NumTailCalls;
+ NumConsecutiveFnEnters = 0;
+ } else {
+ // We will never be able to erase this tail call since we have logged
+ // something in between the function entry and tail exit.
+ NumTailCalls = 0;
+ NumConsecutiveFnEnters = 0;
+ }
+ FuncRecord.RecordKind =
+ uint8_t(FunctionRecord::RecordKinds::FunctionTailExit);
+ break;
+ case XRayEntryType::CUSTOM_EVENT: {
+ // This is a bug in patching, so we'll report it once and move on.
+ static bool Once = [&] {
+ Report("Internal error: patched an XRay custom event call as a function; "
+ "func id = %d\n",
+ FuncId);
+ return true;
+ }();
+ (void)Once;
+ return;
+ }
+ }
+
+ std::memcpy(MemPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord));
+ MemPtr += sizeof(FunctionRecord);
+}
+
+static uint64_t thresholdTicks() {
+ static uint64_t TicksPerSec = probeRequiredCPUFeatures()
+ ? getTSCFrequency()
+ : __xray::NanosecondsPerSecond;
+ static const uint64_t ThresholdTicks =
+ TicksPerSec * flags()->xray_fdr_log_func_duration_threshold_us / 1000000;
+ return ThresholdTicks;
+}
+
+// Re-point the thread local pointer into this thread's Buffer before the recent
+// "Function Entry" record and any "Tail Call Exit" records after that.
+static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
+ uint64_t &LastFunctionEntryTSC, int32_t FuncId) {
+ using AlignedFuncStorage =
+ std::aligned_storage<sizeof(FunctionRecord),
+ alignof(FunctionRecord)>::type;
+ RecordPtr -= FunctionRecSize;
+ AlignedFuncStorage AlignedFuncRecordBuffer;
+ const auto &FuncRecord = *reinterpret_cast<FunctionRecord *>(
+ std::memcpy(&AlignedFuncRecordBuffer, RecordPtr, FunctionRecSize));
+ assert(FuncRecord.RecordKind ==
+ uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
+ "Expected to find function entry recording when rewinding.");
+ assert(FuncRecord.FuncId == (FuncId & ~(0x0F << 28)) &&
+ "Expected matching function id when rewinding Exit");
+ --NumConsecutiveFnEnters;
+ LastTSC -= FuncRecord.TSCDelta;
+
+ // We unwound one call. Update the state and return without writing a log.
+ if (NumConsecutiveFnEnters != 0) {
+ LastFunctionEntryTSC -= FuncRecord.TSCDelta;
+ return;
+ }
+
+ // Otherwise we've rewound the stack of all function entries; we might be
+ // able to rewind further by erasing tail-call functions that are being
+ // exited from via this exit.
+ LastFunctionEntryTSC = 0;
+ auto RewindingTSC = LastTSC;
+ auto RewindingRecordPtr = RecordPtr - FunctionRecSize;
+ while (NumTailCalls > 0) {
+ AlignedFuncStorage TailExitRecordBuffer;
+ // Rewind the TSC back over the TAIL EXIT record.
+ const auto &ExpectedTailExit =
+ *reinterpret_cast<FunctionRecord *>(std::memcpy(
+ &TailExitRecordBuffer, RewindingRecordPtr, FunctionRecSize));
+
+ assert(ExpectedTailExit.RecordKind ==
+ uint8_t(FunctionRecord::RecordKinds::FunctionTailExit) &&
+ "Expected to find tail exit when rewinding.");
+ RewindingRecordPtr -= FunctionRecSize;
+ RewindingTSC -= ExpectedTailExit.TSCDelta;
+ AlignedFuncStorage FunctionEntryBuffer;
+ const auto &ExpectedFunctionEntry = *reinterpret_cast<FunctionRecord *>(
+ std::memcpy(&FunctionEntryBuffer, RewindingRecordPtr, FunctionRecSize));
+ assert(ExpectedFunctionEntry.RecordKind ==
+ uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
+ "Expected to find function entry when rewinding tail call.");
+ assert(ExpectedFunctionEntry.FuncId == ExpectedTailExit.FuncId &&
+ "Expected funcids to match when rewinding tail call.");
+
+ // This tail call exceeded the threshold duration. It will not be erased.
+ if ((TSC - RewindingTSC) >= thresholdTicks()) {
+ NumTailCalls = 0;
+ return;
+ }
+
+ // We can erase a tail exit pair that we're exiting through since
+ // its duration is under threshold.
+ --NumTailCalls;
+ RewindingRecordPtr -= FunctionRecSize;
+ RewindingTSC -= ExpectedFunctionEntry.TSCDelta;
+ RecordPtr -= 2 * FunctionRecSize;
+ LastTSC = RewindingTSC;
+ }
+}
+
+inline bool releaseThreadLocalBuffer(BufferQueue *BQ) {
+ auto EC = BQ->releaseBuffer(Buffer);
+ if (EC != BufferQueue::ErrorCode::Ok) {
+ Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
+ BufferQueue::getErrorString(EC));
+ return false;
+ }
+ return true;
+}
+
+inline bool prepareBuffer(int (*wall_clock_reader)(clockid_t,
+ struct timespec *),
+ size_t MaxSize) XRAY_NEVER_INSTRUMENT {
+ char *BufferStart = static_cast<char *>(Buffer.Buffer);
+ if ((RecordPtr + MaxSize) > (BufferStart + Buffer.Size - MetadataRecSize)) {
+ writeEOBMetadata();
+ if (!releaseThreadLocalBuffer(LocalBQ.get()))
+ return false;
+ auto EC = LocalBQ->getBuffer(Buffer);
+ if (EC != BufferQueue::ErrorCode::Ok) {
+ Report("Failed to acquire a buffer; error=%s\n",
+ BufferQueue::getErrorString(EC));
+ return false;
+ }
+ setupNewBuffer(wall_clock_reader);
+ }
+ return true;
+}
+
+inline bool isLogInitializedAndReady(
+ std::shared_ptr<BufferQueue> &LocalBQ, uint64_t TSC, unsigned char CPU,
+ int (*wall_clock_reader)(clockid_t,
+ struct timespec *)) XRAY_NEVER_INSTRUMENT {
+ // Bail out right away if logging is not initialized yet.
+ // We should take the opportunity to release the buffer though.
+ auto Status = __sanitizer::atomic_load(&LoggingStatus,
+ __sanitizer::memory_order_acquire);
+ if (Status != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
+ if (RecordPtr != nullptr &&
+ (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
+ Status == XRayLogInitStatus::XRAY_LOG_FINALIZED)) {
+ writeEOBMetadata();
+ if (!releaseThreadLocalBuffer(LocalBQ.get()))
+ return false;
+ RecordPtr = nullptr;
+ LocalBQ = nullptr;
+ return false;
+ }
+ return false;
+ }
+
+ if (!loggingInitialized(LoggingStatus) || LocalBQ->finalizing()) {
+ writeEOBMetadata();
+ if (!releaseThreadLocalBuffer(LocalBQ.get()))
+ return false;
+ RecordPtr = nullptr;
+ }
+
+ if (Buffer.Buffer == nullptr) {
+ auto EC = LocalBQ->getBuffer(Buffer);
+ if (EC != BufferQueue::ErrorCode::Ok) {
+ auto LS = __sanitizer::atomic_load(&LoggingStatus,
+ __sanitizer::memory_order_acquire);
+ if (LS != XRayLogInitStatus::XRAY_LOG_FINALIZING &&
+ LS != XRayLogInitStatus::XRAY_LOG_FINALIZED)
+ Report("Failed to acquire a buffer; error=%s\n",
+ BufferQueue::getErrorString(EC));
+ return false;
+ }
+
+ setupNewBuffer(wall_clock_reader);
+ }
+
+ if (CurrentCPU == std::numeric_limits<uint16_t>::max()) {
+ // This means this is the first CPU this thread has ever run on. We set
+ // the current CPU and record this as the first TSC we've seen.
+ CurrentCPU = CPU;
+ writeNewCPUIdMetadata(CPU, TSC);
+ }
+
+ return true;
+}
+
+inline void endBufferIfFull() XRAY_NEVER_INSTRUMENT {
+ auto BufferStart = static_cast<char *>(Buffer.Buffer);
+ if ((RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) {
+ writeEOBMetadata();
+ if (!releaseThreadLocalBuffer(LocalBQ.get()))
+ return;
+ RecordPtr = nullptr;
+ }
+}
+
+inline void processFunctionHook(
+ int32_t FuncId, XRayEntryType Entry, uint64_t TSC, unsigned char CPU,
+ int (*wall_clock_reader)(clockid_t, struct timespec *),
+ __sanitizer::atomic_sint32_t &LoggingStatus,
+ const std::shared_ptr<BufferQueue> &BQ) XRAY_NEVER_INSTRUMENT {
+ // Prevent signal handler recursion: if we're already in a log-writing mode
+ // and a signal handler comes in (and is also instrumented), we don't want it
+ // clobbering potentially partial writes already happening in the thread. We
+ // use a simple thread_local latch to only allow one ongoing handleArg0 call
+ // at any given time.
+ thread_local bool Running = false;
+ RecursionGuard Guard{Running};
+ if (!Guard) {
+ assert(Running == true && "RecursionGuard is buggy!");
+ return;
+ }
+
+ // In case the reference has been cleaned up before, we make sure we
+ // initialize it to the provided BufferQueue.
+ if (LocalBQ == nullptr)
+ LocalBQ = BQ;
+
+ if (!isLogInitializedAndReady(LocalBQ, TSC, CPU, wall_clock_reader))
+ return;
+
+ // Before we go setting up writing new function entries, we need to be really
+ // careful about the pointer math we're doing. This means we need to ensure
+ // that the record we are about to write is going to fit into the buffer,
+ // without overflowing the buffer.
+ //
+ // To do this properly, we use the following assumptions:
+ //
+ // - The least number of bytes we will ever write is 8
+ // (sizeof(FunctionRecord)) only if the delta between the previous entry
+ // and this entry is within 32 bits.
+ // - The most number of bytes we will ever write is 8 + 16 = 24. This is
+ // computed by:
+ //
+ // sizeof(FunctionRecord) + sizeof(MetadataRecord)
+ //
+ // These arise in the following cases:
+ //
+ // 1. When the delta between the TSC we get and the previous TSC for the
+ // same CPU is outside of the uint32_t range, we end up having to
+ // write a MetadataRecord to indicate a "tsc wrap" before the actual
+ // FunctionRecord.
+ // 2. When we learn that we've moved CPUs, we need to write a
+ // MetadataRecord to indicate a "cpu change", and thus write out the
+ // current TSC for that CPU before writing out the actual
+ // FunctionRecord.
+ // 3. When we learn about a new CPU ID, we need to write down a "new cpu
+ // id" MetadataRecord before writing out the actual FunctionRecord.
+ //
+ // - An End-of-Buffer (EOB) MetadataRecord is 16 bytes.
+ //
+  // So the check we need to make is whether writing 24 bytes past the current
+  // pointer still leaves enough room to write the EOB MetadataRecord. If
+  // writing as much as 24 bytes would not leave that room at the end of the
+  // buffer, we need to write out the EOB, get a new buffer, and set it up
+  // properly before doing any further writing.
+ //
+ if (!prepareBuffer(wall_clock_reader, FunctionRecSize + MetadataRecSize)) {
+ LocalBQ = nullptr;
+ return;
+ }
+
+ // By this point, we are now ready to write at most 24 bytes (one metadata
+ // record and one function record).
+ assert((RecordPtr + (MetadataRecSize + FunctionRecSize)) -
+ static_cast<char *>(Buffer.Buffer) >=
+ static_cast<ptrdiff_t>(MetadataRecSize) &&
+ "Misconfigured BufferQueue provided; Buffer size not large enough.");
+
+ // Here we compute the TSC Delta. There are a few interesting situations we
+ // need to account for:
+ //
+ // - The thread has migrated to a different CPU. If this is the case, then
+ // we write down the following records:
+ //
+ // 1. A 'NewCPUId' Metadata record.
+ // 2. A FunctionRecord with a 0 for the TSCDelta field.
+ //
+ // - The TSC delta is greater than the 32 bits we can store in a
+ // FunctionRecord. In this case we write down the following records:
+ //
+ // 1. A 'TSCWrap' Metadata record.
+ // 2. A FunctionRecord with a 0 for the TSCDelta field.
+ //
+ // - The TSC delta is representable within the 32 bits we can store in a
+ // FunctionRecord. In this case we write down just a FunctionRecord with
+ // the correct TSC delta.
+ //
+ uint32_t RecordTSCDelta = 0;
+ if (CPU != CurrentCPU) {
+ // We've moved to a new CPU.
+ writeNewCPUIdMetadata(CPU, TSC);
+ } else {
+    // If the delta is greater than the range of a uint32_t, then we write out
+    // the TSC wrap metadata entry with the full TSC, and the TSC delta for the
+    // function record will be 0.
+ auto Delta = TSC - LastTSC;
+ if (Delta > (1ULL << 32) - 1)
+ writeTSCWrapMetadata(TSC);
+ else
+ RecordTSCDelta = Delta;
+ }
+
+ LastTSC = TSC;
+ CurrentCPU = CPU;
+ switch (Entry) {
+ case XRayEntryType::ENTRY:
+ case XRayEntryType::LOG_ARGS_ENTRY:
+ // Update the thread local state for the next invocation.
+ LastFunctionEntryTSC = TSC;
+ break;
+ case XRayEntryType::TAIL:
+ break;
+ case XRayEntryType::EXIT:
+ // Break out and write the exit record if we can't erase any functions.
+ if (NumConsecutiveFnEnters == 0 ||
+ (TSC - LastFunctionEntryTSC) >= thresholdTicks())
+ break;
+ rewindRecentCall(TSC, LastTSC, LastFunctionEntryTSC, FuncId);
+ return; // without writing log.
+ case XRayEntryType::CUSTOM_EVENT: {
+ // This is a bug in patching, so we'll report it once and move on.
+ static bool Once = [&] {
+ Report("Internal error: patched an XRay custom event call as a function; "
+ "func id = %d",
+ FuncId);
+ return true;
+ }();
+ (void)Once;
+ return;
+ }
+ }
+
+ writeFunctionRecord(FuncId, RecordTSCDelta, Entry, RecordPtr);
+
+ // If we've exhausted the buffer by this time, we then release the buffer to
+ // make sure that other threads may start using this buffer.
+ endBufferIfFull();
+}
+
+} // namespace __xray_fdr_internal
+} // namespace __xray
+
+#endif // XRAY_XRAY_FDR_LOGGING_IMPL_H
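To make the buffer-space and delta-encoding reasoning in the hunk above concrete, here is a minimal standalone sketch; the struct and helper names are illustrative stand-ins for the writeNewCPUIdMetadata/writeTSCWrapMetadata/writeFunctionRecord machinery, and the 8/16-byte record sizes are taken from the comments above.

    #include <cstdint>

    // Sketch only: mirrors the three cases discussed above (CPU migration,
    // 32-bit overflow of the TSC delta, and the common in-range case).
    struct DeltaDecision {
      bool NeedNewCPUIdRecord; // 16-byte metadata record; delta forced to 0
      bool NeedTSCWrapRecord;  // 16-byte metadata record; delta forced to 0
      uint32_t Delta;          // 32-bit delta stored in the 8-byte FunctionRecord
    };

    inline DeltaDecision decideDelta(uint64_t TSC, uint64_t LastTSC,
                                     uint16_t CPU, uint16_t CurrentCPU) {
      DeltaDecision D{false, false, 0};
      if (CPU != CurrentCPU)
        D.NeedNewCPUIdRecord = true; // full TSC travels in the metadata record
      else if (TSC - LastTSC > UINT32_MAX)
        D.NeedTSCWrapRecord = true;  // full TSC travels in the metadata record
      else
        D.Delta = static_cast<uint32_t>(TSC - LastTSC);
      return D;
    }

    // Worst case per event: 16 (metadata) + 8 (function record) = 24 bytes,
    // which is why prepareBuffer() above asks for
    // FunctionRecSize + MetadataRecSize before any writing happens.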
diff --git a/contrib/compiler-rt/lib/xray/xray_flags.cc b/contrib/compiler-rt/lib/xray/xray_flags.cc
index 338c237..1ee4d10 100644
--- a/contrib/compiler-rt/lib/xray/xray_flags.cc
+++ b/contrib/compiler-rt/lib/xray/xray_flags.cc
@@ -24,31 +24,55 @@ namespace __xray {
Flags xray_flags_dont_use_directly; // use via flags().
-void Flags::SetDefaults() XRAY_NEVER_INSTRUMENT {
+void Flags::setDefaults() XRAY_NEVER_INSTRUMENT {
#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
#include "xray_flags.inc"
#undef XRAY_FLAG
}
-static void RegisterXRayFlags(FlagParser *P, Flags *F) XRAY_NEVER_INSTRUMENT {
+static void registerXRayFlags(FlagParser *P, Flags *F) XRAY_NEVER_INSTRUMENT {
#define XRAY_FLAG(Type, Name, DefaultValue, Description) \
RegisterFlag(P, #Name, Description, &F->Name);
#include "xray_flags.inc"
#undef XRAY_FLAG
}
-void InitializeFlags() XRAY_NEVER_INSTRUMENT {
+// This function returns a statically defined list of options that control
+// XRay, provided at build time of the XRay runtime through the
+// XRAY_DEFAULT_OPTIONS macro. This means users/deployments can tweak the
+// hard-coded defaults in xray_flags.inc at compile time.
+static const char *useCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
+#ifdef XRAY_DEFAULT_OPTIONS
+// Do the double-layered string conversion to prevent badly crafted strings
+// provided through XRAY_DEFAULT_OPTIONS from causing compilation issues (or
+// changing the semantics of the implementation through the macro). This
+// ensures that whatever XRAY_DEFAULT_OPTIONS expands to is converted into a
+// string literal.
+#define XRAY_STRINGIZE(x) #x
+#define XRAY_STRINGIZE_OPTIONS(options) XRAY_STRINGIZE(options)
+ return XRAY_STRINGIZE_OPTIONS(XRAY_DEFAULT_OPTIONS);
+#else
+ return "";
+#endif
+}
+
+void initializeFlags() XRAY_NEVER_INSTRUMENT {
SetCommonFlagsDefaults();
auto *F = flags();
- F->SetDefaults();
+ F->setDefaults();
FlagParser XRayParser;
- RegisterXRayFlags(&XRayParser, F);
+ registerXRayFlags(&XRayParser, F);
RegisterCommonFlags(&XRayParser);
- // Override from command line.
+ // Use options defaulted at compile-time for the runtime.
+ const char *XRayCompileFlags = useCompilerDefinedFlags();
+ XRayParser.ParseString(XRayCompileFlags);
+
+ // Override from environment variables.
XRayParser.ParseString(GetEnv("XRAY_OPTIONS"));
+ // Override from command line.
InitializeCommonFlags();
if (Verbosity())
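A minimal sketch of the double-expansion trick used by useCompilerDefinedFlags() above; the option string in the define is hypothetical and only illustrates what the two macros produce.

    // Hypothetical build-time define, e.g. passed as
    // -DXRAY_DEFAULT_OPTIONS=patch_premain=true:xray_fdr_log=true
    #define XRAY_DEFAULT_OPTIONS patch_premain=true:xray_fdr_log=true
    #define XRAY_STRINGIZE(x) #x
    #define XRAY_STRINGIZE_OPTIONS(options) XRAY_STRINGIZE(options)

    // The outer macro expands XRAY_DEFAULT_OPTIONS first, then the inner macro
    // stringizes that expansion, yielding the string literal
    // "patch_premain=true:xray_fdr_log=true" for the flag parser.
    static const char *CompileTimeDefaults =
        XRAY_STRINGIZE_OPTIONS(XRAY_DEFAULT_OPTIONS);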
diff --git a/contrib/compiler-rt/lib/xray/xray_flags.h b/contrib/compiler-rt/lib/xray/xray_flags.h
index 2ecf5fb..f4e3028 100644
--- a/contrib/compiler-rt/lib/xray/xray_flags.h
+++ b/contrib/compiler-rt/lib/xray/xray_flags.h
@@ -24,13 +24,13 @@ struct Flags {
#include "xray_flags.inc"
#undef XRAY_FLAG
- void SetDefaults();
+ void setDefaults();
};
extern Flags xray_flags_dont_use_directly;
inline Flags *flags() { return &xray_flags_dont_use_directly; }
-void InitializeFlags();
+void initializeFlags();
} // namespace __xray
diff --git a/contrib/compiler-rt/lib/xray/xray_flags.inc b/contrib/compiler-rt/lib/xray/xray_flags.inc
index 0f6ced8..7ddce78 100644
--- a/contrib/compiler-rt/lib/xray/xray_flags.inc
+++ b/contrib/compiler-rt/lib/xray/xray_flags.inc
@@ -14,9 +14,14 @@
#error "Define XRAY_FLAG prior to including this file!"
#endif
-XRAY_FLAG(bool, patch_premain, true,
+XRAY_FLAG(bool, patch_premain, false,
"Whether to patch instrumentation points before main.")
XRAY_FLAG(bool, xray_naive_log, true,
"Whether to install the naive log implementation.")
XRAY_FLAG(const char *, xray_logfile_base, "xray-log.",
"Filename base for the xray logfile.")
+XRAY_FLAG(bool, xray_fdr_log, false,
+ "Whether to install the flight data recorder logging implementation.")
+XRAY_FLAG(int, xray_fdr_log_func_duration_threshold_us, 5,
+ "FDR logging will try to skip functions that execute for fewer "
+ "microseconds than this threshold.")
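As a rough illustration of how the microsecond threshold above interacts with the FDR exit handling shown earlier (functions shorter than the threshold are rewound instead of logged), here is a hedged sketch of the tick conversion; the helper name is not the runtime's own.

    #include <cstdint>

    // Sketch: convert the microsecond threshold flag into TSC ticks, given the
    // cycle frequency recorded in the log header.
    inline uint64_t thresholdTicksFor(uint64_t CycleFrequencyHz,
                                      int ThresholdMicroseconds) {
      return CycleFrequencyHz * static_cast<uint64_t>(ThresholdMicroseconds) /
             1000000ull;
    }
    // e.g. at 2.5 GHz the default of 5 microseconds comes out to 12,500 ticks.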
diff --git a/contrib/compiler-rt/lib/xray/xray_init.cc b/contrib/compiler-rt/lib/xray/xray_init.cc
index eb86182..aa660ba 100644
--- a/contrib/compiler-rt/lib/xray/xray_init.cc
+++ b/contrib/compiler-rt/lib/xray/xray_init.cc
@@ -12,7 +12,6 @@
// XRay initialisation logic.
//===----------------------------------------------------------------------===//
-#include <atomic>
#include <fcntl.h>
#include <strings.h>
#include <unistd.h>
@@ -26,9 +25,10 @@ extern "C" {
void __xray_init();
extern const XRaySledEntry __start_xray_instr_map[] __attribute__((weak));
extern const XRaySledEntry __stop_xray_instr_map[] __attribute__((weak));
+extern const XRayFunctionSledIndex __start_xray_fn_idx[] __attribute__((weak));
+extern const XRayFunctionSledIndex __stop_xray_fn_idx[] __attribute__((weak));
}
-using namespace __sanitizer;
using namespace __xray;
// When set to 'true' this means the XRay runtime has been initialised. We use
@@ -38,29 +38,30 @@ using namespace __xray;
//
// FIXME: Support DSO instrumentation maps too. The current solution only works
// for statically linked executables.
-std::atomic<bool> XRayInitialized{false};
+__sanitizer::atomic_uint8_t XRayInitialized{0};
// This should always be updated before XRayInitialized is updated.
-std::atomic<__xray::XRaySledMap> XRayInstrMap{};
+__sanitizer::SpinMutex XRayInstrMapMutex;
+XRaySledMap XRayInstrMap;
// __xray_init() will do the actual loading of the current process' memory map
// and then proceed to look for the .xray_instr_map section/segment.
void __xray_init() XRAY_NEVER_INSTRUMENT {
- InitializeFlags();
+ initializeFlags();
if (__start_xray_instr_map == nullptr) {
Report("XRay instrumentation map missing. Not initializing XRay.\n");
return;
}
- // Now initialize the XRayInstrMap global struct with the address of the
- // entries, reinterpreted as an array of XRaySledEntry objects. We use the
- // virtual pointer we have from the section to provide us the correct
- // information.
- __xray::XRaySledMap SledMap{};
- SledMap.Sleds = __start_xray_instr_map;
- SledMap.Entries = __stop_xray_instr_map - __start_xray_instr_map;
- XRayInstrMap.store(SledMap, std::memory_order_release);
- XRayInitialized.store(true, std::memory_order_release);
+ {
+ __sanitizer::SpinMutexLock Guard(&XRayInstrMapMutex);
+ XRayInstrMap.Sleds = __start_xray_instr_map;
+ XRayInstrMap.Entries = __stop_xray_instr_map - __start_xray_instr_map;
+ XRayInstrMap.SledsIndex = __start_xray_fn_idx;
+ XRayInstrMap.Functions = __stop_xray_fn_idx - __start_xray_fn_idx;
+ }
+ __sanitizer::atomic_store(&XRayInitialized, true,
+ __sanitizer::memory_order_release);
if (flags()->patch_premain)
__xray_patch();
diff --git a/contrib/compiler-rt/lib/xray/xray_inmemory_log.cc b/contrib/compiler-rt/lib/xray/xray_inmemory_log.cc
index adcb216..83aecfa 100644
--- a/contrib/compiler-rt/lib/xray/xray_inmemory_log.cc
+++ b/contrib/compiler-rt/lib/xray/xray_inmemory_log.cc
@@ -16,8 +16,6 @@
//===----------------------------------------------------------------------===//
#include <cassert>
-#include <cstdint>
-#include <cstdio>
#include <fcntl.h>
#include <mutex>
#include <sys/stat.h>
@@ -26,19 +24,13 @@
#include <thread>
#include <unistd.h>
-#if defined(__x86_64__)
-#include "xray_x86_64.h"
-#elif defined(__arm__) || defined(__aarch64__)
-#include "xray_emulate_tsc.h"
-#else
-#error "Unsupported CPU Architecture"
-#endif /* Architecture-specific inline intrinsics */
-
#include "sanitizer_common/sanitizer_libc.h"
#include "xray/xray_records.h"
#include "xray_defs.h"
#include "xray_flags.h"
#include "xray_interface_internal.h"
+#include "xray_tsc.h"
+#include "xray_utils.h"
// __xray_InMemoryRawLog will use a thread-local aligned buffer capped to a
// certain size (32kb by default) and use it as if it were a circular buffer for
@@ -53,25 +45,6 @@ namespace __xray {
std::mutex LogMutex;
-static void retryingWriteAll(int Fd, char *Begin,
- char *End) XRAY_NEVER_INSTRUMENT {
- if (Begin == End)
- return;
- auto TotalBytes = std::distance(Begin, End);
- while (auto Written = write(Fd, Begin, TotalBytes)) {
- if (Written < 0) {
- if (errno == EINTR)
- continue; // Try again.
- Report("Failed to write; errno = %d\n", errno);
- return;
- }
- TotalBytes -= Written;
- if (TotalBytes == 0)
- break;
- Begin += Written;
- }
-}
-
class ThreadExitFlusher {
int Fd;
XRayRecord *Start;
@@ -102,41 +75,15 @@ public:
using namespace __xray;
-void PrintToStdErr(const char *Buffer) XRAY_NEVER_INSTRUMENT {
- fprintf(stderr, "%s", Buffer);
-}
-
static int __xray_OpenLogFile() XRAY_NEVER_INSTRUMENT {
- // FIXME: Figure out how to make this less stderr-dependent.
- SetPrintfAndReportCallback(PrintToStdErr);
- // Open a temporary file once for the log.
- static char TmpFilename[256] = {};
- static char TmpWildcardPattern[] = "XXXXXX";
- auto Argv = GetArgv();
- const char *Progname = Argv[0] == nullptr ? "(unknown)" : Argv[0];
- const char *LastSlash = internal_strrchr(Progname, '/');
-
- if (LastSlash != nullptr)
- Progname = LastSlash + 1;
-
- const int HalfLength = sizeof(TmpFilename) / 2 - sizeof(TmpWildcardPattern);
- int NeededLength = internal_snprintf(TmpFilename, sizeof(TmpFilename),
- "%.*s%.*s.%s",
- HalfLength, flags()->xray_logfile_base,
- HalfLength, Progname,
- TmpWildcardPattern);
- if (NeededLength > int(sizeof(TmpFilename))) {
- Report("XRay log file name too long (%d): %s\n", NeededLength, TmpFilename);
- return -1;
- }
- int Fd = mkstemp(TmpFilename);
- if (Fd == -1) {
- Report("XRay: Failed opening temporary file '%s'; not logging events.\n",
- TmpFilename);
+ int F = getLogFD();
+ if (F == -1)
return -1;
- }
- if (Verbosity())
- fprintf(stderr, "XRay: Log file in '%s'\n", TmpFilename);
+
+ // Test for required CPU features and cache the cycle frequency
+ static bool TSCSupported = probeRequiredCPUFeatures();
+ static uint64_t CycleFrequency = TSCSupported ? getTSCFrequency()
+ : __xray::NanosecondsPerSecond;
// Since we're here, we get to write the header. We set it up so that the
// header will only be written once, at the start, and let the threads
@@ -144,19 +91,20 @@ static int __xray_OpenLogFile() XRAY_NEVER_INSTRUMENT {
XRayFileHeader Header;
Header.Version = 1;
Header.Type = FileTypes::NAIVE_LOG;
- Header.CycleFrequency = __xray::cycleFrequency();
+ Header.CycleFrequency = CycleFrequency;
// FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
// before setting the values in the header.
Header.ConstantTSC = 1;
Header.NonstopTSC = 1;
- retryingWriteAll(Fd, reinterpret_cast<char *>(&Header),
+ retryingWriteAll(F, reinterpret_cast<char *>(&Header),
reinterpret_cast<char *>(&Header) + sizeof(Header));
- return Fd;
+ return F;
}
-void __xray_InMemoryRawLog(int32_t FuncId,
- XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
+template <class RDTSC>
+void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
+ RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
using Buffer =
std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
static constexpr size_t BuffLen = 1024;
@@ -173,7 +121,7 @@ void __xray_InMemoryRawLog(int32_t FuncId,
// through a pointer offset.
auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset];
R.RecordType = RecordTypes::NORMAL;
- R.TSC = __xray::readTSC(R.CPU);
+ R.TSC = ReadTSC(R.CPU);
R.TId = TId;
R.Type = Type;
R.FuncId = FuncId;
@@ -187,8 +135,32 @@ void __xray_InMemoryRawLog(int32_t FuncId,
}
}
-static auto Unused = [] {
+void __xray_InMemoryRawLogRealTSC(int32_t FuncId,
+ XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
+ __xray_InMemoryRawLog(FuncId, Type, __xray::readTSC);
+}
+
+void __xray_InMemoryEmulateTSC(int32_t FuncId,
+ XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
+ __xray_InMemoryRawLog(FuncId, Type, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+ timespec TS;
+ int result = clock_gettime(CLOCK_REALTIME, &TS);
+ if (result != 0) {
+      Report("clock_gettime(2) returned %d, errno=%d.\n", result, int(errno));
+ TS = {0, 0};
+ }
+ CPU = 0;
+ return TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
+ });
+}
+
+static auto UNUSED Unused = [] {
+ auto UseRealTSC = probeRequiredCPUFeatures();
+ if (!UseRealTSC)
+ Report("WARNING: Required CPU features missing for XRay instrumentation, "
+ "using emulation instead.\n");
if (flags()->xray_naive_log)
- __xray_set_handler(__xray_InMemoryRawLog);
+ __xray_set_handler(UseRealTSC ? __xray_InMemoryRawLogRealTSC
+ : __xray_InMemoryEmulateTSC);
return true;
}();
diff --git a/contrib/compiler-rt/lib/xray/xray_interface.cc b/contrib/compiler-rt/lib/xray/xray_interface.cc
index 20a2b66..694d34c 100644
--- a/contrib/compiler-rt/lib/xray/xray_interface.cc
+++ b/contrib/compiler-rt/lib/xray/xray_interface.cc
@@ -15,7 +15,6 @@
#include "xray_interface_internal.h"
-#include <atomic>
#include <cstdint>
#include <cstdio>
#include <errno.h>
@@ -35,12 +34,24 @@ static const int16_t cSledLength = 12;
static const int16_t cSledLength = 32;
#elif defined(__arm__)
static const int16_t cSledLength = 28;
+#elif SANITIZER_MIPS32
+static const int16_t cSledLength = 48;
+#elif SANITIZER_MIPS64
+static const int16_t cSledLength = 64;
+#elif defined(__powerpc64__)
+static const int16_t cSledLength = 8;
#else
#error "Unsupported CPU Architecture"
#endif /* CPU architecture */
// This is the function to call when we encounter the entry or exit sleds.
-std::atomic<void (*)(int32_t, XRayEntryType)> XRayPatchedFunction{nullptr};
+__sanitizer::atomic_uintptr_t XRayPatchedFunction{0};
+
+// This is the function to call from the arg1-enabled sleds/trampolines.
+__sanitizer::atomic_uintptr_t XRayArgLogger{0};
+
+// This is the function to call when we encounter a custom event log call.
+__sanitizer::atomic_uintptr_t XRayPatchedCustomEvent{0};
// MProtectHelper is an RAII wrapper for calls to mprotect(...) that will undo
// any successful mprotect(...) changes. This is used to make a page writeable
@@ -79,23 +90,45 @@ public:
} // namespace __xray
-extern std::atomic<bool> XRayInitialized;
-extern std::atomic<__xray::XRaySledMap> XRayInstrMap;
+extern __sanitizer::SpinMutex XRayInstrMapMutex;
+extern __sanitizer::atomic_uint8_t XRayInitialized;
+extern __xray::XRaySledMap XRayInstrMap;
int __xray_set_handler(void (*entry)(int32_t,
XRayEntryType)) XRAY_NEVER_INSTRUMENT {
- if (XRayInitialized.load(std::memory_order_acquire)) {
- __xray::XRayPatchedFunction.store(entry, std::memory_order_release);
+ if (__sanitizer::atomic_load(&XRayInitialized,
+ __sanitizer::memory_order_acquire)) {
+
+ __sanitizer::atomic_store(&__xray::XRayPatchedFunction,
+ reinterpret_cast<uintptr_t>(entry),
+ __sanitizer::memory_order_release);
+ return 1;
+ }
+ return 0;
+}
+
+int __xray_set_customevent_handler(void (*entry)(void *, size_t))
+ XRAY_NEVER_INSTRUMENT {
+ if (__sanitizer::atomic_load(&XRayInitialized,
+ __sanitizer::memory_order_acquire)) {
+ __sanitizer::atomic_store(&__xray::XRayPatchedCustomEvent,
+ reinterpret_cast<uintptr_t>(entry),
+ __sanitizer::memory_order_release);
return 1;
}
return 0;
}
+
int __xray_remove_handler() XRAY_NEVER_INSTRUMENT {
return __xray_set_handler(nullptr);
}
-std::atomic<bool> XRayPatching{false};
+int __xray_remove_customevent_handler() XRAY_NEVER_INSTRUMENT {
+ return __xray_set_customevent_handler(nullptr);
+}
+
+__sanitizer::atomic_uint8_t XRayPatching{0};
using namespace __xray;
@@ -115,34 +148,76 @@ public:
};
template <class Function>
-CleanupInvoker<Function> ScopeCleanup(Function Fn) XRAY_NEVER_INSTRUMENT {
+CleanupInvoker<Function> scopeCleanup(Function Fn) XRAY_NEVER_INSTRUMENT {
return CleanupInvoker<Function>{Fn};
}
-// ControlPatching implements the common internals of the patching/unpatching
+inline bool patchSled(const XRaySledEntry &Sled, bool Enable,
+ int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+  // To patch the nop sled we first mprotect the page containing the function
+  // so that it becomes writeable.
+ const uint64_t PageSize = GetPageSizeCached();
+ void *PageAlignedAddr =
+ reinterpret_cast<void *>(Sled.Address & ~(PageSize - 1));
+ std::size_t MProtectLen = (Sled.Address + cSledLength) -
+ reinterpret_cast<uint64_t>(PageAlignedAddr);
+ MProtectHelper Protector(PageAlignedAddr, MProtectLen);
+ if (Protector.MakeWriteable() == -1) {
+ printf("Failed mprotect: %d\n", errno);
+    return false;
+ }
+
+ bool Success = false;
+ switch (Sled.Kind) {
+ case XRayEntryType::ENTRY:
+ Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_FunctionEntry);
+ break;
+ case XRayEntryType::EXIT:
+ Success = patchFunctionExit(Enable, FuncId, Sled);
+ break;
+ case XRayEntryType::TAIL:
+ Success = patchFunctionTailExit(Enable, FuncId, Sled);
+ break;
+ case XRayEntryType::LOG_ARGS_ENTRY:
+ Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_ArgLoggerEntry);
+ break;
+ case XRayEntryType::CUSTOM_EVENT:
+ Success = patchCustomEvent(Enable, FuncId, Sled);
+ break;
+ default:
+    Report("Unsupported sled kind '%d' @%04x\n", int(Sled.Kind),
+           unsigned(Sled.Address));
+ return false;
+ }
+ return Success;
+}
+
+// controlPatching implements the common internals of the patching/unpatching
// implementation. |Enable| defines whether we're enabling or disabling the
// runtime XRay instrumentation.
-XRayPatchingStatus ControlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
- if (!XRayInitialized.load(std::memory_order_acquire))
+XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
+ if (!__sanitizer::atomic_load(&XRayInitialized,
+ __sanitizer::memory_order_acquire))
return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
- static bool NotPatching = false;
- if (!XRayPatching.compare_exchange_strong(NotPatching, true,
- std::memory_order_acq_rel,
- std::memory_order_acquire)) {
+ uint8_t NotPatching = false;
+ if (!__sanitizer::atomic_compare_exchange_strong(
+ &XRayPatching, &NotPatching, true, __sanitizer::memory_order_acq_rel))
return XRayPatchingStatus::ONGOING; // Already patching.
- }
- bool PatchingSuccess = false;
- auto XRayPatchingStatusResetter = ScopeCleanup([&PatchingSuccess] {
- if (!PatchingSuccess) {
- XRayPatching.store(false, std::memory_order_release);
- }
+ uint8_t PatchingSuccess = false;
+ auto XRayPatchingStatusResetter = scopeCleanup([&PatchingSuccess] {
+ if (!PatchingSuccess)
+ __sanitizer::atomic_store(&XRayPatching, false,
+ __sanitizer::memory_order_release);
});
// Step 1: Compute the function id, as a unique identifier per function in the
// instrumentation map.
- XRaySledMap InstrMap = XRayInstrMap.load(std::memory_order_acquire);
+ XRaySledMap InstrMap;
+ {
+ __sanitizer::SpinMutexLock Guard(&XRayInstrMapMutex);
+ InstrMap = XRayInstrMap;
+ }
if (InstrMap.Entries == 0)
return XRayPatchingStatus::NOT_INITIALIZED;
@@ -163,45 +238,110 @@ XRayPatchingStatus ControlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
++FuncId;
CurFun = F;
}
-
- // While we're here, we should patch the nop sled. To do that we mprotect
- // the page containing the function to be writeable.
- void *PageAlignedAddr =
- reinterpret_cast<void *>(Sled.Address & ~(PageSize - 1));
- std::size_t MProtectLen = (Sled.Address + cSledLength) -
- reinterpret_cast<uint64_t>(PageAlignedAddr);
- MProtectHelper Protector(PageAlignedAddr, MProtectLen);
- if (Protector.MakeWriteable() == -1) {
- printf("Failed mprotect: %d\n", errno);
- return XRayPatchingStatus::FAILED;
- }
-
- bool Success = false;
- switch (Sled.Kind) {
- case XRayEntryType::ENTRY:
- Success = patchFunctionEntry(Enable, FuncId, Sled);
- break;
- case XRayEntryType::EXIT:
- Success = patchFunctionExit(Enable, FuncId, Sled);
- break;
- case XRayEntryType::TAIL:
- Success = patchFunctionTailExit(Enable, FuncId, Sled);
- break;
- default:
- Report("Unsupported sled kind: %d\n", int(Sled.Kind));
- continue;
- }
- (void)Success;
+ patchSled(Sled, Enable, FuncId);
}
- XRayPatching.store(false, std::memory_order_release);
+ __sanitizer::atomic_store(&XRayPatching, false,
+ __sanitizer::memory_order_release);
PatchingSuccess = true;
return XRayPatchingStatus::SUCCESS;
}
XRayPatchingStatus __xray_patch() XRAY_NEVER_INSTRUMENT {
- return ControlPatching(true);
+ return controlPatching(true);
}
XRayPatchingStatus __xray_unpatch() XRAY_NEVER_INSTRUMENT {
- return ControlPatching(false);
+ return controlPatching(false);
+}
+
+XRayPatchingStatus patchFunction(int32_t FuncId,
+ bool Enable) XRAY_NEVER_INSTRUMENT {
+ if (!__sanitizer::atomic_load(&XRayInitialized,
+ __sanitizer::memory_order_acquire))
+ return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
+
+ uint8_t NotPatching = false;
+ if (!__sanitizer::atomic_compare_exchange_strong(
+ &XRayPatching, &NotPatching, true, __sanitizer::memory_order_acq_rel))
+ return XRayPatchingStatus::ONGOING; // Already patching.
+
+ // Next, we look for the function index.
+ XRaySledMap InstrMap;
+ {
+ __sanitizer::SpinMutexLock Guard(&XRayInstrMapMutex);
+ InstrMap = XRayInstrMap;
+ }
+
+ // If we don't have an index, we can't patch individual functions.
+ if (InstrMap.Functions == 0)
+ return XRayPatchingStatus::NOT_INITIALIZED;
+
+ // FuncId must be a positive number, less than the number of functions
+ // instrumented.
+ if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions) {
+ Report("Invalid function id provided: %d\n", FuncId);
+ return XRayPatchingStatus::FAILED;
+ }
+
+  // Now we patch the sleds for this specific function.
+ auto SledRange = InstrMap.SledsIndex[FuncId - 1];
+ auto *f = SledRange.Begin;
+ auto *e = SledRange.End;
+
+ bool SucceedOnce = false;
+ while (f != e)
+ SucceedOnce |= patchSled(*f++, Enable, FuncId);
+
+ __sanitizer::atomic_store(&XRayPatching, false,
+ __sanitizer::memory_order_release);
+
+ if (!SucceedOnce) {
+ Report("Failed patching any sled for function '%d'.", FuncId);
+ return XRayPatchingStatus::FAILED;
+ }
+
+ return XRayPatchingStatus::SUCCESS;
+}
+
+XRayPatchingStatus __xray_patch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+ return patchFunction(FuncId, true);
+}
+
+XRayPatchingStatus
+__xray_unpatch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+ return patchFunction(FuncId, false);
+}
+
+int __xray_set_handler_arg1(void (*entry)(int32_t, XRayEntryType, uint64_t)) {
+ if (!__sanitizer::atomic_load(&XRayInitialized,
+ __sanitizer::memory_order_acquire))
+ return 0;
+
+  // A relaxed write might not be visible to other threads even after the
+  // current thread gets scheduled on a different CPU/NUMA node. We use a
+  // release store so that every thread observes the installed handler and the
+  // collected data stays consistent across CPUs.
+ __sanitizer::atomic_store(&XRayArgLogger, reinterpret_cast<uint64_t>(entry),
+ __sanitizer::memory_order_release);
+ return 1;
+}
+
+int __xray_remove_handler_arg1() { return __xray_set_handler_arg1(nullptr); }
+
+uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+ __sanitizer::SpinMutexLock Guard(&XRayInstrMapMutex);
+ if (FuncId <= 0 || static_cast<size_t>(FuncId) > XRayInstrMap.Functions)
+ return 0;
+ return XRayInstrMap.SledsIndex[FuncId - 1].Begin->Address
+// On PPC, function entries are always aligned to 16 bytes. The beginning of a
+// sled might be a local entry, which is always +8 based on the global entry.
+// Always return the global entry.
+#ifdef __PPC__
+ & ~0xf
+#endif
+ ;
+}
+
+size_t __xray_max_function_id() XRAY_NEVER_INSTRUMENT {
+ __sanitizer::SpinMutexLock Guard(&XRayInstrMapMutex);
+ return XRayInstrMap.Functions;
}
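A hedged usage sketch of the public entry points added or reworked in the hunk above (handler installation, whole-binary patching, and per-function patching); it assumes a binary built with -fxray-instrument and is not part of the change itself.

    #include "xray/xray_interface.h"
    #include <cstdio>

    // Handlers run inside the patched sleds' trampolines, so keep them cheap
    // and reentrancy-safe.
    static void MyHandler(int32_t FuncId, XRayEntryType Kind) {
      std::printf("func %d kind %d\n", static_cast<int>(FuncId),
                  static_cast<int>(Kind));
    }

    int main() {
      if (__xray_set_handler(MyHandler)) // returns 1 once the runtime is ready
        __xray_patch();                  // rewrite every sled to call the trampolines
      if (__xray_max_function_id() > 0)  // function ids are 1-based, as checked above
        __xray_unpatch_function(1);
      __xray_unpatch();
      __xray_remove_handler();
      return 0;
    }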
diff --git a/contrib/compiler-rt/lib/xray/xray_interface_internal.h b/contrib/compiler-rt/lib/xray/xray_interface_internal.h
index a8434a6..4a27846 100644
--- a/contrib/compiler-rt/lib/xray/xray_interface_internal.h
+++ b/contrib/compiler-rt/lib/xray/xray_interface_internal.h
@@ -39,6 +39,11 @@ struct XRaySledEntry {
#error "Unsupported word size."
#endif
};
+
+struct XRayFunctionSledIndex {
+ const XRaySledEntry* Begin;
+ const XRaySledEntry* End;
+};
}
namespace __xray {
@@ -46,15 +51,16 @@ namespace __xray {
struct XRaySledMap {
const XRaySledEntry *Sleds;
size_t Entries;
+ const XRayFunctionSledIndex *SledsIndex;
+ size_t Functions;
};
-uint64_t cycleFrequency();
-
bool patchFunctionEntry(bool Enable, uint32_t FuncId,
- const XRaySledEntry &Sled);
+ const XRaySledEntry &Sled, void (*Trampoline)());
bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
bool patchFunctionTailExit(bool Enable, uint32_t FuncId,
const XRaySledEntry &Sled);
+bool patchCustomEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
} // namespace __xray
@@ -64,6 +70,8 @@ extern "C" {
extern void __xray_FunctionEntry();
extern void __xray_FunctionExit();
extern void __xray_FunctionTailExit();
+extern void __xray_ArgLoggerEntry();
+extern void __xray_CustomEvent();
}
#endif
diff --git a/contrib/compiler-rt/lib/xray/xray_log_interface.cc b/contrib/compiler-rt/lib/xray/xray_log_interface.cc
new file mode 100644
index 0000000..ee14ae4
--- /dev/null
+++ b/contrib/compiler-rt/lib/xray/xray_log_interface.cc
@@ -0,0 +1,69 @@
+//===-- xray_log_interface.cc ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+#include "xray/xray_log_interface.h"
+
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+#include "xray/xray_interface.h"
+#include "xray_defs.h"
+
+#include <memory>
+
+__sanitizer::SpinMutex XRayImplMutex;
+std::unique_ptr<XRayLogImpl> GlobalXRayImpl;
+
+void __xray_set_log_impl(XRayLogImpl Impl) XRAY_NEVER_INSTRUMENT {
+ if (Impl.log_init == nullptr || Impl.log_finalize == nullptr ||
+ Impl.handle_arg0 == nullptr || Impl.flush_log == nullptr) {
+ __sanitizer::SpinMutexLock Guard(&XRayImplMutex);
+ GlobalXRayImpl.reset();
+ __xray_remove_handler();
+ __xray_remove_handler_arg1();
+ return;
+ }
+
+ __sanitizer::SpinMutexLock Guard(&XRayImplMutex);
+ GlobalXRayImpl.reset(new XRayLogImpl);
+ *GlobalXRayImpl = Impl;
+ __xray_set_handler(Impl.handle_arg0);
+}
+
+void __xray_remove_log_impl() XRAY_NEVER_INSTRUMENT {
+ __sanitizer::SpinMutexLock Guard(&XRayImplMutex);
+ GlobalXRayImpl.reset();
+ __xray_remove_handler();
+ __xray_remove_handler_arg1();
+}
+
+XRayLogInitStatus __xray_log_init(size_t BufferSize, size_t MaxBuffers,
+ void *Args,
+ size_t ArgsSize) XRAY_NEVER_INSTRUMENT {
+ __sanitizer::SpinMutexLock Guard(&XRayImplMutex);
+ if (!GlobalXRayImpl)
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+ return GlobalXRayImpl->log_init(BufferSize, MaxBuffers, Args, ArgsSize);
+}
+
+XRayLogInitStatus __xray_log_finalize() XRAY_NEVER_INSTRUMENT {
+ __sanitizer::SpinMutexLock Guard(&XRayImplMutex);
+ if (!GlobalXRayImpl)
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+ return GlobalXRayImpl->log_finalize();
+}
+
+XRayLogFlushStatus __xray_log_flushLog() XRAY_NEVER_INSTRUMENT {
+ __sanitizer::SpinMutexLock Guard(&XRayImplMutex);
+ if (!GlobalXRayImpl)
+ return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+ return GlobalXRayImpl->flush_log();
+}
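A small sketch of installing a custom logging implementation through the interface above; the callback bodies are placeholders, but the four struct members match the null checks in __xray_set_log_impl().

    #include "xray/xray_interface.h"
    #include "xray/xray_log_interface.h"
    #include <cstddef>

    static XRayLogInitStatus myInit(size_t, size_t, void *, size_t) {
      return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
    }
    static XRayLogInitStatus myFinalize() {
      return XRayLogInitStatus::XRAY_LOG_FINALIZED;
    }
    static void myHandleArg0(int32_t, XRayEntryType) {}
    static XRayLogFlushStatus myFlush() {
      return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
    }

    void installMyLog() {
      XRayLogImpl Impl;
      Impl.log_init = myInit;
      Impl.log_finalize = myFinalize;
      Impl.handle_arg0 = myHandleArg0;
      Impl.flush_log = myFlush;
      __xray_set_log_impl(Impl); // also registers handle_arg0 as the sled handler
    }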
diff --git a/contrib/compiler-rt/lib/xray/xray_mips.cc b/contrib/compiler-rt/lib/xray/xray_mips.cc
new file mode 100644
index 0000000..cd86330
--- /dev/null
+++ b/contrib/compiler-rt/lib/xray/xray_mips.cc
@@ -0,0 +1,165 @@
+//===-- xray_mips.cc --------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of MIPS-specific routines (32-bit).
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+
+namespace __xray {
+
+// The machine codes for some instructions used in runtime patching.
+enum PatchOpcodes : uint32_t {
+ PO_ADDIU = 0x24000000, // addiu rt, rs, imm
+ PO_SW = 0xAC000000, // sw rt, offset(sp)
+  PO_LUI = 0x3C000000,  // lui rt, %hi(address)
+ PO_ORI = 0x34000000, // ori rt, rs, %lo(address)
+ PO_JALR = 0x0000F809, // jalr rs
+ PO_LW = 0x8C000000, // lw rt, offset(address)
+ PO_B44 = 0x1000000b, // b #44
+ PO_NOP = 0x0, // nop
+};
+
+enum RegNum : uint32_t {
+ RN_T0 = 0x8,
+ RN_T9 = 0x19,
+ RN_RA = 0x1F,
+ RN_SP = 0x1D,
+};
+
+inline static uint32_t encodeInstruction(uint32_t Opcode, uint32_t Rs,
+ uint32_t Rt,
+ uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+ return (Opcode | Rs << 21 | Rt << 16 | Imm);
+}
+
+inline static uint32_t
+encodeSpecialInstruction(uint32_t Opcode, uint32_t Rs, uint32_t Rt, uint32_t Rd,
+ uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+ return (Rs << 21 | Rt << 16 | Rd << 11 | Imm << 6 | Opcode);
+}
+
+inline static bool patchSled(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
+ // When |Enable| == true,
+ // We replace the following compile-time stub (sled):
+ //
+ // xray_sled_n:
+ // B .tmpN
+ // 11 NOPs (44 bytes)
+ // .tmpN
+ // ADDIU T9, T9, 44
+ //
+ // With the following runtime patch:
+ //
+ // xray_sled_n (32-bit):
+ // addiu sp, sp, -8 ;create stack frame
+ // nop
+ // sw ra, 4(sp) ;save return address
+ // sw t9, 0(sp) ;save register t9
+ // lui t9, %hi(__xray_FunctionEntry/Exit)
+ // ori t9, t9, %lo(__xray_FunctionEntry/Exit)
+ // lui t0, %hi(function_id)
+ // jalr t9 ;call Tracing hook
+ // ori t0, t0, %lo(function_id) ;pass function id (delay slot)
+ // lw t9, 0(sp) ;restore register t9
+ // lw ra, 4(sp) ;restore return address
+ // addiu sp, sp, 8 ;delete stack frame
+ //
+  //   We add 44 bytes to t9 because we want to adjust the function pointer to
+  //   the actual start of the function, i.e. the address just after the nop
+  //   sled. We do this because the gp displacement relocation is emitted at
+  //   the start of the function, i.e. after the nop sled, and to correctly
+  //   calculate the global offset table address, t9 must hold the address of
+  //   the instruction containing the gp displacement relocation.
+ // FIXME: Is this correct for the static relocation model?
+ //
+ // Replacement of the first 4-byte instruction should be the last and atomic
+ // operation, so that the user code which reaches the sled concurrently
+ // either jumps over the whole sled, or executes the whole sled when the
+ // latter is ready.
+ //
+ // When |Enable|==false, we set back the first instruction in the sled to be
+ // B #44
+
+ if (Enable) {
+ uint32_t LoTracingHookAddr =
+ reinterpret_cast<int32_t>(TracingHook) & 0xffff;
+ uint32_t HiTracingHookAddr =
+ (reinterpret_cast<int32_t>(TracingHook) >> 16) & 0xffff;
+ uint32_t LoFunctionID = FuncId & 0xffff;
+ uint32_t HiFunctionID = (FuncId >> 16) & 0xffff;
+ *reinterpret_cast<uint32_t *>(Sled.Address + 8) = encodeInstruction(
+ PatchOpcodes::PO_SW, RegNum::RN_SP, RegNum::RN_RA, 0x4);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 12) = encodeInstruction(
+ PatchOpcodes::PO_SW, RegNum::RN_SP, RegNum::RN_T9, 0x0);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 16) = encodeInstruction(
+ PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T9, HiTracingHookAddr);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 20) = encodeInstruction(
+ PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9, LoTracingHookAddr);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 24) = encodeInstruction(
+ PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T0, HiFunctionID);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 28) = encodeSpecialInstruction(
+ PatchOpcodes::PO_JALR, RegNum::RN_T9, 0x0, RegNum::RN_RA, 0X0);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 32) = encodeInstruction(
+ PatchOpcodes::PO_ORI, RegNum::RN_T0, RegNum::RN_T0, LoFunctionID);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 36) = encodeInstruction(
+ PatchOpcodes::PO_LW, RegNum::RN_SP, RegNum::RN_T9, 0x0);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 40) = encodeInstruction(
+ PatchOpcodes::PO_LW, RegNum::RN_SP, RegNum::RN_RA, 0x4);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 44) = encodeInstruction(
+ PatchOpcodes::PO_ADDIU, RegNum::RN_SP, RegNum::RN_SP, 0x8);
+ uint32_t CreateStackSpaceInstr = encodeInstruction(
+ PatchOpcodes::PO_ADDIU, RegNum::RN_SP, RegNum::RN_SP, 0xFFF8);
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(Sled.Address),
+ uint32_t(CreateStackSpaceInstr), std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(Sled.Address),
+ uint32_t(PatchOpcodes::PO_B44), std::memory_order_release);
+ }
+ return true;
+}
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, Trampoline);
+}
+
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: In the future we'd need to distinguish between non-tail exits and
+ // tail exits for better information preservation.
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: Implement in mips?
+ return false;
+}
+
+} // namespace __xray
+
+extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
+ // FIXME: this will have to be implemented in the trampoline assembly file
+}
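A worked example of the instruction encoder introduced above; the expected value can be cross-checked against any MIPS32 reference encoding of sw $ra, 4($sp).

    #include <cstdint>

    // Same bit layout as encodeInstruction() above: opcode | rs<<21 | rt<<16 | imm.
    constexpr uint32_t encode(uint32_t Opcode, uint32_t Rs, uint32_t Rt,
                              uint32_t Imm) {
      return Opcode | Rs << 21 | Rt << 16 | Imm;
    }

    // "sw $ra, 4($sp)": opcode 0xAC000000, rs = $sp (29), rt = $ra (31), imm = 4.
    static_assert(encode(0xAC000000, 0x1D, 0x1F, 0x4) == 0xAFBF0004,
                  "matches the canonical encoding of sw $ra, 4($sp)");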
diff --git a/contrib/compiler-rt/lib/xray/xray_mips64.cc b/contrib/compiler-rt/lib/xray/xray_mips64.cc
new file mode 100644
index 0000000..fa8fdd5
--- /dev/null
+++ b/contrib/compiler-rt/lib/xray/xray_mips64.cc
@@ -0,0 +1,173 @@
+//===-- xray_mips64.cc ------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of MIPS64-specific routines.
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+
+namespace __xray {
+
+// The machine codes for some instructions used in runtime patching.
+enum PatchOpcodes : uint32_t {
+ PO_DADDIU = 0x64000000, // daddiu rt, rs, imm
+  PO_SD = 0xFC000000, // sd rt, offset(base)
+ PO_LUI = 0x3C000000, // lui rt, imm
+ PO_ORI = 0x34000000, // ori rt, rs, imm
+ PO_DSLL = 0x00000038, // dsll rd, rt, sa
+ PO_JALR = 0x00000009, // jalr rs
+  PO_LD = 0xDC000000, // ld rt, offset(base)
+ PO_B60 = 0x1000000f, // b #60
+ PO_NOP = 0x0, // nop
+};
+
+enum RegNum : uint32_t {
+ RN_T0 = 0xC,
+ RN_T9 = 0x19,
+ RN_RA = 0x1F,
+ RN_SP = 0x1D,
+};
+
+inline static uint32_t encodeInstruction(uint32_t Opcode, uint32_t Rs,
+ uint32_t Rt,
+ uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+ return (Opcode | Rs << 21 | Rt << 16 | Imm);
+}
+
+inline static uint32_t
+encodeSpecialInstruction(uint32_t Opcode, uint32_t Rs, uint32_t Rt, uint32_t Rd,
+ uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+ return (Rs << 21 | Rt << 16 | Rd << 11 | Imm << 6 | Opcode);
+}
+
+inline static bool patchSled(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
+ // When |Enable| == true,
+ // We replace the following compile-time stub (sled):
+ //
+ // xray_sled_n:
+ // B .tmpN
+ // 15 NOPs (60 bytes)
+ // .tmpN
+ //
+ // With the following runtime patch:
+ //
+ // xray_sled_n (64-bit):
+ // daddiu sp, sp, -16 ;create stack frame
+ // nop
+ // sd ra, 8(sp) ;save return address
+ // sd t9, 0(sp) ;save register t9
+ // lui t9, %highest(__xray_FunctionEntry/Exit)
+ // ori t9, t9, %higher(__xray_FunctionEntry/Exit)
+ // dsll t9, t9, 16
+ // ori t9, t9, %hi(__xray_FunctionEntry/Exit)
+ // dsll t9, t9, 16
+ // ori t9, t9, %lo(__xray_FunctionEntry/Exit)
+ // lui t0, %hi(function_id)
+ // jalr t9 ;call Tracing hook
+ // ori t0, t0, %lo(function_id) ;pass function id (delay slot)
+ // ld t9, 0(sp) ;restore register t9
+ // ld ra, 8(sp) ;restore return address
+ // daddiu sp, sp, 16 ;delete stack frame
+ //
+ // Replacement of the first 4-byte instruction should be the last and atomic
+ // operation, so that the user code which reaches the sled concurrently
+ // either jumps over the whole sled, or executes the whole sled when the
+ // latter is ready.
+ //
+ // When |Enable|==false, we set back the first instruction in the sled to be
+ // B #60
+
+ if (Enable) {
+ uint32_t LoTracingHookAddr =
+ reinterpret_cast<int64_t>(TracingHook) & 0xffff;
+ uint32_t HiTracingHookAddr =
+ (reinterpret_cast<int64_t>(TracingHook) >> 16) & 0xffff;
+ uint32_t HigherTracingHookAddr =
+ (reinterpret_cast<int64_t>(TracingHook) >> 32) & 0xffff;
+ uint32_t HighestTracingHookAddr =
+ (reinterpret_cast<int64_t>(TracingHook) >> 48) & 0xffff;
+ uint32_t LoFunctionID = FuncId & 0xffff;
+ uint32_t HiFunctionID = (FuncId >> 16) & 0xffff;
+ *reinterpret_cast<uint32_t *>(Sled.Address + 8) = encodeInstruction(
+ PatchOpcodes::PO_SD, RegNum::RN_SP, RegNum::RN_RA, 0x8);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 12) = encodeInstruction(
+ PatchOpcodes::PO_SD, RegNum::RN_SP, RegNum::RN_T9, 0x0);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 16) = encodeInstruction(
+ PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T9, HighestTracingHookAddr);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 20) =
+ encodeInstruction(PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9,
+ HigherTracingHookAddr);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 24) = encodeSpecialInstruction(
+ PatchOpcodes::PO_DSLL, 0x0, RegNum::RN_T9, RegNum::RN_T9, 0x10);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 28) = encodeInstruction(
+ PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9, HiTracingHookAddr);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 32) = encodeSpecialInstruction(
+ PatchOpcodes::PO_DSLL, 0x0, RegNum::RN_T9, RegNum::RN_T9, 0x10);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 36) = encodeInstruction(
+ PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9, LoTracingHookAddr);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 40) = encodeInstruction(
+ PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T0, HiFunctionID);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 44) = encodeSpecialInstruction(
+ PatchOpcodes::PO_JALR, RegNum::RN_T9, 0x0, RegNum::RN_RA, 0X0);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 48) = encodeInstruction(
+ PatchOpcodes::PO_ORI, RegNum::RN_T0, RegNum::RN_T0, LoFunctionID);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 52) = encodeInstruction(
+ PatchOpcodes::PO_LD, RegNum::RN_SP, RegNum::RN_T9, 0x0);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 56) = encodeInstruction(
+ PatchOpcodes::PO_LD, RegNum::RN_SP, RegNum::RN_RA, 0x8);
+ *reinterpret_cast<uint32_t *>(Sled.Address + 60) = encodeInstruction(
+ PatchOpcodes::PO_DADDIU, RegNum::RN_SP, RegNum::RN_SP, 0x10);
+ uint32_t CreateStackSpace = encodeInstruction(
+ PatchOpcodes::PO_DADDIU, RegNum::RN_SP, RegNum::RN_SP, 0xfff0);
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(Sled.Address),
+ CreateStackSpace, std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(Sled.Address),
+ uint32_t(PatchOpcodes::PO_B60), std::memory_order_release);
+ }
+ return true;
+}
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, Trampoline);
+}
+
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: In the future we'd need to distinguish between non-tail exits and
+ // tail exits for better information preservation.
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: Implement in mips64?
+ return false;
+}
+} // namespace __xray
+
+extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
+ // FIXME: this will have to be implemented in the trampoline assembly file
+}
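A brief sketch of how the 64-bit hook address is split into the four 16-bit immediates that the lui/ori/dsll sequence above loads into t9; the struct is illustrative only.

    #include <cstdint>

    struct AddrPieces {
      uint32_t Highest, Higher, Hi, Lo; // bits 63..48, 47..32, 31..16, 15..0
    };

    inline AddrPieces splitAddress(uint64_t Addr) {
      return {static_cast<uint32_t>((Addr >> 48) & 0xffff),
              static_cast<uint32_t>((Addr >> 32) & 0xffff),
              static_cast<uint32_t>((Addr >> 16) & 0xffff),
              static_cast<uint32_t>(Addr & 0xffff)};
    }
    // The patched sled rebuilds the value as
    // ((((Highest << 16) | Higher) << 16) | Hi) << 16 | Lo, which is exactly
    // what the lui/ori/dsll/ori/dsll/ori chain computes into $t9.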
diff --git a/contrib/compiler-rt/lib/xray/xray_never_instrument.txt b/contrib/compiler-rt/lib/xray/xray_never_instrument.txt
new file mode 100644
index 0000000..7fa48dd
--- /dev/null
+++ b/contrib/compiler-rt/lib/xray/xray_never_instrument.txt
@@ -0,0 +1,6 @@
+# List of function matchers common to C/C++ applications that make sense to
+# never instrument. You can use this as an argument to
+# -fxray-never-instrument=<path> along with your project-specific lists.
+
+# Never instrument any function whose symbol starts with __xray.
+fun:__xray*
diff --git a/contrib/compiler-rt/lib/xray/xray_powerpc64.cc b/contrib/compiler-rt/lib/xray/xray_powerpc64.cc
new file mode 100644
index 0000000..ab03cb1
--- /dev/null
+++ b/contrib/compiler-rt/lib/xray/xray_powerpc64.cc
@@ -0,0 +1,106 @@
+//===-- xray_powerpc64.cc ---------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of powerpc64 and powerpc64le routines.
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include "xray_utils.h"
+#include <atomic>
+#include <cassert>
+#include <cstring>
+
+#ifndef __LITTLE_ENDIAN__
+#error powerpc64 big endian is not supported for now.
+#endif
+
+namespace {
+
+constexpr unsigned long long JumpOverInstNum = 7;
+
+void clearCache(void *Addr, size_t Len) {
+ const size_t LineSize = 32;
+
+ const intptr_t Mask = ~(LineSize - 1);
+ const intptr_t StartLine = ((intptr_t)Addr) & Mask;
+ const intptr_t EndLine = ((intptr_t)Addr + Len + LineSize - 1) & Mask;
+
+ for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
+ asm volatile("dcbf 0, %0" : : "r"(Line));
+ asm volatile("sync");
+
+ for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
+ asm volatile("icbi 0, %0" : : "r"(Line));
+ asm volatile("isync");
+}
+
+} // namespace
+
+extern "C" void __clear_cache(void *start, void *end);
+
+namespace __xray {
+
+bool patchFunctionEntry(const bool Enable, uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+ if (Enable) {
+    // lis 0, FuncId[16..31]
+ // li 0, FuncId[0..15]
+ *reinterpret_cast<uint64_t *>(Sled.Address) =
+ (0x3c000000ull + (FuncId >> 16)) +
+ ((0x60000000ull + (FuncId & 0xffff)) << 32);
+ } else {
+ // b +JumpOverInstNum instructions.
+ *reinterpret_cast<uint32_t *>(Sled.Address) =
+ 0x48000000ull + (JumpOverInstNum << 2);
+ }
+ clearCache(reinterpret_cast<void *>(Sled.Address), 8);
+ return true;
+}
+
+bool patchFunctionExit(const bool Enable, uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ if (Enable) {
+    // lis 0, FuncId[16..31]
+ // li 0, FuncId[0..15]
+ *reinterpret_cast<uint64_t *>(Sled.Address) =
+ (0x3c000000ull + (FuncId >> 16)) +
+ ((0x60000000ull + (FuncId & 0xffff)) << 32);
+ } else {
+ // Copy the blr/b instruction after JumpOverInstNum instructions.
+ *reinterpret_cast<uint32_t *>(Sled.Address) =
+ *(reinterpret_cast<uint32_t *>(Sled.Address) + JumpOverInstNum);
+ }
+ clearCache(reinterpret_cast<void *>(Sled.Address), 8);
+ return true;
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return patchFunctionExit(Enable, FuncId, Sled);
+}
+
+// FIXME: Maybe implement this better?
+bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
+
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: Implement in powerpc64?
+ return false;
+}
+
+} // namespace __xray
+
+extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
+ // FIXME: this will have to be implemented in the trampoline assembly file
+}
diff --git a/contrib/compiler-rt/lib/xray/xray_powerpc64.inc b/contrib/compiler-rt/lib/xray/xray_powerpc64.inc
new file mode 100644
index 0000000..c1a1bac
--- /dev/null
+++ b/contrib/compiler-rt/lib/xray/xray_powerpc64.inc
@@ -0,0 +1,37 @@
+//===-- xray_powerpc64.inc --------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstdint>
+#include <mutex>
+#include <sys/platform/ppc.h>
+
+#include "xray_defs.h"
+
+namespace __xray {
+
+ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+ CPU = 0;
+ return __ppc_get_timebase();
+}
+
+inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+ static std::mutex M;
+ std::lock_guard<std::mutex> Guard(M);
+ return __ppc_get_timebase_freq();
+}
+
+inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
+ return true;
+}
+
+} // namespace __xray
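As a small aside on the timebase readers above, here is a hedged sketch of converting two readTSC() samples into elapsed seconds using the frequency returned by getTSCFrequency(); the helper name is illustrative.

    #include <cstdint>
    #include <sys/platform/ppc.h>

    // Sketch: the timebase advances at __ppc_get_timebase_freq() ticks per second.
    inline double elapsedSeconds(uint64_t StartTB, uint64_t EndTB) {
      return static_cast<double>(EndTB - StartTB) /
             static_cast<double>(__ppc_get_timebase_freq());
    }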
diff --git a/contrib/compiler-rt/lib/xray/xray_trampoline_AArch64.S b/contrib/compiler-rt/lib/xray/xray_trampoline_AArch64.S
index f1a471c..4d1b04f 100644
--- a/contrib/compiler-rt/lib/xray/xray_trampoline_AArch64.S
+++ b/contrib/compiler-rt/lib/xray/xray_trampoline_AArch64.S
@@ -1,3 +1,5 @@
+#include "../builtins/assembly.h"
+
.text
/* The variable containing the handler function pointer */
.global _ZN6__xray19XRayPatchedFunctionE
@@ -87,3 +89,56 @@ FunctionExit_restore:
LDP X3, X4, [SP], #16
LDP X1, X2, [SP], #16
RET
+
+ /* Word-aligned function entry point */
+ .p2align 2
+ /* Let C/C++ see the symbol */
+ .global __xray_FunctionTailExit
+ .type __xray_FunctionTailExit, %function
+  /* In C++ it is extern "C" void __xray_FunctionTailExit(uint32_t FuncId)
+     with FuncId passed in the W0 register. */
+__xray_FunctionTailExit:
+ /* Move the return address beyond the end of sled data. The 12 bytes of
+ data are inserted in the code of the runtime patch, between the call
+ instruction and the instruction returned into. The data contains 32
+ bits of instrumented function ID and 64 bits of the address of
+ the current trampoline. */
+ ADD X30, X30, #12
+ /* Push the registers which may be modified by the handler function */
+ STP X1, X2, [SP, #-16]!
+ STP X3, X4, [SP, #-16]!
+ STP X5, X6, [SP, #-16]!
+ STP X7, X30, [SP, #-16]!
+ /* Push the parameters of the tail called function */
+ STP Q0, Q1, [SP, #-32]!
+ STP Q2, Q3, [SP, #-32]!
+ STP Q4, Q5, [SP, #-32]!
+ STP Q6, Q7, [SP, #-32]!
+ /* Load the address of _ZN6__xray19XRayPatchedFunctionE into X1 */
+ LDR X1, =_ZN6__xray19XRayPatchedFunctionE
+ /* Load the handler function pointer into X2 */
+ LDR X2, [X1]
+ /* Handler address is nullptr if handler is not set */
+ CMP X2, #0
+ BEQ FunctionTailExit_restore
+  /* Function ID is already in W0 (the first parameter).
+     X1=2 would mean that we are tracing a tail exit event, but until the
+     logging part of XRay handles tail exits we pretend that a normal function
+     exit happens here, so we give the handler code 1 */
+ MOV X1, #1
+ /* Call the handler with 2 parameters in W0 and X1 */
+ BLR X2
+FunctionTailExit_restore:
+ /* Pop the parameters of the tail called function */
+ LDP Q6, Q7, [SP], #32
+ LDP Q4, Q5, [SP], #32
+ LDP Q2, Q3, [SP], #32
+ LDP Q0, Q1, [SP], #32
+ /* Pop the registers which may be modified by the handler function */
+ LDP X7, X30, [SP], #16
+ LDP X5, X6, [SP], #16
+ LDP X3, X4, [SP], #16
+ LDP X1, X2, [SP], #16
+ RET
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/contrib/compiler-rt/lib/xray/xray_trampoline_arm.S b/contrib/compiler-rt/lib/xray/xray_trampoline_arm.S
index 5d87c97..71dbee6 100644
--- a/contrib/compiler-rt/lib/xray/xray_trampoline_arm.S
+++ b/contrib/compiler-rt/lib/xray/xray_trampoline_arm.S
@@ -1,8 +1,11 @@
+#include "../builtins/assembly.h"
+
.syntax unified
.arch armv6t2
.fpu vfpv2
.code 32
.global _ZN6__xray19XRayPatchedFunctionE
+
@ Word-aligned function entry point
.p2align 2
@ Let C/C++ see the symbol
@@ -63,3 +66,37 @@ FunctionExit_restore:
@ Restore the floating-point return value of the instrumented function
VPOP {d0}
POP {r1-r3,pc}
+
+ @ Word-aligned function entry point
+ .p2align 2
+ @ Let C/C++ see the symbol
+ .global __xray_FunctionTailExit
+ @ It preserves all registers except r0, r12(ip), r14(lr) and r15(pc)
+ @ Assume that "q" part of the floating-point registers is not used
+ @ for passing parameters to C/C++ functions.
+ .type __xray_FunctionTailExit, %function
+  @ In C++ it is extern "C" void __xray_FunctionTailExit(uint32_t FuncId)
+  @ with FuncId passed in the r0 register.
+__xray_FunctionTailExit:
+ PUSH {r1-r3,lr}
+ @ Save floating-point parameters of the instrumented function
+ VPUSH {d0-d7}
+ MOVW r1,#:lower16:_ZN6__xray19XRayPatchedFunctionE
+ MOVT r1,#:upper16:_ZN6__xray19XRayPatchedFunctionE
+ LDR r2, [r1]
+ @ Handler address is nullptr if handler is not set
+ CMP r2, #0
+ BEQ FunctionTailExit_restore
+ @ Function ID is already in r0 (the first parameter).
+  @ r1=2 would mean that we are tracing a tail exit event, but until the
+  @ logging part of XRay handles tail exits we pretend that a normal
+  @ function exit happens here, so we give the handler code 1
+ MOV r1, #1
+ @ Call the handler with 2 parameters in r0 and r1
+ BLX r2
+FunctionTailExit_restore:
+ @ Restore floating-point parameters of the instrumented function
+ VPOP {d0-d7}
+ POP {r1-r3,pc}
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/contrib/compiler-rt/lib/xray/xray_trampoline_mips.S b/contrib/compiler-rt/lib/xray/xray_trampoline_mips.S
new file mode 100644
index 0000000..39a1a3a
--- /dev/null
+++ b/contrib/compiler-rt/lib/xray/xray_trampoline_mips.S
@@ -0,0 +1,110 @@
+//===-- xray_trampoline_mips.s ----------------------------------*- ASM -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the MIPS-specific assembler for the trampolines.
+//
+//===----------------------------------------------------------------------===//
+
+ .text
+ .file "xray_trampoline_mips.S"
+ .globl __xray_FunctionEntry
+ .p2align 2
+ .type __xray_FunctionEntry,@function
+__xray_FunctionEntry:
+ .cfi_startproc
+ .set noreorder
+ .cpload $t9
+ .set reorder
+ // Save argument registers before doing any actual work
+ .cfi_def_cfa_offset 36
+ addiu $sp, $sp, -36
+ sw $ra, 32($sp)
+ .cfi_offset 31, -4
+ sw $a3, 28($sp)
+ sw $a2, 24($sp)
+ sw $a1, 20($sp)
+ sw $a0, 16($sp)
+ sdc1 $f14, 8($sp)
+ sdc1 $f12, 0($sp)
+
+ la $t9, _ZN6__xray19XRayPatchedFunctionE
+ lw $t9, 0($t9)
+
+ beqz $t9, FunctionEntry_restore
+
+ // a1=0 means that we are tracing an entry event
+ move $a1, $zero
+ // Function ID is in t0 (the first parameter).
+ move $a0, $t0
+ jalr $t9
+
+FunctionEntry_restore:
+ // Restore argument registers
+ ldc1 $f12, 0($sp)
+ ldc1 $f14, 8($sp)
+ lw $a0, 16($sp)
+ lw $a1, 20($sp)
+ lw $a2, 24($sp)
+ lw $a3, 28($sp)
+ lw $ra, 32($sp)
+ addiu $sp, $sp, 36
+ jr $ra
+FunctionEntry_end:
+ .size __xray_FunctionEntry, FunctionEntry_end-__xray_FunctionEntry
+ .cfi_endproc
+
+ .text
+ .globl __xray_FunctionExit
+ .p2align 2
+ .type __xray_FunctionExit,@function
+__xray_FunctionExit:
+ .cfi_startproc
+ .set noreorder
+ .cpload $t9
+ .set reorder
+ // Save return registers before doing any actual work.
+ .cfi_def_cfa_offset 36
+ addiu $sp, $sp, -36
+ sw $ra, 32($sp)
+ .cfi_offset 31, -4
+ sw $a1, 28($sp)
+ sw $a0, 24($sp)
+ sw $v1, 20($sp)
+ sw $v0, 16($sp)
+ sdc1 $f2, 8($sp)
+ sdc1 $f0, 0($sp)
+
+ la $t9, _ZN6__xray19XRayPatchedFunctionE
+ lw $t9, 0($t9)
+
+ beqz $t9, FunctionExit_restore
+
+ // a1=1 means that we are tracing an exit event
+ li $a1, 1
+ // Function ID is in t0 (the first parameter).
+ move $a0, $t0
+ jalr $t9
+
+FunctionExit_restore:
+ // Restore return registers
+ ldc1 $f0, 0($sp)
+ ldc1 $f2, 8($sp)
+ lw $v0, 16($sp)
+ lw $v1, 20($sp)
+ lw $a0, 24($sp)
+ lw $a1, 28($sp)
+ lw $ra, 32($sp)
+ addiu $sp, $sp, 36
+ jr $ra
+
+FunctionExit_end:
+ .size __xray_FunctionExit, FunctionExit_end-__xray_FunctionExit
+ .cfi_endproc
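
The immediates that the MIPS trampolines above (and the ARM ones earlier) load into the second argument register are the entry/exit codes the handler receives. A sketch of that mapping, assuming the XRayEntryType values in xray/xray_interface.h at this revision:

    // Entry/exit codes passed to the patched-function handler; the trampolines
    // materialise these as immediates (a1 = 0 for entry, a1 = 1 for exit).
    enum XRayEntryType {  // sketch only; see xray/xray_interface.h
      ENTRY = 0,          // function entry
      EXIT = 1,           // function return
      TAIL = 2,           // tail-call exit (currently still reported as 1)
    };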
diff --git a/contrib/compiler-rt/lib/xray/xray_trampoline_mips64.S b/contrib/compiler-rt/lib/xray/xray_trampoline_mips64.S
new file mode 100644
index 0000000..9cbc7e1
--- /dev/null
+++ b/contrib/compiler-rt/lib/xray/xray_trampoline_mips64.S
@@ -0,0 +1,136 @@
+//===-- xray_trampoline_mips64.s --------------------------------*- ASM -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the MIPS64-specific assembler for the trampolines.
+//
+//===----------------------------------------------------------------------===//
+
+ .text
+ .file "xray_trampoline_mips64.S"
+ .globl __xray_FunctionEntry
+ .p2align 2
+ .type __xray_FunctionEntry,@function
+__xray_FunctionEntry:
+ .cfi_startproc
+ // Save argument registers before doing any actual work.
+ .cfi_def_cfa_offset 144
+ daddiu $sp, $sp, -144
+ sd $ra, 136($sp)
+ .cfi_offset 31, -8
+ sd $gp, 128($sp)
+ sd $a7, 120($sp)
+ sd $a6, 112($sp)
+ sd $a5, 104($sp)
+ sd $a4, 96($sp)
+ sd $a3, 88($sp)
+ sd $a2, 80($sp)
+ sd $a1, 72($sp)
+ sd $a0, 64($sp)
+ sdc1 $f19, 56($sp)
+ sdc1 $f18, 48($sp)
+ sdc1 $f17, 40($sp)
+ sdc1 $f16, 32($sp)
+ sdc1 $f15, 24($sp)
+ sdc1 $f14, 16($sp)
+ sdc1 $f13, 8($sp)
+ sdc1 $f12, 0($sp)
+
+ lui $gp, %hi(%neg(%gp_rel(__xray_FunctionEntry)))
+ daddu $gp, $gp, $t9
+ daddiu $gp ,$gp, %lo(%neg(%gp_rel(__xray_FunctionEntry)))
+
+ dla $t9, _ZN6__xray19XRayPatchedFunctionE
+ ld $t9, 0($t9)
+
+ beqz $t9, FunctionEntry_restore
+
+ // a1=0 means that we are tracing an entry event
+ move $a1, $zero
+ // Function ID is in t0 (the first parameter).
+ move $a0, $t0
+ jalr $t9
+
+FunctionEntry_restore:
+ // Restore argument registers
+ ldc1 $f12, 0($sp)
+ ldc1 $f13, 8($sp)
+ ldc1 $f14, 16($sp)
+ ldc1 $f15, 24($sp)
+ ldc1 $f16, 32($sp)
+ ldc1 $f17, 40($sp)
+ ldc1 $f18, 48($sp)
+ ldc1 $f19, 56($sp)
+ ld $a0, 64($sp)
+ ld $a1, 72($sp)
+ ld $a2, 80($sp)
+ ld $a3, 88($sp)
+ ld $a4, 96($sp)
+ ld $a5, 104($sp)
+ ld $a6, 112($sp)
+ ld $a7, 120($sp)
+ ld $gp, 128($sp)
+ ld $ra, 136($sp)
+ daddiu $sp, $sp, 144
+ jr $ra
+FunctionEntry_end:
+ .size __xray_FunctionEntry, FunctionEntry_end-__xray_FunctionEntry
+ .cfi_endproc
+
+ .text
+ .globl __xray_FunctionExit
+ .p2align 2
+ .type __xray_FunctionExit,@function
+__xray_FunctionExit:
+ .cfi_startproc
+ // Save return registers before doing any actual work.
+ .cfi_def_cfa_offset 64
+ daddiu $sp, $sp, -64
+ sd $ra, 56($sp)
+ .cfi_offset 31, -8
+ sd $gp, 48($sp)
+ sd $a0, 40($sp)
+ sd $v1, 32($sp)
+ sd $v0, 24($sp)
+ sdc1 $f2, 16($sp)
+ sdc1 $f1, 8($sp)
+ sdc1 $f0, 0($sp)
+
+ lui $gp, %hi(%neg(%gp_rel(__xray_FunctionExit)))
+ daddu $gp, $gp, $t9
+ daddiu $gp ,$gp, %lo(%neg(%gp_rel(__xray_FunctionExit)))
+
+ dla $t9, _ZN6__xray19XRayPatchedFunctionE
+ ld $t9, 0($t9)
+
+ beqz $t9, FunctionExit_restore
+
+ // a1=1 means that we are tracing an exit event
+ li $a1, 1
+ // Function ID is in t0 (the first parameter).
+ move $a0, $t0
+ jalr $t9
+
+FunctionExit_restore:
+ // Restore return registers
+ ldc1 $f0, 0($sp)
+ ldc1 $f1, 8($sp)
+ ldc1 $f2, 16($sp)
+ ld $v0, 24($sp)
+ ld $v1, 32($sp)
+ ld $a0, 40($sp)
+ ld $gp, 48($sp)
+ ld $ra, 56($sp)
+ daddiu $sp, $sp, 64
+ jr $ra
+
+FunctionExit_end:
+ .size __xray_FunctionExit, FunctionExit_end-__xray_FunctionExit
+ .cfi_endproc
diff --git a/contrib/compiler-rt/lib/xray/xray_trampoline_powerpc64.cc b/contrib/compiler-rt/lib/xray/xray_trampoline_powerpc64.cc
new file mode 100644
index 0000000..878c469
--- /dev/null
+++ b/contrib/compiler-rt/lib/xray/xray_trampoline_powerpc64.cc
@@ -0,0 +1,15 @@
+#include <atomic>
+#include <xray/xray_interface.h>
+
+namespace __xray {
+
+extern std::atomic<void (*)(int32_t, XRayEntryType)> XRayPatchedFunction;
+
+// Implement this in C++ instead of assembly, to avoid dealing with TOC by hand.
+void CallXRayPatchedFunction(int32_t FuncId, XRayEntryType Type) {
+ auto fptr = __xray::XRayPatchedFunction.load();
+ if (fptr != nullptr)
+ (*fptr)(FuncId, Type);
+}
+
+} // namespace __xray
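
The assembly file that follows branches to this helper through its mangled name. For reference, a sketch of the declaration that produces _ZN6__xray23CallXRayPatchedFunctionEi13XRayEntryType under the Itanium C++ ABI; the register assignments noted below are assumptions based on the ELFv2 calling convention:

    #include <cstdint>
    #include <xray/xray_interface.h>

    namespace __xray {
    // Mangles to _ZN6__xray23CallXRayPatchedFunctionEi13XRayEntryType, the
    // symbol the trampolines below 'bl' to, with FuncId expected in r3 and the
    // entry type (0 for entry, 1 for exit) in r4.
    void CallXRayPatchedFunction(int32_t FuncId, XRayEntryType Type);
    } // namespace __xray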
diff --git a/contrib/compiler-rt/lib/xray/xray_trampoline_powerpc64_asm.S b/contrib/compiler-rt/lib/xray/xray_trampoline_powerpc64_asm.S
new file mode 100644
index 0000000..250e2e5b
--- /dev/null
+++ b/contrib/compiler-rt/lib/xray/xray_trampoline_powerpc64_asm.S
@@ -0,0 +1,235 @@
+ .text
+ .abiversion 2
+ .globl __xray_FunctionEntry
+ .p2align 4
+__xray_FunctionEntry:
+ std 0, 16(1)
+ stdu 1, -408(1)
+# Spill r3-r10, f1-f13, and vsr34-vsr45, which are parameter registers.
+# If this appears to be slow, the caller needs to pass in number of generic,
+# floating point, and vector parameters, so that we only spill those live ones.
+ std 3, 32(1)
+ ld 3, 400(1) # FuncId
+ std 4, 40(1)
+ std 5, 48(1)
+ std 6, 56(1)
+ std 7, 64(1)
+ std 8, 72(1)
+ std 9, 80(1)
+ std 10, 88(1)
+ addi 4, 1, 96
+ stxsdx 1, 0, 4
+ addi 4, 1, 104
+ stxsdx 2, 0, 4
+ addi 4, 1, 112
+ stxsdx 3, 0, 4
+ addi 4, 1, 120
+ stxsdx 4, 0, 4
+ addi 4, 1, 128
+ stxsdx 5, 0, 4
+ addi 4, 1, 136
+ stxsdx 6, 0, 4
+ addi 4, 1, 144
+ stxsdx 7, 0, 4
+ addi 4, 1, 152
+ stxsdx 8, 0, 4
+ addi 4, 1, 160
+ stxsdx 9, 0, 4
+ addi 4, 1, 168
+ stxsdx 10, 0, 4
+ addi 4, 1, 176
+ stxsdx 11, 0, 4
+ addi 4, 1, 184
+ stxsdx 12, 0, 4
+ addi 4, 1, 192
+ stxsdx 13, 0, 4
+ addi 4, 1, 200
+ stxvd2x 34, 0, 4
+ addi 4, 1, 216
+ stxvd2x 35, 0, 4
+ addi 4, 1, 232
+ stxvd2x 36, 0, 4
+ addi 4, 1, 248
+ stxvd2x 37, 0, 4
+ addi 4, 1, 264
+ stxvd2x 38, 0, 4
+ addi 4, 1, 280
+ stxvd2x 39, 0, 4
+ addi 4, 1, 296
+ stxvd2x 40, 0, 4
+ addi 4, 1, 312
+ stxvd2x 41, 0, 4
+ addi 4, 1, 328
+ stxvd2x 42, 0, 4
+ addi 4, 1, 344
+ stxvd2x 43, 0, 4
+ addi 4, 1, 360
+ stxvd2x 44, 0, 4
+ addi 4, 1, 376
+ stxvd2x 45, 0, 4
+ std 2, 392(1)
+ mflr 0
+ std 0, 400(1)
+
+ li 4, 0
+ bl _ZN6__xray23CallXRayPatchedFunctionEi13XRayEntryType
+ nop
+
+ addi 4, 1, 96
+ lxsdx 1, 0, 4
+ addi 4, 1, 104
+ lxsdx 2, 0, 4
+ addi 4, 1, 112
+ lxsdx 3, 0, 4
+ addi 4, 1, 120
+ lxsdx 4, 0, 4
+ addi 4, 1, 128
+ lxsdx 5, 0, 4
+ addi 4, 1, 136
+ lxsdx 6, 0, 4
+ addi 4, 1, 144
+ lxsdx 7, 0, 4
+ addi 4, 1, 152
+ lxsdx 8, 0, 4
+ addi 4, 1, 160
+ lxsdx 9, 0, 4
+ addi 4, 1, 168
+ lxsdx 10, 0, 4
+ addi 4, 1, 176
+ lxsdx 11, 0, 4
+ addi 4, 1, 184
+ lxsdx 12, 0, 4
+ addi 4, 1, 192
+ lxsdx 13, 0, 4
+ addi 4, 1, 200
+ lxvd2x 34, 0, 4
+ addi 4, 1, 216
+ lxvd2x 35, 0, 4
+ addi 4, 1, 232
+ lxvd2x 36, 0, 4
+ addi 4, 1, 248
+ lxvd2x 37, 0, 4
+ addi 4, 1, 264
+ lxvd2x 38, 0, 4
+ addi 4, 1, 280
+ lxvd2x 39, 0, 4
+ addi 4, 1, 296
+ lxvd2x 40, 0, 4
+ addi 4, 1, 312
+ lxvd2x 41, 0, 4
+ addi 4, 1, 328
+ lxvd2x 42, 0, 4
+ addi 4, 1, 344
+ lxvd2x 43, 0, 4
+ addi 4, 1, 360
+ lxvd2x 44, 0, 4
+ addi 4, 1, 376
+ lxvd2x 45, 0, 4
+ ld 0, 400(1)
+ mtlr 0
+ ld 2, 392(1)
+ ld 3, 32(1)
+ ld 4, 40(1)
+ ld 5, 48(1)
+ ld 6, 56(1)
+ ld 7, 64(1)
+ ld 8, 72(1)
+ ld 9, 80(1)
+ ld 10, 88(1)
+
+ addi 1, 1, 408
+ ld 0, 16(1)
+ blr
+
+ .globl __xray_FunctionExit
+ .p2align 4
+__xray_FunctionExit:
+ std 0, 16(1)
+ stdu 1, -256(1)
+# Spill r3-r4, f1-f8, and vsr34-vsr41, which are return registers.
+# If this appears to be slow, the caller needs to pass in number of generic,
+# floating point, and vector parameters, so that we only spill those live ones.
+ std 3, 32(1)
+ ld 3, 248(1) # FuncId
+ std 4, 40(1)
+ addi 4, 1, 48
+ stxsdx 1, 0, 4
+ addi 4, 1, 56
+ stxsdx 2, 0, 4
+ addi 4, 1, 64
+ stxsdx 3, 0, 4
+ addi 4, 1, 72
+ stxsdx 4, 0, 4
+ addi 4, 1, 80
+ stxsdx 5, 0, 4
+ addi 4, 1, 88
+ stxsdx 6, 0, 4
+ addi 4, 1, 96
+ stxsdx 7, 0, 4
+ addi 4, 1, 104
+ stxsdx 8, 0, 4
+ addi 4, 1, 112
+ stxvd2x 34, 0, 4
+ addi 4, 1, 128
+ stxvd2x 35, 0, 4
+ addi 4, 1, 144
+ stxvd2x 36, 0, 4
+ addi 4, 1, 160
+ stxvd2x 37, 0, 4
+ addi 4, 1, 176
+ stxvd2x 38, 0, 4
+ addi 4, 1, 192
+ stxvd2x 39, 0, 4
+ addi 4, 1, 208
+ stxvd2x 40, 0, 4
+ addi 4, 1, 224
+ stxvd2x 41, 0, 4
+ std 2, 240(1)
+ mflr 0
+ std 0, 248(1)
+
+ li 4, 1
+ bl _ZN6__xray23CallXRayPatchedFunctionEi13XRayEntryType
+ nop
+
+ addi 4, 1, 48
+ lxsdx 1, 0, 4
+ addi 4, 1, 56
+ lxsdx 2, 0, 4
+ addi 4, 1, 64
+ lxsdx 3, 0, 4
+ addi 4, 1, 72
+ lxsdx 4, 0, 4
+ addi 4, 1, 80
+ lxsdx 5, 0, 4
+ addi 4, 1, 88
+ lxsdx 6, 0, 4
+ addi 4, 1, 96
+ lxsdx 7, 0, 4
+ addi 4, 1, 104
+ lxsdx 8, 0, 4
+ addi 4, 1, 112
+ lxvd2x 34, 0, 4
+ addi 4, 1, 128
+ lxvd2x 35, 0, 4
+ addi 4, 1, 144
+ lxvd2x 36, 0, 4
+ addi 4, 1, 160
+ lxvd2x 37, 0, 4
+ addi 4, 1, 176
+ lxvd2x 38, 0, 4
+ addi 4, 1, 192
+ lxvd2x 39, 0, 4
+ addi 4, 1, 208
+ lxvd2x 40, 0, 4
+ addi 4, 1, 224
+ lxvd2x 41, 0, 4
+ ld 0, 248(1)
+ mtlr 0
+ ld 2, 240(1)
+ ld 3, 32(1)
+ ld 4, 40(1)
+
+ addi 1, 1, 256
+ ld 0, 16(1)
+ blr
diff --git a/contrib/compiler-rt/lib/xray/xray_trampoline_x86_64.S b/contrib/compiler-rt/lib/xray/xray_trampoline_x86_64.S
index d90c30c..b59eedc 100644
--- a/contrib/compiler-rt/lib/xray/xray_trampoline_x86_64.S
+++ b/contrib/compiler-rt/lib/xray/xray_trampoline_x86_64.S
@@ -13,54 +13,64 @@
//
//===----------------------------------------------------------------------===//
+#include "../builtins/assembly.h"
+
.macro SAVE_REGISTERS
- subq $200, %rsp
- movupd %xmm0, 184(%rsp)
- movupd %xmm1, 168(%rsp)
- movupd %xmm2, 152(%rsp)
- movupd %xmm3, 136(%rsp)
- movupd %xmm4, 120(%rsp)
- movupd %xmm5, 104(%rsp)
- movupd %xmm6, 88(%rsp)
- movupd %xmm7, 72(%rsp)
- movq %rdi, 64(%rsp)
- movq %rax, 56(%rsp)
- movq %rdx, 48(%rsp)
- movq %rsi, 40(%rsp)
- movq %rcx, 32(%rsp)
- movq %r8, 24(%rsp)
- movq %r9, 16(%rsp)
+ subq $192, %rsp
+ .cfi_def_cfa_offset 200
+ // At this point, the stack pointer should be aligned to an 8-byte boundary,
+ // because any call instructions that come after this will add another 8
+ // bytes and therefore align it to 16 bytes.
+ movq %rbp, 184(%rsp)
+ movupd %xmm0, 168(%rsp)
+ movupd %xmm1, 152(%rsp)
+ movupd %xmm2, 136(%rsp)
+ movupd %xmm3, 120(%rsp)
+ movupd %xmm4, 104(%rsp)
+ movupd %xmm5, 88(%rsp)
+ movupd %xmm6, 72(%rsp)
+ movupd %xmm7, 56(%rsp)
+ movq %rdi, 48(%rsp)
+ movq %rax, 40(%rsp)
+ movq %rdx, 32(%rsp)
+ movq %rsi, 24(%rsp)
+ movq %rcx, 16(%rsp)
+ movq %r8, 8(%rsp)
+ movq %r9, 0(%rsp)
.endm
.macro RESTORE_REGISTERS
- movupd 184(%rsp), %xmm0
- movupd 168(%rsp), %xmm1
- movupd 152(%rsp), %xmm2
- movupd 136(%rsp), %xmm3
- movupd 120(%rsp), %xmm4
- movupd 104(%rsp), %xmm5
- movupd 88(%rsp) , %xmm6
- movupd 72(%rsp) , %xmm7
- movq 64(%rsp), %rdi
- movq 56(%rsp), %rax
- movq 48(%rsp), %rdx
- movq 40(%rsp), %rsi
- movq 32(%rsp), %rcx
- movq 24(%rsp), %r8
- movq 16(%rsp), %r9
- addq $200, %rsp
+ movq 184(%rsp), %rbp
+ movupd 168(%rsp), %xmm0
+ movupd 152(%rsp), %xmm1
+ movupd 136(%rsp), %xmm2
+ movupd 120(%rsp), %xmm3
+ movupd 104(%rsp), %xmm4
+ movupd 88(%rsp), %xmm5
+ movupd 72(%rsp) , %xmm6
+ movupd 56(%rsp) , %xmm7
+ movq 48(%rsp), %rdi
+ movq 40(%rsp), %rax
+ movq 32(%rsp), %rdx
+ movq 24(%rsp), %rsi
+ movq 16(%rsp), %rcx
+ movq 8(%rsp), %r8
+ movq 0(%rsp), %r9
+ addq $192, %rsp
+ .cfi_def_cfa_offset 8
.endm
.text
.file "xray_trampoline_x86.S"
+
+//===----------------------------------------------------------------------===//
+
.globl __xray_FunctionEntry
.align 16, 0x90
.type __xray_FunctionEntry,@function
__xray_FunctionEntry:
.cfi_startproc
- pushq %rbp
- .cfi_def_cfa_offset 16
SAVE_REGISTERS
// This load has to be atomic, it's concurrent with __xray_patch().
@@ -75,12 +85,13 @@ __xray_FunctionEntry:
callq *%rax
.Ltmp0:
RESTORE_REGISTERS
- popq %rbp
retq
.Ltmp1:
.size __xray_FunctionEntry, .Ltmp1-__xray_FunctionEntry
.cfi_endproc
+//===----------------------------------------------------------------------===//
+
.globl __xray_FunctionExit
.align 16, 0x90
.type __xray_FunctionExit,@function
@@ -89,14 +100,13 @@ __xray_FunctionExit:
// Save the important registers first. Since we're assuming that this
// function is only jumped into, we only preserve the registers for
// returning.
- pushq %rbp
- .cfi_def_cfa_offset 16
subq $56, %rsp
- .cfi_def_cfa_offset 32
- movupd %xmm0, 40(%rsp)
- movupd %xmm1, 24(%rsp)
- movq %rax, 16(%rsp)
- movq %rdx, 8(%rsp)
+ .cfi_def_cfa_offset 64
+ movq %rbp, 48(%rsp)
+ movupd %xmm0, 32(%rsp)
+ movupd %xmm1, 16(%rsp)
+ movq %rax, 8(%rsp)
+ movq %rdx, 0(%rsp)
movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
testq %rax,%rax
je .Ltmp2
@@ -106,17 +116,20 @@ __xray_FunctionExit:
callq *%rax
.Ltmp2:
// Restore the important registers.
- movupd 40(%rsp), %xmm0
- movupd 24(%rsp), %xmm1
- movq 16(%rsp), %rax
- movq 8(%rsp), %rdx
+ movq 48(%rsp), %rbp
+ movupd 32(%rsp), %xmm0
+ movupd 16(%rsp), %xmm1
+ movq 8(%rsp), %rax
+ movq 0(%rsp), %rdx
addq $56, %rsp
- popq %rbp
+ .cfi_def_cfa_offset 8
retq
.Ltmp3:
.size __xray_FunctionExit, .Ltmp3-__xray_FunctionExit
.cfi_endproc
+//===----------------------------------------------------------------------===//
+
.global __xray_FunctionTailExit
.align 16, 0x90
.type __xray_FunctionTailExit,@function
@@ -126,8 +139,6 @@ __xray_FunctionTailExit:
// this is an exit. In the future, we will introduce a new entry type that
// differentiates between a normal exit and a tail exit, but we'd have to do
// this and increment the version number for the header.
- pushq %rbp
- .cfi_def_cfa_offset 16
SAVE_REGISTERS
movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
@@ -140,8 +151,82 @@ __xray_FunctionTailExit:
.Ltmp4:
RESTORE_REGISTERS
- popq %rbp
retq
.Ltmp5:
.size __xray_FunctionTailExit, .Ltmp5-__xray_FunctionTailExit
.cfi_endproc
+
+//===----------------------------------------------------------------------===//
+
+ .globl __xray_ArgLoggerEntry
+ .align 16, 0x90
+ .type __xray_ArgLoggerEntry,@function
+__xray_ArgLoggerEntry:
+ .cfi_startproc
+ SAVE_REGISTERS
+
+ // Again, these function pointer loads must be atomic; MOV is fine.
+ movq _ZN6__xray13XRayArgLoggerE(%rip), %rax
+ testq %rax, %rax
+ jne .Larg1entryLog
+
+ // If [arg1 logging handler] not set, defer to no-arg logging.
+ movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
+ testq %rax, %rax
+ je .Larg1entryFail
+
+.Larg1entryLog:
+
+ // First argument will become the third
+ movq %rdi, %rdx
+
+ // XRayEntryType::ENTRY into the second
+ xorq %rsi, %rsi
+
+ // 32-bit function ID becomes the first
+ movl %r10d, %edi
+ callq *%rax
+
+.Larg1entryFail:
+ RESTORE_REGISTERS
+ retq
+
+.Larg1entryEnd:
+ .size __xray_ArgLoggerEntry, .Larg1entryEnd-__xray_ArgLoggerEntry
+ .cfi_endproc
+
+//===----------------------------------------------------------------------===//
+
+ .global __xray_CustomEvent
+ .align 16, 0x90
+ .type __xray_CustomEvent,@function
+__xray_CustomEvent:
+ .cfi_startproc
+ subq $16, %rsp
+ .cfi_def_cfa_offset 24
+ movq %rbp, 8(%rsp)
+ movq %rax, 0(%rsp)
+
+ // We take two arguments to this trampoline, which should be in rdi and rsi
+ // already. We also make sure that we stash %rax because we use that register
+ // to call the logging handler.
+ movq _ZN6__xray22XRayPatchedCustomEventE(%rip), %rax
+ testq %rax,%rax
+ je .LcustomEventCleanup
+
+ // At this point we know that rdi and rsi already hold the event data, so we
+ // just call the logging handler.
+ callq *%rax
+
+.LcustomEventCleanup:
+ movq 0(%rsp), %rax
+ movq 8(%rsp), %rbp
+ addq $16, %rsp
+ .cfi_def_cfa_offset 8
+ retq
+
+.Ltmp8:
+ .size __xray_CustomEvent, .Ltmp8-__xray_CustomEvent
+ .cfi_endproc
+
+NO_EXEC_STACK_DIRECTIVE
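
The two new trampolines, __xray_ArgLoggerEntry and __xray_CustomEvent, dispatch to separately installed handlers. A hedged sketch of how user code might install them; the setter names (__xray_set_handler_arg1, __xray_set_customevent_handler) and handler signatures are assumptions based on xray/xray_interface.h at this revision:

    #include <cstddef>
    #include <cstdint>
    #include <xray/xray_interface.h>

    // Hypothetical arg1 handler: the trampoline moves the instrumented
    // function's first argument into the third parameter, the entry type into
    // the second, and the 32-bit function ID (from r10d) into the first.
    void Arg1Handler(int32_t FuncId, XRayEntryType Type, uint64_t Arg1) {
      // ... record (FuncId, Type, Arg1) ...
    }

    // Hypothetical custom-event handler: receives the pointer/size pair left
    // in the argument registers by the __xray_CustomEvent sled.
    void EventHandler(void *Event, std::size_t Size) {
      // ... copy Size bytes from Event into a log ...
    }

    int main() {
      __xray_set_handler_arg1(Arg1Handler);          // assumed entry point
      __xray_set_customevent_handler(EventHandler);  // assumed entry point
      __xray_patch();
      // ...
    }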
diff --git a/contrib/compiler-rt/lib/xray/xray_tsc.h b/contrib/compiler-rt/lib/xray/xray_tsc.h
new file mode 100644
index 0000000..4507564
--- /dev/null
+++ b/contrib/compiler-rt/lib/xray/xray_tsc.h
@@ -0,0 +1,68 @@
+//===-- xray_tsc.h ----------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_EMULATE_TSC_H
+#define XRAY_EMULATE_TSC_H
+
+namespace __xray {
+static constexpr uint64_t NanosecondsPerSecond = 1000ULL * 1000 * 1000;
+}
+
+#if defined(__x86_64__)
+#include "xray_x86_64.inc"
+#elif defined(__powerpc64__)
+#include "xray_powerpc64.inc"
+#elif defined(__arm__) || defined(__aarch64__) || defined(__mips__)
+// Emulated TSC.
+// There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
+// not have a constant frequency like TSC on x86(_64), it may go faster
+// or slower depending on CPU turbo or power saving mode. Furthermore,
+// to read from CP15 on ARM a kernel modification or a driver is needed.
+// We can not require this from users of compiler-rt.
+// So on ARM we use clock_gettime() which gives the result in nanoseconds.
+// To get the measurements per second, we scale this by the number of
+// nanoseconds per second, pretending that the TSC frequency is 1GHz and
+// one TSC tick is 1 nanosecond.
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "xray_defs.h"
+#include <cerrno>
+#include <cstdint>
+#include <time.h>
+
+namespace __xray {
+
+inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
+
+ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+ timespec TS;
+ int result = clock_gettime(CLOCK_REALTIME, &TS);
+ if (result != 0) {
+ Report("clock_gettime(2) returned %d, errno=%d.", result, int(errno));
+ TS.tv_sec = 0;
+ TS.tv_nsec = 0;
+ }
+ CPU = 0;
+ return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
+}
+
+inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+ return NanosecondsPerSecond;
+}
+
+} // namespace __xray
+
+#else
+#error Target architecture is not supported.
+#endif // CPU architecture
+
+#endif // XRAY_EMULATE_TSC_H
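
Whichever branch is selected, callers pair readTSC() with getTSCFrequency() to convert tick deltas into wall-clock time; on the emulated path the frequency is pinned to 1GHz, so one tick is one nanosecond. A minimal sketch using only this header's interface (ticksToNanos is a made-up helper, and overflow for very large deltas is ignored):

    #include <cstdint>
    #include "xray_tsc.h"

    namespace __xray {
    // Convert a TSC delta to nanoseconds using the probed (or emulated) frequency.
    inline uint64_t ticksToNanos(uint64_t StartTSC, uint64_t EndTSC) {
      uint64_t Freq = getTSCFrequency(); // NanosecondsPerSecond on emulated targets
      if (Freq == 0)
        return 0;                        // frequency could not be determined
      return (EndTSC - StartTSC) * NanosecondsPerSecond / Freq;
    }
    } // namespace __xray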
diff --git a/contrib/compiler-rt/lib/xray/xray_utils.cc b/contrib/compiler-rt/lib/xray/xray_utils.cc
new file mode 100644
index 0000000..b9a38d1
--- /dev/null
+++ b/contrib/compiler-rt/lib/xray/xray_utils.cc
@@ -0,0 +1,125 @@
+//===-- xray_utils.cc -------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+//===----------------------------------------------------------------------===//
+#include "xray_utils.h"
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_flags.h"
+#include <stdlib.h>
+#include <cstdio>
+#include <errno.h>
+#include <fcntl.h>
+#include <iterator>
+#include <sys/types.h>
+#include <tuple>
+#include <unistd.h>
+#include <utility>
+
+namespace __xray {
+
+void printToStdErr(const char *Buffer) XRAY_NEVER_INSTRUMENT {
+ fprintf(stderr, "%s", Buffer);
+}
+
+void retryingWriteAll(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
+ if (Begin == End)
+ return;
+ auto TotalBytes = std::distance(Begin, End);
+ while (auto Written = write(Fd, Begin, TotalBytes)) {
+ if (Written < 0) {
+ if (errno == EINTR)
+ continue; // Try again.
+ Report("Failed to write; errno = %d\n", errno);
+ return;
+ }
+ TotalBytes -= Written;
+ if (TotalBytes == 0)
+ break;
+ Begin += Written;
+ }
+}
+
+std::pair<ssize_t, bool> retryingReadSome(int Fd, char *Begin,
+ char *End) XRAY_NEVER_INSTRUMENT {
+ auto BytesToRead = std::distance(Begin, End);
+ ssize_t BytesRead;
+ ssize_t TotalBytesRead = 0;
+ while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
+ if (BytesRead == -1) {
+ if (errno == EINTR)
+ continue;
+ Report("Read error; errno = %d\n", errno);
+ return std::make_pair(TotalBytesRead, false);
+ }
+
+ TotalBytesRead += BytesRead;
+ BytesToRead -= BytesRead;
+ Begin += BytesRead;
+ }
+ return std::make_pair(TotalBytesRead, true);
+}
+
+bool readValueFromFile(const char *Filename,
+ long long *Value) XRAY_NEVER_INSTRUMENT {
+ int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
+ if (Fd == -1)
+ return false;
+ static constexpr size_t BufSize = 256;
+ char Line[BufSize] = {};
+ ssize_t BytesRead;
+ bool Success;
+ std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
+  close(Fd);
+  if (!Success)
+    return false;
+ char *End = nullptr;
+ long long Tmp = internal_simple_strtoll(Line, &End, 10);
+ bool Result = false;
+ if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
+ *Value = Tmp;
+ Result = true;
+ }
+ return Result;
+}
+
+int getLogFD() XRAY_NEVER_INSTRUMENT {
+ // Open a temporary file once for the log.
+ static char TmpFilename[256] = {};
+ static char TmpWildcardPattern[] = "XXXXXX";
+ auto Argv = GetArgv();
+ const char *Progname = Argv[0] == nullptr ? "(unknown)" : Argv[0];
+ const char *LastSlash = internal_strrchr(Progname, '/');
+
+ if (LastSlash != nullptr)
+ Progname = LastSlash + 1;
+
+ const int HalfLength = sizeof(TmpFilename) / 2 - sizeof(TmpWildcardPattern);
+ int NeededLength = internal_snprintf(
+ TmpFilename, sizeof(TmpFilename), "%.*s%.*s.%s", HalfLength,
+ flags()->xray_logfile_base, HalfLength, Progname, TmpWildcardPattern);
+ if (NeededLength > int(sizeof(TmpFilename))) {
+ Report("XRay log file name too long (%d): %s\n", NeededLength, TmpFilename);
+ return -1;
+ }
+ int Fd = mkstemp(TmpFilename);
+ if (Fd == -1) {
+ Report("XRay: Failed opening temporary file '%s'; not logging events.\n",
+ TmpFilename);
+ return -1;
+ }
+ Report("XRay: Log file in '%s'\n", TmpFilename);
+
+ return Fd;
+}
+
+} // namespace __xray
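
Taken together, these helpers form the file-backed logging path used by the logging implementations. A rough usage sketch, not taken from the actual logging code (appendToLog is a hypothetical helper):

    #include "xray_utils.h"

    // Hypothetical helper: append a character range to the XRay log file,
    // using the getLogFD()/retryingWriteAll() interface from xray_utils.h.
    static void appendToLog(char *Begin, char *End) {
      static int Fd = __xray::getLogFD(); // opened once; mkstemp-style temp file
      if (Fd == -1)
        return;                           // opening the log failed; drop the data
      __xray::retryingWriteAll(Fd, Begin, End);
    }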
diff --git a/contrib/compiler-rt/lib/xray/xray_utils.h b/contrib/compiler-rt/lib/xray/xray_utils.h
new file mode 100644
index 0000000..1ecc74a
--- /dev/null
+++ b/contrib/compiler-rt/lib/xray/xray_utils.h
@@ -0,0 +1,41 @@
+//===-- xray_utils.h --------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Some shared utilities for the XRay runtime implementation.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_UTILS_H
+#define XRAY_UTILS_H
+
+#include <sys/types.h>
+#include <utility>
+
+namespace __xray {
+
+// Default implementation of the reporting interface for sanitizer errors.
+void printToStdErr(const char *Buffer);
+
+// EINTR-safe write routine, provided a file descriptor and a character range.
+void retryingWriteAll(int Fd, char *Begin, char *End);
+
+// Reads a long long value from a provided file.
+bool readValueFromFile(const char *Filename, long long *Value);
+
+// EINTR-safe read routine, provided a file descriptor and a character range.
+std::pair<ssize_t, bool> retryingReadSome(int Fd, char *Begin, char *End);
+
+// EINTR-safe open routine, uses flag-provided values for initialising a log
+// file.
+int getLogFD();
+
+} // namespace __xray
+
+#endif // XRAY_UTILS_H
diff --git a/contrib/compiler-rt/lib/xray/xray_x86_64.cc b/contrib/compiler-rt/lib/xray/xray_x86_64.cc
index 3ee9189..e34806f 100644
--- a/contrib/compiler-rt/lib/xray/xray_x86_64.cc
+++ b/contrib/compiler-rt/lib/xray/xray_x86_64.cc
@@ -1,6 +1,8 @@
+#include "cpuid.h"
#include "sanitizer_common/sanitizer_common.h"
#include "xray_defs.h"
#include "xray_interface_internal.h"
+
#include <atomic>
#include <cstdint>
#include <errno.h>
@@ -42,9 +44,9 @@ static bool readValueFromFile(const char *Filename,
ssize_t BytesRead;
bool Success;
std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
+ close(Fd);
if (!Success)
return false;
- close(Fd);
char *End = nullptr;
long long Tmp = internal_simple_strtoll(Line, &End, 10);
bool Result = false;
@@ -55,32 +57,35 @@ static bool readValueFromFile(const char *Filename,
return Result;
}
-uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT {
- long long CPUFrequency = -1;
+uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+ long long TSCFrequency = -1;
if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
- &CPUFrequency)) {
- CPUFrequency *= 1000;
+ &TSCFrequency)) {
+ TSCFrequency *= 1000;
} else if (readValueFromFile(
- "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
- &CPUFrequency)) {
- CPUFrequency *= 1000;
+ "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
+ &TSCFrequency)) {
+ TSCFrequency *= 1000;
} else {
Report("Unable to determine CPU frequency for TSC accounting.\n");
}
- return CPUFrequency == -1 ? 0 : static_cast<uint64_t>(CPUFrequency);
+ return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
}
static constexpr uint8_t CallOpCode = 0xe8;
static constexpr uint16_t MovR10Seq = 0xba41;
static constexpr uint16_t Jmp9Seq = 0x09eb;
+static constexpr uint16_t Jmp20Seq = 0x14eb;
static constexpr uint8_t JmpOpCode = 0xe9;
static constexpr uint8_t RetOpCode = 0xc3;
+static constexpr uint16_t NopwSeq = 0x9066;
static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
- const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ const XRaySledEntry &Sled,
+ void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
// Here we do the dance of replacing the following sled:
//
// xray_sled_n:
@@ -101,13 +106,12 @@ bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
// 4. Do an atomic write over the jmp instruction for the "mov r10d"
// opcode and first operand.
//
- // Prerequisite is to compute the relative offset to the
- // __xray_FunctionEntry function's address.
- int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionEntry) -
+ // Prerequisite is to compute the relative offset to the trampoline's address.
+ int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
(static_cast<int64_t>(Sled.Address) + 11);
if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
- __xray_FunctionEntry, reinterpret_cast<void *>(Sled.Address));
+ Trampoline, reinterpret_cast<void *>(Sled.Address));
return false;
}
if (Enable) {
@@ -199,4 +203,60 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
return true;
}
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // Here we do the dance of replacing the following sled:
+ //
+ // xray_sled_n:
+ //   jmp +20          // 2 bytes
+ // ...
+ //
+ // With the following:
+ //
+ // nopw // 2 bytes*
+ // ...
+ //
+ // We need to do this in the following order:
+ //
+ // 1. Overwrite the 5-byte nop with the call (relative), where (relative) is
+ // the relative offset to the __xray_CustomEvent trampoline.
+ // 2. Do a two-byte atomic write over the 'jmp +20' to turn it into a 'nopw'.
+ // This allows us to "enable" this code once the changes have committed.
+ //
+ // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
+ //
+ if (Enable) {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
+ std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
+ std::memory_order_release);
+ }
+ return false;
+}
+
+// We determine whether the CPU we're running on has the correct features we
+// need. In x86_64 this will be rdtscp support.
+bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
+ unsigned int EAX, EBX, ECX, EDX;
+
+ // We check whether rdtscp support is enabled. According to the x86_64 manual,
+ // level should be set at 0x80000001, and we should have a look at bit 27 in
+ // EDX. That's 0x8000000 (or 1u << 27).
+  __get_cpuid(0x80000001, &EAX, &EBX, &ECX, &EDX);
+  if (!(EDX & (1u << 27))) {
+ Report("Missing rdtscp support.\n");
+ return false;
+ }
+ // Also check whether we can determine the CPU frequency, since if we cannot,
+ // we should use the emulated TSC instead.
+ if (!getTSCFrequency()) {
+ Report("Unable to determine CPU frequency.\n");
+ return false;
+ }
+ return true;
+}
+
} // namespace __xray
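
The entry patching above reduces to checking that the trampoline is reachable by a rel32 call placed 11 bytes into the sled. A small standalone sketch of that range check, mirroring the arithmetic in patchFunctionEntry (the function name and addresses here are illustrative only):

    #include <cstdint>
    #include <limits>

    // The call's rel32 displacement is measured from the end of the 11-byte
    // "mov r10d, FuncId; call rel32" sequence that replaces the sled.
    bool trampolineReachable(uint64_t SledAddress, uint64_t TrampolineAddress) {
      int64_t Offset = static_cast<int64_t>(TrampolineAddress) -
                       (static_cast<int64_t>(SledAddress) + 11);
      return Offset >= std::numeric_limits<int32_t>::min() &&
             Offset <= std::numeric_limits<int32_t>::max();
    }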
diff --git a/contrib/compiler-rt/lib/xray/xray_x86_64.h b/contrib/compiler-rt/lib/xray/xray_x86_64.inc
index 52d2dea..4ad3f98 100644
--- a/contrib/compiler-rt/lib/xray/xray_x86_64.h
+++ b/contrib/compiler-rt/lib/xray/xray_x86_64.inc
@@ -1,4 +1,4 @@
-//===-- xray_x86_64.h -------------------------------------------*- C++ -*-===//
+//===-- xray_x86_64.inc -----------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,8 +10,6 @@
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
//===----------------------------------------------------------------------===//
-#ifndef XRAY_X86_64_H
-#define XRAY_X86_64_H
#include <cstdint>
#include <x86intrin.h>
@@ -27,6 +25,9 @@ ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
CPU = LongCPU;
return TSC;
}
-}
-#endif // XRAY_X86_64_H
+uint64_t getTSCFrequency();
+
+bool probeRequiredCPUFeatures();
+
+} // namespace __xray