summary | refs | log | tree | commit | diff | stats
path: root/lib/Fuzzer/FuzzerTraceState.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Fuzzer/FuzzerTraceState.cpp')
-rw-r--r-- lib/Fuzzer/FuzzerTraceState.cpp 244
1 files changed, 160 insertions, 84 deletions
diff --git a/lib/Fuzzer/FuzzerTraceState.cpp b/lib/Fuzzer/FuzzerTraceState.cpp
index 241c2f0..b2006fa 100644
--- a/lib/Fuzzer/FuzzerTraceState.cpp
+++ b/lib/Fuzzer/FuzzerTraceState.cpp
@@ -46,8 +46,6 @@
// * The __dfsw_* functions (implemented in this file) record the
// parameters (i.e. the application data and the corresponding taint labels)
// in a global state.
-// * Fuzzer::ApplyTraceBasedMutation() tries to use the data recorded
-// by __dfsw_* hooks to guide the fuzzing towards new application states.
//
// Parts of this code will not function when DFSan is not linked in.
// Instead of using ifdefs and thus requiring a separate build of lib/Fuzzer
@@ -78,7 +76,7 @@
#include <algorithm>
#include <cstring>
#include <thread>
-#include <unordered_map>
+#include <map>
#if !LLVM_FUZZER_SUPPORTS_DFSAN
// Stubs for dfsan for platforms where dfsan does not exist and weak
@@ -166,15 +164,19 @@ struct LabelRange {
// For now, very simple: put Size bytes of Data at position Pos.
struct TraceBasedMutation {
- size_t Pos;
- size_t Size;
- uint64_t Data;
+ static const size_t kMaxSize = 28;
+ uint32_t Pos : 24;
+ uint32_t Size : 8;
+ uint8_t Data[kMaxSize];
};
+const size_t TraceBasedMutation::kMaxSize;
+
class TraceState {
public:
- TraceState(const Fuzzer::FuzzingOptions &Options, const Unit &CurrentUnit)
- : Options(Options), CurrentUnit(CurrentUnit) {
+ TraceState(UserSuppliedFuzzer &USF,
+ const Fuzzer::FuzzingOptions &Options, const Unit &CurrentUnit)
+ : USF(USF), Options(Options), CurrentUnit(CurrentUnit) {
// Current trace collection is not thread-friendly and it probably
// does not have to be such, but at least we should not crash in presence
// of threads. So, just ignore all traces coming from all threads but one.
@@ -185,28 +187,71 @@ class TraceState {
void DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
uint64_t Arg1, uint64_t Arg2, dfsan_label L1,
dfsan_label L2);
+ void DFSanMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
+ const uint8_t *Data2, dfsan_label L1,
+ dfsan_label L2);
void DFSanSwitchCallback(uint64_t PC, size_t ValSizeInBits, uint64_t Val,
size_t NumCases, uint64_t *Cases, dfsan_label L);
void TraceCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
uint64_t Arg1, uint64_t Arg2);
+ void TraceMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
+ const uint8_t *Data2);
void TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits, uint64_t Val,
size_t NumCases, uint64_t *Cases);
int TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
size_t DataSize);
+ int TryToAddDesiredData(const uint8_t *PresentData,
+ const uint8_t *DesiredData, size_t DataSize);
void StartTraceRecording() {
if (!Options.UseTraces) return;
RecordingTraces = true;
- Mutations.clear();
+ NumMutations = 0;
+ USF.GetMD().ClearAutoDictionary();
}
- size_t StopTraceRecording(FuzzerRandomBase &Rand) {
+ void StopTraceRecording() {
+ if (!RecordingTraces) return;
RecordingTraces = false;
- return Mutations.size();
+ for (size_t i = 0; i < NumMutations; i++) {
+ auto &M = Mutations[i];
+ Unit U(M.Data, M.Data + M.Size);
+ if (Options.Verbosity >= 2) {
+ AutoDictUnitCounts[U]++;
+ AutoDictAdds++;
+ if ((AutoDictAdds & (AutoDictAdds - 1)) == 0) {
+ typedef std::pair<size_t, Unit> CU;
+ std::vector<CU> CountedUnits;
+ for (auto &I : AutoDictUnitCounts)
+ CountedUnits.push_back(std::make_pair(I.second, I.first));
+ std::sort(CountedUnits.begin(), CountedUnits.end(),
+ [](const CU &a, const CU &b) { return a.first > b.first; });
+ Printf("AutoDict:\n");
+ for (auto &I : CountedUnits) {
+ Printf(" %zd ", I.first);
+ PrintASCII(I.second);
+ Printf("\n");
+ }
+ }
+ }
+ USF.GetMD().AddWordToAutoDictionary(U, M.Pos);
+ }
}
- void ApplyTraceBasedMutation(size_t Idx, fuzzer::Unit *U);
+ void AddMutation(uint32_t Pos, uint32_t Size, const uint8_t *Data) {  // Records one mutation: Size bytes of Data at Pos.
+ if (NumMutations >= kMaxMutations) return;  // Buffer full: drop silently.
+ assert(Size <= TraceBasedMutation::kMaxSize);  // Payload must fit in M.Data.
+ auto &M = Mutations[NumMutations++];  // Claim the next free slot.
+ M.Pos = Pos;  // Stored in a 24-bit bit-field.
+ M.Size = Size;  // Stored in an 8-bit bit-field.
+ memcpy(M.Data, Data, Size);  // Only the first Size bytes of M.Data are written.
+ }
+
+ void AddMutation(uint32_t Pos, uint32_t Size, uint64_t Data) {  // Integer overload; forwards to the byte-pointer overload.
+ assert(Size <= sizeof(Data));  // Cannot take more bytes than the integer holds.
+ AddMutation(Pos, Size, reinterpret_cast<uint8_t*>(&Data));  // Uses the first Size bytes of Data as laid out in memory.
+ }
private:
bool IsTwoByteData(uint64_t Data) {
@@ -215,10 +260,15 @@ class TraceState {
return Signed == 0 || Signed == -1L;
}
bool RecordingTraces = false;
- std::vector<TraceBasedMutation> Mutations;
+ static const size_t kMaxMutations = 1 << 16;  // Capacity of Mutations[].
+ size_t NumMutations = 0;  // Count of valid entries in Mutations[]; zero-initialized like the sibling members so it is never read indeterminate.
+ TraceBasedMutation Mutations[kMaxMutations];  // Fixed-size storage filled by AddMutation().
LabelRange LabelRanges[1 << (sizeof(dfsan_label) * 8)];
+ UserSuppliedFuzzer &USF;
const Fuzzer::FuzzingOptions &Options;
const Unit &CurrentUnit;
+ std::map<Unit, size_t> AutoDictUnitCounts;
+ size_t AutoDictAdds = 0;
static thread_local bool IsMyThread;
};
@@ -234,15 +284,6 @@ LabelRange TraceState::GetLabelRange(dfsan_label L) {
return LR = LabelRange::Singleton(LI);
}
-void TraceState::ApplyTraceBasedMutation(size_t Idx, fuzzer::Unit *U) {
- assert(Idx < Mutations.size());
- auto &M = Mutations[Idx];
- if (Options.Verbosity >= 3)
- Printf("TBM %zd %zd %zd\n", M.Pos, M.Size, M.Data);
- if (M.Pos + M.Size > U->size()) return;
- memcpy(U->data() + M.Pos, &M.Data, M.Size);
-}
-
void TraceState::DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
uint64_t Arg1, uint64_t Arg2, dfsan_label L1,
dfsan_label L2) {
@@ -257,19 +298,39 @@ void TraceState::DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
LabelRange LR = L1 ? GetLabelRange(L1) : GetLabelRange(L2);
for (size_t Pos = LR.Beg; Pos + CmpSize <= LR.End; Pos++) {
- Mutations.push_back({Pos, CmpSize, Data});
- Mutations.push_back({Pos, CmpSize, Data + 1});
- Mutations.push_back({Pos, CmpSize, Data - 1});
+ AddMutation(Pos, CmpSize, Data);
+ AddMutation(Pos, CmpSize, Data + 1);
+ AddMutation(Pos, CmpSize, Data - 1);
}
if (CmpSize > LR.End - LR.Beg)
- Mutations.push_back({LR.Beg, (unsigned)(LR.End - LR.Beg), Data});
+ AddMutation(LR.Beg, (unsigned)(LR.End - LR.Beg), Data);
if (Options.Verbosity >= 3)
Printf("DFSanCmpCallback: PC %lx S %zd T %zd A1 %llx A2 %llx R %d L1 %d L2 "
"%d MU %zd\n",
- PC, CmpSize, CmpType, Arg1, Arg2, Res, L1, L2, Mutations.size());
+ PC, CmpSize, CmpType, Arg1, Arg2, Res, L1, L2, NumMutations);
+}
+
+// Handles a memcmp-style comparison observed under DFSan. When exactly one
+// operand is tainted (non-zero label), the bytes of the other operand are
+// recorded via AddMutation() for every position of the tainted operand's label
+// range at which CmpSize bytes fit.
+void TraceState::DFSanMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
+                                     const uint8_t *Data2, dfsan_label L1,
+                                     dfsan_label L2) {
+
+  assert(ReallyHaveDFSan());
+  if (!RecordingTraces || !IsMyThread) return;
+  if (L1 == 0 && L2 == 0)
+    return;  // Not actionable.
+  if (L1 != 0 && L2 != 0)
+    return;  // Probably still actionable, but not handled here.
+
+  // TraceBasedMutation::Data holds at most kMaxSize bytes; without this clamp a
+  // long comparison trips the assert in AddMutation() (or overflows the buffer
+  // when asserts are compiled out). TraceMemcmpCallback() clamps the same way.
+  CmpSize = std::min(CmpSize, TraceBasedMutation::kMaxSize);
+  const uint8_t *Data = L1 ? Data2 : Data1;
+  LabelRange LR = L1 ? GetLabelRange(L1) : GetLabelRange(L2);
+  for (size_t Pos = LR.Beg; Pos + CmpSize <= LR.End; Pos++) {
+    AddMutation(Pos, CmpSize, Data);
+    if (Options.Verbosity >= 3)
+      // Pos and CmpSize are size_t, so %zd (not %d) is the matching format.
+      Printf("DFSanMemcmpCallback: Pos %zd Size %zd\n", Pos, CmpSize);
+  }
}
void TraceState::DFSanSwitchCallback(uint64_t PC, size_t ValSizeInBits,
@@ -286,12 +347,12 @@ void TraceState::DFSanSwitchCallback(uint64_t PC, size_t ValSizeInBits,
for (size_t Pos = LR.Beg; Pos + ValSize <= LR.End; Pos++)
for (size_t i = 0; i < NumCases; i++)
- Mutations.push_back({Pos, ValSize, Cases[i]});
+ AddMutation(Pos, ValSize, Cases[i]);
if (TryShort)
for (size_t Pos = LR.Beg; Pos + 2 <= LR.End; Pos++)
for (size_t i = 0; i < NumCases; i++)
- Mutations.push_back({Pos, 2, Cases[i]});
+ AddMutation(Pos, 2, Cases[i]);
if (Options.Verbosity >= 3)
Printf("DFSanSwitchCallback: PC %lx Val %zd SZ %zd # %zd L %d: {%d, %d} "
@@ -310,10 +371,27 @@ int TraceState::TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
break;
size_t Pos = Cur - Beg;
assert(Pos < CurrentUnit.size());
- if (Mutations.size() > 100000U) return Res; // Just in case.
- Mutations.push_back({Pos, DataSize, DesiredData});
- Mutations.push_back({Pos, DataSize, DesiredData + 1});
- Mutations.push_back({Pos, DataSize, DesiredData - 1});
+ AddMutation(Pos, DataSize, DesiredData);
+ AddMutation(Pos, DataSize, DesiredData + 1);
+ AddMutation(Pos, DataSize, DesiredData - 1);
+ Res++;
+ }
+ return Res;
+}
+
+int TraceState::TryToAddDesiredData(const uint8_t *PresentData,
+ const uint8_t *DesiredData,
+ size_t DataSize) {  // Byte-string overload: returns the number of matches found.
+ int Res = 0;  // Occurrences of PresentData found in CurrentUnit.
+ const uint8_t *Beg = CurrentUnit.data();
+ const uint8_t *End = Beg + CurrentUnit.size();
+ for (const uint8_t *Cur = Beg; Cur < End; Cur++) {  // Cur++ resumes one byte past each match, so overlapping matches count.
+ Cur = (uint8_t *)memmem(Cur, End - Cur, PresentData, DataSize);  // Next occurrence at or after Cur.
+ if (!Cur)
+ break;  // No further occurrence.
+ size_t Pos = Cur - Beg;  // Offset of the match within CurrentUnit.
+ assert(Pos < CurrentUnit.size());
+ AddMutation(Pos, DataSize, DesiredData);  // Suggest DesiredData at the match offset.
Res++;
}
return Res;
@@ -322,15 +400,31 @@ int TraceState::TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
void TraceState::TraceCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
uint64_t Arg1, uint64_t Arg2) {
if (!RecordingTraces || !IsMyThread) return;
+ if ((CmpType == ICMP_EQ || CmpType == ICMP_NE) && Arg1 == Arg2)
+ return; // No reason to mutate.
int Added = 0;
- if (Options.Verbosity >= 3)
- Printf("TraceCmp %zd/%zd: %p %zd %zd\n", CmpSize, CmpType, PC, Arg1, Arg2);
Added += TryToAddDesiredData(Arg1, Arg2, CmpSize);
Added += TryToAddDesiredData(Arg2, Arg1, CmpSize);
if (!Added && CmpSize == 4 && IsTwoByteData(Arg1) && IsTwoByteData(Arg2)) {
Added += TryToAddDesiredData(Arg1, Arg2, 2);
Added += TryToAddDesiredData(Arg2, Arg1, 2);
}
+ if (Options.Verbosity >= 3 && Added)
+ Printf("TraceCmp %zd/%zd: %p %zd %zd\n", CmpSize, CmpType, PC, Arg1, Arg2);
+}
+
+void TraceState::TraceMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
+ const uint8_t *Data2) {  // Handles a memcmp-like comparison of Data1 vs Data2.
+ if (!RecordingTraces || !IsMyThread) return;  // Only while recording, and only on the one tracked thread.
+ CmpSize = std::min(CmpSize, TraceBasedMutation::kMaxSize);  // A mutation payload holds at most kMaxSize bytes.
+ int Added2 = TryToAddDesiredData(Data1, Data2, CmpSize);  // Data1 found in the unit => Data2 suggested.
+ int Added1 = TryToAddDesiredData(Data2, Data1, CmpSize);  // Data2 found in the unit => Data1 suggested.
+ if ((Added1 || Added2) && Options.Verbosity >= 3) {
+ Printf("MemCmp Added %d%d: ", Added1, Added2);
+ if (Added1) PrintASCII(Data1, CmpSize);  // Prints the operand that was suggested.
+ if (Added2) PrintASCII(Data2, CmpSize);
+ Printf("\n");
+ }
}
void TraceState::TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits,
@@ -351,7 +445,6 @@ void TraceState::TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits,
if (TryShort)
TryToAddDesiredData(Val, Cases[i], 2);
}
-
}
static TraceState *TS;
@@ -364,19 +457,14 @@ void Fuzzer::StartTraceRecording() {
TS->StartTraceRecording();
}
-size_t Fuzzer::StopTraceRecording() {
- if (!TS) return 0;
- return TS->StopTraceRecording(USF.GetRand());
-}
-
-void Fuzzer::ApplyTraceBasedMutation(size_t Idx, Unit *U) {
- assert(TS);
- TS->ApplyTraceBasedMutation(Idx, U);
+void Fuzzer::StopTraceRecording() {
+ if (!TS) return;
+ TS->StopTraceRecording();
}
void Fuzzer::InitializeTraceState() {
if (!Options.UseTraces) return;
- TS = new TraceState(Options, CurrentUnit);
+ TS = new TraceState(USF, Options, CurrentUnit);
CurrentUnit.resize(Options.MaxLen);
// The rest really requires DFSan.
if (!ReallyHaveDFSan()) return;
@@ -423,91 +511,79 @@ void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2,
size_t n, dfsan_label s1_label,
dfsan_label s2_label, dfsan_label n_label) {
if (!TS) return;
- uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc);
- uint64_t S1 = 0, S2 = 0;
- // Simplification: handle only first 8 bytes.
- memcpy(&S1, s1, std::min(n, sizeof(S1)));
- memcpy(&S2, s2, std::min(n, sizeof(S2)));
dfsan_label L1 = dfsan_read_label(s1, n);
dfsan_label L2 = dfsan_read_label(s2, n);
- TS->DFSanCmpCallback(PC, n, fuzzer::ICMP_EQ, S1, S2, L1, L2);
+ TS->DFSanMemcmpCallback(n, reinterpret_cast<const uint8_t *>(s1),
+ reinterpret_cast<const uint8_t *>(s2), L1, L2);
}
void dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2,
size_t n, dfsan_label s1_label,
dfsan_label s2_label, dfsan_label n_label) {
if (!TS) return;
- uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc);
- uint64_t S1 = 0, S2 = 0;
n = std::min(n, fuzzer::InternalStrnlen(s1, n));
n = std::min(n, fuzzer::InternalStrnlen(s2, n));
- // Simplification: handle only first 8 bytes.
- memcpy(&S1, s1, std::min(n, sizeof(S1)));
- memcpy(&S2, s2, std::min(n, sizeof(S2)));
dfsan_label L1 = dfsan_read_label(s1, n);
dfsan_label L2 = dfsan_read_label(s2, n);
- TS->DFSanCmpCallback(PC, n, fuzzer::ICMP_EQ, S1, S2, L1, L2);
+ TS->DFSanMemcmpCallback(n, reinterpret_cast<const uint8_t *>(s1),
+ reinterpret_cast<const uint8_t *>(s2), L1, L2);
}
void dfsan_weak_hook_strcmp(void *caller_pc, const char *s1, const char *s2,
dfsan_label s1_label, dfsan_label s2_label) {
if (!TS) return;
- uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc);
- uint64_t S1 = 0, S2 = 0;
size_t Len1 = strlen(s1);
size_t Len2 = strlen(s2);
size_t N = std::min(Len1, Len2);
if (N <= 1) return; // Not interesting.
- // Simplification: handle only first 8 bytes.
- memcpy(&S1, s1, std::min(N, sizeof(S1)));
- memcpy(&S2, s2, std::min(N, sizeof(S2)));
dfsan_label L1 = dfsan_read_label(s1, Len1);
dfsan_label L2 = dfsan_read_label(s2, Len2);
- TS->DFSanCmpCallback(PC, N, fuzzer::ICMP_EQ, S1, S2, L1, L2);
+ TS->DFSanMemcmpCallback(N, reinterpret_cast<const uint8_t *>(s1),
+ reinterpret_cast<const uint8_t *>(s2), L1, L2);
}
+// We may need to avoid defining weak hooks to stay compatible with older clang.
+#ifndef LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
+# define LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS 1
+#endif
+
+#if LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
void __sanitizer_weak_hook_memcmp(void *caller_pc, const void *s1,
- const void *s2, size_t n) {
+ const void *s2, size_t n, int result) {
if (!TS) return;
- uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc);
- uint64_t S1 = 0, S2 = 0;
- // Simplification: handle only first 8 bytes.
- memcpy(&S1, s1, std::min(n, sizeof(S1)));
- memcpy(&S2, s2, std::min(n, sizeof(S2)));
- TS->TraceCmpCallback(PC, n, fuzzer::ICMP_EQ, S1, S2);
+ if (result == 0) return; // No reason to mutate.
+ if (n <= 1) return; // Not interesting.
+ TS->TraceMemcmpCallback(n, reinterpret_cast<const uint8_t *>(s1),
+ reinterpret_cast<const uint8_t *>(s2));
}
void __sanitizer_weak_hook_strncmp(void *caller_pc, const char *s1,
- const char *s2, size_t n) {
+ const char *s2, size_t n, int result) {
if (!TS) return;
- uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc);
- uint64_t S1 = 0, S2 = 0;
+ if (result == 0) return; // No reason to mutate.
size_t Len1 = fuzzer::InternalStrnlen(s1, n);
size_t Len2 = fuzzer::InternalStrnlen(s2, n);
n = std::min(n, Len1);
n = std::min(n, Len2);
if (n <= 1) return; // Not interesting.
- // Simplification: handle only first 8 bytes.
- memcpy(&S1, s1, std::min(n, sizeof(S1)));
- memcpy(&S2, s2, std::min(n, sizeof(S2)));
- TS->TraceCmpCallback(PC, n, fuzzer::ICMP_EQ, S1, S2);
+ TS->TraceMemcmpCallback(n, reinterpret_cast<const uint8_t *>(s1),
+ reinterpret_cast<const uint8_t *>(s2));
}
void __sanitizer_weak_hook_strcmp(void *caller_pc, const char *s1,
- const char *s2) {
+ const char *s2, int result) {
if (!TS) return;
- uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc);
- uint64_t S1 = 0, S2 = 0;
+ if (result == 0) return; // No reason to mutate.
size_t Len1 = strlen(s1);
size_t Len2 = strlen(s2);
size_t N = std::min(Len1, Len2);
if (N <= 1) return; // Not interesting.
- // Simplification: handle only first 8 bytes.
- memcpy(&S1, s1, std::min(N, sizeof(S1)));
- memcpy(&S2, s2, std::min(N, sizeof(S2)));
- TS->TraceCmpCallback(PC, N, fuzzer::ICMP_EQ, S1, S2);
+ TS->TraceMemcmpCallback(N, reinterpret_cast<const uint8_t *>(s1),
+ reinterpret_cast<const uint8_t *>(s2));
}
+#endif // LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
+
__attribute__((visibility("default")))
void __sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1,
uint64_t Arg2) {
OpenPOWER on IntegriCloud