diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Analysis/PrintfFormatString.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/lib/Analysis/PrintfFormatString.cpp | 584 |
1 files changed, 584 insertions, 0 deletions
diff --git a/contrib/llvm/tools/clang/lib/Analysis/PrintfFormatString.cpp b/contrib/llvm/tools/clang/lib/Analysis/PrintfFormatString.cpp new file mode 100644 index 0000000..c38aae3 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Analysis/PrintfFormatString.cpp @@ -0,0 +1,584 @@ +//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Handling of format string in printf and friends. The structure of format +// strings for fprintf() are described in C99 7.19.6.1. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/PrintfFormatString.h" +#include "clang/AST/ASTContext.h" + +using clang::analyze_printf::ArgTypeResult; +using clang::analyze_printf::FormatSpecifier; +using clang::analyze_printf::FormatStringHandler; +using clang::analyze_printf::OptionalAmount; +using clang::analyze_printf::PositionContext; + +using namespace clang; + +namespace { +class FormatSpecifierResult { + FormatSpecifier FS; + const char *Start; + bool Stop; +public: + FormatSpecifierResult(bool stop = false) + : Start(0), Stop(stop) {} + FormatSpecifierResult(const char *start, + const FormatSpecifier &fs) + : FS(fs), Start(start), Stop(false) {} + + + const char *getStart() const { return Start; } + bool shouldStop() const { return Stop; } + bool hasValue() const { return Start != 0; } + const FormatSpecifier &getValue() const { + assert(hasValue()); + return FS; + } + const FormatSpecifier &getValue() { return FS; } +}; +} // end anonymous namespace + +template <typename T> +class UpdateOnReturn { + T &ValueToUpdate; + const T &ValueToCopy; +public: + UpdateOnReturn(T &valueToUpdate, const T &valueToCopy) + : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {} + + ~UpdateOnReturn() { + ValueToUpdate = ValueToCopy; + } +}; + +//===----------------------------------------------------------------------===// +// Methods for parsing format strings. +//===----------------------------------------------------------------------===// + +static OptionalAmount ParseAmount(const char *&Beg, const char *E) { + const char *I = Beg; + UpdateOnReturn <const char*> UpdateBeg(Beg, I); + + unsigned accumulator = 0; + bool hasDigits = false; + + for ( ; I != E; ++I) { + char c = *I; + if (c >= '0' && c <= '9') { + hasDigits = true; + accumulator = (accumulator * 10) + (c - '0'); + continue; + } + + if (hasDigits) + return OptionalAmount(OptionalAmount::Constant, accumulator, Beg); + + break; + } + + return OptionalAmount(); +} + +static OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E, + unsigned &argIndex) { + if (*Beg == '*') { + ++Beg; + return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg); + } + + return ParseAmount(Beg, E); +} + +static OptionalAmount ParsePositionAmount(FormatStringHandler &H, + const char *Start, + const char *&Beg, const char *E, + PositionContext p) { + if (*Beg == '*') { + const char *I = Beg + 1; + const OptionalAmount &Amt = ParseAmount(I, E); + + if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { + H.HandleInvalidPosition(Beg, I - Beg, p); + return OptionalAmount(false); + } + + if (I== E) { + // No more characters left? + H.HandleIncompleteFormatSpecifier(Start, E - Start); + return OptionalAmount(false); + } + + assert(Amt.getHowSpecified() == OptionalAmount::Constant); + + if (*I == '$') { + // Special case: '*0$', since this is an easy mistake. + if (Amt.getConstantAmount() == 0) { + H.HandleZeroPosition(Beg, I - Beg + 1); + return OptionalAmount(false); + } + + const char *Tmp = Beg; + Beg = ++I; + + return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, + Tmp); + } + + H.HandleInvalidPosition(Beg, I - Beg, p); + return OptionalAmount(false); + } + + return ParseAmount(Beg, E); +} + +static bool ParsePrecision(FormatStringHandler &H, FormatSpecifier &FS, + const char *Start, const char *&Beg, const char *E, + unsigned *argIndex) { + if (argIndex) { + FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex)); + } + else { + const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, + analyze_printf::PrecisionPos); + if (Amt.isInvalid()) + return true; + FS.setPrecision(Amt); + } + return false; +} + +static bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &FS, + const char *Start, const char *&Beg, const char *E, + unsigned *argIndex) { + // FIXME: Support negative field widths. + if (argIndex) { + FS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); + } + else { + const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, + analyze_printf::FieldWidthPos); + if (Amt.isInvalid()) + return true; + FS.setFieldWidth(Amt); + } + return false; +} + + +static bool ParseArgPosition(FormatStringHandler &H, + FormatSpecifier &FS, const char *Start, + const char *&Beg, const char *E) { + + using namespace clang::analyze_printf; + const char *I = Beg; + + const OptionalAmount &Amt = ParseAmount(I, E); + + if (I == E) { + // No more characters left? + H.HandleIncompleteFormatSpecifier(Start, E - Start); + return true; + } + + if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { + // Special case: '%0$', since this is an easy mistake. + if (Amt.getConstantAmount() == 0) { + H.HandleZeroPosition(Start, I - Start); + return true; + } + + FS.setArgIndex(Amt.getConstantAmount() - 1); + FS.setUsesPositionalArg(); + // Update the caller's pointer if we decided to consume + // these characters. + Beg = I; + return false; + } + + return false; +} + +static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, + const char *&Beg, + const char *E, + unsigned &argIndex) { + + using namespace clang::analyze_printf; + + const char *I = Beg; + const char *Start = 0; + UpdateOnReturn <const char*> UpdateBeg(Beg, I); + + // Look for a '%' character that indicates the start of a format specifier. + for ( ; I != E ; ++I) { + char c = *I; + if (c == '\0') { + // Detect spurious null characters, which are likely errors. + H.HandleNullChar(I); + return true; + } + if (c == '%') { + Start = I++; // Record the start of the format specifier. + break; + } + } + + // No format specifier found? + if (!Start) + return false; + + if (I == E) { + // No more characters left? + H.HandleIncompleteFormatSpecifier(Start, E - Start); + return true; + } + + FormatSpecifier FS; + if (ParseArgPosition(H, FS, Start, I, E)) + return true; + + if (I == E) { + // No more characters left? + H.HandleIncompleteFormatSpecifier(Start, E - Start); + return true; + } + + // Look for flags (if any). + bool hasMore = true; + for ( ; I != E; ++I) { + switch (*I) { + default: hasMore = false; break; + case '-': FS.setIsLeftJustified(); break; + case '+': FS.setHasPlusPrefix(); break; + case ' ': FS.setHasSpacePrefix(); break; + case '#': FS.setHasAlternativeForm(); break; + case '0': FS.setHasLeadingZeros(); break; + } + if (!hasMore) + break; + } + + if (I == E) { + // No more characters left? + H.HandleIncompleteFormatSpecifier(Start, E - Start); + return true; + } + + // Look for the field width (if any). + if (ParseFieldWidth(H, FS, Start, I, E, + FS.usesPositionalArg() ? 0 : &argIndex)) + return true; + + if (I == E) { + // No more characters left? + H.HandleIncompleteFormatSpecifier(Start, E - Start); + return true; + } + + // Look for the precision (if any). + if (*I == '.') { + ++I; + if (I == E) { + H.HandleIncompleteFormatSpecifier(Start, E - Start); + return true; + } + + if (ParsePrecision(H, FS, Start, I, E, + FS.usesPositionalArg() ? 0 : &argIndex)) + return true; + + if (I == E) { + // No more characters left? + H.HandleIncompleteFormatSpecifier(Start, E - Start); + return true; + } + } + + // Look for the length modifier. + LengthModifier lm = None; + switch (*I) { + default: + break; + case 'h': + ++I; + lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort; + break; + case 'l': + ++I; + lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong; + break; + case 'j': lm = AsIntMax; ++I; break; + case 'z': lm = AsSizeT; ++I; break; + case 't': lm = AsPtrDiff; ++I; break; + case 'L': lm = AsLongDouble; ++I; break; + case 'q': lm = AsLongLong; ++I; break; + } + FS.setLengthModifier(lm); + + if (I == E) { + // No more characters left? + H.HandleIncompleteFormatSpecifier(Start, E - Start); + return true; + } + + if (*I == '\0') { + // Detect spurious null characters, which are likely errors. + H.HandleNullChar(I); + return true; + } + + // Finally, look for the conversion specifier. + const char *conversionPosition = I++; + ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; + switch (*conversionPosition) { + default: + break; + // C99: 7.19.6.1 (section 8). + case '%': k = ConversionSpecifier::PercentArg; break; + case 'A': k = ConversionSpecifier::AArg; break; + case 'E': k = ConversionSpecifier::EArg; break; + case 'F': k = ConversionSpecifier::FArg; break; + case 'G': k = ConversionSpecifier::GArg; break; + case 'X': k = ConversionSpecifier::XArg; break; + case 'a': k = ConversionSpecifier::aArg; break; + case 'c': k = ConversionSpecifier::IntAsCharArg; break; + case 'd': k = ConversionSpecifier::dArg; break; + case 'e': k = ConversionSpecifier::eArg; break; + case 'f': k = ConversionSpecifier::fArg; break; + case 'g': k = ConversionSpecifier::gArg; break; + case 'i': k = ConversionSpecifier::iArg; break; + case 'n': k = ConversionSpecifier::OutIntPtrArg; break; + case 'o': k = ConversionSpecifier::oArg; break; + case 'p': k = ConversionSpecifier::VoidPtrArg; break; + case 's': k = ConversionSpecifier::CStrArg; break; + case 'u': k = ConversionSpecifier::uArg; break; + case 'x': k = ConversionSpecifier::xArg; break; + // Mac OS X (unicode) specific + case 'C': k = ConversionSpecifier::CArg; break; + case 'S': k = ConversionSpecifier::UnicodeStrArg; break; + // Objective-C. + case '@': k = ConversionSpecifier::ObjCObjArg; break; + // Glibc specific. + case 'm': k = ConversionSpecifier::PrintErrno; break; + } + ConversionSpecifier CS(conversionPosition, k); + FS.setConversionSpecifier(CS); + if (CS.consumesDataArgument() && !FS.usesPositionalArg()) + FS.setArgIndex(argIndex++); + + if (k == ConversionSpecifier::InvalidSpecifier) { + // Assume the conversion takes one argument. + return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg); + } + return FormatSpecifierResult(Start, FS); +} + +bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H, + const char *I, const char *E) { + + unsigned argIndex = 0; + + // Keep looking for a format specifier until we have exhausted the string. + while (I != E) { + const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex); + // Did a fail-stop error of any kind occur when parsing the specifier? + // If so, don't do any more processing. + if (FSR.shouldStop()) + return true;; + // Did we exhaust the string or encounter an error that + // we can recover from? + if (!FSR.hasValue()) + continue; + // We have a format specifier. Pass it to the callback. + if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(), + I - FSR.getStart())) + return true; + } + assert(I == E && "Format string not exhausted"); + return false; +} + +FormatStringHandler::~FormatStringHandler() {} + +//===----------------------------------------------------------------------===// +// Methods on ArgTypeResult. +//===----------------------------------------------------------------------===// + +bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { + assert(isValid()); + + if (K == UnknownTy) + return true; + + if (K == SpecificTy) { + argTy = C.getCanonicalType(argTy).getUnqualifiedType(); + + if (T == argTy) + return true; + + if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) + switch (BT->getKind()) { + default: + break; + case BuiltinType::Char_S: + case BuiltinType::SChar: + return T == C.UnsignedCharTy; + case BuiltinType::Char_U: + case BuiltinType::UChar: + return T == C.SignedCharTy; + case BuiltinType::Short: + return T == C.UnsignedShortTy; + case BuiltinType::UShort: + return T == C.ShortTy; + case BuiltinType::Int: + return T == C.UnsignedIntTy; + case BuiltinType::UInt: + return T == C.IntTy; + case BuiltinType::Long: + return T == C.UnsignedLongTy; + case BuiltinType::ULong: + return T == C.LongTy; + case BuiltinType::LongLong: + return T == C.UnsignedLongLongTy; + case BuiltinType::ULongLong: + return T == C.LongLongTy; + } + + return false; + } + + if (K == CStrTy) { + const PointerType *PT = argTy->getAs<PointerType>(); + if (!PT) + return false; + + QualType pointeeTy = PT->getPointeeType(); + + if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) + switch (BT->getKind()) { + case BuiltinType::Void: + case BuiltinType::Char_U: + case BuiltinType::UChar: + case BuiltinType::Char_S: + case BuiltinType::SChar: + return true; + default: + break; + } + + return false; + } + + if (K == WCStrTy) { + const PointerType *PT = argTy->getAs<PointerType>(); + if (!PT) + return false; + + QualType pointeeTy = + C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); + + return pointeeTy == C.getWCharType(); + } + + return false; +} + +QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { + assert(isValid()); + if (K == SpecificTy) + return T; + if (K == CStrTy) + return C.getPointerType(C.CharTy); + if (K == WCStrTy) + return C.getPointerType(C.getWCharType()); + if (K == ObjCPointerTy) + return C.ObjCBuiltinIdTy; + + return QualType(); +} + +//===----------------------------------------------------------------------===// +// Methods on OptionalAmount. +//===----------------------------------------------------------------------===// + +ArgTypeResult OptionalAmount::getArgType(ASTContext &Ctx) const { + return Ctx.IntTy; +} + +//===----------------------------------------------------------------------===// +// Methods on FormatSpecifier. +//===----------------------------------------------------------------------===// + +ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const { + if (!CS.consumesDataArgument()) + return ArgTypeResult::Invalid(); + + if (CS.isIntArg()) + switch (LM) { + case AsLongDouble: + return ArgTypeResult::Invalid(); + case None: return Ctx.IntTy; + case AsChar: return Ctx.SignedCharTy; + case AsShort: return Ctx.ShortTy; + case AsLong: return Ctx.LongTy; + case AsLongLong: return Ctx.LongLongTy; + case AsIntMax: + // FIXME: Return unknown for now. + return ArgTypeResult(); + case AsSizeT: return Ctx.getSizeType(); + case AsPtrDiff: return Ctx.getPointerDiffType(); + } + + if (CS.isUIntArg()) + switch (LM) { + case AsLongDouble: + return ArgTypeResult::Invalid(); + case None: return Ctx.UnsignedIntTy; + case AsChar: return Ctx.UnsignedCharTy; + case AsShort: return Ctx.UnsignedShortTy; + case AsLong: return Ctx.UnsignedLongTy; + case AsLongLong: return Ctx.UnsignedLongLongTy; + case AsIntMax: + // FIXME: Return unknown for now. + return ArgTypeResult(); + case AsSizeT: + // FIXME: How to get the corresponding unsigned + // version of size_t? + return ArgTypeResult(); + case AsPtrDiff: + // FIXME: How to get the corresponding unsigned + // version of ptrdiff_t? + return ArgTypeResult(); + } + + if (CS.isDoubleArg()) { + if (LM == AsLongDouble) + return Ctx.LongDoubleTy; + return Ctx.DoubleTy; + } + + switch (CS.getKind()) { + case ConversionSpecifier::CStrArg: + return ArgTypeResult(LM == AsWideChar ? ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy); + case ConversionSpecifier::UnicodeStrArg: + // FIXME: This appears to be Mac OS X specific. + return ArgTypeResult::WCStrTy; + case ConversionSpecifier::CArg: + return Ctx.WCharTy; + default: + break; + } + + // FIXME: Handle other cases. + return ArgTypeResult(); +} + |