//==- PrintfFormatStrings.h - Analysis of printf format strings --*- C++ -*-==// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // Handling of format string in printf and friends. The structure of format // strings for fprintf() are described in C99 7.19.6.1. // //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_FPRINTF_FORMAT_H #define LLVM_CLANG_FPRINTF_FORMAT_H #include "clang/AST/CanonicalType.h" namespace clang { class ASTContext; namespace analyze_printf { class ArgTypeResult { public: enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, CStrTy, WCStrTy }; private: const Kind K; QualType T; ArgTypeResult(bool) : K(InvalidTy) {} public: ArgTypeResult(Kind k = UnknownTy) : K(k) {} ArgTypeResult(QualType t) : K(SpecificTy), T(t) {} ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {} static ArgTypeResult Invalid() { return ArgTypeResult(true); } bool isValid() const { return K != InvalidTy; } const QualType *getSpecificType() const { return K == SpecificTy ? &T : 0; } bool matchesType(ASTContext &C, QualType argTy) const; bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; } QualType getRepresentativeType(ASTContext &C) const; }; class ConversionSpecifier { public: enum Kind { InvalidSpecifier = 0, // C99 conversion specifiers. dArg, // 'd' IntAsCharArg, // 'c' iArg, // 'i', oArg, // 'o', uArg, // 'u', xArg, // 'x', XArg, // 'X', fArg, // 'f', FArg, // 'F', eArg, // 'e', EArg, // 'E', gArg, // 'g', GArg, // 'G', aArg, // 'a', AArg, // 'A', CStrArg, // 's' VoidPtrArg, // 'p' OutIntPtrArg, // 'n' PercentArg, // '%' // MacOS X unicode extensions. CArg, // 'C' UnicodeStrArg, // 'S' // Objective-C specific specifiers. ObjCObjArg, // '@' // GlibC specific specifiers. PrintErrno, // 'm' // Specifier ranges. IntArgBeg = dArg, IntArgEnd = iArg, UIntArgBeg = oArg, UIntArgEnd = XArg, DoubleArgBeg = fArg, DoubleArgEnd = AArg, C99Beg = IntArgBeg, C99End = DoubleArgEnd, ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg }; ConversionSpecifier() : Position(0), kind(InvalidSpecifier) {} ConversionSpecifier(const char *pos, Kind k) : Position(pos), kind(k) {} const char *getStart() const { return Position; } llvm::StringRef getCharacters() const { return llvm::StringRef(getStart(), getLength()); } bool consumesDataArgument() const { switch (kind) { case PercentArg: case PrintErrno: return false; default: return true; } } bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } bool isIntArg() const { return kind >= dArg && kind <= iArg; } bool isUIntArg() const { return kind >= oArg && kind <= XArg; } bool isDoubleArg() const { return kind >= fArg && kind <= AArg; } Kind getKind() const { return kind; } void setKind(Kind k) { kind = k; } unsigned getLength() const { // Conversion specifiers currently only are represented by // single characters, but we be flexible. return 1; } const char *toString() const; private: const char *Position; Kind kind; }; class LengthModifier { public: enum Kind { None, AsChar, // 'hh' AsShort, // 'h' AsLong, // 'l' AsLongLong, // 'll', 'q' (BSD, deprecated) AsIntMax, // 'j' AsSizeT, // 'z' AsPtrDiff, // 't' AsLongDouble, // 'L' AsWideChar = AsLong // for '%ls' }; LengthModifier() : Position(0), kind(None) {} LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {} const char *getStart() const { return Position; } unsigned getLength() const { switch (kind) { default: return 1; case AsLongLong: case AsChar: return 2; case None: return 0; } } Kind getKind() const { return kind; } void setKind(Kind k) { kind = k; } const char *toString() const; private: const char *Position; Kind kind; }; class OptionalAmount { public: enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; OptionalAmount(HowSpecified howSpecified, unsigned amount, const char *amountStart, unsigned amountLength, bool usesPositionalArg) : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} OptionalAmount(bool valid = true) : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), UsesPositionalArg(0), UsesDotPrefix(0) {} bool isInvalid() const { return hs == Invalid; } HowSpecified getHowSpecified() const { return hs; } void setHowSpecified(HowSpecified h) { hs = h; } bool hasDataArgument() const { return hs == Arg; } unsigned getArgIndex() const { assert(hasDataArgument()); return amt; } unsigned getConstantAmount() const { assert(hs == Constant); return amt; } const char *getStart() const { // We include the . character if it is given. return start - UsesDotPrefix; } unsigned getConstantLength() const { assert(hs == Constant); return length + UsesDotPrefix; } ArgTypeResult getArgType(ASTContext &Ctx) const; void toString(llvm::raw_ostream &os) const; bool usesPositionalArg() const { return (bool) UsesPositionalArg; } unsigned getPositionalArgIndex() const { assert(hasDataArgument()); return amt + 1; } bool usesDotPrefix() const { return UsesDotPrefix; } void setUsesDotPrefix() { UsesDotPrefix = true; } private: const char *start; unsigned length; HowSpecified hs; unsigned amt; bool UsesPositionalArg : 1; bool UsesDotPrefix; }; // Class representing optional flags with location and representation // information. class OptionalFlag { public: OptionalFlag(const char *Representation) : representation(Representation), flag(false) {} bool isSet() { return flag; } void set() { flag = true; } void clear() { flag = false; } void setPosition(const char *position) { assert(position); this->position = position; } const char *getPosition() const { assert(position); return position; } const char *toString() const { return representation; } // Overloaded operators for bool like qualities operator bool() const { return flag; } OptionalFlag& operator=(const bool &rhs) { flag = rhs; return *this; // Return a reference to myself. } private: const char *representation; const char *position; bool flag; }; class FormatSpecifier { LengthModifier LM; OptionalFlag IsLeftJustified; // '-' OptionalFlag HasPlusPrefix; // '+' OptionalFlag HasSpacePrefix; // ' ' OptionalFlag HasAlternativeForm; // '#' OptionalFlag HasLeadingZeroes; // '0' /// Positional arguments, an IEEE extension: /// IEEE Std 1003.1, 2004 Edition /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html bool UsesPositionalArg; unsigned argIndex; ConversionSpecifier CS; OptionalAmount FieldWidth; OptionalAmount Precision; public: FormatSpecifier() : IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0"), UsesPositionalArg(false), argIndex(0) {} static FormatSpecifier Parse(const char *beg, const char *end); // Methods for incrementally constructing the FormatSpecifier. void setConversionSpecifier(const ConversionSpecifier &cs) { CS = cs; } void setLengthModifier(LengthModifier lm) { LM = lm; } void setIsLeftJustified(const char *position) { IsLeftJustified = true; IsLeftJustified.setPosition(position); } void setHasPlusPrefix(const char *position) { HasPlusPrefix = true; HasPlusPrefix.setPosition(position); } void setHasSpacePrefix(const char *position) { HasSpacePrefix = true; HasSpacePrefix.setPosition(position); } void setHasAlternativeForm(const char *position) { HasAlternativeForm = true; HasAlternativeForm.setPosition(position); } void setHasLeadingZeros(const char *position) { HasLeadingZeroes = true; HasLeadingZeroes.setPosition(position); } void setUsesPositionalArg() { UsesPositionalArg = true; } void setArgIndex(unsigned i) { assert(CS.consumesDataArgument()); argIndex = i; } unsigned getArgIndex() const { assert(CS.consumesDataArgument()); return argIndex; } unsigned getPositionalArgIndex() const { assert(CS.consumesDataArgument()); return argIndex + 1; } // Methods for querying the format specifier. const ConversionSpecifier &getConversionSpecifier() const { return CS; } const LengthModifier &getLengthModifier() const { return LM; } const OptionalAmount &getFieldWidth() const { return FieldWidth; } void setFieldWidth(const OptionalAmount &Amt) { FieldWidth = Amt; } void setPrecision(const OptionalAmount &Amt) { Precision = Amt; Precision.setUsesDotPrefix(); } const OptionalAmount &getPrecision() const { return Precision; } /// \brief Returns the builtin type that a data argument /// paired with this format specifier should have. This method /// will return null if the format specifier does not have /// a matching data argument or the matching argument matches /// more than one type. ArgTypeResult getArgType(ASTContext &Ctx) const; const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } bool usesPositionalArg() const { return UsesPositionalArg; } /// Changes the specifier and length according to a QualType, retaining any /// flags or options. Returns true on success, or false when a conversion /// was not successful. bool fixType(QualType QT); void toString(llvm::raw_ostream &os) const; // Validation methods - to check if any element results in undefined behavior bool hasValidPlusPrefix() const; bool hasValidAlternativeForm() const; bool hasValidLeadingZeros() const; bool hasValidSpacePrefix() const; bool hasValidLeftJustified() const; bool hasValidLengthModifier() const; bool hasValidPrecision() const; bool hasValidFieldWidth() const; }; enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; class FormatStringHandler { public: FormatStringHandler() {} virtual ~FormatStringHandler(); virtual void HandleIncompleteFormatSpecifier(const char *startSpecifier, unsigned specifierLen) {} virtual void HandleNullChar(const char *nullCharacter) {} virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, PositionContext p) {} virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} virtual bool HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS, const char *startSpecifier, unsigned specifierLen) { return true; } virtual bool HandleFormatSpecifier(const analyze_printf::FormatSpecifier &FS, const char *startSpecifier, unsigned specifierLen) { return true; } }; bool ParseFormatString(FormatStringHandler &H, const char *beg, const char *end); } // end printf namespace } // end clang namespace #endif