diff options
Diffstat (limited to 'lib/Analysis')
-rw-r--r-- | lib/Analysis/AnalysisContext.cpp | 45 | ||||
-rw-r--r-- | lib/Analysis/CFG.cpp | 164 | ||||
-rw-r--r-- | lib/Analysis/CFGStmtMap.cpp | 88 | ||||
-rw-r--r-- | lib/Analysis/CMakeLists.txt | 4 | ||||
-rw-r--r-- | lib/Analysis/FormatString.cpp | 474 | ||||
-rw-r--r-- | lib/Analysis/FormatStringParsing.h | 72 | ||||
-rw-r--r-- | lib/Analysis/LiveVariables.cpp | 24 | ||||
-rw-r--r-- | lib/Analysis/Makefile | 1 | ||||
-rw-r--r-- | lib/Analysis/PrintfFormatString.cpp | 572 | ||||
-rw-r--r-- | lib/Analysis/PseudoConstantAnalysis.cpp | 238 | ||||
-rw-r--r-- | lib/Analysis/ReachableCode.cpp | 2 | ||||
-rw-r--r-- | lib/Analysis/ScanfFormatString.cpp | 221 | ||||
-rw-r--r-- | lib/Analysis/UninitializedValues.cpp | 4 |
13 files changed, 1346 insertions, 563 deletions
diff --git a/lib/Analysis/AnalysisContext.cpp b/lib/Analysis/AnalysisContext.cpp index 06d8aec..bf9f967 100644 --- a/lib/Analysis/AnalysisContext.cpp +++ b/lib/Analysis/AnalysisContext.cpp @@ -18,6 +18,7 @@ #include "clang/AST/ParentMap.h" #include "clang/AST/StmtVisitor.h" #include "clang/Analysis/Analyses/LiveVariables.h" +#include "clang/Analysis/Analyses/PseudoConstantAnalysis.h" #include "clang/Analysis/AnalysisContext.h" #include "clang/Analysis/CFG.h" #include "clang/Analysis/Support/BumpVector.h" @@ -54,8 +55,11 @@ const ImplicitParamDecl *AnalysisContext::getSelfDecl() const { } CFG *AnalysisContext::getCFG() { + if (UseUnoptimizedCFG) + return getUnoptimizedCFG(); + if (!builtCFG) { - cfg = CFG::buildCFG(D, getBody(), &D->getASTContext(), AddEHEdges); + cfg = CFG::buildCFG(D, getBody(), &D->getASTContext(), true, AddEHEdges); // Even when the cfg is not successfully built, we don't // want to try building it again. builtCFG = true; @@ -63,12 +67,29 @@ CFG *AnalysisContext::getCFG() { return cfg; } +CFG *AnalysisContext::getUnoptimizedCFG() { + if (!builtCompleteCFG) { + completeCFG = CFG::buildCFG(D, getBody(), &D->getASTContext(), + false, AddEHEdges); + // Even when the cfg is not successfully built, we don't + // want to try building it again. + builtCompleteCFG = true; + } + return completeCFG; +} + ParentMap &AnalysisContext::getParentMap() { if (!PM) PM = new ParentMap(getBody()); return *PM; } +PseudoConstantAnalysis *AnalysisContext::getPseudoConstantAnalysis() { + if (!PCA) + PCA = new PseudoConstantAnalysis(getBody()); + return PCA; +} + LiveVariables *AnalysisContext::getLiveVariables() { if (!liveness) { CFG *c = getCFG(); @@ -83,10 +104,25 @@ LiveVariables *AnalysisContext::getLiveVariables() { return liveness; } -AnalysisContext *AnalysisContextManager::getContext(const Decl *D) { +LiveVariables *AnalysisContext::getRelaxedLiveVariables() { + if (!relaxedLiveness) { + CFG *c = getCFG(); + if (!c) + return 0; + + relaxedLiveness = new LiveVariables(*this, false); + relaxedLiveness->runOnCFG(*c); + relaxedLiveness->runOnAllBlocks(*c, 0, true); + } + + return relaxedLiveness; +} + +AnalysisContext *AnalysisContextManager::getContext(const Decl *D, + idx::TranslationUnit *TU) { AnalysisContext *&AC = Contexts[D]; if (!AC) - AC = new AnalysisContext(D); + AC = new AnalysisContext(D, TU, UseUnoptimizedCFG); return AC; } @@ -296,8 +332,11 @@ AnalysisContext::getReferencedBlockVars(const BlockDecl *BD) { AnalysisContext::~AnalysisContext() { delete cfg; + delete completeCFG; delete liveness; + delete relaxedLiveness; delete PM; + delete PCA; delete ReferencedBlockVars; } diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp index 08543aa..c97b916 100644 --- a/lib/Analysis/CFG.cpp +++ b/lib/Analysis/CFG.cpp @@ -35,7 +35,7 @@ static SourceLocation GetEndLoc(Decl* D) { return D->getLocation(); } - + class AddStmtChoice { public: enum Kind { NotAlwaysAdd = 0, @@ -99,7 +99,8 @@ public: TryTerminatedBlock(NULL) {} // buildCFG - Used by external clients to construct the CFG. - CFG* buildCFG(const Decl *D, Stmt *Statement, ASTContext *C, bool AddEHEdges, + CFG* buildCFG(const Decl *D, Stmt *Statement, ASTContext *C, + bool pruneTriviallyFalseEdges, bool AddEHEdges, bool AddScopes); private: @@ -174,12 +175,12 @@ private: CFGBlock *addStmt(Stmt *S) { return Visit(S, AddStmtChoice::AlwaysAdd); } - + void AppendStmt(CFGBlock *B, Stmt *S, AddStmtChoice asc = AddStmtChoice::AlwaysAdd) { B->appendStmt(S, cfg->getBumpVectorContext(), asc.asLValue()); } - + void AddSuccessor(CFGBlock *B, CFGBlock *S) { B->addSuccessor(S, cfg->getBumpVectorContext()); } @@ -206,6 +207,9 @@ private: /// TryEvaluateBool - Try and evaluate the Stmt and return 0 or 1 /// if we can evaluate to a known value, otherwise return -1. TryResult TryEvaluateBool(Expr *S) { + if (!PruneTriviallyFalseEdges) + return TryResult(); + Expr::EvalResult Result; if (!S->isTypeDependent() && !S->isValueDependent() && S->Evaluate(Result, *Context) && Result.Val.isInt()) @@ -216,6 +220,9 @@ private: bool badCFG; + // True iff trivially false edges should be pruned from the CFG. + bool PruneTriviallyFalseEdges; + // True iff EH edges on CallExprs should be added to the CFG. bool AddEHEdges; @@ -243,8 +250,12 @@ static VariableArrayType* FindVA(Type* t) { /// transferred to the caller. If CFG construction fails, this method returns /// NULL. CFG* CFGBuilder::buildCFG(const Decl *D, Stmt* Statement, ASTContext* C, + bool pruneTriviallyFalseEdges, bool addehedges, bool AddScopes) { + AddEHEdges = addehedges; + PruneTriviallyFalseEdges = pruneTriviallyFalseEdges; + Context = C; assert(cfg.get()); if (!Statement) @@ -359,6 +370,7 @@ tryAgain: return VisitBreakStmt(cast<BreakStmt>(S)); case Stmt::CallExprClass: + case Stmt::CXXOperatorCallExprClass: return VisitCallExpr(cast<CallExpr>(S), asc); case Stmt::CaseStmtClass: @@ -379,15 +391,21 @@ tryAgain: case Stmt::CXXCatchStmtClass: return VisitCXXCatchStmt(cast<CXXCatchStmt>(S)); + case Stmt::CXXExprWithTemporariesClass: { + // FIXME: Handle temporaries. For now, just visit the subexpression + // so we don't artificially create extra blocks. + return Visit(cast<CXXExprWithTemporaries>(S)->getSubExpr(), asc); + } + case Stmt::CXXMemberCallExprClass: return VisitCXXMemberCallExpr(cast<CXXMemberCallExpr>(S), asc); case Stmt::CXXThrowExprClass: return VisitCXXThrowExpr(cast<CXXThrowExpr>(S)); - + case Stmt::CXXTryStmtClass: return VisitCXXTryStmt(cast<CXXTryStmt>(S)); - + case Stmt::DeclStmtClass: return VisitDeclStmt(cast<DeclStmt>(S)); @@ -515,15 +533,15 @@ CFGBlock *CFGBuilder::VisitBinaryOperator(BinaryOperator *B, // See if this is a known constant. TryResult KnownVal = TryEvaluateBool(B->getLHS()); - if (KnownVal.isKnown() && (B->getOpcode() == BinaryOperator::LOr)) + if (KnownVal.isKnown() && (B->getOpcode() == BO_LOr)) KnownVal.negate(); // Now link the LHSBlock with RHSBlock. - if (B->getOpcode() == BinaryOperator::LOr) { + if (B->getOpcode() == BO_LOr) { AddSuccessor(LHSBlock, KnownVal.isTrue() ? NULL : ConfluenceBlock); AddSuccessor(LHSBlock, KnownVal.isFalse() ? NULL : RHSBlock); } else { - assert(B->getOpcode() == BinaryOperator::LAnd); + assert(B->getOpcode() == BO_LAnd); AddSuccessor(LHSBlock, KnownVal.isFalse() ? NULL : RHSBlock); AddSuccessor(LHSBlock, KnownVal.isTrue() ? NULL : ConfluenceBlock); } @@ -532,7 +550,7 @@ CFGBlock *CFGBuilder::VisitBinaryOperator(BinaryOperator *B, Block = LHSBlock; return addStmt(B->getLHS()); } - else if (B->getOpcode() == BinaryOperator::Comma) { // , + else if (B->getOpcode() == BO_Comma) { // , autoCreateBlock(); AppendStmt(Block, B, asc); addStmt(B->getRHS()); @@ -543,7 +561,7 @@ CFGBlock *CFGBuilder::VisitBinaryOperator(BinaryOperator *B, autoCreateBlock(); AppendStmt(Block, B, asc); } - + Visit(B->getRHS()); return Visit(B->getLHS(), AddStmtChoice::AsLValueNotAlwaysAdd); } @@ -586,7 +604,7 @@ static bool CanThrow(Expr *E) { Ty = Ty->getAs<PointerType>()->getPointeeType(); else if (Ty->isBlockPointerType()) Ty = Ty->getAs<BlockPointerType>()->getPointeeType(); - + const FunctionType *FT = Ty->getAs<FunctionType>(); if (FT) { if (const FunctionProtoType *Proto = dyn_cast<FunctionProtoType>(FT)) @@ -691,7 +709,10 @@ CFGBlock* CFGBuilder::VisitCompoundStmt(CompoundStmt* C) { for (CompoundStmt::reverse_body_iterator I=C->body_rbegin(), E=C->body_rend(); I != E; ++I ) { - LastBlock = addStmt(*I); + // If we hit a segment of code just containing ';' (NullStmts), we can + // get a null block back. In such cases, just use the LastBlock + if (CFGBlock *newBlock = addStmt(*I)) + LastBlock = newBlock; if (badCFG) return NULL; @@ -901,7 +922,7 @@ CFGBlock* CFGBuilder::VisitIfStmt(IfStmt* I) { // new blocks as the condition may contain control-flow. Any newly created // blocks will be pointed to be "Block". Block = addStmt(I->getCond()); - + // Finally, if the IfStmt contains a condition variable, add both the IfStmt // and the condition variable initialization to the CFG. if (VarDecl *VD = I->getConditionVariable()) { @@ -911,7 +932,7 @@ CFGBlock* CFGBuilder::VisitIfStmt(IfStmt* I) { addStmt(Init); } } - + return Block; } @@ -1029,7 +1050,7 @@ CFGBlock* CFGBuilder::VisitForStmt(ForStmt* F) { assert(Block == EntryConditionBlock); } } - + if (Block) { if (!FinishBlock(EntryConditionBlock)) return 0; @@ -1277,7 +1298,7 @@ CFGBlock* CFGBuilder::VisitWhileStmt(WhileStmt* W) { Block = ExitConditionBlock; EntryConditionBlock = addStmt(C); assert(Block == EntryConditionBlock); - + // If this block contains a condition variable, add both the condition // variable and initializer to the CFG. if (VarDecl *VD = W->getConditionVariable()) { @@ -1389,7 +1410,7 @@ CFGBlock* CFGBuilder::VisitCXXThrowExpr(CXXThrowExpr* T) { if (TryTerminatedBlock) // The current try statement is the only successor. AddSuccessor(Block, TryTerminatedBlock); - else + else // otherwise the Exit block is the only successor. AddSuccessor(Block, &cfg->getExit()); @@ -1465,18 +1486,22 @@ CFGBlock *CFGBuilder::VisitDoStmt(DoStmt* D) { return 0; } - // Add an intermediate block between the BodyBlock and the - // ExitConditionBlock to represent the "loop back" transition. Create an - // empty block to represent the transition block for looping back to the - // head of the loop. - // FIXME: Can we do this more efficiently without adding another block? - Block = NULL; - Succ = BodyBlock; - CFGBlock *LoopBackBlock = createBlock(); - LoopBackBlock->setLoopTarget(D); - - // Add the loop body entry as a successor to the condition. - AddSuccessor(ExitConditionBlock, KnownVal.isFalse() ? NULL : LoopBackBlock); + if (!KnownVal.isFalse()) { + // Add an intermediate block between the BodyBlock and the + // ExitConditionBlock to represent the "loop back" transition. Create an + // empty block to represent the transition block for looping back to the + // head of the loop. + // FIXME: Can we do this more efficiently without adding another block? + Block = NULL; + Succ = BodyBlock; + CFGBlock *LoopBackBlock = createBlock(); + LoopBackBlock->setLoopTarget(D); + + // Add the loop body entry as a successor to the condition. + AddSuccessor(ExitConditionBlock, LoopBackBlock); + } + else + AddSuccessor(ExitConditionBlock, NULL); } // Link up the condition block with the code that follows the loop. @@ -1589,7 +1614,7 @@ CFGBlock* CFGBuilder::VisitSwitchStmt(SwitchStmt* Terminator) { assert(Terminator->getCond() && "switch condition must be non-NULL"); Block = SwitchTerminatedBlock; Block = addStmt(Terminator->getCond()); - + // Finally, if the SwitchStmt contains a condition variable, add both the // SwitchStmt and the condition variable initialization to the CFG. if (VarDecl *VD = Terminator->getConditionVariable()) { @@ -1599,16 +1624,37 @@ CFGBlock* CFGBuilder::VisitSwitchStmt(SwitchStmt* Terminator) { addStmt(Init); } } - + return Block; } CFGBlock* CFGBuilder::VisitCaseStmt(CaseStmt* CS) { // CaseStmts are essentially labels, so they are the first statement in a // block. + CFGBlock *TopBlock = 0, *LastBlock = 0; + + if (Stmt *Sub = CS->getSubStmt()) { + // For deeply nested chains of CaseStmts, instead of doing a recursion + // (which can blow out the stack), manually unroll and create blocks + // along the way. + while (isa<CaseStmt>(Sub)) { + CFGBlock *CurrentBlock = createBlock(false); + CurrentBlock->setLabel(CS); + + if (TopBlock) + AddSuccessor(LastBlock, CurrentBlock); + else + TopBlock = CurrentBlock; + + AddSuccessor(SwitchTerminatedBlock, CurrentBlock); + LastBlock = CurrentBlock; + + CS = cast<CaseStmt>(Sub); + Sub = CS->getSubStmt(); + } - if (CS->getSubStmt()) - addStmt(CS->getSubStmt()); + addStmt(Sub); + } CFGBlock* CaseBlock = Block; if (!CaseBlock) @@ -1629,10 +1675,16 @@ CFGBlock* CFGBuilder::VisitCaseStmt(CaseStmt* CS) { // We set Block to NULL to allow lazy creation of a new block (if necessary) Block = NULL; - // This block is now the implicit successor of other blocks. - Succ = CaseBlock; + if (TopBlock) { + AddSuccessor(LastBlock, CaseBlock); + Succ = TopBlock; + } + else { + // This block is now the implicit successor of other blocks. + Succ = CaseBlock; + } - return CaseBlock; + return Succ; } CFGBlock* CFGBuilder::VisitDefaultStmt(DefaultStmt* Terminator) { @@ -1742,9 +1794,9 @@ CFGBlock* CFGBuilder::VisitCXXCatchStmt(CXXCatchStmt* CS) { return CatchBlock; } -CFGBlock *CFGBuilder::VisitCXXMemberCallExpr(CXXMemberCallExpr *C, +CFGBlock *CFGBuilder::VisitCXXMemberCallExpr(CXXMemberCallExpr *C, AddStmtChoice asc) { - AddStmtChoice::Kind K = asc.asLValue() ? AddStmtChoice::AlwaysAddAsLValue + AddStmtChoice::Kind K = asc.asLValue() ? AddStmtChoice::AlwaysAddAsLValue : AddStmtChoice::AlwaysAdd; autoCreateBlock(); AppendStmt(Block, C, AddStmtChoice(K)); @@ -1795,9 +1847,11 @@ CFGBlock* CFG::createBlock() { /// buildCFG - Constructs a CFG from an AST. Ownership of the returned /// CFG is returned to the caller. CFG* CFG::buildCFG(const Decl *D, Stmt* Statement, ASTContext *C, + bool PruneTriviallyFalse, bool AddEHEdges, bool AddScopes) { CFGBuilder Builder; - return Builder.buildCFG(D, Statement, C, AddEHEdges, AddScopes); + return Builder.buildCFG(D, Statement, C, PruneTriviallyFalse, + AddEHEdges, AddScopes); } //===----------------------------------------------------------------------===// @@ -1814,10 +1868,10 @@ static void FindSubExprAssignments(Stmt *S, return; for (Stmt::child_iterator I=S->child_begin(), E=S->child_end(); I!=E; ++I) { - Stmt *child = *I; + Stmt *child = *I; if (!child) continue; - + if (BinaryOperator* B = dyn_cast<BinaryOperator>(child)) if (B->isAssignmentOp()) Set.insert(B); @@ -2037,10 +2091,10 @@ public: B->getLHS()->printPretty(OS, Helper, Policy); switch (B->getOpcode()) { - case BinaryOperator::LOr: + case BO_LOr: OS << " || ..."; return; - case BinaryOperator::LAnd: + case BO_LAnd: OS << " && ..."; return; default: @@ -2057,7 +2111,6 @@ public: static void print_stmt(llvm::raw_ostream &OS, StmtPrinterHelper* Helper, const CFGElement &E) { - Stmt *Terminator = E; if (E.asStartScope()) { OS << "start scope\n"; @@ -2068,9 +2121,11 @@ static void print_stmt(llvm::raw_ostream &OS, StmtPrinterHelper* Helper, return; } + Stmt *S = E; + if (Helper) { // special printing for statement-expressions. - if (StmtExpr* SE = dyn_cast<StmtExpr>(Terminator)) { + if (StmtExpr* SE = dyn_cast<StmtExpr>(S)) { CompoundStmt* Sub = SE->getSubStmt(); if (Sub->child_begin() != Sub->child_end()) { @@ -2082,8 +2137,8 @@ static void print_stmt(llvm::raw_ostream &OS, StmtPrinterHelper* Helper, } // special printing for comma expressions. - if (BinaryOperator* B = dyn_cast<BinaryOperator>(Terminator)) { - if (B->getOpcode() == BinaryOperator::Comma) { + if (BinaryOperator* B = dyn_cast<BinaryOperator>(S)) { + if (B->getOpcode() == BO_Comma) { OS << "... , "; Helper->handledStmt(B->getRHS(),OS); OS << '\n'; @@ -2092,10 +2147,19 @@ static void print_stmt(llvm::raw_ostream &OS, StmtPrinterHelper* Helper, } } - Terminator->printPretty(OS, Helper, PrintingPolicy(Helper->getLangOpts())); + S->printPretty(OS, Helper, PrintingPolicy(Helper->getLangOpts())); + + if (isa<CXXOperatorCallExpr>(S)) { + OS << " (OperatorCall)"; + } + else if (isa<CXXBindTemporaryExpr>(S)) { + OS << " (BindTemporary)"; + } + // Expressions need a newline. - if (isa<Expr>(Terminator)) OS << '\n'; + if (isa<Expr>(S)) + OS << '\n'; } static void print_block(llvm::raw_ostream& OS, const CFG* cfg, diff --git a/lib/Analysis/CFGStmtMap.cpp b/lib/Analysis/CFGStmtMap.cpp new file mode 100644 index 0000000..965eca1 --- /dev/null +++ b/lib/Analysis/CFGStmtMap.cpp @@ -0,0 +1,88 @@ +//===--- CFGStmtMap.h - Map from Stmt* to CFGBlock* -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the CFGStmtMap class, which defines a mapping from +// Stmt* to CFGBlock* +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "clang/AST/ParentMap.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/CFGStmtMap.h" + +using namespace clang; + +typedef llvm::DenseMap<Stmt*,CFGBlock*> SMap; +static SMap *AsMap(void *m) { return (SMap*) m; } + +CFGStmtMap::~CFGStmtMap() { delete AsMap(M); } + +CFGBlock *CFGStmtMap::getBlock(Stmt *S) { + SMap *SM = AsMap(M); + Stmt *X = S; + + // If 'S' isn't in the map, walk the ParentMap to see if one of its ancestors + // is in the map. + while (X) { + SMap::iterator I = SM->find(X); + if (I != SM->end()) { + CFGBlock *B = I->second; + // Memoize this lookup. + if (X != S) + (*SM)[X] = B; + return B; + } + + X = PM->getParentIgnoreParens(X); + } + + return 0; +} + +static void Accumulate(SMap &SM, CFGBlock *B) { + // First walk the block-level expressions. + for (CFGBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) { + const CFGElement &CE = *I; + if (Stmt *S = CE.getStmt()) { + CFGBlock *&Entry = SM[S]; + // If 'Entry' is already initialized (e.g., a terminator was already), + // skip. + if (Entry) + continue; + + Entry = B; + } + } + + // Look at the label of the block. + if (Stmt *Label = B->getLabel()) + SM[Label] = B; + + // Finally, look at the terminator. If the terminator was already added + // because it is a block-level expression in another block, overwrite + // that mapping. + if (Stmt *Term = B->getTerminator()) + SM[Term] = B; +} + +CFGStmtMap *CFGStmtMap::Build(CFG *C, ParentMap *PM) { + if (!C || !PM) + return 0; + + SMap *SM = new SMap(); + + // Walk all blocks, accumulating the block-level expressions, labels, + // and terminators. + for (CFG::iterator I = C->begin(), E = C->end(); I != E; ++I) + Accumulate(*SM, *I); + + return new CFGStmtMap(PM, SM); +} + diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index f2916c2..850e9b4 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -3,9 +3,13 @@ set(LLVM_NO_RTTI 1) add_clang_library(clangAnalysis AnalysisContext.cpp CFG.cpp + CFGStmtMap.cpp + FormatString.cpp LiveVariables.cpp PrintfFormatString.cpp + PseudoConstantAnalysis.cpp ReachableCode.cpp + ScanfFormatString.cpp UninitializedValues.cpp ) diff --git a/lib/Analysis/FormatString.cpp b/lib/Analysis/FormatString.cpp new file mode 100644 index 0000000..388b9d3 --- /dev/null +++ b/lib/Analysis/FormatString.cpp @@ -0,0 +1,474 @@ +// FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Shared details for processing format strings of printf and scanf +// (and friends). +// +//===----------------------------------------------------------------------===// + +#include "FormatStringParsing.h" + +using clang::analyze_format_string::ArgTypeResult; +using clang::analyze_format_string::FormatStringHandler; +using clang::analyze_format_string::FormatSpecifier; +using clang::analyze_format_string::LengthModifier; +using clang::analyze_format_string::OptionalAmount; +using clang::analyze_format_string::PositionContext; +using clang::analyze_format_string::ConversionSpecifier; +using namespace clang; + +// Key function to FormatStringHandler. +FormatStringHandler::~FormatStringHandler() {} + +//===----------------------------------------------------------------------===// +// Functions for parsing format strings components in both printf and +// scanf format strings. +//===----------------------------------------------------------------------===// + +OptionalAmount +clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) { + const char *I = Beg; + UpdateOnReturn <const char*> UpdateBeg(Beg, I); + + unsigned accumulator = 0; + bool hasDigits = false; + + for ( ; I != E; ++I) { + char c = *I; + if (c >= '0' && c <= '9') { + hasDigits = true; + accumulator = (accumulator * 10) + (c - '0'); + continue; + } + + if (hasDigits) + return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg, + false); + + break; + } + + return OptionalAmount(); +} + +OptionalAmount +clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg, + const char *E, + unsigned &argIndex) { + if (*Beg == '*') { + ++Beg; + return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false); + } + + return ParseAmount(Beg, E); +} + +OptionalAmount +clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H, + const char *Start, + const char *&Beg, + const char *E, + PositionContext p) { + if (*Beg == '*') { + const char *I = Beg + 1; + const OptionalAmount &Amt = ParseAmount(I, E); + + if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { + H.HandleInvalidPosition(Beg, I - Beg, p); + return OptionalAmount(false); + } + + if (I == E) { + // No more characters left? + H.HandleIncompleteSpecifier(Start, E - Start); + return OptionalAmount(false); + } + + assert(Amt.getHowSpecified() == OptionalAmount::Constant); + + if (*I == '$') { + // Handle positional arguments + + // Special case: '*0$', since this is an easy mistake. + if (Amt.getConstantAmount() == 0) { + H.HandleZeroPosition(Beg, I - Beg + 1); + return OptionalAmount(false); + } + + const char *Tmp = Beg; + Beg = ++I; + + return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, + Tmp, 0, true); + } + + H.HandleInvalidPosition(Beg, I - Beg, p); + return OptionalAmount(false); + } + + return ParseAmount(Beg, E); +} + + +bool +clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H, + FormatSpecifier &CS, + const char *Start, + const char *&Beg, const char *E, + unsigned *argIndex) { + // FIXME: Support negative field widths. + if (argIndex) { + CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); + } + else { + const OptionalAmount Amt = + ParsePositionAmount(H, Start, Beg, E, + analyze_format_string::FieldWidthPos); + + if (Amt.isInvalid()) + return true; + CS.setFieldWidth(Amt); + } + return false; +} + +bool +clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, + FormatSpecifier &FS, + const char *Start, + const char *&Beg, + const char *E) { + const char *I = Beg; + + const OptionalAmount &Amt = ParseAmount(I, E); + + if (I == E) { + // No more characters left? + H.HandleIncompleteSpecifier(Start, E - Start); + return true; + } + + if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { + // Special case: '%0$', since this is an easy mistake. + if (Amt.getConstantAmount() == 0) { + H.HandleZeroPosition(Start, I - Start); + return true; + } + + FS.setArgIndex(Amt.getConstantAmount() - 1); + FS.setUsesPositionalArg(); + // Update the caller's pointer if we decided to consume + // these characters. + Beg = I; + return false; + } + + return false; +} + +bool +clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, + const char *&I, + const char *E) { + LengthModifier::Kind lmKind = LengthModifier::None; + const char *lmPosition = I; + switch (*I) { + default: + return false; + case 'h': + ++I; + lmKind = (I != E && *I == 'h') ? + ++I, LengthModifier::AsChar : LengthModifier::AsShort; + break; + case 'l': + ++I; + lmKind = (I != E && *I == 'l') ? + ++I, LengthModifier::AsLongLong : LengthModifier::AsLong; + break; + case 'j': lmKind = LengthModifier::AsIntMax; ++I; break; + case 'z': lmKind = LengthModifier::AsSizeT; ++I; break; + case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break; + case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break; + case 'q': lmKind = LengthModifier::AsLongLong; ++I; break; + } + LengthModifier lm(lmPosition, lmKind); + FS.setLengthModifier(lm); + return true; +} + +//===----------------------------------------------------------------------===// +// Methods on ArgTypeResult. +//===----------------------------------------------------------------------===// + +bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { + switch (K) { + case InvalidTy: + assert(false && "ArgTypeResult must be valid"); + return true; + + case UnknownTy: + return true; + + case SpecificTy: { + argTy = C.getCanonicalType(argTy).getUnqualifiedType(); + if (T == argTy) + return true; + if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) + switch (BT->getKind()) { + default: + break; + case BuiltinType::Char_S: + case BuiltinType::SChar: + return T == C.UnsignedCharTy; + case BuiltinType::Char_U: + case BuiltinType::UChar: + return T == C.SignedCharTy; + case BuiltinType::Short: + return T == C.UnsignedShortTy; + case BuiltinType::UShort: + return T == C.ShortTy; + case BuiltinType::Int: + return T == C.UnsignedIntTy; + case BuiltinType::UInt: + return T == C.IntTy; + case BuiltinType::Long: + return T == C.UnsignedLongTy; + case BuiltinType::ULong: + return T == C.LongTy; + case BuiltinType::LongLong: + return T == C.UnsignedLongLongTy; + case BuiltinType::ULongLong: + return T == C.LongLongTy; + } + return false; + } + + case CStrTy: { + const PointerType *PT = argTy->getAs<PointerType>(); + if (!PT) + return false; + QualType pointeeTy = PT->getPointeeType(); + if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) + switch (BT->getKind()) { + case BuiltinType::Void: + case BuiltinType::Char_U: + case BuiltinType::UChar: + case BuiltinType::Char_S: + case BuiltinType::SChar: + return true; + default: + break; + } + + return false; + } + + case WCStrTy: { + const PointerType *PT = argTy->getAs<PointerType>(); + if (!PT) + return false; + QualType pointeeTy = + C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); + return pointeeTy == C.getWCharType(); + } + + case WIntTy: { + // Instead of doing a lookup for the definition of 'wint_t' (which + // is defined by the system headers) instead see if wchar_t and + // the argument type promote to the same type. + QualType PromoWChar = + C.getWCharType()->isPromotableIntegerType() + ? C.getPromotedIntegerType(C.getWCharType()) : C.getWCharType(); + QualType PromoArg = + argTy->isPromotableIntegerType() + ? C.getPromotedIntegerType(argTy) : argTy; + + PromoWChar = C.getCanonicalType(PromoWChar).getUnqualifiedType(); + PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType(); + + return PromoWChar == PromoArg; + } + + case CPointerTy: + return argTy->getAs<PointerType>() != NULL || + argTy->getAs<ObjCObjectPointerType>() != NULL; + + case ObjCPointerTy: + return argTy->getAs<ObjCObjectPointerType>() != NULL; + } + + // FIXME: Should be unreachable, but Clang is currently emitting + // a warning. + return false; +} + +QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { + switch (K) { + case InvalidTy: + assert(false && "No representative type for Invalid ArgTypeResult"); + // Fall-through. + case UnknownTy: + return QualType(); + case SpecificTy: + return T; + case CStrTy: + return C.getPointerType(C.CharTy); + case WCStrTy: + return C.getPointerType(C.getWCharType()); + case ObjCPointerTy: + return C.ObjCBuiltinIdTy; + case CPointerTy: + return C.VoidPtrTy; + case WIntTy: { + QualType WC = C.getWCharType(); + return WC->isPromotableIntegerType() ? C.getPromotedIntegerType(WC) : WC; + } + } + + // FIXME: Should be unreachable, but Clang is currently emitting + // a warning. + return QualType(); +} + +//===----------------------------------------------------------------------===// +// Methods on OptionalAmount. +//===----------------------------------------------------------------------===// + +ArgTypeResult +analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const { + return Ctx.IntTy; +} + +//===----------------------------------------------------------------------===// +// Methods on LengthModifier. +//===----------------------------------------------------------------------===// + +const char * +analyze_format_string::LengthModifier::toString() const { + switch (kind) { + case AsChar: + return "hh"; + case AsShort: + return "h"; + case AsLong: // or AsWideChar + return "l"; + case AsLongLong: + return "ll"; + case AsIntMax: + return "j"; + case AsSizeT: + return "z"; + case AsPtrDiff: + return "t"; + case AsLongDouble: + return "L"; + case None: + return ""; + } + return NULL; +} + +//===----------------------------------------------------------------------===// +// Methods on OptionalAmount. +//===----------------------------------------------------------------------===// + +void OptionalAmount::toString(llvm::raw_ostream &os) const { + switch (hs) { + case Invalid: + case NotSpecified: + return; + case Arg: + if (UsesDotPrefix) + os << "."; + if (usesPositionalArg()) + os << "*" << getPositionalArgIndex() << "$"; + else + os << "*"; + break; + case Constant: + if (UsesDotPrefix) + os << "."; + os << amt; + break; + } +} + +//===----------------------------------------------------------------------===// +// Methods on ConversionSpecifier. +//===----------------------------------------------------------------------===// + +bool FormatSpecifier::hasValidLengthModifier() const { + switch (LM.getKind()) { + case LengthModifier::None: + return true; + + // Handle most integer flags + case LengthModifier::AsChar: + case LengthModifier::AsShort: + case LengthModifier::AsLongLong: + case LengthModifier::AsIntMax: + case LengthModifier::AsSizeT: + case LengthModifier::AsPtrDiff: + switch (CS.getKind()) { + case ConversionSpecifier::dArg: + case ConversionSpecifier::iArg: + case ConversionSpecifier::oArg: + case ConversionSpecifier::uArg: + case ConversionSpecifier::xArg: + case ConversionSpecifier::XArg: + case ConversionSpecifier::nArg: + return true; + default: + return false; + } + + // Handle 'l' flag + case LengthModifier::AsLong: + switch (CS.getKind()) { + case ConversionSpecifier::dArg: + case ConversionSpecifier::iArg: + case ConversionSpecifier::oArg: + case ConversionSpecifier::uArg: + case ConversionSpecifier::xArg: + case ConversionSpecifier::XArg: + case ConversionSpecifier::aArg: + case ConversionSpecifier::AArg: + case ConversionSpecifier::fArg: + case ConversionSpecifier::FArg: + case ConversionSpecifier::eArg: + case ConversionSpecifier::EArg: + case ConversionSpecifier::gArg: + case ConversionSpecifier::GArg: + case ConversionSpecifier::nArg: + case ConversionSpecifier::cArg: + case ConversionSpecifier::sArg: + return true; + default: + return false; + } + + case LengthModifier::AsLongDouble: + switch (CS.getKind()) { + case ConversionSpecifier::aArg: + case ConversionSpecifier::AArg: + case ConversionSpecifier::fArg: + case ConversionSpecifier::FArg: + case ConversionSpecifier::eArg: + case ConversionSpecifier::EArg: + case ConversionSpecifier::gArg: + case ConversionSpecifier::GArg: + return true; + default: + return false; + } + } + return false; +} + + diff --git a/lib/Analysis/FormatStringParsing.h b/lib/Analysis/FormatStringParsing.h new file mode 100644 index 0000000..607e99c --- /dev/null +++ b/lib/Analysis/FormatStringParsing.h @@ -0,0 +1,72 @@ +#ifndef LLVM_CLANG_FORMAT_PARSING_H +#define LLVM_CLANG_FORMAT_PARSING_H + +#include "clang/Analysis/Analyses/FormatString.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Type.h" +#include "llvm/Support/raw_ostream.h" + +namespace clang { + +template <typename T> +class UpdateOnReturn { + T &ValueToUpdate; + const T &ValueToCopy; +public: + UpdateOnReturn(T &valueToUpdate, const T &valueToCopy) + : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {} + + ~UpdateOnReturn() { + ValueToUpdate = ValueToCopy; + } +}; + +namespace analyze_format_string { + +OptionalAmount ParseAmount(const char *&Beg, const char *E); +OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E, + unsigned &argIndex); + +OptionalAmount ParsePositionAmount(FormatStringHandler &H, + const char *Start, const char *&Beg, + const char *E, PositionContext p); + +bool ParseFieldWidth(FormatStringHandler &H, + FormatSpecifier &CS, + const char *Start, const char *&Beg, const char *E, + unsigned *argIndex); + +bool ParseArgPosition(FormatStringHandler &H, + FormatSpecifier &CS, const char *Start, + const char *&Beg, const char *E); + +/// Returns true if a LengthModifier was parsed and installed in the +/// FormatSpecifier& argument, and false otherwise. +bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E); + +template <typename T> class SpecifierResult { + T FS; + const char *Start; + bool Stop; +public: + SpecifierResult(bool stop = false) + : Start(0), Stop(stop) {} + SpecifierResult(const char *start, + const T &fs) + : FS(fs), Start(start), Stop(false) {} + + const char *getStart() const { return Start; } + bool shouldStop() const { return Stop; } + bool hasValue() const { return Start != 0; } + const T &getValue() const { + assert(hasValue()); + return FS; + } + const T &getValue() { return FS; } +}; + +} // end analyze_format_string namespace +} // end clang namespace + +#endif + diff --git a/lib/Analysis/LiveVariables.cpp b/lib/Analysis/LiveVariables.cpp index 4efe25e..47b2e3d 100644 --- a/lib/Analysis/LiveVariables.cpp +++ b/lib/Analysis/LiveVariables.cpp @@ -77,12 +77,13 @@ public: }; } // end anonymous namespace -LiveVariables::LiveVariables(AnalysisContext &AC) { +LiveVariables::LiveVariables(AnalysisContext &AC, bool killAtAssign) { // Register all referenced VarDecls. CFG &cfg = *AC.getCFG(); getAnalysisData().setCFG(cfg); getAnalysisData().setContext(AC.getASTContext()); getAnalysisData().AC = &AC; + getAnalysisData().killAtAssign = killAtAssign; RegisterDecls R(getAnalysisData()); cfg.VisitBlockStmts(R); @@ -229,10 +230,10 @@ void TransferFuncs::VisitUnaryOperator(UnaryOperator* U) { Expr *E = U->getSubExpr(); switch (U->getOpcode()) { - case UnaryOperator::PostInc: - case UnaryOperator::PostDec: - case UnaryOperator::PreInc: - case UnaryOperator::PreDec: + case UO_PostInc: + case UO_PostDec: + case UO_PreInc: + case UO_PreDec: // Walk through the subexpressions, blasting through ParenExprs // until we either find a DeclRefExpr or some non-DeclRefExpr // expression. @@ -260,15 +261,16 @@ void TransferFuncs::VisitAssign(BinaryOperator* B) { if (DR->getDecl()->getType()->isReferenceType()) { VisitDeclRefExpr(DR); } else { - // Update liveness inforamtion. - unsigned bit = AD.getIdx(DR->getDecl()); - LiveState.getDeclBit(bit) = Dead | AD.AlwaysLive.getDeclBit(bit); - - if (AD.Observer) { AD.Observer->ObserverKill(DR); } + if (AD.killAtAssign) { + // Update liveness inforamtion. + unsigned bit = AD.getIdx(DR->getDecl()); + LiveState.getDeclBit(bit) = Dead | AD.AlwaysLive.getDeclBit(bit); + if (AD.Observer) { AD.Observer->ObserverKill(DR); } + } // Handle things like +=, etc., which also generate "uses" // of a variable. Do this just by visiting the subexpression. - if (B->getOpcode() != BinaryOperator::Assign) + if (B->getOpcode() != BO_Assign) VisitDeclRefExpr(DR); } } diff --git a/lib/Analysis/Makefile b/lib/Analysis/Makefile index 03bf7a6..fbbb83d 100644 --- a/lib/Analysis/Makefile +++ b/lib/Analysis/Makefile @@ -13,7 +13,6 @@ CLANG_LEVEL := ../.. LIBRARYNAME := clangAnalysis -BUILD_ARCHIVE = 1 include $(CLANG_LEVEL)/Makefile diff --git a/lib/Analysis/PrintfFormatString.cpp b/lib/Analysis/PrintfFormatString.cpp index 558d38a..b8c327c 100644 --- a/lib/Analysis/PrintfFormatString.cpp +++ b/lib/Analysis/PrintfFormatString.cpp @@ -1,4 +1,4 @@ -//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==// +//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -12,141 +12,28 @@ // //===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/PrintfFormatString.h" -#include "clang/AST/ASTContext.h" -#include "clang/AST/Type.h" -#include "llvm/Support/raw_ostream.h" +#include "clang/Analysis/Analyses/FormatString.h" +#include "FormatStringParsing.h" -using clang::analyze_printf::ArgTypeResult; -using clang::analyze_printf::FormatSpecifier; -using clang::analyze_printf::FormatStringHandler; -using clang::analyze_printf::OptionalAmount; -using clang::analyze_printf::PositionContext; -using clang::analyze_printf::ConversionSpecifier; -using clang::analyze_printf::LengthModifier; +using clang::analyze_format_string::ArgTypeResult; +using clang::analyze_format_string::FormatStringHandler; +using clang::analyze_format_string::LengthModifier; +using clang::analyze_format_string::OptionalAmount; +using clang::analyze_format_string::ConversionSpecifier; +using clang::analyze_printf::PrintfSpecifier; using namespace clang; -namespace { -class FormatSpecifierResult { - FormatSpecifier FS; - const char *Start; - bool Stop; -public: - FormatSpecifierResult(bool stop = false) - : Start(0), Stop(stop) {} - FormatSpecifierResult(const char *start, - const FormatSpecifier &fs) - : FS(fs), Start(start), Stop(false) {} - - const char *getStart() const { return Start; } - bool shouldStop() const { return Stop; } - bool hasValue() const { return Start != 0; } - const FormatSpecifier &getValue() const { - assert(hasValue()); - return FS; - } - const FormatSpecifier &getValue() { return FS; } -}; -} // end anonymous namespace - -template <typename T> -class UpdateOnReturn { - T &ValueToUpdate; - const T &ValueToCopy; -public: - UpdateOnReturn(T &valueToUpdate, const T &valueToCopy) - : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {} - - ~UpdateOnReturn() { - ValueToUpdate = ValueToCopy; - } -}; +typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier> + PrintfSpecifierResult; //===----------------------------------------------------------------------===// // Methods for parsing format strings. //===----------------------------------------------------------------------===// -static OptionalAmount ParseAmount(const char *&Beg, const char *E) { - const char *I = Beg; - UpdateOnReturn <const char*> UpdateBeg(Beg, I); - - unsigned accumulator = 0; - bool hasDigits = false; - - for ( ; I != E; ++I) { - char c = *I; - if (c >= '0' && c <= '9') { - hasDigits = true; - accumulator = (accumulator * 10) + (c - '0'); - continue; - } - - if (hasDigits) - return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg, - false); - - break; - } - - return OptionalAmount(); -} - -static OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E, - unsigned &argIndex) { - if (*Beg == '*') { - ++Beg; - return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false); - } - - return ParseAmount(Beg, E); -} - -static OptionalAmount ParsePositionAmount(FormatStringHandler &H, - const char *Start, - const char *&Beg, const char *E, - PositionContext p) { - if (*Beg == '*') { - const char *I = Beg + 1; - const OptionalAmount &Amt = ParseAmount(I, E); - - if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { - H.HandleInvalidPosition(Beg, I - Beg, p); - return OptionalAmount(false); - } - - if (I== E) { - // No more characters left? - H.HandleIncompleteFormatSpecifier(Start, E - Start); - return OptionalAmount(false); - } - - assert(Amt.getHowSpecified() == OptionalAmount::Constant); - - if (*I == '$') { - // Handle positional arguments - - // Special case: '*0$', since this is an easy mistake. - if (Amt.getConstantAmount() == 0) { - H.HandleZeroPosition(Beg, I - Beg + 1); - return OptionalAmount(false); - } - - const char *Tmp = Beg; - Beg = ++I; - - return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, - Tmp, 0, true); - } - - H.HandleInvalidPosition(Beg, I - Beg, p); - return OptionalAmount(false); - } - - return ParseAmount(Beg, E); -} +using analyze_format_string::ParseNonPositionAmount; -static bool ParsePrecision(FormatStringHandler &H, FormatSpecifier &FS, +static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, const char *Start, const char *&Beg, const char *E, unsigned *argIndex) { if (argIndex) { @@ -154,7 +41,7 @@ static bool ParsePrecision(FormatStringHandler &H, FormatSpecifier &FS, } else { const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, - analyze_printf::PrecisionPos); + analyze_format_string::PrecisionPos); if (Amt.isInvalid()) return true; FS.setPrecision(Amt); @@ -162,61 +49,12 @@ static bool ParsePrecision(FormatStringHandler &H, FormatSpecifier &FS, return false; } -static bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &FS, - const char *Start, const char *&Beg, const char *E, - unsigned *argIndex) { - // FIXME: Support negative field widths. - if (argIndex) { - FS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); - } - else { - const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, - analyze_printf::FieldWidthPos); - if (Amt.isInvalid()) - return true; - FS.setFieldWidth(Amt); - } - return false; -} - -static bool ParseArgPosition(FormatStringHandler &H, - FormatSpecifier &FS, const char *Start, - const char *&Beg, const char *E) { - - using namespace clang::analyze_printf; - const char *I = Beg; - - const OptionalAmount &Amt = ParseAmount(I, E); - - if (I == E) { - // No more characters left? - H.HandleIncompleteFormatSpecifier(Start, E - Start); - return true; - } - - if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { - // Special case: '%0$', since this is an easy mistake. - if (Amt.getConstantAmount() == 0) { - H.HandleZeroPosition(Start, I - Start); - return true; - } - - FS.setArgIndex(Amt.getConstantAmount() - 1); - FS.setUsesPositionalArg(); - // Update the caller's pointer if we decided to consume - // these characters. - Beg = I; - return false; - } - - return false; -} - -static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, +static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, unsigned &argIndex) { + using namespace clang::analyze_format_string; using namespace clang::analyze_printf; const char *I = Beg; @@ -243,17 +81,17 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, if (I == E) { // No more characters left? - H.HandleIncompleteFormatSpecifier(Start, E - Start); + H.HandleIncompleteSpecifier(Start, E - Start); return true; } - FormatSpecifier FS; + PrintfSpecifier FS; if (ParseArgPosition(H, FS, Start, I, E)) return true; if (I == E) { // No more characters left? - H.HandleIncompleteFormatSpecifier(Start, E - Start); + H.HandleIncompleteSpecifier(Start, E - Start); return true; } @@ -274,7 +112,7 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, if (I == E) { // No more characters left? - H.HandleIncompleteFormatSpecifier(Start, E - Start); + H.HandleIncompleteSpecifier(Start, E - Start); return true; } @@ -285,7 +123,7 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, if (I == E) { // No more characters left? - H.HandleIncompleteFormatSpecifier(Start, E - Start); + H.HandleIncompleteSpecifier(Start, E - Start); return true; } @@ -293,7 +131,7 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, if (*I == '.') { ++I; if (I == E) { - H.HandleIncompleteFormatSpecifier(Start, E - Start); + H.HandleIncompleteSpecifier(Start, E - Start); return true; } @@ -303,39 +141,15 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, if (I == E) { // No more characters left? - H.HandleIncompleteFormatSpecifier(Start, E - Start); + H.HandleIncompleteSpecifier(Start, E - Start); return true; } } // Look for the length modifier. - LengthModifier::Kind lmKind = LengthModifier::None; - const char *lmPosition = I; - switch (*I) { - default: - break; - case 'h': - ++I; - lmKind = (I != E && *I == 'h') ? - ++I, LengthModifier::AsChar : LengthModifier::AsShort; - break; - case 'l': - ++I; - lmKind = (I != E && *I == 'l') ? - ++I, LengthModifier::AsLongLong : LengthModifier::AsLong; - break; - case 'j': lmKind = LengthModifier::AsIntMax; ++I; break; - case 'z': lmKind = LengthModifier::AsSizeT; ++I; break; - case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break; - case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break; - case 'q': lmKind = LengthModifier::AsLongLong; ++I; break; - } - LengthModifier lm(lmPosition, lmKind); - FS.setLengthModifier(lm); - - if (I == E) { + if (ParseLengthModifier(FS, I, E) && I == E) { // No more characters left? - H.HandleIncompleteFormatSpecifier(Start, E - Start); + H.HandleIncompleteSpecifier(Start, E - Start); return true; } @@ -359,46 +173,47 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, case 'G': k = ConversionSpecifier::GArg; break; case 'X': k = ConversionSpecifier::XArg; break; case 'a': k = ConversionSpecifier::aArg; break; - case 'c': k = ConversionSpecifier::IntAsCharArg; break; + case 'c': k = ConversionSpecifier::cArg; break; case 'd': k = ConversionSpecifier::dArg; break; case 'e': k = ConversionSpecifier::eArg; break; case 'f': k = ConversionSpecifier::fArg; break; case 'g': k = ConversionSpecifier::gArg; break; case 'i': k = ConversionSpecifier::iArg; break; - case 'n': k = ConversionSpecifier::OutIntPtrArg; break; + case 'n': k = ConversionSpecifier::nArg; break; case 'o': k = ConversionSpecifier::oArg; break; - case 'p': k = ConversionSpecifier::VoidPtrArg; break; - case 's': k = ConversionSpecifier::CStrArg; break; + case 'p': k = ConversionSpecifier::pArg; break; + case 's': k = ConversionSpecifier::sArg; break; case 'u': k = ConversionSpecifier::uArg; break; case 'x': k = ConversionSpecifier::xArg; break; // Mac OS X (unicode) specific case 'C': k = ConversionSpecifier::CArg; break; - case 'S': k = ConversionSpecifier::UnicodeStrArg; break; + case 'S': k = ConversionSpecifier::SArg; break; // Objective-C. case '@': k = ConversionSpecifier::ObjCObjArg; break; // Glibc specific. case 'm': k = ConversionSpecifier::PrintErrno; break; } - ConversionSpecifier CS(conversionPosition, k); + PrintfConversionSpecifier CS(conversionPosition, k); FS.setConversionSpecifier(CS); if (CS.consumesDataArgument() && !FS.usesPositionalArg()) FS.setArgIndex(argIndex++); if (k == ConversionSpecifier::InvalidSpecifier) { // Assume the conversion takes one argument. - return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg); + return !H.HandleInvalidPrintfConversionSpecifier(FS, Beg, I - Beg); } - return FormatSpecifierResult(Start, FS); + return PrintfSpecifierResult(Start, FS); } -bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H, - const char *I, const char *E) { +bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H, + const char *I, + const char *E) { unsigned argIndex = 0; // Keep looking for a format specifier until we have exhausted the string. while (I != E) { - const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex); + const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex); // Did a fail-stop error of any kind occur when parsing the specifier? // If so, don't do any more processing. if (FSR.shouldStop()) @@ -408,7 +223,7 @@ bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H, if (!FSR.hasValue()) continue; // We have a format specifier. Pass it to the callback. - if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(), + if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(), I - FSR.getStart())) return true; } @@ -416,129 +231,6 @@ bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H, return false; } -FormatStringHandler::~FormatStringHandler() {} - -//===----------------------------------------------------------------------===// -// Methods on ArgTypeResult. -//===----------------------------------------------------------------------===// - -bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { - switch (K) { - case InvalidTy: - assert(false && "ArgTypeResult must be valid"); - return true; - - case UnknownTy: - return true; - - case SpecificTy: { - argTy = C.getCanonicalType(argTy).getUnqualifiedType(); - if (T == argTy) - return true; - if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) - switch (BT->getKind()) { - default: - break; - case BuiltinType::Char_S: - case BuiltinType::SChar: - return T == C.UnsignedCharTy; - case BuiltinType::Char_U: - case BuiltinType::UChar: - return T == C.SignedCharTy; - case BuiltinType::Short: - return T == C.UnsignedShortTy; - case BuiltinType::UShort: - return T == C.ShortTy; - case BuiltinType::Int: - return T == C.UnsignedIntTy; - case BuiltinType::UInt: - return T == C.IntTy; - case BuiltinType::Long: - return T == C.UnsignedLongTy; - case BuiltinType::ULong: - return T == C.LongTy; - case BuiltinType::LongLong: - return T == C.UnsignedLongLongTy; - case BuiltinType::ULongLong: - return T == C.LongLongTy; - } - return false; - } - - case CStrTy: { - const PointerType *PT = argTy->getAs<PointerType>(); - if (!PT) - return false; - QualType pointeeTy = PT->getPointeeType(); - if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) - switch (BT->getKind()) { - case BuiltinType::Void: - case BuiltinType::Char_U: - case BuiltinType::UChar: - case BuiltinType::Char_S: - case BuiltinType::SChar: - return true; - default: - break; - } - - return false; - } - - case WCStrTy: { - const PointerType *PT = argTy->getAs<PointerType>(); - if (!PT) - return false; - QualType pointeeTy = - C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); - return pointeeTy == C.getWCharType(); - } - - case CPointerTy: - return argTy->getAs<PointerType>() != NULL || - argTy->getAs<ObjCObjectPointerType>() != NULL; - - case ObjCPointerTy: - return argTy->getAs<ObjCObjectPointerType>() != NULL; - } - - // FIXME: Should be unreachable, but Clang is currently emitting - // a warning. - return false; -} - -QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { - switch (K) { - case InvalidTy: - assert(false && "No representative type for Invalid ArgTypeResult"); - // Fall-through. - case UnknownTy: - return QualType(); - case SpecificTy: - return T; - case CStrTy: - return C.getPointerType(C.CharTy); - case WCStrTy: - return C.getPointerType(C.getWCharType()); - case ObjCPointerTy: - return C.ObjCBuiltinIdTy; - case CPointerTy: - return C.VoidPtrTy; - } - - // FIXME: Should be unreachable, but Clang is currently emitting - // a warning. - return QualType(); -} - -//===----------------------------------------------------------------------===// -// Methods on OptionalAmount. -//===----------------------------------------------------------------------===// - -ArgTypeResult OptionalAmount::getArgType(ASTContext &Ctx) const { - return Ctx.IntTy; -} - //===----------------------------------------------------------------------===// // Methods on ConversionSpecifier. //===----------------------------------------------------------------------===// @@ -558,16 +250,17 @@ const char *ConversionSpecifier::toString() const { case GArg: return "G"; case aArg: return "a"; case AArg: return "A"; - case IntAsCharArg: return "c"; - case CStrArg: return "s"; - case VoidPtrArg: return "p"; - case OutIntPtrArg: return "n"; - case PercentArg: return "%"; + case cArg: return "c"; + case sArg: return "s"; + case pArg: return "p"; + case nArg: return "n"; + case PercentArg: return "%"; + case ScanListArg: return "["; case InvalidSpecifier: return NULL; // MacOS X unicode extensions. - case CArg: return "C"; - case UnicodeStrArg: return "S"; + case CArg: return "C"; + case SArg: return "S"; // Objective-C specific specifiers. case ObjCObjArg: return "@"; @@ -579,66 +272,23 @@ const char *ConversionSpecifier::toString() const { } //===----------------------------------------------------------------------===// -// Methods on LengthModifier. -//===----------------------------------------------------------------------===// - -const char *LengthModifier::toString() const { - switch (kind) { - case AsChar: - return "hh"; - case AsShort: - return "h"; - case AsLong: // or AsWideChar - return "l"; - case AsLongLong: - return "ll"; - case AsIntMax: - return "j"; - case AsSizeT: - return "z"; - case AsPtrDiff: - return "t"; - case AsLongDouble: - return "L"; - case None: - return ""; - } - return NULL; -} - -//===----------------------------------------------------------------------===// -// Methods on OptionalAmount. +// Methods on PrintfSpecifier. //===----------------------------------------------------------------------===// -void OptionalAmount::toString(llvm::raw_ostream &os) const { - switch (hs) { - case Invalid: - case NotSpecified: - return; - case Arg: - if (UsesDotPrefix) - os << "."; - if (usesPositionalArg()) - os << "*" << getPositionalArgIndex() << "$"; - else - os << "*"; - break; - case Constant: - if (UsesDotPrefix) - os << "."; - os << amt; - break; - } -} - -//===----------------------------------------------------------------------===// -// Methods on FormatSpecifier. -//===----------------------------------------------------------------------===// - -ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const { +ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const { + const PrintfConversionSpecifier &CS = getConversionSpecifier(); + if (!CS.consumesDataArgument()) return ArgTypeResult::Invalid(); + if (CS.getKind() == ConversionSpecifier::cArg) + switch (LM.getKind()) { + case LengthModifier::None: return Ctx.IntTy; + case LengthModifier::AsLong: return ArgTypeResult::WIntTy; + default: + return ArgTypeResult::Invalid(); + } + if (CS.isIntArg()) switch (LM.getKind()) { case LengthModifier::AsLongDouble: @@ -684,15 +334,15 @@ ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const { } switch (CS.getKind()) { - case ConversionSpecifier::CStrArg: + case ConversionSpecifier::sArg: return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ? ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy); - case ConversionSpecifier::UnicodeStrArg: + case ConversionSpecifier::SArg: // FIXME: This appears to be Mac OS X specific. return ArgTypeResult::WCStrTy; case ConversionSpecifier::CArg: return Ctx.WCharTy; - case ConversionSpecifier::VoidPtrArg: + case ConversionSpecifier::pArg: return ArgTypeResult::CPointerTy; default: break; @@ -702,10 +352,10 @@ ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const { return ArgTypeResult(); } -bool FormatSpecifier::fixType(QualType QT) { +bool PrintfSpecifier::fixType(QualType QT) { // Handle strings first (char *, wchar_t *) if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) { - CS.setKind(ConversionSpecifier::CStrArg); + CS.setKind(ConversionSpecifier::sArg); // Disable irrelevant flags HasAlternativeForm = 0; @@ -750,7 +400,7 @@ bool FormatSpecifier::fixType(QualType QT) { // Set conversion specifier and disable any flags which do not apply to it. if (QT->isAnyCharacterType()) { - CS.setKind(ConversionSpecifier::IntAsCharArg); + CS.setKind(ConversionSpecifier::cArg); Precision.setHowSpecified(OptionalAmount::NotSpecified); HasAlternativeForm = 0; HasLeadingZeroes = 0; @@ -761,7 +411,7 @@ bool FormatSpecifier::fixType(QualType QT) { CS.setKind(ConversionSpecifier::fArg); } else if (QT->isPointerType()) { - CS.setKind(ConversionSpecifier::VoidPtrArg); + CS.setKind(ConversionSpecifier::pArg); Precision.setHowSpecified(OptionalAmount::NotSpecified); HasAlternativeForm = 0; HasLeadingZeroes = 0; @@ -783,9 +433,9 @@ bool FormatSpecifier::fixType(QualType QT) { return true; } -void FormatSpecifier::toString(llvm::raw_ostream &os) const { +void PrintfSpecifier::toString(llvm::raw_ostream &os) const { // Whilst some features have no defined order, we are using the order - // appearing in the C99 standard (ISO/IEC 9899:1999 (E) ¤7.19.6.1) + // appearing in the C99 standard (ISO/IEC 9899:1999 (E) ¤7.19.6.1) os << "%"; // Positional args @@ -810,7 +460,7 @@ void FormatSpecifier::toString(llvm::raw_ostream &os) const { os << CS.toString(); } -bool FormatSpecifier::hasValidPlusPrefix() const { +bool PrintfSpecifier::hasValidPlusPrefix() const { if (!HasPlusPrefix) return true; @@ -833,7 +483,7 @@ bool FormatSpecifier::hasValidPlusPrefix() const { } } -bool FormatSpecifier::hasValidAlternativeForm() const { +bool PrintfSpecifier::hasValidAlternativeForm() const { if (!HasAlternativeForm) return true; @@ -856,7 +506,7 @@ bool FormatSpecifier::hasValidAlternativeForm() const { } } -bool FormatSpecifier::hasValidLeadingZeros() const { +bool PrintfSpecifier::hasValidLeadingZeros() const { if (!HasLeadingZeroes) return true; @@ -883,7 +533,7 @@ bool FormatSpecifier::hasValidLeadingZeros() const { } } -bool FormatSpecifier::hasValidSpacePrefix() const { +bool PrintfSpecifier::hasValidSpacePrefix() const { if (!HasSpacePrefix) return true; @@ -906,13 +556,13 @@ bool FormatSpecifier::hasValidSpacePrefix() const { } } -bool FormatSpecifier::hasValidLeftJustified() const { +bool PrintfSpecifier::hasValidLeftJustified() const { if (!IsLeftJustified) return true; // The left justified flag is valid for all conversions except n switch (CS.getKind()) { - case ConversionSpecifier::OutIntPtrArg: + case ConversionSpecifier::nArg: return false; default: @@ -920,75 +570,7 @@ bool FormatSpecifier::hasValidLeftJustified() const { } } -bool FormatSpecifier::hasValidLengthModifier() const { - switch (LM.getKind()) { - case LengthModifier::None: - return true; - - // Handle most integer flags - case LengthModifier::AsChar: - case LengthModifier::AsShort: - case LengthModifier::AsLongLong: - case LengthModifier::AsIntMax: - case LengthModifier::AsSizeT: - case LengthModifier::AsPtrDiff: - switch (CS.getKind()) { - case ConversionSpecifier::dArg: - case ConversionSpecifier::iArg: - case ConversionSpecifier::oArg: - case ConversionSpecifier::uArg: - case ConversionSpecifier::xArg: - case ConversionSpecifier::XArg: - case ConversionSpecifier::OutIntPtrArg: - return true; - default: - return false; - } - - // Handle 'l' flag - case LengthModifier::AsLong: - switch (CS.getKind()) { - case ConversionSpecifier::dArg: - case ConversionSpecifier::iArg: - case ConversionSpecifier::oArg: - case ConversionSpecifier::uArg: - case ConversionSpecifier::xArg: - case ConversionSpecifier::XArg: - case ConversionSpecifier::aArg: - case ConversionSpecifier::AArg: - case ConversionSpecifier::fArg: - case ConversionSpecifier::FArg: - case ConversionSpecifier::eArg: - case ConversionSpecifier::EArg: - case ConversionSpecifier::gArg: - case ConversionSpecifier::GArg: - case ConversionSpecifier::OutIntPtrArg: - case ConversionSpecifier::IntAsCharArg: - case ConversionSpecifier::CStrArg: - return true; - default: - return false; - } - - case LengthModifier::AsLongDouble: - switch (CS.getKind()) { - case ConversionSpecifier::aArg: - case ConversionSpecifier::AArg: - case ConversionSpecifier::fArg: - case ConversionSpecifier::FArg: - case ConversionSpecifier::eArg: - case ConversionSpecifier::EArg: - case ConversionSpecifier::gArg: - case ConversionSpecifier::GArg: - return true; - default: - return false; - } - } - return false; -} - -bool FormatSpecifier::hasValidPrecision() const { +bool PrintfSpecifier::hasValidPrecision() const { if (Precision.getHowSpecified() == OptionalAmount::NotSpecified) return true; @@ -1008,20 +590,20 @@ bool FormatSpecifier::hasValidPrecision() const { case ConversionSpecifier::FArg: case ConversionSpecifier::gArg: case ConversionSpecifier::GArg: - case ConversionSpecifier::CStrArg: + case ConversionSpecifier::sArg: return true; default: return false; } } -bool FormatSpecifier::hasValidFieldWidth() const { +bool PrintfSpecifier::hasValidFieldWidth() const { if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified) return true; // The field width is valid for all conversions except n switch (CS.getKind()) { - case ConversionSpecifier::OutIntPtrArg: + case ConversionSpecifier::nArg: return false; default: diff --git a/lib/Analysis/PseudoConstantAnalysis.cpp b/lib/Analysis/PseudoConstantAnalysis.cpp new file mode 100644 index 0000000..ff43fc2 --- /dev/null +++ b/lib/Analysis/PseudoConstantAnalysis.cpp @@ -0,0 +1,238 @@ +//== PseudoConstantAnalysis.cpp - Find Pseudoconstants in the AST-*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file tracks the usage of variables in a Decl body to see if they are +// never written to, implying that they constant. This is useful in static +// analysis to see if a developer might have intended a variable to be const. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/PseudoConstantAnalysis.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Stmt.h" +#include <deque> + +using namespace clang; + +// The number of ValueDecls we want to keep track of by default (per-function) +#define VARDECL_SET_SIZE 256 +typedef llvm::SmallPtrSet<const VarDecl*, VARDECL_SET_SIZE> VarDeclSet; + +PseudoConstantAnalysis::PseudoConstantAnalysis(const Stmt *DeclBody) : + DeclBody(DeclBody), Analyzed(false) { + NonConstantsImpl = new VarDeclSet; + UsedVarsImpl = new VarDeclSet; +} + +PseudoConstantAnalysis::~PseudoConstantAnalysis() { + delete (VarDeclSet*)NonConstantsImpl; + delete (VarDeclSet*)UsedVarsImpl; +} + +// Returns true if the given ValueDecl is never written to in the given DeclBody +bool PseudoConstantAnalysis::isPseudoConstant(const VarDecl *VD) { + // Only local and static variables can be pseudoconstants + if (!VD->hasLocalStorage() && !VD->isStaticLocal()) + return false; + + if (!Analyzed) { + RunAnalysis(); + Analyzed = true; + } + + VarDeclSet *NonConstants = (VarDeclSet*)NonConstantsImpl; + + return !NonConstants->count(VD); +} + +// Returns true if the variable was used (self assignments don't count) +bool PseudoConstantAnalysis::wasReferenced(const VarDecl *VD) { + if (!Analyzed) { + RunAnalysis(); + Analyzed = true; + } + + VarDeclSet *UsedVars = (VarDeclSet*)UsedVarsImpl; + + return UsedVars->count(VD); +} + +// Returns a Decl from a (Block)DeclRefExpr (if any) +const Decl *PseudoConstantAnalysis::getDecl(const Expr *E) { + if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(E)) + return DR->getDecl(); + else if (const BlockDeclRefExpr *BDR = dyn_cast<BlockDeclRefExpr>(E)) + return BDR->getDecl(); + else + return 0; +} + +void PseudoConstantAnalysis::RunAnalysis() { + std::deque<const Stmt *> WorkList; + VarDeclSet *NonConstants = (VarDeclSet*)NonConstantsImpl; + VarDeclSet *UsedVars = (VarDeclSet*)UsedVarsImpl; + + // Start with the top level statement of the function + WorkList.push_back(DeclBody); + + while (!WorkList.empty()) { + const Stmt* Head = WorkList.front(); + WorkList.pop_front(); + + switch (Head->getStmtClass()) { + // Case 1: Assignment operators modifying VarDecls + case Stmt::BinaryOperatorClass: { + const BinaryOperator *BO = cast<BinaryOperator>(Head); + // Look for a Decl on the LHS + const Decl *LHSDecl = getDecl(BO->getLHS()->IgnoreParenCasts()); + if (!LHSDecl) + break; + + // We found a binary operator with a DeclRefExpr on the LHS. We now check + // for any of the assignment operators, implying that this Decl is being + // written to. + switch (BO->getOpcode()) { + // Self-assignments don't count as use of a variable + case BO_Assign: { + // Look for a DeclRef on the RHS + const Decl *RHSDecl = getDecl(BO->getRHS()->IgnoreParenCasts()); + + // If the Decls match, we have self-assignment + if (LHSDecl == RHSDecl) + // Do not visit the children + continue; + + } + case BO_AddAssign: + case BO_SubAssign: + case BO_MulAssign: + case BO_DivAssign: + case BO_AndAssign: + case BO_OrAssign: + case BO_XorAssign: + case BO_ShlAssign: + case BO_ShrAssign: { + const VarDecl *VD = dyn_cast<VarDecl>(LHSDecl); + // The DeclRefExpr is being assigned to - mark it as non-constant + if (VD) + NonConstants->insert(VD); + break; + } + + default: + break; + } + break; + } + + // Case 2: Pre/post increment/decrement and address of + case Stmt::UnaryOperatorClass: { + const UnaryOperator *UO = cast<UnaryOperator>(Head); + + // Look for a DeclRef in the subexpression + const Decl *D = getDecl(UO->getSubExpr()->IgnoreParenCasts()); + if (!D) + break; + + // We found a unary operator with a DeclRef as a subexpression. We now + // check for any of the increment/decrement operators, as well as + // addressOf. + switch (UO->getOpcode()) { + case UO_PostDec: + case UO_PostInc: + case UO_PreDec: + case UO_PreInc: + // The DeclRef is being changed - mark it as non-constant + case UO_AddrOf: { + // If we are taking the address of the DeclRefExpr, assume it is + // non-constant. + const VarDecl *VD = dyn_cast<VarDecl>(D); + if (VD) + NonConstants->insert(VD); + break; + } + + default: + break; + } + break; + } + + // Case 3: Reference Declarations + case Stmt::DeclStmtClass: { + const DeclStmt *DS = cast<DeclStmt>(Head); + // Iterate over each decl and see if any of them contain reference decls + for (DeclStmt::const_decl_iterator I = DS->decl_begin(), + E = DS->decl_end(); I != E; ++I) { + // We only care about VarDecls + const VarDecl *VD = dyn_cast<VarDecl>(*I); + if (!VD) + continue; + + // We found a VarDecl; make sure it is a reference type + if (!VD->getType().getTypePtr()->isReferenceType()) + continue; + + // Try to find a Decl in the initializer + const Decl *D = getDecl(VD->getInit()->IgnoreParenCasts()); + if (!D) + break; + + // If the reference is to another var, add the var to the non-constant + // list + if (const VarDecl *RefVD = dyn_cast<VarDecl>(D)) { + NonConstants->insert(RefVD); + continue; + } + } + break; + } + + // Case 4: Block variable references + case Stmt::BlockDeclRefExprClass: { + const BlockDeclRefExpr *BDR = cast<BlockDeclRefExpr>(Head); + if (const VarDecl *VD = dyn_cast<VarDecl>(BDR->getDecl())) { + // Add the Decl to the used list + UsedVars->insert(VD); + continue; + } + break; + } + + // Case 5: Variable references + case Stmt::DeclRefExprClass: { + const DeclRefExpr *DR = cast<DeclRefExpr>(Head); + if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) { + // Add the Decl to the used list + UsedVars->insert(VD); + continue; + } + break; + } + + // Case 6: Block expressions + case Stmt::BlockExprClass: { + const BlockExpr *B = cast<BlockExpr>(Head); + // Add the body of the block to the list + WorkList.push_back(B->getBody()); + continue; + } + + default: + break; + } // switch (head->getStmtClass()) + + // Add all substatements to the worklist + for (Stmt::const_child_iterator I = Head->child_begin(), + E = Head->child_end(); I != E; ++I) + if (*I) + WorkList.push_back(*I); + } // while (!WorkList.empty()) +} diff --git a/lib/Analysis/ReachableCode.cpp b/lib/Analysis/ReachableCode.cpp index f959e5c..0543939 100644 --- a/lib/Analysis/ReachableCode.cpp +++ b/lib/Analysis/ReachableCode.cpp @@ -41,7 +41,7 @@ top: switch (S->getStmtClass()) { case Expr::BinaryOperatorClass: { const BinaryOperator *BO = cast<BinaryOperator>(S); - if (BO->getOpcode() == BinaryOperator::Comma) { + if (BO->getOpcode() == BO_Comma) { if (sn+1 < b.size()) return b[sn+1].getStmt()->getLocStart(); const CFGBlock *n = &b; diff --git a/lib/Analysis/ScanfFormatString.cpp b/lib/Analysis/ScanfFormatString.cpp new file mode 100644 index 0000000..6a8673a --- /dev/null +++ b/lib/Analysis/ScanfFormatString.cpp @@ -0,0 +1,221 @@ +//= ScanfFormatString.cpp - Analysis of printf format strings --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Handling of format string in scanf and friends. The structure of format +// strings for fscanf() are described in C99 7.19.6.2. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/FormatString.h" +#include "FormatStringParsing.h" + +using clang::analyze_format_string::ArgTypeResult; +using clang::analyze_format_string::FormatStringHandler; +using clang::analyze_format_string::LengthModifier; +using clang::analyze_format_string::OptionalAmount; +using clang::analyze_format_string::ConversionSpecifier; +using clang::analyze_scanf::ScanfConversionSpecifier; +using clang::analyze_scanf::ScanfSpecifier; +using clang::UpdateOnReturn; + +typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier> + ScanfSpecifierResult; + +static bool ParseScanList(FormatStringHandler &H, + ScanfConversionSpecifier &CS, + const char *&Beg, const char *E) { + const char *I = Beg; + const char *start = I - 1; + UpdateOnReturn <const char*> UpdateBeg(Beg, I); + + // No more characters? + if (I == E) { + H.HandleIncompleteScanList(start, I); + return true; + } + + // Special case: ']' is the first character. + if (*I == ']') { + if (++I == E) { + H.HandleIncompleteScanList(start, I - 1); + return true; + } + } + + // Look for a ']' character which denotes the end of the scan list. + while (*I != ']') { + if (++I == E) { + H.HandleIncompleteScanList(start, I - 1); + return true; + } + } + + CS.setEndScanList(I); + return false; +} + +// FIXME: Much of this is copy-paste from ParsePrintfSpecifier. +// We can possibly refactor. +static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, + const char *&Beg, + const char *E, + unsigned &argIndex) { + + using namespace clang::analyze_scanf; + const char *I = Beg; + const char *Start = 0; + UpdateOnReturn <const char*> UpdateBeg(Beg, I); + + // Look for a '%' character that indicates the start of a format specifier. + for ( ; I != E ; ++I) { + char c = *I; + if (c == '\0') { + // Detect spurious null characters, which are likely errors. + H.HandleNullChar(I); + return true; + } + if (c == '%') { + Start = I++; // Record the start of the format specifier. + break; + } + } + + // No format specifier found? + if (!Start) + return false; + + if (I == E) { + // No more characters left? + H.HandleIncompleteSpecifier(Start, E - Start); + return true; + } + + ScanfSpecifier FS; + if (ParseArgPosition(H, FS, Start, I, E)) + return true; + + if (I == E) { + // No more characters left? + H.HandleIncompleteSpecifier(Start, E - Start); + return true; + } + + // Look for '*' flag if it is present. + if (*I == '*') { + FS.setSuppressAssignment(I); + if (++I == E) { + H.HandleIncompleteSpecifier(Start, E - Start); + return true; + } + } + + // Look for the field width (if any). Unlike printf, this is either + // a fixed integer or isn't present. + const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); + if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { + assert(Amt.getHowSpecified() == OptionalAmount::Constant); + FS.setFieldWidth(Amt); + + if (I == E) { + // No more characters left? + H.HandleIncompleteSpecifier(Start, E - Start); + return true; + } + } + + // Look for the length modifier. + if (ParseLengthModifier(FS, I, E) && I == E) { + // No more characters left? + H.HandleIncompleteSpecifier(Start, E - Start); + return true; + } + + // Detect spurious null characters, which are likely errors. + if (*I == '\0') { + H.HandleNullChar(I); + return true; + } + + // Finally, look for the conversion specifier. + const char *conversionPosition = I++; + ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier; + switch (*conversionPosition) { + default: + break; + case '%': k = ConversionSpecifier::PercentArg; break; + case 'A': k = ConversionSpecifier::AArg; break; + case 'E': k = ConversionSpecifier::EArg; break; + case 'F': k = ConversionSpecifier::FArg; break; + case 'G': k = ConversionSpecifier::GArg; break; + case 'X': k = ConversionSpecifier::XArg; break; + case 'a': k = ConversionSpecifier::aArg; break; + case 'd': k = ConversionSpecifier::dArg; break; + case 'e': k = ConversionSpecifier::eArg; break; + case 'f': k = ConversionSpecifier::fArg; break; + case 'g': k = ConversionSpecifier::gArg; break; + case 'i': k = ConversionSpecifier::iArg; break; + case 'n': k = ConversionSpecifier::nArg; break; + case 'c': k = ConversionSpecifier::cArg; break; + case 'C': k = ConversionSpecifier::CArg; break; + case 'S': k = ConversionSpecifier::SArg; break; + case '[': k = ConversionSpecifier::ScanListArg; break; + case 'u': k = ConversionSpecifier::uArg; break; + case 'x': k = ConversionSpecifier::xArg; break; + case 'o': k = ConversionSpecifier::oArg; break; + case 's': k = ConversionSpecifier::sArg; break; + case 'p': k = ConversionSpecifier::pArg; break; + } + ScanfConversionSpecifier CS(conversionPosition, k); + if (k == ScanfConversionSpecifier::ScanListArg) { + if (!ParseScanList(H, CS, I, E)) + return true; + } + FS.setConversionSpecifier(CS); + if (CS.consumesDataArgument() && !FS.getSuppressAssignment() + && !FS.usesPositionalArg()) + FS.setArgIndex(argIndex++); + + // FIXME: '%' and '*' doesn't make sense. Issue a warning. + // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. + + if (k == ScanfConversionSpecifier::InvalidSpecifier) { + // Assume the conversion takes one argument. + return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg); + } + return ScanfSpecifierResult(Start, FS); +} + +bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, + const char *I, + const char *E) { + + unsigned argIndex = 0; + + // Keep looking for a format specifier until we have exhausted the string. + while (I != E) { + const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex); + // Did a fail-stop error of any kind occur when parsing the specifier? + // If so, don't do any more processing. + if (FSR.shouldStop()) + return true;; + // Did we exhaust the string or encounter an error that + // we can recover from? + if (!FSR.hasValue()) + continue; + // We have a format specifier. Pass it to the callback. + if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(), + I - FSR.getStart())) { + return true; + } + } + assert(I == E && "Format string not exhausted"); + return false; +} + + diff --git a/lib/Analysis/UninitializedValues.cpp b/lib/Analysis/UninitializedValues.cpp index 7a62864..0f43efa 100644 --- a/lib/Analysis/UninitializedValues.cpp +++ b/lib/Analysis/UninitializedValues.cpp @@ -121,7 +121,7 @@ bool TransferFuncs::VisitBinaryOperator(BinaryOperator* B) { if (VarDecl* VD = FindBlockVarDecl(B->getLHS())) if (B->isAssignmentOp()) { - if (B->getOpcode() == BinaryOperator::Assign) + if (B->getOpcode() == BO_Assign) return V(VD,AD) = Visit(B->getRHS()); else // Handle +=, -=, *=, etc. We do want '&', not '&&'. return V(VD,AD) = Visit(B->getLHS()) & Visit(B->getRHS()); @@ -168,7 +168,7 @@ bool TransferFuncs::VisitCallExpr(CallExpr* C) { bool TransferFuncs::VisitUnaryOperator(UnaryOperator* U) { switch (U->getOpcode()) { - case UnaryOperator::AddrOf: { + case UO_AddrOf: { VarDecl* VD = FindBlockVarDecl(U->getSubExpr()); if (VD && VD->isBlockVarDecl()) return V(VD,AD) = Initialized; |