diff options
author | dim <dim@FreeBSD.org> | 2011-07-17 15:40:56 +0000 |
---|---|---|
committer | dim <dim@FreeBSD.org> | 2011-07-17 15:40:56 +0000 |
commit | 611ba3ea3300b71eb95dc4e45f20eee5dddd32e1 (patch) | |
tree | 2097d084eb235c0b12c0bff3445f4ec7bbaa8a12 /lib/StaticAnalyzer/Checkers/CStringChecker.cpp | |
parent | c49018d9cce52d8c9f34b44865ec3ba8e89a1488 (diff) | |
download | FreeBSD-src-611ba3ea3300b71eb95dc4e45f20eee5dddd32e1.zip FreeBSD-src-611ba3ea3300b71eb95dc4e45f20eee5dddd32e1.tar.gz |
Vendor import of clang trunk r135360:
http://llvm.org/svn/llvm-project/cfe/trunk@135360
Diffstat (limited to 'lib/StaticAnalyzer/Checkers/CStringChecker.cpp')
-rw-r--r-- | lib/StaticAnalyzer/Checkers/CStringChecker.cpp | 756 |
1 files changed, 586 insertions, 170 deletions
diff --git a/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index f2f5c1e..c5dac5d 100644 --- a/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -30,8 +30,11 @@ class CStringChecker : public Checker< eval::Call, check::DeadSymbols, check::RegionChanges > { - mutable llvm::OwningPtr<BugType> BT_Null, BT_Bounds, BT_BoundsWrite, - BT_Overlap, BT_NotCString; + mutable llvm::OwningPtr<BugType> BT_Null, BT_Bounds, + BT_Overlap, BT_NotCString, + BT_AdditionOverflow; + mutable const char *CurrentFunctionDescription; + public: static void *getTag() { static int tag; return &tag; } @@ -91,9 +94,11 @@ public: const MemRegion *MR, SVal strLength); static SVal getCStringLengthForRegion(CheckerContext &C, const GRState *&state, - const Expr *Ex, const MemRegion *MR); + const Expr *Ex, const MemRegion *MR, + bool hypothetical); SVal getCStringLength(CheckerContext &C, const GRState *&state, - const Expr *Ex, SVal Buf) const; + const Expr *Ex, SVal Buf, + bool hypothetical = false) const; const StringLiteral *getCStringLiteral(CheckerContext &C, const GRState *&state, @@ -112,17 +117,29 @@ public: const Expr *S, SVal l) const; const GRState *CheckLocation(CheckerContext &C, const GRState *state, const Expr *S, SVal l, - bool IsDestination = false) const; + const char *message = NULL) const; const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state, const Expr *Size, const Expr *FirstBuf, - const Expr *SecondBuf = NULL, - bool FirstIsDestination = false) const; + const Expr *SecondBuf, + const char *firstMessage = NULL, + const char *secondMessage = NULL, + bool WarnAboutSize = false) const; + const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state, + const Expr *Size, const Expr *Buf, + const char *message = NULL, + bool WarnAboutSize = false) const { + // This is a convenience override. + return CheckBufferAccess(C, state, Size, Buf, NULL, message, NULL, + WarnAboutSize); + } const GRState *CheckOverlap(CheckerContext &C, const GRState *state, const Expr *Size, const Expr *First, const Expr *Second) const; void emitOverlapBug(CheckerContext &C, const GRState *state, const Stmt *First, const Stmt *Second) const; + const GRState *checkAdditionOverflow(CheckerContext &C, const GRState *state, + NonLoc left, NonLoc right) const; }; class CStringLength { @@ -176,10 +193,14 @@ const GRState *CStringChecker::checkNonNull(CheckerContext &C, BT_Null.reset(new BuiltinBug("API", "Null pointer argument in call to byte string function")); + llvm::SmallString<80> buf; + llvm::raw_svector_ostream os(buf); + assert(CurrentFunctionDescription); + os << "Null pointer argument in call to " << CurrentFunctionDescription; + // Generate a report for this bug. BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get()); - EnhancedBugReport *report = new EnhancedBugReport(*BT, - BT->getDescription(), N); + EnhancedBugReport *report = new EnhancedBugReport(*BT, os.str(), N); report->addRange(S->getSourceRange()); report->addVisitorCreator(bugreporter::registerTrackNullOrUndefValue, S); @@ -196,7 +217,7 @@ const GRState *CStringChecker::checkNonNull(CheckerContext &C, const GRState *CStringChecker::CheckLocation(CheckerContext &C, const GRState *state, const Expr *S, SVal l, - bool IsDestination) const { + const char *warningMsg) const { // If a previous check has failed, propagate the failure. if (!state) return NULL; @@ -216,7 +237,8 @@ const GRState *CStringChecker::CheckLocation(CheckerContext &C, // Get the size of the array. const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); SValBuilder &svalBuilder = C.getSValBuilder(); - SVal Extent = svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); + SVal Extent = + svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent); // Get the index of the accessed element. @@ -229,28 +251,32 @@ const GRState *CStringChecker::CheckLocation(CheckerContext &C, if (!N) return NULL; - BuiltinBug *BT; - if (IsDestination) { - if (!BT_BoundsWrite) { - BT_BoundsWrite.reset(new BuiltinBug("Out-of-bound array access", - "Byte string function overflows destination buffer")); - } - BT = static_cast<BuiltinBug*>(BT_BoundsWrite.get()); + if (!BT_Bounds) { + BT_Bounds.reset(new BuiltinBug("Out-of-bound array access", + "Byte string function accesses out-of-bound array element")); + } + BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get()); + + // Generate a report for this bug. + RangedBugReport *report; + if (warningMsg) { + report = new RangedBugReport(*BT, warningMsg, N); } else { - if (!BT_Bounds) { - BT_Bounds.reset(new BuiltinBug("Out-of-bound array access", - "Byte string function accesses out-of-bound array element")); - } - BT = static_cast<BuiltinBug*>(BT_Bounds.get()); + assert(CurrentFunctionDescription); + assert(CurrentFunctionDescription[0] != '\0'); + + llvm::SmallString<80> buf; + llvm::raw_svector_ostream os(buf); + os << (char)toupper(CurrentFunctionDescription[0]) + << &CurrentFunctionDescription[1] + << " accesses out-of-bound array element"; + report = new RangedBugReport(*BT, os.str(), N); } // FIXME: It would be nice to eventually make this diagnostic more clear, // e.g., by referencing the original declaration or by saying *why* this // reference is outside the range. - // Generate a report for this bug. - RangedBugReport *report = new RangedBugReport(*BT, BT->getDescription(), N); - report->addRange(S->getSourceRange()); C.EmitReport(report); return NULL; @@ -266,13 +292,15 @@ const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C, const Expr *Size, const Expr *FirstBuf, const Expr *SecondBuf, - bool FirstIsDestination) const { + const char *firstMessage, + const char *secondMessage, + bool WarnAboutSize) const { // If a previous check has failed, propagate the failure. if (!state) return NULL; SValBuilder &svalBuilder = C.getSValBuilder(); - ASTContext &Ctx = C.getASTContext(); + ASTContext &Ctx = svalBuilder.getContext(); QualType sizeTy = Size->getType(); QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); @@ -284,6 +312,8 @@ const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C, return NULL; // Get the access length and make sure it is known. + // FIXME: This assumes the caller has already checked that the access length + // is positive. And that it's unsigned. SVal LengthVal = state->getSVal(Size); NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); if (!Length) @@ -297,9 +327,11 @@ const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C, // Check that the first buffer is sufficiently long. SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { + const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf); + SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, LastOffset, PtrTy); - state = CheckLocation(C, state, FirstBuf, BufEnd, FirstIsDestination); + state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage); // If the buffer isn't large enough, abort. if (!state) @@ -315,9 +347,11 @@ const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C, BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType()); if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { + const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf); + SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, LastOffset, PtrTy); - state = CheckLocation(C, state, SecondBuf, BufEnd); + state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage); } } @@ -368,8 +402,7 @@ const GRState *CStringChecker::CheckOverlap(CheckerContext &C, state = stateFalse; // Which value comes first? - ASTContext &Ctx = svalBuilder.getContext(); - QualType cmpTy = Ctx.IntTy; + QualType cmpTy = svalBuilder.getConditionType(); SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT, *firstLoc, *secondLoc, cmpTy); DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse); @@ -402,8 +435,10 @@ const GRState *CStringChecker::CheckOverlap(CheckerContext &C, // Convert the first buffer's start address to char*. // Bail out if the cast fails. + ASTContext &Ctx = svalBuilder.getContext(); QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); - SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, First->getType()); + SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, + First->getType()); Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart); if (!FirstStartLoc) return state; @@ -454,12 +489,78 @@ void CStringChecker::emitOverlapBug(CheckerContext &C, const GRState *state, C.EmitReport(report); } +const GRState *CStringChecker::checkAdditionOverflow(CheckerContext &C, + const GRState *state, + NonLoc left, + NonLoc right) const { + // If a previous check has failed, propagate the failure. + if (!state) + return NULL; + + SValBuilder &svalBuilder = C.getSValBuilder(); + BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); + + QualType sizeTy = svalBuilder.getContext().getSizeType(); + const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); + NonLoc maxVal = svalBuilder.makeIntVal(maxValInt); + + SVal maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right, + sizeTy); + + if (maxMinusRight.isUnknownOrUndef()) { + // Try switching the operands. (The order of these two assignments is + // important!) + maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left, + sizeTy); + left = right; + } + + if (NonLoc *maxMinusRightNL = dyn_cast<NonLoc>(&maxMinusRight)) { + QualType cmpTy = svalBuilder.getConditionType(); + // If left > max - right, we have an overflow. + SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left, + *maxMinusRightNL, cmpTy); + + const GRState *stateOverflow, *stateOkay; + llvm::tie(stateOverflow, stateOkay) = + state->assume(cast<DefinedOrUnknownSVal>(willOverflow)); + + if (stateOverflow && !stateOkay) { + // We have an overflow. Emit a bug report. + ExplodedNode *N = C.generateSink(stateOverflow); + if (!N) + return NULL; + + if (!BT_AdditionOverflow) + BT_AdditionOverflow.reset(new BuiltinBug("API", + "Sum of expressions causes overflow")); + + // This isn't a great error message, but this should never occur in real + // code anyway -- you'd have to create a buffer longer than a size_t can + // represent, which is sort of a contradiction. + const char *warning = + "This expression will create a string whose length is too big to " + "be represented as a size_t"; + + // Generate a report for this bug. + BugReport *report = new BugReport(*BT_AdditionOverflow, warning, N); + C.EmitReport(report); + + return NULL; + } + + // From now on, assume an overflow didn't occur. + assert(stateOkay); + state = stateOkay; + } + + return state; +} + const GRState *CStringChecker::setCStringLength(const GRState *state, const MemRegion *MR, SVal strLength) { assert(!strLength.isUndef() && "Attempt to set an undefined string length"); - if (strLength.isUnknown()) - return state; MR = MR->StripCasts(); @@ -474,7 +575,8 @@ const GRState *CStringChecker::setCStringLength(const GRState *state, case MemRegion::VarRegionKind: case MemRegion::FieldRegionKind: case MemRegion::ObjCIvarRegionKind: - return state->set<CStringLength>(MR, strLength); + // These are the types we can currently track string lengths for. + break; case MemRegion::ElementRegionKind: // FIXME: Handle element regions by upper-bounding the parent region's @@ -488,16 +590,24 @@ const GRState *CStringChecker::setCStringLength(const GRState *state, // warning for things like strcpy((char[]){'a', 0}, "b"); return state; } + + if (strLength.isUnknown()) + return state->remove<CStringLength>(MR); + + return state->set<CStringLength>(MR, strLength); } SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, const GRState *&state, const Expr *Ex, - const MemRegion *MR) { - // If there's a recorded length, go ahead and return it. - const SVal *Recorded = state->get<CStringLength>(MR); - if (Recorded) - return *Recorded; + const MemRegion *MR, + bool hypothetical) { + if (!hypothetical) { + // If there's a recorded length, go ahead and return it. + const SVal *Recorded = state->get<CStringLength>(MR); + if (Recorded) + return *Recorded; + } // Otherwise, get a new symbol and update the state. unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); @@ -505,12 +615,16 @@ SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, QualType sizeTy = svalBuilder.getContext().getSizeType(); SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(), MR, Ex, sizeTy, Count); - state = state->set<CStringLength>(MR, strLength); + + if (!hypothetical) + state = state->set<CStringLength>(MR, strLength); + return strLength; } SVal CStringChecker::getCStringLength(CheckerContext &C, const GRState *&state, - const Expr *Ex, SVal Buf) const { + const Expr *Ex, SVal Buf, + bool hypothetical) const { const MemRegion *MR = Buf.getAsRegion(); if (!MR) { // If we can't get a region, see if it's something we /know/ isn't a @@ -524,8 +638,9 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, const GRState *&state, llvm::SmallString<120> buf; llvm::raw_svector_ostream os(buf); - os << "Argument to byte string function is the address of the label '" - << Label->getLabel()->getName() + assert(CurrentFunctionDescription); + os << "Argument to " << CurrentFunctionDescription + << " is the address of the label '" << Label->getLabel()->getName() << "', which is not a null-terminated string"; // Generate a report for this bug. @@ -561,7 +676,7 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, const GRState *&state, case MemRegion::VarRegionKind: case MemRegion::FieldRegionKind: case MemRegion::ObjCIvarRegionKind: - return getCStringLengthForRegion(C, state, Ex, MR); + return getCStringLengthForRegion(C, state, Ex, MR, hypothetical); case MemRegion::CompoundLiteralRegionKind: // FIXME: Can we track this? Is it necessary? return UnknownVal(); @@ -581,7 +696,8 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, const GRState *&state, llvm::SmallString<120> buf; llvm::raw_svector_ostream os(buf); - os << "Argument to byte string function is "; + assert(CurrentFunctionDescription); + os << "Argument to " << CurrentFunctionDescription << " is "; if (SummarizeRegion(os, C.getASTContext(), MR)) os << ", which is not a null-terminated string"; @@ -700,12 +816,15 @@ void CStringChecker::evalCopyCommon(CheckerContext &C, const Expr *Size, const Expr *Dest, const Expr *Source, bool Restricted, bool IsMempcpy) const { + CurrentFunctionDescription = "memory copy function"; + // See if the size argument is zero. SVal sizeVal = state->getSVal(Size); QualType sizeTy = Size->getType(); const GRState *stateZeroSize, *stateNonZeroSize; - llvm::tie(stateZeroSize, stateNonZeroSize) = assumeZero(C, state, sizeVal, sizeTy); + llvm::tie(stateZeroSize, stateNonZeroSize) = + assumeZero(C, state, sizeVal, sizeTy); // Get the value of the Dest. SVal destVal = state->getSVal(Dest); @@ -737,8 +856,10 @@ void CStringChecker::evalCopyCommon(CheckerContext &C, return; // Ensure the accesses are valid and that the buffers do not overlap. + const char * const writeWarning = + "Memory copy function overflows destination buffer"; state = CheckBufferAccess(C, state, Size, Dest, Source, - /* FirstIsDst = */ true); + writeWarning, /* sourceWarning = */ NULL); if (Restricted) state = CheckOverlap(C, state, Size, Dest, Source); @@ -824,6 +945,8 @@ void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const { void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const { // int memcmp(const void *s1, const void *s2, size_t n); + CurrentFunctionDescription = "memory comparison function"; + const Expr *Left = CE->getArg(0); const Expr *Right = CE->getArg(1); const Expr *Size = CE->getArg(2); @@ -861,7 +984,7 @@ void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const { const GRState *StSameBuf, *StNotSameBuf; llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); - // If the two arguments might be the same buffer, we know the result is zero, + // If the two arguments might be the same buffer, we know the result is 0, // and we only need to check one size. if (StSameBuf) { state = StSameBuf; @@ -902,58 +1025,126 @@ void CStringChecker::evalstrnLength(CheckerContext &C, void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, bool IsStrnlen) const { + CurrentFunctionDescription = "string length function"; const GRState *state = C.getState(); + + if (IsStrnlen) { + const Expr *maxlenExpr = CE->getArg(1); + SVal maxlenVal = state->getSVal(maxlenExpr); + + const GRState *stateZeroSize, *stateNonZeroSize; + llvm::tie(stateZeroSize, stateNonZeroSize) = + assumeZero(C, state, maxlenVal, maxlenExpr->getType()); + + // If the size can be zero, the result will be 0 in that case, and we don't + // have to check the string itself. + if (stateZeroSize) { + SVal zero = C.getSValBuilder().makeZeroVal(CE->getType()); + stateZeroSize = stateZeroSize->BindExpr(CE, zero); + C.addTransition(stateZeroSize); + } + + // If the size is GUARANTEED to be zero, we're done! + if (!stateNonZeroSize) + return; + + // Otherwise, record the assumption that the size is nonzero. + state = stateNonZeroSize; + } + + // Check that the string argument is non-null. const Expr *Arg = CE->getArg(0); SVal ArgVal = state->getSVal(Arg); - // Check that the argument is non-null. state = checkNonNull(C, state, Arg, ArgVal); - if (state) { - SVal strLength = getCStringLength(C, state, Arg, ArgVal); + if (!state) + return; - // If the argument isn't a valid C string, there's no valid state to - // transition to. - if (strLength.isUndef()) - return; + SVal strLength = getCStringLength(C, state, Arg, ArgVal); - // If the check is for strnlen() then bind the return value to no more than - // the maxlen value. - if (IsStrnlen) { - const Expr *maxlenExpr = CE->getArg(1); - SVal maxlenVal = state->getSVal(maxlenExpr); - - NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength); - NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal); + // If the argument isn't a valid C string, there's no valid state to + // transition to. + if (strLength.isUndef()) + return; - QualType cmpTy = C.getSValBuilder().getContext().IntTy; - const GRState *stateTrue, *stateFalse; - - // Check if the strLength is greater than or equal to the maxlen - llvm::tie(stateTrue, stateFalse) = + DefinedOrUnknownSVal result = UnknownVal(); + + // If the check is for strnlen() then bind the return value to no more than + // the maxlen value. + if (IsStrnlen) { + QualType cmpTy = C.getSValBuilder().getConditionType(); + + // It's a little unfortunate to be getting this again, + // but it's not that expensive... + const Expr *maxlenExpr = CE->getArg(1); + SVal maxlenVal = state->getSVal(maxlenExpr); + + NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength); + NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal); + + if (strLengthNL && maxlenValNL) { + const GRState *stateStringTooLong, *stateStringNotTooLong; + + // Check if the strLength is greater than the maxlen. + llvm::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(cast<DefinedOrUnknownSVal> - (C.getSValBuilder().evalBinOpNN(state, BO_GE, - *strLengthNL, *maxlenValNL, + (C.getSValBuilder().evalBinOpNN(state, BO_GT, + *strLengthNL, + *maxlenValNL, cmpTy))); - // If the strLength is greater than or equal to the maxlen, set strLength - // to maxlen - if (stateTrue && !stateFalse) { - strLength = maxlenVal; + if (stateStringTooLong && !stateStringNotTooLong) { + // If the string is longer than maxlen, return maxlen. + result = *maxlenValNL; + } else if (stateStringNotTooLong && !stateStringTooLong) { + // If the string is shorter than maxlen, return its length. + result = *strLengthNL; } } - // If getCStringLength couldn't figure out the length, conjure a return - // value, so it can be used in constraints, at least. - if (strLength.isUnknown()) { + if (result.isUnknown()) { + // If we don't have enough information for a comparison, there's + // no guarantee the full string length will actually be returned. + // All we know is the return value is the min of the string length + // and the limit. This is better than nothing. unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); - strLength = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count); + result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count); + NonLoc *resultNL = cast<NonLoc>(&result); + + if (strLengthNL) { + state = state->assume(cast<DefinedOrUnknownSVal> + (C.getSValBuilder().evalBinOpNN(state, BO_LE, + *resultNL, + *strLengthNL, + cmpTy)), true); + } + + if (maxlenValNL) { + state = state->assume(cast<DefinedOrUnknownSVal> + (C.getSValBuilder().evalBinOpNN(state, BO_LE, + *resultNL, + *maxlenValNL, + cmpTy)), true); + } } - // Bind the return value. - state = state->BindExpr(CE, strLength); - C.addTransition(state); + } else { + // This is a plain strlen(), not strnlen(). + result = cast<DefinedOrUnknownSVal>(strLength); + + // If we don't know the length of the string, conjure a return + // value, so it can be used in constraints, at least. + if (result.isUnknown()) { + unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); + result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count); + } } + + // Bind the return value. + assert(!result.isUnknown() && "Should have conjured a value by now"); + state = state->BindExpr(CE, result); + C.addTransition(state); } void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const { @@ -999,6 +1190,7 @@ void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const { void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool returnEnd, bool isBounded, bool isAppending) const { + CurrentFunctionDescription = "string copy function"; const GRState *state = C.getState(); // Check that the destination is non-null. @@ -1023,76 +1215,240 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, if (strLength.isUndef()) return; + SValBuilder &svalBuilder = C.getSValBuilder(); + QualType cmpTy = svalBuilder.getConditionType(); + QualType sizeTy = svalBuilder.getContext().getSizeType(); + + // These two values allow checking two kinds of errors: + // - actual overflows caused by a source that doesn't fit in the destination + // - potential overflows caused by a bound that could exceed the destination + SVal amountCopied = UnknownVal(); + SVal maxLastElementIndex = UnknownVal(); + const char *boundWarning = NULL; + // If the function is strncpy, strncat, etc... it is bounded. if (isBounded) { // Get the max number of characters to copy. const Expr *lenExpr = CE->getArg(2); SVal lenVal = state->getSVal(lenExpr); - // Cast the length to a NonLoc SVal. If it is not a NonLoc then give up. - NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength); - if (!strLengthNL) - return; + // Protect against misdeclared strncpy(). + lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType()); - // Cast the max length to a NonLoc SVal. If it is not a NonLoc then give up. + NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength); NonLoc *lenValNL = dyn_cast<NonLoc>(&lenVal); - if (!lenValNL) - return; - QualType cmpTy = C.getSValBuilder().getContext().IntTy; - const GRState *stateTrue, *stateFalse; - - // Check if the max number to copy is less than the length of the src. - llvm::tie(stateTrue, stateFalse) = - state->assume(cast<DefinedOrUnknownSVal> - (C.getSValBuilder().evalBinOpNN(state, BO_GT, - *strLengthNL, *lenValNL, - cmpTy))); - - if (stateTrue) { - // Max number to copy is less than the length of the src, so the actual - // strLength copied is the max number arg. - strLength = lenVal; - } + // If we know both values, we might be able to figure out how much + // we're copying. + if (strLengthNL && lenValNL) { + const GRState *stateSourceTooLong, *stateSourceNotTooLong; + + // Check if the max number to copy is less than the length of the src. + // If the bound is equal to the source length, strncpy won't null- + // terminate the result! + llvm::tie(stateSourceTooLong, stateSourceNotTooLong) = + state->assume(cast<DefinedOrUnknownSVal> + (svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, + *lenValNL, cmpTy))); + + if (stateSourceTooLong && !stateSourceNotTooLong) { + // Max number to copy is less than the length of the src, so the actual + // strLength copied is the max number arg. + state = stateSourceTooLong; + amountCopied = lenVal; + + } else if (!stateSourceTooLong && stateSourceNotTooLong) { + // The source buffer entirely fits in the bound. + state = stateSourceNotTooLong; + amountCopied = strLength; + } + } + + // We still want to know if the bound is known to be too large. + if (lenValNL) { + if (isAppending) { + // For strncat, the check is strlen(dst) + lenVal < sizeof(dst) + + // Get the string length of the destination. If the destination is + // memory that can't have a string length, we shouldn't be copying + // into it anyway. + SVal dstStrLength = getCStringLength(C, state, Dst, DstVal); + if (dstStrLength.isUndef()) + return; + + if (NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength)) { + maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add, + *lenValNL, + *dstStrLengthNL, + sizeTy); + boundWarning = "Size argument is greater than the free space in the " + "destination buffer"; + } + + } else { + // For strncpy, this is just checking that lenVal <= sizeof(dst) + // (Yes, strncpy and strncat differ in how they treat termination. + // strncat ALWAYS terminates, but strncpy doesn't.) + NonLoc one = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy)); + maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, + one, sizeTy); + boundWarning = "Size argument is greater than the length of the " + "destination buffer"; + } + } + + // If we couldn't pin down the copy length, at least bound it. + // FIXME: We should actually run this code path for append as well, but + // right now it creates problems with constraints (since we can end up + // trying to pass constraints from symbol to symbol). + if (amountCopied.isUnknown() && !isAppending) { + // Try to get a "hypothetical" string length symbol, which we can later + // set as a real value if that turns out to be the case. + amountCopied = getCStringLength(C, state, lenExpr, srcVal, true); + assert(!amountCopied.isUndef()); + + if (NonLoc *amountCopiedNL = dyn_cast<NonLoc>(&amountCopied)) { + if (lenValNL) { + // amountCopied <= lenVal + SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE, + *amountCopiedNL, + *lenValNL, + cmpTy); + state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanBound), + true); + if (!state) + return; + } + + if (strLengthNL) { + // amountCopied <= strlen(source) + SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE, + *amountCopiedNL, + *strLengthNL, + cmpTy); + state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanSrc), + true); + if (!state) + return; + } + } + } + + } else { + // The function isn't bounded. The amount copied should match the length + // of the source buffer. + amountCopied = strLength; } + assert(state); + + // This represents the number of characters copied into the destination + // buffer. (It may not actually be the strlen if the destination buffer + // is not terminated.) + SVal finalStrLength = UnknownVal(); + // If this is an appending function (strcat, strncat...) then set the // string length to strlen(src) + strlen(dst) since the buffer will // ultimately contain both. if (isAppending) { - // Get the string length of the destination, or give up. + // Get the string length of the destination. If the destination is memory + // that can't have a string length, we shouldn't be copying into it anyway. SVal dstStrLength = getCStringLength(C, state, Dst, DstVal); if (dstStrLength.isUndef()) return; - NonLoc *srcStrLengthNL = dyn_cast<NonLoc>(&strLength); + NonLoc *srcStrLengthNL = dyn_cast<NonLoc>(&amountCopied); NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength); - // If src or dst cast to NonLoc is NULL, give up. - if ((!srcStrLengthNL) || (!dstStrLengthNL)) - return; + // If we know both string lengths, we might know the final string length. + if (srcStrLengthNL && dstStrLengthNL) { + // Make sure the two lengths together don't overflow a size_t. + state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL); + if (!state) + return; + + finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL, + *dstStrLengthNL, sizeTy); + } - QualType addTy = C.getSValBuilder().getContext().getSizeType(); + // If we couldn't get a single value for the final string length, + // we can at least bound it by the individual lengths. + if (finalStrLength.isUnknown()) { + // Try to get a "hypothetical" string length symbol, which we can later + // set as a real value if that turns out to be the case. + finalStrLength = getCStringLength(C, state, CE, DstVal, true); + assert(!finalStrLength.isUndef()); + + if (NonLoc *finalStrLengthNL = dyn_cast<NonLoc>(&finalStrLength)) { + if (srcStrLengthNL) { + // finalStrLength >= srcStrLength + SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE, + *finalStrLengthNL, + *srcStrLengthNL, + cmpTy); + state = state->assume(cast<DefinedOrUnknownSVal>(sourceInResult), + true); + if (!state) + return; + } + + if (dstStrLengthNL) { + // finalStrLength >= dstStrLength + SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE, + *finalStrLengthNL, + *dstStrLengthNL, + cmpTy); + state = state->assume(cast<DefinedOrUnknownSVal>(destInResult), + true); + if (!state) + return; + } + } + } - strLength = C.getSValBuilder().evalBinOpNN(state, BO_Add, - *srcStrLengthNL, *dstStrLengthNL, - addTy); + } else { + // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and + // the final string length will match the input string length. + finalStrLength = amountCopied; } + // The final result of the function will either be a pointer past the last + // copied element, or a pointer to the start of the destination buffer. SVal Result = (returnEnd ? UnknownVal() : DstVal); + assert(state); + // If the destination is a MemRegion, try to check for a buffer overflow and // record the new string length. if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) { - // If the length is known, we can check for an overflow. - if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&strLength)) { - SVal lastElement = - C.getSValBuilder().evalBinOpLN(state, BO_Add, *dstRegVal, - *knownStrLength, Dst->getType()); + QualType ptrTy = Dst->getType(); + + // If we have an exact value on a bounded copy, use that to check for + // overflows, rather than our estimate about how much is actually copied. + if (boundWarning) { + if (NonLoc *maxLastNL = dyn_cast<NonLoc>(&maxLastElementIndex)) { + SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, + *maxLastNL, ptrTy); + state = CheckLocation(C, state, CE->getArg(2), maxLastElement, + boundWarning); + if (!state) + return; + } + } - state = CheckLocation(C, state, Dst, lastElement, /* IsDst = */ true); - if (!state) - return; + // Then, if the final length is known... + if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&finalStrLength)) { + SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, + *knownStrLength, ptrTy); + + // ...and we haven't checked the bound, we'll check the actual copy. + if (!boundWarning) { + const char * const warningMsg = + "String copy function overflows destination buffer"; + state = CheckLocation(C, state, Dst, lastElement, warningMsg); + if (!state) + return; + } // If this is a stpcpy-style copy, the last element is the return value. if (returnEnd) @@ -1107,16 +1463,25 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, // string, but that's still an improvement over blank invalidation. state = InvalidateBuffer(C, state, Dst, *dstRegVal); - // Set the C string length of the destination. - state = setCStringLength(state, dstRegVal->getRegion(), strLength); + // Set the C string length of the destination, if we know it. + if (isBounded && !isAppending) { + // strncpy is annoying in that it doesn't guarantee to null-terminate + // the result string. If the original string didn't fit entirely inside + // the bound (including the null-terminator), we don't know how long the + // result is. + if (amountCopied != strLength) + finalStrLength = UnknownVal(); + } + state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength); } + assert(state); + // If this is a stpcpy-style copy, but we were unable to check for a buffer // overflow, we still need a result. Conjure a return value. if (returnEnd && Result.isUnknown()) { - SValBuilder &svalBuilder = C.getSValBuilder(); unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); - strLength = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); + Result = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); } // Set the return value. @@ -1125,29 +1490,30 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, } void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const { - //int strcmp(const char *restrict s1, const char *restrict s2); + //int strcmp(const char *s1, const char *s2); evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false); } void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const { - //int strncmp(const char *restrict s1, const char *restrict s2, size_t n); + //int strncmp(const char *s1, const char *s2, size_t n); evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false); } void CStringChecker::evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const { - //int strcasecmp(const char *restrict s1, const char *restrict s2); + //int strcasecmp(const char *s1, const char *s2); evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true); } void CStringChecker::evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const { - //int strncasecmp(const char *restrict s1, const char *restrict s2, size_t n); + //int strncasecmp(const char *s1, const char *s2, size_t n); evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true); } void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, bool isBounded, bool ignoreCase) const { + CurrentFunctionDescription = "string comparison function"; const GRState *state = C.getState(); // Check that the first string is non-null @@ -1174,52 +1540,96 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, if (s2Length.isUndef()) return; - // Get the string literal of the first string. - const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val); - if (!s1StrLiteral) - return; - llvm::StringRef s1StrRef = s1StrLiteral->getString(); + // If we know the two buffers are the same, we know the result is 0. + // First, get the two buffers' addresses. Another checker will have already + // made sure they're not undefined. + DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(s1Val); + DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(s2Val); + + // See if they are the same. + SValBuilder &svalBuilder = C.getSValBuilder(); + DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); + const GRState *StSameBuf, *StNotSameBuf; + llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); + + // If the two arguments might be the same buffer, we know the result is 0, + // and we only need to check one size. + if (StSameBuf) { + StSameBuf = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType())); + C.addTransition(StSameBuf); + + // If the two arguments are GUARANTEED to be the same, we're done! + if (!StNotSameBuf) + return; + } + + assert(StNotSameBuf); + state = StNotSameBuf; + + // At this point we can go about comparing the two buffers. + // For now, we only do this if they're both known string literals. - // Get the string literal of the second string. + // Attempt to extract string literals from both expressions. + const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val); const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val); - if (!s2StrLiteral) - return; - llvm::StringRef s2StrRef = s2StrLiteral->getString(); + bool canComputeResult = false; + + if (s1StrLiteral && s2StrLiteral) { + llvm::StringRef s1StrRef = s1StrLiteral->getString(); + llvm::StringRef s2StrRef = s2StrLiteral->getString(); + + if (isBounded) { + // Get the max number of characters to compare. + const Expr *lenExpr = CE->getArg(2); + SVal lenVal = state->getSVal(lenExpr); + + // If the length is known, we can get the right substrings. + if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) { + // Create substrings of each to compare the prefix. + s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue()); + s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue()); + canComputeResult = true; + } + } else { + // This is a normal, unbounded strcmp. + canComputeResult = true; + } - int result; - if (isBounded) { - // Get the max number of characters to compare. - const Expr *lenExpr = CE->getArg(2); - SVal lenVal = state->getSVal(lenExpr); + if (canComputeResult) { + // Real strcmp stops at null characters. + size_t s1Term = s1StrRef.find('\0'); + if (s1Term != llvm::StringRef::npos) + s1StrRef = s1StrRef.substr(0, s1Term); - // Dynamically cast the length to a ConcreteInt. If it is not a ConcreteInt - // then give up, otherwise get the value and use it as the bounds. - nonloc::ConcreteInt *CI = dyn_cast<nonloc::ConcreteInt>(&lenVal); - if (!CI) - return; - llvm::APSInt lenInt(CI->getValue()); + size_t s2Term = s2StrRef.find('\0'); + if (s2Term != llvm::StringRef::npos) + s2StrRef = s2StrRef.substr(0, s2Term); + + // Use StringRef's comparison methods to compute the actual result. + int result; - // Create substrings of each to compare the prefix. - s1StrRef = s1StrRef.substr(0, (size_t)lenInt.getLimitedValue()); - s2StrRef = s2StrRef.substr(0, (size_t)lenInt.getLimitedValue()); + if (ignoreCase) { + // Compare string 1 to string 2 the same way strcasecmp() does. + result = s1StrRef.compare_lower(s2StrRef); + } else { + // Compare string 1 to string 2 the same way strcmp() does. + result = s1StrRef.compare(s2StrRef); + } + + // Build the SVal of the comparison and bind the return value. + SVal resultVal = svalBuilder.makeIntVal(result, CE->getType()); + state = state->BindExpr(CE, resultVal); + } } - if (ignoreCase) { - // Compare string 1 to string 2 the same way strcasecmp() does. - result = s1StrRef.compare_lower(s2StrRef); - } else { - // Compare string 1 to string 2 the same way strcmp() does. - result = s1StrRef.compare(s2StrRef); + if (!canComputeResult) { + // Conjure a symbolic value. It's the best we can do. + unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); + SVal resultVal = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); + state = state->BindExpr(CE, resultVal); } - - // Build the SVal of the comparison to bind the return value. - SValBuilder &svalBuilder = C.getSValBuilder(); - QualType intTy = svalBuilder.getContext().IntTy; - SVal resultVal = svalBuilder.makeIntVal(result, intTy); - // Bind the return value of the expression. - // Set the return value. - state = state->BindExpr(CE, resultVal); + // Record this as a possible path. C.addTransition(state); } @@ -1251,7 +1661,7 @@ bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const { .Cases("memcmp", "bcmp", &CStringChecker::evalMemcmp) .Cases("memmove", "__memmove_chk", &CStringChecker::evalMemmove) .Cases("strcpy", "__strcpy_chk", &CStringChecker::evalStrcpy) - //.Cases("strncpy", "__strncpy_chk", &CStringChecker::evalStrncpy) + .Cases("strncpy", "__strncpy_chk", &CStringChecker::evalStrncpy) .Cases("stpcpy", "__stpcpy_chk", &CStringChecker::evalStpcpy) .Cases("strcat", "__strcat_chk", &CStringChecker::evalStrcat) .Cases("strncat", "__strncat_chk", &CStringChecker::evalStrncat) @@ -1268,6 +1678,10 @@ bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const { if (!evalFunction) return false; + // Make sure each function sets its own description. + // (But don't bother in a release build.) + assert(!(CurrentFunctionDescription = NULL)); + // Check and evaluate the call. (this->*evalFunction)(C, CE); return true; @@ -1373,8 +1787,10 @@ void CStringChecker::checkLiveSymbols(const GRState *state, for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); I != E; ++I) { SVal Len = I.getData(); - if (SymbolRef Sym = Len.getAsSymbol()) - SR.markInUse(Sym); + + for (SVal::symbol_iterator si = Len.symbol_begin(), se = Len.symbol_end(); + si != se; ++si) + SR.markInUse(*si); } } |