diff options
author | rdivacky <rdivacky@FreeBSD.org> | 2009-10-14 17:57:32 +0000 |
---|---|---|
committer | rdivacky <rdivacky@FreeBSD.org> | 2009-10-14 17:57:32 +0000 |
commit | cd749a9c07f1de2fb8affde90537efa4bc3e7c54 (patch) | |
tree | b21f6de4e08b89bb7931806bab798fc2a5e3a686 /lib/Support | |
parent | 72621d11de5b873f1695f391eb95f0b336c3d2d4 (diff) | |
download | FreeBSD-src-cd749a9c07f1de2fb8affde90537efa4bc3e7c54.zip FreeBSD-src-cd749a9c07f1de2fb8affde90537efa4bc3e7c54.tar.gz |
Update llvm to r84119.
Diffstat (limited to 'lib/Support')
41 files changed, 6325 insertions, 1211 deletions
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 3b03c54..e431d27 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -13,7 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/APFloat.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include <cstring> @@ -122,27 +124,30 @@ assertArithmeticOK(const llvm::fltSemantics &semantics) { If the exponent overflows, returns a large exponent with the appropriate sign. */ static int -readExponent(const char *p) +readExponent(StringRef::iterator begin, StringRef::iterator end) { bool isNegative; unsigned int absExponent; const unsigned int overlargeExponent = 24000; /* FIXME. */ + StringRef::iterator p = begin; + + assert(p != end && "Exponent has no digits"); isNegative = (*p == '-'); - if (*p == '-' || *p == '+') + if (*p == '-' || *p == '+') { p++; + assert(p != end && "Exponent has no digits"); + } absExponent = decDigitValue(*p++); - assert (absExponent < 10U); + assert(absExponent < 10U && "Invalid character in exponent"); - for (;;) { + for (; p != end; ++p) { unsigned int value; value = decDigitValue(*p); - if (value >= 10U) - break; + assert(value < 10U && "Invalid character in exponent"); - p++; value += absExponent * 10; if (absExponent >= overlargeExponent) { absExponent = overlargeExponent; @@ -151,6 +156,8 @@ readExponent(const char *p) absExponent = value; } + assert(p == end && "Invalid exponent in exponent"); + if (isNegative) return -(int) absExponent; else @@ -160,28 +167,29 @@ readExponent(const char *p) /* This is ugly and needs cleaning up, but I don't immediately see how whilst remaining safe. 
*/ static int -totalExponent(const char *p, int exponentAdjustment) +totalExponent(StringRef::iterator p, StringRef::iterator end, + int exponentAdjustment) { int unsignedExponent; bool negative, overflow; int exponent; - /* Move past the exponent letter and sign to the digits. */ - p++; + assert(p != end && "Exponent has no digits"); + negative = *p == '-'; - if(*p == '-' || *p == '+') + if(*p == '-' || *p == '+') { p++; + assert(p != end && "Exponent has no digits"); + } unsignedExponent = 0; overflow = false; - for(;;) { + for(; p != end; ++p) { unsigned int value; value = decDigitValue(*p); - if(value >= 10U) - break; + assert(value < 10U && "Invalid character in exponent"); - p++; unsignedExponent = unsignedExponent * 10 + value; if(unsignedExponent > 65535) overflow = true; @@ -205,16 +213,21 @@ totalExponent(const char *p, int exponentAdjustment) return exponent; } -static const char * -skipLeadingZeroesAndAnyDot(const char *p, const char **dot) +static StringRef::iterator +skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, + StringRef::iterator *dot) { - *dot = 0; - while(*p == '0') + StringRef::iterator p = begin; + *dot = end; + while(*p == '0' && p != end) p++; if(*p == '.') { *dot = p++; - while(*p == '0') + + assert(end - begin != 1 && "Significand has no digits"); + + while(*p == '0' && p != end) p++; } @@ -242,41 +255,50 @@ struct decimalInfo { }; static void -interpretDecimal(const char *p, decimalInfo *D) +interpretDecimal(StringRef::iterator begin, StringRef::iterator end, + decimalInfo *D) { - const char *dot; - - p = skipLeadingZeroesAndAnyDot (p, &dot); + StringRef::iterator dot = end; + StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot); D->firstSigDigit = p; D->exponent = 0; D->normalizedExponent = 0; - for (;;) { + for (; p != end; ++p) { if (*p == '.') { - assert(dot == 0); + assert(dot == end && "String contains multiple dots"); dot = p++; + if (p == end) + break; } if (decDigitValue(*p) >= 
10U) break; - p++; } - /* If number is all zerooes accept any exponent. */ - if (p != D->firstSigDigit) { - if (*p == 'e' || *p == 'E') - D->exponent = readExponent(p + 1); + if (p != end) { + assert((*p == 'e' || *p == 'E') && "Invalid character in significand"); + assert(p != begin && "Significand has no digits"); + assert((dot == end || p - begin != 1) && "Significand has no digits"); + + /* p points to the first non-digit in the string */ + D->exponent = readExponent(p + 1, end); /* Implied decimal point? */ - if (!dot) + if (dot == end) dot = p; + } + /* If number is all zeroes accept any exponent. */ + if (p != D->firstSigDigit) { /* Drop insignificant trailing zeroes. */ - do + if (p != begin) { do - p--; - while (*p == '0'); - while (*p == '.'); + do + p--; + while (p != begin && *p == '0'); + while (p != begin && *p == '.'); + } /* Adjust the exponents for any decimal point. */ D->exponent += static_cast<exponent_t>((dot - p) - (dot > p)); @@ -292,7 +314,8 @@ interpretDecimal(const char *p, decimalInfo *D) DIGITVALUE is the first hex digit of the fraction, P points to the next digit. 
*/ static lostFraction -trailingHexadecimalFraction(const char *p, unsigned int digitValue) +trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, + unsigned int digitValue) { unsigned int hexDigit; @@ -307,6 +330,8 @@ trailingHexadecimalFraction(const char *p, unsigned int digitValue) while(*p == '0') p++; + assert(p != end && "Invalid trailing hexadecimal fraction!"); + hexDigit = hexDigitValue(*p); /* If we ran off the end it is exactly zero or one-half, otherwise @@ -667,6 +692,14 @@ APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) normalize(rmNearestTiesToEven, lfExactlyZero); } +APFloat::APFloat(const fltSemantics &ourSemantics) { + assertArithmeticOK(ourSemantics); + initialize(&ourSemantics); + category = fcZero; + sign = false; +} + + APFloat::APFloat(const fltSemantics &ourSemantics, fltCategory ourCategory, bool negative, unsigned type) { @@ -680,7 +713,7 @@ APFloat::APFloat(const fltSemantics &ourSemantics, makeNaN(type); } -APFloat::APFloat(const fltSemantics &ourSemantics, const char *text) +APFloat::APFloat(const fltSemantics &ourSemantics, const StringRef& text) { assertArithmeticOK(ourSemantics); initialize(&ourSemantics); @@ -1068,7 +1101,7 @@ APFloat::roundAwayFromZero(roundingMode rounding_mode, switch (rounding_mode) { default: - assert(0); + llvm_unreachable(0); case rmNearestTiesToAway: return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; @@ -1207,7 +1240,7 @@ APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) { switch (convolve(category, rhs.category)) { default: - assert(0); + llvm_unreachable(0); case convolve(fcNaN, fcZero): case convolve(fcNaN, fcNormal): @@ -1331,7 +1364,7 @@ APFloat::multiplySpecials(const APFloat &rhs) { switch (convolve(category, rhs.category)) { default: - assert(0); + llvm_unreachable(0); case convolve(fcNaN, fcZero): case convolve(fcNaN, fcNormal): @@ -1373,7 +1406,7 @@ APFloat::divideSpecials(const APFloat &rhs) { switch 
(convolve(category, rhs.category)) { default: - assert(0); + llvm_unreachable(0); case convolve(fcNaN, fcZero): case convolve(fcNaN, fcNormal): @@ -1415,7 +1448,7 @@ APFloat::modSpecials(const APFloat &rhs) { switch (convolve(category, rhs.category)) { default: - assert(0); + llvm_unreachable(0); case convolve(fcNaN, fcZero): case convolve(fcNaN, fcNormal): @@ -1692,7 +1725,7 @@ APFloat::compare(const APFloat &rhs) const switch (convolve(category, rhs.category)) { default: - assert(0); + llvm_unreachable(0); case convolve(fcNaN, fcZero): case convolve(fcNaN, fcNormal): @@ -2106,13 +2139,13 @@ APFloat::convertFromZeroExtendedInteger(const integerPart *parts, } APFloat::opStatus -APFloat::convertFromHexadecimalString(const char *p, +APFloat::convertFromHexadecimalString(const StringRef &s, roundingMode rounding_mode) { - lostFraction lost_fraction; + lostFraction lost_fraction = lfExactlyZero; integerPart *significand; unsigned int bitPos, partsCount; - const char *dot, *firstSignificantDigit; + StringRef::iterator dot, firstSignificantDigit; zeroSignificand(); exponent = 0; @@ -2123,47 +2156,58 @@ APFloat::convertFromHexadecimalString(const char *p, bitPos = partsCount * integerPartWidth; /* Skip leading zeroes and any (hexa)decimal point. */ - p = skipLeadingZeroesAndAnyDot(p, &dot); + StringRef::iterator begin = s.begin(); + StringRef::iterator end = s.end(); + StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot); firstSignificantDigit = p; - for(;;) { + for(; p != end;) { integerPart hex_value; if(*p == '.') { - assert(dot == 0); + assert(dot == end && "String contains multiple dots"); dot = p++; + if (p == end) { + break; + } } hex_value = hexDigitValue(*p); if(hex_value == -1U) { - lost_fraction = lfExactlyZero; break; } p++; - /* Store the number whilst 4-bit nibbles remain. 
*/ - if(bitPos) { - bitPos -= 4; - hex_value <<= bitPos % integerPartWidth; - significand[bitPos / integerPartWidth] |= hex_value; - } else { - lost_fraction = trailingHexadecimalFraction(p, hex_value); - while(hexDigitValue(*p) != -1U) - p++; + if (p == end) { break; + } else { + /* Store the number whilst 4-bit nibbles remain. */ + if(bitPos) { + bitPos -= 4; + hex_value <<= bitPos % integerPartWidth; + significand[bitPos / integerPartWidth] |= hex_value; + } else { + lost_fraction = trailingHexadecimalFraction(p, end, hex_value); + while(p != end && hexDigitValue(*p) != -1U) + p++; + break; + } } } /* Hex floats require an exponent but not a hexadecimal point. */ - assert(*p == 'p' || *p == 'P'); + assert(p != end && "Hex strings require an exponent"); + assert((*p == 'p' || *p == 'P') && "Invalid character in significand"); + assert(p != begin && "Significand has no digits"); + assert((dot == end || p - begin != 1) && "Significand has no digits"); /* Ignore the exponent if we are zero. */ if(p != firstSignificantDigit) { int expAdjustment; /* Implicit hexadecimal point? */ - if(!dot) + if (dot == end) dot = p; /* Calculate the exponent adjustment implicit in the number of @@ -2179,7 +2223,7 @@ APFloat::convertFromHexadecimalString(const char *p, expAdjustment -= partsCount * integerPartWidth; /* Adjust for the given exponent. */ - exponent = totalExponent(p, expAdjustment); + exponent = totalExponent(p + 1, end, expAdjustment); } return normalize(rounding_mode, lost_fraction); @@ -2271,13 +2315,14 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts, } APFloat::opStatus -APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode) +APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mode) { decimalInfo D; opStatus fs; /* Scan the text. */ - interpretDecimal(p, &D); + StringRef::iterator p = str.begin(); + interpretDecimal(p, str.end(), &D); /* Handle the quick cases. 
First the case of no significant digits, i.e. zero, and then exponents that are obviously too large or too @@ -2332,10 +2377,14 @@ APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode) multiplier = 1; do { - if (*p == '.') + if (*p == '.') { p++; - + if (p == str.end()) { + break; + } + } decValue = decDigitValue(*p++); + assert(decValue < 10U && "Invalid character in significand"); multiplier *= 10; val = val * 10 + decValue; /* The maximum number that can be multiplied by ten with any @@ -2363,20 +2412,28 @@ APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode) } APFloat::opStatus -APFloat::convertFromString(const char *p, roundingMode rounding_mode) +APFloat::convertFromString(const StringRef &str, roundingMode rounding_mode) { assertArithmeticOK(*semantics); + assert(!str.empty() && "Invalid string length"); /* Handle a leading minus sign. */ - if(*p == '-') - sign = 1, p++; - else - sign = 0; + StringRef::iterator p = str.begin(); + size_t slen = str.size(); + sign = *p == '-' ? 
1 : 0; + if(*p == '-' || *p == '+') { + p++; + slen--; + assert(slen && "String has no digits"); + } - if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) - return convertFromHexadecimalString(p + 2, rounding_mode); + if(slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { + assert(slen - 2 && "Invalid string"); + return convertFromHexadecimalString(StringRef(p + 2, slen - 2), + rounding_mode); + } - return convertFromDecimalString(p, rounding_mode); + return convertFromDecimalString(StringRef(p, slen), rounding_mode); } /* Write out a hexadecimal representation of the floating point value @@ -2661,6 +2718,42 @@ APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const } APInt +APFloat::convertQuadrupleAPFloatToAPInt() const +{ + assert(semantics == (const llvm::fltSemantics*)&IEEEquad); + assert (partCount()==2); + + uint64_t myexponent, mysignificand, mysignificand2; + + if (category==fcNormal) { + myexponent = exponent+16383; //bias + mysignificand = significandParts()[0]; + mysignificand2 = significandParts()[1]; + if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) + myexponent = 0; // denormal + } else if (category==fcZero) { + myexponent = 0; + mysignificand = mysignificand2 = 0; + } else if (category==fcInfinity) { + myexponent = 0x7fff; + mysignificand = mysignificand2 = 0; + } else { + assert(category == fcNaN && "Unknown category!"); + myexponent = 0x7fff; + mysignificand = significandParts()[0]; + mysignificand2 = significandParts()[1]; + } + + uint64_t words[2]; + words[0] = mysignificand; + words[1] = ((uint64_t)(sign & 1) << 63) | + ((myexponent & 0x7fff) << 48) | + (mysignificand2 & 0xffffffffffffLL); + + return APInt(128, 2, words); +} + +APInt APFloat::convertDoubleAPFloatToAPInt() const { assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); @@ -2728,10 +2821,13 @@ APFloat::bitcastToAPInt() const { if (semantics == (const llvm::fltSemantics*)&IEEEsingle) return convertFloatAPFloatToAPInt(); - + if (semantics == (const 
llvm::fltSemantics*)&IEEEdouble) return convertDoubleAPFloatToAPInt(); + if (semantics == (const llvm::fltSemantics*)&IEEEquad) + return convertQuadrupleAPFloatToAPInt(); + if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble) return convertPPCDoubleDoubleAPFloatToAPInt(); @@ -2743,7 +2839,8 @@ APFloat::bitcastToAPInt() const float APFloat::convertToFloat() const { - assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); + assert(semantics == (const llvm::fltSemantics*)&IEEEsingle && + "Float semantics are not IEEEsingle"); APInt api = bitcastToAPInt(); return api.bitsToFloat(); } @@ -2751,7 +2848,8 @@ APFloat::convertToFloat() const double APFloat::convertToDouble() const { - assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); + assert(semantics == (const llvm::fltSemantics*)&IEEEdouble && + "Float semantics are not IEEEdouble"); APInt api = bitcastToAPInt(); return api.bitsToDouble(); } @@ -2848,6 +2946,46 @@ APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) } void +APFloat::initFromQuadrupleAPInt(const APInt &api) +{ + assert(api.getBitWidth()==128); + uint64_t i1 = api.getRawData()[0]; + uint64_t i2 = api.getRawData()[1]; + uint64_t myexponent = (i2 >> 48) & 0x7fff; + uint64_t mysignificand = i1; + uint64_t mysignificand2 = i2 & 0xffffffffffffLL; + + initialize(&APFloat::IEEEquad); + assert(partCount()==2); + + sign = static_cast<unsigned int>(i2>>63); + if (myexponent==0 && + (mysignificand==0 && mysignificand2==0)) { + // exponent, significand meaningless + category = fcZero; + } else if (myexponent==0x7fff && + (mysignificand==0 && mysignificand2==0)) { + // exponent, significand meaningless + category = fcInfinity; + } else if (myexponent==0x7fff && + (mysignificand!=0 || mysignificand2 !=0)) { + // exponent meaningless + category = fcNaN; + significandParts()[0] = mysignificand; + significandParts()[1] = mysignificand2; + } else { + category = fcNormal; + exponent = myexponent - 16383; + significandParts()[0] = mysignificand; + 
significandParts()[1] = mysignificand2; + if (myexponent==0) // denormal + exponent = -16382; + else + significandParts()[1] |= 0x1000000000000LL; // integer bit + } +} + +void APFloat::initFromDoubleAPInt(const APInt &api) { assert(api.getBitWidth()==64); @@ -2926,10 +3064,11 @@ APFloat::initFromAPInt(const APInt& api, bool isIEEE) return initFromDoubleAPInt(api); else if (api.getBitWidth()==80) return initFromF80LongDoubleAPInt(api); - else if (api.getBitWidth()==128 && !isIEEE) - return initFromPPCDoubleDoubleAPInt(api); + else if (api.getBitWidth()==128) + return (isIEEE ? + initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api)); else - assert(0); + llvm_unreachable(0); } APFloat::APFloat(const APInt& api, bool isIEEE) diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 30dc352..56d4773 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -14,9 +14,11 @@ #define DEBUG_TYPE "apint" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <cmath> @@ -34,7 +36,7 @@ inline static uint64_t* getClearedMemory(unsigned numWords) { return result; } -/// A utility function for allocating memory and checking for allocation +/// A utility function for allocating memory and checking for allocation /// failure. The content is not zeroed. inline static uint64_t* getMemory(unsigned numWords) { uint64_t * result = new uint64_t[numWords]; @@ -42,10 +44,36 @@ inline static uint64_t* getMemory(unsigned numWords) { return result; } +/// A utility function that converts a character to a digit. 
+inline static unsigned getDigit(char cdigit, uint8_t radix) { + unsigned r; + + if (radix == 16) { + r = cdigit - '0'; + if (r <= 9) + return r; + + r = cdigit - 'A'; + if (r <= 5) + return r + 10; + + r = cdigit - 'a'; + if (r <= 5) + return r + 10; + } + + r = cdigit - '0'; + if (r < radix) + return r; + + return -1U; +} + + void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) { pVal = getClearedMemory(getNumWords()); pVal[0] = val; - if (isSigned && int64_t(val) < 0) + if (isSigned && int64_t(val) < 0) for (unsigned i = 1; i < getNumWords(); ++i) pVal[i] = -1ULL; } @@ -58,7 +86,7 @@ void APInt::initSlowCase(const APInt& that) { APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) : BitWidth(numBits), VAL(0) { - assert(BitWidth && "bitwidth too small"); + assert(BitWidth && "Bitwidth too small"); assert(bigVal && "Null pointer detected!"); if (isSingleWord()) VAL = bigVal[0]; @@ -74,11 +102,10 @@ APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) clearUnusedBits(); } -APInt::APInt(unsigned numbits, const char StrStart[], unsigned slen, - uint8_t radix) +APInt::APInt(unsigned numbits, const StringRef& Str, uint8_t radix) : BitWidth(numbits), VAL(0) { - assert(BitWidth && "bitwidth too small"); - fromString(numbits, StrStart, slen, radix); + assert(BitWidth && "Bitwidth too small"); + fromString(numbits, Str, radix); } APInt& APInt::AssignSlowCase(const APInt& RHS) { @@ -99,7 +126,7 @@ APInt& APInt::AssignSlowCase(const APInt& RHS) { VAL = 0; pVal = getMemory(RHS.getNumWords()); memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); - } else if (getNumWords() == RHS.getNumWords()) + } else if (getNumWords() == RHS.getNumWords()) memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); else if (RHS.isSingleWord()) { delete [] pVal; @@ -114,7 +141,7 @@ APInt& APInt::AssignSlowCase(const APInt& RHS) { } APInt& APInt::operator=(uint64_t RHS) { - if (isSingleWord()) + if (isSingleWord()) VAL = 
RHS; else { pVal[0] = RHS; @@ -126,7 +153,7 @@ APInt& APInt::operator=(uint64_t RHS) { /// Profile - This method 'profiles' an APInt for use with FoldingSet. void APInt::Profile(FoldingSetNodeID& ID) const { ID.AddInteger(BitWidth); - + if (isSingleWord()) { ID.AddInteger(VAL); return; @@ -137,7 +164,7 @@ void APInt::Profile(FoldingSetNodeID& ID) const { ID.AddInteger(pVal[i]); } -/// add_1 - This function adds a single "digit" integer, y, to the multiple +/// add_1 - This function adds a single "digit" integer, y, to the multiple /// "digit" integer array, x[]. x[] is modified to reflect the addition and /// 1 is returned if there is a carry out, otherwise 0 is returned. /// @returns the carry of the addition. @@ -156,15 +183,15 @@ static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) { /// @brief Prefix increment operator. Increments the APInt by one. APInt& APInt::operator++() { - if (isSingleWord()) + if (isSingleWord()) ++VAL; else add_1(pVal, pVal, getNumWords(), 1); return clearUnusedBits(); } -/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from -/// the multi-digit integer array, x[], propagating the borrowed 1 value until +/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from +/// the multi-digit integer array, x[], propagating the borrowed 1 value until /// no further borrowing is neeeded or it runs out of "digits" in x. The result /// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted. /// In other words, if y > x then this function returns 1, otherwise 0. @@ -173,7 +200,7 @@ static bool sub_1(uint64_t x[], unsigned len, uint64_t y) { for (unsigned i = 0; i < len; ++i) { uint64_t X = x[i]; x[i] -= y; - if (y > X) + if (y > X) y = 1; // We have to "borrow 1" from next "digit" else { y = 0; // No need to borrow @@ -185,7 +212,7 @@ static bool sub_1(uint64_t x[], unsigned len, uint64_t y) { /// @brief Prefix decrement operator. Decrements the APInt by one. 
APInt& APInt::operator--() { - if (isSingleWord()) + if (isSingleWord()) --VAL; else sub_1(pVal, getNumWords(), 1); @@ -193,10 +220,10 @@ APInt& APInt::operator--() { } /// add - This function adds the integer array x to the integer array Y and -/// places the result in dest. +/// places the result in dest. /// @returns the carry out from the addition /// @brief General addition of 64-bit integer arrays -static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y, +static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y, unsigned len) { bool carry = false; for (unsigned i = 0; i< len; ++i) { @@ -209,10 +236,10 @@ static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y, /// Adds the RHS APint to this APInt. /// @returns this, after addition of RHS. -/// @brief Addition assignment operator. +/// @brief Addition assignment operator. APInt& APInt::operator+=(const APInt& RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); - if (isSingleWord()) + if (isSingleWord()) VAL += RHS.VAL; else { add(pVal, pVal, RHS.pVal, getNumWords()); @@ -220,10 +247,10 @@ APInt& APInt::operator+=(const APInt& RHS) { return clearUnusedBits(); } -/// Subtracts the integer array y from the integer array x +/// Subtracts the integer array y from the integer array x /// @returns returns the borrow out. /// @brief Generalized subtraction of 64-bit integer arrays. -static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y, +static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y, unsigned len) { bool borrow = false; for (unsigned i = 0; i < len; ++i) { @@ -236,10 +263,10 @@ static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y, /// Subtracts the RHS APInt from this APInt /// @returns this, after subtraction -/// @brief Subtraction assignment operator. +/// @brief Subtraction assignment operator. 
APInt& APInt::operator-=(const APInt& RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); - if (isSingleWord()) + if (isSingleWord()) VAL -= RHS.VAL; else sub(pVal, pVal, RHS.pVal, getNumWords()); @@ -247,7 +274,7 @@ APInt& APInt::operator-=(const APInt& RHS) { } /// Multiplies an integer array, x by a a uint64_t integer and places the result -/// into dest. +/// into dest. /// @returns the carry out of the multiplication. /// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer. static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) { @@ -269,19 +296,19 @@ static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) { // Determine if the add above introduces carry. hasCarry = (dest[i] < carry) ? 1 : 0; carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0); - // The upper limit of carry can be (2^32 - 1)(2^32 - 1) + + // The upper limit of carry can be (2^32 - 1)(2^32 - 1) + // (2^32 - 1) + 2^32 = 2^64. hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0); carry += (lx * hy) & 0xffffffffULL; dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL); - carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) + + carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) + (carry >> 32) + ((lx * hy) >> 32) + hx * hy; } return carry; } -/// Multiplies integer array x by integer array y and stores the result into +/// Multiplies integer array x by integer array y and stores the result into /// the integer array dest. Note that dest's size must be >= xlen + ylen. /// @brief Generalized multiplicate of integer arrays. static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[], @@ -307,7 +334,7 @@ static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[], resul = (carry << 32) | (resul & 0xffffffffULL); dest[i+j] += resul; carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? 
(1ULL << 32) : 0)+ - (carry >> 32) + (dest[i+j] < resul ? 1 : 0) + + (carry >> 32) + (dest[i+j] < resul ? 1 : 0) + ((lx * hy) >> 32) + hx * hy; } dest[i+xlen] = carry; @@ -325,7 +352,7 @@ APInt& APInt::operator*=(const APInt& RHS) { // Get some bit facts about LHS and check for zero unsigned lhsBits = getActiveBits(); unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1; - if (!lhsWords) + if (!lhsWords) // 0 * X ===> 0 return *this; @@ -385,7 +412,7 @@ APInt& APInt::operator^=(const APInt& RHS) { VAL ^= RHS.VAL; this->clearUnusedBits(); return *this; - } + } unsigned numWords = getNumWords(); for (unsigned i = 0; i < numWords; ++i) pVal[i] ^= RHS.pVal[i]; @@ -423,7 +450,7 @@ bool APInt::operator !() const { return !VAL; for (unsigned i = 0; i < getNumWords(); ++i) - if (pVal[i]) + if (pVal[i]) return false; return true; } @@ -456,7 +483,7 @@ APInt APInt::operator-(const APInt& RHS) const { } bool APInt::operator[](unsigned bitPosition) const { - return (maskBit(bitPosition) & + return (maskBit(bitPosition) & (isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) != 0; } @@ -466,7 +493,7 @@ bool APInt::EqualSlowCase(const APInt& RHS) const { unsigned n2 = RHS.getActiveBits(); // If the number of bits isn't the same, they aren't equal - if (n1 != n2) + if (n1 != n2) return false; // If the number of bits fits in a word, we only need to compare the low word. 
@@ -475,7 +502,7 @@ bool APInt::EqualSlowCase(const APInt& RHS) const { // Otherwise, compare everything for (int i = whichWord(n1 - 1); i >= 0; --i) - if (pVal[i] != RHS.pVal[i]) + if (pVal[i] != RHS.pVal[i]) return false; return true; } @@ -512,9 +539,9 @@ bool APInt::ult(const APInt& RHS) const { // Otherwise, compare all words unsigned topWord = whichWord(std::max(n1,n2)-1); for (int i = topWord; i >= 0; --i) { - if (pVal[i] > RHS.pVal[i]) + if (pVal[i] > RHS.pVal[i]) return false; - if (pVal[i] < RHS.pVal[i]) + if (pVal[i] < RHS.pVal[i]) return true; } return false; @@ -552,14 +579,14 @@ bool APInt::slt(const APInt& RHS) const { return true; else if (rhsNeg) return false; - else + else return lhs.ult(rhs); } APInt& APInt::set(unsigned bitPosition) { - if (isSingleWord()) + if (isSingleWord()) VAL |= maskBit(bitPosition); - else + else pVal[whichWord(bitPosition)] |= maskBit(bitPosition); return *this; } @@ -567,16 +594,16 @@ APInt& APInt::set(unsigned bitPosition) { /// Set the given bit to 0 whose position is given as "bitPosition". /// @brief Set a given bit to 0. APInt& APInt::clear(unsigned bitPosition) { - if (isSingleWord()) + if (isSingleWord()) VAL &= ~maskBit(bitPosition); - else + else pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition); return *this; } /// @brief Toggle every bit to its opposite value. -/// Toggle a given bit to its opposite value whose position is given +/// Toggle a given bit to its opposite value whose position is given /// as "bitPosition". /// @brief Toggles a given bit to its opposite value. 
APInt& APInt::flip(unsigned bitPosition) { @@ -586,16 +613,22 @@ APInt& APInt::flip(unsigned bitPosition) { return *this; } -unsigned APInt::getBitsNeeded(const char* str, unsigned slen, uint8_t radix) { - assert(str != 0 && "Invalid value string"); - assert(slen > 0 && "Invalid string length"); +unsigned APInt::getBitsNeeded(const StringRef& str, uint8_t radix) { + assert(!str.empty() && "Invalid string length"); + assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) && + "Radix should be 2, 8, 10, or 16!"); + + size_t slen = str.size(); - // Each computation below needs to know if its negative - unsigned isNegative = str[0] == '-'; - if (isNegative) { + // Each computation below needs to know if it's negative. + StringRef::iterator p = str.begin(); + unsigned isNegative = *p == '-'; + if (*p == '-' || *p == '+') { + p++; slen--; - str++; + assert(slen && "String is only a sign, needs a value."); } + // For radixes of power-of-two values, the bits required is accurately and // easily computed if (radix == 2) @@ -605,22 +638,27 @@ unsigned APInt::getBitsNeeded(const char* str, unsigned slen, uint8_t radix) { if (radix == 16) return slen * 4 + isNegative; - // Otherwise it must be radix == 10, the hard case - assert(radix == 10 && "Invalid radix"); - // This is grossly inefficient but accurate. We could probably do something // with a computation of roughly slen*64/20 and then adjust by the value of // the first few digits. But, I'm not sure how accurate that could be. // Compute a sufficient number of bits that is always large enough but might - // be too large. This avoids the assertion in the constructor. - unsigned sufficient = slen*64/18; + // be too large. This avoids the assertion in the constructor. This + // calculation doesn't work appropriately for the numbers 0-9, so just use 4 + // bits in that case. + unsigned sufficient = slen == 1 ? 4 : slen * 64/18; // Convert to the actual binary value. 
- APInt tmp(sufficient, str, slen, radix); + APInt tmp(sufficient, StringRef(p, slen), radix); - // Compute how many bits are required. - return isNegative + tmp.logBase2() + 1; + // Compute how many bits are required. If the log is infinite, assume we need + // just bit. + unsigned log = tmp.logBase2(); + if (log == (unsigned)-1) { + return isNegative + 1; + } else { + return isNegative + log + 1; + } } // From http://www.burtleburtle.net, byBob Jenkins. @@ -720,7 +758,7 @@ APInt APInt::getHiBits(unsigned numBits) const { /// LoBits - This function returns the low "numBits" bits of this APInt. APInt APInt::getLoBits(unsigned numBits) const { - return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits), + return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits), BitWidth - numBits); } @@ -837,7 +875,7 @@ APInt APInt::byteSwap() const { } } -APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1, +APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1, const APInt& API2) { APInt A = API1, B = API2; while (!!B) { @@ -870,7 +908,7 @@ APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { // If the exponent doesn't shift all bits out of the mantissa if (exp < 52) - return isNeg ? -APInt(width, mantissa >> (52 - exp)) : + return isNeg ? -APInt(width, mantissa >> (52 - exp)) : APInt(width, mantissa >> (52 - exp)); // If the client didn't provide enough bits for us to shift the mantissa into @@ -884,22 +922,23 @@ APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { return isNeg ? -Tmp : Tmp; } -/// RoundToDouble - This function convert this APInt to a double. +/// RoundToDouble - This function converts this APInt to a double. 
/// The layout for double is as following (IEEE Standard 754): /// -------------------------------------- /// | Sign Exponent Fraction Bias | /// |-------------------------------------- | /// | 1[63] 11[62-52] 52[51-00] 1023 | -/// -------------------------------------- +/// -------------------------------------- double APInt::roundToDouble(bool isSigned) const { // Handle the simple case where the value is contained in one uint64_t. + // It is wrong to optimize getWord(0) to VAL; there might be more than one word. if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) { if (isSigned) { - int64_t sext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth); + int64_t sext = (int64_t(getWord(0)) << (64-BitWidth)) >> (64-BitWidth); return double(sext); } else - return double(VAL); + return double(getWord(0)); } // Determine if the value is negative. @@ -920,7 +959,7 @@ double APInt::roundToDouble(bool isSigned) const { if (exp > 1023) { if (!isSigned || !isNeg) return std::numeric_limits<double>::infinity(); - else + else return -std::numeric_limits<double>::infinity(); } exp += 1023; // Increment for 1023 bias @@ -1030,7 +1069,7 @@ APInt &APInt::zext(unsigned width) { uint64_t *newVal = getClearedMemory(wordsAfter); if (wordsBefore == 1) newVal[0] = VAL; - else + else for (unsigned i = 0; i < wordsBefore; ++i) newVal[i] = pVal[i]; if (wordsBefore != 1) @@ -1076,7 +1115,7 @@ APInt APInt::ashr(unsigned shiftAmt) const { return APInt(BitWidth, 0); // undefined else { unsigned SignBit = APINT_BITS_PER_WORD - BitWidth; - return APInt(BitWidth, + return APInt(BitWidth, (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt)); } } @@ -1113,11 +1152,11 @@ APInt APInt::ashr(unsigned shiftAmt) const { if (bitsInWord < APINT_BITS_PER_WORD) val[breakWord] |= ~0ULL << bitsInWord; // set high bits } else { - // Shift the low order words + // Shift the low order words for (unsigned i = 0; i < breakWord; ++i) { // This combines the shifted corresponding word with the low bits from 
// the next word (shifted into this word's high bits). - val[i] = (pVal[i+offset] >> wordShift) | + val[i] = (pVal[i+offset] >> wordShift) | (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift)); } @@ -1130,10 +1169,10 @@ APInt APInt::ashr(unsigned shiftAmt) const { if (isNegative()) { if (wordShift > bitsInWord) { if (breakWord > 0) - val[breakWord-1] |= + val[breakWord-1] |= ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord)); val[breakWord] |= ~0ULL; - } else + } else val[breakWord] |= (~0ULL << (bitsInWord - wordShift)); } } @@ -1157,7 +1196,7 @@ APInt APInt::lshr(unsigned shiftAmt) const { if (isSingleWord()) { if (shiftAmt == BitWidth) return APInt(BitWidth, 0); - else + else return APInt(BitWidth, this->VAL >> shiftAmt); } @@ -1168,7 +1207,7 @@ APInt APInt::lshr(unsigned shiftAmt) const { return APInt(BitWidth, 0); // If none of the bits are shifted out, the result is *this. This avoids - // issues with shifting by the size of the integer type, which produces + // issues with shifting by the size of the integer type, which produces // undefined results in the code below. This is also an optimization. if (shiftAmt == 0) return *this; @@ -1199,7 +1238,7 @@ APInt APInt::lshr(unsigned shiftAmt) const { return APInt(val,BitWidth).clearUnusedBits(); } - // Shift the low order words + // Shift the low order words unsigned breakWord = getNumWords() - offset -1; for (unsigned i = 0; i < breakWord; ++i) val[i] = (pVal[i+offset] >> wordShift) | @@ -1306,7 +1345,7 @@ APInt APInt::rotr(unsigned rotateAmt) const { // values using less than 52 bits, the value is converted to double and then // the libc sqrt function is called. The result is rounded and then converted // back to a uint64_t which is then used to construct the result. Finally, -// the Babylonian method for computing square roots is used. +// the Babylonian method for computing square roots is used. APInt APInt::sqrt() const { // Determine the magnitude of the value. 
@@ -1318,7 +1357,7 @@ APInt APInt::sqrt() const { static const uint8_t results[32] = { /* 0 */ 0, /* 1- 2 */ 1, 1, - /* 3- 6 */ 2, 2, 2, 2, + /* 3- 6 */ 2, 2, 2, 2, /* 7-12 */ 3, 3, 3, 3, 3, 3, /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4, /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, @@ -1334,10 +1373,10 @@ APInt APInt::sqrt() const { if (magnitude < 52) { #ifdef _MSC_VER // Amazingly, VC++ doesn't have round(). - return APInt(BitWidth, + return APInt(BitWidth, uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5); #else - return APInt(BitWidth, + return APInt(BitWidth, uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0]))))); #endif } @@ -1346,7 +1385,7 @@ APInt APInt::sqrt() const { // is a classical Babylonian method for computing the square root. This code // was adapted to APINt from a wikipedia article on such computations. // See http://www.wikipedia.org/ and go to the page named - // Calculate_an_integer_square_root. + // Calculate_an_integer_square_root. unsigned nbits = BitWidth, i = 4; APInt testy(BitWidth, 16); APInt x_old(BitWidth, 1); @@ -1354,13 +1393,13 @@ APInt APInt::sqrt() const { APInt two(BitWidth, 2); // Select a good starting value using binary logarithms. - for (;; i += 2, testy = testy.shl(2)) + for (;; i += 2, testy = testy.shl(2)) if (i >= nbits || this->ule(testy)) { x_old = x_old.shl(i / 2); break; } - // Use the Babylonian method to arrive at the integer square root: + // Use the Babylonian method to arrive at the integer square root: for (;;) { x_new = (this->udiv(x_old) + x_old).udiv(two); if (x_old.ule(x_new)) @@ -1369,9 +1408,9 @@ APInt APInt::sqrt() const { } // Make sure we return the closest approximation - // NOTE: The rounding calculation below is correct. It will produce an + // NOTE: The rounding calculation below is correct. It will produce an // off-by-one discrepancy with results from pari/gp. 
That discrepancy has been - // determined to be a rounding issue with pari/gp as it begins to use a + // determined to be a rounding issue with pari/gp as it begins to use a // floating point representation after 192 bits. There are no discrepancies // between this algorithm and pari/gp for bit widths < 192 bits. APInt square(x_old * x_old); @@ -1386,7 +1425,7 @@ APInt APInt::sqrt() const { else return x_old + 1; } else - assert(0 && "Error in APInt::sqrt computation"); + llvm_unreachable("Error in APInt::sqrt computation"); return x_old + 1; } @@ -1409,7 +1448,7 @@ APInt APInt::multiplicativeInverse(const APInt& modulo) const { APInt r[2] = { modulo, *this }; APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) }; APInt q(BitWidth, 0); - + unsigned i; for (i = 0; r[i^1] != 0; i ^= 1) { // An overview of the math without the confusing bit-flipping: @@ -1442,11 +1481,9 @@ APInt::ms APInt::magic() const { const APInt& d = *this; unsigned p; APInt ad, anc, delta, q1, r1, q2, r2, t; - APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()); APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); - APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); struct ms mag; - + ad = d.abs(); t = signedMin + (d.lshr(d.getBitWidth() - 1)); anc = t - 1 - t.urem(ad); // absolute value of nc @@ -1471,7 +1508,7 @@ APInt::ms APInt::magic() const { } delta = ad - r2; } while (q1.ule(delta) || (q1 == delta && r1 == 0)); - + mag.m = q2 + 1; if (d.isNegative()) mag.m = -mag.m; // resulting magic number mag.s = p - d.getBitWidth(); // resulting shift @@ -1543,17 +1580,17 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, uint64_t b = uint64_t(1) << 32; #if 0 - DEBUG(cerr << "KnuthDiv: m=" << m << " n=" << n << '\n'); - DEBUG(cerr << "KnuthDiv: original:"); - DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]); - DEBUG(cerr << " by"); - DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]); - DEBUG(cerr << 
'\n'); + DEBUG(errs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); + DEBUG(errs() << "KnuthDiv: original:"); + DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]); + DEBUG(errs() << " by"); + DEBUG(for (int i = n; i >0; i--) errs() << " " << v[i-1]); + DEBUG(errs() << '\n'); #endif - // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of - // u and v by d. Note that we have taken Knuth's advice here to use a power - // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of - // 2 allows us to shift instead of multiply and it is easy to determine the + // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of + // u and v by d. Note that we have taken Knuth's advice here to use a power + // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of + // 2 allows us to shift instead of multiply and it is easy to determine the // shift amount from the leading zeros. We are basically normalizing the u // and v so that its high bits are shifted to the top of v's range without // overflow. Note that this can require an extra word in u so that u must @@ -1575,27 +1612,27 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, } u[m+n] = u_carry; #if 0 - DEBUG(cerr << "KnuthDiv: normal:"); - DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]); - DEBUG(cerr << " by"); - DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]); - DEBUG(cerr << '\n'); + DEBUG(errs() << "KnuthDiv: normal:"); + DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]); + DEBUG(errs() << " by"); + DEBUG(for (int i = n; i >0; i--) errs() << " " << v[i-1]); + DEBUG(errs() << '\n'); #endif // D2. [Initialize j.] Set j to m. This is the loop counter over the places. int j = m; do { - DEBUG(cerr << "KnuthDiv: quotient digit #" << j << '\n'); - // D3. [Calculate q'.]. + DEBUG(errs() << "KnuthDiv: quotient digit #" << j << '\n'); + // D3. [Calculate q'.]. 
// Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test // on v[n-2] determines at high speed most of the cases in which the trial - // value qp is one too large, and it eliminates all cases where qp is two - // too large. + // value qp is one too large, and it eliminates all cases where qp is two + // too large. uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]); - DEBUG(cerr << "KnuthDiv: dividend == " << dividend << '\n'); + DEBUG(errs() << "KnuthDiv: dividend == " << dividend << '\n'); uint64_t qp = dividend / v[n-1]; uint64_t rp = dividend % v[n-1]; if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { @@ -1604,20 +1641,20 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) qp--; } - DEBUG(cerr << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); + DEBUG(errs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation // consists of a simple multiplication by a one-place number, combined with - // a subtraction. + // a subtraction. 
bool isNeg = false; for (unsigned i = 0; i < n; ++i) { uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32); uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]); bool borrow = subtrahend > u_tmp; - DEBUG(cerr << "KnuthDiv: u_tmp == " << u_tmp - << ", subtrahend == " << subtrahend - << ", borrow = " << borrow << '\n'); + DEBUG(errs() << "KnuthDiv: u_tmp == " << u_tmp + << ", subtrahend == " << subtrahend + << ", borrow = " << borrow << '\n'); uint64_t result = u_tmp - subtrahend; unsigned k = j + i; @@ -1629,14 +1666,14 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, k++; } isNeg |= borrow; - DEBUG(cerr << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " << - u[j+i+1] << '\n'); + DEBUG(errs() << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " << + u[j+i+1] << '\n'); } - DEBUG(cerr << "KnuthDiv: after subtraction:"); - DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]); - DEBUG(cerr << '\n'); - // The digits (u[j+n]...u[j]) should be kept positive; if the result of - // this step is actually negative, (u[j+n]...u[j]) should be left as the + DEBUG(errs() << "KnuthDiv: after subtraction:"); + DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]); + DEBUG(errs() << '\n'); + // The digits (u[j+n]...u[j]) should be kept positive; if the result of + // this step is actually negative, (u[j+n]...u[j]) should be left as the // true value plus b**(n+1), namely as the b's complement of // the true value, and a "borrow" to the left should be remembered. // @@ -1647,20 +1684,20 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, carry = carry && u[i] == 0; } } - DEBUG(cerr << "KnuthDiv: after complement:"); - DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]); - DEBUG(cerr << '\n'); + DEBUG(errs() << "KnuthDiv: after complement:"); + DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]); + DEBUG(errs() << '\n'); - // D5. [Test remainder.] Set q[j] = qp. 
If the result of step D4 was + // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was // negative, go to step D6; otherwise go on to step D7. q[j] = (unsigned)qp; if (isNeg) { - // D6. [Add back]. The probability that this step is necessary is very + // D6. [Add back]. The probability that this step is necessary is very // small, on the order of only 2/b. Make sure that test data accounts for - // this possibility. Decrease q[j] by 1 + // this possibility. Decrease q[j] by 1 q[j]--; - // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). - // A carry will occur to the left of u[j+n], and it should be ignored + // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). + // A carry will occur to the left of u[j+n], and it should be ignored // since it cancels with the borrow that occurred in D4. bool carry = false; for (unsigned i = 0; i < n; i++) { @@ -1670,16 +1707,16 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, } u[j+n] += carry; } - DEBUG(cerr << "KnuthDiv: after correction:"); - DEBUG(for (int i = m+n; i >=0; i--) cerr <<" " << u[i]); - DEBUG(cerr << "\nKnuthDiv: digit result = " << q[j] << '\n'); + DEBUG(errs() << "KnuthDiv: after correction:"); + DEBUG(for (int i = m+n; i >=0; i--) errs() <<" " << u[i]); + DEBUG(errs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. } while (--j >= 0); - DEBUG(cerr << "KnuthDiv: quotient:"); - DEBUG(for (int i = m; i >=0; i--) cerr <<" " << q[i]); - DEBUG(cerr << '\n'); + DEBUG(errs() << "KnuthDiv: quotient:"); + DEBUG(for (int i = m; i >=0; i--) errs() <<" " << q[i]); + DEBUG(errs() << '\n'); // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired // remainder may be obtained by dividing u[...] by d. If r is non-null we @@ -1690,22 +1727,22 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, // shift right here. 
In order to mak if (shift) { unsigned carry = 0; - DEBUG(cerr << "KnuthDiv: remainder:"); + DEBUG(errs() << "KnuthDiv: remainder:"); for (int i = n-1; i >= 0; i--) { r[i] = (u[i] >> shift) | carry; carry = u[i] << (32 - shift); - DEBUG(cerr << " " << r[i]); + DEBUG(errs() << " " << r[i]); } } else { for (int i = n-1; i >= 0; i--) { r[i] = u[i]; - DEBUG(cerr << " " << r[i]); + DEBUG(errs() << " " << r[i]); } } - DEBUG(cerr << '\n'); + DEBUG(errs() << '\n'); } #if 0 - DEBUG(cerr << std::setbase(10) << '\n'); + DEBUG(errs() << '\n'); #endif } @@ -1715,12 +1752,12 @@ void APInt::divide(const APInt LHS, unsigned lhsWords, { assert(lhsWords >= rhsWords && "Fractional result"); - // First, compose the values into an array of 32-bit words instead of + // First, compose the values into an array of 32-bit words instead of // 64-bit words. This is a necessity of both the "short division" algorithm - // and the the Knuth "classical algorithm" which requires there to be native - // operations for +, -, and * on an m bit value with an m*2 bit result. We - // can't use 64-bit operands here because we don't have native results of - // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't + // and the the Knuth "classical algorithm" which requires there to be native + // operations for +, -, and * on an m bit value with an m*2 bit result. We + // can't use 64-bit operands here because we don't have native results of + // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't // work on large-endian machines. uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT); unsigned n = rhsWords * 2; @@ -1769,9 +1806,9 @@ void APInt::divide(const APInt LHS, unsigned lhsWords, if (Remainder) memset(R, 0, n * sizeof(unsigned)); - // Now, adjust m and n for the Knuth division. n is the number of words in + // Now, adjust m and n for the Knuth division. n is the number of words in // the divisor. 
m is the number of words by which the dividend exceeds the - // divisor (i.e. m+n is the length of the dividend). These sizes must not + // divisor (i.e. m+n is the length of the dividend). These sizes must not // contain any zero words or the Knuth algorithm fails. for (unsigned i = n; i > 0 && V[i-1] == 0; i--) { n--; @@ -1828,10 +1865,10 @@ void APInt::divide(const APInt LHS, unsigned lhsWords, } else Quotient->clear(); - // The quotient is in Q. Reconstitute the quotient into Quotient's low + // The quotient is in Q. Reconstitute the quotient into Quotient's low // order words. if (lhsWords == 1) { - uint64_t tmp = + uint64_t tmp = uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2)); if (Quotient->isSingleWord()) Quotient->VAL = tmp; @@ -1840,7 +1877,7 @@ void APInt::divide(const APInt LHS, unsigned lhsWords, } else { assert(!Quotient->isSingleWord() && "Quotient APInt not large enough"); for (unsigned i = 0; i < lhsWords; ++i) - Quotient->pVal[i] = + Quotient->pVal[i] = uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2)); } } @@ -1862,7 +1899,7 @@ void APInt::divide(const APInt LHS, unsigned lhsWords, // The remainder is in R. Reconstitute the remainder into Remainder's low // order words. if (rhsWords == 1) { - uint64_t tmp = + uint64_t tmp = uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2)); if (Remainder->isSingleWord()) Remainder->VAL = tmp; @@ -1871,7 +1908,7 @@ void APInt::divide(const APInt LHS, unsigned lhsWords, } else { assert(!Remainder->isSingleWord() && "Remainder APInt not large enough"); for (unsigned i = 0; i < rhsWords; ++i) - Remainder->pVal[i] = + Remainder->pVal[i] = uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2)); } } @@ -1902,9 +1939,9 @@ APInt APInt::udiv(const APInt& RHS) const { unsigned lhsWords = !lhsBits ? 
0 : (APInt::whichWord(lhsBits - 1) + 1); // Deal with some degenerate cases - if (!lhsWords) + if (!lhsWords) // 0 / X ===> 0 - return APInt(BitWidth, 0); + return APInt(BitWidth, 0); else if (lhsWords < rhsWords || this->ult(RHS)) { // X / Y ===> 0, iff X < Y return APInt(BitWidth, 0); @@ -1959,7 +1996,7 @@ APInt APInt::urem(const APInt& RHS) const { return Remainder; } -void APInt::udivrem(const APInt &LHS, const APInt &RHS, +void APInt::udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder) { // Get some size facts about the dividend and divisor unsigned lhsBits = LHS.getActiveBits(); @@ -1968,24 +2005,24 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS, unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1); // Check the degenerate cases - if (lhsWords == 0) { + if (lhsWords == 0) { Quotient = 0; // 0 / Y ===> 0 Remainder = 0; // 0 % Y ===> 0 return; - } - - if (lhsWords < rhsWords || LHS.ult(RHS)) { + } + + if (lhsWords < rhsWords || LHS.ult(RHS)) { Quotient = 0; // X / Y ===> 0, iff X < Y Remainder = LHS; // X % Y ===> X, iff X < Y return; - } - + } + if (LHS == RHS) { Quotient = 1; // X / X ===> 1 Remainder = 0; // X % X ===> 0; return; - } - + } + if (lhsWords == 1 && rhsWords == 1) { // There is only one word to consider so use the native versions. uint64_t lhsValue = LHS.isSingleWord() ? 
LHS.VAL : LHS.pVal[0]; @@ -1999,19 +2036,25 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS, divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder); } -void APInt::fromString(unsigned numbits, const char *str, unsigned slen, - uint8_t radix) { +void APInt::fromString(unsigned numbits, const StringRef& str, uint8_t radix) { // Check our assumptions here + assert(!str.empty() && "Invalid string length"); assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) && "Radix should be 2, 8, 10, or 16!"); - assert(str && "String is null?"); - bool isNeg = str[0] == '-'; - if (isNeg) - str++, slen--; + + StringRef::iterator p = str.begin(); + size_t slen = str.size(); + bool isNeg = *p == '-'; + if (*p == '-' || *p == '+') { + p++; + slen--; + assert(slen && "String is only a sign, needs a value."); + } assert((slen <= numbits || radix != 2) && "Insufficient bit width"); assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width"); assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width"); - assert((((slen-1)*64)/22 <= numbits || radix != 10) && "Insufficient bit width"); + assert((((slen-1)*64)/22 <= numbits || radix != 10) + && "Insufficient bit width"); // Allocate memory if (!isSingleWord()) @@ -2026,30 +2069,9 @@ void APInt::fromString(unsigned numbits, const char *str, unsigned slen, APInt apradix(getBitWidth(), radix); // Enter digit traversal loop - for (unsigned i = 0; i < slen; i++) { - // Get a digit - unsigned digit = 0; - char cdigit = str[i]; - if (radix == 16) { - if (!isxdigit(cdigit)) - assert(0 && "Invalid hex digit in string"); - if (isdigit(cdigit)) - digit = cdigit - '0'; - else if (cdigit >= 'a') - digit = cdigit - 'a' + 10; - else if (cdigit >= 'A') - digit = cdigit - 'A' + 10; - else - assert(0 && "huh? 
we shouldn't get here"); - } else if (isdigit(cdigit)) { - digit = cdigit - '0'; - assert((radix == 10 || - (radix == 8 && digit != 8 && digit != 9) || - (radix == 2 && (digit == 0 || digit == 1))) && - "Invalid digit in string for given radix"); - } else { - assert(0 && "Invalid character in digit string"); - } + for (StringRef::iterator e = str.end(); p != e; ++p) { + unsigned digit = getDigit(*p, radix); + assert(digit < radix && "Invalid character in digit string"); // Shift or multiply the value by the radix if (slen > 1) { @@ -2077,19 +2099,19 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed) const { assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) && "Radix should be 2, 8, 10, or 16!"); - + // First, check for a zero value and just short circuit the logic below. if (*this == 0) { Str.push_back('0'); return; } - + static const char Digits[] = "0123456789ABCDEF"; - + if (isSingleWord()) { char Buffer[65]; char *BufPtr = Buffer+65; - + uint64_t N; if (Signed) { int64_t I = getSExtValue(); @@ -2101,7 +2123,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, } else { N = getZExtValue(); } - + while (N) { *--BufPtr = Digits[N % Radix]; N /= Radix; @@ -2111,7 +2133,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, } APInt Tmp(*this); - + if (Signed && isNegative()) { // They want to print the signed version and it is a negative value // Flip the bits and add one to turn it into the equivalent positive @@ -2120,18 +2142,18 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, Tmp++; Str.push_back('-'); } - + // We insert the digits backward, then reverse them to get the right order. 
unsigned StartDig = Str.size(); - - // For the 2, 8 and 16 bit cases, we can just shift instead of divide - // because the number of bits per digit (1, 3 and 4 respectively) divides + + // For the 2, 8 and 16 bit cases, we can just shift instead of divide + // because the number of bits per digit (1, 3 and 4 respectively) divides // equaly. We just shift until the value is zero. if (Radix != 10) { // Just shift tmp right for each digit width until it becomes zero unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1)); unsigned MaskAmt = Radix - 1; - + while (Tmp != 0) { unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt; Str.push_back(Digits[Digit]); @@ -2142,7 +2164,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, while (Tmp != 0) { APInt APdigit(1, 0); APInt tmp2(Tmp.getBitWidth(), 0); - divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2, + divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2, &APdigit); unsigned Digit = (unsigned)APdigit.getZExtValue(); assert(Digit < Radix && "divide failed"); @@ -2150,7 +2172,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, Tmp = tmp2; } } - + // Reverse the digits before returning. 
std::reverse(Str.begin()+StartDig, Str.end()); } @@ -2161,7 +2183,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { SmallString<40> S; toString(S, Radix, Signed); - return S.c_str(); + return S.str(); } @@ -2169,26 +2191,21 @@ void APInt::dump() const { SmallString<40> S, U; this->toStringUnsigned(U); this->toStringSigned(S); - fprintf(stderr, "APInt(%db, %su %ss)", BitWidth, U.c_str(), S.c_str()); + errs() << "APInt(" << BitWidth << "b, " + << U.str() << "u " << S.str() << "s)"; } void APInt::print(raw_ostream &OS, bool isSigned) const { SmallString<40> S; this->toString(S, 10, isSigned); - OS << S.c_str(); -} - -std::ostream &llvm::operator<<(std::ostream &o, const APInt &I) { - raw_os_ostream OS(o); - OS << I; - return o; + OS << S.str(); } // This implements a variety of operations on a representation of // arbitrary precision, two's-complement, bignum integer values. -/* Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe - and unrestricting assumption. */ +// Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe +// and unrestricting assumption. #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 
1 : -1] COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0); diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp index db0d8f3..7a3fd87 100644 --- a/lib/Support/Allocator.cpp +++ b/lib/Support/Allocator.cpp @@ -12,130 +12,160 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Allocator.h" -#include "llvm/Support/Recycler.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Support/Streams.h" -#include <ostream> -using namespace llvm; +#include "llvm/Support/Recycler.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/System/Memory.h" +#include <cstring> -//===----------------------------------------------------------------------===// -// MemRegion class implementation -//===----------------------------------------------------------------------===// +namespace llvm { -namespace { -/// MemRegion - This is one chunk of the BumpPtrAllocator. -class MemRegion { - unsigned RegionSize; - MemRegion *Next; - char *NextPtr; -public: - void Init(unsigned size, unsigned Alignment, MemRegion *next) { - RegionSize = size; - Next = next; - NextPtr = (char*)(this+1); - - // Align NextPtr. - NextPtr = (char*)((intptr_t)(NextPtr+Alignment-1) & - ~(intptr_t)(Alignment-1)); - } - - const MemRegion *getNext() const { return Next; } - unsigned getNumBytesAllocated() const { - return NextPtr-(const char*)this; - } - - /// Allocate - Allocate and return at least the specified number of bytes. - /// - void *Allocate(size_t AllocSize, size_t Alignment, MemRegion **RegPtr) { - - char* Result = (char*) (((uintptr_t) (NextPtr+Alignment-1)) - & ~((uintptr_t) Alignment-1)); - - // Speculate the new value of NextPtr. - char* NextPtrTmp = Result + AllocSize; - - // If we are still within the current region, return Result. - if (unsigned (NextPtrTmp - (char*) this) <= RegionSize) { - NextPtr = NextPtrTmp; - return Result; - } - - // Otherwise, we have to allocate a new chunk. Create one twice as big as - // this one. 
- MemRegion *NewRegion = (MemRegion *)malloc(RegionSize*2); - NewRegion->Init(RegionSize*2, Alignment, this); - - // Update the current "first region" pointer to point to the new region. - *RegPtr = NewRegion; - - // Try allocating from it now. - return NewRegion->Allocate(AllocSize, Alignment, RegPtr); - } - - /// Deallocate - Recursively release all memory for this and its next regions - /// to the system. - void Deallocate() { - MemRegion *next = Next; - free(this); - if (next) - next->Deallocate(); - } +BumpPtrAllocator::BumpPtrAllocator(size_t size, size_t threshold, + SlabAllocator &allocator) + : SlabSize(size), SizeThreshold(threshold), Allocator(allocator), + CurSlab(0), BytesAllocated(0) { + StartNewSlab(); +} - /// DeallocateAllButLast - Recursively release all memory for this and its - /// next regions to the system stopping at the last region in the list. - /// Returns the pointer to the last region. - MemRegion *DeallocateAllButLast() { - MemRegion *next = Next; - if (!next) - return this; - free(this); - return next->DeallocateAllButLast(); - } -}; +BumpPtrAllocator::~BumpPtrAllocator() { + DeallocateSlabs(CurSlab); } -//===----------------------------------------------------------------------===// -// BumpPtrAllocator class implementation -//===----------------------------------------------------------------------===// +/// AlignPtr - Align Ptr to Alignment bytes, rounding up. Alignment should +/// be a power of two. This method rounds up, so AlignPtr(7, 4) == 8 and +/// AlignPtr(8, 4) == 8. +char *BumpPtrAllocator::AlignPtr(char *Ptr, size_t Alignment) { + assert(Alignment && (Alignment & (Alignment - 1)) == 0 && + "Alignment is not a power of two!"); -BumpPtrAllocator::BumpPtrAllocator() { - TheMemory = malloc(4096); - ((MemRegion*)TheMemory)->Init(4096, 1, 0); + // Do the alignment. 
+ return (char*)(((uintptr_t)Ptr + Alignment - 1) & + ~(uintptr_t)(Alignment - 1)); } -BumpPtrAllocator::~BumpPtrAllocator() { - ((MemRegion*)TheMemory)->Deallocate(); +/// StartNewSlab - Allocate a new slab and move the bump pointers over into +/// the new slab. Modifies CurPtr and End. +void BumpPtrAllocator::StartNewSlab() { + MemSlab *NewSlab = Allocator.Allocate(SlabSize); + NewSlab->NextPtr = CurSlab; + CurSlab = NewSlab; + CurPtr = (char*)(CurSlab + 1); + End = ((char*)CurSlab) + CurSlab->Size; +} + +/// DeallocateSlabs - Deallocate all memory slabs after and including this +/// one. +void BumpPtrAllocator::DeallocateSlabs(MemSlab *Slab) { + while (Slab) { + MemSlab *NextSlab = Slab->NextPtr; +#ifndef NDEBUG + // Poison the memory so stale pointers crash sooner. Note we must + // preserve the Size and NextPtr fields at the beginning. + sys::Memory::setRangeWritable(Slab + 1, Slab->Size - sizeof(MemSlab)); + memset(Slab + 1, 0xCD, Slab->Size - sizeof(MemSlab)); +#endif + Allocator.Deallocate(Slab); + Slab = NextSlab; + } } +/// Reset - Deallocate all but the current slab and reset the current pointer +/// to the beginning of it, freeing all memory allocated so far. void BumpPtrAllocator::Reset() { - MemRegion *MRP = (MemRegion*)TheMemory; - MRP = MRP->DeallocateAllButLast(); - MRP->Init(4096, 1, 0); - TheMemory = MRP; + DeallocateSlabs(CurSlab->NextPtr); + CurSlab->NextPtr = 0; + CurPtr = (char*)(CurSlab + 1); + End = ((char*)CurSlab) + CurSlab->Size; } -void *BumpPtrAllocator::Allocate(size_t Size, size_t Align) { - MemRegion *MRP = (MemRegion*)TheMemory; - void *Ptr = MRP->Allocate(Size, Align, &MRP); - TheMemory = MRP; +/// Allocate - Allocate space at the specified alignment. +/// +void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) { + // Keep track of how many bytes we've allocated. + BytesAllocated += Size; + + // 0-byte alignment means 1-byte alignment. 
+ if (Alignment == 0) Alignment = 1; + + // Allocate the aligned space, going forwards from CurPtr. + char *Ptr = AlignPtr(CurPtr, Alignment); + + // Check if we can hold it. + if (Ptr + Size <= End) { + CurPtr = Ptr + Size; + return Ptr; + } + + // If Size is really big, allocate a separate slab for it. + size_t PaddedSize = Size + sizeof(MemSlab) + Alignment - 1; + if (PaddedSize > SizeThreshold) { + MemSlab *NewSlab = Allocator.Allocate(PaddedSize); + + // Put the new slab after the current slab, since we are not allocating + // into it. + NewSlab->NextPtr = CurSlab->NextPtr; + CurSlab->NextPtr = NewSlab; + + Ptr = AlignPtr((char*)(NewSlab + 1), Alignment); + assert((uintptr_t)Ptr + Size <= (uintptr_t)NewSlab + NewSlab->Size); + return Ptr; + } + + // Otherwise, start a new slab and try again. + StartNewSlab(); + Ptr = AlignPtr(CurPtr, Alignment); + CurPtr = Ptr + Size; + assert(CurPtr <= End && "Unable to allocate memory!"); return Ptr; } +unsigned BumpPtrAllocator::GetNumSlabs() const { + unsigned NumSlabs = 0; + for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) { + ++NumSlabs; + } + return NumSlabs; +} + void BumpPtrAllocator::PrintStats() const { - unsigned BytesUsed = 0; - unsigned NumRegions = 0; - const MemRegion *R = (MemRegion*)TheMemory; - for (; R; R = R->getNext(), ++NumRegions) - BytesUsed += R->getNumBytesAllocated(); - - cerr << "\nNumber of memory regions: " << NumRegions << "\n"; - cerr << "Bytes allocated: " << BytesUsed << "\n"; + unsigned NumSlabs = 0; + size_t TotalMemory = 0; + for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) { + TotalMemory += Slab->Size; + ++NumSlabs; + } + + errs() << "\nNumber of memory regions: " << NumSlabs << '\n' + << "Bytes used: " << BytesAllocated << '\n' + << "Bytes allocated: " << TotalMemory << '\n' + << "Bytes wasted: " << (TotalMemory - BytesAllocated) + << " (includes alignment, etc)\n"; +} + +MallocSlabAllocator BumpPtrAllocator::DefaultSlabAllocator = + MallocSlabAllocator(); + 
+SlabAllocator::~SlabAllocator() { } + +MallocSlabAllocator::~MallocSlabAllocator() { } + +MemSlab *MallocSlabAllocator::Allocate(size_t Size) { + MemSlab *Slab = (MemSlab*)Allocator.Allocate(Size, 0); + Slab->Size = Size; + Slab->NextPtr = 0; + return Slab; +} + +void MallocSlabAllocator::Deallocate(MemSlab *Slab) { + Allocator.Deallocate(Slab); +} + +void PrintRecyclerStats(size_t Size, + size_t Align, + size_t FreeListSize) { + errs() << "Recycler element size: " << Size << '\n' + << "Recycler element alignment: " << Align << '\n' + << "Number of elements free for recycling: " << FreeListSize << '\n'; } -void llvm::PrintRecyclerStats(size_t Size, - size_t Align, - size_t FreeListSize) { - cerr << "Recycler element size: " << Size << '\n'; - cerr << "Recycler element alignment: " << Align << '\n'; - cerr << "Number of elements free for recycling: " << FreeListSize << '\n'; } diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index f26c2c0..cd355ff 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -3,32 +3,43 @@ add_llvm_library(LLVMSupport APInt.cpp APSInt.cpp Allocator.cpp - Annotation.cpp CommandLine.cpp ConstantRange.cpp Debug.cpp Dwarf.cpp + ErrorHandling.cpp FileUtilities.cpp FoldingSet.cpp + FormattedStream.cpp GraphWriter.cpp IsInf.cpp IsNAN.cpp ManagedStatic.cpp MemoryBuffer.cpp + MemoryObject.cpp PluginLoader.cpp PrettyStackTrace.cpp + Regex.cpp SlowOperationInformer.cpp SmallPtrSet.cpp SourceMgr.cpp Statistic.cpp - Streams.cpp StringExtras.cpp StringMap.cpp StringPool.cpp + StringRef.cpp SystemUtils.cpp + TargetRegistry.cpp Timer.cpp Triple.cpp + Twine.cpp + raw_os_ostream.cpp raw_ostream.cpp + regcomp.c + regerror.c + regexec.c + regfree.c + regstrlcpy.c ) target_link_libraries (LLVMSupport LLVMSystem) diff --git a/lib/Support/COPYRIGHT.regex b/lib/Support/COPYRIGHT.regex new file mode 100644 index 0000000..a6392fd --- /dev/null +++ b/lib/Support/COPYRIGHT.regex @@ -0,0 +1,54 @@ +$OpenBSD: COPYRIGHT,v 1.3 
2003/06/02 20:18:36 millert Exp $ + +Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved. +This software is not subject to any license of the American Telephone +and Telegraph Company or of the Regents of the University of California. + +Permission is granted to anyone to use this software for any purpose on +any computer system, and to alter it and redistribute it, subject +to the following restrictions: + +1. The author is not responsible for the consequences of use of this + software, no matter how awful, even if they arise from flaws in it. + +2. The origin of this software must not be misrepresented, either by + explicit claim or by omission. Since few users ever read sources, + credits must appear in the documentation. + +3. Altered versions must be plainly marked as such, and must not be + misrepresented as being the original software. Since few users + ever read sources, credits must appear in the documentation. + +4. This notice may not be removed or altered. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +/*- + * Copyright (c) 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)COPYRIGHT 8.1 (Berkeley) 3/16/94 + */ diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 4922560..626daa2 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -16,22 +16,22 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/System/Host.h" #include "llvm/System/Path.h" -#include <algorithm> -#include <functional> -#include <map> -#include <ostream> -#include <set> -#include <cstdlib> +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Config/config.h" #include <cerrno> -#include <cstring> -#include <climits> +#include <cstdlib> using namespace llvm; using namespace cl; @@ -105,10 +105,10 @@ void Option::addArgument() { /// 
GetOptionInfo - Scan the list of registered options, turning them into data /// structures that are easier to handle. -static void GetOptionInfo(std::vector<Option*> &PositionalOpts, - std::vector<Option*> &SinkOpts, - std::map<std::string, Option*> &OptionsMap) { - std::vector<const char*> OptionNames; +static void GetOptionInfo(SmallVectorImpl<Option*> &PositionalOpts, + SmallVectorImpl<Option*> &SinkOpts, + StringMap<Option*> &OptionsMap) { + SmallVector<const char*, 16> OptionNames; Option *CAOpt = 0; // The ConsumeAfter option if it exists. for (Option *O = RegisteredOptionList; O; O = O->getNextRegisteredOption()) { // If this option wants to handle multiple option names, get the full set. @@ -120,9 +120,8 @@ static void GetOptionInfo(std::vector<Option*> &PositionalOpts, // Handle named options. for (size_t i = 0, e = OptionNames.size(); i != e; ++i) { // Add argument to the argument map! - if (!OptionsMap.insert(std::pair<std::string,Option*>(OptionNames[i], - O)).second) { - cerr << ProgramName << ": CommandLine Error: Argument '" + if (OptionsMap.GetOrCreateValue(OptionNames[i], O).second != O) { + errs() << ProgramName << ": CommandLine Error: Argument '" << OptionNames[i] << "' defined more than once!\n"; } } @@ -151,29 +150,39 @@ static void GetOptionInfo(std::vector<Option*> &PositionalOpts, /// LookupOption - Lookup the option specified by the specified option on the /// command line. If there is a value specified (after an equal sign) return -/// that as well. -static Option *LookupOption(const char *&Arg, const char *&Value, - std::map<std::string, Option*> &OptionsMap) { - while (*Arg == '-') ++Arg; // Eat leading dashes - - const char *ArgEnd = Arg; - while (*ArgEnd && *ArgEnd != '=') - ++ArgEnd; // Scan till end of argument name. +/// that as well. This assumes that leading dashes have already been stripped. +static Option *LookupOption(StringRef &Arg, StringRef &Value, + const StringMap<Option*> &OptionsMap) { + // Reject all dashes. 
+ if (Arg.empty()) return 0; + + size_t EqualPos = Arg.find('='); + + // If we have an equals sign, remember the value. + if (EqualPos == StringRef::npos) { + // Look up the option. + StringMap<Option*>::const_iterator I = OptionsMap.find(Arg); + return I != OptionsMap.end() ? I->second : 0; + } - if (*ArgEnd == '=') // If we have an equals sign... - Value = ArgEnd+1; // Get the value, not the equals + // If the argument before the = is a valid option name, we match. If not, + // return Arg unmolested. + StringMap<Option*>::const_iterator I = + OptionsMap.find(Arg.substr(0, EqualPos)); + if (I == OptionsMap.end()) return 0; + + Value = Arg.substr(EqualPos+1); + Arg = Arg.substr(0, EqualPos); + return I->second; +} - if (*Arg == 0) return 0; - // Look up the option. - std::map<std::string, Option*>::iterator I = - OptionsMap.find(std::string(Arg, ArgEnd)); - return I != OptionsMap.end() ? I->second : 0; -} - -static inline bool ProvideOption(Option *Handler, const char *ArgName, - const char *Value, int argc, char **argv, +/// ProvideOption - For Value, this differentiates between an empty value ("") +/// and a null value (StringRef()). The later is accepted for arguments that +/// don't allow a value (-foo) the former is rejected (-foo=). +static inline bool ProvideOption(Option *Handler, StringRef ArgName, + StringRef Value, int argc, char **argv, int &i) { // Is this a multi-argument option? unsigned NumAdditionalVals = Handler->getNumAdditionalVals(); @@ -181,68 +190,62 @@ static inline bool ProvideOption(Option *Handler, const char *ArgName, // Enforce value requirements switch (Handler->getValueExpectedFlag()) { case ValueRequired: - if (Value == 0) { // No value specified? - if (i+1 < argc) { // Steal the next argument, like for '-o filename' - Value = argv[++i]; - } else { - return Handler->error(" requires a value!"); - } + if (Value.data() == 0) { // No value specified? 
+ if (i+1 >= argc) + return Handler->error("requires a value!"); + // Steal the next argument, like for '-o filename' + Value = argv[++i]; } break; case ValueDisallowed: if (NumAdditionalVals > 0) - return Handler->error(": multi-valued option specified" - " with ValueDisallowed modifier!"); + return Handler->error("multi-valued option specified" + " with ValueDisallowed modifier!"); - if (Value) - return Handler->error(" does not allow a value! '" + - std::string(Value) + "' specified."); + if (Value.data()) + return Handler->error("does not allow a value! '" + + Twine(Value) + "' specified."); break; case ValueOptional: break; + default: - cerr << ProgramName + errs() << ProgramName << ": Bad ValueMask flag! CommandLine usage error:" << Handler->getValueExpectedFlag() << "\n"; - abort(); - break; + llvm_unreachable(0); } // If this isn't a multi-arg option, just run the handler. - if (NumAdditionalVals == 0) { - return Handler->addOccurrence(i, ArgName, Value ? Value : ""); - } + if (NumAdditionalVals == 0) + return Handler->addOccurrence(i, ArgName, Value); + // If it is, run the handle several times. 
- else { - bool MultiArg = false; - - if (Value) { - if (Handler->addOccurrence(i, ArgName, Value, MultiArg)) - return true; - --NumAdditionalVals; - MultiArg = true; - } + bool MultiArg = false; - while (NumAdditionalVals > 0) { + if (Value.data()) { + if (Handler->addOccurrence(i, ArgName, Value, MultiArg)) + return true; + --NumAdditionalVals; + MultiArg = true; + } - if (i+1 < argc) { - Value = argv[++i]; - } else { - return Handler->error(": not enough values!"); - } - if (Handler->addOccurrence(i, ArgName, Value, MultiArg)) - return true; - MultiArg = true; - --NumAdditionalVals; - } - return false; + while (NumAdditionalVals > 0) { + if (i+1 >= argc) + return Handler->error("not enough values!"); + Value = argv[++i]; + + if (Handler->addOccurrence(i, ArgName, Value, MultiArg)) + return true; + MultiArg = true; + --NumAdditionalVals; } + return false; } -static bool ProvidePositionalOption(Option *Handler, const std::string &Arg, - int i) { +static bool ProvidePositionalOption(Option *Handler, StringRef Arg, int i) { int Dummy = i; - return ProvideOption(Handler, Handler->ArgStr, Arg.c_str(), 0, 0, Dummy); + return ProvideOption(Handler, Handler->ArgStr, Arg, 0, 0, Dummy); } @@ -260,33 +263,78 @@ static inline bool isPrefixedOrGrouping(const Option *O) { // see if there options that satisfy the predicate. If we find one, return it, // otherwise return null. // -static Option *getOptionPred(std::string Name, size_t &Length, +static Option *getOptionPred(StringRef Name, size_t &Length, bool (*Pred)(const Option*), - std::map<std::string, Option*> &OptionsMap) { + const StringMap<Option*> &OptionsMap) { - std::map<std::string, Option*>::iterator OMI = OptionsMap.find(Name); - if (OMI != OptionsMap.end() && Pred(OMI->second)) { - Length = Name.length(); - return OMI->second; - } + StringMap<Option*>::const_iterator OMI = OptionsMap.find(Name); - if (Name.size() == 1) return 0; - do { - Name.erase(Name.end()-1, Name.end()); // Chop off the last character... 
+ // Loop while we haven't found an option and Name still has at least two + // characters in it (so that the next iteration will not be the empty + // string. + while (OMI == OptionsMap.end() && Name.size() > 1) { + Name = Name.substr(0, Name.size()-1); // Chop off the last character. OMI = OptionsMap.find(Name); - - // Loop while we haven't found an option and Name still has at least two - // characters in it (so that the next iteration will not be the empty - // string... - } while ((OMI == OptionsMap.end() || !Pred(OMI->second)) && Name.size() > 1); + } if (OMI != OptionsMap.end() && Pred(OMI->second)) { - Length = Name.length(); + Length = Name.size(); return OMI->second; // Found one! } return 0; // No option found! } +/// HandlePrefixedOrGroupedOption - The specified argument string (which started +/// with at least one '-') does not fully match an available option. Check to +/// see if this is a prefix or grouped option. If so, split arg into output an +/// Arg/Value pair and return the Option to parse it with. +static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value, + bool &ErrorParsing, + const StringMap<Option*> &OptionsMap) { + if (Arg.size() == 1) return 0; + + // Do the lookup! + size_t Length = 0; + Option *PGOpt = getOptionPred(Arg, Length, isPrefixedOrGrouping, OptionsMap); + if (PGOpt == 0) return 0; + + // If the option is a prefixed option, then the value is simply the + // rest of the name... so fall through to later processing, by + // setting up the argument name flags and value fields. + if (PGOpt->getFormattingFlag() == cl::Prefix) { + Value = Arg.substr(Length); + Arg = Arg.substr(0, Length); + assert(OptionsMap.count(Arg) && OptionsMap.find(Arg)->second == PGOpt); + return PGOpt; + } + + // This must be a grouped option... handle them now. Grouping options can't + // have values. + assert(isGrouping(PGOpt) && "Broken getOptionPred!"); + + do { + // Move current arg name out of Arg into OneArgName. 
+ StringRef OneArgName = Arg.substr(0, Length); + Arg = Arg.substr(Length); + + // Because ValueRequired is an invalid flag for grouped arguments, + // we don't need to pass argc/argv in. + assert(PGOpt->getValueExpectedFlag() != cl::ValueRequired && + "Option can not be cl::Grouping AND cl::ValueRequired!"); + int Dummy; + ErrorParsing |= ProvideOption(PGOpt, OneArgName, + StringRef(), 0, 0, Dummy); + + // Get the next grouping option. + PGOpt = getOptionPred(Arg, Length, isGrouping, OptionsMap); + } while (PGOpt && Length != Arg.size()); + + // Return the last option with Arg cut down to just the last one. + return PGOpt; +} + + + static bool RequiresValue(const Option *O) { return O->getNumOccurrencesFlag() == cl::Required || O->getNumOccurrencesFlag() == cl::OneOrMore; @@ -300,45 +348,35 @@ static bool EatsUnboundedNumberOfValues(const Option *O) { /// ParseCStringVector - Break INPUT up wherever one or more /// whitespace characters are found, and store the resulting tokens in /// OUTPUT. The tokens stored in OUTPUT are dynamically allocated -/// using strdup (), so it is the caller's responsibility to free () +/// using strdup(), so it is the caller's responsibility to free() /// them later. /// -static void ParseCStringVector(std::vector<char *> &output, - const char *input) { +static void ParseCStringVector(std::vector<char *> &OutputVector, + const char *Input) { // Characters which will be treated as token separators: - static const char *const delims = " \v\f\t\r\n"; - - std::string work (input); - // Skip past any delims at head of input string. - size_t pos = work.find_first_not_of (delims); - // If the string consists entirely of delims, then exit early. - if (pos == std::string::npos) return; - // Otherwise, jump forward to beginning of first word. - work = work.substr (pos); - // Find position of first delimiter. 
- pos = work.find_first_of (delims); - - while (!work.empty() && pos != std::string::npos) { - // Everything from 0 to POS is the next word to copy. - output.push_back (strdup (work.substr (0,pos).c_str ())); - // Is there another word in the string? - size_t nextpos = work.find_first_not_of (delims, pos + 1); - if (nextpos != std::string::npos) { - // Yes? Then remove delims from beginning ... - work = work.substr (work.find_first_not_of (delims, pos + 1)); - // and find the end of the word. - pos = work.find_first_of (delims); - } else { - // No? (Remainder of string is delims.) End the loop. - work = ""; - pos = std::string::npos; + StringRef Delims = " \v\f\t\r\n"; + + StringRef WorkStr(Input); + while (!WorkStr.empty()) { + // If the first character is a delimiter, strip them off. + if (Delims.find(WorkStr[0]) != StringRef::npos) { + size_t Pos = WorkStr.find_first_not_of(Delims); + if (Pos == StringRef::npos) Pos = WorkStr.size(); + WorkStr = WorkStr.substr(Pos); + continue; } - } - - // If `input' ended with non-delim char, then we'll get here with - // the last word of `input' in `work'; copy it now. - if (!work.empty ()) { - output.push_back (strdup (work.c_str ())); + + // Find position of first delimiter. + size_t Pos = WorkStr.find_first_of(Delims); + if (Pos == StringRef::npos) Pos = WorkStr.size(); + + // Everything from 0 to Pos is the next word to copy. + char *NewStr = (char*)malloc(Pos+1); + memcpy(NewStr, WorkStr.data(), Pos); + NewStr[Pos] = 0; + OutputVector.push_back(NewStr); + + WorkStr = WorkStr.substr(Pos); } } @@ -372,20 +410,19 @@ void cl::ParseEnvironmentOptions(const char *progName, const char *envVar, // Free all the strdup()ed strings. for (std::vector<char*>::iterator i = newArgv.begin(), e = newArgv.end(); i != e; ++i) - free (*i); + free(*i); } /// ExpandResponseFiles - Copy the contents of argv into newArgv, /// substituting the contents of the response files for the arguments /// of type @file. 
-static void ExpandResponseFiles(int argc, char** argv, +static void ExpandResponseFiles(unsigned argc, char** argv, std::vector<char*>& newArgv) { - for (int i = 1; i != argc; ++i) { - char* arg = argv[i]; + for (unsigned i = 1; i != argc; ++i) { + char *arg = argv[i]; if (arg[0] == '@') { - sys::PathWithStatus respFile(++arg); // Check that the response file is not empty (mmap'ing empty @@ -418,9 +455,9 @@ static void ExpandResponseFiles(int argc, char** argv, void cl::ParseCommandLineOptions(int argc, char **argv, const char *Overview, bool ReadResponseFiles) { // Process all registered options. - std::vector<Option*> PositionalOpts; - std::vector<Option*> SinkOpts; - std::map<std::string, Option*> Opts; + SmallVector<Option*, 4> PositionalOpts; + SmallVector<Option*, 4> SinkOpts; + StringMap<Option*> Opts; GetOptionInfo(PositionalOpts, SinkOpts, Opts); assert((!Opts.empty() || !PositionalOpts.empty()) && @@ -469,7 +506,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, // unless there is only one positional argument... if (PositionalOpts.size() > 2) ErrorParsing |= - Opt->error(" error - this positional option will never be matched, " + Opt->error("error - this positional option will never be matched, " "because it does not Require a value, and a " "cl::ConsumeAfter option is active!"); } else if (UnboundedFound && !Opt->ArgStr[0]) { @@ -477,7 +514,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, // not specified after an option that eats all extra arguments, or this // one will never get any! // - ErrorParsing |= Opt->error(" error - option can never match, because " + ErrorParsing |= Opt->error("error - option can never match, because " "another positional argument will match an " "unbounded number of values, and this option" " does not require a value!"); @@ -488,9 +525,9 @@ void cl::ParseCommandLineOptions(int argc, char **argv, } // PositionalVals - A vector of "positional" arguments we accumulate into - // the process at the end... 
+ // the process at the end. // - std::vector<std::pair<std::string,unsigned> > PositionalVals; + SmallVector<std::pair<StringRef,unsigned>, 4> PositionalVals; // If the program has named positional arguments, and the name has been run // across, keep track of which positional argument was named. Otherwise put @@ -501,8 +538,8 @@ void cl::ParseCommandLineOptions(int argc, char **argv, bool DashDashFound = false; // Have we read '--'? for (int i = 1; i < argc; ++i) { Option *Handler = 0; - const char *Value = 0; - const char *ArgName = ""; + StringRef Value; + StringRef ArgName = ""; // If the option list changed, this means that some command line // option has just been registered or deregistered. This can occur in @@ -524,7 +561,9 @@ void cl::ParseCommandLineOptions(int argc, char **argv, if (ActivePositionalArg) { ProvidePositionalOption(ActivePositionalArg, argv[i], i); continue; // We are done! - } else if (!PositionalOpts.empty()) { + } + + if (!PositionalOpts.empty()) { PositionalVals.push_back(std::make_pair(argv[i],i)); // All of the positional arguments have been fulfulled, give the rest to @@ -550,69 +589,37 @@ void cl::ParseCommandLineOptions(int argc, char **argv, // option is another positional argument. If so, treat it as an argument, // otherwise feed it to the eating positional. ArgName = argv[i]+1; + // Eat leading dashes. + while (!ArgName.empty() && ArgName[0] == '-') + ArgName = ArgName.substr(1); + Handler = LookupOption(ArgName, Value, Opts); if (!Handler || Handler->getFormattingFlag() != cl::Positional) { ProvidePositionalOption(ActivePositionalArg, argv[i], i); continue; // We are done! } - } else { // We start with a '-', must be an argument... + } else { // We start with a '-', must be an argument. ArgName = argv[i]+1; + // Eat leading dashes. 
+ while (!ArgName.empty() && ArgName[0] == '-') + ArgName = ArgName.substr(1); + Handler = LookupOption(ArgName, Value, Opts); // Check to see if this "option" is really a prefixed or grouped argument. - if (Handler == 0) { - std::string RealName(ArgName); - if (RealName.size() > 1) { - size_t Length = 0; - Option *PGOpt = getOptionPred(RealName, Length, isPrefixedOrGrouping, - Opts); - - // If the option is a prefixed option, then the value is simply the - // rest of the name... so fall through to later processing, by - // setting up the argument name flags and value fields. - // - if (PGOpt && PGOpt->getFormattingFlag() == cl::Prefix) { - Value = ArgName+Length; - assert(Opts.find(std::string(ArgName, Value)) != Opts.end() && - Opts.find(std::string(ArgName, Value))->second == PGOpt); - Handler = PGOpt; - } else if (PGOpt) { - // This must be a grouped option... handle them now. - assert(isGrouping(PGOpt) && "Broken getOptionPred!"); - - do { - // Move current arg name out of RealName into RealArgName... - std::string RealArgName(RealName.begin(), - RealName.begin() + Length); - RealName.erase(RealName.begin(), RealName.begin() + Length); - - // Because ValueRequired is an invalid flag for grouped arguments, - // we don't need to pass argc/argv in... - // - assert(PGOpt->getValueExpectedFlag() != cl::ValueRequired && - "Option can not be cl::Grouping AND cl::ValueRequired!"); - int Dummy; - ErrorParsing |= ProvideOption(PGOpt, RealArgName.c_str(), - 0, 0, 0, Dummy); - - // Get the next grouping option... - PGOpt = getOptionPred(RealName, Length, isGrouping, Opts); - } while (PGOpt && Length != RealName.size()); - - Handler = PGOpt; // Ate all of the options. - } - } - } + if (Handler == 0) + Handler = HandlePrefixedOrGroupedOption(ArgName, Value, + ErrorParsing, Opts); } if (Handler == 0) { if (SinkOpts.empty()) { - cerr << ProgramName << ": Unknown command line argument '" + errs() << ProgramName << ": Unknown command line argument '" << argv[i] << "'. 
Try: '" << argv[0] << " --help'\n"; ErrorParsing = true; } else { - for (std::vector<Option*>::iterator I = SinkOpts.begin(), + for (SmallVectorImpl<Option*>::iterator I = SinkOpts.begin(), E = SinkOpts.end(); I != E ; ++I) (*I)->addOccurrence(i, "", argv[i]); } @@ -620,24 +627,23 @@ void cl::ParseCommandLineOptions(int argc, char **argv, } // Check to see if this option accepts a comma separated list of values. If - // it does, we have to split up the value into multiple values... - if (Value && Handler->getMiscFlags() & CommaSeparated) { - std::string Val(Value); - std::string::size_type Pos = Val.find(','); - - while (Pos != std::string::npos) { - // Process the portion before the comma... - ErrorParsing |= ProvideOption(Handler, ArgName, - std::string(Val.begin(), - Val.begin()+Pos).c_str(), + // it does, we have to split up the value into multiple values. + if (Handler->getMiscFlags() & CommaSeparated) { + StringRef Val(Value); + StringRef::size_type Pos = Val.find(','); + + while (Pos != StringRef::npos) { + // Process the portion before the comma. + ErrorParsing |= ProvideOption(Handler, ArgName, Val.substr(0, Pos), argc, argv, i); - // Erase the portion before the comma, AND the comma... - Val.erase(Val.begin(), Val.begin()+Pos+1); - Value += Pos+1; // Increment the original value pointer as well... + // Erase the portion before the comma, AND the comma. + Val = Val.substr(Pos+1); + Value.substr(Pos+1); // Increment the original value pointer as well. - // Check for another comma... + // Check for another comma. Pos = Val.find(','); } + Value = Val; } // If this is a named positional argument, just remember that it is the @@ -650,7 +656,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, // Check and handle positional arguments now... 
if (NumPositionalRequired > PositionalVals.size()) { - cerr << ProgramName + errs() << ProgramName << ": Not enough positional command line arguments specified!\n" << "Must specify at least " << NumPositionalRequired << " positional arguments: See: " << argv[0] << " --help\n"; @@ -658,14 +664,14 @@ void cl::ParseCommandLineOptions(int argc, char **argv, ErrorParsing = true; } else if (!HasUnlimitedPositionals && PositionalVals.size() > PositionalOpts.size()) { - cerr << ProgramName + errs() << ProgramName << ": Too many positional arguments specified!\n" << "Can specify at most " << PositionalOpts.size() << " positional arguments: See: " << argv[0] << " --help\n"; ErrorParsing = true; } else if (ConsumeAfterOpt == 0) { - // Positional args have already been handled if ConsumeAfter is specified... + // Positional args have already been handled if ConsumeAfter is specified. unsigned ValNo = 0, NumVals = static_cast<unsigned>(PositionalVals.size()); for (size_t i = 0, e = PositionalOpts.size(); i != e; ++i) { if (RequiresValue(PositionalOpts[i])) { @@ -693,7 +699,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, ValNo++; break; default: - assert(0 && "Internal error, unexpected NumOccurrences flag in " + llvm_unreachable("Internal error, unexpected NumOccurrences flag in " "positional argument processing!"); } } @@ -730,13 +736,13 @@ void cl::ParseCommandLineOptions(int argc, char **argv, } // Loop over args and make sure all required args are specified! 
- for (std::map<std::string, Option*>::iterator I = Opts.begin(), + for (StringMap<Option*>::iterator I = Opts.begin(), E = Opts.end(); I != E; ++I) { switch (I->second->getNumOccurrencesFlag()) { case Required: case OneOrMore: if (I->second->getNumOccurrences() == 0) { - I->second->error(" must be specified at least once!"); + I->second->error("must be specified at least once!"); ErrorParsing = true; } // Fall through @@ -756,7 +762,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, // Free all the strdup()ed strings. for (std::vector<char*>::iterator i = newArgv.begin(), e = newArgv.end(); i != e; ++i) - free (*i); + free(*i); } // If we had an error processing our arguments, don't let the program execute @@ -767,36 +773,35 @@ void cl::ParseCommandLineOptions(int argc, char **argv, // Option Base class implementation // -bool Option::error(std::string Message, const char *ArgName) { - if (ArgName == 0) ArgName = ArgStr; - if (ArgName[0] == 0) - cerr << HelpStr; // Be nice for positional arguments +bool Option::error(const Twine &Message, StringRef ArgName) { + if (ArgName.data() == 0) ArgName = ArgStr; + if (ArgName.empty()) + errs() << HelpStr; // Be nice for positional arguments else - cerr << ProgramName << ": for the -" << ArgName; + errs() << ProgramName << ": for the -" << ArgName; - cerr << " option: " << Message << "\n"; + errs() << " option: " << Message << "\n"; return true; } -bool Option::addOccurrence(unsigned pos, const char *ArgName, - const std::string &Value, - bool MultiArg) { +bool Option::addOccurrence(unsigned pos, StringRef ArgName, + StringRef Value, bool MultiArg) { if (!MultiArg) NumOccurrences++; // Increment the number of times we have been seen switch (getNumOccurrencesFlag()) { case Optional: if (NumOccurrences > 1) - return error(": may only occur zero or one times!", ArgName); + return error("may only occur zero or one times!", ArgName); break; case Required: if (NumOccurrences > 1) - return error(": must occur exactly one 
time!", ArgName); + return error("must occur exactly one time!", ArgName); // Fall through case OneOrMore: case ZeroOrMore: case ConsumeAfter: break; - default: return error(": bad num occurrences flag value!"); + default: return error("bad num occurrences flag value!"); } return handleOccurrence(pos, ArgName, Value); @@ -823,8 +828,8 @@ size_t alias::getOptionWidth() const { // Print out the option for the alias. void alias::printOptionInfo(size_t GlobalWidth) const { size_t L = std::strlen(ArgStr); - cout << " -" << ArgStr << std::string(GlobalWidth-L-6, ' ') << " - " - << HelpStr << "\n"; + errs() << " -" << ArgStr; + errs().indent(GlobalWidth-L-6) << " - " << HelpStr << "\n"; } @@ -850,13 +855,12 @@ size_t basic_parser_impl::getOptionWidth(const Option &O) const { // void basic_parser_impl::printOptionInfo(const Option &O, size_t GlobalWidth) const { - cout << " -" << O.ArgStr; + outs() << " -" << O.ArgStr; if (const char *ValName = getValueName()) - cout << "=<" << getValueStr(O, ValName) << ">"; + outs() << "=<" << getValueStr(O, ValName) << '>'; - cout << std::string(GlobalWidth-getOptionWidth(O), ' ') << " - " - << O.HelpStr << "\n"; + outs().indent(GlobalWidth-getOptionWidth(O)) << " - " << O.HelpStr << '\n'; } @@ -864,81 +868,78 @@ void basic_parser_impl::printOptionInfo(const Option &O, // parser<bool> implementation // -bool parser<bool>::parse(Option &O, const char *ArgName, - const std::string &Arg, bool &Value) { +bool parser<bool>::parse(Option &O, StringRef ArgName, + StringRef Arg, bool &Value) { if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" || Arg == "1") { Value = true; - } else if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") { + return false; + } + + if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") { Value = false; - } else { - return O.error(": '" + Arg + - "' is invalid value for boolean argument! 
Try 0 or 1"); + return false; } - return false; + return O.error("'" + Arg + + "' is invalid value for boolean argument! Try 0 or 1"); } // parser<boolOrDefault> implementation // -bool parser<boolOrDefault>::parse(Option &O, const char *ArgName, - const std::string &Arg, boolOrDefault &Value) { +bool parser<boolOrDefault>::parse(Option &O, StringRef ArgName, + StringRef Arg, boolOrDefault &Value) { if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" || Arg == "1") { Value = BOU_TRUE; - } else if (Arg == "false" || Arg == "FALSE" - || Arg == "False" || Arg == "0") { + return false; + } + if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") { Value = BOU_FALSE; - } else { - return O.error(": '" + Arg + - "' is invalid value for boolean argument! Try 0 or 1"); + return false; } - return false; + + return O.error("'" + Arg + + "' is invalid value for boolean argument! Try 0 or 1"); } // parser<int> implementation // -bool parser<int>::parse(Option &O, const char *ArgName, - const std::string &Arg, int &Value) { - char *End; - Value = (int)strtol(Arg.c_str(), &End, 0); - if (*End != 0) - return O.error(": '" + Arg + "' value invalid for integer argument!"); +bool parser<int>::parse(Option &O, StringRef ArgName, + StringRef Arg, int &Value) { + if (Arg.getAsInteger(0, Value)) + return O.error("'" + Arg + "' value invalid for integer argument!"); return false; } // parser<unsigned> implementation // -bool parser<unsigned>::parse(Option &O, const char *ArgName, - const std::string &Arg, unsigned &Value) { - char *End; - errno = 0; - unsigned long V = strtoul(Arg.c_str(), &End, 0); - Value = (unsigned)V; - if (((V == ULONG_MAX) && (errno == ERANGE)) - || (*End != 0) - || (Value != V)) - return O.error(": '" + Arg + "' value invalid for uint argument!"); +bool parser<unsigned>::parse(Option &O, StringRef ArgName, + StringRef Arg, unsigned &Value) { + + if (Arg.getAsInteger(0, Value)) + return O.error("'" + Arg + "' value invalid for uint 
argument!"); return false; } // parser<double>/parser<float> implementation // -static bool parseDouble(Option &O, const std::string &Arg, double &Value) { - const char *ArgStart = Arg.c_str(); +static bool parseDouble(Option &O, StringRef Arg, double &Value) { + SmallString<32> TmpStr(Arg.begin(), Arg.end()); + const char *ArgStart = TmpStr.c_str(); char *End; Value = strtod(ArgStart, &End); if (*End != 0) - return O.error(": '" +Arg+ "' value invalid for floating point argument!"); + return O.error("'" + Arg + "' value invalid for floating point argument!"); return false; } -bool parser<double>::parse(Option &O, const char *AN, - const std::string &Arg, double &Val) { +bool parser<double>::parse(Option &O, StringRef ArgName, + StringRef Arg, double &Val) { return parseDouble(O, Arg, Val); } -bool parser<float>::parse(Option &O, const char *AN, - const std::string &Arg, float &Val) { +bool parser<float>::parse(Option &O, StringRef ArgName, + StringRef Arg, float &Val) { double dVal; if (parseDouble(O, Arg, dVal)) return true; @@ -955,14 +956,12 @@ bool parser<float>::parse(Option &O, const char *AN, // argument string. If the option is not found, getNumOptions() is returned. 
// unsigned generic_parser_base::findOption(const char *Name) { - unsigned i = 0, e = getNumOptions(); - std::string N(Name); + unsigned e = getNumOptions(); - while (i != e) - if (getOption(i) == N) + for (unsigned i = 0; i != e; ++i) { + if (strcmp(getOption(i), Name) == 0) return i; - else - ++i; + } return e; } @@ -989,21 +988,21 @@ void generic_parser_base::printOptionInfo(const Option &O, size_t GlobalWidth) const { if (O.hasArgStr()) { size_t L = std::strlen(O.ArgStr); - cout << " -" << O.ArgStr << std::string(GlobalWidth-L-6, ' ') - << " - " << O.HelpStr << "\n"; + outs() << " -" << O.ArgStr; + outs().indent(GlobalWidth-L-6) << " - " << O.HelpStr << '\n'; for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { size_t NumSpaces = GlobalWidth-strlen(getOption(i))-8; - cout << " =" << getOption(i) << std::string(NumSpaces, ' ') - << " - " << getDescription(i) << "\n"; + outs() << " =" << getOption(i); + outs().indent(NumSpaces) << " - " << getDescription(i) << '\n'; } } else { if (O.HelpStr[0]) - cout << " " << O.HelpStr << "\n"; + outs() << " " << O.HelpStr << '\n'; for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { size_t L = std::strlen(getOption(i)); - cout << " -" << getOption(i) << std::string(GlobalWidth-L-8, ' ') - << " - " << getDescription(i) << "\n"; + outs() << " -" << getOption(i); + outs().indent(GlobalWidth-L-8) << " - " << getDescription(i) << '\n'; } } } @@ -1013,6 +1012,12 @@ void generic_parser_base::printOptionInfo(const Option &O, // --help and --help-hidden option implementation // +static int OptNameCompare(const void *LHS, const void *RHS) { + typedef std::pair<const char *, Option*> pair_ty; + + return strcmp(((pair_ty*)LHS)->first, ((pair_ty*)RHS)->first); +} + namespace { class HelpPrinter { @@ -1020,14 +1025,6 @@ class HelpPrinter { const Option *EmptyArg; const bool ShowHidden; - // isHidden/isReallyHidden - Predicates to be used to filter down arg lists. 
- inline static bool isHidden(std::pair<std::string, Option *> &OptPair) { - return OptPair.second->getOptionHiddenFlag() >= Hidden; - } - inline static bool isReallyHidden(std::pair<std::string, Option *> &OptPair) { - return OptPair.second->getOptionHiddenFlag() == ReallyHidden; - } - public: explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) { EmptyArg = 0; @@ -1037,34 +1034,40 @@ public: if (Value == false) return; // Get all the options. - std::vector<Option*> PositionalOpts; - std::vector<Option*> SinkOpts; - std::map<std::string, Option*> OptMap; + SmallVector<Option*, 4> PositionalOpts; + SmallVector<Option*, 4> SinkOpts; + StringMap<Option*> OptMap; GetOptionInfo(PositionalOpts, SinkOpts, OptMap); - // Copy Options into a vector so we can sort them as we like... - std::vector<std::pair<std::string, Option*> > Opts; - copy(OptMap.begin(), OptMap.end(), std::back_inserter(Opts)); - - // Eliminate Hidden or ReallyHidden arguments, depending on ShowHidden - Opts.erase(std::remove_if(Opts.begin(), Opts.end(), - std::ptr_fun(ShowHidden ? isReallyHidden : isHidden)), - Opts.end()); - - // Eliminate duplicate entries in table (from enum flags options, f.e.) - { // Give OptionSet a scope - std::set<Option*> OptionSet; - for (unsigned i = 0; i != Opts.size(); ++i) - if (OptionSet.count(Opts[i].second) == 0) - OptionSet.insert(Opts[i].second); // Add new entry to set - else - Opts.erase(Opts.begin()+i--); // Erase duplicate + // Copy Options into a vector so we can sort them as we like. + SmallVector<std::pair<const char *, Option*>, 128> Opts; + SmallPtrSet<Option*, 128> OptionSet; // Duplicate option detection. + + for (StringMap<Option*>::iterator I = OptMap.begin(), E = OptMap.end(); + I != E; ++I) { + // Ignore really-hidden options. + if (I->second->getOptionHiddenFlag() == ReallyHidden) + continue; + + // Unless showhidden is set, ignore hidden flags. 
+ if (I->second->getOptionHiddenFlag() == Hidden && !ShowHidden) + continue; + + // If we've already seen this option, don't add it to the list again. + if (!OptionSet.insert(I->second)) + continue; + + Opts.push_back(std::pair<const char *, Option*>(I->getKey().data(), + I->second)); } + + // Sort the options list alphabetically. + qsort(Opts.data(), Opts.size(), sizeof(Opts[0]), OptNameCompare); if (ProgramOverview) - cout << "OVERVIEW: " << ProgramOverview << "\n"; + outs() << "OVERVIEW: " << ProgramOverview << "\n"; - cout << "USAGE: " << ProgramName << " [options]"; + outs() << "USAGE: " << ProgramName << " [options]"; // Print out the positional options. Option *CAOpt = 0; // The cl::ConsumeAfter option, if it exists... @@ -1074,28 +1077,28 @@ public: for (size_t i = CAOpt != 0, e = PositionalOpts.size(); i != e; ++i) { if (PositionalOpts[i]->ArgStr[0]) - cout << " --" << PositionalOpts[i]->ArgStr; - cout << " " << PositionalOpts[i]->HelpStr; + outs() << " --" << PositionalOpts[i]->ArgStr; + outs() << " " << PositionalOpts[i]->HelpStr; } // Print the consume after option info if it exists... - if (CAOpt) cout << " " << CAOpt->HelpStr; + if (CAOpt) outs() << " " << CAOpt->HelpStr; - cout << "\n\n"; + outs() << "\n\n"; // Compute the maximum argument length... MaxArgLen = 0; for (size_t i = 0, e = Opts.size(); i != e; ++i) MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth()); - cout << "OPTIONS:\n"; + outs() << "OPTIONS:\n"; for (size_t i = 0, e = Opts.size(); i != e; ++i) Opts[i].second->printOptionInfo(MaxArgLen); // Print any extra help the user has declared. 
for (std::vector<const char *>::iterator I = MoreHelp->begin(), E = MoreHelp->end(); I != E; ++I) - cout << *I; + outs() << *I; MoreHelp->clear(); // Halt the program since help information was printed @@ -1120,37 +1123,64 @@ HHOp("help-hidden", cl::desc("Display all available options"), static void (*OverrideVersionPrinter)() = 0; +static int TargetArraySortFn(const void *LHS, const void *RHS) { + typedef std::pair<const char *, const Target*> pair_ty; + return strcmp(((const pair_ty*)LHS)->first, ((const pair_ty*)RHS)->first); +} + namespace { class VersionPrinter { public: void print() { - cout << "Low Level Virtual Machine (http://llvm.org/):\n"; - cout << " " << PACKAGE_NAME << " version " << PACKAGE_VERSION; + raw_ostream &OS = outs(); + OS << "Low Level Virtual Machine (http://llvm.org/):\n" + << " " << PACKAGE_NAME << " version " << PACKAGE_VERSION; #ifdef LLVM_VERSION_INFO - cout << LLVM_VERSION_INFO; + OS << LLVM_VERSION_INFO; #endif - cout << "\n "; + OS << "\n "; #ifndef __OPTIMIZE__ - cout << "DEBUG build"; + OS << "DEBUG build"; #else - cout << "Optimized build"; + OS << "Optimized build"; #endif #ifndef NDEBUG - cout << " with assertions"; + OS << " with assertions"; #endif - cout << ".\n"; - cout << " Built " << __DATE__ << "(" << __TIME__ << ").\n"; + OS << ".\n" + << " Built " << __DATE__ << " (" << __TIME__ << ").\n" + << " Host: " << sys::getHostTriple() << '\n' + << '\n' + << " Registered Targets:\n"; + + std::vector<std::pair<const char *, const Target*> > Targets; + size_t Width = 0; + for (TargetRegistry::iterator it = TargetRegistry::begin(), + ie = TargetRegistry::end(); it != ie; ++it) { + Targets.push_back(std::make_pair(it->getName(), &*it)); + Width = std::max(Width, strlen(Targets.back().first)); + } + if (!Targets.empty()) + qsort(&Targets[0], Targets.size(), sizeof(Targets[0]), + TargetArraySortFn); + + for (unsigned i = 0, e = Targets.size(); i != e; ++i) { + OS << " " << Targets[i].first; + OS.indent(Width - 
strlen(Targets[i].first)) << " - " + << Targets[i].second->getShortDescription() << '\n'; + } + if (Targets.empty()) + OS << " (none)\n"; } void operator=(bool OptionWasSpecified) { - if (OptionWasSpecified) { - if (OverrideVersionPrinter == 0) { - print(); - exit(1); - } else { - (*OverrideVersionPrinter)(); - exit(1); - } + if (!OptionWasSpecified) return; + + if (OverrideVersionPrinter == 0) { + print(); + exit(1); } + (*OverrideVersionPrinter)(); + exit(1); } }; } // End anonymous namespace diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp index cb8c4b0..423e90d 100644 --- a/lib/Support/ConstantRange.cpp +++ b/lib/Support/ConstantRange.cpp @@ -23,12 +23,12 @@ #include "llvm/Support/ConstantRange.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Instructions.h" using namespace llvm; /// Initialize a full (the default) or empty set for the specified type. /// -ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) : - Lower(BitWidth, 0), Upper(BitWidth, 0) { +ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) { if (Full) Lower = Upper = APInt::getMaxValue(BitWidth); else @@ -37,16 +37,63 @@ ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) : /// Initialize a range to hold the single specified value. 
/// -ConstantRange::ConstantRange(const APInt & V) : Lower(V), Upper(V + 1) { } +ConstantRange::ConstantRange(const APInt & V) : Lower(V), Upper(V + 1) {} ConstantRange::ConstantRange(const APInt &L, const APInt &U) : Lower(L), Upper(U) { - assert(L.getBitWidth() == U.getBitWidth() && + assert(L.getBitWidth() == U.getBitWidth() && "ConstantRange with unequal bit widths"); assert((L != U || (L.isMaxValue() || L.isMinValue())) && "Lower == Upper, but they aren't min or max value!"); } +ConstantRange ConstantRange::makeICmpRegion(unsigned Pred, + const ConstantRange &CR) { + uint32_t W = CR.getBitWidth(); + switch (Pred) { + default: assert(!"Invalid ICmp predicate to makeICmpRegion()"); + case ICmpInst::ICMP_EQ: + return CR; + case ICmpInst::ICMP_NE: + if (CR.isSingleElement()) + return ConstantRange(CR.getUpper(), CR.getLower()); + return ConstantRange(W); + case ICmpInst::ICMP_ULT: + return ConstantRange(APInt::getMinValue(W), CR.getUnsignedMax()); + case ICmpInst::ICMP_SLT: + return ConstantRange(APInt::getSignedMinValue(W), CR.getSignedMax()); + case ICmpInst::ICMP_ULE: { + APInt UMax(CR.getUnsignedMax()); + if (UMax.isMaxValue()) + return ConstantRange(W); + return ConstantRange(APInt::getMinValue(W), UMax + 1); + } + case ICmpInst::ICMP_SLE: { + APInt SMax(CR.getSignedMax()); + if (SMax.isMaxSignedValue() || (SMax+1).isMaxSignedValue()) + return ConstantRange(W); + return ConstantRange(APInt::getSignedMinValue(W), SMax + 1); + } + case ICmpInst::ICMP_UGT: + return ConstantRange(CR.getUnsignedMin() + 1, APInt::getNullValue(W)); + case ICmpInst::ICMP_SGT: + return ConstantRange(CR.getSignedMin() + 1, + APInt::getSignedMinValue(W)); + case ICmpInst::ICMP_UGE: { + APInt UMin(CR.getUnsignedMin()); + if (UMin.isMinValue()) + return ConstantRange(W); + return ConstantRange(UMin, APInt::getNullValue(W)); + } + case ICmpInst::ICMP_SGE: { + APInt SMin(CR.getSignedMin()); + if (SMin.isMinSignedValue()) + return ConstantRange(W); + return ConstantRange(SMin, 
APInt::getSignedMinValue(W)); + } + } +} + /// isFullSet - Return true if this set contains all of the elements possible /// for this data-type bool ConstantRange::isFullSet() const { @@ -112,14 +159,10 @@ APInt ConstantRange::getSignedMax() const { else return SignedMax; } else { - if ((getUpper() - 1).slt(getLower())) { - if (getLower() != SignedMax) - return SignedMax; - else - return getUpper() - 1; - } else { + if (getLower().isNegative() == getUpper().isNegative()) + return SignedMax; + else return getUpper() - 1; - } } } @@ -157,6 +200,30 @@ bool ConstantRange::contains(const APInt &V) const { return Lower.ule(V) || V.ult(Upper); } +/// contains - Return true if the argument is a subset of this range. +/// Two equal set contain each other. The empty set is considered to be +/// contained by all other sets. +/// +bool ConstantRange::contains(const ConstantRange &Other) const { + if (isFullSet()) return true; + if (Other.isFullSet()) return false; + if (Other.isEmptySet()) return true; + if (isEmptySet()) return false; + + if (!isWrappedSet()) { + if (Other.isWrappedSet()) + return false; + + return Lower.ule(Other.getLower()) && Other.getUpper().ule(Upper); + } + + if (!Other.isWrappedSet()) + return Other.getUpper().ule(Upper) || + Lower.ule(Other.getLower()); + + return Other.getUpper().ule(Upper) && Lower.ule(Other.getLower()); +} + /// subtract - Subtract the specified constant from the endpoints of this /// constant range. ConstantRange ConstantRange::subtract(const APInt &Val) const { @@ -208,59 +275,20 @@ ConstantRange::intersect1Wrapped(const ConstantRange &LHS, } /// intersectWith - Return the range that results from the intersection of this -/// range with another range. -/// +/// range with another range. The resultant range is guaranteed to include all +/// elements contained in both input ranges, and to have the smallest possible +/// set size that does so. 
Because there may be two intersections with the +/// same set size, A.intersectWith(B) might not be equal to B.intersectWith(A). ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const { assert(getBitWidth() == CR.getBitWidth() && "ConstantRange types don't agree!"); - // Handle common special cases - if (isEmptySet() || CR.isFullSet()) - return *this; - if (isFullSet() || CR.isEmptySet()) - return CR; - - if (!isWrappedSet()) { - if (!CR.isWrappedSet()) { - using namespace APIntOps; - APInt L = umax(Lower, CR.Lower); - APInt U = umin(Upper, CR.Upper); - - if (L.ult(U)) // If range isn't empty... - return ConstantRange(L, U); - else - return ConstantRange(getBitWidth(), false);// Otherwise, empty set - } else - return intersect1Wrapped(CR, *this); - } else { // We know "this" is wrapped... - if (!CR.isWrappedSet()) - return intersect1Wrapped(*this, CR); - else { - // Both ranges are wrapped... - using namespace APIntOps; - APInt L = umax(Lower, CR.Lower); - APInt U = umin(Upper, CR.Upper); - return ConstantRange(L, U); - } - } - return *this; -} - -/// maximalIntersectWith - Return the range that results from the intersection -/// of this range with another range. The resultant range is guaranteed to -/// include all elements contained in both input ranges, and to have the -/// smallest possible set size that does so. Because there may be two -/// intersections with the same set size, A.maximalIntersectWith(B) might not -/// be equal to B.maximalIntersect(A). -ConstantRange ConstantRange::maximalIntersectWith(const ConstantRange &CR) const { - assert(getBitWidth() == CR.getBitWidth() && - "ConstantRange types don't agree!"); // Handle common cases. 
if ( isEmptySet() || CR.isFullSet()) return *this; if (CR.isEmptySet() || isFullSet()) return CR; if (!isWrappedSet() && CR.isWrappedSet()) - return CR.maximalIntersectWith(*this); + return CR.intersectWith(*this); if (!isWrappedSet() && !CR.isWrappedSet()) { if (Lower.ult(CR.Lower)) { @@ -343,69 +371,74 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const { if (!isWrappedSet() && CR.isWrappedSet()) return CR.unionWith(*this); - APInt L = Lower, U = Upper; - if (!isWrappedSet() && !CR.isWrappedSet()) { + if (CR.Upper.ult(Lower) || Upper.ult(CR.Lower)) { + // If the two ranges are disjoint, find the smaller gap and bridge it. + APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper; + if (d1.ult(d2)) + return ConstantRange(Lower, CR.Upper); + else + return ConstantRange(CR.Lower, Upper); + } + + APInt L = Lower, U = Upper; if (CR.Lower.ult(L)) L = CR.Lower; - - if (CR.Upper.ugt(U)) + if ((CR.Upper - 1).ugt(U - 1)) U = CR.Upper; + + if (L == 0 && U == 0) + return ConstantRange(getBitWidth()); + + return ConstantRange(L, U); } - if (isWrappedSet() && !CR.isWrappedSet()) { - if ((CR.Lower.ult(Upper) && CR.Upper.ult(Upper)) || - (CR.Lower.ugt(Lower) && CR.Upper.ugt(Lower))) { + if (!CR.isWrappedSet()) { + // ------U L----- and ------U L----- : this + // L--U L--U : CR + if (CR.Upper.ule(Upper) || CR.Lower.uge(Lower)) return *this; - } - if (CR.Lower.ule(Upper) && Lower.ule(CR.Upper)) { + // ------U L----- : this + // L---------U : CR + if (CR.Lower.ule(Upper) && Lower.ule(CR.Upper)) return ConstantRange(getBitWidth()); - } - - if (CR.Lower.ule(Upper) && CR.Upper.ule(Lower)) { - APInt d1 = CR.Upper - Upper, d2 = Lower - CR.Upper; - if (d1.ult(d2)) { - U = CR.Upper; - } else { - L = CR.Upper; - } - } - if (Upper.ult(CR.Lower) && CR.Upper.ult(Lower)) { + // ----U L---- : this + // L---U : CR + // <d1> <d2> + if (Upper.ule(CR.Lower) && CR.Upper.ule(Lower)) { APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper; - if (d1.ult(d2)) { - U = CR.Lower + 1; - } else 
{ - L = CR.Upper - 1; - } + if (d1.ult(d2)) + return ConstantRange(Lower, CR.Upper); + else + return ConstantRange(CR.Lower, Upper); } - if (Upper.ult(CR.Lower) && Lower.ult(CR.Upper)) { - APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Lower; + // ----U L----- : this + // L----U : CR + if (Upper.ult(CR.Lower) && Lower.ult(CR.Upper)) + return ConstantRange(CR.Lower, Upper); - if (d1.ult(d2)) { - U = CR.Lower + 1; - } else { - L = CR.Lower; - } - } + // ------U L---- : this + // L-----U : CR + if (CR.Lower.ult(Upper) && CR.Upper.ult(Lower)) + return ConstantRange(Lower, CR.Upper); } - if (isWrappedSet() && CR.isWrappedSet()) { - if (Lower.ult(CR.Upper) || CR.Lower.ult(Upper)) - return ConstantRange(getBitWidth()); + assert(isWrappedSet() && CR.isWrappedSet() && + "ConstantRange::unionWith missed wrapped union unwrapped case"); - if (CR.Upper.ugt(U)) { - U = CR.Upper; - } - - if (CR.Lower.ult(L)) { - L = CR.Lower; - } + // ------U L---- and ------U L---- : this + // -U L----------- and ------------U L : CR + if (CR.Lower.ule(Upper) || Lower.ule(CR.Upper)) + return ConstantRange(getBitWidth()); - if (L == U) return ConstantRange(getBitWidth()); - } + APInt L = Lower, U = Upper; + if (CR.Upper.ugt(U)) + U = CR.Upper; + if (CR.Lower.ult(L)) + L = CR.Lower; return ConstantRange(L, U); } @@ -435,7 +468,7 @@ ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const { assert(SrcTySize < DstTySize && "Not a value extension"); if (isFullSet()) { return ConstantRange(APInt::getHighBitsSet(DstTySize,DstTySize-SrcTySize+1), - APInt::getLowBitsSet(DstTySize, SrcTySize-1)); + APInt::getLowBitsSet(DstTySize, SrcTySize-1) + 1); } APInt L = Lower; L.sext(DstTySize); @@ -459,6 +492,99 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const { return ConstantRange(L, U); } +ConstantRange +ConstantRange::add(const ConstantRange &Other) const { + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + if (isFullSet() || 
Other.isFullSet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize(); + APInt NewLower = getLower() + Other.getLower(); + APInt NewUpper = getUpper() + Other.getUpper() - 1; + if (NewLower == NewUpper) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + ConstantRange X = ConstantRange(NewLower, NewUpper); + if (X.getSetSize().ult(Spread_X) || X.getSetSize().ult(Spread_Y)) + // We've wrapped, therefore, full set. + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + return X; +} + +ConstantRange +ConstantRange::multiply(const ConstantRange &Other) const { + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + if (isFullSet() || Other.isFullSet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + APInt this_min = getUnsignedMin().zext(getBitWidth() * 2); + APInt this_max = getUnsignedMax().zext(getBitWidth() * 2); + APInt Other_min = Other.getUnsignedMin().zext(getBitWidth() * 2); + APInt Other_max = Other.getUnsignedMax().zext(getBitWidth() * 2); + + ConstantRange Result_zext = ConstantRange(this_min * Other_min, + this_max * Other_max + 1); + return Result_zext.truncate(getBitWidth()); +} + +ConstantRange +ConstantRange::smax(const ConstantRange &Other) const { + // X smax Y is: range(smax(X_smin, Y_smin), + // smax(X_smax, Y_smax)) + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + APInt NewL = APIntOps::smax(getSignedMin(), Other.getSignedMin()); + APInt NewU = APIntOps::smax(getSignedMax(), Other.getSignedMax()) + 1; + if (NewU == NewL) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + return ConstantRange(NewL, NewU); +} + +ConstantRange +ConstantRange::umax(const ConstantRange &Other) const { + // X umax Y is: range(umax(X_umin, Y_umin), + // umax(X_umax, Y_umax)) + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), 
/*isFullSet=*/false); + APInt NewL = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()); + APInt NewU = APIntOps::umax(getUnsignedMax(), Other.getUnsignedMax()) + 1; + if (NewU == NewL) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + return ConstantRange(NewL, NewU); +} + +ConstantRange +ConstantRange::udiv(const ConstantRange &RHS) const { + if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax() == 0) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + if (RHS.isFullSet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + APInt Lower = getUnsignedMin().udiv(RHS.getUnsignedMax()); + + APInt RHS_umin = RHS.getUnsignedMin(); + if (RHS_umin == 0) { + // We want the lowest value in RHS excluding zero. Usually that would be 1 + // except for a range in the form of [X, 1) in which case it would be X. + if (RHS.getUpper() == 1) + RHS_umin = RHS.getLower(); + else + RHS_umin = APInt(getBitWidth(), 1); + } + + APInt Upper = getUnsignedMax().udiv(RHS_umin) + 1; + + // If the LHS is Full and the RHS is a wrapped interval containing 1 then + // this could occur. + if (Lower == Upper) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + return ConstantRange(Lower, Upper); +} + /// print - Print out the bounds to a stream... /// void ConstantRange::print(raw_ostream &OS) const { @@ -470,3 +596,5 @@ void ConstantRange::print(raw_ostream &OS) const { void ConstantRange::dump() const { print(errs()); } + + diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp index a09cddf..71ff411 100644 --- a/lib/Support/Debug.cpp +++ b/lib/Support/Debug.cpp @@ -27,51 +27,37 @@ #include "llvm/Support/Debug.h" using namespace llvm; +// All Debug.h functionality is a no-op in NDEBUG mode. +#ifndef NDEBUG bool llvm::DebugFlag; // DebugFlag - Exported boolean set by the -debug option -namespace { -#ifndef NDEBUG - // -debug - Command line option to enable the DEBUG statements in the passes. 
- // This flag may only be enabled in debug builds. - static cl::opt<bool, true> - Debug("debug", cl::desc("Enable debug output"), cl::Hidden, - cl::location(DebugFlag)); +// -debug - Command line option to enable the DEBUG statements in the passes. +// This flag may only be enabled in debug builds. +static cl::opt<bool, true> +Debug("debug", cl::desc("Enable debug output"), cl::Hidden, + cl::location(DebugFlag)); - static std::string CurrentDebugType; - static struct DebugOnlyOpt { - void operator=(const std::string &Val) const { - DebugFlag |= !Val.empty(); - CurrentDebugType = Val; - } - } DebugOnlyOptLoc; +static std::string CurrentDebugType; +static struct DebugOnlyOpt { + void operator=(const std::string &Val) const { + DebugFlag |= !Val.empty(); + CurrentDebugType = Val; + } +} DebugOnlyOptLoc; - static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> > - DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"), - cl::Hidden, cl::value_desc("debug string"), - cl::location(DebugOnlyOptLoc), cl::ValueRequired); -#endif -} +static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> > +DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"), + cl::Hidden, cl::value_desc("debug string"), + cl::location(DebugOnlyOptLoc), cl::ValueRequired); // isCurrentDebugType - Return true if the specified string is the debug type // specified on the command line, or if none was specified on the command line // with the -debug-only=X option. // bool llvm::isCurrentDebugType(const char *DebugType) { -#ifndef NDEBUG return CurrentDebugType.empty() || DebugType == CurrentDebugType; +} #else - return false; +// Avoid "has no symbols" warning. +int Debug_dummy = 0; #endif -} - -// getErrorOutputStream - Returns the error output stream (std::cerr). This -// places the std::c* I/O streams into one .cpp file and relieves the whole -// program from having to have hundreds of static c'tor/d'tors for them. 
-// -OStream &llvm::getErrorOutputStream(const char *DebugType) { - static OStream cnoout(0); - if (DebugFlag && isCurrentDebugType(DebugType)) - return cerr; - else - return cnoout; -} diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp index fa99035..8b688ca 100644 --- a/lib/Support/Dwarf.cpp +++ b/lib/Support/Dwarf.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" #include <cassert> @@ -83,7 +84,7 @@ const char *TagString(unsigned Tag) { case DW_TAG_lo_user: return "DW_TAG_lo_user"; case DW_TAG_hi_user: return "DW_TAG_hi_user"; } - assert(0 && "Unknown Dwarf Tag"); + llvm_unreachable("Unknown Dwarf Tag"); return ""; } @@ -94,7 +95,7 @@ const char *ChildrenString(unsigned Children) { case DW_CHILDREN_no: return "CHILDREN_no"; case DW_CHILDREN_yes: return "CHILDREN_yes"; } - assert(0 && "Unknown Dwarf ChildrenFlag"); + llvm_unreachable("Unknown Dwarf ChildrenFlag"); return ""; } @@ -205,7 +206,7 @@ const char *AttributeString(unsigned Attribute) { case DW_AT_APPLE_major_runtime_vers: return "DW_AT_APPLE_major_runtime_vers"; case DW_AT_APPLE_runtime_class: return "DW_AT_APPLE_runtime_class"; } - assert(0 && "Unknown Dwarf Attribute"); + llvm_unreachable("Unknown Dwarf Attribute"); return ""; } @@ -235,7 +236,7 @@ const char *FormEncodingString(unsigned Encoding) { case DW_FORM_ref_udata: return "FORM_ref_udata"; case DW_FORM_indirect: return "FORM_indirect"; } - assert(0 && "Unknown Dwarf Form Encoding"); + llvm_unreachable("Unknown Dwarf Form Encoding"); return ""; } @@ -310,7 +311,7 @@ const char *OperationEncodingString(unsigned Encoding) { case DW_OP_lo_user: return "OP_lo_user"; case DW_OP_hi_user: return "OP_hi_user"; } - assert(0 && "Unknown Dwarf Operation Encoding"); + llvm_unreachable("Unknown Dwarf Operation Encoding"); return ""; } @@ -336,7 +337,7 @@ const char *AttributeEncodingString(unsigned Encoding) { case 
DW_ATE_lo_user: return "ATE_lo_user"; case DW_ATE_hi_user: return "ATE_hi_user"; } - assert(0 && "Unknown Dwarf Attribute Encoding"); + llvm_unreachable("Unknown Dwarf Attribute Encoding"); return ""; } @@ -350,7 +351,7 @@ const char *DecimalSignString(unsigned Sign) { case DW_DS_leading_separate: return "DS_leading_separate"; case DW_DS_trailing_separate: return "DS_trailing_separate"; } - assert(0 && "Unknown Dwarf Decimal Sign Attribute"); + llvm_unreachable("Unknown Dwarf Decimal Sign Attribute"); return ""; } @@ -364,7 +365,7 @@ const char *EndianityString(unsigned Endian) { case DW_END_lo_user: return "END_lo_user"; case DW_END_hi_user: return "END_hi_user"; } - assert(0 && "Unknown Dwarf Endianity"); + llvm_unreachable("Unknown Dwarf Endianity"); return ""; } @@ -377,7 +378,7 @@ const char *AccessibilityString(unsigned Access) { case DW_ACCESS_protected: return "ACCESS_protected"; case DW_ACCESS_private: return "ACCESS_private"; } - assert(0 && "Unknown Dwarf Accessibility"); + llvm_unreachable("Unknown Dwarf Accessibility"); return ""; } @@ -389,7 +390,7 @@ const char *VisibilityString(unsigned Visibility) { case DW_VIS_exported: return "VIS_exported"; case DW_VIS_qualified: return "VIS_qualified"; } - assert(0 && "Unknown Dwarf Visibility"); + llvm_unreachable("Unknown Dwarf Visibility"); return ""; } @@ -401,7 +402,7 @@ const char *VirtualityString(unsigned Virtuality) { case DW_VIRTUALITY_virtual: return "VIRTUALITY_virtual"; case DW_VIRTUALITY_pure_virtual: return "VIRTUALITY_pure_virtual"; } - assert(0 && "Unknown Dwarf Virtuality"); + llvm_unreachable("Unknown Dwarf Virtuality"); return ""; } @@ -431,7 +432,7 @@ const char *LanguageString(unsigned Language) { case DW_LANG_lo_user: return "LANG_lo_user"; case DW_LANG_hi_user: return "LANG_hi_user"; } - assert(0 && "Unknown Dwarf Language"); + llvm_unreachable("Unknown Dwarf Language"); return ""; } @@ -444,7 +445,7 @@ const char *CaseString(unsigned Case) { case DW_ID_down_case: return "ID_down_case"; 
case DW_ID_case_insensitive: return "ID_case_insensitive"; } - assert(0 && "Unknown Dwarf Identifier Case"); + llvm_unreachable("Unknown Dwarf Identifier Case"); return ""; } @@ -458,7 +459,7 @@ const char *ConventionString(unsigned Convention) { case DW_CC_lo_user: return "CC_lo_user"; case DW_CC_hi_user: return "CC_hi_user"; } - assert(0 && "Unknown Dwarf Calling Convention"); + llvm_unreachable("Unknown Dwarf Calling Convention"); return ""; } @@ -471,7 +472,7 @@ const char *InlineCodeString(unsigned Code) { case DW_INL_declared_not_inlined: return "INL_declared_not_inlined"; case DW_INL_declared_inlined: return "INL_declared_inlined"; } - assert(0 && "Unknown Dwarf Inline Code"); + llvm_unreachable("Unknown Dwarf Inline Code"); return ""; } @@ -482,7 +483,7 @@ const char *ArrayOrderString(unsigned Order) { case DW_ORD_row_major: return "ORD_row_major"; case DW_ORD_col_major: return "ORD_col_major"; } - assert(0 && "Unknown Dwarf Array Order"); + llvm_unreachable("Unknown Dwarf Array Order"); return ""; } @@ -493,7 +494,7 @@ const char *DiscriminantString(unsigned Discriminant) { case DW_DSC_label: return "DSC_label"; case DW_DSC_range: return "DSC_range"; } - assert(0 && "Unknown Dwarf Discriminant Descriptor"); + llvm_unreachable("Unknown Dwarf Discriminant Descriptor"); return ""; } @@ -514,7 +515,7 @@ const char *LNStandardString(unsigned Standard) { case DW_LNS_set_epilogue_begin: return "LNS_set_epilogue_begin"; case DW_LNS_set_isa: return "LNS_set_isa"; } - assert(0 && "Unknown Dwarf Line Number Standard"); + llvm_unreachable("Unknown Dwarf Line Number Standard"); return ""; } @@ -529,7 +530,7 @@ const char *LNExtendedString(unsigned Encoding) { case DW_LNE_lo_user: return "LNE_lo_user"; case DW_LNE_hi_user: return "LNE_hi_user"; } - assert(0 && "Unknown Dwarf Line Number Extended Opcode Encoding"); + llvm_unreachable("Unknown Dwarf Line Number Extended Opcode Encoding"); return ""; } @@ -544,7 +545,7 @@ const char *MacinfoString(unsigned Encoding) { case 
DW_MACINFO_end_file: return "MACINFO_end_file"; case DW_MACINFO_vendor_ext: return "MACINFO_vendor_ext"; } - assert(0 && "Unknown Dwarf Macinfo Type Encodings"); + llvm_unreachable("Unknown Dwarf Macinfo Type Encodings"); return ""; } @@ -580,7 +581,7 @@ const char *CallFrameString(unsigned Encoding) { case DW_CFA_lo_user: return "CFA_lo_user"; case DW_CFA_hi_user: return "CFA_hi_user"; } - assert(0 && "Unknown Dwarf Call Frame Instruction Encodings"); + llvm_unreachable("Unknown Dwarf Call Frame Instruction Encodings"); return ""; } diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp new file mode 100644 index 0000000..dff4f03 --- /dev/null +++ b/lib/Support/ErrorHandling.cpp @@ -0,0 +1,73 @@ +//===- lib/Support/ErrorHandling.cpp - Callbacks for errors -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an API for error handling, it supersedes cerr+abort(), and +// cerr+exit() style error handling. +// Callbacks can be registered for these errors through this API. 
+//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/System/Threading.h" +#include <cassert> +#include <cstdlib> + +using namespace llvm; +using namespace std; + +static llvm_error_handler_t ErrorHandler = 0; +static void *ErrorHandlerUserData = 0; + +namespace llvm { +void llvm_install_error_handler(llvm_error_handler_t handler, + void *user_data) { + assert(!llvm_is_multithreaded() && + "Cannot register error handlers after starting multithreaded mode!\n"); + assert(!ErrorHandler && "Error handler already registered!\n"); + ErrorHandler = handler; + ErrorHandlerUserData = user_data; +} + +void llvm_remove_error_handler() { + ErrorHandler = 0; +} + +void llvm_report_error(const char *reason) { + llvm_report_error(Twine(reason)); +} + +void llvm_report_error(const std::string &reason) { + llvm_report_error(Twine(reason)); +} + +void llvm_report_error(const Twine &reason) { + if (!ErrorHandler) { + errs() << "LLVM ERROR: " << reason << "\n"; + } else { + ErrorHandler(ErrorHandlerUserData, reason.str()); + } + exit(1); +} + +void llvm_unreachable_internal(const char *msg, const char *file, + unsigned line) { + // This code intentionally doesn't call the ErrorHandler callback, because + // llvm_unreachable is intended to be used to indicate "impossible" + // situations, and not legitimate runtime errors. 
+ if (msg) + errs() << msg << "\n"; + errs() << "UNREACHABLE executed"; + if (file) + errs() << " at " << file << ":" << line; + errs() << "!\n"; + abort(); +} +} + diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index 41c730e..954dc77 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/FoldingSet.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include <cassert> #include <cstring> @@ -50,7 +51,7 @@ void FoldingSetNodeID::AddInteger(unsigned long I) { else if (sizeof(long) == sizeof(long long)) { AddInteger((unsigned long long)I); } else { - assert(0 && "unexpected sizeof(long)"); + llvm_unreachable("unexpected sizeof(long)"); } } void FoldingSetNodeID::AddInteger(long long I) { @@ -62,14 +63,14 @@ void FoldingSetNodeID::AddInteger(unsigned long long I) { Bits.push_back(unsigned(I >> 32)); } -void FoldingSetNodeID::AddString(const char *String, const char *End) { - unsigned Size = static_cast<unsigned>(End - String); +void FoldingSetNodeID::AddString(StringRef String) { + unsigned Size = String.size(); Bits.push_back(Size); if (!Size) return; unsigned Units = Size / 4; unsigned Pos = 0; - const unsigned *Base = (const unsigned *)String; + const unsigned *Base = (const unsigned*) String.data(); // If the string is aligned do a bulk transfer. if (!((intptr_t)Base & 3)) { @@ -99,14 +100,6 @@ void FoldingSetNodeID::AddString(const char *String, const char *End) { Bits.push_back(V); } -void FoldingSetNodeID::AddString(const char *String) { - AddString(String, String + strlen(String)); -} - -void FoldingSetNodeID::AddString(const std::string &String) { - AddString(&*String.begin(), &*String.end()); -} - /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to /// lookup the node in the FoldingSetImpl. 
unsigned FoldingSetNodeID::ComputeHash() const { diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp new file mode 100644 index 0000000..70f2cfa --- /dev/null +++ b/lib/Support/FormattedStream.cpp @@ -0,0 +1,93 @@ +//===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of formatted_raw_ostream. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/FormattedStream.h" + +using namespace llvm; + +/// CountColumns - Examine the given char sequence and figure out which +/// column we end up in after output. +/// +static unsigned CountColumns(unsigned Column, const char *Ptr, size_t Size) { + // Keep track of the current column by scanning the string for + // special characters + + for (const char *End = Ptr + Size; Ptr != End; ++Ptr) { + ++Column; + if (*Ptr == '\n' || *Ptr == '\r') + Column = 0; + else if (*Ptr == '\t') + // Assumes tab stop = 8 characters. + Column += (8 - (Column & 0x7)) & 0x7; + } + + return Column; +} + +/// ComputeColumn - Examine the current output and figure out which +/// column we end up in after output. +void formatted_raw_ostream::ComputeColumn(const char *Ptr, size_t Size) { + // If our previous scan pointer is inside the buffer, assume we already + // scanned those bytes. This depends on raw_ostream to not change our buffer + // in unexpected ways. + if (Ptr <= Scanned && Scanned <= Ptr + Size) { + // Scan all characters added since our last scan to determine the new + // column. 
+ ColumnScanned = CountColumns(ColumnScanned, Scanned, + Size - (Scanned - Ptr)); + } else + ColumnScanned = CountColumns(ColumnScanned, Ptr, Size); + + // Update the scanning pointer. + Scanned = Ptr + Size; +} + +/// PadToColumn - Align the output to some column number. +/// +/// \param NewCol - The column to move to. +/// \param MinPad - The minimum space to give after the most recent +/// I/O, even if the current column + minpad > newcol. +/// +void formatted_raw_ostream::PadToColumn(unsigned NewCol) { + // Figure out what's in the buffer and add it to the column count. + ComputeColumn(getBufferStart(), GetNumBytesInBuffer()); + + // Output spaces until we reach the desired column. + indent(std::max(int(NewCol - ColumnScanned), 1)); +} + +void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) { + // Figure out what's in the buffer and add it to the column count. + ComputeColumn(Ptr, Size); + + // Write the data to the underlying stream (which is unbuffered, so + // the data will be immediately written out). + TheStream->write(Ptr, Size); + + // Reset the scanning pointer. + Scanned = 0; +} + +/// fouts() - This returns a reference to a formatted_raw_ostream for +/// standard output. Use it like: fouts() << "foo" << "bar"; +formatted_raw_ostream &llvm::fouts() { + static formatted_raw_ostream S(outs()); + return S; +} + +/// ferrs() - This returns a reference to a formatted_raw_ostream for +/// standard error. 
Use it like: ferrs() << "foo" << "bar"; +formatted_raw_ostream &llvm::ferrs() { + static formatted_raw_ostream S(errs()); + return S; +} diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index c359dfb..c8bca6e 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -12,13 +12,47 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/GraphWriter.h" -#include "llvm/Support/Streams.h" #include "llvm/System/Path.h" #include "llvm/System/Program.h" #include "llvm/Config/config.h" using namespace llvm; -void llvm::DisplayGraph(const sys::Path &Filename) { +std::string llvm::DOT::EscapeString(const std::string &Label) { + std::string Str(Label); + for (unsigned i = 0; i != Str.length(); ++i) + switch (Str[i]) { + case '\n': + Str.insert(Str.begin()+i, '\\'); // Escape character... + ++i; + Str[i] = 'n'; + break; + case '\t': + Str.insert(Str.begin()+i, ' '); // Convert to two spaces + ++i; + Str[i] = ' '; + break; + case '\\': + if (i+1 != Str.length()) + switch (Str[i+1]) { + case 'l': continue; // don't disturb \l + case '|': case '{': case '}': + Str.erase(Str.begin()+i); continue; + default: break; + } + case '{': case '}': + case '<': case '>': + case '|': case '"': + Str.insert(Str.begin()+i, '\\'); // Escape character... + ++i; // don't infinite loop + break; + } + return Str; +} + + + +void llvm::DisplayGraph(const sys::Path &Filename, bool wait, + GraphProgram::Name program) { std::string ErrMsg; #if HAVE_GRAPHVIZ sys::Path Graphviz(LLVM_PATH_GRAPHVIZ); @@ -28,18 +62,61 @@ void llvm::DisplayGraph(const sys::Path &Filename) { args.push_back(Filename.c_str()); args.push_back(0); - cerr << "Running 'Graphviz' program... " << std::flush; - if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg)) { - cerr << "Error viewing graph: " << ErrMsg << "\n"; - } -#elif (HAVE_GV && HAVE_DOT) + errs() << "Running 'Graphviz' program... 
"; + if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg)) + errs() << "Error viewing graph " << Filename.str() << ": " << ErrMsg + << "\n"; + else + Filename.eraseFromDisk(); + +#elif (HAVE_GV && (HAVE_DOT || HAVE_FDP || HAVE_NEATO || \ + HAVE_TWOPI || HAVE_CIRCO)) sys::Path PSFilename = Filename; PSFilename.appendSuffix("ps"); - - sys::Path dot(LLVM_PATH_DOT); + + sys::Path prog; + + // Set default grapher +#if HAVE_CIRCO + prog = sys::Path(LLVM_PATH_CIRCO); +#endif +#if HAVE_TWOPI + prog = sys::Path(LLVM_PATH_TWOPI); +#endif +#if HAVE_NEATO + prog = sys::Path(LLVM_PATH_NEATO); +#endif +#if HAVE_FDP + prog = sys::Path(LLVM_PATH_FDP); +#endif +#if HAVE_DOT + prog = sys::Path(LLVM_PATH_DOT); +#endif + + // Find which program the user wants +#if HAVE_DOT + if (program == GraphProgram::DOT) + prog = sys::Path(LLVM_PATH_DOT); +#endif +#if (HAVE_FDP) + if (program == GraphProgram::FDP) + prog = sys::Path(LLVM_PATH_FDP); +#endif +#if (HAVE_NEATO) + if (program == GraphProgram::NEATO) + prog = sys::Path(LLVM_PATH_NEATO); +#endif +#if (HAVE_TWOPI) + if (program == GraphProgram::TWOPI) + prog = sys::Path(LLVM_PATH_TWOPI); +#endif +#if (HAVE_CIRCO) + if (program == GraphProgram::CIRCO) + prog = sys::Path(LLVM_PATH_CIRCO); +#endif std::vector<const char*> args; - args.push_back(dot.c_str()); + args.push_back(prog.c_str()); args.push_back("-Tps"); args.push_back("-Nfontname=Courier"); args.push_back("-Gsize=7.5,10"); @@ -48,11 +125,13 @@ void llvm::DisplayGraph(const sys::Path &Filename) { args.push_back(PSFilename.c_str()); args.push_back(0); - cerr << "Running 'dot' program... " << std::flush; - if (sys::Program::ExecuteAndWait(dot, &args[0],0,0,0,0,&ErrMsg)) { - cerr << "Error viewing graph: '" << ErrMsg << "\n"; + errs() << "Running '" << prog.str() << "' program... "; + + if (sys::Program::ExecuteAndWait(prog, &args[0], 0, 0, 0, 0, &ErrMsg)) { + errs() << "Error viewing graph " << Filename.str() << ": '" + << ErrMsg << "\n"; } else { - cerr << " done. 
\n"; + errs() << " done. \n"; sys::Path gv(LLVM_PATH_GV); args.clear(); @@ -62,11 +141,18 @@ void llvm::DisplayGraph(const sys::Path &Filename) { args.push_back(0); ErrMsg.clear(); - if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg)) { - cerr << "Error viewing graph: " << ErrMsg << "\n"; + if (wait) { + if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg)) + errs() << "Error viewing graph: " << ErrMsg << "\n"; + Filename.eraseFromDisk(); + PSFilename.eraseFromDisk(); + } + else { + sys::Program::ExecuteNoWait(gv, &args[0],0,0,0,&ErrMsg); + errs() << "Remember to erase graph files: " << Filename.str() << " " + << PSFilename.str() << "\n"; } } - PSFilename.eraseFromDisk(); #elif HAVE_DOTTY sys::Path dotty(LLVM_PATH_DOTTY); @@ -75,15 +161,15 @@ void llvm::DisplayGraph(const sys::Path &Filename) { args.push_back(Filename.c_str()); args.push_back(0); - cerr << "Running 'dotty' program... " << std::flush; + errs() << "Running 'dotty' program... "; if (sys::Program::ExecuteAndWait(dotty, &args[0],0,0,0,0,&ErrMsg)) { - cerr << "Error viewing graph: " << ErrMsg << "\n"; + errs() << "Error viewing graph " << Filename.str() << ": " + << ErrMsg << "\n"; } else { #ifdef __MINGW32__ // Dotty spawns another app and doesn't wait until it returns return; #endif + Filename.eraseFromDisk(); } #endif - - Filename.eraseFromDisk(); } diff --git a/lib/Support/MemoryObject.cpp b/lib/Support/MemoryObject.cpp new file mode 100644 index 0000000..91e3ecd --- /dev/null +++ b/lib/Support/MemoryObject.cpp @@ -0,0 +1,34 @@ +//===- MemoryObject.cpp - Abstract memory interface -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/MemoryObject.h" +using namespace llvm; + +MemoryObject::~MemoryObject() { +} + +int MemoryObject::readBytes(uint64_t address, + uint64_t size, + uint8_t* buf, + uint64_t* copied) const { + uint64_t current = address; + uint64_t limit = getBase() + getExtent(); + + while (current - address < size && current < limit) { + if (readByte(current, &buf[(current - address)])) + return -1; + + current++; + } + + if (copied) + *copied = current - address; + + return 0; +} diff --git a/lib/Support/PluginLoader.cpp b/lib/Support/PluginLoader.cpp index ef32af4..36caecf 100644 --- a/lib/Support/PluginLoader.cpp +++ b/lib/Support/PluginLoader.cpp @@ -14,10 +14,9 @@ #define DONT_GET_PLUGIN_LOADER_OPTION #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/PluginLoader.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/System/DynamicLibrary.h" #include "llvm/System/Mutex.h" -#include <ostream> #include <vector> using namespace llvm; @@ -25,23 +24,23 @@ static ManagedStatic<std::vector<std::string> > Plugins; static ManagedStatic<sys::SmartMutex<true> > PluginsLock; void PluginLoader::operator=(const std::string &Filename) { - sys::SmartScopedLock<true> Lock(&*PluginsLock); + sys::SmartScopedLock<true> Lock(*PluginsLock); std::string Error; if (sys::DynamicLibrary::LoadLibraryPermanently(Filename.c_str(), &Error)) { - cerr << "Error opening '" << Filename << "': " << Error - << "\n -load request ignored.\n"; + errs() << "Error opening '" << Filename << "': " << Error + << "\n -load request ignored.\n"; } else { Plugins->push_back(Filename); } } unsigned PluginLoader::getNumPlugins() { - sys::SmartScopedLock<true> Lock(&*PluginsLock); + sys::SmartScopedLock<true> Lock(*PluginsLock); return Plugins.isConstructed() ? 
Plugins->size() : 0; } std::string &PluginLoader::getPlugin(unsigned num) { - sys::SmartScopedLock<true> Lock(&*PluginsLock); + sys::SmartScopedLock<true> Lock(*PluginsLock); assert(Plugins.isConstructed() && num < Plugins->size() && "Asking for an out of bounds plugin"); return (*Plugins)[num]; diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp index 14290a1..68b41a7 100644 --- a/lib/Support/PrettyStackTrace.cpp +++ b/lib/Support/PrettyStackTrace.cpp @@ -19,6 +19,10 @@ #include "llvm/ADT/SmallString.h" using namespace llvm; +namespace llvm { + bool DisablePrettyStackTrace = false; +} + // FIXME: This should be thread local when llvm supports threads. static sys::ThreadLocal<const PrettyStackTraceEntry> PrettyStackTraceHead; @@ -67,15 +71,16 @@ static void CrashHandler(void *Cookie) { } if (!TmpStr.empty()) { - __crashreporter_info__ = strdup(TmpStr.c_str()); - errs() << __crashreporter_info__; + __crashreporter_info__ = strdup(std::string(TmpStr.str()).c_str()); + errs() << TmpStr.str(); } #endif } static bool RegisterCrashPrinter() { - sys::AddSignalHandler(CrashHandler, 0); + if (!DisablePrettyStackTrace) + sys::AddSignalHandler(CrashHandler, 0); return false; } diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp new file mode 100644 index 0000000..618ca05 --- /dev/null +++ b/lib/Support/Regex.cpp @@ -0,0 +1,92 @@ +//===-- Regex.cpp - Regular Expression matcher implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a POSIX regular expression matcher. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Regex.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallVector.h" +#include "regex_impl.h" +#include <string> +using namespace llvm; + +Regex::Regex(const StringRef &regex, unsigned Flags) { + unsigned flags = 0; + preg = new llvm_regex(); + preg->re_endp = regex.end(); + if (Flags & IgnoreCase) + flags |= REG_ICASE; + if (Flags & Newline) + flags |= REG_NEWLINE; + error = llvm_regcomp(preg, regex.data(), flags|REG_EXTENDED|REG_PEND); +} + +Regex::~Regex() { + llvm_regfree(preg); + delete preg; +} + +bool Regex::isValid(std::string &Error) { + if (!error) + return true; + + size_t len = llvm_regerror(error, preg, NULL, 0); + + Error.resize(len); + llvm_regerror(error, preg, &Error[0], len); + return false; +} + +/// getNumMatches - In a valid regex, return the number of parenthesized +/// matches it contains. +unsigned Regex::getNumMatches() const { + return preg->re_nsub; +} + +bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){ + unsigned nmatch = Matches ? preg->re_nsub+1 : 0; + + // pmatch needs to have at least one element. + SmallVector<llvm_regmatch_t, 8> pm; + pm.resize(nmatch > 0 ? nmatch : 1); + pm[0].rm_so = 0; + pm[0].rm_eo = String.size(); + + int rc = llvm_regexec(preg, String.data(), nmatch, pm.data(), REG_STARTEND); + + if (rc == REG_NOMATCH) + return false; + if (rc != 0) { + // regexec can fail due to invalid pattern or running out of memory. + error = rc; + return false; + } + + // There was a match. 
+ + if (Matches) { // match position requested + Matches->clear(); + + for (unsigned i = 0; i != nmatch; ++i) { + if (pm[i].rm_so == -1) { + // this group didn't match + Matches->push_back(StringRef()); + continue; + } + assert(pm[i].rm_eo > pm[i].rm_so); + Matches->push_back(StringRef(String.data()+pm[i].rm_so, + pm[i].rm_eo-pm[i].rm_so)); + } + } + + return true; +} diff --git a/lib/Support/SlowOperationInformer.cpp b/lib/Support/SlowOperationInformer.cpp index d5ffff9..b4e9430 100644 --- a/lib/Support/SlowOperationInformer.cpp +++ b/lib/Support/SlowOperationInformer.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/SlowOperationInformer.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/System/Alarm.h" #include <sstream> #include <cassert> @@ -28,8 +28,8 @@ SlowOperationInformer::~SlowOperationInformer() { if (LastPrintAmount) { // If we have printed something, make _sure_ we print the 100% amount, and // also print a newline. 
- cout << std::string(LastPrintAmount, '\b') << "Progress " - << OperationName << ": 100% \n"; + outs() << std::string(LastPrintAmount, '\b') << "Progress " + << OperationName << ": 100% \n"; } } @@ -40,7 +40,7 @@ SlowOperationInformer::~SlowOperationInformer() { bool SlowOperationInformer::progress(unsigned Amount) { int status = sys::AlarmStatus(); if (status == -1) { - cout << "\n"; + outs() << "\n"; LastPrintAmount = 0; return true; } @@ -61,6 +61,7 @@ bool SlowOperationInformer::progress(unsigned Amount) { OS << "% "; LastPrintAmount = OS.str().size(); - cout << ToPrint+OS.str() << std::flush; + outs() << ToPrint+OS.str(); + outs().flush(); return false; } diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index 6b0d55c1..4b93f7f 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -18,7 +18,24 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +namespace { + struct LineNoCacheTy { + int LastQueryBufferID; + const char *LastQuery; + unsigned LineNoOfQuery; + }; +} + +static LineNoCacheTy *getCache(void *Ptr) { + return (LineNoCacheTy*)Ptr; +} + + SourceMgr::~SourceMgr() { + // Delete the line # cache if allocated. + if (LineNoCacheTy *Cache = getCache(LineNoCache)) + delete Cache; + while (!Buffers.empty()) { delete Buffers.back().Buffer; Buffers.pop_back(); @@ -71,8 +88,31 @@ unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const { const char *Ptr = Buff->getBufferStart(); + // If we have a line number cache, and if the query is to a later point in the + // same file, start searching from the last query location. This optimizes + // for the case when multiple diagnostics come out of one file in order. + if (LineNoCacheTy *Cache = getCache(LineNoCache)) + if (Cache->LastQueryBufferID == BufferID && + Cache->LastQuery <= Loc.getPointer()) { + Ptr = Cache->LastQuery; + LineNo = Cache->LineNoOfQuery; + } + + // Scan for the location being queried, keeping track of the number of lines + // we see. 
for (; SMLoc::getFromPointer(Ptr) != Loc; ++Ptr) if (*Ptr == '\n') ++LineNo; + + + // Allocate the line number cache if it doesn't exist. + if (LineNoCache == 0) + LineNoCache = new LineNoCacheTy(); + + // Update the line # cache. + LineNoCacheTy &Cache = *getCache(LineNoCache); + Cache.LastQueryBufferID = BufferID; + Cache.LastQuery = Ptr; + Cache.LineNoOfQuery = LineNo; return LineNo; } diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp index 33570b0..14f94bc 100644 --- a/lib/Support/Statistic.cpp +++ b/lib/Support/Statistic.cpp @@ -24,16 +24,15 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/System/Mutex.h" #include "llvm/ADT/StringExtras.h" #include <algorithm> -#include <ostream> #include <cstring> using namespace llvm; // GetLibSupportInfoOutputFile - Return a file stream to print our output on. -namespace llvm { extern std::ostream *GetLibSupportInfoOutputFile(); } +namespace llvm { extern raw_ostream *GetLibSupportInfoOutputFile(); } /// -stats - Command line option to cause transformations to emit stats about /// what they did. @@ -58,14 +57,14 @@ public: } static ManagedStatic<StatisticInfo> StatInfo; -static ManagedStatic<sys::Mutex> StatLock; +static ManagedStatic<sys::SmartMutex<true> > StatLock; /// RegisterStatistic - The first time a statistic is bumped, this method is /// called. void Statistic::RegisterStatistic() { // If stats are enabled, inform StatInfo that this statistic should be // printed. - sys::ScopedLock Writer(&*StatLock); + sys::SmartScopedLock<true> Writer(*StatLock); if (!Initialized) { if (Enabled) StatInfo->addStatistic(this); @@ -96,7 +95,7 @@ StatisticInfo::~StatisticInfo() { if (Stats.empty()) return; // Get the stream to write to. 
- std::ostream &OutStream = *GetLibSupportInfoOutputFile(); + raw_ostream &OutStream = *GetLibSupportInfoOutputFile(); // Figure out how long the biggest Value and Name fields are. unsigned MaxNameLen = 0, MaxValLen = 0; @@ -125,8 +124,9 @@ StatisticInfo::~StatisticInfo() { } - OutStream << std::endl; // Flush the output stream... + OutStream << '\n'; // Flush the output stream... + OutStream.flush(); - if (&OutStream != cerr.stream() && &OutStream != cout.stream()) + if (&OutStream != &outs() && &OutStream != &errs()) delete &OutStream; // Close the file. } diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp index 0c61732..040308b 100644 --- a/lib/Support/StringMap.cpp +++ b/lib/Support/StringMap.cpp @@ -65,14 +65,13 @@ static unsigned HashString(const char *Start, const char *End) { /// specified bucket will be non-null. Otherwise, it will be null. In either /// case, the FullHashValue field of the bucket will be set to the hash value /// of the string. -unsigned StringMapImpl::LookupBucketFor(const char *NameStart, - const char *NameEnd) { +unsigned StringMapImpl::LookupBucketFor(const StringRef &Name) { unsigned HTSize = NumBuckets; if (HTSize == 0) { // Hash table unallocated so far? init(16); HTSize = NumBuckets; } - unsigned FullHashValue = HashString(NameStart, NameEnd); + unsigned FullHashValue = HashString(Name.begin(), Name.end()); unsigned BucketNo = FullHashValue & (HTSize-1); unsigned ProbeAmt = 1; @@ -102,12 +101,10 @@ unsigned StringMapImpl::LookupBucketFor(const char *NameStart, // being non-null and for the full hash value) not at the items. This // is important for cache locality. - // Do the comparison like this because NameStart isn't necessarily + // Do the comparison like this because Name isn't necessarily // null-terminated! 
char *ItemStr = (char*)BucketItem+ItemSize; - unsigned ItemStrLen = BucketItem->getKeyLength(); - if (unsigned(NameEnd-NameStart) == ItemStrLen && - memcmp(ItemStr, NameStart, ItemStrLen) == 0) { + if (Name == StringRef(ItemStr, BucketItem->getKeyLength())) { // We found a match! return BucketNo; } @@ -126,10 +123,10 @@ unsigned StringMapImpl::LookupBucketFor(const char *NameStart, /// FindKey - Look up the bucket that contains the specified key. If it exists /// in the map, return the bucket number of the key. Otherwise return -1. /// This does not modify the map. -int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const { +int StringMapImpl::FindKey(const StringRef &Key) const { unsigned HTSize = NumBuckets; if (HTSize == 0) return -1; // Really empty table? - unsigned FullHashValue = HashString(KeyStart, KeyEnd); + unsigned FullHashValue = HashString(Key.begin(), Key.end()); unsigned BucketNo = FullHashValue & (HTSize-1); unsigned ProbeAmt = 1; @@ -151,9 +148,7 @@ int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const { // Do the comparison like this because NameStart isn't necessarily // null-terminated! char *ItemStr = (char*)BucketItem+ItemSize; - unsigned ItemStrLen = BucketItem->getKeyLength(); - if (unsigned(KeyEnd-KeyStart) == ItemStrLen && - memcmp(ItemStr, KeyStart, ItemStrLen) == 0) { + if (Key == StringRef(ItemStr, BucketItem->getKeyLength())) { // We found a match! return BucketNo; } @@ -172,16 +167,15 @@ int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const { /// delete it. This aborts if the value isn't in the table. 
void StringMapImpl::RemoveKey(StringMapEntryBase *V) { const char *VStr = (char*)V + ItemSize; - StringMapEntryBase *V2 = RemoveKey(VStr, VStr+V->getKeyLength()); + StringMapEntryBase *V2 = RemoveKey(StringRef(VStr, V->getKeyLength())); V2 = V2; assert(V == V2 && "Didn't find key?"); } /// RemoveKey - Remove the StringMapEntry for the specified key from the /// table, returning it. If the key is not in the table, this returns null. -StringMapEntryBase *StringMapImpl::RemoveKey(const char *KeyStart, - const char *KeyEnd) { - int Bucket = FindKey(KeyStart, KeyEnd); +StringMapEntryBase *StringMapImpl::RemoveKey(const StringRef &Key) { + int Bucket = FindKey(Key); if (Bucket == -1) return 0; StringMapEntryBase *Result = TheTable[Bucket].Item; diff --git a/lib/Support/StringPool.cpp b/lib/Support/StringPool.cpp index b9c1fd0..1ee917f 100644 --- a/lib/Support/StringPool.cpp +++ b/lib/Support/StringPool.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/StringPool.h" -#include "llvm/Support/Streams.h" +#include "llvm/ADT/StringRef.h" using namespace llvm; @@ -22,12 +22,12 @@ StringPool::~StringPool() { assert(InternTable.empty() && "PooledStringPtr leaked!"); } -PooledStringPtr StringPool::intern(const char *Begin, const char *End) { - table_t::iterator I = InternTable.find(Begin, End); +PooledStringPtr StringPool::intern(const StringRef &Key) { + table_t::iterator I = InternTable.find(Key); if (I != InternTable.end()) return PooledStringPtr(&*I); - entry_t *S = entry_t::Create(Begin, End); + entry_t *S = entry_t::Create(Key.begin(), Key.end()); S->getValue().Pool = this; InternTable.insert(S); diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp new file mode 100644 index 0000000..deaa19e --- /dev/null +++ b/lib/Support/StringRef.cpp @@ -0,0 +1,188 @@ +//===-- StringRef.cpp - Lightweight String References ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// 
This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringRef.h" +using namespace llvm; + +// MSVC emits references to this into the translation units which reference it. +#ifndef _MSC_VER +const size_t StringRef::npos; +#endif + +//===----------------------------------------------------------------------===// +// String Searching +//===----------------------------------------------------------------------===// + + +/// find - Search for the first string \arg Str in the string. +/// +/// \return - The index of the first occurence of \arg Str, or npos if not +/// found. +size_t StringRef::find(const StringRef &Str) const { + size_t N = Str.size(); + if (N > Length) + return npos; + for (size_t i = 0, e = Length - N + 1; i != e; ++i) + if (substr(i, N).equals(Str)) + return i; + return npos; +} + +/// rfind - Search for the last string \arg Str in the string. +/// +/// \return - The index of the last occurence of \arg Str, or npos if not +/// found. +size_t StringRef::rfind(const StringRef &Str) const { + size_t N = Str.size(); + if (N > Length) + return npos; + for (size_t i = Length - N + 1, e = 0; i != e;) { + --i; + if (substr(i, N).equals(Str)) + return i; + } + return npos; +} + +/// find_first_of - Find the first character from the string 'Chars' in the +/// current string or return npos if not in string. +StringRef::size_type StringRef::find_first_of(StringRef Chars) const { + for (size_type i = 0, e = Length; i != e; ++i) + if (Chars.find(Data[i]) != npos) + return i; + return npos; +} + +/// find_first_not_of - Find the first character in the string that is not +/// in the string 'Chars' or return npos if all are in string. Same as find. 
+StringRef::size_type StringRef::find_first_not_of(StringRef Chars) const { + for (size_type i = 0, e = Length; i != e; ++i) + if (Chars.find(Data[i]) == npos) + return i; + return npos; +} + + +//===----------------------------------------------------------------------===// +// Helpful Algorithms +//===----------------------------------------------------------------------===// + +/// count - Return the number of non-overlapped occurrences of \arg Str in +/// the string. +size_t StringRef::count(const StringRef &Str) const { + size_t Count = 0; + size_t N = Str.size(); + if (N > Length) + return 0; + for (size_t i = 0, e = Length - N + 1; i != e; ++i) + if (substr(i, N).equals(Str)) + ++Count; + return Count; +} + +/// GetAsUnsignedInteger - Workhorse method that converts a integer character +/// sequence of radix up to 36 to an unsigned long long value. +static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix, + unsigned long long &Result) { + // Autosense radix if not specified. + if (Radix == 0) { + if (Str.startswith("0x")) { + Str = Str.substr(2); + Radix = 16; + } else if (Str.startswith("0b")) { + Str = Str.substr(2); + Radix = 2; + } else if (Str.startswith("0")) + Radix = 8; + else + Radix = 10; + } + + // Empty strings (after the radix autosense) are invalid. + if (Str.empty()) return true; + + // Parse all the bytes of the string given this radix. Watch for overflow. + Result = 0; + while (!Str.empty()) { + unsigned CharVal; + if (Str[0] >= '0' && Str[0] <= '9') + CharVal = Str[0]-'0'; + else if (Str[0] >= 'a' && Str[0] <= 'z') + CharVal = Str[0]-'a'+10; + else if (Str[0] >= 'A' && Str[0] <= 'Z') + CharVal = Str[0]-'A'+10; + else + return true; + + // If the parsed value is larger than the integer radix, the string is + // invalid. + if (CharVal >= Radix) + return true; + + // Add in this character. + unsigned long long PrevResult = Result; + Result = Result*Radix+CharVal; + + // Check for overflow. 
+ if (Result < PrevResult) + return true; + + Str = Str.substr(1); + } + + return false; +} + +bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const { + return GetAsUnsignedInteger(*this, Radix, Result); +} + + +bool StringRef::getAsInteger(unsigned Radix, long long &Result) const { + unsigned long long ULLVal; + + // Handle positive strings first. + if (empty() || front() != '-') { + if (GetAsUnsignedInteger(*this, Radix, ULLVal) || + // Check for value so large it overflows a signed value. + (long long)ULLVal < 0) + return true; + Result = ULLVal; + return false; + } + + // Get the positive part of the value. + if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) || + // Reject values so large they'd overflow as negative signed, but allow + // "-0". This negates the unsigned so that the negative isn't undefined + // on signed overflow. + (long long)-ULLVal > 0) + return true; + + Result = -ULLVal; + return false; +} + +bool StringRef::getAsInteger(unsigned Radix, int &Result) const { + long long Val; + if (getAsInteger(Radix, Val) || + (int)Val != Val) + return true; + Result = Val; + return false; +} + +bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const { + unsigned long long Val; + if (getAsInteger(Radix, Val) || + (unsigned)Val != Val) + return true; + Result = Val; + return false; +} diff --git a/lib/Support/SystemUtils.cpp b/lib/Support/SystemUtils.cpp index c8c3238..299032f 100644 --- a/lib/Support/SystemUtils.cpp +++ b/lib/Support/SystemUtils.cpp @@ -12,22 +12,20 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Streams.h" #include "llvm/Support/SystemUtils.h" #include "llvm/System/Process.h" #include "llvm/System/Program.h" -#include <ostream> +#include "llvm/Support/raw_ostream.h" using namespace llvm; -bool llvm::CheckBitcodeOutputToConsole(std::ostream* stream_to_check, +bool llvm::CheckBitcodeOutputToConsole(raw_ostream &stream_to_check, bool 
print_warning) { - if (stream_to_check == cout.stream() && - sys::Process::StandardOutIsDisplayed()) { + if (stream_to_check.is_displayed()) { if (print_warning) { - cerr << "WARNING: You're attempting to print out a bitcode file.\n" - << "This is inadvisable as it may cause display problems. If\n" - << "you REALLY want to taste LLVM bitcode first-hand, you\n" - << "can force output with the `-f' option.\n\n"; + errs() << "WARNING: You're attempting to print out a bitcode file.\n" + << "This is inadvisable as it may cause display problems. If\n" + << "you REALLY want to taste LLVM bitcode first-hand, you\n" + << "can force output with the `-f' option.\n\n"; } return true; } @@ -35,24 +33,17 @@ bool llvm::CheckBitcodeOutputToConsole(std::ostream* stream_to_check, } /// FindExecutable - Find a named executable, giving the argv[0] of program -/// being executed. This allows us to find another LLVM tool if it is built -/// into the same directory, but that directory is neither the current -/// directory, nor in the PATH. If the executable cannot be found, return an -/// empty string. Return the input string if given a full path to an executable. -/// +/// being executed. This allows us to find another LLVM tool if it is built in +/// the same directory. If the executable cannot be found, return an +/// empty string. +/// @brief Find a named executable. #undef FindExecutable // needed on windows :( sys::Path llvm::FindExecutable(const std::string &ExeName, - const std::string &ProgramPath) { - // First check if the given name is already a valid path to an executable. - sys::Path Result(ExeName); - Result.makeAbsolute(); - if (Result.canExecute()) - return Result; - - // Otherwise check the directory that the calling program is in. We can do + const char *Argv0, void *MainAddr) { + // Check the directory that the calling program is in. We can do // this if ProgramPath contains at least one / character, indicating that it - // is a relative path to bugpoint itself. 
- Result = ProgramPath; + // is a relative path to the executable itself. + sys::Path Result = sys::Path::GetMainExecutable(Argv0, MainAddr); Result.eraseComponent(); if (!Result.isEmpty()) { Result.appendComponent(ExeName); @@ -60,5 +51,5 @@ sys::Path llvm::FindExecutable(const std::string &ExeName, return Result; } - return sys::Program::FindProgramByName(ExeName); + return sys::Path(); } diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp new file mode 100644 index 0000000..5896447 --- /dev/null +++ b/lib/Support/TargetRegistry.cpp @@ -0,0 +1,92 @@ +//===--- TargetRegistry.cpp - Target registration -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetRegistry.h" +#include "llvm/System/Host.h" +#include <cassert> +using namespace llvm; + +// Clients are responsible for avoid race conditions in registration. +static Target *FirstTarget = 0; + +TargetRegistry::iterator TargetRegistry::begin() { + return iterator(FirstTarget); +} + +const Target *TargetRegistry::lookupTarget(const std::string &TT, + std::string &Error) { + // Provide special warning when no targets are initialized. 
+ if (begin() == end()) { + Error = "Unable to find target for this triple (no targets are registered)"; + return 0; + } + const Target *Best = 0, *EquallyBest = 0; + unsigned BestQuality = 0; + for (iterator it = begin(), ie = end(); it != ie; ++it) { + if (unsigned Qual = it->TripleMatchQualityFn(TT)) { + if (!Best || Qual > BestQuality) { + Best = &*it; + EquallyBest = 0; + BestQuality = Qual; + } else if (Qual == BestQuality) + EquallyBest = &*it; + } + } + + if (!Best) { + Error = "No available targets are compatible with this triple, " + "see -version for the available targets."; + return 0; + } + + // Otherwise, take the best target, but make sure we don't have two equally + // good best targets. + if (EquallyBest) { + Error = std::string("Cannot choose between targets \"") + + Best->Name + "\" and \"" + EquallyBest->Name + "\""; + return 0; + } + + return Best; +} + +void TargetRegistry::RegisterTarget(Target &T, + const char *Name, + const char *ShortDesc, + Target::TripleMatchQualityFnTy TQualityFn, + bool HasJIT) { + assert(Name && ShortDesc && TQualityFn && + "Missing required target information!"); + + // Check if this target has already been initialized, we allow this as a + // convenience to some clients. + if (T.Name) + return; + + // Add to the list of targets. 
+ T.Next = FirstTarget; + FirstTarget = &T; + + T.Name = Name; + T.ShortDesc = ShortDesc; + T.TripleMatchQualityFn = TQualityFn; + T.HasJIT = HasJIT; +} + +const Target *TargetRegistry::getClosestTargetForJIT(std::string &Error) { + const Target *TheTarget = lookupTarget(sys::getHostTriple(), Error); + + if (TheTarget && !TheTarget->hasJIT()) { + Error = "No JIT compatible target available for this host"; + return 0; + } + + return TheTarget; +} + diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index ede1dc9..dd58d1f 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -14,16 +14,16 @@ #include "llvm/Support/Timer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" #include "llvm/System/Process.h" #include <algorithm> -#include <fstream> #include <functional> #include <map> using namespace llvm; // GetLibSupportInfoOutputFile - Return a file stream to print our output on. -namespace llvm { extern std::ostream *GetLibSupportInfoOutputFile(); } +namespace llvm { extern raw_ostream *GetLibSupportInfoOutputFile(); } // getLibSupportInfoOutputFilename - This ugly hack is brought to you courtesy // of constructor/destructor ordering being unspecified by C++. 
Basically the @@ -145,7 +145,7 @@ static TimeRecord getTimeRecord(bool Start) { static ManagedStatic<std::vector<Timer*> > ActiveTimers; void Timer::startTimer() { - sys::SmartScopedLock<true> L(&Lock); + sys::SmartScopedLock<true> L(Lock); Started = true; ActiveTimers->push_back(this); TimeRecord TR = getTimeRecord(true); @@ -157,7 +157,7 @@ void Timer::startTimer() { } void Timer::stopTimer() { - sys::SmartScopedLock<true> L(&Lock); + sys::SmartScopedLock<true> L(Lock); TimeRecord TR = getTimeRecord(false); Elapsed += TR.Elapsed; UserTime += TR.UserTime; @@ -229,7 +229,7 @@ static ManagedStatic<Name2Timer> NamedTimers; static ManagedStatic<Name2Pair> NamedGroupedTimers; static Timer &getNamedRegionTimer(const std::string &Name) { - sys::SmartScopedLock<true> L(&*TimerLock); + sys::SmartScopedLock<true> L(*TimerLock); Name2Timer::iterator I = NamedTimers->find(Name); if (I != NamedTimers->end()) return I->second; @@ -239,7 +239,7 @@ static Timer &getNamedRegionTimer(const std::string &Name) { static Timer &getNamedRegionTimer(const std::string &Name, const std::string &GroupName) { - sys::SmartScopedLock<true> L(&*TimerLock); + sys::SmartScopedLock<true> L(*TimerLock); Name2Pair::iterator I = NamedGroupedTimers->find(GroupName); if (I == NamedGroupedTimers->end()) { @@ -269,38 +269,17 @@ NamedRegionTimer::NamedRegionTimer(const std::string &Name, // TimerGroup Implementation //===----------------------------------------------------------------------===// -// printAlignedFP - Simulate the printf "%A.Bf" format, where A is the -// TotalWidth size, and B is the AfterDec size. 
-// -static void printAlignedFP(double Val, unsigned AfterDec, unsigned TotalWidth, - std::ostream &OS) { - assert(TotalWidth >= AfterDec+1 && "Bad FP Format!"); - OS.width(TotalWidth-AfterDec-1); - char OldFill = OS.fill(); - OS.fill(' '); - OS << (int)Val; // Integer part; - OS << "."; - OS.width(AfterDec); - OS.fill('0'); - unsigned ResultFieldSize = 1; - while (AfterDec--) ResultFieldSize *= 10; - OS << (int)(Val*ResultFieldSize) % ResultFieldSize; - OS.fill(OldFill); -} -static void printVal(double Val, double Total, std::ostream &OS) { +static void printVal(double Val, double Total, raw_ostream &OS) { if (Total < 1e-7) // Avoid dividing by zero... OS << " ----- "; else { - OS << " "; - printAlignedFP(Val, 4, 7, OS); - OS << " ("; - printAlignedFP(Val*100/Total, 1, 5, OS); - OS << "%)"; + OS << " " << format("%7.4f", Val) << " ("; + OS << format("%5.1f", Val*100/Total) << "%)"; } } -void Timer::print(const Timer &Total, std::ostream &OS) { +void Timer::print(const Timer &Total, raw_ostream &OS) { if (&Total < this) { Total.Lock.acquire(); Lock.acquire(); @@ -320,13 +299,11 @@ void Timer::print(const Timer &Total, std::ostream &OS) { OS << " "; if (Total.MemUsed) { - OS.width(9); - OS << MemUsed << " "; + OS << format("%9lld", (long long)MemUsed) << " "; } if (Total.PeakMem) { if (PeakMem) { - OS.width(9); - OS << PeakMem << " "; + OS << format("%9lld", (long long)PeakMem) << " "; } else OS << " "; } @@ -344,28 +321,30 @@ void Timer::print(const Timer &Total, std::ostream &OS) { } // GetLibSupportInfoOutputFile - Return a file stream to print our output on... 
-std::ostream * +raw_ostream * llvm::GetLibSupportInfoOutputFile() { std::string &LibSupportInfoOutputFilename = getLibSupportInfoOutputFilename(); if (LibSupportInfoOutputFilename.empty()) - return cerr.stream(); + return &errs(); if (LibSupportInfoOutputFilename == "-") - return cout.stream(); + return &outs(); - std::ostream *Result = new std::ofstream(LibSupportInfoOutputFilename.c_str(), - std::ios::app); - if (!Result->good()) { - cerr << "Error opening info-output-file '" + + std::string Error; + raw_ostream *Result = new raw_fd_ostream(LibSupportInfoOutputFilename.c_str(), + Error, raw_fd_ostream::F_Append); + if (Error.empty()) + return Result; + + errs() << "Error opening info-output-file '" << LibSupportInfoOutputFilename << " for appending!\n"; - delete Result; - return cerr.stream(); - } - return Result; + delete Result; + return &errs(); } void TimerGroup::removeTimer() { - sys::SmartScopedLock<true> L(&*TimerLock); + sys::SmartScopedLock<true> L(*TimerLock); if (--NumTimers == 0 && !TimersToPrint.empty()) { // Print timing report... // Sort the timers in descending order by amount of time taken... std::sort(TimersToPrint.begin(), TimersToPrint.end(), @@ -375,7 +354,7 @@ void TimerGroup::removeTimer() { unsigned Padding = (80-Name.length())/2; if (Padding > 80) Padding = 0; // Don't allow "negative" numbers - std::ostream *OutStream = GetLibSupportInfoOutputFile(); + raw_ostream *OutStream = GetLibSupportInfoOutputFile(); ++NumTimers; { // Scope to contain Total timer... 
don't allow total timer to drop us to @@ -397,10 +376,8 @@ void TimerGroup::removeTimer() { if (this != DefaultTimerGroup) { *OutStream << " Total Execution Time: "; - printAlignedFP(Total.getProcessTime(), 4, 5, *OutStream); - *OutStream << " seconds ("; - printAlignedFP(Total.getWallTime(), 4, 5, *OutStream); - *OutStream << " wall clock)\n"; + *OutStream << format("%5.4f", Total.getProcessTime()) << " seconds ("; + *OutStream << format("%5.4f", Total.getWallTime()) << " wall clock)\n"; } *OutStream << "\n"; @@ -422,24 +399,25 @@ void TimerGroup::removeTimer() { TimersToPrint[i].print(Total, *OutStream); Total.print(Total, *OutStream); - *OutStream << std::endl; // Flush output + *OutStream << '\n'; + OutStream->flush(); } --NumTimers; TimersToPrint.clear(); - if (OutStream != cerr.stream() && OutStream != cout.stream()) + if (OutStream != &errs() && OutStream != &outs()) delete OutStream; // Close the file... } } void TimerGroup::addTimer() { - sys::SmartScopedLock<true> L(&*TimerLock); + sys::SmartScopedLock<true> L(*TimerLock); ++NumTimers; } void TimerGroup::addTimerToPrint(const Timer &T) { - sys::SmartScopedLock<true> L(&*TimerLock); + sys::SmartScopedLock<true> L(*TimerLock); TimersToPrint.push_back(Timer(true, T)); } diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 279bd43..6f805da 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -8,6 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Triple.h" + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" #include <cassert> #include <cstring> using namespace llvm; @@ -18,22 +21,60 @@ const char *Triple::getArchTypeName(ArchType Kind) { switch (Kind) { case InvalidArch: return "<invalid>"; case UnknownArch: return "unknown"; - - case x86: return "i386"; - case x86_64: return "x86_64"; - case ppc: return "powerpc"; - case ppc64: return "powerpc64"; + + case alpha: return "alpha"; + case arm: return "arm"; + 
case bfin: return "bfin"; + case cellspu: return "cellspu"; + case mips: return "mips"; + case mipsel: return "mipsel"; + case msp430: return "msp430"; + case pic16: return "pic16"; + case ppc64: return "powerpc64"; + case ppc: return "powerpc"; + case sparc: return "sparc"; + case systemz: return "s390x"; + case tce: return "tce"; + case thumb: return "thumb"; + case x86: return "i386"; + case x86_64: return "x86_64"; + case xcore: return "xcore"; } return "<invalid>"; } +const char *Triple::getArchTypePrefix(ArchType Kind) { + switch (Kind) { + default: + return 0; + + case alpha: return "alpha"; + + case arm: + case thumb: return "arm"; + + case bfin: return "bfin"; + + case cellspu: return "spu"; + + case ppc64: + case ppc: return "ppc"; + + case sparc: return "sparc"; + + case x86: + case x86_64: return "x86"; + case xcore: return "xcore"; + } +} + const char *Triple::getVendorTypeName(VendorType Kind) { switch (Kind) { case UnknownVendor: return "unknown"; case Apple: return "apple"; - case PC: return "PC"; + case PC: return "pc"; } return "<invalid>"; @@ -44,35 +85,166 @@ const char *Triple::getOSTypeName(OSType Kind) { case UnknownOS: return "unknown"; case AuroraUX: return "auroraux"; + case Cygwin: return "cygwin"; case Darwin: return "darwin"; case DragonFly: return "dragonfly"; case FreeBSD: return "freebsd"; case Linux: return "linux"; + case MinGW32: return "mingw32"; + case MinGW64: return "mingw64"; + case NetBSD: return "netbsd"; case OpenBSD: return "openbsd"; + case Solaris: return "solaris"; + case Win32: return "win32"; } return "<invalid>"; } +Triple::ArchType Triple::getArchTypeForLLVMName(const StringRef &Name) { + if (Name == "alpha") + return alpha; + if (Name == "arm") + return arm; + if (Name == "bfin") + return bfin; + if (Name == "cellspu") + return cellspu; + if (Name == "mips") + return mips; + if (Name == "mipsel") + return mipsel; + if (Name == "msp430") + return msp430; + if (Name == "pic16") + return pic16; + if (Name == "ppc64") 
+ return ppc64; + if (Name == "ppc") + return ppc; + if (Name == "sparc") + return sparc; + if (Name == "systemz") + return systemz; + if (Name == "tce") + return tce; + if (Name == "thumb") + return thumb; + if (Name == "x86") + return x86; + if (Name == "x86-64") + return x86_64; + if (Name == "xcore") + return xcore; + + return UnknownArch; +} + +Triple::ArchType Triple::getArchTypeForDarwinArchName(const StringRef &Str) { + // See arch(3) and llvm-gcc's driver-driver.c. We don't implement support for + // archs which Darwin doesn't use. + + // The matching this routine does is fairly pointless, since it is neither the + // complete architecture list, nor a reasonable subset. The problem is that + // historically the driver driver accepts this and also ties its -march= + // handling to the architecture name, so we need to be careful before removing + // support for it. + + // This code must be kept in sync with Clang's Darwin specific argument + // translation. + + if (Str == "ppc" || Str == "ppc601" || Str == "ppc603" || Str == "ppc604" || + Str == "ppc604e" || Str == "ppc750" || Str == "ppc7400" || + Str == "ppc7450" || Str == "ppc970") + return Triple::ppc; + + if (Str == "ppc64") + return Triple::ppc64; + + if (Str == "i386" || Str == "i486" || Str == "i486SX" || Str == "pentium" || + Str == "i586" || Str == "pentpro" || Str == "i686" || Str == "pentIIm3" || + Str == "pentIIm5" || Str == "pentium4") + return Triple::x86; + + if (Str == "x86_64") + return Triple::x86_64; + + // This is derived from the driver driver. 
+ if (Str == "arm" || Str == "armv4t" || Str == "armv5" || Str == "xscale" || + Str == "armv6" || Str == "armv7") + return Triple::arm; + + return Triple::UnknownArch; +} + // void Triple::Parse() const { assert(!isInitialized() && "Invalid parse call."); - std::string ArchName = getArchName(); + StringRef ArchName = getArchName(); + StringRef VendorName = getVendorName(); + StringRef OSName = getOSName(); + if (ArchName.size() == 4 && ArchName[0] == 'i' && - ArchName[2] == '8' && ArchName[3] == '6') + ArchName[2] == '8' && ArchName[3] == '6' && + ArchName[1] - '3' < 6) // i[3-9]86 Arch = x86; else if (ArchName == "amd64" || ArchName == "x86_64") Arch = x86_64; + else if (ArchName == "bfin") + Arch = bfin; + else if (ArchName == "pic16") + Arch = pic16; else if (ArchName == "powerpc") Arch = ppc; else if (ArchName == "powerpc64") Arch = ppc64; + else if (ArchName == "arm" || + ArchName.startswith("armv") || + ArchName == "xscale") + Arch = arm; + else if (ArchName == "thumb" || + ArchName.startswith("thumbv")) + Arch = thumb; + else if (ArchName.startswith("alpha")) + Arch = alpha; + else if (ArchName == "spu" || ArchName == "cellspu") + Arch = cellspu; + else if (ArchName == "msp430") + Arch = msp430; + else if (ArchName == "mips" || ArchName == "mipsallegrex") + Arch = mips; + else if (ArchName == "mipsel" || ArchName == "mipsallegrexel" || + ArchName == "psp") + Arch = mipsel; + else if (ArchName == "sparc") + Arch = sparc; + else if (ArchName == "s390x") + Arch = systemz; + else if (ArchName == "tce") + Arch = tce; + else if (ArchName == "xcore") + Arch = xcore; else Arch = UnknownArch; - std::string VendorName = getVendorName(); + + // Handle some exceptional cases where the OS / environment components are + // stuck into the vendor field. + if (StringRef(getTriple()).count('-') == 1) { + StringRef VendorName = getVendorName(); + + if (VendorName.startswith("mingw32")) { // 'i386-mingw32', etc. 
+ Vendor = PC; + OS = MinGW32; + return; + } + + // arm-elf is another example, but we don't currently parse anything about + // the environment. + } + if (VendorName == "apple") Vendor = Apple; else if (VendorName == "pc") @@ -80,78 +252,129 @@ void Triple::Parse() const { else Vendor = UnknownVendor; - std::string OSName = getOSName(); - if (memcmp(&OSName[0], "auroraux", 8) == 0) + if (OSName.startswith("auroraux")) OS = AuroraUX; - else if (memcmp(&OSName[0], "darwin", 6) == 0) + else if (OSName.startswith("cygwin")) + OS = Cygwin; + else if (OSName.startswith("darwin")) OS = Darwin; - else if (memcmp(&OSName[0], "dragonfly", 9) == 0) + else if (OSName.startswith("dragonfly")) OS = DragonFly; - else if (memcmp(&OSName[0], "freebsd", 7) == 0) + else if (OSName.startswith("freebsd")) OS = FreeBSD; - else if (memcmp(&OSName[0], "linux", 5) == 0) + else if (OSName.startswith("linux")) OS = Linux; - else if (memcmp(&OSName[0], "openbsd", 7) == 0) + else if (OSName.startswith("mingw32")) + OS = MinGW32; + else if (OSName.startswith("mingw64")) + OS = MinGW64; + else if (OSName.startswith("netbsd")) + OS = NetBSD; + else if (OSName.startswith("openbsd")) OS = OpenBSD; + else if (OSName.startswith("solaris")) + OS = Solaris; + else if (OSName.startswith("win32")) + OS = Win32; else OS = UnknownOS; assert(isInitialized() && "Failed to initialize!"); } -static std::string extract(const std::string &A, - std::string::size_type begin, - std::string::size_type end) { - if (begin == std::string::npos) - return ""; - if (end == std::string::npos) - return A.substr(begin); - return A.substr(begin, end - begin); +StringRef Triple::getArchName() const { + return StringRef(Data).split('-').first; // Isolate first component } -static std::string extract1(const std::string &A, - std::string::size_type begin, - std::string::size_type end) { - if (begin == std::string::npos || begin == end) - return ""; - return extract(A, begin + 1, end); +StringRef Triple::getVendorName() const { + 
StringRef Tmp = StringRef(Data).split('-').second; // Strip first component + return Tmp.split('-').first; // Isolate second component } -std::string Triple::getArchName() const { - std::string Tmp = Data; - return extract(Tmp, 0, Tmp.find('-')); +StringRef Triple::getOSName() const { + StringRef Tmp = StringRef(Data).split('-').second; // Strip first component + Tmp = Tmp.split('-').second; // Strip second component + return Tmp.split('-').first; // Isolate third component } -std::string Triple::getVendorName() const { - std::string Tmp = Data; - Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos); - return extract(Tmp, 0, Tmp.find('-')); +StringRef Triple::getEnvironmentName() const { + StringRef Tmp = StringRef(Data).split('-').second; // Strip first component + Tmp = Tmp.split('-').second; // Strip second component + return Tmp.split('-').second; // Strip third component } -std::string Triple::getOSName() const { - std::string Tmp = Data; - Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos); - Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos); - return extract(Tmp, 0, Tmp.find('-')); +StringRef Triple::getOSAndEnvironmentName() const { + StringRef Tmp = StringRef(Data).split('-').second; // Strip first component + return Tmp.split('-').second; // Strip second component } -std::string Triple::getEnvironmentName() const { - std::string Tmp = Data; - Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos); - Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos); - Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos); - return extract(Tmp, 0, std::string::npos); +static unsigned EatNumber(StringRef &Str) { + assert(!Str.empty() && Str[0] >= '0' && Str[0] <= '9' && "Not a number"); + unsigned Result = Str[0]-'0'; + + // Eat the digit. + Str = Str.substr(1); + + // Handle "darwin11". + if (Result == 1 && !Str.empty() && Str[0] >= '0' && Str[0] <= '9') { + Result = Result*10 + (Str[0] - '0'); + // Eat the digit. 
+ Str = Str.substr(1); + } + + return Result; } -std::string Triple::getOSAndEnvironmentName() const { - std::string Tmp = Data; - Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos); - Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos); - return extract(Tmp, 0, std::string::npos); +/// getDarwinNumber - Parse the 'darwin number' out of the specific target +/// triple. For example, if we have darwin8.5 return 8,5,0. If any entry is +/// not defined, return 0's. This requires that the triple have an OSType of +/// darwin before it is called. +void Triple::getDarwinNumber(unsigned &Maj, unsigned &Min, + unsigned &Revision) const { + assert(getOS() == Darwin && "Not a darwin target triple!"); + StringRef OSName = getOSName(); + assert(OSName.startswith("darwin") && "Unknown darwin target triple!"); + + // Strip off "darwin". + OSName = OSName.substr(6); + + Maj = Min = Revision = 0; + + if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9') + return; + + // The major version is the first digit. + Maj = EatNumber(OSName); + if (OSName.empty()) return; + + // Handle minor version: 10.4.9 -> darwin8.9. + if (OSName[0] != '.') + return; + + // Eat the '.'. + OSName = OSName.substr(1); + + if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9') + return; + + Min = EatNumber(OSName); + if (OSName.empty()) return; + + // Handle revision darwin8.9.1 + if (OSName[0] != '.') + return; + + // Eat the '.'. 
+ OSName = OSName.substr(1); + + if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9') + return; + + Revision = EatNumber(OSName); } -void Triple::setTriple(const std::string &Str) { - Data = Str; +void Triple::setTriple(const Twine &Str) { + Data = Str.str(); Arch = InvalidArch; } @@ -167,15 +390,22 @@ void Triple::setOS(OSType Kind) { setOSName(getOSTypeName(Kind)); } -void Triple::setArchName(const std::string &Str) { - setTriple(Str + "-" + getVendorName() + "-" + getOSAndEnvironmentName()); +void Triple::setArchName(const StringRef &Str) { + // Work around a miscompilation bug for Twines in gcc 4.0.3. + SmallString<64> Triple; + Triple += Str; + Triple += "-"; + Triple += getVendorName(); + Triple += "-"; + Triple += getOSAndEnvironmentName(); + setTriple(Triple.str()); } -void Triple::setVendorName(const std::string &Str) { +void Triple::setVendorName(const StringRef &Str) { setTriple(getArchName() + "-" + Str + "-" + getOSAndEnvironmentName()); } -void Triple::setOSName(const std::string &Str) { +void Triple::setOSName(const StringRef &Str) { if (hasEnvironment()) setTriple(getArchName() + "-" + getVendorName() + "-" + Str + "-" + getEnvironmentName()); @@ -183,11 +413,11 @@ void Triple::setOSName(const std::string &Str) { setTriple(getArchName() + "-" + getVendorName() + "-" + Str); } -void Triple::setEnvironmentName(const std::string &Str) { +void Triple::setEnvironmentName(const StringRef &Str) { setTriple(getArchName() + "-" + getVendorName() + "-" + getOSName() + "-" + Str); } -void Triple::setOSAndEnvironmentName(const std::string &Str) { +void Triple::setOSAndEnvironmentName(const StringRef &Str) { setTriple(getArchName() + "-" + getVendorName() + "-" + Str); } diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp new file mode 100644 index 0000000..292c0c2 --- /dev/null +++ b/lib/Support/Twine.cpp @@ -0,0 +1,133 @@ +//===-- Twine.cpp - Fast Temporary String Concatenation -------------------===// +// +// The LLVM Compiler Infrastructure +// 
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// str - Render the whole twine into a freshly built std::string, going
+/// through a SmallString<256> scratch buffer.
+std::string Twine::str() const {
+  SmallString<256> Vec;
+  toVector(Vec);
+  return std::string(Vec.begin(), Vec.end());
+}
+
+/// toVector - Print the twine into \p Out through a raw_svector_ostream.
+void Twine::toVector(SmallVectorImpl<char> &Out) const {
+  raw_svector_ostream OS(Out);
+  print(OS);
+}
+
+/// printOneChild - Write the text of a single child to \p OS.  \p Ptr is
+/// reinterpreted according to \p Kind; Null and Empty children print nothing.
+void Twine::printOneChild(raw_ostream &OS, const void *Ptr,
+                          NodeKind Kind) const {
+  switch (Kind) {
+  case Twine::NullKind: break;
+  case Twine::EmptyKind: break;
+  case Twine::TwineKind:
+    static_cast<const Twine*>(Ptr)->print(OS);
+    break;
+  case Twine::CStringKind:
+    OS << static_cast<const char*>(Ptr);
+    break;
+  case Twine::StdStringKind:
+    OS << *static_cast<const std::string*>(Ptr);
+    break;
+  case Twine::StringRefKind:
+    OS << *static_cast<const StringRef*>(Ptr);
+    break;
+  case Twine::DecUIKind:
+    OS << *static_cast<const unsigned int*>(Ptr);
+    break;
+  case Twine::DecIKind:
+    OS << *static_cast<const int*>(Ptr);
+    break;
+  case Twine::DecULKind:
+    OS << *static_cast<const unsigned long*>(Ptr);
+    break;
+  case Twine::DecLKind:
+    OS << *static_cast<const long*>(Ptr);
+    break;
+  case Twine::DecULLKind:
+    OS << *static_cast<const unsigned long long*>(Ptr);
+    break;
+  case Twine::DecLLKind:
+    OS << *static_cast<const long long*>(Ptr);
+    break;
+  case Twine::UHexKind:
+    OS.write_hex(*static_cast<const uint64_t*>(Ptr));
+    break;
+  }
+}
+
+/// printOneChildRepr - Write a debugging representation of a single child to
+/// \p OS: a tag naming the node kind followed by the quoted value.  \p Ptr is
+/// reinterpreted according to \p Kind.
+void Twine::printOneChildRepr(raw_ostream &OS, const void *Ptr,
+                              NodeKind Kind) const {
+  switch (Kind) {
+  case Twine::NullKind:
+    OS << "null"; break;
+  case Twine::EmptyKind:
+    OS << "empty"; break;
+  case Twine::TwineKind:
+    OS << "rope:";
+    static_cast<const Twine*>(Ptr)->printRepr(OS);
+    break;
+  case Twine::CStringKind:
+    OS << "cstring:\""
+       << static_cast<const char*>(Ptr) << "\"";
+    break;
+  case Twine::StdStringKind:
+    // Dereference: streaming the pointer itself would pick
+    // operator<<(const void*) and print an address, not the string.
+    OS << "std::string:\""
+       << *static_cast<const std::string*>(Ptr) << "\"";
+    break;
+  case Twine::StringRefKind:
+    // Dereference for the same reason as StdStringKind.
+    OS << "stringref:\""
+       << *static_cast<const StringRef*>(Ptr) << "\"";
+    break;
+  case Twine::DecUIKind:
+    OS << "decUI:\"" << *static_cast<const unsigned int*>(Ptr) << "\"";
+    break;
+  case Twine::DecIKind:
+    OS << "decI:\"" << *static_cast<const int*>(Ptr) << "\"";
+    break;
+  case Twine::DecULKind:
+    OS << "decUL:\"" << *static_cast<const unsigned long*>(Ptr) << "\"";
+    break;
+  case Twine::DecLKind:
+    OS << "decL:\"" << *static_cast<const long*>(Ptr) << "\"";
+    break;
+  case Twine::DecULLKind:
+    OS << "decULL:\"" << *static_cast<const unsigned long long*>(Ptr) << "\"";
+    break;
+  case Twine::DecLLKind:
+    OS << "decLL:\"" << *static_cast<const long long*>(Ptr) << "\"";
+    break;
+  case Twine::UHexKind:
+    // Print the pointed-to value in hex, matching printOneChild, rather than
+    // the address of the value.
+    OS << "uhex:\"";
+    OS.write_hex(*static_cast<const uint64_t*>(Ptr)) << "\"";
+    break;
+  }
+}
+
+/// print - Write the twine's text to \p OS: left child, then right child.
+void Twine::print(raw_ostream &OS) const {
+  printOneChild(OS, LHS, getLHSKind());
+  printOneChild(OS, RHS, getRHSKind());
+}
+
+/// printRepr - Write "(Twine <lhs-repr> <rhs-repr>)" to \p OS.
+void Twine::printRepr(raw_ostream &OS) const {
+  OS << "(Twine ";
+  printOneChildRepr(OS, LHS, getLHSKind());
+  OS << " ";
+  printOneChildRepr(OS, RHS, getRHSKind());
+  OS << ")";
+}
+
+/// dump - Print the twine's text to llvm::errs().
+void Twine::dump() const {
+  print(llvm::errs());
+}
+
+/// dumpRepr - Print the twine's debugging representation to llvm::errs().
+void Twine::dumpRepr() const {
+  printRepr(llvm::errs());
+}
diff --git a/lib/Support/raw_os_ostream.cpp b/lib/Support/raw_os_ostream.cpp
new file mode 100644
index 0000000..3374dd7
--- /dev/null
+++ b/lib/Support/raw_os_ostream.cpp
@@ -0,0 +1,30 @@
+//===--- raw_os_ostream.cpp - Implement the raw_os_ostream class ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This implements support adapting raw_ostream to std::ostream. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/raw_os_ostream.h" +#include <ostream> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// raw_os_ostream +//===----------------------------------------------------------------------===// + +raw_os_ostream::~raw_os_ostream() { + flush(); +} + +void raw_os_ostream::write_impl(const char *Ptr, size_t Size) { + OS.write(Ptr, Size); +} + +uint64_t raw_os_ostream::current_pos() { return OS.tellp(); } diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 42e6fda..0a82cc1 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -18,7 +18,11 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Config/config.h" #include "llvm/Support/Compiler.h" -#include <ostream> +#include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include <sys/stat.h> +#include <sys/types.h> #if defined(HAVE_UNISTD_H) # include <unistd.h> @@ -43,10 +47,59 @@ using namespace llvm; +raw_ostream::~raw_ostream() { + // raw_ostream's subclasses should take care to flush the buffer + // in their destructors. + assert(OutBufCur == OutBufStart && + "raw_ostream destructor called with non-empty buffer!"); + + if (BufferMode == InternalBuffer) + delete [] OutBufStart; + + // If there are any pending errors, report them now. Clients wishing + // to avoid llvm_report_error calls should check for errors with + // has_error() and clear the error flag with clear_error() before + // destructing raw_ostream objects which may have errors. + if (Error) + llvm_report_error("IO failure on output stream."); +} // An out of line virtual method to provide a home for the class vtable. 
void raw_ostream::handle() {} +size_t raw_ostream::preferred_buffer_size() { + // BUFSIZ is intended to be a reasonable default. + return BUFSIZ; +} + +void raw_ostream::SetBuffered() { + // Ask the subclass to determine an appropriate buffer size. + if (size_t Size = preferred_buffer_size()) + SetBufferSize(Size); + else + // It may return 0, meaning this stream should be unbuffered. + SetUnbuffered(); +} + +void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size, + BufferKind Mode) { + assert(((Mode == Unbuffered && BufferStart == 0 && Size == 0) || + (Mode != Unbuffered && BufferStart && Size)) && + "stream must be unbuffered or have at least one byte"); + // Make sure the current buffer is free of content (we can't flush here; the + // child buffer management logic will be in write_impl). + assert(GetNumBytesInBuffer() == 0 && "Current buffer is non-empty!"); + + if (BufferMode == InternalBuffer) + delete [] OutBufStart; + OutBufStart = BufferStart; + OutBufEnd = OutBufStart+Size; + OutBufCur = OutBufStart; + BufferMode = Mode; + + assert(OutBufStart <= OutBufEnd && "Invalid size!"); +} + raw_ostream &raw_ostream::operator<<(unsigned long N) { // Zero is a special case. if (N == 0) @@ -73,10 +126,10 @@ raw_ostream &raw_ostream::operator<<(long N) { } raw_ostream &raw_ostream::operator<<(unsigned long long N) { - // Zero is a special case. - if (N == 0) - return *this << '0'; - + // Output using 32-bit div/mod when possible. + if (N == static_cast<unsigned long>(N)) + return this->operator<<(static_cast<unsigned long>(N)); + char NumberBuffer[20]; char *EndPtr = NumberBuffer+sizeof(NumberBuffer); char *CurPtr = EndPtr; @@ -97,10 +150,7 @@ raw_ostream &raw_ostream::operator<<(long long N) { return this->operator<<(static_cast<unsigned long long>(N)); } -raw_ostream &raw_ostream::operator<<(const void *P) { - uintptr_t N = (uintptr_t) P; - *this << '0' << 'x'; - +raw_ostream &raw_ostream::write_hex(unsigned long long N) { // Zero is a special case. 
if (N == 0) return *this << '0'; @@ -110,7 +160,7 @@ raw_ostream &raw_ostream::operator<<(const void *P) { char *CurPtr = EndPtr; while (N) { - unsigned x = N % 16; + uintptr_t x = N % 16; *--CurPtr = (x < 10 ? '0' + x : 'a' + x - 10); N /= 16; } @@ -118,44 +168,78 @@ raw_ostream &raw_ostream::operator<<(const void *P) { return write(CurPtr, EndPtr-CurPtr); } +raw_ostream &raw_ostream::operator<<(const void *P) { + *this << '0' << 'x'; + + return write_hex((uintptr_t) P); +} + +raw_ostream &raw_ostream::operator<<(double N) { + this->operator<<(ftostr(N)); + return *this; +} + + + void raw_ostream::flush_nonempty() { assert(OutBufCur > OutBufStart && "Invalid call to flush_nonempty."); - write_impl(OutBufStart, OutBufCur - OutBufStart); - OutBufCur = OutBufStart; + size_t Length = OutBufCur - OutBufStart; + OutBufCur = OutBufStart; + write_impl(OutBufStart, Length); } raw_ostream &raw_ostream::write(unsigned char C) { // Group exceptional cases into a single branch. - if (OutBufCur >= OutBufEnd) { - if (Unbuffered) { - write_impl(reinterpret_cast<char*>(&C), 1); - return *this; + if (BUILTIN_EXPECT(OutBufCur >= OutBufEnd, false)) { + if (BUILTIN_EXPECT(!OutBufStart, false)) { + if (BufferMode == Unbuffered) { + write_impl(reinterpret_cast<char*>(&C), 1); + return *this; + } + // Set up a buffer and start over. + SetBuffered(); + return write(C); } - - if (!OutBufStart) - SetBufferSize(); - else - flush_nonempty(); + + flush_nonempty(); } *OutBufCur++ = C; return *this; } -raw_ostream &raw_ostream::write(const char *Ptr, unsigned Size) { +raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) { // Group exceptional cases into a single branch. if (BUILTIN_EXPECT(OutBufCur+Size > OutBufEnd, false)) { - if (Unbuffered) { - write_impl(Ptr, Size); - return *this; + if (BUILTIN_EXPECT(!OutBufStart, false)) { + if (BufferMode == Unbuffered) { + write_impl(Ptr, Size); + return *this; + } + // Set up a buffer and start over. 
+ SetBuffered(); + return write(Ptr, Size); } - - if (!OutBufStart) - SetBufferSize(); - else + + // Write out the data in buffer-sized blocks until the remainder + // fits within the buffer. + do { + size_t NumBytes = OutBufEnd - OutBufCur; + copy_to_buffer(Ptr, NumBytes); flush_nonempty(); + Ptr += NumBytes; + Size -= NumBytes; + } while (OutBufCur+Size > OutBufEnd); } - + + copy_to_buffer(Ptr, Size); + + return *this; +} + +void raw_ostream::copy_to_buffer(const char *Ptr, size_t Size) { + assert(Size <= size_t(OutBufEnd - OutBufCur) && "Buffer overrun!"); + // Handle short strings specially, memcpy isn't very good at very short // strings. switch (Size) { @@ -165,40 +249,24 @@ raw_ostream &raw_ostream::write(const char *Ptr, unsigned Size) { case 1: OutBufCur[0] = Ptr[0]; // FALL THROUGH case 0: break; default: - // Normally the string to emit is shorter than the buffer. - if (Size <= unsigned(OutBufEnd-OutBufStart)) { - memcpy(OutBufCur, Ptr, Size); - break; - } - - // Otherwise we are emitting a string larger than our buffer. We - // know we already flushed, so just write it out directly. - write_impl(Ptr, Size); - Size = 0; + memcpy(OutBufCur, Ptr, Size); break; } - OutBufCur += Size; - return *this; + OutBufCur += Size; } // Formatted output. raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) { // If we have more than a few bytes left in our output buffer, try // formatting directly onto its end. - // - // FIXME: This test is a bit silly, since if we don't have enough - // space in the buffer we will have to flush the formatted output - // anyway. We should just flush upfront in such cases, and use the - // whole buffer as our scratch pad. Note, however, that this case is - // also necessary for correctness on unbuffered streams. 
- unsigned NextBufferSize = 127; - if (OutBufEnd-OutBufCur > 3) { - unsigned BufferBytesLeft = OutBufEnd-OutBufCur; - unsigned BytesUsed = Fmt.print(OutBufCur, BufferBytesLeft); + size_t NextBufferSize = 127; + size_t BufferBytesLeft = OutBufEnd - OutBufCur; + if (BufferBytesLeft > 3) { + size_t BytesUsed = Fmt.print(OutBufCur, BufferBytesLeft); // Common case is that we have plenty of space. - if (BytesUsed < BufferBytesLeft) { + if (BytesUsed <= BufferBytesLeft) { OutBufCur += BytesUsed; return *this; } @@ -217,11 +285,11 @@ raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) { V.resize(NextBufferSize); // Try formatting into the SmallVector. - unsigned BytesUsed = Fmt.print(&V[0], NextBufferSize); + size_t BytesUsed = Fmt.print(V.data(), NextBufferSize); // If BytesUsed fit into the vector, we win. if (BytesUsed <= NextBufferSize) - return write(&V[0], BytesUsed); + return write(V.data(), BytesUsed); // Otherwise, try again with a new size. assert(BytesUsed > NextBufferSize && "Didn't grow buffer!?"); @@ -229,6 +297,26 @@ raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) { } } +/// indent - Insert 'NumSpaces' spaces. +raw_ostream &raw_ostream::indent(unsigned NumSpaces) { + static const char Spaces[] = " " + " " + " "; + + // Usually the indentation is small, handle it with a fastpath. 
+ if (NumSpaces < array_lengthof(Spaces)) + return write(Spaces, NumSpaces); + + while (NumSpaces) { + unsigned NumToWrite = std::min(NumSpaces, + (unsigned)array_lengthof(Spaces)-1); + write(Spaces, NumToWrite); + NumSpaces -= NumToWrite; + } + return *this; +} + + //===----------------------------------------------------------------------===// // Formatted Output //===----------------------------------------------------------------------===// @@ -245,8 +333,12 @@ void format_object_base::home() { /// occurs, information about the error is put into ErrorInfo, and the /// stream should be immediately destroyed; the string will be empty /// if no error occurred. -raw_fd_ostream::raw_fd_ostream(const char *Filename, bool Binary, - std::string &ErrorInfo) : pos(0) { +raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo, + unsigned Flags) : pos(0) { + // Verify that we don't have both "append" and "excl". + assert((!(Flags & F_Excl) || !(Flags & F_Append)) && + "Cannot specify both 'excl' and 'append' file creation flags!"); + ErrorInfo.clear(); // Handle "-" as stdout. @@ -254,18 +346,26 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, bool Binary, FD = STDOUT_FILENO; // If user requested binary then put stdout into binary mode if // possible. 
- if (Binary) + if (Flags & F_Binary) sys::Program::ChangeStdoutToBinary(); ShouldClose = false; return; } - int Flags = O_WRONLY|O_CREAT|O_TRUNC; + int OpenFlags = O_WRONLY|O_CREAT; #ifdef O_BINARY - if (Binary) - Flags |= O_BINARY; + if (Flags & F_Binary) + OpenFlags |= O_BINARY; #endif - FD = open(Filename, Flags, 0644); + + if (Flags & F_Append) + OpenFlags |= O_APPEND; + else + OpenFlags |= O_TRUNC; + if (Flags & F_Excl) + OpenFlags |= O_EXCL; + + FD = open(Filename, OpenFlags, 0664); if (FD < 0) { ErrorInfo = "Error opening output file '" + std::string(Filename) + "'"; ShouldClose = false; @@ -275,33 +375,56 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, bool Binary, } raw_fd_ostream::~raw_fd_ostream() { - if (FD >= 0) { - flush(); - if (ShouldClose) - ::close(FD); - } + if (FD < 0) return; + flush(); + if (ShouldClose) + if (::close(FD) != 0) + error_detected(); } -void raw_fd_ostream::write_impl(const char *Ptr, unsigned Size) { + +void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { assert (FD >= 0 && "File already closed."); pos += Size; - ::write(FD, Ptr, Size); + if (::write(FD, Ptr, Size) != (ssize_t) Size) + error_detected(); } void raw_fd_ostream::close() { assert (ShouldClose); ShouldClose = false; flush(); - ::close(FD); + if (::close(FD) != 0) + error_detected(); FD = -1; } uint64_t raw_fd_ostream::seek(uint64_t off) { flush(); - pos = lseek(FD, off, SEEK_SET); + pos = ::lseek(FD, off, SEEK_SET); + if (pos != off) + error_detected(); return pos; } +size_t raw_fd_ostream::preferred_buffer_size() { +#if !defined(_MSC_VER) && !defined(__MINGW32__) // Windows has no st_blksize. + assert(FD >= 0 && "File not yet open!"); + struct stat statbuf; + if (fstat(FD, &statbuf) == 0) { + // If this is a terminal, don't use buffering. Line buffering + // would be a more traditional thing to do, but it's not worth + // the complexity. + if (S_ISCHR(statbuf.st_mode) && isatty(FD)) + return 0; + // Return the preferred block size. 
+ return statbuf.st_blksize; + } + error_detected(); +#endif + return raw_ostream::preferred_buffer_size(); +} + raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold, bool bg) { if (sys::Process::ColorNeedsFlush()) @@ -310,7 +433,7 @@ raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold, (colors == SAVEDCOLOR) ? sys::Process::OutputBold(bg) : sys::Process::OutputColor(colors, bold, bg); if (colorcode) { - unsigned len = strlen(colorcode); + size_t len = strlen(colorcode); write(colorcode, len); // don't account colors towards output characters pos -= len; @@ -323,7 +446,7 @@ raw_ostream &raw_fd_ostream::resetColor() { flush(); const char *colorcode = sys::Process::ResetColor(); if (colorcode) { - unsigned len = strlen(colorcode); + size_t len = strlen(colorcode); write(colorcode, len); // don't account colors towards output characters pos -= len; @@ -331,12 +454,18 @@ raw_ostream &raw_fd_ostream::resetColor() { return *this; } +bool raw_fd_ostream::is_displayed() const { + return sys::Process::FileDescriptorIsDisplayed(FD); +} + //===----------------------------------------------------------------------===// // raw_stdout/err_ostream //===----------------------------------------------------------------------===// +// Set buffer settings to model stdout and stderr behavior. +// Set standard error to be unbuffered by default. raw_stdout_ostream::raw_stdout_ostream():raw_fd_ostream(STDOUT_FILENO, false) {} -raw_stderr_ostream::raw_stderr_ostream():raw_fd_ostream(STDERR_FILENO, false, +raw_stderr_ostream::raw_stderr_ostream():raw_fd_ostream(STDERR_FILENO, false, true) {} // An out of line virtual method to provide a home for the class vtable. 
@@ -357,23 +486,12 @@ raw_ostream &llvm::errs() { return S; } -//===----------------------------------------------------------------------===// -// raw_os_ostream -//===----------------------------------------------------------------------===// - -raw_os_ostream::~raw_os_ostream() { - flush(); -} - -void raw_os_ostream::write_impl(const char *Ptr, unsigned Size) { - OS.write(Ptr, Size); +/// nulls() - This returns a reference to a raw_ostream which discards output. +raw_ostream &llvm::nulls() { + static raw_null_ostream S; + return S; } -uint64_t raw_os_ostream::current_pos() { return OS.tellp(); } - -uint64_t raw_os_ostream::tell() { - return (uint64_t)OS.tellp() + GetNumBytesInBuffer(); -} //===----------------------------------------------------------------------===// // raw_string_ostream @@ -383,7 +501,7 @@ raw_string_ostream::~raw_string_ostream() { flush(); } -void raw_string_ostream::write_impl(const char *Ptr, unsigned Size) { +void raw_string_ostream::write_impl(const char *Ptr, size_t Size) { OS.append(Ptr, Size); } @@ -391,16 +509,65 @@ void raw_string_ostream::write_impl(const char *Ptr, unsigned Size) { // raw_svector_ostream //===----------------------------------------------------------------------===// +// The raw_svector_ostream implementation uses the SmallVector itself as the +// buffer for the raw_ostream. We guarantee that the raw_ostream buffer is +// always pointing past the end of the vector, but within the vector +// capacity. This allows raw_ostream to write directly into the correct place, +// and we only need to set the vector size when the data is flushed. + +raw_svector_ostream::raw_svector_ostream(SmallVectorImpl<char> &O) : OS(O) { + // Set up the initial external buffer. We make sure that the buffer has at + // least 128 bytes free; raw_ostream itself only requires 64, but we want to + // make sure that we don't grow the buffer unnecessarily on destruction (when + // the data is flushed). See the FIXME below. 
+ OS.reserve(OS.size() + 128); + SetBuffer(OS.end(), OS.capacity() - OS.size()); +} + raw_svector_ostream::~raw_svector_ostream() { + // FIXME: Prevent resizing during this flush(). flush(); } -void raw_svector_ostream::write_impl(const char *Ptr, unsigned Size) { - OS.append(Ptr, Ptr + Size); +void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) { + assert(Ptr == OS.end() && OS.size() + Size <= OS.capacity() && + "Invalid write_impl() call!"); + + // We don't need to copy the bytes, just commit the bytes to the + // SmallVector. + OS.set_size(OS.size() + Size); + + // Grow the vector if necessary. + if (OS.capacity() - OS.size() < 64) + OS.reserve(OS.capacity() * 2); + + // Update the buffer position. + SetBuffer(OS.end(), OS.capacity() - OS.size()); } uint64_t raw_svector_ostream::current_pos() { return OS.size(); } -uint64_t raw_svector_ostream::tell() { - return OS.size() + GetNumBytesInBuffer(); +StringRef raw_svector_ostream::str() { + flush(); + return StringRef(OS.begin(), OS.size()); +} + +//===----------------------------------------------------------------------===// +// raw_null_ostream +//===----------------------------------------------------------------------===// + +raw_null_ostream::~raw_null_ostream() { +#ifndef NDEBUG + // ~raw_ostream asserts that the buffer is empty. This isn't necessary + // with raw_null_ostream, but it's better to have raw_null_ostream follow + // the rules than to change the rules just for raw_null_ostream. 
+ flush(); +#endif +} + +void raw_null_ostream::write_impl(const char *Ptr, size_t Size) { +} + +uint64_t raw_null_ostream::current_pos() { + return 0; } diff --git a/lib/Support/regcclass.h b/lib/Support/regcclass.h new file mode 100644 index 0000000..2cea3e4 --- /dev/null +++ b/lib/Support/regcclass.h @@ -0,0 +1,70 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)cclass.h 8.3 (Berkeley) 3/20/94 + */ + +/* character-class table */ +static struct cclass { + const char *name; + const char *chars; + const char *multis; +} cclasses[] = { + { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789", ""} , + { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", + ""} , + { "blank", " \t", ""} , + { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ +\25\26\27\30\31\32\33\34\35\36\37\177", ""} , + { "digit", "0123456789", ""} , + { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + ""} , + { "lower", "abcdefghijklmnopqrstuvwxyz", + ""} , + { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", + ""} , + { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + ""} , + { "space", "\t\n\v\f\r ", ""} , + { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", + ""} , + { "xdigit", "0123456789ABCDEFabcdef", + ""} , + { NULL, 0, "" } +}; diff --git a/lib/Support/regcname.h b/lib/Support/regcname.h new file mode 100644 index 0000000..3c0bb24 --- /dev/null +++ b/lib/Support/regcname.h @@ -0,0 +1,139 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. 
+ * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)cname.h 8.3 (Berkeley) 3/20/94 + */ + +/* character-name table */ +static struct cname { + const char *name; + char code; +} cnames[] = { + { "NUL", '\0' }, + { "SOH", '\001' }, + { "STX", '\002' }, + { "ETX", '\003' }, + { "EOT", '\004' }, + { "ENQ", '\005' }, + { "ACK", '\006' }, + { "BEL", '\007' }, + { "alert", '\007' }, + { "BS", '\010' }, + { "backspace", '\b' }, + { "HT", '\011' }, + { "tab", '\t' }, + { "LF", '\012' }, + { "newline", '\n' }, + { "VT", '\013' }, + { "vertical-tab", '\v' }, + { "FF", '\014' }, + { "form-feed", '\f' }, + { "CR", '\015' }, + { "carriage-return", '\r' }, + { "SO", '\016' }, + { "SI", '\017' }, + { "DLE", '\020' }, + { "DC1", '\021' }, + { "DC2", '\022' }, + { "DC3", '\023' }, + { "DC4", '\024' }, + { "NAK", '\025' }, + { "SYN", '\026' }, + { "ETB", '\027' }, + { "CAN", '\030' }, + { "EM", '\031' }, + { "SUB", '\032' }, + { "ESC", '\033' }, + { "IS4", '\034' }, + { "FS", '\034' }, + { "IS3", '\035' }, + { "GS", '\035' }, + { "IS2", '\036' }, + { "RS", '\036' }, + { "IS1", '\037' }, + { "US", '\037' }, + { "space", ' ' }, + { "exclamation-mark", '!' }, + { "quotation-mark", '"' }, + { "number-sign", '#' }, + { "dollar-sign", '$' }, + { "percent-sign", '%' }, + { "ampersand", '&' }, + { "apostrophe", '\'' }, + { "left-parenthesis", '(' }, + { "right-parenthesis", ')' }, + { "asterisk", '*' }, + { "plus-sign", '+' }, + { "comma", ',' }, + { "hyphen", '-' }, + { "hyphen-minus", '-' }, + { "period", '.' }, + { "full-stop", '.' }, + { "slash", '/' }, + { "solidus", '/' }, + { "zero", '0' }, + { "one", '1' }, + { "two", '2' }, + { "three", '3' }, + { "four", '4' }, + { "five", '5' }, + { "six", '6' }, + { "seven", '7' }, + { "eight", '8' }, + { "nine", '9' }, + { "colon", ':' }, + { "semicolon", ';' }, + { "less-than-sign", '<' }, + { "equals-sign", '=' }, + { "greater-than-sign", '>' }, + { "question-mark", '?' 
}, + { "commercial-at", '@' }, + { "left-square-bracket", '[' }, + { "backslash", '\\' }, + { "reverse-solidus", '\\' }, + { "right-square-bracket", ']' }, + { "circumflex", '^' }, + { "circumflex-accent", '^' }, + { "underscore", '_' }, + { "low-line", '_' }, + { "grave-accent", '`' }, + { "left-brace", '{' }, + { "left-curly-bracket", '{' }, + { "vertical-line", '|' }, + { "right-brace", '}' }, + { "right-curly-bracket", '}' }, + { "tilde", '~' }, + { "DEL", '\177' }, + { NULL, 0 } +}; diff --git a/lib/Support/regcomp.c b/lib/Support/regcomp.c new file mode 100644 index 0000000..cd018d5 --- /dev/null +++ b/lib/Support/regcomp.c @@ -0,0 +1,1525 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 + */ + +#include <sys/types.h> +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <limits.h> +#include <stdlib.h> +#include "regex_impl.h" + +#include "regutils.h" +#include "regex2.h" + +#include "regcclass.h" +#include "regcname.h" + +/* + * parse structure, passed up and down to avoid global variables and + * other clumsinesses + */ +struct parse { + char *next; /* next character in RE */ + char *end; /* end of string (-> NUL normally) */ + int error; /* has an error been seen? 
*/ + sop *strip; /* malloced strip */ + sopno ssize; /* malloced strip size (allocated) */ + sopno slen; /* malloced strip length (used) */ + int ncsalloc; /* number of csets allocated */ + struct re_guts *g; +# define NPAREN 10 /* we need to remember () 1-9 for back refs */ + sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ + sopno pend[NPAREN]; /* -> ) ([0] unused) */ +}; + +static void p_ere(struct parse *, int); +static void p_ere_exp(struct parse *); +static void p_str(struct parse *); +static void p_bre(struct parse *, int, int); +static int p_simp_re(struct parse *, int); +static int p_count(struct parse *); +static void p_bracket(struct parse *); +static void p_b_term(struct parse *, cset *); +static void p_b_cclass(struct parse *, cset *); +static void p_b_eclass(struct parse *, cset *); +static char p_b_symbol(struct parse *); +static char p_b_coll_elem(struct parse *, int); +static char othercase(int); +static void bothcases(struct parse *, int); +static void ordinary(struct parse *, int); +static void nonnewline(struct parse *); +static void repeat(struct parse *, sopno, int, int); +static int seterr(struct parse *, int); +static cset *allocset(struct parse *); +static void freeset(struct parse *, cset *); +static int freezeset(struct parse *, cset *); +static int firstch(struct parse *, cset *); +static int nch(struct parse *, cset *); +static void mcadd(struct parse *, cset *, const char *); +static void mcinvert(struct parse *, cset *); +static void mccase(struct parse *, cset *); +static int isinsets(struct re_guts *, int); +static int samesets(struct re_guts *, int, int); +static void categorize(struct parse *, struct re_guts *); +static sopno dupl(struct parse *, sopno, sopno); +static void doemit(struct parse *, sop, size_t); +static void doinsert(struct parse *, sop, size_t, sopno); +static void dofwd(struct parse *, sopno, sop); +static void enlarge(struct parse *, sopno); +static void stripsnug(struct parse *, struct re_guts *); +static void 
findmust(struct parse *, struct re_guts *); +static sopno pluscount(struct parse *, struct re_guts *); + +static char nuls[10]; /* place to point scanner in event of error */ + +/* + * macros for use with parse structure + * BEWARE: these know that the parse structure is named `p' !!! + */ +#define PEEK() (*p->next) +#define PEEK2() (*(p->next+1)) +#define MORE() (p->next < p->end) +#define MORE2() (p->next+1 < p->end) +#define SEE(c) (MORE() && PEEK() == (c)) +#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) +#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) +#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) +#define NEXT() (p->next++) +#define NEXT2() (p->next += 2) +#define NEXTn(n) (p->next += (n)) +#define GETNEXT() (*p->next++) +#define SETERROR(e) seterr(p, (e)) +#define REQUIRE(co, e) (void)((co) || SETERROR(e)) +#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) +#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) +#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) +#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) +#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) +#define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) +#define ASTERN(sop, pos) EMIT(sop, HERE()-pos) +#define HERE() (p->slen) +#define THERE() (p->slen - 1) +#define THERETHERE() (p->slen - 2) +#define DROP(n) (p->slen -= (n)) + +#ifdef _POSIX2_RE_DUP_MAX +#define DUPMAX _POSIX2_RE_DUP_MAX +#else +#define DUPMAX 255 +#endif +#define INFINITY (DUPMAX + 1) + +#ifndef NDEBUG +static int never = 0; /* for use in asserts; shuts lint up */ +#else +#define never 0 /* some <assert.h>s have bugs too */ +#endif + +/* + - llvm_regcomp - interface for parser and compilation + */ +int /* 0 success, otherwise REG_something */ +llvm_regcomp(llvm_regex_t *preg, const char *pattern, int cflags) +{ + struct parse pa; + struct re_guts *g; + struct parse *p = &pa; + int i; + size_t len; +#ifdef REDEBUG +# define GOODFLAGS(f) (f) 
+#else +# define GOODFLAGS(f) ((f)&~REG_DUMP) +#endif + + cflags = GOODFLAGS(cflags); + if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC)) + return(REG_INVARG); + + if (cflags&REG_PEND) { + if (preg->re_endp < pattern) + return(REG_INVARG); + len = preg->re_endp - pattern; + } else + len = strlen((const char *)pattern); + + /* do the mallocs early so failure handling is easy */ + g = (struct re_guts *)malloc(sizeof(struct re_guts) + + (NC-1)*sizeof(cat_t)); + if (g == NULL) + return(REG_ESPACE); + p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ + p->strip = (sop *)calloc(p->ssize, sizeof(sop)); + p->slen = 0; + if (p->strip == NULL) { + free((char *)g); + return(REG_ESPACE); + } + + /* set things up */ + p->g = g; + p->next = (char *)pattern; /* convenience; we do not modify it */ + p->end = p->next + len; + p->error = 0; + p->ncsalloc = 0; + for (i = 0; i < NPAREN; i++) { + p->pbegin[i] = 0; + p->pend[i] = 0; + } + g->csetsize = NC; + g->sets = NULL; + g->setbits = NULL; + g->ncsets = 0; + g->cflags = cflags; + g->iflags = 0; + g->nbol = 0; + g->neol = 0; + g->must = NULL; + g->mlen = 0; + g->nsub = 0; + g->ncategories = 1; /* category 0 is "everything else" */ + g->categories = &g->catspace[-(CHAR_MIN)]; + (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); + g->backrefs = 0; + + /* do it */ + EMIT(OEND, 0); + g->firststate = THERE(); + if (cflags&REG_EXTENDED) + p_ere(p, OUT); + else if (cflags&REG_NOSPEC) + p_str(p); + else + p_bre(p, OUT, OUT); + EMIT(OEND, 0); + g->laststate = THERE(); + + /* tidy up loose ends and fill things in */ + categorize(p, g); + stripsnug(p, g); + findmust(p, g); + g->nplus = pluscount(p, g); + g->magic = MAGIC2; + preg->re_nsub = g->nsub; + preg->re_g = g; + preg->re_magic = MAGIC1; +#ifndef REDEBUG + /* not debugging, so can't rely on the assert() in llvm_regexec() */ + if (g->iflags&REGEX_BAD) + SETERROR(REG_ASSERT); +#endif + + /* win or lose, we're done */ + if (p->error != 0) /* lose */ + llvm_regfree(preg); + return(p->error); +} 
+ +/* + - p_ere - ERE parser top level, concatenation and alternation + */ +static void +p_ere(struct parse *p, int stop) /* character this ERE should end at */ +{ + char c; + sopno prevback = 0; + sopno prevfwd = 0; + sopno conc; + int first = 1; /* is this the first alternative? */ + + for (;;) { + /* do a bunch of concatenated expressions */ + conc = HERE(); + while (MORE() && (c = PEEK()) != '|' && c != stop) + p_ere_exp(p); + REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ + + if (!EAT('|')) + break; /* NOTE BREAK OUT */ + + if (first) { + INSERT(OCH_, conc); /* offset is wrong */ + prevfwd = conc; + prevback = conc; + first = 0; + } + ASTERN(OOR1, prevback); + prevback = THERE(); + AHEAD(prevfwd); /* fix previous offset */ + prevfwd = HERE(); + EMIT(OOR2, 0); /* offset is very wrong */ + } + + if (!first) { /* tail-end fixups */ + AHEAD(prevfwd); + ASTERN(O_CH, prevback); + } + + assert(!MORE() || SEE(stop)); +} + +/* + - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op + */ +static void +p_ere_exp(struct parse *p) +{ + char c; + sopno pos; + int count; + int count2; + sopno subno; + int wascaret = 0; + + assert(MORE()); /* caller should have ensured this */ + c = GETNEXT(); + + pos = HERE(); + switch (c) { + case '(': + REQUIRE(MORE(), REG_EPAREN); + p->g->nsub++; + subno = p->g->nsub; + if (subno < NPAREN) + p->pbegin[subno] = HERE(); + EMIT(OLPAREN, subno); + if (!SEE(')')) + p_ere(p, ')'); + if (subno < NPAREN) { + p->pend[subno] = HERE(); + assert(p->pend[subno] != 0); + } + EMIT(ORPAREN, subno); + MUSTEAT(')', REG_EPAREN); + break; +#ifndef POSIX_MISTAKE + case ')': /* happens only if no current unmatched ( */ + /* + * You may ask, why the ifndef? Because I didn't notice + * this until slightly too late for 1003.2, and none of the + * other 1003.2 regular-expression reviewers noticed it at + * all. So an unmatched ) is legal POSIX, at least until + * we can get it fixed. 
+ */ + SETERROR(REG_EPAREN); + break; +#endif + case '^': + EMIT(OBOL, 0); + p->g->iflags |= USEBOL; + p->g->nbol++; + wascaret = 1; + break; + case '$': + EMIT(OEOL, 0); + p->g->iflags |= USEEOL; + p->g->neol++; + break; + case '|': + SETERROR(REG_EMPTY); + break; + case '*': + case '+': + case '?': + SETERROR(REG_BADRPT); + break; + case '.': + if (p->g->cflags&REG_NEWLINE) + nonnewline(p); + else + EMIT(OANY, 0); + break; + case '[': + p_bracket(p); + break; + case '\\': + REQUIRE(MORE(), REG_EESCAPE); + c = GETNEXT(); + ordinary(p, c); + break; + case '{': /* okay as ordinary except if digit follows */ + REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT); + /* FALLTHROUGH */ + default: + ordinary(p, c); + break; + } + + if (!MORE()) + return; + c = PEEK(); + /* we call { a repetition if followed by a digit */ + if (!( c == '*' || c == '+' || c == '?' || + (c == '{' && MORE2() && isdigit((uch)PEEK2())) )) + return; /* no repetition, we're done */ + NEXT(); + + REQUIRE(!wascaret, REG_BADRPT); + switch (c) { + case '*': /* implemented as +? */ + /* this case does not require the (y|) trick, noKLUDGE */ + INSERT(OPLUS_, pos); + ASTERN(O_PLUS, pos); + INSERT(OQUEST_, pos); + ASTERN(O_QUEST, pos); + break; + case '+': + INSERT(OPLUS_, pos); + ASTERN(O_PLUS, pos); + break; + case '?': + /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ + INSERT(OCH_, pos); /* offset slightly wrong */ + ASTERN(OOR1, pos); /* this one's right */ + AHEAD(pos); /* fix the OCH_ */ + EMIT(OOR2, 0); /* offset very wrong... 
*/ + AHEAD(THERE()); /* ...so fix it */ + ASTERN(O_CH, THERETHERE()); + break; + case '{': + count = p_count(p); + if (EAT(',')) { + if (isdigit((uch)PEEK())) { + count2 = p_count(p); + REQUIRE(count <= count2, REG_BADBR); + } else /* single number with comma */ + count2 = INFINITY; + } else /* just a single number */ + count2 = count; + repeat(p, pos, count, count2); + if (!EAT('}')) { /* error heuristics */ + while (MORE() && PEEK() != '}') + NEXT(); + REQUIRE(MORE(), REG_EBRACE); + SETERROR(REG_BADBR); + } + break; + } + + if (!MORE()) + return; + c = PEEK(); + if (!( c == '*' || c == '+' || c == '?' || + (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) ) + return; + SETERROR(REG_BADRPT); +} + +/* + - p_str - string (no metacharacters) "parser" + */ +static void +p_str(struct parse *p) +{ + REQUIRE(MORE(), REG_EMPTY); + while (MORE()) + ordinary(p, GETNEXT()); +} + +/* + - p_bre - BRE parser top level, anchoring and concatenation + * Giving end1 as OUT essentially eliminates the end1/end2 check. + * + * This implementation is a bit of a kludge, in that a trailing $ is first + * taken as an ordinary character and then revised to be an anchor. The + * only undesirable side effect is that '$' gets included as a character + * category in such cases. This is fairly harmless; not worth fixing. + * The amount of lookahead needed to avoid this kludge is excessive. + */ +static void +p_bre(struct parse *p, + int end1, /* first terminating character */ + int end2) /* second terminating character */ +{ + sopno start = HERE(); + int first = 1; /* first subexpression? 
*/ + int wasdollar = 0; + + if (EAT('^')) { + EMIT(OBOL, 0); + p->g->iflags |= USEBOL; + p->g->nbol++; + } + while (MORE() && !SEETWO(end1, end2)) { + wasdollar = p_simp_re(p, first); + first = 0; + } + if (wasdollar) { /* oops, that was a trailing anchor */ + DROP(1); + EMIT(OEOL, 0); + p->g->iflags |= USEEOL; + p->g->neol++; + } + + REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ +} + +/* + - p_simp_re - parse a simple RE, an atom possibly followed by a repetition + */ +static int /* was the simple RE an unbackslashed $? */ +p_simp_re(struct parse *p, + int starordinary) /* is a leading * an ordinary character? */ +{ + int c; + int count; + int count2; + sopno pos; + int i; + sopno subno; +# define BACKSL (1<<CHAR_BIT) + + pos = HERE(); /* repetion op, if any, covers from here */ + + assert(MORE()); /* caller should have ensured this */ + c = GETNEXT(); + if (c == '\\') { + REQUIRE(MORE(), REG_EESCAPE); + c = BACKSL | GETNEXT(); + } + switch (c) { + case '.': + if (p->g->cflags&REG_NEWLINE) + nonnewline(p); + else + EMIT(OANY, 0); + break; + case '[': + p_bracket(p); + break; + case BACKSL|'{': + SETERROR(REG_BADRPT); + break; + case BACKSL|'(': + p->g->nsub++; + subno = p->g->nsub; + if (subno < NPAREN) + p->pbegin[subno] = HERE(); + EMIT(OLPAREN, subno); + /* the MORE here is an error heuristic */ + if (MORE() && !SEETWO('\\', ')')) + p_bre(p, '\\', ')'); + if (subno < NPAREN) { + p->pend[subno] = HERE(); + assert(p->pend[subno] != 0); + } + EMIT(ORPAREN, subno); + REQUIRE(EATTWO('\\', ')'), REG_EPAREN); + break; + case BACKSL|')': /* should not get here -- must be user */ + case BACKSL|'}': + SETERROR(REG_EPAREN); + break; + case BACKSL|'1': + case BACKSL|'2': + case BACKSL|'3': + case BACKSL|'4': + case BACKSL|'5': + case BACKSL|'6': + case BACKSL|'7': + case BACKSL|'8': + case BACKSL|'9': + i = (c&~BACKSL) - '0'; + assert(i < NPAREN); + if (p->pend[i] != 0) { + assert(i <= p->g->nsub); + EMIT(OBACK_, i); + assert(p->pbegin[i] != 0); + 
assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); + assert(OP(p->strip[p->pend[i]]) == ORPAREN); + (void) dupl(p, p->pbegin[i]+1, p->pend[i]); + EMIT(O_BACK, i); + } else + SETERROR(REG_ESUBREG); + p->g->backrefs = 1; + break; + case '*': + REQUIRE(starordinary, REG_BADRPT); + /* FALLTHROUGH */ + default: + ordinary(p, (char)c); + break; + } + + if (EAT('*')) { /* implemented as +? */ + /* this case does not require the (y|) trick, noKLUDGE */ + INSERT(OPLUS_, pos); + ASTERN(O_PLUS, pos); + INSERT(OQUEST_, pos); + ASTERN(O_QUEST, pos); + } else if (EATTWO('\\', '{')) { + count = p_count(p); + if (EAT(',')) { + if (MORE() && isdigit((uch)PEEK())) { + count2 = p_count(p); + REQUIRE(count <= count2, REG_BADBR); + } else /* single number with comma */ + count2 = INFINITY; + } else /* just a single number */ + count2 = count; + repeat(p, pos, count, count2); + if (!EATTWO('\\', '}')) { /* error heuristics */ + while (MORE() && !SEETWO('\\', '}')) + NEXT(); + REQUIRE(MORE(), REG_EBRACE); + SETERROR(REG_BADBR); + } + } else if (c == '$') /* $ (but not \$) ends it */ + return(1); + + return(0); +} + +/* + - p_count - parse a repetition count + */ +static int /* the value */ +p_count(struct parse *p) +{ + int count = 0; + int ndigits = 0; + + while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) { + count = count*10 + (GETNEXT() - '0'); + ndigits++; + } + + REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); + return(count); +} + +/* + - p_bracket - parse a bracketed character list + * + * Note a significant property of this code: if the allocset() did SETERROR, + * no set operations are done. 
+ */ +static void +p_bracket(struct parse *p) +{ + cset *cs; + int invert = 0; + + /* Dept of Truly Sickening Special-Case Kludges */ + if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { + EMIT(OBOW, 0); + NEXTn(6); + return; + } + if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { + EMIT(OEOW, 0); + NEXTn(6); + return; + } + + if ((cs = allocset(p)) == NULL) { + /* allocset did set error status in p */ + return; + } + + if (EAT('^')) + invert++; /* make note to invert set at end */ + if (EAT(']')) + CHadd(cs, ']'); + else if (EAT('-')) + CHadd(cs, '-'); + while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) + p_b_term(p, cs); + if (EAT('-')) + CHadd(cs, '-'); + MUSTEAT(']', REG_EBRACK); + + if (p->error != 0) { /* don't mess things up further */ + freeset(p, cs); + return; + } + + if (p->g->cflags&REG_ICASE) { + int i; + int ci; + + for (i = p->g->csetsize - 1; i >= 0; i--) + if (CHIN(cs, i) && isalpha(i)) { + ci = othercase(i); + if (ci != i) + CHadd(cs, ci); + } + if (cs->multis != NULL) + mccase(p, cs); + } + if (invert) { + int i; + + for (i = p->g->csetsize - 1; i >= 0; i--) + if (CHIN(cs, i)) + CHsub(cs, i); + else + CHadd(cs, i); + if (p->g->cflags&REG_NEWLINE) + CHsub(cs, '\n'); + if (cs->multis != NULL) + mcinvert(p, cs); + } + + assert(cs->multis == NULL); /* xxx */ + + if (nch(p, cs) == 1) { /* optimize singleton sets */ + ordinary(p, firstch(p, cs)); + freeset(p, cs); + } else + EMIT(OANYOF, freezeset(p, cs)); +} + +/* + - p_b_term - parse one term of a bracketed character list + */ +static void +p_b_term(struct parse *p, cset *cs) +{ + char c; + char start, finish; + int i; + + /* classify what we've got */ + switch ((MORE()) ? PEEK() : '\0') { + case '[': + c = (MORE2()) ? 
PEEK2() : '\0'; + break; + case '-': + SETERROR(REG_ERANGE); + return; /* NOTE RETURN */ + break; + default: + c = '\0'; + break; + } + + switch (c) { + case ':': /* character class */ + NEXT2(); + REQUIRE(MORE(), REG_EBRACK); + c = PEEK(); + REQUIRE(c != '-' && c != ']', REG_ECTYPE); + p_b_cclass(p, cs); + REQUIRE(MORE(), REG_EBRACK); + REQUIRE(EATTWO(':', ']'), REG_ECTYPE); + break; + case '=': /* equivalence class */ + NEXT2(); + REQUIRE(MORE(), REG_EBRACK); + c = PEEK(); + REQUIRE(c != '-' && c != ']', REG_ECOLLATE); + p_b_eclass(p, cs); + REQUIRE(MORE(), REG_EBRACK); + REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); + break; + default: /* symbol, ordinary character, or range */ +/* xxx revision needed for multichar stuff */ + start = p_b_symbol(p); + if (SEE('-') && MORE2() && PEEK2() != ']') { + /* range */ + NEXT(); + if (EAT('-')) + finish = '-'; + else + finish = p_b_symbol(p); + } else + finish = start; +/* xxx what about signed chars here... */ + REQUIRE(start <= finish, REG_ERANGE); + for (i = start; i <= finish; i++) + CHadd(cs, i); + break; + } +} + +/* + - p_b_cclass - parse a character-class name and deal with it + */ +static void +p_b_cclass(struct parse *p, cset *cs) +{ + char *sp = p->next; + struct cclass *cp; + size_t len; + const char *u; + char c; + + while (MORE() && isalpha(PEEK())) + NEXT(); + len = p->next - sp; + for (cp = cclasses; cp->name != NULL; cp++) + if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') + break; + if (cp->name == NULL) { + /* oops, didn't find it */ + SETERROR(REG_ECTYPE); + return; + } + + u = cp->chars; + while ((c = *u++) != '\0') + CHadd(cs, c); + for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) + MCadd(p, cs, u); +} + +/* + - p_b_eclass - parse an equivalence-class name and deal with it + * + * This implementation is incomplete. 
xxx + */ +static void +p_b_eclass(struct parse *p, cset *cs) +{ + char c; + + c = p_b_coll_elem(p, '='); + CHadd(cs, c); +} + +/* + - p_b_symbol - parse a character or [..]ed multicharacter collating symbol + */ +static char /* value of symbol */ +p_b_symbol(struct parse *p) +{ + char value; + + REQUIRE(MORE(), REG_EBRACK); + if (!EATTWO('[', '.')) + return(GETNEXT()); + + /* collating symbol */ + value = p_b_coll_elem(p, '.'); + REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); + return(value); +} + +/* + - p_b_coll_elem - parse a collating-element name and look it up + */ +static char /* value of collating element */ +p_b_coll_elem(struct parse *p, + int endc) /* name ended by endc,']' */ +{ + char *sp = p->next; + struct cname *cp; + int len; + + while (MORE() && !SEETWO(endc, ']')) + NEXT(); + if (!MORE()) { + SETERROR(REG_EBRACK); + return(0); + } + len = p->next - sp; + for (cp = cnames; cp->name != NULL; cp++) + if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') + return(cp->code); /* known name */ + if (len == 1) + return(*sp); /* single character */ + SETERROR(REG_ECOLLATE); /* neither */ + return(0); +} + +/* + - othercase - return the case counterpart of an alphabetic + */ +static char /* if no counterpart, return ch */ +othercase(int ch) +{ + ch = (uch)ch; + assert(isalpha(ch)); + if (isupper(ch)) + return ((uch)tolower(ch)); + else if (islower(ch)) + return ((uch)toupper(ch)); + else /* peculiar, but could happen */ + return(ch); +} + +/* + - bothcases - emit a dualcase version of a two-case character + * + * Boy, is this implementation ever a kludge... 
+ */ +static void +bothcases(struct parse *p, int ch) +{ + char *oldnext = p->next; + char *oldend = p->end; + char bracket[3]; + + ch = (uch)ch; + assert(othercase(ch) != ch); /* p_bracket() would recurse */ + p->next = bracket; + p->end = bracket+2; + bracket[0] = ch; + bracket[1] = ']'; + bracket[2] = '\0'; + p_bracket(p); + assert(p->next == bracket+2); + p->next = oldnext; + p->end = oldend; +} + +/* + - ordinary - emit an ordinary character + */ +static void +ordinary(struct parse *p, int ch) +{ + cat_t *cap = p->g->categories; + + if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch) + bothcases(p, ch); + else { + EMIT(OCHAR, (uch)ch); + if (cap[ch] == 0) + cap[ch] = p->g->ncategories++; + } +} + +/* + - nonnewline - emit REG_NEWLINE version of OANY + * + * Boy, is this implementation ever a kludge... + */ +static void +nonnewline(struct parse *p) +{ + char *oldnext = p->next; + char *oldend = p->end; + char bracket[4]; + + p->next = bracket; + p->end = bracket+3; + bracket[0] = '^'; + bracket[1] = '\n'; + bracket[2] = ']'; + bracket[3] = '\0'; + p_bracket(p); + assert(p->next == bracket+3); + p->next = oldnext; + p->end = oldend; +} + +/* + - repeat - generate code for a bounded repetition, recursively if needed + */ +static void +repeat(struct parse *p, + sopno start, /* operand from here to end of strip */ + int from, /* repeated from this number */ + int to) /* to this number of times (maybe INFINITY) */ +{ + sopno finish = HERE(); +# define N 2 +# define INF 3 +# define REP(f, t) ((f)*8 + (t)) +# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) + sopno copy; + + if (p->error != 0) /* head off possible runaway recursion */ + return; + + assert(from <= to); + + switch (REP(MAP(from), MAP(to))) { + case REP(0, 0): /* must be user doing this */ + DROP(finish-start); /* drop the operand */ + break; + case REP(0, 1): /* as x{1,1}? */ + case REP(0, N): /* as x{1,n}? */ + case REP(0, INF): /* as x{1,}? */ + /* KLUDGE: emit y? 
as (y|) until subtle bug gets fixed */ + INSERT(OCH_, start); /* offset is wrong... */ + repeat(p, start+1, 1, to); + ASTERN(OOR1, start); + AHEAD(start); /* ... fix it */ + EMIT(OOR2, 0); + AHEAD(THERE()); + ASTERN(O_CH, THERETHERE()); + break; + case REP(1, 1): /* trivial case */ + /* done */ + break; + case REP(1, N): /* as x?x{1,n-1} */ + /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ + INSERT(OCH_, start); + ASTERN(OOR1, start); + AHEAD(start); + EMIT(OOR2, 0); /* offset very wrong... */ + AHEAD(THERE()); /* ...so fix it */ + ASTERN(O_CH, THERETHERE()); + copy = dupl(p, start+1, finish+1); + assert(copy == finish+4); + repeat(p, copy, 1, to-1); + break; + case REP(1, INF): /* as x+ */ + INSERT(OPLUS_, start); + ASTERN(O_PLUS, start); + break; + case REP(N, N): /* as xx{m-1,n-1} */ + copy = dupl(p, start, finish); + repeat(p, copy, from-1, to-1); + break; + case REP(N, INF): /* as xx{n-1,INF} */ + copy = dupl(p, start, finish); + repeat(p, copy, from-1, to); + break; + default: /* "can't happen" */ + SETERROR(REG_ASSERT); /* just in case */ + break; + } +} + +/* + - seterr - set an error condition + */ +static int /* useless but makes type checking happy */ +seterr(struct parse *p, int e) +{ + if (p->error == 0) /* keep earliest error condition */ + p->error = e; + p->next = nuls; /* try to bring things to a halt */ + p->end = nuls; + return(0); /* make the return value well-defined */ +} + +/* + - allocset - allocate a set of characters for [] + */ +static cset * +allocset(struct parse *p) +{ + int no = p->g->ncsets++; + size_t nc; + size_t nbytes; + cset *cs; + size_t css = (size_t)p->g->csetsize; + int i; + + if (no >= p->ncsalloc) { /* need another column of space */ + void *ptr; + + p->ncsalloc += CHAR_BIT; + nc = p->ncsalloc; + assert(nc % CHAR_BIT == 0); + nbytes = nc / CHAR_BIT * css; + + ptr = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset)); + if (ptr == NULL) + goto nomem; + p->g->sets = ptr; + + ptr = (uch *)realloc((char 
*)p->g->setbits, nbytes); + if (ptr == NULL) + goto nomem; + p->g->setbits = ptr; + + for (i = 0; i < no; i++) + p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); + + (void) memset((char *)p->g->setbits + (nbytes - css), 0, css); + } + /* XXX should not happen */ + if (p->g->sets == NULL || p->g->setbits == NULL) + goto nomem; + + cs = &p->g->sets[no]; + cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); + cs->mask = 1 << ((no) % CHAR_BIT); + cs->hash = 0; + cs->smultis = 0; + cs->multis = NULL; + + return(cs); +nomem: + free(p->g->sets); + p->g->sets = NULL; + free(p->g->setbits); + p->g->setbits = NULL; + + SETERROR(REG_ESPACE); + /* caller's responsibility not to do set ops */ + return(NULL); +} + +/* + - freeset - free a now-unused set + */ +static void +freeset(struct parse *p, cset *cs) +{ + size_t i; + cset *top = &p->g->sets[p->g->ncsets]; + size_t css = (size_t)p->g->csetsize; + + for (i = 0; i < css; i++) + CHsub(cs, i); + if (cs == top-1) /* recover only the easy case */ + p->g->ncsets--; +} + +/* + - freezeset - final processing on a set of characters + * + * The main task here is merging identical sets. This is usually a waste + * of time (although the hash code minimizes the overhead), but can win + * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash + * is done using addition rather than xor -- all ASCII [aA] sets xor to + * the same value! 
+ */ +static int /* set number */ +freezeset(struct parse *p, cset *cs) +{ + uch h = cs->hash; + size_t i; + cset *top = &p->g->sets[p->g->ncsets]; + cset *cs2; + size_t css = (size_t)p->g->csetsize; + + /* look for an earlier one which is the same */ + for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) + if (cs2->hash == h && cs2 != cs) { + /* maybe */ + for (i = 0; i < css; i++) + if (!!CHIN(cs2, i) != !!CHIN(cs, i)) + break; /* no */ + if (i == css) + break; /* yes */ + } + + if (cs2 < top) { /* found one */ + freeset(p, cs); + cs = cs2; + } + + return((int)(cs - p->g->sets)); +} + +/* + - firstch - return first character in a set (which must have at least one) + */ +static int /* character; there is no "none" value */ +firstch(struct parse *p, cset *cs) +{ + size_t i; + size_t css = (size_t)p->g->csetsize; + + for (i = 0; i < css; i++) + if (CHIN(cs, i)) + return((char)i); + assert(never); + return(0); /* arbitrary */ +} + +/* + - nch - number of characters in a set + */ +static int +nch(struct parse *p, cset *cs) +{ + size_t i; + size_t css = (size_t)p->g->csetsize; + int n = 0; + + for (i = 0; i < css; i++) + if (CHIN(cs, i)) + n++; + return(n); +} + +/* + - mcadd - add a collating element to a cset + */ +static void +mcadd( struct parse *p, cset *cs, const char *cp) +{ + size_t oldend = cs->smultis; + void *np; + + cs->smultis += strlen(cp) + 1; + np = realloc(cs->multis, cs->smultis); + if (np == NULL) { + if (cs->multis) + free(cs->multis); + cs->multis = NULL; + SETERROR(REG_ESPACE); + return; + } + cs->multis = np; + + llvm_strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1); +} + +/* + - mcinvert - invert the list of collating elements in a cset + * + * This would have to know the set of possibilities. Implementation + * is deferred. 
+ */ +/* ARGSUSED */ +static void +mcinvert(struct parse *p, cset *cs) +{ + assert(cs->multis == NULL); /* xxx */ +} + +/* + - mccase - add case counterparts of the list of collating elements in a cset + * + * This would have to know the set of possibilities. Implementation + * is deferred. + */ +/* ARGSUSED */ +static void +mccase(struct parse *p, cset *cs) +{ + assert(cs->multis == NULL); /* xxx */ +} + +/* + - isinsets - is this character in any sets? + */ +static int /* predicate */ +isinsets(struct re_guts *g, int c) +{ + uch *col; + int i; + int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; + unsigned uc = (uch)c; + + for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) + if (col[uc] != 0) + return(1); + return(0); +} + +/* + - samesets - are these two characters in exactly the same sets? + */ +static int /* predicate */ +samesets(struct re_guts *g, int c1, int c2) +{ + uch *col; + int i; + int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; + unsigned uc1 = (uch)c1; + unsigned uc2 = (uch)c2; + + for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) + if (col[uc1] != col[uc2]) + return(0); + return(1); +} + +/* + - categorize - sort out character categories + */ +static void +categorize(struct parse *p, struct re_guts *g) +{ + cat_t *cats = g->categories; + int c; + int c2; + cat_t cat; + + /* avoid making error situations worse */ + if (p->error != 0) + return; + + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (cats[c] == 0 && isinsets(g, c)) { + cat = g->ncategories++; + cats[c] = cat; + for (c2 = c+1; c2 <= CHAR_MAX; c2++) + if (cats[c2] == 0 && samesets(g, c, c2)) + cats[c2] = cat; + } +} + +/* + - dupl - emit a duplicate of a bunch of sops + */ +static sopno /* start of duplicate */ +dupl(struct parse *p, + sopno start, /* from here */ + sopno finish) /* to this less one */ +{ + sopno ret = HERE(); + sopno len = finish - start; + + assert(finish >= start); + if (len == 0) + return(ret); + enlarge(p, p->ssize + len); /* this many 
unexpected additions */ + assert(p->ssize >= p->slen + len); + (void) memmove((char *)(p->strip + p->slen), + (char *)(p->strip + start), (size_t)len*sizeof(sop)); + p->slen += len; + return(ret); +} + +/* + - doemit - emit a strip operator + * + * It might seem better to implement this as a macro with a function as + * hard-case backup, but it's just too big and messy unless there are + * some changes to the data structures. Maybe later. + */ +static void +doemit(struct parse *p, sop op, size_t opnd) +{ + /* avoid making error situations worse */ + if (p->error != 0) + return; + + /* deal with oversize operands ("can't happen", more or less) */ + assert(opnd < 1<<OPSHIFT); + + /* deal with undersized strip */ + if (p->slen >= p->ssize) + enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ + assert(p->slen < p->ssize); + + /* finally, it's all reduced to the easy case */ + p->strip[p->slen++] = SOP(op, opnd); +} + +/* + - doinsert - insert a sop into the strip + */ +static void +doinsert(struct parse *p, sop op, size_t opnd, sopno pos) +{ + sopno sn; + sop s; + int i; + + /* avoid making error situations worse */ + if (p->error != 0) + return; + + sn = HERE(); + EMIT(op, opnd); /* do checks, ensure space */ + assert(HERE() == sn+1); + s = p->strip[sn]; + + /* adjust paren pointers */ + assert(pos > 0); + for (i = 1; i < NPAREN; i++) { + if (p->pbegin[i] >= pos) { + p->pbegin[i]++; + } + if (p->pend[i] >= pos) { + p->pend[i]++; + } + } + + memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], + (HERE()-pos-1)*sizeof(sop)); + p->strip[pos] = s; +} + +/* + - dofwd - complete a forward reference + */ +static void +dofwd(struct parse *p, sopno pos, sop value) +{ + /* avoid making error situations worse */ + if (p->error != 0) + return; + + assert(value < 1<<OPSHIFT); + p->strip[pos] = OP(p->strip[pos]) | value; +} + +/* + - enlarge - enlarge the strip + */ +static void +enlarge(struct parse *p, sopno size) +{ + sop *sp; + + if (p->ssize >= size) + return; + + sp = (sop 
*)realloc(p->strip, size*sizeof(sop)); + if (sp == NULL) { + SETERROR(REG_ESPACE); + return; + } + p->strip = sp; + p->ssize = size; +} + +/* + - stripsnug - compact the strip + */ +static void +stripsnug(struct parse *p, struct re_guts *g) +{ + g->nstates = p->slen; + g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); + if (g->strip == NULL) { + SETERROR(REG_ESPACE); + g->strip = p->strip; + } +} + +/* + - findmust - fill in must and mlen with longest mandatory literal string + * + * This algorithm could do fancy things like analyzing the operands of | + * for common subsequences. Someday. This code is simple and finds most + * of the interesting cases. + * + * Note that must and mlen got initialized during setup. + */ +static void +findmust(struct parse *p, struct re_guts *g) +{ + sop *scan; + sop *start = 0; /* start initialized in the default case, after that */ + sop *newstart = 0; /* newstart was initialized in the OCHAR case */ + sopno newlen; + sop s; + char *cp; + sopno i; + + /* avoid making error situations worse */ + if (p->error != 0) + return; + + /* find the longest OCHAR sequence in strip */ + newlen = 0; + scan = g->strip + 1; + do { + s = *scan++; + switch (OP(s)) { + case OCHAR: /* sequence member */ + if (newlen == 0) /* new sequence */ + newstart = scan - 1; + newlen++; + break; + case OPLUS_: /* things that don't break one */ + case OLPAREN: + case ORPAREN: + break; + case OQUEST_: /* things that must be skipped */ + case OCH_: + scan--; + do { + scan += OPND(s); + s = *scan; + /* assert() interferes w debug printouts */ + if (OP(s) != O_QUEST && OP(s) != O_CH && + OP(s) != OOR2) { + g->iflags |= REGEX_BAD; + return; + } + } while (OP(s) != O_QUEST && OP(s) != O_CH); + /* fallthrough */ + default: /* things that break a sequence */ + if (newlen > g->mlen) { /* ends one */ + start = newstart; + g->mlen = newlen; + } + newlen = 0; + break; + } + } while (OP(s) != OEND); + + if (g->mlen == 0) /* there isn't one */ + return; + + 
/* turn it into a character string */ + g->must = malloc((size_t)g->mlen + 1); + if (g->must == NULL) { /* argh; just forget it */ + g->mlen = 0; + return; + } + cp = g->must; + scan = start; + for (i = g->mlen; i > 0; i--) { + while (OP(s = *scan++) != OCHAR) + continue; + assert(cp < g->must + g->mlen); + *cp++ = (char)OPND(s); + } + assert(cp == g->must + g->mlen); + *cp++ = '\0'; /* just on general principles */ +} + +/* + - pluscount - count + nesting + */ +static sopno /* nesting depth */ +pluscount(struct parse *p, struct re_guts *g) +{ + sop *scan; + sop s; + sopno plusnest = 0; + sopno maxnest = 0; + + if (p->error != 0) + return(0); /* there may not be an OEND */ + + scan = g->strip + 1; + do { + s = *scan++; + switch (OP(s)) { + case OPLUS_: + plusnest++; + break; + case O_PLUS: + if (plusnest > maxnest) + maxnest = plusnest; + plusnest--; + break; + } + } while (OP(s) != OEND); + if (plusnest != 0) + g->iflags |= REGEX_BAD; + return(maxnest); +} diff --git a/lib/Support/regengine.inc b/lib/Support/regengine.inc new file mode 100644 index 0000000..0f27cfd --- /dev/null +++ b/lib/Support/regengine.inc @@ -0,0 +1,1027 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)engine.c 8.5 (Berkeley) 3/20/94 + */ + +/* + * The matching engine and friends. This file is #included by regexec.c + * after suitable #defines of a variety of macros used herein, so that + * different state representations can be used without duplicating masses + * of code. 
+ */ + +#ifdef SNAMES +#define matcher smatcher +#define fast sfast +#define slow sslow +#define dissect sdissect +#define backref sbackref +#define step sstep +#define print sprint +#define at sat +#define match smat +#define nope snope +#endif +#ifdef LNAMES +#define matcher lmatcher +#define fast lfast +#define slow lslow +#define dissect ldissect +#define backref lbackref +#define step lstep +#define print lprint +#define at lat +#define match lmat +#define nope lnope +#endif + +/* another structure passed up and down to avoid zillions of parameters */ +struct match { + struct re_guts *g; + int eflags; + llvm_regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ + char *offp; /* offsets work from here */ + char *beginp; /* start of string -- virtual NUL precedes */ + char *endp; /* end of string -- virtual NUL here */ + char *coldp; /* can be no match starting before here */ + char **lastpos; /* [nplus+1] */ + STATEVARS; + states st; /* current states */ + states fresh; /* states for a fresh start */ + states tmp; /* temporary */ + states empty; /* empty set of states */ +}; + +static int matcher(struct re_guts *, char *, size_t, llvm_regmatch_t[], int); +static char *dissect(struct match *, char *, char *, sopno, sopno); +static char *backref(struct match *, char *, char *, sopno, sopno, sopno, int); +static char *fast(struct match *, char *, char *, sopno, sopno); +static char *slow(struct match *, char *, char *, sopno, sopno); +static states step(struct re_guts *, sopno, sopno, states, int, states); +#define MAX_RECURSION 100 +#define BOL (OUT+1) +#define EOL (BOL+1) +#define BOLEOL (BOL+2) +#define NOTHING (BOL+3) +#define BOW (BOL+4) +#define EOW (BOL+5) +#define CODEMAX (BOL+5) /* highest code used */ +#define NONCHAR(c) ((c) > CHAR_MAX) +#define NNONCHAR (CODEMAX-CHAR_MAX) +#ifdef REDEBUG +static void print(struct match *, char *, states, int, FILE *); +#endif +#ifdef REDEBUG +static void at(struct match *, char *, char *, char *, sopno, sopno); +#endif 
+#ifdef REDEBUG +static char *pchar(int); +#endif + +#ifdef REDEBUG +#define SP(t, s, c) print(m, t, s, c, stdout) +#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) +#define NOTE(str) { if (m->eflags®_TRACE) (void)printf("=%s\n", (str)); } +static int nope = 0; +#else +#define SP(t, s, c) /* nothing */ +#define AT(t, p1, p2, s1, s2) /* nothing */ +#define NOTE(s) /* nothing */ +#endif + +/* + - matcher - the actual matching engine + */ +static int /* 0 success, REG_NOMATCH failure */ +matcher(struct re_guts *g, char *string, size_t nmatch, llvm_regmatch_t pmatch[], + int eflags) +{ + char *endp; + size_t i; + struct match mv; + struct match *m = &mv; + char *dp; + const sopno gf = g->firststate+1; /* +1 for OEND */ + const sopno gl = g->laststate; + char *start; + char *stop; + + /* simplify the situation where possible */ + if (g->cflags®_NOSUB) + nmatch = 0; + if (eflags®_STARTEND) { + start = string + pmatch[0].rm_so; + stop = string + pmatch[0].rm_eo; + } else { + start = string; + stop = start + strlen(start); + } + if (stop < start) + return(REG_INVARG); + + /* prescreening; this does wonders for this rather slow code */ + if (g->must != NULL) { + for (dp = start; dp < stop; dp++) + if (*dp == g->must[0] && stop - dp >= g->mlen && + memcmp(dp, g->must, (size_t)g->mlen) == 0) + break; + if (dp == stop) /* we didn't find g->must */ + return(REG_NOMATCH); + } + + /* match struct setup */ + m->g = g; + m->eflags = eflags; + m->pmatch = NULL; + m->lastpos = NULL; + m->offp = string; + m->beginp = start; + m->endp = stop; + STATESETUP(m, 4); + SETUP(m->st); + SETUP(m->fresh); + SETUP(m->tmp); + SETUP(m->empty); + CLEAR(m->empty); + + /* this loop does only one repetition except for backrefs */ + for (;;) { + endp = fast(m, start, stop, gf, gl); + if (endp == NULL) { /* a miss */ + free(m->pmatch); + free(m->lastpos); + STATETEARDOWN(m); + return(REG_NOMATCH); + } + if (nmatch == 0 && !g->backrefs) + break; /* no further info needed */ + + /* where? 
*/ + assert(m->coldp != NULL); + for (;;) { + NOTE("finding start"); + endp = slow(m, m->coldp, stop, gf, gl); + if (endp != NULL) + break; + assert(m->coldp < m->endp); + m->coldp++; + } + if (nmatch == 1 && !g->backrefs) + break; /* no further info needed */ + + /* oh my, he wants the subexpressions... */ + if (m->pmatch == NULL) + m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) * + sizeof(llvm_regmatch_t)); + if (m->pmatch == NULL) { + STATETEARDOWN(m); + return(REG_ESPACE); + } + for (i = 1; i <= m->g->nsub; i++) + m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; + if (!g->backrefs && !(m->eflags®_BACKR)) { + NOTE("dissecting"); + dp = dissect(m, m->coldp, endp, gf, gl); + } else { + if (g->nplus > 0 && m->lastpos == NULL) + m->lastpos = (char **)malloc((g->nplus+1) * + sizeof(char *)); + if (g->nplus > 0 && m->lastpos == NULL) { + free(m->pmatch); + STATETEARDOWN(m); + return(REG_ESPACE); + } + NOTE("backref dissect"); + dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); + } + if (dp != NULL) + break; + + /* uh-oh... 
we couldn't find a subexpression-level match */ + assert(g->backrefs); /* must be back references doing it */ + assert(g->nplus == 0 || m->lastpos != NULL); + for (;;) { + if (dp != NULL || endp <= m->coldp) + break; /* defeat */ + NOTE("backoff"); + endp = slow(m, m->coldp, endp-1, gf, gl); + if (endp == NULL) + break; /* defeat */ + /* try it on a shorter possibility */ +#ifndef NDEBUG + for (i = 1; i <= m->g->nsub; i++) { + assert(m->pmatch[i].rm_so == -1); + assert(m->pmatch[i].rm_eo == -1); + } +#endif + NOTE("backoff dissect"); + dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); + } + assert(dp == NULL || dp == endp); + if (dp != NULL) /* found a shorter one */ + break; + + /* despite initial appearances, there is no match here */ + NOTE("false alarm"); + if (m->coldp == stop) + break; + start = m->coldp + 1; /* recycle starting later */ + } + + /* fill in the details if requested */ + if (nmatch > 0) { + pmatch[0].rm_so = m->coldp - m->offp; + pmatch[0].rm_eo = endp - m->offp; + } + if (nmatch > 1) { + assert(m->pmatch != NULL); + for (i = 1; i < nmatch; i++) + if (i <= m->g->nsub) + pmatch[i] = m->pmatch[i]; + else { + pmatch[i].rm_so = -1; + pmatch[i].rm_eo = -1; + } + } + + if (m->pmatch != NULL) + free((char *)m->pmatch); + if (m->lastpos != NULL) + free((char *)m->lastpos); + STATETEARDOWN(m); + return(0); +} + +/* + - dissect - figure out what matched what, no back references + */ +static char * /* == stop (success) always */ +dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst) +{ + int i; + sopno ss; /* start sop of current subRE */ + sopno es; /* end sop of current subRE */ + char *sp; /* start of string matched by it */ + char *stp; /* string matched by it cannot pass here */ + char *rest; /* start of rest of string */ + char *tail; /* string unmatched by rest of RE */ + sopno ssub; /* start sop of subsubRE */ + sopno esub; /* end sop of subsubRE */ + char *ssp; /* start of string matched by subsubRE */ + char *sep; /* 
end of string matched by subsubRE */ + char *oldssp; /* previous ssp */ + + AT("diss", start, stop, startst, stopst); + sp = start; + for (ss = startst; ss < stopst; ss = es) { + /* identify end of subRE */ + es = ss; + switch (OP(m->g->strip[es])) { + case OPLUS_: + case OQUEST_: + es += OPND(m->g->strip[es]); + break; + case OCH_: + while (OP(m->g->strip[es]) != O_CH) + es += OPND(m->g->strip[es]); + break; + } + es++; + + /* figure out what it matched */ + switch (OP(m->g->strip[ss])) { + case OEND: + assert(nope); + break; + case OCHAR: + sp++; + break; + case OBOL: + case OEOL: + case OBOW: + case OEOW: + break; + case OANY: + case OANYOF: + sp++; + break; + case OBACK_: + case O_BACK: + assert(nope); + break; + /* cases where length of match is hard to find */ + case OQUEST_: + stp = stop; + for (;;) { + /* how long could this one be? */ + rest = slow(m, sp, stp, ss, es); + assert(rest != NULL); /* it did match */ + /* could the rest match the rest? */ + tail = slow(m, rest, stop, es, stopst); + if (tail == stop) + break; /* yes! */ + /* no -- try a shorter match for this one */ + stp = rest - 1; + assert(stp >= sp); /* it did work */ + } + ssub = ss + 1; + esub = es - 1; + /* did innards match? */ + if (slow(m, sp, rest, ssub, esub) != NULL) { + char *dp = dissect(m, sp, rest, ssub, esub); + (void)dp; /* avoid warning if assertions off */ + assert(dp == rest); + } else /* no */ + assert(sp == rest); + sp = rest; + break; + case OPLUS_: + stp = stop; + for (;;) { + /* how long could this one be? */ + rest = slow(m, sp, stp, ss, es); + assert(rest != NULL); /* it did match */ + /* could the rest match the rest? */ + tail = slow(m, rest, stop, es, stopst); + if (tail == stop) + break; /* yes! 
*/ + /* no -- try a shorter match for this one */ + stp = rest - 1; + assert(stp >= sp); /* it did work */ + } + ssub = ss + 1; + esub = es - 1; + ssp = sp; + oldssp = ssp; + for (;;) { /* find last match of innards */ + sep = slow(m, ssp, rest, ssub, esub); + if (sep == NULL || sep == ssp) + break; /* failed or matched null */ + oldssp = ssp; /* on to next try */ + ssp = sep; + } + if (sep == NULL) { + /* last successful match */ + sep = ssp; + ssp = oldssp; + } + assert(sep == rest); /* must exhaust substring */ + assert(slow(m, ssp, sep, ssub, esub) == rest); + { + char *dp = dissect(m, ssp, sep, ssub, esub); + (void)dp; /* avoid warning if assertions off */ + assert(dp == sep); + } + sp = rest; + break; + case OCH_: + stp = stop; + for (;;) { + /* how long could this one be? */ + rest = slow(m, sp, stp, ss, es); + assert(rest != NULL); /* it did match */ + /* could the rest match the rest? */ + tail = slow(m, rest, stop, es, stopst); + if (tail == stop) + break; /* yes! */ + /* no -- try a shorter match for this one */ + stp = rest - 1; + assert(stp >= sp); /* it did work */ + } + ssub = ss + 1; + esub = ss + OPND(m->g->strip[ss]) - 1; + assert(OP(m->g->strip[esub]) == OOR1); + for (;;) { /* find first matching branch */ + if (slow(m, sp, rest, ssub, esub) == rest) + break; /* it matched all of it */ + /* that one missed, try next one */ + assert(OP(m->g->strip[esub]) == OOR1); + esub++; + assert(OP(m->g->strip[esub]) == OOR2); + ssub = esub + 1; + esub += OPND(m->g->strip[esub]); + if (OP(m->g->strip[esub]) == OOR2) + esub--; + else + assert(OP(m->g->strip[esub]) == O_CH); + } + { + char *dp = dissect(m, sp, rest, ssub, esub); + (void)dp; /* avoid warning if assertions off */ + assert(dp == rest); + } + sp = rest; + break; + case O_PLUS: + case O_QUEST: + case OOR1: + case OOR2: + case O_CH: + assert(nope); + break; + case OLPAREN: + i = OPND(m->g->strip[ss]); + assert(0 < i && i <= m->g->nsub); + m->pmatch[i].rm_so = sp - m->offp; + break; + case ORPAREN: + i 
= OPND(m->g->strip[ss]); + assert(0 < i && i <= m->g->nsub); + m->pmatch[i].rm_eo = sp - m->offp; + break; + default: /* uh oh */ + assert(nope); + break; + } + } + + assert(sp == stop); + return(sp); +} + +/* + - backref - figure out what matched what, figuring in back references + */ +static char * /* == stop (success) or NULL (failure) */ +backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst, + sopno lev, int rec) /* PLUS nesting level */ +{ + int i; + sopno ss; /* start sop of current subRE */ + char *sp; /* start of string matched by it */ + sopno ssub; /* start sop of subsubRE */ + sopno esub; /* end sop of subsubRE */ + char *ssp; /* start of string matched by subsubRE */ + char *dp; + size_t len; + int hard; + sop s; + llvm_regoff_t offsave; + cset *cs; + + AT("back", start, stop, startst, stopst); + sp = start; + + /* get as far as we can with easy stuff */ + hard = 0; + for (ss = startst; !hard && ss < stopst; ss++) + switch (OP(s = m->g->strip[ss])) { + case OCHAR: + if (sp == stop || *sp++ != (char)OPND(s)) + return(NULL); + break; + case OANY: + if (sp == stop) + return(NULL); + sp++; + break; + case OANYOF: + cs = &m->g->sets[OPND(s)]; + if (sp == stop || !CHIN(cs, *sp++)) + return(NULL); + break; + case OBOL: + if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || + (sp < m->endp && *(sp-1) == '\n' && + (m->g->cflags®_NEWLINE)) ) + { /* yes */ } + else + return(NULL); + break; + case OEOL: + if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || + (sp < m->endp && *sp == '\n' && + (m->g->cflags®_NEWLINE)) ) + { /* yes */ } + else + return(NULL); + break; + case OBOW: + if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || + (sp < m->endp && *(sp-1) == '\n' && + (m->g->cflags®_NEWLINE)) || + (sp > m->beginp && + !ISWORD(*(sp-1))) ) && + (sp < m->endp && ISWORD(*sp)) ) + { /* yes */ } + else + return(NULL); + break; + case OEOW: + if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || + (sp < m->endp && *sp == '\n' && + (m->g->cflags®_NEWLINE)) || 
+ (sp < m->endp && !ISWORD(*sp)) ) && + (sp > m->beginp && ISWORD(*(sp-1))) ) + { /* yes */ } + else + return(NULL); + break; + case O_QUEST: + break; + case OOR1: /* matches null but needs to skip */ + ss++; + s = m->g->strip[ss]; + do { + assert(OP(s) == OOR2); + ss += OPND(s); + } while (OP(s = m->g->strip[ss]) != O_CH); + /* note that the ss++ gets us past the O_CH */ + break; + default: /* have to make a choice */ + hard = 1; + break; + } + if (!hard) { /* that was it! */ + if (sp != stop) + return(NULL); + return(sp); + } + ss--; /* adjust for the for's final increment */ + + /* the hard stuff */ + AT("hard", sp, stop, ss, stopst); + s = m->g->strip[ss]; + switch (OP(s)) { + case OBACK_: /* the vilest depths */ + i = OPND(s); + assert(0 < i && i <= m->g->nsub); + if (m->pmatch[i].rm_eo == -1) + return(NULL); + assert(m->pmatch[i].rm_so != -1); + len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; + if (len == 0 && rec++ > MAX_RECURSION) + return(NULL); + assert(stop - m->beginp >= len); + if (sp > stop - len) + return(NULL); /* not enough left to match */ + ssp = m->offp + m->pmatch[i].rm_so; + if (memcmp(sp, ssp, len) != 0) + return(NULL); + while (m->g->strip[ss] != SOP(O_BACK, i)) + ss++; + return(backref(m, sp+len, stop, ss+1, stopst, lev, rec)); + break; + case OQUEST_: /* to null or not */ + dp = backref(m, sp, stop, ss+1, stopst, lev, rec); + if (dp != NULL) + return(dp); /* not */ + return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec)); + break; + case OPLUS_: + assert(m->lastpos != NULL); + assert(lev+1 <= m->g->nplus); + m->lastpos[lev+1] = sp; + return(backref(m, sp, stop, ss+1, stopst, lev+1, rec)); + break; + case O_PLUS: + if (sp == m->lastpos[lev]) /* last pass matched null */ + return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); + /* try another pass */ + m->lastpos[lev] = sp; + dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec); + if (dp == NULL) + return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); + else + return(dp); + 
break; + case OCH_: /* find the right one, if any */ + ssub = ss + 1; + esub = ss + OPND(s) - 1; + assert(OP(m->g->strip[esub]) == OOR1); + for (;;) { /* find first matching branch */ + dp = backref(m, sp, stop, ssub, esub, lev, rec); + if (dp != NULL) + return(dp); + /* that one missed, try next one */ + if (OP(m->g->strip[esub]) == O_CH) + return(NULL); /* there is none */ + esub++; + assert(OP(m->g->strip[esub]) == OOR2); + ssub = esub + 1; + esub += OPND(m->g->strip[esub]); + if (OP(m->g->strip[esub]) == OOR2) + esub--; + else + assert(OP(m->g->strip[esub]) == O_CH); + } + break; + case OLPAREN: /* must undo assignment if rest fails */ + i = OPND(s); + assert(0 < i && i <= m->g->nsub); + offsave = m->pmatch[i].rm_so; + m->pmatch[i].rm_so = sp - m->offp; + dp = backref(m, sp, stop, ss+1, stopst, lev, rec); + if (dp != NULL) + return(dp); + m->pmatch[i].rm_so = offsave; + return(NULL); + break; + case ORPAREN: /* must undo assignment if rest fails */ + i = OPND(s); + assert(0 < i && i <= m->g->nsub); + offsave = m->pmatch[i].rm_eo; + m->pmatch[i].rm_eo = sp - m->offp; + dp = backref(m, sp, stop, ss+1, stopst, lev, rec); + if (dp != NULL) + return(dp); + m->pmatch[i].rm_eo = offsave; + return(NULL); + break; + default: /* uh oh */ + assert(nope); + break; + } + + /* "can't happen" */ + assert(nope); + /* NOTREACHED */ + return NULL; +} + +/* + - fast - step through the string at top speed + */ +static char * /* where tentative match ended, or NULL */ +fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst) +{ + states st = m->st; + states fresh = m->fresh; + states tmp = m->tmp; + char *p = start; + int c = (start == m->beginp) ? 
OUT : *(start-1); + int lastc; /* previous c */ + int flagch; + int i; + char *coldp; /* last p after which no match was underway */ + + CLEAR(st); + SET1(st, startst); + st = step(m->g, startst, stopst, st, NOTHING, st); + ASSIGN(fresh, st); + SP("start", st, *p); + coldp = NULL; + for (;;) { + /* next character */ + lastc = c; + c = (p == m->endp) ? OUT : *p; + if (EQ(st, fresh)) + coldp = p; + + /* is there an EOL and/or BOL between lastc and c? */ + flagch = '\0'; + i = 0; + if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || + (lastc == OUT && !(m->eflags®_NOTBOL)) ) { + flagch = BOL; + i = m->g->nbol; + } + if ( (c == '\n' && m->g->cflags®_NEWLINE) || + (c == OUT && !(m->eflags®_NOTEOL)) ) { + flagch = (flagch == BOL) ? BOLEOL : EOL; + i += m->g->neol; + } + if (i != 0) { + for (; i > 0; i--) + st = step(m->g, startst, stopst, st, flagch, st); + SP("boleol", st, c); + } + + /* how about a word boundary? */ + if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && + (c != OUT && ISWORD(c)) ) { + flagch = BOW; + } + if ( (lastc != OUT && ISWORD(lastc)) && + (flagch == EOL || (c != OUT && !ISWORD(c))) ) { + flagch = EOW; + } + if (flagch == BOW || flagch == EOW) { + st = step(m->g, startst, stopst, st, flagch, st); + SP("boweow", st, c); + } + + /* are we done? 
*/ + if (ISSET(st, stopst) || p == stop) + break; /* NOTE BREAK OUT */ + + /* no, we must deal with this character */ + ASSIGN(tmp, st); + ASSIGN(st, fresh); + assert(c != OUT); + st = step(m->g, startst, stopst, tmp, c, st); + SP("aft", st, c); + assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); + p++; + } + + assert(coldp != NULL); + m->coldp = coldp; + if (ISSET(st, stopst)) + return(p+1); + else + return(NULL); +} + +/* + - slow - step through the string more deliberately + */ +static char * /* where it ended */ +slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst) +{ + states st = m->st; + states empty = m->empty; + states tmp = m->tmp; + char *p = start; + int c = (start == m->beginp) ? OUT : *(start-1); + int lastc; /* previous c */ + int flagch; + int i; + char *matchp; /* last p at which a match ended */ + + AT("slow", start, stop, startst, stopst); + CLEAR(st); + SET1(st, startst); + SP("sstart", st, *p); + st = step(m->g, startst, stopst, st, NOTHING, st); + matchp = NULL; + for (;;) { + /* next character */ + lastc = c; + c = (p == m->endp) ? OUT : *p; + + /* is there an EOL and/or BOL between lastc and c? */ + flagch = '\0'; + i = 0; + if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || + (lastc == OUT && !(m->eflags®_NOTBOL)) ) { + flagch = BOL; + i = m->g->nbol; + } + if ( (c == '\n' && m->g->cflags®_NEWLINE) || + (c == OUT && !(m->eflags®_NOTEOL)) ) { + flagch = (flagch == BOL) ? BOLEOL : EOL; + i += m->g->neol; + } + if (i != 0) { + for (; i > 0; i--) + st = step(m->g, startst, stopst, st, flagch, st); + SP("sboleol", st, c); + } + + /* how about a word boundary? 
*/ + if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && + (c != OUT && ISWORD(c)) ) { + flagch = BOW; + } + if ( (lastc != OUT && ISWORD(lastc)) && + (flagch == EOL || (c != OUT && !ISWORD(c))) ) { + flagch = EOW; + } + if (flagch == BOW || flagch == EOW) { + st = step(m->g, startst, stopst, st, flagch, st); + SP("sboweow", st, c); + } + + /* are we done? */ + if (ISSET(st, stopst)) + matchp = p; + if (EQ(st, empty) || p == stop) + break; /* NOTE BREAK OUT */ + + /* no, we must deal with this character */ + ASSIGN(tmp, st); + ASSIGN(st, empty); + assert(c != OUT); + st = step(m->g, startst, stopst, tmp, c, st); + SP("saft", st, c); + assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); + p++; + } + + return(matchp); +} + + +/* + - step - map set of states reachable before char to set reachable after + */ +static states +step(struct re_guts *g, + sopno start, /* start state within strip */ + sopno stop, /* state after stop state within strip */ + states bef, /* states reachable before */ + int ch, /* character or NONCHAR code */ + states aft) /* states already known reachable after */ +{ + cset *cs; + sop s; + sopno pc; + onestate here; /* note, macros know this name */ + sopno look; + int i; + + for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { + s = g->strip[pc]; + switch (OP(s)) { + case OEND: + assert(pc == stop-1); + break; + case OCHAR: + /* only characters can match */ + assert(!NONCHAR(ch) || ch != (char)OPND(s)); + if (ch == (char)OPND(s)) + FWD(aft, bef, 1); + break; + case OBOL: + if (ch == BOL || ch == BOLEOL) + FWD(aft, bef, 1); + break; + case OEOL: + if (ch == EOL || ch == BOLEOL) + FWD(aft, bef, 1); + break; + case OBOW: + if (ch == BOW) + FWD(aft, bef, 1); + break; + case OEOW: + if (ch == EOW) + FWD(aft, bef, 1); + break; + case OANY: + if (!NONCHAR(ch)) + FWD(aft, bef, 1); + break; + case OANYOF: + cs = &g->sets[OPND(s)]; + if (!NONCHAR(ch) && CHIN(cs, ch)) + FWD(aft, bef, 1); + break; + case OBACK_: /* ignored here 
*/ + case O_BACK: + FWD(aft, aft, 1); + break; + case OPLUS_: /* forward, this is just an empty */ + FWD(aft, aft, 1); + break; + case O_PLUS: /* both forward and back */ + FWD(aft, aft, 1); + i = ISSETBACK(aft, OPND(s)); + BACK(aft, aft, OPND(s)); + if (!i && ISSETBACK(aft, OPND(s))) { + /* oho, must reconsider loop body */ + pc -= OPND(s) + 1; + INIT(here, pc); + } + break; + case OQUEST_: /* two branches, both forward */ + FWD(aft, aft, 1); + FWD(aft, aft, OPND(s)); + break; + case O_QUEST: /* just an empty */ + FWD(aft, aft, 1); + break; + case OLPAREN: /* not significant here */ + case ORPAREN: + FWD(aft, aft, 1); + break; + case OCH_: /* mark the first two branches */ + FWD(aft, aft, 1); + assert(OP(g->strip[pc+OPND(s)]) == OOR2); + FWD(aft, aft, OPND(s)); + break; + case OOR1: /* done a branch, find the O_CH */ + if (ISSTATEIN(aft, here)) { + for (look = 1; + OP(s = g->strip[pc+look]) != O_CH; + look += OPND(s)) + assert(OP(s) == OOR2); + FWD(aft, aft, look); + } + break; + case OOR2: /* propagate OCH_'s marking */ + FWD(aft, aft, 1); + if (OP(g->strip[pc+OPND(s)]) != O_CH) { + assert(OP(g->strip[pc+OPND(s)]) == OOR2); + FWD(aft, aft, OPND(s)); + } + break; + case O_CH: /* just empty */ + FWD(aft, aft, 1); + break; + default: /* ooooops... */ + assert(nope); + break; + } + } + + return(aft); +} + +#ifdef REDEBUG +/* + - print - print a set of states + */ +static void +print(struct match *m, char *caption, states st, int ch, FILE *d) +{ + struct re_guts *g = m->g; + int i; + int first = 1; + + if (!(m->eflags®_TRACE)) + return; + + (void)fprintf(d, "%s", caption); + if (ch != '\0') + (void)fprintf(d, " %s", pchar(ch)); + for (i = 0; i < g->nstates; i++) + if (ISSET(st, i)) { + (void)fprintf(d, "%s%d", (first) ? 
"\t" : ", ", i); + first = 0; + } + (void)fprintf(d, "\n"); +} + +/* + - at - print current situation + */ +static void +at(struct match *m, char *title, char *start, char *stop, sopno startst, + sopno stopst) +{ + if (!(m->eflags®_TRACE)) + return; + + (void)printf("%s %s-", title, pchar(*start)); + (void)printf("%s ", pchar(*stop)); + (void)printf("%ld-%ld\n", (long)startst, (long)stopst); +} + +#ifndef PCHARDONE +#define PCHARDONE /* never again */ +/* + - pchar - make a character printable + * + * Is this identical to regchar() over in debug.c? Well, yes. But a + * duplicate here avoids having a debugging-capable regexec.o tied to + * a matching debug.o, and this is convenient. It all disappears in + * the non-debug compilation anyway, so it doesn't matter much. + */ +static char * /* -> representation */ +pchar(int ch) +{ + static char pbuf[10]; + + if (isprint(ch) || ch == ' ') + (void)snprintf(pbuf, sizeof pbuf, "%c", ch); + else + (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch); + return(pbuf); +} +#endif +#endif + +#undef matcher +#undef fast +#undef slow +#undef dissect +#undef backref +#undef step +#undef print +#undef at +#undef match +#undef nope diff --git a/lib/Support/regerror.c b/lib/Support/regerror.c new file mode 100644 index 0000000..1d67c9a --- /dev/null +++ b/lib/Support/regerror.c @@ -0,0 +1,135 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)regerror.c 8.4 (Berkeley) 3/20/94 + */ + +#include <sys/types.h> +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <limits.h> +#include <stdlib.h> +#include "regex_impl.h" + +#include "regutils.h" + +#ifdef _MSC_VER +#define snprintf _snprintf +#endif + +static const char *regatoi(const llvm_regex_t *, char *, int); + +static struct rerr { + int code; + const char *name; + const char *explain; +} rerrs[] = { + { REG_NOMATCH, "REG_NOMATCH", "llvm_regexec() failed to match" }, + { REG_BADPAT, "REG_BADPAT", "invalid regular expression" }, + { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" }, + { REG_ECTYPE, "REG_ECTYPE", "invalid character class" }, + { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" }, + { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" }, + { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" }, + { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" }, + { REG_EBRACE, "REG_EBRACE", "braces not balanced" }, + { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" }, + { REG_ERANGE, "REG_ERANGE", "invalid character range" }, + { REG_ESPACE, "REG_ESPACE", "out of memory" }, + { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" }, + { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" }, + { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" }, + { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" }, + { 0, "", "*** unknown regexp error code ***" } +}; + +/* + - llvm_regerror - the interface to error numbers + = extern size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t); + */ +/* ARGSUSED */ +size_t +llvm_regerror(int errcode, const llvm_regex_t *preg, char *errbuf, size_t errbuf_size) +{ + struct rerr *r; + size_t len; + int target = errcode &~ REG_ITOA; + const char *s; + char convbuf[50]; + + if (errcode == REG_ATOI) + s = regatoi(preg, convbuf, sizeof convbuf); + else { + for (r = rerrs; r->code != 0; r++) + if (r->code == target) + 
break; + + if (errcode®_ITOA) { + if (r->code != 0) { + assert(strlen(r->name) < sizeof(convbuf)); + (void) llvm_strlcpy(convbuf, r->name, sizeof convbuf); + } else + (void)snprintf(convbuf, sizeof convbuf, + "REG_0x%x", target); + s = convbuf; + } else + s = r->explain; + } + + len = strlen(s) + 1; + if (errbuf_size > 0) { + llvm_strlcpy(errbuf, s, errbuf_size); + } + + return(len); +} + +/* + - regatoi - internal routine to implement REG_ATOI + */ +static const char * +regatoi(const llvm_regex_t *preg, char *localbuf, int localbufsize) +{ + struct rerr *r; + + for (r = rerrs; r->code != 0; r++) + if (strcmp(r->name, preg->re_endp) == 0) + break; + if (r->code == 0) + return("0"); + + (void)snprintf(localbuf, localbufsize, "%d", r->code); + return(localbuf); +} diff --git a/lib/Support/regex2.h b/lib/Support/regex2.h new file mode 100644 index 0000000..21659c3 --- /dev/null +++ b/lib/Support/regex2.h @@ -0,0 +1,157 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)regex2.h 8.4 (Berkeley) 3/20/94 + */ + +/* + * internals of regex_t + */ +#define MAGIC1 ((('r'^0200)<<8) | 'e') + +/* + * The internal representation is a *strip*, a sequence of + * operators ending with an endmarker. (Some terminology etc. is a + * historical relic of earlier versions which used multiple strips.) + * Certain oddities in the representation are there to permit running + * the machinery backwards; in particular, any deviation from sequential + * flow must be marked at both its source and its destination. Some + * fine points: + * + * - OPLUS_ and O_PLUS are *inside* the loop they create. + * - OQUEST_ and O_QUEST are *outside* the bypass they create. + * - OCH_ and O_CH are *outside* the multi-way branch they create, while + * OOR1 and OOR2 are respectively the end and the beginning of one of + * the branches. Note that there is an implicit OOR2 following OCH_ + * and an implicit OOR1 preceding O_CH. + * + * In state representations, an operator's bit is on to signify a state + * immediately *preceding* "execution" of that operator. 
+ */ +typedef unsigned long sop; /* strip operator */ +typedef long sopno; +#define OPRMASK 0xf8000000LU +#define OPDMASK 0x07ffffffLU +#define OPSHIFT ((unsigned)27) +#define OP(n) ((n)&OPRMASK) +#define OPND(n) ((n)&OPDMASK) +#define SOP(op, opnd) ((op)|(opnd)) +/* operators meaning operand */ +/* (back, fwd are offsets) */ +#define OEND (1LU<<OPSHIFT) /* endmarker - */ +#define OCHAR (2LU<<OPSHIFT) /* character unsigned char */ +#define OBOL (3LU<<OPSHIFT) /* left anchor - */ +#define OEOL (4LU<<OPSHIFT) /* right anchor - */ +#define OANY (5LU<<OPSHIFT) /* . - */ +#define OANYOF (6LU<<OPSHIFT) /* [...] set number */ +#define OBACK_ (7LU<<OPSHIFT) /* begin \d paren number */ +#define O_BACK (8LU<<OPSHIFT) /* end \d paren number */ +#define OPLUS_ (9LU<<OPSHIFT) /* + prefix fwd to suffix */ +#define O_PLUS (10LU<<OPSHIFT) /* + suffix back to prefix */ +#define OQUEST_ (11LU<<OPSHIFT) /* ? prefix fwd to suffix */ +#define O_QUEST (12LU<<OPSHIFT) /* ? suffix back to prefix */ +#define OLPAREN (13LU<<OPSHIFT) /* ( fwd to ) */ +#define ORPAREN (14LU<<OPSHIFT) /* ) back to ( */ +#define OCH_ (15LU<<OPSHIFT) /* begin choice fwd to OOR2 */ +#define OOR1 (16LU<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */ +#define OOR2 (17LU<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */ +#define O_CH (18LU<<OPSHIFT) /* end choice back to OOR1 */ +#define OBOW (19LU<<OPSHIFT) /* begin word - */ +#define OEOW (20LU<<OPSHIFT) /* end word - */ + +/* + * Structure for [] character-set representation. Character sets are + * done as bit vectors, grouped 8 to a byte vector for compactness. + * The individual set therefore has both a pointer to the byte vector + * and a mask to pick out the relevant bit of each byte. A hash code + * simplifies testing whether two sets could be identical. + * + * This will get trickier for multicharacter collating elements. 
As + * preliminary hooks for dealing with such things, we also carry along + * a string of multi-character elements, and decide the size of the + * vectors at run time. + */ +typedef struct { + uch *ptr; /* -> uch [csetsize] */ + uch mask; /* bit within array */ + uch hash; /* hash code */ + size_t smultis; + char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */ +} cset; +/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */ +#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c)) +#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c)) +#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask) +#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* llvm_regcomp() internal fns */ +#define MCsub(p, cs, cp) mcsub(p, cs, cp) +#define MCin(p, cs, cp) mcin(p, cs, cp) + +/* stuff for character categories */ +typedef unsigned char cat_t; + +/* + * main compiled-expression structure + */ +struct re_guts { + int magic; +# define MAGIC2 ((('R'^0200)<<8)|'E') + sop *strip; /* malloced area for strip */ + int csetsize; /* number of bits in a cset vector */ + int ncsets; /* number of csets in use */ + cset *sets; /* -> cset [ncsets] */ + uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */ + int cflags; /* copy of llvm_regcomp() cflags argument */ + sopno nstates; /* = number of sops */ + sopno firststate; /* the initial OEND (normally 0) */ + sopno laststate; /* the final OEND */ + int iflags; /* internal flags */ +# define USEBOL 01 /* used ^ */ +# define USEEOL 02 /* used $ */ +# define REGEX_BAD 04 /* something wrong */ + int nbol; /* number of ^ used */ + int neol; /* number of $ used */ + int ncategories; /* how many character categories */ + cat_t *categories; /* ->catspace[-CHAR_MIN] */ + char *must; /* match must contain this string */ + int mlen; /* length of must */ + size_t nsub; /* copy of re_nsub */ + int backrefs; /* does it use back references? */ + sopno nplus; /* how deep does it nest +s? 
*/ + /* catspace must be last */ + cat_t catspace[1]; /* actually [NC] */ +}; + +/* misc utilities */ +#define OUT (CHAR_MAX+1) /* a non-character value */ +#define ISWORD(c) (isalnum(c&0xff) || (c) == '_') diff --git a/lib/Support/regex_impl.h b/lib/Support/regex_impl.h new file mode 100644 index 0000000..f8296c9 --- /dev/null +++ b/lib/Support/regex_impl.h @@ -0,0 +1,108 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992 Henry Spencer. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer of the University of Toronto. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)regex.h 8.1 (Berkeley) 6/2/93 + */ + +#ifndef _REGEX_H_ +#define _REGEX_H_ + +#include <sys/types.h> +typedef off_t llvm_regoff_t; +typedef struct { + llvm_regoff_t rm_so; /* start of match */ + llvm_regoff_t rm_eo; /* end of match */ +} llvm_regmatch_t; + +typedef struct llvm_regex { + int re_magic; + size_t re_nsub; /* number of parenthesized subexpressions */ + const char *re_endp; /* end pointer for REG_PEND */ + struct re_guts *re_g; /* none of your business :-) */ +} llvm_regex_t; + +/* llvm_regcomp() flags */ +#define REG_BASIC 0000 +#define REG_EXTENDED 0001 +#define REG_ICASE 0002 +#define REG_NOSUB 0004 +#define REG_NEWLINE 0010 +#define REG_NOSPEC 0020 +#define REG_PEND 0040 +#define REG_DUMP 0200 + +/* llvm_regerror() flags */ +#define REG_NOMATCH 1 +#define REG_BADPAT 2 +#define REG_ECOLLATE 3 +#define REG_ECTYPE 4 +#define REG_EESCAPE 5 +#define REG_ESUBREG 6 +#define REG_EBRACK 7 +#define REG_EPAREN 8 +#define REG_EBRACE 9 +#define REG_BADBR 10 +#define REG_ERANGE 11 +#define REG_ESPACE 12 +#define REG_BADRPT 13 +#define REG_EMPTY 14 +#define REG_ASSERT 15 +#define REG_INVARG 16 +#define REG_ATOI 255 /* convert name to number (!) */ +#define REG_ITOA 0400 /* convert number to name (!) 
*/ + +/* llvm_regexec() flags */ +#define REG_NOTBOL 00001 +#define REG_NOTEOL 00002 +#define REG_STARTEND 00004 +#define REG_TRACE 00400 /* tracing of execution */ +#define REG_LARGE 01000 /* force large representation */ +#define REG_BACKR 02000 /* force use of backref code */ + +#ifdef __cplusplus +extern "C" { +#endif + +int llvm_regcomp(llvm_regex_t *, const char *, int); +size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t); +int llvm_regexec(const llvm_regex_t *, const char *, size_t, + llvm_regmatch_t [], int); +void llvm_regfree(llvm_regex_t *); +size_t llvm_strlcpy(char *dst, const char *src, size_t siz); + +#ifdef __cplusplus +} +#endif + +#endif /* !_REGEX_H_ */ diff --git a/lib/Support/regexec.c b/lib/Support/regexec.c new file mode 100644 index 0000000..7d70f6e --- /dev/null +++ b/lib/Support/regexec.c @@ -0,0 +1,161 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)regexec.c 8.3 (Berkeley) 3/20/94 + */ + +/* + * the outer shell of llvm_regexec() + * + * This file includes engine.inc *twice*, after muchos fiddling with the + * macros that code uses. This lets the same code operate on two different + * representations for state sets. 
+ */ +#include <sys/types.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include <ctype.h> +#include "regex_impl.h" + +#include "regutils.h" +#include "regex2.h" + +/* macros for manipulating states, small version */ +#define states long +#define states1 states /* for later use in llvm_regexec() decision */ +#define CLEAR(v) ((v) = 0) +#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n))) +#define SET1(v, n) ((v) |= (unsigned long)1 << (n)) +#define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0) +#define ASSIGN(d, s) ((d) = (s)) +#define EQ(a, b) ((a) == (b)) +#define STATEVARS long dummy /* dummy version */ +#define STATESETUP(m, n) /* nothing */ +#define STATETEARDOWN(m) /* nothing */ +#define SETUP(v) ((v) = 0) +#define onestate long +#define INIT(o, n) ((o) = (unsigned long)1 << (n)) +#define INC(o) ((o) <<= 1) +#define ISSTATEIN(v, o) (((v) & (o)) != 0) +/* some abbreviations; note that some of these know variable names! */ +/* do "if I'm here, I can also be there" etc without branches */ +#define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n)) +#define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n)) +#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0) +/* function names */ +#define SNAMES /* engine.inc looks after details */ + +#include "regengine.inc" + +/* now undo things */ +#undef states +#undef CLEAR +#undef SET0 +#undef SET1 +#undef ISSET +#undef ASSIGN +#undef EQ +#undef STATEVARS +#undef STATESETUP +#undef STATETEARDOWN +#undef SETUP +#undef onestate +#undef INIT +#undef INC +#undef ISSTATEIN +#undef FWD +#undef BACK +#undef ISSETBACK +#undef SNAMES + +/* macros for manipulating states, large version */ +#define states char * +#define CLEAR(v) memset(v, 0, m->g->nstates) +#define SET0(v, n) ((v)[n] = 0) +#define SET1(v, n) ((v)[n] = 1) +#define ISSET(v, n) ((v)[n]) +#define ASSIGN(d, s) memmove(d, s, m->g->nstates) +#define EQ(a, b) (memcmp(a, b, 
m->g->nstates) == 0) +#define STATEVARS long vn; char *space +#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ + if ((m)->space == NULL) return(REG_ESPACE); \ + (m)->vn = 0; } +#define STATETEARDOWN(m) { free((m)->space); } +#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) +#define onestate long +#define INIT(o, n) ((o) = (n)) +#define INC(o) ((o)++) +#define ISSTATEIN(v, o) ((v)[o]) +/* some abbreviations; note that some of these know variable names! */ +/* do "if I'm here, I can also be there" etc without branches */ +#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) +#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) +#define ISSETBACK(v, n) ((v)[here - (n)]) +/* function names */ +#define LNAMES /* flag */ + +#include "regengine.inc" + +/* + - llvm_regexec - interface for matching + * + * We put this here so we can exploit knowledge of the state representation + * when choosing which matcher to call. Also, by this point the matchers + * have been prototyped. 
+ */ +int /* 0 success, REG_NOMATCH failure */ +llvm_regexec(const llvm_regex_t *preg, const char *string, size_t nmatch, + llvm_regmatch_t pmatch[], int eflags) +{ + struct re_guts *g = preg->re_g; +#ifdef REDEBUG +# define GOODFLAGS(f) (f) +#else +# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) +#endif + + if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) + return(REG_BADPAT); + assert(!(g->iflags®EX_BAD)); + if (g->iflags®EX_BAD) /* backstop for no-debug case */ + return(REG_BADPAT); + eflags = GOODFLAGS(eflags); + + if (g->nstates <= (long)(CHAR_BIT*sizeof(states1)) && !(eflags®_LARGE)) + return(smatcher(g, (char *)string, nmatch, pmatch, eflags)); + else + return(lmatcher(g, (char *)string, nmatch, pmatch, eflags)); +} diff --git a/lib/Support/regfree.c b/lib/Support/regfree.c new file mode 100644 index 0000000..dc2b4af --- /dev/null +++ b/lib/Support/regfree.c @@ -0,0 +1,72 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)regfree.c 8.3 (Berkeley) 3/20/94 + */ + +#include <sys/types.h> +#include <stdio.h> +#include <stdlib.h> +#include "regex_impl.h" + +#include "regutils.h" +#include "regex2.h" + +/* + - llvm_regfree - free everything + */ +void +llvm_regfree(llvm_regex_t *preg) +{ + struct re_guts *g; + + if (preg->re_magic != MAGIC1) /* oops */ + return; /* nice to complain, but hard */ + + g = preg->re_g; + if (g == NULL || g->magic != MAGIC2) /* oops again */ + return; + preg->re_magic = 0; /* mark it invalid */ + g->magic = 0; /* mark it invalid */ + + if (g->strip != NULL) + free((char *)g->strip); + if (g->sets != NULL) + free((char *)g->sets); + if (g->setbits != NULL) + free((char *)g->setbits); + if (g->must != NULL) + free(g->must); + free((char *)g); +} diff --git a/lib/Support/regstrlcpy.c b/lib/Support/regstrlcpy.c new file mode 100644 index 0000000..8b68afd --- /dev/null +++ b/lib/Support/regstrlcpy.c @@ -0,0 +1,52 @@ +/* + * This code is derived from OpenBSD's libc, original license follows: + * + * Copyright (c) 1998 Todd C. 
/*
 * Copy src to string dst of size siz.  At most siz-1 characters
 * will be copied.  Always NUL terminates (unless siz == 0).
 * Returns strlen(src); if retval >= siz, truncation occurred.
 */
size_t
llvm_strlcpy(char *dst, const char *src, size_t siz)
{
	const char *in = src;
	char *out = dst;
	size_t room;

	/* Copy while something beyond the terminator's slot is still free. */
	for (room = siz; room > 1; room--) {
		if ((*out++ = *in++) == '\0')
			return(in - src - 1);	/* all of src fit, NUL included */
	}

	/* Out of room: terminate dst if it has any space at all ... */
	if (siz != 0)
		*out = '\0';
	/* ... then walk the rest of src so its full length can be reported. */
	while (*in++ != '\0')
		;

	return(in - src - 1);	/* count does not include NUL */
}
+ * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)utils.h 8.3 (Berkeley) 3/20/94 + */ + +/* utility definitions */ +#define NC (CHAR_MAX - CHAR_MIN + 1) +typedef unsigned char uch; + +/* switch off assertions (if not already off) if no REDEBUG */ +#ifndef REDEBUG +#ifndef NDEBUG +#define NDEBUG /* no assertions please */ +#endif +#endif +#include <assert.h> + +/* for old systems with bcopy() but no memmove() */ +#ifdef USEBCOPY +#define memmove(d, s, c) bcopy(s, d, c) +#endif |