diff options
Diffstat (limited to 'include/clang/Basic/ConvertUTF.h')
-rw-r--r-- | include/clang/Basic/ConvertUTF.h | 203 |
1 files changed, 0 insertions, 203 deletions
diff --git a/include/clang/Basic/ConvertUTF.h b/include/clang/Basic/ConvertUTF.h deleted file mode 100644 index cdc4269..0000000 --- a/include/clang/Basic/ConvertUTF.h +++ /dev/null @@ -1,203 +0,0 @@ -/*===--- ConvertUTF.h - Universal Character Names conversions ---------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. - * - *==------------------------------------------------------------------------==*/ -/* - * Copyright 2001-2004 Unicode, Inc. - * - * Disclaimer - * - * This source code is provided as is by Unicode, Inc. No claims are - * made as to fitness for any particular purpose. No warranties of any - * kind are expressed or implied. The recipient agrees to determine - * applicability of information provided. If this file has been - * purchased on magnetic or optical media from Unicode, Inc., the - * sole remedy for any claim will be exchange of defective media - * within 90 days of receipt. - * - * Limitations on Rights to Redistribute This Code - * - * Unicode, Inc. hereby grants the right to freely use the information - * supplied in this file in the creation of products supporting the - * Unicode Standard, and to make copies of this file in any form - * for internal or external distribution as long as this notice - * remains attached. - */ - -/* --------------------------------------------------------------------- - - Conversions between UTF32, UTF-16, and UTF-8. Header file. - - Several funtions are included here, forming a complete set of - conversions between the three formats. UTF-7 is not included - here, but is handled in a separate source file. - - Each of these routines takes pointers to input buffers and output - buffers. The input buffers are const. - - Each routine converts the text between *sourceStart and sourceEnd, - putting the result into the buffer between *targetStart and - targetEnd. Note: the end pointers are *after* the last item: e.g. - *(sourceEnd - 1) is the last item. - - The return result indicates whether the conversion was successful, - and if not, whether the problem was in the source or target buffers. - (Only the first encountered problem is indicated.) - - After the conversion, *sourceStart and *targetStart are both - updated to point to the end of last text successfully converted in - the respective buffers. - - Input parameters: - sourceStart - pointer to a pointer to the source buffer. - The contents of this are modified on return so that - it points at the next thing to be converted. - targetStart - similarly, pointer to pointer to the target buffer. - sourceEnd, targetEnd - respectively pointers to the ends of the - two buffers, for overflow checking only. - - These conversion functions take a ConversionFlags argument. When this - flag is set to strict, both irregular sequences and isolated surrogates - will cause an error. When the flag is set to lenient, both irregular - sequences and isolated surrogates are converted. - - Whether the flag is strict or lenient, all illegal sequences will cause - an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>, - or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code - must check for illegal sequences. - - When the flag is set to lenient, characters over 0x10FFFF are converted - to the replacement character; otherwise (when the flag is set to strict) - they constitute an error. - - Output parameters: - The value "sourceIllegal" is returned from some routines if the input - sequence is malformed. When "sourceIllegal" is returned, the source - value will point to the illegal value that caused the problem. E.g., - in UTF-8 when a sequence is malformed, it points to the start of the - malformed sequence. - - Author: Mark E. Davis, 1994. - Rev History: Rick McGowan, fixes & updates May 2001. - Fixes & updates, Sept 2001. - ------------------------------------------------------------------------- */ - -#ifndef CLANG_BASIC_CONVERTUTF_H -#define CLANG_BASIC_CONVERTUTF_H - -/* --------------------------------------------------------------------- - The following 4 definitions are compiler-specific. - The C standard does not guarantee that wchar_t has at least - 16 bits, so wchar_t is no less portable than unsigned short! - All should be unsigned values to avoid sign extension during - bit mask & shift operations. ------------------------------------------------------------------------- */ - -typedef unsigned int UTF32; /* at least 32 bits */ -typedef unsigned short UTF16; /* at least 16 bits */ -typedef unsigned char UTF8; /* typically 8 bits */ -typedef unsigned char Boolean; /* 0 or 1 */ - -/* Some fundamental constants */ -#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD -#define UNI_MAX_BMP (UTF32)0x0000FFFF -#define UNI_MAX_UTF16 (UTF32)0x0010FFFF -#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF -#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF - -#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4 - -typedef enum { - conversionOK, /* conversion successful */ - sourceExhausted, /* partial character in source, but hit end */ - targetExhausted, /* insuff. room in target for conversion */ - sourceIllegal /* source sequence is illegal/malformed */ -} ConversionResult; - -typedef enum { - strictConversion = 0, - lenientConversion -} ConversionFlags; - -/* This is for C++ and does no harm in C */ -#ifdef __cplusplus -extern "C" { -#endif - -ConversionResult ConvertUTF8toUTF16 ( - const UTF8** sourceStart, const UTF8* sourceEnd, - UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags); - -ConversionResult ConvertUTF8toUTF32 ( - const UTF8** sourceStart, const UTF8* sourceEnd, - UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); - -#ifdef CLANG_NEEDS_THESE_ONE_DAY -ConversionResult ConvertUTF16toUTF8 ( - const UTF16** sourceStart, const UTF16* sourceEnd, - UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); -#endif - -ConversionResult ConvertUTF32toUTF8 ( - const UTF32** sourceStart, const UTF32* sourceEnd, - UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); - -ConversionResult ConvertUTF16toUTF32 ( - const UTF16** sourceStart, const UTF16* sourceEnd, - UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); - -ConversionResult ConvertUTF32toUTF16 ( - const UTF32** sourceStart, const UTF32* sourceEnd, - UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags); -#endif - -Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd); - -Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd); - -unsigned getNumBytesForUTF8(UTF8 firstByte); - -#ifdef __cplusplus -} - -/*************************************************************************/ -/* Below are LLVM-specific wrappers of the functions above. */ - -#include "llvm/ADT/StringRef.h" - -namespace clang { - -/** - * Convert an UTF8 StringRef to UTF8, UTF16, or UTF32 depending on - * WideCharWidth. The converted data is written to ResultPtr, which needs to - * point to at least WideCharWidth * (Source.Size() + 1) bytes. On success, - * ResultPtr will point one after the end of the copied string. On failure, - * ResultPtr will not be changed, and ErrorPtr will be set to the location of - * the first character which could not be converted. - * \return true on success. - */ -bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source, - char *&ResultPtr, const UTF8 *&ErrorPtr); - -/** - * Convert an Unicode code point to UTF8 sequence. - * - * \param Source a Unicode code point. - * \param [in,out] ResultPtr pointer to the output buffer, needs to be at least - * \c UNI_MAX_UTF8_BYTES_PER_CODE_POINT bytes. On success \c ResultPtr is - * updated one past end of the converted sequence. - * - * \returns true on success. - */ -bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr); - -} - -#endif - -/* --------------------------------------------------------------------- */ |