summary | refs | log | tree | commit | diff | stats
path: root/contrib/gcc/mbchar.c
diff options
context:
space:
mode:
author: obrien <obrien@FreeBSD.org> 2002-02-01 18:16:02 +0000
committer: obrien <obrien@FreeBSD.org> 2002-02-01 18:16:02 +0000
commit: c9ab9ae440a8066b2c2b85b157b1fdadcf09916a (patch)
tree: 086d9d6c8fbd4fc8fe4495059332f66bc0f8d12b /contrib/gcc/mbchar.c
parent: 2ecfd8bd04b63f335c1ec6295740a4bfd97a4fa6 (diff)
download: FreeBSD-src-c9ab9ae440a8066b2c2b85b157b1fdadcf09916a.zip
FreeBSD-src-c9ab9ae440a8066b2c2b85b157b1fdadcf09916a.tar.gz
Enlist the FreeBSD-CURRENT users as testers of what is to become Gcc 3.1.0.
These bits are taken from the FSF anoncvs repo on 1-Feb-2002 08:20 PST.
Diffstat (limited to 'contrib/gcc/mbchar.c')
-rw-r--r-- contrib/gcc/mbchar.c 196
1 files changed, 120 insertions, 76 deletions
diff --git a/contrib/gcc/mbchar.c b/contrib/gcc/mbchar.c
index a22e52b..5c86dbf 100644
--- a/contrib/gcc/mbchar.c
+++ b/contrib/gcc/mbchar.c
@@ -1,28 +1,26 @@
/* Multibyte Character Functions.
Copyright (C) 1998 Free Software Foundation, Inc.
-This file is part of GNU CC.
+This file is part of GCC.
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING. If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA. */
-
-/* These functions are used to manipulate multibyte characters. */
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA. */
/* Note regarding cross compilation:
- In general translation of multibyte characters to wide characters can
+ In general, translation of multibyte characters to wide characters can
only work in a native compiler since the translation function (mbtowc)
needs to know about both the source and target character encoding. However,
this particular implementation for JIS, SJIS and EUCJP source characters
@@ -31,37 +29,30 @@ Boston, MA 02111-1307, USA. */
leaves the source character values unchanged (except for removing the
state shifting markers). */
-#ifdef MULTIBYTE_CHARS
#include "config.h"
+#ifdef MULTIBYTE_CHARS
#include "system.h"
#include "mbchar.h"
#include <locale.h>
-typedef enum
-{
- ESCAPE, DOLLAR, BRACKET, AT, B, J, NUL, JIS_CHAR, OTHER, JIS_C_NUM
-} JIS_CHAR_TYPE;
+typedef enum {ESCAPE, DOLLAR, BRACKET, AT, B, J, NUL, JIS_CHAR, OTHER,
+ JIS_C_NUM} JIS_CHAR_TYPE;
-typedef enum
-{
- ASCII, A_ESC, A_ESC_DL, JIS, JIS_1, JIS_2, J_ESC, J_ESC_BR,
- J2_ESC, J2_ESC_BR, INV, JIS_S_NUM
-} JIS_STATE;
+typedef enum {ASCII, A_ESC, A_ESC_DL, JIS, JIS_1, JIS_2, J_ESC, J_ESC_BR,
+ J2_ESC, J2_ESC_BR, INV, JIS_S_NUM} JIS_STATE;
+
+typedef enum {COPYA, COPYJ, COPYJ2, MAKE_A, MAKE_J, NOOP,
+ EMPTY, ERROR} JIS_ACTION;
+
+/* State/action tables for processing JIS encoding:
+
+ Where possible, switches to JIS are grouped with the following JIS characters
+ and switches to ASCII are grouped with preceding JIS characters.
+ Thus, maximum returned length is:
+ 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6. */
-typedef enum
-{
- COPYA, COPYJ, COPYJ2, MAKE_A, MAKE_J, NOOP, EMPTY, ERROR
-} JIS_ACTION;
-
-/*****************************************************************************
- * state/action tables for processing JIS encoding
- * Where possible, switches to JIS are grouped with proceding JIS characters
- * and switches to ASCII are grouped with preceding JIS characters.
- * Thus, maximum returned length is:
- * 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6.
- *****************************************************************************/
static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
-/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER*/
+/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTH*/
/*ASCII*/ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
/*A_ESC*/ { ASCII, A_ESC_DL,ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
/*A_ESC_DL*/{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII,ASCII,ASCII},
@@ -75,105 +66,131 @@ static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
};
static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
-/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
+/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTH */
/*ASCII */ {NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, EMPTY, COPYA, COPYA},
/*A_ESC */ {COPYA, NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA},
/*A_ESC_DL */{COPYA, COPYA, COPYA, MAKE_J, MAKE_J, COPYA, COPYA, COPYA, COPYA},
-/*JIS */ {NOOP, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
-/*JIS_1 */ {ERROR, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
+/*JIS */ {NOOP, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR},
+/*JIS_1 */ {ERROR, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR},
/*JIS_2 */ {NOOP, COPYJ2,COPYJ2,COPYJ2, COPYJ2, COPYJ2,ERROR, COPYJ2,COPYJ2},
-/*J_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
-/*J_ESC_BR */{ERROR, ERROR, ERROR, ERROR, NOOP, NOOP, ERROR, ERROR, ERROR },
-/*J2_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
-/*J2_ESC_BR*/{ERROR, ERROR, ERROR, ERROR, COPYJ, COPYJ, ERROR, ERROR, ERROR },
+/*J_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR},
+/*J_ESC_BR */{ERROR, ERROR, ERROR, ERROR, NOOP, NOOP, ERROR, ERROR, ERROR},
+/*J2_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR},
+/*J2_ESC_BR*/{ERROR, ERROR, ERROR, ERROR, COPYJ, COPYJ, ERROR, ERROR, ERROR},
};
-char *literal_codeset = NULL;
+const char *literal_codeset = NULL;
+
+/* Store into *PWC (if PWC is not null) the wide character
+ corresponding to the multibyte character at the start of the
+ buffer S of size N. Return the number of bytes in the multibyte
+ character. Return -1 if the bytes do not form a valid character,
+ or 0 if S is null or points to a null byte.
+
+ This function behaves like the Standard C function mbtowc, except
+ it treats locale names of the form "C-..." specially. */
int
local_mbtowc (pwc, s, n)
- wchar_t *pwc;
- const char *s;
- size_t n;
+ wchar_t *pwc;
+ const char *s;
+ size_t n;
{
static JIS_STATE save_state = ASCII;
JIS_STATE curr_state = save_state;
- unsigned char *t = (unsigned char *)s;
+ const unsigned char *t = (const unsigned char *) s;
if (s != NULL && n == 0)
return -1;
if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
- {
- /* This must be the "C" locale or unknown locale -- fall thru */
- }
+ /* This must be the "C" locale or unknown locale -- fall thru */
+ ;
else if (! strcmp (literal_codeset, "C-SJIS"))
{
int char1;
if (s == NULL)
- return 0; /* not state-dependent */
+ /* Not state-dependent. */
+ return 0;
+
char1 = *t;
if (ISSJIS1 (char1))
{
int char2 = t[1];
+
if (n <= 1)
return -1;
+
if (ISSJIS2 (char2))
{
if (pwc != NULL)
- *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
+ *pwc = (((wchar_t) *t) << 8) + (wchar_t) (*(t + 1));
return 2;
}
+
return -1;
}
+
if (pwc != NULL)
- *pwc = (wchar_t)*t;
+ *pwc = (wchar_t) *t;
+
if (*t == '\0')
return 0;
+
return 1;
}
else if (! strcmp (literal_codeset, "C-EUCJP"))
{
int char1;
+
if (s == NULL)
- return 0; /* not state-dependent */
+ /* Not state-dependent. */
+ return 0;
+
char1 = *t;
if (ISEUCJP (char1))
{
int char2 = t[1];
+
if (n <= 1)
return -1;
+
if (ISEUCJP (char2))
{
if (pwc != NULL)
- *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
+ *pwc = (((wchar_t) *t) << 8) + (wchar_t) (*(t + 1));
return 2;
}
+
return -1;
}
+
if (pwc != NULL)
- *pwc = (wchar_t)*t;
+ *pwc = (wchar_t) *t;
+
if (*t == '\0')
return 0;
+
return 1;
}
else if (! strcmp (literal_codeset, "C-JIS"))
{
JIS_ACTION action;
JIS_CHAR_TYPE ch;
- unsigned char *ptr;
- int i, curr_ch;
+ const unsigned char *ptr;
+ size_t i, curr_ch;
if (s == NULL)
{
save_state = ASCII;
- return 1; /* state-dependent */
+ /* State-dependent. */
+ return 1;
}
ptr = t;
- for (i = 0; i < n; ++i)
+ for (i = 0; i < n; i++)
{
curr_ch = t[i];
switch (curr_ch)
@@ -213,59 +230,84 @@ local_mbtowc (pwc, s, n)
{
case NOOP:
break;
+
case EMPTY:
if (pwc != NULL)
- *pwc = (wchar_t)0;
+ *pwc = (wchar_t) 0;
+
save_state = curr_state;
return i;
+
case COPYA:
if (pwc != NULL)
- *pwc = (wchar_t)*ptr;
+ *pwc = (wchar_t) *ptr;
save_state = curr_state;
- return (i + 1);
+ return i + 1;
+
case COPYJ:
if (pwc != NULL)
- *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
+ *pwc = (((wchar_t) *ptr) << 8) + (wchar_t) (*(ptr + 1));
+
save_state = curr_state;
- return (i + 1);
+ return i + 1;
+
case COPYJ2:
if (pwc != NULL)
- *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
+ *pwc = (((wchar_t) *ptr) << 8) + (wchar_t) (*(ptr + 1));
+
save_state = curr_state;
- return (ptr - t) + 2;
+ return ptr - t + 2;
+
case MAKE_A:
case MAKE_J:
- ptr = (char *)(t + i + 1);
+ ptr = (const unsigned char *) (t + i + 1);
break;
+
case ERROR:
default:
return -1;
}
}
- return -1; /* n < bytes needed */
+ /* More than n bytes needed. */
+ return -1;
}
#ifdef CROSS_COMPILE
if (s == NULL)
- return 0; /* not state-dependent */
+ /* Not state-dependent. */
+ return 0;
+
if (pwc != NULL)
*pwc = *s;
return 1;
#else
- /* This must be the "C" locale or unknown locale. */
+
+ /* This must be the "C" locale or unknown locale. */
return mbtowc (pwc, s, n);
#endif
}
+/* Return the number of bytes in the multibyte character at the start
+ of the buffer S of size N. Return -1 if the bytes do not form a
+ valid character, or 0 if S is null or points to a null byte.
+
+ This function behaves like the Standard C function mblen, except
+ it treats locale names of the form "C-..." specially. */
+
int
local_mblen (s, n)
- const char *s;
- size_t n;
+ const char *s;
+ size_t n;
{
return local_mbtowc (NULL, s, n);
}
+/* Return the maximum number of bytes in a multibyte character.
+
+ This function returns the same value as the Standard C macro MB_CUR_MAX,
+ except it treats locale names of the form "C-..." specially. */
+
int
local_mb_cur_max ()
{
@@ -287,4 +329,6 @@ local_mb_cur_max ()
return 1; /* default */
#endif
}
+#else /* MULTIBYTE_CHARS */
+extern int dummy; /* silence 'ANSI C forbids an empty source file' warning */
#endif /* MULTIBYTE_CHARS */
OpenPOWER on IntegriCloud