diff options
Diffstat (limited to 'lib/libiconv_modules/HZ')
-rw-r--r-- | lib/libiconv_modules/HZ/Makefile | 6 | ||||
-rw-r--r-- | lib/libiconv_modules/HZ/citrus_hz.c | 648 | ||||
-rw-r--r-- | lib/libiconv_modules/HZ/citrus_hz.h | 37 |
3 files changed, 691 insertions, 0 deletions
diff --git a/lib/libiconv_modules/HZ/Makefile b/lib/libiconv_modules/HZ/Makefile new file mode 100644 index 0000000..a501187 --- /dev/null +++ b/lib/libiconv_modules/HZ/Makefile @@ -0,0 +1,6 @@ +# $FreeBSD$ + +SHLIB= HZ +SRCS+= citrus_hz.c + +.include <bsd.lib.mk> diff --git a/lib/libiconv_modules/HZ/citrus_hz.c b/lib/libiconv_modules/HZ/citrus_hz.c new file mode 100644 index 0000000..3775ea6 --- /dev/null +++ b/lib/libiconv_modules/HZ/citrus_hz.c @@ -0,0 +1,648 @@ +/* $FreeBSD$ */ +/* $NetBSD: citrus_hz.c,v 1.2 2008/06/14 16:01:07 tnozaki Exp $ */ + +/*- + * Copyright (c)2004, 2006 Citrus Project, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include <sys/cdefs.h> +#include <sys/queue.h> +#include <sys/types.h> + +#include <assert.h> +#include <errno.h> +#include <limits.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> + +#include "citrus_namespace.h" +#include "citrus_types.h" +#include "citrus_bcs.h" +#include "citrus_module.h" +#include "citrus_stdenc.h" + +#include "citrus_hz.h" +#include "citrus_prop.h" + +/* + * wchar_t mapping: + * + * CTRL/ASCII 00000000 00000000 00000000 gxxxxxxx + * GB2312 00000000 00000000 0xxxxxxx gxxxxxxx + * 94/96*n (~M) 0mmmmmmm 0xxxxxxx 0xxxxxxx gxxxxxxx + */ + +#define ESCAPE_CHAR '~' + +typedef enum { + CTRL = 0, ASCII = 1, GB2312 = 2, CS94 = 3, CS96 = 4 +} charset_t; + +typedef struct { + int end; + int start; + int width; +} range_t; + +static const range_t ranges[] = { +#define RANGE(start, end) { start, end, (end - start) + 1 } +/* CTRL */ RANGE(0x00, 0x1F), +/* ASCII */ RANGE(0x20, 0x7F), +/* GB2312 */ RANGE(0x21, 0x7E), +/* CS94 */ RANGE(0x21, 0x7E), +/* CS96 */ RANGE(0x20, 0x7F), +#undef RANGE +}; + +typedef struct escape_t escape_t; +typedef struct { + charset_t charset; + escape_t *escape; + ssize_t length; +#define ROWCOL_MAX 3 +} graphic_t; + +typedef TAILQ_HEAD(escape_list, escape_t) escape_list; +struct escape_t { + TAILQ_ENTRY(escape_t) entry; + escape_list *set; + graphic_t *left; + graphic_t *right; + int ch; +}; + +#define GL(escape) ((escape)->left) +#define GR(escape) ((escape)->right) +#define SET(escape) ((escape)->set) +#define ESC(escape) ((escape)->ch) +#define INIT(escape) (TAILQ_FIRST(SET(escape))) + +static __inline escape_t * +find_escape(escape_list *set, int ch) +{ + escape_t *escape; + + TAILQ_FOREACH(escape, set, entry) { + if (ESC(escape) == ch) + break; + } + + return (escape); +} + +typedef struct { + escape_list e0; + escape_list e1; + graphic_t *ascii; + graphic_t *gb2312; +} _HZEncodingInfo; + +#define E0SET(ei) (&(ei)->e0) +#define E1SET(ei) (&(ei)->e1) +#define INIT0(ei) (TAILQ_FIRST(E0SET(ei))) +#define INIT1(ei) (TAILQ_FIRST(E1SET(ei))) + +typedef struct { + escape_t *inuse; + int chlen; + char ch[ROWCOL_MAX]; +} _HZState; + +#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) +#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ + +#define _FUNCNAME(m) _citrus_HZ_##m +#define _ENCODING_INFO _HZEncodingInfo +#define _ENCODING_STATE _HZState +#define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX +#define _ENCODING_IS_STATE_DEPENDENT 1 +#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->inuse == NULL) + +static __inline void +_citrus_HZ_init_state(_HZEncodingInfo * __restrict ei, + _HZState * __restrict psenc) +{ + + psenc->chlen = 0; + psenc->inuse = INIT0(ei); +} + +static __inline void +/*ARGSUSED*/ +_citrus_HZ_pack_state(_HZEncodingInfo * __restrict ei __unused, + void *__restrict pspriv, const _HZState * __restrict psenc) +{ + + memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); +} + +static __inline void +/*ARGSUSED*/ +_citrus_HZ_unpack_state(_HZEncodingInfo * __restrict ei __unused, + _HZState * __restrict psenc, const void * __restrict pspriv) +{ + + memcpy((void *)psenc, pspriv, sizeof(*psenc)); +} + +static int +_citrus_HZ_mbrtowc_priv(_HZEncodingInfo * __restrict ei, + wchar_t * __restrict pwc, char ** __restrict s, size_t n, + _HZState * __restrict psenc, size_t * __restrict nresult) +{ + escape_t *candidate, *init; + graphic_t *graphic; + const range_t *range; + char *s0; + wchar_t wc; + int bit, ch, head, len, tail; + + if (*s == NULL) { + _citrus_HZ_init_state(ei, psenc); + *nresult = 1; + return (0); + } + s0 = *s; + if (psenc->chlen < 0 || psenc->inuse == NULL) + return (EINVAL); + + wc = (wchar_t)0; + bit = head = tail = 0; + graphic = NULL; + for (len = 0; len <= MB_LEN_MAX;) { + if (psenc->chlen == tail) { + if (n-- < 1) { + *s = s0; + *nresult = (size_t)-2; + return (0); + } + psenc->ch[psenc->chlen++] = *s0++; + ++len; + } + ch = (unsigned char)psenc->ch[tail++]; + if (tail == 1) { + if ((ch & ~0x80) <= 0x1F) { + if (psenc->inuse != INIT0(ei)) + break; + wc = (wchar_t)ch; + goto done; + } + if (ch & 0x80) { + graphic = GR(psenc->inuse); + bit = 0x80; + ch &= ~0x80; + } else { + graphic = GL(psenc->inuse); + if (ch == ESCAPE_CHAR) + continue; + bit = 0x0; + } + if (graphic == NULL) + break; + } else if (tail == 2 && psenc->ch[0] == ESCAPE_CHAR) { + if (tail < psenc->chlen) + return (EINVAL); + if (ch == ESCAPE_CHAR) { + ++head; + } else if (ch == '\n') { + if (psenc->inuse != INIT0(ei)) + break; + tail = psenc->chlen = 0; + continue; + } else { + candidate = NULL; + init = INIT0(ei); + if (psenc->inuse == init) { + init = INIT1(ei); + } else if (INIT(psenc->inuse) == init) { + if (ESC(init) != ch) + break; + candidate = init; + } + if (candidate == NULL) { + candidate = find_escape( + SET(psenc->inuse), ch); + if (candidate == NULL) { + if (init == NULL || + ESC(init) != ch) + break; + candidate = init; + } + } + psenc->inuse = candidate; + tail = psenc->chlen = 0; + continue; + } + } else if (ch & 0x80) { + if (graphic != GR(psenc->inuse)) + break; + ch &= ~0x80; + } else { + if (graphic != GL(psenc->inuse)) + break; + } + range = &ranges[(size_t)graphic->charset]; + if (range->start > ch || range->end < ch) + break; + wc <<= 8; + wc |= ch; + if (graphic->length == (tail - head)) { + if (graphic->charset > GB2312) + bit |= ESC(psenc->inuse) << 24; + wc |= bit; + goto done; + } + } + *nresult = (size_t)-1; + return (EILSEQ); +done: + if (tail < psenc->chlen) + return (EINVAL); + *s = s0; + if (pwc != NULL) + *pwc = wc; + psenc->chlen = 0; + *nresult = (wc == 0) ? 0 : len; + + return (0); +} + +static int +_citrus_HZ_wcrtomb_priv(_HZEncodingInfo * __restrict ei, + char * __restrict s, size_t n, wchar_t wc, + _HZState * __restrict psenc, size_t * __restrict nresult) +{ + escape_t *candidate, *init; + graphic_t *graphic; + const range_t *range; + size_t len; + int bit, ch; + + if (psenc->chlen != 0 || psenc->inuse == NULL) + return (EINVAL); + if (wc & 0x80) { + bit = 0x80; + wc &= ~0x80; + } else { + bit = 0x0; + } + if ((uint32_t)wc <= 0x1F) { + candidate = INIT0(ei); + graphic = (bit == 0) ? candidate->left : candidate->right; + if (graphic == NULL) + goto ilseq; + range = &ranges[(size_t)CTRL]; + len = 1; + } else if ((uint32_t)wc <= 0x7F) { + graphic = ei->ascii; + if (graphic == NULL) + goto ilseq; + candidate = graphic->escape; + range = &ranges[(size_t)graphic->charset]; + len = graphic->length; + } else if ((uint32_t)wc <= 0x7F7F) { + graphic = ei->gb2312; + if (graphic == NULL) + goto ilseq; + candidate = graphic->escape; + range = &ranges[(size_t)graphic->charset]; + len = graphic->length; + } else { + ch = (wc >> 24) & 0xFF; + candidate = find_escape(E0SET(ei), ch); + if (candidate == NULL) { + candidate = find_escape(E1SET(ei), ch); + if (candidate == NULL) + goto ilseq; + } + wc &= ~0xFF000000; + graphic = (bit == 0) ? candidate->left : candidate->right; + if (graphic == NULL) + goto ilseq; + range = &ranges[(size_t)graphic->charset]; + len = graphic->length; + } + if (psenc->inuse != candidate) { + init = INIT0(ei); + if (SET(psenc->inuse) == SET(candidate)) { + if (INIT(psenc->inuse) != init || + psenc->inuse == init || candidate == init) + init = NULL; + } else if (candidate == (init = INIT(candidate))) { + init = NULL; + } + if (init != NULL) { + if (n < 2) + return (E2BIG); + n -= 2; + psenc->ch[psenc->chlen++] = ESCAPE_CHAR; + psenc->ch[psenc->chlen++] = ESC(init); + } + if (n < 2) + return (E2BIG); + n -= 2; + psenc->ch[psenc->chlen++] = ESCAPE_CHAR; + psenc->ch[psenc->chlen++] = ESC(candidate); + psenc->inuse = candidate; + } + if (n < len) + return (E2BIG); + while (len-- > 0) { + ch = (wc >> (len * 8)) & 0xFF; + if (range->start > ch || range->end < ch) + goto ilseq; + psenc->ch[psenc->chlen++] = ch | bit; + } + memcpy(s, psenc->ch, psenc->chlen); + *nresult = psenc->chlen; + psenc->chlen = 0; + + return (0); + +ilseq: + *nresult = (size_t)-1; + return (EILSEQ); +} + +static __inline int +_citrus_HZ_put_state_reset(_HZEncodingInfo * __restrict ei, + char * __restrict s, size_t n, _HZState * __restrict psenc, + size_t * __restrict nresult) +{ + escape_t *candidate; + + if (psenc->chlen != 0 || psenc->inuse == NULL) + return (EINVAL); + candidate = INIT0(ei); + if (psenc->inuse != candidate) { + if (n < 2) + return (E2BIG); + n -= 2; + psenc->ch[psenc->chlen++] = ESCAPE_CHAR; + psenc->ch[psenc->chlen++] = ESC(candidate); + } + if (n < 1) + return (E2BIG); + if (psenc->chlen > 0) + memcpy(s, psenc->ch, psenc->chlen); + *nresult = psenc->chlen; + _citrus_HZ_init_state(ei, psenc); + + return (0); +} + +static __inline int +_citrus_HZ_stdenc_get_state_desc_generic(_HZEncodingInfo * __restrict ei, + _HZState * __restrict psenc, int * __restrict rstate) +{ + + if (psenc->chlen < 0 || psenc->inuse == NULL) + return (EINVAL); + *rstate = (psenc->chlen == 0) + ? ((psenc->inuse == INIT0(ei)) + ? _STDENC_SDGEN_INITIAL + : _STDENC_SDGEN_STABLE) + : ((psenc->ch[0] == ESCAPE_CHAR) + ? _STDENC_SDGEN_INCOMPLETE_SHIFT + : _STDENC_SDGEN_INCOMPLETE_CHAR); + + return (0); +} + +static __inline int +/*ARGSUSED*/ +_citrus_HZ_stdenc_wctocs(_HZEncodingInfo * __restrict ei __unused, + _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) +{ + int bit; + + if (wc & 0x80) { + bit = 0x80; + wc &= ~0x80; + } else + bit = 0x0; + if ((uint32_t)wc <= 0x7F) { + *csid = (_csid_t)bit; + *idx = (_index_t)wc; + } else if ((uint32_t)wc <= 0x7F7F) { + *csid = (_csid_t)(bit | 0x8000); + *idx = (_index_t)wc; + } else { + *csid = (_index_t)(wc & ~0x00FFFF7F); + *idx = (_csid_t)(wc & 0x00FFFF7F); + } + + return (0); +} + +static __inline int +/*ARGSUSED*/ +_citrus_HZ_stdenc_cstowc(_HZEncodingInfo * __restrict ei __unused, + wchar_t * __restrict wc, _csid_t csid, _index_t idx) +{ + + *wc = (wchar_t)idx; + switch (csid) { + case 0x80: + case 0x8080: + *wc |= (wchar_t)0x80; + /*FALLTHROUGH*/ + case 0x0: + case 0x8000: + break; + default: + *wc |= (wchar_t)csid; + } + + return (0); +} + +static void +_citrus_HZ_encoding_module_uninit(_HZEncodingInfo *ei) +{ + escape_t *escape; + + while ((escape = TAILQ_FIRST(E0SET(ei))) != NULL) { + TAILQ_REMOVE(E0SET(ei), escape, entry); + free(GL(escape)); + free(GR(escape)); + free(escape); + } + while ((escape = TAILQ_FIRST(E1SET(ei))) != NULL) { + TAILQ_REMOVE(E1SET(ei), escape, entry); + free(GL(escape)); + free(GR(escape)); + free(escape); + } +} + +static int +_citrus_HZ_parse_char(void **context, const char *name __unused, const char *s) +{ + escape_t *escape; + void **p; + + p = (void **)*context; + escape = (escape_t *)p[0]; + if (escape->ch != '\0') + return (EINVAL); + escape->ch = *s++; + if (escape->ch == ESCAPE_CHAR || *s != '\0') + return (EINVAL); + + return (0); +} + +static int +_citrus_HZ_parse_graphic(void **context, const char *name, const char *s) +{ + _HZEncodingInfo *ei; + escape_t *escape; + graphic_t *graphic; + void **p; + + p = (void **)*context; + escape = (escape_t *)p[0]; + ei = (_HZEncodingInfo *)p[1]; + graphic = malloc(sizeof(*graphic)); + if (graphic == NULL) + return (ENOMEM); + memset(graphic, 0, sizeof(*graphic)); + if (strcmp("GL", name) == 0) { + if (GL(escape) != NULL) + goto release; + GL(escape) = graphic; + } else if (strcmp("GR", name) == 0) { + if (GR(escape) != NULL) + goto release; + GR(escape) = graphic; + } else { +release: + free(graphic); + return (EINVAL); + } + graphic->escape = escape; + if (_bcs_strncasecmp("ASCII", s, 5) == 0) { + if (s[5] != '\0') + return (EINVAL); + graphic->charset = ASCII; + graphic->length = 1; + ei->ascii = graphic; + return (0); + } else if (_bcs_strncasecmp("GB2312", s, 6) == 0) { + if (s[6] != '\0') + return (EINVAL); + graphic->charset = GB2312; + graphic->length = 2; + ei->gb2312 = graphic; + return (0); + } else if (strncmp("94*", s, 3) == 0) + graphic->charset = CS94; + else if (strncmp("96*", s, 3) == 0) + graphic->charset = CS96; + else + return (EINVAL); + s += 3; + switch(*s) { + case '1': case '2': case '3': + graphic->length = (size_t)(*s - '0'); + if (*++s == '\0') + break; + /*FALLTHROUGH*/ + default: + return (EINVAL); + } + return (0); +} + +static const _citrus_prop_hint_t escape_hints[] = { +_CITRUS_PROP_HINT_STR("CH", &_citrus_HZ_parse_char), +_CITRUS_PROP_HINT_STR("GL", &_citrus_HZ_parse_graphic), +_CITRUS_PROP_HINT_STR("GR", &_citrus_HZ_parse_graphic), +_CITRUS_PROP_HINT_END +}; + +static int +_citrus_HZ_parse_escape(void **context, const char *name, const char *s) +{ + _HZEncodingInfo *ei; + escape_t *escape; + void *p[2]; + + ei = (_HZEncodingInfo *)*context; + escape = malloc(sizeof(*escape)); + if (escape == NULL) + return (EINVAL); + memset(escape, 0, sizeof(*escape)); + if (strcmp("0", name) == 0) { + escape->set = E0SET(ei); + TAILQ_INSERT_TAIL(E0SET(ei), escape, entry); + } else if (strcmp("1", name) == 0) { + escape->set = E1SET(ei); + TAILQ_INSERT_TAIL(E1SET(ei), escape, entry); + } else { + free(escape); + return (EINVAL); + } + p[0] = (void *)escape; + p[1] = (void *)ei; + return (_citrus_prop_parse_variable( + escape_hints, (void *)&p[0], s, strlen(s))); +} + +static const _citrus_prop_hint_t root_hints[] = { +_CITRUS_PROP_HINT_STR("0", &_citrus_HZ_parse_escape), +_CITRUS_PROP_HINT_STR("1", &_citrus_HZ_parse_escape), +_CITRUS_PROP_HINT_END +}; + +static int +_citrus_HZ_encoding_module_init(_HZEncodingInfo * __restrict ei, + const void * __restrict var, size_t lenvar) +{ + int errnum; + + memset(ei, 0, sizeof(*ei)); + TAILQ_INIT(E0SET(ei)); + TAILQ_INIT(E1SET(ei)); + errnum = _citrus_prop_parse_variable( + root_hints, (void *)ei, var, lenvar); + if (errnum != 0) + _citrus_HZ_encoding_module_uninit(ei); + return (errnum); +} + +/* ---------------------------------------------------------------------- + * public interface for stdenc + */ + +_CITRUS_STDENC_DECLS(HZ); +_CITRUS_STDENC_DEF_OPS(HZ); + +#include "citrus_stdenc_template.h" diff --git a/lib/libiconv_modules/HZ/citrus_hz.h b/lib/libiconv_modules/HZ/citrus_hz.h new file mode 100644 index 0000000..9737b7b --- /dev/null +++ b/lib/libiconv_modules/HZ/citrus_hz.h @@ -0,0 +1,37 @@ +/* $FreeBSD$ */ +/* $NetBSD: citrus_hz.h,v 1.1 2006/11/22 23:38:27 tnozaki Exp $ */ + +/*- + * Copyright (c)2004, 2006 Citrus Project, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _CITRUS_HZ_H_ +#define _CITRUS_HZ_H_ + +__BEGIN_DECLS +_CITRUS_STDENC_GETOPS_FUNC(HZ); +__END_DECLS + +#endif |