diff options
Diffstat (limited to 'lib/libiconv_modules/iconv_std/citrus_iconv_std.c')
-rw-r--r-- | lib/libiconv_modules/iconv_std/citrus_iconv_std.c | 583 |
1 files changed, 583 insertions, 0 deletions
diff --git a/lib/libiconv_modules/iconv_std/citrus_iconv_std.c b/lib/libiconv_modules/iconv_std/citrus_iconv_std.c new file mode 100644 index 0000000..8349c46 --- /dev/null +++ b/lib/libiconv_modules/iconv_std/citrus_iconv_std.c @@ -0,0 +1,583 @@ +/* $FreeBSD$ */ +/* $NetBSD: citrus_iconv_std.c,v 1.15 2006/11/13 19:08:19 tnozaki Exp $ */ + +/*- + * Copyright (c)2003 Citrus Project, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#include <sys/endian.h> +#include <sys/queue.h> + +#include <assert.h> +#include <errno.h> +#include <limits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "citrus_namespace.h" +#include "citrus_types.h" +#include "citrus_module.h" +#include "citrus_region.h" +#include "citrus_mmap.h" +#include "citrus_hash.h" +#include "citrus_iconv.h" +#include "citrus_stdenc.h" +#include "citrus_mapper.h" +#include "citrus_csmapper.h" +#include "citrus_memstream.h" +#include "citrus_iconv_std.h" +#include "citrus_esdb.h" + +/* ---------------------------------------------------------------------- */ + +_CITRUS_ICONV_DECLS(iconv_std); +_CITRUS_ICONV_DEF_OPS(iconv_std); + + +/* ---------------------------------------------------------------------- */ + +int +_citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops) +{ + + memcpy(ops, &_citrus_iconv_std_iconv_ops, + sizeof(_citrus_iconv_std_iconv_ops)); + + return (0); +} + +/* ---------------------------------------------------------------------- */ + +/* + * convenience routines for stdenc. + */ +static __inline void +save_encoding_state(struct _citrus_iconv_std_encoding *se) +{ + + if (se->se_ps) + memcpy(se->se_pssaved, se->se_ps, + _stdenc_get_state_size(se->se_handle)); +} + +static __inline void +restore_encoding_state(struct _citrus_iconv_std_encoding *se) +{ + + if (se->se_ps) + memcpy(se->se_ps, se->se_pssaved, + _stdenc_get_state_size(se->se_handle)); +} + +static __inline void +init_encoding_state(struct _citrus_iconv_std_encoding *se) +{ + + if (se->se_ps) + _stdenc_init_state(se->se_handle, se->se_ps); +} + +static __inline int +mbtocsx(struct _citrus_iconv_std_encoding *se, + _csid_t *csid, _index_t *idx, char **s, size_t n, size_t *nresult, + struct iconv_hooks *hooks) +{ + + return (_stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps, + nresult, hooks)); +} + +static __inline int +cstombx(struct _citrus_iconv_std_encoding *se, + char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult, + struct iconv_hooks *hooks) +{ + + return (_stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps, + nresult, hooks)); +} + +static __inline int +wctombx(struct _citrus_iconv_std_encoding *se, + char *s, size_t n, _wc_t wc, size_t *nresult, + struct iconv_hooks *hooks) +{ + + return (_stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult, + hooks)); +} + +static __inline int +put_state_resetx(struct _citrus_iconv_std_encoding *se, char *s, size_t n, + size_t *nresult) +{ + + return (_stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult)); +} + +static __inline int +get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate) +{ + struct _stdenc_state_desc ssd; + int ret; + + ret = _stdenc_get_state_desc(se->se_handle, se->se_ps, + _STDENC_SDID_GENERIC, &ssd); + if (!ret) + *rstate = ssd.u.generic.state; + + return (ret); +} + +/* + * init encoding context + */ +static int +init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs, + void *ps1, void *ps2) +{ + int ret = -1; + + se->se_handle = cs; + se->se_ps = ps1; + se->se_pssaved = ps2; + + if (se->se_ps) + ret = _stdenc_init_state(cs, se->se_ps); + if (!ret && se->se_pssaved) + ret = _stdenc_init_state(cs, se->se_pssaved); + + return (ret); +} + +static int +open_csmapper(struct _csmapper **rcm, const char *src, const char *dst, + unsigned long *rnorm) +{ + struct _csmapper *cm; + int ret; + + ret = _csmapper_open(&cm, src, dst, 0, rnorm); + if (ret) + return (ret); + if (_csmapper_get_src_max(cm) != 1 || _csmapper_get_dst_max(cm) != 1 || + _csmapper_get_state_size(cm) != 0) { + _csmapper_close(cm); + return (EINVAL); + } + + *rcm = cm; + + return (0); +} + +static void +close_dsts(struct _citrus_iconv_std_dst_list *dl) +{ + struct _citrus_iconv_std_dst *sd; + + while ((sd = TAILQ_FIRST(dl)) != NULL) { + TAILQ_REMOVE(dl, sd, sd_entry); + _csmapper_close(sd->sd_mapper); + free(sd); + } +} + +static int +open_dsts(struct _citrus_iconv_std_dst_list *dl, + const struct _esdb_charset *ec, const struct _esdb *dbdst) +{ + struct _citrus_iconv_std_dst *sd, *sdtmp; + unsigned long norm; + int i, ret; + + sd = malloc(sizeof(*sd)); + if (sd == NULL) + return (errno); + + for (i = 0; i < dbdst->db_num_charsets; i++) { + ret = open_csmapper(&sd->sd_mapper, ec->ec_csname, + dbdst->db_charsets[i].ec_csname, &norm); + if (ret == 0) { + sd->sd_csid = dbdst->db_charsets[i].ec_csid; + sd->sd_norm = norm; + /* insert this mapper by sorted order. */ + TAILQ_FOREACH(sdtmp, dl, sd_entry) { + if (sdtmp->sd_norm > norm) { + TAILQ_INSERT_BEFORE(sdtmp, sd, + sd_entry); + sd = NULL; + break; + } + } + if (sd) + TAILQ_INSERT_TAIL(dl, sd, sd_entry); + sd = malloc(sizeof(*sd)); + if (sd == NULL) { + ret = errno; + close_dsts(dl); + return (ret); + } + } else if (ret != ENOENT) { + close_dsts(dl); + free(sd); + return (ret); + } + } + free(sd); + return (0); +} + +static void +close_srcs(struct _citrus_iconv_std_src_list *sl) +{ + struct _citrus_iconv_std_src *ss; + + while ((ss = TAILQ_FIRST(sl)) != NULL) { + TAILQ_REMOVE(sl, ss, ss_entry); + close_dsts(&ss->ss_dsts); + free(ss); + } +} + +static int +open_srcs(struct _citrus_iconv_std_src_list *sl, + const struct _esdb *dbsrc, const struct _esdb *dbdst) +{ + struct _citrus_iconv_std_src *ss; + int count = 0, i, ret; + + ss = malloc(sizeof(*ss)); + if (ss == NULL) + return (errno); + + TAILQ_INIT(&ss->ss_dsts); + + for (i = 0; i < dbsrc->db_num_charsets; i++) { + ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst); + if (ret) + goto err; + if (!TAILQ_EMPTY(&ss->ss_dsts)) { + ss->ss_csid = dbsrc->db_charsets[i].ec_csid; + TAILQ_INSERT_TAIL(sl, ss, ss_entry); + ss = malloc(sizeof(*ss)); + if (ss == NULL) { + ret = errno; + goto err; + } + count++; + TAILQ_INIT(&ss->ss_dsts); + } + } + free(ss); + + return (count ? 0 : ENOENT); + +err: + free(ss); + close_srcs(sl); + return (ret); +} + +/* do convert a character */ +#define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */ +static int +/*ARGSUSED*/ +do_conv(const struct _citrus_iconv_std_shared *is, + _csid_t *csid, _index_t *idx) +{ + struct _citrus_iconv_std_dst *sd; + struct _citrus_iconv_std_src *ss; + _index_t tmpidx; + int ret; + + TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) { + if (ss->ss_csid == *csid) { + TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) { + ret = _csmapper_convert(sd->sd_mapper, + &tmpidx, *idx, NULL); + switch (ret) { + case _MAPPER_CONVERT_SUCCESS: + *csid = sd->sd_csid; + *idx = tmpidx; + return (0); + case _MAPPER_CONVERT_NONIDENTICAL: + break; + case _MAPPER_CONVERT_SRC_MORE: + /*FALLTHROUGH*/ + case _MAPPER_CONVERT_DST_MORE: + /*FALLTHROUGH*/ + case _MAPPER_CONVERT_ILSEQ: + return (EILSEQ); + case _MAPPER_CONVERT_FATAL: + return (EINVAL); + } + } + break; + } + } + + return (E_NO_CORRESPONDING_CHAR); +} +/* ---------------------------------------------------------------------- */ + +static int +/*ARGSUSED*/ +_citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci, + const char * __restrict src, const char * __restrict dst) +{ + struct _citrus_esdb esdbdst, esdbsrc; + struct _citrus_iconv_std_shared *is; + int ret; + + is = malloc(sizeof(*is)); + if (is == NULL) { + ret = errno; + goto err0; + } + ret = _citrus_esdb_open(&esdbsrc, src); + if (ret) + goto err1; + ret = _citrus_esdb_open(&esdbdst, dst); + if (ret) + goto err2; + ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname, + esdbsrc.db_variable, esdbsrc.db_len_variable); + if (ret) + goto err3; + ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname, + esdbdst.db_variable, esdbdst.db_len_variable); + if (ret) + goto err4; + is->is_use_invalid = esdbdst.db_use_invalid; + is->is_invalid = esdbdst.db_invalid; + + TAILQ_INIT(&is->is_srcs); + ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst); + if (ret) + goto err5; + + _esdb_close(&esdbsrc); + _esdb_close(&esdbdst); + ci->ci_closure = is; + + return (0); + +err5: + _stdenc_close(is->is_dst_encoding); +err4: + _stdenc_close(is->is_src_encoding); +err3: + _esdb_close(&esdbdst); +err2: + _esdb_close(&esdbsrc); +err1: + free(is); +err0: + return (ret); +} + +static void +_citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci) +{ + struct _citrus_iconv_std_shared *is = ci->ci_closure; + + if (is == NULL) + return; + + _stdenc_close(is->is_src_encoding); + _stdenc_close(is->is_dst_encoding); + close_srcs(&is->is_srcs); + free(is); +} + +static int +_citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv) +{ + const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure; + struct _citrus_iconv_std_context *sc; + char *ptr; + size_t sz, szpsdst, szpssrc; + + szpssrc = _stdenc_get_state_size(is->is_src_encoding); + szpsdst = _stdenc_get_state_size(is->is_dst_encoding); + + sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context); + sc = malloc(sz); + if (sc == NULL) + return (errno); + + ptr = (char *)&sc[1]; + if (szpssrc > 0) + init_encoding(&sc->sc_src_encoding, is->is_src_encoding, + ptr, ptr+szpssrc); + else + init_encoding(&sc->sc_src_encoding, is->is_src_encoding, + NULL, NULL); + ptr += szpssrc*2; + if (szpsdst > 0) + init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding, + ptr, ptr+szpsdst); + else + init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding, + NULL, NULL); + + cv->cv_closure = (void *)sc; + + return (0); +} + +static void +_citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv) +{ + + free(cv->cv_closure); +} + +static int +_citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv, + char * __restrict * __restrict in, size_t * __restrict inbytes, + char * __restrict * __restrict out, size_t * __restrict outbytes, + uint32_t flags, size_t * __restrict invalids) +{ + const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure; + struct _citrus_iconv_std_context *sc = cv->cv_closure; + _csid_t csid; + _index_t idx; + char *tmpin; + size_t inval, szrin, szrout; + int ret, state = 0; + + inval = 0; + if (in == NULL || *in == NULL) { + /* special cases */ + if (out != NULL && *out != NULL) { + /* init output state and store the shift sequence */ + save_encoding_state(&sc->sc_src_encoding); + save_encoding_state(&sc->sc_dst_encoding); + szrout = 0; + + ret = put_state_resetx(&sc->sc_dst_encoding, + *out, *outbytes, &szrout); + if (ret) + goto err; + + if (szrout == (size_t)-2) { + /* too small to store the character */ + ret = EINVAL; + goto err; + } + *out += szrout; + *outbytes -= szrout; + } else + /* otherwise, discard the shift sequence */ + init_encoding_state(&sc->sc_dst_encoding); + init_encoding_state(&sc->sc_src_encoding); + *invalids = 0; + return (0); + } + + /* normal case */ + for (;;) { + if (*inbytes == 0) { + ret = get_state_desc_gen(&sc->sc_src_encoding, &state); + if (state == _STDENC_SDGEN_INITIAL || + state == _STDENC_SDGEN_STABLE) + break; + } + + /* save the encoding states for the error recovery */ + save_encoding_state(&sc->sc_src_encoding); + save_encoding_state(&sc->sc_dst_encoding); + + /* mb -> csid/index */ + tmpin = *in; + szrin = szrout = 0; + ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx, &tmpin, + *inbytes, &szrin, cv->cv_shared->ci_hooks); + if (ret) + goto err; + + if (szrin == (size_t)-2) { + /* incompleted character */ + ret = get_state_desc_gen(&sc->sc_src_encoding, &state); + if (ret) { + ret = EINVAL; + goto err; + } + switch (state) { + case _STDENC_SDGEN_INITIAL: + case _STDENC_SDGEN_STABLE: + /* fetch shift sequences only. */ + goto next; + } + ret = EINVAL; + goto err; + } + /* convert the character */ + ret = do_conv(is, &csid, &idx); + if (ret) { + if (ret == E_NO_CORRESPONDING_CHAR) { + inval++; + szrout = 0; + if ((((flags & _CITRUS_ICONV_F_HIDE_INVALID) == 0) && + !cv->cv_shared->ci_discard_ilseq) && + is->is_use_invalid) { + ret = wctombx(&sc->sc_dst_encoding, + *out, *outbytes, is->is_invalid, + &szrout, cv->cv_shared->ci_hooks); + if (ret) + goto err; + } + goto next; + } else + goto err; + } + /* csid/index -> mb */ + ret = cstombx(&sc->sc_dst_encoding, + *out, *outbytes, csid, idx, &szrout, + cv->cv_shared->ci_hooks); + if (ret) + goto err; +next: + *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */ + *in = tmpin; + *outbytes -= szrout; + *out += szrout; + } + *invalids = inval; + + return (0); + +err: + restore_encoding_state(&sc->sc_src_encoding); + restore_encoding_state(&sc->sc_dst_encoding); + *invalids = inval; + + return (ret); +} |