diff options
author | gabor <gabor@FreeBSD.org> | 2011-02-25 00:04:39 +0000 |
---|---|---|
committer | gabor <gabor@FreeBSD.org> | 2011-02-25 00:04:39 +0000 |
commit | c91ab1769b1237e3663d59888cebe31ceee47570 (patch) | |
tree | ed256cfe2a006269f4a380597f93880be8d5c1cd /usr.bin | |
parent | 5aabdb149c1a96a1e9717d45fde4096807423095 (diff) | |
download | FreeBSD-src-c91ab1769b1237e3663d59888cebe31ceee47570.zip FreeBSD-src-c91ab1769b1237e3663d59888cebe31ceee47570.tar.gz |
Add the BSD-licensed Citrus iconv to the base system with default off
setting. It can be built by setting the WITH_ICONV knob. While this
knob is unset, the library part, the binaries, the header file and
the metadata files will not be built or installed so it makes no impact
on the system if left turned off.
This work is based on the iconv implementation in NetBSD but a great
number of improvements and feature additions have been included:
- Some utilities have been added. There is a conversion table generator,
which can compare conversion tables to reference data generated by
GNU libiconv. This helps ensure conversion compatibility.
- UTF-16 surrogate support and some endianness issues have been fixed.
- The rather chaotic Makefiles to build metadata have been refactored
and cleaned up; they are now easy to read and it is also easier to add
support for new encodings.
- A bunch of new encodings and encoding aliases have been added.
- Support for 1->2, 1->3 and 1->4 mappings has been added, which is
needed for transliterating with flying accents as GNU does, like "u.
- Lots of warnings have been fixed, the major part of the code is
now WARNS=6 clean.
- New section 1 and section 5 manual pages have been added.
- Some GNU-specific calls have been implemented:
iconvlist(), iconvctl(), iconv_canonicalize(), iconv_open_into()
- Support for GNU's //IGNORE suffix has been added.
- The "-" argument for stdin is now recognized in iconv(1) as per POSIX.
- The Big5 conversion module has been fixed.
- The iconv.h header file is supposed to be compatible with the
GNU version, i.e. sources should build with both the base iconv.h and
GNU libiconv. It also includes some macro magic to deal with the
char ** and const char ** incompatibility.
- GNU compatibility: "" or "char" means the current locale's
encoding in use
- Various cleanups and style(9) fixes.
Approved by: delphij (mentor)
Obtained from: The NetBSD Project
Sponsored by: Google Summer of Code 2009
Diffstat (limited to 'usr.bin')
-rw-r--r-- | usr.bin/Makefile | 9 | ||||
-rw-r--r-- | usr.bin/iconv/Makefile | 15 | ||||
-rw-r--r-- | usr.bin/iconv/iconv.1 | 119 | ||||
-rw-r--r-- | usr.bin/iconv/iconv.c | 220 | ||||
-rw-r--r-- | usr.bin/mkcsmapper/Makefile | 9 | ||||
-rw-r--r-- | usr.bin/mkcsmapper/Makefile.inc | 7 | ||||
-rw-r--r-- | usr.bin/mkcsmapper/ldef.h | 42 | ||||
-rw-r--r-- | usr.bin/mkcsmapper/lex.l | 106 | ||||
-rw-r--r-- | usr.bin/mkcsmapper/mkcsmapper.1 | 89 | ||||
-rw-r--r-- | usr.bin/mkcsmapper/yacc.y | 731 | ||||
-rw-r--r-- | usr.bin/mkcsmapper_static/Makefile | 14 | ||||
-rw-r--r-- | usr.bin/mkesdb/Makefile | 9 | ||||
-rw-r--r-- | usr.bin/mkesdb/Makefile.inc | 6 | ||||
-rw-r--r-- | usr.bin/mkesdb/ldef.h | 40 | ||||
-rw-r--r-- | usr.bin/mkesdb/lex.l | 99 | ||||
-rw-r--r-- | usr.bin/mkesdb/mkesdb.1 | 84 | ||||
-rw-r--r-- | usr.bin/mkesdb/yacc.y | 332 | ||||
-rw-r--r-- | usr.bin/mkesdb_static/Makefile | 14 |
18 files changed, 1945 insertions, 0 deletions
diff --git a/usr.bin/Makefile b/usr.bin/Makefile index 6667ec8..44408ba 100644 --- a/usr.bin/Makefile +++ b/usr.bin/Makefile @@ -61,6 +61,7 @@ SUBDIR= alias \ gzip \ head \ hexdump \ + ${_iconv} \ id \ ipcrm \ ipcs \ @@ -98,7 +99,9 @@ SUBDIR= alias \ mesg \ minigzip \ ministat \ + ${_mkcsmapper} \ mkdep \ + ${_mkesdb} \ mkfifo \ mklocale \ mktemp \ @@ -239,6 +242,12 @@ _clang= clang SUBDIR+= hesinfo .endif +.if ${MK_ICONV} != "no" +_iconv= iconv +_mkcsmapper= mkcsmapper +_mkesdb= mkesdb +.endif + .if ${MK_GROFF} != "no" SUBDIR+= vgrind .endif diff --git a/usr.bin/iconv/Makefile b/usr.bin/iconv/Makefile new file mode 100644 index 0000000..7e8f6e7 --- /dev/null +++ b/usr.bin/iconv/Makefile @@ -0,0 +1,15 @@ +# $FreeBSD$ +# $NetBSD: Makefile,v 1.6 2009/04/14 22:15:21 lukem Exp $ + +.include <bsd.own.mk> + +PROG= iconv +#SRCS= iconv.c +MAN= iconv.1 + +WARNS?= 6 + +LDADD+= -lcrypt +DPADD+= ${LIBCRYPT} + +.include <bsd.prog.mk> diff --git a/usr.bin/iconv/iconv.1 b/usr.bin/iconv/iconv.1 new file mode 100644 index 0000000..3018a4b --- /dev/null +++ b/usr.bin/iconv/iconv.1 @@ -0,0 +1,119 @@ +.\" $FreeBSD$ +.\" $NetBSD: iconv.1,v 1.3 2008/03/20 11:35:44 tnozaki Exp $ +.\" +.\" Copyright (c)2003 Citrus Project, +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd October 22, 2009 +.Dt ICONV 1 +.Os +.Sh NAME +.Nm iconv +.Nd codeset conversion utility +.Sh SYNOPSIS +.Nm +.Op Fl cs +.Fl f +.Ar from_name +.Fl t +.Ar to_name +.Op Ar file ... +.Nm +.Fl f +.Ar from_name +.Op Fl cs +.Op Fl t Ar to_name +.Op Ar file ... +.Nm +.Fl t +.Ar to_name +.Op Fl cs +.Op Fl f Ar from_name +.Op Ar file ... +.Nm +.Fl l +.Sh DESCRIPTION +The +.Nm +utility converts the codeset of +.Ar file +(or from standard input if no file is specified) from codeset +.Ar from_name +to codeset +.Ar to_name +and outputs the +converted text on standard output. +.Pp +The following options are available: +.Bl -tag -width 0123 +.It Fl c +Prevent output of any invalid characters. +By default, +.Nm +outputs an +.Dq invalid character +specified by the +.Ar to_name +codeset when it encounts a character which is valid in the +.Ar from_name +codeset but does not have a corresponding character in the +.Ar to_name +codeset. +.It Fl f +Specifies the source codeset name as +.Ar from_name . +.It Fl l +Lists available codeset names. +Note that not all combinations of +.Ar from_name +and +.Ar to_name +are valid. +.It Fl s +Silent. 
+By default, +.Nm +outputs the number of +.Dq invalid characters +to standard error if they exist. +This option prevents this behaviour. +.It Fl t +Specifies the destination codeset name as +.Ar to_name . +.El +.Sh EXIT STATUS +.Ex -std iconv +.Sh SEE ALSO +.Xr mkcsmapper 1 , +.Xr mkesdb 1 , +.Xr iconv 3 +.Sh STANDARDS +.Nm +conform to +.St -p1003.1-2008 . +.Sh HISTORY +.Nm +first appeared in +.Nx 2.0 , and made its appearance in +.Fx 9.0 . diff --git a/usr.bin/iconv/iconv.c b/usr.bin/iconv/iconv.c new file mode 100644 index 0000000..0da65a0 --- /dev/null +++ b/usr.bin/iconv/iconv.c @@ -0,0 +1,220 @@ +/* $FreeBSD$ */ +/* $NetBSD: iconv.c,v 1.16 2009/02/20 15:28:21 yamt Exp $ */ + +/*- + * Copyright (c)2003 Citrus Project, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> + +#include <err.h> +#include <errno.h> +#include <getopt.h> +#include <iconv.h> +#include <limits.h> +#include <locale.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +static unsigned long long invalids; + +static void do_conv(FILE *, const char *, const char *, bool, bool); +static int do_list(unsigned int, const char * const *, void *); +static void usage(void); + +struct option long_options[] = +{ + {"from-code", required_argument, NULL, 'f'}, + {"list", no_argument, NULL, 'l'}, + {"silent", no_argument, NULL, 's'}, + {"to-code", required_argument, NULL, 't'}, + {NULL, no_argument, NULL, 0} +}; + +static void +usage(void) +{ + (void)fprintf(stderr, + "Usage:\t%1$s [-cs] -f <from_code> -t <to_code> [file ...]\n" + "\t%1$s -f <from_code> [-cs] [-t <to_code>] [file ...]\n" + "\t%1$s -t <to_code> [-cs] [-f <from_code>] [file ...]\n" + "\t%1$s -l\n", getprogname()); + exit(1); +} + +#define INBUFSIZE 1024 +#define OUTBUFSIZE (INBUFSIZE * 2) +static void +do_conv(FILE *fp, const char *from, const char *to, bool silent, + bool hide_invalid) +{ + iconv_t cd; + char inbuf[INBUFSIZE], outbuf[OUTBUFSIZE], *out; + char *in; + size_t inbytes, outbytes, ret; + + if ((cd = iconv_open(to, from)) == (iconv_t)-1) + err(EXIT_FAILURE, "iconv_open(%s, %s)", to, from); + + if (hide_invalid) { + int arg = 1; + + if (iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, (void *)&arg) == 
-1) + err(1, NULL); + } + while ((inbytes = fread(inbuf, 1, INBUFSIZE, fp)) > 0) { + in = inbuf; + while (inbytes > 0) { + size_t inval; + + out = outbuf; + outbytes = OUTBUFSIZE; + ret = __iconv(cd, &in, &inbytes, &out, &outbytes, + 0, &inval); + invalids += inval; + if (outbytes < OUTBUFSIZE) + (void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, + stdout); + if (ret == (size_t)-1 && errno != E2BIG) { + if (errno != EINVAL || in == inbuf) + err(EXIT_FAILURE, "iconv()"); + + /* incomplete input character */ + (void)memmove(inbuf, in, inbytes); + ret = fread(inbuf + inbytes, 1, + INBUFSIZE - inbytes, fp); + if (ret == 0) { + fflush(stdout); + if (feof(fp)) + errx(EXIT_FAILURE, + "unexpected end of file; " + "the last character is " + "incomplete."); + else + err(EXIT_FAILURE, "fread()"); + } + in = inbuf; + inbytes += ret; + } + } + } + /* reset the shift state of the output buffer */ + outbytes = OUTBUFSIZE; + out = outbuf; + ret = iconv(cd, NULL, NULL, &out, &outbytes); + if (ret == (size_t)-1) + err(EXIT_FAILURE, "iconv()"); + if (outbytes < OUTBUFSIZE) + (void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, stdout); + + if (invalids > 0 && !silent) + warnx("warning: invalid characters: %llu", invalids); + + iconv_close(cd); +} + +static int +do_list(unsigned int n, const char * const *list, void *data __unused) +{ + unsigned int i; + + for(i = 0; i < n; i++) { + printf("%s", list[i]); + if (i < n - 1) + printf(" "); + } + printf("\n"); + + return (1); +} + +int +main(int argc, char **argv) +{ + FILE *fp; + char *opt_f, *opt_t; + int ch, i; + bool opt_c = false, opt_s = false; + + opt_f = opt_t = strdup(""); + + setlocale(LC_ALL, ""); + setprogname(argv[0]); + + while ((ch = getopt_long(argc, argv, "csLlf:t:", + long_options, NULL)) != -1) { + switch (ch) { + case 'c': + opt_c = true; + break; + case 's': + opt_s = true; + break; + case 'l': + /* list */ + if (opt_s || opt_c || strcmp(opt_f, "") != 0 || + strcmp(opt_t, "") != 0) { + warnx("-l is not allowed with other 
flags."); + usage(); + } + iconvlist(do_list, NULL); + return (EXIT_SUCCESS); + case 'f': + /* from */ + if (optarg != NULL) + opt_f = strdup(optarg); + break; + case 't': + /* to */ + if (optarg != NULL) + opt_t = strdup(optarg); + break; + default: + usage(); + } + } + argc -= optind; + argv += optind; + if ((strcmp(opt_f, "") == 0) && (strcmp(opt_t, "") == 0)) + usage(); + if (argc == 0) + do_conv(stdin, opt_f, opt_t, opt_s, opt_c); + else { + for (i = 0; i < argc; i++) { + fp = (strcmp(argv[i], "-") != 0) ? + fopen(argv[i], "r") : stdin; + if (fp == NULL) + err(EXIT_FAILURE, "Cannot open `%s'", + argv[i]); + do_conv(fp, opt_f, opt_t, opt_s, + opt_c); + (void)fclose(fp); + } + } + return (EXIT_SUCCESS); +} diff --git a/usr.bin/mkcsmapper/Makefile b/usr.bin/mkcsmapper/Makefile new file mode 100644 index 0000000..5d58a0c --- /dev/null +++ b/usr.bin/mkcsmapper/Makefile @@ -0,0 +1,9 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../lib/libc/iconv + +PROG= mkcsmapper +LDFLAGS+= -L${.CURDIR}/../../lib/libc + +.include "${.CURDIR}/Makefile.inc" +.include <bsd.prog.mk> diff --git a/usr.bin/mkcsmapper/Makefile.inc b/usr.bin/mkcsmapper/Makefile.inc new file mode 100644 index 0000000..ca8a675 --- /dev/null +++ b/usr.bin/mkcsmapper/Makefile.inc @@ -0,0 +1,7 @@ +# $FreeBSD$ + +SRCS+= lex.l yacc.y +CFLAGS+= -I${.CURDIR} -I${.CURDIR}/../mkcsmapper \ + -I${.CURDIR}/../../lib/libc/iconv \ + -I${.CURDIR}/../../lib/libiconv_modules/mapper_std \ + --param max-inline-insns-single=64 diff --git a/usr.bin/mkcsmapper/ldef.h b/usr.bin/mkcsmapper/ldef.h new file mode 100644 index 0000000..d0adf9a --- /dev/null +++ b/usr.bin/mkcsmapper/ldef.h @@ -0,0 +1,42 @@ +/* $FreeBSD$ */ +/* $NetBSD: ldef.h,v 1.2 2006/09/09 14:35:17 tnozaki Exp $ */ + +/*- + * Copyright (c)2003, 2006 Citrus Project, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> + +typedef struct { + u_int32_t begin; + u_int32_t end; + u_int32_t width; +} linear_zone_t; + +extern int debug; +extern int line_number; +extern int yyerror(const char *); +extern int yylex(void); +extern int yyparse(void); diff --git a/usr.bin/mkcsmapper/lex.l b/usr.bin/mkcsmapper/lex.l new file mode 100644 index 0000000..dd46caf --- /dev/null +++ b/usr.bin/mkcsmapper/lex.l @@ -0,0 +1,106 @@ +/* $FreeBSD$ */ +/* $NetBSD: lex.l,v 1.4 2006/02/09 22:03:15 dogcow Exp $ */ + +%{ +/*- + * Copyright (c)2003 Citrus Project, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#include <sys/endian.h> + +#include <assert.h> +#include <errno.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "ldef.h" +#include "yacc.h" + +int line_number = 1; +%} +%option nounput + +%x COMMENT + +%% + +[ \t]+ { } +#.*[\n]|"//".*[\n]|[\n] { line_number++; return (R_LN); } + +"/*" { BEGIN COMMENT; } +<COMMENT>"*/" { BEGIN 0; } +<COMMENT>[\n] { line_number++; } +<COMMENT>. 
{ } +<COMMENT><<EOF>> { + yyerror("unexpected file end (unterminate comment)\n"); + exit(1); + } + +"="|"/"|"-" { return ((int)yytext[0]); } + +([1-9][0-9]*)|(0[0-9]*)|(0[xX][0-9A-Fa-f]+) { + yylval.i_value = strtoul(yytext, NULL, 0); + return (L_IMM); + } + +"TYPE" { return (R_TYPE); } +"NAME" { return (R_NAME); } +"SRC_ZONE" { return (R_SRC_ZONE); } +"DST_INVALID" { return (R_DST_INVALID); } +"DST_ILSEQ" { return (R_DST_ILSEQ); } +"DST_UNIT_BITS" { return (R_DST_UNIT_BITS); } +"BEGIN_MAP" { return (R_BEGIN_MAP); } +"END_MAP" { return (R_END_MAP); } +"INVALID" { return (R_INVALID); } +"ILSEQ" { return (R_ILSEQ); } +"OOB_MODE" { return (R_OOB_MODE); } +"ROWCOL" { return (R_ROWCOL); } + +\"([^\"\n]*(\\\")?)*\"|\'([^\'\n]*(\\\')?)*\' { + size_t len; + + len = strlen(yytext); + yylval.s_value = malloc(len - 1); + strlcpy(yylval.s_value, yytext + 1, len - 1); + return (L_STRING); + } +[^ =/\-0-9\t\n][^ \t\n]* { + yylval.s_value = strdup(yytext); + return (L_STRING); + } + +%% + +#ifndef yywrap +int +yywrap(void) +{ + + return (1); +} +#endif diff --git a/usr.bin/mkcsmapper/mkcsmapper.1 b/usr.bin/mkcsmapper/mkcsmapper.1 new file mode 100644 index 0000000..a2666b4 --- /dev/null +++ b/usr.bin/mkcsmapper/mkcsmapper.1 @@ -0,0 +1,89 @@ +.\" Copyright (c) 2009 Gabor Kovesdan <gabor@FreeBSD.org> +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" Portions of this text are reprinted and reproduced in electronic form +.\" from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- +.\" Portable Operating System Interface (POSIX), The Open Group Base +.\" Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of +.\" Electrical and Electronics Engineers, Inc and The Open Group. In the +.\" event of any discrepancy between this version and the original IEEE and +.\" The Open Group Standard, the original IEEE and The Open Group Standard is +.\" the referee document. The original Standard can be obtained online at +.\" http://www.opengroup.org/unix/online.html. +.\" +.\" $FreeBSD$ +.\" +.Dd Sep 6, 2009 +.Dt mkcsmapper 1 +.Os +.Sh NAME +.Nm mkcsmapper +.Nd generates hashed conversion data for +.Xr iconv 3 +.Sh SYNOPSIS +.Nm +.Op Fl mpd +.Fl o +.Ar outfile +.Ar infile +.Sh DESCRIPTION +The +.Nm +utility generates binary conversion data from plain text conversion tables +for the +.Xr iconv 3 +library. +The conversion data has two components: +The conversion mapping between specific character encodings. 
+A pivot file, which the possible source and destination encoding +pairs or the set of mappings to use for a compound encoding. +.Pp +The following options are available: +.Bl -tag -width 0123 +.It Fl d +Turns on debug mode. +.It Fl m +Generate mapping data from +.Ar infile . +.It Fl o Ar outfile +Put generated binary data to +.Ar outfile . +.It Fl p +Generate pivot data from +.Ar outfile . +.El +.Sh EXIT STATUS +.Ex -std mkcsmapper +.Sh SEE ALSO +.Xr iconv 1 , +.Xr iconv 3 , +.Xr mkesdb 1 +.Sh HISTORY +.Nm +first appeared in +.Nx 2.0 , and made its appearance in +.Fx 9.0 . +.Sh AUTHORS +This manual page was written by +.An Gabor Kovesdan Aq gabor@FreeBSD.org . diff --git a/usr.bin/mkcsmapper/yacc.y b/usr.bin/mkcsmapper/yacc.y new file mode 100644 index 0000000..5d1a06b --- /dev/null +++ b/usr.bin/mkcsmapper/yacc.y @@ -0,0 +1,731 @@ +/* $FreeBSD$ */ +/* $NetBSD: yacc.y,v 1.7 2006/09/09 14:35:17 tnozaki Exp $ */ + +%{ +/*- + * Copyright (c)2003, 2006 Citrus Project, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#include <sys/types.h> + +#include <assert.h> +#include <err.h> +#include <errno.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <arpa/inet.h> + +#include "ldef.h" + +#ifndef __packed +#define __packed +#endif + +#include "citrus_namespace.h" +#include "citrus_types.h" +#include "citrus_mapper_std_file.h" +#include "citrus_region.h" +#include "citrus_db_factory.h" +#include "citrus_db_hash.h" +#include "citrus_lookup_factory.h" +#include "citrus_pivot_factory.h" + +extern FILE *yyin; + +int debug = 0; + +static linear_zone_t rowcol[_CITRUS_MAPPER_STD_ROWCOL_MAX]; +static char *map_name; +static char *output = NULL; +static void *table = NULL; +static size_t rowcol_len = 0; +static size_t table_size; +static u_int32_t done_flag = 0; +static u_int32_t dst_ilseq, dst_invalid, dst_unit_bits, oob_mode; +static u_int32_t rowcol_bits = 0, rowcol_mask = 0; +static u_int32_t src_next; +static int map_type; +static void (*putfunc)(void *, size_t, u_int32_t) = NULL; + +#define DF_TYPE 0x00000001 +#define DF_NAME 0x00000002 +#define DF_SRC_ZONE 0x00000004 +#define DF_DST_INVALID 0x00000008 +#define DF_DST_ILSEQ 0x00000010 +#define DF_DST_UNIT_BITS 0x00000020 +#define DF_OOB_MODE 0x00000040 + +static void dump_file(void); +static void setup_map(void); +static void set_type(int); +static void set_name(char *); +static void set_src_zone(u_int32_t); +static 
void set_dst_invalid(u_int32_t); +static void set_dst_ilseq(u_int32_t); +static void set_dst_unit_bits(u_int32_t); +static void set_oob_mode(u_int32_t); +static int check_src(u_int32_t, u_int32_t); +static void store(const linear_zone_t *, u_int32_t, int); +static void put8(void *, size_t, u_int32_t); +static void put16(void *, size_t, u_int32_t); +static void put32(void *, size_t, u_int32_t); +static void set_range(u_int32_t, u_int32_t); +static void set_src(linear_zone_t *, u_int32_t, u_int32_t); +%} + +%union { + u_int32_t i_value; + char *s_value; + linear_zone_t lz_value; +} + +%token R_TYPE R_NAME R_SRC_ZONE R_DST_UNIT_BITS +%token R_DST_INVALID R_DST_ILSEQ +%token R_BEGIN_MAP R_END_MAP R_INVALID R_ROWCOL +%token R_ILSEQ R_OOB_MODE +%token R_LN +%token <i_value> L_IMM +%token <s_value> L_STRING + +%type <lz_value> src +%type <i_value> dst types oob_mode_sel zone + +%% + +file : property mapping lns + { dump_file(); } + +property : /* empty */ + | property R_LN + | property name + | property type + | property src_zone + | property dst_invalid + | property dst_ilseq + | property dst_unit_bits + | property oob_mode + +name : R_NAME L_STRING { set_name($2); $2 = NULL; } +type : R_TYPE types { set_type($2); } +types : R_ROWCOL { $$ = R_ROWCOL; } +range : L_IMM '-' L_IMM { set_range($1, $3); } + +ranges : /* empty */ + | ranges range '/' + +src_zone : R_SRC_ZONE zone { set_src_zone($2); } +zone : range { + $$ = 32; + } + | range '/' range '/' ranges L_IMM { + $$ = $6; + } + +dst_invalid : R_DST_INVALID L_IMM { set_dst_invalid($2); } +dst_ilseq : R_DST_ILSEQ L_IMM { set_dst_ilseq($2); } +dst_unit_bits : R_DST_UNIT_BITS L_IMM { set_dst_unit_bits($2); } +oob_mode : R_OOB_MODE oob_mode_sel { set_oob_mode($2); } + +oob_mode_sel : R_INVALID { $$ = _CITRUS_MAPPER_STD_OOB_NONIDENTICAL; } + | R_ILSEQ { $$ = _CITRUS_MAPPER_STD_OOB_ILSEQ; } + +mapping : begin_map map_elems R_END_MAP +begin_map : R_BEGIN_MAP lns { setup_map(); } + +map_elems : /* empty */ + | map_elems 
map_elem lns + +map_elem : src '=' dst + { store(&$1, $3, 0); } + | src '=' L_IMM '-' + { store(&$1, $3, 1); } +dst : L_IMM + { + $$ = $1; + } + | R_INVALID + { + $$ = dst_invalid; + } + | R_ILSEQ + { + $$ = dst_ilseq; + } + +src : /* empty */ + { + set_src(&$$, src_next, src_next); + } + | L_IMM + { + set_src(&$$, $1, $1); + } + | L_IMM '-' L_IMM + { + set_src(&$$, $1, $3); + } + | '-' L_IMM + { + set_src(&$$, src_next, $2); + } +lns : R_LN + | lns R_LN + +%% + +/* Print message s followed by the current input line number to stderr. */ +static void +warning(const char *s) +{ + + fprintf(stderr, "%s in %d\n", s, line_number); +} + +/* Report s through warning() and terminate with exit status 1. */ +int +yyerror(const char *s) +{ + + warning(s); + exit(1); +} + +/* Store val, converted to 8 bits, at element index ofs of the byte array ptr. */ +void +put8(void *ptr, size_t ofs, u_int32_t val) +{ + + *((u_int8_t *)ptr + ofs) = val; +} + +/* Store val as a 16-bit value in network byte order at 16-bit element index ofs. */ +void +put16(void *ptr, size_t ofs, u_int32_t val) +{ + + u_int16_t oval = htons(val); + memcpy((u_int16_t *)ptr + ofs, &oval, 2); +} + +/* Store val as a 32-bit value in network byte order at 32-bit element index ofs. */ +void +put32(void *ptr, size_t ofs, u_int32_t val) +{ + + u_int32_t oval = htonl(val); + memcpy((u_int32_t *)ptr + ofs, &oval, 4); +} + +/* + * Allocate the mapping table: table_size is the product of the widths of + * all rowcol[] zones and each entry is dst_unit_bits wide. Every entry is + * pre-filled with dst_invalid or dst_ilseq, depending on oob_mode. + */ +static void +alloc_table(void) +{ + linear_zone_t *p; + size_t i; + uint32_t val = 0; + + i = rowcol_len; + p = &rowcol[--i]; + table_size = p->width; + while (i > 0) { + p = &rowcol[--i]; + table_size *= p->width; + } + table = (void *)malloc(table_size * dst_unit_bits / 8); + if (table == NULL) { + perror("malloc"); + exit(1); + } + + switch (oob_mode) { + case _CITRUS_MAPPER_STD_OOB_NONIDENTICAL: + val = dst_invalid; + break; + case _CITRUS_MAPPER_STD_OOB_ILSEQ: + val = dst_ilseq; + break; + default: + break; + } + for (i = 0; i < table_size; i++) + (*putfunc)(table, i, val); +} + +/* + * Called when the map body begins: require SRC_ZONE and DST_UNIT_BITS, + * apply defaults to the optional properties, then allocate the table. + */ +static void +setup_map(void) +{ + + if ((done_flag & DF_SRC_ZONE)==0) { + fprintf(stderr, "SRC_ZONE is mandatory.\n"); + exit(1); + } + if ((done_flag & DF_DST_UNIT_BITS)==0) { + fprintf(stderr, "DST_UNIT_BITS is mandatory.\n"); + exit(1); + } + + if ((done_flag & DF_DST_INVALID) == 0) + dst_invalid = 0xFFFFFFFF; + if ((done_flag & DF_DST_ILSEQ) == 0) + dst_ilseq = 0xFFFFFFFE; + if ((done_flag & DF_OOB_MODE) 
== 0) + oob_mode = _CITRUS_MAPPER_STD_OOB_NONIDENTICAL; + + alloc_table(); +} + +/* + * Build the ROWCOL info record as a sequence of 32-bit fields written with + * put32() and wrap it in region r. With a single source zone two zero + * fields precede the zone; the trailing length field is 0 for one or two + * zones and rowcol_len otherwise. + */ +static void +create_rowcol_info(struct _region *r) +{ + void *ptr; + size_t i, len, ofs; + + ofs = 0; + ptr = malloc(_CITRUS_MAPPER_STD_ROWCOL_INFO_SIZE); + if (ptr == NULL) + err(EXIT_FAILURE, "malloc"); + put32(ptr, ofs, rowcol_bits); ofs++; + put32(ptr, ofs, dst_invalid); ofs++; + + /* XXX: keep backward compatibility */ + switch (rowcol_len) { + case 1: + put32(ptr, ofs, 0); ofs++; + put32(ptr, ofs, 0); ofs++; + /*FALLTHROUGH*/ + case 2: + len = 0; + break; + default: + len = rowcol_len; + } + for (i = 0; i < rowcol_len; ++i) { + put32(ptr, ofs, rowcol[i].begin); ofs++; + put32(ptr, ofs, rowcol[i].end); ofs++; + } + put32(ptr, ofs, dst_unit_bits); ofs++; + put32(ptr, ofs, len); ofs++; + + /* ofs counts 32-bit fields, hence the factor of 4 for the byte size */ + _region_init(r, ptr, ofs * 4); +} + + +/* Build the ILSEQ extension record (oob_mode, then dst_ilseq) and wrap it in region r. */ +static void +create_rowcol_ext_ilseq_info(struct _region *r) +{ + void *ptr; + size_t ofs; + + ofs = 0; + ptr = malloc(_CITRUS_MAPPER_STD_ROWCOL_EXT_ILSEQ_SIZE); + if (ptr == NULL) + err(EXIT_FAILURE, "malloc"); + + put32(ptr, ofs, oob_mode); ofs++; + put32(ptr, ofs, dst_ilseq); ofs++; + + _region_init(r, ptr, _CITRUS_MAPPER_STD_ROWCOL_EXT_ILSEQ_SIZE); +} + +/* Call func with the parenthesised argument list a; exit with strerror() of the result if it is non-zero. */ +#define CHKERR(ret, func, a) \ +do { \ + ret = func a; \ + if (ret) \ + errx(EXIT_FAILURE, "%s: %s", #func, strerror(ret)); \ +} while (/*CONSTCOND*/0) + +/* Assemble the mapper database from the parsed properties and the table, then write it to the output file or stdout. */ +static void +dump_file(void) +{ + struct _db_factory *df; + struct _region data; + void *serialized; + FILE *fp; + size_t size; + int ret; + + /* + * build database + */ + CHKERR(ret, _db_factory_create, (&df, _db_hash_std, NULL)); + + /* store type */ + CHKERR(ret, _db_factory_addstr_by_s, + (df, _CITRUS_MAPPER_STD_SYM_TYPE, _CITRUS_MAPPER_STD_TYPE_ROWCOL)); + + /* store info */ + create_rowcol_info(&data); + CHKERR(ret, _db_factory_add_by_s, + (df, _CITRUS_MAPPER_STD_SYM_INFO, &data, 1)); + + /* ilseq extension */ + create_rowcol_ext_ilseq_info(&data); + CHKERR(ret, _db_factory_add_by_s, + (df, _CITRUS_MAPPER_STD_SYM_ROWCOL_EXT_ILSEQ, &data, 1)); + + 
/* store table */ + _region_init(&data, table, table_size*dst_unit_bits/8); + CHKERR(ret, _db_factory_add_by_s, + (df, _CITRUS_MAPPER_STD_SYM_TABLE, &data, 1)); + + /* + * dump database to file + */ + fp = output ? fopen(output, "wb") : stdout; + + if (fp == NULL) { + perror("fopen"); + exit(1); + } + + /* dump database body */ + size = _db_factory_calc_size(df); + serialized = malloc(size); + if (serialized == NULL) + err(EXIT_FAILURE, "malloc"); + _region_init(&data, serialized, size); + CHKERR(ret, _db_factory_serialize, + (df, _CITRUS_MAPPER_STD_MAGIC, &data)); + if (fwrite(serialized, size, 1, fp) != 1) + err(EXIT_FAILURE, "fwrite"); + + fclose(fp); +} + +/* Record the TYPE property; a repeated TYPE is warned about and ignored. */ +static void +/*ARGSUSED*/ +set_type(int type) +{ + + if (done_flag & DF_TYPE) { + warning("TYPE is duplicated. ignored this one"); + return; + } + + map_type = type; + + done_flag |= DF_TYPE; +} + +/* Record the NAME property (the pointer is kept, not copied); a repeated NAME is warned about and ignored. */ +static void +/*ARGSUSED*/ +set_name(char *str) +{ + + if (done_flag & DF_NAME) { + warning("NAME is duplicated. ignored this one"); + return; + } + + map_name = str; + + done_flag |= DF_NAME; +} + +/* + * Record the SRC_ZONE unit width (8, 16 or 32 bits), derive rowcol_mask + * from it and verify that the zones already collected in rowcol[] fit: + * at most 32 / val zones, each ending within the mask. Anything else is + * a fatal error. + */ +static void +set_src_zone(u_int32_t val) +{ + linear_zone_t *p; + size_t i; + + if (done_flag & DF_SRC_ZONE) { + warning("SRC_ZONE is duplicated. ignored this one"); + return; + } + rowcol_bits = val; + + /* sanity check */ + switch (rowcol_bits) { + case 8: case 16: case 32: + if (rowcol_len <= 32 / rowcol_bits) + break; + /*FALLTHROUGH*/ + default: + goto bad; + } + /* shift an unsigned 1: "1 << 31" on a signed int is undefined */ + rowcol_mask = (u_int32_t)1 << (rowcol_bits - 1); + rowcol_mask |= rowcol_mask - 1; + for (i = 0; i < rowcol_len; ++i) { + p = &rowcol[i]; + if (p->end > rowcol_mask) + goto bad; + } + done_flag |= DF_SRC_ZONE; + return; + +bad: + yyerror("Illegal argument for SRC_ZONE"); +} + +/* Record the DST_INVALID value; a repeated setting is warned about and ignored. */ +static void +set_dst_invalid(u_int32_t val) +{ + + if (done_flag & DF_DST_INVALID) { + warning("DST_INVALID is duplicated. ignored this one"); + return; + } + + dst_invalid = val; + + done_flag |= DF_DST_INVALID; +} + +static void +set_dst_ilseq(u_int32_t val) +{ + + if (done_flag & DF_DST_ILSEQ) { + warning("DST_ILSEQ is duplicated. 
ignored this one"); + return; + } + + dst_ilseq = val; + + done_flag |= DF_DST_ILSEQ; +} + +/* Record the OOB_MODE property; a repeated setting is warned about and ignored. */ +static void +set_oob_mode(u_int32_t val) +{ + + if (done_flag & DF_OOB_MODE) { + warning("OOB_MODE is duplicated. ignored this one"); + return; + } + + oob_mode = val; + + done_flag |= DF_OOB_MODE; +} + +/* + * Record DST_UNIT_BITS and select the matching put8/put16/put32 writer. + * Only 8, 16 and 32 are accepted; any other value goes to yyerror(). + */ +static void +set_dst_unit_bits(u_int32_t val) +{ + + if (done_flag & DF_DST_UNIT_BITS) { + warning("DST_UNIT_BITS is duplicated. ignored this one"); + return; + } + + switch (val) { + case 8: + putfunc = &put8; + dst_unit_bits = val; + break; + case 16: + putfunc = &put16; + dst_unit_bits = val; + break; + case 32: + putfunc = &put32; + dst_unit_bits = val; + break; + default: + yyerror("Illegal argument for DST_UNIT_BITS"); + } + done_flag |= DF_DST_UNIT_BITS; +} + +/* + * Validate the source range [begin, end]. Returns 0 when it is acceptable + * and 1 otherwise. The range is rejected when begin > end, when begin and + * end differ in any bit outside rowcol_mask, when any rowcol_bits-wide unit + * of begin lies outside its rowcol[] zone, or when the lowest unit of end + * lies outside the last zone. + */ +static int +check_src(u_int32_t begin, u_int32_t end) +{ + linear_zone_t *p; + size_t i; + u_int32_t m, n; + + if (begin > end) + return (1); + if (begin < end) { + m = begin & ~rowcol_mask; + n = end & ~rowcol_mask; + if (m != n) + return (1); + } + /* i is the bit offset of the unit under test, most significant unit first */ + for (i = rowcol_len * rowcol_bits, p = &rowcol[0]; i > 0; ++p) { + i -= rowcol_bits; + m = (begin >> i) & rowcol_mask; + if (m < p->begin || m > p->end) + return (1); + } + if (begin < end) { + n = end & rowcol_mask; + --p; /* step back to the last zone */ + if (n < p->begin || n > p->end) + return (1); + } + return (0); +} + +/* + * Write lz->width consecutive table entries, starting at the table index + * computed from lz->begin and the rowcol[] zones. The stored value is dst, + * incremented after every entry when inc is non-zero. + */ +static void +store(const linear_zone_t *lz, u_int32_t dst, int inc) +{ + linear_zone_t *p; + size_t i, ofs; + u_int32_t n; + + ofs = 0; + for (i = rowcol_len * rowcol_bits, p = &rowcol[0]; i > 0; ++p) { + i -= rowcol_bits; + n = ((lz->begin >> i) & rowcol_mask) - p->begin; + ofs = (ofs * p->width) + n; + } + n = lz->width; + while (n-- > 0) { + (*putfunc)(table, ofs++, dst); + if (inc) + dst++; + } +} + +/* Append the zone [begin, end] to rowcol[]; a full array or begin > end is a fatal error. */ +static void +set_range(u_int32_t begin, u_int32_t end) +{ + linear_zone_t *p; + + if (rowcol_len >= _CITRUS_MAPPER_STD_ROWCOL_MAX) + goto bad; + p = &rowcol[rowcol_len++]; + + if (begin > end) + goto bad; + p->begin = begin, p->end = end; + p->width = end - begin + 1; + + 
return; + +bad: + yyerror("Illegal argument for SRC_ZONE"); +} + +/* + * Validate [begin, end] with check_src() (fatal on failure), fill in lz + * and remember end + 1 as the default start of the next source entry. + */ +static void +set_src(linear_zone_t *lz, u_int32_t begin, u_int32_t end) +{ + + if (check_src(begin, end) != 0) + yyerror("illegal zone"); + + lz->begin = begin, lz->end = end; + lz->width = end - begin + 1; + + src_next = end + 1; +} + +/* + * -m mode: convert the lookup source read from in into the output file + * (or stdout). On failure the partial output file is removed and the + * program exits non-zero, matching do_mkpv(). + */ +static void +do_mkdb(FILE *in) +{ + FILE *out; + int ret; + + /* dump DB to file */ + out = output ? fopen(output, "wb") : stdout; + + if (out == NULL) + err(EXIT_FAILURE, "fopen"); + + ret = _lookup_factory_convert(out, in); + fclose(out); + if (ret && output) + unlink(output); /* dump failure */ + if (ret) + errx(EXIT_FAILURE, "%s", strerror(ret)); +} + +/* + * -p mode: convert the pivot source read from in into the output file + * (or stdout). On failure the partial output file is removed and the + * program exits non-zero. + */ +static void +do_mkpv(FILE *in) +{ + FILE *out; + int ret; + + /* dump pivot to file */ + out = output ? fopen(output, "wb") : stdout; + + if (out == NULL) + err(EXIT_FAILURE, "fopen"); + + ret = _pivot_factory_convert(out, in); + fclose(out); + if (ret && output) + unlink(output); /* dump failure */ + if (ret) + errx(EXIT_FAILURE, "%s", strerror(ret)); /* errx() appends the newline itself */ +} + +/* Print the command synopsis and exit with status 1. */ +static void +usage(void) +{ + warnx("usage: \n" + "\t%s [-d] [-o outfile] [infile]\n" + "\t%s -m [-d] [-o outfile] [infile]\n" + "\t%s -p [-d] [-o outfile] [infile]\n", + getprogname(), getprogname(), getprogname()); + exit(1); +} + +/* + * Parse the options, open the input (stdin when no file is given) and run + * the lookup (-m), pivot (-p) or mapper-source conversion. + */ +int +main(int argc, char **argv) +{ + FILE *in = NULL; + int ch, mkdb = 0, mkpv = 0; + + /* getopt() signals the end of the options with -1, which need not equal EOF */ + while ((ch = getopt(argc, argv, "do:mp")) != -1) { + switch (ch) { + case 'd': + debug = 1; + break; + case 'o': + output = strdup(optarg); + if (output == NULL) + err(EXIT_FAILURE, "strdup"); + break; + case 'm': + mkdb = 1; + break; + case 'p': + mkpv = 1; + break; + default: + usage(); + } + } + + argc -= optind; + argv += optind; + switch (argc) { + case 0: + in = stdin; + break; + case 1: + in = fopen(argv[0], "r"); + if (!in) + err(EXIT_FAILURE, "%s", argv[0]); + break; + default: + usage(); + } + + if (mkdb) + do_mkdb(in); + else if (mkpv) + do_mkpv(in); + else { + yyin = in; + yyparse(); + } + + return (0); +} diff --git a/usr.bin/mkcsmapper_static/Makefile b/usr.bin/mkcsmapper_static/Makefile new file mode 100644 index 0000000..0c6e10d 
--- /dev/null +++ b/usr.bin/mkcsmapper_static/Makefile @@ -0,0 +1,14 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../lib/libc/iconv ${.CURDIR}/../mkcsmapper + +PROG= mkcsmapper_static +SRCS= citrus_bcs.c citrus_db_factory.c citrus_db_hash.c \ + citrus_lookup_factory.c citrus_pivot_factory.c +NO_MAN= yes +NO_SHARED= yes + +build-tools: mkcsmapper_static + +.include "${.CURDIR}/../mkcsmapper/Makefile.inc" +.include <bsd.prog.mk> diff --git a/usr.bin/mkesdb/Makefile b/usr.bin/mkesdb/Makefile new file mode 100644 index 0000000..d4bd03f --- /dev/null +++ b/usr.bin/mkesdb/Makefile @@ -0,0 +1,9 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../lib/libc/iconv + +PROG= mkesdb +LDFLAGS+= -L${.OBJDIR}/../../lib/libc + +.include "${.CURDIR}/Makefile.inc" +.include <bsd.prog.mk> diff --git a/usr.bin/mkesdb/Makefile.inc b/usr.bin/mkesdb/Makefile.inc new file mode 100644 index 0000000..33d0b36 --- /dev/null +++ b/usr.bin/mkesdb/Makefile.inc @@ -0,0 +1,6 @@ +# $FreeBSD$ + +SRCS+= lex.l yacc.y +CFLAGS+= -I${.CURDIR} -I${.CURDIR}/../mkesdb \ + -I${.CURDIR}/../../lib/libc/iconv \ + --param max-inline-insns-single=64 diff --git a/usr.bin/mkesdb/ldef.h b/usr.bin/mkesdb/ldef.h new file mode 100644 index 0000000..a14277c --- /dev/null +++ b/usr.bin/mkesdb/ldef.h @@ -0,0 +1,40 @@ +/* $FreeBSD$ */ +/* $NetBSD: ldef.h,v 1.2 2005/06/27 01:23:59 fvdl Exp $ */ + +/*- + * Copyright (c)2003 Citrus Project, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +extern int line_number; +extern int yyerror(const char *); +extern int yylex(void); +extern int yyparse(void); + +struct named_csid { + STAILQ_ENTRY(named_csid) ci_entry; + char *ci_symbol; + u_int32_t ci_csid; +}; +STAILQ_HEAD(named_csid_list, named_csid); diff --git a/usr.bin/mkesdb/lex.l b/usr.bin/mkesdb/lex.l new file mode 100644 index 0000000..033897a --- /dev/null +++ b/usr.bin/mkesdb/lex.l @@ -0,0 +1,99 @@ +/* $FreeBSD$ */ +/* $NetBSD: lex.l,v 1.3 2006/02/09 22:03:15 dogcow Exp $ */ + +%{ +/*- + * Copyright (c)2003 Citrus Project, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#include <sys/endian.h> +#include <sys/queue.h> +#include <sys/types.h> + +#include <assert.h> +#include <errno.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "ldef.h" +#include "yacc.h" + +int line_number = 1; +%} +%option nounput + +%x COMMENT + +%% + +[ \t]+ { } +#.*[\n]|"//".*[\n]|[\n] { line_number++; return (R_LN); } + +"/*" { BEGIN COMMENT; } +<COMMENT>"*/" { BEGIN 0; } +<COMMENT>[\n] { line_number++; } +<COMMENT>. 
{ } +<COMMENT><<EOF>> { + yyerror("unexpected file end (unterminate comment)\n"); + exit(1); + } + +([1-9][0-9]*)|(0[0-9]*)|(0[xX][0-9A-Fa-f]+) { + yylval.i_value = strtoul(yytext, NULL, 0); + return (L_IMM); + } + +"NAME" { return (R_NAME); } +"ENCODING" { return (R_ENCODING); } +"VARIABLE" { return (R_VARIABLE); } +"DEFCSID" { return (R_DEFCSID); } +"INVALID" { return (R_INVALID); } + +\"([^\"\n]*(\\\")?)*\"|\'([^\'\n]*(\\\')?)*\' { + size_t len; + + /* copy the text between the quotes: len - 2 characters plus the NUL */ + len = strlen(yytext); + yylval.s_value = malloc(len - 1); + if (yylval.s_value == NULL) { + perror("malloc"); + exit(1); + } + strlcpy(yylval.s_value, yytext + 1, len - 1); + return (L_STRING); + } +[^ =/\-0-9\t\n][^ \t\n]* { + yylval.s_value = strdup(yytext); + if (yylval.s_value == NULL) { + perror("strdup"); + exit(1); + } + return (L_STRING); + } + +%% + +#ifndef yywrap +/* Tell the scanner that there is no further input after end of file. */ +int +yywrap(void) +{ + + return (1); +} +#endif diff --git a/usr.bin/mkesdb/mkesdb.1 b/usr.bin/mkesdb/mkesdb.1 new file mode 100644 index 0000000..2e06371 --- /dev/null +++ b/usr.bin/mkesdb/mkesdb.1 @@ -0,0 +1,84 @@ +.\" Copyright (c) 2009 Gabor Kovesdan <gabor@FreeBSD.org> +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" Portions of this text are reprinted and reproduced in electronic form +.\" from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- +.\" Portable Operating System Interface (POSIX), The Open Group Base +.\" Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of +.\" Electrical and Electronics Engineers, Inc and The Open Group. In the +.\" event of any discrepancy between this version and the original IEEE and +.\" The Open Group Standard, the original IEEE and The Open Group Standard is +.\" the referee document. The original Standard can be obtained online at +.\" http://www.opengroup.org/unix/online.html. +.\" +.\" $FreeBSD$ +.\" +.Dd November 1, 2009 +.Dt mkesdb 1 +.Os +.Sh NAME +.Nm mkesdb +.Nd generates conversion catalog for +.Xr iconv 3 +.Sh SYNOPSIS +.Nm +.Op Fl md +.Fl o +.Ar outfile +.Ar infile +.Sh DESCRIPTION +The +.Nm +utility generates a conversion catalog for the +.Xr iconv 3 +library. +The generated data includes a list of supported encodings +and their most important characteristics, like the pluggable +module to use. +.Pp +The following options are available: +.Bl -tag -width 0123 +.It Fl d +Turns on debug mode. +.It Fl m +Process directory catalog or alias file. +If not specified, the input file is treated as an encoding description. +.It Fl o Ar outfile +Put generated binary data to +.Ar outfile . 
+.El +.Sh EXIT STATUS +.Ex -std mkesdb +.Sh SEE ALSO +.Xr iconv 1 , +.Xr iconv 3 , +.Xr mkcsmapper 1 +.Sh HISTORY +.Nm +first appeared in +.Nx 2.0 , and made its appearance in +.Fx 9.0 . +.Sh AUTHORS +This manual page was written by +.An Gabor Kovesdan Aq gabor@FreeBSD.org . diff --git a/usr.bin/mkesdb/yacc.y b/usr.bin/mkesdb/yacc.y new file mode 100644 index 0000000..0c01b61 --- /dev/null +++ b/usr.bin/mkesdb/yacc.y @@ -0,0 +1,332 @@ +/* $FreeBSD$ */ +/* $NetBSD: yacc.y,v 1.4 2005/06/02 02:09:25 lukem Exp $ */ + +%{ +/*- + * Copyright (c)2003 Citrus Project, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +#include <sys/types.h> +#include <sys/queue.h> + +#include <assert.h> +#include <err.h> +#include <errno.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "citrus_namespace.h" +#include "citrus_types.h" +#include "citrus_region.h" +#include "citrus_esdb_file.h" +#include "citrus_db_hash.h" +#include "citrus_db_factory.h" +#include "citrus_lookup_factory.h" + +#include "ldef.h" + +extern FILE *yyin; + +static struct named_csid_list named_csids; +static char *encoding, *name, *output = NULL, *variable; +static u_int32_t invalid; +static int debug = 0, num_csids = 0, use_invalid = 0; + +static void dump_file(void); +static void register_named_csid(char *, u_int32_t); +static void set_invalid(u_int32_t); +static void set_prop_string(const char *, char **, char **); +%} +%union { + u_int32_t i_value; + char *s_value; +} + +%token R_NAME R_ENCODING R_VARIABLE R_DEFCSID R_INVALID +%token R_LN +%token <i_value> L_IMM +%token <s_value> L_STRING + +%% + +file : property + { dump_file(); } + +property : /* empty */ + | property R_LN + | property name R_LN + | property encoding R_LN + | property variable R_LN + | property defcsid R_LN + | property invalid R_LN + +name : R_NAME L_STRING + { + set_prop_string("NAME", &name, &$2); + } + +encoding : R_ENCODING L_STRING + { + set_prop_string("ENCODING", &encoding, &$2); + } +variable : R_VARIABLE L_STRING + { + set_prop_string("VARIABLE", &variable, &$2); + } +defcsid : R_DEFCSID L_STRING L_IMM + { + register_named_csid($2, $3); + $2 = NULL; + } +invalid : R_INVALID L_IMM + { + set_invalid($2); + } +%% + +/* Print message s and the current line number to stderr; returns 0 and does not exit. */ +int +yyerror(const char *s) +{ + + fprintf(stderr, "%s in %d\n", s, line_number); + + return (0); +} + +/* Call func with the parenthesised argument list a; exit with strerror() of the result if it is non-zero. */ +#define CHKERR(ret, func, a) \ +do { \ + ret = func a; \ + if (ret) \ + errx(EXIT_FAILURE, "%s: %s", #func, strerror(ret)); \ +} while (/*CONSTCOND*/0) +/* Check the mandatory properties, build the ESDB database and write it to the output file or stdout. */ +static void +dump_file(void) +{ + struct _db_factory *df; + struct _region 
data; + struct named_csid *csid; + FILE *fp; + char buf[100]; + void *serialized; + size_t size; + int i, ret; + + /* report every missing mandatory property before giving up */ + ret = 0; + if (!name) { + fprintf(stderr, "NAME is mandatory.\n"); + ret = 1; + } + if (!encoding) { + fprintf(stderr, "ENCODING is mandatory.\n"); + ret = 1; + } + if (ret) + exit(1); + + /* + * build database + */ + CHKERR(ret, _db_factory_create, (&df, _db_hash_std, NULL)); + + /* store version */ + CHKERR(ret, _db_factory_add32_by_s, (df, _CITRUS_ESDB_SYM_VERSION, + _CITRUS_ESDB_VERSION)); + + /* store encoding */ + CHKERR(ret, _db_factory_addstr_by_s, (df, _CITRUS_ESDB_SYM_ENCODING, + encoding)); + + /* store variable */ + if (variable) + CHKERR(ret, _db_factory_addstr_by_s, + (df, _CITRUS_ESDB_SYM_VARIABLE, variable)); + + /* store invalid */ + if (use_invalid) + CHKERR(ret, _db_factory_add32_by_s, (df, + _CITRUS_ESDB_SYM_INVALID, invalid)); + + /* store num of charsets */ + CHKERR(ret, _db_factory_add32_by_s, (df, _CITRUS_ESDB_SYM_NUM_CHARSETS, + num_csids)); + /* one name key and one id key per charset, suffixed with its index */ + i = 0; + STAILQ_FOREACH(csid, &named_csids, ci_entry) { + snprintf(buf, sizeof(buf), _CITRUS_ESDB_SYM_CSNAME_PREFIX "%d", + i); + CHKERR(ret, _db_factory_addstr_by_s, + (df, buf, csid->ci_symbol)); + snprintf(buf, sizeof(buf), _CITRUS_ESDB_SYM_CSID_PREFIX "%d", + i); + CHKERR(ret, _db_factory_add32_by_s, (df, buf, csid->ci_csid)); + i++; + } + + /* + * dump database to file + */ + fp = output ? fopen(output, "wb") : stdout; + if (fp == NULL) { + perror("fopen"); + exit(1); + } + + /* dump database body */ + size = _db_factory_calc_size(df); + serialized = malloc(size); + if (serialized == NULL) + err(EXIT_FAILURE, "malloc"); + _region_init(&data, serialized, size); + CHKERR(ret, _db_factory_serialize, (df, _CITRUS_ESDB_MAGIC, &data)); + if (fwrite(serialized, size, 1, fp) != 1) + err(EXIT_FAILURE, "fwrite"); + + fclose(fp); +} + +static void +set_prop_string(const char *res, char **store, char **data) +{ + char buf[256]; + + if (*store) { + snprintf(buf, sizeof(buf), + "%s is duplicated. 
ignored the one", res); + yyerror(buf); + return; + } + + /* take ownership of the string and clear the parser's copy of the pointer */ + *store = *data; + *data = NULL; +} + +/* Record the INVALID value and mark it as present. */ +static void +set_invalid(u_int32_t inv) +{ + + invalid = inv; + use_invalid = 1; +} + +/* + * Append the charset (sym, val) to named_csids. The list keeps the sym + * pointer itself. A symbol that is already registered is a fatal error. + */ +static void +register_named_csid(char *sym, u_int32_t val) +{ + struct named_csid *csid; + + STAILQ_FOREACH(csid, &named_csids, ci_entry) { + if (strcmp(csid->ci_symbol, sym) == 0) { + yyerror("multiply defined CSID"); + exit(1); + } + } + + csid = malloc(sizeof(*csid)); + if (csid == NULL) { + perror("malloc"); + exit(1); + } + csid->ci_symbol = sym; + csid->ci_csid = val; + STAILQ_INSERT_TAIL(&named_csids, csid, ci_entry); + num_csids++; +} + +/* + * -m mode: convert the lookup source read from in into the output file + * (or stdout). On failure the partial output file is removed and the + * program exits non-zero. + */ +static void +do_mkdb(FILE *in) +{ + FILE *out; + int ret; + + /* dump DB to file */ + out = output ? fopen(output, "wb") : stdout; + if (out == NULL) + err(EXIT_FAILURE, "fopen"); + + ret = _lookup_factory_convert(out, in); + fclose(out); + if (ret && output) + unlink(output); /* dump failure */ + if (ret) + errx(EXIT_FAILURE, "%s", strerror(ret)); /* errx() appends the newline itself */ +} + +/* Print the command synopsis and exit with EXIT_FAILURE. */ +static void +usage(void) +{ + errx(EXIT_FAILURE, + "usage:\n" + "\t%s [-o outfile] [infile]\n" + "\t%s -m [-o outfile] [infile]", + getprogname(), getprogname()); +} + +/* + * Parse the options, open the input (stdin when no file is given) and run + * either the lookup conversion (-m) or the encoding-description parser. + */ +int +main(int argc, char **argv) +{ + FILE *in = NULL; + int ch, mkdb = 0; + + /* getopt() signals the end of the options with -1, which need not equal EOF */ + while ((ch = getopt(argc, argv, "do:m")) != -1) { + switch (ch) { + case 'd': + debug = 1; + break; + case 'o': + output = strdup(optarg); + if (output == NULL) + err(EXIT_FAILURE, "strdup"); + break; + case 'm': + mkdb = 1; + break; + default: + usage(); + } + } + + argc -= optind; + argv += optind; + switch (argc) { + case 0: + in = stdin; + break; + case 1: + in = fopen(argv[0], "r"); + if (!in) + err(EXIT_FAILURE, "%s", argv[0]); + break; + default: + usage(); + } + + if (mkdb) + do_mkdb(in); + else { + STAILQ_INIT(&named_csids); + yyin = in; + /* yyerror() only prints here, so a syntax error must fail the run explicitly */ + if (yyparse() != 0) + return (EXIT_FAILURE); + } + + return (0); +} diff --git a/usr.bin/mkesdb_static/Makefile b/usr.bin/mkesdb_static/Makefile new file mode 100644 index 0000000..7328efc --- /dev/null +++ b/usr.bin/mkesdb_static/Makefile @@ -0,0 +1,14 @@ +# $FreeBSD$ + +.PATH: 
${.CURDIR}/../../lib/libc/iconv ${.CURDIR}/../mkesdb + +PROG= mkesdb_static +SRCS= citrus_bcs.c citrus_db_factory.c citrus_db_hash.c \ + citrus_lookup_factory.c +NO_MAN= yes +NO_SHARED= yes + +build-tools: mkesdb_static + +.include "${.CURDIR}/../mkesdb/Makefile.inc" +.include <bsd.prog.mk> |