diff options
author | bapt <bapt@FreeBSD.org> | 2015-11-20 23:15:05 +0000 |
---|---|---|
committer | bapt <bapt@FreeBSD.org> | 2015-11-20 23:15:05 +0000 |
commit | 3a63b2a80a42ea40a57a1484d6f37bb3feffebef (patch) | |
tree | 9fcf517b1ef728833d3a00b6ecb6248a7ccc2174 | |
parent | cb131007f726c807d0b07e3d8a00cbb0d6724b58 (diff) | |
download | FreeBSD-src-3a63b2a80a42ea40a57a1484d6f37bb3feffebef.zip FreeBSD-src-3a63b2a80a42ea40a57a1484d6f37bb3feffebef.tar.gz |
Reintegrate colldef(1) and mklocale(1)
While those tools are not needed anymore, they are necessary to build FreeBSD 9
and 10. It does not hurt to keep those tools around until both the 9 and 10
branches become EOLed.
Modify colldef(1) to build after the change in the collation header, and ensure
it produces the same collation definition it used to generate for 9 and 10.
Reported by: Oliver Pinter
-rw-r--r-- | usr.bin/Makefile | 2 | ||||
-rw-r--r-- | usr.bin/colldef/Makefile | 12 | ||||
-rw-r--r-- | usr.bin/colldef/Makefile.depend | 27 | ||||
-rw-r--r-- | usr.bin/colldef/colldef.1 | 272 | ||||
-rw-r--r-- | usr.bin/colldef/common.h | 11 | ||||
-rw-r--r-- | usr.bin/colldef/parse.y | 402 | ||||
-rw-r--r-- | usr.bin/colldef/scan.l | 287 | ||||
-rw-r--r-- | usr.bin/mklocale/Makefile | 10 | ||||
-rw-r--r-- | usr.bin/mklocale/Makefile.depend | 26 | ||||
-rw-r--r-- | usr.bin/mklocale/extern.h | 35 | ||||
-rw-r--r-- | usr.bin/mklocale/ldef.h | 53 | ||||
-rw-r--r-- | usr.bin/mklocale/lex.l | 175 | ||||
-rw-r--r-- | usr.bin/mklocale/mklocale.1 | 304 | ||||
-rw-r--r-- | usr.bin/mklocale/yacc.y | 869 |
14 files changed, 2485 insertions, 0 deletions
diff --git a/usr.bin/Makefile b/usr.bin/Makefile index 750c2ab..86ea209 100644 --- a/usr.bin/Makefile +++ b/usr.bin/Makefile @@ -25,6 +25,7 @@ SUBDIR= alias \ cksum \ cmp \ col \ + colldef \ colrm \ column \ comm \ @@ -96,6 +97,7 @@ SUBDIR= alias \ mkdep \ mkfifo \ mkimg \ + mklocale \ mktemp \ mkulzma \ mkuzip \ diff --git a/usr.bin/colldef/Makefile b/usr.bin/colldef/Makefile new file mode 100644 index 0000000..fd59a6c --- /dev/null +++ b/usr.bin/colldef/Makefile @@ -0,0 +1,12 @@ +# $FreeBSD$ + +PROG= colldef +SRCS= parse.y scan.l y.tab.h +LFLAGS= -8 -i +CFLAGS+=-I. -I${.CURDIR} -I${.CURDIR}/../../lib/libc/locale +CFLAGS+=-DCOLLATE_DEBUG -DYY_NO_UNPUT -DYY_NO_INPUT +LIBADD= l + +WARNS?= 2 + +.include <bsd.prog.mk> diff --git a/usr.bin/colldef/Makefile.depend b/usr.bin/colldef/Makefile.depend new file mode 100644 index 0000000..3ca32c6 --- /dev/null +++ b/usr.bin/colldef/Makefile.depend @@ -0,0 +1,27 @@ +# $FreeBSD$ +# Autogenerated - do NOT edit! + +DIRDEPS = \ + gnu/lib/csu \ + gnu/lib/libgcc \ + include \ + include/arpa \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + usr.bin/lex/lib \ + usr.bin/yacc.host \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +parse.o: parse.c +parse.po: parse.c +scan.o: scan.c +scan.o: y.tab.h +scan.po: scan.c +scan.po: y.tab.h +.endif diff --git a/usr.bin/colldef/colldef.1 b/usr.bin/colldef/colldef.1 new file mode 100644 index 0000000..b541cd9 --- /dev/null +++ b/usr.bin/colldef/colldef.1 @@ -0,0 +1,272 @@ +.\" Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> +.\" at Electronni Visti IA, Kiev, Ukraine. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. 
+.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd January 27, 1995 +.Dt COLLDEF 1 +.Os +.Sh NAME +.Nm colldef +.Nd convert collation sequence source definition +.Sh SYNOPSIS +.Nm +.Op Fl I Ar map_dir +.Op Fl o Ar out_file +.Op Ar filename +.Sh DESCRIPTION +The +.Nm +utility converts a collation sequence source definition +into a format usable by the +.Fn strxfrm +and +.Fn strcoll +functions. +It is used to define the many ways in which +strings can be ordered and collated. +The +.Fn strxfrm +function transforms +its first argument and places the result in its second +argument. +The transformed string is such that it can be +correctly ordered with other transformed strings by using +.Fn strcmp , +.Fn strncmp , +or +.Fn memcmp . +The +.Fn strcoll +function transforms its arguments and does a +comparison. +.Pp +The +.Nm +utility reads the collation sequence source definition +from the standard input and stores the converted definition in filename. 
+The output file produced contains the +database with collating sequence information in a form +usable by system commands and routines. +.Pp +The following options are available: +.Bl -tag -width indent +.It Fl I Ar map_dir +Set directory name where +.Ar charmap +files can be found, current directory by default. +.It Fl o Ar out_file +Set output file name, +.Ar LC_COLLATE +by default. +.El +.Pp +The collation sequence definition specifies a set of collating elements and +the rules defining how strings containing these should be ordered. +This is most useful for different language definitions. +.Pp +The specification file can consist of three statements: +.Ar charmap , +.Ar substitute +and +.Ar order . +.Pp +Of these, only the +.Ar order +statement is required. +When +.Ar charmap +or +.Ar substitute +is +supplied, these statements must be ordered as above. +Any +statements after the order statement are ignored. +.Pp +Lines in the specification file beginning with a +.Ql # +are +treated as comments and are ignored. +Blank lines are also +ignored. +.Pp +.Dl "charmap charmapfile" +.Pp +.Ar Charmap +defines where a mapping of the character +and collating element symbols to the actual +character encoding can be found. +.Pp +The format of +.Ar charmapfile +is shown below. +Symbol +names are separated from their values by TAB or +SPACE characters. +Symbol-value can be specified in +a hexadecimal (\ex\fI??\fR) or octal (\e\fI???\fR) +representation, and can be only one character in length. +.Bd -literal -offset indent +symbol-name1 symbol-value1 +symbol-name2 symbol-value2 +\&... +.Ed +.Pp +Symbol names cannot be specified in +.Ar substitute +fields. +.Pp +The +.Ar charmap +statement is optional. +.Bd -literal -offset indent +substitute "symbol" with "repl_string" +.Ed +.Pp +The +.Ar substitute +statement substitutes the character +.Ar symbol +with the string +.Ar repl_string . +Symbol names cannot be specified in +.Ar repl_string +field. 
+The +.Ar substitute +statement is optional. +.Pp +.Dl "order order_list" +.Pp +.Ar Order_list +is a list of symbols, separated by semi colons, that defines the +collating sequence. +The +special symbol +.Ar ... +specifies, in a short-hand +form, symbols that are sequential in machine code +order. +.Pp +An order list element +can be represented in any one of the following +ways: +.Bl -bullet +.It +The symbol itself (for example, +.Ar a +for the lower-case letter +.Ar a ) . +.It +The symbol in octal representation (for example, +.Ar \e141 +for the letter +.Ar a ) . +.It +The symbol in hexadecimal representation (for example, +.Ar \ex61 +for the letter +.Ar a ) . +.It +The symbol name as defined in the +.Ar charmap +file (for example, +.Ar <letterA> +for +.Ar letterA \e023 +record in +.Ar charmapfile ) . +If character map name have +.Ar > +character, it must be escaped as +.Ar /> , +single +.Ar / +must be escaped as +.Ar // . +.It +Symbols +.Ar \ea , +.Ar \eb , +.Ar \ef , +.Ar \en , +.Ar \er , +.Ar \ev +are permitted in its usual C-language meaning. +.It +The symbol chain (for example: +.Ar abc , +.Ar <letterA><letterB>c , +.Ar \exf1b\exf2 ) +.It +The symbol range (for example, +.Ar a;...;z ) . +.It +Comma-separated symbols, ranges and chains enclosed in parenthesis (for example +.Ar \&( +.Ar sym1 , +.Ar sym2 , +.Ar ... +.Ar \&) ) +are assigned the +same primary ordering but different secondary +ordering. +.It +Comma-separated symbols, ranges and chains enclosed in curly brackets (for example +.Ar \&{ +.Ar sym1 , +.Ar sym2 , +.Ar ... +.Ar \&} ) +are assigned the same primary ordering only. +.El +.Pp +The backslash character +.Ar \e +is used for continuation. +In this case, no characters are permitted +after the backslash character. +.Sh FILES +.Bl -tag -width indent +.It Pa /usr/share/locale/ Ns Ao Ar language Ac Ns Pa /LC_COLLATE +The standard shared location for collation orders +under the locale +.Aq Ar language . 
+.El +.Sh EXIT STATUS +The +.Nm +utility exits with the following values: +.Bl -tag -width indent +.It Li 0 +No errors were found and the output was successfully created. +.It Li !=0 +Errors were found. +.El +.Sh SEE ALSO +.Xr mklocale 1 , +.Xr setlocale 3 , +.Xr strcoll 3 , +.Xr strxfrm 3 diff --git a/usr.bin/colldef/common.h b/usr.bin/colldef/common.h new file mode 100644 index 0000000..316490d --- /dev/null +++ b/usr.bin/colldef/common.h @@ -0,0 +1,11 @@ +/* + * $FreeBSD$ + */ + +#define CHARMAP_SYMBOL_LEN 64 +#define BUFSIZE 80 + +extern int line_no; + +extern u_char charmap_table[UCHAR_MAX + 1][CHARMAP_SYMBOL_LEN]; +extern char map_name[FILENAME_MAX]; diff --git a/usr.bin/colldef/parse.y b/usr.bin/colldef/parse.y new file mode 100644 index 0000000..1c04874 --- /dev/null +++ b/usr.bin/colldef/parse.y @@ -0,0 +1,402 @@ +%{ +/*- + * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> + * at Electronni Visti IA, Kiev, Ukraine. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <arpa/inet.h> +#include <err.h> +#include <stdarg.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <sysexits.h> +#include "collate.h" +#include "common.h" + +extern FILE *yyin; +void yyerror(const char *fmt, ...) __printflike(1, 2); +int yyparse(void); +int yylex(void); +static void usage(void); +static void collate_print_tables(void); + +#undef STR_LEN +#define STR_LEN 10 +#undef TABLE_SIZE +#define TABLE_SIZE 100 +#undef COLLATE_VERSION +#define COLLATE_VERSION "1.0\n" +#undef COLLATE_VERSION_2 +#define COLLATE_VERSION1_2 "1.2\n" + +struct __collate_st_char_pri { + int prim, sec; +}; + +struct __collate_st_chain_pri { + u_char str[STR_LEN]; + int prim, sec; +}; + +char map_name[FILENAME_MAX] = "."; +char curr_chain[STR_LEN]; + +char __collate_version[STR_LEN]; +u_char charmap_table[UCHAR_MAX + 1][CHARMAP_SYMBOL_LEN]; + +#undef __collate_substitute_table +u_char __collate_substitute_table[UCHAR_MAX + 1][STR_LEN]; +#undef __collate_char_pri_table +struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1]; +struct __collate_st_chain_pri *__collate_chain_pri_table; + +int chain_index = 0; +int prim_pri = 1, sec_pri = 1; +#ifdef COLLATE_DEBUG +int debug; +#endif + +const char *out_file = "LC_COLLATE"; +%} +%union { + u_char ch; + u_char str[BUFSIZE]; +} +%token SUBSTITUTE WITH ORDER RANGE +%token <str> STRING +%token <str> DEFN +%token 
<ch> CHAR +%% +collate : statment_list +; +statment_list : statment + | statment_list '\n' statment +; +statment : + | charmap + | substitute + | order +; +charmap : DEFN CHAR { + if (strlen($1) + 1 > CHARMAP_SYMBOL_LEN) + yyerror("Charmap symbol name '%s' is too long", $1); + strcpy(charmap_table[$2], $1); +} +; +substitute : SUBSTITUTE CHAR WITH STRING { + if ($2 == '\0') + yyerror("NUL character can't be substituted"); + if (strchr($4, $2) != NULL) + yyerror("Char 0x%02x substitution is recursive", $2); + if (strlen($4) + 1 > STR_LEN) + yyerror("Char 0x%02x substitution is too long", $2); + strcpy(__collate_substitute_table[$2], $4); +} +; +order : ORDER order_list { + FILE *fp; + int ch, substed, ordered; + uint32_t u32; + + for (ch = 0; ch < UCHAR_MAX + 1; ch++) { + substed = (__collate_substitute_table[ch][0] != ch); + ordered = !!__collate_char_pri_table[ch].prim; + if (!ordered && !substed) + yyerror("Char 0x%02x not found", ch); + if (substed && ordered) + yyerror("Char 0x%02x can't be ordered since substituted", ch); + } + + if ((__collate_chain_pri_table = realloc(__collate_chain_pri_table, + sizeof(*__collate_chain_pri_table) * (chain_index + 1))) == NULL) + yyerror("can't grow chain table"); + (void)memset(&__collate_chain_pri_table[chain_index], 0, + sizeof(__collate_chain_pri_table[0])); + chain_index++; + +#ifdef COLLATE_DEBUG + if (debug) + collate_print_tables(); +#endif + if ((fp = fopen(out_file, "w")) == NULL) + err(EX_UNAVAILABLE, "can't open destination file %s", + out_file); + + strcpy(__collate_version, COLLATE_VERSION1_2); + if (fwrite(__collate_version, sizeof(__collate_version), 1, fp) != 1) + err(EX_IOERR, + "I/O error writing collate version to destination file %s", + out_file); + u32 = htonl(chain_index); + if (fwrite(&u32, sizeof(u32), 1, fp) != 1) + err(EX_IOERR, + "I/O error writing chains number to destination file %s", + out_file); + if (fwrite(__collate_substitute_table, + sizeof(__collate_substitute_table), 1, fp) != 1) + 
err(EX_IOERR, + "I/O error writing substitution table to destination file %s", + out_file); + for (ch = 0; ch < UCHAR_MAX + 1; ch++) { + __collate_char_pri_table[ch].prim = + htonl(__collate_char_pri_table[ch].prim); + __collate_char_pri_table[ch].sec = + htonl(__collate_char_pri_table[ch].sec); + } + if (fwrite(__collate_char_pri_table, + sizeof(__collate_char_pri_table), 1, fp) != 1) + err(EX_IOERR, + "I/O error writing char table to destination file %s", + out_file); + for (ch = 0; ch < chain_index; ch++) { + __collate_chain_pri_table[ch].prim = + htonl(__collate_chain_pri_table[ch].prim); + __collate_chain_pri_table[ch].sec = + htonl(__collate_chain_pri_table[ch].sec); + } + if (fwrite(__collate_chain_pri_table, + sizeof(*__collate_chain_pri_table), chain_index, fp) != + (size_t)chain_index) + err(EX_IOERR, + "I/O error writing chain table to destination file %s", + out_file); + if (fclose(fp) != 0) + err(EX_IOERR, "I/O error closing destination file %s", + out_file); + exit(EX_OK); +} +; +order_list : item + | order_list ';' item +; +chain : CHAR CHAR { + curr_chain[0] = $1; + curr_chain[1] = $2; + if (curr_chain[0] == '\0' || curr_chain[1] == '\0') + yyerror("\\0 can't be chained"); + curr_chain[2] = '\0'; +} + | chain CHAR { + static char tb[2]; + + tb[0] = $2; + if (tb[0] == '\0') + yyerror("\\0 can't be chained"); + if (strlen(curr_chain) + 2 > STR_LEN) + yyerror("Chain '%s' grows too long", curr_chain); + (void)strcat(curr_chain, tb); +} +; +item : CHAR { + if (__collate_char_pri_table[$1].prim) + yyerror("Char 0x%02x duplicated", $1); + __collate_char_pri_table[$1].prim = prim_pri++; +} + | chain { + if ((__collate_chain_pri_table = realloc(__collate_chain_pri_table, + sizeof(*__collate_chain_pri_table) * (chain_index + 1))) == NULL) + yyerror("can't grow chain table"); + (void)memset(&__collate_chain_pri_table[chain_index], 0, + sizeof(__collate_chain_pri_table[0])); + (void)strcpy(__collate_chain_pri_table[chain_index].str, curr_chain); + 
__collate_chain_pri_table[chain_index].prim = prim_pri++; + chain_index++; +} + | CHAR RANGE CHAR { + u_int i; + + if ($3 <= $1) + yyerror("Illegal range 0x%02x -- 0x%02x", $1, $3); + + for (i = $1; i <= $3; i++) { + if (__collate_char_pri_table[(u_char)i].prim) + yyerror("Char 0x%02x duplicated", (u_char)i); + __collate_char_pri_table[(u_char)i].prim = prim_pri++; + } +} + | '{' prim_order_list '}' { + prim_pri++; +} + | '(' sec_order_list ')' { + prim_pri++; + sec_pri = 1; +} +; +prim_order_list : prim_sub_item + | prim_order_list ',' prim_sub_item +; +sec_order_list : sec_sub_item + | sec_order_list ',' sec_sub_item +; +prim_sub_item : CHAR { + if (__collate_char_pri_table[$1].prim) + yyerror("Char 0x%02x duplicated", $1); + __collate_char_pri_table[$1].prim = prim_pri; +} + | CHAR RANGE CHAR { + u_int i; + + if ($3 <= $1) + yyerror("Illegal range 0x%02x -- 0x%02x", + $1, $3); + + for (i = $1; i <= $3; i++) { + if (__collate_char_pri_table[(u_char)i].prim) + yyerror("Char 0x%02x duplicated", (u_char)i); + __collate_char_pri_table[(u_char)i].prim = prim_pri; + } +} + | chain { + if ((__collate_chain_pri_table = realloc(__collate_chain_pri_table, + sizeof(*__collate_chain_pri_table) * (chain_index + 1))) == NULL) + yyerror("can't grow chain table"); + (void)memset(&__collate_chain_pri_table[chain_index], 0, + sizeof(__collate_chain_pri_table[0])); + (void)strcpy(__collate_chain_pri_table[chain_index].str, curr_chain); + __collate_chain_pri_table[chain_index].prim = prim_pri; + chain_index++; +} +; +sec_sub_item : CHAR { + if (__collate_char_pri_table[$1].prim) + yyerror("Char 0x%02x duplicated", $1); + __collate_char_pri_table[$1].prim = prim_pri; + __collate_char_pri_table[$1].sec = sec_pri++; +} + | CHAR RANGE CHAR { + u_int i; + + if ($3 <= $1) + yyerror("Illegal range 0x%02x -- 0x%02x", + $1, $3); + + for (i = $1; i <= $3; i++) { + if (__collate_char_pri_table[(u_char)i].prim) + yyerror("Char 0x%02x duplicated", (u_char)i); + 
__collate_char_pri_table[(u_char)i].prim = prim_pri; + __collate_char_pri_table[(u_char)i].sec = sec_pri++; + } +} + | chain { + if ((__collate_chain_pri_table = realloc(__collate_chain_pri_table, + sizeof(*__collate_chain_pri_table) * (chain_index + 1))) == NULL) + yyerror("can't grow chain table"); + (void)memset(&__collate_chain_pri_table[chain_index], 0, + sizeof(__collate_chain_pri_table[0])); + (void)strcpy(__collate_chain_pri_table[chain_index].str, curr_chain); + __collate_chain_pri_table[chain_index].prim = prim_pri; + __collate_chain_pri_table[chain_index].sec = sec_pri++; + chain_index++; +} +; +%% +int +main(int ac, char **av) +{ + int ch; + +#ifdef COLLATE_DEBUG + while((ch = getopt(ac, av, ":do:I:")) != -1) { +#else + while((ch = getopt(ac, av, ":o:I:")) != -1) { +#endif + switch (ch) + { +#ifdef COLLATE_DEBUG + case 'd': + debug++; + break; +#endif + case 'o': + out_file = optarg; + break; + + case 'I': + strlcpy(map_name, optarg, sizeof(map_name)); + break; + + default: + usage(); + } + } + ac -= optind; + av += optind; + if (ac > 0) { + if ((yyin = fopen(*av, "r")) == NULL) + err(EX_UNAVAILABLE, "can't open source file %s", *av); + } + for (ch = 0; ch <= UCHAR_MAX; ch++) + __collate_substitute_table[ch][0] = ch; + yyparse(); + return 0; +} + +static void +usage(void) +{ + fprintf(stderr, "usage: colldef [-I map_dir] [-o out_file] [filename]\n"); + exit(EX_USAGE); +} + +void +yyerror(const char *fmt, ...) 
+{ + va_list ap; + char msg[128]; + + va_start(ap, fmt); + vsnprintf(msg, sizeof(msg), fmt, ap); + va_end(ap); + errx(EX_UNAVAILABLE, "%s near line %d", msg, line_no); +} + +#ifdef COLLATE_DEBUG +static void +collate_print_tables(void) +{ + int i; + + printf("Substitute table:\n"); + for (i = 0; i < UCHAR_MAX + 1; i++) + if (i != *__collate_substitute_table[i]) + printf("\t'%c' --> \"%s\"\n", i, + __collate_substitute_table[i]); + printf("Chain priority table:\n"); + for (i = 0; i < chain_index - 1; i++) + printf("\t\"%s\" : %d %d\n", + __collate_chain_pri_table[i].str, + __collate_chain_pri_table[i].prim, + __collate_chain_pri_table[i].sec); + printf("Char priority table:\n"); + for (i = 0; i < UCHAR_MAX + 1; i++) + printf("\t'%c' : %d %d\n", i, __collate_char_pri_table[i].prim, + __collate_char_pri_table[i].sec); +} +#endif diff --git a/usr.bin/colldef/scan.l b/usr.bin/colldef/scan.l new file mode 100644 index 0000000..b396ed0 --- /dev/null +++ b/usr.bin/colldef/scan.l @@ -0,0 +1,287 @@ +%x string name charmap defn nchar subs subs2 +%{ +/*- + * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> + * at Electronni Visti IA, Kiev, Ukraine. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <ctype.h> +#include <err.h> +#include <limits.h> +#include <unistd.h> +#include <string.h> +#include <sysexits.h> +#include "common.h" +#include "y.tab.h" + +int line_no = 1, save_no, fromsubs; +u_char buf[BUFSIZE], *ptr; +FILE *map_fp; +YY_BUFFER_STATE main_buf, map_buf; +#ifdef FLEX_DEBUG +YYSTYPE yylval; +#endif /* FLEX_DEBUG */ +int yylex(void); +%} +%% +<INITIAL,charmap,nchar,subs,subs2>[ \t]+ ; +<subs2>\" { ptr = buf; BEGIN(string); } +<subs>\< { ptr = buf; fromsubs = 1; BEGIN(name); } +<INITIAL>\< { ptr = buf; fromsubs = 0; BEGIN(name); } +^#.*\n line_no++; +^\n line_no++; +<INITIAL>\\\n line_no++; +<INITIAL,nchar,subs>\\t { yylval.ch = '\t'; return CHAR; } +<INITIAL,nchar,subs>\\n { yylval.ch = '\n'; return CHAR; } +<INITIAL,nchar,subs>\\b { yylval.ch = '\b'; return CHAR; } +<INITIAL,nchar,subs>\\f { yylval.ch = '\f'; return CHAR; } +<INITIAL,nchar,subs>\\v { yylval.ch = '\v'; return CHAR; } +<INITIAL,nchar,subs>\\r { yylval.ch = '\r'; return CHAR; } +<INITIAL,nchar,subs>\\a { yylval.ch = '\a'; return CHAR; } +<subs2>\n { + line_no++; + BEGIN(INITIAL); + return '\n'; +} +<INITIAL,nchar>\n { + line_no++; + if (map_fp != NULL) { + ptr = buf; + BEGIN(defn); + } + return '\n'; +} +<INITIAL>[;,{}()] return *yytext; +<INITIAL>substitute { BEGIN(subs); return SUBSTITUTE; } +<subs>with { BEGIN(subs2); return WITH; } +<INITIAL>order return ORDER; +<INITIAL>charmap 
BEGIN(charmap); +<INITIAL>;[ \t]*\.\.\.[ \t]*; return RANGE; +<INITIAL,nchar,subs>\\[0-7]{3} { + u_int v; + + sscanf(&yytext[1], "%o", &v); + yylval.ch = (u_char)v; + return CHAR; +} +<INITIAL,nchar,subs>\\x[0-9a-fA-F]{2} { + u_int v; + + sscanf(&yytext[2], "%x", &v); + yylval.ch = (u_char)v; + return CHAR; +} +<INITIAL,nchar,subs>\\. { yylval.ch = yytext[1]; return CHAR; } +<INITIAL,nchar,subs>. { yylval.ch = *yytext; return CHAR; } +<defn>^#.*\n line_no++; +<defn>[ \t]+ { + if (ptr == buf) + errx(EX_UNAVAILABLE, "map expected near line %u of %s", + line_no, map_name); + *ptr = '\0'; + strcpy(yylval.str, buf); + BEGIN(nchar); + return DEFN; +} +<name>\/\/ { + if(ptr >= buf + sizeof(buf) - 1) + errx(EX_UNAVAILABLE, "name buffer overflow near line %u, character '/'", + line_no); + *ptr++ = '/'; +} +<name>\/\> { + if(ptr >= buf + sizeof(buf) - 1) + errx(EX_UNAVAILABLE, "name buffer overflow near line %u, character '>'", + line_no); + *ptr++ = '>'; +} +<string>\\\" { + if(ptr >= buf + sizeof(buf) - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '\"'", + line_no); + *ptr++ = '"'; +} +<name>\> { + u_int i; + + if (ptr == buf) + errx(EX_UNAVAILABLE, "non-empty name expected near line %u", + line_no); + *ptr = '\0'; + for (i = 0; i <= UCHAR_MAX; i++) { + if (strcmp(charmap_table[i], buf) == 0) + goto findit; + } + errx(EX_UNAVAILABLE, "name <%s> not 'charmap'-defined near line %u", + buf, line_no); + findit: + yylval.ch = i; + if (fromsubs) + BEGIN(subs); + else + BEGIN(INITIAL); + return CHAR; +} +<string>\" { + *ptr = '\0'; + strcpy(yylval.str, buf); + BEGIN(subs2); + return STRING; +} +<name,defn>. { + const char *s = (map_fp != NULL) ? 
map_name : "input"; + + if (!isascii(*yytext) || !isprint(*yytext)) + errx(EX_UNAVAILABLE, "non-ASCII or non-printable character 0x%02x not allowed in the map/name near line %u of %s", + *yytext, line_no, s); + if(ptr >= buf + sizeof(buf) - 1) + errx(EX_UNAVAILABLE, "map/name buffer overflow near line %u of %s, character '%c'", + line_no, s, *yytext); + *ptr++ = *yytext; +} +<string>\\t { + if(ptr >= buf + sizeof(buf) - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '\\t'", + line_no); + *ptr++ = '\t'; +} +<string>\\b { + if(ptr >= buf + sizeof(buf) - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '\\b'", + line_no); + *ptr++ = '\b'; +} +<string>\\f { + if(ptr >= buf + sizeof(buf) - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '\\f'", + line_no); + *ptr++ = '\f'; +} +<string>\\v { + if(ptr >= buf + sizeof(buf) - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '\\v'", + line_no); + *ptr++ = '\v'; +} +<string>\\n { + if(ptr >= buf + sizeof(buf) - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '\\n'", + line_no); + *ptr++ = '\n'; +} +<string>\\r { + if(ptr >= buf + sizeof(buf) - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '\\r'", + line_no); + *ptr++ = '\r'; +} +<string>\\a { + if(ptr >= buf + sizeof(buf) - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '\\a'", + line_no); + *ptr++ = '\a'; +} +<name,string,defn>\n { + const char *s = (map_fp != NULL) ? map_name : "input"; + + errx(EX_UNAVAILABLE, "unterminated map/name/string near line %u of %s", line_no, s); +} +<name,string,nchar><<EOF>> { + const char *s = (map_fp != NULL) ? 
map_name : "input"; + + errx(EX_UNAVAILABLE, "premature EOF in the name/string/char near line %u of %s", line_no, s); +} +<string>\\x[0-9a-f]{2} { + u_int v; + + sscanf(&yytext[2], "%x", &v); + *ptr++ = (u_char)v; +} +<string>\\[0-7]{3} { + u_int v; + + sscanf(&yytext[1], "%o", &v); + *ptr++ = (u_char)v; +} +<string>\\. { + if(ptr >= buf + sizeof(buf) - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '%c'", + line_no, yytext[1]); + *ptr++ = yytext[1]; +} +<string>. { + if(ptr >= buf + sizeof(buf) - 1) + errx(EX_UNAVAILABLE, "string buffer overflow near line %u, character '%c'", + line_no, *yytext); + *ptr++ = *yytext; +} +<charmap>[^ \t\n]+ { + strcat(map_name, "/"); + strcat(map_name, yytext); + if((map_fp = fopen(map_name, "r")) == NULL) + err(EX_UNAVAILABLE, "can't open 'charmap' file %s", + map_name); + save_no = line_no; + line_no = 1; + map_buf = yy_new_buffer(map_fp, YY_BUF_SIZE); + main_buf = YY_CURRENT_BUFFER; + yy_switch_to_buffer(map_buf); + ptr = buf; + BEGIN(defn); +} +<charmap>\n { + errx(EX_UNAVAILABLE, "'charmap' file name expected near line %u", + line_no); +} +<charmap><<EOF>> { + errx(EX_UNAVAILABLE, "'charmap' file name expected near line %u", + line_no); +} +<INITIAL,defn><<EOF>> { + if(map_fp != NULL) { + if (ptr != buf) + errx(EX_UNAVAILABLE, "premature EOF in the map near line %u of %s", line_no, map_name); + yy_switch_to_buffer(main_buf); + yy_delete_buffer(map_buf); + fclose(map_fp); + map_fp = NULL; + line_no = save_no; + BEGIN(INITIAL); + } else + yyterminate(); +} +%% +#ifdef FLEX_DEBUG +main() +{ + while(yylex()) + ; + return 0; +} +#endif /* FLEX_DEBUG */ diff --git a/usr.bin/mklocale/Makefile b/usr.bin/mklocale/Makefile new file mode 100644 index 0000000..83850c4 --- /dev/null +++ b/usr.bin/mklocale/Makefile @@ -0,0 +1,10 @@ +# @(#)Makefile 8.1 (Berkeley) 6/7/93 +# $FreeBSD$ + +PROG= mklocale +SRCS= yacc.y lex.l y.tab.h +CFLAGS+= -I. 
-I${.CURDIR} -I${.CURDIR}/../../lib/libc/locale + +NO_WMISSING_VARIABLE_DECLARATIONS= + +.include <bsd.prog.mk> diff --git a/usr.bin/mklocale/Makefile.depend b/usr.bin/mklocale/Makefile.depend new file mode 100644 index 0000000..b3bfc6d --- /dev/null +++ b/usr.bin/mklocale/Makefile.depend @@ -0,0 +1,26 @@ +# $FreeBSD$ +# Autogenerated - do NOT edit! + +DIRDEPS = \ + gnu/lib/csu \ + gnu/lib/libgcc \ + include \ + include/arpa \ + include/xlocale \ + lib/${CSU_DIR} \ + lib/libc \ + lib/libcompiler_rt \ + usr.bin/yacc.host \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +lex.o: lex.c +lex.o: y.tab.h +lex.po: lex.c +lex.po: y.tab.h +yacc.o: yacc.c +yacc.po: yacc.c +.endif diff --git a/usr.bin/mklocale/extern.h b/usr.bin/mklocale/extern.h new file mode 100644 index 0000000..60268cc --- /dev/null +++ b/usr.bin/mklocale/extern.h @@ -0,0 +1,35 @@ +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +int yylex(void); diff --git a/usr.bin/mklocale/ldef.h b/usr.bin/mklocale/ldef.h new file mode 100644 index 0000000..392252f --- /dev/null +++ b/usr.bin/mklocale/ldef.h @@ -0,0 +1,53 @@ +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ldef.h 8.1 (Berkeley) 6/6/93 + * $FreeBSD$ + */ + +#include <sys/types.h> +#include "runefile.h" + +/* + * This should look a LOT like a _RuneEntry + */ +typedef struct rune_list { + int32_t min; + int32_t max; + int32_t map; + uint32_t *types; + struct rune_list *next; +} rune_list; + +typedef struct rune_map { + uint32_t map[_CACHED_RUNES]; + rune_list *root; +} rune_map; diff --git a/usr.bin/mklocale/lex.l b/usr.bin/mklocale/lex.l new file mode 100644 index 0000000..08fa54a --- /dev/null +++ b/usr.bin/mklocale/lex.l @@ -0,0 +1,175 @@ +%{ +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)lex.l 8.1 (Berkeley) 6/6/93"; +#endif +#endif /* not lint */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> + +#include "ldef.h" +#include "y.tab.h" +#include "extern.h" + +#define YY_DECL int yylex(void) +%} + +ODIGIT [0-7] +DIGIT [0-9] +XDIGIT [0-9a-fA-F] +W [\t\n\r ] + +%% +\'.\' { yylval.rune = (unsigned char)yytext[1]; + return(RUNE); } + +'\\a' { yylval.rune = '\a'; + return(RUNE); } +'\\b' { yylval.rune = '\b'; + return(RUNE); } +'\\f' { yylval.rune = '\f'; + return(RUNE); } +'\\n' { yylval.rune = '\n'; + return(RUNE); } +'\\r' { yylval.rune = '\r'; + return(RUNE); } +'\\t' { yylval.rune = '\t'; + return(RUNE); } +'\\v' { yylval.rune = '\v'; + return(RUNE); } + +0x{XDIGIT}+ { yylval.rune = strtol(yytext, 0, 16); + return(RUNE); } +0{ODIGIT}+ { yylval.rune = strtol(yytext, 0, 8); + return(RUNE); } +{DIGIT}+ { yylval.rune = strtol(yytext, 0, 10); + return(RUNE); } + + +MAPLOWER { return(MAPLOWER); } +MAPUPPER { return(MAPUPPER); } +TODIGIT { return(DIGITMAP); } +INVALID { return(INVALID); } + +ALPHA { yylval.i = _CTYPE_A|_CTYPE_R|_CTYPE_G; + return(LIST); } +CONTROL { yylval.i = _CTYPE_C; + return(LIST); } +DIGIT { yylval.i = _CTYPE_D|_CTYPE_R|_CTYPE_G; + return(LIST); } +GRAPH { yylval.i = _CTYPE_G|_CTYPE_R; + return(LIST); } +LOWER { yylval.i = _CTYPE_L|_CTYPE_R|_CTYPE_G; + return(LIST); } +PUNCT { yylval.i = _CTYPE_P|_CTYPE_R|_CTYPE_G; + return(LIST); } +SPACE { yylval.i = _CTYPE_S; + return(LIST); } +UPPER { yylval.i = _CTYPE_U|_CTYPE_R|_CTYPE_G; + return(LIST); } +XDIGIT { yylval.i = _CTYPE_X|_CTYPE_R|_CTYPE_G; + return(LIST); } +BLANK { yylval.i = _CTYPE_B; + return(LIST); } +PRINT { yylval.i = _CTYPE_R; + return(LIST); } +IDEOGRAM { yylval.i = _CTYPE_I|_CTYPE_R|_CTYPE_G; + return(LIST); } +SPECIAL { yylval.i = _CTYPE_T|_CTYPE_R|_CTYPE_G; + return(LIST); } +PHONOGRAM { yylval.i = _CTYPE_Q|_CTYPE_R|_CTYPE_G; + return(LIST); } +SWIDTH0 
{ yylval.i = _CTYPE_SW0; return(LIST); }
+SWIDTH1		{ yylval.i = _CTYPE_SW1; return(LIST); }
+SWIDTH2		{ yylval.i = _CTYPE_SW2; return(LIST); }
+SWIDTH3		{ yylval.i = _CTYPE_SW3; return(LIST); }
+
+VARIABLE[\t ]	{ /*
+		   * Hand the rest of the line to the parser as the
+		   * VARIABLE data.  The character is kept in an int so
+		   * that end of input (EOF or 0, depending on the lex
+		   * implementation) cannot be mistaken for data, and
+		   * the copy is bounded by the size of vbuf.
+		   */
+		  static char vbuf[1024];
+		  char *v = vbuf;
+		  int c;
+		  while ((c = input()) != EOF && c != 0 && c != '\n') {
+			if (v == vbuf + sizeof(vbuf) - 1) {
+				fprintf(stderr, "VARIABLE data is too long\n");
+				exit(1);
+			}
+			*v++ = (char)c;
+		  }
+		  /* Leave the newline for the whitespace rule. */
+		  if (c == '\n')
+			unput(c);
+		  *v = 0;
+		  yylval.str = vbuf;
+		  return(VARIABLE);
+		}
+
+ENCODING	{ return(ENCODING); }
+
+\".*\"		{ char *e = yytext + 1;
+		  yylval.str = e;
+		  while (*e && *e != '"')
+			++e;
+		  *e = 0;
+		  return(STRING); }
+
+\<|\(|\[	{ return(LBRK); }
+
+\>|\)|\]	{ return(RBRK); }
+
+\-		{ return(THRU); }
+\.\.\.		{ return(THRU); }
+
+\:		{ return(':'); }
+
+{W}+		;
+
+^\#.*\n		;
+\/\*		{ /*
+		   * Skip a C-style comment.  Stop at end of input
+		   * rather than spinning forever when the comment is
+		   * never closed.
+		   */
+		  int c, prev = 0;
+		  while ((c = input()) != EOF && c != 0) {
+			if (prev == '*' && c == '/')
+				break;
+			prev = c;
+		  }
+		  if (c == EOF || c == 0)
+			fprintf(stderr, "unterminated comment\n");
+		}
+
+\\$		;
+.		{ printf("Lex is skipping '%s'\n", yytext); }
+%%
+
+#if !defined(yywrap)
+int
+yywrap(void)
+{
+	return(1);
+}
+#endif
diff --git a/usr.bin/mklocale/mklocale.1 b/usr.bin/mklocale/mklocale.1
new file mode 100644
index 0000000..ceab25a
--- /dev/null
+++ b/usr.bin/mklocale/mklocale.1
@@ -0,0 +1,304 @@
+.\" Copyright (c) 1993, 1994
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" Paul Borman at Krystal Technologies.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 4.
Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)mklocale.1 8.2 (Berkeley) 4/18/94 +.\" $FreeBSD$ +.\" +.Dd October 17, 2004 +.Dt MKLOCALE 1 +.Os +.Sh NAME +.Nm mklocale +.Nd make LC_CTYPE locale files +.Sh SYNOPSIS +.Nm +.Op Fl d +.Ar "< src-file" +.Ar "> language/LC_CTYPE" +.Nm +.Op Fl d +.Fl o +.Ar language/LC_CTYPE +.Ar src-file +.Sh DESCRIPTION +The +.Nm +utility reads an +.Dv LC_CTYPE +source file from standard input and produces an +.Dv LC_CTYPE +binary file on standard output suitable for placement in +.Pa /usr/share/locale/ Ns Ar language Ns Pa /LC_CTYPE . +.Pp +The format of +.Ar src-file +is quite simple. +It consists of a series of lines which start with a keyword and have +associated data following. +C-style comments are used +to place comments in the file. +.Pp +The following options are available: +.Bl -tag -width indent +.It Fl d +Turns on debugging messages. +.It Fl o +Specify output file.
+.El +.Pp +Besides the keywords which will be listed below, +the following are valid tokens in +.Ar src-file : +.Bl -tag -width ".Ar literal" +.It Dv RUNE +A +.Dv RUNE +may be any of the following: +.Bl -tag -width ".Ar 0x[0-9a-z]*" +.It Ar 'x' +The ASCII character +.Ar x . +.It Ar '\ex' +The ANSI C character +.Ar \ex +where +.Ar \ex +is one of +.Dv \ea , +.Dv \eb , +.Dv \ef , +.Dv \en , +.Dv \er , +.Dv \et , +or +.Dv \ev . +.It Ar 0x[0-9a-fA-F]+ +A hexadecimal number representing a rune code. +.It Ar 0[0-7]* +An octal number representing a rune code. +.It Ar [1-9][0-9]* +A decimal number representing a rune code. +.El +.It Dv STRING +A string enclosed in double quotes ("). +.It Dv THRU +Either +.Dv ... +or +.Dv - . +Used to indicate ranges. +.It Ar literal +The following characters are taken literally: +.Bl -tag -width ".Dv <\|\|(\|\|[" +.It Dv "<\|(\|[" +Used to start a mapping. +All are equivalent. +.It Dv ">\|\^)\|]" +Used to end a mapping. +All are equivalent. +.It Dv ":" +Used as a delimiter in mappings. +.El +.El +.Pp +Keywords which should only appear once are: +.Bl -tag -width ".Dv PHONOGRAM" +.It Dv ENCODING +Followed by a +.Dv STRING +which indicates the encoding mechanism to be used for this locale. +The current encodings are: +.Bl -tag -width ".Dv MSKanji" +.It Dv ASCII +American Standard Code for Information Interchange. +.It Dv BIG5 +The +.Dq Big5 +encoding of Chinese. +.It Dv EUC +.Dv EUC +encoding as used by several +vendors of +.Ux +systems. +.It Dv GB18030 +PRC national standard for encoding of Chinese text. +.It Dv GB2312 +Older PRC national standard for encoding Chinese text. +.It Dv GBK +A widely used encoding method for Chinese text, +backwards compatible with GB\ 2312-1980. +.It Dv MSKanji +The method of encoding Japanese used by Microsoft, +loosely based on JIS. +Also known as +.Dq "Shift JIS" +and +.Dq SJIS . +.It Dv NONE +No translation; this is the default.
+.It Dv UTF-8 +The +.Dv UTF-8 +transformation format of +.Tn ISO +10646 +as defined by RFC 2279. +.El +.It Dv VARIABLE +This keyword must be followed by a single tab or space character, +after which encoding specific data is placed. +Currently only the +.Dv "EUC" +encoding requires variable data. +See +.Xr euc 5 +for further details. +.It Dv INVALID +(obsolete) +A single +.Dv RUNE +follows and is used as the invalid rune for this locale. +.El +.Pp +The following keywords may appear multiple times and have the following +format for data: +.Bl -tag -width ".Dv <RUNE1 THRU RUNEn : RUNE2>" -offset indent +.It Dv <RUNE1 RUNE2> +.Dv RUNE1 +is mapped to +.Dv RUNE2 . +.It Dv <RUNE1 THRU RUNEn : RUNE2> +Runes +.Dv RUNE1 +through +.Dv RUNEn +are mapped to +.Dv RUNE2 +through +.Dv RUNE2 ++ n-1. +.El +.Bl -tag -width ".Dv PHONOGRAM" +.It Dv MAPLOWER +Defines the tolower mappings. +.Dv RUNE2 +is the lower case representation of +.Dv RUNE1 . +.It Dv MAPUPPER +Defines the toupper mappings. +.Dv RUNE2 +is the upper case representation of +.Dv RUNE1 . +.It Dv TODIGIT +Defines a map from runes to their digit value. +.Dv RUNE2 +is the integer value represented by +.Dv RUNE1 . +For example, the ASCII character +.Ql 0 +would map to the decimal value 0. +Only values up to 255 +are allowed. +.El +.Pp +The following keywords may appear multiple times and have the following +format for data: +.Bl -tag -width ".Dv RUNE1 THRU RUNEn" -offset indent +.It Dv RUNE +This rune has the property defined by the keyword. +.It Dv "RUNE1 THRU RUNEn" +All the runes between and including +.Dv RUNE1 +and +.Dv RUNEn +have the property defined by the keyword. +.El +.Bl -tag -width ".Dv PHONOGRAM" +.It Dv ALPHA +Defines runes which are alphabetic, printable and graphic. +.It Dv CONTROL +Defines runes which are control characters. +.It Dv DIGIT +Defines runes which are decimal digits, printable and graphic. +.It Dv GRAPH +Defines runes which are graphic and printable. 
+.It Dv LOWER +Defines runes which are lower case, printable and graphic. +.It Dv PUNCT +Defines runes which are punctuation, printable and graphic. +.It Dv SPACE +Defines runes which are spaces. +.It Dv UPPER +Defines runes which are upper case, printable and graphic. +.It Dv XDIGIT +Defines runes which are hexadecimal digits, printable and graphic. +.It Dv BLANK +Defines runes which are blank. +.It Dv PRINT +Defines runes which are printable. +.It Dv IDEOGRAM +Defines runes which are ideograms, printable and graphic. +.It Dv SPECIAL +Defines runes which are special characters, printable and graphic. +.It Dv PHONOGRAM +Defines runes which are phonograms, printable and graphic. +.It Dv SWIDTH0 +Defines runes with display width 0. +.It Dv SWIDTH1 +Defines runes with display width 1. +.It Dv SWIDTH2 +Defines runes with display width 2. +.It Dv SWIDTH3 +Defines runes with display width 3. +.El +.Pp +If no display width is explicitly defined, width 1 is assumed +for printable runes by default. +.Sh SEE ALSO +.Xr colldef 1 , +.Xr setlocale 3 , +.Xr wcwidth 3 , +.Xr big5 5 , +.Xr euc 5 , +.Xr gb18030 5 , +.Xr gb2312 5 , +.Xr gbk 5 , +.Xr mskanji 5 , +.Xr utf8 5 +.Sh HISTORY +The +.Nm +utility first appeared in +.Bx 4.4 . +.Sh BUGS +The +.Nm +utility is overly simplistic. diff --git a/usr.bin/mklocale/yacc.y b/usr.bin/mklocale/yacc.y new file mode 100644 index 0000000..c5304c9 --- /dev/null +++ b/usr.bin/mklocale/yacc.y @@ -0,0 +1,869 @@ +%{ +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)yacc.y 8.1 (Berkeley) 6/6/93"; +#endif /* 0 */ +#endif /* not lint */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <arpa/inet.h> + +#include <ctype.h> +#include <err.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "ldef.h" +#include "extern.h" +#include "runefile.h" + +static void *xmalloc(unsigned int sz); +static uint32_t *xlalloc(unsigned int sz); +void yyerror(const char *s); +static uint32_t *xrelalloc(uint32_t *old, unsigned int sz); +static void dump_tables(void); +static void cleanout(void); + +const char *locale_file = "<stdout>"; + +rune_map maplower = { { 0 }, NULL }; +rune_map mapupper = { { 0 }, NULL }; +rune_map types = { { 0 }, NULL }; + +_FileRuneLocale new_locale = { "", "", {}, {}, {}, 0, 0, 0, 0 }; +char *variable = NULL; + +void set_map(rune_map *, rune_list *, uint32_t); +void set_digitmap(rune_map *, rune_list *); +void add_map(rune_map *, rune_list *, uint32_t); +static void usage(void); +%} + +%union { + int32_t rune; + int i; + char *str; + + rune_list *list; +} + +%token <rune> RUNE +%token LBRK +%token RBRK +%token THRU +%token MAPLOWER +%token MAPUPPER +%token DIGITMAP +%token <i> LIST +%token <str> VARIABLE +%token ENCODING +%token INVALID +%token <str> STRING + +%type <list> list +%type <list> map + + +%% + +locale : /* empty */ + | table + { dump_tables(); } + ; + +table : entry + | table entry + ; + +entry : ENCODING STRING + { if (strcmp($2, "NONE") && + strcmp($2, "ASCII") && + strcmp($2, "UTF-8") && + strcmp($2, "EUC") && + strcmp($2, "GBK") && + strcmp($2, "GB18030") && + strcmp($2, "GB2312") && + strcmp($2, "BIG5") && + strcmp($2, "MSKanji")) + warnx("ENCODING %s is not supported by libc", $2); + strncpy(new_locale.encoding, $2, + sizeof(new_locale.encoding)); } + | VARIABLE + { new_locale.variable_len = strlen($1) + 1; + variable = xmalloc(new_locale.variable_len); + strcpy(variable, 
$1); + } + | INVALID RUNE + { warnx("the INVALID keyword is deprecated"); } + | LIST list + { set_map(&types, $2, $1); } + | MAPLOWER map + { set_map(&maplower, $2, 0); } + | MAPUPPER map + { set_map(&mapupper, $2, 0); } + | DIGITMAP map + { set_digitmap(&types, $2); } + ; + +list : RUNE + { + $$ = (rune_list *)xmalloc(sizeof(rune_list)); + $$->min = $1; + $$->max = $1; + $$->next = 0; + } + | RUNE THRU RUNE + { + $$ = (rune_list *)xmalloc(sizeof(rune_list)); + $$->min = $1; + $$->max = $3; + $$->next = 0; + } + | list RUNE + { + $$ = (rune_list *)xmalloc(sizeof(rune_list)); + $$->min = $2; + $$->max = $2; + $$->next = $1; + } + | list RUNE THRU RUNE + { + $$ = (rune_list *)xmalloc(sizeof(rune_list)); + $$->min = $2; + $$->max = $4; + $$->next = $1; + } + ; + +map : LBRK RUNE RUNE RBRK + { + $$ = (rune_list *)xmalloc(sizeof(rune_list)); + $$->min = $2; + $$->max = $2; + $$->map = $3; + $$->next = 0; + } + | map LBRK RUNE RUNE RBRK + { + $$ = (rune_list *)xmalloc(sizeof(rune_list)); + $$->min = $3; + $$->max = $3; + $$->map = $4; + $$->next = $1; + } + | LBRK RUNE THRU RUNE ':' RUNE RBRK + { + $$ = (rune_list *)xmalloc(sizeof(rune_list)); + $$->min = $2; + $$->max = $4; + $$->map = $6; + $$->next = 0; + } + | map LBRK RUNE THRU RUNE ':' RUNE RBRK + { + $$ = (rune_list *)xmalloc(sizeof(rune_list)); + $$->min = $3; + $$->max = $5; + $$->map = $7; + $$->next = $1; + } + ; +%% + +int debug; +FILE *fp; + +static void +cleanout(void) +{ + if (fp != NULL) + unlink(locale_file); +} + +int +main(int ac, char *av[]) +{ + int x; + + fp = stdout; + + while ((x = getopt(ac, av, "do:")) != -1) { + switch(x) { + case 'd': + debug = 1; + break; + case 'o': + locale_file = optarg; + if ((fp = fopen(locale_file, "w")) == NULL) + err(1, "%s", locale_file); + atexit(cleanout); + break; + default: + usage(); + } + } + + switch (ac - optind) { + case 0: + break; + case 1: + if (freopen(av[optind], "r", stdin) == 0) + err(1, "%s", av[optind]); + break; + default: + usage(); + } + for (x = 
0; x < _CACHED_RUNES; ++x) {
+		mapupper.map[x] = x;
+		maplower.map[x] = x;
+	}
+	memcpy(new_locale.magic, _FILE_RUNE_MAGIC_1, sizeof(new_locale.magic));
+
+	/*
+	 * Report a failed parse through the exit status; yyerror() has
+	 * already printed the message.
+	 */
+	if (yyparse() != 0)
+		return(1);
+
+	return(0);
+}
+
+/* Print the command-line synopsis to stderr and exit with status 1. */
+static void
+usage(void)
+{
+	fprintf(stderr, "usage: mklocale [-d] [-o output] [source]\n");
+	exit(1);
+}
+
+/* Parser error callback: print the message s on stderr. */
+void
+yyerror(const char *s)
+{
+	fprintf(stderr, "%s\n", s);
+}
+
+/* malloc() sz bytes; exits through errx() instead of returning NULL. */
+static void *
+xmalloc(unsigned int sz)
+{
+	void *r = malloc(sz);
+	if (!r)
+		errx(1, "xmalloc");
+	return(r);
+}
+
+/* Allocate an array of sz uint32_t; exits through errx() on failure. */
+static uint32_t *
+xlalloc(unsigned int sz)
+{
+	uint32_t *r = (uint32_t *)malloc(sz * sizeof(uint32_t));
+	if (!r)
+		errx(1, "xlalloc");
+	return(r);
+}
+
+/* Resize old to hold sz uint32_t; exits through errx() on failure. */
+static uint32_t *
+xrelalloc(uint32_t *old, unsigned int sz)
+{
+	uint32_t *r = (uint32_t *)realloc((char *)old,
+	    sz * sizeof(uint32_t));
+	if (!r)
+		errx(1, "xrelalloc");
+	return(r);
+}
+
+/*
+ * Hand every node of list to add_map().  The next pointer is saved
+ * first because add_map() frees or relinks the node it is given.
+ */
+void
+set_map(rune_map *map, rune_list *list, uint32_t flag)
+{
+	while (list) {
+		rune_list *nlist = list->next;
+		add_map(map, list, flag);
+		list = nlist;
+	}
+}
+
+/*
+ * Record TODIGIT mappings: for each rune in each range of list, OR its
+ * digit value (list->map plus the offset into the range) into the type
+ * word for that rune.  A value of zero adds nothing and is skipped.
+ * The nodes of list are freed.
+ */
+void
+set_digitmap(rune_map *map, rune_list *list)
+{
+	int32_t i;
+
+	while (list) {
+		rune_list *nlist = list->next;
+		for (i = list->min; i <= list->max; ++i) {
+			uint32_t digit = (uint32_t)(list->map + (i - list->min));
+
+			if (digit == 0)
+				continue;
+			/*
+			 * The value shares the type word with the class
+			 * flags and only the low byte is reserved for it.
+			 */
+			if (digit > 0xff)
+				errx(1, "error: TODIGIT value %u is out of range",
+				    (unsigned int)digit);
+
+			rune_list *tmp = (rune_list *)xmalloc(sizeof(rune_list));
+			tmp->min = i;
+			tmp->max = i;
+			tmp->map = 0;
+			tmp->types = NULL;
+			tmp->next = NULL;
+			add_map(map, tmp, digit);
+		}
+		free(list);
+		list = nlist;
+	}
+}
+
+/*
+ * Merge the range list->min..list->max into map.  Runes below
+ * _CACHED_RUNES are stored directly in map->map[]; whatever remains is
+ * linked into the range list at map->root, merging with ranges it
+ * touches.  A non-zero flag is OR'ed into the type words; a zero flag
+ * means list->map is a case mapping.  The node is either linked in or
+ * freed.
+ */
+void
+add_map(rune_map *map, rune_list *list, uint32_t flag)
+{
+	int32_t i;
+	rune_list *lr = 0;
+	rune_list *r;
+	int32_t run;
+
+	while (list->min < _CACHED_RUNES && list->min <= list->max) {
+		if (flag)
+			map->map[list->min++] |= flag;
+		else
+			map->map[list->min++] = list->map++;
+	}
+
+	if (list->min > list->max) {
+		free(list);
+		return;
+	}
+
+	run = list->max - list->min + 1;
+
+	if (!(r = map->root) || (list->max < r->min - 1)
+	    || (!flag && list->max == r->min - 1)) {
+		if (flag) {
+			list->types = xlalloc(run);
+			for (i = 0; i < run; ++i)
+				list->types[i] = flag;
+		}
+		list->next =
map->root; + map->root = list; + return; + } + + for (r = map->root; r && r->max + 1 < list->min; r = r->next) + lr = r; + + if (!r) { + /* + * We are off the end. + */ + if (flag) { + list->types = xlalloc(run); + for (i = 0; i < run; ++i) + list->types[i] = flag; + } + list->next = 0; + lr->next = list; + return; + } + + if (list->max < r->min - 1) { + /* + * We come before this range and we do not intersect it. + * We are not before the root node, it was checked before the loop + */ + if (flag) { + list->types = xlalloc(run); + for (i = 0; i < run; ++i) + list->types[i] = flag; + } + list->next = lr->next; + lr->next = list; + return; + } + + /* + * At this point we have found that we at least intersect with + * the range pointed to by `r', we might intersect with one or + * more ranges beyond `r' as well. + */ + + if (!flag && list->map - list->min != r->map - r->min) { + /* + * There are only two cases when we are doing case maps and + * our maps needn't have the same offset. When we are adjoining + * but not intersecting. + */ + if (list->max + 1 == r->min) { + lr->next = list; + list->next = r; + return; + } + if (list->min - 1 == r->max) { + list->next = r->next; + r->next = list; + return; + } + errx(1, "error: conflicting map entries"); + } + + if (list->min >= r->min && list->max <= r->max) { + /* + * Subset case. + */ + + if (flag) { + for (i = list->min; i <= list->max; ++i) + r->types[i - r->min] |= flag; + } + free(list); + return; + } + if (list->min <= r->min && list->max >= r->max) { + /* + * Superset case. Make him big enough to hold us. + * We might need to merge with the guy after him. 
+ */ + if (flag) { + list->types = xlalloc(list->max - list->min + 1); + + for (i = list->min; i <= list->max; ++i) + list->types[i - list->min] = flag; + + for (i = r->min; i <= r->max; ++i) + list->types[i - list->min] |= r->types[i - r->min]; + + free(r->types); + r->types = list->types; + } else { + r->map = list->map; + } + r->min = list->min; + r->max = list->max; + free(list); + } else if (list->min < r->min) { + /* + * Our tail intersects his head. + */ + if (flag) { + list->types = xlalloc(r->max - list->min + 1); + + for (i = r->min; i <= r->max; ++i) + list->types[i - list->min] = r->types[i - r->min]; + + for (i = list->min; i < r->min; ++i) + list->types[i - list->min] = flag; + + for (i = r->min; i <= list->max; ++i) + list->types[i - list->min] |= flag; + + free(r->types); + r->types = list->types; + } else { + r->map = list->map; + } + r->min = list->min; + free(list); + return; + } else { + /* + * Our head intersects his tail. + * We might need to merge with the guy after him. + */ + if (flag) { + r->types = xrelalloc(r->types, list->max - r->min + 1); + + for (i = list->min; i <= r->max; ++i) + r->types[i - r->min] |= flag; + + for (i = r->max+1; i <= list->max; ++i) + r->types[i - r->min] = flag; + } + r->max = list->max; + free(list); + } + + /* + * Okay, check to see if we grew into the next guy(s) + */ + while ((lr = r->next) && r->max >= lr->min) { + if (flag) { + if (r->max >= lr->max) { + /* + * Good, we consumed all of him. + */ + for (i = lr->min; i <= lr->max; ++i) + r->types[i - r->min] |= lr->types[i - lr->min]; + } else { + /* + * "append" him on to the end of us. 
+ */ + r->types = xrelalloc(r->types, lr->max - r->min + 1); + + for (i = lr->min; i <= r->max; ++i) + r->types[i - r->min] |= lr->types[i - lr->min]; + + for (i = r->max+1; i <= lr->max; ++i) + r->types[i - r->min] = lr->types[i - lr->min]; + + r->max = lr->max; + } + } else { + if (lr->max > r->max) + r->max = lr->max; + } + + r->next = lr->next; + + if (flag) + free(lr->types); + free(lr); + } +} + +static void +dump_tables(void) +{ + int x, first_d, curr_d; + rune_list *list; + + /* + * See if we can compress some of the istype arrays + */ + for(list = types.root; list; list = list->next) { + list->map = list->types[0]; + for (x = 1; x < list->max - list->min + 1; ++x) { + if ((int32_t)list->types[x] != list->map) { + list->map = 0; + break; + } + } + } + + first_d = curr_d = -1; + for (x = 0; x < _CACHED_RUNES; ++x) { + uint32_t r = types.map[x]; + + if (r & _CTYPE_D) { + if (first_d < 0) + first_d = curr_d = x; + else if (x != curr_d + 1) + errx(1, "error: DIGIT range is not contiguous"); + else if (x - first_d > 9) + errx(1, "error: DIGIT range is too big"); + else + curr_d++; + if (!(r & _CTYPE_X)) + errx(1, + "error: DIGIT range is not a subset of XDIGIT range"); + } + } + if (first_d < 0) + errx(1, "error: no DIGIT range defined in the single byte area"); + else if (curr_d - first_d < 9) + errx(1, "error: DIGIT range is too small in the single byte area"); + + /* + * Fill in our tables. Do this in network order so that + * diverse machines have a chance of sharing data. + * (Machines like Crays cannot share with little machines due to + * word size. Sigh. We tried.) + */ + for (x = 0; x < _CACHED_RUNES; ++x) { + new_locale.runetype[x] = htonl(types.map[x]); + new_locale.maplower[x] = htonl(maplower.map[x]); + new_locale.mapupper[x] = htonl(mapupper.map[x]); + } + + /* + * Count up how many ranges we will need for each of the extents. 
+ */ + list = types.root; + + while (list) { + new_locale.runetype_ext_nranges++; + list = list->next; + } + new_locale.runetype_ext_nranges = + htonl(new_locale.runetype_ext_nranges); + + list = maplower.root; + + while (list) { + new_locale.maplower_ext_nranges++; + list = list->next; + } + new_locale.maplower_ext_nranges = + htonl(new_locale.maplower_ext_nranges); + + list = mapupper.root; + + while (list) { + new_locale.mapupper_ext_nranges++; + list = list->next; + } + new_locale.mapupper_ext_nranges = + htonl(new_locale.mapupper_ext_nranges); + + new_locale.variable_len = htonl(new_locale.variable_len); + + /* + * Okay, we are now ready to write the new locale file. + */ + + /* + * PART 1: The _FileRuneLocale structure + */ + if (fwrite((char *)&new_locale, sizeof(new_locale), 1, fp) != 1) { + perror(locale_file); + exit(1); + } + /* + * PART 2: The runetype_ext structures (not the actual tables) + */ + list = types.root; + + while (list) { + _FileRuneEntry re; + + re.min = htonl(list->min); + re.max = htonl(list->max); + re.map = htonl(list->map); + + if (fwrite((char *)&re, sizeof(re), 1, fp) != 1) { + perror(locale_file); + exit(1); + } + + list = list->next; + } + /* + * PART 3: The maplower_ext structures + */ + list = maplower.root; + + while (list) { + _FileRuneEntry re; + + re.min = htonl(list->min); + re.max = htonl(list->max); + re.map = htonl(list->map); + + if (fwrite((char *)&re, sizeof(re), 1, fp) != 1) { + perror(locale_file); + exit(1); + } + + list = list->next; + } + /* + * PART 4: The mapupper_ext structures + */ + list = mapupper.root; + + while (list) { + _FileRuneEntry re; + + re.min = htonl(list->min); + re.max = htonl(list->max); + re.map = htonl(list->map); + + if (fwrite((char *)&re, sizeof(re), 1, fp) != 1) { + perror(locale_file); + exit(1); + } + + list = list->next; + } + /* + * PART 5: The runetype_ext tables + */ + list = types.root; + + while (list) { + for (x = 0; x < list->max - list->min + 1; ++x) + list->types[x] = 
htonl(list->types[x]); + + if (!list->map) { + if (fwrite((char *)list->types, + (list->max - list->min + 1) * sizeof(uint32_t), + 1, fp) != 1) { + perror(locale_file); + exit(1); + } + } + list = list->next; + } + /* + * PART 6: And finally the variable data + */ + if (new_locale.variable_len != 0 && + fwrite(variable, ntohl(new_locale.variable_len), 1, fp) != 1) { + perror(locale_file); + exit(1); + } + if (fclose(fp) != 0) { + perror(locale_file); + exit(1); + } + fp = NULL; + + if (!debug) + return; + + if (new_locale.encoding[0]) + fprintf(stderr, "ENCODING %s\n", new_locale.encoding); + if (variable) + fprintf(stderr, "VARIABLE %s\n", variable); + + fprintf(stderr, "\nMAPLOWER:\n\n"); + + for (x = 0; x < _CACHED_RUNES; ++x) { + if (isprint(maplower.map[x])) + fprintf(stderr, " '%c'", (int)maplower.map[x]); + else if (maplower.map[x]) + fprintf(stderr, "%04x", maplower.map[x]); + else + fprintf(stderr, "%4x", 0); + if ((x & 0xf) == 0xf) + fprintf(stderr, "\n"); + else + fprintf(stderr, " "); + } + fprintf(stderr, "\n"); + + for (list = maplower.root; list; list = list->next) + fprintf(stderr, "\t%04x - %04x : %04x\n", list->min, list->max, list->map); + + fprintf(stderr, "\nMAPUPPER:\n\n"); + + for (x = 0; x < _CACHED_RUNES; ++x) { + if (isprint(mapupper.map[x])) + fprintf(stderr, " '%c'", (int)mapupper.map[x]); + else if (mapupper.map[x]) + fprintf(stderr, "%04x", mapupper.map[x]); + else + fprintf(stderr, "%4x", 0); + if ((x & 0xf) == 0xf) + fprintf(stderr, "\n"); + else + fprintf(stderr, " "); + } + fprintf(stderr, "\n"); + + for (list = mapupper.root; list; list = list->next) + fprintf(stderr, "\t%04x - %04x : %04x\n", list->min, list->max, list->map); + + + fprintf(stderr, "\nTYPES:\n\n"); + + for (x = 0; x < _CACHED_RUNES; ++x) { + uint32_t r = types.map[x]; + + if (r) { + if (isprint(x)) + fprintf(stderr, " '%c': %2d", x, (int)(r & 0xff)); + else + fprintf(stderr, "%04x: %2d", x, (int)(r & 0xff)); + + fprintf(stderr, " %4s", (r & _CTYPE_A) ? 
"alph" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_C) ? "ctrl" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_D) ? "dig" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_G) ? "graf" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_L) ? "low" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_P) ? "punc" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_S) ? "spac" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_U) ? "upp" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_X) ? "xdig" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_B) ? "blnk" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_R) ? "prnt" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_I) ? "ideo" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_T) ? "spec" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_Q) ? "phon" : ""); + fprintf(stderr, "\n"); + } + } + + for (list = types.root; list; list = list->next) { + if (list->map && list->min + 3 < list->max) { + uint32_t r = list->map; + + fprintf(stderr, "%04x: %2d", + (uint32_t)list->min, (int)(r & 0xff)); + + fprintf(stderr, " %4s", (r & _CTYPE_A) ? "alph" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_C) ? "ctrl" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_D) ? "dig" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_G) ? "graf" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_L) ? "low" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_P) ? "punc" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_S) ? "spac" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_U) ? "upp" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_X) ? "xdig" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_B) ? "blnk" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_R) ? "prnt" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_I) ? "ideo" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_T) ? "spec" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_Q) ? "phon" : ""); + fprintf(stderr, "\n...\n"); + + fprintf(stderr, "%04x: %2d", + (uint32_t)list->max, (int)(r & 0xff)); + + fprintf(stderr, " %4s", (r & _CTYPE_A) ? "alph" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_C) ? 
"ctrl" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_D) ? "dig" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_G) ? "graf" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_L) ? "low" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_P) ? "punc" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_S) ? "spac" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_U) ? "upp" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_X) ? "xdig" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_B) ? "blnk" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_R) ? "prnt" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_I) ? "ideo" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_T) ? "spec" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_Q) ? "phon" : ""); + fprintf(stderr, "\n"); + } else + for (x = list->min; x <= list->max; ++x) { + uint32_t r = ntohl(list->types[x - list->min]); + + if (r) { + fprintf(stderr, "%04x: %2d", x, (int)(r & 0xff)); + + fprintf(stderr, " %4s", (r & _CTYPE_A) ? "alph" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_C) ? "ctrl" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_D) ? "dig" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_G) ? "graf" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_L) ? "low" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_P) ? "punc" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_S) ? "spac" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_U) ? "upp" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_X) ? "xdig" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_B) ? "blnk" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_R) ? "prnt" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_I) ? "ideo" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_T) ? "spec" : ""); + fprintf(stderr, " %4s", (r & _CTYPE_Q) ? "phon" : ""); + fprintf(stderr, "\n"); + } + } + } +} |