summaryrefslogtreecommitdiffstats
path: root/usr.bin/colldef
diff options
context:
space:
mode:
authorache <ache@FreeBSD.org>1996-10-15 22:05:18 +0000
committerache <ache@FreeBSD.org>1996-10-15 22:05:18 +0000
commit09ecea6256343d7919a1a4c0f715ae15056afb4d (patch)
treed40ae2e044f66de5e98964bf031937e4cfb7da98 /usr.bin/colldef
parent5b6e3c50cb4801e4ad7b4c615bb5e03d748ea841 (diff)
downloadFreeBSD-src-09ecea6256343d7919a1a4c0f715ae15056afb4d.zip
FreeBSD-src-09ecea6256343d7919a1a4c0f715ae15056afb4d.tar.gz
Save half of LC_COLLATE space
ASCIIfy data files, use RFC1345 symbolic names instead of hex codes Improve parser and manpage
Diffstat (limited to 'usr.bin/colldef')
-rw-r--r--usr.bin/colldef/colldef.1131
-rw-r--r--usr.bin/colldef/data/Makefile8
-rw-r--r--usr.bin/colldef/data/lt_LN.ISO_8859-1.src45
-rw-r--r--usr.bin/colldef/data/map.CP866174
-rw-r--r--usr.bin/colldef/data/map.ISO_8859-1174
-rw-r--r--usr.bin/colldef/data/map.KOI8-R174
-rw-r--r--usr.bin/colldef/data/ru_SU.CP866.src39
-rw-r--r--usr.bin/colldef/data/ru_SU.KOI8-R.src37
-rw-r--r--usr.bin/colldef/parse.y300
-rw-r--r--usr.bin/colldef/scan.l194
10 files changed, 1073 insertions, 203 deletions
diff --git a/usr.bin/colldef/colldef.1 b/usr.bin/colldef/colldef.1
index 5cb3b3a..4c44cf7 100644
--- a/usr.bin/colldef/colldef.1
+++ b/usr.bin/colldef/colldef.1
@@ -31,7 +31,9 @@
.Nd convert collation sequence source definition
.Sh SYNOPSIS
.Nm colldef
-.Ar [-o out_file] [filename]
+.Op Fl I Ar map_dir
+.Op Fl o Ar out_file
+.Op Ar filename
.Sh DESCRIPTION
.Ar colldef
converts a collation sequence source definition
@@ -61,27 +63,46 @@ The output file produced contains the
database with collating sequence information in a form
usable by system commands and routines.
.Pp
+Options list:
+.Bl -tag -width 4n
+.It Cm Fl I Ar map_dir
+This option sets the name of the directory where
+.Ar charmap
+files can be found, current directory by default.
+.It Cm Fl o Ar out_file
+This option sets the output file name,
+.Ar LC_COLLATE
+by default.
+.El
+.Pp
The collation sequence definition specifies a set of collating elements and
the rules defining how strings containing these should be ordered.
This is most useful for different language definitions.
.Pp
The specification file can consist of three statements:
-.Ar charmap
-,
+.Ar charmap ,
.Ar substitute
-, and
+and
+.Ar order .
+.Pp
+Of these, only the
.Ar order
-. Of these, only the order
-statement is required. When charmap or substitute is
+statement is required. When
+.Ar charmap
+or
+.Ar substitute
+is
supplied, these statements must be ordered as above. Any
statements after the order statement are ignored.
.Pp
-Lines in the specification file beginning with a # are
+Lines in the specification file beginning with a
+.Ar #
+are
treated as comments and are ignored. Blank lines are also
ignored.
.Pp
.Ar charmap charmapfile
-
+.Pp
.Ar charmap
defines where a mapping of the character
and collating element symbols to the actual
@@ -92,84 +113,106 @@ The format of
is shown below. Symbol
names are separated from their values by TAB or
SPACE characters. symbol-value can be specified in
-a hexadecimal (\ex??) or octal (\e???)
+a hexadecimal (\ex\fI??\fR) or octal (\e\fI???\fR)
representation, and can be only one character in length.
.Pp
.Ar symbol-name1 symbol-value1
-
+.br
.Ar symbol-name2 symbol-value2
-
+.br
.Ar ...
-
.Pp
-Symbol names cannot be specified in substitute
+Symbol names cannot be specified in
+.Ar substitute
fields. Symbol names also cannot be combined with
any other representation, such as, <c>h, c<h>,
<c>\ex68, or <c><h>. Symbol names can be used with
primary and secondary ordering as in the following
example.
.Pp
-The charmap statement is optional.
+The
+.Ar charmap
+statement is optional.
+.Pp
+.Ar substitute
+"\fIchar\fR"
+.Ar with
+"\fIrepl\fR"
.Pp
-.Ar substitute char with repl
The
.Ar substitute
statement substitutes the character
-
.Ar char
with the string
-.Ar repl
-.
+.Ar repl .
.Pp
-The substitute statement is optional.
+The
+.Ar substitute
+statement is optional.
.Pp
.Ar order order_list
-
+.Pp
.Ar order_list
is a list of symbols, separated by semi colons, that defines the collating sequence. The
-special symbol,
+special symbol
.Ar ...
-, specifies, in a short-hand
+specifies, in a short-hand
form, symbols that are sequential in machine code
order.
.Pp
A symbol can be up to two characters in length and
can be represented in any one of the following
ways:
-.Bl -tag -width XX
-.It o The symbol itself (for example,
+.Bl -bullet
+.It
+The symbol itself (for example,
.Ar a
for the lower-case letter
-.Ar a
-).
-.It o The symbol chain (for example,
-.Ar abc
-)
-.It o In octal representation (for example,
+.Ar a )
+.It
+The symbol chain (for example,
+.Ar abc )
+.It
+In octal representation (for example,
.Ar \e141
for the letter
-.Ar a
-).
-.It o In hexadecimal representation (for example,
+.Ar a )
+.It
+In hexadecimal representation (for example,
.Ar \ex61
for the letter
-.Ar a
-).
-.It o The symbol name as defined in the charmap file (for example,
+.Ar a )
+.It
+The symbol name as defined in the
+.Ar charmap
+file (for example,
.Ar <abc>
for
-.Ar \e023 abc
+.Ar abc \e023
record in
-.Ar charmapfile
-).
-.It o Symbols
-.Ar \ea, \eb, \ef, \en, \er, \ev
-are permitted in its usual C-language meaning.
+.Ar charmapfile ) .
+If a character map name contains a
+.Ar >
+character, it must be escaped as
+.Ar /> ,
+and a single
+.Ar /
+must be escaped as
+.Ar // .
+.It
+Symbols
+.Ar \ea ,
+.Ar \eb ,
+.Ar \ef ,
+.Ar \en ,
+.Ar \er ,
+.Ar \ev
+are permitted with their usual C-language meanings.
.El
.Pp
-The backslash character,
+The backslash character
.Ar \e
-, is used for continuation. In this case, no characters are permitted
+is used for continuation. In this case, no characters are permitted
after the backslash character. And as a quotation mark.
.Pp
Symbols enclosed in parentheses are assigned the
diff --git a/usr.bin/colldef/data/Makefile b/usr.bin/colldef/data/Makefile
index 6e653c6..7114d57 100644
--- a/usr.bin/colldef/data/Makefile
+++ b/usr.bin/colldef/data/Makefile
@@ -1,4 +1,4 @@
-# $Id: Makefile,v 1.1 1995/11/03 14:53:50 ache Exp $
+# $Id: Makefile,v 1.2 1996/06/24 04:24:30 jkh Exp $
NOMAN=YES
CLEANFILES+= ${LOCALES:S/$/.out/g}
@@ -20,10 +20,14 @@ LATIN1LINKS = \
.SUFFIXES: .src .out
.src.out:
- ${COLLDEF} -o ${.TARGET} ${.IMPSRC}
+ ${COLLDEF} -I ${.CURDIR} -o ${.TARGET} ${.IMPSRC}
all: ${LOCALES:S/$/.out/g}
+ru_SU.KOI8-R.out: map.KOI8-R
+ru_SU.CP866.out: map.CP866
+lt_LN.ISO_8859-1.out: map.ISO_8859-1
+
afterinstall:
for l in ${LOCALES}; do \
${INSTALL} ${COPY} -m 644 -o ${BINOWN} -g ${BINGRP} $$l.out ${LOCALEDIR}/$$l/LC_COLLATE; \
diff --git a/usr.bin/colldef/data/lt_LN.ISO_8859-1.src b/usr.bin/colldef/data/lt_LN.ISO_8859-1.src
index 54a80b8..791b78b 100644
--- a/usr.bin/colldef/data/lt_LN.ISO_8859-1.src
+++ b/usr.bin/colldef/data/lt_LN.ISO_8859-1.src
@@ -1,31 +1,32 @@
# latin1
+charmap map.ISO_8859-1
order \
# spaces
- \xa0;\x20;\t;\v;\r;\n;\f;\
+ <NS>;<SP>;\t;\v;\r;\n;\f;\
# puncts
- _;\xaf;\xad;-;\,;\;;:;!;\xa1;?;\xbf;/;.;\xb4;`;^;\xa8;~;\xb7;\
- \xb8;';\";\xab;\xbb;\(;\);[;];\{;\};\xa7;\xb6;\xa9;\xae;@;\xa4;\
- \xa2;$;\xa3;\xa5;*;\\;&;\#;%;+;\xb1;\xf7;\xd7;\<;=;\>;\xac;|;\xa6;\
- \xb0;\xb5;\
+ _;<'m>;<-->;-;\,;\;;:;!;<!I>;?;<?I>;/;.;<''>;`;^;<':>;~;<.M>;\
+ <',>;';\";<<<>;</>/>>;\(;\);[;];\{;\};<SE>;<PI>;<Co>;<Rg>;@;<Cu>;\
+ <Ct>;$;<Pd>;<Ye>;*;\\;&;\#;%;+;<+->;<-:>;<*X>;\<;=;>;<NO>;|;<BB>;\
+ <DG>;<My>;\
# controls
- \x00;...;\x08;\x0e;...;\x1f;\x7f;\x80;...;\x9f;\
+ <NU>;...;<BS>;<SO>;...;<US>;<DT>;<PA>;...;<AC>;\
# digits
- 0;\xbc;\xbd;\xbe;1;\xb9;2;\xb2;3;\xb3;4;...;9;\
+ 0;<14>;<12>;<34>;1;<1S>;2;<2S>;3;<3S>;4;...;9;\
# capital
- A;\xc1;\xc0;\xc2;\xc5;\xc4;\xc3;\xc6;\
- B;C;\xc7;D;E;\xc9;\xc8;\xca;\xcb;\
- F;...;I;\xcd;\xcc;\xce;\xcf;\
- J;...;N;\xd1;O;\xd3;\xd2;\xd4;\xd6;\xd5;\xd8;\
- P;...;U;\xda;\xd9;\xdb;\xdc;\
- V;...;Y;\xdd;Z;\
- \xd0;\xde;\
+ A;<A'>;<A!>;<A/>>;<AA>;<A:>;<A?>;<AE>;\
+ B;C;<C,>;D;E;<E'>;<E!>;<E/>>;<E:>;\
+ F;...;I;<I'>;<I!>;<I/>>;<I:>;\
+ J;...;N;<N?>;O;<O'>;<O!>;<O/>>;<O:>;<O?>;<O//>;\
+ P;...;U;<U'>;<U!>;<U/>>;<U:>;\
+ V;...;Y;<Y'>;Z;\
+ <D->;<TH>;\
# small
- a;\xe1;\xe0;\xe2;\xe5;\xe4;\xe3;\xe6;\
- b;c;\xe7;d;e;\xe9;\xe8;\xea;\xeb;\
- f;...;i;\xed;\xec;\xee;\xef;\
- j;...;n;\xf1;o;\xf3;\xf2;\xf4;\xf6;\xf5;\xf8;\
- p;...;u;\xfa;\xf9;\xfb;\xfc;\
- v;...;y;\xfd;\xff;z;\
- \xf0;\xfe;\xdf;\
+ a;<a'>;<a!>;<a/>>;<aa>;<a:>;<a?>;<ae>;\
+ b;c;<c,>;d;e;<e'>;<e!>;<e/>>;<e:>;\
+ f;...;i;<i'>;<i!>;<i/>>;<i:>;\
+ j;...;n;<n?>;o;<o'>;<o!>;<o/>>;<o:>;<o?>;<o//>;\
+ p;...;u;<u'>;<u!>;<u/>>;<u:>;\
+ v;...;y;<y'>;<y:>;z;\
+ <d->;<th>;<ss>;\
# remains
- \xaa;\xba
+ <-a>;<-o>
diff --git a/usr.bin/colldef/data/map.CP866 b/usr.bin/colldef/data/map.CP866
new file mode 100644
index 0000000..0311771
--- /dev/null
+++ b/usr.bin/colldef/data/map.CP866
@@ -0,0 +1,174 @@
+NU \x00
+SH \x01
+SX \x02
+EX \x03
+ET \x04
+EQ \x05
+AK \x06
+BL \x07
+BS \x08
+HT \x09
+LF \x0a
+VT \x0b
+FF \x0c
+CR \x0d
+SO \x0e
+SI \x0f
+DL \x10
+D1 \x11
+D2 \x12
+D3 \x13
+D4 \x14
+NK \x15
+SY \x16
+EB \x17
+CN \x18
+EM \x19
+SB \x1a
+EC \x1b
+FS \x1c
+GS \x1d
+RS \x1e
+US \x1f
+SP \x20
+Nb \x23
+DO \x24
+At \x40
+<( \x5b
+// \x5c
+)> \x5d
+'> \x5e
+'! \x60
+(! \x7b
+!! \x7c
+!) \x7d
+'? \x7e
+DT \x7f
+hh \xc4
+vv \xb3
+dr \xda
+dl \xbf
+ur \xc0
+ul \xd9
+vr \xc3
+vl \xb4
+dh \xc2
+uh \xc1
+vh \xc5
+TB \xdf
+LB \xdc
+FB \xdb
+lB \xdd
+RB \xde
+.S \xb0
+:S \xb1
+?S \xb2
+Iu \xf4
+fS \xfe
+sb \xf9
+RT \xfb
+?2 \xf7
+=< \xf3
+>= \xf2
+NS \xff
+Il \xf5
+DG \xf8
+2S \xfd
+.M \xfa
+-: \xf6
+HH \xcd
+VV \xba
+dR \xd5
+io \xf1
+Dr \xd6
+DR \xc9
+dL \xb8
+Dl \xb7
+LD \xbb
+uR \xd4
+Ur \xd3
+UR \xc8
+uL \xbe
+Ul \xbd
+UL \xbc
+vR \xc6
+Vr \xc7
+VR \xcc
+vL \xb5
+IO \xf0
+Vl \xb6
+VL \xb9
+dH \xd1
+Dh \xd2
+DH \xcb
+uH \xcf
+Uh \xd0
+UH \xca
+vH \xd8
+Vh \xd7
+VH \xce
+Co \xfc
+ju \xee
+a= \xa0
+b= \xa1
+c= \xe6
+d= \xa4
+e= \xa5
+f= \xe4
+g= \xa3
+h= \xe5
+i= \xa8
+j= \xa9
+k= \xaa
+l= \xab
+m= \xac
+n= \xad
+o= \xae
+p= \xaf
+ja \xef
+r= \xe0
+s= \xe1
+t= \xe2
+u= \xe3
+z% \xa6
+v= \xa2
+%' \xec
+y= \xeb
+z= \xa7
+s% \xe8
+je \xed
+sc \xe9
+c% \xe7
+=' \xea
+JU \x9e
+A= \x80
+B= \x81
+C= \x96
+D= \x84
+E= \x85
+F= \x94
+G= \x83
+H= \x95
+I= \x88
+J= \x89
+K= \x8a
+L= \x8b
+M= \x8c
+N= \x8d
+O= \x8e
+P= \x8f
+JA \x9f
+R= \x90
+S= \x91
+T= \x92
+U= \x93
+Z% \x86
+V= \x82
+%" \x9c
+Y= \x9b
+Z= \x87
+S% \x98
+JE \x9d
+Sc \x99
+C% \x97
+=" \x9a
diff --git a/usr.bin/colldef/data/map.ISO_8859-1 b/usr.bin/colldef/data/map.ISO_8859-1
new file mode 100644
index 0000000..ee5a557
--- /dev/null
+++ b/usr.bin/colldef/data/map.ISO_8859-1
@@ -0,0 +1,174 @@
+NU \x00
+SH \x01
+SX \x02
+EX \x03
+ET \x04
+EQ \x05
+AK \x06
+BL \x07
+BS \x08
+HT \x09
+LF \x0a
+VT \x0b
+FF \x0c
+CR \x0d
+SO \x0e
+SI \x0f
+DL \x10
+D1 \x11
+D2 \x12
+D3 \x13
+D4 \x14
+NK \x15
+SY \x16
+EB \x17
+CN \x18
+EM \x19
+SB \x1a
+EC \x1b
+FS \x1c
+GS \x1d
+RS \x1e
+US \x1f
+SP \x20
+Nb \x23
+DO \x24
+At \x40
+<( \x5b
+// \x5c
+)> \x5d
+'> \x5e
+'! \x60
+(! \x7b
+!! \x7c
+!) \x7d
+'? \x7e
+DT \x7f
+PA \x80
+HO \x81
+BH \x82
+NH \x83
+IN \x84
+NL \x85
+SA \x86
+ES \x87
+HS \x88
+HJ \x89
+VS \x8a
+PD \x8b
+PU \x8c
+RI \x8d
+S2 \x8e
+S3 \x8f
+DC \x90
+P1 \x91
+P2 \x92
+TS \x93
+CC \x94
+MW \x95
+SG \x96
+EG \x97
+SS \x98
+GC \x99
+SC \x9a
+CI \x9b
+ST \x9c
+OC \x9d
+PM \x9e
+AC \x9f
+NS \xa0
+!I \xa1
+Ct \xa2
+Pd \xa3
+Cu \xa4
+Ye \xa5
+BB \xa6
+SE \xa7
+': \xa8
+Co \xa9
+-a \xaa
+<< \xab
+NO \xac
+-- \xad
+Rg \xae
+'m \xaf
+DG \xb0
++- \xb1
+2S \xb2
+3S \xb3
+'' \xb4
+My \xb5
+PI \xb6
+.M \xb7
+', \xb8
+1S \xb9
+-o \xba
+>> \xbb
+14 \xbc
+12 \xbd
+34 \xbe
+?I \xbf
+A! \xc0
+A' \xc1
+A> \xc2
+A? \xc3
+A: \xc4
+AA \xc5
+AE \xc6
+C, \xc7
+E! \xc8
+E' \xc9
+E> \xca
+E: \xcb
+I! \xcc
+I' \xcd
+I> \xce
+I: \xcf
+D- \xd0
+N? \xd1
+O! \xd2
+O' \xd3
+O> \xd4
+O? \xd5
+O: \xd6
+*X \xd7
+O/ \xd8
+U! \xd9
+U' \xda
+U> \xdb
+U: \xdc
+Y' \xdd
+TH \xde
+ss \xdf
+a! \xe0
+a' \xe1
+a> \xe2
+a? \xe3
+a: \xe4
+aa \xe5
+ae \xe6
+c, \xe7
+e! \xe8
+e' \xe9
+e> \xea
+e: \xeb
+i! \xec
+i' \xed
+i> \xee
+i: \xef
+d- \xf0
+n? \xf1
+o! \xf2
+o' \xf3
+o> \xf4
+o? \xf5
+o: \xf6
+-: \xf7
+o/ \xf8
+u! \xf9
+u' \xfa
+u> \xfb
+u: \xfc
+y' \xfd
+th \xfe
+y: \xff
diff --git a/usr.bin/colldef/data/map.KOI8-R b/usr.bin/colldef/data/map.KOI8-R
new file mode 100644
index 0000000..180568f
--- /dev/null
+++ b/usr.bin/colldef/data/map.KOI8-R
@@ -0,0 +1,174 @@
+NU \x00
+SH \x01
+SX \x02
+EX \x03
+ET \x04
+EQ \x05
+AK \x06
+BL \x07
+BS \x08
+HT \x09
+LF \x0a
+VT \x0b
+FF \x0c
+CR \x0d
+SO \x0e
+SI \x0f
+DL \x10
+D1 \x11
+D2 \x12
+D3 \x13
+D4 \x14
+NK \x15
+SY \x16
+EB \x17
+CN \x18
+EM \x19
+SB \x1a
+EC \x1b
+FS \x1c
+GS \x1d
+RS \x1e
+US \x1f
+SP \x20
+Nb \x23
+DO \x24
+At \x40
+<( \x5b
+// \x5c
+)> \x5d
+'> \x5e
+'! \x60
+(! \x7b
+!! \x7c
+!) \x7d
+'? \x7e
+DT \x7f
+hh \x80
+vv \x81
+dr \x82
+dl \x83
+ur \x84
+ul \x85
+vr \x86
+vl \x87
+dh \x88
+uh \x89
+vh \x8a
+TB \x8b
+LB \x8c
+FB \x8d
+lB \x8e
+RB \x8f
+.S \x90
+:S \x91
+?S \x92
+Iu \x93
+fS \x94
+sb \x95
+RT \x96
+?2 \x97
+=< \x98
+>= \x99
+NS \x9a
+Il \x9b
+DG \x9c
+2S \x9d
+.M \x9e
+-: \x9f
+HH \xa0
+VV \xa1
+dR \xa2
+io \xa3
+Dr \xa4
+DR \xa5
+dL \xa6
+Dl \xa7
+LD \xa8
+uR \xa9
+Ur \xaa
+UR \xab
+uL \xac
+Ul \xad
+UL \xae
+vR \xaf
+Vr \xb0
+VR \xb1
+vL \xb2
+IO \xb3
+Vl \xb4
+VL \xb5
+dH \xb6
+Dh \xb7
+DH \xb8
+uH \xb9
+Uh \xba
+UH \xbb
+vH \xbc
+Vh \xbd
+VH \xbe
+Co \xbf
+ju \xc0
+a= \xc1
+b= \xc2
+c= \xc3
+d= \xc4
+e= \xc5
+f= \xc6
+g= \xc7
+h= \xc8
+i= \xc9
+j= \xca
+k= \xcb
+l= \xcc
+m= \xcd
+n= \xce
+o= \xcf
+p= \xd0
+ja \xd1
+r= \xd2
+s= \xd3
+t= \xd4
+u= \xd5
+z% \xd6
+v= \xd7
+%' \xd8
+y= \xd9
+z= \xda
+s% \xdb
+je \xdc
+sc \xdd
+c% \xde
+=' \xdf
+JU \xe0
+A= \xe1
+B= \xe2
+C= \xe3
+D= \xe4
+E= \xe5
+F= \xe6
+G= \xe7
+H= \xe8
+I= \xe9
+J= \xea
+K= \xeb
+L= \xec
+M= \xed
+N= \xee
+O= \xef
+P= \xf0
+JA \xf1
+R= \xf2
+S= \xf3
+T= \xf4
+U= \xf5
+Z% \xf6
+V= \xf7
+%" \xf8
+Y= \xf9
+Z= \xfa
+S% \xfb
+JE \xfc
+Sc \xfd
+C% \xfe
+=" \xff
diff --git a/usr.bin/colldef/data/ru_SU.CP866.src b/usr.bin/colldef/data/ru_SU.CP866.src
index 2648fd8..a9f8fa9 100644
--- a/usr.bin/colldef/data/ru_SU.CP866.src
+++ b/usr.bin/colldef/data/ru_SU.CP866.src
@@ -1,27 +1,32 @@
-# cp866
+# IBM Code Page 866
+charmap map.CP866
order \
# spaces
- ;\ ;\t;\v;\r;\n;\f;\
+ <NS>;<SP>;\t;\v;\r;\n;\f;\
# puncts
_;-;\,;\;;:;!;?;/;.;`;^;~;\
- ';;\";\(;\);[;];\{;\};;@;\
- $;*;\\;&;\#;%;+;;\<;;=;;\>;|;\
- ;;;;;;\
- ;;;;;;;;\
- ;;;;;;;;\
- ;;;;;;;;\
- ;;;;;;;;\
- ;;;;;;;;\
- ;;;;;;;;;\
+ ';<.M>;\";\(;\);[;];\{;\};<Co>;@;\
+ $;*;\\;&;\#;%;+;<-:>;\<;<=<>;=;</>=>;>;|;\
+ <DG>;<sb>;<RT>;<?2>;<Iu>;<Il>;\
+ <hh>;<HH>;<vv>;<VV>;<dr>;<dR>;<Dr>;<DR>;\
+ <dl>;<dL>;<Dl>;<LD>;<ur>;<uR>;<Ur>;<UR>;\
+ <ul>;<uL>;<Ul>;<UL>;<vr>;<vR>;<Vr>;<VR>;\
+ <vl>;<vL>;<Vl>;<VL>;<dh>;<dH>;<Dh>;<DH>;\
+ <uh>;<uH>;<Uh>;<UH>;<vh>;<vH>;<Vh>;<VH>;\
+ <TB>;<LB>;<FB>;<lB>;<RB>;<.S>;<:S>;<?S>;<fS>;\
# controls
- \x00;...;\x08;\x0e;...;\x1f;\x7f;\
+ <NU>;...;<BS>;<SO>;...;<US>;<DT>;\
# digits
- 0;1;2;;3;...;9;\
+ 0;1;2;<2S>;3;...;9;\
# capital
A;...;Z;\
- ;;;;;;;;;;;;;;;;;;;;;;;;\
- ;;;;;;;;;\
+ <A=>;<B=>;<V=>;<G=>;<D=>;<E=>;<IO>;<Z%>;<Z=>;\
+ <I=>;<J=>;<K=>;<L=>;<M=>;<N=>;<O=>;<P=>;<R=>;\
+ <S=>;<T=>;<U=>;<F=>;<H=>;<C=>;<C%>;<S%>;<Sc>;\
+ <=">;<Y=>;<%">;<JE>;<JU>;<JA>;\
# small
a;...;z;\
- ;;;;;;;;;;;;;;;;;;;;;;;;\
- ;;;;;;;;
+ <a=>;<b=>;<v=>;<g=>;<d=>;<e=>;<io>;<z%>;<z=>;\
+ <i=>;<j=>;<k=>;<l=>;<m=>;<n=>;<o=>;<p=>;<r=>;\
+ <s=>;<t=>;<u=>;<f=>;<h=>;<c=>;<c%>;<s%>;<sc>;\
+ <='>;<y=>;<%'>;<je>;<ju>;<ja>
diff --git a/usr.bin/colldef/data/ru_SU.KOI8-R.src b/usr.bin/colldef/data/ru_SU.KOI8-R.src
index 87a09a0..a3977d9 100644
--- a/usr.bin/colldef/data/ru_SU.KOI8-R.src
+++ b/usr.bin/colldef/data/ru_SU.KOI8-R.src
@@ -1,27 +1,32 @@
# koi8-r
+charmap map.KOI8-R
order \
# spaces
- \x9a;\x20;\t;\v;\r;\n;\f;\
+ <NS>;<SP>;\t;\v;\r;\n;\f;\
# puncts
_;-;\,;\;;:;!;?;/;.;`;^;~;\
- ';\x9e;\";\(;\);[;];\{;\};\xbf;@;\
- $;*;\\;&;\#;%;+;\x9f;\<;\x98;=;\x99;\>;|;\
- \x9c;\x95;\x96;\x97;\x93;\x9b;\
- \x80;\xa0;\x81;\xa1;\x82;\xa2;\xa4;\xa5;\
- \x83;\xa6;\xa7;\xa8;\x84;\xa9;\xaa;\xab;\
- \x85;\xac;\xad;\xae;\x86;\xaf;\xb0;\xb1;\
- \x87;\xb2;\xb4;\xb5;\x88;\xb6;\xb7;\xb8;\
- \x89;\xb9;\xba;\xbb;\x8a;\xbc;\xbd;\xbe;\
- \x8b;\x8c;\x8d;\x8e;\x8f;\x90;\x91;\x92;\x94;\
+ ';<.M>;\";\(;\);[;];\{;\};<Co>;@;\
+ $;*;\\;&;\#;%;+;<-:>;\<;<=<>;=;</>=>;>;|;\
+ <DG>;<sb>;<RT>;<?2>;<Iu>;<Il>;\
+ <hh>;<HH>;<vv>;<VV>;<dr>;<dR>;<Dr>;<DR>;\
+ <dl>;<dL>;<Dl>;<LD>;<ur>;<uR>;<Ur>;<UR>;\
+ <ul>;<uL>;<Ul>;<UL>;<vr>;<vR>;<Vr>;<VR>;\
+ <vl>;<vL>;<Vl>;<VL>;<dh>;<dH>;<Dh>;<DH>;\
+ <uh>;<uH>;<Uh>;<UH>;<vh>;<vH>;<Vh>;<VH>;\
+ <TB>;<LB>;<FB>;<lB>;<RB>;<.S>;<:S>;<?S>;<fS>;\
# controls
- \x00;...;\x08;\x0e;...;\x1f;\x7f;\
+ <NU>;...;<BS>;<SO>;...;<US>;<DT>;\
# digits
- 0;1;2;\x9d;3;...;9;\
+ 0;1;2;<2S>;3;...;9;\
# capital
A;...;Z;\
- ;;;;;;;;;;;;;;;;;;;;;;;;\
- ;;;;;;;;;\
+ <A=>;<B=>;<V=>;<G=>;<D=>;<E=>;<IO>;<Z%>;<Z=>;\
+ <I=>;<J=>;<K=>;<L=>;<M=>;<N=>;<O=>;<P=>;<R=>;\
+ <S=>;<T=>;<U=>;<F=>;<H=>;<C=>;<C%>;<S%>;<Sc>;\
+ <=">;<Y=>;<%">;<JE>;<JU>;<JA>;\
# small
a;...;z;\
- ;;;;;;;;;;;;;;;;;;;;;;;;\
- ;;;;;;;;
+ <a=>;<b=>;<v=>;<g=>;<d=>;<e=>;<io>;<z%>;<z=>;\
+ <i=>;<j=>;<k=>;<l=>;<m=>;<n=>;<o=>;<p=>;<r=>;\
+ <s=>;<t=>;<u=>;<f=>;<h=>;<c=>;<c%>;<s%>;<sc>;\
+ <='>;<y=>;<%'>;<je>;<ju>;<ja>
diff --git a/usr.bin/colldef/parse.y b/usr.bin/colldef/parse.y
index 47f3cd5..55067c5 100644
--- a/usr.bin/colldef/parse.y
+++ b/usr.bin/colldef/parse.y
@@ -25,10 +25,11 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: parse.y,v 1.2 1995/01/24 11:15:47 alex Exp alex $
+ * $Id: parse.y,v 1.1.1.1 1995/02/17 17:29:50 ache Exp $
*/
#include <err.h>
+#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -37,13 +38,16 @@
extern int line_no;
extern FILE *yyin;
+void yyerror(char *fmt, ...);
-u_char __collate_charmap_table[UCHAR_MAX + 1][STR_LEN];
+char map_name[FILENAME_MAX] = ".";
+
+char __collate_version[STR_LEN];
+u_char charmap_table[UCHAR_MAX + 1][STR_LEN];
u_char __collate_substitute_table[UCHAR_MAX + 1][STR_LEN];
struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1];
-struct __collate_st_name_pri __collate_name_pri_table[TABLE_SIZE];
struct __collate_st_chain_pri __collate_chain_pri_table[TABLE_SIZE];
-int name_index, chain_index;
+int chain_index;
int prim_pri = 1, sec_pri = 1;
#ifdef COLLATE_DEBUG
int debug;
@@ -59,6 +63,7 @@ char *out_file = "LC_COLLATE";
%token <str> STRING
%token <str> NAME
%token <str> CHAIN
+%token <str> DEFN
%token <ch> CHAR
%%
collate : statment_list
@@ -71,12 +76,8 @@ statment :
| substitute
| order
;
-charmap : CHAIN CHAR {
- strcpy(__collate_charmap_table[$2], $1);
-}
- | CHAR CHAR {
- __collate_charmap_table[$2][0] = $1;
- __collate_charmap_table[$2][1] = '\0';
+charmap : DEFN CHAR {
+ strcpy(charmap_table[$2], $1);
}
;
substitute : SUBSTITUTE STRING WITH STRING {
@@ -87,14 +88,18 @@ order : ORDER order_list {
FILE *fp = fopen(out_file, "w");
if(!fp)
- err(EX_UNAVAILABLE, "con't open destination file %s",
+ err(EX_UNAVAILABLE, "can't open destination file %s",
out_file);
- fwrite(__collate_charmap_table, sizeof(__collate_charmap_table), 1, fp);
+ strcpy(__collate_version, COLLATE_VERSION);
+ fwrite(__collate_version, sizeof(__collate_version), 1, fp);
fwrite(__collate_substitute_table, sizeof(__collate_substitute_table), 1, fp);
fwrite(__collate_char_pri_table, sizeof(__collate_char_pri_table), 1, fp);
fwrite(__collate_chain_pri_table, sizeof(__collate_chain_pri_table), 1, fp);
- fwrite(__collate_name_pri_table, sizeof(__collate_name_pri_table), 1, fp);
+ if (fflush(fp))
+ err(EX_UNAVAILABLE, "IO error writting to destination file %s",
+ out_file);
+ fclose(fp);
#ifdef COLLATE_DEBUG
if (debug)
__collate_print_tables();
@@ -113,21 +118,86 @@ item : CHAR { __collate_char_pri_table[$1].prim = prim_pri++; }
__collate_chain_pri_table[chain_index++].prim = prim_pri++;
}
| NAME {
- if (name_index >= TABLE_SIZE - 1)
- yyerror("__collate_name_pri_table overflow");
- strcpy(__collate_name_pri_table[name_index].str, $1);
- __collate_name_pri_table[name_index++].prim = prim_pri++;
+ u_int i;
+
+ for (i = 0; i <= UCHAR_MAX; i++)
+ if (strcmp(charmap_table[i], $1) == 0)
+ goto findi;
+ yyerror("Name <%s> not defined", $1);
+ findi:
+
+ __collate_char_pri_table[i].prim = prim_pri++;
}
| CHAR RANGE CHAR {
u_int i;
if ($3 <= $1)
- yyerror("Illegal range %c -- %c near line %d\n",
- $1, $3, line_no);
+ yyerror("Illegal range 0x%02x -- 0x%02x", $1, $3);
- for (i = $1; i <= $3; i++) {
+ for (i = $1; i <= $3; i++)
+ __collate_char_pri_table[(u_char)i].prim = prim_pri++;
+}
+ | NAME RANGE CHAR {
+ u_int i, c1;
+
+ for (i = 0; i <= UCHAR_MAX; i++)
+ if (strcmp(charmap_table[i], $1) == 0) {
+ c1 = i;
+ goto find1;
+ }
+ yyerror("Name <%s> not defined", $1);
+ find1:
+
+ if ($3 <= c1)
+ yyerror("Illegal range 0x%02x -- 0x%02x",
+ c1, $3);
+
+ for (i = c1; i <= $3; i++)
+ __collate_char_pri_table[(u_char)i].prim = prim_pri++;
+}
+ | CHAR RANGE NAME {
+ u_int i, c3;
+
+ for (i = 0; i <= UCHAR_MAX; i++)
+ if (strcmp(charmap_table[i], $3) == 0) {
+ c3 = i;
+ goto find3;
+ }
+ yyerror("Name <%s> not defined", $3);
+ find3:
+
+ if (c3 <= $1)
+ yyerror("Illegal range 0x%02x -- 0x%02x",
+ $1, c3);
+
+ for (i = $1; i <= c3; i++)
+ __collate_char_pri_table[(u_char)i].prim = prim_pri++;
+}
+ | NAME RANGE NAME {
+ u_int i, c1, c3;
+
+ for (i = 0; i <= UCHAR_MAX; i++)
+ if (strcmp(charmap_table[i], $1) == 0) {
+ c1 = i;
+ goto find21;
+ }
+ yyerror("Name <%s> not defined", $1);
+ find21:
+
+ for (i = 0; i <= UCHAR_MAX; i++)
+ if (strcmp(charmap_table[i], $3) == 0) {
+ c3 = i;
+ goto find23;
+ }
+ yyerror("Name <%s> not defined", $3);
+ find23:
+
+ if (c3 <= c1)
+ yyerror("Illegal range 0x%02x -- 0x%02x",
+ c1, c3);
+
+ for (i = c1; i <= c3; i++)
__collate_char_pri_table[(u_char)i].prim = prim_pri++;
- }
}
| '{' prim_order_list '}' {
prim_pri++;
@@ -150,18 +220,84 @@ prim_sub_item : CHAR {
u_int i;
if ($3 <= $1)
- yyerror("Illegal range %c -- %c near line %d\n",
- $1, $3, line_no);
+ yyerror("Illegal range 0x%02x -- 0x%02x",
+ $1, $3);
- for (i = $1; i <= $3; i++) {
+ for (i = $1; i <= $3; i++)
+ __collate_char_pri_table[(u_char)i].prim = prim_pri;
+}
+ | NAME RANGE CHAR {
+ u_int i, c1;
+
+ for (i = 0; i <= UCHAR_MAX; i++)
+ if (strcmp(charmap_table[i], $1) == 0) {
+ c1 = i;
+ goto findpsi1;
+ }
+ yyerror("Name <%s> not defined", $1);
+ findpsi1:
+
+ if ($3 <= c1)
+ yyerror("Illegal range 0x%02x -- 0x%02x",
+ c1, $3);
+
+ for (i = c1; i <= $3; i++)
+ __collate_char_pri_table[(u_char)i].prim = prim_pri;
+}
+ | CHAR RANGE NAME {
+ u_int i, c3;
+
+ for (i = 0; i <= UCHAR_MAX; i++)
+ if (strcmp(charmap_table[i], $3) == 0) {
+ c3 = i;
+ goto findpsi3;
+ }
+ yyerror("Name <%s> not defined", $3);
+ findpsi3:
+
+ if (c3 <= $1)
+ yyerror("Illegal range 0x%02x -- 0x%02x",
+ $1, c3);
+
+ for (i = $1; i <= c3; i++)
+ __collate_char_pri_table[(u_char)i].prim = prim_pri;
+}
+ | NAME RANGE NAME {
+ u_int i, c1, c3;
+
+ for (i = 0; i <= UCHAR_MAX; i++)
+ if (strcmp(charmap_table[i], $1) == 0) {
+ c1 = i;
+ goto findpsi21;
+ }
+ yyerror("Name <%s> not defined", $1);
+ findpsi21:
+
+ for (i = 0; i <= UCHAR_MAX; i++)
+ if (strcmp(charmap_table[i], $3) == 0) {
+ c3 = i;
+ goto findpsi23;
+ }
+ yyerror("Name <%s> not defined", $3);
+ findpsi23:
+
+ if (c3 <= c1)
+ yyerror("Illegal range 0x%02x -- 0x%02x",
+ c1, c3);
+
+ for (i = c1; i <= c3; i++)
__collate_char_pri_table[(u_char)i].prim = prim_pri;
- }
}
| NAME {
- if (name_index >= TABLE_SIZE - 1)
- yyerror("__collate_name_pri_table overflow");
- strcpy(__collate_name_pri_table[name_index].str, $1);
- __collate_name_pri_table[name_index++].prim = prim_pri;
+ u_int i;
+
+ for (i = 0; i <= UCHAR_MAX; i++)
+ if (strcmp(charmap_table[i], $1) == 0)
+ goto findpsi;
+ yyerror("Name <%s> not defined", $1);
+ findpsi:
+
+ __collate_char_pri_table[i].prim = prim_pri;
}
| CHAIN {
if (chain_index >= TABLE_SIZE - 1)
@@ -178,20 +314,93 @@ sec_sub_item : CHAR {
u_int i;
if ($3 <= $1)
- yyerror("Illegal range %c -- %c near line %d\n",
- $1, $3, line_no);
+ yyerror("Illegal range 0x%02x -- 0x%02x",
+ $1, $3);
for (i = $1; i <= $3; i++) {
__collate_char_pri_table[(u_char)i].prim = prim_pri;
__collate_char_pri_table[(u_char)i].sec = sec_pri++;
}
}
+ | NAME RANGE CHAR {
+ u_int i, c1;
+
+ for (i = 0; i <= UCHAR_MAX; i++)
+ if (strcmp(charmap_table[i], $1) == 0) {
+ c1 = i;
+ goto findssi1;
+ }
+ yyerror("Name <%s> not defined", $1);
+ findssi1:
+
+ if ($3 <= c1)
+ yyerror("Illegal range 0x%02x -- 0x%02x",
+ c1, $3);
+
+ for (i = c1; i <= $3; i++) {
+ __collate_char_pri_table[(u_char)i].prim = prim_pri;
+ __collate_char_pri_table[(u_char)i].sec = sec_pri++;
+ }
+}
+ | CHAR RANGE NAME {
+ u_int i, c3;
+
+ for (i = 0; i <= UCHAR_MAX; i++)
+ if (strcmp(charmap_table[i], $3) == 0) {
+ c3 = i;
+ goto findssi3;
+ }
+ yyerror("Name <%s> not defined", $3);
+ findssi3:
+
+ if (c3 <= $1)
+ yyerror("Illegal range 0x%02x -- 0x%02x",
+ $1, c3);
+
+ for (i = $1; i <= c3; i++) {
+ __collate_char_pri_table[(u_char)i].prim = prim_pri;
+ __collate_char_pri_table[(u_char)i].sec = sec_pri++;
+ }
+}
+ | NAME RANGE NAME {
+ u_int i, c1, c3;
+
+ for (i = 0; i <= UCHAR_MAX; i++)
+ if (strcmp(charmap_table[i], $1) == 0) {
+ c1 = i;
+ goto findssi21;
+ }
+ yyerror("Name <%s> not defined", $1);
+ findssi21:
+
+ for (i = 0; i <= UCHAR_MAX; i++)
+ if (strcmp(charmap_table[i], $3) == 0) {
+ c3 = i;
+ goto findssi23;
+ }
+ yyerror("Name <%s> not defined", $3);
+ findssi23:
+
+ if (c3 <= c1)
+ yyerror("Illegal range 0x%02x -- 0x%02x",
+ c1, c3);
+
+ for (i = c1; i <= c3; i++) {
+ __collate_char_pri_table[(u_char)i].prim = prim_pri;
+ __collate_char_pri_table[(u_char)i].sec = sec_pri++;
+ }
+}
| NAME {
- if (name_index >= TABLE_SIZE - 1)
- yyerror("__collate_name_pri_table overflow");
- strcpy(__collate_name_pri_table[name_index].str, $1);
- __collate_name_pri_table[name_index].prim = prim_pri;
- __collate_name_pri_table[name_index++].sec = sec_pri++;
+ u_int i;
+
+ for (i = 0; i <= UCHAR_MAX; i++)
+ if (strcmp(charmap_table[i], $1) == 0)
+ goto findssi;
+ yyerror("Name <%s> not defined", $1);
+ findssi:
+
+ __collate_char_pri_table[i].prim = prim_pri;
+ __collate_char_pri_table[i].sec = sec_pri++;
}
| CHAIN {
if (chain_index >= TABLE_SIZE - 1)
@@ -208,9 +417,9 @@ main(ac, av)
int ch;
#ifdef COLLATE_DEBUG
- while((ch = getopt(ac, av, ":do:")) != EOF) {
+ while((ch = getopt(ac, av, ":do:I:")) != EOF) {
#else
- while((ch = getopt(ac, av, ":o:")) != EOF) {
+ while((ch = getopt(ac, av, ":o:I:")) != EOF) {
#endif
switch (ch)
{
@@ -223,8 +432,12 @@ main(ac, av)
out_file = optarg;
break;
+ case 'I':
+ strcpy(map_name, optarg);
+ break;
+
default:
- fprintf(stderr, "Usage: %s [-o out_file] [in_file]\n",
+ fprintf(stderr, "Usage: %s [-o out_file] [-I map_dir] [in_file]\n",
av[0]);
exit(EX_OK);
}
@@ -241,8 +454,13 @@ main(ac, av)
return 0;
}
-yyerror(msg)
- char *msg;
+void yyerror(char *fmt, ...) /* printf-style parse error: formats fmt, then reports it with line_no via errx() */
{
+ va_list ap;
+ char msg[128];
+
+ va_start(ap, fmt);
+ vsnprintf(msg, sizeof(msg), fmt, ap); /* bounded: never write past the fixed-size msg[] */
+ va_end(ap);
errx(EX_UNAVAILABLE, "%s near line %d", msg, line_no);
}
diff --git a/usr.bin/colldef/scan.l b/usr.bin/colldef/scan.l
index d3e7243..6a7d0cf 100644
--- a/usr.bin/colldef/scan.l
+++ b/usr.bin/colldef/scan.l
@@ -1,4 +1,4 @@
-%x string name charmap
+%x string name charmap defn nchar subs
%{
/*-
* Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
@@ -26,9 +26,10 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: scan.l,v 1.1 1995/02/17 17:29:49 ache Exp $
+ * $Id: scan.l,v 1.3 1996/06/02 17:18:18 phk Exp $
*/
+#include <ctype.h>
#include <err.h>
#include <unistd.h>
#include <string.h>
@@ -36,51 +37,64 @@
#include "collate.h"
#include "y.tab.h"
-int line_no = 1;
+int line_no = 1, save_no;
u_char buf[STR_LEN], *ptr;
FILE *map_fp;
+extern char map_name[];
YY_BUFFER_STATE main_buf, map_buf;
#ifdef FLEX_DEBUG
YYSTYPE yylval;
#endif /* FLEX_DEBUG */
%}
%%
-<INITIAL,charmap>[ \t] ;
-\" { ptr = buf; BEGIN(string); }
-\< { ptr = buf; BEGIN(name); }
+<INITIAL,charmap,nchar,subs>[ \t]+ ;
+<subs>\" { ptr = buf; BEGIN(string); }
+<INITIAL>\< { ptr = buf; BEGIN(name); }
^#.*\n line_no++;
^\n line_no++;
-\\\n line_no++;
-\\t { yylval.ch = '\t'; return CHAR; }
-\\n { yylval.ch = '\n'; return CHAR; }
-\\b { yylval.ch = '\b'; return CHAR; }
-\\f { yylval.ch = '\f'; return CHAR; }
-\\v { yylval.ch = '\v'; return CHAR; }
-\\r { yylval.ch = '\r'; return CHAR; }
-\\a { yylval.ch = '\a'; return CHAR; }
-\\. { yylval.ch = yytext[1]; return CHAR; }
-<INITIAL,charmap>\n { line_no++; return '\n'; }
-[;,{}()] return *yytext;
-substitute return SUBSTITUTE;
-with return WITH;
-order return ORDER;
-charmap BEGIN(charmap);
-;[ \t]*\.\.\.[ \t]*; return RANGE;
-\\[0-7]{3} {
+<INITIAL>\\\n line_no++;
+<INITIAL,nchar>\\t { yylval.ch = '\t'; return CHAR; }
+<INITIAL,nchar>\\n { yylval.ch = '\n'; return CHAR; }
+<INITIAL,nchar>\\b { yylval.ch = '\b'; return CHAR; }
+<INITIAL,nchar>\\f { yylval.ch = '\f'; return CHAR; }
+<INITIAL,nchar>\\v { yylval.ch = '\v'; return CHAR; }
+<INITIAL,nchar>\\r { yylval.ch = '\r'; return CHAR; }
+<INITIAL,nchar>\\a { yylval.ch = '\a'; return CHAR; }
+<INITIAL,nchar>\\. { yylval.ch = yytext[1]; return CHAR; }
+<subs>\n {
+ line_no++;
+ BEGIN(INITIAL);
+ return '\n';
+}
+<INITIAL,nchar>\n {
+ line_no++;
+ if (map_fp != NULL) {
+ ptr = buf;
+ BEGIN(defn);
+ }
+ return '\n';
+}
+<INITIAL>[;,{}()] return *yytext;
+<INITIAL>substitute { BEGIN(subs); return SUBSTITUTE; }
+<subs>with return WITH;
+<INITIAL>order return ORDER;
+<INITIAL>charmap BEGIN(charmap);
+<INITIAL>;[ \t]*\.\.\.[ \t]*; return RANGE;
+<INITIAL,nchar>\\[0-7]{3} {
u_int v;
sscanf(&yytext[1], "%o", &v);
yylval.ch = (u_char)v;
return CHAR;
}
-\\x[0-9a-z]{2} {
+<INITIAL,nchar>\\x[0-9a-f]{2} {
u_int v;
sscanf(&yytext[2], "%x", &v);
yylval.ch = (u_char)v;
return CHAR;
}
-[^;,{}() \t\n"<]+ {
+<INITIAL>[^;,{}() \t\n"<]+ {
if(yyleng == 1) {
yylval.ch = *yytext;
return CHAR;
@@ -91,19 +105,41 @@ charmap BEGIN(charmap);
strcpy(yylval.str, yytext);
return CHAIN;
}
-<name>\\\> {
+<nchar>. {
+ yylval.ch = *yytext;
+ return CHAR;
+}
+<defn>[ \t]+ {
+ if (ptr == buf)
+ errx(EX_UNAVAILABLE, "map expected near line %u of %s",
+ line_no, map_name);
+ *ptr = '\0';
+ strcpy(yylval.str, buf);
+ BEGIN(nchar);
+ return DEFN;
+}
+<name>\/\/ {
if(ptr >= buf + sizeof(buf) - 1)
- errx(EX_UNAVAILABLE, "name/string buffer overflaw near line %u",
+ errx(EX_UNAVAILABLE, "name buffer overflaw near line %u, character '/'",
+ line_no);
+ *ptr++ = '/';
+}
+<name>\/\> {
+ if(ptr >= buf + sizeof(buf) - 1)
+ errx(EX_UNAVAILABLE, "name buffer overflaw near line %u, character '>'",
line_no);
*ptr++ = '>';
}
<string>\\\" {
if(ptr >= buf + sizeof(buf) - 1)
- errx(EX_UNAVAILABLE, "name/string buffer overflaw near line %u",
+ errx(EX_UNAVAILABLE, "string buffer overflaw near line %u, character '\"'",
line_no);
*ptr++ = '"';
}
<name>\> {
+ if (ptr == buf)
+ errx(EX_UNAVAILABLE, "name expected near line %u",
+ line_no);
*ptr = '\0';
strcpy(yylval.str, buf);
BEGIN(INITIAL);
@@ -112,93 +148,129 @@ charmap BEGIN(charmap);
<string>\" {
*ptr = '\0';
strcpy(yylval.str, buf);
- BEGIN(INITIAL);
+ BEGIN(subs);
return STRING;
}
-<name,string>. {
+<name,defn>. {
+ char *s = (map_fp != NULL) ? map_name : "input";
+
+ if (!isascii(*yytext) || !isprint(*yytext))
+ errx(EX_UNAVAILABLE, "non-ASCII or non-printable character 0x%02x not allowed in the map/name near line %u of %s",
+ *yytext, line_no, s);
if(ptr >= buf + sizeof(buf) - 1)
- errx(EX_UNAVAILABLE, "name/string buffer overflaw near line %u",
- line_no);
+ errx(EX_UNAVAILABLE, "map/name buffer overflaw near line %u of %s, character '%c'",
+ line_no, s, *yytext);
*ptr++ = *yytext;
}
-<name,string>\\t {
+<string>\\t {
if(ptr >= buf + sizeof(buf) - 1)
- errx(EX_UNAVAILABLE, "name/string buffer overflaw near line %u",
+ errx(EX_UNAVAILABLE, "string buffer overflaw near line %u, character '\\t'",
line_no);
*ptr++ = '\t';
}
-<name,string>\\b {
+<string>\\b {
if(ptr >= buf + sizeof(buf) - 1)
- errx(EX_UNAVAILABLE, "name/string buffer overflaw near line %u",
+ errx(EX_UNAVAILABLE, "string buffer overflaw near line %u, character '\\b'",
line_no);
*ptr++ = '\b';
}
-<name,string>\\f {
+<string>\\f {
if(ptr >= buf + sizeof(buf) - 1)
- errx(EX_UNAVAILABLE, "name/string buffer overflaw near line %u",
+ errx(EX_UNAVAILABLE, "string buffer overflaw near line %u, character '\\f'",
line_no);
*ptr++ = '\f';
}
-<name,string>\\v {
+<string>\\v {
if(ptr >= buf + sizeof(buf) - 1)
- errx(EX_UNAVAILABLE, "name/string buffer overflaw near line %u",
+ errx(EX_UNAVAILABLE, "string buffer overflaw near line %u, character '\\v'",
line_no);
*ptr++ = '\v';
}
-<name,string>\\n {
+<string>\\n {
if(ptr >= buf + sizeof(buf) - 1)
- errx(EX_UNAVAILABLE, "name/string buffer overflaw near line %u",
+ errx(EX_UNAVAILABLE, "string buffer overflaw near line %u, character '\\n'",
line_no);
*ptr++ = '\n';
}
-<name,string>\\r {
+<string>\\r {
if(ptr >= buf + sizeof(buf) - 1)
- errx(EX_UNAVAILABLE, "name/string buffer overflaw near line %u",
+ errx(EX_UNAVAILABLE, "string buffer overflaw near line %u, character '\\r'",
line_no);
*ptr++ = '\r';
}
-<name,string>\\a {
+<string>\\a {
if(ptr >= buf + sizeof(buf) - 1)
- errx(EX_UNAVAILABLE, "name/string buffer overflaw near line %u",
+ errx(EX_UNAVAILABLE, "string buffer overflaw near line %u, character '\\a'",
line_no);
*ptr++ = '\a';
}
-<name,string><<EOF>> {
- errx(EX_UNAVAILABLE, "unterminated name/string near line %u", line_no);
+<name,string,defn>\n {
+ char *s = (map_fp != NULL) ? map_name : "input";
+
+ errx(EX_UNAVAILABLE, "unterminated map/name/string near line %u of %s", line_no, s);
}
-<name,string>\\x[0-9a-f]{2} {
+<name,string,nchar><<EOF>> {
+ char *s = (map_fp != NULL) ? map_name : "input";
+
+ errx(EX_UNAVAILABLE, "premature EOF in the name/string/char near line %u of %s", line_no, s);
+}
+<string>\\x[0-9a-f]{2} {
u_int v;
sscanf(&yytext[2], "%x", &v);
*ptr++ = (u_char)v;
}
-<name,string>\\[0-7]{3} {
+<string>\\[0-7]{3} {
u_int v;
sscanf(&yytext[1], "%o", &v);
*ptr++ = (u_char)v;
}
+<string>\\. {
+ if(ptr >= buf + sizeof(buf) - 1)
+ errx(EX_UNAVAILABLE, "string buffer overflaw near line %u, character '%c'",
+ line_no, yytext[1]);
+ *ptr++ = yytext[1];
+}
+<string>. {
+ if(ptr >= buf + sizeof(buf) - 1)
+ errx(EX_UNAVAILABLE, "string buffer overflaw near line %u, character '%c'",
+ line_no, *yytext);
+ *ptr++ = *yytext;
+}
<charmap>[^ \t\n]+ {
- if((map_fp = fopen(yytext, "r")) == 0)
- err(EX_UNAVAILABLE, "can't open charmap file %s near line %u",
- yytext, line_no);
+ strcat(map_name, "/");
+ strcat(map_name, yytext);
+ if((map_fp = fopen(map_name, "r")) == NULL)
+ err(EX_UNAVAILABLE, "can't open 'charmap' file %s",
+ map_name);
+ save_no = line_no;
+ line_no = 1;
map_buf = yy_new_buffer(map_fp, YY_BUF_SIZE);
main_buf = YY_CURRENT_BUFFER;
yy_switch_to_buffer(map_buf);
- BEGIN(INITIAL);
+ ptr = buf;
+ BEGIN(defn);
}
-<charmap><<EOF>> {
- errx(EX_UNAVAILABLE, "charmap file name expected near line %u",
+<charmap>\n {
+ errx(EX_UNAVAILABLE, "'charmap' file name expected near line %u",
line_no);
}
-<<EOF>> {
- if(map_fp) {
+<charmap><<EOF>> {
+ errx(EX_UNAVAILABLE, "'charmap' file name expected near line %u",
+ line_no);
+}
+<INITIAL,defn><<EOF>> {
+ if(map_fp != NULL) {
+ if (ptr != buf)
+ errx(EX_UNAVAILABLE, "premature EOF in the map near line %u of %s", line_no, map_name);
yy_switch_to_buffer(main_buf);
yy_delete_buffer(map_buf);
fclose(map_fp);
- map_fp = 0;
- }
- else
+ map_fp = NULL;
+ line_no = save_no;
+ BEGIN(INITIAL);
+ } else
yyterminate();
}
%%
OpenPOWER on IntegriCloud