summaryrefslogtreecommitdiffstats
path: root/usr.bin/sgmls
diff options
context:
space:
mode:
authorjfieber <jfieber@FreeBSD.org>1995-04-27 16:03:47 +0000
committerjfieber <jfieber@FreeBSD.org>1995-04-27 16:03:47 +0000
commitfb558ed8ecbff94577b9155ee55ebc0cd9777afa (patch)
tree157c629ecbe1e19bab07b3a19313bdfac3130ff9 /usr.bin/sgmls
downloadFreeBSD-src-fb558ed8ecbff94577b9155ee55ebc0cd9777afa.zip
FreeBSD-src-fb558ed8ecbff94577b9155ee55ebc0cd9777afa.tar.gz
The sgmls SGML parser. Support for our hypertext documents.
Reviewed by: Paul Richards, Garrett Wollman
Diffstat (limited to 'usr.bin/sgmls')
-rw-r--r--usr.bin/sgmls/LICENSE43
-rw-r--r--usr.bin/sgmls/Makefile9
-rw-r--r--usr.bin/sgmls/Makefile.inc13
-rw-r--r--usr.bin/sgmls/README138
-rwxr-xr-xusr.bin/sgmls/configure617
-rw-r--r--usr.bin/sgmls/libsgmls/Makefile17
-rw-r--r--usr.bin/sgmls/libsgmls/sgmls.c1036
-rw-r--r--usr.bin/sgmls/libsgmls/sgmls.h127
-rw-r--r--usr.bin/sgmls/rast/Makefile18
-rw-r--r--usr.bin/sgmls/rast/rast.175
-rw-r--r--usr.bin/sgmls/rast/rast.c534
-rwxr-xr-xusr.bin/sgmls/sgmls.pl247
-rw-r--r--usr.bin/sgmls/sgmls/Makefile18
-rw-r--r--usr.bin/sgmls/sgmls/action.h179
-rw-r--r--usr.bin/sgmls/sgmls/adl.h118
-rw-r--r--usr.bin/sgmls/sgmls/ambig.c438
-rw-r--r--usr.bin/sgmls/sgmls/appl.h33
-rw-r--r--usr.bin/sgmls/sgmls/config.h147
-rw-r--r--usr.bin/sgmls/sgmls/context.c444
-rw-r--r--usr.bin/sgmls/sgmls/context.h17
-rw-r--r--usr.bin/sgmls/sgmls/dosproc.c40
-rw-r--r--usr.bin/sgmls/sgmls/ebcdic.c42
-rw-r--r--usr.bin/sgmls/sgmls/ebcdic.h40
-rw-r--r--usr.bin/sgmls/sgmls/entgen.c405
-rw-r--r--usr.bin/sgmls/sgmls/entity.h189
-rw-r--r--usr.bin/sgmls/sgmls/error.h61
-rw-r--r--usr.bin/sgmls/sgmls/etype.h91
-rw-r--r--usr.bin/sgmls/sgmls/exclude.c121
-rw-r--r--usr.bin/sgmls/sgmls/genlex.c114
-rw-r--r--usr.bin/sgmls/sgmls/getopt.c166
-rw-r--r--usr.bin/sgmls/sgmls/getopt.h11
-rw-r--r--usr.bin/sgmls/sgmls/keyword.h22
-rw-r--r--usr.bin/sgmls/sgmls/latin1.h51
-rw-r--r--usr.bin/sgmls/sgmls/lexcode.h11
-rw-r--r--usr.bin/sgmls/sgmls/lexrf.c124
-rw-r--r--usr.bin/sgmls/sgmls/lextaba.c559
-rw-r--r--usr.bin/sgmls/sgmls/lextabe.c184
-rw-r--r--usr.bin/sgmls/sgmls/lextoke.h10
-rw-r--r--usr.bin/sgmls/sgmls/lineout.c653
-rw-r--r--usr.bin/sgmls/sgmls/lineout.h23
-rw-r--r--usr.bin/sgmls/sgmls/main.c602
-rw-r--r--usr.bin/sgmls/sgmls/md1.c862
-rw-r--r--usr.bin/sgmls/sgmls/md2.c801
-rw-r--r--usr.bin/sgmls/sgmls/msg.h252
-rw-r--r--usr.bin/sgmls/sgmls/msgcat.c833
-rw-r--r--usr.bin/sgmls/sgmls/msgcat.h13
-rw-r--r--usr.bin/sgmls/sgmls/pars1.c958
-rw-r--r--usr.bin/sgmls/sgmls/pars2.c1308
-rw-r--r--usr.bin/sgmls/sgmls/pcbrf.c1344
-rw-r--r--usr.bin/sgmls/sgmls/portproc.c104
-rw-r--r--usr.bin/sgmls/sgmls/serv.c299
-rw-r--r--usr.bin/sgmls/sgmls/sgml1.c477
-rw-r--r--usr.bin/sgmls/sgmls/sgml2.c499
-rw-r--r--usr.bin/sgmls/sgmls/sgmlaux.h70
-rw-r--r--usr.bin/sgmls/sgmls/sgmldecl.c1741
-rw-r--r--usr.bin/sgmls/sgmls/sgmldecl.h84
-rw-r--r--usr.bin/sgmls/sgmls/sgmlfnsm.h129
-rw-r--r--usr.bin/sgmls/sgmls/sgmlincl.h20
-rw-r--r--usr.bin/sgmls/sgmls/sgmlio.c384
-rw-r--r--usr.bin/sgmls/sgmls/sgmlmain.h101
-rw-r--r--usr.bin/sgmls/sgmls/sgmlmsg.c514
-rw-r--r--usr.bin/sgmls/sgmls/sgmls.1871
-rw-r--r--usr.bin/sgmls/sgmls/sgmlxtrn.c223
-rw-r--r--usr.bin/sgmls/sgmls/sgmlxtrn.h121
-rw-r--r--usr.bin/sgmls/sgmls/source.h114
-rw-r--r--usr.bin/sgmls/sgmls/std.h116
-rw-r--r--usr.bin/sgmls/sgmls/stklen.c2
-rw-r--r--usr.bin/sgmls/sgmls/strerror.c36
-rw-r--r--usr.bin/sgmls/sgmls/synrf.c72
-rw-r--r--usr.bin/sgmls/sgmls/synxtrn.h152
-rw-r--r--usr.bin/sgmls/sgmls/tools.h76
-rw-r--r--usr.bin/sgmls/sgmls/trace.h113
-rw-r--r--usr.bin/sgmls/sgmls/traceset.c465
-rw-r--r--usr.bin/sgmls/sgmls/unix.cfg147
-rw-r--r--usr.bin/sgmls/sgmls/unixproc.c98
-rw-r--r--usr.bin/sgmls/sgmls/version.c1
-rw-r--r--usr.bin/sgmls/sgmls/xfprintf.c568
-rw-r--r--usr.bin/sgmls/sgmlsasp/Makefile18
-rw-r--r--usr.bin/sgmls/sgmlsasp/replace.c467
-rw-r--r--usr.bin/sgmls/sgmlsasp/replace.h35
-rw-r--r--usr.bin/sgmls/sgmlsasp/sgmlsasp.130
-rw-r--r--usr.bin/sgmls/sgmlsasp/sgmlsasp.c278
-rw-r--r--usr.bin/sgmls/sgmlsasp/sgmlsasp.h26
-rw-r--r--usr.bin/sgmls/unix.cfg147
84 files changed, 22756 insertions, 0 deletions
diff --git a/usr.bin/sgmls/LICENSE b/usr.bin/sgmls/LICENSE
new file mode 100644
index 0000000..576ca35
--- /dev/null
+++ b/usr.bin/sgmls/LICENSE
@@ -0,0 +1,43 @@
+ LICENSE AND DISCLAIMER OF WARRANTIES
+
+ Standard Generalized Markup Language Users' Group (SGMLUG)
+ SGML Parser Materials
+
+ 1. License
+
+SGMLUG hereby grants to any user: (1) an irrevocable royalty-free,
+worldwide, non-exclusive license to use, execute, reproduce, display,
+perform and distribute copies of, and to prepare derivative works
+based upon these materials; and (2) the right to authorize others to
+do any of the foregoing.
+
+ 2. Disclaimer of Warranties
+
+(a) The SGML Parser Materials are provided "as is" to any USER. USER
+assumes responsibility for determining the suitability of the SGML
+Parser Materials for its use and for results obtained. SGMLUG makes
+no warranty that any errors have been eliminated from the SGML Parser
+Materials or that they can be eliminated by USER. SGMLUG shall not
+provide any support maintenance or other aid to USER or its licensees
+with respect to SGML Parser Materials. SGMLUG shall not be
+responsible for losses of any kind resulting from use of the SGML
+Parser Materials including (without limitation) any liability for
+business expense, machine downtime, or damages caused to USER or third
+parties by any deficiency, defect, error, or malfunction.
+
+(b) SGMLUG DISCLAIMS ALL WARRANTIES, EXPRESSED OR IMPLIED, ARISING OUT
+OF OR RELATING TO THE SGML PARSER MATERIALS OR ANY USE THEREOF,
+INCLUDING (WITHOUT LIMITATION) ANY WARRANTY WHATSOEVER AS TO THE
+FITNESS FOR A PARTICULAR USE OR THE MERCHANTABILITY OF THE SGML PARSER
+MATERIALS.
+
+(c) In no event shall SGMLUG be liable to USER or third parties
+licensed by USER for any indirect, special, incidental, or
+consequential damages (including lost profits).
+(d) SGMLUG has no knowledge of any conditions that would impair its right
+to license the SGML Parser Materials. Notwithstanding the foregoing,
+SGMLUG does not make any warranties or representations that the
+SGML Parser Materials are free of claims by third parties of patent,
+copyright infringement or the like, nor does SGMLUG assume any
+liability in respect of any such infringement of rights of third
+parties due to USER's operation under this license.
diff --git a/usr.bin/sgmls/Makefile b/usr.bin/sgmls/Makefile
new file mode 100644
index 0000000..62c6cea
--- /dev/null
+++ b/usr.bin/sgmls/Makefile
@@ -0,0 +1,9 @@
+#
+# Bmake file for sgmls
+# $Id:$
+#
+
+SUBDIR= libsgmls sgmls sgmlsasp rast
+
+.include <bsd.subdir.mk>
+
diff --git a/usr.bin/sgmls/Makefile.inc b/usr.bin/sgmls/Makefile.inc
new file mode 100644
index 0000000..1e4fc2b
--- /dev/null
+++ b/usr.bin/sgmls/Makefile.inc
@@ -0,0 +1,13 @@
+#
+# Common Bmake definitions shared by the sgmls subdirectories
+#
+# $Id$
+#
+
+.include "${.CURDIR}/../../Makefile.inc"
+
+.if exists(${.CURDIR}/../libsgmls/obj)
+LIBSGMLS= ${.CURDIR}/../libsgmls/obj/libsgmls.a
+.else
+LIBSGMLS= ${.CURDIR}/../libsgmls/libsgmls.a
+.endif \ No newline at end of file
diff --git a/usr.bin/sgmls/README b/usr.bin/sgmls/README
new file mode 100644
index 0000000..dd6e257
--- /dev/null
+++ b/usr.bin/sgmls/README
@@ -0,0 +1,138 @@
+$Id:$
+
+This is the sgmls release 1.1 SGML parser written by James Clark
+jjc@jclark.com, repackaged for FreeBSD. The original source may be
+obtained from ftp://ftp.jclark.com/.
+
+Pieces removed include:
+ * Test documents: Compiled on FreeBSD, sgmls passes all tests.
+ * sgml-mode.el: The sole file covered by the GNU GPL. This is not
+ installed anyway and anyone wishing to do serious SGML editing
+ would be best to get the psgml package.
+ * Makefiles and config files for other operating systems (vms, dos,
+ cms).
+ * Formatted versions of the man pages.
+
+
+20-Apr-1995 John Fieber <jfieber@freebsd.org>
+
+
+The original README and TODO follow.
+----------------------------------------------------------------------
+This is sgmls, an SGML parser derived from the ARCSGML parser
+materials which were written by Charles F. Goldfarb. (These are
+available for anonymous ftp from ftp.ifi.uio.no [128.240.88.1] in the
+directory SIGhyper/SGMLUG/distrib.)
+
+The version number is given in the file version.c.
+
+The file INSTALL contains installation instructions.
+
+The file NEWS describes recent user-visible changes.
+
+The file sgmls.man contains a Unix manual page; sgmls.txt is the
+formatted version of this.
+
+The file sgml-mode.el contains a very simple SGML mode for GNU Emacs.
+
+The files sgmls.c and sgmls.h contain a small library for parsing the
+output of sgmls. This is used by sgmlsasp, which translates the
+output of sgmls using an ASP replacement file, and by rast, which
+translates the output of sgmls to the format of a RAST result. The
+files sgmlsasp.man and rast.man contain Unix manual pages for sgmlsasp
+and rast; sgmlsasp.txt and rast.txt are the formatted versions of
+these.
+
+The file LICENSE contains the license which applies to arcsgml and
+accordingly to those parts of sgmls derived from arcsgml. See also
+the copyright notice at the beginning of sgmlxtrn.c. The parts that
+were written by me are in the public domain (any files that were
+written entirely by me contain a comment to that effect.) The file
+sgml-mode.el is covered by the GNU GPL.
+
+Please report any bugs to me. When reporting bugs, please include the
+version number, details of your machine, OS and compiler, and a
+complete self-contained file that will allow me to reproduce the bug.
+
+James Clark
+jjc@jclark.com
+
+----------------------------------------------------------------------
+Warn about mixed content models where #PCDATA can't occur everywhere.
+
+Perhaps there should be a configuration option saying what a control
+character is for the purpose of SHUNCHAR CONTROLS.
+
+Should the current character that is printed in error messages be
+taken from the file entity or the current entity?
+
+Refine SYS_ action. If we distinguish DELNONCH in lexmark, lexgrp,
+lexsd, we can have separate action that ignores the following
+character as well.
+
+Should RSs in CDATA/SDATA entities be ignored as specified in 322:1-2?
+Similarly, do the rules on REs in 322:3-11 apply to CDATA/SDATA
+entities? (I don't think they count as being `in content'.)
+
+What should the entity manager do when it encounters code 13 in an
+input file? (Currently it treats it as an RE.)
+
+Document when invalid exclusions are detected.
+
+Option not to perform capacity checking.
+
+Give a warning if the recommendation of 422:1-3 is contravened.
+
+Should an empty CDATA/RCDATA marked section be allowed in the document
+type declaration subset?
+
+Include example of use of SGML_PATH in documentation.
+
+Try to detect the situation in 310:8-10 (but see 282:1-2).
+
+Resize hash tables if they become too full.
+
+Say something in the man page about message catalogues.
+
+Consider whether support for SHORTREF NONE requires further changes
+(other than disallowing short reference mapping declaration).
+
+Fake /dev/fd/N and /dev/stdin for systems that don't provide it.
+
+Improve the efficiency of the entity manager by not closing and
+reopening files. If we run out of FILEs choose the stream with the
+fewest bytes remaining to be read, and read the rest of it into
+memory. Each entity level will have its own read buffer.
+
+Support multi-line error messages: automatically indent after
+newline. (We could output to a temporary file first, then copy to
+stderr replacing newlines by newline+indent).
+
+Option that says to output out of context things.
+
+Divide up formal public identifier errors. Give these errors their
+own type code.
+
+Consider whether, when OMITTAG is NO, we need to change interpretation
+of an empty start-tag (7.4.1.1).
+
+Possibly turn errors 70 and 136 into warnings.
+
+Make things work with NORMSEP > 2. Would need to keep track of number
+of CDATA and SDATA entities in CDATA attributes.
+
+Handle `SCOPE INSTANCE'.
+
+In entgen.c, truncate filenames for OSs that don't do this themselves.
+
+Provide an option that specifies the maximum number of errors; when
+this limit is exceeded sgmls would exit.
+
+Document non-portable assumptions in the code.
+
+Option to write out SGML declaration. In this case make it write out
+APPINFO parameter.
+
+Allow there to be catalogs mapping public ids to filenames.
+Environment variable SGML_CATALOG containing list of filenames of
+catalogs.
diff --git a/usr.bin/sgmls/configure b/usr.bin/sgmls/configure
new file mode 100755
index 0000000..7fd1968
--- /dev/null
+++ b/usr.bin/sgmls/configure
@@ -0,0 +1,617 @@
+#!/bin/sh
+# Generate config.h from unix.cfg.
+
+trap 'rm -f doit doit.c doit.o doit.log config.out; exit 1' 1 2 3 15
+
+on=
+off=
+CC=${CC-cc}
+
+# Normally we use VARARGS if __STDC__ is not defined.
+# Test whether this assumption is wrong.
+
+cat >doit.c <<\EOF
+#ifdef __STDC__
+#include <stdarg.h>
+int foo(char *s,...)
+{
+ va_list ap;
+
+ va_start(ap, s);
+ va_end(ap);
+ return 0;
+}
+#else
+int foo = 0;
+#endif
+EOF
+
+$CC $CFLAGS -c doit.c >/dev/null 2>&1
+if test $? -ne 0
+then
+ on="$on VARARGS"
+fi
+
+cat >doit.c <<\EOF
+#include <stddef.h>
+int foo = 0;
+EOF
+
+if $CC $CFLAGS -c doit.c >/dev/null 2>&1
+then
+ off="$off STDDEF_H_MISSING"
+else
+ on="$on STDDEF_H_MISSING"
+fi
+
+cat >doit.c <<\EOF
+#include <stdlib.h>
+int foo = 0;
+EOF
+
+if $CC $CFLAGS -c doit.c >/dev/null 2>&1
+then
+ off="$off STDLIB_H_MISSING"
+else
+ on="$on STDLIB_H_MISSING"
+fi
+
+cat >doit.c <<\EOF
+#include <limits.h>
+int foo = 0;
+EOF
+
+if $CC $CFLAGS -c doit.c >/dev/null 2>&1
+then
+ off="$off LIMITS_H_MISSING"
+else
+ on="$on LIMITS_H_MISSING"
+fi
+
+cat >doit.c <<\EOF
+#include <vfork.h>
+int foo = 0;
+EOF
+
+if $CC $CFLAGS -c doit.c >/dev/null 2>&1
+then
+ on="$on HAVE_VFORK_H"
+else
+ off="$off HAVE_VFORK_H"
+fi
+
+cat >doit.c <<\EOF
+#include <unistd.h>
+int foo = 0;
+EOF
+
+if $CC $CFLAGS -c doit.c >/dev/null 2>&1
+then
+ on="$on HAVE_UNISTD_H"
+else
+ off="$off HAVE_UNISTD_H"
+fi
+
+cat >doit.c <<\EOF
+#include <sys/types.h>
+#include <sys/stat.h>
+int foo = 0;
+EOF
+
+if $CC $CFLAGS -c doit.c >/dev/null 2>&1
+then
+ on="$on HAVE_SYS_STAT_H"
+else
+ off="$off HAVE_SYS_STAT_H"
+fi
+
+cat >doit.c <<\EOF
+/* Exit normally unless we need to use isascii. */
+
+#include <ctype.h>
+#include <signal.h>
+
+static int whoops()
+{
+ _exit(1);
+}
+
+main()
+{
+ int c;
+#ifdef isascii
+#ifdef SIGSEGV
+ signal(SIGSEGV, whoops);
+#endif
+#ifdef SIGBUS
+ signal(SIGBUS, whoops);
+#endif
+#ifdef SIGIOT
+ signal(SIGIOT, whoops);
+#endif
+
+ for (c = 128; c < 256; c++) {
+ if (c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5'
+ || c == '6' || c == '7' || c == '8' || c == '9') {
+ if (!isdigit(c) || isalpha(c) || iscntrl(c) || isspace(c) || ispunct(c))
+ exit(1);
+ }
+ else if (isdigit(c))
+ exit(1);
+ else if (isalpha(c)) {
+ if (iscntrl(c) || isspace(c) || ispunct(c)
+ || (islower(c) && toupper(c) != c && !isupper(toupper(c)))
+ || (isupper(c) && tolower(c) != c && !islower(tolower(c))))
+ exit(1);
+ }
+ else if (islower(c) || isupper(c))
+ exit(1);
+ else if (iscntrl(c)) {
+ if (ispunct(c))
+ exit(1);
+ }
+ }
+#endif /* isascii */
+ exit(0);
+}
+EOF
+
+if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null
+then
+ # This tries to find the symbol that looks like the array
+ # used by <ctype.h>, and sees if its length appears to be 128
+ # rather than 256.
+ if test 1 = `(nm -n doit 2>/dev/null) | awk '
+BEGIN {
+ weight["0"] = 0;
+ weight["1"] = 1;
+ weight["2"] = 2;
+ weight["3"] = 3;
+ weight["4"] = 4;
+ weight["5"] = 5;
+ weight["6"] = 6;
+ weight["7"] = 7;
+ weight["8"] = 8;
+ weight["9"] = 9;
+ weight["a"] = weight["A"] = 10;
+ weight["b"] = weight["B"] = 11;
+ weight["c"] = weight["C"] = 12;
+ weight["d"] = weight["D"] = 13;
+ weight["e"] = weight["E"] = 14;
+ weight["f"] = weight["F"] = 15;
+}
+
+/^[0-9a-zA-Z]* D .*ctype/ && ctype_nr == 0 {
+ ctype_nr = NR;
+ addr = 0;
+ len = length($1);
+ for (i = 1; i <= len; i++)
+ addr = addr*16 + weight[substr($1, i, 1)];
+}
+
+/^[0-9a-zA-Z]* D / && NR == ctype_nr + 1 {
+ next_addr = 0;
+ len = length($1);
+ for (i = 1; i <= len; i++)
+ next_addr = next_addr*16 + weight[substr($1, i, 1)];
+}
+
+END {
+ size = next_addr - addr;
+ if (size >= 128 && size < 256)
+ print "1";
+ else
+ print "0";
+}'`
+ then
+ on="$on USE_ISASCII"
+ else
+ # Fall back to the ctype(3) manual page.  col -b removes the
+ # backspace overstrikes from the formatted page; sed then rejoins
+ # the wrapped (and possibly hyphenated) sentence and squeezes runs
+ # of blanks so that grep can match it on a single line.
+ if ((yes | man 3 ctype) 2>/dev/null) \
+ | col -b 2>/dev/null \
+ | sed -e 's/ *$//' -e '/de-$/N' \
+ -e 's/-\n//g' -e '/defined$/N' -e '/only$/N' \
+ -e '/where$/N' -e '/isascii$/N' -e '/is$/N' \
+ -e 's/\n/ /g' -e 's/  */ /g' \
+ | grep "defined only where isascii is true" >/dev/null
+ then
+ on="$on USE_ISASCII"
+ else
+ off="$off USE_ISASCII"
+ fi
+ fi
+else
+ on="$on USE_ISASCII"
+fi
+
+cat >doit.c <<\EOF
+main(argc, argv)
+int argc;
+char **argv;
+{
+ if (argc == 0)
+ remove("foo");
+ exit(0);
+}
+EOF
+
+if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null
+then
+ off="$off REMOVE_MISSING"
+else
+ on="$on REMOVE_MISSING"
+fi
+
+cat >doit.c <<\EOF
+main(argc, argv)
+int argc;
+char **argv;
+{
+ if (argc == 0)
+ getopt(argc, argv, "v");
+ exit(0);
+}
+EOF
+
+if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null
+then
+ on="$on HAVE_GETOPT"
+else
+ off="$off HAVE_GETOPT"
+fi
+
+cat >doit.c <<\EOF
+main(argc, argv)
+int argc;
+char **argv;
+{
+ if (argc == 0)
+ access("foo", 4);
+ exit(0);
+}
+EOF
+
+if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null
+then
+ on="$on HAVE_ACCESS"
+else
+ off="$off HAVE_ACCESS"
+fi
+
+cat >doit.c <<\EOF
+main(argc, argv)
+int argc;
+char **argv;
+{
+ if (argc == 0)
+ vfork();
+ exit(0);
+}
+EOF
+
+if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null
+then
+ on="$on HAVE_VFORK"
+else
+ off="$off HAVE_VFORK"
+fi
+
+cat >doit.c <<\EOF
+main(argc, argv)
+int argc;
+char **argv;
+{
+
+ if (argc == 0) {
+ int status;
+ waitpid(-1, &status, 0);
+ }
+ exit(0);
+}
+EOF
+
+if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null
+then
+ on="$on HAVE_WAITPID"
+else
+ off="$off HAVE_WAITPID"
+fi
+
+cat >doit.c <<\EOF
+#include <string.h>
+main(argc, argv)
+int argc;
+char **argv;
+{
+ if (argc == 0)
+ strerror(0);
+ exit(0);
+}
+EOF
+
+if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null
+then
+ off="$off STRERROR_MISSING"
+else
+ on="$on STRERROR_MISSING"
+fi
+
+cat >doit.c <<\EOF
+#include <strings.h>
+main(argc, argv)
+int argc;
+char **argv;
+{
+ if (argc == 0)
+ bcopy((char *)0, (char *)0, 0);
+ exit(0);
+}
+EOF
+
+if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null
+then
+ # Only use BSD_STRINGS if ANSI string functions don't work.
+ cat >doit.c <<\EOF
+#include <string.h>
+main(argc, argv)
+int argc;
+char **argv;
+{
+ if (argc == 0)
+ memcpy((char *)0, (char *)0, 0);
+ exit(0);
+}
+EOF
+
+ if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null
+ then
+ off="$off BSD_STRINGS"
+ else
+ on="$on BSD_STRINGS"
+ fi
+else
+ off="$off BSD_STRINGS"
+fi
+
+cat >doit.c <<\EOF
+#include <signal.h>
+main(argc, argv)
+int argc;
+char **argv;
+{
+ if (argc == 0)
+ raise(SIGINT);
+ exit(0);
+}
+EOF
+
+if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null
+then
+ off="$off RAISE_MISSING"
+else
+ on="$on RAISE_MISSING"
+fi
+
+cat >doit.c <<\EOF
+#include <stdio.h>
+main(argc, argv)
+int argc;
+char **argv;
+{
+ if (argc == 0) {
+ fpos_t pos;
+ fsetpos(stdin, &pos);
+ fgetpos(stdin, &pos);
+ }
+ exit(0);
+}
+EOF
+
+if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null
+then
+ off="$off FPOS_MISSING"
+else
+ on="$on FPOS_MISSING"
+fi
+
+cat >doit.c <<\EOF
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+main(argc, argv)
+int argc;
+char **argv;
+{
+ if (argc == 0) {
+ pid_t pid;
+ int status;
+ long n = sysconf(_SC_OPEN_MAX);
+ pid = waitpid(-1, &status, 0);
+ WIFSTOPPED(status);
+ WIFSIGNALED(status);
+ WIFEXITED(status);
+ WEXITSTATUS(status);
+ WTERMSIG(status);
+ WSTOPSIG(status);
+ }
+ exit(0);
+}
+EOF
+
+if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null
+then
+ on="$on POSIX"
+else
+ off="$off POSIX"
+fi
+
+cat >doit.c <<\EOF
+#include <stdio.h>
+#include <signal.h>
+
+static int whoops()
+{
+ _exit(1);
+}
+
+main()
+{
+ char buf[30];
+#ifdef SIGSEGV
+ signal(SIGSEGV, whoops);
+#endif
+#ifdef SIGBUS
+ signal(SIGBUS, whoops);
+#endif
+#ifdef SIGIOT
+ signal(SIGIOT, whoops);
+#endif
+ sprintf(buf, "%2$s%2$s%1$s%1$s", "bar", "foo");
+ exit(!!strcmp(buf, "foofoobarbar"));
+}
+EOF
+
+if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null
+then
+ on="$on HAVE_EXTENDED_PRINTF"
+else
+ off="$off HAVE_EXTENDED_PRINTF"
+fi
+
+cat >doit.c <<\EOF
+#include <nl_types.h>
+
+main(argc, argv)
+int argc;
+char **argv;
+{
+ if (argc == 0) {
+ nl_catd d = catopen("foo", 0);
+ catgets(d, 1, 1, "default");
+ catclose(d);
+ }
+ exit(0);
+}
+EOF
+
+if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null
+then
+ on="$on HAVE_CAT"
+else
+ off="$off HAVE_CAT"
+fi
+
+cat >doit.c <<\EOF
+#include <limits.h>
+
+char c = UCHAR_MAX;
+
+main(argc, argv)
+int argc;
+char **argv;
+{
+#if CHAR_MIN < 0
+ exit(!(c < 0));
+#else
+ exit(!(c > 0));
+#endif
+}
+EOF
+
+if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null
+then
+ char_signed=
+else
+ cat >doit.c <<\EOF
+main()
+{
+ int i;
+
+ for (i = 0; i < 512; i++) {
+ char c = (char)i;
+ if (c < 0)
+ exit(1);
+ }
+ exit(0);
+}
+EOF
+
+ if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null
+ then
+ char_signed=0
+ else
+ char_signed=1
+ fi
+fi
+
+cat >doit.c <<\EOF
+
+typedef void VOID;
+
+extern VOID bar();
+
+VOID foo()
+{
+}
+EOF
+
+if $CC $CFLAGS -c doit.c >/dev/null 2>&1
+then
+ void_ret=void
+else
+ void_ret=int
+fi
+
+
+cat >doit.c <<\EOF
+
+void *foo()
+{
+ static char *buf;
+ return buf;
+}
+EOF
+
+if $CC $CFLAGS -c doit.c >doit.log 2>&1
+then
+ if test -s doit.log
+ then
+ void_star="char \*"
+ else
+ void_star="void \*"
+ fi
+
+else
+ void_star="char \*"
+fi
+
+edit=
+
+rm -f doit.c doit doit.log doit.o
+
+for var in $on
+do
+ edit="$edit -e 's;^/\\* *\\(#define $var [^/]*\\) *\\*/;\\1;'"
+done
+for var in $off
+do
+ edit="$edit -e 's;^#define $var [^/]*;/* & */;'"
+done
+
+if test -n "$char_signed"
+then
+ edit="$edit -e 's;^/\\* *\\(#define CHAR_SIGNED $char_signed\\) *\\*/;\\1;'"
+fi
+
+edit="$edit -e 's/^typedef .*VOID;/typedef $void_ret VOID;/'"
+edit="$edit -e 's/^typedef .*UNIV;/typedef ${void_star}UNIV;/'"
+
+if test -n "$PREFIX" && test "X$PREFIX" != "X/usr/local"
+then
+ edit="$edit -e '/DEFAULT_PATH/s;/usr/local;$PREFIX;g'"
+fi
+
+eval sed $edit unix.cfg ">config.out"
+
+mv config.out config.h
+
+exit 0
diff --git a/usr.bin/sgmls/libsgmls/Makefile b/usr.bin/sgmls/libsgmls/Makefile
new file mode 100644
index 0000000..e94fcc4
--- /dev/null
+++ b/usr.bin/sgmls/libsgmls/Makefile
@@ -0,0 +1,17 @@
+#
+# Bmakefile for libsgmls
+#
+# $Id$
+#
+
+LIB= sgmls
+SRCS= sgmls.c
+
+CFLAGS+= -I${.CURDIR}/../sgmls
+
+NOMAN= noman
+NOPROFILE= noprofile
+
+install:
+
+.include <bsd.lib.mk>
diff --git a/usr.bin/sgmls/libsgmls/sgmls.c b/usr.bin/sgmls/libsgmls/sgmls.c
new file mode 100644
index 0000000..5af7e5b
--- /dev/null
+++ b/usr.bin/sgmls/libsgmls/sgmls.c
@@ -0,0 +1,1036 @@
+/* sgmls.c:
+ Library for reading output of sgmls.
+
+ Written by James Clark (jjc@jclark.com). */
+
+#include "config.h"
+#include "std.h"
+#include "sgmls.h"
+#include "lineout.h"
+
+#ifdef __GNUC__
+#define NO_RETURN volatile
+#else
+#define NO_RETURN /* as nothing */
+#endif
+
+#ifdef USE_PROTOTYPES
+#define P(parms) parms
+#else
+#define P(parms) ()
+#endif
+
+#ifndef __STDC__
+#define const /* as nothing */
+#endif
+
+typedef struct sgmls_data data_s;
+typedef struct sgmls_notation notation_s;
+typedef struct sgmls_internal_entity internal_entity_s;
+typedef struct sgmls_external_entity external_entity_s;
+typedef struct sgmls_entity entity_s;
+typedef struct sgmls_attribute attribute_s;
+typedef struct sgmls_event event_s;
+
+/* Generic view of the entity and notation lists.  sgmls_next() casts
+   the addresses of the entity_list and notation_list heads to
+   struct list ** when calling list_finish_level(), so those structs
+   must keep subdoc_level and next as their first two members. */
+/* lists are sorted in reverse order of level */
+struct list {
+ int subdoc_level; /* -1 if associated with finished subdoc */
+ struct list *next;
+ char *name;
+};
+
+/* One defined entity.  The first two members mirror struct list
+   because sgmls_next() passes the list head as a struct list **.
+   NOTE(review): struct list has a char *name third member; presumably
+   entity_s begins with the entity name -- confirm in sgmls.h. */
+struct entity_list {
+ int subdoc_level;
+ struct entity_list *next;
+ entity_s entity;
+};
+
+/* One defined notation.  The first two members mirror struct list
+   because sgmls_next() passes the list head as a struct list **.
+   NOTE(review): presumably notation_s begins with the notation name,
+   matching struct list's third member -- confirm in sgmls.h. */
+struct notation_list {
+ int subdoc_level;
+ struct notation_list *next;
+ notation_s notation;
+};
+
+/* State for reading one stream of sgmls output.  Allocated and
+   zero-initialised by sgmls_create(), released by sgmls_free(). */
+struct sgmls {
+ FILE *fp; /* stream given to sgmls_create() */
+ char *buf; /* current input line, parsed by sgmls_next() */
+ unsigned buf_size; /* presumably the allocated size of buf -- TODO confirm */
+ struct entity_list *entities; /* entity definitions; freed by sgmls_free() */
+ struct notation_list *notations; /* notation definitions; freed by sgmls_free() */
+ attribute_s *attributes; /* collected from attribute lines, handed to the next start event */
+ unsigned long lineno; /* copied into each event's lineno */
+ char *filename; /* copied into each event's filename */
+ unsigned filename_size; /* presumably the allocated size of filename -- TODO confirm */
+ unsigned long input_lineno;
+ int subdoc_level; /* raised on subdoc start, lowered on subdoc end */
+ char **files; /* from `f' commands */
+ int nfiles; /* number of entries in files */
+ char *sysid; /* from `s' command */
+ char *pubid; /* from `p' command */
+};
+
+enum error_code {
+ E_ZERO, /* Not an error */
+ E_NOMEM, /* Out of memory */
+ E_BADESCAPE, /* Bad escape */
+ E_NULESCAPE, /* \000 other than in data */
+ E_NUL, /* A null input character */
+ E_BADENTITY, /* Reference to undefined entity */
+ E_INTERNALENTITY, /* Internal entity when external was needed */
+ E_SYSTEM, /* System input error */
+ E_COMMAND, /* Bad command letter */
+ E_MISSING, /* Missing arguments */
+ E_NUMBER, /* Not a number */
+ E_ATTR, /* Bad attribute type */
+ E_BADNOTATION, /* Reference to undefined notation */
+ E_BADINTERNAL, /* Bad internal entity type */
+ E_BADEXTERNAL, /* Bad external entity type */
+ E_EOF, /* EOF in middle of line */
+ E_SDATA, /* \| other than in data */
+ E_LINELENGTH /* line longer than UNSIGNED_MAX */
+};
+
+static char *errlist[] = {
+ 0,
+ "Out of memory",
+ "Bad escape",
+ "\\0 escape not in data",
+ "Nul character in input",
+ "Reference to undefined entity",
+ "Internal entity when external was needed",
+ "System input error",
+ "Bad command letter",
+ "Missing arguments",
+ "Not a number",
+ "Bad attribute type",
+ "Reference to undefined notation",
+ "Bad internal entity type",
+ "Bad external entity type",
+ "EOF in middle of line",
+ "\\| other than in data",
+ "Too many V commands",
+ "Input line too long"
+};
+
+static void NO_RETURN error P((enum error_code));
+static int parse_data P((char *, unsigned long *));
+static void parse_location P((char *, struct sgmls *));
+static void parse_notation P((char *, notation_s *));
+static void parse_internal_entity P((char *, internal_entity_s *));
+static void parse_external_entity
+ P((char *, struct sgmls *, external_entity_s *));
+static void parse_subdoc_entity P((char *, external_entity_s *));
+static attribute_s *parse_attribute P((struct sgmls *, char *));
+static void grow_datav P((void));
+static char *unescape P((char *));
+static char *unescape_file P((char *));
+static int unescape1 P((char *));
+static char *scan_token P((char **));
+static int count_args P((char *));
+static struct list *list_find P((struct list *, char *, int));
+static UNIV xmalloc P((unsigned));
+static UNIV xrealloc P((UNIV , unsigned));
+static char *strsave P((char *));
+static int read_line P((struct sgmls *));
+static notation_s *lookup_notation P((struct sgmls *, char *));
+static entity_s *lookup_entity P((struct sgmls *, char *));
+static external_entity_s *lookup_external_entity P((struct sgmls *, char *));
+static void define_external_entity P((struct sgmls *, external_entity_s *));
+static void define_internal_entity P((struct sgmls *, internal_entity_s *));
+static void define_notation P((struct sgmls *, notation_s *));
+static data_s *copy_data P((data_s *, int));
+static void list_finish_level P((struct list **, int));
+static void add_attribute P((attribute_s **, attribute_s *));
+static void default_errhandler P((int, char *, unsigned long));
+
+#define xfree(s) do { if (s) free(s); } while (0)
+
+static sgmls_errhandler *errhandler = default_errhandler;
+static unsigned long input_lineno = 0;
+
+static data_s *datav = 0;
+static int datav_size = 0;
+
+/* Create the state needed to read sgmls output from FP.  Every field
+   other than the stream starts out zero.  Returns a null pointer if
+   malloc fails. */
+struct sgmls *sgmls_create(fp)
+     FILE *fp;
+{
+  struct sgmls *state = (struct sgmls *)malloc(sizeof(struct sgmls));
+
+  if (state == 0)
+    return 0;
+
+  /* Input stream and line buffer. */
+  state->fp = fp;
+  state->buf = 0;
+  state->buf_size = 0;
+  state->input_lineno = 0;
+
+  /* Location copied into events. */
+  state->filename = 0;
+  state->filename_size = 0;
+  state->lineno = 0;
+
+  /* Definitions and pending attributes. */
+  state->entities = 0;
+  state->notations = 0;
+  state->attributes = 0;
+  state->subdoc_level = 0;
+
+  /* Values saved from `f', `s' and `p' commands. */
+  state->files = 0;
+  state->nfiles = 0;
+  state->sysid = 0;
+  state->pubid = 0;
+
+  return state;
+}
+
+/* Release everything owned by SP: the saved filename, pending
+   attributes, every entity and notation definition, the line buffer
+   and the saved sysid, pubid and file names.  The file-level datav
+   array is discarded as well.  A null SP is ignored. */
+void sgmls_free(sp)
+     struct sgmls *sp;
+{
+  struct entity_list *ent;
+  struct notation_list *note;
+  int i;
+
+  if (sp == 0)
+    return;
+
+  xfree(sp->filename);
+  sgmls_free_attributes(sp->attributes);
+
+  ent = sp->entities;
+  while (ent != 0) {
+    struct entity_list *following = ent->next;
+
+    if (!ent->entity.is_internal) {
+      for (i = 0; i < ent->entity.u.external.nfilenames; i++)
+	xfree(ent->entity.u.external.filenames[i]);
+      xfree(ent->entity.u.external.filenames);
+      xfree(ent->entity.u.external.sysid);
+      xfree(ent->entity.u.external.pubid);
+      sgmls_free_attributes(ent->entity.u.external.attributes);
+    }
+    else
+      xfree(ent->entity.u.internal.data.s);
+
+    /* The name is released through the internal member for both
+       kinds of entity, exactly as before.
+       NOTE(review): presumably both union members begin with the
+       name -- confirm in sgmls.h. */
+    free(ent->entity.u.internal.name);
+    free(ent);
+    ent = following;
+  }
+
+  note = sp->notations;
+  while (note != 0) {
+    struct notation_list *following = note->next;
+
+    xfree(note->notation.sysid);
+    xfree(note->notation.pubid);
+    free(note->notation.name);
+    free(note);
+    note = following;
+  }
+
+  xfree(sp->buf);
+  xfree(sp->pubid);
+  xfree(sp->sysid);
+  if (sp->files != 0) {
+    for (i = 0; i < sp->nfiles; i++)
+      free(sp->files[i]);
+    free(sp->files);
+  }
+  free(sp);
+
+  /* datav is shared file-level storage rather than part of SP. */
+  xfree(datav);
+  datav = 0;
+  datav_size = 0;
+}
+
+sgmls_errhandler *sgmls_set_errhandler(handler)
+ sgmls_errhandler *handler;
+{
+ sgmls_errhandler *old = errhandler;
+ if (handler)
+ errhandler = handler;
+ return old;
+}
+
+/* Read input lines until one yields an event, store that event in *e
+   and return 1.  Lines that only update parser state (attributes,
+   entity and notation definitions, location, sysid/pubid/file names)
+   are consumed without returning.  Returns 0 once read_line() returns
+   0 (presumably end of input -- confirm against read_line).
+   The PI text stored in *e points into sp->buf and the data vector is
+   the file-level datav array, so neither is a fresh copy. */
+int sgmls_next(sp, e)
+ struct sgmls *sp;
+ event_s *e;
+{
+ while (read_line(sp)) {
+ char *buf = sp->buf;
+
+ /* Location is taken before the line is parsed. */
+ e->filename = sp->filename;
+ e->lineno = sp->lineno;
+
+ /* The first character of the line selects the command. */
+ switch (buf[0]) {
+ case DATA_CODE:
+ /* parse_data may advance sp->lineno and fills datav. */
+ e->u.data.n = parse_data(buf + 1, &sp->lineno);
+ e->u.data.v = datav;
+ e->type = SGMLS_EVENT_DATA;
+ return 1;
+ case START_CODE:
+ {
+ char *p;
+ /* Hand the collected attributes to the event and start afresh. */
+ e->u.start.attributes = sp->attributes;
+ sp->attributes = 0;
+ e->type = SGMLS_EVENT_START;
+ p = buf + 1;
+ e->u.start.gi = scan_token(&p);
+ return 1;
+ }
+ case END_CODE:
+ {
+ char *p = buf + 1;
+ e->type = SGMLS_EVENT_END;
+ e->u.end.gi = scan_token(&p);
+ return 1;
+ }
+ case START_SUBDOC_CODE:
+ case END_SUBDOC_CODE:
+ {
+ char *p = buf + 1;
+ char *name = scan_token(&p);
+ if (buf[0] == START_SUBDOC_CODE) {
+ /* Look the entity up before entering the subdocument level. */
+ e->u.entity = lookup_external_entity(sp, name);
+ sp->subdoc_level++;
+ e->type = SGMLS_EVENT_SUBSTART;
+ }
+ else {
+ e->type = SGMLS_EVENT_SUBEND;
+ /* The casts rely on entity_list and notation_list starting with
+    the same members as struct list. */
+ list_finish_level((struct list **)&sp->entities, sp->subdoc_level);
+ list_finish_level((struct list **)&sp->notations, sp->subdoc_level);
+ sp->subdoc_level--;
+ /* Look the entity up after returning to the enclosing level. */
+ e->u.entity = lookup_external_entity(sp, name);
+ }
+ return 1;
+ }
+ case ATTRIBUTE_CODE:
+ /* Collected until the next start-tag line. */
+ add_attribute(&sp->attributes, parse_attribute(sp, buf + 1));
+ break;
+ case DATA_ATTRIBUTE_CODE:
+ {
+ char *p = buf + 1;
+ char *name;
+ attribute_s *a;
+ external_entity_s *ext;
+
+ name = scan_token(&p);
+ a = parse_attribute(sp, p);
+ /* NOTE(review): ext is used without a null check; presumably
+    lookup_external_entity reports unknown names itself -- confirm. */
+ ext = lookup_external_entity(sp, name);
+ add_attribute(&ext->attributes, a);
+ }
+ break;
+ case REFERENCE_ENTITY_CODE:
+ {
+ char *p = buf + 1;
+ char *name;
+ name = scan_token(&p);
+ e->u.entity = lookup_external_entity(sp, name);
+ e->type = SGMLS_EVENT_ENTITY;
+ return 1;
+ }
+ case DEFINE_NOTATION_CODE:
+ {
+ notation_s notation;
+
+ parse_notation(buf + 1, &notation);
+ define_notation(sp, &notation);
+ }
+ break;
+ case DEFINE_EXTERNAL_ENTITY_CODE:
+ {
+ external_entity_s external;
+
+ parse_external_entity(buf + 1, sp, &external);
+ define_external_entity(sp, &external);
+ }
+ break;
+ case DEFINE_SUBDOC_ENTITY_CODE:
+ {
+ external_entity_s external;
+
+ parse_subdoc_entity(buf + 1, &external);
+ define_external_entity(sp, &external);
+ }
+ break;
+ case DEFINE_INTERNAL_ENTITY_CODE:
+ {
+ internal_entity_s internal;
+
+ parse_internal_entity(buf + 1, &internal);
+ define_internal_entity(sp, &internal);
+ }
+ break;
+ case PI_CODE:
+ /* The event's text pointer is buf + 1 itself, so unescape1
+    presumably rewrites the text in place and returns its length. */
+ e->u.pi.len = unescape1(buf + 1);
+ e->u.pi.s = buf + 1;
+ e->type = SGMLS_EVENT_PI;
+ return 1;
+ case LOCATION_CODE:
+ parse_location(buf + 1, sp);
+ break;
+ case APPINFO_CODE:
+ e->u.appinfo = unescape(buf + 1);
+ e->type = SGMLS_EVENT_APPINFO;
+ return 1;
+ case SYSID_CODE:
+ /* NOTE(review): any previous sp->sysid is overwritten without being
+    freed here; presumably it is consumed and reset elsewhere -- confirm. */
+ sp->sysid = strsave(unescape(buf + 1));
+ break;
+ case PUBID_CODE:
+ /* NOTE(review): same overwrite-without-free pattern as sp->sysid. */
+ sp->pubid = strsave(unescape(buf + 1));
+ break;
+ case FILE_CODE:
+ /* Grow the array by one slot and append a saved copy of the name. */
+ sp->files = xrealloc(sp->files, (sp->nfiles + 1)*sizeof(char *));
+ sp->files[sp->nfiles] = strsave(unescape_file(buf + 1));
+ sp->nfiles += 1;
+ break;
+ case CONFORMING_CODE:
+ e->type = SGMLS_EVENT_CONFORMING;
+ return 1;
+ default:
+ error(E_COMMAND);
+ }
+ }
+
+ return 0;
+}
+
+/* Unescape the data in p in place and split it into segments that
+   are recorded in datav.  `\|' switches between CDATA and SDATA
+   segments, `\n' produces an RE and adds one to *linenop, `\\' is a
+   backslash and `\ooo' is the character with that octal code.  An
+   empty CDATA segment is dropped; an empty SDATA segment is kept.
+   Returns the number of segments recorded. */
+
+static
+int parse_data(p, linenop)
+     char *p;
+     unsigned long *linenop;
+{
+  int nseg = 0;
+  int in_sdata = 0;
+  char *seg = p;		/* where the current segment begins */
+  char *out;			/* where the next unescaped char goes */
+
+  /* Nothing has to move until the first escape is seen. */
+  while (*p != '\0' && *p != '\\')
+    p++;
+  out = p;
+
+  while (*p != '\0') {
+    if (*p != '\\') {
+      *out++ = *p++;
+      continue;
+    }
+    p++;			/* step over the backslash */
+    if (*p == '\\')
+      *out++ = *p++;
+    else if (*p == 'n') {
+      *out++ = RECHAR;
+      *linenop += 1;
+      p++;
+    }
+    else if (*p >= '0' && *p <= '7') {
+      /* One to three octal digits. */
+      int val = 0;
+      int ndigits = 0;
+      do {
+	val = val*8 + (*p++ - '0');
+      } while (++ndigits < 3 && *p >= '0' && *p <= '7');
+      *out++ = (char)val;
+    }
+    else if (*p == '|') {
+      /* Close the current segment and start one of the other kind. */
+      if (out > seg || in_sdata) {
+	if (nseg >= datav_size)
+	  grow_datav();
+	datav[nseg].s = seg;
+	datav[nseg].len = out - seg;
+	datav[nseg].is_sdata = in_sdata;
+	nseg++;
+      }
+      in_sdata = !in_sdata;
+      seg = out;
+      p++;
+    }
+    else
+      error(E_BADESCAPE);
+  }
+
+  /* Close the final segment. */
+  if (out > seg || in_sdata) {
+    if (nseg >= datav_size)
+      grow_datav();
+    datav[nseg].s = seg;
+    datav[nseg].len = out - seg;
+    datav[nseg].is_sdata = in_sdata;
+    nseg++;
+  }
+  return nseg;
+}
+
+/* Double the capacity of datav (starting from 2 entries). */
+
+static
+void grow_datav()
+{
+  unsigned new_size;
+
+  if (datav_size)
+    new_size = 2*datav_size;
+  else
+    new_size = 2;
+  datav = (data_s *)xrealloc((UNIV)datav, new_size*sizeof(data_s));
+  datav_size = new_size;
+}
+
+/* Handle a location command: s is a decimal line number optionally
+   followed by a space and an escaped filename.  Stores the line number
+   in sp->lineno and, if a filename is present, copies it into
+   sp->filename, enlarging that buffer when it is too small. */
+
+static
+void parse_location(s, sp)
+     char *s;
+     struct sgmls *sp;
+{
+  unsigned needed;
+  char *name;
+
+  if (!(*s >= '0' && *s <= '9'))
+    error(E_NUMBER);
+  if (sscanf(s, "%lu", &sp->lineno) != 1)
+    error(E_NUMBER);
+  /* Skip the digits just read. */
+  for (s++; *s >= '0' && *s <= '9'; s++)
+    ;
+
+  if (*s != ' ')
+    return;			/* no filename given */
+  name = unescape_file(s + 1);
+  needed = strlen(name) + 1;
+  if (needed > sp->filename_size) {
+    sp->filename = xrealloc(sp->filename, needed);
+    strcpy(sp->filename, name);
+    sp->filename_size = needed;
+  }
+  else
+    strcpy(sp->filename, name);
+}
+
+/* Handle a notation definition: the first token of s is the notation
+   name, a copy of which is stored in n->name. */
+
+static
+void parse_notation(s, n)
+     char *s;
+     notation_s *n;
+{
+  char *name;
+
+  name = scan_token(&s);
+  n->name = strsave(name);
+}
+
+/* Handle an internal entity definition of the form
+   `name CDATA|SDATA text'.  The name and the unescaped text are copied
+   into newly allocated memory; empty text is represented by a null
+   pointer with length 0. */
+
+static
+void parse_internal_entity(s, e)
+     char *s;
+     internal_entity_s *e;
+{
+  char *name;
+  char *kind;
+
+  name = scan_token(&s);
+  e->name = strsave(name);
+  kind = scan_token(&s);
+  if (strcmp(kind, "SDATA") == 0)
+    e->data.is_sdata = 1;
+  else if (strcmp(kind, "CDATA") == 0)
+    e->data.is_sdata = 0;
+  else
+    error(E_BADINTERNAL);
+
+  /* The text may contain nul characters, so it is copied by length. */
+  e->data.len = unescape1(s);
+  if (e->data.len != 0) {
+    e->data.s = xmalloc(e->data.len);
+    memcpy(e->data.s, s, e->data.len);
+  }
+  else
+    e->data.s = 0;
+}
+
+/* Handle an external entity definition of the form
+   `name CDATA|SDATA|NDATA notation'.  Fills in the name, type and
+   notation of e; the notation must already have been defined. */
+
+static
+void parse_external_entity(s, sp, e)
+     char *s;
+     struct sgmls *sp;
+     external_entity_s *e;
+{
+  static struct {
+    char *name;
+    enum sgmls_external_entity_type type;
+  } kinds[] = {
+    { "CDATA", SGMLS_ENTITY_CDATA },
+    { "SDATA", SGMLS_ENTITY_SDATA },
+    { "NDATA", SGMLS_ENTITY_NDATA }
+  };
+  int nkinds = sizeof(kinds)/sizeof(kinds[0]);
+  int i;
+  char *kind;
+
+  e->name = strsave(scan_token(&s));
+  kind = scan_token(&s);
+  for (i = 0; i < nkinds; i++)
+    if (strcmp(kind, kinds[i].name) == 0)
+      break;
+  if (i == nkinds)
+    error(E_BADEXTERNAL);
+  e->type = kinds[i].type;
+  e->notation = lookup_notation(sp, scan_token(&s));
+}
+
+/* Handle a subdocument entity definition: the first token of s is the
+   entity name.  A subdocument entity has no notation, so the notation
+   pointer is cleared rather than left uninitialized; the whole
+   structure is later copied into the entity list by
+   define_external_entity(). */
+
+static
+void parse_subdoc_entity(s, e)
+     char *s;
+     external_entity_s *e;
+{
+  e->name = strsave(scan_token(&s));
+  e->type = SGMLS_ENTITY_SUBDOC;
+  e->notation = 0;
+}
+
+/* Parse an attribute specification of the form `name type value...'
+   and return a newly allocated attribute.  The type is one of CDATA,
+   IMPLIED, NOTATION, ENTITY or TOKEN.  The next field of the result is
+   not set here; add_attribute() sets it. */
+
+static
+attribute_s *parse_attribute(sp, s)
+     struct sgmls *sp;
+     char *s;
+{
+  attribute_s *attr;
+  char *kind;
+  int count, i;
+
+  attr = (attribute_s *)xmalloc(sizeof(*attr));
+  attr->name = strsave(scan_token(&s));
+  kind = scan_token(&s);
+
+  if (strcmp(kind, "CDATA") == 0) {
+    /* Line numbers are not tracked inside attribute values. */
+    unsigned long ignored_lineno = 0;
+    attr->type = SGMLS_ATTR_CDATA;
+    attr->value.data.n = parse_data(s, &ignored_lineno);
+    attr->value.data.v = copy_data(datav, attr->value.data.n);
+  }
+  else if (strcmp(kind, "IMPLIED") == 0)
+    attr->type = SGMLS_ATTR_IMPLIED;
+  else if (strcmp(kind, "NOTATION") == 0) {
+    attr->type = SGMLS_ATTR_NOTATION;
+    attr->value.notation = lookup_notation(sp, scan_token(&s));
+  }
+  else if (strcmp(kind, "ENTITY") == 0) {
+    attr->type = SGMLS_ATTR_ENTITY;
+    count = count_args(s);
+    if (count == 0)
+      error(E_MISSING);
+    attr->value.entity.v = (entity_s **)xmalloc(count*sizeof(entity_s *));
+    attr->value.entity.n = count;
+    for (i = 0; i < count; i++)
+      attr->value.entity.v[i] = lookup_entity(sp, scan_token(&s));
+  }
+  else if (strcmp(kind, "TOKEN") == 0) {
+    attr->type = SGMLS_ATTR_TOKEN;
+    count = count_args(s);
+    if (count == 0)
+      error(E_MISSING);
+    attr->value.token.v = (char **)xmalloc(count * sizeof(char *));
+    for (i = 0; i < count; i++)
+      attr->value.token.v[i] = strsave(scan_token(&s));
+    attr->value.token.n = count;
+  }
+  else
+    error(E_ATTR);
+  return attr;
+}
+
+/* Free every attribute in the list p together with the storage owned
+   by each one.  Entities and notations referred to by an attribute are
+   not freed. */
+
+void sgmls_free_attributes(p)
+     attribute_s *p;
+{
+  attribute_s *following;
+
+  for (; p; p = following) {
+    following = p->next;
+    if (p->type == SGMLS_ATTR_CDATA) {
+      if (p->value.data.v) {
+	/* copy_data() puts the text of all segments in one allocation
+	   that starts at the first segment. */
+	free(p->value.data.v[0].s);
+	free(p->value.data.v);
+      }
+    }
+    else if (p->type == SGMLS_ATTR_TOKEN) {
+      int i = 0;
+      while (i < p->value.token.n) {
+	free(p->value.token.v[i]);
+	i++;
+      }
+      xfree(p->value.token.v);
+    }
+    else if (p->type == SGMLS_ATTR_ENTITY)
+      xfree(p->value.entity.v);
+    /* SGMLS_ATTR_IMPLIED and SGMLS_ATTR_NOTATION own nothing extra. */
+    free(p->name);
+    free(p);
+  }
+}
+
+/* Return a newly allocated copy of the n segments in v, or a null
+   pointer if n is 0.  The text of all the segments is stored
+   contiguously in a single allocation (at least one byte long) that
+   the first segment points to. */
+
+static
+data_s *copy_data(v, n)
+     data_s *v;
+     int n;
+{
+  int i;
+  unsigned total = 0;
+  char *text;
+  data_s *copy;
+
+  if (n == 0)
+    return 0;
+
+  copy = (data_s *)xmalloc(n*sizeof(data_s));
+  for (i = 0; i < n; i++)
+    total += v[i].len;
+  /* Never ask for zero bytes. */
+  text = xmalloc(total ? total : 1);
+  for (i = 0; i < n; i++) {
+    memcpy(text, v[i].s, v[i].len);
+    copy[i].s = text;
+    copy[i].len = v[i].len;
+    copy[i].is_sdata = v[i].is_sdata;
+    text += v[i].len;
+  }
+  return copy;
+}
+
+/* Unescape s in place and nul-terminate the result, which is
+   returned.  It is an error for the unescaped data to contain a nul
+   character. */
+
+static
+char *unescape(s)
+     char *s;
+{
+  int len;
+
+  len = unescape1(s);
+  if (memchr(s, '\0', len) != 0)
+    error(E_NULESCAPE);
+  s[len] = '\0';
+  return s;
+}
+
+/* Like unescape(), except that each RE in the result is replaced by a
+   newline character (012 rather than 015). */
+
+static
+char *unescape_file(s)
+     char *s;
+{
+  char *p;
+
+  s = unescape(s);
+  for (p = s; *p != '\0'; p++)
+    if (*p == RECHAR)
+      *p = '\n';
+  return s;
+}
+
+/* Unescape s in place and return the length of the result, which is
+   not nul-terminated and may contain nul characters.  `\\' is a
+   backslash, `\n' is an RE and `\ooo' is the character with that octal
+   code.  `\|' is not allowed here. */
+
+static
+int unescape1(s)
+     char *s;
+{
+  const char *in;
+  char *out;
+
+  out = strchr(s, '\\');
+  if (out == 0)
+    return strlen(s);		/* nothing to unescape */
+
+  for (in = out; *in != '\0';) {
+    if (*in != '\\') {
+      *out++ = *in++;
+      continue;
+    }
+    in++;			/* step over the backslash */
+    if (*in == '\\')
+      *out++ = *in++;
+    else if (*in == 'n') {
+      *out++ = RECHAR;
+      in++;
+    }
+    else if (*in >= '0' && *in <= '7') {
+      /* One to three octal digits. */
+      int val = 0;
+      int ndigits = 0;
+      do {
+	val = val*8 + (*in++ - '0');
+      } while (++ndigits < 3 && *in >= '0' && *in <= '7');
+      *out++ = (char)val;
+    }
+    else if (*in == '|')
+      error(E_SDATA);
+    else
+      error(E_BADESCAPE);
+  }
+  return out - s;
+}
+
+/* Return the space-delimited token at the start of *pp.  The space
+   that ends the token, if any, is overwritten with a nul, and *pp is
+   advanced past it.  An empty token is an error. */
+
+static
+char *scan_token(pp)
+     char **pp;
+{
+  char *start = *pp;
+  char *p;
+
+  for (p = start; *p != '\0'; p++)
+    if (*p == ' ') {
+      *p++ = '\0';
+      break;
+    }
+  *pp = p;
+  if (*start == '\0')
+    error(E_MISSING);
+  return start;
+}
+
+/* Return the number of tokens that successive calls of scan_token()
+   would find in p; p is not modified.  The first character of each
+   token is never examined for being a space, and a single space ends
+   a token. */
+
+static
+int count_args(p)
+     char *p;
+{
+  int count;
+
+  for (count = 0; *p != '\0'; count++) {
+    /* Skip the rest of this token and the space after it. */
+    for (;;) {
+      p++;
+      if (*p == ' ') {
+	p++;
+	break;
+      }
+      if (*p == '\0')
+	break;
+    }
+  }
+  return count;
+}
+
+/* Read one line from sp->fp into sp->buf, without the newline and
+   nul-terminated, enlarging the buffer as required.  Returns 0 at end
+   of file and 1 otherwise.  A nul character in the input, or a final
+   line that does not end in a newline, is an error.  The count of
+   input lines in sp and in input_lineno is kept up to date for error
+   messages. */
+
+static
+int read_line(sp)
+     struct sgmls *sp;
+{
+  unsigned len = 0;
+  unsigned size = sp->buf_size;
+  char *line = sp->buf;
+  FILE *fp = sp->fp;
+  int c;
+
+  c = getc(fp);
+  if (c == EOF) {
+    input_lineno = sp->input_lineno;
+    if (ferror(fp))
+      error(E_SYSTEM);
+    return 0;
+  }
+
+  input_lineno = ++sp->input_lineno;
+  for (;;) {
+    if (len >= size) {
+      /* Start at 24 bytes, then double, saturating at UINT_MAX. */
+      if (size == 0)
+	size = 24;
+      else if (size <= (unsigned)UINT_MAX/2)
+	size *= 2;
+      else {
+	if (size == (unsigned)UINT_MAX)
+	  error(E_LINELENGTH);
+	size = (unsigned)UINT_MAX;
+      }
+      line = xrealloc(line, size);
+      sp->buf = line;
+      sp->buf_size = size;
+    }
+    if (c == '\0')
+      error(E_NUL);
+    if (c == '\n') {
+      line[len] = '\0';
+      return 1;
+    }
+    line[len++] = c;
+    c = getc(fp);
+    if (c == EOF) {
+      /* The last line was not terminated by a newline. */
+      if (ferror(fp))
+	error(E_SYSTEM);
+      else
+	error(E_EOF);
+    }
+  }
+}
+
+/* Return the notation called name defined at the current subdocument
+   level.  It is an error if there is none. */
+
+static
+notation_s *lookup_notation(sp, name)
+struct sgmls *sp;
+char *name;
+{
+  struct list *found;
+
+  found = list_find((struct list *)sp->notations, name, sp->subdoc_level);
+  if (found == 0)
+    error(E_BADNOTATION);
+  return &((struct notation_list *)found)->notation;
+}
+
+/* Return the entity called name defined at the current subdocument
+   level.  It is an error if there is none. */
+
+static
+entity_s *lookup_entity(sp, name)
+struct sgmls *sp;
+char *name;
+{
+  struct list *found;
+
+  found = list_find((struct list *)sp->entities, name, sp->subdoc_level);
+  if (found == 0)
+    error(E_BADENTITY);
+  return &((struct entity_list *)found)->entity;
+}
+
+/* Like lookup_entity(), but the entity found must be an external one;
+   an internal entity is an error. */
+
+static
+external_entity_s *lookup_external_entity(sp, name)
+struct sgmls *sp;
+char *name;
+{
+  entity_s *ent;
+
+  ent = lookup_entity(sp, name);
+  if (ent->is_internal)
+    error(E_INTERNALENTITY);
+  return &ent->u.external;
+}
+
+/* Define the external entity described by *e at the current
+   subdocument level.  The system identifier, public identifier and
+   filenames that have been accumulated in sp are moved into the
+   entity, and a copy of *e is put at the front of sp->entities. */
+
+static
+void define_external_entity(sp, e)
+struct sgmls *sp;
+external_entity_s *e;
+{
+  struct entity_list *node;
+
+  e->attributes = 0;
+
+  /* Transfer ownership of the pending identifiers and filenames. */
+  e->sysid = sp->sysid;
+  sp->sysid = 0;
+  e->pubid = sp->pubid;
+  sp->pubid = 0;
+  e->nfilenames = sp->nfiles;
+  e->filenames = sp->files;
+  sp->nfiles = 0;
+  sp->files = 0;
+
+  node = (struct entity_list *)xmalloc(sizeof(struct entity_list));
+  memcpy((UNIV)&node->entity.u.external, (UNIV)e, sizeof(*e));
+  node->entity.is_internal = 0;
+  node->subdoc_level = sp->subdoc_level;
+  node->next = sp->entities;
+  sp->entities = node;
+}
+
+/* Define the internal entity described by *e at the current
+   subdocument level by putting a copy of it at the front of
+   sp->entities. */
+
+static
+void define_internal_entity(sp, e)
+struct sgmls *sp;
+internal_entity_s *e;
+{
+  struct entity_list *node;
+
+  node = (struct entity_list *)xmalloc(sizeof(struct entity_list));
+  memcpy((UNIV)&node->entity.u.internal, (UNIV)e, sizeof(*e));
+  node->entity.is_internal = 1;
+  node->subdoc_level = sp->subdoc_level;
+  node->next = sp->entities;
+  sp->entities = node;
+}
+
+/* Define the notation described by *np at the current subdocument
+   level.  The system and public identifiers accumulated in sp are
+   moved into the notation, and a copy of *np is put at the front of
+   sp->notations. */
+
+static
+void define_notation(sp, np)
+struct sgmls *sp;
+notation_s *np;
+{
+  struct notation_list *node;
+
+  /* Transfer ownership of the pending identifiers. */
+  np->pubid = sp->pubid;
+  sp->pubid = 0;
+  np->sysid = sp->sysid;
+  sp->sysid = 0;
+
+  node = (struct notation_list *)xmalloc(sizeof(struct notation_list));
+  memcpy((UNIV)&node->notation, (UNIV)np, sizeof(*np));
+  node->subdoc_level = sp->subdoc_level;
+  node->next = sp->notations;
+  sp->notations = node;
+}
+
+/* Search the leading items of list p whose subdoc_level is level for
+   one called name.  The search stops at the first item of a different
+   level.  Returns the item found, or a null pointer. */
+
+static
+struct list *list_find(p, name, level)
+     struct list *p;
+     char *name;
+     int level;
+{
+  while (p != 0 && p->subdoc_level == level) {
+    if (strcmp(p->name, name) == 0)
+      return p;
+    p = p->next;
+  }
+  return 0;
+}
+
+/* The items at the front of *listp whose subdoc_level is level are
+   given a subdoc_level of -1 and moved, in their existing order, to
+   the end of the list. */
+
+static
+void list_finish_level(listp, level)
+     struct list **listp;
+     int level;
+{
+  struct list **link;
+  struct list *finished;	/* the items being moved */
+  struct list *rest;		/* everything after them */
+
+  link = listp;
+  while (*link != 0 && (*link)->subdoc_level == level) {
+    (*link)->subdoc_level = -1;
+    link = &(*link)->next;
+  }
+  /* Cut the list in two after the last finished item.  If there was
+     no such item, link is listp, so finished ends up null. */
+  rest = *link;
+  *link = 0;
+  finished = *listp;
+  *listp = rest;
+
+  /* Find the end of what remains and attach the finished items. */
+  link = listp;
+  while (*link != 0)
+    link = &(*link)->next;
+  *link = finished;
+}
+
+/* Insert attribute a into the list *pp, which is kept sorted by name
+   in strcmp() order.  a goes before the first attribute whose name is
+   not less than its own. */
+
+static
+void add_attribute(pp, a)
+     attribute_s **pp, *a;
+{
+  while (*pp != 0 && strcmp((*pp)->name, a->name) < 0)
+    pp = &(*pp)->next;
+  a->next = *pp;
+  *pp = a;
+}
+
+
+/* Return a newly allocated copy of the string s, or a null pointer if
+   s is null. */
+
+static
+char *strsave(s)
+char *s;
+{
+  char *copy;
+
+  if (s == 0)
+    return s;
+  copy = xmalloc(strlen(s) + 1);
+  strcpy(copy, s);
+  return copy;
+}
+
+static
+UNIV xmalloc(n)
+ unsigned n;
+{
+ UNIV p = malloc(n);
+ if (!p)
+ error(E_NOMEM);
+ return p;
+}
+
+/* ANSI C says first argument to realloc can be NULL, but not everybody
+ appears to support this. */
+
+static
+UNIV xrealloc(p, n)
+ UNIV p;
+ unsigned n;
+{
+ p = p ? realloc(p, n) : malloc(n);
+ if (!p)
+ error(E_NOMEM);
+ return p;
+}
+
+static NO_RETURN
+void error(num)
+ enum error_code num;
+{
+ (*errhandler)((int)num, errlist[num], input_lineno);
+ abort();
+}
+
+static
+void default_errhandler(num, msg, lineno)
+ int num;
+ char *msg;
+ unsigned long lineno;
+{
+ fprintf(stderr, "Line %lu: %s\n", lineno, msg);
+ exit(1);
+}
diff --git a/usr.bin/sgmls/libsgmls/sgmls.h b/usr.bin/sgmls/libsgmls/sgmls.h
new file mode 100644
index 0000000..79b2658
--- /dev/null
+++ b/usr.bin/sgmls/libsgmls/sgmls.h
@@ -0,0 +1,127 @@
+/* sgmls.h
+ Interface to a library for reading output of sgmls. */
+
+struct sgmls_data { /* One segment of data: a run of ordinary characters or of SDATA text. */
+ char *s; /* the characters; use len for the length, since they may include nul characters */
+ unsigned len; /* number of characters at s */
+ char is_sdata; /* non-zero if the segment was enclosed in `\|' in the sgmls output */
+};
+
+struct sgmls_notation { /* A defined notation. */
+ char *name; /* notation name */
+ char *sysid; /* system identifier, or null if none was given */
+ char *pubid; /* public identifier, or null if none was given */
+};
+
+struct sgmls_internal_entity { /* An internal CDATA or SDATA entity. */
+ char *name; /* entity name */
+ struct sgmls_data data; /* replacement text; data.s is null when data.len is 0 */
+};
+
+enum sgmls_external_entity_type { /* Kinds of external entity. */
+ SGMLS_ENTITY_CDATA, /* defined with type CDATA */
+ SGMLS_ENTITY_SDATA, /* defined with type SDATA */
+ SGMLS_ENTITY_NDATA, /* defined with type NDATA */
+ SGMLS_ENTITY_SUBDOC /* subdocument entity */
+ };
+
+struct sgmls_external_entity { /* An external entity, including subdocument entities. */
+ char *name; /* entity name */
+ enum sgmls_external_entity_type type; /* which kind of external entity this is */
+ char **filenames; /* the nfilenames filenames reported for the entity; null if there were none */
+ int nfilenames; /* number of elements in filenames */
+ char *pubid; /* public identifier, or null if none was given */
+ char *sysid; /* system identifier, or null if none was given */
+ struct sgmls_attribute *attributes; /* data attributes, sorted by name; null if there are none */
+ struct sgmls_notation *notation; /* the entity's notation; not meaningful when type is SGMLS_ENTITY_SUBDOC */
+};
+
+struct sgmls_entity { /* An entity of either kind; is_internal says which member of u is valid. */
+ union {
+ struct sgmls_internal_entity internal; /* valid when is_internal is non-zero */
+ struct sgmls_external_entity external; /* valid when is_internal is zero */
+ } u;
+ char is_internal; /* 1 for an internal entity, 0 for an external one */
+};
+
+enum sgmls_attribute_type { /* Kinds of attribute value; selects the member of sgmls_attribute.value in use. */
+ SGMLS_ATTR_IMPLIED, /* no value; no member of value is set */
+ SGMLS_ATTR_CDATA, /* value.data */
+ SGMLS_ATTR_TOKEN, /* value.token */
+ SGMLS_ATTR_ENTITY, /* value.entity */
+ SGMLS_ATTR_NOTATION /* value.notation */
+};
+
+struct sgmls_attribute { /* One attribute in a list of attributes; free a whole list with sgmls_free_attributes(). */
+ struct sgmls_attribute *next; /* next attribute in the list, or null */
+ char *name; /* attribute name */
+ enum sgmls_attribute_type type; /* selects the member of value that is in use */
+ union {
+ struct { /* SGMLS_ATTR_CDATA */
+ struct sgmls_data *v; /* the n data segments; null when n is 0 */
+ int n; /* number of segments */
+ } data;
+ struct { /* SGMLS_ATTR_ENTITY */
+ struct sgmls_entity **v; /* the n entities named by the value */
+ int n; /* number of entities; at least 1 */
+ } entity;
+ struct { /* SGMLS_ATTR_TOKEN */
+ char **v; /* the n tokens, each nul-terminated */
+ int n; /* number of tokens; at least 1 */
+ } token;
+ struct sgmls_notation *notation; /* SGMLS_ATTR_NOTATION */
+ } value;
+};
+
+enum sgmls_event_type { /* Kinds of event reported by sgmls_next(). */
+ SGMLS_EVENT_DATA, /* data */
+ SGMLS_EVENT_ENTITY, /* external entity reference */
+ SGMLS_EVENT_PI, /* processing instruction */
+ SGMLS_EVENT_START, /* element start */
+ SGMLS_EVENT_END, /* element end */
+ SGMLS_EVENT_SUBSTART, /* subdocument start */
+ SGMLS_EVENT_SUBEND, /* subdocument end */
+ SGMLS_EVENT_APPINFO, /* appinfo */
+ SGMLS_EVENT_CONFORMING /* the document was conforming */
+ };
+
+struct sgmls_event { /* An event filled in by sgmls_next(); type selects the member of u in use. */
+ enum sgmls_event_type type;
+ union {
+ struct { /* SGMLS_EVENT_DATA */
+ struct sgmls_data *v; /* the n data segments */
+ int n; /* number of segments */
+ } data;
+ struct sgmls_external_entity *entity; /* SGMLS_EVENT_ENTITY, SGMLS_EVENT_SUBSTART and SGMLS_EVENT_SUBEND */
+ struct { /* SGMLS_EVENT_PI */
+ char *s; /* text of the processing instruction; may contain nul characters */
+ unsigned len; /* number of characters at s */
+ } pi;
+ struct { /* SGMLS_EVENT_START */
+ char *gi; /* generic identifier of the element */
+ struct sgmls_attribute *attributes; /* the element's attributes; the application frees them with sgmls_free_attributes() */
+ } start;
+ struct { /* SGMLS_EVENT_END */
+ char *gi; /* generic identifier of the element */
+ } end;
+ char *appinfo; /* SGMLS_EVENT_APPINFO */
+ } u;
+ char *filename; /* SGML filename */
+ unsigned long lineno; /* SGML lineno */
+};
+
+#ifdef __STDC__ /* prototypes for ANSI C compilers */
+void sgmls_free_attributes(struct sgmls_attribute *); /* free a list of attributes and what each attribute owns */
+struct sgmls *sgmls_create(FILE *); /* create a reader for the sgmls output on the given stream */
+int sgmls_next(struct sgmls *, struct sgmls_event *); /* get the next event; returns 0 when there are no more, otherwise 1 */
+void sgmls_free(struct sgmls *); /* dispose of a reader obtained from sgmls_create() */
+typedef void sgmls_errhandler(int, char *, unsigned long); /* arguments: error number, message, input line number */
+sgmls_errhandler *sgmls_set_errhandler(sgmls_errhandler *); /* install an error handler; presumably returns the previous one -- confirm in sgmls.c */
+#else /* not __STDC__ */
+void sgmls_free_attributes();
+struct sgmls *sgmls_create();
+int sgmls_next();
+void sgmls_free();
+typedef void sgmls_errhandler();
+sgmls_errhandler *sgmls_set_errhandler();
+#endif /* not __STDC__ */
diff --git a/usr.bin/sgmls/rast/Makefile b/usr.bin/sgmls/rast/Makefile
new file mode 100644
index 0000000..3be6c87
--- /dev/null
+++ b/usr.bin/sgmls/rast/Makefile
@@ -0,0 +1,18 @@
+#
+# BSD make makefile for rast, which translates sgmls output to RAST format.
+#
+# $Id$
+#
+
+PROG= rast
+
+SRCS+= rast.c
+
+CFLAGS+= -I${.CURDIR}/../libsgmls -I${.CURDIR}/../sgmls
+
+LDADD= ${LIBSGMLS}
+DPADD= ${LIBSGMLS}
+
+.include "../Makefile.inc"
+.include <bsd.prog.mk>
+
diff --git a/usr.bin/sgmls/rast/rast.1 b/usr.bin/sgmls/rast/rast.1
new file mode 100644
index 0000000..2d167fc
--- /dev/null
+++ b/usr.bin/sgmls/rast/rast.1
@@ -0,0 +1,75 @@
+.\" -*- nroff -*-
+.tr \(ts"
+.TH RAST 1
+.SH NAME
+rast \- translate output of sgmls to RAST format
+.SH SYNOPSIS
+.B rast
+[
+.BI \-o output_file
+]
+[
+.I input_file
+]
+.SH DESCRIPTION
+.I Rast
+translates the output of sgmls to the format of a RAST result.
+RAST is the Reference Application for SGML Testing defined in the Proposed
+American National Standard on Conformance Testing for Standard Generalized
+Markup Language (SGML) Systems (X3.190-199X).
+.I Rast
+reads from
+.I input_file
+or from standard input if
+.I input_file
+is not specified.
+It writes to
+.I output_file
+or to standard output if
+.I output_file
+is not specified;
+use of the
+.B \-o
+option avoids the need for
+.I rast
+to use a temporary file.
+.LP
+Note that the
+.B \-c
+option of
+.I sgmls
+can generate a capacity report in RACT format.
+.SH BUGS
+Production [9] in clause 14.5.5 of the draft standard is clearly wrong;
+.I rast
+corrects it by appending
+.RI `,\ LE '.
+An alternative way to correct it would be to delete the
+.RB `, \(tsEND-ENTITY\(ts '.
+.LP
+In production [18] in clause 14.5.9,
+.RI ` markup\ data +'
+should be
+.RI ` markup\ data *'
+since internal sdata entities need not contain any characters (14.5.11),
+and
+.I markup\ data
+cannot be empty (14.5.9, 14.5.12).
+.LP
+The RAST result for the example in Annex B.4 is incorrect.
+The line
+.B G03-A1=
+should be immediately followed by a line
+.BR !g03-e1! .
+(The problem with production [9] also applies to this example.)
+.LP
+.I Rast
+outputs a newline after
+.B #ERROR
+in order to avoid producing files with partial lines.
+.SH "SEE ALSO"
+.IR sgmls (1)
+.br
+.I
+Conformance Testing for Standard Generalized Markup Language (SGML) Systems,
+(X3.190-199X), Draft July 1991
diff --git a/usr.bin/sgmls/rast/rast.c b/usr.bin/sgmls/rast/rast.c
new file mode 100644
index 0000000..31b48de
--- /dev/null
+++ b/usr.bin/sgmls/rast/rast.c
@@ -0,0 +1,534 @@
+/* rast.c
+ Translate sgmls output to RAST result format.
+
+ Written by James Clark (jjc@jclark.com). */
+
+#include "config.h"
+#include "std.h"
+#include "sgmls.h"
+#include "getopt.h"
+
+#ifdef USE_PROTOTYPES
+#define P(parms) parms
+#else
+#define P(parms) ()
+#endif
+
+#ifdef __GNUC__
+#define NO_RETURN volatile
+#else
+#define NO_RETURN /* as nothing */
+#endif
+
+#ifdef VARARGS
+#define VP(parms) ()
+#else
+#define VP(parms) P(parms)
+#endif
+
+#ifdef USE_ISASCII
+#define ISASCII(c) isascii(c)
+#else
+#define ISASCII(c) (1)
+#endif
+
+NO_RETURN void error VP((char *,...));
+
+static void input_error P((int, char *, unsigned long));
+static int do_file P((FILE *));
+static void usage P((void));
+
+static void output_processing_instruction P((char *, unsigned));
+static void output_data P((struct sgmls_data *, int));
+static void output_data_lines P((char *, unsigned));
+static void output_internal_sdata P((char *, unsigned));
+static void output_external_entity P((struct sgmls_external_entity *));
+static void output_external_entity_info P((struct sgmls_external_entity *));
+static void output_element_start P((char *, struct sgmls_attribute *));
+static void output_element_end P((char *));
+static void output_attribute P((struct sgmls_attribute *));
+static void output_tokens P((char **, int));
+static void output_markup_chars P((char *, unsigned));
+static void output_markup_string P((char *));
+static void output_char P((int, int));
+static void output_flush P((int));
+static void output_external_id P((char *, char *));
+static void output_entity P((struct sgmls_entity *));
+static void output_external_entity_info P((struct sgmls_external_entity *));
+static void output_internal_entity P((struct sgmls_internal_entity *));
+
+#define output_flush_markup() output_flush('!')
+#define output_flush_data() output_flush('|')
+
+static FILE *outfp;
+static int char_count = 0;
+static char *program_name;
+
+/* rast [-o output_file] [input_file]
+
+   Translate sgmls output, read from input_file or standard input, to
+   a RAST result.  With -o the result is written directly to
+   output_file; otherwise it is collected in a temporary file and
+   copied to standard output at the end.  If the input does not
+   report a conforming document, the result is replaced by a single
+   `#ERROR' line and the exit status is EXIT_FAILURE. */
+
+int main(argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  int opt;
+  char *output_file = 0;
+
+  program_name = argv[0];
+
+  while ((opt = getopt(argc, argv, "o:")) != EOF)
+    switch (opt) {
+    case 'o':
+      output_file = optarg;
+      break;
+    case '?':
+      usage();			/* does not return */
+    default:
+      abort();
+    }
+
+  if (output_file) {
+    errno = 0;
+    outfp = fopen(output_file, "w");
+    if (!outfp)
+      /* The format has two `%s' conversions, so the filename has to
+	 be passed as well as the error string. */
+      error("couldn't open `%s' for output: %s", output_file,
+	    strerror(errno));
+  }
+  else {
+    outfp = tmpfile();
+    if (!outfp)
+      error("couldn't create temporary file: %s", strerror(errno));
+  }
+
+  if (argc - optind > 1)
+    usage();
+
+  if (argc - optind == 1) {
+    if (!freopen(argv[optind], "r", stdin))
+      error("couldn't open `%s' for input: %s", argv[optind], strerror(errno));
+  }
+
+  (void)sgmls_set_errhandler(input_error);
+
+  if (!do_file(stdin)) {
+    /* Not conforming: discard what was written and output #ERROR. */
+    fclose(outfp);
+    if (output_file) {
+      /* Reopening for writing truncates the partial result. */
+      if (!freopen(output_file, "w", stdout))
+	error("couldn't reopen `%s' for output: %s", output_file,
+	      strerror(errno));
+    }
+    fputs("#ERROR\n", stdout);
+    exit(EXIT_FAILURE);
+  }
+
+  if (output_file) {
+    errno = 0;
+    if (fclose(outfp) == EOF)
+      error("error closing `%s': %s", output_file, strerror(errno));
+  }
+  else {
+    /* Copy the temporary file to standard output. */
+    errno = 0;
+    if (fseek(outfp, 0L, SEEK_SET))
+      error("couldn't rewind temporary file: %s", strerror(errno));
+    while ((c = getc(outfp)) != EOF)
+      if (putchar(c) == EOF)
+	error("error writing standard output: %s", strerror(errno));
+  }
+  exit(EXIT_SUCCESS);
+}
+
+/* Print a usage message on stderr and exit with EXIT_FAILURE. */
+
+static
+void usage()
+{
+  (void)fprintf(stderr, "usage: %s [-o output_file] [input_file]\n",
+		program_name);
+  exit(EXIT_FAILURE);
+}
+
+/* Read sgmls output from fp and write the corresponding RAST result
+   to outfp.  The content of a subdocument is not output: after the
+   entity reference for it has been written, events are skipped until
+   the matching subdocument end.  Returns 1 if the input reported the
+   document as conforming and 0 otherwise, including when the input
+   ends inside a subdocument. */
+
+static
+int do_file(fp)
+     FILE *fp;
+{
+  struct sgmls *sp;
+  struct sgmls_event e;
+  int conforming = 0;
+
+  sp = sgmls_create(fp);
+  while (sgmls_next(sp, &e))
+    switch (e.type) {
+    case SGMLS_EVENT_DATA:
+      output_data(e.u.data.v, e.u.data.n);
+      break;
+    case SGMLS_EVENT_ENTITY:
+      output_external_entity(e.u.entity);
+      break;
+    case SGMLS_EVENT_PI:
+      output_processing_instruction(e.u.pi.s, e.u.pi.len);
+      break;
+    case SGMLS_EVENT_START:
+      output_element_start(e.u.start.gi, e.u.start.attributes);
+      sgmls_free_attributes(e.u.start.attributes);
+      break;
+    case SGMLS_EVENT_END:
+      output_element_end(e.u.end.gi);
+      break;
+    case SGMLS_EVENT_SUBSTART:
+      {
+	int level = 1;
+	output_external_entity(e.u.entity);
+	while (level > 0) {
+	  if (!sgmls_next(sp, &e)) {
+	    /* Input ended inside the subdocument; release the reader
+	       here too, as is done on the normal path. */
+	    sgmls_free(sp);
+	    return 0;
+	  }
+	  switch (e.type) {
+	  case SGMLS_EVENT_SUBSTART:
+	    level++;
+	    break;
+	  case SGMLS_EVENT_SUBEND:
+	    level--;
+	    break;
+	  case SGMLS_EVENT_START:
+	    /* The attributes still belong to us even when skipped. */
+	    sgmls_free_attributes(e.u.start.attributes);
+	    break;
+	  default:
+	    /* prevent compiler warnings */
+	    break;
+	  }
+	}
+      }
+      break;
+    case SGMLS_EVENT_APPINFO:
+      break;
+    case SGMLS_EVENT_CONFORMING:
+      conforming = 1;
+      break;
+    default:
+      abort();
+    }
+  sgmls_free(sp);
+  return conforming;
+}
+
+/* Output a processing instruction whose text is the len characters
+   at s: `[?', then the text as data lines if there is any, then `]'. */
+
+static
+void output_processing_instruction(s, len)
+     char *s;
+     unsigned len;
+{
+  fputs("[?", outfp);
+  if (len != 0) {
+    putc('\n', outfp);
+    output_data_lines(s, len);
+    output_flush_data();
+  }
+  fputs("]\n", outfp);
+}
+
+/* Output the n data segments in v.  SDATA segments are always output,
+   even when empty; other segments only when they are not empty. */
+
+static
+void output_data(v, n)
+     struct sgmls_data *v;
+     int n;
+{
+  for (; n > 0; v++, n--) {
+    if (v->is_sdata)
+      output_internal_sdata(v->s, v->len);
+    else if (v->len > 0)
+      output_data_lines(v->s, v->len);
+  }
+}
+
+/* Output the n characters at s as data, that is in lines delimited
+   by `|', and finish the last line.  n must be greater than 0. */
+
+static
+void output_data_lines(s, n)
+     char *s;
+     unsigned n;
+{
+  assert(n > 0);
+  while (n > 0) {
+    output_char((unsigned char)*s, '|');
+    s++;
+    n--;
+  }
+  output_flush_data();
+}
+
+/* Output the n characters at s as the text of an internal SDATA
+   entity, as markup lines bracketed by #SDATA-TEXT and #END-SDATA. */
+
+static
+void output_internal_sdata(s, n)
+     char *s;
+     unsigned n;
+{
+  (void)fputs("#SDATA-TEXT\n", outfp);
+  output_markup_chars(s, n);
+  output_flush_markup();
+  (void)fputs("#END-SDATA\n", outfp);
+}
+
+/* Output a reference to the external entity e: `[&' and the entity
+   name, then the information about the entity, then `]'. */
+
+static
+void output_external_entity(e)
+     struct sgmls_external_entity *e;
+{
+  (void)fprintf(outfp, "[&%s\n", e->name);
+  output_external_entity_info(e);
+  (void)fputs("]\n", outfp);
+}
+
+/* Output the start of an element with generic identifier gi and
+   attribute list att (which may be null): `[' and the gi, then each
+   attribute if there are any, then `]'. */
+
+static
+void output_element_start(gi, att)
+     char *gi;
+     struct sgmls_attribute *att;
+{
+  fprintf(outfp, "[%s", gi);
+  if (att != 0) {
+    putc('\n', outfp);
+    do {
+      output_attribute(att);
+      att = att->next;
+    } while (att != 0);
+  }
+  fputs("]\n", outfp);
+}
+
+/* Output the end of an element with generic identifier gi. */
+
+static
+void output_element_end(gi)
+     char *gi;
+{
+  (void)fprintf(outfp, "[/%s]\n", gi);
+}
+
+/* Output the attribute p: its name followed by `=', and then its
+   value in a form that depends on the type of the attribute.  For an
+   entity attribute the names of the entities are followed by a
+   description of each entity. */
+
+static
+void output_attribute(p)
+     struct sgmls_attribute *p;
+{
+  int i;
+
+  fprintf(outfp, "%s=\n", p->name);
+  switch (p->type) {
+  case SGMLS_ATTR_IMPLIED:
+    fputs("#IMPLIED\n", outfp);
+    break;
+  case SGMLS_ATTR_CDATA:
+    {
+      struct sgmls_data *seg = p->value.data.v;
+      int nsegs = p->value.data.n;
+
+      for (i = 0; i < nsegs; i++) {
+	if (!seg[i].is_sdata) {
+	  output_markup_chars(seg[i].s, seg[i].len);
+	  output_flush_markup();
+	}
+	else
+	  output_internal_sdata(seg[i].s, seg[i].len);
+      }
+    }
+    break;
+  case SGMLS_ATTR_TOKEN:
+    output_tokens(p->value.token.v, p->value.token.n);
+    break;
+  case SGMLS_ATTR_ENTITY:
+    /* First the names, separated by spaces... */
+    for (i = 0; i < p->value.entity.n; i++) {
+      struct sgmls_entity *ent = p->value.entity.v[i];
+
+      if (i > 0)
+	output_markup_string(" ");
+      output_markup_string(ent->is_internal
+			   ? ent->u.internal.name
+			   : ent->u.external.name);
+    }
+    output_flush_markup();
+    /* ...then the entities themselves. */
+    for (i = 0; i < p->value.entity.n; i++)
+      output_entity(p->value.entity.v[i]);
+    break;
+  case SGMLS_ATTR_NOTATION:
+    output_tokens(&p->value.notation->name, 1);
+    output_external_id(p->value.notation->pubid, p->value.notation->sysid);
+    break;
+  }
+}
+
+/* Output the n tokens in v as markup, separated by single spaces, and
+   finish the line.  n must be greater than 0. */
+
+static void output_tokens(v, n)
+     char **v;
+     int n;
+{
+  assert(n > 0);
+  output_markup_string(*v);
+  while (--n > 0) {
+    output_markup_string(" ");
+    output_markup_string(*++v);
+  }
+  output_flush_markup();
+}
+
+/* Output the n characters at s as markup, that is in lines delimited
+   by `!'.  The last line is not finished. */
+
+static
+void output_markup_chars(s, n)
+     char *s;
+     unsigned n;
+{
+  while (n > 0) {
+    output_char((unsigned char)*s, '!');
+    s++;
+    n--;
+  }
+}
+
+/* Output the nul-terminated string s as markup, that is in lines
+   delimited by `!'.  The last line is not finished. */
+
+static
+void output_markup_string(s)
+     char *s;
+{
+  for (; *s != '\0'; s++)
+    output_char((unsigned char)*s, '!');
+}
+
+/* Output the character c.  Printable characters are collected into
+   lines that begin and end with delim and hold at most 60 characters;
+   char_count is the number of characters in the unfinished line.  Any
+   other character finishes the current line and is output on a line
+   of its own as #RE, #RS, #TAB or `#' followed by its decimal code. */
+
+static
+void output_char(c, delim)
+     int c;
+     int delim;
+{
+  if (!(ISASCII(c) && isprint(c))) {
+    output_flush(delim);
+    if (c == RECHAR)
+      fputs("#RE\n", outfp);
+    else if (c == RSCHAR)
+      fputs("#RS\n", outfp);
+    else if (c == TABCHAR)
+      fputs("#TAB\n", outfp);
+    else
+      fprintf(outfp, "#%d\n", c);
+    return;
+  }
+
+  if (char_count == 0)
+    putc(delim, outfp);		/* start a new line */
+  putc(c, outfp);
+  if (++char_count == 60) {
+    /* The line is full. */
+    putc(delim, outfp);
+    putc('\n', outfp);
+    char_count = 0;
+  }
+}
+
+/* If there is an unfinished line of characters, finish it with delim
+   and a newline. */
+
+static
+void output_flush(delim)
+     int delim;
+{
+  if (char_count <= 0)
+    return;
+  putc(delim, outfp);
+  putc('\n', outfp);
+  char_count = 0;
+}
+
+/* Output an external identifier.  Either of pubid and sysid may be
+   null, meaning that it was not specified.  An identifier that was
+   specified but is empty is output as #EMPTY; if neither was
+   specified, `#SYSTEM' and `#NONE' are output. */
+
+static
+void output_external_id(pubid, sysid)
+     char *pubid;
+     char *sysid;
+{
+  if (pubid == 0 && sysid == 0) {
+    fputs("#SYSTEM\n#NONE\n", outfp);
+    return;
+  }
+
+  if (pubid != 0) {
+    fputs("#PUBLIC\n", outfp);
+    if (*pubid == '\0')
+      fputs("#EMPTY\n", outfp);
+    else {
+      output_markup_string(pubid);
+      output_flush_markup();
+    }
+  }
+  if (sysid != 0) {
+    fputs("#SYSTEM\n", outfp);
+    if (*sysid == '\0')
+      fputs("#EMPTY\n", outfp);
+    else {
+      output_markup_string(sysid);
+      output_flush_markup();
+    }
+  }
+}
+
+/* Output a description of the entity e, as used for the value of an
+   entity attribute, followed by #END-ENTITY.  Unless ASIS is defined,
+   the #END-ENTITY is followed by a newline. */
+
+static
+void output_entity(e)
+     struct sgmls_entity *e;
+{
+  if (!e->is_internal)
+    output_external_entity_info(&e->u.external);
+  else
+    output_internal_entity(&e->u.internal);
+  fputs("#END-ENTITY", outfp);
+#ifndef ASIS
+  putc('\n', outfp);
+#endif
+}
+
+/* Output the information about the external entity e: a line giving
+   its type, its external identifier and, unless it is a subdocument
+   entity, its notation, the notation's external identifier and its
+   data attributes. */
+
+static
+void output_external_entity_info(e)
+     struct sgmls_external_entity *e;
+{
+  char *label = 0;
+
+  switch (e->type) {
+  case SGMLS_ENTITY_CDATA:
+    label = "#CDATA-EXTERNAL";
+    break;
+  case SGMLS_ENTITY_SDATA:
+    label = "#SDATA-EXTERNAL";
+    break;
+  case SGMLS_ENTITY_NDATA:
+    label = "#NDATA-EXTERNAL";
+    break;
+  case SGMLS_ENTITY_SUBDOC:
+    label = "#SUBDOC";
+    break;
+  }
+  if (label)
+    fputs(label, outfp);
+  putc('\n', outfp);
+  output_external_id(e->pubid, e->sysid);
+
+  if (e->type != SGMLS_ENTITY_SUBDOC) {
+    struct sgmls_attribute *att;
+
+    fprintf(outfp, "#NOTATION=%s\n", e->notation->name);
+    output_external_id(e->notation->pubid, e->notation->sysid);
+    for (att = e->attributes; att != 0; att = att->next)
+      output_attribute(att);
+  }
+}
+
+/* Output the information about the internal entity e: a line giving
+   its type, followed by its text as markup. */
+
+static
+void output_internal_entity(e)
+     struct sgmls_internal_entity *e;
+{
+  fputs(e->data.is_sdata ? "#SDATA-INTERNAL" : "#CDATA-INTERNAL", outfp);
+  putc('\n', outfp);
+  output_markup_chars(e->data.s, e->data.len);
+  output_flush_markup();
+}
+
+static
+void input_error(num, str, lineno)
+ int num;
+ char *str;
+ unsigned long lineno;
+{
+ error("Error at input line %lu: %s", lineno, str);
+}
+
+NO_RETURN /* error: print "program_name: " and a printf-style message on stderr, then exit with EXIT_FAILURE. */
+#ifdef VARARGS /* old-style variable arguments: the format is the first variable argument */
+void error(va_alist) va_dcl
+#else /* ANSI variable arguments */
+void error(char *message,...)
+#endif
+{
+#ifdef VARARGS
+ char *message;
+#endif
+ va_list ap;
+
+ fprintf(stderr, "%s: ", program_name);
+#ifdef VARARGS
+ va_start(ap);
+ message = va_arg(ap, char *); /* fetch the format string */
+#else
+ va_start(ap, message);
+#endif
+ vfprintf(stderr, message, ap);
+ va_end(ap);
+ fputc('\n', stderr); /* the message itself has no trailing newline */
+ fflush(stderr);
+ exit(EXIT_FAILURE);
+}
diff --git a/usr.bin/sgmls/sgmls.pl b/usr.bin/sgmls/sgmls.pl
new file mode 100755
index 0000000..edb9eb6
--- /dev/null
+++ b/usr.bin/sgmls/sgmls.pl
@@ -0,0 +1,247 @@
+#! /usr/bin/perl
+
+# This is a skeleton of a perl script for processing the output of
+# sgmls. You must change the parts marked with "XXX".
+
+# XXX This is for troff: in data, turn \ into \e (which prints as \).
+# Backslashes in SDATA entities are left as backslashes.
+
+$backslash_in_data = "\\e";
+
+$prog = $0;
+
+$prog =~ s|.*/||;
+
+$level = 0;
+
+# Read the output of sgmls from standard input, one command per line.
+# The first character of each line is the command; the rest of the line
+# is its argument.  Definitions are recorded in associative arrays, and
+# the subroutines marked XXX below are called for the events.
+
+while (<STDIN>) {
+    chop;
+    $command = substr($_, 0, 1);
+    substr($_, 0, 1) = "";
+    if ($command eq '(') {
+        # Start of an element.
+        &start_element($_);
+        $level++;
+    }
+    elsif ($command eq ')') {
+        # End of an element: forget the attributes of that element.
+        $level--;
+        &end_element($_);
+        foreach $key (keys %attribute_value) {
+            @splitkey = split($;, $key);
+            if ($splitkey[0] == $level) {
+                delete $attribute_value{$key};
+                delete $attribute_type{$key};
+            }
+        }
+    }
+    elsif ($command eq '-') {
+        # Data.
+        &unescape_data($_);
+        &data($_);
+    }
+    elsif ($command eq 'A') {
+        # Attribute of the next element: name, type, value.
+        @field = split(/ /, $_, 3);
+        $attribute_type{$level,$field[0]} = $field[1];
+        &unescape_data($field[2]);
+        $attribute_value{$level,$field[0]} = $field[2];
+    }
+    elsif ($command eq '&') {
+        # Reference to an external entity.
+        &entity($_);
+    }
+    elsif ($command eq 'D') {
+        # Data attribute: entity name, attribute name, type, value.
+        @field = split(/ /, $_, 4);
+        $data_attribute_type{$field[0], $field[1]} = $field[2];
+        &unescape_data($field[3]);
+        $data_attribute_value{$field[0], $field[1]} = $field[3];
+    }
+    elsif ($command eq 'N') {
+        # Notation definition; uses up any pending sysid and pubid.
+        $notation{$_} = 1;
+        if (defined($sysid)) {
+            $notation_sysid{$_} = $sysid;
+            undef($sysid);
+        }
+        if (defined($pubid)) {
+            $notation_pubid{$_} = $pubid;
+            undef($pubid);
+        }
+    }
+    elsif ($command eq 'I') {
+        # Internal entity definition: name, type, text.
+        @field = split(/ /, $_, 3);
+        $entity_type{$field[0]} = $field[1];
+        &unescape($field[2]);
+        # You may want to substitute \e for \ if the type is CDATA.
+        $entity_text{$field[0]} = $field[2];
+        $entity_code{$field[0]} = 'I';
+    }
+    elsif ($command eq 'E') {
+        # External entity definition: name, type, notation.  Uses up
+        # any pending filenames, sysid and pubid.
+        @field = split(/ /, $_);
+        $entity_code{$field[0]} = 'E';
+        $entity_type{$field[0]} = $field[1];
+        $entity_notation{$field[0]} = $field[2];
+        if (@files) {
+            foreach $i (0..$#files) {
+                $entity_filename{$field[0], $i} = $files[$i];
+            }
+            undef(@files);
+        }
+        if (defined($sysid)) {
+            $entity_sysid{$field[0]} = $sysid;
+            undef($sysid);
+        }
+        if (defined($pubid)) {
+            $entity_pubid{$field[0]} = $pubid;
+            undef($pubid);
+        }
+    }
+    elsif ($command eq 'S') {
+        # Subdocument entity definition.  Uses up any pending
+        # filenames, sysid and pubid.
+        $entity_code{$_} = 'S';
+        if (@files) {
+            foreach $i (0..$#files) {
+                $entity_filename{$_, $i} = $files[$i];
+            }
+            undef(@files);
+        }
+        if (defined($sysid)) {
+            $entity_sysid{$_} = $sysid;
+            undef($sysid);
+        }
+        if (defined($pubid)) {
+            $entity_pubid{$_} = $pubid;
+            undef($pubid);
+        }
+    }
+    elsif ($command eq '?') {
+        # Processing instruction.
+        &unescape($_);
+        &pi($_);
+    }
+    elsif ($command eq 'L') {
+        # Location: line number, optionally followed by a filename.
+        @field = split(/ /, $_);
+        $lineno = $field[0];
+        if ($#field >= 1) {
+            &unescape($field[1]);
+            $filename = $field[1];
+        }
+    }
+    elsif ($command eq 'V') {
+        @field = split(/ /, $_, 2);
+        &unescape($field[1]);
+        $environment{$field[0]} = $field[1];
+    }
+    elsif ($command eq '{') {
+        &start_subdoc($_);
+    }
+    elsif ($command eq '}') {
+        &end_subdoc($_);
+    }
+    elsif ($command eq 'f') {
+        # Filename for the next entity definition.
+        &unescape($_);
+        push(@files, $_);
+    }
+    elsif ($command eq 'p') {
+        # Public identifier for the next definition.
+        &unescape($_);
+        $pubid = $_;
+    }
+    elsif ($command eq 's') {
+        # System identifier for the next definition.
+        &unescape($_);
+        $sysid = $_;
+    }
+    elsif ($command eq 'C') {
+        $conforming = 1;
+    }
+    else {
+        warn "$prog:$ARGV:$.: unrecognized command \`$command'\n";
+    }
+}
+
+sub unescape { # Replace, in the caller's variable, each escape (backslash plus 1-3 octal digits or one other character) by the result of &esc.
+ $_[0] =~ s/\\([0-7][0-7]?[0-7]?|.)/&esc($1)/eg;
+}
+
+# Return the replacement for an escape sequence; the argument is the
+# part of the sequence after the backslash.  RS (octal 12) and the
+# SDATA delimiter `|' are dropped, `n' is a newline, any other octal
+# code is the character with that code, and anything else stands for
+# itself.
+
+sub esc {
+    local($_) = $_[0];
+    return "" if $_ eq '012' || $_ eq '12';	# ignore RS
+    return sprintf("%c", oct) if /^[0-7]/;
+    return "\n" if $_ eq 'n';
+    return "" if $_ eq '|';
+    return "\\" if $_ eq "\\";
+    $_;
+}
+
+sub unescape_data { # Like unescape, but uses &esc_data; $sdata starts at 0 and is visible to &esc_data, which toggles it at each \|.
+ local($sdata) = 0;
+ $_[0] =~ s/\\([0-7][0-7]?[0-7]?|.)/&esc_data($1)/eg;
+}
+
+# Like esc, but for data: `|' also toggles $sdata, and a backslash is
+# replaced by $backslash_in_data except when $sdata is true, in which
+# case it stays a backslash.
+
+sub esc_data {
+    local($_) = $_[0];
+    return "" if $_ eq '012' || $_ eq '12';	# ignore RS
+    return sprintf("%c", oct) if /^[0-7]/;
+    return "\n" if $_ eq 'n';
+    if ($_ eq '|') {
+        $sdata = !$sdata;
+        return "";
+    }
+    if ($_ eq "\\") {
+        return $sdata ? "\\" : $backslash_in_data;
+    }
+    $_;
+}
+
+
+sub start_element {
+ local($gi) = $_[0];
+ # XXX Called for each `(' command (start of an element); $gi is the rest of the command line.
+}
+
+sub end_element {
+ local($gi) = $_[0];
+ # XXX Called for each `)' command (end of an element), before the element's attributes are deleted; $gi is the rest of the command line.
+}
+
+sub data {
+ local($data) = $_[0];
+ # XXX Called for each `-' command; $data is the data after unescape_data has been applied.
+}
+
+# A processing instruction.
+
+sub pi {
+ local($data) = $_[0];
+ # XXX Called for each `?' command; $data is the text after unescape has been applied.
+}
+
+# A reference to an external entity.
+
+sub entity {
+ local($name) = $_[0];
+ # XXX Called for each `&' command; $name is the rest of the command line.
+}
+
+sub start_subdoc {
+ local($name) = $_[0];
+ # XXX Called for each `{' command (start of a subdocument); $name is the rest of the command line.
+}
+
+sub end_subdoc {
+ local($name) = $_[0];
+ # XXX Called for each `}' command (end of a subdocument); $name is the rest of the command line.
+}
+
diff --git a/usr.bin/sgmls/sgmls/Makefile b/usr.bin/sgmls/sgmls/Makefile
new file mode 100644
index 0000000..3a0a0cf
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/Makefile
@@ -0,0 +1,18 @@
+#
+# Bmakefile for sgmls
+#
+# $Id$
+#
+
+PROG= sgmls
+
+SRCS+= lexrf.c pcbrf.c synrf.c context.c md1.c md2.c pars1.c pars2.c serv.c
+SRCS+= sgml1.c sgml2.c sgmlmsg.c sgmlxtrn.c traceset.c entgen.c sgmlio.c
+SRCS+= xfprintf.c main.c unixproc.c sgmldecl.c version.c strerror.c getopt.c
+SRCS+= msgcat.c lineout.c ambig.c exclude.c lextaba.c
+
+CFLAGS+= -I${.CURDIR}/../libsgmls
+
+.include "../Makefile.inc"
+.include <bsd.prog.mk>
+
diff --git a/usr.bin/sgmls/sgmls/action.h b/usr.bin/sgmls/sgmls/action.h
new file mode 100644
index 0000000..08475bf
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/action.h
@@ -0,0 +1,179 @@
+/* ACTION.H: Symbols for all PCB action codes. */
+/* CONACT.H: Symbols for content parse action names (end with '_').
+ There must be no conflict with PARSEACT.H, which
+ uses 0 through 19, or SGMLACT.H, which uses 20 through 32
+ (except that 31 - 32 can be defined here because they are
+ used only by PARSEPRO and do not conflict with SGML.C).
+*/
+#define CIR_ 31 /* Invalid character(s) ignored in MDS; restarting parse. */
+#define DTD_ 32 /* Process DOCTYPE declaration. */
+#define DTE_ 33 /* End of DOCTYPE declaration. */
+#define PEP_ 34 /* TEMP: Previous character ended prolog. */
+#define DAS_ 35 /* Current character begins data. */
+#define FCE_ 36 /* Process free character (SR12-18, 21-30). */
+#define DCE_ 37 /* Data character in element text; change PCB. */
+#define LAS_ 38 /* Start lookahead buffer with current character. */
+#define LAM_ 39 /* Move character to lookahead buffer. */
+#define LAF_ 40 /* Flush the lookahead buffer; REPEATCC. */
+#define NED_ 41 /* Process null end-tag delimiter. */
+#define NET_ 42 /* Process null end-tag. */
+#define NST_ 43 /* Process null start-tag. */
+#define NLF_ 44 /* Flush lookahead buffer except for trailing NET or SR. */
+#define ETC_ 45 /* End-tag in CDATA or RCDATA; treat as data if invalid. */
+#define SRMIN 46 /* Dummy for SHORT REFERENCES: srn = SRn - SRMIN. */
+#define SR1_ 47 /* TAB */
+#define SR2_ 48 /* RE */
+#define SR3_ 49 /* RS */
+#define SR4_ 50 /* Leading blanks */
+#define SR5_ 51 /* Null record */
+#define DAR_ 52 /* Flush data buffer after repeating current character. */
+#define SR7_ 53 /* Trailing blanks */
+#define SR8_ 54 /* Space */
+#define SR9_ 55 /* Two or more blanks */
+#define SR10 56 /* Quotation mark (first data character) */
+#define SR11 57 /* Number sign */
+#define SR12 58 /* FCE CHARACTERS start here */
+/* _ 59 */
+#define BSQ_ 60 /* Blank sequence begun; find its end. */
+/* 61 In use by PARSEACT.H */
+/* 62 In use by PARSEACT.H */
+/* 63 In use by PARSEACT.H */
+/* 64 In use by PARSEACT.H */
+#define SR19 65 /* Hyphen */
+#define SR20 66 /* Two hyphens */
+#define SR25 71 /* Left bracket */
+#define SR26 72 /* Right bracket */
+#define RBR_ 73 /* Two right brackets. */
+#define GTR_ 74 /* EOB with pending data character */
+#define MSP_ 75 /* Marked section start in prolog outside DTD */
+#define APP_ 76 /* APPINFO (other than NONE) */
+#define STE_ 77 /* Start tag ended prolog */
+
+/* GRPACT.H: Symbols for group tokenization action names (all alpha).
+ There must be no conflict with PARSEACT.H, which
+ uses 0 - 19.
+*/
+#define AND 20 /* AND connector found. */
+#define DTAG 21 /* Data tag token group occurred (treat as #CHARS). */
+#define GRPE 22 /* Group ended. */
+#define GRP_ 23 /* Group started. */
+#define NAS_ 24 /* Name started in content model or name group. */
+#define NMT_ 25 /* Name or name token started in name token group. */
+#define OPT 26 /* OPT occurrence indicator for previous token. */
+#define OR 27 /* OR connector found. */
+#define OREP 28 /* OREP occurrence indicator for previous token. */
+#define REP 29 /* REP occurrence indicator for previous token. */
+#define RNS_ 30 /* Reserved name started (#PCDATA). */
+#define SEQ 31 /* SEQ connector found. */
+/* LITACT.H: Symbols for content parse action names (end with '_').
+ There must be no conflict with PARSEACT.H, which
+ uses 0 through 19.
+*/
+#define MLA_ 20 /* Move character to look-aside data buffer. */
+#define LPR_ 21 /* Move previous character to data buffer. */
+#define RSM_ 22 /* Process record start and move it to data buffer. */
+#define FUN_ 23 /* Replace function character with a space. */
+#define LP2_ 24 /* Move previous two characters to data buffer. */
+#define MLE_ 25 /* Minimum literal error: invalid character ignored. */
+#define RPR_ 26 /* Remove previous character from data buffer; terminate. */
+#define TER_ 27 /* Terminate the parse. */
+/* MDACT.H: Symbols for markup declaration parse action names (all alpha).
+ There must be no conflict with PARSEACT.H, which
+ uses 0 - 19.
+*/
+#define CDR 20 /* CD[1] (MINUS) occurred previously. */
+#define EMD 21 /* End of markup declaration. */
+#define GRPS 22 /* Group started. */
+#define LIT 23 /* Literal started: character data. */
+#define LITE 24 /* Literal started: character data; LITA is delimiter. */
+#define MGRP 25 /* Minus exception group (MINUS,GRPO). */
+#define NAS 26 /* Name started. */
+#define NMT 27 /* Name token started. */
+#define NUM 28 /* Number or number token started. */
+#define PEN 29 /* Parameter entity name being defined (PERO found). */
+#define PGRP 30 /* Plus exception group (PLUS,GRPO). */
+#define RNS 31 /* Reserved name started. */
+#define MDS 32 /* Markup declaration subset start. */
+#define PENR 33 /* REPEATCC; PERO found. */
+/* PARSEACT.H: Symbols for common parse action names (end with '_').
+ There must be no conflict with other action name
+ files, which use numbers greater than 19.
+*/
+#define CRA_ 1 /* Character reference: alphabetic. */
+#define CRN_ 2 /* Character reference: numeric; non-char refs o.k.. */
+#define NON_ 3 /* Single byte of non-character data found. */
+#define EOF_ 4 /* Error: illegal entity end; resume old input; return. */
+#define ER_ 5 /* Entity reference; start new input source; continue. */
+#define GET_ 6 /* EOB, EOS, or EE: resume old input source; continue. */
+#define INV_ 7 /* Error: invalid char terminated markup; repeat char. */
+#define LEN_ 8 /* Error: length limit exceeded; end markup; repeat char. */
+#define NOP_ 9 /* No action necessary. */
+#define PCI_ 10 /* Previous character was invalid. */
+#define PER_ 11 /* Parameter reference; start new input source; continue. */
+#define RC2_ 12 /* Back up two characters. */
+#define RCC_ 13 /* Repeat current character. */
+#define RCR_ 14 /* Repeat current character and return to caller. */
+#define EE_ 15 /* EOS or EE: resume old input source; return to caller. */
+#define RS_ 16 /* Record start: ccnt=0; ++rcnt. */
+#define ERX_ 17 /* Entity reference; start new input source; return. */
+#define SYS_ 18 /* Error allowed: SYSCHAR in input stream; replace it. */
+#define EOD_ 19 /* End of document. */
+/* Number way out of order to avoid recompilation. */
+#define NSC_ 58 /* Handle DELNONCH/DELXNONCH when NON_ is allowed */
+#define PEX_ 61 /* Parameter entity ref; start new input source; return. */
+#define DEF_ 62 /* Data entity found. */
+#define PIE_ 63 /* PI entity found (needed in markup). */
+#define LNR_ 64 /* LEN_ error with extra REPEATCC. */
+/* SGMLACT.H: Symbols for content parse action names (end with '_')
+ that are returned to SGML.C for processing.
+ There must be no conflict with PARSEACT.H, which
+ uses 0 through 19, or CONACT.H, which uses 34 and above.
+ (Note: 31 is also used in CONACT.H, but no conflict
+ is created because they are tested only in PARSEPRO.C, which
+ completes before SGML.C starts to examine those codes.
+ Also, when EOD_ is returned from PARSECON, it is changed
+ to LOP_.)
+*/
+#define CON_ 20 /* Normal content action (one of the following). */
+#define DAF_ 21 /* Data found. */
+#define ETG_ 22 /* Process end-tag. */
+#define MD_ 23 /* Process markup declaration (NAMESTRT found). */
+#define MDC_ 24 /* Process markup declaration comment (CD found). */
+#define MSS_ 25 /* Process marked section start. */
+#define MSE_ 26 /* Process marked section end. */
+#define PIS_ 27 /* Processing instruction (string). */
+#define REF_ 28 /* Record end found. */
+#define STG_ 29 /* Process start-tag. */
+#define RSR_ 30 /* Return RS to effect SGML state transition. */
+#define LOP_ 31 /* Loop for new content without returning anything. */
+/* TAGACT.H: Symbols for tag parse action names (all alpha).
+ There must be no conflict with PARSEACT.H, which
+ uses 0 - 19.
+*/
+#define AVD 20 /* Delimited attribute value started: normal delimiter. */
+#define AVU 21 /* Undelimited value started. */
+#define ETIC 22 /* Tag closed with ETI. */
+#define NVS 23 /* Name of attribute or value started. */
+#define NASV 24 /* Saved NAS was actually an NTV. */
+#define NTV 25 /* Name token value started; get name and full value. */
+#define TAGC 26 /* Tag closed normally. */
+#define TAGO 27 /* Tag closed implicitly by TAGO character. */
+#define AVDA 28 /* Delimited attribute value started: alternative delim. */
+#define DSC 29 /* Closed by DSC character. */
+/* VALACT.H: Symbols for attribute value tokenization action names (all alpha).
+*/
+#define NOPA 0 /* No action necessary. */
+#define INVA 1 /* Invalid character; terminate parse. */
+#define LENA 2 /* Length limit of token exceeded; terminate parse. */
+#define NASA 3 /* Name started. */
+#define NMTA 4 /* Name token started. */
+#define NUMA 5 /* Number or number token started. */
+
+/* SGML declaration parsing actions. */
+
+#define ESGD 20 /* End of SGML declaration. */
+#define LIT1 21 /* Literal started. */
+#define LIT2 22 /* Literal started with LITA delimiter. */
+#define NUM1 23 /* Number started. */
+#define NAS1 24 /* Name started. */
+#define ISIG 25 /* Insignificant character occurred. */
diff --git a/usr.bin/sgmls/sgmls/adl.h b/usr.bin/sgmls/sgmls/adl.h
new file mode 100644
index 0000000..930e1e8
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/adl.h
@@ -0,0 +1,118 @@
+/* ADL.H: Definitions for attribute descriptor list processing.
+*/
+/* N/C/SDATA external entity types for nextype member of ne structure. */
+#define ESNCDATA 1 /* External character data entity. */
+#define ESNNDATA 2 /* Non-SGML data entity. */
+#define ESNSDATA 3 /* External specific character data entity. */
+#define ESNSUB 4 /* SGML subdocument entity. */
+
+/* N/C/SDATA control block for AENTITY attributes and NDATA returns.
+   The NE*() macros that follow the structure cast their argument to PNE
+   and select the corresponding member. */
+struct ne { /* N/C/SDATA entity control block. */
+ UNIV neid; /* Files for NDATA entity. */
+ UNCH *nepubid; /* Public identifier if specified. */
+ UNCH *nesysid; /* System identifier if specified. */
+ PDCB nedcn; /* Data content notation control block (its ename, sysid, pubid, defined and adl members are read through the NEDCN*() macros). */
+ struct ad *neal; /* Data attribute list (NULL if none). */
+ UNCH *neename; /* Ptr to entity name (length and EOS). */
+ UNCH nextype; /* Entity type: ESNCDATA, ESNNDATA, ESNSDATA or ESNSUB. */
+};
+#define NESZ (sizeof(struct ne))
+typedef struct ne *PNE;
+/* NDATA entity control block fields. */
+#define NEID(p) (((PNE)p)->neid) /* File ID of NDATA entity. */
+#define NESYSID(p) (((PNE)p)->nesysid) /* System ID of NDATA entity. */
+#define NEPUBID(p) (((PNE)p)->nepubid) /* Public ID of NDATA entity. */
+#define NEDCN(p) (((PNE)p)->nedcn->ename) /* Data content notation name. */
+#define NEDCNSYSID(p) (((PNE)p)->nedcn->sysid) /* Notation system ID.*/
+#define NEDCNPUBID(p) (((PNE)p)->nedcn->pubid) /* Notation public ID.*/
+#define NEDCNDEFINED(p) (((PNE)p)->nedcn->defined) /* Notation defined? */
+#define NEDCNADL(p) (((PNE)p)->nedcn->adl) /* Data content notation attlist.*/
+#define NEENAME(p) (((PNE)p)->neename) /* Entity name pointer. */
+#define NEXTYPE(p) (((PNE)p)->nextype) /* External entity type. */
+#define NEAL(p) (((PNE)p)->neal) /* Data attributes (if any). */
+#define NEDCNMARK(p) DCNMARK(((PNE)p)->nedcn)
+
+/* Attribute descriptor list entry.  A list is an array of these; the
+   ADLF(), ADN() and AN() macros read the list-wide flags, member count
+   and attribute count from entry 0 (its adflags, adtype and adnum
+   members), while ADNAME() .. ADTOKEN() index entry n. */
+struct ad {
+ UNCH *adname; /* Attribute name with length and EOS (ADNAME() skips the first byte). */
+ UNCH adflags; /* Attribute flags: AREQ .. ASPEC bits for a member, ADLREQ/ADLNOTE/ADLCONR for the list. */
+ UNCH adtype; /* Value type: one of the codes ANMTGRP .. ANUTOKES. */
+ UNS adnum; /* Group size or member pos in grp. */
+ UNS adlen; /* Length of default or value (for capacity). */
+ UNCH *addef; /* Default value (NULL if REQUIRED or IMPLIED). */
+ union {
+ PNE n; /* AENTITY: NDATA control block. */
+ PDCB x; /* ANOTEGRP: DCN control block. */
+ } addata; /* Special data associated with some attributes.*/
+};
+#define ADSZ (sizeof(struct ad)) /* Size of an ad structure. */
+
+/* Attribute flags for entire list adflags: ADLF. */
+#define ADLREQ 0x80 /* Attribute list: 1=REQUIRED att defined. */
+#define ADLNOTE 0x40 /* Attribute list: 1=NOTATION att defined. */
+#define ADLCONR 0x20 /* Attribute list: 1=CONREF att defined. */
+
+/* Attribute flags for list member adflags: ADFLAGS(n). */
+#define AREQ 0x80 /* Attribute: 0=null; 1=required. */
+#define ACURRENT 0x40 /* Attribute: 0=normal; 1=current. */
+#define AFIXED 0x20 /* Attribute: 0=normal; 1=must equal default. */
+#define AGROUP 0x10 /* Attribute: 0=single; 1=group of ad's. */
+#define ACONREF 0x08 /* Attribute: 0=normal; 1=att is CONREF. */
+#define AINVALID 0x04 /* Attribute: 1=value is invalid; 0=o.k. */
+#define AERROR 0x02 /* Attribute: 1=error was specified; 0=o.k. */
+#define ASPEC 0x01 /* Attribute: 1=value was specified; 0=default. */
+
+/* Attribute types for adtype. */
+#define ANMTGRP 0x00 /* Attribute: Name token group or member. */
+#define ANOTEGRP 0x01 /* Attribute: Notation (name group). */
+#define ACHARS 0x02 /* Attribute: Character string. */
+#define AENTITY 0x03 /* Attribute: Data entity (name). */
+#define AID 0x04 /* Attribute: ID value (name). */
+#define AIDREF 0x05 /* Attribute: ID reference value (name). */
+#define ANAME 0x06 /* Attribute: Name. */
+#define ANMTOKE 0x07 /* Attribute: Name token. */
+#define ANUMBER 0x08 /* Attribute: Number. */
+#define ANUTOKE 0x09 /* Attribute: Number token. */
+#define ATKNLIST 0x0A /* Attribute: >= means value is a token list. */
+#define AENTITYS 0x0A /* Attribute: Data entities (name list). */
+#define AIDREFS 0x0B /* Attribute: ID reference value (name list). */
+#define ANAMES 0x0C /* Attribute: Name list. */
+#define ANMTOKES 0x0D /* Attribute: Name token list. */
+#define ANUMBERS 0x0E /* Attribute: Number list. */
+#define ANUTOKES 0x0F /* Attribute: Number token list. */
+
+/* Field definitions for entries in an attribute list.
+ The first argument to all of these is the list address.
+*/
+/* Attribute list: flags. */
+#define ADLF(a) ((a)[0].adflags)
+/* Attribute list: number of list members. */
+#define ADN(a) ((a)[0].adtype)
+/* Attribute list: number of attributes. */
+#define AN(a) ((a)[0].adnum)
+/* Nth attribute in list: name. */
+#define ADNAME(a, n) (((a)[n].adname+1))
+/* Nth att in list: number of values. */
+#define ADNUM(a, n) ((a)[n].adnum)
+/* Nth attribute in list: flags. */
+#define ADFLAGS(a, n) ((a)[n].adflags)
+/* Nth attribute in list: type. */
+#define ADTYPE(a, n) ((a)[n].adtype)
+/* Nth attribute in list: len of def or val.*/
+#define ADLEN(a, n) ((a)[n].adlen)
+/* Nth attribute in list: def or value. */
+#define ADVAL(a, n) ((a)[n].addef)
+/* Nth attribute in list: special data. */
+#define ADDATA(a, n) ((a)[n].addata)
+/* Nth att: token at Pth pos in value. */
+#define ADTOKEN(a, n, p)(((a)[n].addef+(p)))
+
+#define IDHASH 101 /* Size of ID hash table. Must be prime. */
+struct id { /* ID attribute control block (IDSZ is its size, PID a pointer to one). */
+ struct id *idnext; /* Next ID in chain. */
+ UNCH *idname; /* ID name with length prefix and EOS. */
+ UNCH iddefed; /* Non-zero if it has been defined. */
+ struct fwdref *idrl; /* Chain of forward references to this ID. */
+};
+#define IDSZ sizeof(struct id)
+typedef struct id *PID; /* Ptr to ID attribute control block. */
diff --git a/usr.bin/sgmls/sgmls/ambig.c b/usr.bin/sgmls/sgmls/ambig.c
new file mode 100644
index 0000000..9da02eb
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/ambig.c
@@ -0,0 +1,438 @@
+/* ambig.c -
+ Content model ambiguity checking.
+
+ Written by James Clark (jjc@jclark.com).
+*/
+/*
+This uses the construction in pp8-9 of [1], extended to deal with AND
+groups.
+
+Note that it is not correct for the purposes of ambiguity analysis to
+handle AND groups by turning them into an OR group of SEQ groups
+(consider (a&b?)).
+
+We build an automaton for the entire content model by adding the
+following case for AND:
+
+nullable(v) := nullable(left child) and nullable(right child)
+if nullable(right child) then
+ for each x in last(left child) do
+ follow(v,x) = follow(left child,x) U first(right child);
+if nullable(left child) then
+ for each x in last(right child) do
+ follow(v,x) = follow(right child,x) U first(left child);
+first(v) := first(left child) U first(right child);
+last(v) := last(left child) U last(right child);
+
+We also build an automaton for each AND group by building automata for
+each of the members of the AND group using the above procedure and
+then combine the members using:
+
+for each x in last(left child) do
+ follow(v,x) = follow(left child,x) U first(right child);
+for each x in last(right child) do
+ follow(v,x) = follow(right child,x) U first(left child);
+first(v) := first(left child) U first(right child);
+
+The content model is ambiguous just in case one of these automata is
+non-deterministic. (Note that when checking determinism we need to
+check the `first' set as well as all the `follow' sets.)
+
+Why is this correct? Consider a primitive token in a member of an AND
+group. There are two worst cases for ambiguity: firstly, when none of
+the other members of AND group have been matched; secondly, when just
+the nullable members remain to be matched. The first case is not
+affected by context of the AND group (unless the first case is
+identical to the second case.)
+
+Note that inclusions are not relevant for the purposes of determining
+the ambiguity of content models. Otherwise the case in clause
+11.2.5.1:
+
+ An element that can satisfy an element in the content model is
+ considered to do so, even if the element is also an inclusion.
+
+could never arise.
+
+[1] Anne Brueggemann-Klein, Regular Expressions into Finite Automata,
+Universitaet Freiburg, Institut fuer Informatik, Bericht 33, July 1991.
+*/
+
+#include "sgmlincl.h"
+
+/* Sets of states are represented by 0-terminated, ordered lists of
+indexes in gbuf. */
+
+#define MAXSTATES (GRPGTCNT+2)
+#define listcat(x, y) strcat((char *)(x), (char *)(y))
+#define listcpy(x, y) strcpy((char *)(x), (char *)(y))
+
+/* Information about a content token, as filled in by contoken().  The
+   first and last members are 0-terminated lists of gbuf indexes; each
+   list has MAXSTATES bytes reserved for it by newcontoken(). */
+
+struct contoken {
+ UNCH size; /* gbuf entries occupied: 1 for a primitive token, 1 + the members' sizes for a group */
+ UNCH nullable; /* nullable(v) of the header comment: 0 for a primitive token, forced to 1 by TOPT */
+ UNCH *first; /* first(v): indexes of the primitive tokens that can begin a match */
+ UNCH *last; /* last(v): indexes of the primitive tokens that can end a match */
+};
+
+static VOID contoken P((int, int, struct contoken *));
+static VOID andgroup P((int, int, struct contoken *));
+static VOID orgroup P((int, int, struct contoken *));
+static VOID seqgroup P((int, int, struct contoken *));
+static VOID andambig P((int));
+static int listambig P((UNCH *));
+static VOID listmerge P((UNCH *, UNCH *));
+static struct contoken *newcontoken P((void));
+static VOID freecontoken P((struct contoken *));
+
+
+/* Dynamically allocated vector of follow sets. */
+
+static UNCH **follow;
+static UNCH *mergebuf; /* for use by listmerge */
+
+/* Set to non-zero if the content model is ambiguous. */
+
+static int ambigsw;
+
+/* Check the current content model (in gbuf) for ambiguity. */
+
+VOID ambig()
+{
+ struct contoken *s;
+ int i;
+
+ if (!follow) {
+ /* We can't allocate everything in one chunk, because that would
+ overflow a 16-bit unsigned if GRPGTCNT was 253. */
+ UNCH *ptr;
+ follow = (UNCH **)rmalloc(MAXSTATES*sizeof(UNCH *));
+ follow[0] = 0;
+ ptr = (UNCH *)rmalloc((MAXSTATES - 1)*MAXSTATES);
+ for (i = 1; i < MAXSTATES; i++) {
+ follow[i] = ptr;
+ ptr += MAXSTATES;
+ }
+ mergebuf = (UNCH *)rmalloc(MAXSTATES);
+ }
+
+ for (i = 1; i < MAXSTATES; i++)
+ follow[i][0] = 0;
+
+ ambigsw = 0;
+
+ s = newcontoken();
+ contoken(1, 1, s);
+
+ ambigsw = ambigsw || listambig(s->first);
+
+ freecontoken(s);
+
+ for (i = 1; !ambigsw && i < MAXSTATES; i++)
+ if (listambig(follow[i]))
+ ambigsw = 1;
+
+ if (ambigsw)
+ mderr(137, (UNCH *)0, (UNCH *)0);
+}
+
+/* Release the storage that ambig() set up; does nothing if none is
+   currently held. */
+
+VOID ambigfree()
+{
+     if (follow == 0)
+          return;
+     /* follow[1] is the start of the single block that holds every row. */
+     frem((UNIV)follow[1]);
+     frem((UNIV)follow);
+     frem((UNIV)mergebuf);
+     follow = 0;
+}
+
+/* Decide whether a 0-terminated list of primitive content tokens (each
+   given by its index in gbuf) is ambiguous: returns 1 if two TTETD
+   entries refer to the same etd or if more than one TTCHARS entry is
+   present, otherwise 0.  The etd mark fields are used as scratch and are
+   all zero again on return. */
+
+static
+int listambig(list)
+UNCH *list;
+{
+     UNCH *p;
+     int seenchars = 0;
+     int dup = 0;
+
+     for (p = list; *p && !dup; p++) {
+          if ((gbuf[*p].ttype & TTMASK) != TTETD) {
+               assert((gbuf[*p].ttype & TTMASK) == TTCHARS);
+               if (seenchars)
+                    dup = 1;
+               else
+                    seenchars = 1;
+          }
+          else if (gbuf[*p].tu.thetd->mark)
+               dup = 1;        /* this etd was already seen in the list */
+          else
+               gbuf[*p].tu.thetd->mark = 1;
+     }
+
+     /* Clear the marks over the whole list, whether or not the scan
+        above stopped early. */
+     for (p = list; *p; p++) {
+          if ((gbuf[*p].ttype & TTMASK) == TTETD)
+               gbuf[*p].tu.thetd->mark = 0;
+     }
+
+     return dup;
+}
+
+
+/* Analyze the content token at gbuf[m]: store its size, nullability and
+   first/last sets in *res and extend the global follow sets on the way.
+   The `checkand' argument is needed to ensure that the algorithm is not
+   exponential in the AND-group nesting depth: andambig() is run on an
+   AND group only while it is non-zero.
+*/
+
+static
+VOID contoken(m, checkand, res)
+int m;                        /* Index of content token in gbuf */
+int checkand;                 /* Non-zero if AND groups should be checked */
+struct contoken *res;         /* Result */
+{
+     UNCH ttype = gbuf[m].ttype;
+     int kind = ttype & TTMASK;
+
+     if (kind == TTCHARS || kind == TTETD) {
+          /* A primitive token is its own first and last set. */
+          res->first[0] = m;
+          res->first[1] = 0;
+          res->last[0] = m;
+          res->last[1] = 0;
+          res->size = 1;
+          res->nullable = 0;
+     }
+     else if (kind == TTAND) {
+          if (checkand)
+               andambig(m);
+          andgroup(m, checkand, res);
+     }
+     else if (kind == TTOR)
+          orgroup(m, checkand, res);
+     else if (kind == TTSEQ)
+          seqgroup(m, checkand, res);
+     else
+          abort();
+
+     /* A repeatable token can be followed by its own first set. */
+     if (ttype & TREP) {
+          UNCH *tail = res->last;
+
+          while (*tail) {
+               listmerge(follow[*tail], res->first);
+               tail++;
+          }
+     }
+     if (ttype & TOPT)
+          res->nullable = 1;
+}
+
+/* Check an AND group for ambiguity.  m is the gbuf index of the group
+token.  Every member is analysed with checkand off; each member's last
+set is linked to the first sets of all the other members, with no
+nullability test (compare andgroup).  The follow sets of the group's
+own tokens are then tested with listambig() and emptied again; ambigsw
+is set if any of them is ambiguous. */
+
+static
+VOID andambig(m)
+int m;
+{
+ int i, tnum;
+ int lim;
+ struct contoken *curr;
+ struct contoken *next;
+
+ tnum = gbuf[m].tu.tnum; /* number of members in the group */
+ assert(tnum > 0);
+ curr = newcontoken(); /* accumulates first/last/size over the members seen so far */
+ next = newcontoken(); /* scratch result for the member being added */
+ contoken(m + 1, 0, curr); /* first member; checkand is 0, so nested AND groups are not re-checked */
+ i = m + 1 + curr->size; /* gbuf index of the second member */
+ curr->size += 1; /* count the group token itself */
+ for (--tnum; tnum > 0; --tnum) {
+ UNCH *p;
+ contoken(i, 0, next);
+ curr->size += next->size;
+ i += next->size;
+ for (p = curr->last; *p; p++) /* earlier members may be followed by this one */
+ listcat(follow[*p], next->first);
+ for (p = next->last; *p; p++) /* and this one by any earlier member */
+ listmerge(follow[*p], curr->first);
+ listcat(curr->first, next->first);
+ listcat(curr->last, next->last);
+ }
+ lim = m + curr->size; /* one past the last gbuf index belonging to the group */
+ for (i = m + 1; i < lim; i++) {
+ if (listambig(follow[i]))
+ ambigsw = 1;
+ follow[i][0] = 0; /* emptied so the andgroup() call that follows in contoken() starts clean */
+ }
+ freecontoken(curr);
+ freecontoken(next);
+}
+
+/* Handle an AND group: compute size, nullable, first and last for the
+group whose token is at gbuf[m] into *res, and add the follow links that
+hold inside the whole content model.  Unlike andambig, a link from one
+member to another is added only where the member being stepped over is
+nullable.  checkand is passed on unchanged to contoken() for the
+members. */
+
+static
+VOID andgroup(m, checkand, res)
+int m;
+int checkand;
+struct contoken *res;
+{
+ int i, tnum;
+ /* union of the first sets of nullable members of the group */
+ UNCH *nullablefirst;
+ struct contoken *next;
+
+ tnum = gbuf[m].tu.tnum; /* number of members in the group */
+ assert(tnum > 0);
+ contoken(m + 1, checkand, res); /* the first member seeds the result */
+ nullablefirst = (UNCH *)rmalloc(MAXSTATES);
+ if (res->nullable)
+ listcpy(nullablefirst, res->first);
+ else
+ nullablefirst[0] = 0;
+ i = m + 1 + res->size; /* gbuf index of the second member */
+ res->size += 1; /* count the group token itself */
+ next = newcontoken();
+ for (--tnum; tnum > 0; --tnum) {
+ UNCH *p;
+ contoken(i, checkand, next);
+ res->size += next->size;
+ i += next->size;
+ if (next->nullable) /* only a nullable member is linked from the earlier members' last tokens */
+ for (p = res->last; *p; p++)
+ listcat(follow[*p], next->first);
+ for (p = next->last; *p; p++) /* this member may be followed by any earlier nullable member */
+ listmerge(follow[*p], nullablefirst);
+ listcat(res->first, next->first);
+ if (next->nullable)
+ listcat(nullablefirst, next->first);
+ listcat(res->last, next->last);
+ res->nullable &= next->nullable; /* the group is nullable only if every member is */
+ }
+ frem((UNIV)nullablefirst);
+ freecontoken(next);
+}
+
+/* Handle a SEQ group: compute size, nullable, first and last for the
+   group whose token is at gbuf[m] into *res, linking the last set
+   accumulated so far to the first set of each following member. */
+
+static
+VOID seqgroup(m, checkand, res)
+int m;
+int checkand;
+struct contoken *res;
+{
+     struct contoken *item;
+     int pos;
+     int tnum = gbuf[m].tu.tnum;
+
+     assert(tnum > 0);
+     contoken(m + 1, checkand, res);
+     pos = m + 1 + res->size;
+     res->size += 1;          /* the group token itself */
+     item = newcontoken();
+     while (--tnum > 0) {
+          UNCH *tail;
+
+          contoken(pos, checkand, item);
+          res->size += item->size;
+          pos += item->size;
+          for (tail = res->last; *tail != 0; tail++)
+               listcat(follow[*tail], item->first);
+          /* While everything so far is nullable, this member can also
+             start the group. */
+          if (res->nullable)
+               listcat(res->first, item->first);
+          /* A non-nullable member replaces the last set; a nullable one
+             only extends it. */
+          if (!item->nullable)
+               listcpy(res->last, item->last);
+          else
+               listcat(res->last, item->last);
+          res->nullable &= item->nullable;
+     }
+     freecontoken(item);
+}
+
+/* Handle an OR group: compute size, nullable, first and last for the
+   group whose token is at gbuf[m] into *res.  The first and last sets
+   are the concatenation of the members' sets, and the group is nullable
+   if any member is. */
+
+static
+VOID orgroup(m, checkand, res)
+int m;
+int checkand;
+struct contoken *res;
+{
+     struct contoken *alt;
+     int pos;
+     int tnum = gbuf[m].tu.tnum;
+
+     assert(tnum > 0);
+     contoken(m + 1, checkand, res);
+     pos = m + 1 + res->size;
+     res->size += 1;          /* the group token itself */
+     alt = newcontoken();
+     while (--tnum > 0) {
+          contoken(pos, checkand, alt);
+          res->size += alt->size;
+          pos += alt->size;
+          listcat(res->first, alt->first);
+          listcat(res->last, alt->last);
+          res->nullable |= alt->nullable;
+     }
+     freecontoken(alt);
+}
+
+
+/* Merge the second ordered list into the first.  The old contents of p
+   are first copied to mergebuf; p is then rewritten in ascending order
+   from that copy and b, with a value present in both stored once. */
+
+static
+VOID listmerge(p, b)
+UNCH *p, *b;
+{
+     UNCH *old = mergebuf;
+
+     strcpy((char *)old, (char *)p);
+
+     while (*old || *b) {
+          if (*old == 0)
+               *p++ = *b++;             /* only b is left */
+          else if (*b == 0 || *old < *b)
+               *p++ = *old++;
+          else if (*old > *b)
+               *p++ = *b++;
+          else
+               old++;                   /* equal: b's copy is stored next time round */
+     }
+     *p = '\0';
+}
+
+/* Allocate a contoken together with its two lists in one rmalloc()
+   block: the structure is followed by MAXSTATES bytes for `first' and
+   then MAXSTATES bytes for `last'.  The lists are not initialized. */
+static
+struct contoken *newcontoken()
+{
+     struct contoken *tok;
+     UNCH *lists;
+
+     tok = (struct contoken *)rmalloc(sizeof(struct contoken)
+                                      + MAXSTATES*2);
+     lists = (UNCH *)(tok + 1);
+     tok->first = lists;
+     tok->last = lists + MAXSTATES;
+     return tok;
+}
+
+static
+VOID freecontoken(p) /* Release a token obtained from newcontoken(). */
+struct contoken *p;
+{
+ frem((UNIV)p); /* one call suffices: newcontoken() places the first/last lists in the same allocation */
+}
+
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/appl.h b/usr.bin/sgmls/sgmls/appl.h
new file mode 100644
index 0000000..404d749
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/appl.h
@@ -0,0 +1,33 @@
+/* appl.h */
+
+enum { /* E_NOMEM .. E_SUBDOC, numbered consecutively from 1; presumably the int passed to appl_error() -- confirm against its callers */
+ E_NOMEM = 1,
+ E_DOC,
+ E_EXEC,
+ E_FORK,
+ E_WAIT,
+ E_SIGNAL,
+ E_OPEN,
+ E_CAPBOTCH,
+ E_SUBDOC
+};
+
+VOID process_document P((int));
+VOID output_conforming P((void));
+
+UNIV xmalloc P((UNS));
+UNIV xrealloc P((UNIV, UNS));
+VOID appl_error VP((int, ...));
+
+#ifdef SUPPORT_SUBDOC
+int run_process P((char **));
+char **make_argv P((UNIV));
+VOID get_subcaps P((void));
+#endif
+
+#ifdef SUPPORT_SUBDOC
+extern int suberr;
+#endif
+
+extern int suppsw;
+extern int locsw;
diff --git a/usr.bin/sgmls/sgmls/config.h b/usr.bin/sgmls/sgmls/config.h
new file mode 100644
index 0000000..562cdcf
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/config.h
@@ -0,0 +1,147 @@
+/* unix.cfg: Configuration file for sgmls on Unix. */
+
+/* A list of filename templates to use for searching for external entities.
+The filenames are separated by the character specified in PATH_FILE_SEP.
+See sgmls.man for details. */
+#define DEFAULT_PATH "/usr/share/sgml/%O/%C/%T:%N.%X:%N.%D"
+/* The character that separates the filenames templates. */
+#define PATH_FILE_SEP ':'
+/* The character that separates filenames in a system identifier.
+Usually the same as PATH_FILE_SEP. */
+#define SYSID_FILE_SEP ':'
+/* The environment variable that contains the list of filename templates. */
+#define PATH_ENV_VAR "SGML_PATH"
+
+/* MIN_DAT_SUBS_FROM and MIN_DAT_SUBS_TO tell sgmls how to transform a name
+or system identifier into a legal filename. A character in
+MIN_DAT_SUBS_FROM will be transformed into the character in the
+corresponding position in MIN_DAT_SUBS_TO. If there is no such
+position, then the character is removed. */
+/* This says that spaces should be transformed to underscores, and
+slashes to percents. */
+#define MIN_DAT_SUBS_FROM " /"
+#define MIN_DAT_SUBS_TO "_%"
+
+/* Define this to allow tracing. */
+/* #define TRACE 1 */
+
+/* Define this if you want support for subdocuments. This is implemented
+using features that are not part of Standard C, so you might not want
+to define it if you are porting to a new system. Otherwise I suggest
+you leave it defined. */
+#define SUPPORT_SUBDOC 1
+
+/* Define HAVE_EXTENDED_PRINTF if your *printf functions support
+X/Open extensions; if they do, then, for example,
+
+ printf("%2$s%1$s", "bar", "foo")
+
+should print `foobar'. */
+
+/* #define HAVE_EXTENDED_PRINTF 1 */
+
+/* Define HAVE_CAT if your system provides the X/Open message
+catalogue functions catopen() and catgets(), and you want to use them.
+An implementation of these functions is included and will be used if
+you don't define this. On SunOS 4.1.1, if you do define this you
+should set CC=/usr/xpg2bin/cc in the makefile. */
+
+/* #define HAVE_CAT 1 */
+
+#ifdef __STDC__
+/* Define this if your compiler supports prototypes. */
+#define USE_PROTOTYPES 1
+#endif
+
+/* Can't use <stdarg.h> without prototypes. */
+#ifndef USE_PROTOTYPES
+#define VARARGS 1
+#endif
+
+/* If your compiler defines __STDC__ but doesn't provide <stdarg.h>,
+you must define VARARGS yourself here. */
+/* #define VARARGS 1 */
+
+/* Define this if you do not have strerror(). */
+/* #define STRERROR_MISSING 1 */
+
+/* Define this unless the character testing functions in ctype.h
+are defined for all values representable as an unsigned char. You do
+not need to define this if your system is ANSI C conformant. You
+should define it for old Unix systems. */
+/* #define USE_ISASCII 1 */
+
+/* Define this if your system provides the BSD style string operations
+rather than ANSI C ones (eg bcopy() rather than memcpy(), and index()
+rather than strchr()). */
+/* #define BSD_STRINGS 1 */
+
+/* Define this if you have getopt(). */
+#define HAVE_GETOPT 1
+
+/* Define this if you have access(). */
+#define HAVE_ACCESS 1
+
+/* Define this if you have <unistd.h>. */
+#define HAVE_UNISTD_H 1
+
+/* Define this if you have <sys/stat.h>. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define this if you have waitpid(). */
+#define HAVE_WAITPID 1
+
+/* Define this if your system is POSIX.1 (ISO 9945-1:1990) compliant. */
+#define POSIX 1
+
+/* Define this if you have the vfork() system call. */
+#define HAVE_VFORK 1
+
+/* Define this if you have <vfork.h>. */
+/* #define HAVE_VFORK_H 1 */
+
+/* Define this if you don't have <stdlib.h> */
+/* #define STDLIB_H_MISSING 1 */
+
+/* Define this if you don't have <stddef.h> */
+/* #define STDDEF_H_MISSING 1 */
+
+/* Define this if you don't have <limits.h> */
+/* #define LIMITS_H_MISSING 1 */
+
+/* Define this if you don't have remove(); unlink() will be used instead. */
+/* #define REMOVE_MISSING 1 */
+
+/* Define this if you don't have raise(); kill() will be used instead. */
+/* #define RAISE_MISSING 1 */
+
+/* Define this if you don't have fsetpos() and fgetpos(). */
+/* #define FPOS_MISSING 1 */
+
+/* Universal pointer type. */
+/* If your compiler doesn't fully support void *, change `void' to `char'. */
+typedef void *UNIV;
+
+/* If your compiler doesn't support void as a function return type,
+change `void' to `int'. */
+typedef void VOID;
+
+/* If you don't have an ANSI C conformant <limits.h>, define
+CHAR_SIGNED as 1 or 0 according to whether the `char' type is signed.
+The <limits.h> on some versions of System V Release 3.2 is not ANSI C
+conformant: the value of CHAR_MIN is 0 even though the `char' type is
+signed. */
+
+/* #define CHAR_SIGNED 1 */
+/* #define CHAR_SIGNED 0 */
+#ifndef CHAR_SIGNED
+#include <limits.h>
+#if CHAR_MIN < 0
+#define CHAR_SIGNED 1
+#else
+#define CHAR_SIGNED 0
+#endif
+#endif /* not CHAR_SIGNED */
+
+/* Assume the system character set is ISO Latin-1. */
+#include "latin1.h"
diff --git a/usr.bin/sgmls/sgmls/context.c b/usr.bin/sgmls/sgmls/context.c
new file mode 100644
index 0000000..1eb5a5c
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/context.c
@@ -0,0 +1,444 @@
+#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */
+#include "context.h"
+
+#define GI (tags[ts].tetd->etdgi+1) /* GI of current element. */
+#define NEWGI (newetd->etdgi+1) /* GI of new tag. */
+#define STATUS (*statuspt) /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
+#define PEX (-1) /* GI is a plus exception and not a minus. */
+
+/* Hit-bit helpers. h is an array of unsigned long hit words and n is a
+ 1-based token index: bit n-1 is kept in word (n-1)>>LONGPOW at position
+ (n-1)&(LONGBITS-1). The mask is built from 1UL rather than 1L because
+ left-shifting a signed 1 into the sign bit is undefined behaviour
+ (this matters when the position is the top bit of a long; presumably
+ LONGBITS is the width of long -- confirm where it is defined).
+ Every macro argument is parenthesized so expression arguments expand
+ safely. */
+#define ANYHIT(h) (grplongs == 1 ? ((h)[0] != 0) : anyhit(h))
+#define HITSET(h, n) ((h)[(unsigned)((n)-1)>>LONGPOW] \
+ |= (1UL<<(((n)-1)&(LONGBITS-1))))
+#define HITON(h, n) ((h)[(unsigned)((n)-1)>>LONGPOW] \
+ & (1UL<<(((n)-1)&(LONGBITS-1))))
+
+#define HITOFF(h, n) (!(HITON(h, n)))
+
+#define TOKENHIT HITON(H,T)
+
+/* COPYPOS: Duplicate a model position stack. Entry 0 of "from" keeps the
+ index of the current group in its t member, so entries 0 through
+ from[0].t are copied: the g and t members by assignment and
+ grplongs unsigned longs of hit bits from h.
+*/
+static
+VOID copypos(to, from)
+struct mpos *to, *from;
+{
+     int level = 0;
+
+     while (level <= (int)from[0].t) {
+          struct mpos *dst = &to[level];
+          struct mpos *src = &from[level];
+
+          dst->g = src->g;
+          dst->t = src->t;
+          memcpy(dst->h, src->h, grplongs*sizeof(unsigned long));
+          ++level;
+     }
+}
+
+/* CONTEXT: Determine whether a GI is valid in the present structural context.
+ Returns RCHIT if valid, RCEND if element has ended, RCREQ if a
+ different element is required, and RCMISS if it is totally invalid.
+ On entry, pos points to the model token to be tested against the GI.
+ The token status is read and updated through statuspt (STATUS).
+ Exceptions are selected by mexts:
+ mexts == -1 (plus exception): RCPEX is returned at once if STATUS
+ is already RCEND. Otherwise pos is first copied to savedpos;
+ a hit returns RCHIT, but if a required token is reached or the
+ loop ends without a hit, pos is restored from savedpos and
+ RCPEX is returned.
+ mexts > 0 (minus exception): a hit returns RCMEX when the hit
+ token was optional or in an OR group, and RCHITMEX otherwise;
+ a token that is neither hit nor required returns RCMEX.
+ On an RCREQ return, nextetd holds the ETD of the required token.
+ TO DO: Save allowed GIs for an error message on an RCMISS.
+ Support a "query" mode (what is allowed now?) by working
+ with a copy of pos.
+*/
+int context(gi, mod, pos, statuspt, mexts)
+struct etd *gi; /* ETD of new GI. */
+struct thdr mod[]; /* Model of current open element. */
+struct mpos pos[]; /* Position in open element's model. */
+UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
+int mexts; /* >0=stack level of minus grp; -1=plus; 0=none.*/
+{
+ UNCH toccsv, gtypesv; /* Save token's TOCC and GTYPE in case grp ends.*/
+
+ if (mexts == -1) {
+ if (STATUS == RCEND)
+ return RCPEX;
+ copypos(savedpos, pos); /* Keep a copy so pos can be restored below. */
+ }
+ Tstart = T; /* Save starting token for AND group testing. */
+ while (STATUS!=RCMISS && STATUS!=RCEND) {
+ TRACEGI("CONTEXT", gi, mod, pos, Tstart);
+ while (TTYPE==TTOR || TTYPE==TTSEQ || TTYPE==TTAND) {
+ pos[P+1].g = M++; pos[++P].t = 1; HITCLEAR(H); /* Enter group at its first token. */
+ Tstart = T; /* Save starting token for AND group testing. */
+ TRACEGI("OPENGRP", gi, mod, pos, Tstart);
+ }
+ STATUS = (UNCH)tokenreq(gi, mod, pos);
+ TRACEGI("STATUS", gi, mod, pos, Tstart);
+ if (gi==TOKEN.tu.thetd) { /* Hit in model. */
+ STATUS = (UNCH)RCHIT;
+ gtypesv = GTYPE; toccsv = TOCC; /* newtoken() may move off this token. */
+ newtoken(mod, pos, statuspt);
+ return(mexts<=0 ? RCHIT : (gtypesv==TTOR || BITON(toccsv, TOPT))
+ ? RCMEX : RCHITMEX);
+ }
+ if (STATUS==RCREQ) {
+ if (mexts == -1)
+ break; /* Plus exception: restore pos and return RCPEX below. */
+ STATUS = RCHIT;
+ nextetd = TOKEN.tu.thetd;
+ newtoken(mod, pos, statuspt);
+ return(RCREQ);
+ }
+ /* else if (STATUS==RCNREQ) */
+ if (mexts>0) return(RCMEX);
+ newtoken(mod, pos, statuspt);
+ }
+ if (mexts == -1) {
+ copypos(pos, savedpos);
+ return STATUS = RCPEX;
+ }
+ return((int)STATUS);
+}
+/* ECONTEXT: Determine whether the current element can be ended, or whether
+ non-optional tokens remain at the current level or higher.
+ Returns 1 if element can be ended, or 0 if tokens remain.
+ On entry, STATUS==RCEND if there are no tokens left; if not,
+ pos points to the next model token to be tested.
+ STATUS is read and updated through statuspt, and pos is advanced
+ as groups are ended and optional tokens are skipped.
+ Side effect: nextetd is left as 0 or as the ETD of the element
+ token (TTETD) at the current position.
+ TO DO: Support a "query" mode (what is required now?) by working
+ with a copy of pos.
+*/
+int econtext(mod, pos, statuspt)
+struct thdr mod[]; /* Model of current open element. */
+struct mpos pos[]; /* Position in open element's model. */
+UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
+{
+ unsigned next; /* Position in AND group of next testable token.*/
+
+ Tstart = T;
+ TRACEEND("ECONT", mod, pos, 0, 0, Tstart);
+ if (P<=1) {nextetd = 0; return(TOKENHIT || BITON(TOCC, TOPT));}
+ nextetd = TTYPE == TTETD ? TOKEN.tu.thetd : 0;
+ while (STATUS!=RCMISS && STATUS!=RCEND) {
+ STATUS = (UNCH)testend(mod, pos, 0, 0);
+ TRACEEND("ECONTEND", mod, pos, 0, 0, Tstart);
+ nextetd = P<=1 || TTYPE != TTETD ? 0 : TOKEN.tu.thetd;
+ if (STATUS==RCEND) return(1);
+ if (P<=1) return(TOKENHIT || BITON(TOCC, TOPT));
+ if (STATUS==RCMISS) {
+ if (BITON(TOCC, TOPT)) nextetd = 0;
+ return(0);
+ }
+ if (!tokenopt(mod, pos)) return(0); /* A non-optional token remains. */
+
+ STATUS = RCNREQ;
+ if (GTYPE!=TTAND) ++T; /* T!=GNUM or group would have ended. */
+ else T = (UNCH)(((next = (UNS)offbit(H, (int)T, GNUM))!=0) ?
+ next : offbit(H, 0, GNUM)); /* Next unhit member, wrapping to the first. */
+
+ M = G + grpsz(&GHDR, (int)T-1) + 1; /* Make M correspond to the new T. */
+ TRACEEND("ECONTNEW", mod, pos, 0, 0, Tstart);
+ }
+ if (STATUS==RCMISS) {
+ if (BITON(TOCC, TOPT)) nextetd = 0;
+ return(0);
+ }
+ return(1); /* STATUS==RCEND */
+}
+/* NEWTOKEN: Find the next token to test. Set STATUS to indicate results:
+ RCEND if element has ended (no more tokens to test);
+ RCREQ if required new token was found;
+ RCNREQ if non-required new token was found;
+ RCHIT if a hit token was repeated (now non-required);
+ and RCMISS if a new token can't be found because current token
+ (which was not hit) was neither unconditionally required nor
+ optional.
+ STATUS is read and updated through statuspt. The position in
+ pos (M, P, T) and Tstart may be changed, both directly and by
+ the calls to testend().
+*/
+VOID newtoken(mod, pos, statuspt)
+struct thdr mod[]; /* Model of current open element. */
+struct mpos pos[]; /* Position in open element's model. */
+UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
+{
+ unsigned nextand = 0; /* Position in AND group of next testable token.*/
+ int currhit = (STATUS==RCHIT); /* 1=current GI hit; 0=not. */
+
+ /* If the GI was a hit, turn on the hit bit and set the status to
+ assume that the token to be tested against the next GI will
+ be non-required. If the current token is repeatable, exit so
+ it will stand as the next token to test.
+ */
+ if (STATUS==RCHIT) {
+ HITSET(H, T);
+ STATUS = RCNREQ;
+ if (BITON(TOCC, TREP)) return;
+ }
+ /* At this point, we must determine the next token to test:
+ either against the next GI, if this one was a hit, or
+ against the same GI if conditions permit a retry.
+ To find the next token, we must first end the current group,
+ if possible, and any we can that contain it.
+ If the outermost group was a hit and is repeatable, or
+ if the element has ended, we exit now.
+ If it hasn't ended, or was optional and ended with a miss,
+ we can retry the GI against the next token.
+ */
+ if ((STATUS = (UNCH)testend(mod, pos, 1, 1))!=RCNREQ) return;
+
+ /* At this point, the "current token" is either the original one,
+ or the token for the highest level unhit group that it ended.
+ We will retry a missed GI, by testing it against the next
+ token, if the current token:
+ 1. Is optional;
+ 2. Was hit (i.e., because it is repeatable and was hit by a
+ previous GI or because it is a hit group that just ended);
+ 3. Is in an AND or OR group and is not the last testable token.
+
+ It will be the next sequential one (unhit one, in an AND group);
+ if there are none left, use the first unhit token in the group.
+ In either case, set M to correspond to the new T.
+ */
+ retest:
+ TRACEEND("RETEST", mod, pos, (int)nextand, 1, Tstart);
+ if (GTYPE==TTAND) {
+ nextand = offbit(H, (int)T, GNUM);
+ if (!nextand)
+ nextand = offbit(H, 0, GNUM);
+ }
+ if ( BITON(TOCC, TOPT)
+ || TOKENHIT
+ || GTYPE==TTOR /* T!=GNUM or group would have ended. */
+ || nextand ) {
+ if (GTYPE!=TTAND) ++T; /* T!=GNUM or group would have ended. */
+ else T = nextand;
+ M = G + grpsz(&GHDR, (int)T-1) + 1;
+ if (GTYPE==TTAND) {
+ /* If AND group wrapped, it can end if all non-optionals were
+ hit. */
+ if (T==Tstart && !currhit) {
+ UNCH Psave = P;
+ int rc = testend(mod, pos, 0, 1);
+ if (Psave!=P) {if ((STATUS = (UNCH)rc)==RCNREQ) goto retest;} /* A group ended. */
+ else STATUS = RCMISS; /* No group ended: nothing left to try. */
+ }
+
+ /* We only test unhit tokens, so we must use an unhit token
+ as Tstart (which is used to detect when the AND group has
+ wrapped). */
+ else if (HITON(H,Tstart)) Tstart = T;
+ }
+ }
+ else STATUS = RCMISS;
+ TRACEEND("NEWTOKEN", mod, pos, (int)nextand, 1, Tstart);
+}
+/* TESTEND: End the current group, if possible, and any that it is nested in.
+ The current token will either be a group header, or some token
+ that could not end its group. Returns a status code; the loop
+ only exits once the code is non-zero:
+ RCNREQ if no (further) group could be ended;
+ RCHIT if newtknsw is set and a group that just ended with all
+ its required tokens hit is repeatable;
+ RCEND if P dropped to 1 or below and the element can end;
+ RCMISS if P dropped to 1 or below by ending an unhit group
+ whose token is neither optional nor hit.
+ M, P and Tstart are updated for every group that is ended.
+*/
+int testend(mod, pos, andoptsw, newtknsw)
+struct thdr mod[]; /* Model of current open element. */
+struct mpos pos[]; /* Position in open element's model. */
+int andoptsw; /* 1=test optional AND members; 0=ignore. */
+int newtknsw; /* 1=new token test; 0=end element test. */
+{
+ int rc = 0; /* Return code: RCNREQ RCHIT RCMISS RCEND */
+
+ while (!rc) {
+ TRACEEND("TRACEEND", mod, pos, rc, andoptsw, Tstart);
+ /* TESTMISS:
+ If we've hit no tokens yet in the current group, and
+ the current token is the last unhit one in the group we can test,
+ we will end the group (it may never really have started!)
+ because we might be able to try the token that follows it.
+ In any group, a token is the last testable unhit token if it
+ is the last sequential one, as the GI was already tested against
+ the preceding unhit tokens. In addition,
+ in a SEQ group, it is the last testable unhit token if it isn't
+ optional, because we can't skip past it to the following ones.
+ If we end the group, before popping the level, set M to G, as this
+ level's group header will be the next level's current token.
+ */
+ if (!ANYHIT(H) && (T==GNUM
+ || (GTYPE==TTSEQ && BITOFF(TOCC, TOPT)))) {
+ M = G; --P; Tstart = T;
+ if (P<=1) {
+ if (BITON(TOCC, TOPT) || TOKENHIT) rc = RCEND;
+ else rc = RCMISS;
+ }
+ continue;
+ }
+ /* TESTHIT:
+ See if we've hit all the non-optional tokens in the group.
+ If so, pop to the previous level and set the group's hit bit.
+ If we were called from NEWTOKEN we are trying to find the token
+ to test against the next start-tag, so if the group is repeatable,
+ process it again. (If not, we were called from ECONTEXT and
+ are testing whether the element can be ended.)
+ Otherwise, if we are at the first level, the element is over.
+ */
+ if ((GTYPE==TTOR && TOKENHIT)
+ || (GTYPE==TTSEQ && T==(UNCH)GNUM
+ && (TOKENHIT || BITON(TOCC, TOPT)))
+ || (GTYPE==TTAND && allhit(&GHDR, H, 0, andoptsw))) {
+ M = G;
+ --P;
+ HITSET(H, T);
+ Tstart = T;
+ if (newtknsw && BITON(TOCC, TREP)) rc = RCHIT;
+ else if (P<=1) rc = RCEND;
+ /* If we are looking for a new token to test against the next
+ start-tag, then we need to consider optional and members
+ in this group, even if we didn't need to consider them
+ in the group that we just ended because that group had
+ wrapped. */
+ else if (newtknsw) andoptsw = 1;
+ /* Else loop to test new outer group. */
+ }
+ else rc = RCNREQ; /* No group ended this time, so return. */
+ }
+ TRACEEND("ENDFOUND", mod, pos, rc, andoptsw, Tstart);
+ return(rc);
+}
+/* TOKENOPT: Tell whether the current token is contextually optional.
+ Returns 1 when the token carries the TOPT occurrence bit, when
+ its hit bit is already on, or when its group has no hits at all
+ and the group itself is contextually optional (see GROUPOPT);
+ returns 0 in every other case.
+*/
+int tokenopt(mod, pos)
+struct thdr mod[]; /* Model of current open element. */
+struct mpos pos[]; /* Position in open element's model. */
+{
+     TRACEEND("TOKENOPT", mod, pos, 0, 0, Tstart);
+     if (BITON(TOCC, TOPT))        /* Inherently optional. */
+          return 1;
+     if (TOKENHIT)                 /* Already hit ("plus" suffix case). */
+          return 1;
+     if (ANYHIT(H))                /* Group has started: token is needed. */
+          return 0;
+     /* Group has no hits, so the answer is whatever holds for the group. */
+     return groupopt(mod, pos) ? 1 : 0;
+}
+/* GROUPOPT: Ask TOKENOPT() about the group that contains the current token.
+ The position is moved up one level so that the group header is
+ the current token, TOKENOPT() is applied, and the position is
+ put back. At level 1 the group's own TOPT bit and the current
+ hit bit decide directly. Returns 1 if optional, 0 if not.
+*/
+int groupopt(mod, pos)
+struct thdr mod[]; /* Model of current open element. */
+struct mpos pos[]; /* Position in open element's model. */
+{
+     UNCH heldM;                   /* M of the inner level while we look outward. */
+     int optional;                 /* Result from TOKENOPT for the group. */
+
+     if (P==1) {
+          if (BITON(GOCC, TOPT))
+               return 1;
+          return TOKENHIT ? 1 : 0;
+     }
+     /* Step out: the group header becomes the current token. */
+     heldM = M;
+     M = G;
+     --P;
+     optional = tokenopt(mod, pos);
+     /* Step back in and restore the inner position. */
+     ++P;
+     G = M;
+     M = heldM;
+     return optional;
+}
+/* TOKENREQ: Classify the current token as required or not.
+ Returns RCREQ when the token is not contextually optional
+ (see TOKENOPT) and it is a member of a "seq" group that either
+ already has at least one hit token or is itself required
+ (see GROUPREQ). Returns RCNREQ in every other case; in
+ particular, membership of an "and" or "or" group never makes
+ a token required here.
+*/
+int tokenreq(gi, mod, pos)
+struct etd *gi; /* ETD of new GI. */
+struct thdr mod[]; /* Model of current open element. */
+struct mpos pos[]; /* Position in open element's model. */
+{
+     TRACEGI("TOKENREQ", gi, mod, pos, Tstart);
+     if (tokenopt(mod, pos))
+          return RCNREQ;
+     if (GTYPE != TTSEQ)
+          return RCNREQ;
+     if (ANYHIT(H))                /* The sequence has already begun. */
+          return RCREQ;
+     if (groupreq(gi, mod, pos) == RCREQ)
+          return RCREQ;
+     return RCNREQ;
+}
+/* GROUPREQ: Ask TOKENREQ() about the group that contains the current token.
+ The position is moved up one level so that the group header is
+ the current token, TOKENREQ() is applied, and the position is
+ put back. At level 1 the group is required exactly when its
+ TOPT bit is off. Returns RCREQ or RCNREQ.
+*/
+int groupreq(gi, mod, pos)
+struct etd *gi; /* ETD of new GI. */
+struct thdr mod[]; /* Model of current open element. */
+struct mpos pos[]; /* Position in open element's model. */
+{
+     UNCH heldM;                   /* M of the inner level while we look outward. */
+     int verdict;                  /* RCREQ or RCNREQ for the group. */
+
+     if (P==1) {
+          if (BITOFF(GOCC, TOPT))
+               return RCREQ;
+          return RCNREQ;
+     }
+     /* Step out: the group header becomes the current token. */
+     heldM = M;
+     M = G;
+     --P;
+     verdict = tokenreq(gi, mod, pos);
+     /* Step back in and restore the inner position. */
+     ++P;
+     G = M;
+     M = heldM;
+     return verdict;
+}
+/* GRPSZ: Count the model entries occupied by the first t members of the
+ group whose header is g. Each member contributes one entry;
+ a member that is itself a group (OR, SEQ or AND) additionally
+ contributes the entries of all of its own members, obtained by
+ a recursive call. The header g itself is not counted.
+*/
+int grpsz(g, t)
+struct thdr *g; /* mod[G]: Ptr to group in the model. */
+int t; /* T: Index of last token in the group. */
+{
+     struct thdr *tok = g;         /* Model entry being examined. */
+     int span = 0;                 /* Entries counted so far. */
+     int member;                   /* 1-based index of the member. */
+
+     for (member = 1; member <= t; member++) {
+          UNS kind;                /* Token type without occurrence bits. */
+
+          ++tok;
+          ++span;
+          kind = GET(tok->ttype, TTMASK);
+          if (kind != TTOR && kind != TTSEQ && kind != TTAND)
+               continue;
+          /* Nested group: count its contents and skip over them. */
+          span += grpsz(tok, tok->tu.tnum);
+          tok = g + span;
+     }
+     return span;
+}
+/* ALLHIT: Check the hit bits of the members of the group whose header is p.
+ Returns 0 as soon as a member is found whose hit bit is off,
+ unless that member is optional and "opt" is 0, or its index
+ equals "but". Returns 1 if no such member exists. GRPSZ is
+ used to step over the contents of members that are subgroups.
+*/
+int allhit(p, hits, but, opt)
+struct thdr *p; /* mod[G]: Ptr to group in the model. */
+unsigned long *hits; /* H: Hit bits to be tested. */
+int but; /* Index of bit to ignore; 0=test all. */
+int opt; /* 1=optional tokens must be hit; 0=ignore. */
+{
+     int count = p->tu.tnum;       /* Number of members to examine. */
+     int bit;                      /* 1-based index of member and hit bit. */
+
+     for (bit = 1, ++p; bit <= count; ++bit, ++p) {
+          unsigned kind;           /* Token type without occurrence bits. */
+
+          if (HITOFF(hits, bit)) {
+               if ((opt || BITOFF(p->ttype, TOPT)) && bit != but)
+                    return 0;
+          }
+          kind = GET(p->ttype, TTMASK);
+          if (kind == TTOR || kind == TTSEQ || kind == TTAND)
+               p += grpsz(p, p->tu.tnum);
+     }
+     return 1;
+}
+/* OFFBIT: Search bits first+1 through last (1-based indexes) for one that
+ is not set and return its index. The "first" bit itself is
+ never examined. Returns 0 when every bit in that range is on
+ or the range is empty.
+*/
+int offbit(bits, first, last)
+unsigned long *bits; /* Bits to be tested. */
+int first; /* Index of first bit to be tested in bits. */
+int last; /* Index of last bit to be tested in bits. */
+{
+     int bit;
+
+     for (bit = first + 1; bit <= last; bit++) {
+          if (HITON(bits, bit))
+               continue;
+          return bit;
+     }
+     return 0;
+}
+
+/* ANYHIT: Return 1 if any of the grplongs words of bits is non-zero,
+ otherwise 0. */
+
+int anyhit(bits)
+unsigned long *bits;
+{
+     int word = 0;
+
+     while (word < grplongs) {
+          if (bits[word] != 0)
+               return 1;
+          word++;
+     }
+     return 0;
+}
+
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+comment-column: 30
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/context.h b/usr.bin/sgmls/sgmls/context.h
new file mode 100644
index 0000000..04350c7
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/context.h
@@ -0,0 +1,17 @@
+/* context.h */
+
+#define M pos[0].g /* Index of current token in model. */
+#ifdef P
+#undef P
+#endif
+#define P pos[0].t /* Index of current group in pos. */
+#define G pos[P].g /* Index of current group in model. */
+#define T pos[P].t /* Index of current token in its group. */
+#define H pos[P].h /* Pointer to hit bits for current group. */
+#define GHDR mod[G] /* Current group header. */
+#define TOKEN mod[M] /* Current token. */
+#define TTYPE (GET(TOKEN.ttype, TTMASK)) /* Token type of current token. */
+#define TOCC (GET(TOKEN.ttype, TOREP)) /* Occurrence for current token. */
+#define GTYPE (GET(GHDR.ttype, TTMASK)) /* Token type of current group. */
+#define GOCC (GET(GHDR.ttype, TOREP)) /* Occurrence for current group. */
+#define GNUM GHDR.tu.tnum /* Number of tokens in current grp. */
diff --git a/usr.bin/sgmls/sgmls/dosproc.c b/usr.bin/sgmls/sgmls/dosproc.c
new file mode 100644
index 0000000..99b526d
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/dosproc.c
@@ -0,0 +1,40 @@
+/* dosproc.c -
+
+ MS-DOS implementation of run_process().
+
+ Written by James Clark (jjc@jclark.com).
+*/
+
+#include "config.h"
+
+#ifdef SUPPORT_SUBDOC
+
+#include "std.h"
+#include "entity.h"
+#include "appl.h"
+
+#include <process.h>
+
+/* Run the program argv[0] with argument vector argv and wait for it,
+ using spawnvp() in P_WAIT mode. Returns whatever spawnvp() returned;
+ a negative value is also reported through appl_error() together with
+ the text for errno. */
+int run_process(argv)
+char **argv;
+{
+     int status;
+
+     /* Push out our own buffered output before the child runs. */
+     fflush(stdout);
+     fflush(stderr);
+     status = spawnvp(P_WAIT, argv[0], argv);
+     if (status >= 0)
+          return status;
+     appl_error(E_EXEC, argv[0], strerror(errno));
+     return status;
+}
+
+#endif /* SUPPORT_SUBDOC */
+
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/ebcdic.c b/usr.bin/sgmls/sgmls/ebcdic.c
new file mode 100644
index 0000000..b8188c7
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/ebcdic.c
@@ -0,0 +1,42 @@
+/* ASCII to EBCDIC (ISO 8859-1 to IBM CP 37v2) table. */
+/* Contributed by C. M. Sperberg-McQueen <u35395@uicvm.uic.edu>. */
+
+/* The mapping must be 1 to 1. The positions of *CHAR and *CH in the table
+must not be changed, although the values in ebcdic.h can be. */
+
+#include "ebcdic.h"
+
+unsigned char charset[] = { /* Index: ISO 8859-1 code; value: EBCDIC code. 32 rows of 8 = 256 entries. */
+ 0, 1, 2, 3, 55, 45, 46, 47,
+ GENRECHAR, TABCHAR, RSCHAR, 11, 12, RECHAR, 14, 15,
+ 16, 17, 18, 19, 60, 61, 50, 38,
+ 24, 25, EOFCHAR, 39, EOBCHAR, DELCDATA, DELSDATA, DELNONCH,
+ SPCCHAR, 90, 127, 123, 91, 108, 80, 125,
+ 77, 93, 92, 78, 107, 96, 75, 97,
+240, 241, 242, 243, 244, 245, 246, 247,
+248, 249, 122, 94, 76, 126, 110, 111,
+124, 193, 194, 195, 196, 197, 198, 199,
+200, 201, 209, 210, 211, 212, 213, 214,
+215, 216, 217, 226, 227, 228, 229, 230,
+231, 232, 233, 173, 224, 189, 176, 109,
+121, 129, 130, 131, 132, 133, 134, 135,
+136, 137, 145, 146, 147, 148, 149, 150,
+151, 152, 153, 162, 163, 164, 165, 166,
+167, 168, 169, 192, 79, 208, 161, 7,
+ 4, 6, 8, 9, 10, 20, 21, 23,
+ 26, 27, 32, 33, 34, 35, 36, 40,
+ 41, 42, 43, 44, 48, 49, 51, 52,
+ 53, 54, 56, 57, 58, 59, 62, 255,
+ 65, 170, 74, 177, 159, 178, 106, 181,
+187, 180, 154, 138, 95, 202, 175, 188,
+144, 143, 234, 250, 190, 160, 182, 179,
+157, 218, 155, 139, 183, 184, 185, 171,
+100, 101, 98, 102, 99, 103, 158, 104,
+116, 113, 114, 115, 120, 117, 118, 119,
+172, 105, 237, 238, 235, 239, 236, 191,
+128, 253, 254, 251, 252, 186, 174, 89,
+ 68, 69, 66, 70, 67, 71, 156, 72,
+ 84, 81, 82, 83, 88, 85, 86, 87,
+140, 73, 205, 206, 203, 207, 204, 225,
+112, 221, 222, 219, 220, 141, 142, 223,
+};
diff --git a/usr.bin/sgmls/sgmls/ebcdic.h b/usr.bin/sgmls/sgmls/ebcdic.h
new file mode 100644
index 0000000..1c35bcb
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/ebcdic.h
@@ -0,0 +1,40 @@
+/* SGML Character Use: EBCDIC
+*/
+
+#define EOFCHAR '\077' /* FUNCTION: EE (entity end: files). */
+#define EOBCHAR '\034' /* NONCHAR: EOB (file entity: end of buffer). */
+#define RSCHAR '\045' /* FUNCTION: RS (record start). */
+#define RECHAR '\015' /* FUNCTION: RE (record end). */
+#define TABCHAR '\005' /* FUNCTION: TAB (horizontal tab). */
+#define SPCCHAR '\100' /* FUNCTION: SPACE (horizontal space). */
+#define GENRECHAR '\026' /* NONCHAR: Generated RE. */
+#define DELCDATA '\035' /* NONCHAR: Delimiter for CDATA entity in
+ attribute value. */
+#define DELSDATA '\036' /* NONCHAR: Delimiter for SDATA entity in
+ attribute value. */
+#define DELNONCH '\037' /* NONCHAR: non-SGML character prefix. */
+
+/* This should work for EBCDIC. See comment in latin1.h. */
+#define SHIFTNON(ch) ((UNCH)(ch) | 0200)
+#define UNSHIFTNON(ch) ((UNCH)(ch) & ~0200)
+
+/* See comment in latin1.h. */
+#define CANON_NONSGML 255
+
+/* See comment in latin1.h. */
+#define CANON_DATACHAR 254
+
+/* Components for a formal public identifier for the whole of the
+system character set. Protect with ifndef so that it can be overridden
+in config.h. */
+
+/* Use a private escape sequence. */
+#ifndef SYSTEM_CHARSET_DESIGNATING_SEQUENCE
+#define SYSTEM_CHARSET_DESIGNATING_SEQUENCE "ESC 2/5 2/15 3/0"
+#endif
+#ifndef SYSTEM_CHARSET_OWNER
+#define SYSTEM_CHARSET_OWNER "-//IBM"
+#endif
+#ifndef SYSTEM_CHARSET_DESCRIPTION
+#define SYSTEM_CHARSET_DESCRIPTION "Code Page 1047"
+#endif
diff --git a/usr.bin/sgmls/sgmls/entgen.c b/usr.bin/sgmls/sgmls/entgen.c
new file mode 100644
index 0000000..0829495
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/entgen.c
@@ -0,0 +1,405 @@
+/* entgen.c -
+
+ Implement entgen() which generates a list of filenames from a struct fpi.
+
+ Written by James Clark (jjc@jclark.com).
+*/
+
+#include "config.h"
+
+#ifdef HAVE_ACCESS
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h> /* For R_OK. */
+#endif /* HAVE_UNISTD_H */
+
+#ifndef R_OK
+#define R_OK 4
+#endif /* not R_OK */
+
+#endif /* HAVE_ACCESS */
+
+#include "sgmlaux.h"
+
+/* Environment variable that contains path. */
+#ifndef PATH_ENV_VAR
+#define PATH_ENV_VAR "SGML_PATH"
+#endif
+/* Default search path. See field() for interpretation of %*. */
+#ifndef DEFAULT_PATH
+#define DEFAULT_PATH "/usr/local/lib/sgml/%O/%C/%T:%N.%X:%N.%D"
+#endif
+
+#ifndef PATH_FILE_SEP
+#define PATH_FILE_SEP ':'
+#endif
+
+#ifndef SYSID_FILE_SEP
+#define SYSID_FILE_SEP ':'
+#endif
+
+/* This says: change space to underscore, slash to percent. */
+
+#ifndef MIN_DAT_SUBS_FROM
+#define MIN_DAT_SUBS_FROM " /"
+#endif
+#ifndef MIN_DAT_SUBS_TO
+#define MIN_DAT_SUBS_TO "_%"
+#endif
+
+static int field P((struct fpi *, int, char *));
+static int mindatcpy P((char *, char *, int, int));
+static int testopen P((char *));
+static UNIV sysidgen P((char *));
+
+static char *path = 0;
+
+/* Non-zero if searching should be performed when a system identifier
+is specified. */
+static int sysidsrch = 0;
+
+#define EMPTY_VERSION "default"
+
+static char *classes[] = { /* Public text class names for %C, indexed by fpic-1 (FPICAP..FPITEXT). */
+ "capacity",
+ "charset",
+ "notation",
+ "syntax",
+ "document",
+ "dtd",
+ "elements",
+ "entities",
+ "lpd",
+ "nonsgml",
+ "shortref",
+ "subdoc",
+ "text"
+ };
+
+/* This is mainly for compatibility with arcsgml.
+ File name extensions for the %X field. The table is three groups of
+ five: entries 0-4 are used when there is no public identifier,
+ entries 5-9 when there is one and fpiversw is not positive, and
+ entries 10-14 when there is one and fpiversw is positive. Within a
+ group the entry is selected by fpistore-1. */
+
+static char *genext[] = {
+ "nsd", /* Non-SGML data entity. */
+ "gml", /* GML document or text entity. */
+ "spe", /* System parameter entity. */
+ "dtd", /* Document type definition. */
+ "lpd", /* Link process definition. */
+ "pns", /* Public non-SGML data entity. */
+ "pge", /* Public general entity. */
+ "ppe", /* Public parameter entity. */
+ "pdt", /* Public document type definition. */
+ "plp", /* Public link process definition. */
+ "vns", /* Display version non-SGML data entity. */
+ "vge", /* Display version general entity. */
+ "vpe", /* Display version parameter entity. */
+ "vdt", /* Display version document type definition.*/
+ "vlp", /* Display version link process definition.*/
+};
+
+static char *ext[] = { /* Extensions for %Y, indexed by fpistore (1-5); 0 replaces 1 when fpinedcn is 0 (SUBDOC). */
+ "sgml", /* SGML subdocument */
+ "data", /* Data */
+ "text", /* General text */
+ "parm", /* Parameter entity */
+ "dtd", /* Document type definition */
+ "lpd", /* Link process definition */
+};
+
+/* Like memcpy, but substitute, fold to lower case (if fold is
+non-zero) and null terminate. This is used both for minimum data and
+for names. Each character of q that occurs in MIN_DAT_SUBS_FROM is
+replaced by the character at the same position in MIN_DAT_SUBS_TO, or
+dropped when MIN_DAT_SUBS_TO is too short to supply one.
+
+If p is NULL nothing is stored, but q is still scanned. Returns the
+number of characters produced, not counting the terminating null.
+That is len unless characters were dropped. Returning the true count
+(rather than len) keeps a caller's measuring pass (p == NULL) and its
+copying pass in step, so a caller that advances its output pointer by
+the return value lands on the terminator instead of beyond it. */
+
+static int mindatcpy(p, q, len, fold)
+char *p, *q;
+int len;
+int fold;
+{
+     static char subsfrom[] = MIN_DAT_SUBS_FROM;
+     static char substo[] = MIN_DAT_SUBS_TO;
+     int produced = 0;             /* Characters emitted so far. */
+     int n;
+
+     for (n = len; --n >= 0; q++) {
+          char *r = strchr(subsfrom, *q);
+          char out;
+
+          if (!r) {
+               if (fold && ISASCII(*q) && isupper((UNCH)*q))
+                    out = tolower((UNCH)*q);
+               else
+                    out = *q;
+          }
+          else {
+               int i = r - subsfrom;
+               /* strchr also matches the terminator of subsfrom, so i
+                  may equal its length; such characters are dropped too. */
+               if (i >= (int)sizeof(substo) - 1)
+                    continue;
+               out = substo[i];
+          }
+          if (p)
+               p[produced] = out;
+          produced++;
+     }
+     if (p)
+          p[produced] = '\0';
+     return produced;
+}
+
+
+/* Return length of field. Copy into buf if non-NULL.
+ c is the character that followed '%' in a path template:
+ %% a literal percent sign
+ %N the entity, document or dcn name
+ %D the dcn name (external data entities with a notation only)
+ %X an arcsgml-compatible extension taken from genext[]
+ %Y a type extension taken from ext[]
+ %P the public identifier
+ %S the system identifier, with RS removed and RE turned into '\n'
+ %A %I %R %U tests only (available, ISO, registered, unregistered)
+ %L public text language
+ %O owner identifier
+ %C public text class, taken from classes[]
+ %T text description
+ %V display version
+ %E designating (escape) sequence
+ Returns -1 when the field is unknown or not available for f. The
+ test codes A, I, R and U produce no text: they return 0 when the
+ condition holds and -1 when it does not. Text stored for %S is not
+ null-terminated; text stored for every other code is. */
+
+static int field(f, c, buf)
+struct fpi *f;
+int c;
+char *buf;
+{
+ int n;
+
+ switch (c) {
+ case '%':
+ if (buf) {
+ buf[0] = '%';
+ buf[1] = '\0';
+ }
+ return 1;
+ case 'N': /* the entity, document or dcn name */
+ return mindatcpy(buf, (char *)f->fpinm, ustrlen(f->fpinm),
+ (f->fpistore != 1 && f->fpistore != 2 && f->fpistore != 3
+ ? NAMECASE
+ : ENTCASE));
+ case 'D': /* dcn name */
+ if (f->fpistore != 1) /* not an external data entity */
+ return -1;
+ if (f->fpinedcn == 0) /* it's a SUBDOC */
+ return -1;
+ return mindatcpy(buf, (char *)f->fpinedcn, ustrlen(f->fpinedcn),
+ NAMECASE);
+ case 'X':
+ /* This is for compatibility with arcsgml */
+ if (f->fpistore < 1 || f->fpistore > 5)
+ return -1;
+ n = (f->fpipubis != 0)*(f->fpiversw > 0 ? 2 : 1)*5+f->fpistore - 1;
+ if (buf)
+ strcpy(buf, genext[n]);
+ return strlen(genext[n]);
+ case 'Y': /* tYpe */
+ n = f->fpistore;
+ if (n < 1 || n > 5)
+ return -1;
+ if (n == 1 && f->fpinedcn == 0) /* it's a SUBDOC */
+ n = 0;
+ if (buf)
+ strcpy(buf, ext[n]);
+ return strlen(ext[n]);
+ case 'P': /* public identifier */
+ if (!f->fpipubis)
+ return -1;
+ return mindatcpy(buf, (char *)f->fpipubis, ustrlen(f->fpipubis), 0);
+ case 'S': /* system identifier */
+ if (!f->fpisysis)
+ return -1;
+ else {
+ UNCH *p;
+ n = 0;
+ for (p = f->fpisysis; *p; p++)
+ if (*p != RSCHAR) {
+ if (buf)
+ buf[n] = *p == RECHAR ? '\n' : *p;
+ n++;
+ }
+ return n;
+ }
+ }
+ /* Other fields need a formal public identifier. */
+ /* return -1 if the formal public identifier was invalid or missing. */
+ if (f->fpiversw < 0 || !f->fpipubis)
+ return -1;
+
+ switch (c) {
+ case 'A': /* Is it available? */
+ return f->fpitt == '+' ? 0 : -1;
+ case 'I': /* Is it ISO? */
+ return f->fpiot == '!' ? 0 : -1;
+ case 'R': /* Is it registered? */
+ return f->fpiot == '+' ? 0 : -1;
+ case 'U': /* Is it unregistered? */
+ return f->fpiot == '-' ? 0 : -1;
+ case 'L': /* public text language */
+ if (f->fpic == FPICHARS)
+ return -1;
+ /* it's entered in all upper case letters */
+ return mindatcpy(buf, (char *)f->fpipubis + f->fpil, f->fpill, 1);
+ case 'O': /* owner identifier */
+ return mindatcpy(buf, (char *)f->fpipubis + f->fpio, f->fpiol, 0);
+ case 'C': /* public text class */
+ n = f->fpic - 1;
+ if (n < 0 || n >= sizeof(classes)/sizeof(classes[0]))
+ return -1;
+ if (buf)
+ strcpy(buf, classes[n]);
+ return strlen(classes[n]);
+ case 'T': /* text description */
+ return mindatcpy(buf, (char *)f->fpipubis + f->fpit, f->fpitl, 0);
+ case 'V':
+ if (f->fpic < FPICMINV) /* class doesn't have version */
+ return -1;
+ if (f->fpiversw > 0) /* no version */
+ return -1;
+ if (f->fpivl == 0) { /* empty version: */
+ /* use device-independent version*/
+ if (buf)
+ strcpy(buf, EMPTY_VERSION);
+ return strlen(EMPTY_VERSION);
+ }
+ return mindatcpy(buf, (char *)f->fpipubis + f->fpiv, f->fpivl, 0);
+ case 'E': /* public text designating (escape) sequence */
+ if (f->fpic != FPICHARS)
+ return -1;
+ return mindatcpy(buf, (char *)f->fpipubis + f->fpil, f->fpill, 0);
+ default:
+ break;
+ }
+ return -1;
+}
+
+/* Return 1 if pathname can be read, 0 if not. With HAVE_ACCESS the
+ check is made with access(); otherwise the file is opened for reading
+ and closed again. */
+
+static int testopen(pathname)
+char *pathname;
+{
+#ifdef HAVE_ACCESS
+     if (access(pathname, R_OK) < 0)
+          return 0;
+     return 1;
+#else /* not HAVE_ACCESS */
+     FILE *fp = fopen(pathname, "r");
+
+     if (fp) {
+          fclose(fp);
+          return 1;
+     }
+     return 0;
+#endif /* not HAVE_ACCESS */
+}
+
+/* Return a pointer to a dynamically-allocated buffer that contains
+ the names of the files containing this entity, with each filename
+ terminated by a '\0', and with the list of filenames terminated by
+ another '\0'. Returns 0 if no file could be found.
+
+ The search path is read once, from the environment variable named
+ by PATH_ENV_VAR or else from DEFAULT_PATH. It is a list of filename
+ templates separated by PATH_FILE_SEP; each %-code in a template is
+ expanded by field(). Templates are tried in order, a template with
+ an unavailable field or an empty expansion is skipped, and the first
+ expansion that testopen() accepts is returned.
+
+ A system identifier is handed to sysidgen() without searching when
+ the path never uses %S, when the identifier contains SYSID_FILE_SEP,
+ or when it is STDINNAME. */
+
+UNIV entgen(f)
+struct fpi *f;
+{
+ char *file;
+
+ assert(f->fpistore != 6); /* Mustn't call entgen for a notation. */
+ if (!path) {
+ char *p;
+ char c;
+ path = getenv(PATH_ENV_VAR);
+ if (!path)
+ path = DEFAULT_PATH;
+ p = path;
+
+ /* Only search for system identifiers if path uses %S. */
+ while ((c = *p++) != '\0')
+ if (c == '%') {
+ if (*p == 'S') {
+ sysidsrch = 1;
+ break;
+ }
+ if (*p != '\0' && *p != PATH_FILE_SEP)
+ p++; /* Skip the code letter so "%%S" is not taken for %S. */
+ }
+ }
+ if (f->fpisysis
+ && (!sysidsrch
+ || strchr((char *)f->fpisysis, SYSID_FILE_SEP)
+ || strcmp((char *)f->fpisysis, STDINNAME) == 0))
+ return sysidgen((char *)f->fpisysis);
+
+ file = path;
+
+ for (;;) {
+ char *p;
+ int len = 0;
+ char *fileend = strchr(file, PATH_FILE_SEP);
+ if (!fileend)
+ fileend = strchr(file, '\0');
+
+ /* Check that all substitutions are non-null, and calculate
+ the resulting total length of the filename. */
+ for (p = file; p < fileend; p++)
+ if (*p == '%') {
+ int n;
+ /* Set len to -1 if a substitution is invalid. */
+ if (++p >= fileend) {
+ len = -1;
+ break;
+ }
+ n = field(f, *p, (char *)0);
+ if (n < 0) {
+ len = -1;
+ break;
+ }
+ len += n;
+ }
+ else
+ len++;
+
+ if (len > 0) {
+ /* We've got a valid non-empty filename. */
+ char *s;
+ char *buf;
+
+ s = buf = (char *)rmalloc(len + 2); /* Room for the name and both terminators. */
+ for (p = file; p < fileend; p++)
+ if (*p == '%')
+ s += field(f, *++p, s);
+ else
+ *s++ = *p;
+ *s++ = '\0';
+ if (testopen(buf)) {
+ /* Terminate the array of filenames. */
+ *s++ = '\0';
+ return buf;
+ }
+ free((UNIV)buf);
+ }
+ if (*fileend == '\0')
+ break;
+ file = ++fileend;
+ }
+ return 0;
+}
+
+/* Handle a system identifier without searching. The identifier is split
+ at each SYSID_FILE_SEP into filenames; empty names are discarded, RS
+ characters are removed and RE characters become '\n'. The result is a
+ newly allocated list in which every filename ends with '\0' and the
+ list ends with one more '\0'. Returns 0 if no filename is left. */
+
+static
+UNIV sysidgen(s)
+char *s;
+{
+     char *list = (char *)rmalloc(strlen(s) + 2);
+     char *out = list;
+
+     while (*s) {
+          char c = *s++;
+
+          if (c == SYSID_FILE_SEP) {
+               /* Close the current name, unless it is empty. */
+               if (out > list && out[-1] != '\0')
+                    *out++ = '\0';
+          }
+          else if (c == RECHAR)
+               *out++ = '\n';
+          else if (c != RSCHAR)
+               *out++ = c;
+     }
+     /* Close the last name if it is still open. */
+     if (out > list && out[-1] != '\0')
+          *out++ = '\0';
+     if (out == list) {
+          /* Nothing was stored: there are no filenames. */
+          frem((UNIV)list);
+          return 0;
+     }
+     /* Close the list itself. */
+     *out++ = '\0';
+     return list;
+}
+
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/entity.h b/usr.bin/sgmls/sgmls/entity.h
new file mode 100644
index 0000000..d7d3096
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/entity.h
@@ -0,0 +1,189 @@
+/* Struct dcncb: attribute list added to support data attributes. */
+#ifndef ENTITY_H /* Don't include this file more than once. */
+#define ENTITY_H
+/* ENTITY.H: Definitions and control block templates for entity management.
+*/
+#include "tools.h" /* Definitions for type declarations, etc. */
+#include "msgcat.h"
+
+#define STDINNAME "-" /* File name that refers to standard input. */
+
+#define EOS '\0' /* NONCHAR: EE (entity end: strings). */
+
+#define AVALCASE 2 /* 2=untranslated string of name characters. */
+
+#define REFNAMELEN 8 /* reference quantity set NAMELEN */
+#define REFLITLEN 240 /* reference quantity set LITLEN */
+
+/* Minimization status of returned tags.
+*/
+#define MINNONE 0 /* Minimization: tag not minimized. */
+#define MINNULL 1 /* Minimization: tag was null. */
+#define MINNET 2 /* Minimization: end-tag was NET delimiter. */
+#define MINDATA 3 /* Minimization: end-tag was data tag. */
+#define MINSTAG 4 /* Minimization: tag implied by start-tag. */
+#define MINETAG 5 /* Minimization: end-tag implied by end-tag. */
+
+/* Formal public identifier public text classes.
+*/
+#define FPICAP 1
+#define FPICHARS 2
+#define FPINOT 3
+#define FPISYN 4
+#define FPICMINV 5 /* Minimum fpic value for versionable text. */
+#define FPIDOC 5
+#define FPIDTD 6
+#define FPIELEM 7
+#define FPIENT 8
+#define FPILPD 9
+#define FPINON 10
+#define FPISHORT 11
+#define FPISUB 12
+#define FPITEXT 13
+struct fpi { /* Formal public identifier. */
+ UNCH fpiot; /* Owner type: + or - or ! (for ISO). */
+ UNS fpiol; /* Length of owner identifier. */
+ UNS fpio; /* Offset in fpipubis of owner identifier (no EOS).*/
+ int fpic; /* Public text class (one of the FPI* class codes above). */
+ UNCH fpitt; /* Text type: - or + (for available). */
+ UNS fpitl; /* Length of text identifier. */
+ UNS fpit; /* Offset in fpipubis of text identifier (no EOS). */
+ UNS fpill; /* Language/designating sequence length. */
+ UNS fpil; /* Offset in fpipubis of language. */
+ UNS fpivl; /* Length of display version. */
+ UNS fpiv; /* Offset in fpipubis of display version (no EOS). */
+ int fpiversw; /* 1=use best ver; 0=use stated ver; -1=error. */
+ UNCH *fpinm; /* Entity/DCN name (EOS, no length). */
+ UNCH fpistore; /* 1=NDATA 2=general 3=parm 4=DTD 5=LPD 6=DCN. */
+ /* Name of the entity's DCN. Valid only when fpistore == 1.
+ NULL if it's a SUBDOC. */
+ UNCH *fpinedcn;
+ UNCH *fpipubis; /* Public ID string (EOS). */
+ UNCH *fpisysis; /* System ID string (EOS). */
+};
+#define FPISZ sizeof(struct fpi) /* Size in bytes of an FPI control block. */
+typedef struct fpi *PFPI; /* Ptr to FPI control block. */
+
+/* General control blocks.
+*/
+#define NONONCH 1 /* Character references to non-chars invalid. */
+#define OKNONCH 0 /* Character references to non-chars allowed. */
+struct parse { /* Parse control block. */
+ char *pname; /* Parse name; content, tag, etc. */
+ UNCH *plex; /* Lexical analysis table. */
+ UNCH **ptab; /* State and action table. */
+ UNS state; /* State. */
+ UNS input; /* Input. */
+ UNS action; /* Action. */
+ UNS newstate; /* Next state. */
+};
+struct restate {
+ UNS sstate; /* State. */
+ UNS sinput; /* Input. */
+ UNS saction; /* Action. */
+ UNS snext; /* Next state. */
+};
+struct map {
+ UNCH *mapnm; /* Name followed by EOS. */
+ int mapdata; /* Data associated with that name. */
+};
+struct hash { /* Dummy structure for function arguments. */
+ struct hash *enext; /* Next entry in chain. */
+ UNCH *ename; /* Entry name with size and EOS. */
+};
+typedef struct hash *PHASH; /* Ptr to hash table entry. */
+typedef struct hash **THASH; /* Ptr to hash table. */
+
+struct fwdref { /* A forward id reference. */
+ struct fwdref *next; /* Ptr to next reference in chain. */
+ UNIV msg; /* Ptr to saved error message. */
+};
+#define FWDREFSZ sizeof(struct fwdref) /* Size in bytes of one forward reference. */
+
+struct dcncb { /* Data content notation control block. */
+ struct dcncb *enext; /* Next DCN in chain. */
+ UNCH *ename; /* Notation name followed by EOS. */
+ UNCH mark; /* For use by application. */
+ UNCH entsw; /* Entity defined with this notation? */
+ UNCH defined; /* Has this notation been defined. */
+ UNCH *sysid; /* System identifier of notation. */
+ UNCH *pubid; /* Public identifier of notation. */
+ struct ad *adl; /* Data attribute list (NULL if none). */
+};
+#define DCBSZ sizeof(struct dcncb) /* Size in bytes of a DCN control block. */
+#define DCNMARK(p) ((p)->mark ? 1 : ((p)->mark = 1, 0)) /* Test-and-set: yields 1 if p was already marked; otherwise sets p->mark to 1 and yields 0. Evaluates p more than once. */
+
+typedef struct dcncb *PDCB; /* Ptr to DCN control block. */
+
+/* Number of capacities in a capacity set. */
+
+#define NCAPACITY 17
+
+struct sgmlcap { /* Capacity set. NOTE(review): each member presumably points to an array of NCAPACITY entries -- confirm against the code that fills it in. */
+ char **name; /* Presumably the capacity names -- TODO confirm. */
+ UNCH *points; /* Presumably the points charged per object -- TODO confirm. */
+ long *number; /* Presumably the current usage counts -- TODO confirm. */
+ long *limit; /* Presumably the declared capacity limits -- TODO confirm. */
+};
+
+struct sgmlstat { /* Document statistics. */
+ UNS dcncnt; /* Number of data content notations defined. */
+ UNS pmexgcnt; /* Number of plus or minus exception groups. */
+ UNS etdcnt; /* Number of element types declared. */
+ UNS etdercnt; /* Number of element types defined by default. */
+ UNS pmexcnt; /* Number of plus/minus exception grp members. */
+ UNS modcnt; /* Number of content model tokens defined. */
+ UNS attcnt; /* Number of attributes defined. */
+ UNS attdef; /* Characters of attribute defaults defined. */
+ UNS attgcnt; /* Number of att value grp members (incl dcn). */
+ UNS idcnt; /* Number of ID attributes specified. */
+ UNS idrcnt; /* Number of ID references specified. */
+ UNS ecbcnt; /* Number of entities declared. */
+ UNS ecbtext; /* Characters of entity text defined. */
+ UNS srcnt; /* Number of short reference tables defined. */
+ UNS dcntext; /* Characters of notation identifiers defined. */
+};
+struct switches { /* Parser control switches (1=non-standard). */
+ int swdupent; /* 1=msg if duplicate ENTITY def attempted;0=no.*/
+ int swcommnt; /* 1=return comment declarations as data; 0=no. */
+ int swrefmsg; /* 1=msg if undeclared ref is defaulted; 0=no. */
+ UNS swbufsz; /* Size of source file buffer for READ(). */
+ int swenttr; /* 1=trace entity stack in error messages; 0=no.*/
+ int sweltr; /* 1=trace element stack in error messages; 0=no. */
+ int swambig; /* 1=check content model ambiguity */
+ int swundef; /* 1=warn about undefined elements and notations. */
+ char *prog; /* Program name for error messages. */
+#ifdef TRACE
+ char *trace; /* What to trace in the body. */
+ char *ptrace; /* What to trace in the prolog. */
+#endif /* TRACE */
+ nl_catd catd; /* Message catalog descriptor. */
+ long nopen; /* Number of open document entities */
+ int onlypro; /* Parse only the prolog. */
+ char **includes; /* List of parameter entities to be defined
+ as "INCLUDE"; NULL terminated.*/
+ VOID (*die) P((void)); /* Function to call on fatal error. */
+};
+struct markup { /* Delimiter strings for text processor. */
+ UNCH *cro; /* LEXCON markup string: CRO */
+ UNCH *dso; /* LEXCON markup string: DSO */
+ UNCH *ero; /* LEXCON markup string: ERO */
+ UNCH *etag; /* LEXMARK markup string: end-tag */
+ UNCH *lit; /* LEXMARK markup string: LIT */
+ UNCH *lita; /* LEXMARK markup string: LITA */
+ UNCH *mdc; /* LEXCON markup string: MDC */
+ UNCH *mdo; /* LEXCON markup string: MDO */
+ UNCH *mse; /* LEXCON markup string: mse */
+ UNCH *mss; /* LEXCON markup string: mss */
+ UNCH *mssc; /* LEXCON markup string: mss CDATA */
+ UNCH *mssr; /* LEXCON markup string: mss RCDATA */
+ UNCH *pic; /* LEXCON markup string: PIC */
+ UNCH *pio; /* LEXCON markup string: PIO */
+ UNCH *refc; /* LEXGRP markup string: REFC */
+ UNCH *stag; /* LEXMARK markup string: start-tag */
+ UNCH *tagc; /* LEXMARK markup string: TAGC */
+ UNCH *vi; /* LEXMARK markup string: VI */
+ int lennet; /* LEXMARK markup string length: null end-tag. */
+ int lennst; /* LEXMARK markup string length: null start-tag.*/
+};
+#endif /* ndef ENTITY_H */
diff --git a/usr.bin/sgmls/sgmls/error.h b/usr.bin/sgmls/sgmls/error.h
new file mode 100644
index 0000000..d37d493
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/error.h
@@ -0,0 +1,61 @@
+/* ERROR.H: Symbols for SGML error codes (start with 'E_').
+ Numbers 46 - 56 are generated by ERROR.C.
+ Later numbers are coded directly.
+*/
+/* SGMLERR.C: General errors and syntax errors.
+*/
+#define E_CONTEXT 1 /* W GI not allowed at this point in structure. */
+#define E_MDNAME 2 /* E Invalid markup declaration name. */
+/*efine E_LEN 3 E Syntax error: length exceeded. */
+#define E_SYS 4 /* W Illegal system character. */
+#define E_ETAG 5 /* E End-tag does not match any open start-tag. */
+#define E_STAGMAX 6 /* E Maximum number of open elements exceeded. */
+/* E_ALLNULL 7 W Start- and end-tag omitted with null content. */
+#define E_EOF 8 /* E/W Illegal entity end in markup or delimited text. */
+/* fine E_INV 9 E Markup error: invalid character. */
+#define E_CHARS 10 /* W Data found in content that doesn't allow it. */
+/* fine E_NOETDE 11 E End-tag GI not defined by element declaration. */
+#define E_BADNM 12 /* E Name is not syntactically valid. */
+#define E_BADATT 13 /* E Attribute was not defined by element declaration. */
+#define E_VALINV 14 /* W Att value/declaration conflict: invalid char. */
+#define E_VALLEN 15 /* W Att value/declaration conflict: token too long. */
+#define E_VALCNT 16 /* W Att value/declaration conflict: too many tokens. */
+#define E_VALTYPE 17 /* W Att value/declaration conflict: wrong token type.*/
+#define E_VALGRP 18 /* W Att value/declaration conflict: token not in grp.*/
+#define E_VALREQ 19 /* W Att value/declaration conflict: req unspecified. */
+/* E_EMIN 20 W End-tag implied by end-tag; not minimizable. */
+/* E_SMIN 21 W Omitted start-tag was not minimizable. */
+#define E_POSSATT 22 /* E Possible att found but not defined; used as data.*/
+/* Late additions numbered out of order to avoid recompilation. */
+/*efine E_ENTSYNC 37 E Entity and group nesting levels out of sync. */
+#define E_BADVAL 25 /* W Att value omitted (null); default used. */
+/* E_ECONTXT 30 W Element ended prematurely (some content omitted).*/
+/* E_EMINST 39 W End-tag implied by start-tag; not minimizable. */
+/* E_MEXTAG 40 W *** In Use *** */
+#define E_MEXERR 41 /* W Attempt to exclude contextually required element.*/
+#define E_DOCTYPE 42 /* W No document type defined; *DOCTYPE assumed. */
+/* E_NOETDS 43 E Start-tag GI not defined by element declaration. */
+#define E_RESTART 44 /* E Invalid chars ignored; trying to restart parse. */
+
+/* MDERROR.C: Errors in markup declarations.
+*/
+/*efine E_DUP 23 E Duplicate specification. */
+/*efine E_KEY 24 E Incorrect keyword for parameter. */
+/*efine E_MSE 26 E MSE occurred with no corresponding MS. */
+/*efine E_MSS 27 E MSS exceeded maximum nesting level. */
+/*efine E_NUM 28 E Incorrect number of parameters. */
+#define E_TYPE 29 /* E Incorrect parameter type. */
+/* Late additions numbered out of order to avoid recompilation. */
+/*efine E_VAL 38 W Incorrect parameter value. */
+
+/* RESERROR.C: Errors in resource routines.
+*/
+/* Unused I End of primary source entity. */
+/* fine E_FILBUF 31 E Could not read next buffer. */
+/* fine E_ERFILE 32 E Could not open file. */
+/* fine E_MALLOC 33 T Could not obtain required main storage. */
+/* fine E_ERMAX 34 E Maximum number of open entities exceeded. */
+/* fine E_ERNAME 35 E Referenced entity undeclared. */
+/* fine E_ERLOOP 36 E Entity referenced within itself: ref ignored. */
+/* Late additions numbered out of order to avoid recompilation. */
+/* E_ERDEF 45 E Referenced entity undeclared; SYSTEM assumed. */
diff --git a/usr.bin/sgmls/sgmls/etype.h b/usr.bin/sgmls/sgmls/etype.h
new file mode 100644
index 0000000..e4ee1f9
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/etype.h
@@ -0,0 +1,91 @@
+/* ETYPE.H: Definitions for element type and group processing.
+*/
+#define MCHARS 0x80 /* Model: contains #CHARS. */
+#define MGI 0x40 /* Model: contains GI names. */
+#define MPHRASE 0x20 /* Model: first token is #CHARS. */
+#define MKEYWORD 0x1F /* Model: defined with single keyword. */
+#define MNONE 0x10 /* Model: contains no GIs or #CHARS. */
+#define MANY 0x08 /* Model: contains any GIs or #CHARS. */
+#define MRCDATA 0x04 /* Model: contains RCDATA. */
+#define MCDATA 0x02 /* Model: contains CDATA. */
+
+#define TOREP (TOPT+TREP) /* 11000000 Optional and repeatable. */
+#define TOPT 0x80 /* Token: 1=optional; 0=required. */
+#define TREP 0x40 /* Token: 1=repeatable; 0=not. */
+#define TXOREP (TXOPT+TXREP) /* * explicitly specified */
+#define TXOPT 0x20 /* ? explicitly specified */
+#define TXREP 0x10 /* + explicitly specified */
+#define TTMASK 0x0F /* 00001111 Mask for testing token type. */
+#define TTETD 4 /* 00000100 Token is an ETD. */
+#define TTAND 3 /* 00000011 Token is an AND group. */
+#define TTSEQ 2 /* 00000010 Token is a sequence group. */
+#define TTOR 1 /* 00000001 Token is an OR group. */
+#define TTCHARS 0 /* 00000000 Token is #CHARS. */
+
+struct thdr { /* Token header or model header. */
+ UNCH ttype; /* Token type attributes or model content. */
+ union {
+ int tnum; /* Group token: tokens in group.
+ Model header: content tokens at any level. */
+ struct etd *thetd; /* GI token: ptr to etd. */
+ } tu;
+};
+#define THSZ (sizeof(struct thdr))
+
+#define ETDHASH 211 /* Size of element hash table. Must be prime. */
+#define SMO 0x40 /* ETDMIN: Start-tag O minimization. */
+#define EMO 0x04 /* ETDMIN: End-tag O minimization. */
+#define EMM 0x02 /* ETDMIN: End-tag minimization explicitly
+ specified to be minus */
+#define ETDDCL 0x80 /* ETDMIN: Element was declared. */
+#define ETDUSED 0x20 /* ETDMIN: Element used in another declaration. */
+#define ETDOCC 0x10 /* ETDMIN: Element occurred in document. */
+
+struct etd { /* Element type definition. */
+ struct etd *etdnext; /* Next element type definition in hash chain. */
+ UNCH *etdgi; /* GI preceded by its length, followed by EOS. */
+ UNCH etdmin; /* Flag bits: minimization. */
+ UNCH mark; /* Mark bit: for ambiguity checking */
+ struct thdr *etdmod; /* Content model. */
+ struct etd **etdmex; /* Minus exceptions. */
+ struct etd **etdpex; /* Plus exceptions. */
+ struct ad *adl; /* Attribute descriptor list. */
+ struct entity **etdsrm; /* Short reference map. */
+};
+#define ETDSZ (sizeof(struct etd))
+typedef struct etd *PETD;
+extern struct etd dumetd[];
+
+/* Number of bits in a long must be >= 1<<LONGPOW */
+#define LONGPOW 5
+
+#define LONGBITS (1<<LONGPOW)
+
+struct mpos { /* Position of current element in model. */
+ UNCH g; /* Index of this group in the model. */
+ UNCH t; /* Index of the current token in this group. */
+ unsigned long *h; /* Hit bits of this group's tokens. */
+};
+
+#define HITCLEAR(h) MEMZERO((UNIV)(h), grplongs*sizeof(unsigned long)) /* Zero the grplongs unsigned longs of hit bits at h. MEMZERO and grplongs are not defined in this header and must be visible wherever the macro is used. */
+
+#define TAGCONER 0x01 /* 00000001 (contersw) Tag was out of context. */
+#define TAGNET 0x02 /* 00000010 (etisw) Tag has NET enabled. */
+#define TAGPEX 0x04 /* 00000100 (pexsw) Tag was plus exception. */
+#define TAGREF 0x08 /* 00001000 (conrefsw) Tag had CONREF or EMPTY.*/
+struct tag { /* Tag control block. */
+ UNCH status; /* Status of context check. */
+ UNCH tflags; /* Flags: TAGCONER TAGNET TAGPEX TAGREF */
+ struct etd *tetd; /* Element type definition for tag. */
+ struct entity **tsrm; /* Current short reference map. */
+ struct mpos *tpos; /* Position of next tag in this model. */
+};
+
+#define RCEND 1 /* No more tokens: end element and retry GI. */
+#define RCREQ 2 /* Required GI must precede proposed GI. */
+#define RCMISS 3 /* GI invalid: not element end; no required GI. */
+#define RCHIT 4 /* GI is the one expected next. */
+#define RCMEX 5 /* GI invalid: minus exception. */
+#define RCHITMEX 6 /* RCMEX with invalid attempted minus exclusion.*/
+#define RCPEX 7 /* GI is valid solely because of plus exclusion.*/
+#define RCNREQ 8 /* Token is not required; can retry invalid GI. */
diff --git a/usr.bin/sgmls/sgmls/exclude.c b/usr.bin/sgmls/sgmls/exclude.c
new file mode 100644
index 0000000..c3968b4
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/exclude.c
@@ -0,0 +1,121 @@
+/* exclude.c -
+ Exclusion checking.
+
+ Written by James Clark (jjc@jclark.com).
+*/
+
+#include "sgmlincl.h"
+
+static int excktok P((struct thdr *, int, int *));
+static int exmark P((int));
+
+/* Verify that the exclusions currently in force are legal for the
+content model of the element on top of the tag stack.  Any problems
+are reported through sgmlerr() by excktok(). */
+
+VOID exclude()
+{
+     struct thdr *model = tags[ts].tetd->etdmod;
+     int excluded;
+
+     /* A model defined with a single keyword has no tokens to check. */
+     if ((model->ttype & MKEYWORD) != 0)
+          return;
+     /* Mark the current exclusions; nothing to do if there are none. */
+     if (!exmark(1))
+          return;
+     /* The first token follows the model header. */
+     excktok(model + 1, 0, &excluded);
+     /* Clear the marks again. */
+     exmark(0);
+}
+
+/* Store val in the mark field of every element type named in the minus
+exceptions of the open elements tags[ts] down to tags[1].  Return 1 if
+any of those elements has a minus exception list, otherwise 0. */
+
+static
+int exmark(val)
+int val;
+{
+     int level = ts;
+     int found = 0;
+
+     while (level > 0) {
+          struct etd **mex = tags[level].tetd->etdmex;
+
+          --level;
+          if (!mex)
+               continue;
+          found = 1;
+          /* The exception list is terminated by a null pointer. */
+          while (*mex) {
+               (*mex)->mark = val;
+               mex++;
+          }
+     }
+     return found;
+}
+
+/* Check exclusions for the token at t, recursing into the members of a
+group.  Return the size of the token, counted in struct thdr entries
+and including all nested members. */
+
+static
+int excktok(t, orgrp, excl)
+struct thdr *t;
+int orgrp;                    /* 1 if token is member of or group */
+int *excl;                    /* Set to 1 if token is excluded. */
+{
+     int kind = t->ttype & TTMASK;
+     int size;
+     int optional = 0;
+
+     *excl = 0;
+
+     if (kind == TTETD || kind == TTCHARS) {
+          /* Only an element token can be the target of an exclusion. */
+          if (kind == TTETD && t->tu.thetd->mark) {
+               if (orgrp || (t->ttype & TOPT))
+                    *excl = 1;
+               else
+                    sgmlerr(217, &pcbstag, t->tu.thetd->etdgi + 1,
+                            tags[ts].tetd->etdgi + 1);
+          }
+          size = 1;
+     }
+     else if (kind == TTOR || kind == TTAND || kind == TTSEQ) {
+          int isor = (kind == TTOR);
+          int hadopt = 0;     /* Saw a non-excluded optional member. */
+          int hadreq = 0;     /* Saw a non-excluded required member. */
+          struct thdr *member = t + 1;
+          int left;
+
+          for (left = t->tu.tnum; left > 0; --left) {
+               int ex;
+               int n = excktok(member, isor, &ex);
+
+               if (!ex) {
+                    if (member->ttype & TOPT)
+                         hadopt = 1;
+                    else
+                         hadreq = 1;
+               }
+               member += n;
+          }
+          size = member - t;
+          if (isor)
+               optional = hadreq ? hadopt : 1;
+          else
+               optional = !hadreq;
+     }
+     else
+          abort();
+
+     /* Was required, but exclusions have made it optional.
+        eg <!element foo - - (a | b) -(a, b)> */
+
+     if (optional && !(t->ttype & TOPT))
+          sgmlerr(216, &pcbstag, tags[ts].tetd->etdgi + 1, (UNCH *)0);
+
+     return size;
+}
+
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/genlex.c b/usr.bin/sgmls/sgmls/genlex.c
new file mode 100644
index 0000000..1e84ecf
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/genlex.c
@@ -0,0 +1,114 @@
+/* genlex: Generate lexical tables for non-ASCII charsets. */
+
+#include "config.h"
+#include "std.h"
+#include "tools.h"
+
+#define CANON_ASCII_NONSGML 255 /* Canonical non-SGML character in ASCII. */
+#define CANON_ASCII_DATACHAR 254 /* Canonical DATACHAR in ASCII. */
+
+extern unsigned char charset[];
+extern UNCH *lextabs[];
+extern UNCH lextran[];
+
+static char *lextabnames[] = {
+ "lexcnm", "lexcon", "lexgrp", "lexlms", "lexmark", "lexsd", "lextoke"
+};
+
+/* Write the 256-entry table t to standard output as the C definition
+   of an UNCH array named s, sixteen entries per line. */
+static VOID print_tab(s, t)
+     char *s;
+     UNCH *t;
+{
+     int row, col;
+
+     printf("UNCH %s[] = {\n", s);
+     for (row = 0; row < 256; row += 16)
+          for (col = 0; col < 16; col++)
+               printf("%2d,%c", t[row + col], col == 15 ? '\n' : ' ');
+     fputs("};\n\n", stdout);
+}
+
+/* Write to standard output C source for the lexical tables and lextran,
+   re-indexed through charset[], followed by asciicharset[].  Exits with
+   EXIT_FAILURE, after a message on stderr, if some code in 0..255 is
+   not the image of any character under charset[]. */
+int main(argc, argv)
+     int argc;
+     char **argv;
+{
+     int i;
+     UNCH tab[256];
+     char special[256];
+     /* Shunned character numbers in the reference concrete syntax. */
+     static UNCH refshun[] = {
+          0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+          19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 255
+     };
+     /* Element count of refshun, independent of the element size. */
+     int nrefshun = (int)(sizeof(refshun) / sizeof(refshun[0]));
+     char shunned[256];
+     char *program_name;
+
+     /* Use the last path component of argv[0] in messages. */
+     program_name = strrchr(argv[0], '/');
+     if (program_name)
+          program_name++;
+     else
+          program_name = argv[0];
+
+     /* Check that the mapping is 1-1. */
+     for (i = 0; i < 256; i++)
+          tab[i] = 0;
+     for (i = 0; i < 256; i++)
+          tab[charset[i]] = 1;
+     for (i = 0; i < 256; i++)
+          if (!tab[i]) {
+               fprintf(stderr, "%s: bad mapping: no character mapped to %d\n",
+                       program_name, i);
+               exit(EXIT_FAILURE);
+          }
+
+     /* Compute special: a target code is special if, in at least one
+        lexical table, its source character is classified differently
+        from both the canonical non-SGML and the canonical data
+        character. */
+     for (i = 0; i < 256; i++)
+          special[i] = 0;
+     for (i = 0; lextabs[i]; i++) {
+          int j;
+          for (j = 0; j < 256; j++)
+               if (lextabs[i][j] != lextabs[i][CANON_ASCII_NONSGML]
+                   && lextabs[i][j] != lextabs[i][CANON_ASCII_DATACHAR])
+                    special[charset[j]] = 1;
+     }
+
+     /* Compute shunned. */
+     for (i = 0; i < 256; i++)
+          shunned[i] = 0;
+     for (i = 0; i < nrefshun; i++)
+          shunned[refshun[i]] = 1;
+
+     printf("/* This file was automatically generated by %s. Do not edit. */\n\n",
+            program_name);
+     fputs("#include \"config.h\"\n#include \"entity.h\"\n#include \"sgmldecl.h\"\n\n",
+           stdout);
+
+     /* Generate each of the lexical tables. */
+     for (i = 0; lextabs[i]; i++) {
+          int j;
+          for (j = 0; j < 256; j++)
+               tab[charset[j]] = lextabs[i][j];
+
+          /* Codes that are not special get the canonical class that
+             matches their shunned status. */
+          for (j = 0; j < 256; j++)
+               if (!special[j]) {
+                    if (shunned[j])
+                         tab[j] = lextabs[i][CANON_ASCII_NONSGML];
+                    else
+                         tab[j] = lextabs[i][CANON_ASCII_DATACHAR];
+               }
+          print_tab(lextabnames[i], tab);
+     }
+
+     /* Generate lextran. */
+     for (i = 0; i < 256; i++)
+          tab[charset[i]] = charset[lextran[i]];
+     print_tab("lextran", tab);
+
+     /* Generate asciicharset. */
+     fputs("int asciicharset[] = {\n", stdout);
+     for (i = 0; i < 128; i++)
+          printf("%3d,%c", charset[i], (i + 1) % 16 == 0 ? '\n' : ' ');
+     for (i = 128; i < 256; i++)
+          printf("UNUSED,%c", (i + 1) % 8 == 0 ? '\n' : ' ');
+     fputs("};\n", stdout);
+
+     exit(EXIT_SUCCESS);
+}
diff --git a/usr.bin/sgmls/sgmls/getopt.c b/usr.bin/sgmls/sgmls/getopt.c
new file mode 100644
index 0000000..9a218b3
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/getopt.c
@@ -0,0 +1,166 @@
+/* getopt.c -
+ getopt() for those systems that don't have it.
+
+ Derived from comp.sources.unix/volume3/att_getopt.
+ Modified by James Clark (jjc@jclark.com).
+*/
+
+#include "config.h"
+
+#ifndef HAVE_GETOPT
+
+#include "std.h"
+#include "getopt.h"
+
+#ifdef SWITCHAR
+#include <dos.h>
+#endif
+
+int opterr = 1;
+int optind = 1;
+int optopt;
+char *optarg;
+
+#ifndef OPTION_CHAR
+#define OPTION_CHAR '-'
+#endif
+
+/* Return the next option character found in argv.
+
+   opts lists the accepted option characters; a character followed by
+   ':' takes an argument, which is returned in optarg (either the rest
+   of the current word or the whole of the next word).  optind is the
+   index of the next word to examine.
+
+   Returns the option character, or '?' for an unrecognised option or a
+   missing argument (the offending character is left in optopt and a
+   message is written to stderr when opterr is nonzero), or EOF when
+   there are no more options: at the end of argv, at a word that does
+   not start with the option character or consists of it alone, or at a
+   word made of exactly two option characters, which is skipped.  With
+   REORDER_ARGS, a later option word (and its separate argument, if
+   any) is first moved in front of the non-option words. */
+int getopt(argc, argv, opts)
+int argc;
+char **argv;
+char *opts;
+{
+#ifdef SWITCHAR
+    union REGS regs;
+    static char switchar = '\0';
+#endif
+    static int sp = 1;          /* Index of next character in argv[optind]. */
+    register int c;
+    register char *cp;
+    char *message;
+#ifdef SWITCHAR
+    /* NOTE(review): presumably asks DOS for the current switch
+       character; falls back to '/' if the call reports an error. */
+    if (switchar == '\0') {
+        regs.x.ax = 0x3700;
+        intdos(&regs, &regs);
+        if (!regs.x.cflag)
+            switchar = regs.h.dl;
+        else
+            switchar = '/';
+    }
+#endif
+    if (sp == 1) {
+        if (optind >= argc)
+            return EOF;
+        if ((
+#ifdef SWITCHAR
+             argv[optind][0] != switchar &&
+#endif
+             argv[optind][0] != OPTION_CHAR) || argv[optind][1] == '\0') {
+#ifdef REORDER_ARGS
+            int i;
+            /* Look for an option word later in argv. */
+            for (i = optind; i < argc; i++)
+                if ((
+#ifdef SWITCHAR
+                     argv[i][0] == switchar ||
+#endif
+                     argv[i][0] == OPTION_CHAR) && argv[i][1] != '\0')
+                    break;
+            if (i < argc) {
+                c = argv[i][1];
+#ifdef CASE_INSENSITIVE_OPTIONS
+                /* Plain char may be negative; the <ctype.h> functions
+                   need an unsigned char value, as in the main path
+                   below. */
+                if (
+#ifdef USE_ISASCII
+                    isascii(c) &&
+#endif /* USE_ISASCII */
+                    isupper((unsigned char)c))
+                    c = tolower((unsigned char)c);
+#endif
+                if (c != ':' && c != OPTION_CHAR && (cp = strchr(opts, c)) != NULL
+                    && cp[1] == ':' && argv[i][2] == 0 && i < argc - 1) {
+                    /* Option with a separate argument: move both words
+                       to the front. */
+                    int j;
+                    char *temp1 = argv[i];
+                    char *temp2 = argv[i+1];
+                    for (j = i - 1; j >= optind; j--)
+                        argv[j+2] = argv[j];
+                    argv[optind] = temp1;
+                    argv[optind+1] = temp2;
+                }
+                else {
+                    /* Move just the option word to the front. */
+                    int j;
+                    char *temp = argv[i];
+                    for (j = i - 1; j >= optind; j--)
+                        argv[j+1] = argv[j];
+                    argv[optind] = temp;
+                }
+            }
+            else
+#endif
+                return EOF;
+        }
+        if ((argv[optind][0] == OPTION_CHAR && argv[optind][1] == OPTION_CHAR
+             && argv[optind][2] == '\0')
+#ifdef SWITCHAR
+            || (argv[optind][0] == switchar && argv[optind][1] == switchar
+                && argv[optind][2] == '\0')
+#endif
+            ) {
+            optind++;
+            return(EOF);
+        }
+    }
+    optopt = c = argv[optind][sp];
+#ifdef CASE_INSENSITIVE_OPTIONS
+    if (
+#ifdef USE_ISASCII
+        isascii(c) &&
+#endif /* USE_ISASCII */
+        isupper((unsigned char)c))
+        optopt = c = tolower((unsigned char)c);
+#endif /* CASE_INSENSITIVE_OPTIONS */
+    if (c == ':' || (cp = strchr(opts, c)) == NULL) {
+        /* Step past the bad character before reporting it. */
+        if (argv[optind][++sp] == '\0') {
+            optind++;
+            sp = 1;
+        }
+        message = ": illegal option -- ";
+        goto bad;
+    }
+    if (*++cp == ':') {
+        if (argv[optind][sp+1] != '\0')
+            optarg = &argv[optind++][sp+1];
+        else if (++optind >= argc) {
+            sp = 1;
+            message = ": option requires an argument -- ";
+            goto bad;
+        }
+        else
+            optarg = argv[optind++];
+        sp = 1;
+    }
+    else {
+        if (argv[optind][++sp] == '\0') {
+            sp = 1;
+            optind++;
+        }
+        optarg = NULL;
+    }
+    return c;
+bad:
+    if (opterr) {
+        fputs(argv[0], stderr);
+        fputs(message, stderr);
+        fputc(optopt, stderr);
+        fputc('\n', stderr);
+    }
+    return '?';
+}
+
+#endif /* not HAVE_GETOPT */
+
+/*
+Local Variables:
+c-indent-level: 4
+c-continued-statement-offset: 4
+c-brace-offset: 4
+c-argdecl-indent: 4
+c-label-offset: -4
+tab-width: 4
+End:
+*/
+
diff --git a/usr.bin/sgmls/sgmls/getopt.h b/usr.bin/sgmls/sgmls/getopt.h
new file mode 100644
index 0000000..4856560
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/getopt.h
@@ -0,0 +1,11 @@
+/* Declare getopt() and associated variables. */
+
+/* Don't use prototypes in case some system header file has a
+conflicting definition. Systems differ on how they declare the second
+parameter. */
+
+extern int getopt();
+
+extern char *optarg;
+extern int optind;
+extern int opterr;
diff --git a/usr.bin/sgmls/sgmls/keyword.h b/usr.bin/sgmls/sgmls/keyword.h
new file mode 100644
index 0000000..6c092f0
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/keyword.h
@@ -0,0 +1,22 @@
+/* KEYWORD.H: Definitions for markup declaration keyword processing.
+*/
+/* Default value types for attribute definition list declaration.
+*/
+#define DNULL 1 /* Default value: implied attribute. */
+#define DREQ 2 /* Default value: required attribute. */
+#define DCURR 3 /* Default value: current attribute. */
+#define DCONR 4 /* Default value: content reference attribute. */
+#define DFIXED 5 /* Default value: fixed attribute. */
+
+/* External identifier types for entity and notation declarations.
+*/
+#define EDSYSTEM 1 /* SYSTEM (but not PUBLIC) identifier specified.*/
+#define EDPUBLIC 2 /* PUBLIC (but not SYSTEM) identifier specified.*/
+#define EDBOTH 3 /* PUBLIC and also SYSTEM identifiers specified.*/
+
+/* Marked section keywords.
+*/
+#define MSTEMP 1
+#define MSRCDATA 2
+#define MSCDATA 3
+#define MSIGNORE 4
diff --git a/usr.bin/sgmls/sgmls/latin1.h b/usr.bin/sgmls/sgmls/latin1.h
new file mode 100644
index 0000000..44f43f3
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/latin1.h
@@ -0,0 +1,51 @@
+/* SGML Character Use: ISO Latin 1.
+*/
+#define EOFCHAR '\032' /* FUNCTION: EE (entity end: files); ASCII SUB. */
+#define EOBCHAR '\034' /* NONCHAR: EOB (file entity: end of buffer); ASCII FS. */
+#define RSCHAR '\012' /* FUNCTION: RS (record start); ASCII LF. */
+#define RECHAR '\015' /* FUNCTION: RE (record end); ASCII CR. */
+#define TABCHAR '\011' /* FUNCTION: TAB (horizontal tab). */
+#define SPCCHAR '\040' /* FUNCTION: SPACE (horizontal space). */
+#define GENRECHAR '\010' /* NONCHAR: Generated RE; ASCII BS. */
+#define DELCDATA '\035' /* NONCHAR: Delimiter for CDATA entity in
+ attribute value. */
+#define DELSDATA '\036' /* NONCHAR: Delimiter for SDATA entity in
+ attribute value. */
+#define DELNONCH '\037' /* NONCHAR: non-SGML character prefix. */
+
+/* These two macros are used to handle non-SGML characters. A non-SGML
+character is represented by a DELNONCH character followed by
+SHIFTNON(original_character). SHIFTNON must transform any character
+in the set 0, EOFCHAR, EOBCHAR, GENRECHAR, DELCDATA, DELSDATA,
+DELNONCH into a character that is not one of the set 0, EOFCHAR,
+EOBCHAR. Furthermore UNSHIFTNON(SHIFTNON(c)) must be equal to c for
+every character c in the former set. */
+/* This is a simple definition that works for ASCII-like character sets. */
+#define SHIFTNON(ch) ((UNCH)(ch) | 0100)
+#define UNSHIFTNON(ch) ((UNCH)(ch) & ~0100)
+
+/* A canonical NONSGML character. The character number that is shunned
+in the reference concrete syntax and is not the number of a
+significant (in the reference concrete syntax) character nor one of
+the above characters nor 0. */
+#define CANON_NONSGML 255
+
+/* A canonical DATACHAR character. The character number that is not
+shunned in the reference concrete syntax and is not the number of a
+significant (in the reference concrete syntax) SGML character nor one
+of the above characters. */
+#define CANON_DATACHAR 254
+
+/* Components for a formal public identifier for the whole of the
+system character set. Protect with ifndef so that it can be overridden
+in config.h. */
+
+#ifndef SYSTEM_CHARSET_DESIGNATING_SEQUENCE
+#define SYSTEM_CHARSET_DESIGNATING_SEQUENCE "ESC 2/13 4/1"
+#endif
+#ifndef SYSTEM_CHARSET_OWNER
+#define SYSTEM_CHARSET_OWNER "ISO Registration Number 100"
+#endif
+#ifndef SYSTEM_CHARSET_DESCRIPTION
+#define SYSTEM_CHARSET_DESCRIPTION "ECMA-94 Right Part of Latin Alphabet Nr. 1"
+#endif
diff --git a/usr.bin/sgmls/sgmls/lexcode.h b/usr.bin/sgmls/sgmls/lexcode.h
new file mode 100644
index 0000000..e4047ba
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/lexcode.h
@@ -0,0 +1,11 @@
+/* Definitions of lexical codes needed by both lextaba.c and lexrf.c. */
+
+#define FCE 27 /* FRE Free character in use as an entity reference */
+#define FRE 0 /* FREECHAR that is not in a CON delimiter-in-context. */
+#define LITC 21 /* LIT LITA PIC or EE in use as a literal terminator */
+#define MSC3 15 /* ] Also MSC[2]. */
+#define NET 17 /* / When enabled. */
+#define ETI 16 /* / Actually ETAGO[2] */
+#define SPCR 19 /* Space in use as SR8. */
+#define TGO2 25 /* < TAGO; also MDO[1], PIO[1] */
+#define CDE 11 /* NONSGML delcdata CDATA/SDATA delimiter */
diff --git a/usr.bin/sgmls/sgmls/lexrf.c b/usr.bin/sgmls/sgmls/lexrf.c
new file mode 100644
index 0000000..ec3db83
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/lexrf.c
@@ -0,0 +1,124 @@
+/* LEXRF: Lexical tables for reference concrete syntax.
+*/
+
+#include "config.h"
+#include "entity.h" /* Templates for entity control blocks. */
+#include "synxtrn.h" /* Declarations for concrete syntax constants. */
+#include "action.h" /* Action names for all parsing. */
+#include "lexcode.h"
+
+static UNCH SRTAB[] = { TABCHAR, '\0' };
+static UNCH SRRE[] = { RECHAR, '\0' };
+static UNCH SRRS[] = { RSCHAR, '\0' };
+static UNCH SRRSB[] = { RSCHAR, 'B', '\0' };
+static UNCH SRRSRE[] = { RSCHAR, RECHAR, '\0' };
+static UNCH SRRSBRE[] = { RSCHAR, 'B', RECHAR, '\0' };
+static UNCH SRBRE[] = { 'B', RECHAR, '\0' };
+
+struct lexical lex = { /* Delimiter set constants for parser use. */
+ { /* Markup strings for text processor use. */
+ (UNCH *)"\4&#", /* LEXCON markup string: CRO */
+ (UNCH *)"[", /* LEXCON markup string: DSO */
+ (UNCH *)"\3&", /* LEXCON markup string: ERO */
+ (UNCH *)"\4</", /* LEXMARK markup string: end-tag */
+ (UNCH *)"\3\"", /* LEXMARK markup string: LIT */
+ (UNCH *)"\3'", /* LEXMARK markup string: LITA */
+ (UNCH *)"\3>", /* LEXCON markup string: MDC */
+ (UNCH *)"\4<!", /* LEXCON markup string: MDO */
+ (UNCH *)"\5]]>", /* LEXCON markup string: mse */
+ (UNCH *)"\5<![", /* LEXCON markup string: mss */
+ (UNCH *)"\13<![CDATA[", /* LEXCON markup string: mss CDATA */
+ (UNCH *)"\14<![RCDATA[", /* LEXCON markup string: mss RCDATA */
+ (UNCH *)"\3>", /* LEXCON markup string: PIC */
+ (UNCH *)"\4<?", /* LEXCON markup string: PIO */
+ (UNCH *)"\3;", /* LEXGRP markup string: ref close. */
+ (UNCH *)"\3<", /* LEXMARK markup string: start-tag */
+ (UNCH *)"\3>", /* LEXMARK markup string: TAGC */
+ (UNCH *)"\3=", /* LEXMARK markup string: VI */
+ 3, /* LEXMARK: length of null end-tag. */
+ 2 /* LEXMARK: length of null start-tag. */
+ },
+ { /* Short reference delimiters. */
+ { /* Short reference delimiter table. */
+ {(UNCH *)"", SRCT}, /* Dummy entry to store SR count. */
+ {SRTAB, 1}, /* TAB */
+ {SRRE, 2}, /* RE */
+ {SRRS, 3}, /* RS */
+ {SRRSB, 4}, /* Leading blanks */
+ {SRRSRE, 5}, /* Null record */
+ {SRRSBRE, 6}, /* Blank record */
+ {SRBRE, 7}, /* Trailing blanks */
+ {(UNCH *)" ", 8}, /* Space */
+ {(UNCH *)"BB", 9}, /* Two or more blanks */
+ {(UNCH *)"\"", 10}, /* Quotation mark (first data character) */
+ {(UNCH *)"#", 11}, /* Number sign */
+ {(UNCH *)"%", 12}, /* FCE CHARACTERS start here */
+ {(UNCH *)"'", 13},
+ {(UNCH *)"(", 14},
+ {(UNCH *)")", 15},
+ {(UNCH *)"*", 16},
+ {(UNCH *)"+", 17},
+ {(UNCH *)",", 18},
+ {(UNCH *)"-", 19}, /* Hyphen */
+ {(UNCH *)"--", 20}, /* Two hyphens */
+ {(UNCH *)":", 21},
+ {(UNCH *)";", 22},
+ {(UNCH *)"=", 23},
+ {(UNCH *)"@", 24},
+ {(UNCH *)"[", 25},
+ {(UNCH *)"]", 26},
+ {(UNCH *)"^", 27},
+ {(UNCH *)"_", 28}, /* Low line */
+ {(UNCH *)"{", 29},
+ {(UNCH *)"|", 30},
+ {(UNCH *)"}", 31},
+ {(UNCH *)"~", 32},
+ {0, 0}
+ },
+ { /* Printable form of unprintable SR delims.*/
+ "", /* Dummy entry to balance s.dtb. */
+ "&#TAB;", /* TAB */
+ "&#RE;", /* RE */
+ "&#RS;", /* RS */
+ "&#RS;B", /* Leading blanks */
+ "&#RS;&#RE;", /* Null record */
+ "&#RS;B&#RE;", /* Blank record */
+ "B&#RE;", /* Trailing blanks */
+ "&#SPACE;" /* Space */
+ },
+ 12, /* LEXCNM: Index of first FCE in srdeltab. */
+ 20, /*LEXCNM:Index of "two hyphens" in srdeltab*/
+ 10, /* LEXCNM: Index of first SR with data char. */
+ 19, /* LEXCNM: Index of hyphen in srdeltab. */
+ SRNPRT+1, /* LEXCNM: Index of 1st printable SR. */
+ 8, /* LEXCNM: Index of space in srdeltab. */
+ 25, /* LEXCNM: Index of left bracket in srdeltab. */
+ 26, /* LEXCNM: Index of right bracket in srdeltab. */
+ }, /* End of short reference delimiters. */
+ { /* General delimiter characters. */
+ GENRECHAR, /*LEXCNM:(BS)Generated RE; can't be markup.*/
+ '"', /* LEXMARK: Char used as LIT delimiter.*/
+ '\'', /* LEXMARK: Char used as LITA delimiter.*/
+ '>', /* LEXLMS: Char used as MDC delimiter.*/
+ ']', /* LEXLMS: Char used as MSC when enabled.*/
+ '/', /* LEXCON: Char used as NET when enabled.*/
+ '%', /* LEXMARK: Char used as PERO delimiter. */
+ '>', /* LEXCON: Char used as PIC delimiter.*/
+ '<' /* LEXCON: Char used as TAGO when enabled.*/
+ },
+ { /* Lexical table code assignments. */
+ FCE, /* LEXCNM: FRE char as entity reference.*/
+ FRE, /* LEXLMS: Free character not an entity ref.*/
+ LITC, /* LEXLMS: Literal close delimiter enabled. */
+ MSC3, /* LEXLMS: Marked section close delim enabled. */
+ NET, /* LEXCON: Null end-tag delimiter enabled. */
+ ETI, /* LEXCON: NET disabled; still used as ETI. */
+ SPCR, /* LEXCNM: Space in use as SHORTREF delim. */
+ TGO2, /* LEXCON: Tag open delimiter enabled. */
+ CDE /* LEXLMS: CDATA/SDATA delimiters. */
+ }
+};
+
+UNCH *lextabs[] = {
+ lexcnm, lexcon, lexgrp, lexlms, lexmark, lexsd, lextoke, 0
+};
diff --git a/usr.bin/sgmls/sgmls/lextaba.c b/usr.bin/sgmls/sgmls/lextaba.c
new file mode 100644
index 0000000..54f9395
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/lextaba.c
@@ -0,0 +1,559 @@
+/* lextaba.c: lexical tables for ASCII. */
+
+/* These tables are munged by setnonsgml(). */
+
+#include "config.h"
+#include "entity.h"
+#include "lexcode.h"
+#include "sgmldecl.h"
+
+/* LEXCNM: Lexical table for mixed content (PCBCONM) parse.
+*/
+/* Symbols for SGML character set divisions and function characters. */
+#define NU 1 /* NUMERAL Numerals */
+#define NMC 2 /* LC/UCNMCHAR . - Period and hyphen */
+#define NMS 3 /* LC/UCNMSTRT Lower and uppercase letters */
+#define SPC 4 /* SPACE 32 Space */
+#define NON 5 /* NONSGML 0-31 127 255 Unused, except for: */
+#define EE 6 /* NONSGML 00 26 Entity end (end of file) */
+#define EOB 7 /* NONSGML 28 End disk buffer */
+#define RS 8 /* Function 10 Line feed */
+#define RE 9 /* Function 13 Carriage return */
+#define SEP 10 /* SEPCHAR 09 TAB: horizontal tab */
+#define NSC 12 /* NONSGML delnonch Non-SGML character prefix */
+
+/* Symbols for SGML delimiter roles in CON and CXT.
+ ETI and NET must be the same in LEXCNM and LEXCON.
+ FRE characters are changed to FCE if an FCE entity is declared.
+ They are changed back to FRE when the entity is canceled.
+*/
+#define ERO 13 /* & Also CRO[1] */
+#define NMRE 14 /* 08 Generated non-markup RE */
+#define COM 15 /* - For MDO context; also SR19 and SR20. */
+#undef LIT1
+#define LIT1 18 /* " SR10 */
+#define MDO 20 /* ! Actually MDO[2] */
+#define MSC1 21 /* ] Both MSC[1] and MSC[2]; also SR26. */
+#define MSO 22 /* [ For MDO context; also SR25. */
+#define PIO 23 /* ? Actually PIO[2] */
+#define RNI 24 /* # For CRO[2]; also SR11. */
+#define TGC1 25 /* > For TAGO and MSC context; also MDC, PIC */
+#define TGO1 26 /* < TAGO; also MDO[1], PIO[1] */
+
+UNCH lexcnm[256] = { /*
+000 001       bs tab lf home ff cr so si */
+EE, NON, NON, NON, NON, NON, NON, NON, NMRE,SEP, RS, NON, NON, RE, NON, NON, /*
+          eof esc rt left up down */
+NON, NON, NON, NON, NON, NON, NON, NON, NON, NON, EE, NON, EOB, NON, NON, NSC, /*
+032 ! " # $ % & ' ( ) * + , - . / */
+SPC, MDO, LIT1,RNI, FRE, FRE ,ERO, FRE, FRE, FRE, FRE, FRE, FRE, COM, NMC, ETI, /*
+0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
+NU , NU , NU , NU , NU , NU , NU , NU , NU , NU , FRE, FRE, TGO1,FRE, TGC1,PIO, /*
+@ A B C D E F G H I J K L M N O */
+FRE, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /*
+P Q R S T U V W X Y Z [ \ ] ^ _ */
+NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, MSO, FRE, MSC1,FRE, FRE, /*
+` a b c d e f g h i j k l m n o */
+FRE, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /*
+p q r s t u v w x y z { | } ~ 127 */
+NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, FRE, FRE, FRE, FRE, NON,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, NON
+};
+/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
+ nmre com eti lit spcr mdo msc mso net pio rni tagc tago fce */
+#undef ERO
+#undef NMRE
+#undef COM
+#undef LIT1
+/* def SPCR*/
+#undef MDO
+#undef MSC1
+#undef MSO
+#undef PIO
+#undef RNI
+#undef TGC1
+/* def TGO1*/
+/* def FCE*/
+/* LEXCON: Lexical table for RCDATA and CDATA content (PCBCON?),
+ prolog (PCBPRO), and nested declaration set (PCBMDS) parses.
+ Note: NMC is same as FRE; kept for consistency with LEXCNM and LEXLMS.
+*/
+/* Symbols for SGML character set divisions and function characters. */
+/* Same as for LEXCNM. */
+
+/* Symbols for SGML delimiter roles in CON, CXT, and DS.
+ ETI and NET must be the same in LEXCNM and LEXCON.
+ FRE characters are changed to FCE if an FCE entity is declared.
+ They are changed back to FRE when the entity is canceled.
+*/
+#define ERO 13 /* & Also CRO[1] */
+#define NMRE 14 /* 08 Generated non-markup RE */
+#define COM 15 /* - For MDO context. */
+/*#define ETI 16 / Actually ETAGO[2] */
+/*#define NET 17 / When enabled. */
+#define MDO 18 /* ! Actually MDO[2] */
+#define MSC2 19 /* ] Both MSC[1] and MSC[2]. */
+#define MSO 20 /* [ For MDO context. */
+#define PERO 21 /* % For prolog */
+#define PIO 22 /* ? Actually PIO[2] */
+#define RNI 23 /* # For CRO[2]. */
+#define TGC2 24 /* > For TAGO and MSC context; also MDC, PIC */
+
+UNCH lexcon[256] = { /*
+000 001       bs tab lf home ff cr so si */
+EE, NON, NON, NON, NON, NON, NON, NON, NMRE,SEP, RS, NON, NON, RE, NON, NON, /*
+          eof esc rt left up down */
+NON, NON, NON, NON, NON, NON, NON, NON, NON, NON, EE, NON, EOB, NON, NON, NSC, /*
+032 ! " # $ % & ' ( ) * + , - . / */
+SPC, MDO, FRE, RNI, FRE, PERO,ERO, FRE, FRE, FRE, FRE, FRE, FRE, COM, NMC, ETI, /*
+0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
+NU , NU , NU , NU , NU , NU , NU , NU , NU , NU , FRE, FRE, TGO2,FRE, TGC2,PIO, /*
+@ A B C D E F G H I J K L M N O */
+FRE, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /*
+P Q R S T U V W X Y Z [ \ ] ^ _ */
+NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, MSO, FRE, MSC2,FRE, FRE, /*
+` a b c d e f g h i j k l m n o */
+FRE, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /*
+p q r s t u v w x y z { | } ~ 127 */
+NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, FRE, FRE, FRE, FRE, NON,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, NON
+};
+/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
+ nmre com eti net mdo msc mso pero pio rni tagc tago */
+#undef FRE
+#undef NU
+#undef NMC
+#undef NMS
+#undef SPC
+#undef NON
+#undef EE
+#undef EOB
+#undef RS
+#undef RE
+#undef SEP
+#undef NSC
+#undef ERO
+#undef NMRE
+#undef COM
+/* def ETI*/
+/* def NET*/
+#undef MDO
+#undef MSC2
+#undef MSO
+#undef PERO
+#undef PIO
+#undef RNI
+#undef TGC2
+/* LEXGRP: Lexical table for group parses, including PCBREF.
+*/
+/* Symbols for SGML character set divisions. */
+#define BIT 0 /* Bit combinations (not NONCHAR) not allowed in a group. */
+#define NMC 1 /* NAMECHAR . - Period, hyphen, and numerals */
+#define NMS 2 /* NAMESTRT Lower and uppercase letters */
+#define RE 3 /* Function 13 Carriage return */
+#define SPC 4 /* SPACE 32 09 Space; includes TAB */
+#define NON 5 /* NONCHAR 0-31 127 255 Unused, except for: */
+#define EE 6 /* Function 26 00 EE: entity end (end of file) */
+#define EOB 7 /* NONCHAR 28 End disk buffer. */
+#define RS 8 /* Function 10 RS: record start (line feed) */
+
+/* Symbols for SGML delimiter roles in GRP. */
+#define AND1 9 /* & */
+#define GRPC 10 /* ) */
+#define GRPO 11 /* ( */
+#undef LIT2
+#define LIT2 12 /* " For datatags. */
+#define LITA 13 /* ' For datatags. */
+#define DTGC 14 /* ] For datatags. */
+#define DTGO 15 /* [ For datatags. */
+#define OPT1 16 /* ? */
+#define OR1 17 /* | */
+#define PERO 18 /* % */
+#define PLUS 19 /* + */
+#define REP1 20 /* * */
+#define RNI 21 /* # For #CHARS */
+#define SEQ1 22 /* , */
+#define REFC 23 /* ; For references */
+
+UNCH lexgrp[256] = { /*
+000 001       bs tab lf home ff cr so si */
+EE , NON, NON, NON, NON, NON, NON, NON, NON, SPC, RS, NON, NON, RE, NON, NON, /*
+          eof esc rt left up down */
+NON, NON, NON, NON, NON, NON, NON, NON, NON, NON, EE , NON, EOB, NON, NON, NON, /*
+032 ! " # $ % & ' ( ) * + , - . / */
+SPC, BIT, LIT2,RNI, BIT, PERO,AND1,LITA,GRPO,GRPC,REP1,PLUS,SEQ1,NMC, NMC, BIT, /*
+0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
+NMC, NMC, NMC, NMC, NMC, NMC, NMC, NMC, NMC, NMC, BIT, REFC,BIT, BIT, BIT, OPT1,/*
+@ A B C D E F G H I J K L M N O */
+BIT, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /*
+P Q R S T U V W X Y Z [ \ ] ^ _ */
+NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, DTGO,BIT, DTGC,BIT, BIT, /*
+` a b c d e f g h i j k l m n o */
+BIT, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /*
+p q r s t u v w x y z { | } ~ 127 */
+NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, BIT, OR1, BIT, BIT, NON,
+BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT,
+BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT,
+BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT,
+BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT,
+BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT,
+BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT,
+BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT,
+BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, NON
+};
+/* bit nmc nms re spc non ee eob rs and grpc grpo lit lita
+ dtgc dtgo opt or pero plus rep rni seq refc */
+#undef BIT
+#undef NMC
+#undef NMS
+#undef RE
+#undef SPC
+#undef NON
+#undef EE
+#undef EOB
+#undef RS
+#undef AND1
+#undef GRPC
+#undef GRPO
+#undef LIT2
+#undef LITA
+#undef DTGC
+#undef DTGO
+#undef OPT1
+#undef OR1
+#undef PERO
+#undef PLUS
+#undef REP1
+#undef RNI
+#undef SEQ1
+#undef REFC
+/* LEXLMS: Lexical table for literal parses and marked sections.
+*/
+/* Symbols for SGML character set divisions and function characters.
+*/
+#define FRE 0 /* Free char: not in a delimiter or minimum literal. */
+#define NU 1 /* Numeral Numerals */
+#undef MIN
+#define MIN 2 /* Minimum literal '()+,-./:?= */
+#define NMS 3 /* LC/UCNMSTRT Lower and uppercase letters */
+#define SPC 4 /* SPACE 32 Space */
+#define NON 5 /* NONSGML 0-31 127 255 Unused, except for: */
+#define EE 6 /* NONSGML 00 26 Entity end (end of file) */
+#define EOB 7 /* NONSGML 28 End disk buffer */
+#define RS 8 /* Function 10 Line feed */
+#define RE 9 /* Function 13 Carriage return */
+#define SEP 10 /* SEPCHAR 09 TAB: horizontal tab */
+/*#define CDE 11 NONSGML delcdata CDATA/SDATA delimiter */
+#define NSC 12 /* NONSGML delnonch Non-SGML character prefix */
+/* Symbols for SGML delimiter roles in LIT, PI, and marked sections.
+ Either LIT, LITA, PIC, or EE, is changed to LITC when a literal is begun.
+ It is changed back when the LITC occurs (i.e., when the literal ends).
+*/
+#define ERO 13 /* & */
+#define MDO 14 /* ! Actually MDO[2] */
+#define MSO 16 /* [ For MDO context. */
+#define PERO 17 /* % For prolog. */
+#define RNI 18 /* # For CRO[2] */
+#define TGC3 19 /* > Also MDC for MSC context. */
+#define TGO3 20 /* < TAGO; also MDO[1] */
+
+/* Room has been left in the parse tables in case re-parsing of text
+ is eventually supported (i.e., saved parsed text is used by the
+ application to create a new SGML document, but CDATA and SDATA
+ entities in literals, and non-SGML characters, are left in their
+ parsed state to avoid the overhead of reconstituting the original
+ markup). In such a case, the two non-SGML characters DELCDATA and
+ DELSDATA are changed to CDE.
+ NOTE: The idea is a bad one, because the generated document would
+ be non-conforming, as it would contain non-SGML characters.
+*/
+UNCH lexlms[256] = { /*
+000 001       bs tab lf home ff cr so si */
+EE, NON, NON, NON, NON, NON, NON, NON, NON ,SEP, RS, NON, NON, RE, NON, NON, /*
+          eof esc rt left up down */
+NON, NON, NON, NON, NON, NON, NON, NON, NON, NON, EE, NON, EOB, NON, NON, NSC, /*
+032 ! " # $ % & ' ( ) * + , - . / */
+SPC, MDO, FRE, RNI, FRE, PERO,ERO, MIN, MIN, MIN, FRE, MIN, MIN, MIN, MIN, MIN, /*
+0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
+NU , NU , NU , NU , NU , NU , NU , NU , NU , NU , MIN, FRE, TGO3,MIN, TGC3,MIN, /*
+@ A B C D E F G H I J K L M N O */
+FRE, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /*
+P Q R S T U V W X Y Z [ \ ] ^ _ */
+NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, MSO, FRE, MSC3,FRE, FRE, /*
+` a b c d e f g h i j k l m n o */
+FRE, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /*
+p q r s t u v w x y z { | } ~ 127 */
+NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, FRE, FRE, FRE, FRE, NON,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, NON
+};
+/* free nu min nms spc non ee eob rs re sep cde nsc ero
+ mdo msc mso pero rni tago tagc litc */
+/* def FRE*/
+#undef NU
+#undef MIN
+#undef NMS
+#undef SPC
+#undef NON
+#undef EE
+#undef EOB
+#undef RS
+#undef RE
+#undef SEP
+/* def CDE*/
+/* def NSC*/
+#undef ERO
+#undef MDO
+/* def MSC3*/
+#undef MSO
+#undef PERO
+#undef RNI
+#undef TGC3
+#undef TGO3
+/* def LITC*/
+/* LEXMARK: Lexical scan table for markup: PCBMD? and PCB?TAG.
+*/
+/* Symbols for SGML character set divisions. */
+#define BIT 0 /* Bit combinations not allowed; includes ESC SO SI */
+#define NMC 1 /* NAMECHAR . Period only: in lexmark '-' is COM1 and '_' is BIT */
+#define NU 2 /* NUMERAL Numerals */
+#define NMS 3 /* NAMESTRT Lower and uppercase letters */
+#define SPC 4 /* SPACE 32 13 09 Space; includes RE TAB */
+#define NON 5 /* NONCHAR 0-31 127 255 Unused, except for: */
+#define EE 6 /* Function 26 00 EE: entity end (end of file) */
+#define EOB 7 /* NONCHAR 28 End disk buffer. */
+#define RS 8 /* Function 10 RS: record start (line feed) */
+
+/* Symbols for SGML delimiter roles in MD and TAG. */
+#define COM1 9 /* - Actually COM[1]; also COM[2], MINUS. */
+#define ETIB 10 /* / ETI; actually ETAGO[2]. */
+#define GRPO 11 /* ( */
+#define LIT3 12 /* " */
+#define LITA 13 /* ' */
+#define DSO 14 /* [ */
+#define DSC1 15 /* ] For data attribute specifications */
+#define PERO 16 /* % */
+#define PLUS 17 /* + */
+#define REFC 18 /* ; For references */
+#define RNI 19 /* # Also CRO[2] */
+#define TGC4 20 /* > Also MDC, PIC */
+#define TGO4 21 /* < TAGO; also MDO[1] */
+#define VI 22 /* = */
+
+UNCH lexmark[256] = { /*
+000 001       bs tab lf home ff cr so si */
+EE , NON, NON, NON, NON, NON, NON, NON, NON, SPC, RS, NON, NON, SPC, NON, NON, /*
+          eof esc rt left up down */
+NON, NON, NON, NON, NON, NON, NON, NON, NON, NON, EE , NON, EOB, NON, NON, NON, /*
+032 ! " # $ % & ' ( ) * + , - . / */
+SPC, BIT, LIT3,RNI, BIT, PERO,BIT, LITA,GRPO,BIT, BIT, PLUS,BIT, COM1,NMC ,ETIB,/*
+0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
+NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, BIT, REFC,TGO4,VI, TGC4,BIT, /*
+@ A B C D E F G H I J K L M N O */
+BIT, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /*
+P Q R S T U V W X Y Z [ \ ] ^ _ */
+NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, DSO, BIT, DSC1, BIT, BIT, /*
+` a b c d e f g h i j k l m n o */
+BIT, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /*
+p q r s t u v w x y z { | } ~ 127 */
+NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, BIT, BIT, BIT, BIT, NON,
+BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT,
+BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT,
+BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT,
+BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT,
+BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT,
+BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT,
+BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT,
+BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, NON
+};
+/* bit nmc nu nms spc non ee eob rs com eti grpo lit lita
+ dso pero plus refc rni tagc tago vi */
+#undef BIT
+#undef NMC
+#undef NU
+#undef NMS
+#undef SPC
+#undef NON
+#undef EE
+#undef EOB
+#undef RS
+#undef COM1
+#undef ETIB
+#undef GRPO
+#undef LIT3
+#undef LITA
+#undef DSO
+#undef DSC1 /* The LEXMARK symbol is DSC1, not DSC. */
+#undef PERO
+#undef PLUS
+#undef REFC
+#undef RNI
+#undef TGC4
+#undef TGO4
+#undef VI
+/* LEXSD: Lexical scan table for SGML declaration.
+*/
+
+/* Symbols for SGML character set divisions. */
+#define SIG 0 /* Significant SGML characters. */
+#define DAT 1 /* DATACHAR Not significant, and not non-sgml. */
+#define NU 2 /* NUMERAL Numerals */
+#define NMS 3 /* NAMESTRT Lower and uppercase letters */
+#define SPC 4 /* SPACE 32 13 09 Space; includes RE TAB */
+#define NON 5 /* NONCHAR NONSGML */
+#define EE 6 /* Function 26 00 EE: entity end (end of file) */
+#define EOB 7 /* NONCHAR 28 End disk buffer. */
+#define RS 8 /* Function 10 RS: record start (line feed) */
+/* Symbols for SGML delimiter roles in SGML declaration. */
+#define COM1 9 /* - Actually COM[1]; also COM[2]. */
+#define LIT3 10 /* " */
+#define LITA 11 /* ' */
+#define TGC4 12 /* > Also MDC, PIC */
+
+UNCH lexsd[256] = { /*
+000 001       bs tab lf home ff cr so si */
+EE , NON, NON, NON, NON, NON, NON, NON, NON, SPC, RS, NON, NON, SPC, NON, NON, /*
+          eof esc rt left up down */
+NON, NON, NON, NON, NON, NON, NON, NON, NON, NON, EE , NON, EOB, NON, NON, NON, /*
+032 ! " # $ % & ' ( ) * + , - . / */
+SPC, SIG, LIT3,SIG, DAT, SIG ,SIG, LITA,SIG, SIG, SIG, SIG, SIG, COM1,SIG ,SIG,/*
+0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
+NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, SIG, SIG, SIG, SIG, TGC4,SIG, /*
+@ A B C D E F G H I J K L M N O */
+SIG, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /*
+P Q R S T U V W X Y Z [ \ ] ^ _ */
+NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, SIG, DAT, SIG, SIG, SIG, /*
+` a b c d e f g h i j k l m n o */
+DAT, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /*
+p q r s t u v w x y z { | } ~ 127 */
+NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, SIG, SIG, SIG, SIG, NON,
+DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT,
+DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT,
+DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT,
+DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT,
+DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT,
+DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT,
+DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT,
+DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, NON
+};
+
+#undef SIG
+#undef DAT
+#undef NON
+#undef NU
+#undef NMS
+#undef SPC
+#undef EE
+#undef EOB
+#undef RS
+#undef COM1
+#undef LIT3
+#undef LITA
+#undef TGC4
+
+/* LEXTRAN: Translation table for SGML names.
+*/
+UNCH lextran[256] = { /*
+000 001       bs tab lf home ff cr so si */
+0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , /*
+          eof esc rt left up down */
+16 , 17 , 18 , 19 , 20 , 21 , 22 , 23 , 24 , 25 , 26 , 27 , 28 , 29 , 30 , 31 , /*
+space! " # $ % & ' ( ) * + , - . / */
+32 , 33 , 34 , 35 , 36 , 37 , 38 , 39 , 40 , 41 , 42 , 43 , 44 , 45 , 46 , 47 , /*
+0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
+48 , 49 , 50 , 51 , 52 , 53 , 54 , 55 , 56 , 57 , 58 , 59 , 60 , 61 , 62 , 63 , /*
+@ A B C D E F G H I J K L M N O */
+64 , 65 , 66 , 67 , 68 , 69 , 70 , 71 , 72 , 73 , 74 , 75 , 76 , 77 , 78 , 79 , /*
+P Q R S T U V W X Y Z [ \ ] ^ _ */
+80 , 81 , 82 , 83 , 84 , 85 , 86 , 87 , 88 , 89 , 90 , 91 , 92 , 93 , 94 , 95 , /*
+` a b c d e f g h i j k l m n o */
+96 , 65 , 66 , 67 , 68 , 69 , 70 , 71 , 72 , 73 , 74 , 75 , 76 , 77 , 78 , 79 , /*
+p q r s t u v w x y z { | } ~ 127 */
+80 , 81 , 82 , 83 , 84 , 85 , 86 , 87 , 88 , 89 , 90 , 123, 124, 125, 126, 127,
+128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
+144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
+160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
+176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
+192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
+208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
+224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
+};
+/* LEXTOKE: Lexical class table for tokenization scan.
+*/
+#include "lextoke.h" /* Symbols for tokenization lexical classes. */
+UNCH lextoke[256] = { /*
+
+000 001       bs tab lf home ff cr   */
+INV, INV, INV, INV, INV, INV, INV, INV, INV, SEP, REC, INV, INV, REC, INV, INV, /*
+          eof esc rt left up down */
+INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, EOB, INV, INV, INV, /*
+space! " # $ % & ' ( ) * + , - . / */
+SP , INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, NMC, NMC, INV, /*
+0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
+NU , NU , NU , NU , NU , NU , NU , NU , NU , NU , INV, INV, INV, INV, INV, INV, /*
+@ A B C D E F G H I J K L M N O */
+INV, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /*
+P Q R S T U V W X Y Z [ \ ] ^ _ */
+NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, INV, INV, INV, INV, INV, /*
+` a b c d e f g h i j k l m n o */
+INV, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /*
+p q r s t u v w x y z { | } ~ 127 */
+NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, INV, INV, INV, INV, INV,
+INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV
+};
+
+/* This table maps ASCII to the system character set. */
+int asciicharset[] = {
+0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
+112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+};
diff --git a/usr.bin/sgmls/sgmls/lextabe.c b/usr.bin/sgmls/sgmls/lextabe.c
new file mode 100644
index 0000000..f93af89
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/lextabe.c
@@ -0,0 +1,184 @@
+/* This file was automatically generated by genlex. Do not edit. */
+
+#include "config.h"
+#include "entity.h"
+#include "sgmldecl.h"
+
+UNCH lexcnm[] = {
+ 6, 5, 5, 5, 5, 10, 5, 5, 5, 5, 5, 5, 5, 9, 5, 5,
+ 5, 5, 5, 5, 5, 5, 14, 5, 5, 5, 5, 5, 7, 5, 5, 12,
+ 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
+ 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 26, 0, 0, 0,
+13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0,
+15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 23,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 18,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 22, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 0,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 5,
+};
+
+UNCH lexcon[] = {
+ 6, 5, 5, 5, 5, 10, 5, 5, 5, 5, 5, 5, 5, 9, 5, 5,
+ 5, 5, 5, 5, 5, 5, 14, 5, 5, 5, 5, 5, 7, 5, 5, 12,
+ 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
+ 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 25, 0, 0, 0,
+13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0, 0, 0, 0, 0,
+15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 24, 22,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 0, 0, 0, 0,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 20, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 0, 0,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 5,
+};
+
+UNCH lexgrp[] = {
+ 6, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 5, 5, 5,
+ 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
+ 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 11, 19, 17,
+ 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 10, 23, 0,
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 18, 0, 0, 16,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 13, 0, 12,
+ 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0,
+ 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0,
+ 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 15, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 0,
+ 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0,
+ 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0,
+ 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 5,
+};
+
+UNCH lexlms[] = {
+ 6, 5, 5, 5, 5, 10, 5, 5, 5, 5, 5, 5, 5, 9, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 5, 5, 12,
+ 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
+ 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 20, 2, 2, 0,
+13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 0, 2, 0, 0,
+ 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 17, 0, 19, 2,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 18, 0, 2, 2, 0,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 16, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 5,
+};
+
+UNCH lexmark[] = {
+ 6, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 5, 5, 5,
+ 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
+ 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 21, 11, 17, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0,
+ 9, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 20, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 0, 13, 22, 12,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 14, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 5,
+};
+
+UNCH lexsd[] = {
+ 6, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 5, 5, 5,
+ 1, 1, 1, 1, 1, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6,
+ 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1,
+ 9, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 12, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 11, 0, 10,
+ 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1,
+ 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1,
+ 1, 0, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 0, 1, 1,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1,
+ 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 5,
+};
+
+UNCH lextoke[] = {
+ 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0,
+ 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0,
+ 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0,
+ 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0,
+ 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0,
+ 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 0, 0, 0, 0, 0,
+};
+
+UNCH lextran[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
+112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
+128, 193, 194, 195, 196, 197, 198, 199, 200, 201, 138, 139, 140, 141, 142, 143,
+144, 209, 210, 211, 212, 213, 214, 215, 216, 217, 154, 155, 156, 157, 158, 159,
+160, 161, 226, 227, 228, 229, 230, 231, 232, 233, 170, 171, 172, 173, 174, 175,
+176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
+192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
+208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
+224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
+};
+
+int asciicharset[] = {
+ 0, 1, 2, 3, 55, 45, 46, 47, 22, 5, 37, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31,
+ 64, 90, 127, 123, 91, 108, 80, 125, 77, 93, 92, 78, 107, 96, 75, 97,
+240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 122, 94, 76, 126, 110, 111,
+124, 193, 194, 195, 196, 197, 198, 199, 200, 201, 209, 210, 211, 212, 213, 214,
+215, 216, 217, 226, 227, 228, 229, 230, 231, 232, 233, 173, 224, 189, 176, 109,
+121, 129, 130, 131, 132, 133, 134, 135, 136, 137, 145, 146, 147, 148, 149, 150,
+151, 152, 153, 162, 163, 164, 165, 166, 167, 168, 169, 192, 79, 208, 161, 7,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+};
diff --git a/usr.bin/sgmls/sgmls/lextoke.h b/usr.bin/sgmls/sgmls/lextoke.h
new file mode 100644
index 0000000..d2bcfa0
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/lextoke.h
@@ -0,0 +1,10 @@
+/* LEXTOKE.H: Symbols for tokenization lexical classes.
+*/
+#define INV 0 /* Invalid Chars Not allowed in an SGML name. */
+#define REC 1 /* Record Boundary RS and RE. */
+#define SEP 2 /* Separator TAB. */
+#define SP 3 /* SPACE */
+#define NMC 4 /* NAMECHAR . _ Period, underscore (plus NMS, NUM). */
+#define NMS 5 /* NAMESTRT Lower and uppercase letters */
+#define NU 6 /* NUMERAL Numerals */
+#define EOB 7 /* NONCHAR 28 End disk buffer. */
diff --git a/usr.bin/sgmls/sgmls/lineout.c b/usr.bin/sgmls/sgmls/lineout.c
new file mode 100644
index 0000000..fd856ce
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/lineout.c
@@ -0,0 +1,653 @@
+/* lineout.c -
+ Implements line-oriented output format.
+
+ Written by James Clark (jjc@jclark.com).
+*/
+
+#include "config.h"
+#include "std.h"
+#include "entity.h" /* Templates for entity control blocks. */
+#include "adl.h" /* Definitions for attribute list processing. */
+#include "sgmlmain.h" /* Main interface to SGML services. */
+#include "lineout.h"
+#include "appl.h"
+
+static VOID flush_data P((void));
+static VOID define_external_entity P((PNE));
+static VOID define_entity P((UNCH *));
+static VOID handle_attributes P((UNCH *, struct ad *));
+static VOID handle_token_list P((UNCH *, struct ad *, int));
+static VOID handle_single_token P((UNCH *, struct ad *, int));
+static VOID output_notation P((UNCH *, UNCH *, UNCH *));
+static VOID output_internal_entity P((UNCH *, int, UNCH *));
+static VOID output_external_entity P((UNCH *, int, UNIV, UNCH *, UNCH *,
+ UNCH *));
+static VOID output_subdoc P((UNCH *, UNIV, UNCH *, UNCH *));
+#ifdef SUPPORT_SUBDOC
+static VOID process_subdoc P((UNCH *, UNIV));
+#endif /* SUPPORT_SUBDOC */
+static VOID output_record_end P((void));
+static VOID output_pcdata P((UNS, UNCH *));
+static VOID output_cdata P((UNS, UNCH *));
+static VOID output_sdata P((UNS, UNCH *));
+static VOID output_entity_reference P((UNCH *));
+static VOID output_start_tag P((UNCH *));
+static VOID output_end_tag P((UNCH *));
+static VOID output_processing_instruction P((UNS, UNCH *));
+static VOID output_implied_attribute P((UNCH *, UNCH *));
+static char *attribute_type_string P((int));
+static VOID output_begin_attribute P((UNCH *, UNCH *, int));
+static VOID output_attribute_token P((UNS, UNCH *));
+static VOID output_end_attribute P((void));
+static VOID print_data P((UNS, UNCH *, int));
+static VOID print_string P((UNS, UNCH *, int));
+static VOID print_id P((UNIV, UNCH *, UNCH *));
+static VOID print_filename P((char *));
+static VOID output_location P((void));
+static VOID output_appinfo P((UNS, UNCH *));
+
+static int have_data = 0; /* Non-zero while a data line is open: set by print_data(), cleared by flush_data() once it has written the newline. */
+static char *current_filename = 0; /* File name last reported by output_location(); 0 makes the next report include the name again. */
+static unsigned long current_lineno = 0; /* Line number last reported by output_location(); print_data() also increments it for a lone RE. */
+
+/* Pull events from the parser until the end of the document and write the
+   corresponding output lines.  A non-zero SUBDOCSW suppresses the APPINFO
+   line.  When the global suppsw is set nothing is written, but (with
+   SUPPORT_SUBDOC) subdocument entities are still processed. */
+
+VOID process_document(subdocsw)
+int subdocsw;
+{
+     struct rcbdata rcbdaf;
+     struct rcbtag rcbtag;
+     enum sgmlevent rc;
+
+     for (;;) {
+          rc = sgmlnext(&rcbdaf, &rcbtag);
+          if (rc == SGMLEOD)
+               break;
+#ifdef SUPPORT_SUBDOC
+          /* A reference to a subdocument entity is handled even when
+             output is suppressed. */
+          if (rc == SGMLDAF && !CONTERSW(rcbdaf) && NDESW(rcbdaf)) {
+               PNE sub = NEPTR(rcbdaf);
+
+               if (NEXTYPE(sub) == ESNSUB) {
+                    if (!suppsw && !sgmlment(NEENAME(sub)))
+                         define_external_entity(sub);
+                    process_subdoc(NEENAME(sub) + 1, NEID(sub));
+                    continue;
+               }
+          }
+#endif /* SUPPORT_SUBDOC */
+          if (suppsw)
+               continue;
+          switch (rc) {
+          case SGMLDAF:
+               if (!CONTERSW(rcbdaf)) {
+                    if (CDESW(rcbdaf))
+                         output_cdata(CDATALEN(rcbdaf), CDATA(rcbdaf));
+                    else if (SDESW(rcbdaf))
+                         output_sdata(CDATALEN(rcbdaf), CDATA(rcbdaf));
+                    else if (NDESW(rcbdaf)) {
+                         assert(NEXTYPE(NEPTR(rcbdaf)) != ESNSUB);
+                         /* Define the entity before its first reference. */
+                         if (!sgmlment(NEENAME(NEPTR(rcbdaf))))
+                              define_external_entity(NEPTR(rcbdaf));
+                         output_entity_reference(NEENAME(NEPTR(rcbdaf)) + 1);
+                    }
+                    else
+                         output_pcdata(CDATALEN(rcbdaf), CDATA(rcbdaf));
+               }
+               break;
+          case SGMLSTG:
+               if (!CONTERSW(rcbtag)) {
+                    /* Attribute lines come before the start-tag line. */
+                    if (ALPTR(rcbtag))
+                         handle_attributes((UNCH *)NULL, ALPTR(rcbtag));
+                    output_start_tag(CURGI(rcbtag));
+               }
+               break;
+          case SGMLETG:
+               if (!CONTERSW(rcbtag))
+                    output_end_tag(CURGI(rcbtag));
+               break;
+          case SGMLPIS:
+               if (!CONTERSW(rcbdaf))
+                    output_processing_instruction(PDATALEN(rcbdaf),
+                                                  PDATA(rcbdaf));
+               break;
+          case SGMLREF:
+               if (!CONTERSW(rcbdaf))
+                    output_record_end();
+               break;
+          case SGMLAPP:
+               if (!CONTERSW(rcbdaf) && !subdocsw)
+                    output_appinfo(ADATALEN(rcbdaf), ADATA(rcbdaf));
+               break;
+          default:
+               abort();
+          }
+     }
+}
+
+/* Write the line that tells the reader the document was conforming.
+   Nothing is written when output is suppressed. */
+
+VOID output_conforming()
+{
+     if (suppsw)
+          return;
+     printf("%c\n", CONFORMING_CODE);
+}
+
+/* Write the definition of the external entity P.  A subdocument entity
+   gets a subdoc definition; any other entity gets its notation (unless
+   NEDCNMARK reports it as already handled), the entity definition itself
+   and then its data attributes, if any. */
+
+static VOID define_external_entity(p)
+PNE p;
+{
+     if (NEXTYPE(p) == ESNSUB) {
+          output_subdoc(NEENAME(p) + 1, NEID(p), NEPUBID(p), NESYSID(p));
+          return;
+     }
+
+     if (!NEDCNMARK(p))
+          output_notation(NEDCN(p) + 1, NEDCNPUBID(p), NEDCNSYSID(p));
+     output_external_entity(NEENAME(p) + 1, NEXTYPE(p), NEID(p),
+                            NEPUBID(p), NESYSID(p), NEDCN(p) + 1);
+     if (NEAL(p))
+          handle_attributes(NEENAME(p) + 1, NEAL(p));
+}
+
+/* Write a definition for the entity named ENAME unless sgmlment() reports
+   that it has been defined already.  sgmlgent() result 1 selects an
+   external entity, 2 or 3 an internal one (3 being SDATA); any other
+   result produces no output. */
+
+static VOID define_entity(ename)
+UNCH *ename;
+{
+     UNCH *text;
+     PNE ext;
+     int kind;
+
+     if (sgmlment(ename))
+          return;
+     kind = sgmlgent(ename, &ext, &text);
+     if (kind == 1)
+          define_external_entity(ext);
+     else if (kind == 2 || kind == 3)
+          output_internal_entity(ename + 1, kind == 3, text);
+}
+
+/* Output every attribute in the list AL.  ENT is the name of the entity
+with which these attributes are associated; if it's NULL, they're
+associated with the next start tag.  Attributes flagged AERROR or
+AINVALID produce no output. */
+
+static VOID handle_attributes(ent, al)
+UNCH *ent;
+struct ad *al;
+{
+     int slot = 1;
+
+     while (slot <= ADN(al)) {
+          if (!GET(ADFLAGS(al, slot), AERROR)
+              && !GET(ADFLAGS(al, slot), AINVALID)) {
+               if (ADVAL(al, slot) == NULL)
+                    output_implied_attribute(ent, ADNAME(al, slot));
+               else if (ADTYPE(al, slot) >= ATKNLIST)
+                    handle_token_list(ent, al, slot);
+               else
+                    handle_single_token(ent, al, slot);
+          }
+          /* A group attribute is followed by ADNUM further entries
+             (presumably its group members); step over them. */
+          if (BITON(ADFLAGS(al, slot), AGROUP))
+               slot += ADNUM(al, slot);
+          slot++;
+     }
+}
+
+static VOID handle_token_list(ent, al, aln) /* Output attribute ALN of list AL, whose value is a list of ADNUM tokens; ENT is the owning entity name or NULL. */
+UNCH *ent;
+struct ad *al;
+int aln;
+{
+ UNCH *ptr;
+ int i;
+ if (ADTYPE(al, aln) == AENTITYS) { /* first pass: make sure every referenced entity has been defined */
+ ptr = ADVAL(al, aln);
+ for (i = 0; i < ADNUM(al, aln); i++) {
+ /* Temporarily make token look like normal
+ name with length and EOS.  The byte after the
+ token is saved and overwritten with the EOS, and
+ the length byte is raised by 2; both are put
+ back once define_entity() has returned. */
+ UNCH c = ptr[*ptr + 1]; /* byte just past this token */
+ ptr[*ptr + 1] = '\0';
+ *ptr += 2;
+ define_entity(ptr);
+ *ptr -= 2; /* restore the stored token length */
+ ptr += *ptr + 1; /* step to the position whose byte was saved */
+ *ptr = c; /* restore the saved byte */
+ }
+ }
+ output_begin_attribute(ent, ADNAME(al, aln), ADTYPE(al, aln));
+ ptr = ADVAL(al, aln); /* second pass: write the tokens themselves */
+ for (i = 0; i < ADNUM(al, aln); i++) {
+ /* The first byte is a length NOT including the length
+ byte; the tokens are not EOS terminated. */
+ output_attribute_token(*ptr, ptr + 1);
+ ptr += *ptr + 1;
+ }
+ output_end_attribute();
+}
+
+/* Output attribute ALN of list AL, whose value is a single token or a
+   character string; ENT is the owning entity name or NULL.  A notation
+   or entity named by the value is defined first where necessary. */
+
+static VOID handle_single_token(ent, al, aln)
+UNCH *ent;
+struct ad *al;
+int aln;
+{
+     UNCH *val;
+
+     switch (ADTYPE(al, aln)) {
+     case ANOTEGRP:
+          if (!DCNMARK(ADDATA(al, aln).x))
+               output_notation(ADVAL(al, aln) + 1,
+                               ADDATA(al, aln).x->pubid,
+                               ADDATA(al, aln).x->sysid);
+          break;
+     case AENTITY:
+          define_entity(ADVAL(al, aln));
+          break;
+     default:
+          break;
+     }
+
+     output_begin_attribute(ent, ADNAME(al, aln), ADTYPE(al, aln));
+     val = ADVAL(al, aln);
+     if (ADTYPE(al, aln) == ACHARS)
+          /* Character data is a plain EOS-terminated string. */
+          output_attribute_token(ustrlen(val), val);
+     else
+          /* Other values start with a length byte that also counts
+             itself and the EOS. */
+          output_attribute_token(*val - 2, val + 1);
+     output_end_attribute();
+}
+
+static VOID output_notation(name, pubid, sysid) /* Write the identifier lines for PUBID/SYSID followed by the notation definition line for NAME. */
+UNCH *name;
+UNCH *pubid, *sysid;
+{
+ flush_data(); /* terminate any data line still open */
+ print_id((UNIV)0, pubid, sysid); /* a notation has no file identifier */
+ printf("%c%s\n", DEFINE_NOTATION_CODE, name);
+}
+
+/* Write the definition line of the internal entity ENAME: its name, its
+   type (SDATA when IS_SDATA is non-zero, otherwise CDATA) and its
+   replacement TEXT, which may be null. */
+
+static VOID output_internal_entity(ename, is_sdata, text)
+UNCH *ename, *text;
+int is_sdata;
+{
+     char *type = "CDATA";
+     UNS len = 0;
+
+     if (is_sdata)
+          type = "SDATA";
+     if (text)
+          len = ustrlen(text);
+
+     flush_data();
+     printf("%c%s %s ", DEFINE_INTERNAL_ENTITY_CODE, ename, type);
+     print_string(len, text, 0);
+     putchar('\n');
+}
+
+static VOID output_subdoc(nm, id, pubid, sysid) /* Write the identifier lines for ID/PUBID/SYSID followed by the subdocument entity definition line for NM. */
+UNCH *nm;
+UNIV id;
+UNCH *pubid, *sysid;
+{
+ flush_data(); /* terminate any data line still open */
+ print_id(id, pubid, sysid);
+ printf("%c%s\n", DEFINE_SUBDOC_ENTITY_CODE, nm);
+}
+
+#ifdef SUPPORT_SUBDOC
+
+/* Process the subdocument entity NM by running a separate process on the
+   files identified by ID, bracketing its output with start and end
+   subdocument lines unless output is suppressed.  A null ID or a failing
+   subprocess is counted in suberr. */
+
+static VOID process_subdoc(nm, id)
+UNCH *nm;
+UNIV id;
+{
+     if (!suppsw) {
+          flush_data();
+          output_location();
+          printf("%c%s\n", START_SUBDOC_CODE, nm);
+          /* Flush before the subprocess starts writing. */
+          fflush(stdout);
+     }
+     fflush(stderr);
+
+     if (!id) {
+          suberr++;
+          appl_error(E_SUBDOC, nm);
+     }
+     else {
+          char **sub_argv = make_argv(id);
+          int status = run_process(sub_argv);
+
+          if (status != 0)
+               suberr++;
+          /* Make the next location line report the file name again. */
+          current_filename = 0;
+          free(sub_argv);
+          if (status == 0)
+               get_subcaps();
+     }
+
+     if (!suppsw)
+          printf("%c%s\n", END_SUBDOC_CODE, nm);
+}
+
+#endif /* SUPPORT_SUBDOC */
+
+/* Write the identifier lines for ID/PUBID/SYSID and then the definition
+   line of the external entity NM with notation DCN.  XTYPE selects the
+   type keyword; for a type other than CDATA, NDATA or SDATA only the
+   identifier lines are written. */
+
+static VOID output_external_entity(nm, xtype, id, pubid, sysid, dcn)
+UNCH *nm, *dcn;
+UNIV id;
+UNCH *pubid, *sysid;
+int xtype;
+{
+     char *type;
+
+     flush_data();
+     print_id(id, pubid, sysid);
+
+     if (xtype == ESNCDATA)
+          type = "CDATA";
+     else if (xtype == ESNNDATA)
+          type = "NDATA";
+     else if (xtype == ESNSDATA)
+          type = "SDATA";
+     else
+          return;
+
+     printf("%c%s %s %s\n", DEFINE_EXTERNAL_ENTITY_CODE, nm, type, dcn);
+}
+
+/* Output a record end as one byte of ordinary data. */
+
+static VOID output_record_end()
+{
+     static UNCH record_end = RECHAR;
+
+     print_data(1, &record_end, 0);
+}
+
+static VOID output_pcdata(n, s) /* Output the N bytes of parsed character data at S as ordinary (non-SDATA) data. */
+UNS n;
+UNCH *s;
+{
+ print_data(n, s, 0);
+}
+
+static VOID output_cdata(n, s) /* Output the N bytes of CDATA entity text at S as ordinary (non-SDATA) data. */
+UNS n;
+UNCH *s;
+{
+ print_data(n, s, 0);
+}
+
+static VOID output_sdata(n, s) /* Output the N bytes of SDATA entity text at S; print_data() is told to mark them as SDATA. */
+UNS n;
+UNCH *s;
+{
+ print_data(n, s, 1);
+}
+
+static VOID output_entity_reference(s) /* Write a reference line for the external entity named S, preceded by a location line when one is due. */
+UNCH *s;
+{
+ flush_data(); /* terminate any data line still open */
+ output_location();
+ printf("%c%s\n", REFERENCE_ENTITY_CODE, s);
+}
+
+static VOID output_start_tag(s) /* Write the start-tag line for the element named S, preceded by a location line when one is due. */
+UNCH *s;
+{
+ flush_data(); /* terminate any data line still open */
+ output_location();
+ printf("%c%s\n", START_CODE, s);
+}
+
+static VOID output_end_tag(s) /* Write the end-tag line for the element named S; no location line is produced for end tags. */
+UNCH *s;
+{
+ flush_data(); /* terminate any data line still open */
+ printf("%c%s\n", END_CODE, s);
+}
+
+static VOID output_processing_instruction(n, s) /* Write a processing-instruction line holding the N bytes at S, escaped by print_string(). */
+UNS n;
+UNCH *s;
+{
+ flush_data(); /* terminate any data line still open */
+ output_location();
+ putchar(PI_CODE);
+ print_string(n, s, 0);
+ putchar('\n');
+}
+
+static VOID output_appinfo(n, s) /* Write an APPINFO line holding the N bytes at S, escaped by print_string(). */
+UNS n;
+UNCH *s;
+{
+ flush_data(); /* terminate any data line still open */
+ output_location();
+ putchar(APPINFO_CODE);
+ print_string(n, s, 0);
+ putchar('\n');
+}
+
+
+/* Write the line for attribute ANAME when it has no value (IMPLIED).  A
+   non-null ENT makes it a data attribute line of that entity; otherwise
+   it is an ordinary attribute line. */
+
+static VOID output_implied_attribute(ent, aname)
+UNCH *ent, *aname;
+{
+     flush_data();
+     if (!ent) {
+          printf("%c%s IMPLIED\n", ATTRIBUTE_CODE, aname);
+          return;
+     }
+     printf("%c%s %s IMPLIED\n", DATA_ATTRIBUTE_CODE, ent, aname);
+}
+
+/* Return the keyword written in an attribute line for the attribute
+   declared value TYPE, or "INVALID" when TYPE is not recognized. */
+
+static char *attribute_type_string(type)
+int type;
+{
+     switch (type) {
+     case ACHARS:
+          return "CDATA";
+     case ANOTEGRP:
+          return "NOTATION";
+     case AENTITY:
+     case AENTITYS:
+          return "ENTITY";
+     case AID:
+     case AIDREF:
+     case AIDREFS:
+     case ANAME:
+     case ANAMES:
+     case ANMTGRP:
+     case ANMTOKE:
+     case ANMTOKES:
+     case ANUMBER:
+     case ANUMBERS:
+     case ANUTOKE:
+     case ANUTOKES:
+          return "TOKEN";
+     default:
+          break;
+     }
+#if 0
+     fatal("invalid attribute type %d", type);
+#endif
+     return "INVALID";
+}
+
+/* Start the line for attribute ANAME of declared value TYPE; no newline is
+   written.  A non-null ENT makes it a data attribute line of that entity. */
+
+static VOID output_begin_attribute(ent, aname, type)
+UNCH *ent, *aname;
+int type;
+{
+     char *keyword;
+
+     flush_data();
+     keyword = attribute_type_string(type);
+     if (!ent)
+          printf("%c%s %s", ATTRIBUTE_CODE, aname, keyword);
+     else
+          printf("%c%s %s %s", DATA_ATTRIBUTE_CODE, ent, aname, keyword);
+}
+
+static VOID output_attribute_token(vallen, val) /* Append one attribute value token (VALLEN bytes at VAL) to the current attribute line, preceded by a space. */
+UNS vallen;
+UNCH *val;
+{
+ putchar(' ');
+ print_string(vallen, val, 0);
+}
+
+static VOID output_end_attribute() /* Terminate the attribute line started by output_begin_attribute(). */
+{
+ putchar('\n');
+}
+
+/* Append the N bytes at S to the current data line, starting a new data
+   line first when none is open.  Empty data is ignored unless IS_SDATA
+   is set.  A lone record end advances the remembered line number instead
+   of producing a location line. */
+
+static VOID print_data(n, s, is_sdata)
+UNS n;
+UNCH *s;
+int is_sdata;
+{
+     if (!(n > 0) && !is_sdata)
+          return;
+
+     if (n == 1 && *s == RECHAR)
+          current_lineno++;
+     else
+          output_location();
+
+     /* output_location() may have closed the data line, so test afterwards. */
+     if (!have_data)
+          putchar(DATA_CODE);
+     print_string(n, s, is_sdata);
+     have_data = 1;
+}
+
+/* Terminate the data line that is currently open, if there is one. */
+
+static VOID flush_data()
+{
+     if (!have_data)
+          return;
+     putchar('\n');
+     have_data = 0;
+}
+
+/* When location output is enabled and the parser can supply a location,
+   write a location line if the file name or the line number differs from
+   what was last reported.  The file name is included only when it has
+   changed. */
+
+static VOID output_location()
+{
+     char *filename;
+     unsigned long lineno;
+     int new_file;
+
+     if (!locsw || !sgmlloc(&lineno, &filename))
+          return;
+
+     new_file = (!current_filename
+                 || strcmp(filename, current_filename) != 0);
+     if (!new_file && lineno == current_lineno)
+          return;
+
+     flush_data();
+     printf("%c%lu", LOCATION_CODE, lineno);
+     current_lineno = lineno;
+     if (new_file) {
+          putchar(' ');
+          print_filename(filename);
+          current_filename = filename;
+     }
+     putchar('\n');
+}
+
+/* Write the SLEN bytes at S in escaped form.  SDATA is bracketed by \|
+   (the whole string when IS_SDATA is set, otherwise at each DELSDATA
+   byte), DELCDATA bytes are dropped, a DELNONCH byte introduces a shifted
+   character, a record end becomes \n, a backslash is doubled and any
+   other non-printing byte is written as a three-digit octal escape. */
+
+static VOID print_string(slen, s, is_sdata)
+UNS slen;
+UNCH *s;
+int is_sdata;
+{
+     if (is_sdata)
+          fputs("\\|", stdout);
+
+     while (slen > 0) {
+          UNCH ch = *s++;
+
+          --slen;
+          if (ch == DELSDATA) {
+               /* I don't think this should happen inside SDATA. */
+               if (!is_sdata)
+                    fputs("\\|", stdout);
+               continue;
+          }
+          if (ch == DELCDATA)
+               continue;
+          if (ch == DELNONCH) {
+               /* A shift byte with nothing after it ends the string. */
+               if (slen == 0)
+                    break;
+               ch = UNSHIFTNON(*s);
+               ++s;
+               --slen;
+          }
+
+          if (ch == RECHAR)
+               fputs("\\n", stdout);
+          else if (ch == '\\')
+               fputs("\\\\", stdout);
+          else if (ISASCII(ch) && isprint(ch))
+               putchar(ch);
+          else
+               printf("\\%03o", ch);
+     }
+
+     if (is_sdata)
+          fputs("\\|", stdout);
+}
+
+
+/* Write the identifier lines of an entity or notation: a public
+   identifier line for PUBID and a system identifier line for SYSID when
+   they are non-null, then one file line for each string in ID.  A non-null
+   ID is read as a sequence of NUL-terminated strings ended by an empty
+   string. */
+
+static VOID print_id(id, pubid, sysid)
+UNIV id;
+UNCH *pubid, *sysid;
+{
+     if (pubid) {
+          putchar(PUBID_CODE);
+          print_string(ustrlen(pubid), pubid, 0);
+          putchar('\n');
+     }
+
+     if (sysid) {
+          putchar(SYSID_CODE);
+          print_string(ustrlen(sysid), sysid, 0);
+          putchar('\n');
+     }
+
+     if (id) {
+          char *p = (char *)id;
+
+          while (*p != '\0') {
+               putchar(FILE_CODE);
+               for (; *p != '\0'; p++) {
+                    if (*p == '\\')
+                         fputs("\\\\", stdout);
+                    else if (*p == '\n')
+                         fputs("\\n", stdout);
+                    else if (ISASCII(*p) && isprint((UNCH)*p))
+                         putchar(*p);
+                    else
+                         printf("\\%03o", (UNCH)*p);
+               }
+               putchar('\n');
+               /* Step over the terminator to the next string. */
+               p++;
+          }
+     }
+}
+
+/* Write the file name S in escaped form: a backslash is doubled, a newline
+   becomes \n and any other non-printing byte is written as a three-digit
+   octal escape. */
+
+static VOID print_filename(s)
+char *s;
+{
+     while (*s) {
+          if (*s == '\\')
+               fputs("\\\\", stdout);
+          else if (*s == '\n')
+               fputs("\\n", stdout);
+          else if (ISASCII(*s) && isprint((UNCH)*s))
+               putchar(*s);
+          else
+               printf("\\%03o", (UNCH)*s);
+          s++;
+     }
+}
+
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/lineout.h b/usr.bin/sgmls/sgmls/lineout.h
new file mode 100644
index 0000000..f3c4231
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/lineout.h
@@ -0,0 +1,23 @@
+/* lineout.h */
+
+/* Output codes used by sgmls.  Each code is written as the first
+   character of an output line by the routines in lineout.c and tells
+   the reader what the rest of that line contains. */
+
+#define DATA_CODE '-'
+#define START_CODE '('
+#define END_CODE ')'
+#define ATTRIBUTE_CODE 'A'
+#define DATA_ATTRIBUTE_CODE 'D'
+#define REFERENCE_ENTITY_CODE '&'
+#define DEFINE_NOTATION_CODE 'N'
+#define DEFINE_EXTERNAL_ENTITY_CODE 'E'
+#define DEFINE_INTERNAL_ENTITY_CODE 'I'
+#define PI_CODE '?'
+#define DEFINE_SUBDOC_ENTITY_CODE 'S'
+#define START_SUBDOC_CODE '{'
+#define END_SUBDOC_CODE '}'
+#define LOCATION_CODE 'L'
+#define APPINFO_CODE '#'
+#define PUBID_CODE 'p'
+#define SYSID_CODE 's'
+#define FILE_CODE 'f'
+#define CONFORMING_CODE 'C'
diff --git a/usr.bin/sgmls/sgmls/main.c b/usr.bin/sgmls/sgmls/main.c
new file mode 100644
index 0000000..3435dce
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/main.c
@@ -0,0 +1,602 @@
+/* main.c -
+ Main program for sgmls.
+
+ Written by James Clark (jjc@jclark.com).
+*/
+
+#include "config.h"
+#include "std.h"
+#include "getopt.h"
+#include "entity.h" /* Templates for entity control blocks. */
+#include "adl.h" /* Definitions for attribute list processing. */
+#include "sgmlmain.h" /* Main interface to SGML services. */
+#include "appl.h"
+
+#define READCNT 512
+
+/* Before using argv[0] in error messages, strip off everything up to and
+including the last character in prog that occurs in PROG_PREFIX. */
+
+#ifndef PROG_PREFIX
+#define PROG_PREFIX "/"
+#endif /* not PROG_PREFIX */
+
+/* Message catalogue name. */
+#define CAT_NAME "sgmls"
+/* Message set to use for application error messages. */
+#define APP_SET 4
+
+#ifdef HAVE_EXTENDED_PRINTF
+#define xvfprintf vfprintf
+#else
+extern int xvfprintf P((FILE *, char *, va_list));
+#endif
+
+static VOID usage P((void));
+static VOID fatal VP((int, ...));
+static VOID do_error P((int, va_list));
+static VOID swinit P((struct switches *));
+static VOID write_caps P((char *, struct sgmlcap *));
+
+static UNIV make_docent P((int, char **));
+static char *munge_program_name P((char *, char *));
+static VOID die P((void));
+#ifdef SUPPORT_SUBDOC
+static VOID build_subargv P((struct switches *));
+static VOID cleanup P((void));
+static char *create_subcap_file P((void));
+#endif /* SUPPORT_SUBDOC */
+
+static char *errlist[] = { /* Default message texts, indexed by the error number given to fatal()/appl_error(); do_error() passes the entry to catgets() as the fallback.  Slot 0 is unused because do_error() asserts errnum > 0. */
+ 0,
+ "Out of memory",
+ "Cannot open SGML document entity",
+ "Cannot exec `%s': %s",
+ "Cannot fork: %s",
+ "Error waiting for process: %s",
+ "Program %s got fatal signal %d",
+ "Cannot open `%s': %s",
+ "Subdocument capacity botch",
+ "Non-existent subdocument entity `%s' not processed",
+};
+
+int suppsw = 0; /* Non-zero means suppress output. */
+int locsw = 0; /* Non-zero means generate location info. */
+static char *prog; /* Program name (for error messages). */
+static nl_catd catd; /* Message catalogue descriptor. */
+static char *capfile = 0; /* File for capacity report. */
+extern char *version_string;
+
+char options[] = { /* Option string handed to getopt() in main(); a ':' follows each letter that takes an argument.  Spelled as a char array so that optional letters can be compiled in or out. */
+ 'c', ':', 'd', 'e', 'g', 'i', ':', 'l', 'o', ':', 'p', 'r', 's', 'u', 'v',
+#ifdef CANT_REDIRECT_STDERR
+ 'f', ':',
+#endif /* CANT_REDIRECT_STDERR */
+#ifdef TRACE
+ 'x', ':', 'y', ':',
+#endif /* TRACE */
+ '\0'
+};
+
+#ifdef SUPPORT_SUBDOC
+int suberr = 0; /* Error in subdocument. */
+static char *subargv[sizeof(options)]; /* Leading arguments of every subdocument command line; filled by main() and build_subargv(), copied by make_argv(). */
+static int subargc = 0; /* Number of entries used in subargv. */
+static char nopenbuf[sizeof(long)*3 + 1]; /* Text of the -o argument written by build_subargv(). */
+static char sgmldecl_file[L_tmpnam]; /* Name of the temporary file written by store_sgmldecl(); empty until created. */
+static char subcap_file[L_tmpnam]; /* Name of the temporary capacity file made by create_subcap_file(); empty until created. */
+#endif
+
+int main(argc, argv) /* Parse the command line, run the parser over the document entity and exit with EXIT_SUCCESS only if no errors were counted. */
+int argc;
+char **argv;
+{
+ static char stderr_buf[BUFSIZ];
+ int opt;
+#ifdef CANT_REDIRECT_STDERR
+ char *errfile = 0;
+#endif
+ struct sgmlcap cap;
+ struct switches sw;
+ int nincludes = 0; /* number of -i options */
+ setbuf(stderr, stderr_buf); /* buffer stderr; do_error() flushes after each message */
+
+ /* Define MAIN_HOOK in config.h if some function needs to be called here. */
+#ifdef MAIN_HOOK
+ MAIN_HOOK(argc, argv);
+#endif
+#ifdef SUPPORT_SUBDOC
+ subargv[subargc++] = argv[0]; /* saved before argv[0] is replaced by the munged name below */
+#endif
+
+ prog = argv[0] = munge_program_name(argv[0], "sgmls");
+
+ catd = catopen(CAT_NAME, 0);
+ swinit(&sw); /* must follow the assignments to prog and catd, which it copies */
+
+ while ((opt = getopt(argc, argv, options)) != EOF) {
+ switch (opt) {
+ case 'l': /* Generate location information. */
+ locsw = 1;
+ break;
+ case 'c': /* Print capacity usage. */
+ capfile = optarg;
+ break;
+ case 's': /* Suppress output. */
+ suppsw = 1;
+ break;
+ case 'd': /* Report duplicate entity declarations. */
+ sw.swdupent = 1;
+ break;
+ case 'e': /* Provide entity stack trace in error msg. */
+ sw.swenttr = 1;
+ break;
+#ifdef CANT_REDIRECT_STDERR
+ case 'f': /* Redirect errors. */
+ errfile = optarg;
+ break;
+#endif /* CANT_REDIRECT_STDERR */
+ case 'g': /* Provide GI stack trace in error messages. */
+ sw.sweltr = 1;
+ break;
+ case 'p': /* Parse only the prolog. */
+ sw.onlypro = 1;
+ suppsw = 1;
+ break;
+ case 'r': /* Give warning for defaulted references. */
+ sw.swrefmsg = 1;
+ break;
+ case 'u':
+ sw.swundef = 1;
+ break;
+#ifdef TRACE
+ case 'x': /* Trace options for the document body. */
+ sw.trace = optarg;
+ break;
+ case 'y': /* Trace options for the prolog. */
+ sw.ptrace = optarg;
+ break;
+#endif /* TRACE */
+ case 'v': /* Print the version number. */
+ fprintf(stderr, "sgmls version %s\n", version_string);
+ fflush(stderr);
+ break;
+ case 'o': /* Internal option: build_subargv() passes it, with nopen + 1, to subdocument processes. */
+ sw.nopen = atol(optarg);
+ if (sw.nopen <= 0)
+ usage();
+ break;
+ case 'i': /* Define parameter entity as "INCLUDE". */
+ sw.includes = (char **)xrealloc((UNIV)sw.includes,
+ (nincludes + 2)*sizeof(char *));
+ sw.includes[nincludes++] = optarg;
+ sw.includes[nincludes] = 0; /* keep the list null-terminated */
+ break;
+ case '?':
+ usage(); /* does not return, so the abort() below is not reached from here */
+ default:
+ abort();
+ }
+ }
+
+#ifdef CANT_REDIRECT_STDERR
+ if (errfile) {
+ FILE *fp;
+ errno = 0;
+ fp = fopen(errfile, "w"); /* trial open, so a failure can still be reported on the original stderr */
+ if (!fp)
+ fatal(E_OPEN, errfile, strerror(errno));
+ fclose(fp);
+ errno = 0;
+ if (!freopen(errfile, "w", stderr)) {
+ /* Can't use fatal() since stderr is now closed */
+ printf("%s: ", prog);
+ printf(errlist[E_OPEN], errfile, strerror(errno));
+ putchar('\n');
+ exit(EXIT_FAILURE);
+ }
+ }
+#endif /* CANT_REDIRECT_STDERR */
+
+ (void)sgmlset(&sw);
+
+#ifdef SUPPORT_SUBDOC
+ build_subargv(&sw);
+#endif
+ if (sgmlsdoc(make_docent(argc - optind, argv + optind)))
+ fatal(E_DOC);
+
+ process_document(sw.nopen > 0); /* non-zero argument: this run was started with -o */
+ sgmlend(&cap);
+ if (capfile)
+ write_caps(capfile, &cap);
+#ifdef SUPPORT_SUBDOC
+ cleanup(); /* remove the temporary files made for subdocuments */
+ if (suberr)
+ exit(EXIT_FAILURE);
+#endif /* SUPPORT_SUBDOC */
+ if (sgmlgcnterr() > 0)
+ exit(EXIT_FAILURE);
+ if (!sw.nopen) /* only a run started without -o reports conformance */
+ output_conforming();
+ exit(EXIT_SUCCESS);
+}
+
+/* Return the program name to use in messages.  DFLT is returned when ARG
+   is null or empty.  Otherwise everything up to and including the last
+   character of ARG that occurs in PROG_PREFIX is dropped; with
+   PROG_STRIP_EXTENSION the part from the last '.' on is removed as well,
+   and with PROG_FOLD upper-case ASCII letters are folded to lower case.
+   The result points into ARG or at a newly allocated copy. */
+
+static char *munge_program_name(arg, dflt)
+char *arg, *dflt;
+{
+     char *p;
+#ifdef PROG_STRIP_EXTENSION
+     char *ext;
+#endif
+
+     if (arg == 0 || *arg == '\0')
+          return dflt;
+
+     /* Walk back from the end until the preceding character is one of
+        the prefix characters, or the start is reached. */
+     p = arg + strlen(arg);
+     while (p != arg && !strchr(PROG_PREFIX, p[-1]))
+          --p;
+     arg = p;
+#ifdef PROG_STRIP_EXTENSION
+     ext = strrchr(arg, '.');
+     if (ext) {
+          p = (char *)xmalloc(ext - arg + 1);
+          memcpy(p, arg, ext - arg);
+          p[ext - arg] = '\0';
+          arg = p;
+     }
+#endif /* PROG_STRIP_EXTENSION */
+#ifdef PROG_FOLD
+     /* Fold a private copy; one already exists if an extension was
+        stripped. */
+#ifdef PROG_STRIP_EXTENSION
+     if (!ext) {
+#endif
+          p = xmalloc(strlen(arg) + 1);
+          strcpy(p, arg);
+          arg = p;
+#ifdef PROG_STRIP_EXTENSION
+     }
+#endif
+     for (p = arg; *p; p++) {
+          if (ISASCII((unsigned char)*p) && isupper((unsigned char)*p))
+               *p = tolower((unsigned char)*p);
+     }
+#endif /* PROG_FOLD */
+     return arg;
+}
+
+/* Build the document entity identifier from the ARGC file names in ARGV
+   (STDINNAME when there are none): the names are stored one after
+   another, each with its terminating NUL, followed by an empty string.
+   The result is newly allocated. */
+
+static UNIV make_docent(argc, argv)
+int argc;
+char **argv;
+{
+     static char *stdinname = STDINNAME;
+     UNS len = 1;               /* the final empty string */
+     char *ptr;
+     UNIV res;
+     int i;
+
+     if (argc == 0) {
+          argv = &stdinname;
+          argc = 1;
+     }
+
+     for (i = argc; i-- > 0; )
+          len += strlen(argv[i]) + 1;
+
+     res = xmalloc(len);
+     ptr = (char *)res;
+     for (i = 0; i < argc; i++) {
+          strcpy(ptr, argv[i]);
+          ptr += strlen(ptr) + 1;
+     }
+     *ptr = '\0';
+     return res;
+}
+
+
+/* Print a usage summary on stderr and exit with EXIT_FAILURE. */
+
+static VOID usage()
+{
+#ifdef CANT_REDIRECT_STDERR
+     char *errfile_opt = " [-f file]";
+#else /* not CANT_REDIRECT_STDERR */
+     char *errfile_opt = "";
+#endif /* not CANT_REDIRECT_STDERR */
+#ifdef TRACE
+     char *trace_opt = " [-x flags] [-y flags]";
+#else /* not TRACE */
+     char *trace_opt = "";
+#endif /* not TRACE */
+
+     /* Don't mention -o since it is for internal use only. */
+     fprintf(stderr, "Usage: %s [-deglprsuv]%s [-c file] [-i entity]%s [filename ...]\n",
+             prog, errfile_opt, trace_opt);
+     exit(EXIT_FAILURE);
+}
+
+static VOID die() /* Exit with EXIT_FAILURE after removing the subdocument temporary files; swinit() installs it as the parser's die callback. */
+{
+#ifdef SUPPORT_SUBDOC
+ cleanup();
+#endif /* SUPPORT_SUBDOC */
+ exit(EXIT_FAILURE);
+}
+
+/* Give every member of *SWP its default value.  prog and catd are copied
+   from the file-scope variables, so they must be set before this is
+   called. */
+
+static VOID swinit(swp)
+struct switches *swp;
+{
+     /* Values taken from this program. */
+     swp->prog = prog;
+     swp->catd = catd;
+     swp->die = die;
+     swp->swbufsz = READCNT+2;
+
+     /* Switches that command-line options can turn on. */
+     swp->swdupent = 0;
+     swp->swenttr = 0;
+     swp->sweltr = 0;
+     swp->swrefmsg = 0;
+     swp->swundef = 0;
+     swp->onlypro = 0;
+     swp->nopen = 0;
+     swp->includes = 0;
+#ifdef TRACE
+     swp->trace = 0;
+     swp->ptrace = 0;
+#endif /* TRACE */
+
+     swp->swambig = 1;          /* Always check for ambiguity. */
+}
+
+#ifdef SUPPORT_SUBDOC
+
+/* Append to subargv the options that every subdocument process is started
+   with: the switches currently in effect followed by -o with the value of
+   nopen plus one. */
+
+static VOID build_subargv(swp)
+struct switches *swp;
+{
+     char **next = subargv + subargc;
+
+     if (suppsw)
+          *next++ = "-s";
+     if (locsw)
+          *next++ = "-l";
+     if (swp->swdupent)
+          *next++ = "-d";
+     if (swp->swenttr)
+          *next++ = "-e";
+     if (swp->sweltr)
+          *next++ = "-g";
+     if (swp->swrefmsg)
+          *next++ = "-r";
+#ifdef TRACE
+     if (swp->trace) {
+          *next++ = "-x";
+          *next++ = swp->trace;
+     }
+     if (swp->ptrace) {
+          *next++ = "-y";
+          *next++ = swp->ptrace;
+     }
+#endif /* TRACE */
+     sprintf(nopenbuf, "%ld", swp->nopen + 1);
+     *next++ = "-o";
+     *next++ = nopenbuf;
+
+     subargc = (int)(next - subargv);
+}
+
+
+static
+VOID handler(sig) /* Signal handler installed by store_sgmldecl(): remove the temporary files, then deliver SIG again with its default action. */
+int sig;
+{
+ signal(sig, SIG_DFL); /* reset first so that the raise() below is not caught by this handler again */
+ cleanup();
+ raise(sig);
+}
+
+/* Remove whichever temporary files have been created and mark them as
+   gone, so that calling this again is harmless. */
+
+static
+VOID cleanup()
+{
+     if (sgmldecl_file[0] != '\0') {
+          (void)remove(sgmldecl_file);
+          sgmldecl_file[0] = '\0';
+     }
+
+     if (subcap_file[0] != '\0') {
+          (void)remove(subcap_file);
+          subcap_file[0] = '\0';
+     }
+}
+
+/* Return the name of the temporary file written by sgmlwrsd().  The file
+   is created on the first call, after handlers have been installed that
+   remove the temporary files when a terminating signal arrives; later
+   calls return the same name. */
+
+static
+char *store_sgmldecl()
+{
+     FILE *fp;
+
+     if (sgmldecl_file[0])
+          return sgmldecl_file;
+
+     /* Leave alone any signal that is currently being ignored. */
+     if (signal(SIGINT, SIG_IGN) != SIG_IGN)
+          signal(SIGINT, handler);
+#ifdef SIGTERM
+     if (signal(SIGTERM, SIG_IGN) != SIG_IGN)
+          signal(SIGTERM, handler);
+#endif /* SIGTERM */
+#ifdef SIGPIPE
+     if (signal(SIGPIPE, SIG_IGN) != SIG_IGN)
+          signal(SIGPIPE, handler);
+#endif
+#ifdef SIGHUP
+     if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
+          signal(SIGHUP, handler);
+#endif
+
+     tmpnam(sgmldecl_file);
+     errno = 0;
+     fp = fopen(sgmldecl_file, "w");
+     if (!fp)
+          fatal(E_OPEN, sgmldecl_file, strerror(errno));
+     sgmlwrsd(fp);
+     fclose(fp);
+     return sgmldecl_file;
+}
+
+/* Return the name of the temporary file that a subdocument process is
+   told (with -c) to write its capacity report to, creating the file,
+   empty, on the first call.  Exits through fatal() if it cannot be
+   created.
+   NOTE(review): tmpnam() leaves a window between choosing the name and
+   opening the file; flagged here rather than replaced. */
+
+static
+char *create_subcap_file()
+{
+     if (subcap_file[0] == '\0') {
+          FILE *fp;
+          tmpnam(subcap_file);
+          /* Clear errno first, as the other fopen() callers in this file
+             do, so that a failure is never reported with a stale code. */
+          errno = 0;
+          fp = fopen(subcap_file, "w");
+          if (!fp)
+               fatal(E_OPEN, subcap_file, strerror(errno));
+          fclose(fp);
+     }
+     return subcap_file;
+}
+
+/* Build the argument vector for a subdocument process: the saved common
+   arguments, -c with the capacity file name, the file written by
+   store_sgmldecl(), and then each file name in ID.  ID is read as a
+   sequence of NUL-terminated strings ended by an empty string.  The vector
+   is newly allocated; its strings are not copied. */
+
+char **make_argv(id)
+UNIV id;
+{
+     char **argv;
+     char *p;
+     int nfiles = 0;
+     int i;
+
+     p = (char *)id;
+     while (*p) {
+          nfiles++;
+          p += strlen(p) + 1;
+     }
+
+     /* Common arguments, "-c" and its file, the declaration file, the
+        document files and the terminating null. */
+     argv = (char **)xmalloc((subargc + 2 + 1 + nfiles + 1)*sizeof(char *));
+     memcpy((UNIV)argv, (UNIV)subargv, subargc*sizeof(char *));
+
+     i = subargc;
+     argv[i++] = "-c";
+     argv[i++] = create_subcap_file();
+     argv[i++] = store_sgmldecl();
+
+     p = (char *)id;
+     while (*p) {
+          argv[i++] = p;
+          p += strlen(p) + 1;
+     }
+     argv[i] = 0;
+     return argv;
+}
+
+/* Read the NCAPACITY numbers that a subdocument process wrote to the
+   capacity file and pass them to sgmlsubcap().  Does nothing if the file
+   was never created; exits through fatal() if it cannot be opened or does
+   not hold a name and a number for every capacity. */
+
+VOID get_subcaps()
+{
+     long cap[NCAPACITY];
+     FILE *fp;
+     int i;
+
+     if (subcap_file[0] == '\0')
+          return;
+
+     errno = 0;
+     fp = fopen(subcap_file, "r");
+     if (fp == 0)
+          fatal(E_OPEN, subcap_file, strerror(errno));
+
+     i = 0;
+     while (i < NCAPACITY) {
+          /* Each entry is a name, which is skipped, and a number. */
+          if (fscanf(fp, "%*s %ld", &cap[i]) != 1)
+               fatal(E_CAPBOTCH);
+          i++;
+     }
+     fclose(fp);
+     sgmlsubcap(cap);
+}
+
+
+#endif /* SUPPORT_SUBDOC */
+
+/* Print capacity statistics: write one line per capacity to the file
+   NAME, holding the capacity name and its number multiplied by its points.
+   Exits through fatal() if the file cannot be opened. */
+
+static VOID write_caps(name, p)
+char *name;
+struct sgmlcap *p;
+{
+     FILE *fp;
+     int i;
+     /* Clear errno first, as the other fopen() callers in this file do,
+        so that a failure is never reported with a stale error code. */
+     errno = 0;
+     fp = fopen(name, "w");
+     if (!fp)
+          fatal(E_OPEN, name, strerror(errno));
+     /* This is in RACT format. */
+     for (i = 0; i < NCAPACITY; i++)
+          fprintf(fp, "%s %ld\n", p->name[i], p->number[i]*p->points[i]);
+     fclose(fp);
+}
+
+/* Allocate N bytes and return the block; exits through fatal() when
+   memory is exhausted, so the result is never null.  A request for zero
+   bytes is served as one byte, because malloc(0) is allowed to return a
+   null pointer and that must not be reported as running out of memory. */
+
+UNIV xmalloc(n)
+UNS n;
+{
+     UNIV p = malloc(n ? n : 1);
+     if (!p)
+          fatal(E_NOMEM);
+     return p;
+}
+
+/* Resize the block S to N bytes, or allocate a new block when S is null,
+   and return it; exits through fatal() when memory is exhausted, so the
+   result is never null.  A request for zero bytes is served as one byte,
+   because realloc() and malloc() are allowed to return a null pointer for
+   a zero size and that must not be reported as running out of memory. */
+
+UNIV xrealloc(s, n)
+UNIV s;
+UNS n;
+{
+     if (n == 0)
+          n = 1;
+     /* malloc() is called directly for a null S rather than relying on
+        realloc() to accept a null pointer. */
+     s = s ? realloc(s, n) : malloc(n);
+     if (!s)
+          fatal(E_NOMEM);
+     return s;
+}
+
+static
+#ifdef VARARGS
+VOID fatal(va_alist) va_dcl
+#else
+VOID fatal(int errnum,...)
+#endif
+{
+#ifdef VARARGS
+ int errnum;
+#endif
+ va_list ap;
+
+#ifdef VARARGS
+ va_start(ap);
+ errnum = va_arg(ap, int);
+#else
+ va_start(ap, errnum);
+#endif
+ do_error(errnum, ap);
+ va_end(ap);
+ exit(EXIT_FAILURE);
+}
+
+#ifdef VARARGS
+VOID appl_error(va_alist) va_dcl
+#else
+VOID appl_error(int errnum,...)
+#endif
+{ /* Print the application error message ERRNUM, formatted with the remaining arguments; unlike fatal() this returns to the caller. */
+#ifdef VARARGS
+ int errnum;
+#endif
+ va_list ap;
+
+#ifdef VARARGS
+ va_start(ap);
+ errnum = va_arg(ap, int); /* with the old varargs interface the error number is fetched as the first argument */
+#else
+ va_start(ap, errnum);
+#endif
+ do_error(errnum, ap);
+ va_end(ap);
+}
+
+static
+VOID do_error(errnum, ap) /* Write "<prog>: <message>" and a newline to stderr, formatting message ERRNUM with the arguments in AP, then flush stderr. */
+int errnum;
+va_list ap;
+{
+ char *text;
+ fprintf(stderr, "%s: ", prog);
+ assert(errnum > 0); /* slot 0 of errlist is not a message */
+ assert(errnum < sizeof(errlist)/sizeof(errlist[0]));
+ text = catgets(catd, APP_SET, errnum, errlist[errnum]); /* errlist supplies the default text handed to catgets() */
+ assert(text != 0);
+ xvfprintf(stderr, text, ap);
+ fputc('\n', stderr);
+ fflush(stderr);
+}
+
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+comment-column: 30
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/md1.c b/usr.bin/sgmls/sgmls/md1.c
new file mode 100644
index 0000000..a2db320
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/md1.c
@@ -0,0 +1,862 @@
+#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */
+/* MDADL: Process ATTLIST declaration.
+   Parameter 1 is an element name or a name group; the reserved name
+   NOTATION (key[KNOTATION]) hands the rest of the declaration to mdnadl.
+   Parameter 2 is the attribute definition list, which is built in the
+   work list al (one mdattdef call per attribute) and then copied to
+   permanent storage (alperm).
+   Parameter 3 is the end of the declaration.
+   On success alperm is attached to every named element that does not
+   already have an attribute list.  If it ends up attached to no element
+   (or the declaration is not properly terminated) the permanent copy
+   and the names it owns are released again.
+*/
+VOID mdadl(tbuf)
+UNCH *tbuf;                   /* Work area for tokenization (tbuf). */
+{
+     int i;                   /* Loop counter; temporary variable. */
+     int adlim;               /* Number of unused ad slots in al. */
+     struct ad *alperm = 0;   /* Attribute definition list. */
+     int stored = 0;          /* 1=alperm was attached to an element. */
+
+     mdname = key[KATTLIST];  /* Identify declaration for messages. */
+     subdcl = 0;              /* No subject as yet. */
+     parmno = 0;              /* No parameters as yet. */
+     mdessv = es;             /* Save es level for entity nesting check. */
+     reqadn = noteadn = 0;    /* No required attributes yet. */
+     idadn = conradn = 0;     /* No special atts yet.*/
+     AN(al) = 0;              /* Number of attributes defined. */
+     ADN(al) = 0;             /* Number of ad's in al (atts + name vals).*/
+     /* PARAMETER 1: Element name or a group of them.
+     */
+     parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+     TRACEMD("1: element name or group");
+     switch (pcbmd.action) {
+     case NAS:
+          nmgrp[0] = etddef(tbuf);
+          nmgrp[1] = 0;
+          break;
+     case GRPS:
+          parsegrp(nmgrp, &pcbgrnm, tbuf);
+          break;
+     case RNS:                /* Reserved name started. */
+          if (ustrcmp(tbuf+1, key[KNOTATION])) {
+               mderr(118, tbuf+1, key[KNOTATION]);
+               return;
+          }
+          mdnadl(tbuf);
+          return;
+     default:
+          mderr(121, (UNCH *)0, (UNCH *)0);
+          return;
+     }
+     /* Save first GI for error msgs. */
+     if (nmgrp[0])
+          subdcl = nmgrp[0]->etdgi+1;
+     /* PARAMETER 2: Attribute definition list.
+     */
+     parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+     TRACEMD("2: attribute list");
+     if (pcbmd.action!=NAS) {
+          mderr(120, (UNCH *)0, (UNCH *)0);
+          return;
+     }
+     while (pcbmd.action==NAS) {
+          al[ADN(al)+1].adname = savenm(tbuf);
+          /* Count the new ad first so adlim is the space left after it. */
+          if ((adlim = ATTCNT-((int)++ADN(al)))<0) {
+               mderr(111, (UNCH *)0, (UNCH *)0);
+               adlfree(al, 1);
+               return;
+          }
+          ++AN(al);
+          if (mdattdef(adlim, 0)) {
+               adlfree(al, 1);
+               return;
+          }
+          parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+     }
+     if (AN(al)>0) {          /* Save list only if 1 or more good atts. */
+          if (reqadn) SET(ADLF(al), ADLREQ);    /* Element must have start-tag. */
+          if (noteadn) SET(ADLF(al), ADLNOTE);  /* Element cannot be EMPTY. */
+          if (conradn) SET(ADLF(al), ADLCONR);  /* Element cannot be EMPTY. */
+          alperm = (struct ad *)rmalloc((1+ADN(al))*ADSZ);
+          memcpy((UNIV)alperm, (UNIV)al, (1+ADN(al))*ADSZ );
+          ds.attcnt += AN(al);                  /* Number of attributes defined. */
+          ds.attgcnt += ADN(al) - AN(al);       /* Number of att grp members. */
+          TRACEADL(alperm);
+     }
+     /* Clear attribute list for next declaration. */
+     MEMZERO((UNIV)al, (1+ADN(al))*ADSZ);
+
+     /* PARAMETER 3: End of declaration.
+     */
+     /* Next pcb.action was set during attribute definition loop. */
+     TRACEMD(emd);
+     if (pcbmd.action!=EMD) {
+          mderr(126, (UNCH *)0, (UNCH *)0);
+          /* al has already been cleared and alperm has not been attached
+             to any element, so this is the last reference to the list:
+             release it and the names it owns instead of leaking them. */
+          if (alperm) {
+               adlfree(alperm, 1);
+               frem((UNIV)alperm);
+          }
+          return;
+     }
+     if (es!=mdessv) synerr(37, &pcbmd);
+
+     /* EXECUTE: Store the definition for each element name specified.
+     */
+     TRACEGRP(nmgrp);
+     for (i = 0; nmgrp[i]; i++) {
+          if (nmgrp[i]->adl) {     /* Error if an ADL exists. */
+               mderr(112, (UNCH *)0, (UNCH *)0);
+               continue;
+          }
+          nmgrp[i]->adl = alperm;  /* If virgin, store the adl ptr. */
+          stored = 1;
+          if (alperm && nmgrp[i]->etdmod)
+               etdadl(nmgrp[i]);   /* Check for conflicts with ETD. */
+     }
+     /* Nobody took the list: free it. */
+     if (!stored && alperm) {
+          adlfree(alperm, 1);
+          frem((UNIV)alperm);
+     }
+}
+/* ETDADL: Check compatibility between ETD and ADL.
+   Reports each conflict between the element type definition p and the
+   list-wide flags kept in entry 0 of its attribute definition list.
+   The only repair made is clearing the SMO minimization flag.
+*/
+VOID etdadl(p)
+struct etd *p;                /* Pointer to element type definition. */
+{
+     struct ad *hdr = p->adl; /* Entry 0 holds the list-wide ADL flags. */
+
+     parmno = 0;
+
+     /* A required attribute rules out start-tag omission. */
+     if (GET(p->etdmin, SMO) && GET(hdr->adflags, ADLREQ)) {
+          mderr(40, (UNCH *)0, (UNCH *)0);
+          RESET(p->etdmin, SMO);
+     }
+
+     if (GET(p->etdmod->ttype, MNONE)) {
+          /* EMPTY element with a NOTATION attribute.  The attribute is
+             left in place (removing it is too much trouble); attempts
+             to specify it on the start-tag are trapped in adlval().
+          */
+          if (GET(hdr->adflags, ADLNOTE)) {
+               mderr(83, (UNCH *)0, (UNCH *)0);
+          }
+          /* EMPTY element with a CONREF attribute.  The attribute is
+             left in place because it just acts like IMPLIED anyway.
+          */
+          if (GET(hdr->adflags, ADLCONR)) {
+               mderr(85, (UNCH *)0, (UNCH *)0);
+          }
+     }
+
+     /* A content reference attribute should not be combined with "-"
+        for the end-tag minimization. */
+     if (GET(hdr->adflags, ADLCONR) && BITON(p->etdmin, EMM)) {
+          mderr(153, (UNCH *)0, (UNCH *)0);
+     }
+}
+/* MDNADL: Process ATTLIST declaration for notation.
+   mdadl calls this after recognizing the reserved name NOTATION as
+   parameter 1.  This routine does not itself reset AN(al), ADN(al) or
+   mdessv; mdadl has done so before the call.
+   Parameter 1 is a notation name or name group, parameter 2 the data
+   attribute definition list (built in al by mdattdef, then copied to
+   permanent storage), parameter 3 the end of the declaration.
+   On success the list is attached to every named notation that does
+   not already have one; entities already declared with such a notation
+   are fixed up by fixdatt.  If the list ends up attached to nothing it
+   is released again.
+   TO DO: Pass deftab and dvtab as parameters so
+   that prohibited types can be handled by leaving
+   them out of the tables.
+*/
+VOID mdnadl(tbuf)
+UNCH *tbuf;                   /* Work area for tokenization (tbuf). */
+{
+     int i;                   /* Loop counter; temporary variable. */
+     int adlim;               /* Number of unused ad slots in al. */
+     struct ad *alperm = 0;   /* Attribute definition list. */
+     int stored = 0;          /* 1=alperm was attached to a notation. */
+
+     /* PARAMETER 1: Notation name or a group of them.
+     */
+     parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+     TRACEMD("1: notation name or group");
+     switch (pcbmd.action) {
+     case NAS:
+          nnmgrp[0] = dcndef(tbuf);
+          nnmgrp[1] = 0;
+          break;
+     case GRPS:
+          parsngrp(nnmgrp, &pcbgrnm, tbuf);
+          break;
+     default:
+          mderr(121, (UNCH *)0, (UNCH *)0);
+          return;
+     }
+     /* Save first name for error msgs.  Guard against an empty group in
+        the same way mdadl guards nmgrp[0]. */
+     if (nnmgrp[0])
+          subdcl = nnmgrp[0]->ename+1;
+     /* PARAMETER 2: Attribute definition list.
+     */
+     parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+     TRACEMD("2: attribute list");
+     if (pcbmd.action!=NAS) {
+          mderr(120, (UNCH *)0, (UNCH *)0);
+          return;
+     }
+     while (pcbmd.action==NAS) {
+          al[ADN(al)+1].adname = savenm(tbuf);
+          /* Pre-increment, exactly as mdadl does: adlim must be the room
+             left AFTER counting this ad.  A post-increment overstates the
+             remaining capacity by one and lets the list outgrow ATTCNT. */
+          if ((adlim = ATTCNT-((int)++ADN(al)))<0) {
+               mderr(111, (UNCH *)0, (UNCH *)0);
+               adlfree(al, 1);
+               return;
+          }
+          ++AN(al);
+          if (mdattdef(adlim, 1)) {
+               adlfree(al, 1);
+               return;
+          }
+          parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+     }
+     if (AN(al)>0) {          /* Save list only if 1 or more good atts. */
+          alperm = (struct ad *)rmalloc((1+ADN(al))*ADSZ);
+          memcpy((UNIV)alperm, (UNIV)al, (1+ADN(al))*ADSZ );
+          ds.attcnt += AN(al);                  /* Number of attributes defined. */
+          ds.attgcnt += ADN(al) - AN(al);       /* Number of att grp members. */
+          TRACEADL(alperm);
+     }
+     /* Clear attribute list for next declaration. */
+     MEMZERO((UNIV)al, (1+ADN(al))*ADSZ);
+
+     /* PARAMETER 3: End of declaration.
+     */
+     /* Next pcb.action was set during attribute definition loop. */
+     TRACEMD(emd);
+     if (pcbmd.action!=EMD) {
+          mderr(126, (UNCH *)0, (UNCH *)0);
+          /* al has already been cleared and alperm has not been attached
+             to any notation, so this is the last reference to the list:
+             release it and the names it owns instead of leaking them. */
+          if (alperm) {
+               adlfree(alperm, 1);
+               frem((UNIV)alperm);
+          }
+          return;
+     }
+     if (es!=mdessv) synerr(37, &pcbmd);
+
+     /* EXECUTE: Store the definition for each notation name specified.
+     */
+     TRACENGR(nnmgrp);
+     for (i = 0; nnmgrp[i]; i++) {
+          if (nnmgrp[i]->adl) {    /* Error if an ADL exists. */
+               mderr(112, (UNCH *)0, (UNCH *)0);
+               continue;
+          }
+          nnmgrp[i]->adl = alperm; /* If virgin, store the adl ptr. */
+          /* Entities were already declared with this notation. */
+          if (nnmgrp[i]->entsw)
+               fixdatt(nnmgrp[i]);
+          stored = 1;
+          TRACEDCN(nnmgrp[i]);
+     }
+     /* Nobody took the list: free it. */
+     if (!stored && alperm) {
+          adlfree(alperm, 1);
+          frem((UNIV)alperm);
+     }
+}
+
+/* FIXDATT: Data attributes have just been declared for notation p, but
+   entities using notation p were declared earlier.  Walk the whole
+   entity table and, for every entity of storage class ESN whose
+   notation is p, initialize al from p's attribute list and store the
+   data attributes for that entity.  Every attribute still flagged as
+   required draws error 218 and is marked invalid.
+*/
+
+VOID fixdatt(p)
+struct dcncb *p;
+{
+     int bucket;
+
+     for (bucket = 0; bucket < ENTHASH; bucket++) {
+          struct entity *ep;
+
+          for (ep = etab[bucket]; ep; ep = ep->enext) {
+               int adn;
+
+               /* Skip everything that is not an entity with notation p. */
+               if (ep->estore != ESN || !ep->etx.n || ep->etx.n->nedcn != p)
+                    continue;
+
+               initatt(p->adl);
+               /* adlval is deliberately not used here: its message for
+                  a required attribute would not name the entity. */
+               for (adn = 1; adn <= ADN(al); adn++) {
+                    if (GET(ADFLAGS(al,adn), AREQ)) {
+                         sgmlerr(218, &pcbstag, ADNAME(al,adn),
+                                 ep->ename + 1);
+                         SET(ADFLAGS(al,adn), AINVALID);
+                    }
+                    /* Step over the group members that follow. */
+                    if (BITON(ADFLAGS(al, adn), AGROUP))
+                         adn += ADNUM(al, adn);
+               }
+               storedatt(ep->etx.n);
+          }
+     }
+}
+
+/* MDATTDEF: Process an individual attribute definition.
+ The attribute name is parsed by the caller.
+ Duplicate attributes are parsed, but removed from list.
+ Returns 0 if successful, otherwise returns 1.
+
+ The attribute being defined is entry ADN(al) of the work list al.
+ Two kinds of problem are distinguished:
+ - semantic errors (errsw): the rest of the definition is still parsed,
+ the attribute is then dropped from al (AN/ADN are wound back) and 0
+ is returned so that the caller carries on with the next attribute;
+ - syntax errors: 1 is returned at once and the caller (mdadl/mdnadl)
+ frees the whole list and abandons the declaration.
+ A default value that cannot be used (novalsw, or a value that fails
+ validation) leaves the attribute in the list without a stored value.
+*/
+int mdattdef(adlim, datt)
+int adlim; /* Remaining capacity of al (in tokens).*/
+int datt; /* Non-zero if a data attribute. */
+{
+ int deftype; /* Default value type: 0=not keyword. */
+ int errsw = 0; /* 1=semantic error; ignore att. */
+ int novalsw = 0; /* 1=semantic error; treat as IMPLIED. */
+ int attadn = (int)ADN(al); /* Save ad number of this attribute. */
+ struct parse *grppcb = NULL; /* PCB for name/token grp parse. */
+ int errcode; /* Error type returned by PARSEVAL, ANMTGRP. */
+ UNCH *advalsv; /* Save area for permanent value ptr. */
+
+ /* PARAMETER 1: Attribute name (parsed by caller).
+ */
+ TRACEMD("1: attribute name");
+ /* Look for the same name among the entries before this one. */
+ if (anmget((int)ADN(al)-1, al[attadn].adname)) {
+ errsw = 1;
+ mderr(99, ADNAME(al,attadn), (UNCH *)0);
+ }
+ ADNUM(al,attadn) = ADFLAGS(al,attadn) = ADLEN(al,attadn) = 0;
+ ADVAL(al,attadn) = 0; ADDATA(al,attadn).x = 0; ADTYPE(al,attadn) = ANMTGRP;
+ /* PARAMETER 2: Declared value.
+ */
+ parsemd(lbuf, NAMECASE, &pcblitp, NAMELEN);
+ TRACEMD("2: declared value");
+ switch (pcbmd.action) {
+ case NAS: /* Keyword for value type. */
+ switch (ADTYPE(al,attadn) = (UNCH)mapsrch(dvtab, lbuf+1)) {
+ case 0:
+ mderr(100, ADNAME(al,attadn), lbuf+1);
+ return 1;
+ case ANOTEGRP:
+ if (datt) {
+ errsw = 1;
+ mderr(156, (UNCH *)0, (UNCH *)0);
+ }
+ else if (!noteadn) noteadn = ADN(al);
+ else {
+ errsw = 1;
+ mderr(101, ADNAME(al,attadn), (UNCH *)0);
+ }
+ grppcb = &pcbgrnm; /* NOTATION requires name grp. */
+ parsemd(lbuf, NAMECASE, &pcblitp, NAMELEN);/* Get GRPO*/
+ break;
+ case AID:
+ if (datt) {
+ errsw = 1;
+ mderr(144, (UNCH *)0, (UNCH *)0);
+ }
+ else if (!idadn)
+ idadn = attadn;
+ else {
+ errsw = 1;
+ mderr(102, ADNAME(al,attadn), (UNCH *)0);
+ }
+ break;
+ case AIDREF:
+ case AIDREFS:
+ if (datt) {
+ errsw = 1;
+ mderr(155, (UNCH *)0, (UNCH *)0);
+ }
+ break;
+ case AENTITY:
+ case AENTITYS:
+ if (datt) {
+ errsw = 1;
+ mderr(154, (UNCH *)0, (UNCH *)0);
+ }
+ break;
+ }
+ break;
+ case GRPS:
+ grppcb = &pcbgrnt; /* Normal grp is name token grp. */
+ break;
+ case EMD:
+ mderr(103, ADNAME(al,attadn), (UNCH *)0);
+ return 1;
+ default:
+ mderr(104, ADNAME(al,attadn), (UNCH *)0);
+ return 1;
+ }
+ /* PARAMETER 2A: Name token group.
+ */
+ if (grppcb != NULL) {
+ TRACEMD("2A: name group");
+ switch (pcbmd.action) {
+ case GRPS: /* Name token list. */
+ SET(ADFLAGS(al,attadn), AGROUP);
+ /* Call routine to parse group, create ad entries in adl. */
+ /* The limit passed is the smaller of GRPCNT and adlim, plus 1. */
+ errcode = anmtgrp(grppcb, al+attadn,
+ (GRPCNT<adlim ? GRPCNT+1 : adlim+1),
+ &al[attadn].adnum, ADN(al));
+ if (errcode<=0) {
+ if (adlim < GRPCNT)
+ mderr(111, (UNCH *)0, (UNCH *)0);
+ else
+ mderr(105, ADNAME(al,attadn), (UNCH *)0);
+ return 1;
+ }
+ ADN(al) += ADNUM(al,attadn); /* Add grp size to total ad cnt.*/
+ break;
+ default:
+ mderr(106, ADNAME(al,attadn), (UNCH *)0);
+ return 1;
+ }
+ }
+ /* PARAMETER 3: Default value keyword.
+ */
+ parsemd(lbuf, AVALCASE,
+ (ADTYPE(al,attadn)==ACHARS) ? &pcblitr : &pcblitt, LITLEN);
+ TRACEMD("3: default keyword");
+ switch (pcbmd.action) {
+ case RNS: /* Keyword. */
+ deftype = mapsrch(deftab, lbuf+1);
+ switch (deftype) {
+ case DFIXED: /* FIXED */
+ SET(ADFLAGS(al,attadn), AFIXED);
+ parsemd(lbuf, AVALCASE,
+ (ADTYPE(al,attadn)==ACHARS) ? &pcblitr : &pcblitt,
+ LITLEN); /* Real default. */
+ goto parm3x; /* Go process specified value. */
+ case DCURR: /* CURRENT: If ID, treat as IMPLIED. */
+ if (ADTYPE(al,attadn)==AID) {
+ mderr(80, ADNAME(al,attadn), (UNCH *)0);
+ break;
+ }
+ if (datt) {
+ mderr(157, (UNCH *)0, (UNCH *)0);
+ break;
+ }
+ SET(ADFLAGS(al,attadn), ACURRENT);
+ break;
+ case DREQ: /* REQUIRED */
+ SET(ADFLAGS(al,attadn), AREQ); ++reqadn;
+ break;
+ case DCONR: /* CONREF */
+ if (ADTYPE(al,attadn)==AID) {
+ mderr(107, ADNAME(al,attadn), (UNCH *)0);
+ break;
+ }
+ if (datt) {
+ mderr(158, (UNCH *)0, (UNCH *)0);
+ break;
+ }
+ SET(ADFLAGS(al,attadn), ACONREF); conradn = 1;
+ /* fall through: nothing more to do than for IMPLIED */
+ case DNULL: /* IMPLIED */
+ break;
+ default: /* Unknown keyword is an error. */
+ mderr(108, ADNAME(al,attadn), lbuf+1);
+ errsw = 1;
+ }
+ /* A keyword default (other than FIXED) ends the definition here. */
+ if (errsw) {
+ /* Ignore erroneous att. */
+ adlfree(al, attadn);
+ --AN(al);
+ ADN(al) = (UNCH)attadn-1;
+ }
+ return(0);
+ default:
+ break;
+ }
+ /* PARAMETER 3x: Default value (non-keyword).
+ */
+ parm3x:
+ TRACEMD("3x: default (non-keyword)");
+ if (ADTYPE(al,attadn)==AID) { /* If ID, treat as IMPLIED. */
+ mderr(81, ADNAME(al,attadn), (UNCH *)0);
+ novalsw = 1; /* Keep parsing to keep things straight. */
+ }
+ switch (pcbmd.action) {
+ case LIT: /* Literal. */
+ case LITE: /* Literal. */
+ /* Null string (except CDATA) is error: msg and treat as IMPLIED. */
+ if (*lbuf == '\0' && ADTYPE(al,attadn)!=ACHARS) {
+ mderr(82, ADNAME(al,attadn), (UNCH *)0);
+ novalsw = 1;
+ }
+ break;
+ case NAS: /* Name character string. */
+ case NMT: /* Name character string. */
+ case NUM: /* Number or number token string. */
+ /* The name won't have a length byte because AVALCASE was specified. */
+ break;
+ case CDR:
+ parsetkn(lbuf, NMC, LITLEN);
+ break;
+ case EMD:
+ mderr(109, ADNAME(al,attadn), (UNCH *)0);
+ return 1;
+ default:
+ mderr(110, ADNAME(al,attadn), (UNCH *)0);
+ return 1;
+ }
+ if (errsw) {
+ /* Ignore erroneous att. */
+ adlfree(al, attadn);
+ --AN(al);
+ ADN(al) = (UNCH)attadn-1;
+ return(0);
+ }
+ /* Attribute is kept, but without a default value. */
+ if (novalsw) return(0);
+
+ /* PARAMETER 3y: Validate and store default value.
+ */
+ if (ADTYPE(al,attadn)==ACHARS) {
+ UNS len = vallen(ACHARS, 0, lbuf);
+ if (len > LITLEN) {
+ /* Treat as implied. */
+ sgmlerr(224, &pcbmd, ADNAME(al,attadn), (UNCH *)0);
+ return 0;
+ }
+ /* No more checking for CDATA value. */
+ ADNUM(al,attadn) = 0; /* CDATA is 0 tokens. */
+ ADVAL(al,attadn) = savestr(lbuf);/* Store default; save ptr. */
+ ADLEN(al,attadn) = len;
+ ds.attdef += len;
+ return 0;
+ }
+ /* Parse value and save token count (GROUP implies 1 token). */
+ advalsv = (UNCH *)rmalloc(ustrlen(lbuf)+2); /* Storage for tokenized value. */
+ errcode = parseval(lbuf, (UNS)ADTYPE(al,attadn), advalsv);
+ if (BITOFF(ADFLAGS(al,attadn), AGROUP)) ADNUM(al,attadn) = (UNCH)tokencnt;
+
+ /* If value was invalid, or was a group member that was not in the group,
+ issue an appropriate message and set the error switch. */
+ if (errcode)
+ {sgmlerr((UNS)errcode, &pcbmd, ADNAME(al,attadn), lbuf); errsw = 1;}
+ else if ( BITON(ADFLAGS(al,attadn), AGROUP)
+ && !amemget(&al[attadn], (int)ADNUM(al,attadn), advalsv) ) {
+ sgmlerr(79, &pcbmd, ADNAME(al,attadn), advalsv+1);
+ errsw = 1;
+ }
+ ADLEN(al,attadn) = vallen(ADTYPE(al,attadn), ADNUM(al,attadn), advalsv);
+ if (ADLEN(al,attadn) > LITLEN) {
+ sgmlerr(224, &pcbmd, ADNAME(al,attadn), (UNCH *)0);
+ ADLEN(al,attadn) = 0;
+ errsw = 1;
+ }
+ /* For valid tokenized value, save it and update statistics. */
+ if (!errsw) {
+ ADVAL(al,attadn) = advalsv;
+ ds.attdef += ADLEN(al,attadn);
+ return 0;
+ }
+ /* If value was bad, free the value's storage and treat as
+ IMPLIED or REQUIRED. */
+ frem((UNIV)advalsv); /* Release storage for value. */
+ ADVAL(al,attadn) = NULL; /* And make value NULL. */
+ return 0;
+}
+/* ANMTGRP: Parse a name or name token group, create attribute descriptors
+ for its members, and add them to the attribute descriptor list.
+ The parse either terminates or returns a good token, so no
+ switch is needed.
+
+ Members are written to nt[1], nt[2], ... (nt[0] is the attribute the
+ group belongs to) and counted in *adn.  A member whose name is
+ reported as already present by antvget draws error 98 but is still
+ added.  Parsing stops at the group close (GRPE), when *adn reaches
+ grplim, or on any action other than a name, entity end or PI entity.
+ Returns -1 if *adn reached grplim, otherwise the number of members.
+*/
+int anmtgrp(pcb, nt, grplim, adn, adsz)
+struct parse *pcb; /* PCB for name or name token grp. */
+struct ad nt[]; /* Buffer for creating name token list. */
+int grplim; /* Maximum size of list (plus 1). */
+UNS *adn; /* Ptr to number of names or tokens in grp. */
+int adsz; /* Size of att def list. */
+{
+ UNCH adtype = (UNCH)(pcb==&pcbgrnt ? ANMTGRP:ANOTEGRP);/*Attribute type.*/
+ int essv = es; /* Entity stack level when grp started. */
+
+ *adn = 0; /* Group is empty to start. */
+ while (parse(pcb)!=GRPE && *adn<grplim) {
+ switch (pcb->action) {
+ case NAS_: /* Name or name token (depending on pcb). */
+ case NMT_:
+ parsenm(lbuf, NAMECASE);
+ nt[*adn+1].adname = savenm(lbuf);
+ if (antvget((int)(adsz+*adn), nt[*adn+1].adname, (UNCH **)0))
+ mderr(98, ntoa((int)*adn+1), nt[*adn+1].adname+1);
+ nt[++*adn].adtype = adtype;
+ nt[*adn].addef = NULL;
+ continue;
+
+ case EE_: /* Entity ended (correctly or incorrectly). */
+ if (es<essv) {synerr(37, pcb); essv = es;}
+ continue;
+
+ case PIE_: /* PI entity reference (invalid). */
+ entpisw = 0; /* Reset PI entity indicator. */
+ synerr(59, pcb);
+ continue;
+
+ default:
+ break;
+ }
+ /* Only reached for an unexpected action: leave the while loop. */
+ break;
+ }
+ if (es!=essv) synerr(37, pcb);
+ if (*adn==grplim) return -1;
+ else return *adn; /* Return number of tokens. */
+}
+/* MDDTDS: Process start of DOCTYPE declaration (through MSO).
+ Parameter 1 is the document type name (saved in dtype).
+ Parameter 2 is either an external identifier, handled by mdextid and
+ turned into entity text by entgen, or the start of the declaration
+ subset (MDS).
+ Parameter 3 (only after an external identifier) is MDS or the end of
+ the declaration.
+ On exit the parser is switched to the declaration subset state
+ (propcb = &pcbmds) and indtdsw is set.  When an external DTD entity
+ was generated it is defined as indtdent.
+*/
+VOID mddtds(tbuf)
+UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */
+{
+ struct fpi fpicb; /* Formal public identifier structure. */
+ union etext etx; /* Ptr to entity text. */
+ UNCH estore = ESD; /* Entity storage class. */
+ int emdsw = 0; /* 1=end of declaration found; 0=not yet. */
+
+ mdname = key[KDOCTYPE]; /* Identify declaration for messages. */
+ subdcl = NULL; /* No subject as yet. */
+ parmno = 0; /* No parameters as yet. */
+ mdessv = es; /* Save es for checking entity nesting. */
+ dtdrefsw = 0; /* No external DTD entity as yet. */
+ /* PARAMETER 1: Document type name.
+ */
+ pcbmd.newstate = 0;
+ parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+ TRACEMD("1: doc type name");
+ if (pcbmd.action!=NAS) {mderr(120, (UNCH *)0, (UNCH *)0); return;}
+ dtype = savenm(tbuf);
+ subdcl = dtype+1; /* Subject of declaration for error msgs. */
+
+ /* PARAMETER 2: External identifier keyword or MDS.
+ */
+ pcbmd.newstate = 0;
+ parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+ TRACEMD("2: extid or MDS");
+ switch (pcbmd.action) {
+ case NAS:
+ if (mdextid(tbuf, &fpicb, dtype+1, &estore, (PNE)0)==0) return;
+ if ((etx.x = entgen(&fpicb))==0)
+ mderr(146, dtype+1, (UNCH *)0);
+ else
+ dtdrefsw = 1; /* Signal external DTD entity. */
+ break;
+ case MDS:
+ goto execute;
+ default:
+ mderr(128, (UNCH *)0, (UNCH *)0);
+ return;
+ }
+ /* PARAMETER 3: MDS or end of declaration.
+ */
+ /* No parsemd call here: pcbmd.action is examined as it stands. */
+ TRACEMD("3: MDS or EMD");
+ switch (pcbmd.action) {
+ default: /* Treat as end of declaration. */
+ mderr(126, (UNCH *)0, (UNCH *)0);
+ /* fall through */
+ case EMD:
+ emdsw = 1;
+ /* fall through */
+ case MDS:
+ break;
+ }
+ /* EXECUTE: Store entity definition if an external ID was specified.
+ */
+ execute:
+ if (es!=mdessv) synerr(37, &pcbmd);
+ propcb = &pcbmds; /* Prepare to parse doc type definition (MDS). */
+ if (dtdrefsw) {
+ /* TO DO: If concurrent DTD's supported, free existing
+ etext for all but first DTD (or reuse it). */
+ entdef(indtdent, estore, &etx);
+ ++ds.ecbcnt; ds.ecbtext += entlen;
+ if (emdsw) {
+ REPEATCC; /* Push back the MDC. */
+ *FPOS = lex.d.msc; /* Simulate end of DTD subset. */
+ REPEATCC; /* Back up to read MSC next. */
+ delmscsw = 1; /* Insert MSC after referenced DTD. */
+ }
+ }
+ indtdsw = 1; /* Allow "DTD only" parameters. */
+ return;
+}
+/* MDDTDE: Process DOCTYPE declaration end.
+   Restores the normal prolog parse, turns off the "DTD only" switch
+   and then expects the end of the declaration: error 126 is issued
+   if something else is found, error 37 if the entity level differs
+   from the one at entry.
+*/
+VOID mddtde(tbuf)
+UNCH *tbuf;                   /* Work area for tokenization. */
+{
+     /* Message context: declaration name, subject, parameter count. */
+     mdname = key[KDOCTYPE];
+     subdcl = dtype+1;
+     parmno = 0;
+
+     /* Parser state. */
+     mdessv = es;             /* Entity level for the nesting check. */
+     propcb = &pcbpro;        /* Back to the normal prolog parse. */
+     indtdsw = 0;             /* "DTD only" parameters now prohibited. */
+
+     /* PARAMETER 4: End of declaration.
+     */
+     pcbmd.newstate = 0;
+     parsemd(tbuf, NAMECASE, &pcblitp, LITLEN);
+     TRACEMD(emd);
+     if (pcbmd.action!=EMD) {
+          mderr(126, (UNCH *)0, (UNCH *)0);
+     }
+     if (es!=mdessv) {
+          synerr(37, &pcbmd);
+     }
+}
+/* MDELEM: Process ELEMENT declaration.
+ Parameters, in order: element name or name group (1), optional rank
+ suffix (1A), start-tag and end-tag minimization (2A, 2B), declared
+ content keyword or content model group (3), optional minus and plus
+ exception groups (3A, 3B), end of declaration (4).
+ The content model is copied to permanent storage and shared by every
+ element named in the declaration.  Each element is then defined with
+ etdset, unless it already has a content model (error 56).
+ When a rank suffix was given, each name is replaced by the name with
+ the suffix appended before the definition is stored.
+*/
+VOID mdelem(tbuf)
+UNCH *tbuf; /* Work area for tokenization (tbuf). */
+{
+ UNCH *ranksuff = lbuf; /* Rank suffix. */
+ UNS dctype = 0; /* Declared content type (from dctab). */
+ UNCH fmin = 0; /* Minimization bit flags. */
+ int i; /* Loop counter. */
+ UNS u; /* Temporary variable. */
+ struct etd **mexgrp, **pexgrp; /* Ptr to model exceptions array. */
+ struct thdr *cmod, *cmodsv; /* Ptr to content model. */
+ UNCH *etdgi; /* GI of current etd (when going through group).*/
+ int minomitted = 0; /* Tag minimization parameters omitted. */
+
+ mdname = key[KELEMENT]; /* Identify declaration for messages. */
+ subdcl = NULL; /* No subject as yet. */
+ parmno = 0; /* No parameters as yet. */
+ mdessv = es; /* Save es level for entity nesting check. */
+ ranksuff[0] = 0;
+ mexgrp = pexgrp = 0;
+
+ /* PARAMETER 1: Element name or a group of them.
+ */
+ parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+ TRACEMD("1: element name or grp");
+ switch (pcbmd.action) {
+ case NAS:
+ nmgrp[0] = etddef(tbuf);
+ nmgrp[1] = 0;
+ break;
+ case GRPS:
+ parsegrp(nmgrp, &pcbgrnm, tbuf);
+ break;
+ default:
+ mderr(121, (UNCH *)0, (UNCH *)0);
+ return;
+ }
+ /* Save first GI for trace and error messages. */
+ if (nmgrp[0])
+ subdcl = nmgrp[0]->etdgi+1;
+
+ /* PARAMETER 1A: Rank suffix (optional).
+ */
+ parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+ TRACEMD("1A: rank suffix");
+ switch (pcbmd.action) {
+ case NUM:
+ ustrcpy(ranksuff, tbuf);
+ /* Suffix consumed: read the parameter that follows it. */
+ parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+ /* fall through */
+ default:
+ break;
+ }
+ /* PARAMETER 2A: Start-tag minimization.
+ */
+ TRACEMD("2A: start min");
+ switch (pcbmd.action) {
+ case CDR:
+ break;
+ case NAS:
+ if (!ustrcmp(tbuf+1, key[KO])) {
+ if (OMITTAG==YES) SET(fmin, SMO);
+ break;
+ }
+ /* fall through */
+ default:
+ /* With OMITTAG NO the current token is taken to be parameter 3. */
+ if (OMITTAG==NO) {minomitted=1; break;}
+ mderr(129, tbuf+1, (UNCH *)0);
+ return;
+ }
+ /* Must omit omitted end-tag minimization, if omitted
+ start-tag minimization was omitted (because OMITTAG == NO). */
+ if (!minomitted) {
+ /* PARAMETER 2B: End-tag minimization.
+ */
+ parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+ TRACEMD("2B: end min");
+ switch (pcbmd.action) {
+ case NAS:
+ if (ustrcmp(tbuf+1, key[KO])) {mderr(129, tbuf+1, (UNCH *)0); return;}
+ if (OMITTAG==YES) SET(fmin, EMO);
+ break;
+ case CDR:
+ SET(fmin, EMM);
+ break;
+ default:
+ mderr(129, tbuf+1, (UNCH *)0);
+ return;
+ }
+ /* PARAMETER 3: Declared content.
+ */
+ parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+ }
+ TRACEMD("3: declared content");
+ switch (pcbmd.action) {
+ case NAS:
+ dctype = mapsrch(dctab, tbuf+1);
+ if (!dctype) {mderr(24, tbuf+1, (UNCH *)0); return;}
+ /* Eliminate incompatibilities among parameters. */
+ if (GET(fmin, SMO) && GET(dctype, MNONE+MCDATA+MRCDATA)) {
+ mderr(58, (UNCH *)0, (UNCH *)0);
+ RESET(fmin, SMO);
+ }
+ if (GET(dctype, MNONE) && BITON(fmin, EMM)) {
+ mderr(87, (UNCH *)0, (UNCH *)0);
+ SET(fmin, EMO);
+ }
+ /* If valid, process like a content model. */
+ /* fall through */
+ case GRPS:
+ cmodsv = parsemod((int)(pcbmd.action==GRPS ? 0 : dctype));
+ if (cmodsv==0) return;
+ /* Declared content needs a single header; a model needs tnum+2. */
+ u = (dctype ? 1 : cmodsv->tu.tnum+2) * THSZ;
+ cmod = (struct thdr *)rmalloc(u);
+ memcpy((UNIV)cmod , (UNIV)cmodsv, u );
+ ds.modcnt += cmod->tu.tnum;
+ TRACEMOD(cmod);
+ break;
+ default:
+ mderr(130, (UNCH *)0, (UNCH *)0);
+ return;
+ }
+ /* PARAMETERS 3A, 3B: Exceptions or end.
+ */
+ parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+ /* Exceptions are only looked for when the content is a model. */
+ if (BITOFF(cmod->ttype, MCDATA+MRCDATA+MNONE)) {
+ /* PARAMETER 3A: Minus exceptions.
+ */
+ TRACEMD("3A: -grp");
+ switch (pcbmd.action) {
+ case MGRP:
+ /* We cheat and use nnmgrp for this. */
+ mexgrp = copygrp((PETD *)nnmgrp,
+ u = parsegrp((PETD *)nnmgrp, &pcbgrnm, tbuf));
+ ++ds.pmexgcnt; ds.pmexcnt += u-1;
+ TRACEGRP(mexgrp);
+ parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+ /* fall through */
+ default:
+ break;
+ }
+ /* PARAMETER 3B: Plus exceptions.
+ */
+ TRACEMD("3B: +grp");
+ switch (pcbmd.action) {
+ case PGRP:
+ pexgrp = copygrp((PETD *)nnmgrp,
+ u = parsegrp((PETD *)nnmgrp, &pcbgrnm, tbuf));
+ ++ds.pmexgcnt; ds.pmexcnt += u-1;
+ TRACEGRP(pexgrp);
+ parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+ /* fall through */
+ default:
+ break;
+ }
+ }
+ /* PARAMETER 4: End of declaration.
+ */
+ TRACEMD(emd);
+ /* A missing EMD is reported, but the definition is still stored. */
+ if (pcbmd.action!=EMD) mderr(126, (UNCH *)0, (UNCH *)0);
+ if (es!=mdessv) synerr(37, &pcbmd);
+
+ /* EXECUTE: Store the definition for each element name specified.
+ */
+ TRACEGRP(nmgrp);
+ for (i = -1; nmgrp[++i];) {
+ etdgi = nmgrp[i]->etdgi;
+ if (*ranksuff) {
+ /* Build the GI with the rank suffix appended in tbuf. */
+ if ((tbuf[0] = *etdgi + ustrlen(ranksuff)) - 2 > NAMELEN) {
+ mderr(131, etdgi+1, ranksuff);
+ continue;
+ }
+ memcpy(tbuf+1, etdgi+1, *etdgi-1);
+ ustrcpy(tbuf+*etdgi-1, ranksuff);
+ etdcan(etdgi);
+ nmgrp[i] = etddef(tbuf);
+ }
+ if (nmgrp[i]->etdmod) {mderr(56, etdgi+1, (UNCH *)0); continue;}
+ etdset(nmgrp[i], fmin+ETDDCL, cmod, mexgrp, pexgrp, nmgrp[i]->etdsrm);
+ ++ds.etdcnt;
+ if (nmgrp[i]->adl) etdadl(nmgrp[i]); /* Check ETD conflicts. */
+ TRACEETD(nmgrp[i]);
+ }
+}
+
+/* ADLFREE: Release the storage owned by entries aln through ADN(al) of
+   the attribute definition list al: each attribute's name, its value
+   if it has one, and the names of the group members that follow an
+   attribute flagged AGROUP.  The array al itself is not freed.
+*/
+VOID adlfree(al, aln)
+struct ad *al;
+int aln;
+{
+     while (aln <= ADN(al)) {
+          frem((UNIV)al[aln].adname);
+          if (ADVAL(al, aln))
+               frem((UNIV)ADVAL(al, aln));
+          if (BITON(ADFLAGS(al, aln), AGROUP)) {
+               int member;
+
+               /* Members occupy the ADNUM slots after the attribute. */
+               for (member = 1; member <= ADNUM(al, aln); member++)
+                    frem((UNIV)al[aln + member].adname);
+               aln += ADNUM(al, aln);       /* Skip past the members. */
+          }
+          aln++;
+     }
+}
+
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+comment-column: 30
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/md2.c b/usr.bin/sgmls/sgmls/md2.c
new file mode 100644
index 0000000..846c555
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/md2.c
@@ -0,0 +1,801 @@
+#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */
+/* MDENTITY: Process ENTITY declaration.
+   Parses, in order: the entity name (a general entity name, a parameter
+   entity name, or the reserved name #DEFAULT); then either an external
+   identifier (handed to mdextid) or an optional entity text keyword
+   followed by a parameter literal; then the end of the declaration.
+   A new entity is stored with entdef.  If the entity already exists the
+   new definition is ignored, unless the existing control block has dflt
+   set, in which case the old block is removed and replaced.
+   The control block allocated for an external identifier (pne) is
+   released on every path that does not store it in the entity.
+*/
+VOID mdentity(tbuf)
+UNCH *tbuf;                   /* Work area for tokenization[LITLEN+2]. */
+{
+     struct fpi fpicb;        /* Formal public identifier structure. */
+     struct fpi *fpis = &fpicb;  /* Ptr to current or #DEFAULT fpi. */
+     union etext etx;         /* Ptr to entity text. */
+     UNCH estore = ESM;       /* Entity storage class. */
+     struct entity *ecb;      /* Ptr to entity control block. */
+     int parmsw = 0;          /* 1=parameter entity declaration; 0 = not. */
+     int defltsw = 0;         /* 1=#DEFAULT declaration; 0=not. */
+     PNE pne = 0;             /* Ptr to N/C/SDATA entity control block. */
+
+     mdname = key[KENTITY];   /* Declaration name for messages. */
+     subdcl = NULL;           /* No subject as yet. */
+     parmno = 0;              /* No parameters as yet. */
+     mdessv = es;             /* Save es for checking entity nesting. */
+     /* PARAMETER 1: Entity name.
+     */
+     pcbmd.newstate = 0;
+     parsemd(nmbuf, ENTCASE, &pcblitp, NAMELEN);
+     TRACEMD("1: entity nm");
+     switch (pcbmd.action) {
+     case PEN:
+          /* Parse the name one byte further on, leaving room for the PERO. */
+          parsemd(nmbuf + 1, ENTCASE, &pcblitp, NAMELEN);
+          if (pcbmd.action!=NAS) {mderr(120, (UNCH *)0, (UNCH *)0); return;}
+          if (nmbuf[1] == NAMELEN + 2) {
+               /* It was too long. */
+               nmbuf[0] = NAMELEN + 2;
+               nmbuf[NAMELEN + 1] = '\0';
+               mderr(65, (UNCH *)0, (UNCH *)0);
+          }
+          else
+               nmbuf[0] = nmbuf[1] + 1;  /* Increment length for PERO. */
+          nmbuf[1] = lex.d.pero;         /* Prefix PERO to name. */
+          parmsw = 1;                    /* Indicate parameter entity. */
+          /* Fall through: the name itself is handled like a general name. */
+     case NAS:
+          break;
+     case RNS:                /* Reserved name started. */
+          if (ustrcmp(nmbuf+1, key[KDEFAULT])) {
+               mderr(118, nmbuf+1, key[KDEFAULT]);
+               return;
+          }
+          memcpy(nmbuf, indefent, *indefent);/* Copy #DEFAULT to name buffer. */
+          fpis = &fpidf;      /* Use #DEFAULT fpi if external. */
+          defltsw = 1;        /* Indicate #DEFAULT is being defined.*/
+          break;
+     default:
+          mderr(122, (UNCH *)0, (UNCH *)0);
+          return;
+     }
+     subdcl = nmbuf+1;        /* Subject name for error messages. */
+     /* PARAMETER 2: Entity text keyword (optional).
+     */
+     pcbmd.newstate = 0;
+     parsemd(tbuf, NAMECASE, &pcblitp, LITLEN);
+     TRACEMD("2: keyword");
+     switch (pcbmd.action) {
+     case NAS:
+          if ((estore = (UNCH)mapsrch(enttab, tbuf+1))==0) {
+               /* Not an entity text keyword: must be an external identifier. */
+               estore = parmsw ? ESP : ESF;
+               pne = (PNE)rmalloc(NESZ);
+               if (mdextid(tbuf, fpis, nmbuf+1+parmsw, &estore, pne)==0) {
+                    /* Nothing else refers to pne yet, so release it
+                       instead of leaking it on the error return. */
+                    frem((UNIV)pne);
+                    return;
+               }
+               if (defltsw) etx.x = NULL;
+               else if ((etx.x = entgen(&fpicb))==0) {
+                    if (parmsw)
+                         mderr(148, nmbuf+2, (UNCH *)0);
+                    else
+                         mderr(147, nmbuf+1, (UNCH *)0);
+               }
+               /* mdextid already parsed the token after the identifier. */
+               goto parm4;
+          }
+          if (parmsw && (estore==ESX || estore==ESC)) {
+               mderr(38, tbuf+1, (UNCH *)0);
+               estore = ESM;
+          }
+          pcbmd.newstate = 0;
+          parsemd(tbuf, NAMECASE, &pcblitp, LITLEN);
+          break;
+     default:
+          estore = ESM;
+          break;
+     }
+     /* PARAMETER 3: Parameter literal.
+     */
+     TRACEMD("3: literal");
+     switch (pcbmd.action) {
+     case LITE:
+     case LIT:
+          switch (estore) {
+          case ESM:           /* LITERAL: parameter literal required. */
+          case ESC:           /* CDATA: parameter literal required. */
+          case ESX:           /* SDATA: parameter literal required. */
+          case ESI:           /* PI: parameter literal required. */
+               etx.c = savestr(tbuf);
+               break;
+          case ESMD:          /* MD: parameter literal required. */
+               etx.c = sandwich(tbuf, lex.m.mdo, lex.m.mdc);
+               goto bcheck;
+          case ESMS:          /* MS: parameter literal required. */
+               etx.c = sandwich(tbuf, lex.m.mss, lex.m.mse);
+               goto bcheck;
+          case ESS:           /* STARTTAG: parameter literal required. */
+               etx.c = sandwich(tbuf, lex.m.stag, lex.m.tagc);
+               goto bcheck;
+          case ESE:           /* ENDTAG: parameter literal required. */
+               etx.c = sandwich(tbuf, lex.m.etag, lex.m.tagc);
+          bcheck:
+               /* sandwich returns 0 when the bracketed text is too long. */
+               if (etx.c == 0) {
+                    mderr(225, (UNCH *)0, (UNCH *)0);
+                    return;
+               }
+               break;
+          }
+          break;
+     default:
+          mderr(123, (UNCH *)0, (UNCH *)0);
+          return;
+     }
+     /* PARAMETER 4: End of declaration.
+     */
+     pcbmd.newstate = 0;
+     parsemd(tbuf, NAMECASE, &pcblitp, LITLEN);
+ parm4:
+     TRACEMD(emd);
+     if (pcbmd.action!=EMD) mderr(126, (UNCH *)0, (UNCH *)0);
+     if (es!=mdessv) synerr(37, &pcbmd);
+
+     /* EXECUTE: If the entity already exists, ignore the new definition.
+                 If it is a new entity, store the definition.
+     */
+     if ((ecb = entfind(nmbuf))!=0 && ecb->estore) {
+          if (ecb->dflt) {
+               /* Existing block is flagged dflt: discard it so that the
+                  new definition can take its place. */
+               mderr(228, nmbuf + 1, (UNCH *)0);
+               hout((THASH)etab, nmbuf, hash(nmbuf, ENTHASH));
+               if (ecb->estore == ESN) {
+                    frem((UNIV)NEID(ecb->etx.n));
+                    frem((UNIV)ecb->etx.n);
+               }
+               else if (ecb->estore >= ESFM)
+                    frem((UNIV)ecb->etx.x);
+               frem((UNIV)ecb);
+          }
+          else {
+               /* Duplicate definition: not an error. */
+               if (sw.swdupent) mderr(68, nmbuf+1, (UNCH *)0);
+               if (estore<ESFM) frem((UNIV)etx.c);
+               /* The ignored definition never gets stored, so its
+                  external entity block would otherwise be leaked. */
+               if (pne) frem((UNIV)pne);
+               /* NOTE(review): etx.x from entgen and any attribute list
+                  attached to pne are still not released here; confirm
+                  their ownership before freeing them. */
+               return;
+          }
+     }
+     ++ds.ecbcnt;             /* Do capacity before NOTATION. */
+     ds.ecbtext += estore<ESFM ? ustrlen(etx.c) : entlen;
+     ecb = entdef(nmbuf, estore, &etx);  /* Define the entity. */
+     if (estore==ESN) {       /* If entity is external: */
+          NEENAME(pne) = ecb->ename;     /* Store entity name in ne. */
+          NEID(pne) = etx.x;             /* Store system fileid in ne. */
+          NESYSID(pne) = fpis->fpisysis ? savestr(fpis->fpisysis) : 0;
+          NEPUBID(pne) = fpis->fpipubis ? savestr(fpis->fpipubis) : 0;
+          ecb->etx.n = pne;   /* Store ne control block in etx. */
+          TRACEESN(pne);
+     }
+     else if (pne)
+          frem((UNIV)pne);    /* Allocated but not needed for this class. */
+     if (defltsw) {
+          ecbdeflt = ecb;     /* If #DEFAULT save ecb. */
+          /* fpidf still points into the parse buffers: take copies. */
+          if (fpidf.fpipubis)
+               fpidf.fpipubis = savestr(fpidf.fpipubis);
+          if (fpidf.fpisysis)
+               fpidf.fpisysis = savestr(fpidf.fpisysis);
+     }
+}
+/* SANDWICH: Build a new string made of a prefix, a string and a suffix.
+   The result is allocated with rmalloc; it has an EOS but no length.
+   Return 0 if the result is longer than LITLEN.
+*/
+UNCH *sandwich(s, pref, suff)
+UNCH *s;                      /* String, with EOS. */
+UNCH *pref;                   /* Prefix, with length and EOS. */
+UNCH *suff;                   /* Suffix, with length and EOS. */
+{
+     /* The length byte of pref/suff counts itself and the EOS as well. */
+     int nprefix = *pref - 2;
+     int nsuffix = *suff - 2;
+     UNS nbody = ustrlen(s);
+     UNS total = nbody + nprefix + nsuffix;
+     UNCH *result;
+     UNCH *dst;                /* Next free position in result. */
+
+     if (total > LITLEN)
+          return 0;
+     result = (UNCH *)rmalloc(total + 1);
+     dst = result;
+     memcpy(dst, pref + 1, nprefix);
+     dst += nprefix;
+     memcpy(dst, s, nbody);
+     dst += nbody;
+     memcpy(dst, suff + 1, nsuffix + 1);    /* Suffix together with its EOS. */
+     return result;
+}
+/* MDEXTID: Process external identifier parameter of a markup declaration.
+ On entry, tbuf contains SYSTEM or PUBLIC if all is well.
+ NULL is returned if an error, otherwise fpis. If it is a
+ valid external data entity, the caller's estore is set to ESN
+ and the entity type code is stored in the caller's pne.
+ The event that terminated the parse is preserved in pcbmd.action,
+ so the caller should process it before further parsing.
+ On return fpis->fpipubis and fpis->fpisysis, when set, point at
+ pubibuf and sysibuf rather than at private copies, and entlen
+ holds the summed length of the literals that were present.
+ pne is not touched unless *estore is ESF on entry and an entity
+ type keyword follows the identifier (or *estore is ESN at genfpi).
+*/
+struct fpi *mdextid(tbuf, fpis, ename, estore, pne)
+UNCH *tbuf; /* Work area for tokenization[2*(LITLEN+2)]. */
+struct fpi *fpis; /* FPI structure. */
+UNCH *ename; /* Entity or notation name, with EOS, no length.*/
+ /* NOTE: No PERO on parameter entity name. */
+UNCH *estore; /* DTD, general or parameter entity, DCN. */
+PNE pne; /* Caller's external entity ptr. */
+{
+ PDCB dcb; /* Ptr to DCN control block. */
+ int exidtype; /* External ID type: 0=none 1=system 2=public. */
+ int exetype; /* External entity type. */
+
+ MEMZERO((UNIV)fpis, (UNS)FPISZ); /* Initialize fpi structure. */
+ /* Move entity name into fpi (any PERO was stripped by caller). */
+ fpis->fpinm = ename;
+ entlen = 0; /* Initialize external ID length. */
+
+ /* PARAMETER 1: External identifier keyword or error.
+ */
+ TRACEMD("1: extid keyword");
+ if ((exidtype = mapsrch(exttab, tbuf+1))==0) {
+ mderr(29, (UNCH *)0, (UNCH *)0);
+ return (struct fpi *)0;
+ }
+ /* A SYSTEM identifier has no public ID literal. */
+ if (exidtype==EDSYSTEM) goto parm3;
+
+ /* PARAMETER 2: Public ID literal.
+ */
+ pcbmd.newstate = 0;
+ /* The length of a minimum literal cannot exceed the value of LITLEN
+ in the reference quantity set. */
+ parsemd(pubibuf, NAMECASE, &pcblitv, REFLITLEN);
+ TRACEMD("2: pub ID literal");
+ switch (pcbmd.action) {
+ case LITE: /* Use alternative literal delimiter. */
+ case LIT: /* Save literal as public ID string. */
+ entlen = ustrlen(pubibuf);
+ fpis->fpipubis = pubibuf;
+ break;
+ default:
+ mderr(117, (UNCH *)0, (UNCH *)0);
+ return (struct fpi *)0; /* Signal error to caller. */
+ }
+ /* PARAMETER 3: System ID literal.
+ */
+ parm3:
+ pcbmd.newstate = 0;
+ parsemd(sysibuf, NAMECASE, &pcblitc, LITLEN);
+ TRACEMD("3: sys ID literal");
+ if (pcbmd.action==LIT || pcbmd.action==LITE) {
+ entlen += ustrlen(sysibuf);
+ fpis->fpisysis = sysibuf;
+ /* Literal consumed: fetch the token that follows it. */
+ pcbmd.newstate = 0;
+ parsemd(tbuf, NAMECASE, &pcblitp, LITLEN);
+ }
+ /* No system literal: what was parsed into sysibuf is the next token,
+ so move it to tbuf where the following steps expect it. */
+ else memcpy(tbuf, sysibuf, *sysibuf);
+ /* Only a general entity (ESF) followed by a name can have an entity type. */
+ if (*estore!=ESF || pcbmd.action!=NAS) goto genfpi;
+
+ /* PARAMETER 4: Entity type keyword.
+ */
+ TRACEMD("4: Entity type");
+ if ((exetype = mapsrch(extettab, tbuf+1))==0) {
+ mderr(24, tbuf+1, (UNCH *)0);
+ return (struct fpi *)0;
+ }
+ if (exetype==ESNSUB && SUBDOC == NO) {
+ mderr(90, tbuf+1, (UNCH *)0);
+ return (struct fpi *)0;
+ }
+
+ NEXTYPE(pne) = (UNCH)exetype; /* Save entity type in caller's ne. */
+ *estore = ESN; /* Signal that entity is a data entity. */
+
+ /* An ESNSUB entity takes no notation name or data attributes. */
+ if (exetype==ESNSUB) {
+ pne->nedcn = 0;
+ pcbmd.newstate = 0; /* Parse next token for caller. */
+ parsemd(tbuf, NAMECASE, &pcblitp, LITLEN);
+ goto genfpi;
+ }
+ /* PARAMETER 5: Notation name.
+ */
+ pcbmd.newstate = 0;
+ parsemd(lbuf, NAMECASE, &pcblitp, NAMELEN);
+ TRACEMD("5: notation");
+ if (pcbmd.action!=NAS) {mderr(119, tbuf+1, (UNCH *)0); return (struct fpi *)0;}
+ /* Locate the data content notation. */
+ pne->nedcn = dcb = dcndef(lbuf);
+ /* Note that we have defined an entity with this notation.
+ If attributes are later defined for this notation, we'll
+ have to fix up this entity. */
+ dcb->entsw = 1;
+
+ /* PARAMETER 6: Data attribute specification.
+ */
+ pcbmd.newstate = 0;
+ parsemd(lbuf, NAMECASE, &pcblitp, NAMELEN);
+ TRACEMD("6: [att list]");
+ if (pcbmd.action!=MDS) { /* No attributes specified. */
+ if (dcb->adl == 0)
+ NEAL(pne) = 0;
+ else {
+ /* The notation has attributes: store their values as they
+ stand after initatt and adlval. */
+ initatt(dcb->adl);
+ adlval((int)ADN(al), (struct etd *)0);
+ storedatt(pne);
+ }
+ goto genfpi;
+ }
+ if (dcb->adl==0) { /* Atts specified, but none defined. */
+ mderr(22, (UNCH *)0, (UNCH *)0);
+ return (struct fpi *)0;
+ }
+ pcbstag.newstate = pcbstan; /* First separator is optional. */
+ if ((parseatt(dcb->adl, tbuf))==0)/* Empty list. */
+ mderr(91, (UNCH *)0, (UNCH *)0);
+ else {
+ adlval((int)ADN(al), (struct etd *)0);
+ storedatt(pne);
+ }
+ parse(&pcbeal); /* Parse the list ending. */
+ pcbmd.newstate = 0; /* Parse next token for caller. */
+ parsemd(tbuf, NAMECASE, &pcblitp, LITLEN);
+
+ /* GENFPI: Builds a formal public identifier structure, including the
+ entity name, offsets of the components of the public ID, and
+ other data a system might use to identify the actual file.
+ */
+ genfpi:
+ TRACEMD("7: generate fpi");
+ fpis->fpistore = *estore - ESFM + 1; /* External entity type: 1-6. */
+ if (*estore == ESN) {
+ if (NEXTYPE(pne) == ESNSUB)
+ fpis->fpinedcn = 0;
+ else
+ fpis->fpinedcn = NEDCN(pne) + 1;
+ }
+ /* Analyze public ID and make structure entries. */
+ if (exidtype==EDPUBLIC) {
+ /* With FORMAL set to NO the public ID is not parsed at all. */
+ if (FORMAL==NO)
+ fpis->fpiversw = -1;
+ else if (parsefpi(fpis)>0) {
+ mderr(88, fpis->fpipubis, (UNCH *)0);
+ fpis->fpiversw = -1; /* Signal bad formal public ID. */
+ }
+ }
+ return fpis;
+}
+
+/* STOREDATT: Store a data attribute list.
+   Copies entries 0 through ADN(al) of the current attribute list al
+   into newly allocated storage attached to pne via NEAL, gives the copy
+   its own savestr'd duplicate of every non-null addef value, and adds
+   to the attribute counters in ds.
+*/
+
+VOID storedatt(pne)
+PNE pne;
+{
+     struct ad *copy;              /* The private copy of the list. */
+     int n;                        /* Attribute list entry number. */
+
+     copy = (struct ad *)rmalloc((1+ADN(al))*ADSZ);
+     NEAL(pne) = copy;
+     memcpy((UNIV)copy, (UNIV)al, (1+ADN(al))*ADSZ);
+     n = 1;
+     while (n <= (int)ADN(al)) {
+          if (GET(ADFLAGS(al, n), ASPEC))
+               ds.attdef += ADLEN(al, n);
+          /* The copied pointer still refers to the original value. */
+          if (copy[n].addef != 0)
+               copy[n].addef = savestr(copy[n].addef);
+          n++;
+     }
+     ds.attcnt += AN(al);          /* Number of attributes defined. */
+#if 0
+     /* I can't see any reason to increase AVGRPCNT here. */
+     ds.attgcnt += ADN(al) - AN(al); /* Number of att grp members. */
+#endif
+}
+
+/* PARSEFPI: Parses a formal public identifier and builds a control block.
+ PARSEFPI returns a positive error code (1-10), or 0 if no errors.
+ It sets fpiversw if no version was specified in the ID and the
+ public text is in a class that permits display versions.
+ Note: An empty version ("//") can be specified (usually it is
+ the non-device-specific form, such as a definitional entity set).
+ Error codes:
+ 1 identifier ends right after the owner type prefix
+ 2 owner identifier field is not terminated
+ 3 text class field is not terminated by a space
+ 4 text class is not found in pubcltab
+ 5 identifier ends right after the unavailable-text prefix
+ 6 text description field is not terminated
+ 7 language field contains something other than upper-case letters
+ 8 version field present but the class does not allow one
+ 9 another field follows the version field
+ 10 notation identifier whose text class is not FPINOT
+ The offsets stored in f are relative to f->fpipubis.
+*/
+int parsefpi(f)
+PFPI f; /* Ptr to formal public identifier structure. */
+{
+ UNCH *l; /* Pointer to EOS of public identifier. */
+ UNCH *p, *q; /* Ptrs to current field in public identifier. */
+ UNS len; /* Field length */
+
+ p = f->fpipubis; /* Point to start of identifier. */
+ l = p + ustrlen(p); /* Point to EOS of identifier. */
+ if (*p=='+' || *p=='-') { /* If owner registered, unregistered. */
+ f->fpiot = *p; /* Save owner type. */
+ if ((p += 3)>=l) return 1; /* Get to owner ID field. */
+ }
+ else f->fpiot = '!'; /* Indicate ISO owner identifier. */
+ if ((q = pubfield(p, l, '/', &len))==0) /* Find end of owner ID field. */
+ return 2;
+ f->fpiol = len; /* Save owner ID length. */
+ f->fpio = p - f->fpipubis; /* Save offset in pubis to owner ID. */
+
+ if ((p = pubfield(q, l, ' ', &len))==0) /* Find end of text class field. */
+ return 3;
+ /* p is just past the space, so --p lands on the space itself. */
+ *(--p) = EOS; /* Temporarily make class a string. */
+ f->fpic = mapsrch(pubcltab, q); /* Check for valid uc class name.*/
+ *p++ = ' '; /* Restore the SPACE delimiter. */
+ if (f->fpic==0) return 4; /* Error if not valid uc class name.*/
+
+ /* The public text class in a notation identifier must be NOTATION. */
+ if (f->fpistore == ESK - ESFM + 1 && f->fpic != FPINOT) return 10;
+
+ if (*p=='-') { /* If text is unavailable public text.*/
+ f->fpitt = *p; /* Save text type. */
+ if ((p += 3)>=l) return 5; /* Get to text description field. */
+ }
+ else f->fpitt = '+'; /* Indicate available public text. */
+ if ((q = pubfield(p, l, '/', &len))==0) /* Find end of text description. */
+ return 6;
+ f->fpitl = len; /* Save text description length. */
+ f->fpit = p - f->fpipubis; /* Save ptr to description.*/
+
+ /* p becomes NULL here when the language is the last field. */
+ p = pubfield(q, l, '/', &len); /* Bound language field. */
+ if (f->fpic != FPICHARS) {
+ int i;
+ /* Language must be all upper-case letters. */
+ /* The standard only says that it *should* be two letters, so
+ don't enforce that. */
+ for (i = 0; i < len; i++) {
+ /* Don't assume ASCII. */
+ if (!strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", q[i]))
+ return 7;
+ }
+ }
+ f->fpill = len;
+ f->fpil = q - f->fpipubis;
+ if (p!=0) { /* If there is a version field: */
+ if (f->fpic<FPICMINV) /* Error if class prohibits versions. */
+ return 8;
+ if ((pubfield(p, l, '/', &len))!=0) /* Bound version field. */
+ return 9; /* Error if yet another field. */
+ f->fpivl = len; /* Save version length. */
+ f->fpiv = p - f->fpipubis; /* Save ptr (in pubis) to version. */
+ }
+ else if (f->fpic>=FPICMINV) f->fpiversw = 1;/* No version: get the best. */
+ return(0);
+}
+/* PUBFIELD: Returns ptr to next field, or NULL if ID has ended.
+   The field starts at p and runs up to the delimiter d: a single space,
+   or two consecutive solidi when d is '/'.  A solidus that is not
+   followed by a second one is part of the field.  *lenp receives the
+   number of characters in the field, not counting the delimiter; when
+   no delimiter is found it is the number of characters from p to l.
+   The scan never advances beyond l, so an identifier that ends in a
+   single solidus yields the correct length for its last field.
+*/
+#ifdef USE_PROTOTYPES
+UNCH *pubfield(UNCH *p, UNCH *l, UNCH d, UNS *lenp)
+#else
+UNCH *pubfield(p, l, d, lenp)
+UNCH *p;                      /* Public identifier field (no length or EOS). */
+UNCH *l;                      /* Pointer to EOS of public identifier. */
+UNCH d;                       /* Field delimiter: ' ' or '/'. */
+UNS *lenp;                    /* Gets field length */
+#endif
+{
+     UNCH *start = p;         /* Save starting value of p. */
+
+     while (p<l) {
+          if (*p++!=d)        /* Test for delimiter character. */
+               continue;
+          /* p is one past the delimiter, hence the extra 1. */
+          *lenp = p - (start + 1);
+          if (d=='/') {       /* Solidus requires a second one. */
+               /* *p is at worst the EOS at l, so this read is in bounds.
+                  Do not step over a character that is not a solidus:
+                  at the end of the identifier that would move p past l. */
+               if (*p!=d)
+                    continue;
+               ++p;
+          }
+          return(p);          /* Return ptr to next field. */
+     }
+     *lenp = p - start;       /* Save field length (no len or EOS). */
+     return NULL;
+}
+/* MDMS: Process marked section start.
+   The marked section level is bumped first (with error 27 when it
+   would exceed TAGLVL).  If a special parse is already in effect, its
+   level counter is bumped too and the declaration is not parsed.
+   Otherwise the status keywords are read up to the MDS; the highest
+   keyword code seen decides which parse control block is returned.
+   The caller's pcb is returned unchanged when no special parse is needed.
+*/
+struct parse *mdms(tbuf, pcb)
+UNCH *tbuf;                   /* Work area for tokenization [NAMELEN+2]. */
+struct parse *pcb;            /* Parse control block for this parse. */
+{
+     int kw;                  /* Code of keyword found in mstab. */
+     int tok;                 /* Parameter token type. */
+     int mode = 0;            /* Parse code: 0=same; 2-4 per defines. */
+
+     if (++mslevel>TAGLVL) {
+          --mslevel;
+          sgmlerr(27, (struct parse *)0, ntoa(TAGLVL), (UNCH *)0);
+     }
+
+     /* Already in a special parse: just count the nested section. */
+     if (msplevel) {
+          ++msplevel;
+          return(pcb);
+     }
+
+     parmno = 0;              /* No parameters as yet. */
+     mdessv = es;             /* Save es for checking entity nesting. */
+     pcbmd.newstate = pcbmdtk;/* First separator is optional. */
+
+     /* PARAMETERS: TEMP, RCDATA, CDATA, IGNORE, INCLUDE, or MDS. */
+     for (;;) {
+          tok = parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+          if (tok!=NAS)
+               break;
+          kw = mapsrch(mstab, tbuf+1);
+          if (kw==0) {
+               sgmlerr(64, (struct parse *)0, ntoa(parmno), tbuf+1);
+               continue;
+          }
+          if (kw==MSTEMP)     /* TEMP: for documentation. */
+               continue;
+          msplevel = 1;       /* Special parse required. */
+          if (kw>mode)        /* Update if higher priority. */
+               mode = kw;
+     }
+     if (tok!=MDS) {
+          NEWCC;              /* Syntax error did REPEATCC. */
+          sgmlerr(97, (struct parse *)0, lex.m.dso, (UNCH *)0);
+          REPEATCC;           /* 1st char of marked section. */
+     }
+     if (es!=mdessv)
+          synerr(37, pcb);
+     TRACEMS(1, mode, mslevel, msplevel);
+     if (mode==MSIGNORE)
+          pcb = &pcbmsi;
+     else if (mode) {
+          if (mode==MSCDATA)
+               pcb = &pcbmsc;
+          else {
+               rcessv = es;   /* Remember es for the RCDATA parse. */
+               pcb = &pcbmsrc;
+          }
+     }
+     return(pcb);             /* Tell caller whether to change the parse. */
+}
+/* MDMSE: Process marked section end.
+   Issue an error if no marked section had started.
+   Returns 1 when this end closes the outermost special-parse section
+   (the special parse is to be cancelled), otherwise 0.
+*/
+int mdmse()
+{
+     int cancel = 0;          /* Return code: 0=same parse; 1=cancel special. */
+
+     if (mslevel==0)
+          sgmlerr(26, (struct parse *)0, (UNCH *)0, (UNCH *)0);
+     else
+          --mslevel;
+
+     if (msplevel) {
+          --msplevel;
+          if (msplevel==0)
+               cancel = 1;
+     }
+     TRACEMS(0, cancel, mslevel, msplevel);
+     return cancel;
+}
+/* MDNOT: Process NOTATION declaration.
+   Parses the notation name, the external identifier (via mdextid with
+   storage class ESK and no external entity block) and the end of the
+   declaration.  A notation that is already marked as defined draws
+   error 56; otherwise the notation is entered with dcndef, marked
+   defined, and given copies of the system and public identifiers.
+*/
+VOID mdnot(tbuf)
+UNCH *tbuf;                   /* Work area for tokenization[LITLEN+2]. */
+{
+     struct fpi extid;        /* Formal public identifier structure. */
+     PDCB notation;           /* Ptr to notation entity in dcntab. */
+     UNCH storage = ESK;      /* Entity storage class. */
+
+     mdname = key[KNOTATION]; /* Identify declaration for messages. */
+     subdcl = NULL;           /* No subject as yet. */
+     parmno = 0;              /* No parameters as yet. */
+     mdessv = es;             /* Save es for checking entity nesting. */
+
+     /* PARAMETER 1: Notation name.
+     */
+     pcbmd.newstate = 0;
+     parsemd(lbuf, NAMECASE, &pcblitp, NAMELEN);
+     TRACEMD("1: name");
+     if (pcbmd.action!=NAS) {
+          mderr(120, (UNCH *)0, (UNCH *)0);
+          return;
+     }
+     subdcl = lbuf+1;         /* Save notation name for error msgs. */
+
+     /* PARAMETER 2: External identifier keyword.
+     */
+     pcbmd.newstate = 0;
+     parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+     TRACEMD("2: extid");
+     if (pcbmd.action!=NAS) {
+          mderr(29, (UNCH *)0, (UNCH *)0);
+          return;
+     }
+     if (mdextid(tbuf, &extid, lbuf+1, &storage, (PNE)0)==0)
+          return;
+
+     /* PARAMETER 3: End of declaration.
+                     Token was parsed by MDEXTID.
+     */
+     TRACEMD(emd);
+     if (pcbmd.action!=EMD)
+          mderr(126, (UNCH *)0, (UNCH *)0);
+     if (es!=mdessv)
+          synerr(37, &pcbmd);
+
+     /* EXECUTE: Store notation name.
+     */
+     notation = dcnfind(lbuf);
+     if (notation != 0 && notation->defined) {
+          mderr(56, lbuf+1, (UNCH *)0);
+          return;
+     }
+     notation = dcndef(lbuf);
+     notation->defined = 1;
+     /* The identifiers in extid are not private copies: duplicate them. */
+     if (extid.fpisysis)
+          notation->sysid = savestr(extid.fpisysis);
+     else
+          notation->sysid = 0;
+     if (extid.fpipubis)
+          notation->pubid = savestr(extid.fpipubis);
+     else
+          notation->pubid = 0;
+     ++ds.dcncnt;
+     ds.dcntext += entlen;
+     TRACEDCN(notation);
+}
+/* DCNDEF: Define a notation and return its DCNCB.
+   The name is entered in dcntab through hin with hash value 0 and
+   block size DCBSZ.
+   NOTE(review): callers in this file use it for names that may already
+   exist, so hin presumably returns the existing block in that case;
+   confirm against hin.
+*/
+struct dcncb *dcndef(nname)
+UNCH *nname;                  /* Notation name (with length and EOS). */
+{
+     PDCB dcb;                /* Ptr to the notation's control block. */
+
+     dcb = (PDCB)hin((THASH)dcntab, nname, 0, DCBSZ);
+     return(dcb);
+}
+/* DCNFIND: Look a notation name up in dcntab (hash value 0).
+   Return its DCNCB, or NULL if the name is not in the table.
+*/
+struct dcncb *dcnfind(nname)
+UNCH *nname;                  /* Notation name (with length and EOS). */
+{
+     PDCB dcb;                /* Result of the table lookup. */
+
+     dcb = (PDCB)hfind((THASH)dcntab, nname, 0);
+     return(dcb);
+}
+#define SRM(i) (srhptr->srhsrm[i]) /* Current entry in SHORTREF map. */
+/* MDSRMDEF: Process short reference mapping declaration.
+ Parses the map name, then pairs of (short reference delimiter
+ literal, entity name) until something other than a literal is
+ found, then the end of the declaration.
+ Entry 0 of the map is used as a "map was declared" marker; the
+ other entries are indexed by the delimiter number found in
+ lex.s.dtb and hold the entity the delimiter is mapped to.
+ On an error inside the declaration the map is emptied again (if it
+ was known before this declaration) or removed altogether.
+*/
+VOID mdsrmdef(tbuf)
+UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */
+{
+ struct entity *entcb; /* Ptr to defined entity. */
+ PSRH srhptr; /* Ptr to short reference map hdr (in srhtab).*/
+ int srn; /* Short reference delimiter number in srdeltab.*/
+ int mapused = 0; /* Has map already been used? */
+
+ mdname = key[KSHORTREF]; /* Identify declaration for messages. */
+ subdcl = NULL; /* No subject as yet. */
+ parmno = 0; /* No parameters as yet. */
+ if (!sd.shortref) {mderr(198, (UNCH *)0, (UNCH *)0); return;}
+ mdessv = es; /* Save es for checking entity nesting. */
+ /* PARAMETER 1: SHORTREF map name.
+ */
+ pcbmd.newstate = 0;
+ parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+ TRACEMD("1: map nm");
+ if (pcbmd.action!=NAS) {mderr(120, (UNCH *)0, (UNCH *)0); return;}
+ if ((srhptr = srhfind(tbuf))!=0) {
+ mapused = 1;
+ /* Error if map was declared (not just used). */
+ if (SRM(0)) {mderr(56, tbuf+1, (UNCH *)0); return;}
+ }
+ else srhptr = srhdef(tbuf); /* Create map with SRs mapped to NULL.*/
+ SRM(0) = (PECB)srhptr; /* Indicate map was actually declared.*/
+ subdcl = srhptr->ename+1; /* Save map name for error msgs. */
+
+ /* The loop goes on for as long as the next token is a literal
+ (delimited either way); newstate is reset before each parse. */
+ while ( pcbmd.newstate = 0,
+ parsemd(tbuf, NAMECASE, &pcblitp, SRMAXLEN)==LIT
+ || pcbmd.action==LITE ) {
+ /* PARAMETER 2: Delimiter string.
+ */
+ TRACEMD("2: SR string");
+ if ((srn = mapsrch(lex.s.dtb, tbuf))==0) {
+ mderr(124, tbuf, (UNCH *)0);
+ goto cleanup;
+ }
+ /* PARAMETER 3: Entity name.
+ */
+ pcbmd.newstate = 0;
+ parsemd(tbuf, ENTCASE, &pcblitp, NAMELEN);
+ TRACEMD("3: entity");
+ if (pcbmd.action!=NAS) {mderr(120, (UNCH *)0, (UNCH *)0); goto cleanup;}
+ /* An entity that is not known yet is entered with storage
+ class '\0' and no text. */
+ if ((entcb = entfind(tbuf))==0) {
+ union etext etx;
+ etx.x = 0;
+ entcb = entdef(tbuf, '\0', &etx);
+ }
+ /* The delimiter is already mapped in this map: report it and
+ keep the first mapping. */
+ if (SRM(srn)) {
+ mderr(56, (srn<lex.s.prtmin ? (UNCH *)lex.s.pdtb[srn]
+ : lex.s.dtb[srn].mapnm), (UNCH *)0);
+ continue;
+ }
+ SRM(srn) = entcb;
+ /* Update the lexical class of the delimiter's first character
+ (or of the space) in lexcnm. */
+ if (srn>=lex.s.fce && srn!=lex.s.hyp && srn!=lex.s.hyp2
+ && srn!=lex.s.lbr && srn!=lex.s.rbr)
+ lexcnm[*lex.s.dtb[srn].mapnm] = lex.l.fce;
+ else if (srn==lex.s.spc) lexcnm[' '] = lex.l.spcr;
+ }
+ /* PARAMETER 4: End of declaration.
+ */
+ TRACEMD(emd);
+ /* NOTE(review): parmno==2 presumably means that the token after the
+ map name was not a delimiter literal, i.e. the map is empty;
+ confirm against how parsemd maintains parmno. */
+ if (parmno==2)
+ {mderr((UNS)(pcbmd.action==EMD ? 28:123), (UNCH *)0, (UNCH *)0); goto cleanup;}
+ if (pcbmd.action!=EMD) mderr(126, (UNCH *)0, (UNCH *)0);
+ if (es!=mdessv) synerr(37, &pcbmd);
+ ++ds.srcnt;
+ TRACESRM("SHORTREF", srhptr->srhsrm, (UNCH *)0);
+ return;
+
+ cleanup:
+ /* Don't free the map if the map was in use (because of a USEMAP
+ declaration) before this declaration. */
+ /* Clearing the whole map also clears the "declared" marker in entry 0. */
+ if (mapused)
+ MEMZERO((UNIV)srhptr->srhsrm, sizeof(PECB)*(lex.s.dtb[0].mapdata+1));
+ else {
+ frem((UNIV)srhptr->srhsrm);
+ hout((THASH)srhtab, srhptr->ename, 0);
+ frem((UNIV)srhptr);
+ }
+}
+/* MDSRMUSE: Activate a short reference map.
+ Parses the map name (or the reserved name for the empty map),
+ then either an element name or name group (when indtdsw is set)
+ or the end of the declaration (when it is not).
+ With indtdsw set, the map is stored in every named element that
+ has no map yet; a map that is not defined yet is created first.
+ Otherwise the map is stored in the current entry of the tag stack.
+*/
+VOID mdsrmuse(tbuf)
+UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */
+{
+ PSRH srhptr; /* Ptr to short reference map hdr (in srhtab).*/
+ TECB srmptr; /* Ptr to short reference map (in header). */
+ int i; /* Loop counter; temporary variable. */
+
+ mdname = key[KUSEMAP]; /* Identify declaration for messages. */
+ subdcl = NULL; /* No subject as yet. */
+ parmno = 0; /* No parameters as yet. */
+ mdessv = es; /* Save es for checking entity nesting. */
+ /* PARAMETER 1: SHORTREF map name or "#EMPTY".
+ */
+ pcbmd.newstate = 0;
+ parsemd(lbuf, NAMECASE, &pcblitp, NAMELEN);
+ TRACEMD("1: map nm");
+ subdcl = lbuf+1; /* Subject name for error messages. */
+ switch (pcbmd.action) {
+ case RNS: /* Empty SHORTREF map requested. */
+ if (ustrcmp(lbuf+1, key[KEMPTY])) {
+ mderr(118, lbuf+1, key[KEMPTY]);
+ return;
+ }
+ srmptr = SRMNULL;
+ break;
+ case NAS: /* Map name specified; save if undefined. */
+ if ((srhptr = srhfind(lbuf))==0) {
+ /* An unknown map is an error unless indtdsw is set; in that
+ case the map is created further down. */
+ if (!indtdsw) {mderr(125, (UNCH *)0, (UNCH *)0); return;}
+ srmptr = NULL;
+ }
+ else
+ srmptr = srhptr->srhsrm;
+ break;
+ default:
+ mderr(120, (UNCH *)0, (UNCH *)0);
+ return;
+ }
+ /* PARAMETER 2: Element name or a group of them. (In DTD only.)
+ */
+ pcbmd.newstate = 0;
+ parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+ TRACEMD("2: GI or grp");
+ switch (pcbmd.action) {
+ case NAS:
+ if (!indtdsw) {mderr(142, (UNCH *)0, (UNCH *)0); return;}
+ /* Build a one-element, null-terminated group. */
+ nmgrp[0] = etddef(tbuf);
+ nmgrp[1] = (PETD)NULL;
+ break;
+ case GRPS:
+ if (!indtdsw) {mderr(142, (UNCH *)0, (UNCH *)0); return;}
+ parsegrp(nmgrp, &pcbgrnm, tbuf);
+ break;
+ case EMD:
+ if (indtdsw) {mderr(28, (UNCH *)0, (UNCH *)0); return;}
+ /* No element named: the map applies to the current tag stack entry. */
+ tags[ts].tsrm = srmptr;
+ TRACESRM("USEMAP", tags[ts].tsrm, tags[ts].tetd->etdgi+1);
+ goto realemd;
+ default:
+ mderr(indtdsw ? 121 : 126, (UNCH *)0, (UNCH *)0);
+ return;
+ }
+ /* PARAMETER 3: End of declaration.
+ */
+ pcbmd.newstate = 0;
+ parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+ TRACEMD(emd);
+ if (pcbmd.action!=EMD) mderr(126, (UNCH *)0, (UNCH *)0);
+ /* If map has not yet been defined, do it and get map pointer. */
+ if (!srmptr) srmptr = (srhdef(lbuf))->srhsrm;
+
+ /* Store the map pointer for each element name specified.
+ */
+ TRACEGRP(nmgrp);
+ for (i = -1; nmgrp[++i];) {
+ /* An element that already has a map keeps it; the duplicate is
+ only reported when sw.swdupent is set. */
+ if (!nmgrp[i]->etdsrm) nmgrp[i]->etdsrm = srmptr;
+ else if (sw.swdupent) mderr(68, nmgrp[i]->etdgi+1, (UNCH *)0);
+ }
+ realemd:
+ if (es!=mdessv) synerr(37, &pcbmd);
+}
+/* SRHDEF: Define a SHORTREF map and return ptr to its header.
+   The header is entered in srhtab and given a newly allocated map with
+   one entry more than lex.s.dtb[0].mapdata.
+   NOTE(review): the entries are expected to start out as NULL, which
+   relies on rmalloc returning cleared storage; confirm.
+   Caller must determine whether it already exists.
+*/
+PSRH srhdef(sname)
+UNCH *sname;                  /* SHORTREF map name (with length and EOS). */
+{
+     PSRH hdr;                /* Ptr to SHORTREF map hdr in srhtab. */
+
+     hdr = (PSRH)hin((THASH)srhtab, sname, 0, SRHSZ);
+     hdr->srhsrm = (TECB)rmalloc((UNS)(lex.s.dtb[0].mapdata+1)*sizeof(PECB));
+     return(hdr);
+}
+/* SRHFIND: Look a SHORTREF map name up in srhtab (hash value 0).
+   Return the ptr to its header, or NULL if the name is not in the table.
+*/
+PSRH srhfind(sname)
+UNCH *sname;                  /* SHORTREF map name (with length and EOS). */
+{
+     PSRH hdr;                /* Result of the table lookup. */
+
+     hdr = (PSRH)hfind((THASH)srhtab, sname, 0);
+     return(hdr);
+}
+#undef SRM
+
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+comment-column: 30
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/msg.h b/usr.bin/sgmls/sgmls/msg.h
new file mode 100644
index 0000000..fa97a4c
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/msg.h
@@ -0,0 +1,252 @@
+/*
+Severity codes:
+I information (not an SGML error at all)
+W warning (an SGML markup error but it knows what you mean)
+E error
+C critical (fatal)
+
+Type codes:
+R resource
+C ?context/content
+M minimization
+Q quantity
+S syntax
+D declaration
+U unsupported feature
+*/
+/* Built-in message table.  The position of an entry in the array is the
+   message number shown in the comment at the start of its line; entry 0
+   is a placeholder.  Some texts use positional conversions ("%1$s",
+   "%2$s") to reorder or repeat their arguments. */
+struct {
+     char *text;              /* printf-style message text */
+     char severity;           /* severity code: I, W, E or C (see above) */
+     char type;               /* type code: R, C, M, Q, S, D or U (see above) */
+} messages[] = {
+/* 0 */ {0},
+/* 1 */ {"%s element not allowed at this point in %s element", 'E', 'C'},
+/* 2 */ {"%s markup declaration not permitted here; declaration ended", 'E', 'D'},
+/* 3 */ {"Length of name, number, or token exceeded NAMELEN or LITLEN limit", 'E', 'Q'},
+/* 4 */ {"Non-SGML character ignored", 'E', 'S'},
+/* 5 */ {"%s end-tag ignored: doesn't end any open element (current is %s)", 'E', 'C'},
+/* 6 */ {"%s start-tag exceeds open element limit; possible lies from %s on", 'E', 'Q'},
+/* 7 */ {"Start-tag omitted from %s with empty content", 'E', 'M'},
+/* 8 */ {"Illegal entity end in markup or delimited text", 'E', 'S'},
+/* 9 */ {"Incorrect character in markup; markup terminated", 'E', 'S'},
+/* 10 */ {"Data not allowed at this point in %s element", 'E', 'C'},
+/* 11 */ {"No element declaration for %s end-tag GI; end-tag ignored", 'E', 'C'},
+/* 12 */ {"%s name ignored: not a syntactically valid SGML name", 'E', 'S'},
+/* 13 */ {"%s = \"%s\" attribute ignored: not defined for this element", 'E', 'C'},
+/* 14 */ {"%s = \"%s\" attribute value defaulted: invalid character", 'E', 'S'},
+/* 15 */ {"%s = \"%s\" attribute value defaulted: token too long", 'E', 'Q'},
+/* 16 */ {"%s = \"%s\" attribute value defaulted: too many tokens", 'E', 'C'},
+/* 17 */ {"%s = \"%s\" attribute value defaulted: wrong token type", 'E', 'C'},
+/* 18 */ {"%s = \"%s\" attribute value defaulted: token not in group", 'E', 'C'},
+/* 19 */ {"Required %s attribute was not specified; may affect processing", 'E', 'C'},
+/* 20 */ {"%s end-tag implied by %s end-tag; not minimizable", 'E', 'M'},
+/* 21 */ {"%s start-tag implied by %s start-tag; not minimizable", 'W', 'M'},
+/* 22 */ {"Possible attributes treated as data because none were defined", 'E', 'C'},
+/* 23 */ {"Duplicate specification occurred for \"%s\"; may affect processing", 'E', 'D'},
+/* 24 */ {"\"%s\" keyword invalid; declaration terminated", 'E', 'D'},
+/* 25 */ {"%s = \"%s\" attribute defaulted: empty string not allowed for token", 'E', 'C'},
+/* 26 */ {"Marked section end ignored; not in a marked section", 'E', 'S'},
+/* 27 */ {"Marked section start ignored; %s marked sections open already", 'E', 'Q'},
+/* 28 */ {"One or more parameters missing; declaration ignored", 'E', 'D'},
+/* 29 */ {"\"PUBLIC\" or \"SYSTEM\" required; declaration terminated", 'E', 'D'},
+/* 30 */ {"%s element ended prematurely; required %s omitted", 'E', 'C'},
+/* 31 */ {"Entity \"%s\" terminated: could not read file", 'E', 'R'},
+/* 32 */ {"Could not open file for entity \"%s\"; entity reference ignored", 'E', 'R'},
+/* 33 */ {"Insufficient main memory; unable to continue parsing", 'C', 'R'},
+/* 34 */ {"%s entity reference ignored; exceeded open entity limit (%s)", 'E', 'Q'},
+/* 35 */ {"No declaration for entity \"%s\"; reference ignored", 'E', 'C'},
+/* 36 */ {"%s entity reference occurred within own text; reference ignored", 'E', 'C'},
+/* 37 */ {"Entity nesting level out of sync", 'E', 'S'},
+/* 38 */ {"Parameter entity text cannot have %s keyword; keyword ignored", 'E', 'D'},
+/* 39 */ {"%s end-tag implied by %s start-tag; not minimizable", 'W', 'M'},
+/* 40 */ {"Start-tag minimization ignored; element has required attribute", 'E', 'D'},
+/* 41 */ {"Required %s element cannot be excluded from %s element", 'E', 'C'},
+/* 42 */ {"No DOCTYPE declaration; document type is unknown", 'E', 'C'},
+/* 43 */ {"Undefined %1$s start-tag GI was used in DTD; \"%1$s O O ANY\" assumed", 'E', 'C'},
+/* 44 */ {"Invalid character(s) ignored; attempting to resume DOCTYPE subset", 'E', 'S'},
+/* 45 */ {"No declaration for entity \"%s\"; default definition used", 'I', 'C'},
+/* 46 */ {"%s end-tag implied by NET delimiter; not minimizable", 'W', 'M'},
+/* 47 */ {"%s end-tag implied by data; not minimizable", 'W', 'M'},
+/* 48 */ {"%s end-tag implied by short start-tag (no GI); not minimizable", 'W', 'M'},
+/* 49 */ {"%s start-tag implied by data; not minimizable", 'W', 'M'},
+/* 50 */ {"%s start-tag implied by short start-tag (no GI); not minimizable", 'W', 'M'},
+/* 51 */ {"Short end-tag (no GI) ignored: no open elements", 'E', 'C'},
+/* 52 */ {"No definition for %1$s document type; \"%1$s O O ANY\" assumed", 'E', 'C'},
+/* 53 */ {"No definition for %1$s implied start-tag; \"%1$s O O ANY\" assumed", 'E', 'C'},
+/* 54 */ {"%s element ended prematurely; required subelement omitted", 'E', 'C'},
+/* 55 */ {"Content model token %s: connectors conflict; first was used", 'E', 'D'},
+/* 56 */ {"Duplicate specification occurred for \"%s\"; duplicate ignored", 'E', 'D'},
+/* 57 */ {"Bad end-tag in R/CDATA element; treated as short (no GI) end-tag", 'E', 'S'},
+/* 58 */ {"Start-tag minimization should be \"-\" for element with declared content", 'I', 'D'},
+/* 59 */ {"Reference to PI entity not permitted here; reference ignored", 'E', 'S'},
+/* 60 */ {"Non-SGML character found; should have been character reference", 'W', 'S'},
+/* 61 */ {"Numeric character reference exceeds 255; reference ignored", 'E', 'S'},
+/* 62 */ {"Invalid alphabetic character reference ignored", 'E', 'S'},
+/* 63 */ {"Invalid character in minimum literal; character ignored", 'E', 'S'},
+/* 64 */ {"Keyword %s ignored; \"%s\" is not a valid marked section keyword", 'E', 'D'},
+/* 65 */ {"Parameter entity name longer than (NAMELEN-1); truncated", 'E', 'Q'},
+/* 66 */ {"Start-tag length exceeds TAGLEN limit; parsed correctly", 'W', 'Q'},
+/* 67 */ {"%s attribute defaulted: FIXED attribute must equal default", 'W', 'C'},
+/* 68 */ {"Duplicate specification occurred for \"%s\"; duplicate ignored", 'I', 'D'},
+/* 69 */ {"%s = \"%s\" IDREF attribute ignored: referenced ID does not exist", 'E', 'C'},
+/* 70 */ {"%s = \"%s\" IDREF attribute ignored: number of IDs in list exceeds GRPCNT limit", 'E', 'Q'},
+/* 71 */ {"%s = \"%s\" ID attribute ignored: ID in use for another element", 'E', 'C'},
+/* 72 */ {"%s = \"%s\" ENTITY attribute not general entity; may affect processing", 'E', 'C'},
+/* 73 */ {"%s = \"%s\" attribute ignored: previously specified in same list", 'W', 'C'},
+/* 74 */ {"\"?\" = \"%s\" name token ignored: not in any group in this list", 'E', 'C'},
+/* 75 */ {"Normalized attribute specification length over ATTSPLEN limit", 'E', 'Q'},
+/* 76 */ {"%s = \"%s\" NOTATION ignored: element content is empty", 'E', 'C'},
+/* 77 */ {"%s = \"%s\" NOTATION undefined: may affect processing", 'E', 'C'},
+/* 78 */ {"Entity \"%2$s\" has undefined notation \"%1$s\"", 'E', 'C'},
+/* 79 */ {"%s = \"%s\" default attribute value not in group; #IMPLIED used", 'E', 'C'},
+/* 80 */ {"#CURRENT default value treated as #IMPLIED for %s ID attribute", 'E', 'D'},
+/* 81 */ {"ID attribute %s cannot have a default value; treated as #IMPLIED", 'E', 'D'},
+/* 82 */ {"%s attribute must be token, not empty string; treated as #IMPLIED", 'E', 'D'},
+/* 83 */ {"NOTATION attribute ignored for EMPTY element", 'E', 'D'},
+/* 84 */ {"%s = \"%s\" NOTATION ignored: content reference specified", 'E', 'C'},
+/* 85 */ {"#CONREF default value treated as #IMPLIED for EMPTY element", 'W', 'D'},
+/* 86 */ {"%s = \"%s\" entity not data entity; may affect processing", 'E', 'C'},
+/* 87 */ {"End-tag minimization should be \"O\" for EMPTY element", 'I', 'D'},
+/* 88 */ {"Formal public identifier \"%s\" invalid; treated as informal", 'E', 'S'},
+/* 89 */ {"Out-of-context %2$s start-tag ended %1$s document element (and parse)", 'E', 'C'},
+/* 90 */ {"\"%s\" keyword is for unsupported feature; declaration terminated", 'E', 'D'},
+/* 91 */ {"Attribute specification list in prolog cannot be empty", 'E', 'D'},
+/* 92 */ {"Document ended invalidly within a literal; parsing ended", 'C', 'S'},
+/* 93 */ {"Short ref in map \"%2$s\" to undeclared entity \"%1$s\" treated as data", 'E', 'C'},
+/* 94 */ {"Could not reopen file to continue entity \"%s\"; entity terminated", 'E', 'R'},
+/* 95 */ {"Out-of-context data ended %s document element (and parse)", 'E', 'C'},
+/* 96 */ {"Short start-tag (no GI) ended %s document element (and parse)", 'E', 'C'},
+/* 97 */ {"DSO delimiter (%s) omitted from marked section declaration", 'E', 'D'},
+/* 98 */ {"Group token %s: duplicate name or name token \"%s\" ignored", 'E', 'D'},
+/* 99 */ {"Attempt to redefine %s attribute ignored", 'E', 'D'},
+/* 100 */ {"%s definition ignored: %s is not a valid declared value keyword", 'E', 'D'},
+/* 101 */ {"%s definition ignored: NOTATION attribute already defined", 'E', 'D'},
+/* 102 */ {"%s definition ignored: ID attribute already defined", 'E', 'D'},
+/* 103 */ {"%s definition ignored: no declared value specified", 'E', 'D'},
+/* 104 */ {"%s definition ignored: invalid declared value specified", 'E', 'D'},
+/* 105 */ {"%s definition ignored: number of names or name tokens in group exceeded GRPCNT limit", 'E', 'D'},
+/* 106 */ {"%s definition ignored: name group omitted for NOTATION attribute", 'E', 'D'},
+/* 107 */ {"#CONREF default value treated as #IMPLIED for %s ID attribute", 'E', 'D'},
+/* 108 */ {"%s definition ignored: %s is not a valid default value keyword", 'E', 'D'},
+/* 109 */ {"%s definition ignored: no default value specified", 'E', 'D'},
+/* 110 */ {"%s definition ignored: invalid default value specified", 'E', 'D'},
+/* 111 */ {"More than ATTCNT attribute names and/or name (token) values; terminated", 'E', 'D'},
+/* 112 */ {"Attempted redefinition of attribute definition list ignored", 'E', 'D'},
+/* 113 */ {"Content model token %s: more than GRPCNT model group tokens; terminated", 'E', 'Q'},
+/* 114 */ {"Content model token %s: more than GRPGTCNT content model tokens; terminated", 'E', 'Q'},
+/* 115 */ {"Content model token %s: more than GRPLVL nested model groups; terminated", 'E', 'Q'},
+/* 116 */ {"Content model token %s: %s invalid; declaration terminated", 'E', 'D'},
+/* 117 */ {"\"PUBLIC\" specified without public ID; declaration terminated", 'E', 'D'},
+/* 118 */ {"\"%s\" keyword invalid (only %s permitted); declaration terminated", 'E', 'D'},
+/* 119 */ {"\"%s\" specified without notation name; declaration terminated", 'E', 'D'},
+/* 120 */ {"Parameter must be a name; declaration terminated", 'E', 'D'},
+/* 121 */ {"Parameter must be a GI or a group of them; declaration terminated", 'E', 'D'},
+/* 122 */ {"Parameter must be a name or PERO (%%); declaration terminated", 'E', 'D'},
+/* 123 */ {"Parameter must be a literal; declaration terminated", 'E', 'D'},
+/* 124 */ {"\"%s\" not valid short reference delimiter; declaration terminated", 'E', 'D'},
+/* 125 */ {"Map does not exist; declaration ignored", 'E', 'C'},
+/* 126 */ {"MDC delimiter (>) expected; following text may be misinterpreted", 'E', 'D'},
+/* 127 */ {"Document ended invalidly within prolog; parsing ended", 'C', 'S'},
+/* 128 */ {"\"PUBLIC\" or \"SYSTEM\" or DSO ([) required; declaration terminated", 'E', 'D'},
+/* 129 */ {"Minimization must be \"-\" or \"O\" (not \"%s\"); declaration terminated", 'E', 'D'},
+/* 130 */ {"Content model or keyword expected; declaration terminated", 'E', 'D'},
+/* 131 */ {"Rank stem \"%s\" + suffix \"%s\" more than NAMELEN characters; not defined", 'E', 'D'},
+/* 132 */ {"Undefined %s start-tag GI ignored; not used in DTD", 'E', 'C'},
+/* 133 */ {"Document ended invalidly within a markup declaration; parsing ended", 'C', 'S'},
+/* 134 */ {"Normalized length of literal exceeded %s; markup terminated", 'E', 'Q'},
+/* 135 */ {"R/CDATA marked section in declaration subset; prolog terminated", 'E', 'D'},
+/* 136 */ {"%s = \"%s\" ENTITIES attribute ignored: more than GRPCNT in list", 'E', 'Q'},
+/* 137 */ {"Content model is ambiguous", 'W', 'D'},
+/* 138 */ {"Invalid parameter entity name \"%s\"", 'E', 'S'},
+/* 139 */ {"Document ended invalidly within a marked section; parsing ended", 'C', 'S'},
+/* 140 */ {"Element \"%s\" used in DTD but not defined", 'I', 'D'},
+/* 141 */ {"Invalid NDATA or SUBDOC entity reference occurred; ignored", 'E', 'S'},
+/* 142 */ {"Associated element type not allowed in document instance", 'E', 'C'},
+/* 143 */ {"Illegal DSC character; in different entity from DSO", 'E', 'C'},
+/* 144 */ {"Declared value of data attribute cannot be ID", 'E', 'D' },
+/* 145 */ {"Invalid reference to external CDATA or SDATA entity; ignored", 'E', 'S'},
+/* 146 */ {"Could not find external document type \"%s\"", 'E', 'R'},
+/* 147 */ {"Could not find external general entity \"%s\"", 'I', 'R'},
+/* 148 */ {"Could not find external parameter entity \"%s\"", 'I', 'R'},
+/* 149 */ {"Reference to non-existent general entity \"%s\" ignored", 'E', 'R'},
+/* 150 */ {"Could not find entity \"%s\" using default declaration", 'E', 'R'},
+/* 151 */ {"Could not find entity \"%2$s\" in attribute %1$s using default declaration", 'E', 'R'},
+/* 152 */ {"Short reference map \"%s\" used in DTD but not defined", 'I', 'D'},
+/* 153 */ {"End-tag minimization should be \"O\" for element with CONREF attribute", 'I', 'D'},
+/* 154 */ {"Declared value of data attribute cannot be ENTITY or ENTITIES", 'E', 'D' },
+/* 155 */ {"Declared value of data attribute cannot be IDREF or IDREFS", 'E', 'D' },
+/* 156 */ {"Declared value of data attribute cannot be NOTATION", 'E', 'D' },
+/* 157 */ {"CURRENT cannot be specified for a data attribute", 'E', 'D' },
+/* 158 */ {"CONREF cannot be specified for a data attribute", 'E', 'D' },
+/* 159 */ {"Short reference map for element \"%s\" not defined; ignored", 'E', 'C'},
+/* 160 */ {"Cannot create temporary file", 'C', 'R'},
+/* 161 */ {"Document ended invalidly within SGML declaration", 'C', 'D'},
+/* 162 */ {"Capacity limit %s exceeded by %s points", 'W', 'Q'},
+/* 163 */ {"Amendment 1 requires \"ISO 8879:1986\" instead of \"ISO 8879-1986\"", 'W', 'D'},
+/* 164 */ {"Non-markup, non-minimum data character in SGML declaration", 'E', 'D'},
+/* 165 */ {"Parameter cannot be a literal", 'E', 'D'},
+/* 166 */ {"Invalid concrete syntax scope \"%s\"", 'E', 'D'},
+/* 167 */ {"Parameter must be a number", 'E', 'D'},
+/* 168 */ {"\"%s\" should have been \"%s\"", 'E', 'D'},
+/* 169 */ {"Character number %s is not supported as an additional name character", 'E', 'U'},
+/* 170 */ {"Parameter must be a literal or \"%s\"", 'E', 'D'},
+/* 171 */ {"Bad character description for character %s", 'E', 'D'},
+/* 172 */ {"Character number %s is described more than once", 'W', 'D'},
+/* 173 */ {"Character number plus number of characters exceeds 256", 'E', 'D'},
+/* 174 */ {"No description for upper half of character set: assuming \"128 128 UNUSED\"", 'W', 'D'},
+/* 175 */ {"Character number %s was not described; assuming UNUSED", 'E', 'D'},
+/* 176 */ {"Non-significant shunned character number %s not declared UNUSED", 'E', 'D'},
+/* 177 */ {"Significant character \"%s\" cannot be non-SGML", 'E', 'D'},
+/* 178 */ {"Unknown capacity set \"%s\"", 'E', 'U'},
+/* 179 */ {"No capacities specified." , 'E', 'D'},
+/* 180 */ {"Unknown concrete syntax \"%s\"", 'E', 'U'},
+/* 181 */ {"Character number exceeds 255", 'E', 'D'},
+/* 182 */ {"Concrete syntax SWITCHES not supported", 'E', 'U'},
+/* 183 */ {"\"INSTANCE\" scope not supported", 'E', 'U'},
+/* 184 */ {"Value of \"%s\" feature must be one or more", 'E', 'D'},
+/* 185 */ {"\"%s\" invalid; must be \"YES\" or \"NO\"", 'E', 'D'},
+/* 186 */ {"\"%s\" invalid; must be \"PUBLIC\" or \"SGMLREF\"", 'E', 'D'},
+/* 187 */ {"Feature \"%s\" is not supported", 'E', 'U'},
+/* 188 */ {"Too many open subdocument entities", 'E', 'Q'},
+/* 189 */ {"Invalid formal public identifier", 'I', 'D'},
+/* 190 */ {"Public text class should have been \"%s\"", 'I', 'D'},
+/* 191 */ {"Character number %s must be non-SGML", 'W', 'D'},
+/* 192 */ {"Notation \"%s\" not defined in DTD", 'W', 'D'},
+/* 193 */ {"Unclosed start or end tag requires \"SHORTTAG YES\"", 'W', 'M'},
+/* 194 */ {"Net-enabling start tag requires \"SHORTTAG YES\"", 'W', 'M'},
+/* 195 */ {"Attribute name omission requires \"SHORTTAG YES\"", 'W', 'M'},
+/* 196 */ {"Undelimited attribute value requires \"SHORTTAG YES\"", 'W', 'M'},
+/* 197 */ {"Attribute specification omitted for \"%s\": requires markup minimization", 'W', 'M'},
+/* 198 */ {"Concrete syntax does not have any short reference delimiters", 'E', 'D'},
+/* 199 */ {"Character number %s does not exist in the base character set", 'E', 'D'},
+/* 200 */ {"Character number %s is UNUSED in the syntax reference character set", 'E', 'D'},
+/* 201 */ {"Character number %s was not described in the syntax reference character set", 'E', 'D'},
+/* 202 */ {"Character number %s in the syntax reference character set has no corresponding character in the system character set", 'E', 'D'},
+/* 203 */ {"Character number %s was described using an unknown base set", 'E', 'D'},
+/* 204 */ {"Duplication specification for added function \"%s\"", 'E', 'D'},
+/* 205 */ {"Added function character cannot be \"%s\"", 'E', 'D'},
+/* 206 */ {"Only reference concrete syntax function characters supported", 'E', 'U'},
+/* 207 */ {"Only reference concrete syntax general delimiters supported", 'E', 'U'},
+/* 208 */ {"Only reference concrete syntax short reference delimiters supported", 'E', 'U'},
+/* 209 */ {"Unrecognized keyword \"%s\"", 'E', 'D'},
+/* 210 */ {"Unrecognized quantity name \"%s\"", 'E', 'D'},
+/* 211 */ {"Interpretation of \"%s\" is not a valid name in the declared concrete syntax", 'E', 'D'},
+/* 212 */ {"Replacement reserved name \"%s\" cannot be reference reserved name", 'E', 'D'},
+/* 213 */ {"Duplicate replacement reserved name \"%s\"", 'E', 'D'},
+/* 214 */ {"Quantity \"%s\" must not be less than %s", 'E', 'D'},
+/* 215 */ {"Only values up to %2$s are supported for quantity \"%1$s\"", 'E', 'U'},
+/* 216 */ {"Exclusions attempt to change required status of group in \"%s\"", 'E', 'C'},
+/* 217 */ {"Exclusion cannot apply to token \"%s\" in content model for \"%s\"", 'E', 'C'},
+/* 218 */ {"Required %s attribute was not specified for entity %s", 'E', 'C'},
+/* 219 */ {"UCNMSTRT must have the same number of characters as LCNMSTRT", 'E', 'D'},
+/* 220 */ {"UCNMCHAR must have the same number of characters as LCNMCHAR", 'E', 'D'},
+/* 221 */ {"Character number %s assigned to both LCNMSTRT or UCNMSTRT and LCNMCHAR or UCNMCHAR", 'E', 'D'},
+/* 222 */ {"Character number %s cannot be an additional name character", 'E', 'D'},
+/* 223 */ {"It is unsupported for \"-\" not to be assigned to UCNMCHAR or LCNMCHAR", 'E', 'U'},
+/* 224 */ {"Normalized length of value of attribute \"%s\" exceeded LITLEN", 'E', 'Q'},
+/* 225 */ {"Length of interpreted parameter literal exceeds LITLEN less the length of the bracketing delimiters", 'E', 'Q'},
+/* 226 */ {"Start tag of document element omitted; not minimizable", 'W', 'M'},
+/* 227 */ {"Unrecognized designating escape sequence \"%s\"", 'I', 'U'},
+/* 228 */ {"Earlier reference to entity \"%s\" used default entity", 'I', 'D'},
+/* 229 */ {"Reference to non-existent parameter entity \"%s\" ignored", 'E', 'R'},
+};
diff --git a/usr.bin/sgmls/sgmls/msgcat.c b/usr.bin/sgmls/sgmls/msgcat.c
new file mode 100644
index 0000000..6b0d9cb
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/msgcat.c
@@ -0,0 +1,833 @@
+/* msgcat.c -
+ X/Open message catalogue functions and gencat utility.
+
+ Written by James Clark (jjc@jclark.com).
+*/
+
+#include "config.h"
+
+#ifndef HAVE_CAT
+
+/* In this implementation the message catalogue format is the same as the
+message text source file format (see pp 42-43 of the X/Open
+Portability Guide, Issue 3, Volume 3.) This means that you don't have
+to use the gencat utility, but it is still useful for checking and
+merging catalogues. */
+
+/* Compile this with -DGENCAT to get the gencat utility. */
+
+#include "std.h"
+#include "msgcat.h"
+
+#ifdef USE_PROTOTYPES
+#define P(parms) parms
+#else
+#define P(parms) ()
+#endif
+
+/* Default message set. */
+#define NL_SETD 1
+
+#ifndef PATH_FILE_SEP
+#define PATH_FILE_SEP ':'
+#endif
+
+#ifndef DEFAULT_NLSPATH
+#define DEFAULT_NLSPATH ""
+#endif
+
+#ifndef DEFAULT_LANG
+#define DEFAULT_LANG "default"
+#endif
+
+#define HASH_TAB_SIZE 251
+
+/* One message of a catalogue.  Messages whose (setnum, msgnum) pair
+   hashes to the same bucket are chained through `next'. */
+struct message {
+ struct message *next;
+ unsigned msgnum;
+ unsigned setnum;
+ char *text;
+};
+
+/* An open catalogue as handed out by catopen().  `loaded' is set by
+   load_catalog() on the first attempt to read the file; `bad' is set when
+   the file could not be found or could not be parsed.  `table' is a hash
+   table of message chains indexed by hash(setnum, msgnum). */
+struct cat {
+ char *name;
+ int loaded;
+ int bad;
+ struct message *table[HASH_TAB_SIZE];
+};
+
+static char *read_buf = 0;
+static unsigned read_buf_len = 0;
+
+/* Errors that can be generated by read_catalog. */
+
+/* Values stored in cat_errno.  The order must stay in step with the
+   cat_errlist[] strings that gencat indexes with these values. */
+enum cat_err {
+ E_ZERO, /* not an error */
+ E_BADARG,
+ E_NOMEM,
+ E_NOSUCHCOMMAND,
+ E_INPUT,
+ E_EOF,
+ E_BADSEP,
+ E_BADLINE
+};
+
+#ifdef GENCAT
+/* These must match enum cat_err. */
+/* Human-readable text for each enum cat_err value, in enum order; gencat's
+   main() prints cat_errlist[cat_errno] when read_catalog() fails. */
+static char *cat_errlist[] = {
+ "Error 0",
+ "Invalid argument to command",
+ "Out of memory",
+ "Unrecognized command",
+ "Input error",
+ "Unexpected end of file",
+ "Space or tab expected after message number",
+ "Invalid line",
+};
+#endif /* GENCAT */
+
+#ifndef GENCAT
+/* The value of NLSPATH. */
+static char *nlspath = 0;
+/* The value of LANG. */
+static char *lang = 0;
+#endif /* not GENCAT */
+
+static int current_lineno = -1;
+static enum cat_err cat_errno = E_ZERO;
+
+#ifndef GENCAT
+static void load_catalog P((struct cat *));
+static FILE *find_catalog P((char *, char **));
+#endif
+static int read_catalog P((FILE *, struct message **));
+static void delete_set P((struct message **, unsigned));
+static void delete_message P((struct message **, unsigned, unsigned));
+static int hash P((unsigned setnum, unsigned msgnum));
+static char *parse_text P((FILE *, int));
+
+#ifndef GENCAT
+
+/* catopen: Create a catalogue descriptor for `name'.
+   The catalogue file itself is not opened here; it is located and read
+   by load_catalog() the first time catgets() is called on the descriptor.
+   `oflag' is accepted for interface compatibility and is not used.
+   Returns 0 if `name' is null or memory cannot be allocated. */
+nl_catd catopen(name, oflag)
+char *name;
+int oflag;
+{
+     struct cat *catp;
+     int i;
+
+     if (!name)
+          return 0;
+
+     catp = (struct cat *)malloc(sizeof *catp);
+     if (!catp)
+          return 0;
+     catp->name = malloc(strlen(name) + 1);
+     if (!catp->name) {
+          /* Don't strcpy through a null pointer; give up cleanly. */
+          free((char *)catp);
+          return 0;
+     }
+     strcpy(catp->name, name);
+     for (i = 0; i < HASH_TAB_SIZE; i++)
+          catp->table[i] = 0;
+     catp->loaded = 0;
+     catp->bad = 0;
+     return (nl_catd)catp;
+}
+
+/* catclose: Release a descriptor obtained from catopen(), together with
+   every message chained in its hash table and the saved catalogue name.
+   A null descriptor is accepted.  Always returns 0. */
+int catclose(catd)
+nl_catd catd;
+{
+     struct cat *catp = (struct cat *)catd;
+     int bucket;
+
+     if (catp) {
+          for (bucket = 0; bucket < HASH_TAB_SIZE; bucket++) {
+               struct message *msg = catp->table[bucket];
+
+               while (msg) {
+                    /* Remember the successor before the node goes away. */
+                    struct message *following = msg->next;
+
+                    free(msg->text);
+                    free((char *)msg);
+                    msg = following;
+               }
+          }
+          if (catp->name)
+               free(catp->name);
+          free((char *)catp);
+     }
+     return 0;
+}
+
+/* catgets: Return the text of message `msgnum' in set `setnum' of the
+   catalogue `catd'.  The catalogue is loaded on first use.  `dflt' is
+   returned when the descriptor is null, a number is not positive, the
+   catalogue is marked bad, or the message is not present.  The returned
+   text belongs to the catalogue. */
+char *catgets(catd, setnum, msgnum, dflt)
+nl_catd catd;
+int setnum, msgnum;
+char *dflt;
+{
+     struct cat *catp;
+     struct message *entry;
+
+     /* setnum and msgnum are required to be >= 1. */
+     if (!catd || setnum <= 0 || msgnum <= 0)
+          return dflt;
+     catp = (struct cat *)catd;
+     if (!catp->loaded)
+          load_catalog(catp);
+     if (catp->bad)
+          return dflt;
+     entry = catp->table[hash(setnum, msgnum)];
+     while (entry) {
+          if (entry->msgnum == msgnum && entry->setnum == setnum)
+               return entry->text;
+          entry = entry->next;
+     }
+     return dflt;
+}
+
+/* load_catalog: Locate the file for `catp' and read its messages into
+   catp->table.  The catalogue is marked loaded regardless of the outcome
+   and marked bad if the file cannot be found or read_catalog() fails.
+   The shared line buffer used while reading is released afterwards. */
+static
+VOID load_catalog(catp)
+struct cat *catp;
+{
+     char *path;
+     FILE *fp;
+
+     catp->loaded = 1;
+     fp = find_catalog(catp->name, &path);
+     if (fp) {
+          current_lineno = 0;
+          if (read_catalog(fp, catp->table) < 0)
+               catp->bad = 1;
+          fclose(fp);
+          /* Drop the scratch buffer that parse_text() grew. */
+          if (read_buf) {
+               free(read_buf);
+               read_buf = 0;
+          }
+          read_buf_len = 0;
+          /* find_catalog() handed us ownership of the path string. */
+          free(path);
+     }
+     else
+          catp->bad = 1;
+}
+
+/* find_catalog: Try each PATH_FILE_SEP-separated element of NLSPATH as a
+   template for the catalogue file name and open the first one that can
+   be read.  In a template "%N" stands for `name', "%L" for the value of
+   LANG and "%%" for a percent sign; a '%' followed by anything else is
+   copied as is.  NLSPATH and LANG are fetched from the environment once
+   and cached, falling back to DEFAULT_NLSPATH and DEFAULT_LANG.
+   On success the open stream is returned and *pathp is set to the
+   malloc'd file name, which the caller frees.  Returns 0 if nothing
+   could be opened or memory ran out; *pathp is then left untouched. */
+static
+FILE *find_catalog(name, pathp)
+char *name;
+char **pathp;
+{
+     char *elem;              /* start of the template being tried */
+
+     if (!name)
+          return 0;
+     if (!nlspath) {
+          nlspath = getenv("NLSPATH");
+          if (!nlspath)
+               nlspath = DEFAULT_NLSPATH;
+     }
+     if (!lang) {
+          lang = getenv("LANG");
+          if (!lang)
+               lang = DEFAULT_LANG;
+     }
+     elem = nlspath;
+     for (;;) {
+          char *p;
+          unsigned len = 0;
+
+          /* First pass: work out the length of the expanded template. */
+          for (p = elem; *p != '\0' && *p != PATH_FILE_SEP; p++) {
+               if (*p != '%') {
+                    len++;
+                    continue;
+               }
+               switch (p[1]) {
+               case 'N':
+                    p++;
+                    len += strlen(name);
+                    break;
+               case 'L':
+                    p++;
+                    len += strlen(lang);
+                    break;
+               case '%':
+                    p++;
+                    len++;
+                    break;
+               default:
+                    len++;
+                    break;
+               }
+          }
+          /* An empty template is skipped. */
+          if (len > 0) {
+               char *candidate, *out;
+               FILE *fp;
+
+               candidate = malloc(len + 1);
+               if (!candidate)
+                    return 0;
+               out = candidate;
+               /* Second pass: build the file name. */
+               for (p = elem; *p != '\0' && *p != PATH_FILE_SEP; p++) {
+                    if (*p != '%') {
+                         *out++ = *p;
+                         continue;
+                    }
+                    switch (p[1]) {
+                    case 'N':
+                         p++;
+                         strcpy(out, name);
+                         out += strlen(name);
+                         break;
+                    case 'L':
+                         p++;
+                         strcpy(out, lang);
+                         out += strlen(lang);
+                         break;
+                    case '%':
+                         p++;
+                         *out++ = '%';
+                         break;
+                    default:
+                         *out++ = *p;
+                         break;
+                    }
+               }
+               *out = '\0';
+               fp = fopen(candidate, "r");
+               if (fp) {
+                    *pathp = candidate;
+                    return fp;
+               }
+               free(candidate);
+          }
+          /* p now rests on the separator or on the end of NLSPATH. */
+          if (*p == '\0')
+               break;
+          elem = p + 1;
+     }
+     return 0;
+}
+
+#endif /* not GENCAT */
+
+/* parse_message: Parse one message line.  `c' is the first digit of the
+   message number, already read from `fp'.
+   A number followed directly by a newline deletes that message from set
+   `setnum'.  Otherwise the number must be followed by a space or tab and
+   the rest of the line is read by parse_text(); the result replaces any
+   existing text for (setnum, msgnum) or is added to `table'.
+   Returns 0 on success, -1 on error with cat_errno set. */
+
+static
+int parse_message(c, fp, table, setnum, quote)
+int c;
+FILE *fp;
+struct message **table;
+unsigned setnum;
+int quote;
+{
+     unsigned msgnum;
+     struct message *msgp;
+     char *text;
+     int hc;
+
+     msgnum = c - '0';
+     for (;;) {
+          c = getc(fp);
+          if (!isdigit(c))
+               break;
+          msgnum = msgnum*10 + (c - '0');
+     }
+     if (c == '\n') {
+          delete_message(table, setnum, msgnum);
+          return 0;
+     }
+     if (c != ' ' && c != '\t') {
+          cat_errno = E_BADSEP;
+          return -1;
+     }
+     text = parse_text(fp, quote);
+     if (!text)
+          return -1;
+     hc = hash(setnum, msgnum);
+     for (msgp = table[hc]; msgp; msgp = msgp->next)
+          if (msgp->setnum == setnum && msgp->msgnum == msgnum)
+               break;
+     if (msgp)
+          /* Redefinition: discard the old text, keep the node. */
+          free(msgp->text);
+     else {
+          msgp = (struct message *)malloc(sizeof *msgp);
+          if (!msgp) {
+               /* Nobody else owns the freshly parsed text yet. */
+               free(text);
+               cat_errno = E_NOMEM;
+               return -1;
+          }
+          msgp->next = table[hc];
+          table[hc] = msgp;
+          msgp->msgnum = msgnum;
+          msgp->setnum = setnum;
+     }
+     msgp->text = text;
+     return 0;
+}
+
+/* parse_text: Read the text of a message up to the end of the line and
+   return it as a malloc'd, NUL-terminated string.
+   If the text starts with the `quote' character (-1 means there is none)
+   it ends at the next unescaped quote and the rest of the line is
+   skipped.  Backslash escapes \n \b \f \t \v \r \\ and up to three octal
+   digits are interpreted; backslash-newline continues the message on the
+   next line; a backslash before any other character is dropped.  An
+   octal escape that evaluates to NUL is ignored.
+   The characters are collected in the shared read_buf, which grows as
+   needed and is released by the caller's caller.
+   Returns 0 with cat_errno set on a read error or when out of memory. */
+static
+char *parse_text(fp, quote)
+FILE *fp;
+int quote;
+{
+     unsigned i = 0;
+     char *p;
+     int c;
+     int quoted;
+
+     c = getc(fp);
+     /* EOF must not be mistaken for the "no quote character" value. */
+     if (c != EOF && c == quote) {
+          quoted = 1;
+          c = getc(fp);
+     }
+     else
+          quoted = 0;
+     for (;; c = getc(fp)) {
+          if (c == EOF) {
+               if (ferror(fp)) {
+                    cat_errno = E_INPUT;
+                    return 0;
+               }
+               break;
+          }
+          if (c == '\n')
+               break;
+          /* XXX
+
+             Can quotes be used in quoted message text if protected by \ ?
+
+             Is it illegal to omit the closing quote if there's an opening
+             quote?
+
+             Is it illegal to have anything after a closing quote?
+
+             */
+
+          if (quoted && c == quote) {
+               /* Skip the rest of the line. */
+               while ((c = getc(fp)) != '\n')
+                    if (c == EOF) {
+                         if (ferror(fp)) {
+                              cat_errno = E_INPUT;
+                              return 0;
+                         }
+                         break;
+                    }
+               break;
+          }
+          if (c == '\\') {
+               int d;
+
+               c = getc(fp);
+               if (c == EOF) {
+                    /* Report a read error here just as everywhere else. */
+                    if (ferror(fp)) {
+                         cat_errno = E_INPUT;
+                         return 0;
+                    }
+                    break;
+               }
+               switch (c) {
+               case '\n':
+                    current_lineno++;
+                    continue;
+               case 'n':
+                    c = '\n';
+                    break;
+               case 'b':
+                    c = '\b';
+                    break;
+               case 'f':
+                    c = '\f';
+                    break;
+               case 't':
+                    c = '\t';
+                    break;
+               case 'v':
+                    c = '\v';
+                    break;
+               case 'r':
+                    c = '\r';
+                    break;
+               case '\\':
+                    c = '\\';
+                    break;
+               case '0':
+               case '1':
+               case '2':
+               case '3':
+               case '4':
+               case '5':
+               case '6':
+               case '7':
+                    c -= '0';
+                    d = getc(fp);
+                    if (d >= '0' && d <= '7') {
+                         c = c*8 + d - '0';
+                         d = getc(fp);
+                         if (d >= '0' && d <= '7')
+                              c = c*8 + d - '0';
+                         else if (d != EOF)
+                              ungetc(d,fp);
+                    }
+                    else if (d != EOF)
+                         ungetc(d, fp);
+                    if (c == '\0')
+                         continue;      /* XXX */
+                    break;
+               default:
+                    /* Ignore the quote. */
+                    break;
+               }
+          }
+          if (i >= read_buf_len) {
+               unsigned new_len;
+               char *new_buf;
+
+               /* Grow through a temporary so that the old buffer and its
+                  recorded length stay consistent if allocation fails. */
+               if (!read_buf) {
+                    new_len = 40;
+                    new_buf = malloc(new_len);
+               }
+               else {
+                    new_len = read_buf_len * 2;
+                    new_buf = realloc(read_buf, new_len);
+               }
+               if (!new_buf) {
+                    cat_errno = E_NOMEM;
+                    return 0;
+               }
+               read_buf = new_buf;
+               read_buf_len = new_len;
+          }
+          read_buf[i++] = c;
+     }
+     p = malloc(i + 1);
+     if (!p) {
+          cat_errno = E_NOMEM;
+          return 0;
+     }
+     /* For an empty message read_buf may still be a null pointer. */
+     if (i > 0)
+          memcpy(p, read_buf, i);
+     p[i] = '\0';
+     return p;
+}
+
+/* parse_command: Parse the rest of a line whose first character, '$',
+   has already been read.
+     "$ ", "$<tab>" or "$" alone   comment, ignored
+     "$set N"                      make N the current set (*setnump)
+     "$delset N"                   delete set N from `table' and make
+                                   NL_SETD the current set
+     "$quote C"                    make C the quote character (*quotep);
+                                   with no C, quoting is switched off (-1)
+   Only the first 127 characters of the line are examined; the remainder
+   of a longer line is discarded.
+   Returns 0 on success, -1 on error with cat_errno set. */
+
+static
+int parse_command(fp, table, setnump, quotep)
+FILE *fp;
+struct message **table;
+unsigned *setnump;
+int *quotep;
+{
+     char buf[128];
+     if (fgets(buf, sizeof buf, fp) == NULL) {
+          cat_errno = ferror(fp) ? E_INPUT : E_EOF;
+          return -1;
+     }
+     if (buf[0] == ' ' || buf[0] == '\t' || buf[0] == '\n')
+          /* a comment */;
+     else if (strncmp(buf, "set", 3) == 0) {
+          if (sscanf(buf + 3, "%u", setnump) != 1) {
+               cat_errno = E_BADARG;
+               return -1;
+          }
+
+     }
+     else if (strncmp(buf, "delset", 6) == 0) {
+          unsigned num;
+          if (sscanf(buf + 6, "%u", &num) != 1) {
+               cat_errno = E_BADARG;
+               return -1;
+          }
+          delete_set(table, num);
+          *setnump = NL_SETD;
+     }
+     else if (strncmp(buf, "quote", 5) == 0) {
+          char *p = buf + 5;
+          while (*p == ' ' || *p == '\t')
+               p++;
+          /* XXX should \ be allowed as the quote character? */
+          if (*p == '\0' || *p == '\n')
+               *quotep = -1;
+          else
+               /* The quote is later compared with values returned by
+                  getc(), which are never negative, so store it as an
+                  unsigned char value.  Where plain char is signed this
+                  also keeps a high-bit character from turning into -1,
+                  the "no quote" value. */
+               *quotep = (unsigned char)*p;
+     }
+     else {
+          cat_errno = E_NOSUCHCOMMAND;
+          return -1;
+     }
+     /* If the line did not fit in buf, swallow what is left of it. */
+     if (strchr(buf, '\n') == 0) {
+          int c;
+          while ((c = getc(fp)) != '\n' && c != EOF)
+               ;
+     }
+     return 0;
+}
+
+
+/* delete_set: Remove every message belonging to set `setnum' from
+   `table', releasing both the message node and its text.  Each bucket is
+   rebuilt from the surviving messages, so their order within a bucket is
+   reversed. */
+static
+VOID delete_set(table, setnum)
+struct message **table;
+unsigned setnum;
+{
+     int i;
+
+     for (i = 0; i < HASH_TAB_SIZE; i++) {
+          struct message *p, *nextp;
+          for (p = table[i], table[i] = 0; p; p = nextp) {
+               nextp = p->next;
+               if (p->setnum == setnum) {
+                    /* The text is owned by the node; free it as well. */
+                    free(p->text);
+                    free((char *)p);
+               }
+               else {
+                    p->next = table[i];
+                    table[i] = p;
+               }
+          }
+     }
+}
+
+/* delete_message: Unlink and free the message (setnum, msgnum), text
+   included, if it is present in `table'.  Does nothing otherwise. */
+static
+VOID delete_message(table, setnum, msgnum)
+struct message **table;
+unsigned setnum, msgnum;
+{
+     /* `link' always addresses the pointer that leads to `victim'. */
+     struct message **link = &table[hash(setnum, msgnum)];
+     struct message *victim;
+
+     while ((victim = *link) != 0) {
+          if (victim->setnum == setnum && victim->msgnum == msgnum) {
+               *link = victim->next;
+               free(victim->text);
+               free((char *)victim);
+               return;
+          }
+          link = &victim->next;
+     }
+}
+
+/* read_catalog: Read a whole message text source file from `fp' into
+   `table'.  Each line is dispatched on its first character: a digit
+   starts a message, '$' starts a command, an empty line is skipped.  On
+   any other line the characters after the first must all be spaces or
+   tabs.  current_lineno is advanced once per line started here.
+   Returns 0 on success, -1 on error.  On error cat_errno is set to the
+   error number. */
+
+static
+int read_catalog(fp, table)
+FILE *fp;
+struct message **table;
+{
+     int c;
+     unsigned setnum = NL_SETD;
+     int quote_char = -1;
+
+     /* Each iteration starts at the beginning of a line. */
+     while ((c = getc(fp)) != EOF) {
+          ++current_lineno;
+          if (c == '\n')
+               continue;
+          if (isdigit(c)) {
+               if (parse_message(c, fp, table, setnum, quote_char) < 0)
+                    return -1;
+               continue;
+          }
+          if (c == '$') {
+               if (parse_command(fp, table, &setnum, &quote_char) < 0)
+                    return -1;
+               continue;
+          }
+          /* Anything else: the remainder of the line has to be blank. */
+          for (;;) {
+               c = getc(fp);
+               if (c == '\n' || c == EOF)
+                    break;
+               if (c != ' ' && c != '\t') {
+                    cat_errno = E_BADLINE;
+                    return -1;
+               }
+          }
+          if (c == EOF)
+               break;
+     }
+     return 0;
+}
+
+/* hash: Map a (set number, message number) pair to a bucket index in the
+   range 0 .. HASH_TAB_SIZE-1. */
+static
+int hash(setnum, msgnum)
+unsigned setnum, msgnum;
+{
+     unsigned key;
+
+     /* Set number in the upper bits, message number added on top. */
+     key = (setnum << 8) + msgnum;
+     return key % HASH_TAB_SIZE;
+}
+
+#ifdef GENCAT
+
+static char *program_name;
+
+static int message_compare P((UNIV, UNIV));
+static void print_text P((char *, FILE *));
+static void usage P((void));
+
+#ifdef VARARGS
+static void fatal();
+#else
+static void fatal P((char *,...));
+#endif
+
+/* gencat: usage is "gencat catfile msgfile...".
+   Every named file, catfile first, is read into one table so that the
+   msgfiles are merged into whatever catfile already holds; a catfile that
+   cannot be opened for reading is simply treated as empty.  The merged
+   messages are then sorted by set and message number and written back to
+   catfile.  Any other failure is fatal. */
+int main(argc, argv)
+int argc;
+char **argv;
+{
+     FILE *fp;
+     int i, j, nmessages;
+     struct message **list;
+     unsigned setnum;
+     struct message *table[HASH_TAB_SIZE];
+
+     program_name = argv[0];
+
+     if (argc < 3)
+          usage();
+
+     for (i = 0; i < HASH_TAB_SIZE; i++)
+          table[i] = 0;
+     for (i = 1; i < argc; i++) {
+          errno = 0;
+          fp = fopen(argv[i], "r");
+          if (!fp) {
+               /* Only the catfile (argv[1]) is allowed to be missing. */
+               if (i > 1)
+                    fatal("can't open `%s': %s", argv[i], strerror(errno));
+          }
+          else {
+               current_lineno = 0;
+               cat_errno = E_ZERO;
+               if (read_catalog(fp, table) < 0) {
+                    assert(cat_errno != E_ZERO);
+                    assert(cat_errno
+                           < sizeof(cat_errlist)/sizeof(cat_errlist[0]));
+                    fatal("%s:%d: %s", argv[i], current_lineno,
+                          cat_errlist[cat_errno]);
+               }
+               fclose(fp);
+          }
+     }
+
+     errno = 0;
+     fp = fopen(argv[1], "w");
+     if (!fp)
+          fatal("can't open `%s' for output: %s", argv[1], strerror(errno));
+     nmessages = 0;
+     for (i = 0; i < HASH_TAB_SIZE; i++) {
+          struct message *p;
+          for (p = table[i]; p; p = p->next)
+               nmessages++;
+     }
+     /* malloc(0) may legitimately return a null pointer, so only allocate
+        (and only treat a null result as failure) when there is something
+        to store. */
+     list = 0;
+     if (nmessages > 0) {
+          list = (struct message **)malloc(nmessages*sizeof(struct message *));
+          if (!list)
+               fatal("out of memory");
+     }
+     j = 0;
+     for (i = 0; i < HASH_TAB_SIZE; i++) {
+          struct message *p;
+          for (p = table[i]; p; p = p->next)
+               list[j++] = p;
+     }
+     assert(j == nmessages);
+
+     if (nmessages > 0)
+          qsort((UNIV)list, nmessages, sizeof(struct message *),
+                message_compare);
+
+     /* Messages of the default set need no "$set" line in front. */
+     setnum = NL_SETD;
+     for (i = 0; i < nmessages; i++) {
+          struct message *p = list[i];
+          if (p->setnum != setnum) {
+               setnum = p->setnum;
+               fprintf(fp, "$set %u\n", setnum);
+          }
+          fprintf(fp, "%u ", p->msgnum);
+          print_text(p->text, fp);
+          putc('\n', fp);
+     }
+     if (list)
+          free((char *)list);
+     if (fclose(fp) == EOF)
+          fatal("error closing `%s'", argv[1]);
+     return 0;
+}
+
+/* usage: Print gencat's command line synopsis on stderr and exit with
+   status 1.  Does not return. */
+static
+VOID usage()
+{
+ fprintf(stderr, "usage: %s catfile msgfile...\n", program_name);
+ exit(1);
+}
+
+/* fatal: Print "program_name: " followed by the printf-style formatted
+   message and a newline on stderr, then exit with status 1.  Does not
+   return.  The function is written twice over: with VARARGS defined it
+   uses the old <varargs.h> conventions and picks the format string out of
+   the argument list itself; otherwise it is an ordinary variadic function
+   whose first argument is the format string. */
+static
+#ifdef VARARGS
+VOID fatal(va_alist) va_dcl
+#else /* not VARARGS */
+VOID fatal(char *message,...)
+#endif /* not VARARGS */
+{
+ va_list ap;
+
+#ifdef VARARGS
+ char *message;
+ va_start(ap);
+ message = va_arg(ap, char *);
+#else /* not VARARGS */
+ va_start(ap, message);
+#endif /* not VARARGS */
+
+ fprintf(stderr, "%s: ", program_name);
+ vfprintf(stderr, message, ap);
+ putc('\n', stderr);
+ va_end(ap);
+ exit(1);
+}
+
+/* message_compare: qsort() comparison function for an array of
+   `struct message *'.  Orders by set number first and message number
+   second; returns -1, 0 or 1. */
+static
+int message_compare(p1, p2)
+UNIV p1;
+UNIV p2;
+{
+     /* qsort hands over pointers to the array elements, which are
+        themselves pointers to messages. */
+     struct message *m1 = *(struct message **)p1;
+     struct message *m2 = *(struct message **)p2;
+
+     if (m1->setnum < m2->setnum)
+          return -1;
+     if (m1->setnum > m2->setnum)
+          return 1;
+     if (m1->msgnum < m2->msgnum)
+          return -1;
+     if (m1->msgnum > m2->msgnum)
+          return 1;
+     return 0;
+}
+
+/* print_text: Write message text `s' to `fp' in message source file
+   syntax.  A backslash is doubled, printable ASCII characters are written
+   unchanged, newline, backspace, form feed, tab, vertical tab and
+   carriage return are written as their two-character escapes, and every
+   other character is written as a backslash followed by three octal
+   digits. */
+static
+VOID print_text(s, fp)
+char *s;
+FILE *fp;
+{
+     while (*s) {
+          char ch = *s++;
+          char *escape;
+
+          if (ch == '\\') {
+               fputs("\\\\", fp);
+               continue;
+          }
+          if (ISASCII(ch) && isprint((UNCH)ch)) {
+               putc(ch, fp);
+               continue;
+          }
+          /* Not printable: use a named escape if there is one. */
+          switch (ch) {
+          case '\n':
+               escape = "\\n";
+               break;
+          case '\b':
+               escape = "\\b";
+               break;
+          case '\f':
+               escape = "\\f";
+               break;
+          case '\t':
+               escape = "\\t";
+               break;
+          case '\v':
+               escape = "\\v";
+               break;
+          case '\r':
+               escape = "\\r";
+               break;
+          default:
+               escape = 0;
+               break;
+          }
+          if (escape)
+               fputs(escape, fp);
+          else
+               fprintf(fp, "\\%03o", (unsigned char)ch);
+     }
+}
+
+#endif /* GENCAT */
+
+#ifdef TEST
+
+/* Test driver: opens the catalogue named on the command line, then reads
+   "setnum msgnum" pairs from standard input and reports what catgets()
+   returns for each, until the input no longer yields two numbers. */
+int main(argc, argv)
+int argc;
+char **argv;
+{
+     nl_catd catd;
+     int setnum, msgnum;
+
+     if (argc != 2) {
+          fprintf(stderr, "usage: %s catalogue\n", argv[0]);
+          exit(1);
+     }
+     catd = catopen(argv[1], 0);
+     fprintf(stderr, "Enter set number, message number pairs:\n");
+     fflush(stderr);
+     for (;;) {
+          char *msg;
+
+          if (scanf("%d %d", &setnum, &msgnum) != 2)
+               break;
+          msg = catgets(catd, setnum, msgnum, "<default>");
+          fprintf(stderr, "Returned \"%s\"\n", msg);
+          fflush(stderr);
+     }
+     return 0;
+}
+
+#endif /* TEST */
+
+#endif /* not HAVE_CAT */
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/msgcat.h b/usr.bin/sgmls/sgmls/msgcat.h
new file mode 100644
index 0000000..83e998a
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/msgcat.h
@@ -0,0 +1,13 @@
+
+/* Message catalogue interface.  When HAVE_CAT is defined the system
+<nl_types.h> supplies nl_catd; otherwise nl_catd is declared here as
+UNIV. */
+#ifdef HAVE_CAT
+#include <nl_types.h>
+#else
+typedef UNIV nl_catd;
+#endif
+
+/* Don't use prototypes here in case nl_types.h declares a conflicting
+prototype. */
+
+nl_catd catopen();
+int catclose();
+char *catgets();
diff --git a/usr.bin/sgmls/sgmls/pars1.c b/usr.bin/sgmls/sgmls/pars1.c
new file mode 100644
index 0000000..7960dc7
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/pars1.c
@@ -0,0 +1,958 @@
+#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */
+#define GI (tags[ts].tetd->etdgi+1) /* GI of current element. */
+#define NEWGI (newetd->etdgi+1) /* GI of new tag. */
+
+/* Forward declarations of the file-local helpers defined later in this
+ file. */
+static VOID doincludes P((void));
+static int pentname P((char *));
+static struct mpos *newmpos P((void));
+static VOID commbufs P((void));
+static VOID checkdtd P((void));
+
+/* PARSECON: Parse content of an element.
+ Work left pending by an earlier call is finished first, in this order:
+ end of document (eodsw), a content reference after a required
+ start-tag (didreq with TAGREF), implied end-tags still to be returned
+ (etagimct), a start-tag to be retried (dostag), a content reference
+ (conrefsw) and end of file (eofsw).
+ Otherwise parse() is called repeatedly and its action code is
+ dispatched until one of the cases returns a code to the caller.
+*/
+int parsecon(tbuf, pcb)
+UNCH *tbuf; /* Work area for tokenization. */
+struct parse *pcb; /* Parse control block for this parse. */
+{
+ int srn; /* SHORTREF delimiter number (1-32). */
+ int refrc; /* Return code from sentref, stagetd, etc. */
+
+ TRACECON(etagimct, dostag, datarc, pcb, conrefsw, didreq);
+ if (eodsw) return(EOD_);
+ if (didreq && (conrefsw & TAGREF)) {didreq = 0; goto conr;}
+ /* One implied end-tag is returned per call while etagimct is positive. */
+ if (etagimct>0) {etagimsw = --etagimct ? 1 : 0; destack(); return(ETG_);}
+ if (dostag) {
+ /* Restore the status saved by stag() and retry the start-tag. */
+ conrefsw = conrefsv;
+ etisw = etiswsv;
+ if (charmode) {dostag = 0; return datarc;}
+ return stag(datarc);
+ }
+ if (conrefsw) {
+ conr:
+ destack();
+ conrefsw = 0;
+ return ETG_;
+ }
+ else if (eofsw) return(EOD_);
+
+ datarc = 0;
+ while (1) {
+ parse(pcb);
+ srn = (int)pcb->action - SRMIN; /* Just in case it's a SHORTREF. */
+ switch (pcb->action) {
+ case DCE_: /* Data character in element content. */
+ /* The data character might be a non-SGML character so
+ reprocess it using pcbconm. */
+ REPEATCC;
+ pcb = conpcb = &pcbconm;
+ pcb->newstate = pcbcnet;
+ continue;
+ case DAS_: /* Current character begins data. */
+ data = FPOS;
+ continue;
+
+ case NLF_: /* NET or SR returns data in lookahead buffer. */
+ datalen = (UNS)(ptcon - data); REPEATCC;
+ goto rcc;
+
+ case LAF_: /* Return data in lookahead buffer: mixed. */
+ datalen = (UNS)(ptcon+1 - data);
+ goto rcc;
+
+ case NON_: /* Single nonchar in nonchbuf. */
+ datalen = 2; data = nonchbuf;
+ goto nrcc;
+
+ case DAR_: /* Return data except for last char. */
+ REPEATCC;
+ /* fall through */
+ case DAF_: /* Return data in source entity buffer. */
+ datalen = (UNS)(FPOS - data);
+ rcc:
+ REPEATCC;
+ /* fall through */
+ case DEF_: /* Return data in data entity. */
+ nrcc:
+ datarc = DAF_;
+ if (pcb==&pcbcone) {
+ pcbconm.newstate = pcbcnet;
+ conpcb = &pcbconm;
+ }
+ if (charmode) return(datarc);
+ /* Data is checked against the content model as the fake tag #CDATA. */
+ stagmin = MINNONE; stagreal = newetd = ETDCDATA;
+ return(stag(datarc));
+
+ case LAS_: /* Start lookahead buffer with current char. */
+ *(ptcon = data = tbuf+1) = *FPOS;
+ continue;
+
+ case LAM_: /* Move character to lookahead buffer. */
+ *++ptcon = *FPOS;
+ continue;
+
+ case STG_: /* Process non-null start-tag. */
+ CTRSET(tagctr); /* Start counting tag length. */
+ parsenm(tbuf, NAMECASE); /* Get the GI. */
+ newetd = etdref(tbuf);
+ if (newetd && newetd->adl) {
+ parseatt(newetd->adl, tbuf);
+ adlval((int)ADN(al), newetd);
+ }
+ parsetag(&pcbstag); /* Parse the tag ending. */
+ if ((CTRGET(tagctr)-tagdelsw)>=TAGLEN)
+ sgmlerr(66, &pcbstag, (UNCH *)0, (UNCH *)0);
+ if (!newetd) {
+ /* Undefined GI: report it and go on parsing content. */
+ sgmlerr(132, pcb, tbuf+1, (UNCH *)0);
+ continue;
+ }
+ return(stagetd(&pcbstag));
+
+ case NST_: /* Process null start-tag. */
+ return nstetd();
+
+ case ETC_: /* End-tag in CDATA or RCDATA. */
+ case ETG_: /* Process non-null end-tag. */
+ newetd = etdref(parsenm(tbuf, NAMECASE)); /* Get the GI. */
+ parsetag(&pcbetag); /* Parse tag end. */
+ if (!newetd) /* Error: undefined.*/
+ sgmlerr(11, &pcbetag, tbuf+1, (UNCH *)0);
+ else if (etagetd(&pcbetag)>=0) return ETG_;/* Open element. */
+ if (pcb->action!=ETC_) continue;
+ /* Tag is undefined or not for an open element and we are in
+ a CDATA or RCDATA element; issue message and treat as
+ null end-tag (</>).
+ */
+ sgmlerr(57, &pcbetag, (UNCH *)0, (UNCH *)0);
+ /* fall through */
+ case NET_: /* Process null end-tag. */
+ if ((refrc = netetd(conpcb))!=0) return ETG_;
+ continue;
+
+ case NED_: /* Process null end-tag delimiter. */
+ etagmin = MINNET;
+ newetd = etagreal = ETDNET;
+ etagimct = etag();
+ etagimsw = etagimct ? 1 : 0; destack();
+ return ETG_;
+ case GTR_:
+ if (entget()!=-1) {
+ data = FPOS;
+ continue;
+ }
+ /* fall through */
+ case EOD_: /* End of primary file. */
+ if (ts<1) return(EOD_); /* Normal end: stack is empty. */
+ etagimct = ts-1; /* Treat as end-tag for top tag on stack. */
+ etagmin = MINETAG; etagreal = tags[0].tetd;
+ destack();
+ eofsw = 1; /* Return EOD_ after destacking all. */
+ return ETG_;
+
+ /* Short references ending with blanks:
+ If the blank sequence is followed by RE, go do SR7 or SR6.
+ If the entity is undefined and we are in mixed content,
+ the blanks must be returned as data. If not, they
+ can be ignored.
+ */
+ case SR9_: /* Process SR9 (two or more blanks). */
+ REPEATCC; /* Make first blank the CC. */
+ /* fall through */
+ case SR4_: /* Process SR4 (RS, blanks). */
+ parseseq(tbuf, BSEQLEN); /* Squeeze out all blanks. */
+ if (*FPOS=='\r') {srn = (srn==9) ? 7 : 6; data = tbuf; goto sr6;}
+ else REPEATCC;
+ if ((refrc = shortref(srn, pcb))==DEF_) goto nrcc;
+ if (refrc>0) return refrc;
+ if (refrc==ENTUNDEF && pcb==&pcbconm)
+ {data = tbuf; goto nrcc;}
+ continue;
+
+ /* Short references ending with RE:
+ If the reference is defined, the RE is ignored.
+ For RE and RS RE,
+ no special action is needed if the reference is undefined,
+ as the RE will be processed immediately as the current character.
+ For B RE and RS B RE,
+ the input is primed with a special character that will
+ be treated as an RE that cannot be a short reference.
+ */
+ case SR7_: /* Process SR7 (blanks, RE). */
+ datalen = (UNS)(FPOS - data);
+ /* fall through */
+ case SR2_: /* Process SR2 (RE). */
+ case SR5_: /* Process SR5 (RS, RE). */
+ sr6: /* Process SR6 (RS, blanks, RE). */
+ if ((refrc = shortref(srn, pcb))!=ENTUNDEF) {
+ if (refrc==DEF_) goto nrcc; /* Defined: data entity. */
+ if (refrc>0) return refrc; /* Defined: tag entity. */
+ continue; /* Defined: not tag. */
+ }
+ if (pcb!=&pcbconm) continue; /* Not mixed; ignore chars. */
+ if (srn>=6) /* Return blanks as data. */
+ {*FPOS = lex.d.genre; REPEATCC; goto nrcc;}
+ /* fall through when srn<6 */
+ case REF_: /* Undefined SR with RE; return record end. */
+ datarc = REF_;
+ if (charmode) return(datarc);
+#if 0
+ /* The standard says this situation can force a tag.
+ See 323:3-6, 412:1-7. */
+ /* If RE would be ignored, don't treat it as start-tag
+ because it could force a required tag; but do change
+ state to show that an RE was ignored.
+ */
+ if (scbsgml[pss].snext==scbsgmst) {
+ scbsgml[pss].snext = scbsgmnr;
+ TRACEGML(scbsgml, pss, conactsw, conact);
+ continue;
+ }
+#endif
+ stagmin = MINNONE; stagreal = newetd = ETDCDATA;
+ return(stag(datarc));
+
+ case SR3_: /* Process SR3 (RS). */
+ REPEATCC;
+ if ((refrc = shortref(srn, pcb))==DEF_) goto nrcc;
+ if (refrc>0) return refrc;
+ continue;
+
+ case RBR_: /* Two right brackets */
+ srn = 26;
+ REPEATCC;
+ /* fall through */
+ case SR1_: /* Process SR1 (TAB). */
+ case SR8_: /* Process SR8 (space). */
+ case SR19: /* Process SR19 (-). */
+ case SR26: /* Process SR26 (]). */
+ REPEATCC;
+ goto srproc;
+
+ case FCE_: /* Process free character (SR11-18, SR21-32). */
+ fce[0] = *FPOS;
+ srn = mapsrch(&lex.s.dtb[lex.s.fce], fce);
+ /* fall through */
+ case SR10: /* Process SR10 ("). */
+ case SR11: /* Process SR11 (#). */
+ case SR20: /* Process SR20 (-). */
+ case SR25: /* Process SR25 ([). */
+ srproc:
+ if ((refrc = shortref(srn, pcb))==DEF_) goto nrcc;
+ if (refrc>0) return refrc;
+ if (refrc==ENTUNDEF) { /* Treat the SR as data. */
+ data = FPOS - (srn==lex.s.hyp2);/* Two data chars if SR20.*/
+ if (pcb!=&pcbconm) { /* If not in mixed content: */
+ if (srn>=lex.s.data) { /* Change PCB. */
+ pcb = conpcb = &pcbconm;
+ pcb->newstate = pcbcnda;
+ }
+ }
+ else pcb->newstate = pcbcnda;/* Now in data found state. */
+ }
+ continue;
+
+ case ERX_: /* Entity ref in RCDATA: cancel ending delims.*/
+ lexcon[lex.d.tago] = lex.l.fre;
+ lexcon[lex.d.net] = lex.l.nonet;
+ lexlms[lex.d.msc] = lex.l.fre;
+ continue;
+
+ case EE_: /* Entity end in RCDATA: check nesting. */
+ if (es<rcessv) {synerr(37, pcb); rcessv = es;}
+ /* If back at top level, re-enable the ending delimiters. */
+ if (es==rcessv) {
+ lexcon[lex.d.tago] = lex.l.tago;
+ lexcon[lex.d.net] = etictr ? lex.l.net : lex.l.nonet;
+ lexlms[lex.d.msc] = lex.l.msc;
+ }
+ continue;
+
+ case PIE_: /* PI entity: same as PIS_. */
+ return PIS_;
+
+ case RSR_: /* Record start: ccnt=0; ++rcnt.*/
+ ++RCNT; CTRSET(RSCC);
+ /* fall through */
+ default:
+ return (int)pcb->action; /* Default (MD_ MDC_ MSS_ MSE_ PIS_). */
+ }
+ }
+}
+/* STAGETD: Process the etd named by a start-tag (newetd).
+   An element type without a content model is reported with error 43,
+   counted in ds.etdercnt and given the default model undechdr.  The
+   tag is then checked against the current context by stag(), whose
+   return code is passed back to the caller.
+*/
+int stagetd(pcb)
+struct parse *pcb;            /* Parse control block for this parse. */
+{
+     if (newetd->etdmod == 0) {
+          /* Undeclared element: complain once, then supply a model. */
+          sgmlerr(43, pcb, NEWGI, (UNCH *)0);
+          ds.etdercnt++;
+          etdset(newetd, (UNCH)SMO+EMO+ETDOCC, &undechdr,
+                 (PETD *)0, (PETD *)0, (PECB *)0);
+          TRACEETD(newetd);
+     }
+     stagreal = newetd;
+     stagmin = MINNONE;
+     return stag(0);
+}
+/* NSTETD: Process the etd for a null start-tag.
+   With an element open (ts>0) the tag stands for the element on top
+   of the stack; otherwise it stands for the etd held in token 2 of
+   the content model of the element at stack level 0.  The result of
+   stag() is returned.
+*/
+int nstetd()
+{
+     if (ts > 0)
+          newetd = tags[ts].tetd;
+     else
+          newetd = tags[0].tetd->etdmod[2].tu.thetd;
+     stagreal = ETDNULL;
+     stagmin = MINNULL;
+     etisw = 0;
+     return stag(0);
+}
+/* ETAGETD: Process the etd named by an end-tag (newetd).
+   If etag() finds no matching open element, error E_ETAG is issued
+   and the negative code from etag() is returned.  Otherwise the
+   current element is destacked, etagimct/etagimsw record how many
+   further end-tags are implied, and ETG_ is returned.
+*/
+int etagetd(pcb)
+struct parse *pcb;            /* Parse control block for this parse. */
+{
+     etagmin = MINNONE;
+     etagreal = newetd;
+     etagimct = etag();
+     if (etagimct < 0) {
+          sgmlerr(E_ETAG, pcb, NEWGI, GI);
+          return etagimct;
+     }
+     etagimsw = (etagimct != 0);
+     destack();
+     return ETG_;
+}
+/* NETETD: Process a null end-tag.
+   With no element open above stack level 0 the tag is an error
+   (message 51) and zero is returned; otherwise the current element
+   is destacked and ETG_ is returned.
+*/
+int netetd(pcb)
+struct parse *pcb;            /* Parse control block for this parse. */
+{
+     if (ts >= 1) {
+          etagreal = ETDNULL;
+          etagmin = MINNULL;
+          etagimsw = 0;
+          destack();
+          return ETG_;
+     }
+     sgmlerr(51, pcb, (UNCH *)0, (UNCH *)0);
+     return 0;
+}
+/* SHORTREF: Process a short (alternative) reference to an entity.
+   Returns ENTUNDEF if the current element has no short reference map,
+   if the map has no entry for srn, or if the mapped entity has no
+   stored text (error 93 is issued in that last case).  Otherwise the
+   entity is opened with entopen() and the result is DEF_ for a data
+   entity, PIS_ for a PI entity, and zero for anything else.
+*/
+int shortref(srn, pcb)
+int srn;                      /* Short reference number. */
+struct parse *pcb;            /* Parse control block for this parse. */
+{
+     int rc;                  /* Return code from entopen. */
+
+     if (tags[ts].tsrm==SRMNULL)
+          return ENTUNDEF;
+     if (!tags[ts].tsrm[srn])
+          return ENTUNDEF;
+     if (tags[ts].tsrm[srn]->estore) {
+          rc = entopen(tags[ts].tsrm[srn]);
+          if (rc==ENTDATA)
+               return DEF_;
+          if (rc==ENTPI)
+               return PIS_;
+          return 0;
+     }
+     /* Entry 0 of the map supplies the second name for the message. */
+     sgmlerr(93, pcb, tags[ts].tsrm[srn]->ename+1,
+             tags[ts].tsrm[0]->ename+1);
+     return ENTUNDEF;
+}
+/* PARSEPRO: Parse prolog.
+ Note: ptpro cannot overrun tbuf (and therefore needn't be
+ tested), as long as the buffer exceeds the longest
+ lookahead sequence in the content parse tables.
+ Returns APP_ when sgmldecl() reports a non-zero result, PIS_ for a
+ processing instruction, the current action at end of the primary
+ entity, EOD_ when only the prolog is wanted (sw.onlypro), and
+ otherwise the code describing what ended the prolog (DAF_, DCE_,
+ MSS_ or STE_).
+*/
+int parsepro()
+{
+ struct parse *oldpcb;
+
+ while (1) {
+ int rc; /* Return code: DAF MSS DCE */
+ /* Cases that `continue' stay in the prolog; cases that `break'
+ end it and run the code after the switch. */
+ switch (parse(propcb)) {
+
+ case LAS_: /* Start lookahead buffer with current char. */
+ *(ptpro = data = tbuf+1) = *FPOS;
+ continue;
+ case LAM_: /* Move character to lookahead buffer. */
+ *++ptpro = *FPOS;
+ continue;
+ case LAF_: /* Return data in lookahead buffer. */
+ datalen = (UNS)(ptpro+1 - data);
+ REPEATCC;
+ rc = DAF_;
+ break; /* Prolog ended; data pending. */
+
+ case DTD_: /* Process document type declaration. */
+ parsenm(tbuf, NAMECASE); /* Get declaration name. */
+ /* An SGML declaration is accepted only if no DOCTYPE has been
+ seen and sgmlsw was still zero. */
+ if (!ustrcmp(tbuf+1, sgmlkey)
+ && !dtdsw && !sgmlsw++) {
+#if 0
+ parse(&pcbmdi);
+#endif
+ /* If we got some appinfo, return. */
+ if (sgmldecl())
+ return APP_;
+ continue;
+ }
+ /* Only the first DOCTYPE declaration is processed. */
+ if (!ustrcmp(tbuf+1, key[KDOCTYPE]) && !dtdsw++) {
+ startdtd();
+ mddtds(tbuf);
+ continue;
+ }
+ sgmlerr(E_MDNAME, propcb, tbuf+1, (UNCH *)0);
+ continue;
+ case DTE_: /* DOCTYPE declaration (and prolog) ended. */
+ REPEATCC; /* Put back char that followed MSC. */
+ if (es != 0)
+ sgmlerr(143, propcb, (UNCH *)0, (UNCH *)0);
+ else if (dtdrefsw) {/* Process referenced DTD before real DTE. */
+ dtdrefsw = 0; /* Keep us from coming through here again. */
+ REPEATCC; /* Put back MSC so it follows referenced DTD. */
+ entref(indtdent);
+ }
+ else mddtde(tbuf);
+ continue;
+
+ case MD_:
+ /* Process markup declaration within DTD or LPD. */
+ parsenm(tbuf, NAMECASE); /* Get declaration name. */
+ if (!ustrcmp(tbuf+1, key[KENTITY]))
+ mdentity(tbuf);
+ else if (!ustrcmp(tbuf+1, key[KUSEMAP]))
+ mdsrmuse(tbuf);
+ else if (!ustrcmp(tbuf+1, key[KATTLIST]))
+ mdadl(tbuf);
+ else if (!ustrcmp(tbuf+1, key[KSHORTREF]))
+ mdsrmdef(tbuf);
+ else if (!ustrcmp(tbuf+1, key[KELEMENT]))
+ mdelem(tbuf);
+ else if (!ustrcmp(tbuf+1, key[KNOTATION]))
+ mdnot(tbuf);
+ else
+ sgmlerr(E_MDNAME, propcb, tbuf+1, (UNCH *)0);
+ continue;
+ case MDC_: /* Process markup declaration comment. */
+ sgmlsw++; /* SGML declaration not allowed after comment */
+ parsemd(tbuf, NAMECASE, (struct parse *)0, NAMELEN);
+ continue;
+
+ case MSS_: /* Process marked section start. */
+ oldpcb = propcb;
+ propcb = mdms(tbuf, propcb);
+ if (propcb==&pcbmsc || propcb==&pcbmsrc) {
+ if (oldpcb == &pcbmds)
+ sgmlerr(135, oldpcb, (UNCH *)0, (UNCH *)0);
+ conpcb = propcb;
+ rc = DCE_;
+ break;
+ }
+ continue;
+ case MSE_: /* Process marked section end. */
+ if (mdmse()) propcb = &pcbmds;
+ continue;
+ case MSP_: /* Marked section start in prolog outside DTD */
+ rc = MSS_;
+ break;
+ case PIE_: /* PI entity: same as PIS_. */
+ return(PIS_);
+
+ case EOD_: /* Return end of primary entity. */
+ /* Ending here is an error (127) unless only the prolog was
+ requested, parsing is at the outer prolog level and a DOCTYPE
+ was seen. */
+ if (!sw.onlypro || propcb != &pcbpro || !dtdsw)
+ sgmlerr(127, propcb, (UNCH *)0, (UNCH *)0);
+ else {
+ setdtype();
+ checkdtd();
+ }
+ return propcb->action;
+ case PIS_: /* Return processing instruction (string). */
+ sgmlsw++; /* SGML declaration not allowed after PI */
+ return((int)propcb->action); /* Prolog will continue later. */
+
+ case CIR_: /* Chars ignored; trying to resume parse. */
+ synerr(E_RESTART, propcb);
+ REPEATCC;
+ continue;
+ case STE_: /* Start tag ended prolog */
+ REPEATCC;
+ REPEATCC;
+ rc = STE_;
+ break;
+ case PEP_: /* Previous character ended prolog. */
+ REPEATCC;
+ /* fall through */
+ case DCE_: /* Data character ended prolog. */
+ REPEATCC;
+ rc = DCE_;
+ break;
+ case EE_: /* Illegal entity end in ignored marked section. */
+ /* An error message has already been given. */
+ continue;
+ default:
+ abort();
+ } /* switch */
+ /* Reached only by the cases that `break': the prolog is over. */
+ setdtype(); /* First pass only: set document type. */
+ checkdtd();
+ if (sw.onlypro)
+ return EOD_;
+ TRACESET(); /* Set trace switches. */
+ endprolog();
+ /* *DOC is first element; stack it at level 0. */
+ stack(newetd = nextetd = stagreal = etagreal = docetd);
+ return(rc);
+ } /* while */
+}
+
+/* Allocate the work buffers needed while a DTD is being parsed, then
+   allocate the buffers shared with the instance (commbufs) and define
+   the entities requested through sw.includes (doincludes). */
+
+VOID startdtd()
+{
+     nmgrp = (struct etd **)rmalloc(sizeof(struct etd *)*(GRPCNT+1));
+     nnmgrp = (PDCB *)rmalloc(sizeof(PDCB)*(GRPCNT+1));
+     gbuf = (struct thdr *)rmalloc(sizeof(struct thdr)*(GRPGTCNT+3));
+
+     /* One byte more than a plain name needs: mdentity() parses the
+        name of a parameter entity into this buffer. */
+     nmbuf = (UNCH *)rmalloc(NAMELEN+3);
+
+     pubibuf = (UNCH *)rmalloc(LITLEN+1);
+     sysibuf = (UNCH *)rmalloc(LITLEN+1);
+
+     commbufs();
+     doincludes();
+}
+
+/* Report items that were referenced but never defined.
+   When sw.swundef is set: error 140 for every element type in etdtab
+   that has no content model, and error 152 for every entry on the
+   srhtab[0] chain whose srhsrm[0] is zero.
+   Always: error 192 for every entry on the dcntab[0] chain whose
+   `defined' flag is clear. */
+static
+VOID checkdtd()
+{
+     struct dcncb *np;
+
+     if (sw.swundef) {
+          struct srh *sp;
+          int i = 0;
+
+          while (i < ETDHASH) {
+               struct etd *ep = etdtab[i++];
+
+               while (ep) {
+                    if (!ep->etdmod)
+                         sgmlerr(140, (struct parse *)0, ep->etdgi + 1,
+                                 (UNCH *)0);
+                    ep = ep->etdnext;
+               }
+          }
+          sp = srhtab[0];
+          while (sp) {
+               if (sp->srhsrm[0] == 0)
+                    sgmlerr(152, (struct parse *)0, sp->ename + 1,
+                            (UNCH *)0);
+               sp = sp->enext;
+          }
+     }
+     np = dcntab[0];
+     while (np) {
+          if (!np->defined)
+               sgmlerr(192, (struct parse *)0, np->ename + 1, (UNCH *)0);
+          np = np->enext;
+     }
+}
+
+/* Check whether s is a valid parameter entity name: a name start
+   character followed by name characters, at most NAMELEN characters
+   in all.  On success entbuf receives the length byte, the PERO
+   character, the (optionally case-translated) name and a terminating
+   '\0', and 1 is returned.  On failure 0 is returned; entbuf may
+   already have been partly overwritten. */
+
+static
+int pentname(s)
+char *s;
+{
+     int n;
+     UNCH first = (UNCH)*s;
+
+     if (lextoke[first] != NMS)
+          return 0;
+     entbuf[2] = ENTCASE ? lextran[first] : first;
+     n = 1;
+     while (s[n]) {
+          if (n > NAMELEN - 1)
+               return 0;
+          if (lextoke[(UNCH)s[n]] < NMC || s[n] == EOBCHAR)
+               return 0;
+          entbuf[n + 2] = ENTCASE ? lextran[(UNCH)s[n]] : (UNCH)s[n];
+          n++;
+     }
+     entbuf[0] = (UNCH)(n + 3);     /* length byte, PERO and '\0' */
+     entbuf[1] = lex.d.pero;
+     entbuf[n + 2] = '\0';
+     return 1;
+}
+
+/* Handle sw.includes: for each name in that null-terminated list,
+   define a parameter entity whose text is the INCLUDE keyword, unless
+   an entity of that name already exists.  A name that pentname()
+   rejects is reported with error 138. */
+
+static
+VOID doincludes()
+{
+     char **p = sw.includes;
+
+     if (!p)
+          return;
+     while (*p) {
+          if (!pentname(*p))
+               sgmlerr(138, (struct parse *)0, (UNCH *)*p, (UNCH *)0);
+          else if (!entfind(entbuf)) {
+               union etext etx;
+
+               etx.c = savestr(key[KINCLUDE]);
+               entdef(entbuf, ESM, &etx);
+               /* Keep the entity statistics in step. */
+               ++ds.ecbcnt;
+               ds.ecbtext += ustrlen(key[KINCLUDE]);
+          }
+          p++;
+     }
+}
+
+/* Allocate the buffers that are used both in the DTD and in the
+   instance: the attribute list work area `al' and the literal buffer
+   `lbuf'. */
+
+static
+VOID commbufs()
+{
+     al = (struct ad *)rmalloc(sizeof(struct ad)*(ATTCNT+2));
+     lbuf = (UNCH *)rmalloc(LITLEN + 1);
+}
+
+/* Allocate an array of GRPLVL+2 mpos entries together with one block
+   of unsigned longs for their hit vectors; entry j's `h' member points
+   at the j-th run of grplongs longs in that block.  grplongs must
+   already be positive.  Returns the mpos array. */
+static
+struct mpos *newmpos()
+{
+     struct mpos *p = (struct mpos *)rmalloc((GRPLVL+2)*sizeof(struct mpos));
+     unsigned long *hits;
+     int j;
+
+     assert(grplongs > 0);
+     hits = (unsigned long *)rmalloc((GRPLVL+2)*grplongs*sizeof(unsigned long));
+     for (j = 0; j < GRPLVL+2; j++, hits += grplongs)
+          p[j].h = hits;
+     return p;
+}
+
+/* Perform end of prolog buffer management.
+   If a DTD was processed (dtdsw), the DTD-only buffers obtained in
+   startdtd() are released; otherwise the shared buffers are allocated
+   and sw.includes handled here instead.  Then the restate array, the
+   tag stack and the model position arrays used for the instance are
+   allocated. */
+
+VOID endprolog()
+{
+     int i;
+
+     ambigfree();
+     if (!dtdsw) {
+          commbufs();
+          doincludes();
+     }
+     else {
+          frem((UNIV)nmgrp);
+          frem((UNIV)nnmgrp);
+          frem((UNIV)gbuf);
+          frem((UNIV)nmbuf);
+          frem((UNIV)sysibuf);
+          frem((UNIV)pubibuf);
+     }
+     scbsgml = (struct restate *)rmalloc((TAGLVL+1)*sizeof(struct restate));
+     tags = (struct tag *)rmalloc((TAGLVL+1)*sizeof(struct tag));
+     /* newmpos() needs grplongs, so set it before the calls below. */
+     grplongs = (GRPCNT + LONGBITS - 1)/LONGBITS;
+     i = 0;
+     while (i < TAGLVL+1) {
+          tags[i].tpos = newmpos();
+          i++;
+     }
+     savedpos = newmpos();
+}
+
+/* SETDTYPE: Establish specified or default document type.
+   Builds the default model header undechdr, builds the three-token
+   model prcon for the *DOC element (a single SEQ group holding one
+   etd token), and stores the document type etd in that token.  If no
+   document type was given, E_DOCTYPE is issued and indefetd is used;
+   if the document type element has no content model it is given
+   undechdr (error 52 unless it is the default name).
+*/
+VOID setdtype()
+{
+     /* Default model hdr for declared content: declared content is ANY
+        and there is no content model. */
+     undechdr.ttype = MANY+MCHARS+MGI;
+     undechdr.tu.tnum = 0;
+
+     /* Content model for *DOC. */
+     prcon[0].ttype = MGI;         /* Model is an element model. */
+     prcon[0].tu.tnum = 2;         /* A single group with a single GI in it. */
+     prcon[1].ttype = TTSEQ;       /* Non-repeatable SEQ group. */
+     prcon[1].tu.tnum = 1;         /* Only one token in group. */
+     prcon[2].ttype = TTETD;       /* Token is an etd. */
+
+     /* etd for the document as a whole. */
+     docetd = etddef(indocetd);
+     etdset(docetd, ETDOCC, prcon, (PETD *)0, (PETD *)0, SRMNULL);
+
+     /* Put specified or default document type etd in *DOC model. */
+     if (dtype == 0) {
+          sgmlerr(E_DOCTYPE, propcb, (UNCH *)0, (UNCH *)0);
+          dtype = indefetd;
+     }
+     prcon[2].tu.thetd = etddef(dtype);
+     if (prcon[2].tu.thetd->etdmod == 0) {
+          if (dtype != indefetd)
+               sgmlerr(52, propcb, dtype+1, (UNCH *)0);
+          ds.etdercnt++;
+          etdset(prcon[2].tu.thetd, (UNCH)SMO+EMO+ETDUSED+ETDOCC, &undechdr,
+                 (PETD *)0, (PETD *)0, (PECB *)0);
+     }
+     TRACEETD(docetd);
+     TRACEMOD(prcon);
+     TRACEETD(prcon[2].tu.thetd);
+}
+/* PARSETAG: Tag end parser for SGML documents.
+   Parses whatever ends the tag and records the outcome in two
+   switches: etisw is TAGNET if the tag ended with ETI and 0
+   otherwise; tagdelsw is 1 unless the tag had no closing delimiter
+   of its own (it ran into an attribute-like token or a following
+   TAGO), in which case it is 0.
+*/
+VOID parsetag(pcb)
+struct parse *pcb;            /* Parse control block: pcbstag or pcbetag. */
+{
+     int act;                 /* Action code returned by parse(). */
+
+     tagdelsw = 1;            /* Assume tag had an ETI or TAGC. */
+     act = parse(pcb);
+
+     if (act == ETIC) {       /* Tag closed with ETI. */
+          if (!sd.shorttag) synerr(194, pcb);
+          etisw = TAGNET;     /* Set switch for stack entry flag. */
+          return;
+     }
+     if (act == DSC) {
+          synerr(9, pcb);
+          REPEATCC;
+          etisw = 0;
+          return;
+     }
+     if (act == NVS || act == NTV) {
+          /* Att name, value token or name token value found. */
+          synerr(E_POSSATT, pcb);
+          pcb->newstate = 0;  /* Reset parse state. */
+          REPEATCC;           /* Put it back for next read. */
+          tagdelsw = 0;       /* Tag had no closing delimiter. */
+          etisw = 0;          /* Don't flag stack entry. */
+          return;
+     }
+     if (act == TAGO) {       /* Tag closing implied by TAGO. */
+          if (!sd.shorttag) synerr(193, pcb);
+          REPEATCC;           /* Put it back for next read. */
+          tagdelsw = 0;       /* Tag had no closing delimiter. */
+     }
+     /* TAGO, normal close (TAGC), or an invalid character for which
+        a message was already sent. */
+     etisw = 0;               /* Don't flag stack entry. */
+}
+/* STAG: Check whether a start-tag is valid at this point in the document
+ structure, or whether other tags must precede it.
+ Special case processing is done for the fake tag, #CDATA, as
+ it is never stacked.
+ The context check yields a code that the first switch reduces to
+ one of four outcomes (realrc); the second switch acts on it.
+ Returns ETG_ when the current element is ended first, STG_ when an
+ element (the required one, or newetd itself) is stacked, and dataret
+ when the "tag" was data.
+*/
+int stag(dataret)
+int dataret; /* Data pending: DAF_ REF_ 0=not #PCDATA. */
+{
+ int rc, realrc; /* Return code from context or other test. */
+ int mexts = 0; /* >0=stack level of minus grp; -1=plus; 0=none.*/
+
+ badresw = pexsw = 0;
+ /* If real element (i.e., not #PCDATA) set mexts and test if empty. */
+ if (dataret==0) {
+ mexts = pexmex(newetd);
+ /* If element is declared empty, it is same as a conref. */
+ if (GET(newetd->etdmod->ttype, MNONE)) conrefsw = TAGREF;
+ }
+ /* A model flagged MANY is not run through context(): the tag is a
+ hit unless a minus exception excludes it. */
+ if (GET(tags[ts].tetd->etdmod->ttype, MANY))
+ rc = mexts>0 ? RCMEX : RCHIT;
+ else rc = context(newetd, tags[ts].tetd->etdmod, tags[ts].tpos,
+ &tags[ts].status, mexts);
+ TRACESTG(newetd, dataret, rc, nextetd, mexts);
+
+ switch (rc) {
+ case RCEND: /* End current element, then retry start-tag. */
+ if (ts<1) realrc = RCMISS;
+ else realrc = RCEND;
+ break;
+ case RCREQ: /* Stack compulsory GI, then retry start-tag. */
+ realrc = RCREQ;
+ break;
+ case RCMISS: /* Start-tag invalid (#PCDATA or real). */
+ if (ts>0 && GET(tags[ts].tetd->etdmod->ttype, MANY))
+ realrc = RCEND;
+ else realrc = RCMISS;
+ break;
+ case RCMEX: /* Start-tag invalid (minus exception). */
+ etagimct = ts - mexts;
+ realrc = RCEND;
+ break;
+ case RCHITMEX: /* Invalid minus exclusion for required element. */
+#if 0 /* This will have been detected by exclude.c. */
+ sgmlerr(E_MEXERR, &pcbstag, NEWGI, tags[mexts].tetd->etdgi+1);
+#endif
+ /* fall through */
+ case RCHIT: /* Start-tag was valid. */
+ realrc = RCHIT;
+ break;
+ case RCPEX: /* Start-tag valid only because of plus exception. */
+ pexsw = TAGPEX;
+ realrc = RCHIT;
+ break;
+ default:
+ abort();
+ }
+
+ switch (realrc) {
+ case RCEND: /* End current element, then retry start-tag. */
+ /* 07 is an octal constant, i.e. error number 7. */
+ if (didreq) sgmlerr(07, &pcbstag, nextetd->etdgi+1, (UNCH *)0);
+ didreq = 0; /* No required start-tag done. */
+ dostag = 1; etiswsv = etisw; /* Save real start-tag status. */
+ conrefsv = conrefsw; /* Save real start-tag conref. */
+ conrefsw = 0; /* Current element is not empty. */
+ etagmin = MINSTAG; destack(); /* Process omitted end-tag. */
+ return ETG_;
+ case RCREQ: /* Stack compulsory GI, then retry start-tag. */
+ if (!BADPTR(nextetd)) {
+#if 0 /* This will have been detected in exclude.c. */
+ if ((mexts = pexmex(nextetd))>0)
+ sgmlerr(E_MEXERR, &pcbstag, nextetd->etdgi+1,
+ tags[mexts].tetd->etdgi+1);
+#endif
+ /* Required element was never declared: give it the default model. */
+ if (!nextetd->etdmod) {
+ sgmlerr(53, &pcbstag, nextetd->etdgi+1, (UNCH *)0);
+ etdset(nextetd, (UNCH)SMO+EMO+ETDOCC, &undechdr,
+ (PETD *)0, (PETD *)0, (PECB *)0);
+ ++ds.etdercnt;
+ TRACEETD(nextetd);
+ }
+ }
+ /* The message chosen depends on what forced the required tag:
+ a real start-tag (21), data (49) or anything else (50). */
+ if (BITOFF(nextetd->etdmin, SMO)) {
+ if (!BADPTR(stagreal))
+ sgmlerr(21, &pcbstag, nextetd->etdgi+1, stagreal->etdgi+1);
+ else if (stagreal==ETDCDATA)
+ sgmlerr(49, &pcbstag, nextetd->etdgi+1, (UNCH *)0);
+ else sgmlerr(50, &pcbstag, nextetd->etdgi+1, (UNCH *)0);
+ }
+ didreq = 1; /* Required start-tag done. */
+ dostag = 1; etiswsv = etisw; /* Save real start-tag status. */
+ etisw = 0; conrefsv = conrefsw; /* Save real start-tag conref. */
+ /* If element is declared empty, it is same as a conref. */
+ conrefsw = (GET(nextetd->etdmod->ttype, MNONE)) ? TAGREF : 0;
+ stack(nextetd); /* Process omitted start-tag. */
+ return STG_;
+ case RCMISS: /* Start-tag invalid (#PCDATA or actual). */
+ dostag = 0; contersw |= 1; didreq = 0;
+ if (dataret) {
+ if (dataret==REF_) badresw = 1;
+ else sgmlerr(E_CHARS, conpcb, tags[ts].tetd->etdgi+1, (UNCH *)0);
+ return dataret;
+ }
+ sgmlerr(E_CONTEXT, &pcbstag, NEWGI, tags[ts].tetd->etdgi+1);
+ /* The out-of-context element is stacked anyway after the message. */
+ if (stagmin!=MINNULL) stagmin = MINNONE; stack(newetd);
+ return STG_;
+ case RCHIT: /* Start-tag was valid. */
+ dostag = 0; didreq = 0;
+ if (dataret) return dataret;
+ stack(newetd);
+ return STG_;
+ }
+ return NOP_; /* To avoid Borland C++ warning */
+}
+/* PEXMEX: See if a GI is in a plus or minus exception group on the stack.
+   The stack is searched from the current level down to level 1.  The
+   first level whose minus group contains the GI ends the search and
+   its level number is returned.  If no minus group matches, the
+   result is -1 when some plus group contained the GI and 0 when
+   none did.
+*/
+int pexmex(curetd)
+struct etd *curetd;           /* The etd for this GI. */
+{
+     int found = 0;           /* -1=found in plus grp; 0=not. */
+     int lvl = ts;            /* Stack level being examined. */
+
+     while (lvl > 0) {
+          struct etd *open = tags[lvl].tetd;
+
+          if (open->etdmex && ingrp(open->etdmex, curetd))
+               return lvl;
+          if (open->etdpex && ingrp(open->etdpex, curetd))
+               found = -1;
+          --lvl;
+     }
+     return found;
+}
+/* STACK: Add a new entry to the tag stack.
+ If there is no room, issue a message and reuse last position.
+ The entry's flags are built from the global switches pexsw, etisw,
+ contersw and conrefsw; contersw is cleared afterwards.
+*/
+VOID stack(curetd)
+struct etd *curetd; /* The etd for this entry. */
+{
+ /* Stack the new element type definition (error if no room). */
+ /* On overflow ts is decremented inside the call, so the entry for
+ the last level is overwritten below. */
+ if (++ts>TAGLVL)
+ sgmlerr(E_STAGMAX, conpcb, curetd->etdgi+1, tags[--ts].tetd->etdgi+1);
+ tags[ts].tetd = curetd;
+
+ /* Set flags: plus exception + tag had ETI + context error + empty. */
+ tags[ts].tflags = (UNCH)pexsw + etisw + contersw + conrefsw; contersw = 0;
+
+ /* If tag had ETI, update ETI counter and enable NET if first ETI. */
+ if (etisw && ++etictr==1) lexcon[lex.d.net] = lexcnm[lex.d.net] = lex.l.net;
+
+ /* If etd has ALT table, use it; otherwise, use last element's ALT. */
+ if (curetd->etdsrm) {
+ if (curetd->etdsrm != SRMNULL && curetd->etdsrm[0] == NULL) {
+ /* Map hasn't been defined. Ignore it. */
+ sgmlerr(159, &pcbstag, curetd->etdgi + 1, (UNCH *)0);
+ /* Clearing etdsrm keeps the message from being repeated for
+ later instances of this element. */
+ curetd->etdsrm = 0;
+ tags[ts].tsrm = tags[ts-1].tsrm;
+ }
+ else
+ tags[ts].tsrm = curetd->etdsrm;
+ }
+ else
+ tags[ts].tsrm = tags[ts-1].tsrm;
+
+ /* Initialize rest of stack entry. */
+ tags[ts].status = 0;
+ tags[ts].tpos[0].g = 1; /* M: Index in model of next token to test.*/
+ tags[ts].tpos[0].t = 1; /* P: Index in tpos of current group. */
+ HITCLEAR(tags[ts].tpos[0].h);
+ tags[ts].tpos[1].g = 1; /* Index of group in model (dummy grp). */
+ tags[ts].tpos[1].t = 1; /* 1st token is next in grp to be tested. */
+ HITCLEAR(tags[ts].tpos[1].h); /* No hits as yet. */
+ TRACESTK(&tags[ts], ts, etictr);
+
+ exclude();
+ return;
+}
+/* ETAG: Check validity of an end-tag by seeing if it matches any tag
+   on the stack.  The search runs from the current entry downwards and
+   does not examine stack level 0.  If a match is found, the offset of
+   the match from the current entry is returned (0=current);
+   otherwise -1 is returned and reporting is left to the caller.
+   If newetd is ETDNET, a NET delimiter was found, so an entry matches
+   when its tflags are non-zero rather than when its GI matches.
+*/
+int etag()
+{
+     int lvl;                 /* Stack level being examined. */
+
+     for (lvl = ts; lvl != 0; --lvl) {
+          int hit;
+
+          if (newetd != ETDNET)
+               hit = (newetd == tags[lvl].tetd);
+          else
+               hit = (tags[lvl].tflags != 0);
+          if (hit) {
+               TRACEETG(&tags[ts], newetd, lvl, ts-lvl);
+               return ts-lvl;
+          }
+     }
+     return -1;               /* End-tag didn't match any start-tag. */
+}
+/* DESTACK:
+ Call ECONTEXT to see if element can be ended at this point,
+ and issue message if there are required tags left.
+ Remove the current entry from the tag stack.
+ Issue an error if the destacked element was not minimizable
+ and its end-tag was omitted.
+*/
+VOID destack()
+{
+ register int ecode = 0; /* Error code (0=o.k.). */
+ UNCH *eparm2 = NULL; /* Second parameter of error message. */
+ register int minmsgsw; /* 1=message if tag omitted; 0=no message. */
+
+ /* If element has a content model (i.e., not a keyword) and there
+ are required tags left, and no CONREF attribute was specified,
+ issue an error message.
+ */
+ if (!GET(tags[ts].tetd->etdmod->ttype, MKEYWORD)
+ && !conrefsw
+ && !econtext(tags[ts].tetd->etdmod, tags[ts].tpos, &tags[ts].status)) {
+ if (BADPTR(nextetd))
+ sgmlerr(54, conpcb, tags[ts].tetd->etdgi+1, (UNCH *)0);
+ else
+ sgmlerr(30, conpcb, tags[ts].tetd->etdgi+1, nextetd->etdgi+1);
+ }
+ /* If the current tag ended with ETI, decrement the etictr.
+ If etictr is now zero, disable the NET delimiter.
+ */
+ /* The stack is popped here (ts--); from now on the destacked entry
+ is tags[ts+1]. */
+ if (GET(tags[ts--].tflags, TAGNET) && --etictr==0)
+ lexcon[lex.d.net] = lexcnm[lex.d.net] = lex.l.nonet;
+
+ minmsgsw = BITOFF(tags[ts+1].tetd->etdmin, EMO);
+ if (!conrefsw && minmsgsw && (etagimsw || etagmin==MINETAG)) {
+ /* Minimization caused by NET delimiter. */
+ if (BADPTR(etagreal)) ecode = 46;
+ /* Minimization caused by a containing end-tag. */
+ else {ecode = 20; eparm2 = etagreal->etdgi+1;}
+ }
+ else if (!conrefsw && etagmin==MINSTAG && (minmsgsw || ts<=0)) {
+ /* Each pair of codes below is: element still open beneath (ts>0),
+ or stack emptied (ts<=0). */
+ /* Minimization caused by out-of-context start-tag. */
+ if (!BADPTR(stagreal)) {
+ ecode = ts>0 ? 39 : 89;
+ eparm2 = stagreal->etdgi+1;
+ }
+ /* Minimization caused by out-of-context data. */
+ else if (stagreal==ETDCDATA) ecode = ts>0 ? 47 : 95;
+ /* Minimization caused by out-of-context short start-tag. */
+ else ecode = ts>0 ? 48 : 96;
+ /* Emptying the stack this way sets eodsw, which makes parsecon()
+ return EOD_ on its next call. */
+ if (ts<=0 && ecode) eodsw = 1;
+ }
+ if (ecode) sgmlerr((UNS)ecode, conpcb, tags[ts+1].tetd->etdgi+1, eparm2);
+ /* TEMP: See if parser bug caused stack to go below zero. */
+ else if (ts<0) {sgmlerr(64, conpcb, (UNCH *)0, (UNCH *)0); ts = 0;}
+ TRACEDSK(&tags[ts], &tags[ts+1], ts, etictr);
+}
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+comment-column: 30
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/pars2.c b/usr.bin/sgmls/sgmls/pars2.c
new file mode 100644
index 0000000..8c97ec6
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/pars2.c
@@ -0,0 +1,1308 @@
+#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */
+/* PARSE: Parse a source input stream with specified lexical and state tables.
+ Return to caller with action code.
+
+ Each pass of the loop advances to the next character (NEWCC), maps it
+ to an input class through pcb->plex, makes the previous newstate the
+ current state, and reads the next state and the action from pcb->ptab
+ (rows state and state+1, column input). A case that ends in
+ `continue' keeps scanning; a case that ends in `break' leaves the
+ switch and returns pcb->action to the caller. Actions with no case
+ of their own are returned unchanged through `default'.
+*/
+int parse(pcb)
+struct parse *pcb; /* Current parse control block. */
+{
+ int rc; /* Return code from ENTREF. */
+
+ while (1) {
+ NEWCC;
+ pcb->input = pcb->plex[*FPOS];
+ pcb->state = pcb->newstate;
+ pcb->newstate = (*(pcb->ptab + pcb->state)) [pcb->input];
+ pcb->action = (*(pcb->ptab + pcb->state + 1)) [pcb->input];
+ TRACEPCB(pcb);
+ switch (pcb->action) {
+ case RC2_: /* Back up two characters. */
+ REPEATCC;
+ /* FALLTHROUGH: the second REPEATCC is shared with RCC_. */
+ case RCC_: /* Repeat current character. */
+ REPEATCC;
+ /* FALLTHROUGH */
+ case NOP_: /* No action necessary.*/
+ continue;
+
+ case RS_: /* Record start: ccnt=0; ++rcnt.*/
+ ++RCNT; CTRSET(RSCC);
+ continue;
+
+ case GET_: /* EOB or dull EOS or EE found: keep going.*/
+ if (entget()==-1) {pcb->action = EOD_; break;}/* Signal if EOD.*/
+ continue;
+
+ case EOF_: /* Illegal entity end; return EE_. */
+ synerr(E_EOF, pcb);
+ pcb->action = EE_;
+ /* FALLTHROUGH: handled as EE_ once the error has been reported. */
+ case EE_: /* Important EOS or EE found: return to caller.*/
+ if (entget()==-1) pcb->action = EOD_; /* Signal if EOD. */
+ break;
+
+ case PER_: /* Parameter entity reference. */
+ REPEATCC; /* Use PERO as 1st char of entity name. */
+ parsenm(entbuf, ENTCASE);
+ parse(&pcbref); /* Handle REFC or other terminator. */
+ rc = entref(entbuf);
+ if (rc==ENTPI) {pcb->action = PIE_; break;}
+ continue;
+
+ case ER_: /* General entity reference; continue. */
+ parsenm(entbuf, ENTCASE);
+ parse(&pcbref); /* Handle REFC or other terminator. */
+ rc = entref(entbuf);
+ if (rc==ENTDATA) {pcb->action = DEF_; break;}
+ if (rc==ENTPI) {pcb->action = PIE_; break;}
+ continue;
+
+
+ case PEX_: /* Parameter entity reference; return. */
+ REPEATCC; /* Use PERO as 1st char of entity name. */
+ /* FALLTHROUGH: the rest is shared with ERX_. */
+ case ERX_: /* General entity reference; return. */
+ parsenm(entbuf, ENTCASE);
+ parse(&pcbref); /* Handle REFC or other terminator. */
+ rc = entref(entbuf);
+ if (rc == ENTDATA){
+ /* Reference to external data/subdoc entity in replaceable
+ character data. */
+ if (BITON(entdatsw, NDECONT)) {
+ switch (((PNE)data)->nextype) {
+ case ESNCDATA:
+ case ESNSDATA:
+ /* The standard says `non-SGML data entity'
+ but the amendment should have changed it
+ to `external data entity'. */
+ synerr(145, pcb);
+ break;
+ case ESNNDATA:
+ case ESNSUB:
+ /* This is definitely illegal. */
+ synerr(141, pcb);
+ break;
+ }
+ /* The reference has been reported; clear the data-entity
+ switch and keep scanning instead of returning DEF_. */
+ entdatsw = 0;
+ continue;
+ }
+ pcb->action = DEF_;
+ }
+ else if (rc == ENTPI) {
+ /* Reference to PI entity not allowed in replaceable
+ character data. */
+ synerr(59, pcb);
+ entpisw = 0;
+ continue;
+ }
+ /* Any other non-zero entref result is returned as EE_; a zero
+ result returns the original PEX_/ERX_ action. */
+ else if (rc) pcb->action = EE_;
+ break;
+
+ case CRN_: /* Character reference: numeric. */
+ parsetkn(entbuf, NU, NAMELEN);
+ parse(&pcbref); /* Handle reference terminator. */
+ pcb->action = charrefn(entbuf, pcb);
+ if (pcb->action==CRN_) continue; /* Invalid reference */
+ break;
+
+ case CRA_: /* Character reference: alphabetic. */
+ parsenm(entbuf, NAMECASE);
+ parse(&pcbref); /* Handle reference terminator. */
+ charrefa(entbuf);
+ continue;
+
+ case SYS_: /* Invalid NONCHAR: send msg and ignore. */
+ synerr(E_SYS, pcb);
+ if (*FPOS == DELNONCH) NEWCC;
+ continue;
+
+ case NON_: /* Valid NONCHAR: prefix and shift encoding. */
+ synerr(60, pcb);
+ pcb->action = datachar(*FPOS, pcb);
+ break;
+ /* NSC_: report error 60, take the following character as-is into
+ nonchbuf[1], and return it to the caller as NON_. */
+ case NSC_:
+ synerr(60, pcb);
+ NEWCC;
+ nonchbuf[1] = *FPOS;
+ pcb->action = NON_;
+ break;
+ case PCI_: /* Previous character was invalid (INV_). */
+ REPEATCC;
+ /* FALLTHROUGH */
+ case INV_: /* Markup ended by invalid char; repeat char. */
+ synerr(9, pcb);
+ REPEATCC;
+ break;
+
+ case LNR_: /* Previous char exceeded len; back up to it. */
+ REPEATCC;
+ /* FALLTHROUGH */
+ case LEN_: /* Token too long; ignore excess character. */
+ synerr(3, pcb);
+ continue;
+
+ case RCR_: /* Repeat current char and return to caller. */
+ REPEATCC;
+ /* FALLTHROUGH */
+ default: /* Actions for specific parse. */
+ break;
+ }
+ /* Reached only by `break' from the switch above. */
+ return (int)pcb->action;
+ }
+}
+/* CHARREFA: Resolve an alphabetical reference to a function character
+             and put the character in the read buffer.
+             The name (past its length byte) is looked up in funtab; a
+             result of 0 means the reference is bad and error 62 is issued.
+*/
+VOID charrefa(r)
+UNCH *r;                      /* Undelimited char ref (with length and EOS). */
+{
+     UNCH fnchar = mapsrch(funtab, r+1);
+
+     if (fnchar == 0) {
+          synerr(62, &pcbref);
+          return;
+     }
+     /* Undo the record count for a record start.  This isn't ideal,
+        because the character position will still be wrong for one line. */
+     if (fnchar == RSCHAR) RCNT--;
+     /* Overwrite the current character and have it scanned again. */
+     setcurchar(fnchar);
+     REPEATCC;
+}
+
+/* SETCURCHAR: Make the current character (*FPOS) equal to ch.
+
+   Nothing is written when the character already matches.  When the
+   source is not a file and has not been copied yet, the buffer is first
+   duplicated with savestr() and FPOS/FBUF are re-pointed at the copy:
+   we can't change an internal entity in place, since the entity might
+   be referenced again.  This is inefficient, but it will only happen
+   in a case like this:
+
+     <!entity % amp "&">
+     <!entity e "x%amp;#SPACE;">
+
+   Usually character references will have been processed while the
+   entity was being defined.
+*/
+
+VOID setcurchar(ch)
+int ch;
+{
+     if (*FPOS == ch)
+          return;                  /* Already the wanted character. */
+
+     if (!FILESW && !COPIEDSW) {
+          UNCH *copy = savestr(FBUF + 1);
+
+          /* FPOS must be rebased before FBUF, because the offset is
+             computed from the old FBUF. */
+          FPOS = copy + (FPOS - FBUF - 1);
+          FBUF = copy - 1;
+          COPIEDSW = 1;
+     }
+     *FPOS = ch;
+}
+
+/* CHARREFN: Resolve a numeric character reference.
+             If the number is greater than 255, issue error 61 and return
+             the caller's current action unchanged (parse() treats a
+             returned CRN_ as "invalid reference").  Otherwise return
+             whatever datachar() returns for the character.
+
+             The digits are accumulated by hand and the scan stops as soon
+             as the value passes 255.  atoi() was used here before, but its
+             behaviour is undefined when the digit string does not fit in
+             an int, which a long enough token could trigger.
+             The token is scanned with the NU class by the caller, so it is
+             assumed to consist of decimal digits only; as with atoi(), a
+             token with no leading digit yields 0.
+*/
+
+int charrefn(r, pcb)
+UNCH *r;                      /* Undelimited character reference. */
+struct parse *pcb;            /* Current parse control block. */
+{
+     int thechar = 0;         /* Value accumulated so far. */
+     UNCH *p;                 /* Current digit. */
+
+     for (p = r; *p >= '0' && *p <= '9'; p++) {
+          thechar = thechar * 10 + (*p - '0');
+          if (thechar > 255)
+               break;         /* Out of range already; stop before overflow. */
+     }
+     if (thechar > 255) {
+          synerr(61, &pcbref);
+          return((int)pcb->action);
+     }
+     return datachar(thechar, pcb);
+}
+
+/* DATACHAR: Return ch as a datachar.  If this is a non-SGML character
+which might confuse the parser, shift it to a code that won't and place
+it in a special buffer (nonchbuf) which has DELNONCH in the preceding
+byte, and return NON_.  Otherwise put it in the read buffer and return
+the action that matches the kind of parse in progress. */
+
+int datachar(ch, pcb)
+int ch;
+struct parse *pcb;
+{
+     /* A potentially confusing character must be prefixed with DELNONCH. */
+     if (ch == EOS || ch == EOFCHAR || ch == EOBCHAR || ch == GENRECHAR
+         || ch == DELCDATA || ch == DELSDATA || ch == DELNONCH) {
+          nonchbuf[1] = SHIFTNON((UNCH)ch);
+          return NON_;
+     }
+
+     setcurchar(ch);
+
+     /* If not content, it must be a literal parse, so return MLA_. */
+     if (pcb != conpcb)
+          return MLA_;
+
+     /* In content: DCE_ for element content, DAF_ for mixed. */
+     if (pcb == &pcbcone)
+          return DCE_;
+
+     data = FPOS;
+     /* Action for DAF_ will do REPEATCC. */
+     NEWCC;
+     return DAF_;
+}
+/* INITATT: Initialize al with adl, and reset the per-list switches
+            (notadn and conrefsw) that later attribute processing sets.
+*/
+
+VOID initatt(adl)
+struct ad *adl;
+{
+     /* Use the attribute definition list as a template: copy
+        (1+ADN(adl)) entries of ADSZ bytes each into al. */
+     memcpy((UNIV)al, (UNIV)adl, (1+ADN(adl))*ADSZ);
+
+     conrefsw = 0;            /* Assume no content reference att. */
+     notadn = 0;              /* No NOTATION attribute yet. */
+}
+
+/* PARSEATT: Parse attribute specification list.
+ Make a current copy of the attribute definition list
+ and update it with the user's specifications.
+ Indicate each attribute that was specified in the
+ list (as opposed to defaulted) by setting the ASPEC flag.
+ If no attribute value was encountered at all, return NULL.
+ Otherwise return al (the working copy filled in by initatt
+ and attval); no permanent copy is made here.
+
+ Error 75 is issued if the tokenization pointer ran past
+ pt+ATTSPLEN, and error 37 if the entity stack level changed
+ while the list was being parsed.
+*/
+struct ad *parseatt(adl, pt)
+struct ad *adl; /* Attribute definition list. */
+UNCH *pt; /* Tokenization area: tbuf[TAGLEN+ATTSPLEN]. */
+{
+ UNCH *antvptr;
+ UNCH *nm = 0; /* Pointer to saved name in tbuf (with length). */
+ int adn = -1; /* Position of attribute in list (-1=empty). */
+ UNCH *tbuflim = pt + ATTSPLEN;
+ mdessv = es; /* Save es for checking entity nesting. */
+ initatt(adl);
+ /* Each pass handles one token from the start-tag parse. A case that
+ ends in `continue' goes on to the next token; falling out of the
+ switch reaches the `break' that ends the loop. */
+ while (pt<=tbuflim) {
+ parse(&pcbstag);
+ switch (pcbstag.action) {
+ case NVS: /* Att name or value token found. */
+ parsenm(pt, NAMECASE); /* Case translation wanted on name. */
+ pt += *(nm = pt); /* Save name while pointing past it. */
+ continue;
+
+ case AVD: /* Delimited value found. */
+ case AVDA: /* Delimited value found (alternate delimiter). */
+ /* Find position (adn) of saved attribute name in list. */
+ adn = anmget((int)ADN(al), nm);
+ /* An unknown name (adn==0) or an ACHARS attribute is parsed with
+ pcblitr; every other type with pcblitt. */
+ parselit(pt,
+ (adn == 0 || ADTYPE(al, adn) == ACHARS)
+ ? &pcblitr
+ : &pcblitt,
+ LITLEN,
+ (pcbstag.action==AVD) ? lex.d.lit : lex.d.lita);
+ if (adn == 0) {
+ /* Error: unrecognized attribute name. */
+ sgmlerr(13, &pcbstag, nm+1, pt);
+ continue;
+ }
+ /* Tokenize and validate value; let it default if an error. */
+ /* Put value in list and bump ptr by the normalized length
+ (which is always >= the actual length). */
+ if (!attval(1, pt, adn, adl)) pt += ADLEN(al,adn);
+ continue;
+ case AVU: /* Attribute value found: undelimited. */
+ if (!sd.shorttag) sgmlerr(196, &pcbstag, (UNCH *)0, (UNCH *)0);
+ parsetkn(pt, NMC, LITLEN);
+ /* Find position (adn) of saved attribute name in list. */
+ if ((adn = anmget((int)ADN(al), nm))==0) {
+ /* Error: unrecognized attribute name. */
+ sgmlerr(13, &pcbstag, nm+1, pt);
+ continue;
+ }
+ /* Tokenize and validate value; let it default if an error. */
+ /* Put value in list and bump ptr by the normalized length
+ (which is always >= the actual length). */
+ if (!attval(1, pt, adn, adl)) pt += ADLEN(al,adn);
+ continue;
+
+ case NASV: /* Saved NVS was really an NTV. */
+ REPEATCC; /* Put back next token starter. */
+ pt = nm; /* Back up to NVS. */
+ /* FALLTHROUGH: the name saved at nm is now treated as the value;
+ it is not parsed again because the action is NASV, not NTV. */
+ case NTV: /* Name token value found. */
+ if (!sd.shorttag) sgmlerr(195, &pcbstag, (UNCH *)0, (UNCH *)0);
+ if (pcbstag.action==NTV) parsenm(pt, NAMECASE);
+ if ((adn = antvget((int)ADN(al), pt, &antvptr))==0) {
+ /* Error: unrecognized name token value. */
+ sgmlerr(74, &pcbstag, pt+1, (UNCH *)0);
+ continue;
+ }
+ /* Validate value; let it default if an error. */
+ /* Put value in list and bump ptr by the normalized length
+ (which is always >= the actual length). */
+ if (!attval(0, antvptr+1, adn, adl)) pt += ADLEN(al,adn);
+ continue;
+
+ default: /* All attributes have been parsed. */
+ REPEATCC; /* Put next char back for tag close parse. */
+ break;
+ }
+ break;
+ }
+ if (pt>tbuflim) synerr(75, &pcbstag);
+ if (es!=mdessv) synerr(37, &pcbstag);
+ /* adn is still -1 only if no value case (AVD/AVDA/AVU/NASV/NTV) ran. */
+ if (adn<0) return((struct ad *)0); /* List was empty. */
+ TRACEADL(al);
+ return al;
+}
+/* ATTVAL: Validate a specified attribute value. Issue a message if it is
+ the wrong type (or otherwise is not up to spec), and use the default.
+ Call PARSEVAL to tokenize the value, unless it is a CDATA string.
+ If the attribute is a group, the value is a string.
+ For other types, the token count is set by PARSEVAL if the value
+ is syntactically correct. If incorrect (or if CDATA) the token
+ count is zero (i.e., the value is a string).
+ The length of a token does not include the length byte, and
+ there is no EOS. A string length (as always) includes both
+ the length byte and the EOS.
+ If it is a CONREF attribute, set a switch for STAG().
+ If it is a CURRENT attribute, store the value as the new default.
+
+ Returns 0 if the value was accepted and stored in al[adn];
+ returns 1 if an error was reported, in which case ADVAL(al,adn)
+ is left as it was.
+*/
+#define DEFVAL adl[adn].addef /* Default value of current attribute. */
+#define DEFNUM adl[adn].adnum /* Default group size of current attribute. */
+#define DEFLEN adl[adn].adlen /* Length of default value of current attribute.*/
+int attval(mtvsw, adval, adn, adl)
+int mtvsw; /* Must tokenize value: 1=yes; 0=no. */
+UNCH *adval; /* Untokenized attribute value. */
+int adn; /* Attribute's position in list. */
+struct ad *adl; /* Element's master att def list. */
+{
+ int errcode; /* Value/declaration conflict error code. */
+
+ if (GET(ADFLAGS(al,adn), ASPEC)) /* Can't respecify same attribute. */
+ {sgmlerr(73, &pcbstag, ADNAME(al,adn), adval); return(1);}
+ SET(ADFLAGS(al,adn), ASPEC); /* Indicate att was specified. */
+ if (GET(ADFLAGS(al,adn), ACONREF)) /* If attribute is content reference: */
+ conrefsw = TAGREF; /* Set switch for STAG(). */
+ if (mtvsw && ADTYPE(al,adn)!=ACHARS) {
+ /* If no syntax errors, check for proper group membership. */
+ if ( ((errcode = parseval(adval, ADTYPE(al,adn), lbuf))==0)
+ && GET(ADFLAGS(al,adn), AGROUP)
+ && !amemget(&al[adn], ADNUM(al,adn), lbuf) ) errcode = 18;
+ /* If syntax or group membership error, send message and exit. */
+ if (errcode) {
+ sgmlerr(errcode, &pcbstag, ADNAME(al,adn), adval);
+ SET(ADFLAGS(al,adn), AERROR);
+ return(1);
+ }
+ /* Replace specified value in adval with tokenized in lbuf. */
+ ustrcpy(adval, lbuf);
+ /* For a non-group attribute, record the token count left by
+ parseval in the list entry. */
+ if (BITOFF(ADFLAGS(al,adn), AGROUP)) ADNUM(al,adn) = (UNCH)tokencnt;
+ }
+ /* When mtvsw is 0, parseatt passes antvptr+1; step back one byte so
+ that adval is antvptr again. */
+ if (!mtvsw)
+ adval--;
+ /* If attribute is FIXED, specified value must equal default. */
+ if (BITON(ADFLAGS(al,adn), AFIXED) && ustrcmp(adval, DEFVAL)) {
+ /* Since the value has been tokenized, don't use it in the
+ error message. */
+ sgmlerr(67, &pcbstag, ADNAME(al,adn), (UNCH *)0);
+ SET(ADFLAGS(al,adn), AERROR);
+ return(1);
+ }
+ /* Compute the normalized length and reject values longer than LITLEN. */
+ ADLEN(al,adn) = vallen(ADTYPE(al,adn), ADNUM(al,adn), adval);
+ if (ADLEN(al,adn) > LITLEN) {
+ sgmlerr(224, &pcbstag, ADNAME(al,adn), (UNCH *)0);
+ SET(ADFLAGS(al,adn), AERROR);
+ return 1;
+ }
+ ADVAL(al,adn) = adval;
+ /* If attribute is CURRENT, value is new default.*/
+ if (GET(ADFLAGS(al,adn), ACURRENT)) {
+ /* Only growth of the default is added to the ds.attdef count. */
+ if (ADLEN(al,adn)>DEFLEN) {
+ ds.attdef += (ADLEN(al,adn) - DEFLEN);
+ DEFLEN = ADLEN(al,adn);
+ }
+ DEFVAL = replace(DEFVAL, ADVAL(al,adn));
+ DEFNUM = ADNUM(al,adn);
+ }
+ return(0); /* Indicate value was valid. */
+}
+/* ADLVAL: Validate the completed attribute definition list (defaults plus
+ specified values). Issue a message if an
+ attribute is required or current and its value is NULL.
+ For non-NULL values, run the type-specific checks: ENTITY and
+ ENTITIES tokens go to aenttst, IDREF and IDREFS tokens to
+ idreftst, an ID is defined with iddef, and a notation name is
+ looked up with dcnfind. An attribute that fails is flagged
+ AINVALID.
+ The list is walked from position 1; the entry at position 1 is
+ always examined, even if adsz is smaller.
+*/
+VOID adlval(adsz, newetd)
+int adsz; /* Size of list. */
+struct etd *newetd; /* Element type definition for this element. */
+{
+ int adn = 1; /* Position in list. */
+ UNCH *npt, *pt; /* Ptr save areas. */
+ UNCH nptsv; /* Save area for ptr value (length?). */
+ struct dcncb *dpt; /* Save area for dcncb ptr. */
+
+ aentctr = 0; /* Number of AENTITY tokens in this att list. */
+ idrctr = 0; /* Number of IDREF tokens in this att list. */
+ do {
+ if (ADVAL(al,adn)==NULL) { /* NULL value */
+ if (GET(ADFLAGS(al,adn), AREQ+ACURRENT)) { /*Error if REQ, CURRENT*/
+ sgmlerr(19, &pcbstag, ADNAME(al,adn), (UNCH *)0);
+ SET(ADFLAGS(al,adn), AINVALID);
+ }
+ }
+ else switch (ADTYPE(al,adn)) {
+ case AENTITY: /* Return data ecb pointer if valid entity. */
+ aenttst(adn, ADVAL(al,adn));
+ break;
+ case AENTITYS: /* Return data ecb pointers if valid entities. */
+ pt = ADVAL(al,adn);
+ tokencnt = (int)ADNUM(al,adn);
+ /* Each token is temporarily patched in place so that aenttst sees
+ it with a length byte raised by 2 and a trailing EOS: the byte
+ that follows the token is saved, overwritten with EOS, and put
+ back afterwards, and pt then moves on to that byte. */
+ while (tokencnt--) {
+ nptsv = *(npt = pt + *pt+1);
+ *pt += 2; *npt = EOS;
+ aenttst(adn, pt);
+ *pt -= 2; *(pt = npt) = nptsv;
+ }
+ break;
+ case AID:
+ /* Define ID; msg if it already exists. */
+ if (iddef(ADVAL(al,adn))) {
+ sgmlerr(71, &pcbstag, ADNAME(al,adn), ADVAL(al,adn)+1);
+ SET(ADFLAGS(al,adn), AINVALID);
+ /* `continue' in a do/while jumps to the loop condition, so
+ adn is still advanced; only the error 197 check is skipped. */
+ continue;
+ }
+ ++ds.idcnt;
+ break;
+ case AIDREF:
+ idreftst(adn, ADVAL(al,adn));
+ break;
+ case AIDREFS:
+ pt = ADVAL(al,adn);
+ tokencnt = (int)ADNUM(al,adn);
+ /* Same temporary in-place patching as for AENTITYS above. */
+ while (tokencnt--) {
+ nptsv = *(npt = pt + *pt+1);
+ *pt += 2; *npt = EOS;
+ idreftst(adn, pt);
+ *pt -= 2; *(pt = npt) = nptsv;
+ }
+ break;
+ case ANOTEGRP: /* Return notation identifier. */
+ if (GET(ADFLAGS(al,adn), ASPEC)) notadn = adn;/*NOTATION specified*/
+ if ((dpt = dcnfind(ADVAL(al,adn)))==0) {
+ sgmlerr(77, &pcbstag, ADNAME(al,adn), ADVAL(al,adn)+1);
+ SET(ADFLAGS(al,adn), AINVALID);
+ }
+ else ADDATA(al,adn).x = dpt;
+ break;
+ }
+ /* With neither SHORTTAG nor OMITTAG, a value that was not specified
+ (and is not already flagged invalid) draws error 197. */
+ if (!sd.shorttag && !sd.omittag && ADVAL(al,adn)!=NULL
+ && !GET(ADFLAGS(al,adn), ASPEC+AINVALID))
+ sgmlerr(197, &pcbstag, ADNAME(al,adn), (UNCH *)0);
+ /* Advance by one entry, or past the ADNUM member entries of a group. */
+ } while ((adn+=BITON(ADFLAGS(al,adn),AGROUP) ? (int)ADNUM(al,adn)+1 : 1)<=adsz);
+
+ /* Error if NOTATION specified with CONREF attribute or EMPTY element. */
+ if (notadn && (conrefsw
+ || (newetd && GET(newetd->etdmod->ttype, MNONE)))) {
+ sgmlerr((UNS)(conrefsw ? 84 : 76), &pcbstag,
+ ADNAME(al,notadn), ADVAL(al,notadn)+1);
+ SET(ADFLAGS(al,notadn), AINVALID);
+ }
+}
+/* AENTTST: Validate an individual ENTITY token in AENTITY or AENTITYS value.
+            Every failure reports an error against the attribute and flags
+            it AINVALID:
+              136  more than GRPCNT entity tokens in this attribute list;
+              151/72  entity not found (151 when a default entity exists
+                      but usedef() also failed, 72 when there is none);
+              78   ESN entity, not a subdocument, with no defined notation;
+              86   entity storage class is not ESX, ESC or ESN.
+*/
+VOID aenttst(adn, pt)
+int adn;                      /* Position in list. */
+UNCH *pt;                     /* Ptr to current ENTITY token in value. */
+{
+     struct entity *ept;      /* Entity control block for the token. */
+
+     ++aentctr;
+     if (aentctr > GRPCNT) {
+          sgmlerr(136, &pcbstag, ADNAME(al,adn), pt+1);
+          SET(ADFLAGS(al,adn), AINVALID);
+          return;
+     }
+
+     /* Look the entity up; fall back on the default entity if there is one. */
+     ept = entfind(pt);
+     if (ept == 0 && ecbdeflt != 0)
+          ept = usedef(pt);
+     if (ept == 0) {
+          sgmlerr(ecbdeflt ? 151 : 72, &pcbstag, ADNAME(al,adn), pt+1);
+          SET(ADFLAGS(al,adn), AINVALID);
+          return;
+     }
+
+     if (ept->estore != ESX && ept->estore != ESC && ept->estore != ESN) {
+          sgmlerr(86, &pcbstag, ADNAME(al,adn), pt+1);
+          SET(ADFLAGS(al,adn), AINVALID);
+          return;
+     }
+
+     /* Error if DCN has no notation identifier. */
+     if (ept->estore == ESN && NEXTYPE(ept->etx.n) != ESNSUB
+         && !NEDCNDEFINED(ept->etx.n)) {
+          sgmlerr(78, &pcbstag, NEDCN(ept->etx.n)+1,
+                  pt+1);
+          SET(ADFLAGS(al,adn), AINVALID);
+     }
+}
+/* IDREFTST: Validate an individual IDREF token in an IDREF or IDREFS value.
+             Error 70 (and AINVALID) if more than GRPCNT IDREF tokens have
+             been seen in this attribute list.  Otherwise the reference is
+             noted with idref() and counted in ds.idrcnt.
+*/
+VOID idreftst(adn, pt)
+int adn;                      /* Position in list. */
+UNCH *pt;                     /* Ptr to current IDREF token in value. */
+{
+     struct fwdref *pending;  /* Record returned by idref(), if any. */
+
+     idrctr++;
+     if (idrctr > GRPCNT) {
+          sgmlerr(70, &pcbstag, ADNAME(al,adn), pt+1);
+          SET(ADFLAGS(al,adn), AINVALID);
+          return;
+     }
+
+     /* Note IDREF.  When idref() hands back a record, attach a saved
+        copy of error 69 to it. */
+     pending = idref(pt);
+     if (pending != 0)
+          pending->msg = saverr(69, &pcbstag, ADNAME(al,adn), pt+1);
+     ++ds.idrcnt;
+}
+/* ANMGET: Locate an attribute name in an attribute definition list (al).
+           Returns the position of the matching entry, or zero if the
+           name is not in the list.  The member entries that follow a
+           group attribute are skipped, not compared.
+*/
+int anmget(adsz, nm)
+int adsz;                     /* Size of list. */
+UNCH *nm;                     /* Value to be found (with length byte). */
+{
+     int adn;                 /* Position in list. */
+
+     for (adn = 1; adn <= adsz; adn++) {
+          if (!ustrcmp(nm+1, ADNAME(al,adn)))
+               return adn;
+          /* No match: step over this attribute's group members, if any. */
+          if (BITON(ADFLAGS(al,adn), AGROUP)) adn += (int)ADNUM(al,adn);
+     }
+     return 0;
+}
+/* ANTVGET: Find the position of a name token value in an attribute list.
+            Only attributes flagged AGROUP are searched.  Return the
+            position of the attribute definition whose group contains the
+            value, or zero if none was found.  If pp is non-NULL, *pp is
+            set to the adname of the matching member entry.
+*/
+int antvget(adsz, nm, pp)
+int adsz;                     /* Size of list. */
+UNCH *nm;                     /* Value to be found (with length byte). */
+UNCH **pp;                    /* Store value here */
+{
+     int adn;                 /* Position in list. */
+
+     for (adn = 1; adn <= adsz; adn++) {
+          /* Test only name group members. */
+          if (BITON(ADFLAGS(al,adn), AGROUP)) {
+               /* Position of value in sub-list (0 = not a member). */
+               int member = amemget(&al[adn], (int)ADNUM(al,adn), nm);
+
+               if (member != 0) {
+                    if (pp)
+                         *pp = al[adn+member].adname;
+                    return adn;
+               }
+               /* Not in this group: skip its member entries. */
+               adn += (int)ADNUM(al,adn);
+          }
+     }
+     return 0;
+}
+/* AMEMGET: Get the position of a member in an attribute name token group.
+            Returns the position (1..adsz), or zero if not found.
+            The length byte is ignored in the comparison so that final
+            form tokens from ATTVAL can be compared to group members:
+            *nm-1 characters are compared, starting past both length bytes.
+*/
+int amemget(anmtgrp, adsz, nm)
+struct ad anmtgrp[];          /* Name token group. */
+int adsz;                     /* Size of group. */
+UNCH *nm;                     /* Name to be found (with length byte). */
+{
+     int adn;                 /* Position in group. */
+
+     for (adn = 1; adn <= adsz; adn++) {
+          if (!ustrncmp(nm+1, anmtgrp[adn].adname+1, (UNS)*nm-1))
+               return adn;
+     }
+     return 0;
+}
+/* VALLEN: Returns the length of an attribute value for capacity
+           calculations.  Normally, the length is NORMSEP plus the number
+           of characters.  For tokenized lists, it is NORMSEP,
+           plus the number of characters in the tokens, plus
+           NORMSEP for each token.
+           ACHARS and tokenized lists don't have a length byte, so their
+           character count comes from ustrlen(); for the other types it
+           comes from the length byte (*def) less 2.
+*/
+UNS vallen(type, num, def)
+int type;                     /* ADTYPE(al,adn) */
+int num;                      /* ADNUM(al,adn) */
+UNCH *def;                    /* ADVAL(al,adn) */
+{
+     UNS len;                 /* Computed capacity length. */
+
+     if (type == ACHARS)
+          len = ustrlen(def) + NORMSEP;
+     else if (type < ATKNLIST)
+          len = *def - 2 + NORMSEP;
+     else
+          len = ustrlen(def) + num * (NORMSEP - 1) + NORMSEP;
+     return len;
+}
+/* PARSEGRP: Parse GI names, get their etds, and form an array of pointers
+             to them.  The array is terminated by a NULL pointer.
+             The number of pointers (including the NULL) is returned.
+             The grp buffer must have room for GRPCNT+1 etds.
+             A name already in the group is reported (error 98) and not
+             added again.  Parsing stops at GRPE, after GRPCNT members,
+             or at any action other than NAS_, EE_ and PIE_.
+*/
+UNS parsegrp(grp, pcb, tbuf)
+struct etd *grp[];            /* Buffer for building the group. */
+struct parse *pcb;            /* Current parse control block. */
+UNCH *tbuf;
+{
+     int nmembers = 0;        /* Number of etds in the group. */
+     int startes = es;        /* Entity stack level when grp started. */
+     int k;
+
+     for (;;) {
+          /* The token is parsed before the count is tested. */
+          if (parse(pcb) == GRPE || nmembers >= GRPCNT)
+               break;
+
+          if (pcb->action == NAS_) {
+               /* GI name: get its etd for the group. */
+               grp[nmembers] = etddef(parsenm(tbuf, NAMECASE));
+               k = 0;
+               while (k < nmembers && grp[k] != grp[nmembers])
+                    k++;
+               if (k < nmembers)
+                    mderr(98, ntoa(nmembers + 1), grp[nmembers]->etdgi + 1);
+               else
+                    nmembers++;
+          }
+          else if (pcb->action == EE_) {
+               /* Entity ended (correctly or incorrectly). */
+               if (es < startes) {
+                    synerr(37, pcb);
+                    startes = es;
+               }
+          }
+          else if (pcb->action == PIE_) {
+               /* PI entity reference (invalid). */
+               entpisw = 0;   /* Reset PI entity indicator. */
+               synerr(59, pcb);
+          }
+          else
+               break;
+     }
+     grp[nmembers++] = 0;     /* NULL pointer indicates end of group. */
+     if (es != startes) synerr(37, pcb);
+     return nmembers;         /* Return number of ptrs in group. */
+}
+/* PARSNGRP: Parse notation names, get their dcncbs, and form an array of
+             pointers to them.  The array is terminated by a NULL pointer.
+             The number of pointers (including the NULL) is returned.
+             The grp buffer must have room for GRPCNT+1 members.
+             A name already in the group is reported (error 98) and not
+             added again.  Parsing stops at GRPE, after GRPCNT members,
+             or at any action other than NAS_, EE_ and PIE_.
+*/
+UNS parsngrp(grp, pcb, tbuf)
+struct dcncb *grp[];          /* Buffer for building the group. */
+struct parse *pcb;            /* Current parse control block. */
+UNCH *tbuf;
+{
+     int nmembers = 0;        /* Number of members in the group. */
+     int startes = es;        /* Entity stack level when grp started. */
+     int k;
+
+     for (;;) {
+          /* The token is parsed before the count is tested. */
+          if (parse(pcb) == GRPE || nmembers >= GRPCNT)
+               break;
+
+          if (pcb->action == NAS_) {
+               /* Member name: get its control block. */
+               grp[nmembers] = dcndef(parsenm(tbuf, NAMECASE));
+               k = 0;
+               while (k < nmembers && grp[k] != grp[nmembers])
+                    k++;
+               if (k < nmembers)
+                    mderr(98, ntoa(nmembers + 1), grp[nmembers]->ename + 1);
+               else
+                    nmembers++;
+          }
+          else if (pcb->action == EE_) {
+               /* Entity ended (correctly or incorrectly). */
+               if (es < startes) {
+                    synerr(37, pcb);
+                    startes = es;
+               }
+          }
+          else if (pcb->action == PIE_) {
+               /* PI entity reference (invalid). */
+               entpisw = 0;   /* Reset PI entity indicator. */
+               synerr(59, pcb);
+          }
+          else
+               break;
+     }
+     grp[nmembers++] = 0;     /* NULL pointer indicates end of group. */
+     if (es != startes) synerr(37, pcb);
+     return nmembers;         /* Return number of ptrs in group. */
+}
+/* COPYGRP: Allocate storage for a group and copy the group into it.
+            Returns the copy, or a null pointer when pg itself is null.
+            The storage comes from rmalloc().
+*/
+PETD *copygrp(pg, grpsz)
+PETD pg[];                    /* Pointer to a group (array of etd ptrs). */
+UNS grpsz;                    /* Number of ptrs in grp, including final NULL. */
+{
+     UNS nbytes;              /* Group length in characters. */
+     PETD *copy;              /* Ptr to permanent name group. */
+
+     if (pg == 0)
+          return (PETD *)0;
+
+     nbytes = grpsz * sizeof(struct etd *);
+     copy = (struct etd **)rmalloc(nbytes);
+     memcpy((UNIV)copy, (UNIV)pg, nbytes);
+     return copy;
+}
+/* INGRP: Locate an etd in a name group and return its index+1 (or zero
+          if not found).  The group is scanned up to its terminating
+          null pointer.
+*/
+int ingrp(pg, ketd)
+PETD pg[];                    /* Array of pointers to etds. */
+PETD ketd;                    /* Pointer to etd to be found in group. */
+{
+     int slot;                /* Array index. */
+
+     for (slot = 0; pg[slot]; slot++) {
+          if (pg[slot] == ketd)
+               return slot + 1;
+     }
+     return 0;
+}
+/* PARSELIT: Parse a delimited string and collect it into a token.
+             Caller supplies buffer, which must be 1 longer than
+             maximum string allowed.
+             Caller also supplies character that delimits the string.
+             On return the buffer holds the literal followed by EOS and
+             datalen holds its length (without the EOS).
+             If the literal is longer than maxlen it is cut short and
+             error 134 is issued; error 37 is issued if the entity stack
+             level at the end differs from the level at the start.
+             While the literal is parsed, lexlms[del] is set to the
+             literal-close class; it is restored before returning.
+             TODO: Return 1 if CDATA, SDATA or NONSGML occurred.
+*/
+#ifdef USE_PROTOTYPES
+VOID parselit(UNCH *tbuf, struct parse *pcb, UNS maxlen, UNCH del)
+#else
+VOID parselit(tbuf, pcb, maxlen, del)
+UNCH *tbuf;                   /* Work area for tokenization (parmlen+1). */
+struct parse *pcb;            /* Current parse control block. */
+UNS maxlen;                   /* Maximum length of token. */
+UNCH del;                     /* Literal delimiter: LIT LITA PIC EOS */
+#endif
+{
+     UNCH *pt = tbuf;         /* Current pointer into tbuf. */
+     UNCH lexsv = lexlms[del];/* Saved lexlms value of delimiter. */
+     int essv = es;           /* Entity stack level when literal started. */
+     UNCH datadel;            /* Delimiter for CDATA/SDATA entity. */
+     int parmlen = (int)maxlen; /* Working limit (to be decremented). */
+
+     lexlms[del] = lex.l.litc; /* Set delimiter to act as literal close. */
+     /* A case that ends in `continue' re-tests the loop condition and
+        parses on; falling out of the switch reaches the `break' that
+        ends the loop. */
+     do {
+          switch (parse(pcb)) {
+          case LP2_:          /* Move 2nd char back to buffer; redo prev.*/
+               REPEATCC;
+               /* FALLTHROUGH */
+          case LPR_:          /* Move previous char to buffer; REPEATCC; */
+               REPEATCC;
+               /* FALLTHROUGH */
+          case MLA_:          /* Move character to buffer. */
+               *pt++ = *FPOS; --parmlen;
+               continue;
+
+          case FUN_:          /* Function char found; replace with space.*/
+               *pt++ = ' '; --parmlen;
+               continue;
+
+          case RSM_:          /* Record start: ccnt=0; ++rcnt.*/
+               ++RCNT; CTRSET(RSCC); *pt++ = *FPOS; --parmlen;
+               continue;
+
+          case ERX_:          /* Entity reference: cancel LITC delim. */
+          case PEX_:          /* Parameter entity ref: cancel LITC delim.*/
+               lexlms[del] = lexsv;
+               continue;
+
+          case EE_:
+               if (es<essv) {
+                    synerr(37, pcb);
+                    essv = es;
+               }
+               /* If back at top level, re-enable the LITC delimiter. */
+               if (es==essv) lexlms[del] = lex.l.litc;
+               continue;
+
+          case MLE_:          /* Char not allowed in minimum literal. */
+               synerr(63, pcb);
+               continue;
+
+          case DEF_:          /* Data entity: add it to buffer. */
+               if (pcb == &pcblitt) {
+                    /* Tokenized literal: tokdata copies the entity text
+                       and returns the space left (negative on overflow). */
+                    int parmlensv = parmlen;
+                    entdatsw = 0;
+                    parmlen = tokdata(pt, parmlen);
+                    if (parmlen < 0)
+                         break;
+                    pt += parmlensv - parmlen;
+                    continue;
+               }
+               /* Otherwise the entity text is stored between a pair of
+                  DELCDATA or DELSDATA bytes, hence the +2. */
+               if ((parmlen -= (int)datalen+2)<0) {entdatsw = 0; break;}
+               *pt++ = datadel =
+                    BITON(entdatsw, CDECONT) ? DELCDATA : DELSDATA;
+               entdatsw = 0;
+               memcpy( pt , data, datalen );
+               pt += datalen;
+               *pt++ = datadel;
+               continue;
+
+          case NON_:          /* Non-SGML char (delimited and shifted). */
+               if ((parmlen -= 2)<0) break;
+               memcpy( pt , nonchbuf, 2 );
+               pt += 2;
+               continue;
+
+          case RPR_:          /* Remove character from buffer. */
+               --pt; ++parmlen;
+               break;
+
+          case EOD_:
+               exiterr(92, pcb);
+               /* NOTE(review): exiterr presumably does not return; if it
+                  did, control would fall into default. */
+
+          default:
+               break;
+          }
+          break;
+     } while (parmlen>=0 && pcb->action!=TER_);
+
+     if (parmlen<0) {
+          /* Drop the last stored character.  The DEF_ and NON_ overflow
+             paths leave pt untouched, so when the literal begins with the
+             item that overflows, pt is still tbuf: an unguarded --pt would
+             step to tbuf-1, make datalen wrap, and store the EOS in front
+             of the caller's buffer. */
+          if (pt > tbuf) --pt;
+          sgmlerr(134, pcb, ntoa((int)maxlen),(UNCH *)0);
+          REPEATCC;
+     }
+     datalen = (UNS)(pt-tbuf);/* To return PI string to text processor. */
+     *pt++ = EOS;
+     lexlms[del] = lexsv;     /* Restore normal delimiter handling. */
+     if (es!=essv) synerr(37, pcb);
+     return;
+}
+
+/* TOKDATA: Handle a data entity in a tokenized attribute value literal.
+The entity text (data, datalen) is copied to pt with RS characters
+dropped and each run of RE, TAB and SPACE reduced to its first
+character; a separator is also dropped when pcblitt.newstate was 0 on
+entry.  A DELNONCH byte is copied together with the byte after it.
+Parmlen is amount of space left.  Return new parmlen.  If there's not
+enough space return a negative value, having copied up to parmlen + 1
+characters.  pcblitt.newstate is left at 0 if the text ended in a
+separator, and at pcblittda otherwise. */
+
+int tokdata(pt, parmlen)
+UNCH *pt;
+int parmlen;
+{
+     int skip = (pcblitt.newstate == 0); /* Non-zero: drop separators. */
+     int i;
+
+     for (i = 0; parmlen >= 0 && i < datalen; i++) {
+          if (data[i] == RSCHAR)
+               continue;                /* ignore it */
+
+          if (data[i] == RECHAR || data[i] == TABCHAR
+              || data[i] == SPCCHAR) {
+               if (!skip) {
+                    *pt++ = data[i];
+                    parmlen--;
+                    skip = 1;
+               }
+               continue;
+          }
+
+          if (data[i] == DELNONCH) {
+               assert(i + 1 < datalen);
+               parmlen -= 2;
+               /* On overflow nothing is stored; the loop test ends the scan. */
+               if (parmlen >= 0) {
+                    *pt++ = DELNONCH;
+                    *pt++ = data[++i];
+                    skip = 0;
+               }
+               continue;
+          }
+
+          *pt++ = data[i];
+          parmlen--;
+          skip = 0;
+     }
+     pcblitt.newstate = skip ? 0 : pcblittda;
+     return parmlen;
+}
+
+
+/* PARSEMD: Parser for markup declarations.
+ It returns a token each time it is called.
+ The return value is the action code of the token; the token
+ text, if any, is left in pt. Two actions are renamed before
+ returning: LITE is returned as LIT and PENR as PEN.
+ EE_ and PIE_ are consumed here (with error 37 or 59 where
+ applicable) and never returned. parmno is incremented once
+ per returned token.
+
+*/
+int parsemd(pt, namecase, lpcb, tokenlen)
+UNCH *pt; /* Token buffer: >=tokenlen+2. */
+int namecase; /* Case translation: ENTCASE NAMECASE AVALCASE. */
+struct parse *lpcb; /* Parse control block for literal parse. */
+UNS tokenlen; /* Max length of expected token: NAMELEN LITLEN */
+{
+ struct parse *pcb; /* Current parse control block. */
+
+ pcb = (lpcb) ? &pcbmd : &pcbmdc; /* If no literal pcb, dcl is comment. */
+
+ /* Skip entity ends, complaining only if the entity stack drops below
+ the level saved in mdessv. */
+ doparse: while (parse(pcb)==EE_)
+ if (es<mdessv) {synerr(37, pcb); mdessv = es;}
+ if (pcb->action==PIE_) { /* PI entity reference not allowed. */
+ entpisw = 0; /* Reset PI entity indicator. */
+ synerr(59, pcb);
+ goto doparse;
+ }
+ ++parmno; /* Increment parameter counter. */
+ switch (pcb->action) {
+ case CDR: /* COM[1] (MINUS) occurred previously. */
+ REPEATCC;
+ return (int)pcb->action;
+ case LIT: /* Literal: CDATA with LIT delimiter. */
+ parselit(pt, lpcb, tokenlen, lex.d.lit);
+ return (int)pcb->action;
+ case LITE: /* Literal: CDATA with LITA delimiter. */
+ parselit(pt, lpcb, tokenlen, lex.d.lita);
+ return((int)(pcb->action = LIT));
+ case RNS: /* Reserved name started (after RNI). */
+ parsenm(pt, NAMECASE);
+ return (int)pcb->action;
+ case NAS: /* Name started. */
+ if (namecase!=AVALCASE) {
+ parsenm(pt, namecase);
+ return (int)pcb->action;
+ }
+ /* Treat attribute value as name character string. */
+ /* FALLTHROUGH */
+ case NMT: /* Name token string. */
+ parsetkn(pt, NMC, (int)tokenlen); /* Get undelimited value. */
+ return (int)pcb->action;
+ case NUM: /* Number or number token string. */
+ /* Scan with the NU class when tokenlen<=NAMELEN, else with NMC. */
+ parsetkn(pt, (UNCH)((int)tokenlen<=NAMELEN ? NU:NMC), (int)tokenlen);
+ return (int)pcb->action;
+ case PENR:
+ REPEATCC;
+ return (pcb->action = PEN);
+ case EOD_:
+ exiterr(133, pcb);
+ /* EXIT */
+ default: /* End of declaration. */
+ return (int)pcb->action; /* EMD GRPS MGRP PEN PGRP */
+ }
+}
+/* PARSEMOD: If the declared content was a keyword, the token count is zero
+             and it is only necessary to save the type.  Otherwise,
+             collect the outermost token count and model type bytes for a
+             model.  The count includes tokens found in nested groups also.
+             After building the model, parse for its occurrence indicator.
+             Returns gbuf, or a null pointer if parsegcm() failed.
+*/
+struct thdr *parsemod(dctype)
+int dctype;                   /* Content type (0=model). */
+{
+     gbuf[0].ttype = (UNCH)dctype; /* Initialize content flags byte. */
+     gbuf[0].tu.tnum = 0;     /* Don't count 1st group or model header. */
+     if (dctype)
+          return gbuf;        /* Return if not model. */
+
+     gbuf[1].ttype = 0;       /* Initialize 1st group type ... */
+     gbuf[1].tu.tnum = 0;     /* and count. */
+     grplvl = 1;              /* Content model is 1st level group. */
+     pcbgrcm.newstate = 0;    /* Go parse the model group. */
+
+     /* Empty group is trapped during syntax parse; other errors return NULL. */
+     if (!parsegcm(&pcbgrcm, &gbuf[1], &gbuf[0]))
+          return (struct thdr *)0;
+
+     /* Get the model suffix, if there is one.  Any other action (RCR_:
+        repeat char and return) leaves the group type alone. */
+     parse(&pcbgrcs);
+     if (pcbgrcs.action == OPT) {
+          /* OPT occurrence indicator for model. */
+          SET(gbuf[1].ttype, TOPT|TXOPT);
+     }
+     else if (pcbgrcs.action == REP) {
+          /* REP occurrence indicator for model. */
+          SET(gbuf[1].ttype, TREP|TXREP);
+     }
+     else if (pcbgrcs.action == OREP) {
+          /* OREP occurrence indicator for model. */
+          SET(gbuf[1].ttype, TOREP|TXOREP);
+     }
+
+     if (sw.swambig) ambig(); /* Check content model for ambiguity. */
+     return gbuf;
+}
+/* PARSEGCM: Collect token headers (struct thdr) into a group (array).
+ An etd is defined for each GI (if none exists) and its pointer is
+ stored in the header. The function is called recursively.
+ Tokens are read with parse(pcb) until it returns GRPE, or until the
+ token count of this group exceeds GRPCNT or the token count of the
+ whole model exceeds GRPGTCNT.
+ Returns the pointer to the last token header stored, or a null
+ pointer when the model is abandoned (bad reserved name, nesting
+ deeper than GRPLVL, failure of a nested group, unexpected action,
+ or a count limit exceeded). A connector mismatch, an entity end
+ inside the group and a PI entity reference are reported but do not
+ stop the parse.
+*/
+struct thdr *parsegcm(pcb, pgh, gbuf)
+struct parse *pcb; /* Current parse control block. */
+struct thdr *pgh; /* Current group header in group buffer. */
+struct thdr *gbuf; /* Header for outermost group (model). */
+{
+#define MCON gbuf->ttype /* Model type (content attributes). */
+ struct thdr *pg=pgh; /* Current group token. */
+ struct thdr *pgsv=pgh; /* Saved current token for occ indicator. */
+ int optcnt = 0; /* Count of optional tokens in group. */
+ int essv = es; /* Entity stack level when grp started. */
+
+ while (gbuf->tu.tnum<=GRPGTCNT && pgh->tu.tnum<=GRPCNT && parse(pcb)!=GRPE)
+ switch (pcb->action) {
+
+ case NAS_: /* GI name: get its etd and store it. */
+ ++gbuf->tu.tnum; ++pgh->tu.tnum;
+ (pgsv = ++pg)->ttype = TTETD;
+ pg->tu.thetd = etddef(parsenm(tbuf, NAMECASE));
+ SET(MCON, MGI);
+ continue;
+
+ case RNS_: /* Reserved name started (#PCDATA). */
+ parsenm(tbuf, NAMECASE);
+ if (ustrcmp(tbuf+1, key[KPCDATA])) {
+ mderr(116, ntoa(gbuf->tu.tnum), tbuf+1);
+ return (struct thdr *)0;
+ }
+ /* If #PCDATA is the first non-group token, model is a phrase. */
+ if (!MCON) SET(MCON, MPHRASE); /* Falls through into the DTAG code. */
+ case DTAG: /* Data tag template ignored; treat as #PCDATA. */
+ if (pcb->action==DTAG) SET(pgh->ttype, TTSEQ); /* DTAG is SEQ grp. */
+ ++gbuf->tu.tnum; ++pgh->tu.tnum;
+ (++pg)->ttype = TTCHARS+TOREP;/* #PCDATA is OPT and REP. */
+ pg->tu.thetd = ETDCDATA;
+ ++optcnt; /* Ct opt tokens to see if grp is opt.*/
+ SET(MCON, MCHARS);
+ continue;
+
+ case GRP_: /* Group started. */
+ ++gbuf->tu.tnum; ++pgh->tu.tnum;
+ (pgsv = ++pg)->ttype = 0; /* Type will be set by connector. */
+ pg->tu.tnum = 0; /* Group has number instead of etd. */
+ if (++grplvl>GRPLVL) {
+ mderr(115, ntoa(gbuf->tu.tnum), (UNCH *)0);
+ return (struct thdr *)0;
+ }
+ pg = parsegcm(pcb, pg, gbuf);
+ if (!pg) return (struct thdr *)0; /* Nested group failed. */
+ if (GET(pgsv->ttype, TOPT)) ++optcnt; /* Indicate nested opt grp. */
+ --grplvl;
+ continue;
+
+ case OREP: /* OREP occurrence indicator for current token.*/
+ SET(pgsv->ttype, TREP|TXREP);
+ /* Now treat like OPT. */
+ case OPT: /* OPT occurrence indicator for current token. */
+ SET(pgsv->ttype, TXOPT);
+ if (GET(pgsv->ttype, TOPT)) continue; /* Exit if nested opt grp. */
+ SET(pgsv->ttype, TOPT);
+ ++optcnt; /* Count opt tokens to see if grp is optional. */
+ continue;
+ case REP: /* REP occurrence indicator for current token. */
+ SET(pgsv->ttype, TREP|TXREP);
+ continue;
+
+ case OR: /* OR connector found. */
+ if BITOFF(pgh->ttype, TTAND) SET(pgh->ttype, TTOR);
+ else if (GET(pgh->ttype, TTAND)!=TTOR)
+ mderr(55, ntoa(gbuf->tu.tnum), (UNCH *)0);
+ continue;
+ case AND: /* AND connector found. */
+ if BITOFF(pgh->ttype, TTAND) SET(pgh->ttype, TTAND);
+ else if (GET(pgh->ttype, TTAND)!=TTAND)
+ mderr(55, ntoa(gbuf->tu.tnum), (UNCH *)0);
+ continue;
+ case SEQ: /* SEQ connector found. */
+ if BITOFF(pgh->ttype, TTAND) SET(pgh->ttype, TTSEQ);
+ else if (GET(pgh->ttype, TTAND)!=TTSEQ)
+ mderr(55, ntoa(gbuf->tu.tnum), (UNCH *)0);
+ continue;
+
+ case EE_: /* Entity ended (correctly or incorrectly). */
+ if (es<essv) {synerr(37, pcb); essv = es;}
+ continue;
+
+ case PIE_: /* PI entity reference (not permitted). */
+ entpisw = 0; /* Reset PI entity indicator. */
+ synerr(59, pcb);
+ continue;
+
+ default: /* Syntax errors return in disgrace. */
+ synerr(37, pcb);
+ return (struct thdr *)0;
+ }
+ if (pgh->tu.tnum>GRPCNT) {
+ mderr(113, ntoa(gbuf->tu.tnum), (UNCH *)0);
+ return (struct thdr *)0;
+ }
+ if (gbuf->tu.tnum>GRPGTCNT) {
+ mderr(114, ntoa(gbuf->tu.tnum), (UNCH *)0);
+ return (struct thdr *)0;
+ }
+ if (pgh->tu.tnum==1) SET(pgh->ttype, TTSEQ); /* Unit grp is SEQ. */
+ /* An optional token in an OR group makes the group optional. */
+ if (GET(pgh->ttype, TTMASK)==TTOR && optcnt) SET(pgh->ttype, TOPT);
+ /* If all tokens in any group are optional, so is the group. */
+ if (pgh->tu.tnum<=optcnt) SET(pgh->ttype, TOPT);
+
+ if (es!=essv) synerr(37, pcb); /* Entity level differs from group start. */
+ return pg; /* Return pointer to GRPS token. */
+}
+/* PARSENM: Collect an SGML name into tbuf, optionally folding each
+   character through LEXTRAN.
+   On entry *FPOS is the first character of the name.  Further characters
+   are taken while their lextoke class is at least NMC and fewer than
+   NAMELEN characters have been stored; entget() is called when an EOB
+   character is met.  The buffer receives a length byte, the name and
+   an EOS; the length byte counts itself and the EOS.
+   Returns tbuf.
+*/
+UNCH *parsenm(tbuf, nc)
+UNCH *tbuf;                   /* Buffer for name: >=NAMELEN+2. */
+int nc;                       /* Namecase translation: 1=yes; 0=no. */
+{
+     UNCH last = 1;           /* Index of the last name byte stored. */
+
+     tbuf[last] = nc ? lextran[*FPOS] : *FPOS;
+     for (;;) {
+          NEWCC;
+          if ((int)lextoke[*FPOS] < NMC) break;
+          if (last >= NAMELEN) break;
+          TRACETKN(NMC, lextoke);
+          ++last;
+          tbuf[last] = nc ? lextran[*FPOS] : *FPOS;
+          if (lextoke[tbuf[last]] == EOB) {
+               /* Buffer boundary, not a name character: drop it. */
+               --last;
+               entget();
+          }
+     }
+     REPEATCC;                /* Put back the non-token character. */
+     tbuf[++last] = EOS;      /* Terminate name with standard EOS. */
+     tbuf[0] = ++last;        /* Store length ahead of name. */
+     return tbuf;
+}
+/* PARSETKN: Collect a start-tag attribute value token into tbuf.
+   On entry *FPOS already holds the first character of the token.
+   Characters are appended while their lextoke class is at least scope
+   and fewer than maxlen characters have been stored; the first
+   character below scope is handed back with REPEATCC.
+   The result is terminated by EOS and has no length byte.
+   Returns tbuf.
+*/
+#ifdef USE_PROTOTYPES
+UNCH *parsetkn(UNCH *tbuf, UNCH scope, int maxlen)
+#else
+UNCH *parsetkn(tbuf, scope, maxlen)
+UNCH *tbuf;                   /* Buffer for token: >=maxlen+1. */
+UNCH scope;                   /* Minimum lexical class allowed. */
+int maxlen;                   /* Maximum length of a token. */
+#endif
+{
+     UNCH *dst = tbuf;        /* Next free position in the buffer. */
+
+     *dst++ = *FPOS;
+     while (dst - tbuf < maxlen) {
+          NEWCC;
+          if (lextoke[*FPOS] >= scope) {
+               TRACETKN(scope, lextoke);
+               if (*FPOS != EOBCHAR)
+                    *dst++ = *FPOS;
+               else
+                    entget();
+               continue;
+          }
+          REPEATCC;
+          break;
+     }
+     *dst = EOS;
+     return tbuf;
+}
+/* PARSESEQ: Collect a blank sequence (space and TAB characters) into tbuf.
+   On entry *FPOS already holds the first character of the sequence.
+   The number of characters stored is left in datalen; collection stops
+   at the first character that is neither of class SEP nor SPCCHAR, or
+   once maxlen characters have been stored.  No EOS is appended.
+*/
+VOID parseseq(tbuf, maxlen)
+UNCH *tbuf;                   /* Buffer for storing found sequence. */
+int maxlen;                   /* Maximum length of a blank sequence. */
+{
+     tbuf[0] = *FPOS;
+     datalen = 1;
+     while (1) {
+          NEWCC;
+          if (*FPOS == EOBCHAR)
+               entget();
+          else if (datalen < maxlen
+                   && (lextoke[*FPOS] == SEP || *FPOS == SPCCHAR)) {
+               tbuf[datalen++] = *FPOS;
+               TRACETKN(SEP, lextoke);
+          }
+          else
+               break;
+     }
+}
+/* S2VALNM: Build a name from a string (S "string" 2 "to" VALNM), for
+   attribute values that are tokenized like names.
+   Characters are copied from s while their lextoke class is at least
+   scope and fewer than NAMELEN have been copied, so copying stops at
+   the first bad character.  The name is stored as a length byte, the
+   characters and an EOS; the length byte counts itself and the EOS.
+   Returns nm.
+*/
+#ifdef USE_PROTOTYPES
+UNCH *s2valnm(UNCH *nm, UNCH *s, UNCH scope, int translate)
+#else
+UNCH *s2valnm(nm, s, scope, translate)
+UNCH *nm;                     /* Name to be created. */
+UNCH *s;                      /* Source string to be parsed as name. */
+UNCH scope;                   /* Minimum lexical class allowed. */
+int translate;                /* Namecase translation: 1=yes; 0=no. */
+#endif
+{
+     UNCH count = 0;          /* Number of name characters copied. */
+
+     while ((int)lextoke[*s] >= scope && count < NAMELEN) {
+          UNCH ch = *s++;
+
+          if (translate)
+               ch = lextran[ch];
+          nm[++count] = ch;
+     }
+     nm[++count] = EOS;       /* Terminate name with standard EOS. */
+     nm[0] = ++count;         /* Store length ahead of name. */
+     return nm;
+}
+/* PARSEVAL: Parser for attribute values.
+ The input is read from a string and tokenized in a buffer.
+ The input is terminated by EOS.
+ Each token is preceded by its actual length; there is no EOS.
+ If an error occurs while parsing, or
+ if a token doesn't conform, set the token count to 0 to show that
+ value was not tokenized and return the error code.
+ After successful parse, return 0. For types below ATKNLIST the first
+ length byte is then increased by 2 so that it also counts itself and
+ the EOS stored after the token.
+ The number of tokens found is set in external variable tokencnt.
+ Return codes:
+   0  value was tokenized.
+  14  INVA action on a character other than the terminating '\0'.
+  15  LENA action (length limit of a token exceeded).
+  16  a second token was found for a type that takes one token.
+  17  the token does not start as the type requires, or a number is
+      directly followed by an NMS or NMC character.
+  25  the first byte of tbuf is zero after the final EOS was stored
+      (presumably no token was found -- confirm EOS is '\0').
+*/
+int parseval(s, atype, tbuf)
+UNCH *s; /* Source string to be parsed as token list. */
+UNS atype; /* Type of token list expected. */
+UNCH *tbuf; /* Work area for tokenization. */
+{
+ int t;
+ UNCH *pt = tbuf;
+
+ pcbval.newstate = 0; tokencnt = 0;
+ while (1) {
+ for (;;) { /* Step the state table until an action other than NOPA. */
+ pcbval.input = lextoke[*s];
+ pcbval.state = pcbval.newstate;
+ pcbval.newstate = (*(pcbval.ptab + pcbval.state)) [pcbval.input];
+ pcbval.action = (*(pcbval.ptab + pcbval.state+1)) [pcbval.input];
+ TRACEVAL(&pcbval, atype, s, tokencnt);
+ if (pcbval.action != NOPA)
+ break;
+ s++;
+ }
+
+
+ switch (pcbval.action) {
+ case INVA: /* Invalid character; terminate parse. */
+ if (*s == '\0') goto alldone; /* Normal termination. */
+ tokencnt = 0; /* Value was not tokenized. */
+ return(14);
+ case LENA: /* Length limit of token exceeded; end parse. */
+ tokencnt = 0; /* Value was not tokenized. */
+ return(15);
+ default: /* Token begun: NUMA, NASA, or NMTA. */
+ break;
+ }
+
+ ++tokencnt; /* One token per iteration. */
+ switch (atype) {
+ case AENTITY:
+ if (tokencnt>1) {tokencnt = 0; return(16);} /* Else falls through. */
+ case AENTITYS:
+ if (pcbval.action!=NASA) {tokencnt = 0; return(17);}
+ s2valnm(pt, s, NMC, ENTCASE);
+ break;
+
+ case AID:
+ case AIDREF:
+ case ANAME:
+ case ANOTEGRP:
+ if (tokencnt>1) {tokencnt = 0; return(16);} /* Else falls through. */
+ case AIDREFS:
+ case ANAMES:
+ if (pcbval.action!=NASA) {tokencnt = 0; return(17);}
+ s2valnm(pt, s, NMC, NAMECASE);
+ break;
+
+ case ANMTGRP:
+ case ANMTOKE:
+ if (tokencnt>1) {tokencnt = 0; return(16);} /* Else falls through. */
+ case ANMTOKES:
+ /* No test needed because NMTA, NUMA and NASA are all valid. */
+ s2valnm(pt, s, NMC, NAMECASE);
+ break;
+
+ case ANUMBER:
+ if (tokencnt>1) {tokencnt = 0; return(16);} /* Else falls through. */
+ case ANUMBERS:
+ if (pcbval.action!=NUMA) {tokencnt = 0; return(17);}
+ s2valnm(pt, s, NU, NAMECASE);
+ t = lextoke[s[*pt - 2]]; /* Class of the char just after the copied text. */
+ if (t == NMS || t == NMC) {tokencnt = 0; return(17);}
+ break;
+
+ case ANUTOKE:
+ if (tokencnt>1) {tokencnt = 0; return(16);} /* Else falls through. */
+ case ANUTOKES:
+ if (pcbval.action!=NUMA) {tokencnt = 0; return(17);}
+ s2valnm(pt, s, NMC, NAMECASE);
+ break;
+ } /* NOTE(review): there is no default case; an atype not listed above
+      would leave *pt unwritten before it is used below -- confirm that
+      callers only pass the types handled here. */
+ *pt -= 2; /* Stored length now counts the token characters only. */
+ s += *pt; /* Skip the token in the source string. */
+ pt += *pt + 1; /* Next token starts where s2valnm stored the EOS. */
+ }
+ alldone:
+ *pt++ = EOS;
+ if (*tbuf == '\0')
+ return 25;
+ if (atype < ATKNLIST)
+ *tbuf += 2; /* include length and EOS */
+ return 0;
+}
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+comment-column: 30
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/pcbrf.c b/usr.bin/sgmls/sgmls/pcbrf.c
new file mode 100644
index 0000000..16786e5
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/pcbrf.c
@@ -0,0 +1,1344 @@
+/* PCBRF: Parse tables for reference concrete syntax.
+*/
+#include "config.h"
+#include "entity.h" /* Templates for entity control blocks. */
+#include "action.h" /* Action names for all parsing. */
+#include "synxtrn.h" /* Declarations for concrete syntax constants. */
+#include "adl.h" /* Definitions for attribute list processing. */
+/* PCBCONM: State and action table for content parse of mixed content.
+ Initial state assumes a start-tag was just processed.
+ Each state has two consecutive rows in conmtab: its next-state row
+ followed by its action row, with one column per lexical class named in
+ the column-heading comments. The state symbols below are therefore the
+ even row numbers of conmtab; they are #undef'd after the table.
+*/
+/* Symbols for state names (end with a number). */
+#define ET0 0 /* Markup found or buffer flushed; no data. */
+#define DA0 2 /* Data in buffer. */
+#define DA1 4 /* Data and space in buffer. */
+#define ER0 6 /* ERO found; start lookahead buffer. */
+#define CR0 8 /* CRO found (ERO, RNI). */
+#define RS0 10 /* RS found; possible SR 3-6. */
+#define ME0 12 /* MSC found; possible SR26. */
+#define ME1 14 /* MSC, MSC found. */
+#define ES0 16 /* TAGO found; start lookahead buffer. */
+#define EE0 18 /* End-tag start (TAGO,ETI); move to lookahead buffer. */
+#define NE0 20 /* End-tag start (TAGO,NET); process NET if not end-tag. */
+#define MD0 22 /* MDO found (TAGO, MDO[2]). */
+#define MC0 24 /* MDO, COM found. */
+#define SC0 26 /* COM found; possible SR19-20. */
+#define SP0 28 /* Space found; data pending; possible SR7 or SR9. */
+#define SR0 30 /* SPCR found; possible SR7 or SR9. */
+#define TB0 32 /* TAB found; possible SR7 or SR9. */
+
+int pcbcnet = ET0; /* PCBCONM: markup found or data buffer flushed.*/
+int pcbcnda = DA0; /* PCBCONM: data in buffer. */
+
+static UNCH
+/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
+ nmre com eti net lit spcr mdo msc mso pio rni tagc tago fce */
+et0 []={DA0 ,DA0 ,DA0 ,DA0 ,SP0 ,ET0 ,ET0 ,ET0 ,RS0 ,ET0 ,TB0 ,DA0 ,ET0 ,ER0 ,
+ ET0 ,SC0 ,DA0 ,ET0 ,ET0 ,SR0 ,DA0 ,ME0 ,ET0 ,DA0 ,ET0 ,DA0 ,ES0 ,ET0 },/*et0*/
+et0a[]={DAS_,DAS_,DAS_,DAS_,DAS_,NON_,GET_,GET_,RSR_,SR2_,DAS_,DAS_,NSC_,LAS_,
+ REF_,NOP_,DAS_,NED_,SR10,DAS_,DAS_,NOP_,SR25,DAS_,SR11,DAS_,LAS_,FCE_},
+
+da0 []={DA0 ,DA0 ,DA0 ,DA0 ,DA1 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,
+ ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 },/*da0*/
+da0a[]={NOP_,NOP_,NOP_,NOP_,NOP_,DAF_,DAF_,DAF_,DAF_,DAF_,DAF_,NOP_,DAF_,DAF_,
+ DAF_,DAF_,NOP_,DAF_,DAF_,DAF_,NOP_,DAF_,DAF_,NOP_,NOP_,NOP_,DAF_,DAF_},
+
+da1 []={DA0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,
+ ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 },/*da1*/
+da1a[]={NOP_,NOP_,NOP_,NOP_,DAR_,DAF_,DAF_,DAR_,DAF_,DAR_,DAR_,NOP_,DAF_,DAF_,
+ DAF_,DAF_,NOP_,DAF_,DAF_,DAR_,NOP_,DAF_,DAF_,NOP_,NOP_,NOP_,DAF_,DAF_},
+
+er0 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ER0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,
+ ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,CR0 ,ET0 ,ET0 ,ET0 },/*er0*/
+er0a[]={LAF_,LAF_,LAF_,ER_ ,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAM_,LAF_,LAF_,LAF_},
+
+/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
+ nmre com eti net lit spcr mdo msc mso pio rni tagc tago fce */
+cr0 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,CR0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,
+ ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 },/*cr0*/
+cr0a[]={NLF_,CRN_,NLF_,CRA_,NLF_,NLF_,NLF_,GET_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,
+ NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_},
+
+rs0 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,RS0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,
+ ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 },/*rs0*/
+rs0a[]={SR3_,SR3_,SR3_,SR3_,SR4_,SR3_,SR3_,GET_,SR3_,SR5_,SR4_,SR3_,SR3_,SR3_,
+ SR3_,SR3_,SR3_,NED_,SR3_,SR4_,SR3_,SR3_,SR3_,SR3_,SR3_,SR3_,SR3_,SR3_},
+
+me0 []={ET0, ET0, ET0, ET0, ET0 ,ET0, ET0, ME0, ET0 ,ET0 ,ET0 ,ET0, ET0, ET0,
+ ET0 ,ET0 ,ET0 ,ET0, ET0, ET0, ET0, ME1 ,ET0, ET0, ET0 ,ET0, ET0, ET0 },/*me0*/
+me0a[]={SR26,SR26,SR26,SR26,SR26,SR26,SR26,GET_,SR26,SR26,SR26,SR26,SR26,SR26,
+ SR26,SR26,SR26,SR26,SR26,SR26,SR26,NOP_,SR26,SR26,SR26,SR26,SR26,SR26},
+
+me1 []={ET0, ET0, ET0, ET0, ET0 ,ET0, ET0, ME1, ET0 ,ET0 ,ET0 ,ET0, ET0, ET0,
+ ET0 ,ET0 ,ET0 ,ET0, ET0, ET0, ET0, ET0 ,ET0, ET0, ET0 ,ET0, ET0, ET0 },/*me1*/
+me1a[]={RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,GET_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,
+ RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,MSE_,RBR_,RBR_},
+
+/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
+ nmre com eti net lit spcr mdo msc mso pio rni tagc tago fce */
+es0 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ES0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,
+ ET0 ,ET0 ,EE0 ,NE0 ,ET0 ,ET0 ,MD0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 },/*es0*/
+es0a[]={LAF_,LAF_,LAF_,STG_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAF_,LAM_,LAM_,LAF_,LAF_,LAM_,LAF_,LAF_,PIS_,LAF_,NST_,LAF_,LAF_},
+
+ee0 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,EE0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,
+ ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 },/*ee0*/
+ee0a[]={LAF_,LAF_,LAF_,ETG_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,NET_,LAF_,LAF_},
+
+ne0 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,NE0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,
+ ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 },/*ne0*/
+ne0a[]={NLF_,NLF_,NLF_,ETG_,NLF_,NLF_,NLF_,GET_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,
+ NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NET_,NLF_,NLF_},
+
+/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
+ nmre com eti net lit spcr mdo msc mso pio rni tagc tago fce */
+md0 []={ET0, ET0, ET0, ET0, ET0 ,ET0, ET0, MD0, ET0 ,ET0 ,ET0 ,ET0, ET0, ET0,
+ ET0 ,MC0 ,ET0 ,ET0, ET0, ET0, ET0, ET0 ,ET0, ET0, ET0 ,ET0, ET0, ET0 },/*md0*/
+md0a[]={LAF_,LAF_,LAF_,MD_ ,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAM_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,MSS_,LAF_,LAF_,MDC_,LAF_,LAF_},
+
+mc0 []={ET0, ET0, ET0, ET0, ET0, ET0 ,ET0, MC0, ET0 ,ET0, ET0 ,ET0, ET0, ET0,
+ ET0 ,ET0 ,ET0 ,ET0, ET0, ET0, ET0, ET0 ,ET0 ,ET0 ,ET0 ,ET0, ET0, ET0 },/*mc0*/
+mc0a[]={NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,GET_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,
+ NLF_,MDC_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_},
+
+sc0 []={ET0, ET0, ET0, ET0, ET0, ET0 ,ET0, SC0, ET0 ,ET0, ET0 ,ET0, ET0, ET0,
+ ET0 ,ET0 ,ET0 ,ET0, ET0, ET0, ET0, ET0 ,ET0 ,ET0 ,ET0 ,ET0, ET0, ET0 },/*sc0*/
+sc0a[]={SR19,SR19,SR19,SR19,SR19,SR19,SR19,GET_,SR19,SR19,SR19,SR19,SR19,SR19,
+ SR19,SR20,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19},
+
+/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
+ nmre com eti net lit spcr mdo msc mso pio rni tagc tago fce */
+sp0 []={DA0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 ,ET0 ,SP0 ,ET0 ,ET0 ,ET0 ,DA0 ,DA0 ,ET0 ,
+ ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 },/*sp0*/
+sp0a[]={NOP_,NOP_,NOP_,NOP_,SR9_,DAF_,DAF_,GTR_,DAF_,SR7_,SR9_,NOP_,NOP_,DAF_,
+ DAF_,DAF_,NOP_,DAF_,DAF_,SR9_,NOP_,DAF_,DAF_,NOP_,NOP_,NOP_,DAF_,DAF_},
+
+sr0 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,SR0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,
+ ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 },/*sr0*/
+sr0a[]={SR8_,SR8_,SR8_,SR8_,SR9_,SR8_,SR8_,GET_,SR8_,SR7_,SR9_,SR8_,SR8_,SR8_,
+ SR8_,SR8_,SR8_,SR8_,SR8_,SR9_,SR8_,SR8_,SR8_,SR8_,SR8_,SR8_,SR8_,SR8_},
+
+tb0 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,TB0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,
+ ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 },/*tb0*/
+tb0a[]={SR1_,SR1_,SR1_,SR1_,SR9_,SR1_,SR1_,GET_,SR1_,SR7_,SR9_,SR1_,SR1_,SR1_,
+ SR1_,SR1_,SR1_,SR1_,SR1_,SR9_,SR1_,SR1_,SR1_,SR1_,SR1_,SR1_,SR1_,SR1_},
+
+/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
+ nmre com eti net lit spcr mdo msc mso pio rni tagc tago fce */
+
+*conmtab[] = {et0, et0a, da0, da0a, da1, da1a, er0, er0a, cr0, cr0a, rs0, rs0a,
+ me0, me0a, me1, me1a, es0, es0a, ee0, ee0a, ne0, ne0a, md0, md0a,
+ mc0, mc0a, sc0, sc0a, sp0, sp0a, sr0, sr0a, tb0, tb0a };
+struct parse pcbconm = {"CONM", lexcnm, conmtab, 0, 0, 0, 0};
+#undef ET0
+#undef DA0
+#undef DA1
+#undef ER0
+#undef CR0
+#undef RS0
+#undef ME0
+#undef ME1
+#undef ES0
+#undef EE0
+#undef NE0
+#undef MD0
+#undef MC0
+#undef SC0
+#undef SP0
+#undef SR0
+#undef TB0
+/* PCBCONE: State and action table for content parse of element content.
+ Initial state assumes a start-tag was just processed.
+ Each state has two consecutive rows in conetab: its next-state row
+ followed by its action row, with one column per lexical class named in
+ the column-heading comments. The state symbols below are therefore the
+ even row numbers of conetab; they are #undef'd after the table.
+*/
+/* Symbols for state names (end with a number). */
+#define ET2 0 /* Markup found. */
+#define ER2 2 /* ERO found; start lookahead buffer. */
+#define CR2 4 /* CRO found (ERO, RNI). */
+#define RS2 6 /* RS found; possible SR 3-6 if they were declared. */
+#define ME2 8 /* MSC found. */
+#define ME3 10 /* MSC, MSC found. */
+#define ES2 12 /* TAGO found; start lookahead buffer. */
+#define EE2 14 /* End-tag start (TAGO,ETI); move to lookahead buffer. */
+#define NE2 16 /* End-tag start (TAGO,NET); process NET if not end-tag. */
+#define MD2 18 /* MDO found (TAGO, MDO[2]). */
+#define MC2 20 /* MDO, COM found. */
+#define SC2 22 /* COM found; possible SR19-20 if they were mapped. */
+#define SP2 24 /* Space found; possible SR7 or SR9. */
+#define SR2 26 /* SPCR found; possible SR7 or SR9. */
+#define TB2 28 /* TAB found; possible SR7 or SR9. */
+
+static UNCH
+/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
+ nmre com eti net lit spcr mdo msc mso pio rni tagc tago fce */
+et2 []={ET2 ,ET2 ,ET2 ,ET2 ,SP2 ,ET2 ,ET2 ,ET2 ,RS2 ,ET2 ,TB2 ,ET2 ,ET2 ,ER2 ,
+ ET2 ,SC2 ,ET2 ,ET2 ,ET2 ,SR2 ,ET2 ,ME2 ,ET2 ,ET2 ,ET2 ,ET2 ,ES2 ,ET2 },/*et2*/
+et2a[]={DCE_,DCE_,DCE_,DCE_,NOP_,DCE_,GET_,GET_,RS_ ,SR2_,NOP_,DCE_,DCE_,LAS_,
+ NOP_,NOP_,DCE_,NED_,SR10,NOP_,DCE_,NOP_,DCE_,DCE_,SR11,DCE_,LAS_,DCE_},
+
+er2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ER2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,
+ ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,CR2 ,ET2 ,ET2 ,ET2 },/*er2*/
+er2a[]={LAF_,LAF_,LAF_,ER_ ,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAM_,LAF_,LAF_,LAF_},
+
+cr2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,CR2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,
+ ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 },/*cr2*/
+cr2a[]={NLF_,CRN_,NLF_,CRA_,NLF_,NLF_,NLF_,GET_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,
+ NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_},
+
+rs2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,RS2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,
+ ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 },/*rs2*/
+rs2a[]={SR3_,SR3_,SR3_,SR3_,SR4_,SR3_,SR3_,GET_,SR3_,SR5_,SR4_,SR3_,SR3_,SR3_,
+ SR3_,SR3_,SR3_,NED_,SR3_,SR4_,SR3_,SR3_,SR3_,SR3_,SR3_,SR3_,SR3_,SR3_},
+
+/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
+ nmre com eti net lit spcr mdo msc mso pio rni tagc tago fce */
+me2 []={ET2, ET2, ET2, ET2, ET2 ,ET2, ET2, ME2, ET2 ,ET2 ,ET2 ,ET2, ET2, ET2,
+ ET2 ,ET2, ET2 ,ET2, ET2, ET2, ET2, ME3 ,ET2, ET2, ET2 ,ET2, ET2, ET2 },/*me2*/
+me2a[]={SR26,SR26,SR26,SR26,SR26,SR26,SR26,GET_,SR26,SR26,SR26,SR26,SR26,SR26,
+ SR26,SR26,SR26,SR26,SR26,SR26,SR26,NOP_,SR26,SR26,SR26,SR26,SR26,SR26},
+
+me3 []={ET2, ET2, ET2, ET2, ET2 ,ET2, ET2, ME3, ET2 ,ET2 ,ET2 ,ET2, ET2, ET2,
+ ET2 ,ET2, ET2 ,ET2, ET2, ET2, ET2, ET2 ,ET2, ET2, ET2 ,ET2, ET2, ET2 },/*me3*/
+me3a[]={RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,GET_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,
+ RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,MSE_,RBR_,RBR_},
+
+es2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ES2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,
+ ET2 ,ET2 ,EE2 ,NE2 ,ET2 ,ET2 ,MD2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 },/*es2*/
+es2a[]={LAF_,LAF_,LAF_,STG_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAF_,LAM_,LAM_,LAF_,LAF_,LAM_,LAF_,LAF_,PIS_,LAF_,NST_,LAF_,LAF_},
+
+ee2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,EE2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,
+ ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 },/*ee2*/
+ee2a[]={LAF_,LAF_,LAF_,ETG_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,NET_,LAF_,LAF_},
+
+/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
+ nmre com eti net lit spcr mdo msc mso pio rni tagc tago fce */
+ne2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,NE2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,
+ ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 },/*ne2*/
+ne2a[]={NLF_,NLF_,NLF_,ETG_,NLF_,NLF_,NLF_,GET_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,
+ NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NET_,NLF_,NLF_},
+
+md2 []={ET2, ET2, ET2, ET2, ET2 ,ET2, ET2, MD2, ET2 ,ET2 ,ET2 ,ET2, ET2, ET2,
+ ET2 ,MC2, ET2 ,ET2, ET2, ET2, ET2, ET2 ,ET2, ET2, ET2 ,ET2, ET2, ET2 },/*md2*/
+md2a[]={LAF_,LAF_,LAF_,MD_ ,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAM_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,MSS_,LAF_,LAF_,MDC_,LAF_,LAF_},
+
+mc2 []={ET2, ET2, ET2, ET2, ET2, ET2 ,ET2, MC2, ET2 ,ET2, ET2 ,ET2, ET2, ET2,
+ ET2 ,ET2, ET2 ,ET2, ET2, ET2, ET2, ET2 ,ET2 ,ET2 ,ET2 ,ET2, ET2, ET2 },/*mc2*/
+mc2a[]={NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,GET_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,
+ NLF_,MDC_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_},
+
+sc2 []={ET2, ET2, ET2, ET2, ET2, ET2 ,ET2, SC2, ET2 ,ET2, ET2 ,ET2, ET2, ET2,
+ ET2 ,ET2 ,ET2 ,ET2, ET2, ET2, ET2, ET2 ,ET2 ,ET2 ,ET2 ,ET2, ET2, ET2 },/*sc2*/
+sc2a[]={SR19,SR19,SR19,SR19,SR19,SR19,SR19,GET_,SR19,SR19,SR19,SR19,SR19,SR19,
+ SR19,SR20,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19},
+
+/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
+ nmre com eti net lit spcr mdo msc mso pio rni tagc tago fce */
+sp2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,SP2 ,RS2 ,ET2 ,ET2 ,ET2 ,ET2 ,ER2 ,
+ ET2 ,SC2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ME2 ,ET2 ,ET2 ,ET2 ,ET2 ,ES2 ,ET2 },/*sp2*/
+sp2a[]={DCE_,DCE_,DCE_,DCE_,SR9_,DCE_,GET_,GET_,RS_ ,SR7_,SR9_,DCE_,DCE_,LAS_,
+ NOP_,NOP_,DCE_,NED_,SR10,SR9_,DCE_,LAS_,DCE_,DCE_,SR11,DCE_,LAS_,DCE_},
+
+sr2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,SR2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,
+ ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 },/*sr2*/
+sr2a[]={SR8_,SR8_,SR8_,SR8_,SR9_,SR8_,SR8_,GET_,SR8_,SR7_,SR9_,SR8_,SR8_,SR8_,
+ SR8_,SR8_,SR8_,SR8_,SR8_,SR9_,SR8_,SR8_,SR8_,SR8_,SR8_,SR8_,SR8_,SR8_},
+
+tb2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,TB2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,
+ ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 },/*tb2*/
+tb2a[]={SR1_,SR1_,SR1_,SR1_,SR9_,SR1_,SR1_,GET_,SR1_,SR7_,SR9_,SR1_,SR1_,SR1_,
+ SR1_,SR1_,SR1_,SR1_,SR1_,SR9_,SR1_,SR1_,SR1_,SR1_,SR1_,SR1_,SR1_,SR1_},
+
+*conetab[] = {et2, et2a, er2, er2a, cr2, cr2a, rs2, rs2a, me2, me2a, me3, me3a,
+ es2, es2a, ee2, ee2a, ne2, ne2a, md2, md2a, mc2, mc2a, sc2, sc2a,
+ sp2, sp2a, sr2, sr2a, tb2, tb2a };
+struct parse pcbcone = {"CONE", lexcnm, conetab, 0, 0, 0, 0};
+#undef ET2
+#undef ER2
+#undef CR2
+#undef RS2
+#undef ME2
+#undef ME3
+#undef ES2
+#undef EE2
+#undef NE2
+#undef MD2
+#undef MC2
+#undef SC2
+#undef SP2
+#undef SR2
+#undef TB2
+/* PCBCONR: State and action table for content parse of replaceable character
+ data. Initial state assumes a start-tag was just processed.
+ Only entity references and character references are recognized.
+ Each state has two consecutive rows in conrtab: its next-state row
+ followed by its action row, with one column per lexical class named in
+ the column-heading comments. The state symbols below are therefore the
+ even row numbers of conrtab; they are #undef'd after the table.
+*/
+/* Symbols for state names (end with a number). */
+#define ET4 0 /* Markup found or buffer flushed; no data. */
+#define DA4 2 /* Data in buffer. */
+#define ER4 4 /* ERO found; start lookahead buffer. */
+#define CR4 6 /* CRO found (ERO, RNI). */
+#define ES4 8 /* TAGO found; start lookahead buffer. */
+#define EE4 10 /* End-tag start (TAGO,ETI); move to lookahead buffer. */
+#define NE4 12 /* End-tag start (TAGO,NET); process NET if not end-tag. */
+
+static UNCH
+/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
+ nmre com eti net mdo msc mso pero pio rni tagc tago */
+et4 []={DA4 ,DA4 ,DA4 ,DA4 ,DA4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,DA4 ,DA4 ,ET4 ,ER4 ,
+ ET4 ,DA4 ,DA4 ,ET4 ,DA4 ,DA4 ,DA4 ,DA4 ,DA4 ,DA4 ,DA4 ,ES4 },/*et4*/
+et4a[]={DAS_,DAS_,DAS_,DAS_,DAS_,NON_,EE_ ,GET_,RS_ ,REF_,DAS_,DAS_,NSC_,LAS_,
+ REF_,DAS_,DAS_,NED_,DAS_,DAS_,DAS_,DAS_,DAS_,DAS_,DAS_,LAS_},
+
+da4 []={DA4 ,DA4 ,DA4 ,DA4 ,DA4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,DA4 ,DA4 ,ET4 ,ET4 ,
+ ET4 ,DA4 ,DA4 ,ET4 ,DA4 ,DA4 ,DA4 ,DA4 ,DA4 ,DA4 ,DA4 ,ET4 },/*da4*/
+da4a[]={NOP_,NOP_,NOP_,NOP_,NOP_,DAF_,DAF_,DAF_,DAF_,DAF_,NOP_,NOP_,DAF_,DAF_,
+ DAF_,NOP_,NOP_,DAF_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,DAF_},
+
+er4 []={ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ER4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,
+ ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,CR4 ,ET4 ,ET4 },/*er4*/
+er4a[]={LAF_,LAF_,LAF_,ERX_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAM_,LAF_,LAF_},
+
+cr4 []={ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,CR4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,
+ ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 },/*cr4*/
+cr4a[]={LAF_,CRN_,LAF_,CRA_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_},
+
+/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
+ nmre com eti net mdo msc mso pero pio rni tagc tago */
+es4 []={ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ES4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,
+ ET4 ,ET4 ,EE4 ,NE4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 },/*es4*/
+es4a[]={LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAF_,LAM_,LAM_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_},
+
+ee4 []={ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,EE4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,
+ ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 },/*ee4*/
+ee4a[]={LAF_,LAF_,LAF_,ETC_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,NET_,LAF_},
+
+ne4 []={EE4 ,EE4 ,EE4 ,ET4 ,EE4 ,EE4 ,EE4 ,NE4 ,EE4 ,EE4 ,EE4 ,EE4 ,EE4 ,EE4 ,
+ EE4 ,EE4 ,EE4 ,EE4 ,EE4 ,EE4 ,EE4 ,EE4 ,EE4 ,EE4 ,ET4 ,EE4 },/*ne4*/
+ne4a[]={RC2_,RC2_,RC2_,ETC_,RC2_,RC2_,RC2_,GET_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,
+ RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,NET_,RC2_},
+
+*conrtab[] = {et4, et4a, da4, da4a, er4, er4a, cr4, cr4a,
+ es4, es4a, ee4, ee4a, ne4, ne4a};
+struct parse pcbconr = {"CONR", lexcon, conrtab, 0, 0, 0, 0};
+#undef ET4
+#undef DA4
+#undef ER4
+#undef CR4
+#undef ES4
+#undef EE4
+#undef NE4
+/* PCBCONC: State and action table for content parse of character data.
+ Initial state assumes a start-tag was just processed.
+ Each state has two consecutive rows in conctab: its next-state row
+ followed by its action row, with one column per lexical class named in
+ the column-heading comments. The state symbols below are therefore the
+ even row numbers of conctab; they are #undef'd after the table.
+*/
+/* Symbols for state names (end with a number). */
+#define ET6 0 /* Markup found or buffer flushed; no data. */
+#define DA6 2 /* Data in buffer. */
+#define ES6 4 /* TAGO found; start lookahead buffer. */
+#define EE6 6 /* End-tag start (TAGO,ETI); move to lookahead buffer. */
+#define NE6 8 /* End-tag start (TAGO,NET); process NET if not end-tag. */
+
+static UNCH
+/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
+ nmre com eti net mdo msc mso pero pio rni tagc tago */
+et6 []={DA6 ,DA6 ,DA6 ,DA6 ,DA6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,DA6 ,DA6 ,ET6 ,DA6 ,
+ ET6 ,DA6 ,DA6 ,ET6 ,DA6 ,DA6 ,DA6 ,DA6 ,DA6 ,DA6 ,DA6 ,ES6 },/*et6*/
+et6a[]={DAS_,DAS_,DAS_,DAS_,DAS_,NON_,EOF_,GET_,RS_ ,REF_,DAS_,DAS_,NSC_,DAS_,
+ REF_,DAS_,DAS_,NED_,DAS_,DAS_,DAS_,DAS_,DAS_,DAS_,DAS_,LAS_},
+
+da6 []={DA6 ,DA6 ,DA6 ,DA6 ,DA6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,DA6 ,DA6 ,ET6 ,ET6 ,
+ ET6 ,DA6 ,DA6 ,ET6 ,DA6 ,DA6 ,DA6 ,DA6 ,DA6 ,DA6 ,DA6 ,ET6 },/*da6*/
+da6a[]={NOP_,NOP_,NOP_,NOP_,NOP_,DAF_,DAF_,DAF_,DAF_,DAF_,NOP_,NOP_,DAF_,DAF_,
+ DAF_,NOP_,NOP_,DAF_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,DAF_},
+
+/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
+ nmre com eti net mdo msc mso pero pio rni tagc tago */
+es6 []={ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ES6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,
+ ET6 ,ET6 ,EE6 ,NE6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 },/*es6*/
+es6a[]={LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAF_,LAM_,LAM_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_},
+
+ee6 []={ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,EE6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,
+ ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 },/*ee6*/
+ee6a[]={LAF_,LAF_,LAF_,ETC_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,NET_,LAF_},
+
+ne6 []={EE6 ,EE6 ,EE6 ,ET6 ,EE6 ,EE6 ,EE6 ,NE6 ,EE6 ,EE6 ,EE6 ,EE6 ,EE6 ,EE6 ,
+ EE6 ,EE6 ,EE6 ,EE6 ,EE6 ,EE6 ,EE6 ,EE6 ,EE6 ,EE6 ,ET6 ,EE6 },/*ne6*/
+ne6a[]={RC2_,RC2_,RC2_,ETC_,RC2_,RC2_,RC2_,GET_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,
+ RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,NET_,RC2_},
+
+*conctab[] = {et6, et6a, da6, da6a, es6, es6a, ee6, ee6a, ne6, ne6a};
+struct parse pcbconc = {"CONC", lexcon, conctab, 0, 0, 0, 0};
+#undef ET6
+#undef DA6
+#undef ES6
+#undef EE6
+#undef NE6
+/* PCBPRO: State and action table for prolog parse.
+ Initial state assumes document just began.
+ Each state has two consecutive rows in protab: its next-state row
+ followed by its action row, with one column per lexical class named in
+ the column-heading comments. The state symbols below are therefore the
+ even row numbers of protab; they are #undef'd after the table.
+*/
+/* Symbols for state names (end with a number). */
+#define ET7 0 /* Markup found. */
+#define ES7 2 /* TAGO found; start lookahead buffer. */
+#define MD7 4 /* MDO found (TAGO, MDO[2]). */
+#define MC7 6 /* MDO, COM found. */
+
+static UNCH
+/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
+ nmre com eti net mdo msc mso pero pio rni tagc tago */
+et7 []={ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,
+ ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ES7 },/*et7*/
+et7a[]={DCE_,DCE_,DCE_,DCE_,NOP_,DCE_,EE_ ,GET_,RS_ ,NOP_,NOP_,DCE_,DCE_,DCE_,
+ DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,LAS_},
+
+es7 []={ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ES7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,
+ ET7 ,ET7 ,ET7 ,ET7 ,MD7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 },/*es7*/
+es7a[]={PEP_,PEP_,PEP_,STE_,PEP_,PEP_,PEP_,GET_,PEP_,PEP_,PEP_,PEP_,PEP_,PEP_,
+ PEP_,PEP_,PEP_,PEP_,LAM_,PEP_,PEP_,PEP_,PIS_,PEP_,STE_,PEP_},
+
+md7 []={ET7, ET7, ET7, ET7, ET7 ,ET7, ET7, MD7, ET7 ,ET7 ,ET7 ,ET7, ET7, ET7,
+ ET7, MC7, ET7, ET7, ET7, ET7 ,ET7, ET7, ET7, ET7 ,ET7, ET7 },/*md7*/
+md7a[]={LAF_,LAF_,LAF_,DTD_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAM_,LAF_,LAF_,LAF_,LAF_,MSP_,LAF_,LAF_,LAF_,NOP_,LAF_},
+
+mc7 []={ET7, ET7, ET7, ET7, ET7, ET7 ,ET7, MC7, ET7 ,ET7, ET7 ,ET7, ET7, ET7,
+ ET7, ET7, ET7, ET7, ET7, ET7 ,ET7 ,ET7, ET7 ,ET7 ,ET7, ET7 },/*mc7*/
+mc7a[]={LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,MDC_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_},
+
+*protab[] = {et7, et7a, es7, es7a, md7, md7a, mc7, mc7a};
+struct parse pcbpro = {"PRO", lexcon, protab, 0, 0, 0, 0};
+#undef ET7
+#undef ES7
+#undef MD7
+#undef MC7
+/* PCBMDS: State and action table for parse of markup declaration subset.
+ Initial state assumes subset just began (MSO found).
+ Each state has two consecutive rows in mdstab: its next-state row
+ followed by its action row, with one column per lexical class named in
+ the column-heading comments. The state symbols below are therefore the
+ even row numbers of mdstab; they are #undef'd after the table.
+*/
+/* Symbols for state names (end with a number). */
+#define ET8 0 /* Markup found. */
+#define ER8 2 /* PERO found; start lookahead buffer. */
+#define ME8 4 /* MSC found. */
+#define ME9 6 /* MSC, MSC found. */
+#define ES8 8 /* TAGO found; start lookahead buffer. */
+#define MD8 10 /* MDO found (TAGO, MDO[2]). */
+#define MC8 12 /* MDO, COM found. */
+#define DC8 14 /* Data characters found (erroneously). */
+
+static UNCH
+/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
+ nmre com eti net mdo msc mso pero pio rni tagc tago */
+et8 []={DC8 ,DC8 ,DC8 ,DC8 ,ET8 ,DC8 ,ET8 ,ET8 ,ET8 ,ET8 ,ET8 ,DC8 ,DC8 ,DC8 ,
+ DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,ME8 ,DC8 ,ER8 ,DC8 ,DC8 ,DC8 ,ES8 },/*et8*/
+et8a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,GET_,GET_,RS_ ,NOP_,NOP_,NOP_,SYS_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+er8 []={DC8 ,DC8 ,DC8 ,ET8 ,DC8 ,DC8 ,DC8 ,ER8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,
+ DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 },/*er8*/
+er8a[]={NOP_,NOP_,NOP_,PER_,NOP_,SYS_,NOP_,GET_,NOP_,NOP_,NOP_,NOP_,SYS_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+me8 []={ET8, ET8, ET8, ET8, ET8 ,ET8, ET8, ME8, ET8 ,ET8 ,ET8 ,ET8, ET8, ET8,
+ ET8 ,ET8, ET8 ,ET8, ET8, ME9 ,ET8, ET8, ET8, ET8 ,ET8, ET8 },/*me8*/
+me8a[]={DTE_,DTE_,DTE_,DTE_,DTE_,DTE_,DTE_,GET_,DTE_,DTE_,DTE_,DTE_,DTE_,DTE_,
+ DTE_,DTE_,DTE_,DTE_,DTE_,NOP_,DTE_,DTE_,DTE_,DTE_,DTE_,DTE_},
+
+me9 []={DC8, DC8, DC8, DC8, DC8 ,DC8, DC8, ME9, DC8 ,DC8 ,DC8 ,DC8, DC8, DC8,
+ DC8 ,DC8, DC8 ,DC8, DC8, DC8 ,DC8, DC8, DC8, DC8 ,ET8, DC8 },/*me9*/
+me9a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,NOP_,GET_,NOP_,NOP_,NOP_,NOP_,SYS_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,MSE_,NOP_},
+
+/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
+ nmre com eti net mdo msc mso pero pio rni tagc tago */
+es8 []={DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,ES8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,
+ DC8 ,DC8 ,DC8 ,DC8 ,MD8 ,DC8 ,DC8 ,DC8 ,ET8 ,DC8 ,DC8 ,DC8 },/*es8*/
+es8a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,NOP_,GET_,NOP_,NOP_,NOP_,NOP_,SYS_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,PIS_,NOP_,NOP_,NOP_},
+
+md8 []={DC8, DC8, DC8, ET8, DC8 ,DC8, DC8, MD8, DC8 ,DC8 ,DC8 ,DC8, DC8, DC8,
+ DC8 ,MC8, DC8 ,DC8, DC8, DC8 ,ET8, DC8, DC8, DC8 ,ET8, DC8 },/*md8*/
+md8a[]={NOP_,NOP_,NOP_,MD_ ,NOP_,SYS_,NOP_,GET_,NOP_,NOP_,NOP_,NOP_,SYS_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,MSS_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+mc8 []={DC8, DC8, DC8, DC8, DC8, DC8 ,DC8, MC8, DC8 ,DC8, DC8 ,DC8, DC8, DC8,
+ DC8 ,ET8, DC8 ,DC8, DC8, DC8 ,DC8 ,DC8, DC8 ,DC8 ,DC8, DC8 },/*mc8*/
+mc8a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,NOP_,GET_,NOP_,NOP_,NOP_,NOP_,SYS_,NOP_,
+ NOP_,MDC_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+dc8 []={DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,
+ DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,ET8 ,DC8 ,ET8 ,DC8 ,DC8 ,DC8 ,ET8 },/*dc8*/
+dc8a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,GET_,GET_,RS_ ,NOP_,NOP_,NOP_,SYS_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,CIR_,NOP_,CIR_,NOP_,NOP_,NOP_,CIR_},
+
+*mdstab[] = {et8, et8a, er8, er8a, me8, me8a, me9, me9a,
+ es8, es8a, md8, md8a, mc8, mc8a, dc8, dc8a};
+struct parse pcbmds = {"MDS", lexcon, mdstab, 0, 0, 0, 0};
+#undef ET8
+#undef ER8
+#undef ME8
+#undef ME9
+#undef ES8
+#undef MD8
+#undef MC8
+#undef DC8
+/* PCBGRCM: State and action table for content model group.
+ Groups can nest. Reserved names are allowed.
+ Data tag token groups are allowed.
+ A non-reserved name or model group can have a suffix.
+ Columns are based on LEXGRP.C.
+*/
+/* Symbols for state names (end with a number). */
+/* Each value is the index of that state's next-state row in grcmtab[];
+ the matching action row is stored at that index + 1. */
+#define TK1 0 /* Token expected: name, #CHARS, data tag grp, model. */
+#define CO1 2 /* Connector between tokens expected. */
+#define ER1 4 /* PERO found when token was expected. */
+#define SP1 6 /* Name or model: suffix or connector expected. */
+#define RN1 8 /* RNI found; possible #PCDATA. */
+#define DG1 10 /* Data tag: group begun; name expected. */
+#define DN1 12 /* Data tag: name found; SEQ connector expected. */
+#define DT1 14 /* Data tag: ignore template and pattern; MSC expected. */
+#define DR1 16 /* PERO found when data tag name was expected. */
+#define LI1 18 /* Literal in data tag group; search for LIT. */
+#define LA1 20 /* Literal in data tag group; search for LITA. */
+
+/* Rows come in pairs: xxx[] holds the next state per input column and
+ xxxa[] holds the action code for the same column. */
+static UNCH
+/* bit nmc nms re spc non ee eob rs and grpc grpo lit lita
+ dtgc dtgo opt or pero plus rep rni seq refc */
+tk01 []={TK1 ,TK1 ,SP1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,
+ TK1 ,DG1 ,TK1 ,TK1 ,ER1 ,TK1 ,TK1 ,RN1 ,TK1 ,TK1 },/*tk1*/
+tk01a[]={INV_,INV_,NAS_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,INV_,GRP_,INV_,INV_,
+ INV_,GRP_,INV_,INV_,NOP_,INV_,INV_,NOP_,INV_,INV_},
+
+co01 []={TK1 ,TK1 ,TK1 ,CO1 ,CO1 ,CO1 ,CO1 ,CO1 ,CO1 ,TK1 ,SP1 ,TK1 ,TK1 ,TK1 ,
+ TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*co1*/
+co01a[]={INV_,INV_,INV_,NOP_,NOP_,SYS_,EE_ ,GET_,RS_ ,AND ,GRPE,INV_,INV_,INV_,
+ INV_,INV_,INV_,OR ,INV_,INV_,INV_,INV_,SEQ ,INV_},
+
+er01 []={TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,ER1 ,TK1 ,ER1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,
+ TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*er1*/
+er01a[]={PCI_,PCI_,PER_,PCI_,PCI_,SYS_,PCI_,GET_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,
+ PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_},
+
+sp01 []={TK1 ,TK1 ,TK1 ,CO1 ,CO1 ,SP1 ,CO1 ,SP1 ,CO1 ,TK1 ,SP1 ,TK1 ,TK1 ,TK1 ,
+ TK1 ,TK1 ,CO1 ,TK1 ,TK1 ,CO1 ,CO1 ,TK1 ,TK1 ,TK1 },/*sp1*/
+sp01a[]={INV_,LEN_,LEN_,NOP_,NOP_,SYS_,EE_ ,GET_,RS_ ,AND ,GRPE,INV_,INV_,INV_,
+ INV_,INV_,OPT ,OR ,INV_,REP ,OREP,INV_,SEQ ,LEN_},
+
+/* bit nmc nms re spc non ee eob rs and grpc grpo lit lita
+ dtgc dtgo opt or pero plus rep rni seq refc */
+rn01 []={TK1 ,TK1 ,CO1 ,TK1 ,TK1 ,RN1 ,TK1 ,RN1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,
+ TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*rn1*/
+rn01a[]={PCI_,PCI_,RNS_,PCI_,PCI_,SYS_,PCI_,GET_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,
+ PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_},
+
+dg01 []={TK1 ,TK1 ,DN1 ,DG1 ,DG1 ,DG1 ,DG1 ,DG1 ,DG1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,
+ TK1 ,TK1 ,TK1 ,TK1 ,DR1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*dg1*/
+dg01a[]={INV_,INV_,NAS_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,INV_,INV_,INV_,INV_,
+ INV_,INV_,INV_,INV_,NOP_,INV_,INV_,INV_,INV_,INV_},
+
+dn01 []={TK1 ,TK1 ,TK1 ,DN1 ,DN1 ,DN1 ,DN1 ,DN1 ,DN1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,
+ TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,DT1 ,TK1 },/*dn1*/
+dn01a[]={INV_,INV_,INV_,NOP_,NOP_,SYS_,EE_ ,GET_,RS_ ,INV_,INV_,INV_,INV_,INV_,
+ INV_,INV_,INV_,INV_,INV_,INV_,INV_,INV_,DTAG,INV_},
+
+dt01 []={TK1 ,TK1 ,TK1 ,DT1 ,DT1 ,DT1 ,DT1 ,DT1 ,DT1 ,TK1 ,DT1 ,DT1 ,LI1 ,LA1 ,
+ CO1 ,TK1 ,TK1 ,DT1 ,DT1 ,TK1 ,TK1 ,TK1 ,DT1 ,TK1 },/*dt1*/
+dt01a[]={INV_,INV_,INV_,NOP_,NOP_,SYS_,EE_ ,GET_,RS_ ,INV_,NOP_,NOP_,NOP_,NOP_,
+ GRPE,INV_,INV_,NOP_,NOP_,INV_,INV_,INV_,NOP_,INV_},
+
+/* bit nmc nms re spc non ee eob rs and grpc grpo lit lita
+ dtgc dtgo opt or pero plus rep rni seq refc */
+dr01 []={TK1 ,TK1 ,DG1 ,TK1 ,TK1 ,DR1 ,TK1 ,DR1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,
+ TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*dr1*/
+dr01a[]={PCI_,PCI_,PER_,PCI_,PCI_,SYS_,PCI_,GET_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,
+ PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_},
+
+li01 []={LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,DT1 ,LI1 ,
+ LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 },/*li1*/
+li01a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+la01 []={LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,DT1 ,
+ LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 },/*la1*/
+la01a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+/* Row order here must match the state numbers defined above. */
+*grcmtab[] = {tk01, tk01a, co01, co01a, er01, er01a, sp01, sp01a,
+ rn01, rn01a, dg01, dg01a, dn01, dn01a, dt01, dt01a,
+ dr01, dr01a, li01, li01a, la01, la01a};
+/* Parse descriptor "GRCM": lexgrp, then the grcmtab rows; remaining members
+ start at 0. */
+struct parse pcbgrcm = {"GRCM", lexgrp, grcmtab, 0, 0, 0, 0};
+#undef TK1
+#undef CO1
+#undef ER1
+#undef SP1
+#undef RN1
+#undef DG1
+#undef DN1
+#undef DT1
+#undef DR1
+#undef LI1
+#undef LA1
+/* PCBGRCS: State and action table for content model suffix.
+ If suffix occurs, process it. Otherwise, put character
+ back for the next parse.
+*/
+/* Symbols for state names (end with a number). */
+#define SP4 0 /* Suffix expected. */
+
+/* Single-state table: sp04[] always stays in SP4; sp04a[] holds the action
+ code for each input column. */
+static UNCH
+/* bit nmc nms re spc non ee eob rs and grpc grpo lit lita
+ dtgc dtgo opt or pero plus rep rni seq refc */
+sp04 []={SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,
+ SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 },/*sp4*/
+sp04a[]={RCR_,RCR_,RCR_,RCR_,RCR_,SYS_,EE_ ,GET_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,
+ RCR_,RCR_,OPT ,RCR_,RCR_,REP ,OREP,RCR_,RCR_,RCR_},
+
+*grcstab[] = {sp04, sp04a};
+/* Parse descriptor "GRCS": lexgrp, then the grcstab rows; remaining members
+ start at 0. */
+struct parse pcbgrcs = {"GRCS", lexgrp, grcstab, 0, 0, 0, 0};
+#undef SP4
+/* PCBGRNT: State and action table for name token group parse.
+ Groups cannot nest. Reserved names are not allowed.
+ No suffixes or data tag pattern groups.
+*/
+/* Symbols for state names (end with a number). */
+/* Each value is the index of that state's next-state row in grnttab[];
+ the matching action row is stored at that index + 1. */
+#define TK1 0 /* Token expected: name token. */
+#define CO1 2 /* Connector between tokens expected. */
+#define ER1 4 /* PERO found when token was expected. */
+
+/* Rows come in pairs: xxx[] holds the next state per input column and
+ xxxa[] holds the action code for the same column. */
+static UNCH
+/* bit nmc nms re spc non ee eob rs and grpc grpo lit lita
+ dtgc dtgo opt or pero plus rep rni seq refc */
+tk02 []={TK1 ,CO1 ,CO1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,
+ TK1 ,TK1 ,TK1 ,TK1 ,ER1 ,TK1 ,TK1 ,TK1 ,TK1 ,CO1 },/*tk1*/
+tk02a[]={INV_,NMT_,NMT_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,INV_,INV_,INV_,INV_,
+ INV_,INV_,INV_,INV_,NOP_,INV_,INV_,INV_,INV_,NMT_},
+
+co02 []={TK1 ,TK1 ,TK1 ,CO1 ,CO1 ,CO1 ,CO1 ,CO1 ,CO1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,
+ TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*co1*/
+co02a[]={INV_,INV_,INV_,NOP_,NOP_,SYS_,EE_ ,GET_,RS_ ,NOP_,GRPE,INV_,INV_,INV_,
+ INV_,INV_,INV_,NOP_,INV_,INV_,INV_,INV_,NOP_,INV_},
+
+er02 []={TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,ER1 ,TK1 ,ER1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,
+ TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*er1*/
+er02a[]={PCI_,PCI_,PER_,PCI_,PCI_,SYS_,PCI_,GET_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,
+ PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_},
+
+/* Row order here must match the state numbers defined above. */
+*grnttab[] = {tk02, tk02a, co02, co02a, er02, er02a};
+/* Parse descriptor "GRNT": lexgrp, then the grnttab rows; remaining members
+ start at 0. */
+struct parse pcbgrnt = {"GRNT", lexgrp, grnttab, 0, 0, 0, 0};
+#undef TK1
+#undef CO1
+#undef ER1
+/* PCBGRNM: State and action table for name group parse.
+ Groups cannot nest. Reserved names are not allowed.
+ No suffixes or data tag pattern groups.
+*/
+/* Symbols for state names (end with a number). */
+/* Each value is the index of that state's next-state row in grnmtab[];
+ the matching action row is stored at that index + 1. */
+#define TK1 0 /* Token expected: name. */
+#define CO1 2 /* Connector between tokens expected. */
+#define ER1 4 /* PERO found when token was expected. */
+
+/* Rows come in pairs: xxx[] holds the next state per input column and
+ xxxa[] holds the action code for the same column. */
+static UNCH
+/* bit nmc nms re spc non ee eob rs and grpc grpo lit lita
+ dtgc dtgo opt or pero plus rep rni seq refc */
+tk03 []={TK1 ,TK1 ,CO1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,
+ TK1 ,TK1 ,TK1 ,TK1 ,ER1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*tk1*/
+tk03a[]={INV_,INV_,NAS_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,INV_,INV_,INV_,INV_,
+ INV_,INV_,INV_,INV_,NOP_,INV_,INV_,INV_,INV_,INV_},
+
+co03 []={TK1 ,TK1 ,TK1 ,CO1 ,CO1 ,CO1 ,CO1 ,CO1 ,CO1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,
+ TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*co1*/
+co03a[]={INV_,INV_,INV_,NOP_,NOP_,SYS_,EE_ ,GET_,RS_ ,NOP_,GRPE,INV_,INV_,INV_,
+ INV_,INV_,INV_,NOP_,INV_,INV_,INV_,INV_,NOP_,INV_},
+
+er03 []={TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,ER1 ,TK1 ,ER1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,
+ TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*er1*/
+er03a[]={PCI_,PCI_,PER_,PCI_,PCI_,SYS_,PCI_,GET_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,
+ PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_},
+
+/* Row order here must match the state numbers defined above. */
+*grnmtab[] = {tk03, tk03a, co03, co03a, er03, er03a};
+/* Parse descriptor "GRNM": lexgrp, then the grnmtab rows; remaining members
+ start at 0. */
+struct parse pcbgrnm = {"GRNM", lexgrp, grnmtab, 0, 0, 0, 0};
+#undef TK1
+#undef CO1
+#undef ER1
+/* PCBREF: State and action table to find the end of entity, parameter entity,
+ and character references. The opening delimiter and name
+ have already been found; the parse determines whether the
+ tokenization of the name ended normally and processes the REFC.
+*/
+/* Symbols for state names (end with a number). */
+/* Each value is the index of that state's next-state row in reftab[];
+ the matching action row is stored at that index + 1. */
+#define ER5 0 /* Handle REFC or other entity reference termination. */
+#define ER6 2 /* Return to caller and reset state for next call. */
+
+/* Rows come in pairs: xxx[] holds the next state per input column and
+ xxxa[] holds the action code for the same column. */
+static UNCH
+/* bit nmc nms re spc non ee eob rs and grpc grpo lit lita
+ dtgc dtgo opt or pero plus rep rni seq refc */
+er05 []={ER5 ,ER6 ,ER6 ,ER6 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,
+ ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER6 },/*er5*/
+er05a[]={RCR_,LEN_,LEN_,NOP_,RCR_,SYS_,RCR_,GET_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,
+ RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,NOP_},
+
+/* From ER6 every column goes back to ER5 with action RCR_. */
+er06 []={ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,
+ ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 },/*er6*/
+er06a[]={RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,
+ RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_},
+
+*reftab[]={er05, er05a, er06, er06a};
+/* Parse descriptor "ENTREF": lexgrp, then the reftab rows; remaining members
+ start at 0. */
+struct parse pcbref = {"ENTREF", lexgrp, reftab, 0, 0, 0, 0};
+#undef ER5
+#undef ER6
+/*
+Use (typical) Name Ending Chsw References RS RE SEP
+Parameter literal LITPC LIT/A OK Parm,Char RSM_ LAM_ LAM_
+ Data tag template NO
+System ID LITC LIT/A n/a none RSM_ LAM_ LAM_
+ Processing instruction PIC
+Attribute value LITRV LIT/A NO Gen,Char RS_ FUN_ FUN_
+Minimum literal LITV LIT/A n/a none RS_ FUN_ MLE_
+*/
+/* PCBLITP: Literal parse with parameter and character references;
+ no function character translation.
+*/
+/* Symbols for state names (end with a number). */
+/* Each value is the index of that state's next-state row in litptab[];
+ the matching action row is stored at that index + 1. */
+#define DA0 0 /* Data in buffer. */
+#define ER0 2 /* ERO found. */
+#define CR0 4 /* CRO found (ER0, RNI). */
+#define PR0 6 /* PRO found (for PCBLITP). */
+
+/* Rows come in pairs: xxx[] holds the next state per input column and
+ xxxa[] holds the action code for the same column. */
+static UNCH
+/* free num min nms spc non ee eob rs re sep cde nsc ero
+ mdo msc mso pero rni tagc tago litc */
+da13 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,ER0 ,
+ DA0 ,DA0 ,DA0 ,PR0 ,DA0 ,DA0 ,DA0 ,DA0 },/*da3*/
+da13a[]={MLA_,MLA_,MLA_,MLA_,MLA_,NON_,EE_ ,GET_,RSM_,MLA_,MLA_,MLA_,NSC_,NOP_,
+ MLA_,MLA_,MLA_,NOP_,MLA_,MLA_,MLA_,TER_},
+
+er13 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,ER0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,
+ DA0 ,DA0 ,DA0 ,DA0 ,CR0 ,DA0 ,DA0 ,DA0 },/*er3*/
+er13a[]={LPR_,LPR_,LPR_,LPR_,LPR_,LPR_,LPR_,GET_,LPR_,LPR_,LPR_,LPR_,LPR_,LPR_,
+ LPR_,LPR_,LPR_,LPR_,NOP_,LPR_,LPR_,LPR_},
+
+cr13 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,CR0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,
+ DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*cr3*/
+cr13a[]={LP2_,CRN_,LP2_,CRA_,LP2_,LP2_,LP2_,GET_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,
+ LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_},
+
+pr13 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,PR0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,
+ DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*pr3*/
+pr13a[]={LPR_,LPR_,LPR_,PEX_,LPR_,LPR_,LPR_,GET_,LPR_,LPR_,LPR_,LPR_,LPR_,LPR_,
+ LPR_,LPR_,LPR_,LPR_,LPR_,LPR_,LPR_,LPR_},
+
+/* Row order here must match the state numbers defined above. */
+*litptab[] = {da13, da13a, er13, er13a, cr13, cr13a, pr13, pr13a};
+/* Parse descriptor "LITP": lexlms, then the litptab rows; remaining members
+ start at 0. */
+struct parse pcblitp = {"LITP", lexlms, litptab, 0, 0, 0, 0};
+#undef DA0
+#undef ER0
+#undef CR0
+#undef PR0
+/* PCBLITC: Literal parse; no references; no function char translation.
+ Used for character data (system data).
+*/
+/* Symbols for state names (end with a number). */
+#define DA0 0 /* Data in buffer. */
+
+/* Single-state table: da2[] always stays in DA0; da2a[] holds the action
+ code for each input column. */
+static UNCH
+/* free num min nms spc non ee eob rs re sep cde nsc ero
+ mdo msc mso pero rni tagc tago litc */
+da2 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,
+ DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*da2*/
+da2a[]={MLA_,MLA_,MLA_,MLA_,MLA_,SYS_,EOF_,GET_,RSM_,MLA_,MLA_,MLA_,SYS_,MLA_,
+ MLA_,MLA_,MLA_,MLA_,MLA_,MLA_,MLA_,TER_},
+
+*litctab[] = {da2, da2a};
+/* Parse descriptor "LITC": lexlms, then the litctab rows; remaining members
+ start at 0. */
+struct parse pcblitc = {"LITC", lexlms, litctab, 0, 0, 0, 0};
+#undef DA0
+/* PCBLITR: Attribute value parse; general and character references;
+ function chars are translated.
+*/
+/* Symbols for state names (end with a number). */
+/* Each value is the index of that state's next-state row in litrtab[];
+ the matching action row is stored at that index + 1. */
+#define DA0 0 /* Data in buffer. */
+#define ER0 2 /* ERO found. */
+#define CR0 4 /* CRO found (ER0, RNI). */
+
+/* Rows come in pairs: xxx[] holds the next state per input column and
+ xxxa[] holds the action code for the same column. */
+static UNCH
+/* free num min nms spc non ee eob rs re sep cde nsc ero
+ mdo msc mso pero rni tagc tago litc */
+da11 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,ER0 ,
+ DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*da1*/
+da11a[]={MLA_,MLA_,MLA_,MLA_,MLA_,NON_,EE_ ,GET_,RS_ ,FUN_,FUN_,MLA_,NSC_,NOP_,
+ MLA_,MLA_,MLA_,MLA_,MLA_,MLA_,MLA_,TER_},
+
+er11 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,ER0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,
+ DA0 ,DA0 ,DA0 ,DA0 ,CR0 ,DA0 ,DA0 ,DA0 },/*er1*/
+er11a[]={LPR_,LPR_,LPR_,ERX_,LPR_,LPR_,LPR_,GET_,LPR_,LPR_,LPR_,LPR_,LPR_,LPR_,
+ LPR_,LPR_,LPR_,LPR_,NOP_,LPR_,LPR_,LPR_},
+
+cr11 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,CR0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,
+ DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*cr1*/
+cr11a[]={LP2_,CRN_,LP2_,CRA_,LP2_,LP2_,LP2_,GET_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,
+ LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_},
+
+/* Row order here must match the state numbers defined above. */
+*litrtab[] = {da11, da11a, er11, er11a, cr11, cr11a};
+/* Parse descriptor "LITR": lexlms, then the litrtab rows; remaining members
+ start at 0. */
+struct parse pcblitr = {"LITR", lexlms, litrtab, 0, 0, 0, 0};
+#undef DA0
+#undef ER0
+#undef CR0
+/* PCBLITV: Literal parse; no references; RS ignored; RE/SPACE sequences
+ become single SPACE. Only minimum data characters allowed.
+*/
+/* Symbols for state names (end with a number). */
+/* Each value is the index of that state's next-state row in litvtab[];
+ the matching action row is stored at that index + 1. */
+#define LS0 0 /* Leading SPACE or RE found. */
+#define VA0 2 /* Valid character found. */
+#define SP0 4 /* SPACE/RE sequence begun. */
+
+/* Rows come in pairs: a next-state row followed by the action row for the
+ same input columns. */
+static UNCH
+/* free num min nms spc non ee eob rs re sep cde nsc ero
+ mdo msc mso pero rni tagc tago litc */
+ls10 []={VA0 ,VA0 ,VA0 ,VA0 ,LS0 ,VA0 ,LS0 ,LS0 ,LS0 ,LS0 ,LS0 ,VA0 ,VA0 ,VA0 ,
+ VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,LS0 },/*ls0*/
+ls10a[]={MLE_,MLA_,MLA_,MLA_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,MLE_,SYS_,SYS_,MLE_,
+ MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,TER_},
+va10 []={VA0 ,VA0 ,VA0 ,VA0 ,SP0 ,VA0 ,VA0 ,VA0 ,VA0 ,SP0 ,SP0 ,VA0 ,VA0 ,VA0 ,
+ VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,LS0 },/*va0*/
+/* Action row for VA0; it is named da10a rather than va10a, and litvtab[]
+ below pairs it with va10. */
+da10a[]={MLE_,MLA_,MLA_,MLA_,MLA_,SYS_,EOF_,GET_,RS_ ,FUN_,MLE_,SYS_,SYS_,MLE_,
+ MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,TER_},
+sp10 []={VA0 ,VA0 ,VA0 ,VA0 ,SP0 ,VA0 ,VA0 ,SP0 ,SP0 ,SP0 ,SP0 ,VA0 ,VA0 ,VA0 ,
+ VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,LS0 },/*sp0*/
+sp10a[]={MLE_,MLA_,MLA_,MLA_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,MLE_,SYS_,SYS_,MLE_,
+ MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,RPR_},
+
+/* Row order here must match the state numbers defined above. */
+*litvtab[] = {ls10, ls10a, va10, da10a, sp10, sp10a};
+/* Parse descriptor "LITV": lexlms, then the litvtab rows; remaining members
+ start at 0. */
+struct parse pcblitv = {"LITV", lexlms, litvtab, 0, 0, 0, 0};
+#undef LS0
+#undef VA0
+#undef SP0
+/* PCBLITT: Tokenized attribute value parse; general and character
+ references; function chars are translated.
+*/
+/* Symbols for state names (end with a number). */
+/* Each value is the index of that state's next-state row in litttab[];
+ the matching action row is stored at that index + 1. */
+#define SP0 0 /* Ignore spaces */
+#define DA0 2 /* Data character */
+#define ER0 4 /* ERO found; ignore space */
+#define ER1 6 /* ERO found; don't ignore space */
+#define CR0 8 /* CRO found (ER0, RNI); ignore space */
+#define CR1 10 /* CRO found; don't ignore space */
+
+/* Keeps the DA0 state number available in a variable, since the DA0 macro
+ is #undef'd at the end of this section. */
+int pcblittda = DA0;
+
+/* Rows come in pairs: xxx[] holds the next state per input column and
+ xxxa[] holds the action code for the same column. */
+static UNCH
+/* free num min nms spc non ee eob rs re sep cde nsc ero
+ mdo msc mso pero rni tagc tago litc */
+
+sp14 []={DA0 ,DA0 ,DA0 ,DA0 ,SP0 ,DA0 ,DA0 ,SP0 ,SP0 ,SP0 ,SP0 ,DA0 ,DA0 ,ER0 ,
+ DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*sp0*/
+sp14a[]={MLA_,MLA_,MLA_,MLA_,NOP_,NON_,EE_ ,GET_,RS_ ,NOP_,NOP_,MLA_,NSC_,NOP_,
+ MLA_,MLA_,MLA_,MLA_,MLA_,MLA_,MLA_,TER_},
+
+da14 []={DA0 ,DA0 ,DA0 ,DA0 ,SP0 ,DA0 ,DA0 ,DA0 ,DA0 ,SP0 ,SP0 ,DA0 ,DA0 ,ER1 ,
+ DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,SP0 },/*da0*/
+da14a[]={MLA_,MLA_,MLA_,MLA_,MLA_,NON_,EE_ ,GET_,RS_ ,FUN_,FUN_,MLA_,NSC_,NOP_,
+ MLA_,MLA_,MLA_,MLA_,MLA_,MLA_,MLA_,TER_},
+
+er14 []={DA0 ,DA0 ,DA0 ,SP0 ,DA0 ,DA0 ,DA0 ,ER0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,
+ DA0 ,DA0 ,DA0 ,DA0 ,CR0 ,DA0 ,DA0 ,DA0 },/*er0*/
+er14a[]={LPR_,LPR_,LPR_,ERX_,LPR_,LPR_,LPR_,GET_,LPR_,LPR_,LPR_,LPR_,LPR_,LPR_,
+ LPR_,LPR_,LPR_,LPR_,NOP_,LPR_,LPR_,LPR_},
+
+er15 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,ER1 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,
+ DA0 ,DA0 ,DA0 ,DA0 ,CR1 ,DA0 ,DA0 ,DA0 },/*er1*/
+er15a[]={LPR_,LPR_,LPR_,ERX_,LPR_,LPR_,LPR_,GET_,LPR_,LPR_,LPR_,LPR_,LPR_,LPR_,
+ LPR_,LPR_,LPR_,LPR_,NOP_,LPR_,LPR_,LPR_},
+
+cr14 []={DA0 ,DA0 ,DA0 ,SP0 ,DA0 ,DA0 ,DA0 ,CR0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,
+ DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*cr0*/
+cr14a[]={LP2_,CRN_,LP2_,CRA_,LP2_,LP2_,LP2_,GET_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,
+ LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_},
+
+cr15 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,CR1 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,
+ DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*cr1*/
+cr15a[]={LP2_,CRN_,LP2_,CRA_,LP2_,LP2_,LP2_,GET_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,
+ LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_},
+
+/* Row order here must match the state numbers defined above. */
+*litttab[] = {sp14, sp14a, da14, da14a, er14, er14a, er15, er15a, cr14, cr14a,
+ cr15, cr15a};
+/* Parse descriptor "LITT": lexlms, then the litttab rows; remaining members
+ start at 0. */
+struct parse pcblitt = {"LITT", lexlms, litttab, 0, 0, 0, 0};
+#undef SP0
+#undef DA0
+#undef ER0
+#undef ER1
+#undef CR0
+#undef CR1
+/* PCBMD: State and action table for markup declaration tokenization.
+ Columns are based on LEXMARK.C.
+*/
+/* Symbols for state names (end with a number). */
+/* Each value is the index of that state's next-state row in mdtab[];
+ the matching action row is stored at that index + 1. */
+#define SP1 0 /* Separator before token expected. */
+#define TK1 2 /* Token expected. */
+#define CM0 4 /* COM[1] found when sep expected: possible comment, MGRP.*/
+#define CM1 6 /* COM[1] found: possible comment, MGRP, or minus.*/
+#define CM2 8 /* COM[2] found; in comment. */
+#define CM3 10 /* Ending COM[1] found; end comment or continue it. */
+#define PR1 12 /* PERO found when token was expected. */
+#define PX1 14 /* PLUS found: PGRP or error. */
+#define RN1 16 /* RNI found; possible reserved name start. */
+
+/* Keeps the TK1 state number available in a variable, since the TK1 macro
+ is #undef'd at the end of this section. */
+int pcbmdtk = TK1; /* PCBMD: token expected. */
+
+/* Rows come in pairs: xxx[] holds the next state per input column and
+ xxxa[] holds the action code for the same column. */
+static UNCH
+/* bit nmc num nms spc non ee eob rs com eti grpo lit lita
+ dso dsc pero plus refc rni tagc tago vi */
+sp21 []={SP1 ,SP1 ,SP1 ,SP1 ,TK1 ,SP1 ,TK1 ,SP1 ,TK1 ,CM0 ,SP1 ,TK1 ,TK1 ,TK1 ,
+ TK1 ,SP1 ,PR1 ,PX1 ,SP1 ,RN1 ,SP1 ,SP1 ,SP1 },
+sp21a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EE_ ,GET_,RS_ ,NOP_,INV_,GRPS,LIT ,LITE,
+ MDS ,INV_,NOP_,NOP_,INV_,NOP_,EMD ,INV_,INV_},
+
+tk21 []={SP1 ,SP1 ,SP1 ,SP1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,CM1 ,SP1 ,TK1 ,TK1 ,TK1 ,
+ TK1 ,SP1 ,PR1 ,PX1 ,SP1 ,RN1 ,SP1 ,SP1 ,SP1 },
+tk21a[]={INV_,NMT ,NUM ,NAS ,NOP_,SYS_,EE_ ,GET_,RS_ ,NOP_,INV_,GRPS,LIT ,LITE,
+ MDS ,INV_,NOP_,NOP_,INV_,NOP_,EMD ,INV_,INV_},
+
+/* bit nmc num nms spc non ee eob rs com eti grpo lit lita
+ dso dsc pero plus refc rni tagc tago vi */
+cm20 []={SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,CM0 ,SP1 ,CM0 ,SP1 ,CM2 ,SP1 ,SP1 ,SP1 ,SP1 ,
+ SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 },
+cm20a[]={LNR_,LNR_,LNR_,LNR_,LNR_,SYS_,LNR_,GET_,LNR_,NOP_,LNR_,LNR_,LNR_,LNR_,
+ LNR_,LNR_,LNR_,LNR_,LNR_,LNR_,LNR_,LNR_,LNR_},
+
+cm21 []={TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,CM1 ,TK1 ,CM1 ,TK1 ,CM2 ,TK1 ,TK1 ,TK1 ,TK1 ,
+ TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },
+cm21a[]={CDR ,CDR ,CDR ,CDR ,CDR ,SYS_,CDR ,GET_,CDR ,NOP_,CDR ,MGRP,CDR ,CDR ,
+ CDR ,CDR ,CDR ,CDR ,CDR ,CDR ,CDR ,CDR ,CDR },
+
+cm22 []={CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,TK1 ,CM2 ,CM2 ,CM3 ,CM2 ,CM2 ,CM2 ,CM2 ,
+ CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 },
+cm22a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+/* bit nmc num nms spc non ee eob rs com eti grpo lit lita
+ dso dsc pero plus refc rni tagc tago vi */
+cm23 []={CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM3 ,TK1 ,CM3 ,CM2 ,TK1 ,CM2 ,CM2 ,CM2 ,CM2 ,
+ CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 },
+cm23a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+pr21 []={SP1 ,SP1 ,SP1 ,TK1 ,TK1 ,PR1 ,SP1 ,PR1 ,TK1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,
+ SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,TK1 ,SP1 ,SP1 ,SP1 },
+pr21a[]={PCI_,PCI_,PCI_,PER_,PEN ,SYS_,PENR,GET_,PEN ,PENR,PCI_,PCI_,PCI_,PCI_,
+ PCI_,PCI_,PENR,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_},
+
+px21 []={SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,PX1 ,SP1 ,PX1 ,SP1 ,SP1 ,SP1 ,TK1 ,SP1 ,SP1 ,
+ SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 },
+px21a[]={PCI_,PCI_,PCI_,PCI_,PCI_,SYS_,PCI_,GET_,PCI_,PCI_,PCI_,PGRP,PCI_,PCI_,
+ PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_},
+
+rn21 []={TK1 ,TK1 ,TK1 ,SP1 ,TK1 ,RN1 ,TK1 ,RN1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,
+ TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },
+rn21a[]={PCI_,PCI_,PCI_,RNS ,PCI_,SYS_,PCI_,GET_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,
+ PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_},
+
+/* Row order here must match the state numbers defined above. */
+*mdtab[] = {sp21, sp21a, tk21, tk21a, cm20, cm20a, cm21, cm21a, cm22, cm22a,
+ cm23, cm23a, pr21, pr21a, px21, px21a, rn21, rn21a};
+/* Parse descriptor "MD": lexmark, then the mdtab rows; remaining members
+ start at 0. */
+struct parse pcbmd = {"MD", lexmark, mdtab, 0, 0, 0, 0};
+#undef SP1
+#undef TK1
+#undef CM0
+#undef CM1
+#undef CM2
+#undef CM3
+#undef PR1
+#undef PX1
+#undef RN1
+/* PCBMDC: State and action table for comment declaration.
+*/
+/* Symbols for state names (end with a number). */
+/* Each value is the index of that state's next-state row in mdctab[];
+ the matching action row is stored at that index + 1. Note that the
+ numbering order (CD2, CD3, EM1, CD1) is also the row order in mdctab[]. */
+#define CD2 0 /* COM[2] found; in comment. */
+#define CD3 2 /* Ending COM[1] found; end comment or continue it. */
+#define EM1 4 /* Ending COM[2] found; start new comment or end. */
+#define CD1 6 /* COM[1] found; new comment or error. */
+
+/* Rows come in pairs: xxx[] holds the next state per input column and
+ xxxa[] holds the action code for the same column. */
+static UNCH
+/* bit nmc num nms spc non ee eob rs com eti grpo lit lita
+ dso dsc pero plus refc rni tagc tago vi */
+cd22 []={CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD3 ,CD2 ,CD2 ,CD2 ,CD2 ,
+ CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 },
+cd22a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+cd23 []={CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD3 ,CD2 ,CD3 ,CD2 ,EM1 ,CD2 ,CD2 ,CD2 ,CD2 ,
+ CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 },
+cd23a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+em21 []={CD2 ,CD2 ,CD2 ,CD2 ,EM1 ,EM1 ,CD2 ,EM1 ,EM1 ,CD1 ,CD2 ,CD2 ,CD2 ,CD2 ,
+ CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 },
+em21a[]={INV_,INV_,INV_,INV_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,INV_,INV_,INV_,INV_,
+ INV_,INV_,INV_,INV_,INV_,INV_,EMD ,INV_,INV_},
+
+cd21 []={CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD1 ,CD2 ,CD1 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,
+ CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 },
+cd21a[]={PCI_,PCI_,PCI_,PCI_,PCI_,SYS_,EOF_,GET_,PCI_,NOP_,PCI_,PCI_,PCI_,PCI_,
+ PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_},
+
+*mdctab[] = {cd22, cd22a, cd23, cd23a, em21, em21a, cd21, cd21a};
+/* Parse descriptor "MDC": lexmark, then the mdctab rows; remaining members
+ start at 0. */
+struct parse pcbmdc = {"MDC", lexmark, mdctab, 0, 0, 0, 0};
+#undef CD2
+#undef CD3
+#undef EM1
+#undef CD1
+/* PCBMDI: State and action table for ignoring markup declarations.
+ Literals are handled properly so a TAGC won't end the declaration.
+ An error is noted if the entity ends within a declaration that
+ is being ignored.
+ TO DO: Handle nested declaration sets.
+*/
+/* Symbols for state names (end with a number). */
+/* Each value is the index of that state's next-state row in mditab[];
+ the matching action row is stored at that index + 1. */
+#define NC1 0 /* Not in a comment; TAGC ends declaration. */
+#define IC1 2 /* COM[1] found; possible comment. */
+#define IC2 4 /* COM[2] found; in comment. */
+#define IC3 6 /* Ending COM[1] found; end comment or continue it. */
+#define LI1 8 /* Literal parameter; search for LIT. */
+#define LA1 10 /* Literal parameter; search for LITA. */
+
+/* Rows come in pairs: xxx[] holds the next state per input column and
+ xxxa[] holds the action code for the same column. */
+static UNCH
+/* bit nmc num nms spc non ee eob rs com eti grpo lit lita
+ dso dsc pero plus refc rni tagc tago vi */
+nc21 []={NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,IC1 ,NC1 ,NC1 ,LI1 ,LA1 ,
+ NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 },
+nc21a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,EMD ,NOP_,NOP_},
+
+ic21 []={NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,IC1 ,NC1 ,IC1 ,NC1 ,IC2 ,NC1 ,NC1 ,LI1 ,LA1 ,
+ NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 },
+ic21a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,EMD ,NOP_,NOP_},
+
+ic22 []={IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,NC1 ,IC2 ,IC2 ,IC3 ,IC2 ,IC2 ,IC2 ,IC2 ,
+ IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC2 },
+ic22a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+ic23 []={IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC3 ,NC1 ,IC3 ,IC2 ,NC1 ,IC2 ,IC2 ,IC2 ,IC2 ,
+ IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC2 },/*ic3*/
+ic23a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+/* bit nmc num nms spc non ee eob rs com eti grpo lit lita
+ dso dsc pero plus refc rni tagc tago vi */
+li21 []={LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,NC1 ,LI1 ,
+ LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 },/*li1*/
+li21a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+la21 []={LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,NC1 ,
+ LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 },/*la1*/
+la21a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+/* Row order here must match the state numbers defined above. */
+*mditab[] = {nc21, nc21a, ic21, ic21a, ic22, ic22a,
+ ic23, ic23a, li21, li21a, la21, la21a};
+/* Parse descriptor "MDI": lexmark, then the mditab rows; remaining members
+ start at 0. */
+struct parse pcbmdi = {"MDI", lexmark, mditab, 0, 0, 0, 0};
+#undef NC1
+#undef IC1
+#undef IC2
+#undef IC3
+#undef LI1
+#undef LA1
+/* PCBMSRC: State and action table for marked section in RCDATA mode.
+ Nested marked sections are not recognized; the first MSE ends it.
+ Initial state assumes an MS declaration was processed.
+ Columns are based on LEXLMS.C but LITC column needn't exist.
+*/
+/* Symbols for state names (end with a number). */
+/* Each value is the index of that state's next-state row in msrctab[];
+ the matching action row is stored at that index + 1. */
+#define ET0 0 /* MSS processed or buffer flushed; no data. */
+#define DA0 2 /* Data in buffer. */
+#define ER0 4 /* ERO found; start lookahead buffer. */
+#define CR0 6 /* CRO found (ER0, RNI). */
+#define ME0 8 /* MSC found. */
+#define ME1 10 /* MSC, MSC found. */
+
+/* Rows come in pairs: xxx[] holds the next state per input column and
+ xxxa[] holds the action code for the same column. */
+static UNCH
+/* free nu min nms spc non ee eob rs re sep cde nsc ero
+ mdo msc mso pero rni tagc tago litc */
+et30 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,DA0 ,DA0 ,ET0 ,ER0 ,
+ DA0 ,ME0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*et0*/
+et30a[]={DAS_,DAS_,DAS_,DAS_,DAS_,NON_,EE_ ,GET_,RS_ ,REF_,DAS_,DAS_,NSC_,LAS_,
+ DAS_,LAS_,DAS_,DAS_,DAS_,DAS_,DAS_,DAS_},
+
+da30 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,DA0 ,DA0 ,ET0 ,ET0 ,
+ DA0 ,ET0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*da0*/
+da30a[]={NOP_,NOP_,NOP_,NOP_,NOP_,DAF_,DAF_,DAF_,DAF_,DAF_,NOP_,NOP_,DAF_,DAF_,
+ NOP_,DAF_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+er30 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ER0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,
+ ET0 ,ET0 ,ET0 ,ET0 ,CR0 ,ET0 ,ET0 ,ET0 },/*er0*/
+er30a[]={LAF_,LAF_,LAF_,ERX_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAF_,LAF_,LAF_,LAM_,LAF_,LAF_,LAF_},
+
+/* free nu min nms spc non ee eob rs re sep cde nsc ero
+ mdo msc mso pero rni tagc tago litc */
+cr30 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,CR0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,
+ ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 },/*cr0*/
+cr30a[]={LAF_,CRN_,LAF_,CRA_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_},
+
+me30 []={ET0, ET0, ET0, ET0, ET0 ,ET0, ET0, ME0, ET0 ,ET0 ,ET0 ,ET0, ET0 ,ET0 ,
+ ET0, ME1, ET0 ,ET0, ET0 ,ET0, ET0 ,ET0 },/*me0*/
+me30a[]={LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAM_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_},
+
+me31 []={ET0, ET0, ET0, ET0, ET0 ,ET0, ET0, ME1, ET0 ,ET0 ,ET0 ,ET0, ET0 ,ET0 ,
+ ET0, ET0, ET0 ,ET0, ET0 ,ET0, ET0 ,ET0,},/*me1*/
+me31a[]={LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAF_,LAF_,LAF_,LAF_,MSE_,LAF_,LAF_},
+
+/* Row order here must match the state numbers defined above. */
+*msrctab[]={et30, et30a, da30, da30a, er30, er30a, cr30, cr30a,
+ me30, me30a, me31, me31a};
+/* Parse descriptor "MSRCDATA": lexlms, then the msrctab rows; remaining
+ members start at 0. */
+struct parse pcbmsrc = {"MSRCDATA", lexlms, msrctab, 0, 0, 0, 0};
+#undef ET0
+#undef DA0
+#undef ER0
+#undef CR0
+#undef ME0
+#undef ME1
+/* PCBMSC: State and action table for marked section in CDATA mode.
+ Nested marked sections are not recognized; the first MSE ends it.
+ Initial state assumes an MS declaration was processed.
+*/
+/* Symbols for state names (end with a number). */
+/* Each value is the index of that state's next-state row in msctab[];
+ the matching action row is stored at that index + 1. */
+#define ET2 0 /* MSS processed or buffer flushed; no data. */
+#define DA2 2 /* Data in buffer. */
+#define ME2 4 /* MSC found. */
+#define ME3 6 /* MSC, MSC found. */
+
+/* Rows come in pairs: xxx[] holds the next state per input column and
+ xxxa[] holds the action code for the same column. */
+static UNCH
+/* free nu min nms spc non ee eob rs re sep cde nsc ero
+ mdo msc mso pero rni tagc tago litc */
+et32 []={DA2 ,DA2 ,DA2 ,DA2 ,DA2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,DA2 ,DA2 ,ET2 ,DA2 ,
+ DA2 ,ME2 ,DA2 ,DA2 ,DA2 ,DA2 ,DA2 ,DA2 },/*et2*/
+et32a[]={DAS_,DAS_,DAS_,DAS_,DAS_,NON_,EOF_,GET_,RS_ ,REF_,DAS_,DAS_,NSC_,DAS_,
+ DAS_,LAS_,DAS_,DAS_,DAS_,DAS_,DAS_,DAS_},
+
+da32 []={DA2 ,DA2 ,DA2 ,DA2 ,DA2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,DA2 ,DA2 ,ET2 ,DA2 ,
+ DA2 ,ET2 ,DA2 ,DA2 ,DA2 ,DA2 ,DA2 ,DA2 },/*da2*/
+da32a[]={NOP_,NOP_,NOP_,NOP_,NOP_,DAF_,DAF_,DAF_,DAF_,DAF_,NOP_,NOP_,DAF_,NOP_,
+ NOP_,DAF_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+me32 []={ET2, ET2, ET2, ET2, ET2 ,ET2, ET2, ME2, ET2 ,ET2 ,ET2 ,ET2, ET2 ,ET2 ,
+ ET2, ME3, ET2 ,ET2, ET2 ,ET2, ET2, ET2,},/*me2*/
+me32a[]={LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAM_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_},
+
+me33 []={ET2, ET2, ET2, ET2, ET2 ,ET2, ET2, ME3, ET2 ,ET2 ,ET2 ,ET2, ET2 ,ET2 ,
+ ET2, ET2, ET2 ,ET2, ET2 ,ET2, ET2, ET2,},/*me3*/
+me33a[]={LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAF_,LAF_,LAF_,LAF_,MSE_,LAF_,LAF_},
+
+/* Row order here must match the state numbers defined above. */
+*msctab[]={et32, et32a, da32, da32a, me32, me32a, me33, me33a};
+/* Parse descriptor "MSCDATA": lexlms, then the msctab rows; remaining
+ members start at 0. */
+struct parse pcbmsc = {"MSCDATA", lexlms, msctab, 0, 0, 0, 0};
+#undef ET2
+#undef DA2
+#undef ME2
+#undef ME3
+/* PCBMSI: State and action table for marked section in IGNORE mode.
+ Nested marked sections are recognized; the matching MSE ends it.
+ Initial state assumes an MS declaration, MSS, or MSE was processed.
+
+ Layout: msitab holds two rows per state, the next-state row followed
+ by the action row (same name with an "a" suffix), so each state
+ symbol's value is the msitab index of its next-state row.
+
+ NOTE(review): the et34, me34 and me35 rows have 23 entries (the last
+ column is labelled refc) while the es34 and md34 rows have 22 (ending
+ at litc), although all of them are indexed through lexlms.  Confirm
+ that lexlms never produces the refc class, or pad the shorter rows.
+*/
+/* Symbols for state names (end with a number). */
+#define ET4 0 /* Markup found or buffer flushed; no data. */
+#define ME4 2 /* MSC found. */
+#define ME5 4 /* MSC, MSC found. */
+#define ES4 6 /* TAGO found. */
+#define MD4 8 /* MDO found (TAGO, MDO[2]). */
+
+static UNCH
+/* free nu min nms spc non ee eob rs re sep cde nsc ero
+ mdo msc mso pero rni tagc tago litc refc */
+et34 []={ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,
+ ET4 ,ME4 ,ET4 ,ET4 ,ET4 ,ET4 ,ES4 ,ET4 ,ET4 },/*et4*/
+et34a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,SYS_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+me34 []={ET4, ET4, ET4, ET4, ET4 ,ET4, ET4, ME4, ET4 ,ET4 ,ET4 ,ET4, ET4, ET4 ,
+ ET4, ME5 ,ET4, ET4, ET4 ,ET4, ET4, ET4, ET4,},/*me4*/
+me34a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,SYS_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+/* In ME5 the tagc column is the only one that yields MSE_. */
+me35 []={ET4, ET4, ET4, ET4, ET4 ,ET4, ET4, ME5, ET4 ,ET4 ,ET4 ,ET4, ET4, ET4 ,
+ ET4, ET4 ,ET4, ET4, ET4 ,ET4, ET4, ET4, ET4,},/*me5*/
+me35a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,SYS_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,MSE_,NOP_,NOP_,NOP_},
+
+/* free nu min nms spc non ee eob rs re sep cde nsc ero
+ mdo msc mso pero rni tagc tago litc */
+es34 []={ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ES4 ,ET4 ,ES4 ,ET4 ,ET4 ,ET4 ,ET4 ,ES4 ,ET4 ,
+ MD4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 },/*es4*/
+es34a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,SYS_,NOP_,
+ NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+/* In MD4 the mso column is the only one that yields MSS_. */
+md34 []={ET4, ET4, ET4, ET4, ET4 ,MD4, ET4, MD4, ET4 ,ET4 ,ET4 ,ET4, ET4, ET4 ,
+ ET4, ET4 ,ET4, ET4, ET4 ,ET4, ET4, ET4,},/*md4*/
+md34a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,SYS_,NOP_,
+ NOP_,NOP_,MSS_,NOP_,NOP_,NOP_,NOP_,NOP_},
+
+*msitab[]={et34, et34a, me34, me34a, me35, me35a, es34, es34a, md34, md34a};
+struct parse pcbmsi = {"MSIGNORE", lexlms, msitab, 0, 0, 0, 0};
+#undef ET4
+#undef ME4
+#undef ME5
+#undef ES4
+#undef MD4
+/* NOTE(review): NS4 is not defined anywhere in this section; this #undef
+ looks like a leftover and has no effect here. */
+#undef NS4
+/* PCBSTAG: State and action table for start-tag parse.
+ Columns are based on LEXMARK.C.
+
+ Layout: stagtab holds two rows per state, the next-state row followed
+ by the action row (same name with an "a" suffix), so each state
+ symbol's value is the stagtab index of its next-state row.
+*/
+/* Symbols for state names (end with a number). */
+#define SP1 0 /* Separator before name expected. */
+#define AN1 2 /* Attribute name expected. */
+#define SP2 4 /* Separator or value indicator expected. */
+#define VI1 6 /* Value indicator expected. */
+#define AV1 8 /* Attribute value expected. */
+
+/* Makes the AN1 state number available after the #undef below. */
+int pcbstan = AN1; /* PCBSTAG: attribute name expected. */
+
+static UNCH
+/* bit nmc num nms spc non ee eob rs com eti grpo lit lita
+ dso dsc pero plus refc rni tagc tago vi */
+sp41 []={SP1 ,SP1 ,SP1 ,SP1 ,AN1 ,SP1 ,SP1 ,SP1 ,AN1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,
+ SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 },
+sp41a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,ETIC,INV_,INV_,INV_,
+ INV_,DSC ,INV_,INV_,INV_,INV_,TAGC,TAGO,INV_},
+
+an41 []={SP1 ,SP1 ,SP1 ,SP2 ,AN1 ,AN1 ,AN1 ,AN1 ,AN1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,
+ SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 },
+an41a[]={INV_,NTV ,NTV ,NVS ,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,ETIC,INV_,INV_,INV_,
+ INV_,DSC ,INV_,INV_,INV_,INV_,TAGC,TAGO,INV_},
+
+sp42 []={SP1 ,SP1 ,SP1 ,SP1 ,VI1 ,SP2 ,SP2 ,SP2 ,VI1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,
+ SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,AV1 },
+sp42a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,NASV,INV_,INV_,INV_,
+ INV_,NASV,INV_,INV_,INV_,INV_,NASV,NASV,NOP_},
+
+/* bit nmc num nms spc non ee eob rs com eti grpo lit lita
+ dso dsc pero plus refc rni tagc tago vi */
+vi41 []={SP1 ,AN1 ,AN1 ,AN1 ,VI1 ,VI1 ,VI1 ,VI1 ,VI1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,
+ SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,AV1 },
+vi41a[]={INV_,NASV,NASV,NASV,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,NASV,INV_,INV_,INV_,
+ INV_,NASV,INV_,INV_,INV_,INV_,NASV,NASV,NOP_},
+
+av41 []={SP1 ,SP1 ,SP1 ,SP1 ,AV1 ,AV1 ,AV1 ,AV1 ,AV1 ,SP1 ,SP1 ,SP1 ,AN1 ,AN1 ,
+ SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 },
+av41a[]={INV_,AVU ,AVU ,AVU ,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,INV_,INV_,AVD ,AVDA,
+ INV_,INV_,INV_,INV_,INV_,INV_,INV_,INV_,INV_},
+
+*stagtab[] = {sp41, sp41a, an41, an41a, sp42, sp42a, vi41, vi41a, av41, av41a};
+struct parse pcbstag = {"STAG", lexmark, stagtab, 0, 0, 0, 0};
+#undef SP1
+#undef AN1
+#undef SP2
+#undef VI1
+#undef AV1
+/* PCBETAG: State and action table for end-tag parse.
+ There is a single state, TC1: every column leads back to it, and the
+ action row alone decides what happens.  The table is indexed through
+ lexmark, like the start-tag table.
+*/
+#define TC1 0 /* Tag close expected (no attributes allowed). */
+
+static UNCH
+/* bit nmc nu nms spc non ee eob rs com eti grpo lit lita
+ dso dsc pero plus refc rni tagc tago vi */
+tc41 []={TC1 ,TC1 ,TC1 ,TC1 ,TC1 ,TC1 ,TC1 ,TC1 ,TC1 ,TC1 ,TC1 ,TC1 ,TC1 ,TC1 ,
+ TC1 ,TC1 ,TC1 ,TC1 ,TC1 ,TC1 ,TC1 ,TC1 ,TC1 },/*tc1*/
+tc41a[]={INV_,INV_,INV_,INV_,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,INV_,INV_,INV_,INV_,
+ INV_,INV_,INV_,INV_,INV_,INV_,TAGC,TAGO,INV_},
+
+*etagtab[] = {tc41, tc41a};
+struct parse pcbetag = {"ETAG", lexmark, etagtab, 0, 0, 0, 0};
+#undef TC1
+/* PCBVAL: State and action table for tokenizing attribute values.
+ Columns are based on lextoke (but EOB cannot occur).
+
+ Layout: valtab holds two rows per state, the next-state row followed
+ by the action row, so each state symbol's value is the valtab index
+ of its next-state row.  The column header names eight classes but
+ each row has only seven entries: there is no entry for eob.
+*/
+/* Symbols for state names (end with a number). */
+#define TK1 0 /* Token expected. */
+#define SP1 2 /* Separator before token expected. */
+
+static UNCH
+/* inv rec sep sp nmc nms nu eob */
+tk51 []={TK1 ,TK1 ,TK1 ,TK1 ,SP1 ,SP1 ,SP1 },/*tk1*/
+tk51a[]={INVA,INVA,INVA,NOPA,NMTA,NASA,NUMA},
+
+sp51 []={TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*sp1*/
+sp51a[]={INVA,INVA,INVA,NOPA,LENA,LENA,LENA},
+
+*valtab[] = {tk51, tk51a, sp51, sp51a};
+struct parse pcbval = {"VAL", lextoke, valtab, 0, 0, 0, 0};
+#undef TK1
+#undef SP1
+/* PCBEAL: State and action table for end of attribute specification list.
+ If delimiter occurs, process it. Otherwise, put invalid character
+ back for the next parse.
+
+ There is a single state, AL0, and the table is indexed through lexgrp.
+ In the action row only the dtgc column yields GRPE; non, ee and eob
+ yield SYS_, EE_ and GET_, and every other column yields INV_.
+*/
+/* Symbols for state names (end with a number). */
+#define AL0 0 /* Delimiter expected. */
+
+static UNCH
+/* bit nmc nms re spc non ee eob rs and grpc grpo lit lita
+ dtgc dtgo opt or pero plus rep rni seq refc */
+al00 []={AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,
+ AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 },/*al0*/
+al00a[]={INV_,INV_,INV_,INV_,INV_,SYS_,EE_ ,GET_,INV_,INV_,INV_,INV_,INV_,INV_,
+ GRPE,INV_,INV_,INV_,INV_,INV_,INV_,INV_,INV_,INV_},
+
+*ealtab[] = {al00, al00a};
+struct parse pcbeal = {"EAL", lexgrp, ealtab, 0, 0, 0, 0};
+#undef AL0
+
+/* PCBSD: State and action tables for SGML declaration parsing.
+ sdtab holds two rows per state, the next-state row followed by the
+ action row (same name with an "a" suffix), so each state symbol's
+ value is the sdtab index of its next-state row.  The table is indexed
+ through lexsd; every row has thirteen entries, one per column named in
+ the header comment below.
+*/
+
+/* Symbols for state names. */
+
+#define SP1 0 /* Separator before token expected. */
+#define TK1 2 /* Token expected. */
+#define CM0 4 /* COM[1] found when sep expected: possible comment.*/
+#define CM1 6 /* COM[1] found: possible comment.*/
+#define CM2 8 /* COM[2] found; in comment. */
+#define CM3 10 /* Ending COM[1] found; end comment or continue it. */
+
+static UNCH
+/* sig dat num nms spc non ee eob rs com lit lita tagc */
+
+sp31 []={SP1 ,SP1 ,SP1 ,SP1 ,TK1 ,SP1 ,SP1 ,SP1 ,TK1 ,CM0 ,TK1 ,TK1 ,SP1 },
+sp31a[]={INV_,ISIG,LEN_,LEN_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,LIT1,LIT2,ESGD},
+
+tk31 []={TK1 ,TK1 ,SP1 ,SP1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,CM1 ,TK1 ,TK1 ,SP1 },
+tk31a[]={INV_,ISIG,NUM1,NAS1,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,LIT1,LIT2,ESGD},
+
+/* CM0 and CM1 differ only in the state they go back to (SP1 or TK1)
+ when the character after the first COM is not a second COM. */
+cm30 []={SP1 ,CM0 ,SP1 ,SP1 ,SP1 ,CM0 ,SP1 ,CM0 ,SP1 ,CM2 ,SP1 ,SP1 ,SP1 },
+cm30a[]={PCI_,ISIG,PCI_,PCI_,PCI_,SYS_,PCI_,GET_,PCI_,NOP_,PCI_,PCI_,PCI_},
+
+cm31 []={TK1 ,CM1 ,TK1 ,TK1 ,TK1 ,CM1 ,TK1 ,CM1 ,TK1 ,CM2 ,TK1 ,TK1 ,TK1 },
+cm31a[]={PCI_,ISIG,PCI_,PCI_,PCI_,SYS_,PCI_,GET_,PCI_,NOP_,PCI_,PCI_,PCI_},
+
+cm32 []={CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,TK1 ,CM2 ,CM2 ,CM3 ,CM2 ,CM2 ,CM2 },
+cm32a[]={NOP_,ISIG,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_},
+
+cm33 []={CM2 ,CM3 ,CM2 ,CM2 ,CM2 ,CM3 ,TK1 ,CM3 ,CM2 ,TK1 ,CM2 ,CM2 ,CM2 },
+cm33a[]={NOP_,ISIG,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_},
+
+*sdtab[]={sp31, sp31a, tk31, tk31a, cm30, cm30a, cm31, cm31a, cm32, cm32a,
+ cm33, cm33a};
+
+struct parse pcbsd = {"SD", lexsd, sdtab, 0, 0, 0, 0};
+
+#undef SP1
+#undef TK1
+#undef CM0
+#undef CM1
+#undef CM2
+#undef CM3
diff --git a/usr.bin/sgmls/sgmls/portproc.c b/usr.bin/sgmls/sgmls/portproc.c
new file mode 100644
index 0000000..0bb2431
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/portproc.c
@@ -0,0 +1,104 @@
+/* portproc.c -
+
+ Semi-portable implementation of run_process().
+
+ Written by James Clark (jjc@jclark.com).
+*/
+
+#include "config.h"
+
+#ifdef SUPPORT_SUBDOC
+
+#include "std.h"
+#include "entity.h"
+#include "appl.h"
+
+/* This code shows how you might use system() to implement run_process().
+ANSI C says very little about the behaviour of system(), and so this
+is necessarily system dependent. */
+
+/* Characters that are significant to the shell and so need quoting.
+   Besides the operators, blanks and quoting characters, the set covers
+   command substitution (backquote), pathname expansion (asterisk,
+   question mark, left bracket), comments (number sign), tilde expansion
+   and the conditionally special equals and percent signs.  An argument
+   containing any of these is wrapped in SHELL_QUOTE_CHAR by
+   shell_quote(). */
+#define SHELL_MAGIC "$\\\"';&()|<>^ \t\n`*?[#~=%"
+/* Character with which to quote shell arguments. */
+#define SHELL_QUOTE_CHAR '\''
+/* String that can be used to get SHELL_QUOTE_CHAR into a quoted argument. */
+#define SHELL_ESCAPE_QUOTE "'\\''"
+/* Character that can be used to separate arguments to the shell. */
+#define SHELL_ARG_SEP ' '
+
+static UNS shell_quote P((char *, char *));
+
+/* RUN_PROCESS: Build one shell command line from the null-terminated
+   vector argv, quoting each element with shell_quote(), and hand it to
+   system().
+   Returns the value of system().  When that value is negative, E_EXEC is
+   reported through appl_error() with argv[0] and the errno text.
+*/
+int run_process(argv)
+char **argv;
+{
+     char **p;
+     char *s, *command;
+     int ret;
+     UNS len = 0;
+
+     /* First pass: measure only (null result pointer). */
+     for (p = argv; *p; p++)
+          len += shell_quote(*p, (char *)0);
+     /* The separators between arguments plus the terminating NUL need one
+        byte per argument.  The extra byte keeps the NUL inside the buffer
+        when argv is empty and that count is zero. */
+     len += (p - argv) + 1;
+     s = command = xmalloc(len);
+     /* Second pass: write the quoted arguments, separated by
+        SHELL_ARG_SEP. */
+     for (p = argv; *p; ++p) {
+          if (s > command)
+               *s++ = SHELL_ARG_SEP;
+          s += shell_quote(*p, s);
+     }
+     *s++ = '\0';
+     /* Clear errno so that the message below reflects this call only. */
+     errno = 0;
+     ret = system(command);
+     if (ret < 0)
+          appl_error(E_EXEC, argv[0], strerror(errno));
+     free(command);
+     return ret;
+}
+
+/* Quote a string so that it appears as a single argument to the
+shell (as used for system()).  Store the quoted argument in result, if
+result is not NULL.  Return the length.
+
+The argument is wrapped in SHELL_QUOTE_CHAR when it is empty or contains
+a character from SHELL_MAGIC; every SHELL_QUOTE_CHAR inside it is
+replaced by SHELL_ESCAPE_QUOTE.  The returned length does not include a
+terminating NUL and the caller must not rely on one being stored. */
+
+static
+UNS shell_quote(s, result)
+char *s, *result;
+{
+     UNS len = 0;
+     int quoted = 0;
+
+     /* An empty argument has to be written as a pair of quotes, or it
+        would vanish from the command line altogether. */
+     if (*s == '\0' || strpbrk(s, SHELL_MAGIC)) {
+          quoted = 1;
+          len++;
+          if (result)
+               result[0] = SHELL_QUOTE_CHAR;
+     }
+     for (; *s; s++) {
+          if (*s == SHELL_QUOTE_CHAR) {
+               /* SHELL_QUOTE_CHAR is itself in SHELL_MAGIC, so the
+                  argument is quoted whenever this branch is taken. */
+               if (result)
+                    strcpy(result + len, SHELL_ESCAPE_QUOTE);
+               len += strlen(SHELL_ESCAPE_QUOTE);
+          }
+          else {
+               if (result)
+                    result[len] = *s;
+               len++;
+          }
+     }
+     if (quoted) {
+          if (result)
+               result[len] = SHELL_QUOTE_CHAR;
+          len++;
+     }
+     return len;
+}
+
+#endif /* SUPPORT_SUBDOC */
+
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/serv.c b/usr.bin/sgmls/sgmls/serv.c
new file mode 100644
index 0000000..b9699d2
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/serv.c
@@ -0,0 +1,299 @@
+#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */
+/* ETDDEF: Return the element type definition for a name, creating it on
+   first use.  A newly created etd comes from hin(), which allocates it
+   with rmalloc; being zeroed is what shows that it is a virgin etd.
+*/
+PETD etddef(ename)
+UNCH *ename;                  /* Element name (GI) with length byte. */
+{
+     int bucket = hash(ename, ETDHASH);     /* Hash number for ename. */
+     PETD found = (PETD)hfind((THASH)etdtab, ename, bucket);
+
+     if (found != 0)
+          return found;
+     return (PETD)hin((THASH)etdtab, ename, bucket, ETDSZ);
+}
+/* ETDSET: Fill in an element type definition and return it.
+   The etd must be valid and virgin (except for adl and etdmin).
+   Nothing that the etd already points to is inspected or freed, because
+   an etd is never modified once set.  The pointers are stored as given;
+   no copies are made.
+*/
+#ifdef USE_PROTOTYPES
+PETD etdset(PETD p, UNCH fmin, struct thdr *cmod, PETD *mexgrp, PETD *pexgrp,
+ struct entity **srm)
+#else
+PETD etdset(p, fmin, cmod, mexgrp, pexgrp, srm)
+PETD p;                       /* Pointer to an etd. */
+UNCH fmin;                    /* Minimization bit flags. */
+struct thdr *cmod;            /* Pointer to content model. */
+PETD *mexgrp;                 /* Exception list stored in etdmex. */
+PETD *pexgrp;                 /* Exception list stored in etdpex. */
+struct entity **srm;          /* Short reference map. */
+#endif
+{
+     p->etdsrm = srm;
+     p->etdpex = pexgrp;
+     p->etdmex = mexgrp;
+     p->etdmod = cmod;
+     /* OR rather than assign: bits already set in etdmin survive. */
+     p->etdmin |= fmin;
+     return p;
+}
+/* ETDREF: Look up an element type definition by name.
+   Returns whatever hfind() returns for the name's hash chain in etdtab,
+   i.e. a null pointer when the name is not present.
+*/
+PETD etdref(ename)
+UNCH *ename;                  /* Element name (GI) with length byte. */
+{
+     int bucket = hash(ename, ETDHASH);
+
+     return (PETD)hfind((THASH)etdtab, ename, bucket);
+}
+/* ETDCAN: Cancel an element definition.  The etd is unlinked from the
+   hash table and freed; its model and other pointers are not freed.
+   Nothing happens if the name is not in the table.
+*/
+VOID etdcan(ename)
+UNCH *ename;                  /* GI name (with length and EOS). */
+{
+     PETD gone = (PETD)hout((THASH)etdtab, ename, hash(ename, ETDHASH));
+
+     if (gone == 0)
+          return;
+     frem((UNIV)gone);
+}
+/* SYMBOL TABLE FUNCTIONS: These functions manage hash tables that are used
+ for entities, element type definitions, IDs, and other purposes. The
+ interface will be expanded in the future to include multiple environments,
+ probably by creating arrays of the present hash tables with each table
+ in the array corresponding to an environment level.
+*/
+/* HASH: Form hash value for a string.
+   From the Dragon Book, p436.
+   Every byte of s before the terminating zero is folded in, and the
+   result is reduced modulo hashsize.
+*/
+int hash(s, hashsize)
+UNCH *s;                      /* String to be hashed. */
+int hashsize;                 /* Size of hash table array. */
+{
+     unsigned long h = 0;
+     unsigned long top;        /* Bits 28-31 of h. */
+
+     for (; *s != 0; s++) {
+          h = (h << 4) + *s;
+          top = h & 0xf0000000;
+          if (top != 0)
+               /* Mix the top four bits back in lower down and clear them. */
+               h ^= (top >> 24) ^ top;
+     }
+     return (int)(h % hashsize);
+}
+/* HFIND: Look for a name in a hash table.
+   Walks the chain htab[h] and returns the first entry whose ename
+   compares equal to s, or a null pointer if there is none.
+*/
+struct hash *hfind(htab, s, h)
+struct hash *htab[];          /* Hash table. */
+UNCH *s;                      /* Entity name. */
+int h;                        /* Hash value for entity name. */
+{
+     struct hash *cur = htab[h];
+
+     while (cur != 0) {
+          if (ustrcmp(s, cur->ename) == 0)
+               return cur;                  /* Found it. */
+          cur = cur->enext;
+     }
+     return (struct hash *)0;               /* Not found. */
+}
+/* HIN: Locates an entry in a hash table, or allocates a new one.
+   Returns a pointer to a structure containing a name
+   and a pointer to the next entry.  Other data in the
+   structure must be maintained by the caller.
+   A new entry is a single rmalloc block of size + name[0] bytes: the
+   structure first, then a copy of the first name[0] bytes of name.
+*/
+struct hash *hin(htab, name, h, size)
+struct hash *htab[];          /* Hash table. */
+UNCH *name;                   /* Entity name. */
+int h;                        /* Hash value for entity name. */
+UNS size;                     /* Size of structures pointed to by table. */
+{
+     struct hash *entry = hfind(htab, name, h);
+
+     if (entry != 0)
+          return entry;                     /* Name is already there. */
+
+     entry = (struct hash *)rmalloc(size + name[0]);
+     entry->ename = (UNCH *)entry + size;   /* Name lives after the struct. */
+     memcpy(entry->ename, name, name[0]);
+
+     /* Link the new entry in at the head of the chain. */
+     entry->enext = htab[h];
+     htab[h] = entry;
+     return entry;
+}
+/* HOUT: Remove an entry from a hash table and return its pointer.
+   The caller must free any pointers in the entry and then
+   free the entry itself if that is what is desired; this
+   routine does not free any storage.
+   Returns a null pointer if no entry in chain htab[h] matches s.
+*/
+struct hash *hout(htab, s, h)
+struct hash *htab[];          /* Hash table. */
+UNCH *s;                      /* Search argument entry name. */
+int h;                        /* Hash value for search entry name. */
+{
+     struct hash **link = &htab[h];    /* Pointer that leads to cur. */
+     struct hash *cur;
+
+     while ((cur = *link) != 0) {
+          if (ustrcmp(s, cur->ename) == 0) {
+               *link = cur->enext;     /* Unlink the entry. */
+               return cur;
+          }
+          link = &cur->enext;
+     }
+     return 0;
+}
+/* SAVESTR: Save a null-terminated string.
+   Returns a copy of s in storage obtained from rmalloc.
+*/
+UNCH *savestr(s)
+UNCH *s;
+{
+     UNCH *copy = (UNCH *)rmalloc(ustrlen(s) + 1);
+
+     ustrcpy(copy, s);
+     return copy;
+}
+/* SAVENM: Save a name (with length and EOS).
+   The first byte of s is taken as the number of bytes to copy; the copy
+   is placed in storage obtained from rmalloc.
+*/
+UNCH *savenm(s)
+UNCH *s;
+{
+     UNCH *copy = (UNCH *)rmalloc(s[0]);
+
+     memcpy(copy, s, s[0]);
+     return copy;
+}
+/* REPLACE: Free the storage for the old string (p) and store the new (s).
+   A null p is simply not freed.  Returns a savestr() copy of s, or s
+   itself when s is a null pointer.
+*/
+UNCH *replace(p, s)
+UNCH *p;
+UNCH *s;
+{
+     if (p != 0)
+          frem((UNIV)p);
+     return s != 0 ? savestr(s) : s;
+}
+/* RMALLOC: Interface to memory allocation with error handling.
+   If storage is not available, fatal error message is issued.
+   Storage is initialized to zeros.
+   A request for zero bytes is served with a one-byte block, so the
+   result is never a null pointer.
+*/
+UNIV rmalloc(size)
+unsigned size;                /* Number of bytes of initialized storage. */
+{
+     /* calloc() may legitimately return a null pointer for a zero-byte
+        request; ask for at least one byte so that a null result always
+        means that the allocation failed. */
+     UNIV p = (UNIV)calloc(size ? size : 1, 1);
+     if (!p) exiterr(33, (struct parse *)0);
+     return p;
+}
+UNIV rrealloc(p, n)
+UNIV p;
+UNS n;
+{
+ UNIV r = realloc(p, n);
+ if (!r)
+ exiterr(33, (struct parse *)0);
+ return r;
+}
+
+/* NOTE(review): pt is not referenced by any function visible in this
+ file; confirm whether another file still uses it. */
+UNCH *pt;
+/* FREM: Free specified memory area gotten with rmalloc().
+ This is a direct call to free().
+*/
+VOID frem(ptr)
+UNIV ptr; /* Memory area to be freed. */
+{
+ free(ptr);
+}
+/* MAPSRCH: Find a string in a table and return its associated value.
+   The last entry must be a dummy consisting of a NULL pointer for
+   the string and whatever return code is desired if the
+   string is not found in the table.
+   A table that consists of the dummy entry alone is allowed; its
+   mapdata is returned for every name.
+*/
+int mapsrch(maptab, name)
+struct map maptab[];
+UNCH *name;
+{
+     int i;
+
+     /* Test for the terminator before looking at an entry, so that the
+        NULL name of the dummy entry is never dereferenced. */
+     for (i = 0; maptab[i].mapnm; i++)
+          if (ustrcmp(name, maptab[i].mapnm) == 0)
+               return maptab[i].mapdata;
+     return maptab[i].mapdata;              /* Value of the dummy entry. */
+}
+/* IDDEF: Define an ID control block; return -1 if it already exists.
+   On the first definition the ID is marked as defined, its list of
+   forward references is detached and every element of the list (and its
+   saved message, if any) is freed.  Returns 0 in that case.
+*/
+int iddef(iname)
+UNCH *iname;                  /* ID name (with length and EOS). */
+{
+     PID p = (PID)hin((THASH)itab, iname, hash(iname, IDHASH), IDSZ);
+     struct fwdref *ref;
+     struct fwdref *following;
+
+     if (p->iddefed)
+          return -1;
+     p->iddefed = 1;
+     TRACEID("IDDEF", p);
+
+     /* Delete any forward references. */
+     ref = p->idrl;
+     p->idrl = 0;
+     for (; ref != 0; ref = following) {
+          following = ref->next;            /* Saved before ref is freed. */
+          if (ref->msg)
+               msgsfree(ref->msg);
+          frem((UNIV)ref);
+     }
+     return 0;
+}
+/* IDREF: Store a reference to an ID and define the ID if it doesn't yet exist.
+   Return 0 if already defined, otherwise pointer to a fwdref.
+   The new fwdref is put at the head of the ID's reference list with a
+   null msg.
+*/
+struct fwdref *idref(iname)
+UNCH *iname;                  /* ID name (with length and EOS). */
+{
+     int hnum = hash(iname, IDHASH);
+     PID p = (PID)hfind((THASH)itab, iname, hnum);
+     struct fwdref *rp;
+
+     if (p == 0)
+          p = (PID)hin((THASH)itab, iname, hnum, IDSZ);
+     if (p->iddefed)
+          return 0;
+
+     rp = (struct fwdref *)rmalloc(FWDREFSZ);
+     rp->msg = 0;
+     rp->next = p->idrl;
+     p->idrl = rp;
+     TRACEID("IDREF", p);
+     return rp;
+}
+/* IDRCK: Check idrefs.
+   For every ID in itab that was referenced but never defined, passes
+   the msg of each of its forward references to svderr().
+*/
+VOID idrck()
+{
+     int i;
+     PID p;
+     struct fwdref *r;
+
+     for (i = 0; i < IDHASH; i++) {
+          for (p = itab[i]; p; p = p->idnext) {
+               if (p->iddefed)
+                    continue;               /* Defined: nothing to report. */
+               for (r = p->idrl; r; r = r->next)
+                    svderr(r->msg);
+          }
+     }
+}
+/* NTOA: Converts an integer to its decimal ASCII representation.
+   No leading zeros are generated.
+   The result is in a static buffer that the next call overwrites.
+*/
+UNCH *ntoa(i)
+int i;
+{
+     /* One byte, three per byte of an int, and one more. */
+     static UNCH buf[1 + 3*sizeof(int) + 1];
+     char *out = (char *)buf;
+
+     sprintf(out, "%d", i);
+     return buf;
+}
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+comment-column: 30
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/sgml1.c b/usr.bin/sgmls/sgmls/sgml1.c
new file mode 100644
index 0000000..94a6119
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/sgml1.c
@@ -0,0 +1,477 @@
+#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */
+
+#define ETDCON (tags[ts].tetd->etdmod->ttype) /* ETD content flags. */
+
+/* SGML: Main SGML driver routine.
+ Parses until there is something to hand back to the caller and returns
+ the corresponding SGML* event code.  Data, processing instruction,
+ APPINFO and record end events store into *rcbdafp; start-tag and
+ end-tag events store into *rcbtagp.  End of document stores into
+ neither.
+*/
+enum sgmlevent sgmlnext(rcbdafp, rcbtagp)
+struct rcbdata *rcbdafp;
+struct rcbtag *rcbtagp;
+{
+ /* Prolog phase: run parsepro() until the prolog ends or it produces an
+ action that the main loop below has to process (conactsw set). */
+ while (prologsw && !conactsw) {
+ int oconact;
+ conact = parsepro();
+ conactsw = 0; /* Assume sgmlact() will not be skipped. */
+ switch(conact) {
+
+ case PIS_:
+ case EOD_:
+ case APP_: /* APPINFO */
+ conactsw = 1; /* We can skip sgmlact() in opening state. */
+ break;
+
+ case DAF_:
+ newetd = stagreal = ETDCDATA;
+ conact = stag(datarc = DAF_);
+ conactsw = 1; /* We can skip sgmlact() in opening state. */
+ prologsw = 0; /* End the prolog. */
+ break;
+ case DCE_:
+ case MSS_:
+ /* prcon[2].tu.thetd holds the etd for the document element. */
+ newetd = stagreal = prcon[2].tu.thetd;
+ stagmin = MINSTAG; /* This tag was minimized. */
+ /* It's an error if the start tag of the document element
+ is not minimizable. */
+ if (BITOFF(newetd->etdmin, SMO))
+ sgmlerr(226, conpcb, (UNCH *)0, (UNCH *)0);
+ oconact = conact; /* Save conact. */
+ conact = stag(0); /* Start the document element. */
+ conactsw = 1; /* conact needs processing. */
+ prologsw = 0; /* The prolog is finished. */
+ if (oconact == MSS_) {
+ if (msplevel==0) conpcb = getpcb((int)ETDCON);
+ conpcb = mdms(tbuf, conpcb); /* Parse the marked section
+ start. */
+ }
+ break;
+ default: /* STE_: not defined in SGMLACT.H. */
+ if (msplevel==0) conpcb = getpcb((int)ETDCON);
+ prologsw = 0; /* End the prolog. */
+ break;
+ }
+ }
+ /* Content phase: each pass handles one action.  Cases that have
+ nothing to report use continue; the others return an event. */
+ for (;;) {
+ unsigned swact; /* Switch action: saved conact, new, or sgmlact.*/
+
+ if (conactsw) {
+ /* Use the action saved earlier instead of parsing more input. */
+ conactsw = 0;
+ swact = conact;
+ contersw = contersv;
+ }
+ else {
+ conact = parsecon(tbuf, conpcb);
+ /* End of document is presented to sgmlact() as LOP_. */
+ swact = sgmlact((UNCH)(conact != EOD_ ? conact : LOP_));
+ }
+
+ switch (swact) {
+
+ case MD_: /* Process markup declaration. */
+ parsenm(tbuf, NAMECASE); /* Get declaration name. */
+ if (!ustrcmp(tbuf+1, key[KUSEMAP])) mdsrmuse(tbuf);
+ else sgmlerr(E_MDNAME, conpcb, tbuf+1, (UNCH *)0);
+ continue;
+ case MDC_: /* Process markup declaration comment. */
+ if (*FPOS!=lex.d.mdc)
+ parsemd(tbuf, NAMECASE, (struct parse *)0, NAMELEN);
+ continue;
+
+ case MSS_: /* Process marked section start. */
+ conpcb = mdms(tbuf, conpcb);
+ continue;
+ case MSE_: /* Process marked section end (drop to LOP_). */
+ if (mdmse()) conpcb = getpcb((int)ETDCON);
+ continue;
+
+ case PIS_: /* Return processing instruction (string). */
+ if (entpisw) rcbdafp->data = data;
+ else {
+ parselit(tbuf, &pcblitc, PILEN, lex.d.pic);
+ rcbdafp->data = tbuf;
+ }
+ rcbdafp->datalen = datalen;
+ rcbdafp->contersw = entpisw;
+ entpisw = 0; /* Reset for next time.*/
+ scbset(); /* Update location in current scb. */
+ return SGMLPIS;
+
+ case APP_:
+ rcbdafp->data = tbuf;
+ rcbdafp->datalen = ustrlen(tbuf);
+ rcbdafp->contersw = 0;
+ scbset();
+ return SGMLAPP;
+ case ETG_: /* Return end-tag. */
+ charmode = 0; /* Not in char mode unless CDATA or RCDATA.*/
+ if (msplevel==0) conpcb = getpcb((int)ETDCON);
+ /* tags[ts+1] is used for the element that was ended and tags[ts]
+ for the one now current. */
+ rcbtagp->contersw = tags[ts+1].tflags;
+ rcbtagp->tagmin = etagimsw ? MINETAG : etagmin;
+ rcbtagp->curgi = tags[ts+1].tetd->etdgi;
+ rcbtagp->ru.oldgi = tags[ts].tetd->etdgi;
+ if (etagmin==MINSTAG) rcbtagp->tagreal =
+ BADPTR(stagreal) ? stagreal : (PETD)stagreal->etdgi;
+ else rcbtagp->tagreal =
+ BADPTR(etagreal) ? etagreal : (PETD)etagreal->etdgi;
+ rcbtagp->etictr = etictr;
+ rcbtagp->srmnm = tags[ts].tsrm!=SRMNULL ? tags[ts].tsrm[0]->ename
+ : 0;
+ scbset(); /* Update location in current scb. */
+ return SGMLETG;
+
+ case STG_: /* Return start-tag. */
+ charmode = 0; /* Not in char mode unless CDATA or RCDATA.*/
+ if (!conrefsw && msplevel==0) conpcb = getpcb((int)ETDCON);
+ rcbtagp->contersw = tags[ts].tflags;
+ rcbtagp->tagmin = dostag ? MINSTAG : stagmin;
+ rcbtagp->curgi = tags[ts].tetd->etdgi;
+ /* Get attribute list if one was defined for this element. */
+ rcbtagp->ru.al = !tags[ts].tetd->adl ? 0 :
+ rcbtagp->tagmin==MINNONE ? al : tags[ts].tetd->adl;
+ rcbtagp->tagreal = BADPTR(stagreal)?stagreal:(PETD)stagreal->etdgi;
+ rcbtagp->etictr = etictr;
+ rcbtagp->srmnm = tags[ts].tsrm!=SRMNULL ? tags[ts].tsrm[0]->ename
+ : 0;
+ scbset(); /* Update location in current scb. */
+ return SGMLSTG;
+
+ case DAF_: /* Return data in source entity buffer. */
+ charmode = 1;
+ rcbdafp->datalen = datalen;
+ rcbdafp->data = data;
+ rcbdafp->contersw = contersw | entdatsw;
+ contersw = entdatsw = 0;/* Reset for next time.*/
+ scbset(); /* Update location in current scb. */
+ return SGMLDAF;
+
+ case CON_: /* Process conact after returning REF_. */
+ conactsw = 1;
+ contersv = contersw;
+ /* No break: CON_ continues into the REF_ case. */
+ case REF_: /* Return RE found. */
+ if (badresw) {
+ badresw = 0;
+ sgmlerr(E_CHARS, &pcbconm, tags[ts].tetd->etdgi+1, (UNCH *)0);
+ continue;
+ }
+ charmode = 1;
+ rcbdafp->contersw = contersw;
+ contersw = 0; /* Reset for next time.*/
+ scbset(); /* Update location in current scb. */
+ return SGMLREF;
+
+ case EOD_: /* End of source document entity. */
+ if (mslevel != 0) sgmlerr(139, conpcb, (UNCH *)0, (UNCH *)0);
+ idrck(); /* Check idrefs. */
+ scbset(); /* Update location in current scb. */
+ return SGMLEOD;
+
+ default: /* LOP_: Loop again with no action. */
+ continue;
+ }
+ }
+}
+/* PCBSGML: State and action table for action codes returned to text processor
+ by SGML.C.
+ Columns are based on SGMLACT.H values minus DAF_, except that end
+ of document has input code LOP_, regardless of its action code.
+
+ Rows come in pairs: row [state] gives the next state and row
+ [state+1] gives the action, which is how sgmlact() indexes the table.
+ The state symbols are therefore even numbers.
+*/
+/* Symbols for state names (end with a number). */
+#define ST1 0 /* Just had a start tag. */
+#define NR1 2 /* Just had an RS or RE. */
+#define DA1 4 /* Just had some data. */
+#define NR2 6 /* Just had an RE; RE pending. */
+#define ST2 8 /* Had only markup since last RE/RS; RE pending. */
+
+static UNCH sgmltab[][11] = {
+/*daf_ etg_ md_ mdc_ mss_ mse_ pis_ ref_ stg_ rsr_ eod */
+ {DA1 ,DA1 ,ST1 ,ST1 ,ST1 ,ST1 ,ST1 ,NR1 ,ST1 ,NR1 ,ST1 },/*st1*/
+ {DAF_,ETG_,MD_ ,MDC_,MSS_,MSE_,PIS_,LOP_,STG_,LOP_,EOD_},
+
+ {DA1 ,DA1 ,ST1 ,ST1 ,ST1 ,ST1 ,ST1 ,NR2 ,ST1 ,NR1 ,ST1 },/*nr1*/
+ {DAF_,ETG_,MD_ ,MDC_,MSS_,MSE_,PIS_,LOP_,STG_,LOP_,EOD_},
+
+ {DA1 ,DA1 ,DA1 ,DA1 ,DA1 ,DA1 ,DA1 ,NR2 ,ST1 ,NR1 ,ST1 },/*da1*/
+ {DAF_,ETG_,MD_ ,MDC_,MSS_,MSE_,PIS_,LOP_,STG_,LOP_,EOD_},
+
+ /* With an RE pending (NR2, ST2), data and start-tags yield CON_ and a
+ further RE yields REF_. */
+ {DA1 ,DA1 ,ST2 ,ST2 ,ST2 ,ST2 ,ST2 ,NR2 ,ST1 ,NR2 ,ST1 },/*nr2*/
+ {CON_,ETG_,MD_ ,MDC_,MSS_,MSE_,PIS_,REF_,CON_,LOP_,EOD_},
+
+ {DA1 ,DA1 ,ST2 ,ST2 ,ST2 ,ST2 ,ST2 ,NR1 ,ST1 ,NR2 ,ST1 },/*st2*/
+ {CON_,ETG_,MD_ ,MDC_,MSS_,MSE_,PIS_,REF_,CON_,LOP_,EOD_},
+};
+/* The two state numbers below stay available as variables; the symbols
+ themselves are macros local to this file. */
+int scbsgmst = ST1; /* SCBSGML: trailing stag or markup; ignore RE. */
+int scbsgmnr = NR1; /* SCBSGML: new record; do not ignore RE. */
+/* SGMLACT: Determine action to be taken by SGML.C based on current state and
+ specified input.
+ For start or end of a plus exception element, push or pop the
+ pcbsgml stack.
+ Return to caller with action code.
+*/
+#ifdef USE_PROTOTYPES
+int sgmlact(UNCH conret)
+#else
+int sgmlact(conret)
+UNCH conret; /* Action returned to SGML.C by content parse. */
+#endif
+{
+ int action;
+
+ /* A start-tag flagged TAGPEX gets a fresh stack level starting in ST1. */
+ if (conret==STG_ && GET(tags[ts].tflags, TAGPEX))
+ {++pss; scbsgml[pss].snext = ST1;}
+ /* Step the state machine: the pending next state becomes current. */
+ scbsgml[pss].sstate = scbsgml[pss].snext;
+ /* Row sstate holds next states; the column is the input code, which is
+ stored in sinput as a side effect of the index expression. */
+ scbsgml[pss].snext = sgmltab[scbsgml[pss].sstate]
+ [scbsgml[pss].sinput = conret-DAF_];
+ /* Row sstate+1 holds the actions for the same column. */
+ scbsgml[pss].saction = sgmltab[scbsgml[pss].sstate+1][scbsgml[pss].sinput];
+ TRACEGML(scbsgml, pss, conactsw, conact);
+ /* Save the action now; pss may be decremented below. */
+ action = scbsgml[pss].saction;
+ if (conret==ETG_ && GET(tags[ts+1].tflags, TAGPEX)) {
+ pss--;
+ /* An included subelement affects the enclosing state like a
+ processing instruction (or MDC_ or MD_),
+ that is to say NR1 is changed to ST1 and NR2 to ST2. */
+ scbsgml[pss].sstate = scbsgml[pss].snext;
+ scbsgml[pss].snext = sgmltab[scbsgml[pss].sstate][PIS_ - DAF_];
+ }
+ return action;
+}
+/* GETPCB: Choose pcb for new or resumed element.
+   Content with the MGI flag gets pcbconm or pcbcone depending on MCHARS.
+   MCDATA and MRCDATA content switch charmode on and get pcbconc and
+   pcbconr respectively; for MRCDATA the current es is saved in rcessv.
+   Anything else gets pcbconm.
+*/
+struct parse *getpcb(etdcon)
+int etdcon;                   /* Content type of new or resumed element. */
+{
+     if (BITON(etdcon, MGI)) {
+          if (BITON(etdcon, MCHARS))
+               return &pcbconm;
+          return &pcbcone;
+     }
+     if (BITON(etdcon, MCDATA)) {
+          charmode = 1;
+          return &pcbconc;
+     }
+     if (BITON(etdcon, MRCDATA)) {
+          charmode = 1;
+          rcessv = es;
+          return &pcbconr;
+     }
+     return &pcbconm;
+}
+
+/* SGMLSET: Copy the caller's switches into sw, allocate the read buffer
+   and run the message, I/O and SGML declaration initialization, in that
+   order.  Returns the address of lex.m.
+*/
+struct markup *sgmlset(swp)
+struct switches *swp;
+{
+     UNCH *area;               /* Start of the allocated read area. */
+
+     /* Initialize variables based on switches structure members. */
+     sw = *swp;
+     /* DOS file read area: rbufs points 3 bytes into the allocation. */
+     area = (UNCH *)rmalloc((UNS)3+sw.swbufsz);
+     rbufs = area + 3;
+     TRACEPRO();               /* Set trace switches for prolog. */
+     msginit(swp);
+     ioinit(swp);
+     sdinit();
+     return &lex.m;
+}
+
+/* Points for each capacity, indexed by *CAP in sgmldecl.h. We'll replace
+2 with the real NAMELEN at run time.
+sgmlend() does the replacement in place: every entry after the first
+whose value is greater than 1 is overwritten with NAMELEN. */
+
+static UNCH cappoints[] = {
+ 1,
+ 2,
+ 1,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 1,
+ 2,
+ 2,
+ 1,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+};
+
+/* Object count per capacity; filled in by sgmlend(). */
+static long capnumber[NCAPACITY];
+/* Largest value reported so far for each capacity through sgmlsubcap(). */
+static long maxsubcap[NCAPACITY];
+
+/* SGMLEND: Close any files that are still open, compute the capacity
+ usage of the document and report every capacity that is exceeded.
+ On return *p points at the static tables used for the calculation
+ (capnumber, cappoints, sd.capacity and captab).
+*/
+VOID sgmlend(p)
+struct sgmlcap *p;
+{
+ int i;
+ /* FILESW is evaluated for each level as the global es counts down. */
+ for (; es >= 0; --es)
+ if (FILESW)
+ fileclos();
+
+ capnumber[NOTCAP] = ds.dcncnt;
+ capnumber[EXGRPCAP] = ds.pmexgcnt;
+ capnumber[ELEMCAP] = ds.etdcnt+ds.etdercnt;
+ capnumber[EXNMCAP] = ds.pmexcnt;
+ capnumber[GRPCAP] = ds.modcnt;
+ capnumber[ATTCAP] = ds.attcnt;
+ capnumber[ATTCHCAP] = ds.attdef;
+ capnumber[AVGRPCAP] = ds.attgcnt;
+ capnumber[IDCAP] = ds.idcnt;
+ capnumber[IDREFCAP] = ds.idrcnt;
+ capnumber[ENTCAP] = ds.ecbcnt;
+ capnumber[ENTCHCAP] = ds.ecbtext;
+ capnumber[MAPCAP] = ds.srcnt + ds.srcnt*lex.s.dtb[0].mapdata;
+ capnumber[NOTCHCAP] = ds.dcntext;
+
+ capnumber[TOTALCAP] = 0;
+
+ /* Starts at 1, so index 0 is neither rescaled nor added to the total.
+ Entries of capnumber that are not assigned above are only added
+ to here, never reset. */
+ for (i = 1; i < NCAPACITY; i++) {
+ if (cappoints[i] > 1)
+ cappoints[i] = NAMELEN;
+ /* Add the largest subdocument usage, converted from points to a
+ number of objects. */
+ capnumber[i] += maxsubcap[i]/cappoints[i];
+ capnumber[TOTALCAP] += (long)capnumber[i] * cappoints[i];
+ }
+ p->number = capnumber;
+ p->points = cappoints;
+ p->limit = sd.capacity;
+ p->name = captab;
+
+ /* Error 162 is issued with the capacity name and the excess in points. */
+ for (i = 0; i < NCAPACITY; i++) {
+ long excess = capnumber[i]*cappoints[i] - sd.capacity[i];
+ if (excess > 0) {
+ char buf[sizeof(long)*3 + 1];
+ sprintf(buf, "%ld", excess);
+ sgmlerr(162, (struct parse *)0,
+ (UNCH *)captab[i], (UNCH *)buf);
+ }
+ }
+}
+
+/* SGMLSUBCAP: Merge a vector of NCAPACITY capacity values into
+   maxsubcap, keeping the larger value for each capacity.
+*/
+VOID sgmlsubcap(v)
+long *v;
+{
+     int i = 0;
+
+     while (i < NCAPACITY) {
+          if (maxsubcap[i] < v[i])
+               maxsubcap[i] = v[i];
+          i++;
+     }
+}
+
+/* SGMLSDOC: Define the entity named by indocent with storage class ESF
+   and ptr as its text, then open it.
+   Returns -1 if entdef() gives no entity, otherwise the result of
+   entopen().
+*/
+int sgmlsdoc(ptr)
+UNIV ptr;
+{
+     union etext etx;
+     struct entity *e;
+
+     etx.x = ptr;
+     e = entdef(indocent, ESF, &etx);
+     return e ? entopen(e) : -1;
+}
+
+/* SGMLGENT: Get a data entity.
+   Returns:
+   -1 if the entity does not exist
+   -2 if it is not a data entity
+   1 if it is an external entity (*np receives etx.n)
+   2 if it is an internal cdata entity (*tp receives etx.c)
+   3 if it is an internal sdata entity (*tp receives etx.c)
+   np and tp may each be null, in which case nothing is stored.
+*/
+int sgmlgent(iname, np, tp)
+UNCH *iname;
+PNE *np;
+UNCH **tp;
+{
+     PECB ep = entfind(iname);     /* Pointer to an entity control block. */
+
+     if (!ep)
+          return -1;
+     if (ep->estore == ESN) {
+          if (np)
+               *np = ep->etx.n;
+          return 1;
+     }
+     if (ep->estore == ESC || ep->estore == ESX) {
+          if (tp)
+               *tp = ep->etx.c;
+          return ep->estore == ESC ? 2 : 3;
+     }
+     return -2;
+}
+
+/* Mark an entity.
+   Sets the entity's mark to 1 and returns the value it had before, or
+   -1 if the entity does not exist. */
+
+int sgmlment(iname)
+UNCH *iname;
+{
+     PECB ep = entfind(iname);
+     int was;
+
+     if (ep == 0)
+          return -1;
+     was = ep->mark;
+     ep->mark = 1;
+     return was;
+}
+
+/* SGMLGCNTERR: Returns the value of msgcnterr(). */
+int sgmlgcnterr()
+{
+ return msgcnterr();
+}
+
+/* This is for error handling functions that want to print a gi backtrace. */
+
+UNCH *getgi(i)
+int i;
+{
+ return i >= 0 && i <= ts ? tags[i].tetd->etdgi + 1 : NULL;
+}
+
+/* Returns the value of prologsw for the use by error handling functions.
+ sgmlnext() clears prologsw when the prolog ends. */
+
+int inprolog()
+{
+ return prologsw;
+}
+
+/* Used by the error handling functions to access scbs.
+ Returns 0 if level is not in the range 0..es (global es), otherwise 1.
+ When locp is not null, *locp is filled in from the source macros
+ evaluated for the given level. */
+
+int getlocation(level, locp)
+int level;
+struct location *locp;
+{
+ if (level < 0 || level > es)
+ return 0;
+ if (locp) {
+ /* Deliberately shadows the global es inside this block so that the
+ macros below refer to the requested level. */
+ int es = level;
+ /* source macros access a variable called `es' */
+
+ locp->filesw = FILESW;
+ locp->rcnt = RCNT;
+ locp->ccnt = CCNT;
+ locp->ename = ENTITY + 1;
+ locp->fcb = SCBFCB;
+ locp->curchar = CC;
+ locp->nextchar = NEXTC;
+ }
+ return 1;
+}
+
+/* SGMLLOC: Report the line number and file name of the current position.
+ Searches from the current level downwards for the first level for which
+ FILESW is true.  Returns 0 if there is none; otherwise stores RCNT in
+ *linenop and ioflid(SCBFCB) in *filenamep for that level and returns 1.
+ Both pointers are written without a null check.
+*/
+int sgmlloc(linenop, filenamep)
+unsigned long *linenop;
+char **filenamep;
+{
+ /* level is initialized from the global es, because the local es that
+ shadows it is only declared on the next line. */
+ int level = es;
+ /* Local es: the source macros below are evaluated for this level. */
+ int es;
+
+ for (es = level; es >= 0 && !FILESW; es--)
+ ;
+ if (es < 0)
+ return 0;
+ *linenop = RCNT;
+ *filenamep = ioflid(SCBFCB);
+ return 1;
+}
+
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/sgml2.c b/usr.bin/sgmls/sgmls/sgml2.c
new file mode 100644
index 0000000..e202f84
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/sgml2.c
@@ -0,0 +1,499 @@
+/* Added exiterr() for terminal errors to prevent SGML.MSG errors. */
+#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */
+static int iorc; /* Return code from io* functions */
+/* ENTDEF: Create the ecb for an entity definition (through hin() on etab)
+           and return a pointer to it.
+           The caller must keep the entity text in permanent storage and
+           must already have checked that the entity does not exist.
+*/
+#ifdef USE_PROTOTYPES
+PECB entdef(UNCH *ename, UNCH estore, union etext *petx)
+#else
+PECB entdef(ename, estore, petx)
+UNCH *ename; /* Entity name (with length and EOS). */
+UNCH estore; /* Entity storage class. */
+union etext *petx; /* Ptr to entity text union. */
+#endif
+{
+     PECB newecb = (PECB)hin((THASH)etab, ename, hash(ename, ENTHASH), ENTSZ);
+
+     /* Record the storage class and copy the text union into the new ecb. */
+     newecb->estore = estore;
+     memcpy((UNIV)&newecb->etx, (UNIV)petx, ETEXTSZ);
+     TRACEECB("ENTDEF", newecb);
+     return newecb;
+}
+/* ENTFIND: Look an entity name up in etab.
+            Returns a pointer to its ecb if the entity exists,
+            or NULL if it is not defined.
+*/
+PECB entfind(ename)
+UNCH *ename; /* Entity name (with length and EOS). */
+{
+     PECB hit = (PECB)hfind((THASH)etab, ename, hash(ename, ENTHASH));
+
+     TRACEECB("ENTFIND", hit);
+     return hit;
+}
+/* ENTREF: Process a general or parameter entity reference.
+           A defined entity is passed to ENTOPEN and its return code returned.
+           ENTUNDEF is returned for an undefined parameter entity, and for an
+           undefined general entity when no default entity can be used.
+*/
+int entref(ename)
+UNCH *ename; /* Entity name (with length and EOS). */
+{
+     PECB ecb = (PECB)hfind((THASH)etab, ename, hash(ename, ENTHASH));
+
+     if (ecb != 0 && ecb->estore != 0)
+          return entopen(ecb);
+     /* No defaulting for parameter entities, or when there is no default. */
+     if (ename[1] == lex.d.pero || ecbdeflt == 0) {
+          sgmlerr(35, (struct parse *)0, ename+1, (UNCH *)0);
+          return ENTUNDEF;
+     }
+     /* Otherwise define the entity from the default; error 150 if that fails. */
+     if ((ecb = usedef(ename)) == 0) {
+          sgmlerr(150, (struct parse *)0, ename+1, (UNCH *)0);
+          return ENTUNDEF;
+     }
+     return entopen(ecb);
+}
+/* ENTOPEN: Open a newly referenced entity.
+ Increment the stack pointer (es) and initialize the new entry.
+ ENTDATA if entity is CDATA, SDATA or of class ESN, ENTPI if it is PI,
+ 0 if normal and all o.k.; <0 if not (ENTMAX, ENTLOOP or ENTFILE).
+*/
+int entopen(ecb)
+struct entity *ecb; /* Entity control block. */
+{
+ int i; /* Loop counter. */
+
+ /* See if we have exceeded the entity nesting level. */
+ if (es>=ENTLVL) {
+ sgmlerr(34, (struct parse *)0, ecb->ename+1, ntoa(ENTLVL));
+ return(ENTMAX);
+ }
+ /* If entity is an etd, pi, or data, return it without creating an scb. */
+ switch (ecb->estore) {
+ case ESN:
+ if (NEXTYPE(ecb->etx.n)!=ESNSUB) {
+ if (!NEDCNDEFINED(ecb->etx.n)) /* Reported, but the entity is still returned. */
+ sgmlerr(78, (struct parse *)0, NEDCN(ecb->etx.n)+1,
+ ecb->ename+1);
+ }
+ else {
+#if 0
+ if (!NEID(ecb->etx.n)) {
+ sgmlerr(149, (struct parse *)0, ecb->ename + 1, (UNCH *)0);
+ return ENTFILE;
+ }
+#endif
+ if (sw.nopen >= sd.subdoc) /* Reported, but the entity is still returned. */
+ sgmlerr(188, (struct parse *)0,
+ (UNCH *)NULL, (UNCH *)NULL);
+ }
+ data = (UNCH *)ecb->etx.n; /* Here data carries the etx.n pointer, not text. */
+ entdatsw = NDECONT;
+ return(ENTDATA);
+ case ESC:
+ case ESX:
+ datalen = ustrlen(ecb->etx.c);
+ data = ecb->etx.c;
+ entdatsw = (ecb->estore==ESC) ? CDECONT : SDECONT;
+ return(ENTDATA);
+ case ESI:
+ datalen = ustrlen(ecb->etx.c);
+ data = ecb->etx.c;
+ entpisw = 4;
+ return(ENTPI);
+ }
+ /* If the same entity is already open, send msg and ignore it.
+ Level 0 needn't be tested, as its entity name is always *DOC.
+ */
+ for (i = 0; ++i<=es;) if (scbs[i].ecb.enext==ecb) { /* i runs from 1 to es. */
+ sgmlerr(36, (struct parse *)0, ecb->ename+1, (UNCH *)0);
+ return(ENTLOOP);
+ }
+ /* Update SCB if entity trace is wanted in messages or entity is a file.
+ (Avoid this at start when es==-1 or memory will be corrupted.)
+ */
+ if (es >= 0 && (sw.swenttr || FILESW)) scbset();
+
+ /* Stack the new source control block (we know there is room). */
+ ++es; /* Increment scbs index. */
+ RCNT = CCO = RSCC = 0; /* No records or chars yet. */
+ COPIEDSW = 0;
+ memcpy((UNIV)&ECB, (UNIV)ecb, (UNS)ENTSZ); /* Copy the ecb into the scb. */
+ ECBPTR = ecb; /* Save the ecb pointer in scb.ecb.enext. */
+ TRACEECB("ENTOPEN", ECBPTR);
+
+ /* For memory entities, the read buffer is the entity text.
+ The text starts at FBUF, so FPOS should be FBUF-1
+ because it is bumped before each character is read.
+ */
+ if (ECB.estore<ESFM) {FPOS = (FBUF = ECB.etx.c)-1; return 0;}
+
+ /* For file entities, suspend any open file and do first read. */
+ if (ECB.etx.x == 0) { /* Nothing to open: undo the push and report. */
+ --es;
+ switch (ecb->estore) {
+ case ESF:
+ sgmlerr(149, (struct parse *)0, ecb->ename + 1, (UNCH *)0);
+ break;
+ case ESP:
+ sgmlerr(229, (struct parse *)0, ecb->ename + 2, (UNCH *)0); /* Note +2 here, +1 for ESF. */
+ break;
+ default:
+ abort(); /* Any other storage class is not expected at this point. */
+ }
+ return ENTFILE;
+ }
+ fileopen(); /* Open new external file. */
+ if (iorc<0) { /* If open not successful: */
+ FPOS = FBUF-1; /* Clean CCNT for OPEN error msg.*/
+ filerr(32, ecb->ename+1);
+ --es; /* Pop the stack. */
+ return(ENTFILE);
+ }
+ filepend(es); /* Suspend any open file. */
+ fileread(); /* First read of file must be ok.*/
+ return 0;
+}
+/* ENTGET: Get next record of entity (if there is one).
+ Otherwise, close the file (if entity is a file) and
+ pop the entity stack. If nothing else is on the stack,
+ return -1 to advise the caller; in every other case return 0.
+*/
+int entget()
+{
+ RSCC += (CCO = FPOS-FBUF);
+ /* Characters-in-record (ignore EOB/EOF). */
+ tagctr += CCO; /* Update tag length counter. */
+ switch (*FPOS) {
+ case EOBCHAR: /* End of file buffer: refill it. */
+ rbufs[-2] = FPOS[-2]; /* Carry the last two characters over in front of rbufs. */
+ rbufs[-1] = FPOS[-1];
+ fileread(); /* Read the file. */
+ if (iorc > 0) break;
+ readerr:
+ filerr(31, ENTITY+1); /* Treat error as EOF: falls through to EOFCHAR. */
+ case EOFCHAR: /* End of file: close it. */
+ fileclos(); /* Call SGMLIO to close file. */
+ conterr:
+ if (es==0) { /* Report if it is primary file. */
+ FPOS = FBUF-1; /* Preserve CCNT for omitted end-tags. */
+ return -1;
+ } /* Otherwise falls through to pop the stack. */
+ case EOS: /* End of memory entity: pop the stack. */
+ TRACEECB("ENTPOP", ECBPTR);
+ if (COPIEDSW) {
+ frem((UNIV)(FBUF + 1));
+ COPIEDSW = 0;
+ }
+ --es; /* Pop the SCB stack. */
+ if (FBUF) break; /* Not a PEND file. */
+ filecont(); /* Resume previous file. */
+ if (iorc<0) { /* If CONT not successful: */
+ filerr(94, ENTITY+1);
+ goto conterr;
+ }
+ fileread(); /* Read the file. */
+ if (iorc<=0) goto readerr; /* If READ not successful: */
+ rbufs[-1] = SCB.pushback; /* Restore the character saved by filepend(). */
+ FPOS += CCO;
+ CCO = 0;
+ if (delmscsw && es==0) { /* End of DTD. */
+ delmscsw = 0;
+ *rbufs = lex.d.msc;
+ }
+ break;
+ }
+ return 0;
+}
+/* USEDEF: Define an entity for ename from the default entity (ecbdeflt).
+           Returns the new ECB, or 0 if entgen() returns 0. */
+PECB usedef(ename)
+UNCH *ename; /* Entity name (with length and EOS). */
+{
+     UNCH estore = ecbdeflt->estore; /* Default entity storage type. */
+     union etext etx; /* Text for the new entity. */
+     PNE pne = 0; /* Ptr to NDATA entity control block, if one is made. */
+     PECB ecb; /* Entity control block. */
+
+     if (estore >= ESFM) {
+          /* Move entity name into fpi before calling entgen. */
+          fpidf.fpinm = ename + 1;
+          etx.x = entgen(&fpidf);
+          if (etx.x == 0) return (PECB)0;
+          if (estore == ESN) {
+               pne = (PNE)rmalloc((UNS)NESZ);
+               memcpy((UNIV)pne, (UNIV)ecbdeflt->etx.n, (UNS)NESZ);
+               NEID(pne) = etx.x;
+               etx.n = pne;
+          }
+     }
+     else etx.c = ecbdeflt->etx.c; /* Default is an internal string. */
+     if (sw.swrefmsg) sgmlerr(45, (struct parse *)0, ename+1, (UNCH *)0);
+     ++ds.ecbcnt;
+     ecb = entdef(ename, estore, &etx);
+     ecb->dflt = 1;
+     if (pne) NEENAME(pne) = ecb->ename;
+     return ecb;
+}
+/* SCBSET: Record the current position in the current entity in its source
+           control block.  Called by SGML when it returns to the text
+           processor and by ERROR when it reports an error.
+           Nothing is done when es < 0 or FBUF is null.
+*/
+VOID scbset()
+{
+     if (es < 0 || !FBUF)
+          return;
+     CC = *FPOS;
+     /* The following character is kept only when the current one is DELNONCH. */
+     NEXTC = (*FPOS == DELNONCH) ? FPOS[1] : 0;
+     /* Offset from FBUF to just past the current character. */
+     CCO = FPOS + 1 - FBUF;
+}
+/* FILEOPEN: Call IOOPEN to open an external entity (file).
+*/
+VOID fileopen() /* Open an external entity's file. */
+{
+ iorc = ioopen(ECB.etx.x, &SCBFCB); /* Result is left in the file-static iorc. */
+}
+/* FILEREAD: Call IOREAD to read an open external entity (file) into rbufs.
+             Leaves the result in iorc; clears RCNT when a new file is flagged. */
+VOID fileread() /* Read the current external entity's file. */
+{
+     int newfile = 0; /* Stays defined even if ioread() stores nothing in it. */
+     iorc = ioread(SCBFCB, rbufs, &newfile);
+     FPOS = (FBUF = rbufs) - 1; /* Actual read buffer. */
+     if (newfile) RCNT = 0;
+}
+/* FILEPEND: Call IOPEND to close an open external entity (file) temporarily.
+*/
+VOID filepend(es) /* Close the current external entity's file. */
+int es; /* Local index to scbs. */
+{
+ while (--es>=0) { /* Find last external file on stack. */
+ int off;
+ if (!FILESW) continue; /* Not an external file. */
+ if (!FBUF) continue; /* Already suspended. */
+ off = CCO;
+ assert(off >= -1);
+ if (off < 0) off = 0;
+ else CCO = 0;
+ FPOS -= CCO; /* CCO is 0 here unless off was negative, so FPOS moves only then. */
+ SCB.pushback = FPOS[-1]; /* entget() copies this back into rbufs[-1] on resume. */
+ FBUF = 0; /* Indicate pending file. */
+ RSCC += off; /* Update characters-in-record counter. */
+ tagctr += off; /* Update tag length counter. */
+ iopend(SCBFCB, off, rbufs);
+ return; /* Only the first such file found is suspended. */
+ }
+}
+/* FILECONT: Call IOCONT to reopen an external entity (file).
+*/
+VOID filecont() /* Open an external entity's file. */
+{
+ iorc = iocont(SCBFCB); /* Result is left in the file-static iorc. */
+}
+/* FILECLOS: Call IOCLOSE to close an open external entity (file).
+             Does nothing when the scb holds no fcb.
+*/
+VOID fileclos() /* Close the current external entity's file. */
+{
+     if (SCBFCB) {
+          ioclose(SCBFCB);
+          /* sgmlio will have freed the fcb; forget it so it is not used again. */
+          SCBFCB = NULL;
+     }
+}
+/* ERROR: Interface to text processor SGML I/O services for error handling.
+*/
+VOID error(e)
+struct error *e; /* Filled-in error block, passed on to msgprint(). */
+{
+ scbset(); /* Update location in source control block. */
+ msgprint(e);
+}
+/* PTRSRCH: Find a pointer in a list and return its index.
+ Search key must be on list as there is no limit test.
+ This routine is internal only -- not for user data.
+*/
+UNIV mdnmtab[] = { /* Searched by mderr() for mdname; entry order determines errsp. */
+ (UNIV)key[KATTLIST],
+ (UNIV)key[KDOCTYPE],
+ (UNIV)key[KELEMENT],
+ (UNIV)key[KENTITY],
+ (UNIV)key[KLINKTYPE],
+ (UNIV)key[KLINK],
+ (UNIV)key[KNOTATION],
+ (UNIV)sgmlkey,
+ (UNIV)key[KSHORTREF],
+ (UNIV)key[KUSELINK],
+ (UNIV)key[KUSEMAP]
+};
+UNIV pcbtab[] = { /* Searched for a pcb by sgmlerr(), saverr(), exiterr() and synerr(). */
+ (UNIV)&pcbconc,
+ (UNIV)&pcbcone,
+ (UNIV)&pcbconm,
+ (UNIV)&pcbconr,
+ (UNIV)&pcbetag,
+ (UNIV)&pcbgrcm,
+ (UNIV)&pcbgrcs,
+ (UNIV)&pcbgrnm,
+ (UNIV)&pcbgrnt,
+ (UNIV)&pcblitc,
+ (UNIV)&pcblitp,
+ (UNIV)&pcblitr,
+ (UNIV)&pcblitt,
+ (UNIV)&pcblitv,
+ (UNIV)&pcbmd,
+ (UNIV)&pcbmdc,
+ (UNIV)&pcbmdi,
+ (UNIV)&pcbmds,
+ (UNIV)&pcbmsc,
+ (UNIV)&pcbmsi,
+ (UNIV)&pcbmsrc,
+ (UNIV)&pcbpro,
+ (UNIV)&pcbref,
+ (UNIV)&pcbstag,
+ (UNIV)&pcbval,
+ (UNIV)&pcbeal,
+ (UNIV)&pcbsd,
+};
+UNS ptrsrch(ptrtab, ptr)
+UNIV ptrtab[];
+UNIV ptr;
+{
+     UNS idx = 0;
+
+     /* No bound is known here: ptr has to be present in ptrtab. */
+     while (ptrtab[idx] != ptr)
+          ++idx;
+     return idx;
+}
+/* MDERR: Report an error in a markup declaration.
+          Adds the parameters that only markup declaration errors carry:
+          the parameter number, the declaration subject and its name index.
+*/
+VOID mderr(number, parm1, parm2)
+UNS number; /* Error number. */
+UNCH *parm1; /* Additional parameters (or NULL). */
+UNCH *parm2; /* Additional parameters (or NULL). */
+{
+     struct error err;
+     errorinit(&err, subdcl ? MDERR : MDERR2, number);
+     /* Declaration names are numbered after the entries of pcbtab. */
+     err.errsp = (sizeof(pcbtab)/sizeof(pcbtab[0])) + ptrsrch(mdnmtab, (UNIV)mdname);
+     err.eparm[0] = (UNIV)parm1;
+     err.eparm[1] = (UNIV)parm2;
+     err.subdcl = subdcl;
+     err.parmno = parmno;
+     error(&err);
+}
+/* SGMLERR: Report an error found by the SGML parser.
+            A null pcb stands for propcb if prologsw is set, else conpcb. */
+VOID sgmlerr(number, pcb, parm1, parm2)
+UNS number; /* Error number. */
+struct parse *pcb; /* Current parse control block. */
+UNCH *parm1; /* Error message parameters. */
+UNCH *parm2; /* Error message parameters. */
+{
+     struct parse *which = pcb ? pcb : (prologsw ? propcb : conpcb);
+     struct error err;
+     errorinit(&err, DOCERR, number);
+     err.eparm[0] = (UNIV)parm1;
+     err.eparm[1] = (UNIV)parm2;
+     err.errsp = ptrsrch(pcbtab, (UNIV)which);
+     error(&err);
+}
+/* SAVERR: Build an error like SGMLERR does, but hand it to msgsave()
+           for possible later use; returns what msgsave() returns. */
+UNIV saverr(number, pcb, parm1, parm2)
+UNS number; /* Error number. */
+struct parse *pcb; /* Current parse control block. */
+UNCH *parm1; /* Error message parameters. */
+UNCH *parm2; /* Error message parameters. */
+{
+     struct parse *which = pcb ? pcb : (prologsw ? propcb : conpcb);
+     struct error err;
+     errorinit(&err, DOCERR, number);
+     err.eparm[0] = (UNIV)parm1;
+     err.eparm[1] = (UNIV)parm2;
+     err.errsp = ptrsrch(pcbtab, (UNIV)which);
+     scbset(); /* Bring the source control block up to date first. */
+     return msgsave(&err);
+}
+/* SVDERR: Print a saved error.
+*/
+VOID svderr(p)
+UNIV p; /* Passed straight to msgsprint(); presumably a saverr() result. */
+{
+ msgsprint(p);
+}
+/* EXITERR: Report a terminal error from the SGML parser.  Does not return:
+            abort() is called if the error handler comes back. */
+VOID exiterr(number, pcb)
+UNS number; /* Error number. */
+struct parse *pcb; /* Current parse control block. */
+{
+     struct parse *which = pcb ? pcb : (prologsw ? propcb : conpcb);
+     struct error err;
+     errorinit(&err, EXITERR, number);
+     err.errsp = ptrsrch(pcbtab, (UNIV)which);
+     error(&err);
+     /* The error handler should have exited. */
+     abort();
+}
+/* SYNERR: Process syntax errors for SGML parser.  A null pcb selects propcb or conpcb by prologsw, as SGMLERR does. */
+VOID synerr(number, pcb)
+UNS number; /* Error number. */
+struct parse *pcb; /* Current parse control block. */
+{
+     struct error err;
+     errorinit(&err, DOCERR, number);
+     if (!pcb) pcb = prologsw ? propcb : conpcb; /* Null is not in pcbtab; ptrsrch has no bound. */
+     err.errsp = ptrsrch(pcbtab, (UNIV)pcb);
+     error(&err);
+}
+/* FILERR: Report a file access error, passing the current errno along. */
+VOID filerr(number, parm)
+UNS number;
+UNCH *parm;
+{
+     int saved = errno; /* Value on entry. */
+     struct error err;
+     errorinit(&err, FILERR, number);
+     err.sverrno = saved;
+     err.eparm[0] = (UNIV)parm;
+     error(&err);
+}
+/* ERRORINIT: Constructor for struct error.  Sets errtype and errnum from the
+              arguments and zeroes every other field, sverrno included. */
+VOID errorinit(e, type, number)
+struct error *e;
+UNS type;
+UNS number;
+{
+     int i;
+     e->errtype = type;
+     e->errnum = number;
+     e->errsp = 0;
+     e->sverrno = 0; /* Previously left unset; only filerr() assigns it afterwards. */
+     for (i = 0; i < MAXARGS; i++) e->eparm[i] = 0;
+     e->parmno = 0;
+     e->subdcl = 0;
+}
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+comment-column: 30
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/sgmlaux.h b/usr.bin/sgmls/sgmls/sgmlaux.h
new file mode 100644
index 0000000..f87ac8b
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/sgmlaux.h
@@ -0,0 +1,70 @@
+/* This file controls the interface between the parser core and the auxiliary
+functions in entgen.c, sgmlio.c, and sgmlmsg.c */
+
+#include "std.h"
+#include "entity.h"
+#include "sgmldecl.h"
+
+/* Error types (ERRTYPE) for calls to error-handling services
+ performed for SGML by the text processor (SGMLIO).
+ NOTE: Strings in these blocks have no lengths, but cannot exceed
+ NAMELEN (plus 1 more byte for the zero terminator).
+*/
+#define FILERR 0 /* Error: file access. */
+#define DOCERR 1 /* Error: in document markup. */
+#define MDERR 2 /* Error: in markup declaration with subdcl. */
+#define MDERR2 3 /* Error: in markup declaration with no subdcl. */
+#define EXITERR 4 /* Error: terminal error in document markup. */
+/* Quantities affecting error messages and their arguments.
+*/
+#define MAXARGS 2 /* Maximum number of arguments in a msg. */
+
+/* NOTE: Error handler must return, or next call to SGML must be RSET or END,
+ except for EXITERR errors which must not return.
+*/
+struct error { /* IPB for error messages. */
+ UNS errtype; /* Type of error: FILERR, DOCERR, MDERR, MDERR2 or EXITERR. */
+ UNS errnum; /* Error number. */
+ UNS errsp; /* Special parameter index in message file. */
+ int sverrno; /* Saved value of errno. */
+ int parmno; /* MDERROR: declaration parameter number. */
+ UNCH *subdcl; /* MDERROR: subject of declaration. */
+ UNIV eparm[MAXARGS]; /* Ptrs to arguments (no length, but EOS). */
+};
+
+struct location { /* Filled in by getlocation() from the source macros. */
+ int filesw; /* From FILESW. */
+ unsigned long rcnt; /* From RCNT. */
+ int ccnt; /* From CCNT. */
+ UNCH curchar; /* From CC. */
+ UNCH nextchar; /* From NEXTC. */
+ UNCH *ename; /* From ENTITY + 1. */
+ UNIV fcb; /* From SCBFCB. */
+};
+
+int ioopen P((UNIV, UNIV*));
+VOID ioclose P((UNIV));
+int ioread P((UNIV, UNCH *, int *));
+VOID iopend P((UNIV, int, UNCH *));
+int iocont P((UNIV));
+VOID ioinit P((struct switches *));
+char *ioflid P((UNIV));
+
+UNIV entgen P((struct fpi *));
+
+VOID msgprint P((struct error *));
+VOID msginit P((struct switches *));
+UNIV msgsave P((struct error *));
+VOID msgsprint P((UNIV));
+VOID msgsfree P((UNIV));
+int msgcnterr P((void));
+
+
+int inprolog P((void));
+UNCH *getgi P((int));
+
+int getlocation P((int, struct location *));
+UNIV rmalloc P((unsigned int));
+UNIV rrealloc P((UNIV, UNS));
+VOID frem P((UNIV));
+VOID exiterr P((unsigned int,struct parse *));
diff --git a/usr.bin/sgmls/sgmls/sgmldecl.c b/usr.bin/sgmls/sgmls/sgmldecl.c
new file mode 100644
index 0000000..aab66e9
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/sgmldecl.c
@@ -0,0 +1,1741 @@
+/* sgmldecl.c -
+ SGML declaration parsing.
+
+ Written by James Clark (jjc@jclark.com).
+*/
+
+#include "sgmlincl.h"
+
+/* Symbolic names for the error numbers that are generated only by
+this module. */
+
+#define E_STANDARD 163
+#define E_SIGNIFICANT 164
+#define E_BADLIT 165
+#define E_SCOPE 166
+#define E_XNUM 167
+#define E_BADVERSION 168
+#define E_NMUNSUP 169
+#define E_XNMLIT 170
+#define E_CHARDESC 171
+#define E_CHARDUP 172
+#define E_CHARRANGE 173
+#define E_7BIT 174
+#define E_CHARMISSING 175
+#define E_SHUNNED 176
+#define E_NONSGML 177
+#define E_CAPSET 178
+#define E_CAPMISSING 179
+#define E_SYNTAX 180
+#define E_CHARNUM 181
+#define E_SWITCHES 182
+#define E_INSTANCE 183
+#define E_ZEROFEATURE 184
+#define E_YESNO 185
+#define E_CAPACITY 186
+#define E_NOTSUPPORTED 187
+#define E_FORMAL 189
+#define E_BADCLASS 190
+#define E_MUSTBENON 191
+#define E_BADBASECHAR 199
+#define E_SYNREFUNUSED 200
+#define E_SYNREFUNDESC 201
+#define E_SYNREFUNKNOWN 202
+#define E_SYNREFUNKNOWNSET 203
+#define E_FUNDUP 204
+#define E_BADFUN 205
+#define E_FUNCHAR 206
+#define E_GENDELIM 207
+#define E_SRDELIM 208
+#define E_BADKEY 209
+#define E_BADQUANTITY 210
+#define E_BADNAME 211
+#define E_REFNAME 212
+#define E_DUPNAME 213
+#define E_QUANTITY 214
+#define E_QTOOBIG 215
+#define E_NMSTRTCNT 219
+#define E_NMCHARCNT 220
+#define E_NMDUP 221
+#define E_NMBAD 222
+#define E_NMMINUS 223
+#define E_UNKNOWNSET 227
+
+#define CANON_NMC '.' /* Canonical name character. */
+#define CANON_NMS 'A' /* Canonical name start character. */
+#define CANON_MIN ':' /* Canonical minimum data character. */
+
+#define SUCCESS 1
+#define FAIL 0
+#define SIZEOF(v) (sizeof(v)/sizeof(v[0]))
+#define matches(tok, str) (ustrcmp((tok)+1, (str)) == 0)
+
+static UNCH standard[] = "ISO 8879:1986";
+
+#define REFERENCE_SYNTAX "ISO 8879:1986//SYNTAX Reference//EN"
+#define CORE_SYNTAX "ISO 8879:1986//SYNTAX Core//EN"
+
+static UNCH (*newkey)[REFNAMELEN+1] = 0;
+
+struct pmap {
+ char *name;
+ UNIV value;
+};
+
+/* The reference capacity set. */
+#define REFCAPSET \
+{ 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, \
+35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L }
+
+long refcapset[NCAPACITY] = REFCAPSET;
+
+/* A pmap of known capacity sets. */
+
+static struct pmap capset_map[] = {
+ { "ISO 8879:1986//CAPACITY Reference//EN", (UNIV)refcapset },
+ { 0 },
+};
+
+/* Table of capacity names. Must match *CAP in sgmldecl.h. */
+
+char *captab[] = {
+ "TOTALCAP",
+ "ENTCAP",
+ "ENTCHCAP",
+ "ELEMCAP",
+ "GRPCAP",
+ "EXGRPCAP",
+ "EXNMCAP",
+ "ATTCAP",
+ "ATTCHCAP",
+ "AVGRPCAP",
+ "NOTCAP",
+ "NOTCHCAP",
+ "IDCAP",
+ "IDREFCAP",
+ "MAPCAP",
+ "LKSETCAP",
+ "LKNMCAP",
+};
+
+/* The default SGML declaration. */
+#define MAXNUMBER 99999999L
+
+/* Reference quantity set */
+
+#define REFATTCNT 40
+#define REFATTSPLEN 960
+#define REFBSEQLEN 960
+#define REFDTAGLEN 16
+#define REFDTEMPLEN 16
+#define REFENTLVL 16
+#define REFGRPCNT 32
+#define REFGRPGTCNT 96
+#define REFGRPLVL 16
+#define REFNORMSEP 2
+#define REFPILEN 240
+#define REFTAGLEN 960
+#define REFTAGLVL 24
+
+#define ALLOC_MAX 65534
+
+#define BIGINT 30000
+
+#define MAXATTCNT ((ALLOC_MAX/sizeof(struct ad)) - 2)
+#define MAXATTSPLEN BIGINT
+#define MAXBSEQLEN BIGINT
+#define MAXDTAGLEN 16
+#define MAXDTEMPLEN 16
+#define MAXENTLVL ((ALLOC_MAX/sizeof(struct source)) - 1)
+#define MAXGRPCNT MAXGRPGTCNT
+/* Must be between 96 and 253 */
+#define MAXGRPGTCNT 253
+#define MAXGRPLVL MAXGRPGTCNT
+#define MAXLITLEN BIGINT
+/* This guarantees that NAMELEN < LITLEN (ie there's always space for a name
+in a buffer intended for a literal.) */
+#define MAXNAMELEN (REFLITLEN - 1)
+#define MAXNORMSEP 2
+#define MAXPILEN BIGINT
+#define MAXTAGLEN BIGINT
+#define MAXTAGLVL ((ALLOC_MAX/sizeof(struct tag)) - 1)
+
+/* Table of quantity names. Must match Q* in sgmldecl.h. */
+
+static char *quantity_names[] = {
+ "ATTCNT",
+ "ATTSPLEN",
+ "BSEQLEN",
+ "DTAGLEN",
+ "DTEMPLEN",
+ "ENTLVL",
+ "GRPCNT",
+ "GRPGTCNT",
+ "GRPLVL",
+ "LITLEN",
+ "NAMELEN",
+ "NORMSEP",
+ "PILEN",
+ "TAGLEN",
+ "TAGLVL",
+};
+
+static int max_quantity[] = { /* MAX* value per quantity, in the same order as quantity_names[]. */
+ MAXATTCNT,
+ MAXATTSPLEN,
+ MAXBSEQLEN,
+ MAXDTAGLEN,
+ MAXDTEMPLEN,
+ MAXENTLVL,
+ MAXGRPCNT,
+ MAXGRPGTCNT,
+ MAXGRPLVL,
+ MAXLITLEN,
+ MAXNAMELEN,
+ MAXNORMSEP,
+ MAXPILEN,
+ MAXTAGLEN,
+ MAXTAGLVL,
+};
+
+static char *quantity_changed;
+
+/* Non-zero means the APPINFO parameter was not NONE. */
+static int appinfosw = 0;
+
+struct sgmldecl sd = {
+ REFCAPSET, /* capacity */
+#ifdef SUPPORT_SUBDOC
+ MAXNUMBER, /* subdoc */
+#else /* not SUPPORT_SUBDOC */
+ 0, /* subdoc */
+#endif /* not SUPPORT_SUBDOC */
+ 1, /* formal */
+ 1, /* omittag */
+ 1, /* shorttag */
+ 1, /* shortref */
+ { 1, 0 }, /* general/entity name case translation */
+ { /* reference quantity set */
+ REFATTCNT,
+ REFATTSPLEN,
+ REFBSEQLEN,
+ REFDTAGLEN,
+ REFDTEMPLEN,
+ REFENTLVL,
+ REFGRPCNT,
+ REFGRPGTCNT,
+ REFGRPLVL,
+ REFLITLEN,
+ REFNAMELEN,
+ REFNORMSEP,
+ REFPILEN,
+ REFTAGLEN,
+ REFTAGLVL,
+ },
+};
+
+static int systemcharset[] = { /* Identity table: entry n holds n, for n = 0..255. */
+0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
+112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
+128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
+144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
+160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
+176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
+192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
+208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
+224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
+};
+
+static struct pmap charset_map[] = {
+ { "ESC 2/5 4/0", (UNIV)asciicharset }, /* ISO 646 IRV */
+ { "ESC 2/8 4/2", (UNIV)asciicharset }, /* ISO Registration Number 6, ASCII */
+ { SYSTEM_CHARSET_DESIGNATING_SEQUENCE, (UNIV)systemcharset },
+ /* system character set */
+ { 0 }
+};
+
+static int synrefcharset[256]; /* the syntax reference character set */
+
+#define CHAR_NONSGML 01
+#define CHAR_SIGNIFICANT 02
+#define CHAR_MAGIC 04
+#define CHAR_SHUNNED 010
+
+static UNCH char_flags[256];
+static int done_nonsgml = 0;
+static UNCH *nlextoke = 0; /* new lextoke */
+static UNCH *nlextran = 0; /* new lextran */
+
+
+static UNCH kcharset[] = "CHARSET";
+static UNCH kbaseset[] = "BASESET";
+static UNCH kdescset[] = "DESCSET";
+static UNCH kunused[] = "UNUSED";
+static UNCH kcapacity[] = "CAPACITY";
+static UNCH kpublic[] = "PUBLIC";
+static UNCH ksgmlref[] = "SGMLREF";
+static UNCH kscope[] = "SCOPE";
+static UNCH kdocument[] = "DOCUMENT";
+static UNCH kinstance[] = "INSTANCE";
+static UNCH ksyntax[] = "SYNTAX";
+static UNCH kswitches[] = "SWITCHES";
+static UNCH kfeatures[] = "FEATURES";
+static UNCH kminimize[] = "MINIMIZE";
+static UNCH kdatatag[] = "DATATAG";
+static UNCH komittag[] = "OMITTAG";
+static UNCH krank[] = "RANK";
+static UNCH kshorttag[] = "SHORTTAG";
+static UNCH klink[] = "LINK";
+static UNCH ksimple[] = "SIMPLE";
+static UNCH kimplicit[] = "IMPLICIT";
+static UNCH kexplicit[] = "EXPLICIT";
+static UNCH kother[] = "OTHER";
+static UNCH kconcur[] = "CONCUR";
+static UNCH ksubdoc[] = "SUBDOC";
+static UNCH kformal[] = "FORMAL";
+static UNCH kyes[] = "YES";
+static UNCH kno[] = "NO";
+static UNCH kappinfo[] = "APPINFO";
+static UNCH knone[] = "NONE";
+static UNCH kshunchar[] = "SHUNCHAR";
+static UNCH kcontrols[] = "CONTROLS";
+static UNCH kfunction[] = "FUNCTION";
+static UNCH krs[] = "RS";
+static UNCH kre[] = "RE";
+static UNCH kspace[] = "SPACE";
+static UNCH knaming[] = "NAMING";
+static UNCH klcnmstrt[] = "LCNMSTRT";
+static UNCH kucnmstrt[] = "UCNMSTRT";
+static UNCH klcnmchar[] = "LCNMCHAR";
+static UNCH kucnmchar[] = "UCNMCHAR";
+static UNCH knamecase[] = "NAMECASE";
+static UNCH kdelim[] = "DELIM";
+static UNCH kgeneral[] = "GENERAL";
+static UNCH kentity[] = "ENTITY";
+static UNCH kshortref[] = "SHORTREF";
+static UNCH knames[] = "NAMES";
+static UNCH kquantity[] = "QUANTITY";
+
+#define sderr mderr
+
+static UNIV pmaplookup P((struct pmap *, char *));
+static UNCH *ltous P((long));
+static VOID sdfixstandard P((UNCH *));
+static int sdparm P((UNCH *, struct parse *));
+static int sdname P((UNCH *, UNCH *));
+static int sdckname P((UNCH *, UNCH *));
+static int sdversion P((UNCH *));
+static int sdcharset P((UNCH *));
+static int sdcsdesc P((UNCH *, int *));
+static int sdpubcapacity P((UNCH *));
+static int sdcapacity P((UNCH *));
+static int sdscope P((UNCH *));
+static VOID setlexical P((void));
+static VOID noemptytag P((void));
+static int sdpubsyntax P((UNCH *));
+static int sdsyntax P((UNCH *));
+static int sdxsyntax P((UNCH *));
+static int sdtranscharnum P((UNCH *));
+static int sdtranschar P((int));
+static int sdshunchar P((UNCH *));
+static int sdsynref P((UNCH *));
+static int sdfunction P((UNCH *));
+static int sdnaming P((UNCH *));
+static int sddelim P((UNCH *));
+static int sdnames P((UNCH *));
+static int sdquantity P((UNCH *));
+static int sdfeatures P((UNCH *));
+static int sdappinfo P((UNCH *));
+
+static VOID bufsalloc P((void));
+static VOID bufsrealloc P((void));
+
+/* Parse the SGML declaration, one section parser after another, stopping
+   at the first that fails.  Return non-zero if there was some appinfo. */
+int sgmldecl()
+{
+     /* Section parsers, in the order in which they are run. */
+     static int (*section[]) P((UNCH *)) = {
+          sdversion,
+          sdcharset,
+          sdcapacity,
+          sdscope,
+          sdsyntax,
+          sdfeatures,
+          sdappinfo,
+     };
+     UNCH endbuf[REFNAMELEN+2]; /* buffer for parsing terminating > */
+     int failed = 0;
+     int n;
+
+     /* These are needed if we use mderr. */
+     parmno = 0;
+     mdname = sgmlkey;
+     subdcl = NULL;
+     for (n = 0; !failed && n < SIZEOF(section); n++)
+          failed = ((*section[n])(tbuf) == FAIL);
+     if (!failed)
+          setlexical();
+     bufsrealloc();
+     /* Parse the >. Don't overwrite the appinfo. */
+     if (!failed)
+          sdparm(endbuf, 0);
+     /* We must exit if we hit end of document. */
+     if (pcbsd.action == EOD_)
+          exiterr(161, &pcbsd);
+     /* Any action other than ESGD at this point is reported as error 126. */
+     if (!failed && pcbsd.action != ESGD)
+          sderr(126, (UNCH *)0, (UNCH *)0);
+     return appinfosw;
+}
+
+/* Parse the literal at the beginning of a SGML declaration, which should
+contain the version of the standard.  A missing literal fails with error
+123; a literal that differs from `standard' is reported but accepted. */
+static int sdversion(tbuf)
+UNCH *tbuf;
+{
+     if (sdparm(tbuf, &pcblitv) == LIT1) {
+          sdfixstandard(tbuf);
+          if (ustrcmp(tbuf, standard) != 0)
+               sderr(E_BADVERSION, tbuf, standard);
+          return SUCCESS;
+     }
+     sderr(123, (UNCH *)0, (UNCH *)0);
+     return FAIL;
+}
+
+/* Parse the CHARSET section. Use one token lookahead.
+   Characters left UNUSED or undescribed end up flagged CHAR_NONSGML. */
+
+static int sdcharset(tbuf)
+UNCH *tbuf;
+{
+     int status[256]; /* Per-character result filled in by sdcsdesc(). */
+     int c;
+     int upper_described = 0; /* Is any of 128..255 other than UNDESC? */
+
+     if (sdname(tbuf, kcharset) == FAIL)
+          return FAIL;
+     (void)sdparm(tbuf, 0);
+     if (sdcsdesc(tbuf, status) == FAIL)
+          return FAIL;
+
+     for (c = 128; c < 256 && !upper_described; c++)
+          upper_described = (status[c] != UNDESC);
+     if (!upper_described) {
+          /* Only a 7-bit character set was described. Fill it out to 8-bits. */
+          for (c = 128; c < 256; c++)
+               status[c] = UNUSED;
+#if 0
+          sderr(E_7BIT, (UNCH *)0, (UNCH *)0);
+#endif
+     }
+
+     /* Characters that are declared UNUSED in the document character set
+        are assigned to non-SGML; so are undescribed ones, after a report. */
+     for (c = 0; c < 256; c++) {
+          if (status[c] == UNDESC)
+               sderr(E_CHARMISSING, ltous((long)c), (UNCH *)0);
+          if (status[c] == UNDESC || status[c] == UNUSED)
+               char_flags[c] |= CHAR_NONSGML;
+     }
+     done_nonsgml = 1;
+     return SUCCESS;
+}
+
+/* Parse a character set description. Uses one token lookahead: the token already read into tbuf is examined first. Fills status[0..255]; returns SUCCESS or FAIL. */
+
+static int sdcsdesc(tbuf, status)
+UNCH *tbuf;
+int *status; /* 256 entries, all reset to UNDESC on entry. */
+{
+ int i;
+ int nsets = 0; /* Number of BASESET sections seen. */
+ struct fpi fpi;
+
+ for (i = 0; i < 256; i++)
+ status[i] = UNDESC;
+
+ for (;;) {
+ int nchars; /* Number of ranges described for this base set. */
+ int *baseset = 0; /* Stays 0 when the base set is not recognised. */
+
+ if (pcbsd.action != NAS1) { /* Lookahead is not a name: end of the description. */
+ if (nsets == 0) {
+ sderr(120, (UNCH *)0, (UNCH *)0);
+ return FAIL;
+ }
+ break;
+ }
+ if (!matches(tbuf, kbaseset)) { /* Some other name: leave it for the caller. */
+ if (nsets == 0) {
+ sderr(118, tbuf+1, kbaseset);
+ return FAIL;
+ }
+ break;
+ }
+ nsets++;
+ MEMZERO((UNIV)&fpi, FPISZ);
+ if (sdparm(tbuf, &pcblitv) != LIT1) {
+ sderr(123, (UNCH *)0, (UNCH *)0);
+ return FAIL;
+ }
+ fpi.fpipubis = tbuf;
+ /* Give a warning if it is not a CHARSET fpi. */
+ if (parsefpi(&fpi))
+ sderr(E_FORMAL, (UNCH *)0, (UNCH *)0);
+ else if (fpi.fpic != FPICHARS)
+ sderr(E_BADCLASS, kcharset, (UNCH *)0);
+ else {
+ fpi.fpipubis[fpi.fpil + fpi.fpill] = '\0'; /* Cut the identifier off after fpill bytes from offset fpil. */
+ baseset = (int *)pmaplookup(charset_map,
+ (char *)fpi.fpipubis + fpi.fpil);
+ if (!baseset)
+ sderr(E_UNKNOWNSET, fpi.fpipubis + fpi.fpil, (UNCH *)0);
+ }
+ if (sdname(tbuf, kdescset) == FAIL) return FAIL;
+ nchars = 0;
+ for (;;) {
+ long start, count; /* First described character number and how many follow. */
+ long basenum; /* Base set number, or UNKNOWN / UNUSED. */
+ if (sdparm(tbuf, 0) != NUM1) /* Not a number: this token becomes the lookahead. */
+ break;
+ start = atol((char *)tbuf);
+ if (sdparm(tbuf, 0) != NUM1) {
+ sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
+ return FAIL;
+ }
+ count = atol((char *)tbuf);
+ switch (sdparm(tbuf, &pcblitv)) {
+ case NUM1:
+ basenum = atol((char *)tbuf);
+ break;
+ case LIT1: /* A literal in place of a number is recorded as UNKNOWN. */
+ basenum = UNKNOWN;
+ break;
+ case NAS1:
+ if (matches(tbuf, kunused)) {
+ basenum = UNUSED;
+ break;
+ }
+ /* fall through */
+ default:
+ sderr(E_CHARDESC, ltous(start), (UNCH *)0);
+ return FAIL;
+ }
+ if (start + count > 256) /* NOTE(review): the sum is not guarded against long overflow -- confirm the token length keeps both values small. */
+ sderr(E_CHARRANGE, (UNCH *)0, (UNCH *)0);
+ else {
+ int i; /* Shadows the outer i. */
+ int lim = (int)start + count;
+ for (i = (int)start; i < lim; i++) {
+ if (status[i] != UNDESC) /* Already described: report it and keep the first description. */
+ sderr(E_CHARDUP, ltous((long)i), (UNCH *)0);
+ else if (basenum == UNUSED || basenum == UNKNOWN)
+ status[i] = (int)basenum;
+ else if (baseset == 0)
+ status[i] = UNKNOWN_SET;
+ else {
+ int n = basenum + (i - start); /* Corresponding number in the base set. */
+ if (n < 0 || n > 255)
+ sderr(E_CHARRANGE, (UNCH *)0, (UNCH *)0);
+ else if (baseset[n] == UNUSED)
+ sderr(E_BADBASECHAR, ltous((long)n), (UNCH *)0);
+ else
+ status[i] = baseset[n];
+ }
+ }
+ }
+ nchars++; /* Counted even when the range was rejected above. */
+ }
+ if (nchars == 0) {
+ sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
+ return FAIL;
+ }
+ }
+ return SUCCESS;
+}
+
+/* Parse the CAPACITY section. Uses one token lookahead.
+   PUBLIC hands over to sdpubcapacity(); SGMLREF starts from the reference
+   capacity set and must be followed by at least one name/number pair. */
+static int sdcapacity(tbuf)
+UNCH *tbuf;
+{
+     int npairs = 0; /* Capacity name/number pairs accepted so far. */
+
+     if (sdckname(tbuf, kcapacity) == FAIL)
+          return FAIL;
+     if (sdparm(tbuf, 0) != NAS1) {
+          sderr(120, (UNCH *)0, (UNCH *)0);
+          return FAIL;
+     }
+     /* PUBLIC: the rest of the section is parsed by sdpubcapacity(). */
+     if (matches(tbuf, kpublic))
+          return sdpubcapacity(tbuf);
+     if (!matches(tbuf, ksgmlref)) {
+          sderr(E_CAPACITY, tbuf+1, (UNCH *)0);
+          return FAIL;
+     }
+     /* Start from the reference capacity set; the pairs below override it. */
+     memcpy((UNIV)sd.capacity, (UNIV)refcapset, sizeof(sd.capacity));
+     while (sdparm(tbuf, 0) == NAS1) {
+          int which = 0; /* Index of the token in captab. */
+
+          while (which < SIZEOF(captab) && !matches(tbuf, captab[which]))
+               which++;
+          /* A name that is not in captab ends the list; it is the lookahead. */
+          if (which >= SIZEOF(captab))
+               break;
+          /* A capacity name must be followed by its number. */
+          if (sdparm(tbuf, 0) != NUM1) {
+               sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
+               return FAIL;
+          }
+          sd.capacity[which] = atol((char *)tbuf);
+          npairs++;
+     }
+     /* SGMLREF with no pairs at all is an error. */
+     if (npairs == 0) {
+          sderr(E_CAPMISSING, (UNCH *)0, (UNCH *)0);
+          return FAIL;
+     }
+
+     return SUCCESS;
+}
+
+/* Parse a CAPACITY section that started with PUBLIC: a literal that is
+looked up in capset_map.  Must do one token lookahead, since sdcapacity()
+also does. */
+static int sdpubcapacity(tbuf)
+UNCH *tbuf;
+{
+     UNIV known; /* Capacity set found for the literal, if any. */
+     if (sdparm(tbuf, &pcblitv) != LIT1) {
+          sderr(123, (UNCH *)0, (UNCH *)0);
+          return FAIL;
+     }
+     sdfixstandard(tbuf);
+     known = pmaplookup(capset_map, (char *)tbuf);
+     if (known)
+          memcpy((UNIV)sd.capacity, (UNIV)known, sizeof(sd.capacity));
+     else
+          sderr(E_CAPSET, tbuf, (UNCH *)0); /* Unknown set: reported, not fatal. */
+     (void)sdparm(tbuf, 0); /* Read the lookahead token. */
+     return SUCCESS;
+}
+
+/* Parse the SCOPE section.  Uses no lookahead.  DOCUMENT is accepted
+silently, INSTANCE is accepted but reported with E_INSTANCE, and any
+other name is an error. */
+
+static int sdscope(tbuf)
+UNCH *tbuf;
+{
+     if (sdckname(tbuf, kscope) == FAIL)
+          return FAIL;
+     if (sdparm(tbuf, 0) != NAS1) {
+          sderr(120, (UNCH *)0, (UNCH *)0);
+          return FAIL;
+     }
+     if (matches(tbuf, kdocument))
+          return SUCCESS;
+     if (matches(tbuf, kinstance)) {
+          sderr(E_INSTANCE, (UNCH *)0, (UNCH *)0);
+          return SUCCESS;
+     }
+     sderr(E_SCOPE, tbuf+1, (UNCH *)0);
+     return FAIL;
+}
+
+/* Parse the SYNTAX section.  Uses one token lookahead.  After the
+SYNTAX keyword the next name selects the parser: PUBLIC goes to
+sdpubsyntax(), anything else to sdxsyntax(). */
+
+static int sdsyntax(tbuf)
+UNCH *tbuf;
+{
+     if (sdname(tbuf, ksyntax) == FAIL)
+          return FAIL;
+     if (sdparm(tbuf, 0) != NAS1) {
+          sderr(120, (UNCH *)0, (UNCH *)0);
+          return FAIL;
+     }
+     return matches(tbuf, kpublic) ? sdpubsyntax(tbuf) : sdxsyntax(tbuf);
+}
+
+/* Parse the SYNTAX section which starts with PUBLIC.  Uses one token
+lookahead.
+The public identifier selects the core concrete syntax (short
+references off) or the reference concrete syntax (short references
+on); any other identifier is reported with E_SYNTAX.  An optional
+SWITCHES clause of character-number pairs is parsed and range-checked;
+if one is present it is always reported with E_SWITCHES.
+Returns SUCCESS or FAIL. */
+
+static int sdpubsyntax(tbuf)
+UNCH *tbuf;
+{
+     int nswitches;
+
+     if (sdparm(tbuf, &pcblitv) != LIT1) {
+          /* Report the missing literal, as sdpubcapacity() does,
+             instead of failing without any diagnostic. */
+          sderr(123, (UNCH *)0, (UNCH *)0);
+          return FAIL;
+     }
+     sdfixstandard(tbuf);
+     if (ustrcmp(tbuf, CORE_SYNTAX) == 0)
+          sd.shortref = 0;
+     else if (ustrcmp(tbuf, REFERENCE_SYNTAX) == 0)
+          sd.shortref = 1;
+     else
+          sderr(E_SYNTAX, tbuf, (UNCH *)0);
+     /* Anything other than SWITCHES is left as lookahead. */
+     if (sdparm(tbuf, 0) != NAS1)
+          return SUCCESS;
+     if (!matches(tbuf, kswitches))
+          return SUCCESS;
+     nswitches = 0;
+     for (;;) {
+          int errsw = 0;	/* first number of the pair was out of range */
+
+          if (sdparm(tbuf, 0) != NUM1)
+               break;
+          if (atol((char *)tbuf) > 255) {
+               sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
+               errsw = 1;
+          }
+          if (sdparm(tbuf, 0) != NUM1) {
+               sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
+               return FAIL;
+          }
+          /* Give at most one E_CHARNUM per pair. */
+          if (!errsw) {
+               if (atol((char *)tbuf) > 255)
+                    sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
+          }
+          nswitches++;
+     }
+     if (nswitches == 0) {
+          sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
+          return FAIL;
+     }
+     sderr(E_SWITCHES, (UNCH *)0, (UNCH *)0);
+     return SUCCESS;
+}
+
+/* Parse an explicit concrete syntax.  Uses one token lookahead.  The
+sub-sections are parsed in the fixed order given by the table below;
+parsing stops at the first one that fails. */
+
+static
+int sdxsyntax(tbuf)
+UNCH *tbuf;
+{
+     static int (*section[]) P((UNCH *)) = {
+          sdshunchar,
+          sdsynref,
+          sdfunction,
+          sdnaming,
+          sddelim,
+          sdnames,
+          sdquantity,
+     };
+     int next = 0;
+
+     while (next < SIZEOF(section)) {
+          if ((*section[next])(tbuf) == FAIL)
+               return FAIL;
+          next++;
+     }
+     return SUCCESS;
+}
+
+/* Parse the SHUNCHAR section.  Uses one token lookahead.  Every
+CHAR_SHUNNED flag is cleared first; the section then sets the flags
+again from NONE, from CONTROLS optionally followed by character
+numbers, or from a plain list of character numbers. */
+
+static
+int sdshunchar(tbuf)
+UNCH *tbuf;
+{
+     int ch;
+
+     for (ch = 0; ch < 256; ch++)
+          char_flags[ch] &= ~CHAR_SHUNNED;
+
+     if (sdckname(tbuf, kshunchar) == FAIL)
+          return FAIL;
+
+     if (sdparm(tbuf, 0) == NAS1) {
+          if (matches(tbuf, knone)) {
+               (void)sdparm(tbuf, 0);
+               return SUCCESS;
+          }
+          if (matches(tbuf, kcontrols)) {
+               for (ch = 0; ch < 256; ch++)
+                    if (ISASCII(ch) && iscntrl(ch))
+                         char_flags[ch] |= CHAR_SHUNNED;
+               /* CONTROLS may be followed by explicit numbers. */
+               if (sdparm(tbuf, 0) != NUM1)
+                    return SUCCESS;
+          }
+     }
+     /* Any other name falls through to here and is rejected. */
+     if (pcbsd.action != NUM1) {
+          sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
+          return FAIL;
+     }
+     for (;;) {
+          long num = atol((char *)tbuf);
+
+          if (num <= 255)
+               char_flags[(int)num] |= CHAR_SHUNNED;
+          else
+               sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
+          if (sdparm(tbuf, 0) != NUM1)
+               break;
+     }
+     return SUCCESS;
+}
+
+/* Parse the syntax reference character set.  Uses one token lookahead.
+The character set description is read into synrefcharset. */
+
+static
+int sdsynref(tbuf)
+UNCH *tbuf;
+{
+     int status = sdcsdesc(tbuf, synrefcharset);
+
+     return status;
+}
+
+/* Translate a character number from the syntax reference character set
+to the system character set.  TBUF holds the number as decimal text.
+If it can't be done, give an error message and return -1. */
+
+static
+int sdtranscharnum(tbuf)
+UNCH *tbuf;
+{
+     long num = atol((char *)tbuf);
+
+     if (num <= 255)
+          return sdtranschar((int)num);
+     sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
+     return -1;
+}
+
+
+/* Map character number N of the syntax reference character set to the
+system character set.  A non-negative table entry is the answer; the
+negative markers UNUSED, UNDESC, UNKNOWN and UNKNOWN_SET each produce
+their own error message and a return value of -1.  Any other negative
+entry aborts. */
+static
+int sdtranschar(n)
+int n;
+{
+     int mapped = synrefcharset[n];
+
+     if (mapped >= 0)
+          return mapped;
+
+     if (mapped == UNUSED)
+          sderr(E_SYNREFUNUSED, ltous((long)n), (UNCH *)0);
+     else if (mapped == UNDESC)
+          sderr(E_SYNREFUNDESC, ltous((long)n), (UNCH *)0);
+     else if (mapped == UNKNOWN)
+          sderr(E_SYNREFUNKNOWN, ltous((long)n), (UNCH *)0);
+     else if (mapped == UNKNOWN_SET)
+          sderr(E_SYNREFUNKNOWNSET, ltous((long)n), (UNCH *)0);
+     else
+          abort();
+     return -1;
+}
+
+
+/* Parse the function section. Uses two tokens lookahead. "NAMING"
+could be a function name.
+The required functions RE, RS and SPACE are read first, each with its
+character number.  Further entries are then read as a name, a second
+name and a character number, until the name NAMING is followed by
+LCNMSTRT.  Anything that departs from the reference concrete syntax
+(a required function on a different character, an added function
+other than TAB, TAB on a different character, or no TAB at all) sets
+CHANGED, which is reported once at the end with E_FUNCHAR.
+Returns SUCCESS or FAIL. */
+
+static
+int sdfunction(tbuf)
+UNCH *tbuf;
+{
+ static UNCH *fun[] = { kre, krs, kspace };
+ static int funval[] = { RECHAR, RSCHAR, ' ' };
+ int i;
+ int had_tab = 0; /* a TAB entry has been processed */
+ int changed = 0; /* attempted to change reference syntax */
+
+ if (sdckname(tbuf, kfunction) == FAIL)
+ return FAIL;
+ for (i = 0; i < SIZEOF(fun); i++) {
+ int ch;
+ if (sdname(tbuf, fun[i]) == FAIL)
+ return FAIL;
+ if (sdparm(tbuf, 0) != NUM1) {
+ sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
+ return FAIL;
+ }
+ ch = sdtranscharnum(tbuf);
+ if (ch >= 0 && ch != funval[i]) /* translation errors were already reported */
+ changed = 1;
+ }
+ for (;;) {
+ int tabsw = 0; /* this entry is named TAB */
+ int namingsw = 0; /* this entry is named NAMING */
+ if (sdparm(tbuf, 0) != NAS1) {
+ sderr(120, (UNCH *)0, (UNCH *)0);
+ return FAIL;
+ }
+ if (matches(tbuf, (UNCH *)"TAB")) {
+ tabsw = 1;
+ if (had_tab)
+ sderr(E_FUNDUP, (UNCH *)0, (UNCH *)0);
+ }
+ else {
+ for (i = 0; i < SIZEOF(fun); i++) /* RE, RS and SPACE may not be given again */
+ if (matches(tbuf, fun[i]))
+ sderr(E_BADFUN, fun[i], (UNCH *)0);
+ if (matches(tbuf, knaming))
+ namingsw = 1;
+ else
+ changed = 1;
+ }
+ if (sdparm(tbuf, 0) != NAS1) { /* second token of lookahead */
+ sderr(120, (UNCH *)0, (UNCH *)0);
+ return FAIL;
+ }
+ if (namingsw) {
+ if (matches(tbuf, klcnmstrt)) /* NAMING was the section keyword, not a function */
+ break;
+ changed = 1;
+ }
+ if (sdparm(tbuf, 0) != NUM1) {
+ sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
+ return FAIL;
+ }
+ if (tabsw && !had_tab) {
+ int ch = sdtranscharnum(tbuf);
+ if (ch >= 0 && ch != TABCHAR)
+ changed = 1;
+ had_tab = 1;
+ }
+
+ }
+ if (!had_tab)
+ changed = 1;
+ if (changed)
+ sderr(E_FUNCHAR, (UNCH *)0, (UNCH *)0);
+ return SUCCESS;
+}
+
+/* Parse the NAMING section.  Uses no lookahead.
+The four literals LCNMSTRT, UCNMSTRT, LCNMCHAR and UCNMCHAR are read
+and their characters translated to the system character set.  If they
+are acceptable, new copies of the lextoke and lextran tables are built
+in nlextoke and nlextran with the added name start and name
+characters; setlexical() installs them later.  If anything is wrong
+the new tables are discarded.  The NAMECASE sub-section is then read
+into sd.namecase.  Returns SUCCESS or FAIL. */
+
+static
+int sdnaming(tbuf)
+UNCH *tbuf;
+{
+     int i;
+     int bad = 0;
+     static UNCH *classes[] = { klcnmstrt, kucnmstrt, klcnmchar, kucnmchar };
+     static UNCH *types[] = { kgeneral, kentity };
+
+#define NCLASSES SIZEOF(classes)
+
+     int bufsize = 4;		/* allocated size of buf */
+     UNCH *buf = (UNCH *)rmalloc(bufsize); /* holds characters
+					      in naming classes */
+     int bufi = 0;		/* next index into buf */
+     int start[NCLASSES];	/* index of first character for each class */
+     int count[NCLASSES];	/* number of characters for each class */
+
+     for (i = 0; i < NCLASSES; i++) {
+          UNCH *s;
+
+          if (sdckname(tbuf, classes[i]) == FAIL) {
+               frem((UNIV)buf);
+               return FAIL;
+          }
+          if (sdparm(tbuf, &pcblitp) != LIT1) {
+               sderr(123, (UNCH *)0, (UNCH *)0);
+               frem((UNIV)buf);
+               return FAIL;
+          }
+          start[i] = bufi;
+
+          for (s = tbuf; *s; s++) {
+               int c = *s;
+               if (c == DELNONCH) {
+                    /* DELNONCH introduces a shifted character, so the
+                       value to decode is the byte after the delimiter,
+                       not the delimiter itself.
+                       NOTE(review): assumes the literal parser stores
+                       such characters as DELNONCH followed by the
+                       shifted value -- confirm against parselit(). */
+                    if (s[1] == '\0')
+                         break;	/* nothing follows the delimiter */
+                    c = UNSHIFTNON(*++s);
+               }
+               c = sdtranschar(c);
+               if (c < 0)
+                    bad = 1;
+               else if ((char_flags[c] & (CHAR_SIGNIFICANT | CHAR_MAGIC))
+                        && c != '.' && c != '-') {
+                    int class = lextoke[c];
+                    if (class == SEP || class == SP || class == NMC
+                        || class == NMS || class == NU)
+                         sderr(E_NMBAD, ltous((long)c), (UNCH *)0);
+                    else
+                         sderr(E_NMUNSUP, ltous((long)c), (UNCH *)0);
+                    bad = 1;
+               }
+               if (bufi >= bufsize)
+                    buf = (UNCH *)rrealloc((UNIV)buf, bufsize *= 2);
+               /* A failed translation is stored too, but BAD is then
+                  set and the buffer contents are never used. */
+               buf[bufi++] = c;
+          }
+
+          count[i] = bufi - start[i];
+          (void)sdparm(tbuf, 0);
+     }
+     /* Lower and upper case lists must pair up one to one. */
+     if (!bad && count[0] != count[1]) {
+          sderr(E_NMSTRTCNT, (UNCH *)0, (UNCH *)0);
+          bad = 1;
+     }
+     if (!bad && count[2] != count[3]) {
+          sderr(E_NMCHARCNT, (UNCH *)0, (UNCH *)0);
+          bad = 1;
+     }
+     if (!bad) {
+          nlextoke = (UNCH *)rmalloc(256);
+          memcpy((UNIV)nlextoke, lextoke, 256);
+          /* Period and hyphen are name characters only if listed. */
+          nlextoke['.'] = nlextoke['-'] = INV;
+
+          nlextran = (UNCH *)rmalloc(256);
+          memcpy((UNIV)nlextran, lextran, 256);
+
+          for (i = 0; i < count[0]; i++) {
+               UNCH lc = buf[start[0] + i];
+               UNCH uc = buf[start[1] + i];
+               nlextoke[lc] = NMS;
+               nlextoke[uc] = NMS;
+               nlextran[lc] = uc;
+          }
+
+          for (i = 0; i < count[2]; i++) {
+               UNCH lc = buf[start[2] + i];
+               UNCH uc = buf[start[3] + i];
+               if (nlextoke[lc] == NMS) {
+                    sderr(E_NMDUP, ltous((long)lc), (UNCH *)0);
+                    bad = 1;
+               }
+               else if (nlextoke[uc] == NMS) {
+                    sderr(E_NMDUP, ltous((long)uc), (UNCH *)0);
+                    bad = 1;
+               }
+               else {
+                    nlextoke[lc] = NMC;
+                    nlextoke[uc] = NMC;
+                    nlextran[lc] = uc;
+               }
+          }
+          if (nlextoke['-'] != NMC) {
+               sderr(E_NMMINUS, (UNCH *)0, (UNCH *)0);
+               bad = 1;
+          }
+          if (bad) {
+               if (nlextoke) {
+                    frem((UNIV)nlextoke);
+                    nlextoke = 0;
+               }
+               if (nlextran) {
+                    frem((UNIV)nlextran);
+                    nlextran = 0;
+               }
+          }
+     }
+
+     frem((UNIV)buf);
+
+     if (sdckname(tbuf, knamecase) == FAIL)
+          return FAIL;
+     for (i = 0; i < SIZEOF(types); ++i) {
+          if (sdname(tbuf, types[i]) == FAIL)
+               return FAIL;
+          if (sdparm(tbuf, 0) != NAS1) {
+               sderr(120, (UNCH *)0, (UNCH *)0);
+               return FAIL;
+          }
+          if (matches(tbuf, kyes))
+               sd.namecase[i] = 1;
+          else if (matches(tbuf, kno))
+               sd.namecase[i] = 0;
+          else {
+               sderr(E_YESNO, tbuf+1, (UNCH *)0);
+               return FAIL;
+          }
+     }
+     return SUCCESS;
+}
+
+/* Parse the DELIM section.  Uses one token lookahead.  Both the
+GENERAL and the SHORTREF parts are read in full, but any delimiter
+assignment beyond SGMLREF is only reported (E_GENDELIM, E_SRDELIM);
+the one setting recorded is whether short references are SGMLREF or
+NONE. */
+
+static
+int sddelim(tbuf)
+UNCH *tbuf;
+{
+     int gen_changed = 0;	/* a general delimiter was reassigned */
+     int sr_changed = 0;	/* a short reference delimiter was added */
+
+     if (sdname(tbuf, kdelim) == FAIL
+         || sdname(tbuf, kgeneral) == FAIL
+         || sdname(tbuf, ksgmlref) == FAIL)
+          return FAIL;
+     /* Name/literal pairs up to the SHORTREF keyword. */
+     for (;;) {
+          if (sdparm(tbuf, 0) != NAS1) {
+               sderr(120, (UNCH *)0, (UNCH *)0);
+               return FAIL;
+          }
+          if (matches(tbuf, kshortref))
+               break;
+          if (sdparm(tbuf, &pcblitp) != LIT1) {
+               sderr(123, (UNCH *)0, (UNCH *)0);
+               return FAIL;
+          }
+          gen_changed = 1;
+     }
+     if (gen_changed)
+          sderr(E_GENDELIM, (UNCH *)0,(UNCH *)0);
+
+     if (sdparm(tbuf, 0) != NAS1) {
+          sderr(120, (UNCH *)0, (UNCH *)0);
+          return FAIL;
+     }
+     if (matches(tbuf, ksgmlref))
+          sd.shortref = 1;
+     else if (matches(tbuf, knone))
+          sd.shortref = 0;
+     else {
+          sderr(118, tbuf+1, ksgmlref); /* probably they forgot SGMLREF */
+          return FAIL;
+     }
+     while (sdparm(tbuf, &pcblitp) == LIT1)
+          sr_changed = 1;
+     if (sr_changed)
+          sderr(E_SRDELIM, (UNCH *)0, (UNCH *)0);
+     return SUCCESS;
+}
+
+/* Parse the NAMES section. Uses one token lookahead.
+After NAMES SGMLREF, each entry is a reference reserved name followed
+by its replacement.  Replacements are collected in newkey (allocated
+on first use) and are rejected if they equal a reference name or an
+earlier replacement.  When the list ends, each replaced entry of key
+is swapped with its newkey entry, so key holds the names now in force
+and newkey holds the reference names they replaced.
+Returns SUCCESS or FAIL. */
+
+static
+int sdnames(tbuf)
+UNCH *tbuf;
+{
+ int i;
+ if (sdckname(tbuf, knames) == FAIL)
+ return FAIL;
+ if (sdname(tbuf, ksgmlref) == FAIL)
+ return FAIL;
+
+ while (sdparm(tbuf, 0) == NAS1) {
+ int j;
+ if (matches(tbuf, kquantity)) /* start of the next section */
+ break;
+ for (i = 0; i < NKEYS; i++) /* i becomes the index of the reference name */
+ if (matches(tbuf, key[i]))
+ break;
+ if (i >= NKEYS) {
+ sderr(E_BADKEY, tbuf+1, (UNCH *)0);
+ return FAIL;
+ }
+ if (sdparm(tbuf, &pcblitp) != NAS1) { /* the replacement name */
+ sderr(120, (UNCH *)0, (UNCH *)0);
+ return FAIL;
+ }
+ if (!newkey) {
+ newkey = (UNCH (*)[REFNAMELEN+1])rmalloc((REFNAMELEN+1)*NKEYS);
+ MEMZERO((UNIV)newkey, (REFNAMELEN+1)*NKEYS);
+ }
+ for (j = 0; j < NKEYS; j++) { /* look for a clash with any existing name */
+ if (matches(tbuf, key[j])) {
+ sderr(E_REFNAME, tbuf + 1, (UNCH *)0);
+ break;
+ }
+ if (matches(tbuf, newkey[j])) {
+ sderr(E_DUPNAME, tbuf + 1, (UNCH *)0);
+ break;
+ }
+ }
+ if (j >= NKEYS) /* no clash: remember the replacement */
+ ustrcpy(newkey[i], tbuf + 1);
+ }
+ /* Now install the new keys. */
+ if (newkey) {
+ for (i = 0; i < NKEYS; i++)
+ if (newkey[i][0] != '\0') {
+ UNCH temp[REFNAMELEN + 1];
+
+ ustrcpy(temp, key[i]); /* swap key[i] and newkey[i] */
+ ustrcpy(key[i], newkey[i]);
+ ustrcpy(newkey[i], temp);
+ }
+ }
+ return SUCCESS;
+}
+
+/* Parse the QUANTITY section.  Uses one token lookahead.
+After QUANTITY SGMLREF, each entry is a quantity name and a number.
+A number below the current value is reported with E_QUANTITY and
+ignored; a number above the supported maximum is reported with
+E_QTOOBIG and clamped to that maximum.  Accepted values are applied to
+sd.quantity only after the whole list has been read, and each changed
+quantity is flagged in quantity_changed.  Returns SUCCESS or FAIL. */
+
+static int sdquantity(tbuf)
+UNCH *tbuf;
+{
+     int quantity[NQUANTITY];	/* new values; -1 means not given */
+     int i;
+
+     for (i = 0; i < NQUANTITY; i++)
+          quantity[i] = -1;
+     if (sdckname(tbuf, kquantity) == FAIL)
+          return FAIL;
+     if (sdname(tbuf, ksgmlref) == FAIL)
+          return FAIL;
+     while (sdparm(tbuf, 0) == NAS1 && !matches(tbuf, kfeatures)) {
+          long n;
+          for (i = 0; i < SIZEOF(quantity_names); i++)
+               if (matches(tbuf, quantity_names[i]))
+                    break;
+          if (i >= SIZEOF(quantity_names)) {
+               sderr(E_BADQUANTITY, tbuf + 1, (UNCH *)0);
+               return FAIL;
+          }
+          if (sdparm(tbuf, 0) != NUM1) {
+               sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
+               return FAIL;
+          }
+          n = atol((char *)tbuf);
+          if (n < sd.quantity[i])
+               sderr(E_QUANTITY, (UNCH *)quantity_names[i],
+                     ltous((long)sd.quantity[i]));
+          else if (n > max_quantity[i]) {
+               sderr(E_QTOOBIG, (UNCH *)quantity_names[i],
+                     ltous((long)max_quantity[i]));
+               quantity[i] = max_quantity[i];
+          }
+          else
+               quantity[i] = (int)n;
+     }
+     for (i = 0; i < NQUANTITY; i++)
+          if (quantity[i] > 0) {
+               sd.quantity[i] = quantity[i];
+               if (!quantity_changed) {
+                    quantity_changed = (char *)rmalloc(NQUANTITY);
+                    /* Every slot is read later, so clear the slots
+                       that are not set below rather than relying on
+                       rmalloc() to return zeroed storage. */
+                    MEMZERO((UNIV)quantity_changed, NQUANTITY);
+               }
+               quantity_changed[i] = 1;
+          }
+     return SUCCESS;
+}
+
+/* Parse the FEATURES section. Uses no lookahead.
+The feature keywords must appear in exactly the order of the table
+below.  A keyword with an argument is followed by YES or NO, and for a
+numeric feature YES is followed by a number.  Features whose table
+entry has no storage location are not supported: enabling one is
+reported with E_NOTSUPPORTED.  If SHORTTAG ends up off, the parse
+tables are changed by noemptytag().  Returns SUCCESS or FAIL. */
+
+static int sdfeatures(tbuf)
+UNCH *tbuf;
+{
+ static struct {
+ UNCH *name;
+ UNCH argtype; /* 0 = no argument, 1 = boolean, 2 = numeric */
+ UNIV valp; /* UNCH * if boolean, long * if numeric. */
+ } features[] = {
+ { kminimize, 0, 0 },
+ { kdatatag, 1, 0 },
+ { komittag, 1, (UNIV)&sd.omittag },
+ { krank, 1, 0 },
+ { kshorttag, 1, (UNIV)&sd.shorttag },
+ { klink, 0, 0 },
+ { ksimple, 2, 0 },
+ { kimplicit, 1, 0 },
+ { kexplicit, 2, 0 },
+ { kother, 0, 0 },
+ { kconcur, 2, 0 },
+ { ksubdoc, 2, (UNIV)&sd.subdoc },
+ { kformal, 1, (UNIV)&sd.formal },
+ };
+
+ int i;
+
+ if (sdckname(tbuf, kfeatures) == FAIL)
+ return FAIL;
+ for (i = 0; i < SIZEOF(features); i++) {
+ if (sdname(tbuf, features[i].name) == FAIL) return FAIL;
+ if (features[i].argtype > 0) {
+ long n; /* value of the feature: 0 for NO, 1 or the given number for YES */
+ if (sdparm(tbuf, 0) != NAS1) {
+ sderr(120, (UNCH *)0, (UNCH *)0);
+ return FAIL;
+ }
+ if (matches(tbuf, kyes)) {
+ if (features[i].argtype > 1) {
+ if (sdparm(tbuf, 0) != NUM1) {
+ sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
+ return FAIL;
+ }
+ n = atol((char *)tbuf);
+ if (n == 0) /* YES 0 is reported but parsing continues */
+ sderr(E_ZEROFEATURE, features[i].name, (UNCH *)0);
+ }
+ else
+ n = 1;
+ }
+ else if (matches(tbuf, kno))
+ n = 0;
+ else {
+ sderr(E_YESNO, tbuf+1, (UNCH *)0);
+ return FAIL;
+ }
+ if (features[i].valp == 0) { /* nowhere to store it: feature not supported */
+ if (n > 0)
+ sderr(E_NOTSUPPORTED, features[i].name,
+ (UNCH *)0);
+ }
+ else if (features[i].argtype > 1)
+ *(long *)features[i].valp = n;
+ else
+ *(UNCH *)features[i].valp = (UNCH)n;
+ }
+ }
+ if (!sd.shorttag)
+ noemptytag();
+ return SUCCESS;
+}
+
+/* Parse the APPINFO section.  Uses no lookahead.  The keyword is
+followed either by a literal, which sets appinfosw, or by the name
+NONE; anything else is an error. */
+
+static int sdappinfo(tbuf)
+UNCH *tbuf;
+{
+     int token;
+
+     if (sdname(tbuf, kappinfo) == FAIL)
+          return FAIL;
+     token = sdparm(tbuf, &pcblitv);
+     if (token == LIT1) {
+          appinfosw = 1;
+          return SUCCESS;
+     }
+     if (token != NAS1) {
+          sderr(E_XNMLIT, knone, (UNCH *)0);
+          return FAIL;
+     }
+     if (matches(tbuf, knone))
+          return SUCCESS;
+     sderr(118, tbuf+1, knone);
+     return FAIL;
+}
+
+/* Change a prefix of ISO 8879-1986 to ISO 8879:1986.  Amendment 1 to
+the standard requires the latter.  The old form is reported with
+E_STANDARD and corrected in place in TBUF. */
+
+static VOID sdfixstandard(tbuf)
+UNCH *tbuf;
+{
+     if (strncmp((char *)tbuf, "ISO 8879-1986", 13) != 0)
+          return;
+     sderr(E_STANDARD, (UNCH *)0, (UNCH *)0);
+     tbuf[8] = ':';		/* index 8 is the hyphen */
+}
+
+/* Read the next parameter and require it to be the name KEY.  Gives
+an error message and returns FAIL if it is not a name or is a
+different name; otherwise returns SUCCESS. */
+static int sdname(tbuf, key)
+UNCH *tbuf;
+UNCH *key;
+{
+     if (sdparm(tbuf, 0) == NAS1) {
+          if (matches(tbuf, key))
+               return SUCCESS;
+          sderr(118, tbuf+1, key);
+          return FAIL;
+     }
+     sderr(120, (UNCH *)0, (UNCH *)0);
+     return FAIL;
+}
+
+/* Like sdname(), but check the parameter that has already been read
+(the lookahead token) instead of reading a new one. */
+static int sdckname(tbuf, key)
+UNCH *tbuf;
+UNCH *key;
+{
+     if (pcbsd.action == NAS1) {
+          if (matches(tbuf, key))
+               return SUCCESS;
+          sderr(118, tbuf+1, key);
+          return FAIL;
+     }
+     sderr(120, (UNCH *)0, (UNCH *)0);
+     return FAIL;
+}
+
+/* Parse a SGML declaration parameter. If lpcb is NULL, pt must be
+REFNAMELEN+2 characters long, otherwise at least LITLEN+2 characters
+long. LPCB should be NULL if a literal is not allowed.
+The token text is stored in PT.  The return value is the token type,
+which is also left in pcbsd.action: LIT1 for a literal with either
+kind of delimiter, NAS1 for a name, NUM1 for a number, INV_ for a
+literal that was not allowed, or otherwise whatever action the parse
+produced. */
+
+static int sdparm(pt, lpcb)
+UNCH *pt; /* Token buffer. */
+struct parse *lpcb; /* PCB for literal parse. */
+{
+ for (;;) { /* report and skip ISIG actions */
+ parse(&pcbsd);
+ if (pcbsd.action != ISIG)
+ break;
+ sderr(E_SIGNIFICANT, (UNCH *)0, (UNCH *)0);
+ }
+ ++parmno;
+ switch (pcbsd.action) {
+ case LIT1:
+ if (!lpcb) {
+ sderr(E_BADLIT, (UNCH *)0, (UNCH *)0);
+ REPEATCC;
+ return pcbsd.action = INV_;
+ }
+ parselit(pt, lpcb, REFLITLEN, lex.d.lit);
+ return pcbsd.action;
+ case LIT2:
+ if (!lpcb) {
+ sderr(E_BADLIT, (UNCH *)0, (UNCH *)0);
+ REPEATCC;
+ return pcbsd.action = INV_;
+ }
+ parselit(pt, lpcb, REFLITLEN, lex.d.lita);
+ return pcbsd.action = LIT1; /* callers only ever test for LIT1 */
+ case NAS1:
+ parsenm(pt, 1);
+ return pcbsd.action;
+ case NUM1:
+ parsetkn(pt, NU, REFNAMELEN);
+ return pcbsd.action;
+ }
+ return pcbsd.action;
+}
+
+/* Initialise the character flags and buffers used for SGML
+declaration handling: flag the characters the parser reserves for its
+own use, work out from the lexical tables which characters are
+significant, mark the shunned characters of the reference concrete
+syntax, and allocate the parser's buffers. */
+VOID sdinit()
+{
+     /* Shunned character numbers in the reference concrete syntax. */
+     static UNCH refshun[] = {
+          0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+          19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 255
+     };
+     UNCH **tab;
+     int ch;
+     int k;
+
+     /* A character is magic if it is a non-SGML character used for
+        some internal purpose in the parser. */
+     char_flags[EOS] |= CHAR_MAGIC;
+     char_flags[EOBCHAR] |= CHAR_MAGIC;
+     char_flags[EOFCHAR] |= CHAR_MAGIC;
+     char_flags[GENRECHAR] |= CHAR_MAGIC;
+     char_flags[DELNONCH] |= CHAR_MAGIC;
+     char_flags[DELCDATA] |= CHAR_MAGIC;
+     char_flags[DELSDATA] |= CHAR_MAGIC;
+
+     /* Figure out the significant SGML characters: a character is
+        significant if some lexical table puts it in a class other
+        than that table's data-character or non-SGML class. */
+     for (tab = lextabs; *tab; tab++) {
+          UNCH *lex = *tab;
+          UNCH datclass = lex[CANON_DATACHAR];
+          UNCH nonclass = lex[CANON_NONSGML];
+
+          for (ch = 0; ch < 256; ch++) {
+               if (char_flags[ch] & CHAR_MAGIC)
+                    continue;
+               if (lex[ch] == datclass || lex[ch] == nonclass)
+                    continue;
+               char_flags[ch] |= CHAR_SIGNIFICANT;
+          }
+     }
+     for (k = 0; k < SIZEOF(refshun); k++)
+          char_flags[refshun[k]] |= CHAR_SHUNNED;
+     for (ch = 0; ch < 256; ch++)
+          if (ISASCII(ch) && iscntrl(ch))
+               char_flags[ch] |= CHAR_SHUNNED;
+     bufsalloc();
+}
+
+
+/* Allocate the parser's buffers at the sizes required by the
+reference quantity set. */
+static
+VOID bufsalloc()
+{
+     /* entbuf is used for parsing numeric character references */
+     entbuf = (UNCH *)rmalloc(REFNAMELEN + 2);
+     tbuf = (UNCH *)rmalloc(REFATTSPLEN+REFLITLEN+1);
+     scbs = (struct source *)rmalloc(sizeof(struct source)*(REFENTLVL+1));
+}
+
+/* Resize the buffers allocated by bufsalloc() to suit the quantity
+set now in force.  A buffer is reallocated only if the size it needs
+differs from the reference size. */
+static
+VOID bufsrealloc()
+{
+     UNS need;
+
+     if (ENTLVL != REFENTLVL)
+          scbs = (struct source *)rrealloc((UNIV)scbs,
+                                           (ENTLVL+1)*sizeof(struct source));
+
+     /* tbuf must hold the largest of LITLEN+ATTSPLEN, PILEN and
+        BSEQLEN. */
+     need = LITLEN + ATTSPLEN;
+     if (PILEN > need)
+          need = PILEN;
+     if (BSEQLEN > need)
+          need = BSEQLEN;
+     if (need != REFATTSPLEN + REFLITLEN)
+          tbuf = (UNCH *)rrealloc((UNIV)tbuf, need + 1);
+
+     if (NAMELEN != REFNAMELEN)
+          entbuf = (UNCH *)rrealloc((UNIV)entbuf, NAMELEN + 2);
+}
+
+
+/* Check that the non-SGML characters are compatible with the concrete
+syntax and munge the lexical tables accordingly.  Also make any
+changes that are required by the NAMING section, then release the
+temporary tables that section built.
+(An earlier version of this comment described an IMPLIED argument
+that suppressed the messages about shunned characters; this function
+takes no arguments and always gives them.) */
+
+static VOID setlexical()
+{
+     int i;
+     UNCH **p;
+
+     if (nlextoke) {
+          /* Handle characters that were made significant by the
+             NAMING section. */
+          for (i = 0; i < 256; i++)
+               if (nlextoke[i] == NMC || nlextoke[i] == NMS)
+                    char_flags[i] |= CHAR_SIGNIFICANT;
+     }
+
+     for (i = 0; i < 256; i++)
+          if (char_flags[i] & CHAR_SIGNIFICANT) {
+               /* Significant SGML characters mustn't be non-SGML. */
+               if (char_flags[i] & CHAR_NONSGML) {
+                    UNCH buf[2];
+                    buf[0] = i;
+                    buf[1] = '\0';
+                    sderr(E_NONSGML, buf, (UNCH *)0);
+                    char_flags[i] &= ~CHAR_NONSGML;
+               }
+          }
+          else {
+               /* Shunned characters that are not significant SGML
+                  characters must be non-SGML. */
+               if ((char_flags[i] & (CHAR_SHUNNED | CHAR_NONSGML))
+                   == CHAR_SHUNNED) {
+                    sderr(E_SHUNNED, ltous((long)i), (UNCH *)0);
+                    char_flags[i] |= CHAR_NONSGML;
+               }
+          }
+
+
+     /* Now munge the lexical tables. */
+     for (p = lextabs; *p; p++) {
+          UNCH nonclass = (*p)[CANON_NONSGML];
+          UNCH datclass = (*p)[CANON_DATACHAR];
+          UNCH nmcclass = (*p)[CANON_NMC];
+          UNCH nmsclass = (*p)[CANON_NMS];
+          UNCH minclass = (*p)[CANON_MIN];
+          for (i = 0; i < 256; i++) {
+               if (char_flags[i] & CHAR_NONSGML) {
+                    /* We already know that it's not significant. */
+                    if (!(char_flags[i] & CHAR_MAGIC))
+                         (*p)[i] = nonclass;
+               }
+               else {
+                    if (char_flags[i] & CHAR_MAGIC) {
+                         sderr(E_MUSTBENON, ltous((long)i), (UNCH *)0);
+                    }
+                    else if (!(char_flags[i] & CHAR_SIGNIFICANT))
+                         (*p)[i] = datclass;
+                    else if (nlextoke
+                             /* This relies on the fact that lextoke
+                                occurs last in lextabs. */
+                             && lextoke[i] != nlextoke[i]) {
+                         switch (nlextoke[i]) {
+                         case NMC:
+                              (*p)[i] = nmcclass;
+                              break;
+                         case NMS:
+                              (*p)[i] = nmsclass;
+                              break;
+                         case INV:
+                              /* This will happen if period is not a
+                                 name character. */
+                              (*p)[i] = minclass;
+                              break;
+                         default:
+                              abort();
+                         }
+                    }
+               }
+          }
+     }
+     if (nlextran) {
+          memcpy((UNIV)lextran, (UNIV)nlextran, 256);
+          frem((UNIV)nlextran);
+          /* Clear the pointer, as is done for nlextoke below, so the
+             freed table cannot be used or freed again. */
+          nlextran = 0;
+     }
+     if (nlextoke) {
+          frem((UNIV)nlextoke);
+          nlextoke = 0;
+     }
+
+}
+
+/* Munge parse tables so that empty start and end tags are not recognized.
+This is done for each of the four parse control blocks listed below.
+The table rows are visited in pairs: the even row is scanned to
+discover the highest state number, and in the odd row a NET_ or NST_
+action in the column for '>' is replaced by the action in the column
+for '_'.
+NOTE(review): this reads as even rows holding next states and odd rows
+holding actions -- confirm against the parse table layout. */
+
+static VOID noemptytag()
+{
+ static struct parse *pcbs[] = { &pcbconm, &pcbcone, &pcbconr, &pcbconc };
+ int i;
+
+ for (i = 0; i < SIZEOF(pcbs); i++) {
+ int maxclass, maxstate;
+ int j, k, act;
+ UNCH *plex = pcbs[i]->plex; /* maps a character to its lexical class */
+ UNCH **ptab = pcbs[i]->ptab; /* table rows, indexed by lexical class */
+
+ /* Figure out the maximum lexical class. */
+ maxclass = 0;
+ for (j = 0; j < 256; j++)
+ if (plex[j] > maxclass)
+ maxclass = plex[j];
+
+ /* Now figure out the maximum state number and at the same time
+ change actions. */
+
+ maxstate = 0;
+
+ for (j = 0; j <= maxstate; j += 2) { /* maxstate may grow while the loop runs */
+ for (k = 0; k <= maxclass; k++)
+ if (ptab[j][k] > maxstate)
+ maxstate = ptab[j][k];
+ /* If the '>' class has an empty start or end tag action,
+ change it to the action that the NMC class has. */
+ act = ptab[j + 1][plex['>']];
+ if (act == NET_ || act == NST_)
+ ptab[j + 1][plex['>']] = ptab[j + 1][plex['_']];
+ }
+ }
+}
+
+/* Look up the value of the entry in pmap PTR whose key is KEY.  The
+map ends at the first entry with a null name.  Returns 0 if no entry
+has that key. */
+
+static UNIV pmaplookup(ptr, key)
+struct pmap *ptr;
+char *key;
+{
+     while (ptr->name) {
+          if (strcmp(key, ptr->name) == 0)
+               return ptr->value;
+          ptr++;
+     }
+     return 0;
+}
+
+/* Return an ASCII representation of N.  The result is held in a
+static buffer, so it is overwritten by the next call. */
+
+static UNCH *ltous(n)
+long n;
+{
+     /* Three characters per byte of a long, plus a sign and the
+        terminating null. */
+     static char digits[sizeof(long)*3 + 2];
+
+     sprintf(digits, "%ld", n);
+     return (UNCH *)digits;
+}
+
+VOID sgmlwrsd(fp) /* Write to FP the text of an SGML declaration
+ describing the settings currently in force: character set,
+ capacities, concrete syntax, features and APPINFO.  On the
+ first call the shunned characters that are not significant
+ are also marked as non-SGML. */
+FILE *fp;
+{
+ int i;
+ int changed;
+ char *p;
+ char uc[256]; /* upper case characters (with different lower
+ case characters) */
+ char lcletter[256]; /* LC letters: a-z */
+
+ fprintf(fp, "<!SGML \"%s\"\n", standard);
+ fprintf(fp, "CHARSET\nBASESET \"%s//CHARSET %s//%s\"\nDESCSET\n",
+ SYSTEM_CHARSET_OWNER,
+ SYSTEM_CHARSET_DESCRIPTION,
+ SYSTEM_CHARSET_DESIGNATING_SEQUENCE);
+
+ if (!done_nonsgml) { /* do this marking only once */
+ done_nonsgml = 1;
+ for (i = 0; i < 256; i++)
+ if ((char_flags[i] & (CHAR_SIGNIFICANT | CHAR_SHUNNED))
+ == CHAR_SHUNNED)
+ char_flags[i] |= CHAR_NONSGML;
+ }
+ i = 0;
+ while (i < 256) { /* one DESCSET line per run of characters with the same non-SGML status */
+ int j;
+ for (j = i + 1; j < 256; j++)
+ if ((char_flags[j] & CHAR_NONSGML)
+ != (char_flags[i] & CHAR_NONSGML))
+ break;
+ if (char_flags[i] & CHAR_NONSGML)
+ fprintf(fp, "%d %d UNUSED\n", i, j - i);
+ else
+ fprintf(fp, "%d %d %d\n", i, j - i, i);
+ i = j;
+ }
+ fprintf(fp, "CAPACITY\n");
+ changed = 0;
+ for (i = 0; i < NCAPACITY; i++) /* list only capacities that differ from the reference set */
+ if (refcapset[i] != sd.capacity[i]) {
+ if (!changed) {
+ fprintf(fp, "SGMLREF\n");
+ changed = 1;
+ }
+ fprintf(fp, "%s %ld\n", captab[i], sd.capacity[i]);
+ }
+ if (!changed)
+ fprintf(fp, "PUBLIC \"%s\"\n", capset_map[0].name);
+ fprintf(fp, "SCOPE DOCUMENT\n");
+
+ fprintf(fp, "SYNTAX\nSHUNCHAR");
+ for (i = 0; i < 256; i++)
+ if (char_flags[i] & CHAR_SHUNNED)
+ fprintf(fp, " %d", i);
+ fprintf(fp, "\n");
+ fprintf(fp, "BASESET \"%s//CHARSET %s//%s\"\nDESCSET 0 256 0\n",
+ SYSTEM_CHARSET_OWNER,
+ SYSTEM_CHARSET_DESCRIPTION,
+ SYSTEM_CHARSET_DESIGNATING_SEQUENCE);
+
+ fprintf(fp, "FUNCTION\nRE 13\nRS 10\nSPACE 32\nTAB SEPCHAR 9\n");
+
+ MEMZERO((UNIV)uc, 256);
+ for (i = 0; i < 256; i++) /* flag every character that some other character translates to */
+ if (lextran[i] != i)
+ uc[lextran[i]] = 1;
+
+ MEMZERO((UNIV)lcletter, 256);
+ for (p = "abcdefghijklmnopqrstuvwxyz"; *p; p++)
+ lcletter[(unsigned char)*p]= 1;
+
+ fprintf(fp, "NAMING\n");
+ fputs("LCNMSTRT \"", fp);
+ for (i = 0; i < 256; i++) /* name start characters other than a-z and translation targets */
+ if (lextoke[i] == NMS && !uc[i] && !lcletter[i])
+ fprintf(fp, "&#%d;", i);
+ fputs("\"\n", fp);
+ fputs("UCNMSTRT \"", fp);
+ for (i = 0; i < 256; i++) /* same characters, written as their translations */
+ if (lextoke[i] == NMS && !uc[i] && !lcletter[i])
+ fprintf(fp, "&#%d;", lextran[i]);
+ fputs("\"\n", fp);
+ fputs("LCNMCHAR \"", fp);
+ for (i = 0; i < 256; i++)
+ if (lextoke[i] == NMC && !uc[i])
+ fprintf(fp, "&#%d;", i);
+ fputs("\"\n", fp);
+ fputs("UCNMCHAR \"", fp);
+ for (i = 0; i < 256; i++)
+ if (lextoke[i] == NMC && !uc[i])
+ fprintf(fp, "&#%d;", lextran[i]);
+ fputs("\"\n", fp);
+
+ fprintf(fp, "NAMECASE\nGENERAL %s\nENTITY %s\n",
+ sd.namecase[0] ? "YES" : "NO",
+ sd.namecase[1] ? "YES" : "NO");
+ fprintf(fp, "DELIM\nGENERAL SGMLREF\nSHORTREF %s\n",
+ sd.shortref ? "SGMLREF" : "NONE");
+ fprintf(fp, "NAMES SGMLREF\n");
+ if (newkey) {
+ /* The reference key was saved in newkey. */
+ for (i = 0; i < NKEYS; i++)
+ if (newkey[i][0])
+ fprintf(fp, "%s %s\n", newkey[i], key[i]);
+ }
+ fprintf(fp, "QUANTITY SGMLREF\n");
+ if (quantity_changed)
+ for (i = 0; i < NQUANTITY; i++)
+ if (quantity_changed[i])
+ fprintf(fp, "%s %d\n", quantity_names[i], sd.quantity[i]);
+ fprintf(fp,
+ "FEATURES\nMINIMIZE\nDATATAG NO OMITTAG %s RANK NO SHORTTAG %s\n",
+ sd.omittag ? "YES" : "NO",
+ sd.shorttag ? "YES" : "NO");
+ fprintf(fp, "LINK SIMPLE NO IMPLICIT NO EXPLICIT NO\n");
+ fprintf(fp, "OTHER CONCUR NO ");
+ if (sd.subdoc > 0)
+ fprintf(fp, "SUBDOC YES %ld ", sd.subdoc);
+ else
+ fprintf(fp, "SUBDOC NO ");
+ fprintf(fp, "FORMAL %s\n", sd.formal ? "YES" : "NO");
+ fprintf(fp, "APPINFO NONE");
+ fprintf(fp, ">\n");
+}
+
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/sgmldecl.h b/usr.bin/sgmls/sgmls/sgmldecl.h
new file mode 100644
index 0000000..d5d0466
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/sgmldecl.h
@@ -0,0 +1,84 @@
+/* sgmldecl.h: SGML declaration parsing. */
+
+#define QATTCNT 0
+#define QATTSPLEN 1
+#define QBSEQLEN 2
+#define QDTAGLEN 3
+#define QDTEMPLEN 4
+#define QENTLVL 5
+#define QGRPCNT 6
+#define QGRPGTCNT 7
+#define QGRPLVL 8
+#define QLITLEN 9
+#define QNAMELEN 10
+#define QNORMSEP 11
+#define QPILEN 12
+#define QTAGLEN 13
+#define QTAGLVL 14
+
+#define NQUANTITY (QTAGLVL+1)
+
+#define TOTALCAP 0
+#define ENTCAP 1
+#define ENTCHCAP 2
+#define ELEMCAP 3
+#define GRPCAP 4
+#define EXGRPCAP 5
+#define EXNMCAP 6
+#define ATTCAP 7
+#define ATTCHCAP 8
+#define AVGRPCAP 9
+#define NOTCAP 10
+#define NOTCHCAP 11
+#define IDCAP 12
+#define IDREFCAP 13
+#define MAPCAP 14
+#define LKSETCAP 15
+#define LKNMCAP 16
+
+extern char *captab[];
+
+struct sgmldecl { /* settings taken from the SGML declaration */
+ long capacity[NCAPACITY]; /* capacity values; presumably indexed by TOTALCAP..LKNMCAP -- confirm */
+ long subdoc; /* SUBDOC feature: 0 for NO, otherwise the number given with YES */
+ UNCH formal; /* FORMAL feature: 1 for YES, 0 for NO */
+ UNCH omittag; /* OMITTAG feature: 1 for YES, 0 for NO */
+ UNCH shorttag; /* SHORTTAG feature: 1 for YES, 0 for NO */
+ UNCH shortref; /* 1 if short reference delimiters are SGMLREF, 0 if NONE */
+ UNCH namecase[2]; /* case translation of general/entity names */
+ int quantity[NQUANTITY]; /* quantity values, indexed by the Q... constants */
+};
+
+extern struct sgmldecl sd;
+
+#define OMITTAG (sd.omittag)
+#define SUBDOC (sd.subdoc)
+#define SHORTTAG (sd.shorttag)
+#define FORMAL (sd.formal)
+
+#define ATTCNT (sd.quantity[QATTCNT])
+#define ATTSPLEN (sd.quantity[QATTSPLEN])
+#define BSEQLEN (sd.quantity[QBSEQLEN])
+#define ENTLVL (sd.quantity[QENTLVL])
+#define GRPGTCNT (sd.quantity[QGRPGTCNT])
+#define GRPCNT (sd.quantity[QGRPCNT])
+#define GRPLVL (sd.quantity[QGRPLVL])
+#define LITLEN (sd.quantity[QLITLEN])
+#define NAMELEN (sd.quantity[QNAMELEN])
+#define NORMSEP (sd.quantity[QNORMSEP])
+#define PILEN (sd.quantity[QPILEN])
+#define TAGLEN (sd.quantity[QTAGLEN])
+#define TAGLVL (sd.quantity[QTAGLVL])
+
+#define NAMECASE (sd.namecase[0])
+#define ENTCASE (sd.namecase[1])
+
+#define YES 1
+#define NO 0
+
+#define UNUSED -1
+#define UNKNOWN -2
+#define UNDESC -3
+#define UNKNOWN_SET -4
+
+extern int asciicharset[];
diff --git a/usr.bin/sgmls/sgmls/sgmlfnsm.h b/usr.bin/sgmls/sgmls/sgmlfnsm.h
new file mode 100644
index 0000000..0d617fb
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/sgmlfnsm.h
@@ -0,0 +1,129 @@
+/* SGMLFNSM.H: SGML function declarations (ANSI prototypes). */
+VOID adlfree P((struct ad *, int));
+VOID adlval P((int,struct etd *));
+VOID aenttst P((int, UNCH *));
+int allhit P((struct thdr *,unsigned long *,int,int));
+VOID ambig P((void));
+VOID ambigfree P((void));
+int amemget P((struct ad *,int,UNCH *));
+int anmget P((int,UNCH *));
+int anmtgrp P((struct parse *,struct ad *,int,UNS *,int));
+int antvget P((int,UNCH *,UNCH **));
+int anyhit P((unsigned long *));
+int attval P((int,UNCH *,int,struct ad *));
+VOID charrefa P((UNCH *));
+int charrefn P((UNCH *, struct parse *));
+int context P((struct etd *,struct thdr *,struct mpos *,UNCH *,int));
+struct etd **copygrp P((struct etd **,unsigned int));
+int datachar P((int, struct parse *));
+struct dcncb *dcnfind P((UNCH *));
+VOID destack P((void));
+int econtext P((struct thdr *,struct mpos *,UNCH *));
+VOID endprolog P((void));
+struct entity *entfind P((UNCH *));
+int entopen P((struct entity *));
+/* VOID eposset P((void)); NOT YET IN USE. */
+VOID error P((struct error *));
+VOID errorinit P((struct error *, unsigned, unsigned));
+int etag P((void));
+int etagetd P((struct parse *));
+VOID etdadl P((struct etd *));
+VOID etdcan P((UNCH *));
+struct etd *etddef P((UNCH *));
+struct etd *etdref P((UNCH *));
+VOID exclude P((void));
+VOID fileclos P((void));
+VOID filecont P((void));
+VOID fileopen P((void));
+VOID filepend P((int));
+VOID fileread P((void));
+VOID filerr P((unsigned, UNCH *));
+VOID fixdatt P((struct dcncb *));
+struct parse *getpcb P((int));
+int groupopt P((struct thdr *,struct mpos *));
+int groupreq P((struct etd *,struct thdr *,struct mpos *));
+int grpsz P((struct thdr *,int));
+int hash P((UNCH *,int));
+struct hash *hfind P((struct hash **,UNCH *,int));
+struct hash *hin P((struct hash **,UNCH *,int,unsigned int));
+int iddef P((UNCH *));
+VOID idrck P((void));
+struct fwdref *idref P((UNCH *));
+VOID idreftst P((int,UNCH *));
+int ingrp P((struct etd **,struct etd *));
+VOID initatt P((struct ad *));
+int mapsrch P((struct map *,UNCH *));
+VOID mdadl P((UNCH *));
+int mdattdef P((int, int));
+VOID mddtde P((UNCH *));
+VOID mddtds P((UNCH *));
+VOID mdelem P((UNCH *));
+VOID mdentity P((UNCH *));
+VOID mderr P((unsigned int,UNCH *,UNCH *));
+struct parse *mdms P((UNCH *,struct parse *));
+int mdmse P((void));
+VOID mdnadl P((UNCH *));
+VOID mdnot P((UNCH *));
+VOID mdsrmdef P((UNCH *));
+VOID mdsrmuse P((UNCH *));
+int netetd P((struct parse *));
+VOID newtoken P((struct thdr *,struct mpos *,UNCH *));
+int nstetd P((void));
+UNCH *ntoa P((int));
+int offbit P((unsigned long *,int,int));
+int parsecon P((UNCH *,struct parse *));
+int parsefpi P((struct fpi *));
+struct thdr *parsegcm P((struct parse *,struct thdr *,struct thdr *));
+VOID parselit P((UNCH *,struct parse *,unsigned int,UNCH));
+struct thdr *parsemod P((int));
+int parsepro P((void));
+VOID parseseq P((UNCH *,int));
+VOID parsetag P((struct parse *));
+int parseval P((UNCH *,unsigned int,UNCH *));
+int pexmex P((struct etd *));
+unsigned int ptrsrch P((UNIV *,UNIV));
+UNCH *pubfield P((UNCH *,UNCH *,UNCH,UNS *));
+UNCH *replace P((UNCH *,UNCH *));
+UNCH *sandwich P((UNCH *,UNCH *,UNCH *));
+UNIV saverr P((unsigned int,struct parse *,UNCH *,UNCH *));
+VOID scbset P((void));
+VOID sdinit P((void));
+VOID setcurchar P((int));
+VOID setdtype P((void));
+int sgmlact P((UNCH));
+int sgmldecl P((void));
+VOID sgmlerr P((unsigned int,struct parse *,UNCH *,UNCH *));
+int shortref P((int,struct parse *));
+struct srh *srhfind P((UNCH *));
+VOID stack P((struct etd *));
+int stag P((int));
+int stagetd P((struct parse *));
+VOID startdtd P((void));
+UNCH *savenm P((UNCH *));
+UNCH *savestr P((UNCH *));
+VOID storedatt P((PNE));
+VOID svderr P((UNIV));
+VOID synerr P((unsigned int,struct parse *));
+int testend P((struct thdr *,struct mpos *,int,int));
+int tokenopt P((struct thdr *,struct mpos *));
+int tokenreq P((struct etd *,struct thdr *,struct mpos *));
+UNS vallen P((int,int,UNCH *));
+struct dcncb *dcndef P((UNCH *));
+struct entity *entdef P((UNCH *,UNCH,union etext *));
+int entget P((void));
+int entref P((UNCH *));
+struct etd *etdset P((struct etd *,UNCH,struct thdr *,struct etd **,
+ struct etd **, struct entity **));
+struct hash *hout P((struct hash **,UNCH *,int));
+struct fpi *mdextid P((UNCH *,struct fpi *,UNCH *,UNCH *,struct ne *));
+int parse P((struct parse *));
+struct ad *parseatt P((struct ad *,UNCH *));
+unsigned int parsegrp P((struct etd **,struct parse *, UNCH *));
+unsigned int parsngrp P((struct dcncb **,struct parse *, UNCH *));
+int parsemd P((UNCH *,int,struct parse *,unsigned int));
+UNCH *parsenm P((UNCH *,int));
+UNCH *parsetkn P((UNCH *,UNCH,int));
+UNCH *s2valnm P((UNCH *,UNCH *,UNCH,int));
+struct srh *srhdef P((UNCH *));
+int tokdata P((UNCH *, int));
+struct entity *usedef P((UNCH *));
diff --git a/usr.bin/sgmls/sgmls/sgmlincl.h b/usr.bin/sgmls/sgmls/sgmlincl.h
new file mode 100644
index 0000000..c4eb5cc
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/sgmlincl.h
@@ -0,0 +1,20 @@
+/* SGMLINCL.H: Include file for parser core. */
+#ifndef SGMLINCL /* Don't include this file more than once. */
+#define SGMLINCL 1
+#include "config.h"
+#include "std.h"
+#include "entity.h" /* Templates for entity control blocks. */
+#include "action.h" /* Action names for all parsing. */
+#include "adl.h" /* Definitions for attribute list processing. */
+#include "error.h" /* Symbols for error codes. */
+#include "etype.h" /* Definitions for element type processing. */
+#include "keyword.h" /* Definitions for keyword processing. */
+#include "lextoke.h" /* Symbols for tokenization lexical classes. */
+#include "source.h" /* Templates for source entity control blocks. */
+#include "synxtrn.h" /* Declarations for concrete syntax constants. */
+#include "sgmlxtrn.h" /* External variable declarations. */
+#include "trace.h" /* Declarations for internal trace functions. */
+#include "sgmlmain.h"
+#include "sgmlaux.h"
+#include "sgmlfnsm.h" /* ANSI C: Declarations for SGML functions. */
+#endif /* ndef SGMLINCL */
diff --git a/usr.bin/sgmls/sgmls/sgmlio.c b/usr.bin/sgmls/sgmls/sgmlio.c
new file mode 100644
index 0000000..c78bb7a
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/sgmlio.c
@@ -0,0 +1,384 @@
+/* sgmlio.c -
+ IO functions for core parser.
+
+ Written by James Clark (jjc@jclark.com).
+*/
+
+/* SGML must see a file in which records start with RS and end with
+ RE, and EOFCHAR (Ctl-Z) is present at the end. This module must
+ supply these characters if they are not naturally present in the
+ file. SGML will open two files at a time: when an entity is
+ nested, the new file is opened before closing the old in order to
+ make sure the open is successful. If it is, the original open file
+ is closed temporarily (IOPEND); when the stack is popped, the new
+ file is closed and the original file is re-opened (IOCONT). SGML
+ will check error returns for the initial open of a file and all
+ reads, and for re-openings when the stack is popped, but not for
+ closes. Returning <0 indicates an error; 0 or more is a successful
+ operation, except for IOREAD where the return value is the number
+ of characters read, and must exceed 0 to be successful. The first
+ READ must always be successful, and normally consists of just
+ priming the buffer with EOBCHAR (or RS EOBCHAR). SGMLIO must
+ assure that there is an EOBCHAR at the end of each block read,
+ except for the last block of the entity, which must have an
+ EOFCHAR.
+
+ SGML views an entity as a contiguous whole, without regard to its
+ actual form of storage. SGMLIO supports entities that are
+ equivalent to a single file of one or more records, or to a
+ concatenation of files.
+*/
+
+/* Uses only stream I/O. This module should be portable to most ANSI
+ systems. */
+/* We try to ensure that if an IO operation fails, then errno will contain
+ a meaningful value (although it may be zero.) */
+
+#include "config.h"
+#ifdef HAVE_O_NOINHERIT
+#include <fcntl.h>
+#include <io.h>
+#endif /* HAVE_O_NOINHERIT */
+
+#include "sgmlaux.h" /* Include files for auxiliary functions. */
+
+#ifdef HAVE_O_NOINHERIT
+#define FOPENR(file) nifopen(file)
+FILE *nifopen P((char *));
+#else /* not HAVE_O_NOINHERIT */
+#define FOPENR(file) fopen((file), "r")
+#endif /* not HAVE_O_NOINHERIT */
+
+struct iofcb { /* I/O file control block. */
+ FILE *fp; /* File handle. */
+ fpos_t off; /* Offset in file of current read block. */
+ char *next; /* Next file (NULL if no more). */
+ char *file; /* Current file (no length byte). */
+ int pendoff; /* Offset into line when file suspended. */
+ char bol; /* Non-zero if currently at beginning of line. */
+ char first; /* Non-zero if the first read. */
+ char wasbol; /* Non-zero if current block was at beginning of line. */
+ char canseek; /* Non-zero if the file may be closed on suspension and
+ reopened at `off' later; set by openfile(), cleared
+ when fgetpos() fails. */
+ UNCH *pendbuf; /* Saved partial buffer for suspended file
+ that can't be closed and reopened. */
+};
+
+static char *lastfile; /* The name of the last file closed. */
+static int bufsize; /* Size of buffer passed to ioread(). */
+static char ismagic[256]; /* Table of magic chars that need to be prefixed
+ by DELNONCH. */
+static int stdinused = 0;
+
+static char *nextstr P((char *)); /* Iterate over list of strings. */
+static FILE *openfile P((char *, char *));
+static int closefile P((FILE *));
+static int isreg P((FILE *));
+
+/* Record the read-buffer size from the switches and flag every character
+   that ioread() has to escape with DELNONCH. */
+VOID ioinit(swp)
+struct switches *swp;
+{
+     bufsize = swp->swbufsz;
+
+     ismagic[(UNCH)GENRECHAR] = 1;
+     ismagic[(UNCH)DELNONCH] = 1;
+     ismagic[EOS] = 1;
+     ismagic[EOFCHAR] = 1;
+     ismagic[EOBCHAR] = 1;
+}
+
+/* Open the entity whose identifier id is a list of file names and store a
+   newly allocated control block through pp.  Returns 1 when the first
+   file could be opened and -1 otherwise; the control block is stored
+   through pp even when the open itself fails. */
+int ioopen(id, pp)
+UNIV id;
+UNIV *pp;
+{
+     char *name = id;
+     struct iofcb *fcb;
+
+     errno = 0;
+     if (name == 0 || *name == '\0')
+          return -1;
+     fcb = (struct iofcb *)rmalloc((UNS)sizeof(struct iofcb));
+     fcb->pendbuf = 0;
+     fcb->first = 1;
+     fcb->bol = 1;
+     fcb->file = name;
+     fcb->next = nextstr(name);
+     /* Make sure a failed open leaves its own errno behind. */
+     errno = 0;
+     fcb->fp = openfile(fcb->file, &fcb->canseek);
+     *pp = (UNIV)fcb;
+     if (!fcb->fp)
+          return -1;
+     return 1;
+}
+
+/* Close the entity described by p and release its control block.  The
+   file name is remembered so that ioflid(0) can still report it. */
+VOID ioclose(p)
+UNIV p;
+{
+     struct iofcb *f = (struct iofcb *)p;
+     if (f->fp)
+          closefile(f->fp);
+     /* An entity suspended by iopend() on a non-seekable file still owns
+        its saved buffer if it is closed before being read again. */
+     if (f->pendbuf)
+          frem((UNIV)f->pendbuf);
+     lastfile = f->file;
+     frem((UNIV)f);
+}
+
+/* Suspend reading of the entity p at offset off within buf (presumably the
+   block most recently filled by ioread() -- confirm against the caller), so
+   that reading can be resumed later. */
+VOID iopend(p, off, buf)
+UNIV p;
+int off;
+UNCH *buf;
+{
+ struct iofcb *f = (struct iofcb *)p;
+ if (!f->canseek) {
+ /* The file cannot be closed and reopened: leave the stream open and
+ save the unread tail of the block, terminator included, for the
+ next ioread(). */
+ UNCH *s;
+ for (s = buf + off; *s != EOFCHAR && *s != EOBCHAR; s++)
+ ;
+ s++;
+ f->pendbuf = (UNCH *)rmalloc((UNS)(s - buf - off));
+ memcpy((UNIV)f->pendbuf, (UNIV)(buf + off), (UNS)(s - buf - off));
+ return;
+ }
+ /* A block that started at the beginning of a line has an RS in buf[0]
+ that ioread() inserted and that is not present in the file; discount it
+ when turning the buffer offset into the offset saved for iocont(). */
+ f->bol = 0;
+ if (f->wasbol) {
+ if (off == 0)
+ f->bol = 1;
+ else
+ off--;
+ }
+ f->pendoff = off;
+ /* Close the stream; iocont() reopens it by name. */
+ if (f->fp) {
+ fclose(f->fp);
+ f->fp = 0;
+ }
+}
+
+/* Resume an entity suspended by iopend().  Returns 0 on success and -1 if
+   the file cannot be reopened, repositioned or re-read. */
+int iocont(p)
+UNIV p;
+{
+ struct iofcb *f = (struct iofcb *)p;
+ int c = EOF;
+ int off = f->pendoff;
+
+ /* iopend() left the stream open for files that cannot be reopened. */
+ if (!f->canseek)
+ return 0;
+
+ errno = 0;
+ f->fp = FOPENR(f->file);
+ if (!f->fp)
+ return -1;
+ /* Go back to the start of the block that was being read. */
+ if (fsetpos(f->fp, &f->off))
+ return -1;
+ /* Skip the part of the block already consumed.  pendoff counts buffer
+ positions, and ioread() stores a magic character as two of them
+ (DELNONCH plus the shifted character), so such a character counts
+ twice. */
+ while (--off >= 0) {
+ c = getc(f->fp);
+ if (c != EOF && ismagic[c])
+ off--;
+ }
+ /* The last character skipped ended a line. */
+ if (c == '\n')
+ f->bol = 1;
+ if (ferror(f->fp))
+ return -1;
+ return 0;
+}
+
+/* Return -1 on error, otherwise the number of bytes read. The
+strategy is to concatenate the files, insert a RS at the beginning of
+each line, and change each '\n' into a RE. The returned data
+shouldn't cross a file boundary, otherwise error messages might be
+inaccurate. The first read must always succeed. */
+
+int ioread(p, buf, newfilep)
+UNIV p;
+UNCH *buf;
+int *newfilep;
+{
+     int i = 0;
+     struct iofcb *f = (struct iofcb *)p;
+     FILE *fp;
+     int c;
+
+     *newfilep = 0;
+     /* The very first read only primes the buffer. */
+     if (f->first) {
+          buf[i] = EOBCHAR;
+          f->first = 0;
+          return 1;
+     }
+     /* Hand back the tail that iopend() saved for a file that could not
+        be closed and reopened. */
+     if (f->pendbuf) {
+          for (i = 0;
+               (buf[i] = f->pendbuf[i]) != EOBCHAR && buf[i] != EOFCHAR;
+               i++)
+               ;
+          frem((UNIV)f->pendbuf);
+          f->pendbuf = 0;
+          return i + 1;
+     }
+     fp = f->fp;
+     /* Fetch the first character of the block, moving on to the next file
+        of the list whenever the current one is exhausted. */
+     for (;;) {
+          int closed;
+
+          errno = 0;
+          /* Remember where this block starts so iocont() can return to it. */
+          if (f->canseek && fgetpos(fp, &f->off))
+               f->canseek = 0;
+          errno = 0;
+          c = getc(fp);
+          if (c != EOF)
+               break;
+          if (ferror(fp))
+               return -1;
+          closed = closefile(fp);
+          /* The stream is gone whether or not the close succeeded; forget
+             it so that ioclose() cannot close it a second time. */
+          f->fp = 0;
+          if (closed == EOF)
+               return -1;
+          if (!f->next){
+               buf[0] = EOFCHAR;
+               return 1;
+          }
+          f->file = f->next;
+          f->next = nextstr(f->next);
+          *newfilep = 1;
+          errno = 0;
+          fp = f->fp = openfile(f->file, &f->canseek);
+          if (!fp)
+               return -1;
+          f->bol = 1;
+     }
+     /* A block that starts a line begins with an inserted RS. */
+     if (f->bol) {
+          f->bol = 0;
+          buf[i++] = RSCHAR;
+          f->wasbol = 1;
+     }
+     else
+          f->wasbol = 0;
+     errno = 0;
+     /* Copy characters up to and including the end of the line, or until
+        the buffer is nearly full; a block never extends past one line. */
+     for (;;) {
+          if (c == '\n') {
+               f->bol = 1;
+               buf[i++] = RECHAR;
+               break;
+          }
+          if (ismagic[c]) {
+               /* Escape characters that have a special meaning in the
+                  buffer. */
+               buf[i++] = DELNONCH;
+               buf[i++] = SHIFTNON(c);
+          }
+          else
+               buf[i++] = c;
+          /* Leave room for a two-position escape plus the EOBCHAR. */
+          if (i >= bufsize - 2)
+               break;
+          c = getc(fp);
+          if (c == EOF) {
+               if (ferror(fp))
+                    return -1;
+               /* This is in the middle of a line. */
+               break;
+          }
+     }
+     buf[i++] = EOBCHAR;
+     return i;
+}
+
+/* Step past the string at p and its terminating NUL; return the string
+   that follows, or 0 when that string is empty (the list has ended). */
+static char *nextstr(p)
+char *p;
+{
+     while (*p != '\0')
+          p++;
+     p++;
+     if (*p == '\0')
+          return 0;
+     return p;
+}
+
+/* Return the filename associated with p. If p is NULL, return the filename
+of the last file closed. */
+
+char *ioflid(p)
+UNIV p;
+{
+     struct iofcb *fcb = (struct iofcb *)p;
+
+     /* Without a control block, report the most recently closed file. */
+     return fcb ? fcb->file : lastfile;
+}
+
+/* Open the named file for reading.  STDINNAME stands for stdin, which is
+   handed out to only one user at a time and is never marked seekable.
+   For any other file *seekp receives the result of isreg(); it is left
+   untouched when the open fails.  Returns the stream, or 0 on failure. */
+static
+FILE *openfile(name, seekp)
+char *name;
+char *seekp;
+{
+     FILE *stream;
+
+     if (strcmp(name, STDINNAME) != 0) {
+          stream = FOPENR(name);
+          if (stream != 0)
+               *seekp = isreg(stream);
+          return stream;
+     }
+     if (stdinused)
+          return 0;
+     *seekp = 0;
+     stdinused = 1;
+     return stdin;
+}
+
+/* Return -1 on error, 0 otherwise. */
+
+static
+int closefile(fp)
+FILE *fp;
+{
+     if (fp != stdin)
+          return fclose(fp);
+     /* stdin is never really closed: clear its EOF and error indicators
+        and mark it as available to openfile() again. */
+     clearerr(fp);
+     stdinused = 0;
+     return 0;
+}
+
+#ifdef HAVE_O_NOINHERIT
+
+/* This is the same as fopen(name, "r") except that it tells DOS that
+the file descriptor should not be inherited by child processes. */
+
+FILE *nifopen(name)
+char *name;
+{
+     FILE *fp;
+     int fd = open(name, O_RDONLY|O_NOINHERIT|O_TEXT);
+     if (fd < 0)
+          return 0;
+     fp = fdopen(fd, "r");
+     if (!fp) {
+          /* Don't leak the descriptor, and keep the errno that explains
+             why fdopen() failed. */
+          int sverrno = errno;
+          close(fd);
+          errno = sverrno;
+     }
+     return fp;
+}
+
+#endif /* HAVE_O_NOINHERIT */
+
+#ifdef HAVE_SYS_STAT_H
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#ifndef S_ISREG
+#ifdef S_IFMT
+#ifdef S_IFREG
+#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+#endif /* S_IFREG */
+#endif /* S_IFMT */
+#endif /* not S_ISREG */
+
+#endif /* HAVE_SYS_STAT_H */
+
+/* Return 1 if fp might be associated with a regular file. 0
+otherwise. We check this because on many Unix systems lseek() will
+succeed on a (pseudo-)terminal although terminals aren't seekable in
+the way we need. */
+
+static
+int isreg(fp)
+FILE *fp;
+{
+#ifdef S_ISREG
+     /* This assumes that a system that has S_ISREG will also have
+        fstat() and fileno(). */
+     struct stat st;
+
+     /* If the file cannot be examined, give it the benefit of the doubt. */
+     if (fstat(fileno(fp), &st) != 0)
+          return 1;
+     return S_ISREG(st.st_mode);
+#else /* not S_ISREG */
+     return 1;
+#endif /* not S_ISREG */
+}
+
+
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+comment-column: 30
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/sgmlmain.h b/usr.bin/sgmls/sgmls/sgmlmain.h
new file mode 100644
index 0000000..3911f76
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/sgmlmain.h
@@ -0,0 +1,101 @@
+/* SGMLMAIN: Main interface to SGML services.
+
+Preprocessor variable names are the only supported interface
+to data maintained by SGML. They are defined in this file or in adl.h.
+*/
+/* Return control block types (RCBTYPE) from calls to parser (SGML):
+ Names and strings follow the convention for the IPBs.
+*/
+/* Event codes; this is the return type of sgmlnext(). */
+enum sgmlevent {
+ SGMLEOD, /* End of document. */
+ SGMLDAF, /* Data found. */
+ SGMLSTG, /* Start-tag found. */
+ SGMLETG, /* End-tag found. */
+ SGMLREF, /* Record end found. */
+ SGMLPIS, /* Processing instruction (string). */
+ SGMLAPP /* APPINFO (string) */
+};
+
+struct rcbdata { /* Return control block: DAF EOD REF PIS APP. */
+ UNS contersw; /* 1=context error; 2,4,8=data type; 0=not. */
+ /* The accessor macros test 2 as CDATA entity (CDESW),
+ 4 as SDATA or PIDATA entity (SDESW, PIESW) and
+ 8 as NDATA entity (NDESW). */
+ UNS datalen; /* Length of data or PI (0=single nonchar). */
+ UNCH *data; /* Data, PI, single nonSGML, or NDATA ecb ptr. */
+};
+
+struct rcbtag { /* Return control block for STG and ETG. */
+ UNS contersw; /* 1=context error; 2=NET enabled; 0/0=not. */
+ /* The accessor macros also test 4 as plus exception
+ (PEXSW) and 8 as empty element (MTYSW); SRMCNT reads
+ the whole field. */
+ UNS tagmin; /* Minim: NONE NULL NET DATA; implied by S/ETAG */
+ UNCH *curgi; /* Start-tag (or end-tag) GI. */
+ union {
+ struct ad *al; /* Start-tag: attribute list. */
+ UNCH *oldgi; /* End-tag: resumed GI. */
+ } ru;
+ struct ad *lal; /* Start-tag: link attribute list (UNUSED). */
+ UNS format; /* Format class for default processing. */
+ struct etd *tagreal; /* Dummy etd or ptr to GI that implied this tag.*/
+ int etictr; /* Number of elements on stack with NET enabled.*/
+ UNCH *srmnm; /* Current SHORTREF map name (NULL=#EMPTY). */
+};
+
+/* Accessors for rcbdata and rcbtag. */
+/* Datatype abbreviations: C=unsigned char S=string U=unsigned int L=4 bytes
+ A=array P=ptr to structure N=name (see sgmlcb.h)
+*/
+/* Data control block fields: processing instructions (SGMLPIS).
+*/
+#define PDATA(d) ((d).data) /*S PI string. */
+#define PDATALEN(d) ((d).datalen) /*U Length of PI string. */
+#define PIESW(d) (((d).contersw & 4)) /*U 1=PIDATA entity returned. */
+/* Data control block fields: other data types.
+*/
+#define CDATA(d) ((d).data) /*S CDATA content string. */
+#define CDATALEN(d) ((d).datalen) /*U Length of CDATA content string. */
+#define CONTERSW(d) (((d).contersw &1))/*U 1=CDATA or TAG out of context. */
+#define CDESW(d) (((d).contersw & 2)) /*U 1=CDATA entity returned. */
+#define SDESW(d) (((d).contersw & 4)) /*U 1=SDATA entity returned. */
+#define NDESW(d) (((d).contersw & 8)) /*U 1=NDATA entity returned. */
+#define NEPTR(d) ((PNE)(d).data) /*P Ptr to NDATA control block. */
+#define MARKUP(d) ((d).data) /*A Markup delimiter strings. */
+#define DTYPELEN(d) ((d).datalen) /*U Length of doc type name +len+EOS. */
+#define DOCTYPE(d) ((d).data) /*S Document type name (with len+EOS). */
+#define ADATA(d) ((d).data) /*S APPINFO */
+#define ADATALEN(d) ((d).datalen) /*U Length of APPINFO string. */
+/* Tag control block fields.
+*/
+#define ALPTR(t) ((t).ru.al) /*P Ptr to SGML attribute list. */
+#define CURGI(t) ((t).curgi+1) /*N GI of started or ended element. */
+#define OLDGI(t) ((t).ru.oldgi) /*S GI of resumed element. */
+#define TAGMIN(t) ((t).tagmin) /*U Minimization for current tag. */
+#define TAGREAL(t) ((t).tagreal) /*P Dummy etd that implied this tag. */
+#define TAGRLNM(t) ((UNCH *)(t).tagreal) /*P GI of tag that implied this tag.*/
+#define ETISW(t) (((t).contersw & 2)) /*U 1=NET delimiter enabled by ETI. */
+#define PEXSW(t) (((t).contersw & 4)) /*U 1=Element was plus exception. */
+#define MTYSW(t) (((t).contersw & 8)) /*U 1=Element is empty. */
+#define ETICTR(t) ((t).etictr) /*U Number of active NET delimiters. */
+#define SRMNM(t) ((t).srmnm) /*S Name of current SHORTREF map. */
+#define SRMCNT(t) ((t).contersw) /*U Number of SHORTREF maps defined. */
+#define FORMAT(t) ((t).format) /*U Format class.*/
+
+/* These function names are chosen so as to be distinct in the first 6
+letters. */
+
+/* Initialize. */
+struct markup *sgmlset P((struct switches *));
+/* Cleanup and return capacity usage statistics. */
+VOID sgmlend P((struct sgmlcap *));
+/* Set document entity. */
+int sgmlsdoc P((UNIV));
+/* Get entity. */
+int sgmlgent P((UNCH *, PNE *, UNCH **));
+/* Mark an entity. Return is non-zero if already marked.*/
+int sgmlment P((UNCH *));
+/* Get the next sgml event. */
+enum sgmlevent sgmlnext P((struct rcbdata *, struct rcbtag *));
+/* Get the error count. */
+int sgmlgcnterr P((void));
+/* Get the current location. */
+int sgmlloc P((unsigned long *, char **));
+/* Write out the SGML declaration. */
+VOID sgmlwrsd P((FILE *));
+/* Note subdocument capacity usage. */
+VOID sgmlsubcap P((long *));
diff --git a/usr.bin/sgmls/sgmls/sgmlmsg.c b/usr.bin/sgmls/sgmls/sgmlmsg.c
new file mode 100644
index 0000000..a35cb1b
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/sgmlmsg.c
@@ -0,0 +1,514 @@
+/* sgmlmsg.c -
+ message handling for core parser
+
+ Written by James Clark (jjc@jclark.com).
+*/
+
+#include "config.h"
+#include "sgmlaux.h"
+#include "msg.h"
+
+static nl_catd catd;
+
+#define TEXT_SET 1 /* message set number for text of messages */
+#define HEADER_SET 2 /* message set number for header strings */
+#define PARM_SET 3 /* message set number for special parameters */
+
+#ifdef HAVE_EXTENDED_PRINTF
+#define xfprintf fprintf
+#else
+extern int xfprintf VP((FILE *, char *,...));
+#endif
+
+#define SIZEOF(v) (sizeof(v)/sizeof(v[0]))
+
+static char *gettext P((int));
+static char *getheader P((int));
+static char *getparm P((int));
+static VOID elttrace P((FILE *, int));
+static int printit P((FILE *, struct error *));
+static char *transparm P((UNCH *, char *));
+static VOID spaces P((FILE *, int));
+
+#define PARMBUFSIZ 50
+static char parmbuf[PARMBUFSIZ*2];
+static char *parmbuf1 = parmbuf;
+static char *parmbuf2 = parmbuf + PARMBUFSIZ;
+
+static char *prog; /* program name */
+static int sweltr; /* non-zero means print an element trace */
+static int swenttr; /* non-zero means print an entity trace */
+static int cnterr = 0;
+static VOID (*die) P((void));
+
+static char *headers[] = {
+"In file included",
+"SGML error", /* parameters: type, severity, number */
+"Unsupported feature", /* type U errors */
+"Error", /* for type R errors */
+"Warning", /* severity type I */
+" at %s, %.0sline %lu", /* ignore entity name and ccnt */
+" at entity %s, line %lu",
+"%.0s%.0s in declaration parameter %d", /* ignore first two parameters */
+"%.0s in declaration parameter %d", /* ignore first parameter */
+"%.0s", /* parse mode */
+" at end of file",
+" at end of entity",
+" at record start",
+" at record end",
+" at \"%c\"",
+" at \"\\%03o\"",
+" accessing \"%s\"",
+"Element structure:"
+};
+
+/* Indexes into headers[] */
+
+#define HDRPFX 0
+#define HDRALL 1
+#define HDRUNSUP 2
+#define HDRSYS 3
+#define HDRWARN 4
+#define HDRLOC 5
+#define HDRELOC 6
+#define HDRMD 7
+#define HDRMD2 8
+#define HDRMODE 9
+#define HDREOF 10
+#define HDREE 11
+#define HDRRS 12
+#define HDRRE 13
+#define HDRPRT 14
+#define HDRCTL 15
+#define HDRFIL 16
+#define HDRELT 17
+
+/* Special parameters (error::errsp) */
+static char *parms[] = {
+"character data",
+"element content",
+"mixed content",
+"replaceable character data",
+"tag close",
+"content model group",
+"content model occurrence indicator",
+"name group",
+"name token group",
+"system data",
+"parameter literal",
+"attribute value literal",
+"tokenized attribute value literal",
+"minimum literal",
+"markup declaration",
+"markup declaration comment",
+"ignored markup declaration",
+"declaration subset",
+"CDATA marked section",
+"IGNORE marked section",
+"RCDATA marked section",
+"prolog",
+"reference",
+"attribute specification list",
+"tokenized attribute value",
+"attribute specification list close",
+"SGML declaration",
+"attribute definition list",
+"document type",
+"element",
+"entity",
+"link type",
+"link set",
+"notation",
+"SGML",
+"short reference mapping",
+"link set use",
+"short reference use",
+};
+
+static FILE *tfp; /* temporary file for saved messages */
+
+/* Describes one message stored in the temporary file by msgsave(). */
+struct saved {
+ long start; /* ftell() position in tfp before the message was written */
+ long end; /* ftell() position in tfp after the message was written */
+ char exiterr; /* non-zero if the error type was EXITERR */
+ char countit; /* result of printit(): count it as an error when printed */
+};
+
+/* Print the message for e on stderr and count it if printit() says so.
+   An EXITERR message ends the program: through the die hook when one was
+   registered (aborting should the hook return), otherwise via exit(). */
+VOID msgprint(e)
+struct error *e;
+{
+     int counted = printit(stderr, e);
+
+     if (counted)
+          ++cnterr;
+     fflush(stderr);
+     if (e->errtype != EXITERR)
+          return;
+     if (!die)
+          exit(EXIT_FAILURE);
+     (*die)();
+     abort();
+}
+
+/* Save an error message. */
+
+UNIV msgsave(e)
+struct error *e;
+{
+     /* Offset just past the last message written to the temporary file. */
+     static long tail = 0L;
+     struct saved *sv;
+
+     sv = (struct saved *)rmalloc(sizeof(struct saved));
+     if (!tfp) {
+          tfp = tmpfile();
+          if (!tfp)
+               exiterr(160, (struct parse *)0);
+     }
+     /* msgsprint() leaves the stream positioned after whatever message it
+        last read back.  Always append after the last message written, so
+        that later saved messages are not overwritten; the positioning call
+        is also what makes switching from reading to writing valid. */
+     (void)fseek(tfp, tail, SEEK_SET);
+     sv->start = ftell(tfp);
+     sv->countit = (char)printit(tfp, e);
+     sv->end = ftell(tfp);
+     if (sv->end > tail)
+          tail = sv->end;
+     sv->exiterr = (char)(e->errtype == EXITERR);
+     return (UNIV)sv;
+}
+
+/* Print a saved error message. */
+
+VOID msgsprint(p)
+UNIV p;
+{
+     struct saved *sv = (struct saved *)p;
+     long remaining;
+     int ch;
+
+     assert(p != 0);
+     assert(tfp != 0);
+     if (fseek(tfp, sv->start, SEEK_SET) < 0)
+          return;
+     /* Temporary files are opened in binary mode, so this is portable. */
+     /* Copy the saved text, byte for byte, to stderr. */
+     for (remaining = sv->end - sv->start; remaining > 0; remaining--) {
+          ch = getc(tfp);
+          if (ch == EOF)
+               break;
+          putc(ch, stderr);
+     }
+     fflush(stderr);
+     if (sv->countit)
+          ++cnterr;
+     if (sv->exiterr)
+          exit(EXIT_FAILURE);
+}
+
+/* Free a saved error message (the control block returned by msgsave()). */
+
+VOID msgsfree(p)
+UNIV p;
+{
+ frem(p);
+}
+
+/* Format the message described by e onto efp: program name, optional
+   entity trace, header, location, context that depends on the error type,
+   system error text for file errors, the message text itself and an
+   optional element trace.
+   Return 1 if it should be counted as an error. */
+
+static int printit(efp, e)
+FILE *efp;
+struct error *e;
+{
+ int indent;
+ int countit;
+ int hdrcode;
+ int filelevel = -1, prevfilelevel = -1, toplevel;
+ struct location loc;
+ char type[2], severity[2];
+
+ assert(e->errnum < SIZEOF(messages));
+ assert(messages[e->errnum].text != NULL);
+ if (prog) {
+ fprintf(efp, "%s: ", prog);
+ indent = strlen(prog) + 2; /* don't rely on return value of fprintf */
+ /* Don't want to waste too much space on indenting. */
+ if (indent > 10)
+ indent = 4;
+ }
+ else
+ indent = 4;
+
+ /* Walk the levels reported by getlocation().  Afterwards toplevel is the
+ index of the last level, filelevel the last level with filesw set and
+ prevfilelevel the one with filesw set before that (-1 if none). */
+ for (toplevel = 0; getlocation(toplevel, &loc); toplevel++)
+ if (loc.filesw) {
+ prevfilelevel = filelevel;
+ filelevel = toplevel;
+ }
+ toplevel--;
+
+ /* For a file error the last level is reported separately further down
+ (HDRFIL), so the location is taken from the level below it. */
+ if (e->errtype == FILERR) {
+ toplevel--;
+ filelevel = prevfilelevel;
+ }
+ /* Entity trace: describe the levels below the one the error is at. */
+ if (swenttr && filelevel > 0) {
+ int level = 0;
+ int middle = 0; /* in the middle of a line */
+ do {
+ (void)getlocation(level, &loc);
+ if (loc.filesw) {
+ if (middle) {
+ fputs(":\n", efp);
+ spaces(efp, indent);
+ }
+ else
+ middle = 1;
+ xfprintf(efp, getheader(HDRPFX));
+ xfprintf(efp, getheader(HDRLOC), ioflid(loc.fcb),
+ loc.ename, loc.rcnt, loc.ccnt);
+ }
+ else if (middle)
+ xfprintf(efp, getheader(HDRELOC),
+ loc.ename, loc.rcnt + 1, loc.ccnt);
+ }
+ while (++level != filelevel);
+ if (middle) {
+ fputs(":\n", efp);
+ spaces(efp, indent);
+ }
+ }
+
+ /* We use strings for the type and severity,
+ so that the format can use %.0s to ignore them. */
+
+ type[0] = messages[e->errnum].type;
+ type[1] = '\0';
+ severity[0] = messages[e->errnum].severity;
+ severity[1] = '\0';
+
+ /* Severity 'I' messages are warnings and are not counted. */
+ countit = (severity[0] != 'I');
+ if (!countit)
+ hdrcode = HDRWARN;
+ else if (type[0] == 'R')
+ hdrcode = HDRSYS;
+ else if (type[0] == 'U')
+ hdrcode = HDRUNSUP;
+ else
+ hdrcode = HDRALL;
+
+ xfprintf(efp, getheader(hdrcode), type, severity, e->errnum);
+
+ /* Location of the error, followed (with the entity trace enabled) by
+ the levels above it. */
+ if (filelevel >= 0) {
+ (void)getlocation(filelevel, &loc);
+ xfprintf(efp, getheader(HDRLOC),
+ ioflid(loc.fcb), loc.ename, loc.rcnt, loc.ccnt);
+ while (filelevel < toplevel) {
+ ++filelevel;
+ if (swenttr) {
+ (void)getlocation(filelevel, &loc);
+ xfprintf(efp, getheader(HDRELOC),
+ loc.ename, loc.rcnt + 1, loc.ccnt);
+ }
+ }
+ }
+
+ /* It is necessary to copy the result of getparm() because
+ the specification of catgets() says it can return a
+ pointer to a static buffer which may get overwritten
+ by the next call to catgets(). */
+
+ switch (e->errtype) {
+ case MDERR:
+ strncpy(parmbuf, getparm(e->errsp), PARMBUFSIZ*2 - 1);
+ xfprintf(efp, getheader(HDRMD), parmbuf,
+ (e->subdcl ? e->subdcl : (UNCH *)""), e->parmno);
+ break;
+ case MDERR2:
+ /* no subdcl parameter */
+ strncpy(parmbuf, getparm(e->errsp), PARMBUFSIZ*2 - 1);
+ xfprintf(efp, getheader(HDRMD2), parmbuf, e->parmno);
+ break;
+ case DOCERR:
+ case EXITERR:
+ if (toplevel < 0)
+ break;
+ strncpy(parmbuf, getparm(e->errsp), PARMBUFSIZ*2 - 1);
+ xfprintf(efp, getheader(HDRMODE), parmbuf);
+ /* Describe the character at which the error was detected. */
+ switch (loc.curchar) {
+ case EOFCHAR:
+ xfprintf(efp, getheader(HDREOF));
+ break;
+ case RSCHAR:
+ xfprintf(efp, getheader(HDRRS));
+ break;
+ case RECHAR:
+ xfprintf(efp, getheader(HDRRE));
+ break;
+ case DELNONCH:
+ xfprintf(efp, getheader(HDRCTL), UNSHIFTNON(loc.nextchar));
+ break;
+ case EOS:
+ xfprintf(efp, getheader(HDREE));
+ break;
+ case EOBCHAR:
+ break;
+ default:
+ if (ISASCII(loc.curchar) && isprint(loc.curchar))
+ xfprintf(efp, getheader(HDRPRT), loc.curchar);
+ else
+ xfprintf(efp, getheader(HDRCTL), loc.curchar);
+ break;
+ }
+ break;
+ case FILERR:
+ /* Name the file of the level that was set aside above. */
+ if (getlocation(toplevel + 1, &loc))
+ xfprintf(efp, getheader(HDRFIL), ioflid(loc.fcb));
+ break;
+ }
+ fputs(":\n", efp);
+
+ /* For file errors, add the system's description of the saved errno. */
+ if (e->errtype == FILERR && e->sverrno != 0) {
+ char *errstr = strerror(e->sverrno);
+ UNS len = strlen(errstr);
+ /* Strip a trailing newline if there is one. */
+ if (len > 0 && errstr[len - 1] == '\n')
+ len--;
+ spaces(efp, indent);
+ for (; len > 0; len--, errstr++)
+ putc(*errstr, efp);
+ fputs(":\n", efp);
+ }
+
+ spaces(efp, indent);
+
+ /* The message text, with its two parameters made printable. */
+ xfprintf(efp, gettext(e->errnum),
+ transparm((UNCH *)e->eparm[0], parmbuf1),
+ transparm((UNCH *)e->eparm[1], parmbuf2));
+ putc('\n', efp);
+
+ if (sweltr)
+ elttrace(efp, indent);
+ return countit;
+}
+
+/* Print an element trace. */
+static VOID elttrace(efp, indent)
+FILE *efp;
+int indent;
+{
+     int level;
+     UNCH *gi = getgi(1);
+
+     /* Nothing is printed, not even the header, without an open element. */
+     if (!gi)
+          return;
+     spaces(efp, indent);
+     xfprintf(efp, getheader(HDRELT));
+     for (level = 1; gi; gi = getgi(++level))
+          fprintf(efp, " %s", gi);
+     putc('\n', efp);
+}
+
+/* Write indent space characters to efp (none if indent is not positive). */
+static VOID spaces(efp, indent)
+FILE *efp;
+int indent;
+{
+     for (; indent > 0; indent--)
+          putc(' ', efp);
+}
+
+/* Copy the message-related settings out of the switches structure. */
+VOID msginit(swp)
+struct switches *swp;
+{
+     die = swp->die;
+     swenttr = swp->swenttr;
+     sweltr = swp->sweltr;
+     prog = swp->prog;
+     catd = swp->catd;
+}
+
+/* Return the error count. */
+
+int msgcnterr()
+{
+ return cnterr;
+}
+
+/* Transform a parameter into a form suitable for printing. */
+
+/* Write a printable rendering of s into buf, which must have room for
+   PARMBUFSIZ characters, and return buf.  Backslashes are doubled,
+   characters that are not printable ASCII are written as \ooo, and the
+   DELCDATA and DELSDATA delimiters are dropped.  When the rendering does
+   not fit, it is cut short and "..." is appended.  Returns 0 if s is 0. */
+static char *transparm(s, buf)
+UNCH *s;
+char *buf;
+{
+     char *ptr;
+     int cnt;
+     int truncated = 0;
+
+     if (!s)
+          return 0;
+
+     ptr = buf;
+     cnt = PARMBUFSIZ - 4; /* space for `...\0' */
+
+     while (*s) {
+          UNCH ch = *s++;
+          if (ch == DELNONCH) {
+               /* A DELNONCH with nothing after it just ends the string. */
+               if (*s == '\0')
+                    break;
+               ch = UNSHIFTNON(*s);
+               s++;
+          }
+          if (ch == DELCDATA || ch == DELSDATA)
+               ;
+          else if (ch == '\\') {
+               if (cnt < 2) {
+                    truncated = 1;
+                    break;
+               }
+               *ptr++ = '\\';
+               *ptr++ = '\\';
+               cnt -= 2;
+          }
+          else if (ISASCII(ch) && isprint(ch)) {
+               if (cnt < 1) {
+                    truncated = 1;
+                    break;
+               }
+               *ptr++ = ch;
+               cnt--;
+          }
+          else {
+               if (cnt < 4) {
+                    truncated = 1;
+                    break;
+               }
+               sprintf(ptr, "\\%03o", ch);
+               ptr += 4;
+               cnt -= 4;
+          }
+     }
+     /* s has already moved past the character that did not fit, so looking
+        at *s cannot tell whether anything was left out when that character
+        was the last one; the flag can. */
+     if (truncated)
+          strcpy(ptr, "...");
+     else
+          *ptr = '\0';
+     return buf;
+}
+
+/* The message and set numbers in the catgets function must be > 0. */
+
+/* Look up the text of message n in the catalog, falling back to the
+   built-in text from messages[]. */
+static char *gettext(n)
+int n;
+{
+     char *dflt;
+
+     assert(n > 0 && n < SIZEOF(messages));
+     dflt = messages[n].text;
+     assert(dflt != 0);
+     return catgets(catd, TEXT_SET, n, dflt);
+}
+
+/* Look up header string n in the catalog, falling back to headers[n].
+   Catalog message numbers start at 1, hence the offset. */
+static char *getheader(n)
+int n;
+{
+     char *dflt;
+
+     assert(n >= 0 && n < SIZEOF(headers));
+     dflt = headers[n];
+     return catgets(catd, HEADER_SET, n + 1, dflt);
+}
+
+/* Look up special parameter n in the catalog, falling back to parms[n].
+   Catalog message numbers start at 1, hence the offset. */
+static char *getparm(n)
+int n;
+{
+     char *dflt;
+
+     assert(n >= 0 && n < SIZEOF(parms));
+     dflt = parms[n];
+     return catgets(catd, PARM_SET, n + 1, dflt);
+}
+
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/sgmls.1 b/usr.bin/sgmls/sgmls/sgmls.1
new file mode 100644
index 0000000..b9967a0
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/sgmls.1
@@ -0,0 +1,871 @@
+'\" t
+.\" Uncomment the next line to get a man page accurate for MS-DOS
+.\"nr Os 1
+.\" Uncomment the next line if tracing is enabled.
+.\"nr Tr 1
+.if \n(.g .if !r Os .nr Os 0
+.tr \(ts"
+.ds S \s-1SGML\s0
+.de TS
+.br
+.sp .5
+..
+.de TE
+.br
+.sp .5
+..
+.de TQ
+.br
+.ns
+.TP \\$1
+..
+.TH SGMLS 1
+.SH NAME
+sgmls \- a validating SGML parser
+.sp
+An \*S System Conforming to
+.if n .br
+International Standard ISO 8879 \(em
+.br
+Standard Generalized Markup Language
+.SH SYNOPSIS
+.B sgmls
+[
+.B \-deglprsuv
+]
+[
+.BI \-c file
+]
+.if \n(Os=1 \{\
+[
+.BI \-f file
+]
+.\}
+[
+.BI \-i name
+]
+.if \n(Tr \{\
+[
+.BI \-x flags
+]
+[
+.BI \-y flags
+]
+.\}
+[
+.I filename\|.\|.\|.
+]
+.SH DESCRIPTION
+.I Sgmls
+parses and validates
+the \*S document entity in
+.I filename\|.\|.\|.
+and prints on the standard output a simple \s-1ASCII\s0 representation of its
+Element Structure Information Set.
+(This is the information set which a structure-controlled
+conforming \*S application should act upon.)
+Note that the document entity may be spread amongst several files;
+for example, the SGML declaration, document type declaration and document
+instance set could each be in a separate file.
+If no filenames are specified, then
+.I sgmls
+will read the document entity from the standard input.
+A filename of
+.B \-
+can also be used to refer to the standard input.
+.LP
+The following options are available:
+.TP
+.BI \-c file
+Write a report of capacity usage to
+.IR file .
+The report is in the format of a RACT result.
+RACT is the Reference Application for Capacity Testing defined in the
+Proposed American National Standard
+.I
+Conformance Testing for Standard Generalized Markup Language (SGML) Systems
+(X3.190-199X),
+Draft July 1991.
+.TP
+.B \-d
+Warn about duplicate entity declarations.
+.TP
+.B \-e
+Describe open entities in error messages.
+Error messages always include the position of the most recently
+opened external entity.
+.if \n(Os=1 \{\
+.TP
+.BI \-f file
+Redirect errors to
+.IR file .
+.\}
+.TP
+.B \-g
+Show the \s-1GI\s0s of open elements in error messages.
+.TP
+.BI \-i name
+Pretend that
+.RS
+.IP
+.BI <!ENTITY\ %\ name\ \(tsINCLUDE\(ts>
+.LP
+occurs at the start of the document type declaration subset
+in the \*S document entity.
+Since repeated definitions of an entity are ignored,
+this definition will take precedence over any other definitions
+of this entity in the document type declaration.
+Multiple
+.B \-i
+options are allowed.
+If the \*S declaration replaces the reserved name
+.B INCLUDE
+then the new reserved name will be the replacement text of the entity.
+Typically the document type declaration will contain
+.IP
+.BI <!ENTITY\ %\ name\ \(tsIGNORE\(ts>
+.LP
+and will use
+.BI % name ;
+in the status keyword specification of a marked section declaration.
+In this case the effect of the option will be to cause the marked
+section not to be ignored.
+.RE
+.TP
+.B \-l
+Output
+.B L
+commands giving the current line number and filename.
+.TP
+.B \-p
+Parse only the prolog.
+.I Sgmls
+will exit after parsing the document type declaration.
+Implies
+.BR \-s .
+.TP
+.B \-r
+Warn about defaulted references.
+.TP
+.B \-s
+Suppress output.
+Error messages will still be printed.
+.TP
+.B \-u
+Warn about undefined elements: elements used in the DTD but not defined.
+Also warn about undefined short reference maps.
+.TP
+.B \-v
+Print the version number.
+.if \n(Tr \{\
+.TP
+.BI \-x flags
+.br
+.ns
+.TP
+.BI \-y flags
+Enable debugging output;
+.B \-x
+applies to the document body,
+.B \-y
+to the prolog.
+Each character in the
+.I flags
+argument enables tracing of a particular activity.
+.RS
+.TP
+.B t
+Trace state transitions.
+.TP
+.B a
+Trace attribute activity.
+.TP
+.B c
+Trace context checking.
+.TP
+.B d
+Trace declaration parsing.
+.TP
+.B e
+Trace entities.
+.TP
+.B g
+Trace groups.
+.TP
+.B i
+Trace \s-1ID\s0s.
+.TP
+.B m
+Trace marked sections.
+.TP
+.B n
+Trace notations.
+.RE
+.\}
+.SS "Entity Manager"
+An external entity resides in one or more files.
+The entity manager component of
+.I sgmls
+maps a sequence of files into an entity in three sequential stages:
+.IP 1.
+each carriage return character is turned into a non-SGML character;
+.IP 2.
+each newline character is turned into a record end character,
+and at the same time
+a record start character is inserted at the beginning of each line;
+.IP 3.
+the files are concatenated.
+.LP
+A system identifier is
+interpreted as a list of filenames separated by
+.if \n(Os=0 colons.
+.if \n(Os=1 semi-colons.
+A filename of
+.B \-
+can be used to refer to the standard input.
+If no system identifier is supplied, then the entity manager will
+attempt to generate a filename using the public identifier
+(if there is one) and other information available to it.
+Notation identifiers are not subject to this treatment.
+This process is controlled by the environment variable
+.BR \s-1SGML_PATH\s0 ;
+this contains a
+.if \n(Os=0 colon-separated
+.if \n(Os=1 semicolon-separated
+list of filename templates.
+A filename template is a filename that may contain
+substitution fields; a substitution field is a
+.B %
+character followed by a single letter that indicates the value
+of the substitution.
+If
+.B \s-1SGML_PATH\s0
+uses the
+.B %S
+field (the value of which is the system identifier),
+then the entity manager will also use
+.B \s-1SGML_PATH\s0
+to generate a filename
+when a system identifier that does not contain any
+.if \n(Os=0 colons
+.if \n(Os=1 semi-colons
+is supplied.
+The value of a substitution can either be a string
+or it can be
+.IR null .
+The entity manager transforms the list of
+filename templates into a list of filenames by substituting for each
+substitution field and discarding any template
+that contained a substitution field whose value was null.
+It then uses the first resulting filename that exists and is readable.
+Substitution values are transformed before being used for substitution:
+firstly, any names that were subject to upper case substitution
+are folded to lower case;
+secondly,
+.if \n(Os=0 \{\
+.\" Unix
+space characters are mapped to underscores
+and slashes are mapped to percents.
+.\}
+.if \n(Os=1 \{\
+.\" MS-DOS
+the characters
+.B +,./:=?
+and space characters are deleted.
+.\}
+The value of the
+.B %S
+field is not transformed.
+The values of substitution fields are as follows:
+.TP
+.B %%
+A single
+.BR % .
+.TP
+.B %D
+The entity's data content notation.
+This substitution will succeed only for external data entities.
+.TP
+.B %N
+The entity, notation or document type name.
+.TP
+.B %P
+The public identifier if there was a public identifier,
+otherwise null.
+.TP
+.B %S
+The system identifier if there was a system identifier,
+otherwise null.
+.TP
+.B %X
+(This is provided mainly for compatibility with \s-1ARCSGML\s0.)
+A three-letter string chosen as follows:
+.LP
+.RS
+.ne 11
+.TS
+tab(&);
+c|c|c s
+c|c|c s
+c|c|c|c
+c|c|c|c
+l|lB|lB|lB.
+&&With public identifier
+&&_
+&No public&Device&Device
+&identifier&independent&dependent
+_
+Data or subdocument entity&nsd&pns&vns
+General SGML text entity&gml&pge&vge
+Parameter entity&spe&ppe&vpe
+Document type definition&dtd&pdt&vdt
+Link process definition&lpd&plp&vlp
+.TE
+.LP
+The device dependent version is selected if the public text class
+allows a public text display version but no public text display
+version was specified.
+.RE
+.TP
+.B %Y
+The type of thing for which the filename is being generated:
+.TS
+tab(&);
+l lB.
+SGML subdocument entity&sgml
+Data entity&data
+General text entity&text
+Parameter entity&parm
+Document type definition&dtd
+Link process definition&lpd
+.TE
+.LP
+The value of the following substitution fields will be null
+unless a valid formal public identifier was supplied.
+.TP
+.B %A
+Null if the text identifier in the
+formal public identifier contains an unavailable text indicator,
+otherwise the empty string.
+.TP
+.B %C
+The public text class, mapped to lower case.
+.TP
+.B %E
+The public text designating sequence (escape sequence)
+if the public text class is
+.BR \s-1CHARSET\s0 ,
+otherwise null.
+.TP
+.B %I
+The empty string if the owner identifier in the formal public identifier
+is an \s-1ISO\s0 owner identifier,
+otherwise null.
+.TP
+.B %L
+The public text language, mapped to lower case,
+unless the public text class is
+.BR \s-1CHARSET\s0 ,
+in which case null.
+.TP
+.B %O
+The owner identifier (with the
+.B +//
+or
+.B \-//
+prefix stripped).
+.TP
+.B %R
+The empty string if the owner identifier in the formal public identifier
+is a registered owner identifier,
+otherwise null.
+.TP
+.B %T
+The public text description.
+.TP
+.B %U
+The empty string if the owner identifier in the formal public identifier
+is an unregistered owner identifier,
+otherwise null.
+.TP
+.B %V
+The public text display version.
+This substitution will be null if the public text class
+does not allow a display version or if no version was specified.
+If an empty version was specified, a value of
+.B default
+will be used.
+.br
+.ne 18
+.SS "System declaration"
+The system declaration for
+.I sgmls
+is as follows:
+.LP
+.TS
+tab(&);
+c1 s1 s1 s1 s1 s1 s1 s1 s
+c s s s s s s s s
+l l s s s s s s s
+l l s s s s s s s
+l l s s s s s s s
+l l l s s s s s s
+c s s s s s s s s
+l l l l l l l l l
+l l l l l l l l l
+l l l l l l l l l
+l l s s s s s s s
+l l l s s s s s s
+l l l s s s s s s
+c s s s s s s s s
+l l l l l l l l l.
+SYSTEM "ISO 8879:1986"
+CHARSET
+BASESET&"ISO 646-1983//CHARSET
+&\h'\w'"'u'International Reference Version (IRV)//ESC 2/5 4/0"
+DESCSET&0\0128\00
+CAPACITY&PUBLIC&"ISO 8879:1986//CAPACITY Reference//EN"
+FEATURES
+MINIMIZE&DATATAG&NO&OMITTAG&YES&RANK&NO&SHORTTAG&YES
+LINK&SIMPLE&NO&IMPLICIT&NO&EXPLICIT&NO
+OTHER&CONCUR&NO&SUBDOC&YES 1&FORMAL&YES
+SCOPE&DOCUMENT
+SYNTAX&PUBLIC&"ISO 8879:1986//SYNTAX Reference//EN"
+SYNTAX&PUBLIC&"ISO 8879:1986//SYNTAX Core//EN"
+VALIDATE
+&GENERAL&YES&MODEL&YES&EXCLUDE&YES&CAPACITY&YES
+&NONSGML&YES&SGML&YES&FORMAL&YES
+.T&
+c s s s s s s s s
+l l l l l l l l l.
+SDIF
+&PACK&NO&UNPACK&NO
+.TE
+.LP
+The memory usage of
+.I sgmls
+is not a function of the capacity points used by a document;
+however,
+.I sgmls
+can handle capacities significantly greater than the reference capacity set.
+.LP
+In some environments,
+higher values may be supported for the \s-1SUBDOC\s0 parameter.
+.LP
+Documents that do not use optional features are also supported.
+For example, if
+.B FORMAL\ NO
+is specified in the \*S declaration,
+public identifiers will not be required to be valid formal public identifiers.
+.LP
+Certain parts of the concrete syntax may be changed:
+.RS
+.LP
+The shunned character numbers can be changed.
+.LP
+Eight bit characters can be assigned to
+\s-1LCNMSTRT\s0, \s-1UCNMSTRT\s0, \s-1LCNMCHAR\s0 and \s-1UCNMCHAR\s0.
+Declaring this requires that the syntax reference character set be declared
+like this:
+.RS
+.ne 3
+.TS
+tab(&);
+l l.
+BASESET&"ISO Registration Number 100//CHARSET
+&\h'\w'"'u'ECMA-94 Right Part of Latin Alphabet Nr. 1//ESC 2/13 4/1"
+DESCSET&0\0256\00
+.TE
+.RE
+.LP
+Uppercase substitution can be performed or not performed
+both for entity names and for other names.
+.LP
+Either short reference delimiters assigned by the reference delimiter set
+or no short reference delimiters are supported.
+.LP
+The reserved names can be changed.
+.LP
+The quantity set can be increased within certain limits
+subject to there being sufficient memory available.
+The upper limit on \s-1\%NAMELEN\s0 is 239.
+The upper limits on
+\s-1\%ATTCNT\s0, \s-1\%ATTSPLEN\s0, \s-1\%BSEQLEN\s0, \s-1\%ENTLVL\s0,
+\s-1\%LITLEN\s0, \s-1\%PILEN\s0, \s-1\%TAGLEN\s0, and \s-1\%TAGLVL\s0
+are more than thirty times greater than the reference limits.
+The upper limit on
+\s-1\%GRPCNT\s0, \s-1\%GRPGTCNT\s0, and \s-1\%GRPLVL\s0 is 253.
+\s-1\%NORMSEP\s0
+cannot be changed.
+\s-1\%DTAGLEN\s0
+and
+\s-1\%DTEMPLEN\s0
+are irrelevant since
+.I sgmls
+does not support the
+\s-1\%DATATAG\s0
+feature.
+.RE
+.SS "\*S declaration"
+If the \*S declaration is omitted,
+the following declaration will be implied:
+.TS
+tab(&);
+c1 s1 s1 s1 s1 s1 s1 s1 s
+c s s s s s s s s
+l l s s s s s s s.
+<!SGML "ISO 8879:1986"
+CHARSET
+BASESET&"ISO 646-1983//CHARSET
+&\h'\w'"'u'International Reference Version (IRV)//ESC 2/5 4/0"
+DESCSET&\0\00\0\09\0UNUSED
+&\0\09\0\02\0\09
+&\011\0\02\0UNUSED
+&\013\0\01\013
+&\014\018\0UNUSED
+&\032\095\032
+&127\0\01\0UNUSED
+.T&
+l l l s s s s s s
+l l s s s s s s s
+l l l s s s s s s
+c s s s s s s s s
+l l l l l l l l l.
+CAPACITY&PUBLIC&"ISO 8879:1986//CAPACITY Reference//EN"
+SCOPE&DOCUMENT
+SYNTAX&PUBLIC&"ISO 8879:1986//SYNTAX Reference//EN"
+FEATURES
+MINIMIZE&DATATAG&NO&OMITTAG&YES&RANK&NO&SHORTTAG&YES
+LINK&SIMPLE&NO&IMPLICIT&NO&EXPLICIT&NO
+OTHER&CONCUR&NO&SUBDOC&YES 99999999&FORMAL&YES
+.T&
+c s s s s s s s s.
+APPINFO NONE>
+.TE
+with the exception that characters 128 through 254 will be assigned to
+\s-1DATACHAR\s0.
+When exporting documents that use characters in this range,
+an accurate description of the upper half of the document character set
+should be added to this declaration.
+For ISO Latin-1, an appropriate description would be:
+.br
+.ne 5
+.TS
+tab(&);
+l l.
+BASESET&"ISO Registration Number 100//CHARSET
+&\h'\w'"'u'ECMA-94 Right Part of Latin Alphabet Nr. 1//ESC 2/13 4/1"
+DESCSET&128\032\0UNUSED
+&160\095\032
+&255\0\01\0UNUSED
+.TE
+.SS "Output format"
+The output is a series of lines.
+Lines can be arbitrarily long.
+Each line consists of an initial command character
+and one or more arguments.
+Arguments are separated by a single space,
+but when a command takes a fixed number of arguments
+the last argument can contain spaces.
+There is no space between the command character and the first argument.
+Arguments can contain the following escape sequences.
+.TP
+.B \e\e
+A
+.BR \e .
+.TP
+.B \en
+A record end character.
+.TP
+.B \e|
+Internal \s-1SDATA\s0 entities are bracketed by these.
+.TP
+.BI \e nnn
+The character whose code is
+.I nnn
+octal.
+.LP
+A record start character will be represented by
+.BR \e012 .
+Most applications will need to ignore
+.B \e012
+and translate
+.B \en
+into newline.
+.LP
+The possible command characters and arguments are as follows:
+.TP
+.BI ( gi
+The start of an element whose generic identifier is
+.IR gi .
+Any attributes for this element
+will have been specified with
+.B A
+commands.
+.TP
+.BI ) gi
+The end of an element whose generic identifier is
+.IR gi .
+.TP
+.BI \- data
+Data.
+.TP
+.BI & name
+A reference to an external data entity
+.IR name ;
+.I name
+will have been defined using an
+.B E
+command.
+.TP
+.BI ? pi
+A processing instruction with data
+.IR pi .
+.TP
+.BI A name\ val
+The next element to start has an attribute
+.I name
+with value
+.I val
+which takes one of the following forms:
+.RS
+.TP
+.B IMPLIED
+The value of the attribute is implied.
+.TP
+.BI CDATA\ data
+The attribute is character data.
+This is used for attributes whose declared value is
+.BR \s-1CDATA\s0 .
+.TP
+.BI NOTATION\ nname
+The attribute is a notation name;
+.I nname
+will have been defined using a
+.B N
+command.
+This is used for attributes whose declared value is
+.BR \s-1NOTATION\s0 .
+.TP
+.BI ENTITY\ name\|.\|.\|.
+The attribute is a list of general entity names.
+Each entity name will have been defined using an
+.BR I ,
+.B E
+or
+.B S
+command.
+This is used for attributes whose declared value is
+.B \s-1ENTITY\s0
+or
+.BR \s-1ENTITIES\s0 .
+.TP
+.BI TOKEN\ token\|.\|.\|.
+The attribute is a list of tokens.
+This is used for attributes whose declared value is anything else.
+.RE
+.TP
+.BI D ename\ name\ val
+This is the same as the
+.B A
+command, except that it specifies a data attribute for an
+external entity named
+.IR ename .
+Any
+.B D
+commands will come after the
+.B E
+command that defines the entity to which they apply, but
+before any
+.B &
+or
+.B A
+commands that reference the entity.
+.TP
+.BI N nname
+Define a notation
+.IR nname .
+This command will be preceded by a
+.B p
+command if the notation was declared with a public identifier,
+and by a
+.B s
+command if the notation was declared with a system identifier.
+A notation will only be defined if it is to be referenced
+in an
+.B E
+command or in an
+.B A
+command for an attribute with a declared value of
+.BR \s-1NOTATION\s0 .
+.TP
+.BI E ename\ typ\ nname
+Define an external data entity named
+.I ename
+with type
+.I typ
+.RB ( \s-1CDATA\s0 ,
+.B \s-1NDATA\s0
+or
+.BR \s-1SDATA\s0 )
+and notation
+.IR nname .
+This command will be preceded by one or more
+.B f
+commands giving the filenames generated by the entity manager from the system
+and public identifiers,
+by a
+.B p
+command if a public identifier was declared for the entity,
+and by a
+.B s
+command if a system identifier was declared for the entity.
+.I nname
+will have been defined using a
+.B N
+command.
+Data attributes may be specified for the entity using
+.B D
+commands.
+An external data entity will only be defined if it is to be referenced in a
+.B &
+command or in an
+.B A
+command for an attribute whose declared value is
+.B \s-1ENTITY\s0
+or
+.BR \s-1ENTITIES\s0 .
+.TP
+.BI I ename\ typ\ text
+Define an internal data entity named
+.I ename
+with type
+.I typ
+.RB ( \s-1CDATA\s0
+or
+.BR \s-1SDATA\s0 )
+and entity text
+.IR text .
+An internal data entity will only be defined if it is referenced in an
+.B A
+command for an attribute whose declared value is
+.B \s-1ENTITY\s0
+or
+.BR \s-1ENTITIES\s0 .
+.TP
+.BI S ename
+Define a subdocument entity named
+.IR ename .
+This command will be preceded by one or more
+.B f
+commands giving the filenames generated by the entity manager from the system
+and public identifiers,
+by a
+.B p
+command if a public identifier was declared for the entity,
+and by a
+.B s
+command if a system identifier was declared for the entity.
+A subdocument entity will only be defined if it is referenced
+in a
+.B {
+command
+or in an
+.B A
+command for an attribute whose declared value is
+.B \s-1ENTITY\s0
+or
+.BR \s-1ENTITIES\s0 .
+.TP
+.BI s sysid
+This command applies to the next
+.BR E ,
+.B S
+or
+.B N
+command and specifies the associated system identifier.
+.TP
+.BI p pubid
+This command applies to the next
+.BR E ,
+.B S
+or
+.B N
+command and specifies the associated public identifier.
+.TP
+.BI f filename
+This command applies to the next
+.B E
+or
+.B S
+command and specifies an associated filename.
+There will be more than one
+.B f
+command for a single
+.B E
+or
+.B S
+command if the system identifier used a
+.if \n(Os=0 colon.
+.if \n(Os=1 semi-colon.
+.TP
+.BI { ename
+The start of the \*S subdocument entity
+.IR ename ;
+.I ename
+will have been defined using a
+.B S
+command.
+.TP
+.BI } ename
+The end of the \*S subdocument entity
+.IR ename .
+.TP
+.BI L lineno\ file
+.TQ
+.BI L lineno
+Set the current line number and filename.
+The
+.I file
+argument will be omitted if only the line number has changed.
+This will be output only if the
+.B \-l
+option has been given.
+.TP
+.BI # text
+An \s-1APPINFO\s0 parameter of
+.I text
+was specified in the \*S declaration.
+This is not strictly part of the ESIS, but a structure-controlled
+application is permitted to act on it.
+No
+.B #
+command will be output if
+.B \s-1APPINFO\s0\ \s-1NONE\s0
+was specified.
+A
+.B #
+command will occur at most once,
+and may be preceded only by a single
+.B L
+command.
+.TP
+.B C
+This command indicates that the document was a conforming \*S document.
+If this command is output, it will be the last command.
+An \*S document is not conforming if it references a subdocument entity
+that is not conforming.
+.SH BUGS
+Some non-SGML characters in literals are counted as two characters for the
+purposes of quantity and capacity calculations.
+.SH "SEE ALSO"
+The \*S Handbook, Charles F. Goldfarb
+.br
+\s-1ISO\s0 8879 (Standard Generalized Markup Language),
+International Organization for Standardization
+.SH ORIGIN
+\s-1ARCSGML\s0 was written by Charles F. Goldfarb.
+.LP
+.I Sgmls
+was derived from \s-1ARCSGML\s0 by James Clark (jjc@jclark.com),
+to whom bugs should be reported.
diff --git a/usr.bin/sgmls/sgmls/sgmlxtrn.c b/usr.bin/sgmls/sgmls/sgmlxtrn.c
new file mode 100644
index 0000000..d27eb66
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/sgmlxtrn.c
@@ -0,0 +1,223 @@
+/* Standard Generalized Markup Language Users' Group (SGMLUG)
+ SGML Parser Materials (ARCSGML 1.0)
+
+(C) 1983-1988 Charles F. Goldfarb (assigned to IBM Corporation)
+(C) 1988-1991 IBM Corporation
+
+Licensed to the SGML Users' Group for distribution under the terms of
+the following license: */
+
+char license[] = /* SGML Users' Group free license text (declared extern in sgmlxtrn.h). */
+"SGMLUG hereby grants to any user: (1) an irrevocable royalty-free,\n\
+worldwide, non-exclusive license to use, execute, reproduce, display,\n\
+perform and distribute copies of, and to prepare derivative works\n\
+based upon these materials; and (2) the right to authorize others to\n\
+do any of the foregoing.\n";
+
+#include "sgmlincl.h"
+
+/* SGMLXTRN: Storage allocation and initialization for all public variables.
+ Exceptions: Constants lex????? and del????? are defined in
+ LEX?????.C modules; constants pcb????? are defined in PCB?????.c.
+*/
+int badresw = 0; /* 1=REF_ out of context; 0=valid. */
+int charmode = 0; /* >0=in #CHARS; 0=not. */
+int conactsw = 0; /* 1=return saved content action; 0=get new one. */
+int conrefsw = 0; /* 1=content reference att specified; 0=no. */
+int contersv = 0; /* Save contersw while processing pending REF. */
+int contersw = 0; /* 1=element or #CHARS out of context; 0=valid. */
+int datarc = 0; /* Return code for data: DAF_ or REF_. */
+int delmscsw = 0; /* 1=DELMSC must be read on return to es==0. */
+int didreq = 0; /* 1=required implied tag processed; 0=no. */
+int dostag = 0; /* 1=retry newetd instead of parsing; 0=parse. */
+int dtdsw = 0; /* DOCTYPE declaration found: 1=yes; 0=no. */
+int entdatsw = 0; /* 2=CDATA entity; 4=SDATA; 8=NDATA; 0=none. */
+int entpisw = 0; /* 4=PI entity occurred; 0=not. */
+int eodsw = 0; /* 1=eod found in error; 0=not yet. */
+int eofsw = 0; /* 1=eof found in body of document; 0=not yet. */
+int es = -1; /* Index of current source in stack. */
+int etagimct = 0; /* Implicitly ended elements left on stack. */
+int etagimsw = 0; /* 1=end-tag implied by other end-tag; 0=not. */
+int etagmin = MINNONE; /* Minim: NONE NULL NET DATA; implied by S/ETAG*/
+int etictr = 0; /* Number of "NET enabled" tags on stack. */
+int etisw = 0; /* 1=tag ended with eti; 0=did not. */
+int indtdsw = 0; /* Are we in the DTD? 1=yes; 0=no. */
+int mslevel = 0; /* Nesting level of marked sections. */
+int msplevel = 0; /* Nested MS levels subject to special parse. */
+int prologsw = 1; /* 1=in prolog; 0=not. */
+int pss = 0; /* SGMLACT: scbsgml stack level. */
+int sgmlsw = 0; /* SGML declaration found: 1=yes; 0=no. */
+int stagmin = MINNONE; /* Minimization: NONE, NULL tag, implied by STAG*/
+int tagctr = 0; /* Tag source chars read. */
+int ts = -1; /* Index of current tag in stack. */
+struct parse *propcb = &pcbpro; /* Current PCB for prolog parse. */
+int aentctr = 0; /* Number of ENTITY tokens in this att list. */
+int conact = 0; /* Return code from content parse. */
+int conrefsv = 0; /* Save conrefsw when doing implied start-tag.*/
+int dtdrefsw = 0; /* External DTD? 1=yes; 0=no. */
+int etiswsv = 0; /* Save etisw when processing implied start-tag.*/
+int grplvl = 0; /* Current level of nested grps in model. */
+int idrctr = 0; /* Number of IDREF tokens in this att list. */
+int mdessv = 0; /* ES level at start of markup declaration. */
+int notadn = 0; /* Position of NOTATION attribute in list. */
+int parmno = 0; /* Current markup declaration parameter number. */
+int pexsw = 0; /* 1=tag valid solely because of plus exception.*/
+int rcessv = 0; /* ES level at start of RCDATA content. */
+int tagdelsw = 0; /* 1=tag ended with delimiter; 0=no delimiter. */
+int tokencnt = 0; /* Number of tokens found in attribute value. */
+struct entity *ecbdeflt = 0; /* #DEFAULT ecb (NULL if no default entity). */
+struct etd *docetd = 0; /* The etd for the document as a whole. */
+struct etd *etagreal = 0; /* Actual or dummy etd that implied this tag. */
+struct etd *newetd = 0; /* The etd for a start- or end-tag recognized. */
+struct etd *nextetd = 0; /* ETD that must come next (only one choice). */
+struct etd *stagreal = 0; /* Actual or dummy etd that implied this tag. */
+struct parse *conpcb = 0; /* Current PCB for content parse. */
+UNCH *data = 0; /* Pointer to returned data in buffer. */
+UNCH *mdname = 0; /* Name of current markup declaration. */
+UNCH *ptcon = 0; /* Current pointer into tbuf. NOTE(review): same text as ptpro; presumably the content-parse pointer -- confirm. */
+UNCH *ptpro = 0; /* Current pointer into tbuf. NOTE(review): same text as ptcon; presumably the prolog-parse pointer -- confirm. */
+UNCH *rbufs = 0; /* DOS file read area: start position for read. */
+UNCH *subdcl = 0; /* Subject of markup declaration (e.g., GI). */
+int Tstart = 0; /* Save starting token for AND group testing. */
+UNS conradn = 0; /* 1=CONREF attribute in list (0=no). */
+UNS datalen = 0; /* Length of returned data in buffer. */
+UNS entlen = 0; /* Length of TAG or EXTERNAL entity text. */
+UNS idadn = 0; /* Number of ID attribute (0 if none). */
+UNS noteadn = 0; /* Number of NOTATION attribute (0 if none). */
+UNS reqadn = 0; /* Num of atts with REQUIRED default (0=none). */
+int grplongs; /* Number of longs for GRPCNT bitvector. */
+
+/* Variable arrays and structures.
+*/
+struct ad *al = 0; /* Current attribute list work area. */
+struct dcncb *dcntab[1]; /* List of data content notation names. */
+struct entity *etab[ENTHASH]; /* Entity hash table. */
+struct etd *etdtab[ETDHASH]; /* Element type definition hash table. */
+struct fpi fpidf; /* Fpi for #DEFAULT entity. */
+struct id *itab[IDHASH]; /* Unique identifier hash table. */
+struct etd **nmgrp = 0; /* Element name group */
+PDCB *nnmgrp = 0; /* Notation name group */
+struct restate *scbsgml = 0; /* SGMLACT: return action state stack. */
+struct source *scbs = 0; /* Stack of open sources ("SCB stack"). */
+struct srh *srhtab[1]; /* List of SHORTREF table headers. */
+struct sgmlstat ds; /* Document statistics. */
+struct switches sw; /* Parser control switches set by text proc. */
+struct tag *tags = 0; /* Stack of open elements ("tag stack"). */
+struct thdr *gbuf = 0; /* Buffer for creating group. */
+struct thdr prcon[3]; /* 0-2: Model for *DOC content. */
+struct thdr undechdr; /* 0: Default model hdr for undeclared content. */
+UNCH *dtype = 0; /* Document type name. */
+UNCH *entbuf = 0; /* Buffer for entity reference name. */
+UNCH fce[2]; /* String form of FCE char.
+ (fce[1] must be EOS).*/
+/* Buffer for non-SGML character reference.*/
+UNCH nonchbuf[2] = { DELNONCH };
+UNCH *tbuf; /* Work area for tokenization. */
+UNCH *lbuf = 0; /* In tbuf: Literal parse area.*/
+UNCH *sysibuf = 0; /* Buffer for system identifiers. */
+UNCH *pubibuf = 0; /* Buffer for public identifiers. */
+UNCH *nmbuf = 0; /* Name buffer used by mdentity. */
+struct mpos *savedpos; /* NOTE(review): no description given here; confirm purpose. */
+
+/* Constants.
+*/
+struct map dctab[] = { /* Keywords for declared content parameter.*/
+ { key[KRCDATA], MRCDATA+MPHRASE },
+ { key[KCDATA], MCDATA+MPHRASE },
+ { key[KANY], MANY+MCHARS+MGI },
+ { key[KEMPTY], MNONE+MPHRASE },
+ { NULL, 0 } /* Last entry: NULL keyword. */
+};
+struct map deftab[] = { /* Default value keywords. */
+ { key[KIMPLIED], DNULL },
+ { key[KREQUIRED], DREQ },
+ { key[KCURRENT], DCURR },
+ { key[KCONREF], DCONR },
+ { key[KFIXED], DFIXED},
+ { NULL, 0} /* Last entry: NULL keyword. */
+};
+struct map dvtab[] = { /* Declared value: keywords and type codes.*/
+/* TYPE NUMBER */
+/* grp ANMTGRP Case 1 0 Grp size */
+/* grp member ANMTGRP Case 0 Position */
+/* grp ANOTEGRP Case 1 1 Grp size */
+ { key[KNOTATION], ANOTEGRP}, /* Case 1 Position */
+ { key[KCDATA], ACHARS }, /* Case 2 Always 0 */
+ { key[KENTITY], AENTITY }, /* Case 3 Normal 1 */
+ { key[KID], AID }, /* Case 4 Normal 1 */
+ { key[KIDREF], AIDREF }, /* Case 5 Normal 1 */
+ { key[KNAME], ANAME }, /* Case 6 Normal 1 */
+ { key[KNMTOKEN], ANMTOKE }, /* Case 7 Normal 1 */
+ { key[KNUMBER], ANUMBER }, /* Case 8 Normal 1 */
+ { key[KNUTOKEN], ANUTOKE }, /* Case 9 Normal 1 */
+ { key[KENTITIES], AENTITYS}, /* Case A Normal 1 */
+ { key[KIDREFS], AIDREFS }, /* Case B # tokens */
+ { key[KNAMES], ANAMES }, /* Case C # tokens */
+ { key[KNMTOKENS], ANMTOKES}, /* Case D # tokens */
+ { key[KNUMBERS], ANUMBERS}, /* Case E # tokens */
+ { key[KNUTOKENS], ANUTOKES}, /* Case F # tokens */
+ { NULL, 0 } /* Case 0 ERROR (last entry: NULL keyword) */
+};
+struct map enttab[] = { /* Entity declaration second parameter. */
+ { key[KCDATA], ESC },
+ { key[KSDATA], ESX },
+ { key[KMS], ESMS},
+ { key[KPI], ESI },
+ { key[KSTARTTAG], ESS },
+ { key[KENDTAG], ESE },
+ { key[KMD], ESMD},
+ { NULL, 0 } /* Last entry: NULL keyword. */
+};
+struct map exttab[] = { /* Keywords for external identifier. */
+ { key[KSYSTEM], EDSYSTEM },
+ { key[KPUBLIC], EDPUBLIC },
+ { NULL, 0 } /* Last entry: NULL keyword. */
+};
+struct map extettab[] = { /* Keywords for external entity type. */
+ { key[KCDATA], ESNCDATA },
+ { key[KNDATA], ESNNDATA },
+ { key[KSDATA], ESNSDATA },
+ { key[KSUBDOC], ESNSUB },
+ { NULL, 0 } /* Last entry: NULL keyword. */
+};
+struct map funtab[] = { /* Function character reference names. */
+ { key[KRE], RECHAR },
+ { key[KRS], RSCHAR },
+ { key[KSPACE], SPCCHAR },
+ /* We should use an extra table for added functions. */
+ { (UNCH *)"TAB", TABCHAR }, /* Name given as a literal rather than via key[]. */
+ { NULL, 0 } /* Last entry: NULL keyword. */
+};
+struct map mstab[] = { /* Marked section keywords. */
+ { key[KTEMP], MSTEMP },
+ { key[KINCLUDE], MSTEMP }, /* Treat INCLUDE like TEMP; both are NOPs.*/
+ { key[KRCDATA], MSRCDATA},
+ { key[KCDATA], MSCDATA },
+ { key[KIGNORE], MSIGNORE},
+ { NULL, 0 } /* Last entry: NULL keyword. */
+};
+struct map pubcltab[] = { /* Names for public text class. */
+ { (UNCH *)"CAPACITY", FPICAP },
+ { (UNCH *)"CHARSET", FPICHARS},
+ { (UNCH *)"DOCUMENT", FPIDOC },
+ { (UNCH *)"DTD", FPIDTD },
+ { (UNCH *)"ELEMENTS", FPIELEM },
+ { (UNCH *)"ENTITIES", FPIENT },
+ { (UNCH *)"LPD", FPILPD },
+ { (UNCH *)"NONSGML", FPINON },
+ { (UNCH *)"NOTATION", FPINOT },
+ { (UNCH *)"SHORTREF", FPISHORT},
+ { (UNCH *)"SUBDOC", FPISUB },
+ { (UNCH *)"SYNTAX", FPISYN },
+ { (UNCH *)"TEXT", FPITEXT },
+ { NULL, 0 } /* Last entry: NULL keyword. */
+};
+UNCH indefent[] = "\12#DEFAULT"; /* Internal name: default entity name. NOTE(review): leading octal byte equals the array size (itself + text + EOS) in all five names; presumably a length prefix -- confirm. */
+UNCH indefetd[] = "\12*DOCTYPE"; /* Internal name: default document type. */
+UNCH indocent[] = "\12*SGMLDOC"; /* Internal name: SGML document entity. */
+UNCH indocetd[] = "\6*DOC"; /* Internal name: document level etd. */
+UNCH indtdent[] = "\11*DTDENT"; /* Internal name: external DTD entity. */
+
+struct etd dumetd[3];
+struct entity *dumpecb; /* SRMNULL points to this. */
+UNCH sgmlkey[] = "SGML";
diff --git a/usr.bin/sgmls/sgmls/sgmlxtrn.h b/usr.bin/sgmls/sgmls/sgmlxtrn.h
new file mode 100644
index 0000000..f1b0b4b
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/sgmlxtrn.h
@@ -0,0 +1,121 @@
+/* SGMLXTRN.H: External declarations for SGML public variables.
+ Exceptions: Constants lex????? and del????? are defined in
+ LEX?????.C modules; constants pcb????? are defined in PCB?????.c.
+*/
+#ifndef SGMLXTRN /* Don't include this file more than once. */
+#define SGMLXTRN
+extern int badresw; /* 1=REF_ out of context; 0=valid. */
+extern int charmode; /* >0=in #CHARS; 0=not. */
+extern int conactsw; /* 1=return saved content action; 0=get new one. */
+extern int conrefsw; /* 1=content reference att specified; 0=no. */
+extern int contersv; /* Save contersw while processing pending REF. */
+extern int contersw; /* 1=element or #CHARS out of context; 0=valid. */
+extern int datarc; /* Return code for data: DAF_ or REF_. */
+extern int delmscsw; /* 1=DELMSC must be read on return to es==0. */
+extern int didreq; /* 1=required implied tag processed; 0=no. */
+extern int dostag; /* 1=retry newetd instead of parsing; 0=parse. */
+extern int dtdsw; /* DOCTYPE declaration found: 1=yes; 0=no. */
+extern int entdatsw; /* 2=CDATA entity; 4=SDATA; 8=NDATA; 0=none. */
+extern int entpisw; /* 4=PI entity occurred; 0=not. */
+extern int eodsw; /* 1=eod found in error; 0=not yet. */
+extern int eofsw; /* 1=eof found in body of document; 0=not yet. */
+extern int etagimct; /* Implicitly ended elements left on stack. */
+extern int etagimsw; /* 1=end-tag implied by other end-tag; 0=not. */
+extern int etagmin; /* Minim: NONE NULL NET DATA; implied by S/ETAG*/
+extern int etictr; /* Number of "NET enabled" tags on stack. */
+extern int etisw; /* 1=tag ended with eti; 0=did not. */
+extern int indtdsw; /* Are we in the DTD? 1=yes; 0=no. */
+extern int mslevel; /* Nesting level of marked sections. */
+extern int msplevel; /* Nested MS levels subject to special parse. */
+extern int prologsw; /* 1=in prolog; 0=not. */
+extern int pss; /* SGMLACT: scbsgml stack level. */
+extern int sgmlsw; /* SGML declaration found: 1=yes; 0=no. */
+extern int stagmin; /* Minimization: NONE, NULL tag, implied by STAG*/
+extern int tagctr; /* Tag source chars read. */
+extern int ts; /* Index of current tag in stack. */
+extern struct parse *propcb; /* Current PCB for prolog parse. */
+extern int aentctr; /* Number of ENTITY tokens in this att list. */
+extern int conact; /* Return code from content parse. */
+extern int conrefsv; /* Save conrefsw when doing implied start-tag.*/
+extern int dtdrefsw; /* External DTD? 1=yes; 0=no. */
+extern int etiswsv; /* Save etisw when processing implied start-tag.*/
+extern int grplvl; /* Current level of nested grps in model. */
+extern int idrctr; /* Number of IDREF tokens in this att list. */
+extern int mdessv; /* ES level at start of markup declaration. */
+extern int notadn; /* Position of NOTATION attribute in list. */
+extern int parmno; /* Current markup declaration parameter number. */
+extern int pexsw; /* 1=tag valid solely because of plus exception.*/
+extern int rcessv; /* ES level at start of RCDATA content. */
+extern int tagdelsw; /* 1=tag ended with delimiter; 0=no delimiter. */
+extern int tokencnt; /* Number of tokens found in attribute value. */
+extern struct entity *ecbdeflt; /* #DEFAULT ecb (NULL if no default entity). */
+extern struct etd *docetd; /* The etd for the document as a whole. */
+extern struct etd *etagreal; /* Actual or dummy etd that implied this tag. */
+extern struct etd *newetd; /* The etd for a start- or end-tag recognized. */
+extern struct etd *nextetd; /* ETD that must come next (only one choice). */
+extern struct etd *stagreal; /* Actual or dummy etd that implied this tag. */
+extern struct parse *conpcb; /* Current PCB for content parse. */
+extern UNCH *data; /* Pointer to returned data in buffer. */
+extern UNCH *mdname; /* Name of current markup declaration. */
+extern UNCH *ptcon; /* Current pointer into tbuf. NOTE(review): same text as ptpro; presumably the content-parse pointer -- confirm. */
+extern UNCH *ptpro; /* Current pointer into tbuf. NOTE(review): same text as ptcon; presumably the prolog-parse pointer -- confirm. */
+extern UNCH *rbufs; /* DOS file read area: start position for read. */
+extern UNCH *subdcl; /* Subject of markup declaration (e.g., GI). */
+extern int Tstart; /* Save starting token for AND group testing. */
+extern UNS conradn; /* 1=CONREF attribute in list (0=no). */
+extern UNS datalen; /* Length of returned data in buffer. */
+extern UNS entlen; /* Length of TAG or EXTERNAL entity text. */
+extern UNS idadn; /* Number of ID attribute (0 if none). */
+extern UNS noteadn; /* Number of NOTATION attribute (0 if none). */
+extern UNS reqadn; /* Num of atts with REQUIRED default (0=none). */
+extern int grplongs; /* Number of longs for GRPCNT bitvector. */
+/* Variable arrays and structures.
+*/
+extern struct ad *al; /* Current attribute list work area. */
+extern struct dcncb *dcntab[];/* List of data content notation names. */
+extern struct entity *etab[]; /* Entity hash table. */
+extern struct etd *etdtab[]; /* Element type definition hash table. */
+extern struct fpi fpidf; /* Fpi for #DEFAULT entity. */
+extern struct id *itab[]; /* Unique identifier hash table. */
+extern struct etd **nmgrp; /* Element name group */
+extern PDCB *nnmgrp; /* Notation name group */
+extern struct restate *scbsgml; /* SGMLACT: return action state stack. */
+extern struct srh *srhtab[]; /* List of SHORTREF table headers. */
+extern struct sgmlstat ds; /* Document statistics. */
+extern struct switches sw; /* Parser control switches set by text proc. */
+extern struct tag *tags; /* Stack of open elements ("tag stack"). */
+extern struct thdr *gbuf; /* Buffer for creating group. */
+extern struct thdr prcon[]; /* 0-2: Model for *DOC content. */
+extern struct thdr undechdr; /* 0: Default model hdr for undeclared content. */
+extern UNCH *dtype; /* Document type name. */
+extern UNCH *entbuf; /* Buffer for entity reference name. */
+extern UNCH fce[]; /* String form of FCE char (fce[1] must be EOS).*/
+extern UNCH nonchbuf[]; /* Buffer for valid nonchar character reference.*/
+extern UNCH *tbuf; /* Work area for tokenization. */
+extern UNCH *lbuf; /* In tbuf: Literal parse area; TAGLEN limit.*/
+extern struct entity *dumpecb; /* SRMNULL points to this. */
+extern UNCH *sysibuf; /* Buffer for system identifiers. */
+extern UNCH *pubibuf; /* Buffer for public identifiers. */
+extern UNCH *nmbuf; /* Name buffer used by mdentity. */
+extern struct mpos *savedpos; /* NOTE(review): no description given here; confirm purpose. */
+
+/* Constants.
+*/
+extern int scbsgmnr; /* SCBSGML: new record; do not ignore RE. NOTE(review): no definition in sgmlxtrn.c -- confirm it is defined elsewhere. */
+extern int scbsgmst; /* SCBSGML: trailing stag or markup; ignore RE. NOTE(review): no definition in sgmlxtrn.c -- confirm it is defined elsewhere. */
+extern struct map dctab[]; /* Keywords for declared content parameter. */
+extern struct map deftab[]; /* Default value keywords. */
+extern struct map dvtab[]; /* Declared value: keywords and type codes. */
+extern struct map enttab[]; /* Entity declaration second parameter. */
+extern struct map exttab[]; /* Keywords for external identifier. */
+extern struct map extettab[]; /* Keywords for external entity type. */
+extern struct map funtab[]; /* Function character reference names. */
+extern struct map mstab[]; /* Marked section keywords. */
+extern struct map pubcltab[]; /* Keywords for public text class. */
+extern UNCH indefent[]; /* Internal name: default entity name. */
+extern UNCH indefetd[]; /* Internal name: default document type. */
+extern UNCH indocent[]; /* Internal name: SGML document entity. */
+extern UNCH indocetd[]; /* Internal name: etd for document as a whole. */
+extern UNCH indtdent[]; /* Internal name: external DTD entity. */
+extern char license[]; /* SGML Users' Group free license. */
+#endif /* ndef SGMLXTRN */
diff --git a/usr.bin/sgmls/sgmls/source.h b/usr.bin/sgmls/sgmls/source.h
new file mode 100644
index 0000000..32cc85a
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/source.h
@@ -0,0 +1,114 @@
+/* SOURCE.H: Entity and source control block structures and definitions.
+*/
+#define ENTHASH 503 /* Size of entity hash table. Must be prime. */
+/* Entity storage class values for estore member of entity structure.
+   The bracketed <NAME ...> remarks interleaved below are the original
+   author's markers for which routine sets, or treats specially, the
+   values listed between an opening and a closing marker.
+   Note that ESFM and ESN share the value 10, and that the FILESW macro
+   in this header tests estore>=ESFM.
+*/
+#define EST 1 /* String: Tag (usually a fixed STARTGI). */
+ /* <MDENTITY sets these:> */
+#define ESMD 2 /* String: Markup declaration. */
+#define ESMS 3 /* String: Marked section. */
+#define ESM 4 /* String: ordinary text. */
+ /* <ENTOPEN treats these specially:> */
+#define ESS 5 /* ETD: Start-tag. */
+#define ESE 6 /* ETD: End-tag. */
+#define ESI 7 /* String: PI. */
+#define ESX 8 /* String: SDATA general entity. */
+#define ESC 9 /* String: CDATA general entity. */
+ /* </MDENTITY> <MDEXTID sets these:> */
+#define ESFM 10 /* LPU: minimum external (file) storage class. */
+#define ESN 10 /* XCB: N/C/SDATA or SUBDOC control block. */
+ /* </ENTOPEN> */
+#define ESF 11 /* LPU: General entity. */
+#define ESP 12 /* LPU: Parameter entity. */
+#define ESD 13 /* LPU: Document type definition. */
+#define ESL 14 /* LPU: Link process definition. */
+#define ESK 15 /* LPU: Data content notation. */
+ /* </MDEXTID> */
+
+/* Entity text: one pointer-sized slot whose interpretation depends on the
+   owning entity's storage class (estore).  Only one member is meaningful
+   at a time.
+*/
+union etext { /* Entity text. */
+ UNIV x; /* External ID generated by system. */
+ UNCH *c; /* Character string. */
+ struct ne *n; /* N/C/SDATA or SUBDOC entity control block. */
+};
+#define ETEXTSZ sizeof(union etext) /* Size of an entity text slot. */
+/* Entity control block (ECB).  ECBs are linked through enext; the first
+   two members (enext, ename) have the same names and order as the first
+   two members of struct srh.
+   NOTE(review): presumably this lets both kinds of block share chain
+   handling code -- confirm against the hash routines.
+*/
+struct entity { /* Entity control block. */
+ struct entity *enext; /* Next entity in chain. */
+ UNCH *ename; /* Entity name with length and EOS. */
+ UNCH estore; /* Storage class (see values above). */
+ UNCH dflt; /* Declared as default entity. */
+ UNCH mark; /* For use by sgmlment. */
+ union etext etx; /* Entity text. */
+};
+#define ENTSZ sizeof(struct entity)
+typedef struct entity *PECB; /* Ptr to entity control block. */
+typedef struct entity **TECB; /* Table of entity control blocks. */
+
+/* Source control block (SCB): the read state of one open entity.  Each
+   SCB embeds a whole struct entity (not a pointer to one) as its first
+   member.  Open sources live in the scbs array, indexed by es.
+*/
+struct source { /* Source control block. */
+ struct entity ecb; /* Entity control block. */
+ unsigned long rcnt; /* Source record number. */
+ int ccnt; /* Source record chars since last RS. */
+ int curoff; /* Offset of curchar (chars read in this block).*/
+ UNCH curchar; /* Current character. */
+ UNCH nextchar; /* If curchar was DELNONCH, next character. */
+ UNIV fcb; /* SGMLIO fcb ptr returned by OPEN. */
+ UNCH *fbuf; /* 1st char in buffer (0=PEND) or entity text. */
+ UNCH *fpos; /* Current char in buffer or entity text. */
+ UNCH pushback; /* Character before pend position */
+ char copied; /* Is this a copy of the internal entity? */
+};
+#define SCBSZ sizeof(struct source)
+typedef struct source *PSCB; /* Ptr to source control block. */
+
+extern int es; /* Index of current source in stack. */
+extern struct source *scbs; /* Stack of open sources ("SCB stack"). */
+
+/* Member definitions for source and entity control blocks.
+   Every macro here expands in terms of scbs[es], so each one implicitly
+   refers to whichever source is current at the point of use.  SCB and ECB
+   expand to the structure objects themselves (lvalues), not to pointers.
+   Note that rcnt is an unsigned long and that FPOS/FBUF are pointers, so
+   RCNT and pointer differences need matching printf conversions.
+*/
+#define SCB (scbs[es]) /* Current source control block (the object). */
+
+#define ECB SCB.ecb /* Entity control block embedded in current SCB. */
+#define FBUF SCB.fbuf /* Pointer to start of entity buffer. */
+#define FPOS SCB.fpos /* Pointer to current char of current source. */
+#define RSCC SCB.ccnt /* CCNT at start of block (across EOB/EOS/EOF). */
+#define CCO SCB.curoff /* Offset in read buffer of current char. */
+#define CC SCB.curchar /* Current character of current source entity. */
+#define NEXTC SCB.nextchar /* Next character in current source entity. */
+#define CCNT (SCB.ccnt+CCO) /* Position of CC in current record (RS=0). */
+#define RCNT SCB.rcnt /* Position of record in entity (origin=1). */
+#define SCBFCB SCB.fcb /* Current file control block (if FILESW). */
+#define ECBPTR ((ECB.enext)) /* Pointer to this entity's ECB (kept in enext). */
+#define ENTITY ((ECB.ename)) /* Current entity name. */
+#define FILESW (ECB.estore>=ESFM) /* 1=Entity is external file; 0=internal. */
+#define NEWCC (++FPOS) /* Advance FPOS by one; yields the new pointer. */
+#define REPEATCC (--FPOS) /* Back FPOS up by one; yields the new pointer. */
+#define COPIEDSW SCB.copied /* Non-zero means entity was copied. */
+
+/* Short reference map header.  srhsrm is a TECB, i.e. a table of entity
+   control block pointers.  SRMNULL is the address of the dumpecb pointer,
+   which gives it the same type as a real map (struct entity **) so the
+   two can be compared directly.
+*/
+struct srh { /* Short reference map header. */
+ struct srh *enext; /* Next short reference map in chain. */
+ UNCH *ename; /* Short reference map name. */
+ TECB srhsrm; /* Ptr to short reference map. */
+};
+#define SRHSZ (sizeof(struct srh))
+typedef struct srh *PSRH; /* Ptr to short reference map header. */
+#define SRMNULL (&dumpecb) /* Dummy ptr to empty short reference map. */
+
+/* Definitions for ENTOPEN/ENTREF return codes.
+   All of these codes are negative and distinct.
+*/
+#define ENTUNDEF -1 /* Callers of ENTOPEN: entity undefined. */
+#define ENTLOOP -2 /* ENTOPEN: endless loop entity. */
+#define ENTMAX -3 /* ENTOPEN: too many open entities. */
+#define ENTFILE -4 /* ENTOPEN: file I/O error. */
+#define ENTDATA -5 /* ENTOPEN: CDATA or SDATA entity. */
+#define ENTPI -6 /* ENTOPEN: PI entity. */
+
+/* Definitions for ENTDATA switches set in contersw.
+   Each value is a single distinct bit, so the switches can be combined
+   and tested with the SET/GET style bit macros.
+*/
+#define CDECONT 2 /* 0010 CDATA entity referenced. */
+#define SDECONT 4 /* 0100 SDATA entity referenced. */
+#define NDECONT 8 /* 1000 NDATA entity referenced. */
+
+/* Definitions for manipulating signed source character counters.
+   CTRSET stores minus the number of characters from FBUF up to and
+   including the current one (FPOS+1-FBUF).  CTRGET adds the same quantity
+   evaluated later, so immediately after CTRSET it yields 0 and afterwards
+   it yields the number of characters FPOS has advanced, plus anything that
+   was added to the counter in between.
+   The CTR argument is parenthesized so that any expression (not just a
+   plain variable name) binds as a unit inside the expansion.
+*/
+#define CTRSET(CTR) ((CTR) = (int) -(FPOS+1-FBUF)) /* Init source char ctr. */
+#define CTRGET(CTR) ((CTR) + (int) (FPOS+1-FBUF)) /* Read source char ctr. */
+
+
diff --git a/usr.bin/sgmls/sgmls/std.h b/usr.bin/sgmls/sgmls/std.h
new file mode 100644
index 0000000..3a9ab4b
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/std.h
@@ -0,0 +1,116 @@
+/* std.h -
+ Include standard header files.
+*/
+
+#ifndef STD_H
+#define STD_H 1
+
+#include <stdio.h>
+#include <ctype.h>
+#include <errno.h>
+#include <assert.h>
+#ifdef SUPPORT_SUBDOC
+#include <signal.h>
+#endif /* SUPPORT_SUBDOC */
+
+#ifndef STDDEF_H_MISSING
+#include <stddef.h>
+#endif /* not STDDEF_H_MISSING */
+
+#ifndef LIMITS_H_MISSING
+#include <limits.h>
+#endif /* not LIMITS_H_MISSING */
+
+#ifndef UINT_MAX
+/* Fallback for systems whose headers do not supply UINT_MAX.  Converting
+   -1 to unsigned int yields the largest unsigned int value whatever the
+   width of the type, so there is no need to guess from sizeof.  (0x7fff
+   and 0x7fffffff are the maxima of the *signed* 16- and 32-bit types and
+   understate the unsigned range by half.)  Like any cast expression, this
+   cannot be used in #if directives. */
+#define UINT_MAX ((unsigned int)-1)
+#endif
+
+#ifdef VARARGS
+#include <varargs.h>
+#else
+#include <stdarg.h>
+#endif
+
+/* String/memory routines.  With BSD_STRINGS the ANSI names are mapped
+   onto the BSD ones from <strings.h>.  Note that bcopy takes its source
+   first, hence the swapped arguments in the memcpy mapping, and that the
+   mapped calls yield whatever bcopy/bcmp yield rather than the ANSI
+   results.
+   NOTE(review): bcmp is normally documented as reporting only equal /
+   not equal, so callers should not rely on the sign of memcmp here --
+   confirm against the target system's manual. */
+#ifdef BSD_STRINGS
+#include <strings.h>
+#define memcpy(to, from, n) bcopy(from, to, n)
+#define memcmp(p, q, n) bcmp(p, q, n)
+#define strchr(s, c) index(s, c)
+#define strrchr(s, c) rindex(s, c)
+#else /* not BSD_STRINGS */
+#include <string.h>
+#endif /* not BSD_STRINGS */
+
+/* Declare strerror() ourselves when the C library lacks it; a prototype
+   is given only when USE_PROTOTYPES is defined. */
+#ifdef STRERROR_MISSING
+#ifdef USE_PROTOTYPES
+extern char *strerror(int);
+#else
+extern char *strerror();
+#endif
+#endif /* STRERROR_MISSING */
+
+/* Without <stdlib.h>, declare the few library routines we rely on.  These
+   declarations use UNIV, which this header does not define, so UNIV must
+   already be defined wherever STDLIB_H_MISSING is set. */
+#ifdef STDLIB_H_MISSING
+UNIV malloc();
+UNIV calloc();
+UNIV realloc();
+char *getenv();
+long atol();
+#else /* not STDLIB_H_MISSING */
+#include <stdlib.h>
+#endif /* not STDLIB_H_MISSING */
+
+/* Emulate ANSI remove() with unlink() where remove() is missing. */
+#ifdef REMOVE_MISSING
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif /* HAVE_UNISTD_H */
+#define remove unlink
+#endif /* REMOVE_MISSING */
+
+/* Emulate ANSI raise() by sending the signal to our own process.
+   NOTE(review): this header includes <signal.h> only under
+   SUPPORT_SUBDOC, so users of raise() in other configurations presumably
+   need to include it themselves -- confirm. */
+#ifdef RAISE_MISSING
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif /* HAVE_UNISTD_H */
+#define raise(sig) kill(getpid(), sig)
+#endif /* RAISE_MISSING */
+
+/* Fallbacks for macros that ANSI headers normally provide.  Each one is
+   defined only if the system headers included above did not define it. */
+
+/* Byte offset of MEMBER within TYPE, computed from a null TYPE pointer. */
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+/* Conventional process exit statuses. */
+#ifndef EXIT_FAILURE
+#define EXIT_FAILURE 1
+#endif
+#ifndef EXIT_SUCCESS
+#define EXIT_SUCCESS 0
+#endif
+
+/* fseek() origin codes; all three are defined together, keyed on SEEK_SET. */
+#ifndef SEEK_SET
+#define SEEK_SET 0
+#define SEEK_CUR 1
+#define SEEK_END 2
+#endif
+
+/* Emulate fpos_t/fsetpos/fgetpos with long/fseek/ftell.  Both macros take
+   a pointer to the position, like the ANSI functions.  fsetpos yields the
+   result of fseek; fgetpos stores ftell's result through pos and yields
+   non-zero exactly when ftell returned -1L. */
+#ifdef FPOS_MISSING
+typedef long fpos_t;
+#define fsetpos(stream, pos) fseek(stream, *(pos), SEEK_SET)
+#define fgetpos(stream, pos) ((*(pos) = ftell(stream)) == -1L)
+#endif /* FPOS_MISSING */
+
+/* Old BSD systems lack L_tmpnam and tmpnam(). This is a partial
+emulation using mktemp(). It requires that the argument to tmpnam()
+be non-NULL.
+
+The macro copies the template into buf, lets mktemp() fill in the X's,
+and yields buf on success or 0 on failure.  Failure is recognized both as
+a null return from mktemp() and as an empty string left in buf.  buf is
+evaluated more than once, so it must not have side effects, and it must
+have room for at least L_tmpnam characters. */
+
+#ifndef L_tmpnam
+#define tmpnam_template "/tmp/sgmlsXXXXXX"
+#define L_tmpnam (sizeof(tmpnam_template))
+#undef tmpnam
+#define tmpnam(buf) \
+ (mktemp(strcpy(buf, tmpnam_template)) == 0 || (buf)[0] == '\0' ? 0 : (buf))
+#endif /* not L_tmpnam */
+
+/* Declare errno ourselves unless <errno.h> defined it as a macro. */
+#ifndef errno
+extern int errno;
+#endif
+
+#endif /* not STD_H */
diff --git a/usr.bin/sgmls/sgmls/stklen.c b/usr.bin/sgmls/sgmls/stklen.c
new file mode 100644
index 0000000..43af5dd
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/stklen.c
@@ -0,0 +1,2 @@
+/* Borland C++ reads this variable to size the program stack: 14k here. */
+unsigned int _stklen = 14 * 1024;
diff --git a/usr.bin/sgmls/sgmls/strerror.c b/usr.bin/sgmls/sgmls/strerror.c
new file mode 100644
index 0000000..f5679c0
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/strerror.c
@@ -0,0 +1,36 @@
+/* strerror.c -
+ ANSI C strerror() function.
+
+ Written by James Clark (jjc@jclark.com).
+*/
+
+#include "config.h"
+
+#ifdef STRERROR_MISSING
+#include <stdio.h>
+
+char *strerror(n)
+int n;
+{
+ extern int sys_nerr;
+ extern char *sys_errlist[];
+ static char buf[sizeof("Error ") + 1 + 3*sizeof(int)];
+
+ if (n >= 0 && n < sys_nerr && sys_errlist[n] != 0)
+ return sys_errlist[n];
+ else {
+ sprintf(buf, "Error %d", n);
+ return buf;
+ }
+}
+
+#endif /* STRERROR_MISSING */
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/synrf.c b/usr.bin/sgmls/sgmls/synrf.c
new file mode 100644
index 0000000..2076107
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/synrf.c
@@ -0,0 +1,72 @@
+/* SYNRF: Reserved names and other constants for reference concrete syntax.
+*/
+#include "config.h"
+#include "entity.h" /* Templates for entity control blocks. */
+#include "synxtrn.h" /* Declarations for concrete syntax constants. */
+#include "adl.h" /* Definitions for attribute list processing. */
+/* Reserved names of the reference concrete syntax, in alphabetical order.
+   The position of each name must equal the value of the corresponding
+   K... index constant (KANY is 0 for "ANY", ... KUSEMAP is 54 for
+   "USEMAP"), and NKEYS is KUSEMAP+1; keep this table and those constants
+   in step when adding a keyword.
+*/
+UNCH key[NKEYS][REFNAMELEN+1] = {
+ "ANY",
+ "ATTLIST",
+ "CDATA",
+ "CONREF",
+ "CURRENT",
+ "DEFAULT",
+ "DOCTYPE",
+ "ELEMENT",
+ "EMPTY",
+ "ENDTAG",
+ "ENTITIES",
+ "ENTITY",
+ "FIXED",
+ "ID",
+ "IDLINK",
+ "IDREF",
+ "IDREFS",
+ "IGNORE",
+ "IMPLIED",
+ "INCLUDE",
+ "INITIAL",
+ "LINK",
+ "LINKTYPE",
+ "MD",
+ "MS",
+ "NAME",
+ "NAMES",
+ "NDATA",
+ "NMTOKEN",
+ "NMTOKENS",
+ "NOTATION",
+ "NUMBER",
+ "NUMBERS",
+ "NUTOKEN",
+ "NUTOKENS",
+ "O",
+ "PCDATA",
+ "PI",
+ "POSTLINK",
+ "PUBLIC",
+ "RCDATA",
+ "RE",
+ "REQUIRED",
+ "RESTORE",
+ "RS",
+ "SDATA",
+ "SHORTREF",
+ "SIMPLE",
+ "SPACE",
+ "STARTTAG",
+ "SUBDOC",
+ "SYSTEM",
+ "TEMP",
+ "USELINK",
+ "USEMAP"
+};
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/synxtrn.h b/usr.bin/sgmls/sgmls/synxtrn.h
new file mode 100644
index 0000000..75b6471
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/synxtrn.h
@@ -0,0 +1,152 @@
+/* SYNXTRN.H: External declarations for concrete syntax constants.
+*/
+/* Short References
+   The srdel structure holds the short reference delimiter table together
+   with the indexes of particular delimiters within it.  The table has
+   SRCT+2 entries and the printable-form list has SRNPRT+1 entries.
+   NOTE(review): the extra entries are presumably a header and/or
+   terminator slot -- the trace code reads dtb[0].mapdata as the delimiter
+   count; confirm where the tables are initialized.
+*/
+#define SRCT 32 /* Number of short reference delimiters. */
+#define SRMAXLEN 3 /* Maximum length of a SHORTREF delimiter. */
+#define SRNPRT 8 /* Number of non-printable SHORTREF delimiters. */
+struct srdel {
+ struct map dtb[SRCT+2]; /* LEXCNM: Short reference delimiters. */
+ char *pdtb[SRNPRT+1]; /* LEXCNM: Printable form of unprintable SRs. */
+ int fce; /* LEXCNM: Index of first FCE in srdeltab. */
+ int hyp2; /* LEXCNM: Index of "two hyphens" in srdeltab. */
+ int data; /* LEXCNM: Index of first SR with data char. */
+ int hyp; /* LEXCNM: Index of hyphen in srdeltab. */
+ int prtmin; /* LEXCNM: Index of 1st printable SR. */
+ int spc; /* LEXCNM: Index of space in srdeltab. */
+ int lbr; /* LEXCNM: Index of left bracket in srdeltab. */
+ int rbr; /* LEXCNM: Index of right bracket in srdeltab. */
+};
+/* General delimiter characters.  Each member holds one character; the
+   LEX... tag in each comment names the lexical table that the character
+   relates to.
+*/
+struct delim {
+ UNCH genre; /* LEXCON: Generated RE; cannot be markup. */
+ UNCH lit; /* LEXMARK: Char used as LIT delimiter.*/
+ UNCH lita; /* LEXMARK: Char used as LITA delimiter.*/
+ UNCH mdc; /* LEXLMS: Char used as MDC delimiter.*/
+ UNCH msc; /* LEXCON: Char used as MSC delimiter. */
+ UNCH net; /* LEXCON: Char used as NET when enabled.*/
+ UNCH pero; /* LEXMARK: Char used as PERO delimiter. */
+ UNCH pic; /* LEXCON: Char used as PIC delimiter.*/
+ UNCH tago; /* LEXCON: Char used as TAGO when enabled.*/
+};
+/* Lexical table code assignments.  Each member holds the code value used
+   for one character class; the LEX... tag in each comment names the
+   lexical table concerned.
+*/
+struct lexcode {
+ UNCH fce; /* LEXCNM: FRE character as entity reference. */
+ UNCH fre; /* LEXCON: Free character not an entity ref. */
+ UNCH litc; /* LEXLMS: Literal close delimiter enabled. */
+ UNCH msc; /* LEXLMS: Marked section close delim enabled. */
+ UNCH net; /* LEXCON: Null end-tag delimiter enabled. */
+ UNCH nonet; /* LEXCON: NET disabled; still used as ETI. */
+ UNCH spcr; /* LEXCNM: Space in use as SHORTREF delimiter. */
+ UNCH tago; /* LEXCON: Tag open delimiter enabled. */
+ UNCH cde; /* LEXLMS: CDATA/SDATA delimiters. */
+};
+/* All concrete-syntax delimiter data gathered in one object.  A single
+   instance, lex, is declared below; struct markup is not defined in this
+   header and must be declared before this point.
+*/
+struct lexical {
+ struct markup m; /* Markup strings for text processor. */
+ struct srdel s; /* Short reference delimiters. */
+ struct delim d; /* General delimiter characters. */
+ struct lexcode l; /* Lexical table code assignments. */
+};
+extern struct lexical lex; /* Delimiter set constants. */
+/* Lexical tables: arrays of UNCH codes (the trace code indexes one of
+   them by character value).  lextabs lists all of them.
+*/
+extern UNCH lexcnm[]; /* Lexical table: mixed content. */
+extern UNCH lexcon[]; /* Lexical table for content (except mixed). */
+extern UNCH lexgrp[]; /* Lexical table for groups. */
+extern UNCH lexlms[]; /* Lexical table: literals and marked sections. */
+extern UNCH lexmark[]; /* Lexical table for markup. */
+extern UNCH lexsd[]; /* Lexical table for SGML declaration. */
+extern UNCH lextran[]; /* Case translation table for SGML names. */
+extern UNCH lextoke[]; /* Lexical table for tokenization. */
+extern UNCH *lextabs[]; /* List of all lexical tables. */
+/* Parse control blocks (PCBs), one per parsing context.  struct parse is
+   not defined in this header.
+*/
+extern struct parse pcbconc; /* PCB: character data. */
+extern struct parse pcbcone; /* PCB: element content (no data allowed). */
+extern struct parse pcbconm; /* PCB: mixed content (data allowed). */
+extern struct parse pcbconr; /* PCB: replaceable character data. */
+extern struct parse pcbetag; /* PCB: end-tags. */
+extern struct parse pcbgrcm; /* PCB: content model group. */
+extern struct parse pcbgrcs; /* PCB: content model suffix. */
+extern struct parse pcbgrnm; /* PCB: name group. */
+extern struct parse pcbgrnt; /* PCB: name token group. */
+extern struct parse pcblitc; /* PCB: literal with CDATA. */
+extern struct parse pcblitp; /* PCB: literal with CDATA, parm & char refs. */
+extern struct parse pcblitr; /* PCB: attribute value with general refs. */
+extern struct parse pcblitt; /* PCB: tokenized attribute value. */
+extern struct parse pcblitv; /* PCB: literal with CDATA, function char trans.*/
+extern struct parse pcbmd; /* PCB: markup declaration. */
+extern struct parse pcbmdc; /* PCB: comment declaration. */
+extern struct parse pcbmdi; /* PCB: markup declaration (ignored). */
+extern struct parse pcbmds; /* PCB: markup declaration subset. */
+extern struct parse pcbmsc; /* PCB: marked section in CDATA mode. */
+extern struct parse pcbmsi; /* PCB: marked section in IGNORE mode. */
+extern struct parse pcbmsrc; /* PCB: marked section in RCDATA mode. */
+extern struct parse pcbpro; /* PCB: prolog. */
+extern struct parse pcbref; /* PCB: reference. */
+extern struct parse pcbstag; /* PCB: start-tag. */
+extern struct parse pcbval; /* PCB: attribute value. */
+extern struct parse pcbeal; /* PCB: end of attribute list. */
+extern struct parse pcbsd; /* PCB: SGML declaration. */
+/* Named values associated with individual PCBs (the PCB is named at the
+   start of each comment).
+*/
+extern int pcbcnda; /* PCBCONM: data in buffer. */
+extern int pcbcnet; /* PCBCONM: markup found or data buffer flushed.*/
+extern int pcbmdtk; /* PCBMD: token expected. */
+extern int pcbstan; /* PCBSTAG: attribute name expected. */
+extern int pcblittda; /* PCBLITT: data character found */
+
+/* Indexes into the key[] table of reserved names.  The values are
+   consecutive from 0 and follow the alphabetical order of the names; each
+   K<NAME> must equal the position of "<NAME>" in the initializer of
+   key[].  NKEYS is derived from the last index, so a new keyword must
+   be inserted in both places and the following indexes renumbered.
+*/
+#define KANY 0
+#define KATTLIST 1
+#define KCDATA 2
+#define KCONREF 3
+#define KCURRENT 4
+#define KDEFAULT 5
+#define KDOCTYPE 6
+#define KELEMENT 7
+#define KEMPTY 8
+#define KENDTAG 9
+#define KENTITIES 10
+#define KENTITY 11
+#define KFIXED 12
+#define KID 13
+#define KIDLINK 14
+#define KIDREF 15
+#define KIDREFS 16
+#define KIGNORE 17
+#define KIMPLIED 18
+#define KINCLUDE 19
+#define KINITIAL 20
+#define KLINK 21
+#define KLINKTYPE 22
+#define KMD 23
+#define KMS 24
+#define KNAME 25
+#define KNAMES 26
+#define KNDATA 27
+#define KNMTOKEN 28
+#define KNMTOKENS 29
+#define KNOTATION 30
+#define KNUMBER 31
+#define KNUMBERS 32
+#define KNUTOKEN 33
+#define KNUTOKENS 34
+#define KO 35
+#define KPCDATA 36
+#define KPI 37
+#define KPOSTLINK 38
+#define KPUBLIC 39
+#define KRCDATA 40
+#define KRE 41
+#define KREQUIRED 42
+#define KRESTORE 43
+#define KRS 44
+#define KSDATA 45
+#define KSHORTREF 46
+#define KSIMPLE 47
+#define KSPACE 48
+#define KSTARTTAG 49
+#define KSUBDOC 50
+#define KSYSTEM 51
+#define KTEMP 52
+#define KUSELINK 53
+#define KUSEMAP 54
+
+#define NKEYS (KUSEMAP+1) /* Number of entries in key[]. */
+
+/* Reserved names; each row holds up to REFNAMELEN characters plus EOS. */
+extern UNCH key[NKEYS][REFNAMELEN+1];
+
+/* Holds the SGML keyword (not alterable by concrete syntax). */
+extern UNCH sgmlkey[];
diff --git a/usr.bin/sgmls/sgmls/tools.h b/usr.bin/sgmls/sgmls/tools.h
new file mode 100644
index 0000000..57ce45a
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/tools.h
@@ -0,0 +1,76 @@
+/* TOOLS.H: Definitions for type declarations, printing, bit handling, etc.
+*/
+
+/* UNCH is the parser's character type.  When CHAR_SIGNED is non-zero it
+   is unsigned char; otherwise (including when CHAR_SIGNED is not defined
+   at all, since #if then sees 0) it is plain char.
+*/
+#if CHAR_SIGNED
+typedef unsigned char UNCH;
+#else
+typedef char UNCH;
+#endif
+
+/* String functions for UNCH strings.  When UNCH differs from char the
+   arguments are cast to char * (and the result of ustrchr back to
+   UNCH *); otherwise the names are plain aliases for the library
+   functions.
+*/
+#if CHAR_SIGNED
+#define ustrcmp(s1, s2) strcmp((char *)(s1), (char *)(s2))
+#define ustrcpy(s1, s2) strcpy((char *)(s1), (char *)(s2))
+#define ustrchr(s, c) (UNCH *)strchr((char *)(s), c)
+#define ustrncmp(s1, s2, n) strncmp((char *)(s1), (char *)(s2), n)
+#define ustrncpy(s1, s2, n) strncpy((char *)(s1), (char *)(s2), n)
+#define ustrlen(s1) strlen((char *)(s1))
+#else
+#define ustrcmp strcmp
+#define ustrcpy strcpy
+#define ustrchr strchr
+#define ustrncmp strncmp
+#define ustrncpy strncpy
+#define ustrlen strlen
+#endif
+
+/* Disabled (never compiled) sketch of the ustr* interfaces, kept only as
+   documentation.  It does not match the macros above exactly: ustrcpy is
+   not listed, and the macros yield whatever the underlying library
+   functions return rather than the int shown here for ustrncpy and
+   ustrlen.
+*/
+#if 0
+int ustrcmp(UNCH *, UNCH *);
+UNCH *ustrchr(UNCH *, int);
+int ustrncmp(UNCH *, UNCH *, UNS);
+int ustrncpy(UNCH *, UNCH *, UNS);
+int ustrlen(UNCH *);
+#endif
+
+typedef unsigned UNS; /* Shorthand for unsigned int. */
+
+/* ISASCII(c): guard to apply before the <ctype.h> classification macros.
+   It calls isascii() when USE_ISASCII is defined and is otherwise always
+   true (in which case c is not evaluated).
+*/
+#ifdef USE_ISASCII
+#define ISASCII(c) isascii(c)
+#else
+#define ISASCII(c) (1)
+#endif
+
+/* MEMZERO(s, n): set n bytes starting at s to zero. */
+#ifdef BSD_STRINGS
+#define MEMZERO(s, n) bzero(s, n)
+#else /* not BSD_STRINGS */
+#define MEMZERO(s, n) memset(s, '\0', n)
+#endif /* not BSD_STRINGS */
+
+/* Macros for bit manipulation.
+   SET and RESET modify word in place.  GET and BITON are identical: both
+   yield the selected bits themselves, i.e. a non-zero value (not
+   necessarily 1) when any of them is on.  Only BITOFF yields exactly
+   0 or 1.
+*/
+#define SET(word, bits) ((word) |= (bits)) /* Turn bits on */
+#define RESET(word, bits) ((word) &= ~(bits)) /* Turn bits off */
+#define GET(word, bits) ((word) & (bits)) /* Non-zero=any bit on */
+#define BITOFF(word, bits) (GET(word, bits)==0) /* 1=no bits on */
+#define BITON(word, bits) ((word) & (bits)) /* Non-zero=any bit on */
+
+/* Dummy etd pointers: three consecutive addresses starting at dumetd are
+   used as distinguished marker values rather than as real element type
+   definitions.  BADPTR(p) is true for NULL and for each marker.
+   PTRNUM(p) maps NULL to 0 and any other pointer to its distance from
+   dumetd plus one, so the three markers map to 1, 2 and 3.
+   Both macros evaluate p more than once.
+*/
+#define ETDCDATA (dumetd) /* Dummy etd pointer for #PCDATA. */
+#define ETDNULL (dumetd + 1) /* Dummy etd pointer for null tag. */
+#define ETDNET (dumetd + 2) /* Dummy etd pointer for NET delimiter. */
+#define BADPTR(p) \
+ ((p) == NULL || (p) == ETDCDATA || (p) == ETDNULL || (p) == ETDNET)
+#define PTRNUM(p) ((p) == NULL ? 0 : ((p) - dumetd) + 1)
+
+/* P(parms) wraps the parameter list of a function declaration so that
+   one declaration serves both kinds of compiler: written as
+   "f P((int, char *))" it becomes a full prototype when USE_PROTOTYPES
+   is defined and an old-style "f()" otherwise.  The doubled parentheses
+   make the whole list a single macro argument.
+*/
+#ifdef USE_PROTOTYPES
+#define P(parms) parms
+#else
+#define P(parms) ()
+#endif
+
+/* VP is used for prototypes of varargs functions. You can't have a
+prototype if the function is defined using varargs.h rather than
+stdarg.h. */
+#ifdef VARARGS
+#define VP(parms) ()
+#else
+#define VP(parms) P(parms)
+#endif
diff --git a/usr.bin/sgmls/sgmls/trace.h b/usr.bin/sgmls/sgmls/trace.h
new file mode 100644
index 0000000..56362be
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/trace.h
@@ -0,0 +1,113 @@
+/* TRACE.H: Declarations for internal trace functions. */
+
+#ifdef TRACE
+
+/* Trace variables.
+   One switch per trace category; the TRACE... macros below test these
+   before calling the corresponding trace function.
+*/
+extern int trace; /* Switch: 1=trace state transitions; 0=don't. */
+extern int atrace; /* Switch: 1=trace attribute activity; 0=don't. */
+extern int ctrace; /* Switch: 1=trace context checking; 0=don't. */
+extern int dtrace; /* Switch: 1=trace declaration parsing; 0=don't.*/
+extern int etrace; /* Switch: 1=trace entity activity; 0=don't.*/
+extern int gtrace; /* Switch: 1=trace group creations; 0=don't. */
+extern int itrace; /* Switch: 1=trace ID activity; 0=don't. */
+extern int mtrace; /* Switch: 1=trace MS activity; 0=don't. */
+extern int ntrace; /* Switch: 1=trace data notation activity. */
+extern char emd[]; /* For "EMD" parameter type in dtrace calls. */
+
+/* Trace functions.  Call them through the TRACE... macros below rather
+   than directly, so that the calls disappear when TRACE is not defined.
+*/
+VOID traceadl P((struct ad *));
+VOID tracecon P((int,int,int,struct parse *,int,int));
+VOID tracedcn P((struct dcncb *));
+VOID tracedsk P((struct tag *,struct tag *,int,int));
+VOID traceecb P((char *,struct entity *));
+VOID traceend P((char *,struct thdr *,struct mpos *,int,int,int));
+VOID traceesn P((struct ne *));
+VOID traceetd P((struct etd *));
+VOID traceetg P((struct tag *,struct etd *,int,int));
+VOID tracegi P((char *,struct etd *,struct thdr *,struct mpos *,int));
+VOID tracegml P((struct restate *,int,int,int));
+VOID tracegrp P((struct etd **));
+VOID traceid P((char *,struct id *));
+VOID tracemd P((char *));
+VOID tracemod P((struct thdr *));
+VOID tracems P((int,int,int,int));
+VOID tracengr P((struct dcncb **));
+VOID tracepcb P((struct parse *));
+VOID tracepro P((void));
+VOID traceset P((void));
+VOID tracesrm P((char *,struct entity **,UNCH *));
+VOID tracestg P((struct etd *,int,int,struct etd *,int));
+VOID tracestk P((struct tag *,int,int));
+VOID tracetkn P((int,UNCH *));
+VOID traceval P((struct parse *,unsigned int,UNCH *,int));
+/* Conditional trace calls.  Each macro has the form
+       ((void)(switch && (tracefn(args), 1)))
+   The && makes the call (and the evaluation of its arguments) happen
+   only when the switch is non-zero.  The ", 1" is needed because the
+   trace functions return VOID and an operand of && must have a value.
+   The outer (void) cast makes the whole thing a valueless expression.
+   TRACEPRO and TRACESET are the exceptions: they always call their
+   function, which is what sets the switches in the first place.
+*/
+#define TRACEADL(al) ((void)(atrace && (traceadl(al), 1)))
+#define TRACECON(etagimct, dostag, datarc, pcb, conrefsw, didreq) \
+ ((void)(gtrace \
+ && (tracecon(etagimct, dostag, datarc, pcb, conrefsw, didreq), 1)))
+#define TRACEDCN(dcn) ((void)(ntrace && (tracedcn(dcn), 1)))
+#define TRACEDSK(pts, ptso, ts3, etictr) \
+ ((void)(gtrace && (tracedsk(pts, ptso, ts3, etictr), 1)))
+#define TRACEECB(action, p) \
+ ((void)(etrace && (traceecb(action, p), 1)))
+#define TRACEEND(stagenm, mod, pos, rc, opt, Tstart) \
+ ((void)(ctrace && (traceend(stagenm, mod, pos, rc, opt, Tstart), 1)))
+#define TRACEESN(p) \
+ ((void)((etrace || atrace || ntrace) && (traceesn(p), 1)))
+#define TRACEETD(p) ((void)(gtrace && (traceetd(p), 1)))
+#define TRACEETG(pts, curetd, tsl, etagimct) \
+ ((void)(gtrace && (traceetg(pts, curetd, tsl, etagimct), 1)))
+#define TRACEGI(stagenm, gi, mod, pos, Tstart) \
+ ((void)(ctrace && (tracegi(stagenm, gi, mod, pos, Tstart), 1)))
+#define TRACEGML(scb, pss, conactsw, conact) \
+ ((void)(trace && (tracegml(scb, pss, conactsw, conact), 1)))
+#define TRACEGRP(p) ((void)(gtrace && (tracegrp(p), 1)))
+#define TRACEID(action, p) ((void)(itrace && (traceid(action, p), 1)))
+#define TRACEMD(p) ((void)(dtrace && (tracemd(p), 1)))
+#define TRACEMOD(p) ((void)(gtrace && (tracemod(p), 1)))
+#define TRACEMS(action, code, mslevel, msplevel) \
+ ((void)(mtrace && (tracems(action, code, mslevel, msplevel), 1)))
+#define TRACENGR(p) ((void)(gtrace && (tracengr(p), 1)))
+#define TRACEPCB(p) ((void)(trace && (tracepcb(p), 1)))
+#define TRACEPRO() (tracepro())
+#define TRACESET() (traceset())
+#define TRACESRM(action, pg, gi) \
+ ((void)(etrace && (tracesrm(action, pg, gi), 1)))
+#define TRACESTG(curetd, dataret, rc, nextetd, mexts) \
+ ((void)(gtrace && (tracestg(curetd, dataret, rc, nextetd, mexts), 1)))
+#define TRACESTK(pts, ts2, etictr) \
+ ((void)(gtrace && (tracestk(pts, ts2, etictr), 1)))
+#define TRACETKN(scope, lextoke) \
+ ((void)(trace && (tracetkn(scope, lextoke), 1)))
+#define TRACEVAL(pcb, atype, aval, tokencnt) \
+ ((void)(atrace && (traceval(pcb, atype, aval, tokencnt), 1)))
+
+#else /* not TRACE */
+
+/* Without TRACE every trace macro expands to nothing, so its arguments
+   are never evaluated and a use such as "TRACEMD(p);" leaves only an
+   empty statement.  The macro names and parameter lists are the same as
+   in the TRACE branch.
+*/
+#define TRACEADL(al) /* empty */
+#define TRACECON(etagimct, dostag, datarc, pcb, conrefsw, didreq) /* empty */
+#define TRACEDCN(dcn) /* empty */
+#define TRACEDSK(pts, ptso, ts3, etictr) /* empty */
+#define TRACEECB(action, p) /* empty */
+#define TRACEEND(stagenm, mod, pos, rc, opt, Tstart) /* empty */
+#define TRACEESN(p) /* empty */
+#define TRACEETG(pts, curetd, tsl, etagimct) /* empty */
+#define TRACEETD(p) /* empty */
+#define TRACEGI(stagenm, gi, mod, pos, Tstart) /* empty */
+#define TRACEGML(scb, pss, conactsw, conact) /* empty */
+#define TRACEGRP(p) /* empty */
+#define TRACEID(action, p) /* empty */
+#define TRACEMD(p) /* empty */
+#define TRACEMOD(p) /* empty */
+#define TRACEMS(action, code, mslevel, msplevel) /* empty */
+#define TRACENGR(p) /* empty */
+#define TRACEPCB(p) /* empty */
+#define TRACEPRO() /* empty */
+#define TRACESET() /* empty */
+#define TRACESRM(action, pg, gi) /* empty */
+#define TRACESTG(curetd, dataret, rc, nextetd, mexts) /* empty */
+#define TRACESTK(pts, ts2, etictr) /* empty */
+#define TRACETKN(scope, lextoke) /* empty */
+#define TRACEVAL(pcb, atype, aval, tokencnt) /* empty */
+
+#endif /* not TRACE */
diff --git a/usr.bin/sgmls/sgmls/traceset.c b/usr.bin/sgmls/sgmls/traceset.c
new file mode 100644
index 0000000..df18cbe
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/traceset.c
@@ -0,0 +1,465 @@
+#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */
+
+#ifdef TRACE
+
+#include "context.h"
+
+/* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ */
+#define STATUX tags[ts].status
+
+/* Trace variables.
+   All switches start off; dotrace() below sets each one according to
+   whether its option letter (t a c d e g i m n, in the order declared
+   here) appears in the option string.
+*/
+int trace = 0; /* Switch: 1=trace state transitions; 0=don't. */
+int atrace = 0; /* Switch: 1=trace attribute activity; 0=don't. */
+int ctrace = 0; /* Switch: 1=trace context checking; 0=don't. */
+int dtrace = 0; /* Switch: 1=trace declaration parsing; 0=don't.*/
+int etrace = 0; /* Switch: 1=trace entity activity; 0=don't.*/
+int gtrace = 0; /* Switch: 1=trace group creations; 0=don't. */
+int itrace = 0; /* Switch: 1=trace ID activity; 0=don't. */
+int mtrace = 0; /* Switch: 1=trace MS activity; 0=don't. */
+int ntrace = 0; /* Switch: 1=trace notation activity; 0=don't. */
+char emd[] = "EMD"; /* For "EMD" parameter type in dtrace calls. */
+
+/* PRINTABLE: Render character code c as text for a trace message.
+   Codes from 040 up to but not including 0177 are returned as a
+   one-character string; anything else as a backslash followed by three
+   octal digits.  The result is in a static buffer that is overwritten
+   by the next call.
+*/
+static
+char *printable(c)
+int c;
+{
+     static char text[5];     /* Backslash, three digits and EOS. */
+
+     if (c < 040 || c >= 0177)
+          sprintf(text, "\\%03o", (UNCH)c);
+     else {
+          text[0] = c;
+          text[1] = '\0';
+     }
+     return text;
+}
+
+/* DOTRACE: Set every trace switch from option string s.  Each switch
+   becomes 1 if its letter occurs in s and 0 if it does not; a null s
+   turns all switches off.
+*/
+static
+VOID dotrace(s)
+char *s;
+{
+     char *opts = s ? s : "";     /* Null means "no options". */
+
+     trace  = strchr(opts, 't') != 0;
+     atrace = strchr(opts, 'a') != 0;
+     ctrace = strchr(opts, 'c') != 0;
+     dtrace = strchr(opts, 'd') != 0;
+     etrace = strchr(opts, 'e') != 0;
+     gtrace = strchr(opts, 'g') != 0;
+     itrace = strchr(opts, 'i') != 0;
+     mtrace = strchr(opts, 'm') != 0;
+     ntrace = strchr(opts, 'n') != 0;
+}
+/* TRACESET: Load the trace switches from sw.trace (document body) and,
+   if any switch ended up on, report all nine settings on stderr.
+*/
+VOID traceset()
+{
+     int any;     /* Non-zero if at least one switch is on. */
+
+     dotrace(sw.trace);
+
+     any = trace || atrace || ctrace || dtrace || etrace
+          || gtrace || itrace || mtrace || ntrace;
+     if (!any)
+          return;
+     fprintf(stderr,
+"TRACESET: state=%d;att=%d;con=%d;dcl=%d;ent=%d;grp=%d;id=%d;ms=%d;dcn=%d.\n",
+             trace, atrace, ctrace, dtrace, etrace, gtrace, itrace,
+             mtrace, ntrace);
+}
+/* TRACEPRO: Load the trace switches from sw.ptrace (prolog) and, if any
+   of the seven switches reported for the prolog is on, print their
+   settings on stderr.  ctrace and itrace are set too but are neither
+   tested nor reported here.
+*/
+VOID tracepro()
+{
+     int any;     /* Non-zero if a reported switch is on. */
+
+     dotrace(sw.ptrace);
+
+     any = trace || atrace || dtrace || etrace
+          || gtrace || mtrace || ntrace;
+     if (!any)
+          return;
+     fprintf(stderr,
+ "TRACEPRO: state=%d; att=%d; dcl=%d; ent=%d; grp=%d; ms=%d; dcn=%d.\n",
+             trace, atrace, dtrace, etrace, gtrace, mtrace, ntrace);
+}
+/* TRACEPCB: Trace character just parsed and other pcb data.
+   Prints the parse name, the state-input-action-newstate values from the
+   pcb, the current character (printable form and numeric code), the
+   current entity name, and the position as record:column.
+   RCNT is an unsigned long and the column is computed from a pointer
+   difference, so they are passed with conversions that match those types
+   (%lu, and %d after narrowing the difference to int as CTRGET does)
+   instead of being handed to %d unconverted.
+ */
+VOID tracepcb(pcb)
+struct parse *pcb;
+{
+     fprintf(stderr, "%-8s %2u-%2u-%2u-%2u from %s [%3d] in %s, %lu:%d.\n",
+             pcb->pname, pcb->state, pcb->input, pcb->action,
+             pcb->newstate, printable(*FPOS), *FPOS, (char *)(ENTITY+1),
+             (unsigned long)RCNT, RSCC + (int)(FPOS+1-FBUF));
+}
+/* TRACETKN: Trace character just read during token parse.
+   Prints the scope, the lexical class of the current character, the
+   character itself (printable form and numeric code), the current entity
+   name, and the position as record:column.
+   RCNT is an unsigned long and the column is computed from a pointer
+   difference, so they are passed with conversions that match those types
+   (%lu, and %d after narrowing the difference to int as CTRGET does)
+   instead of being handed to %d unconverted.
+ */
+VOID tracetkn(scope, lextoke)
+int scope;
+UNCH lextoke[]; /* Lexical table for token and name parses. */
+{
+     fprintf(stderr, "TOKEN %2d-%2d from %s [%3d] in %s, %lu:%d.\n",
+             scope, lextoke[*FPOS],
+             printable(*FPOS), *FPOS, (char *)(ENTITY+1),
+             (unsigned long)RCNT, RSCC + (int)(FPOS+1-FBUF));
+}
+/* TRACEGML: Report the main driver's state.  Prints entry pss of the scb
+   table (state, input, action, next state) followed by the two content
+   action values.
+ */
+VOID tracegml(scb, pss, conactsw, conact)
+struct restate *scb;
+int pss, conactsw, conact;
+{
+     struct restate *cur = &scb[pss];     /* Entry being reported. */
+
+     fprintf(stderr,
+ "SGML%02d %2d-%2d-%2d-%2d in main driver; conactsw=%d; conact=%d.\n",
+             pss, cur->sstate, cur->sinput, cur->saction,
+             cur->snext, conactsw, conact);
+}
+/* TRACEVAL: Report the parse of an attribute value as a token list.
+   Writes one line: the pcb name and its state-input-action-newstate
+   values, the address of the value, the expected type in hex, the token
+   count, and finally the value string itself.
+ */
+VOID traceval(pcb, atype, aval, tokencnt)
+struct parse *pcb;
+UNS atype;     /* Kind of token list that was expected. */
+UNCH *aval;    /* The value string that was parsed. */
+int tokencnt;  /* How many tokens were found in it. */
+{
+     /* Header part; deliberately left without a newline. */
+     fprintf(stderr,
+ "%-8s %2d-%2d-%2d-%2d at %p, atype=%02x, tokencnt=%d: ",
+             pcb->pname, pcb->state, pcb->input, pcb->action,
+             pcb->newstate, (UNIV)aval, atype, tokencnt);
+     /* The value completes the line. */
+     fprintf(stderr, "%s\n", aval);
+}
+/* TRACESTK: Report a tag stack entry that has just been pushed: its GI,
+   the stack depth, its flags, the etictr count, and the name of its
+   short reference map ("#EMPTY" when the map is SRMNULL).
+ */
+VOID tracestk(pts, ts2, etictr)
+struct tag *pts;  /* The new stack entry. */
+int ts2;          /* Stack depth. */
+int etictr;       /* Number of "netok" tags on stack. */
+{
+     char *mapname;     /* Name printed for the entry's map. */
+
+     fprintf(stderr,
+ "STACK %s begun; stack depth %d; tflag=%02x; etictr=%d",
+             pts->tetd->etdgi+1, ts2, pts->tflags, etictr);
+
+     if (pts->tsrm == SRMNULL)
+          mapname = "#EMPTY";
+     else
+          mapname = (char *)(pts->tsrm[0]->ename+1);
+     fprintf(stderr, " srm=%s.\n", mapname);
+}
+/* TRACEDSK: Report a tag stack entry that has just been popped.  Prints
+   the GI and flags of the ended tag, then the GI, depth and flags of the
+   tag that is open again, the etictr count, and the name of the resumed
+   tag's short reference map ("#EMPTY" when the map is SRMNULL).
+ */
+VOID tracedsk(pts, ptso, ts3, etictr)
+struct tag *pts;   /* Entry for the tag that is now open again. */
+struct tag *ptso;  /* Entry for the tag that just ended. */
+int ts3;           /* Stack depth. */
+int etictr;        /* Number of "netok" tags on stack. */
+{
+     char *mapname;     /* Name printed for the resumed tag's map. */
+
+     fprintf(stderr,
+ "DESTACK %s ended; otflag=%02x; %s resumed; depth=%d; tflag=%02x; etictr=%d",
+             ptso->tetd->etdgi+1, ptso->tflags,
+             pts->tetd->etdgi+1, ts3, pts->tflags, etictr);
+
+     if (pts->tsrm == SRMNULL)
+          mapname = "#EMPTY";
+     else
+          mapname = (char *)(pts->tsrm[0]->ename+1);
+     fprintf(stderr, " srm=%s.\n", mapname);
+}
+/* TRACECON: Report how the content parse and the start-tag/context
+   processing are interacting.  Prints all six arguments on one line;
+   from the pcb only the parse name and the action are shown.
+ */
+VOID tracecon(etagimct, dostag, datarc, pcb, conrefsw, didreq)
+int etagimct;        /* Implicitly ended elements still on the stack. */
+int dostag;          /* 1=retry newetd instead of parsing; 0=parse. */
+int datarc;          /* Data return code: DAF_ or REF_ or zero. */
+struct parse *pcb;   /* Parse control block of the current parse. */
+int conrefsw;        /* 1=content reference attribute given; 0=no. */
+int didreq;          /* 1=required implied empty tag was done; 0=no. */
+{
+     /* The format is a single literal continued across two lines. */
+     fprintf(stderr,
+ "CONTENT etagimct=%d dostag=%d datarc=%d pname=%s action=%d \
+conrefsw=%d didreq=%d\n",
+             etagimct, dostag, datarc,
+             pcb->pname, pcb->action,
+             conrefsw, didreq);
+}
+/* TRACESTG: Report the inputs and result of start-tag context
+   validation.  The two etds are shown as addresses only.
+ */
+VOID tracestg(curetd, dataret, rc, nextetd, mexts)
+struct etd *curetd;   /* Etd of the tag being validated. */
+int dataret;          /* Data pending: DAF_ REF_ 0=not #PCDATA. */
+int rc;               /* Return code from context or other test. */
+struct etd *nextetd;  /* Etd of a forced start-tag (if rc==2). */
+int mexts;            /* >0=stack level of minus grp; -1=plus; 0=none. */
+{
+     UNIV thisaddr = (UNIV)curetd;
+     UNIV nextaddr = (UNIV)nextetd;
+
+     fprintf(stderr,
+ "STARTTAG newetd=%p; dataret=%d; rc=%d; nextetd=%p; mexts=%d.\n",
+             thisaddr, dataret, rc, nextaddr, mexts);
+}
+/* TRACEETG: Report one step of matching an end-tag against the stack:
+   the level examined, the addresses of the end-tag's etd and of the etd
+   in that stack entry, the entry's flags, and the etagimct count.
+ */
+VOID traceetg(pts, curetd, tsl, etagimct)
+struct tag *pts;      /* Stack entry being compared. */
+struct etd *curetd;   /* Etd of the end-tag. */
+int tsl;              /* Stack level reached by the search loop. */
+int etagimct;         /* Implicitly ended tags left on the stack. */
+{
+     UNIV wanted = (UNIV)curetd;
+     UNIV stacked = (UNIV)pts->tetd;
+
+     fprintf(stderr,
+ "ENDTAG tsl=%d; newetd=%p; stacketd=%p; tflags=%02x; etagimct=%d.\n",
+             tsl, wanted, stacked, pts->tflags, etagimct);
+}
+/* TRACEECB: Trace entity control block activity.
+   Prints the action, the current source index, a two-character code for
+   the entity's storage class, its name and address, and then its text.
+   The text is shown via traceesn() for an ESN entity (unless the action
+   is "ENTDEF"), as "[NOTHING]" when the text pointer is null, as
+   "[EMPTY]" for an empty string, and as the string itself otherwise.
+   Does nothing when p is null.
+
+   The first character of the code comes from a table indexed by estore
+   (positions 0 through ESK).  The second character distinguishes the
+   three classes whose first character is 'M': 'D' for ESMD, 'S' for
+   ESMS, blank for everything else.  It is computed rather than looked up
+   so that no estore value can index past the end of a table; a storage
+   class beyond the table is shown as '?'.
+ */
+VOID traceecb(action, p)
+char *action;
+struct entity *p;
+{
+     static char estype1[] = " TMMMSEIXCNFPDLK";
+     char c1;     /* First character of storage class code. */
+     char c2;     /* Second character of storage class code. */
+
+     if (!p)
+          return;
+
+     c1 = (p->estore < sizeof estype1 - 1) ? estype1[p->estore] : '?';
+     c2 = (p->estore == ESMD) ? 'D' : ((p->estore == ESMS) ? 'S' : ' ');
+
+     fprintf(stderr,
+ "%-8s (es=%d) type %c%c entity %s at %p containing ",
+             action, es, c1, c2, (char *)(p->ename+1),
+             (UNIV)p);
+     if (p->estore==ESN && strcmp(action, "ENTDEF"))
+          traceesn(p->etx.n);
+     else if (p->etx.x==0)
+          fprintf(stderr, "[NOTHING]");
+     else
+          fprintf(stderr, "%s",
+                  p->etx.c[0] ? (char *)p->etx.c : "[EMPTY]");
+     putc('\n', stderr);
+}
+/* TRACEDCN: Report a data content notation block: its address, the
+   address of its attribute list, and its name.  If it has an attribute
+   list, that list is then printed with traceadl().
+ */
+VOID tracedcn(p)
+struct dcncb *p;
+{
+     fprintf(stderr,
+ "DCN dcn=%p; adl=%p; notation is %s\n",
+             (UNIV)p, (UNIV)p->adl, p->ename+1);
+
+     if (!p->adl)
+          return;
+     traceadl(p->adl);
+}
+/* TRACEESN: Report a data entity control block: the entity name, its
+   type (type code 1 is shown as CDATA, 2 as NDATA, anything else as
+   SDATA), and its system identifier.  A missing name or identifier is
+   shown as "[UNDEFINED]".  If the block has a notation, it is then
+   printed with tracedcn().
+ */
+VOID traceesn(p)
+PNE p;
+{
+     char *name;      /* Entity name, or placeholder. */
+     char *kind;      /* Text for the entity type. */
+     char *sysid;     /* System identifier, or placeholder. */
+
+     name = "[UNDEFINED]";
+     if (NEENAME(p) != 0)
+          name = ((char *)NEENAME(p))+1;
+
+     switch (NEXTYPE(p)) {
+     case 1:
+          kind = "CDATA";
+          break;
+     case 2:
+          kind = "NDATA";
+          break;
+     default:
+          kind = "SDATA";
+          break;
+     }
+     fprintf(stderr, "ESN Entity name is %s; entity type is %s.\n",
+             name, kind);
+
+     sysid = "[UNDEFINED]";
+     if (NEID(p) != 0)
+          sysid = (char *)NEID(p);
+     fprintf(stderr, " System ID is %s\n", sysid);
+
+     if (p->nedcn == 0)
+          return;
+     tracedcn(p->nedcn);
+}
+/* TRACESRM: Print the members of a short reference map.
+             For an empty map (SRMNULL) only a one-line notice is written;
+             otherwise the map name is printed followed by one line for
+             each non-null entry in slots 1..lex.s.dtb[0].mapdata.
+             NOTE(review): gi is passed to %s unchecked on the empty-map
+             path but is null-checked on the mapped path -- confirm that
+             callers never pass a null gi together with SRMNULL.
+*/
+VOID tracesrm(action, pg, gi)
+char *action;
+TECB pg;
+UNCH *gi;
+{
+     int sr;                  /* Short reference number. */
+
+     if (pg==SRMNULL) {
+          fprintf(stderr, "%-8s SHORTREF table empty for %s.\n", action, gi);
+          return;
+     }
+     fprintf(stderr, "%-8s %s at %p mapped for %s.\n",
+             action, pg[0]->ename+1, (UNIV)pg,
+             gi ? (char *)gi : "definition");
+     for (sr = 1; sr<=lex.s.dtb[0].mapdata; sr++) {
+          if (pg[sr])
+               fprintf(stderr, "%14s%02u %p %s\n",
+                       "SR", sr, (UNIV)pg[sr], pg[sr]->ename+1);
+     }
+}
+/* TRACEADL: Print an attribute definition list.
+             Writes a header with the member count ADN(al) and the
+             attribute count AN(al), then one entry per member 1..ADN(al).
+ */
+VOID traceadl(al)
+struct ad al[];
+{
+ int i=0;
+
+ fprintf(stderr, "ADLIST %p %d membe%s; %d attribut%s\n",
+ (UNIV)al, ADN(al), ADN(al)==1 ? "r" : "rs", AN(al),
+ AN(al)==1 ? "e" : "es");
+ while (++i<=ADN(al)) {
+ /* One of two row formats is chosen: the first for members that
+    are not group members and whose type is at most ANOTEGRP. */
+ fprintf(stderr,
+ (BITOFF(ADFLAGS(al,i), AGROUP) && ADTYPE(al,i)<=ANOTEGRP)
+ ? " %p %-8s %02x %02x %2d %2d %p %p\n"
+ : " %p %-8s %02x %02x %2d %2d %p %p\n",
+ &al[i], ADNAME(al,i), ADFLAGS(al,i), ADTYPE(al,i), ADNUM(al,i),
+ ADLEN(al,i), ADVAL(al,i), ADDATA(al,i).x);
+ if (ADVAL(al,i)) {
+ /* A value is present: print it, then what it refers to for
+    entity and notation attributes. */
+ fprintf(stderr, "%s", ADVAL(al,i));
+ if (ADTYPE(al,i)==AENTITY && ADDATA(al,i).n!=0) {
+ fprintf(stderr, "=>");
+ traceesn(ADDATA(al,i).n);
+ }
+ else if (ADTYPE(al,i)==ANOTEGRP)
+ fprintf(stderr, "=>%s",
+ (ADDATA(al,i).x->dcnid!=0)
+ ? (char *)ADDATA(al,i).x->dcnid
+ : "[UNDEFINED]");
+ }
+ else
+ /* No value: report which kind of default the flags select. */
+ fprintf(stderr, "[%s]",
+ GET(ADFLAGS(al,i), AREQ)
+ ? "REQUIRED"
+ : (GET(ADFLAGS(al,i), ACURRENT) ? "CURRENT" : "NULL"));
+ }
+ fprintf(stderr, "\n");
+}
+/* TRACEMOD: Print the members of a model.
+             The header entry pg[0] is always printed.  Unless its type
+             has MKEYWORD set, entries 1..pg[0].tu.tnum+1 follow, shown as
+             an element name, "#PCDATA", or a number, depending on the
+             token type.
+*/
+VOID tracemod(pg)
+struct thdr pg[];
+{
+     fprintf(stderr, "MODEL %p %02x %d\n",
+             (UNIV)&pg[0], pg[0].ttype, pg[0].tu.tnum);
+     if ((pg[0].ttype & MKEYWORD) == 0) {
+          int limit = pg[0].tu.tnum + 2;    /* One past the last entry. */
+          int i = 1;
+
+          while (i < limit) {
+               struct thdr *tok = &pg[i];
+
+               if (GET(tok->ttype, TTMASK) == TTETD)
+                    fprintf(stderr, " %p %02x %s\n",
+                            (UNIV)tok, tok->ttype, tok->tu.thetd->etdgi+1);
+               else if (GET(tok->ttype, TTMASK) == TTCHARS)
+                    fprintf(stderr, " %p %02x %s\n",
+                            (UNIV)tok, tok->ttype, "#PCDATA");
+               else
+                    fprintf(stderr, " %p %02x %d\n",
+                            (UNIV)tok, tok->ttype, tok->tu.tnum);
+               i++;
+          }
+     }
+     fprintf(stderr, "\n");
+}
+/* TRACEGRP: Print the members of a name (i.e., etd) group.
+             The group is a null-terminated array of etd pointers.
+*/
+VOID tracegrp(pg)
+struct etd *pg[];
+{
+     int i;                   /* Index of current member. */
+
+     fprintf(stderr, "ETDGRP %p\n", (UNIV)pg);
+     for (i = 0; pg[i]!=0; i++)
+          fprintf(stderr, " %p %s\n", (UNIV)pg[i], pg[i]->etdgi+1);
+}
+/* TRACENGR: Print the members of a notation (i.e., dcncb) group.
+             The group is a null-terminated array of dcncb pointers.
+*/
+VOID tracengr(pg)
+struct dcncb *pg[];
+{
+     int i;                   /* Index of current member. */
+
+     fprintf(stderr, "DCNGRP %p\n", (UNIV)pg);
+     for (i = 0; pg[i]!=0; i++)
+          fprintf(stderr, " %p %s\n", (UNIV)pg[i], pg[i]->ename+1);
+}
+/* TRACEETD: Print an element type definition: name, minimization flags,
+             content model, exception groups, attribute list and the
+             name of its short reference map.
+*/
+VOID traceetd(p)
+struct etd *p;                /* Pointer to an etd. */
+{
+     char *srmname;           /* Map name, or a marker for none/current. */
+
+     fprintf(stderr,
+"ETD etd=%p %s min=%02x cmod=%p ttype=%02x mex=%p, pex=%p, ",
+             (UNIV)p, p->etdgi+1, p->etdmin, (UNIV)p->etdmod,
+             p->etdmod->ttype, (UNIV)p->etdmex, (UNIV)p->etdpex);
+
+     if (p->etdsrm==SRMNULL)
+          srmname = "#EMPTY";
+     else if (p->etdsrm)
+          srmname = (char *)(p->etdsrm[0]->ename+1);
+     else
+          srmname = "#CURRENT";
+     fprintf(stderr, "adl=%p, srm=%s.\n", (UNIV)p->adl, srmname);
+}
+/* TRACEID: Print an ID control block: its name, address, whether it has
+            been defined, and the head of its reference list.
+*/
+VOID traceid(action, p)
+char *action;
+struct id *p;                 /* Pointer to an ID. */
+{
+     char *state = "undefined";
+
+     if (p->iddefed)
+          state = "defined";
+     fprintf(stderr, "%-8s %s at %p is %s; ", action, p->idname+1, (UNIV)p,
+             state);
+     fprintf(stderr, "last ref=%p\n", (UNIV)p->idrl);
+}
+/* TRACEMD: Trace a markup declaration parameter, using the globals
+            mdname, subdcl, parmno and pcbmd.
+*/
+VOID tracemd(parmid)
+char *parmid;                 /* Parameter identifier. */
+{
+     char *sub = "[NONE]";
+
+     if (subdcl)
+          sub = (char *)subdcl;
+     fprintf(stderr, "MDPARM %-8s for %-8s, token %02d, type %02u, %s.\n",
+             mdname, sub, parmno, pcbmd.action, parmid);
+}
+/* TRACEMS: Trace marked section activity.
+*/
+VOID tracems(action, code, mslevel, msplevel)
+int action;                   /* 1=began new level; 0=resumed previous. */
+int code;
+int mslevel;                  /* Nesting level of marked sections. */
+int msplevel;                 /* Nested MS levels subject to special parse. */
+{
+     int tag;                 /* Character printed after "MS". */
+     char *verb;
+
+     if (action) {
+          tag = ' ';
+          verb = "began";
+     }
+     else {
+          tag = 'E';
+          verb = "resumed";
+     }
+     fprintf(stderr,
+             "MS%c %2d %s nesting level %d (msp %d).\n",
+             tag, code, verb, mslevel, msplevel);
+}
+
+/* TRACEHITS: Print the hit bits in h as hexadecimal, highest-indexed
+              word first, each word zero-padded to LONGBITS/4 digits.
+*/
+static
+VOID tracehits(h)
+unsigned long *h;
+{
+     int i = grplongs;
+
+     fprintf(stderr, " H=");
+     while (--i >= 0)
+          fprintf(stderr, "%0*lx", LONGBITS/4, h[i]);
+}
+
+/* TRACEGI: Trace GI testing stages in CONTEXT.C processing.
+            Writes two lines to stderr: the current position in the model
+            (one g-t pair for each of levels 1..P), group status and hit
+            bits, followed by the open element, the current token and
+            the GI being tested.
+            P, M, GOCC, GTYPE, GNUM, H, STATUX, TOCC, TTYPE and TOKEN are
+            macros defined elsewhere; they presumably expand in terms of
+            the mod and pos parameters, so those names must not change
+            -- TODO confirm.
+ */
+VOID tracegi(stagenm, gi, mod, pos, Tstart)
+char *stagenm;
+struct etd *gi; /* ETD of new GI. */
+struct thdr mod[]; /* Model of current open element. */
+struct mpos pos[]; /* Position in open element's model. */
+int Tstart; /* Initial T for this group. */
+{
+ int i = 0; /* Loop counter. */
+
+ fprintf(stderr, "%-10s %d:", stagenm, P);
+ while (++i<=P)
+ fprintf(stderr, " %d-%d", pos[i].g, pos[i].t);
+ fprintf(stderr, " (%u) gocc=%02x gtype=%02x gnum=%d",
+ M, GOCC, GTYPE, GNUM);
+ tracehits(H);
+ fprintf(stderr, " status=%d Tstart=%d\n", STATUX, Tstart);
+ /* A zero token type is shown as #PCDATA and TTETD as the element name;
+    anything else is shown as #GROUP. */
+ fprintf(stderr,
+ "=>%-8s tocc=%02x ttype=%02x thetd=%p (%s) gietd=%p (%s)\n",
+ tags[ts].tetd->etdgi+1, TOCC, TTYPE, (UNIV)TOKEN.tu.thetd,
+ (TTYPE
+ ? (TTYPE==TTETD ? (char *)(TOKEN.tu.thetd->etdgi+1) : "#GROUP")
+ : "#PCDATA"),
+ (UNIV)gi,
+ (gi==ETDCDATA ? "#PCDATA" : (char *)(gi->etdgi+1)));
+}
+/* TRACEEND: Trace testing for end of group in CONTEXT.C processing.
+             Writes the model position, group status and hit bits, then
+             the open element and current token, and finally rc together
+             with the results of offbit() and allhit() for the group.
+             P, M, GOCC, GTYPE, GNUM, H, STATUX, TOCC, TTYPE, TOKEN, T
+             and GHDR are macros defined elsewhere; they presumably
+             expand in terms of the mod and pos parameters -- TODO confirm.
+ */
+VOID traceend(stagenm, mod, pos, rc, opt, Tstart)
+char *stagenm;
+struct thdr mod[]; /* Model of current open element. */
+struct mpos pos[]; /* Position in open element's model. */
+int rc; /* Return code: RCNREQ RCHIT RCMISS RCEND */
+int opt; /* ALLHIT parm: 1=test optionals; 0=ignore. */
+int Tstart; /* Initial T for this group. */
+{
+ int i = 0; /* Loop counter. */
+
+ fprintf(stderr, "%-10s %d:", stagenm, P);
+ while (++i<=P)
+ fprintf(stderr, " %d-%d", pos[i].g, pos[i].t);
+ fprintf(stderr, " (%u) gocc=%02x gtype=%02x gnum=%d",
+ M, GOCC, GTYPE, GNUM);
+ tracehits(H);
+ fprintf(stderr, " status=%d Tstart=%d\n", STATUX, Tstart);
+ fprintf(stderr, "=>%-8s tocc=%02x ttype=%02x thetd=%p (%s)",
+ tags[ts].tetd->etdgi+1, TOCC, TTYPE, (UNIV)TOKEN.tu.thetd,
+ (TTYPE
+ ? (TTYPE==TTETD ? (char *)(TOKEN.tu.thetd->etdgi+1) : "#GROUP")
+ : "#PCDATA"));
+ /* offbit() and allhit() are called here only so that their results
+    can be shown in the trace. */
+ fprintf(stderr, " rc=%d offbitT=%d allhit=%d\n",
+ rc, offbit(H, (int)T, GNUM), allhit(&GHDR, H, 0, opt));
+}
+
+#endif /* TRACE */
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/unix.cfg b/usr.bin/sgmls/sgmls/unix.cfg
new file mode 100644
index 0000000..0bc8410
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/unix.cfg
@@ -0,0 +1,147 @@
+/* unix.cfg: Configuration file for sgmls on Unix. */
+
+/* A list of filename templates to use for searching for external entities.
+The filenames are separated by the character specified in PATH_FILE_SEP.
+See sgmls.man for details. */
+#define DEFAULT_PATH "/usr/local/lib/sgml/%O/%C/%T:%N.%X:%N.%D"
+/* The character that separates the filename templates. */
+#define PATH_FILE_SEP ':'
+/* The character that separates filenames in a system identifier.
+Usually the same as PATH_FILE_SEP. */
+#define SYSID_FILE_SEP ':'
+/* The environment variable that contains the list of filename templates. */
+#define PATH_ENV_VAR "SGML_PATH"
+
+/* MIN_DAT_SUBS_FROM and MIN_DAT_SUBS_TO tell sgmls how to transform a name
+or system identifier into a legal filename. A character in
+MIN_DAT_SUBS_FROM will be transformed into the character in the
+corresponding position in MIN_DAT_SUBS_TO. If there is no such
+position, then the character is removed. */
+/* This says that spaces should be transformed to underscores, and
+slashes to percents. */
+#define MIN_DAT_SUBS_FROM " /"
+#define MIN_DAT_SUBS_TO "_%"
+
+/* Define this to allow tracing. */
+/* #define TRACE 1 */
+
+/* Define this if you want support for subdocuments. This is implemented
+using features that are not part of Standard C, so you might not want
+to define it if you are porting to a new system. Otherwise I suggest
+you leave it defined. */
+#define SUPPORT_SUBDOC 1
+
+/* Define HAVE_EXTENDED_PRINTF if your *printf functions support
+X/Open extensions; if they do, then, for example,
+
+ printf("%2$s%1$s", "bar", "foo")
+
+should print `foobar'. */
+
+/* #define HAVE_EXTENDED_PRINTF 1 */
+
+/* Define HAVE_CAT if your system provides the X/Open message
+catalogue functions catopen() and catgets(), and you want to use them.
+An implementation of these functions is included and will be used if
+you don't define this. On SunOS 4.1.1, if you do define this you
+should set CC=/usr/xpg2bin/cc in the makefile. */
+
+/* #define HAVE_CAT 1 */
+
+#ifdef __STDC__
+/* Define this if your compiler supports prototypes. */
+#define USE_PROTOTYPES 1
+#endif
+
+/* Can't use <stdarg.h> without prototypes. */
+#ifndef USE_PROTOTYPES
+#define VARARGS 1
+#endif
+
+/* If your compiler defines __STDC__ but doesn't provide <stdarg.h>,
+you must define VARARGS yourself here. */
+/* #define VARARGS 1 */
+
+/* Define this if you do not have strerror(). */
+#define STRERROR_MISSING 1
+
+/* Define this unless the character testing functions in ctype.h
+are defined for all values representable as an unsigned char. You do
+not need to define this if your system is ANSI C conformant. You
+should define it for old Unix systems. */
+/* #define USE_ISASCII 1 */
+
+/* Define this if your system provides the BSD style string operations
+rather than ANSI C ones (eg bcopy() rather than memcpy(), and index()
+rather than strchr()). */
+/* #define BSD_STRINGS 1 */
+
+/* Define this if you have getopt(). */
+#define HAVE_GETOPT 1
+
+/* Define this if you have access(). */
+#define HAVE_ACCESS 1
+
+/* Define this if you have <unistd.h>. */
+#define HAVE_UNISTD_H 1
+
+/* Define this if you have <sys/stat.h>. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define this if you have waitpid(). */
+#define HAVE_WAITPID 1
+
+/* Define this if your system is POSIX.1 (ISO 9945-1:1990) compliant. */
+#define POSIX 1
+
+/* Define this if you have the vfork() system call. */
+#define HAVE_VFORK 1
+
+/* Define this if you have <vfork.h>. */
+#define HAVE_VFORK_H 1
+
+/* Define this if you don't have <stdlib.h> */
+/* #define STDLIB_H_MISSING 1 */
+
+/* Define this if you don't have <stddef.h> */
+/* #define STDDEF_H_MISSING 1 */
+
+/* Define this if you don't have <limits.h> */
+/* #define LIMITS_H_MISSING 1 */
+
+/* Define this if you don't have remove(); unlink() will be used instead. */
+#define REMOVE_MISSING 1
+
+/* Define this if you don't have raise(); kill() will be used instead. */
+#define RAISE_MISSING 1
+
+/* Define this if you don't have fsetpos() and fgetpos(). */
+#define FPOS_MISSING 1
+
+/* Universal pointer type. */
+/* If your compiler doesn't fully support void *, change `void' to `char'. */
+typedef void *UNIV;
+
+/* If your compiler doesn't support void as a function return type,
+change `void' to `int'. */
+typedef void VOID;
+
+/* If you don't have an ANSI C conformant <limits.h>, define
+CHAR_SIGNED as 1 or 0 according to whether the `char' type is signed.
+The <limits.h> on some versions of System Release V 3.2 is not ANSI C
+conformant: the value of CHAR_MIN is 0 even though the `char' type is
+signed. */
+
+/* #define CHAR_SIGNED 1 */
+/* #define CHAR_SIGNED 0 */
+#ifndef CHAR_SIGNED
+#include <limits.h>
+#if CHAR_MIN < 0
+#define CHAR_SIGNED 1
+#else
+#define CHAR_SIGNED 0
+#endif
+#endif /* not CHAR_SIGNED */
+
+/* Assume the system character set is ISO Latin-1. */
+#include "latin1.h"
diff --git a/usr.bin/sgmls/sgmls/unixproc.c b/usr.bin/sgmls/sgmls/unixproc.c
new file mode 100644
index 0000000..9e79d62
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/unixproc.c
@@ -0,0 +1,98 @@
+/* unixproc.c -
+
+ Unix implementation of run_process().
+
+ Written by James Clark (jjc@jclark.com).
+*/
+
+#include "config.h"
+
+#ifdef SUPPORT_SUBDOC
+
+#ifdef POSIX
+
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#endif /* POSIX */
+
+#include "std.h"
+#include "entity.h"
+#include "appl.h"
+
+#ifndef POSIX
+
+/* Fallbacks for systems without the POSIX <sys/wait.h> macros.  They
+   decode the traditional wait() status word: a low byte of 0177 means
+   the child stopped, a low byte of 0 means it exited (exit code in the
+   next byte up), and any other low byte means it was killed by the
+   signal held in the low seven bits. */
+#define WIFSTOPPED(s) (((s) & 0377) == 0177)
+/* Each mask is parenthesized before comparing: `!=' binds tighter than
+   `&', so `(s) & 0377 != 0177' would test only bit 0 of the status. */
+#define WIFSIGNALED(s) (((s) & 0377) != 0 && ((s) & 0377) != 0177)
+#define WIFEXITED(s) (((s) & 0377) == 0)
+#define WEXITSTATUS(s) (((s) >> 8) & 0377)
+#define WTERMSIG(s) ((s) & 0177)
+#define WSTOPSIG(s) (((s) >> 8) & 0377)
+/* Without sysconf(), assume a limit of 20 open files per process. */
+#define _SC_OPEN_MAX 0
+#define sysconf(name) (20)
+typedef int pid_t;
+
+#endif /* not POSIX */
+
+#ifndef HAVE_VFORK
+#define vfork() fork()
+#endif /* not HAVE_VFORK */
+
+#ifdef HAVE_VFORK_H
+#include <vfork.h>
+#endif /* HAVE_VFORK_H */
+
+/* Run the program argv[0] with argument vector argv and wait for it.
+   Returns the child's exit status, or -1 after reporting an error via
+   appl_error() if the fork or the wait failed or the child was killed
+   by a signal.  A child that cannot exec reports E_EXEC and exits with
+   status 127. */
+int run_process(argv)
+char **argv;
+{
+ pid_t pid;
+ int status;
+ int ret;
+
+ /* Can't trust Unix implementations to support fflush(NULL). */
+ fflush(stderr);
+ fflush(stdout);
+
+ pid = vfork();
+ if (pid == 0) {
+ /* child */
+ int i;
+ int open_max = (int)sysconf(_SC_OPEN_MAX);
+
+ /* Leave only stdin, stdout and stderr open for the new program. */
+ for (i = 3; i < open_max; i++)
+ (void)close(i);
+ execvp(argv[0], argv);
+ /* Reached only if execvp() failed. */
+ appl_error(E_EXEC, argv[0], strerror(errno));
+ fflush(stderr);
+ /* _exit() rather than exit(): the child must not run exit-time
+    cleanup (such as flushing stdio buffers) a second time. */
+ _exit(127);
+ }
+ if (pid < 0) {
+ appl_error(E_FORK, strerror(errno));
+ return -1;
+ }
+ /* parent */
+ /* Keep waiting until the status returned is for our own child. */
+ while ((ret = wait(&status)) != pid)
+ if (ret < 0) {
+ appl_error(E_WAIT, strerror(errno));
+ return -1;
+ }
+ if (WIFSIGNALED(status)) {
+ appl_error(E_SIGNAL, argv[0], WTERMSIG(status));
+ return -1;
+ }
+ /* Must have exited normally. */
+ return WEXITSTATUS(status);
+}
+
+#endif /* SUPPORT_SUBDOC */
+
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/version.c b/usr.bin/sgmls/sgmls/version.c
new file mode 100644
index 0000000..7144593
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/version.c
@@ -0,0 +1 @@
+char *version_string = "1.1";
diff --git a/usr.bin/sgmls/sgmls/xfprintf.c b/usr.bin/sgmls/sgmls/xfprintf.c
new file mode 100644
index 0000000..1780795
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/xfprintf.c
@@ -0,0 +1,568 @@
+/* xfprintf.c -
+ X/Open extended v?fprintf implemented in terms of v?fprintf.
+
+ Written by James Clark (jjc@jclark.com).
+*/
+
+/* Compile with:
+
+ -DVARARGS to use varargs.h instead of stdarg.h
+ -DLONG_DOUBLE_MISSING if your compiler doesn't like `long double'
+ -DFP_SUPPORT to include floating point stuff
+*/
+
+#include "config.h"
+
+#ifndef HAVE_EXTENDED_PRINTF
+
+#include "std.h"
+
+#ifdef lint
+/* avoid stupid lint warnings */
+#undef va_arg
+#define va_arg(ap, type) (ap, (type)0)
+#endif
+
+#ifdef FP_SUPPORT
+#ifdef LONG_DOUBLE_MISSING
+typedef double long_double;
+#else
+typedef long double long_double;
+#endif
+#endif /* FP_SUPPORT */
+
+#ifndef __STDC__
+#define const /* as nothing */
+#endif
+
+#ifdef USE_PROTOTYPES
+#define P(parms) parms
+#else
+#define P(parms) ()
+#endif
+
+#ifdef VARARGS
+typedef int (*printer)();
+#else
+typedef int (*printer)(UNIV, const char *, ...);
+#endif
+
+/* The type with which an argument must be fetched from the va_list.
+   NONE marks a conversion that consumes no argument (`%%') and, in the
+   argument table, a position that no spec refers to. */
+enum arg_type {
+ NONE,
+ INT,
+ UNSIGNED,
+ LONG,
+ UNSIGNED_LONG,
+#ifdef FP_SUPPORT
+ DOUBLE,
+ LONG_DOUBLE,
+#endif /* FP_SUPPORT */
+ PCHAR,
+ PINT,
+ PLONG,
+ PSHORT
+};
+
+/* Storage for one fetched argument; the member in use is selected by
+   the corresponding enum arg_type. */
+union arg {
+ int i;
+ unsigned u;
+ long l;
+ unsigned long ul;
+#ifdef FP_SUPPORT
+ double d;
+ long_double ld;
+#endif /* FP_SUPPORT */
+ char *pc;
+ UNIV pv;
+ int *pi;
+ short *ps;
+ long *pl;
+};
+
+/* Special values for the position fields of struct spec.  Explicit
+   positions are 1..9, so neither value collides with a real position. */
+#define NEXT 0
+#define MISSING 10
+
+/* The result of parsing one conversion specification.  pos, field_width
+   and precision each hold an explicit argument position (1..9) or NEXT;
+   field_width and precision hold MISSING when the spec has no `*' for
+   them. */
+struct spec {
+ enum arg_type type;
+ char pos;
+ char field_width;
+ char precision;
+};
+
+/* Flag characters that may follow the `%' (and optional position). */
+#define FLAG_CHARS "-+ #0"
+
+static int parse_spec P((const char **, struct spec *));
+static int find_arg_types P((const char *, enum arg_type *));
+static void get_arg P((enum arg_type, va_list *, union arg *));
+static int do_arg P((UNIV, printer, const char *, enum arg_type, union arg *));
+static int xdoprt P((UNIV, printer, const char *, va_list));
+static int printit P((UNIV, printer, const char *, va_list, int, union arg *));
+static int maybe_positional P((const char *));
+
+/* Parse one conversion specification and describe it in *sp.
+   On entry **pp points to the character after the `%'; on success *pp is
+   advanced past the conversion character.
+   Return 1 if successful, 0 otherwise (unknown conversion character, or
+   an explicit position of 0). */
+
+static int parse_spec(pp, sp)
+const char **pp;
+struct spec *sp;
+{
+ char modifier = 0;
+ /* Optional `N$' argument position; only a single digit is recognized. */
+ sp->pos = NEXT;
+ if (isdigit((unsigned char)(**pp)) && (*pp)[1] == '$') {
+ if (**pp == '0')
+ return 0;
+ sp->pos = **pp - '0';
+ *pp += 2;
+ }
+
+ /* Skip the flags; the '\0' test is needed because strchr() would
+    otherwise match the terminator of FLAG_CHARS. */
+ while (**pp != '\0' && strchr(FLAG_CHARS, **pp))
+ *pp += 1;
+
+ /* handle the field width */
+
+ sp->field_width = MISSING;
+ if (**pp == '*') {
+ *pp += 1;
+ if (isdigit((unsigned char)**pp) && (*pp)[1] == '$') {
+ if (**pp == '0')
+ return 0;
+ sp->field_width = **pp - '0';
+ *pp += 2;
+ }
+ else
+ sp->field_width = NEXT;
+ }
+ else {
+ /* A literal width needs no argument, so it is just skipped. */
+ while (isdigit((unsigned char)**pp))
+ *pp += 1;
+ }
+
+ /* handle the precision */
+ sp->precision = MISSING;
+ if (**pp == '.') {
+ *pp += 1;
+ if (**pp == '*') {
+ *pp += 1;
+ if (isdigit((unsigned char)**pp) && (*pp)[1] == '$') {
+ if (**pp == '0')
+ return 0;
+ sp->precision = **pp - '0';
+ *pp += 2;
+ }
+ else
+ sp->precision = NEXT;
+ }
+ else {
+ while (isdigit((unsigned char)**pp))
+ *pp += 1;
+ }
+ }
+ /* handle h l or L */
+
+ if (**pp == 'h' || **pp == 'l' || **pp == 'L') {
+ modifier = **pp;
+ *pp += 1;
+ }
+
+ /* Map the conversion character (and modifier) to the argument type. */
+ switch (**pp) {
+ case 'd':
+ case 'i':
+ sp->type = modifier == 'l' ? LONG : INT;
+ break;
+ case 'o':
+ case 'u':
+ case 'x':
+ case 'X':
+ sp->type = modifier == 'l' ? UNSIGNED_LONG : UNSIGNED;
+ break;
+#ifdef FP_SUPPORT
+ case 'e':
+ case 'E':
+ case 'f':
+ case 'g':
+ case 'G':
+ sp->type = modifier == 'L' ? LONG_DOUBLE : DOUBLE;
+ break;
+#endif /* FP_SUPPORT */
+ case 'c':
+ sp->type = INT;
+ break;
+ case 's':
+ sp->type = PCHAR;
+ break;
+ case 'p':
+ /* a pointer to void has the same representation as a pointer to char */
+ sp->type = PCHAR;
+ break;
+ case 'n':
+ if (modifier == 'h')
+ sp->type = PSHORT;
+ else if (modifier == 'l')
+ sp->type = PLONG;
+ else
+ sp->type = PINT;
+ break;
+ case '%':
+ sp->type = NONE;
+ break;
+ default:
+ return 0;
+ }
+ *pp += 1;
+ return 1;
+}
+
+
+/* Scan FORMAT and record in ARG_TYPE[0..8] the type with which each of
+   the first nine arguments must be fetched; positions that no spec
+   refers to are left as NONE.  References beyond the ninth argument
+   are not recorded.
+   Return 1 on success, 0 if a spec cannot be parsed or one argument is
+   used with two different types. */
+static int find_arg_types(format, arg_type)
+     const char *format;
+     enum arg_type *arg_type;
+{
+     int i, pos;
+     const char *p;
+     struct spec spec;
+
+     for (i = 0; i < 9; i++)
+          arg_type[i] = NONE;
+
+     pos = 0;
+
+     p = format;
+     while (*p)
+          if (*p == '%') {
+               p++;
+               if (!parse_spec(&p, &spec))
+                    return 0;
+               /* Unnumbered arguments are consumed in the order field
+                  width, precision, value.  They must be numbered in that
+                  same order here, because printit() consumes them that
+                  way; otherwise e.g. "%*s" would have its int width and
+                  its string fetched with each other's types. */
+               if (spec.field_width != MISSING) {
+                    int n;
+                    if (spec.field_width == NEXT)
+                         n = pos++;
+                    else
+                         n = spec.field_width - 1;
+                    if (n < 9) {
+                         enum arg_type t = arg_type[n];
+                         if (t != NONE && t != INT)
+                              return 0;
+                         arg_type[n] = INT;
+                    }
+               }
+               if (spec.precision != MISSING) {
+                    int n;
+                    if (spec.precision == NEXT)
+                         n = pos++;
+                    else
+                         n = spec.precision - 1;
+                    if (n < 9) {
+                         enum arg_type t = arg_type[n];
+                         if (t != NONE && t != INT)
+                              return 0;
+                         arg_type[n] = INT;
+                    }
+               }
+               if (spec.type != NONE) {
+                    int n;
+                    if (spec.pos == NEXT)
+                         n = pos++;
+                    else
+                         n = spec.pos - 1;
+                    if (n < 9) {
+                         enum arg_type t = arg_type[n];
+                         if (t != NONE && t != spec.type)
+                              return 0;
+                         arg_type[n] = spec.type;
+                    }
+               }
+          }
+          else
+               p++;
+     return 1;
+}
+
+/* Fetch the next argument from *app as the C type that arg_type names
+   and store it in the matching member of *argp.  NONE fetches nothing.
+   An unknown arg_type aborts the program. */
+static void get_arg(arg_type, app, argp)
+ enum arg_type arg_type;
+ va_list *app;
+ union arg *argp;
+{
+ switch (arg_type) {
+ case NONE:
+ break;
+ case INT:
+ argp->i = va_arg(*app, int);
+ break;
+ case UNSIGNED:
+ argp->u = va_arg(*app, unsigned);
+ break;
+ case LONG:
+ argp->l = va_arg(*app, long);
+ break;
+ case UNSIGNED_LONG:
+ argp->ul = va_arg(*app, unsigned long);
+ break;
+#ifdef FP_SUPPORT
+ case DOUBLE:
+ argp->d = va_arg(*app, double);
+ break;
+ case LONG_DOUBLE:
+ argp->ld = va_arg(*app, long_double);
+ break;
+#endif /* FP_SUPPORT */
+ case PCHAR:
+ argp->pc = va_arg(*app, char *);
+ break;
+ case PINT:
+ argp->pi = va_arg(*app, int *);
+ break;
+ case PSHORT:
+ argp->ps = va_arg(*app, short *);
+ break;
+ case PLONG:
+ argp->pl = va_arg(*app, long *);
+ break;
+ default:
+ abort();
+ }
+}
+
+/* Call the printer func with handle, the single-spec format buf, and the
+   member of *argp that arg_type selects (no argument for NONE).
+   Returns whatever func returns.  An unknown arg_type aborts the
+   program. */
+static int do_arg(handle, func, buf, arg_type, argp)
+ UNIV handle;
+ printer func;
+ const char *buf;
+ enum arg_type arg_type;
+ union arg *argp;
+{
+ switch (arg_type) {
+ case NONE:
+ return (*func)(handle, buf);
+ case INT:
+ return (*func)(handle, buf, argp->i);
+ case UNSIGNED:
+ return (*func)(handle, buf, argp->u);
+ case LONG:
+ return (*func)(handle, buf, argp->l);
+ case UNSIGNED_LONG:
+ return (*func)(handle, buf, argp->ul);
+#ifdef FP_SUPPORT
+ case DOUBLE:
+ return (*func)(handle, buf, argp->d);
+ case LONG_DOUBLE:
+ return (*func)(handle, buf, argp->ld);
+#endif /* FP_SUPPORT */
+ case PCHAR:
+ return (*func)(handle, buf, argp->pc);
+ case PINT:
+ return (*func)(handle, buf, argp->pi);
+ case PSHORT:
+ return (*func)(handle, buf, argp->ps);
+ case PLONG:
+ return (*func)(handle, buf, argp->pl);
+ default:
+ abort();
+ }
+ /* NOTREACHED */
+}
+
+/* Produce the output for format p one conversion at a time.
+   Each spec is rewritten into buf without its `N$' positions and with
+   every `*' replaced by the actual width or precision, and is then
+   passed to func with its one argument.  Ordinary characters are
+   printed singly with "%c".
+   arg[0..nargs-1] holds the arguments already fetched by the caller;
+   arguments numbered nargs and above are taken directly from ap.
+   Returns the total of the counts returned by func, or -1 as soon as
+   func returns a negative value.
+   NOTE(review): nothing checks that a rewritten spec fits in buf; a spec
+   with a very long run of flags or digits would overrun it -- confirm
+   that format strings only come from trusted sources. */
+static int printit(handle, func, p, ap, nargs, arg)
+ UNIV handle;
+ printer func;
+ const char *p;
+ va_list ap;
+ int nargs;
+ union arg *arg;
+{
+ char buf[512]; /* enough for a spec */
+ int count = 0;
+ int pos = 0;
+
+ while (*p)
+ if (*p == '%') {
+ char *q;
+ struct spec spec;
+ const char *start;
+ int had_field_width;
+ union arg *argp;
+ union arg a;
+ int res;
+
+ start = ++p;
+ if (!parse_spec(&p, &spec))
+ abort(); /* should have caught it in find_arg_types */
+
+ buf[0] = '%';
+ q = buf + 1;
+
+ /* Drop the leading `N$' from the copy. */
+ if (spec.pos != NEXT)
+ start += 2;
+
+ /* substitute in precision and field width if necessary */
+ had_field_width = 0;
+ while (start < p) {
+ if (*start == '*') {
+ char c;
+ int n, val;
+
+ start++;
+ /* The first `*' is the field width when the spec has one;
+    any other `*' is the precision. */
+ if (!had_field_width && spec.field_width != MISSING) {
+ c = spec.field_width;
+ had_field_width = 1;
+ }
+ else
+ c = spec.precision;
+ if (c == NEXT)
+ n = pos++;
+ else {
+ /* Skip the `N$' that followed the `*'. */
+ start += 2;
+ n = c - 1;
+ }
+ if (n >= nargs)
+ val = va_arg(ap, int);
+ else
+ val = arg[n].i;
+
+ /* ignore negative precision */
+ if (val >= 0 || q[-1] != '.') {
+ (void)sprintf(q, "%d", val);
+ q = strchr(q, '\0');
+ }
+ }
+ else
+ *q++ = *start++;
+ }
+ *q++ = '\0';
+
+ /* Locate the argument to convert, if the spec takes one. */
+ argp = 0;
+ if (spec.type != NONE) {
+ int n = spec.pos == NEXT ? pos++ : spec.pos - 1;
+ if (n >= nargs) {
+ get_arg(spec.type, &ap, &a);
+ argp = &a;
+ }
+ else
+ argp = arg + n;
+ }
+
+ res = do_arg(handle, func, buf, spec.type, argp);
+ if (res < 0)
+ return -1;
+ count += res;
+ }
+ else {
+ if ((*func)(handle, "%c", *p++) < 0)
+ return -1;
+ count++;
+ }
+ return count;
+}
+
+/* Do a quick check to see whether format may contain any positional
+   (`%N$' or `*N$') argument references.  Return 1 if it may, 0 if it
+   certainly does not.  The test is deliberately cheap: it does not
+   parse the format, so it can report 1 for a format that turns out to
+   contain no positional reference. */
+
+static int maybe_positional(format)
+     const char *format;
+{
+     const char *p;
+
+     /* Resume each search one past the previous `$'.  Searching again
+        from the `$' itself would find the same character for ever when
+        it is not part of a positional reference. */
+     for (p = strchr(format, '$'); p; p = strchr(p + 1, '$'))
+          if (p - format >= 2
+              && isdigit((unsigned char)p[-1])
+              && (p[-2] == '%' || p[-2] == '*'))
+               return 1;        /* might be a positional thingy */
+     return 0;
+}
+
+/* Print format, which may contain positional specs, through func.
+   The leading run of used argument positions is fetched up front so
+   that specs can refer to them in any order.  Returns -1 if the format
+   is rejected by find_arg_types() or if there is a gap in the used
+   positions; otherwise returns the result of printit(). */
+static int xdoprt(handle, func, format, ap)
+     UNIV handle;
+     printer func;
+     const char *format;
+     va_list ap;
+{
+     enum arg_type types[9];
+     union arg values[9];
+     int nargs = 0;
+     int i;
+
+     if (!find_arg_types(format, types))
+          return -1;
+
+     /* Count the leading positions that are in use. */
+     while (nargs < 9 && types[nargs] != NONE)
+          nargs++;
+
+     /* A used position after an unused one cannot be fetched. */
+     for (i = nargs; i < 9; i++) {
+          if (types[i] != NONE)
+               return -1;
+     }
+
+     for (i = 0; i < nargs; i++)
+          get_arg(types[i], &ap, &values[i]);
+
+     return printit(handle, func, format, ap, nargs, values);
+}
+
+/* Printer callback handed to xdoprt(): treats the handle p as a FILE *
+   and formats onto it with vfprintf().  Returns vfprintf()'s result.
+   Under VARARGS the handle and the format are themselves fetched from
+   the argument list. */
+#ifdef VARARGS
+static int do_fprintf(va_alist) va_dcl
+#else
+static int do_fprintf(UNIV p, const char *format,...)
+#endif
+{
+#ifdef VARARGS
+ UNIV p;
+ const char *format;
+#endif
+ va_list ap;
+ int res;
+
+#ifdef VARARGS
+ va_start(ap);
+ p = va_arg(ap, UNIV);
+ format = va_arg(ap, char *);
+#else
+ va_start(ap, format);
+#endif
+
+ res = vfprintf((FILE *)p, format, ap);
+ va_end(ap);
+ return res;
+}
+
+/* fprintf() replacement that also accepts positional (`%N$') specs.
+   Formats that maybe_positional() reports as possibly positional go
+   through xdoprt(); all others are passed straight to vfprintf().
+   Returns the result of whichever of the two was used. */
+#ifdef VARARGS
+int xfprintf(va_alist) va_dcl
+#else
+int xfprintf(FILE *fp, const char *format, ...)
+#endif
+{
+#ifdef VARARGS
+ FILE *fp;
+ char *format;
+#endif
+ va_list ap;
+ int res;
+
+#ifdef VARARGS
+ va_start(ap);
+ fp = va_arg(ap, FILE *);
+ format = va_arg(ap, char *);
+#else
+ va_start(ap, format);
+#endif
+ if (maybe_positional(format))
+ res = xdoprt((UNIV)fp, do_fprintf, format, ap);
+ else
+ res = vfprintf(fp, format, ap);
+ va_end(ap);
+ return res;
+}
+
+/* vfprintf() replacement that also accepts positional (`%N$') specs.
+   Formats without any possible positional reference are handed to
+   vfprintf() unchanged; the rest are printed through xdoprt(). */
+int xvfprintf(fp, format, ap)
+     FILE *fp;
+     const char *format;
+     va_list ap;
+{
+     if (!maybe_positional(format))
+          return vfprintf(fp, format, ap);
+     return xdoprt((UNIV)fp, do_fprintf, format, ap);
+}
+
+#endif /* not HAVE_EXTENDED_PRINTF */
diff --git a/usr.bin/sgmls/sgmlsasp/Makefile b/usr.bin/sgmls/sgmlsasp/Makefile
new file mode 100644
index 0000000..1d60f29
--- /dev/null
+++ b/usr.bin/sgmls/sgmlsasp/Makefile
@@ -0,0 +1,18 @@
+#
+# Bmakefile for sgmlsasp
+#
+# $Id$
+#
+
+PROG= sgmlsasp
+
+SRCS+= sgmlsasp.c replace.c
+
+CFLAGS+= -I${.CURDIR}/../libsgmls -I${.CURDIR}/../sgmls
+
+LDADD= ${LIBSGMLS}
+DPADD= ${LIBSGMLS}
+
+.include "../Makefile.inc"
+.include <bsd.prog.mk>
+
diff --git a/usr.bin/sgmls/sgmlsasp/replace.c b/usr.bin/sgmls/sgmlsasp/replace.c
new file mode 100644
index 0000000..95fa113
--- /dev/null
+++ b/usr.bin/sgmls/sgmlsasp/replace.c
@@ -0,0 +1,467 @@
+/* replace.c
+ Parse ASP style replacement file.
+
+ Written by James Clark (jjc@jclark.com). */
+
+#include "sgmlsasp.h"
+#include "replace.h"
+
+#define TABLE_SIZE 251
+
+/* One replacement definition, chained with the others in its hash
+   bucket.  An entry is identified by the pair (type, gi). */
+struct table_entry {
+ enum event_type type;
+ char *gi;
+ struct replacement replacement;
+ struct table_entry *next;
+};
+
+/* Chained hash table of replacements; hash() selects the bucket. */
+struct replacement_table {
+ struct table_entry *table[TABLE_SIZE];
+};
+
+/* Growable character buffer: s is the storage, len the number of
+   characters in use and size the allocated capacity. */
+struct buffer {
+ char *s;
+ unsigned len;
+ unsigned size;
+};
+
+/* Tokens returned by get_token(). */
+
+#define STRING 1
+#define STAGO 2
+#define ETAGO 3
+#define PLUS 4
+
+static int get P((void));
+static int peek P((void));
+static int get_token P((void));
+static void scan_name P((struct buffer *, int));
+static struct replacement *define_replacement
+ P((struct replacement_table *, enum event_type, char *));
+static struct replacement_item **parse_string
+ P((struct replacement_item **, int));
+static UNIV xmalloc P((unsigned));
+static UNIV xrealloc P((UNIV, unsigned));
+static struct replacement_item **add_replacement_data
+ P((struct replacement_item **, char *, unsigned));
+static struct replacement_item **add_replacement_attr
+ P((struct replacement_item **, char *));
+static int hash P((enum event_type, char *));
+static NO_RETURN void parse_error VP((char *,...));
+static VOID buffer_init P((struct buffer *));
+static VOID buffer_append P((struct buffer *, int));
+static char *buffer_extract P((struct buffer *));
+#if 0
+static VOID buffer_free P((struct buffer *));
+#endif
+
+#define buffer_length(buf) ((buf)->len)
+
+#define NEW(type) ((type *)xmalloc(sizeof(type)))
+
+static int current_lineno;
+static char *current_file;
+static FILE *fp;
+
+/* Allocate a replacement table with every hash bucket empty. */
+struct replacement_table *make_replacement_table()
+{
+     struct replacement_table *tablep = NEW(struct replacement_table);
+     struct table_entry **slot = tablep->table;
+     struct table_entry **end = tablep->table + TABLE_SIZE;
+
+     while (slot < end)
+          *slot++ = 0;
+     return tablep;
+}
+
+/* Read the replacement file named file and add its definitions to
+   tablep.  Each definition is a start-tag or end-tag name, an optional
+   `+', any number of strings, and another optional `+'.  When the name
+   is already defined, define_replacement() returns 0 and the rest of the
+   definition is parsed but not stored.
+   Failure to open the file is reported with error(); the code that
+   follows assumes error() does not return -- TODO confirm. */
+void load_replacement_file(tablep, file)
+ struct replacement_table *tablep;
+ char *file;
+{
+ int tok;
+ struct buffer name;
+
+ buffer_init(&name);
+ /* Clear errno first so a stale value is not reported if fopen() fails
+    without setting it. */
+ errno = 0;
+ fp = fopen(file, "r");
+ if (!fp) {
+ if (errno)
+ error("can't open `%s': %s", file, strerror(errno));
+ else
+ error("can't open `%s'", file);
+ }
+
+ current_lineno = 1;
+ current_file = file;
+ tok = get_token();
+ while (tok != EOF) {
+ struct replacement *p;
+ struct replacement_item **tail;
+ enum event_type type;
+
+ if (tok != STAGO && tok != ETAGO)
+ parse_error("syntax error");
+ type = tok == STAGO ? START_ELEMENT : END_ELEMENT;
+ scan_name(&name, '>');
+ p = define_replacement(tablep, type, buffer_extract(&name));
+ tok = get_token();
+ /* A `+' before the strings sets NEWLINE_BEGIN. */
+ if (tok == PLUS) {
+ if (p)
+ p->flags |= NEWLINE_BEGIN;
+ tok = get_token();
+ }
+ /* A null tail makes the string parser discard what it reads. */
+ tail = p ? &p->items : 0;
+ while (tok == STRING) {
+ /* Attribute references are recognized only in start-tag strings. */
+ tail = parse_string(tail, type == START_ELEMENT);
+ tok = get_token();
+ }
+ /* A `+' after the strings sets NEWLINE_END. */
+ if (tok == PLUS) {
+ if (p)
+ p->flags |= NEWLINE_END;
+ tok = get_token();
+ }
+ }
+ fclose(fp);
+}
+
+/* Parse the rest of a string whose opening quote has already been read,
+   appending the resulting items at *tail.  When recog_attr is nonzero,
+   `[name]' inside the string becomes an attribute reference item; all
+   other text becomes data items.  Backslash escapes: \s \n \t \r \f,
+   one to three octal digits, and any other character standing for
+   itself.  A null tail means parse and discard.
+   Returns the new tail.  Reaching end of file before the closing quote
+   is a parse error. */
+static
+struct replacement_item **parse_string(tail, recog_attr)
+     struct replacement_item **tail;
+     int recog_attr;
+{
+     struct buffer buf;
+     unsigned len;
+
+     buffer_init(&buf);
+     for (;;) {
+          int c = get();
+          if (c == '\"')
+               break;
+          /* Without this check an unterminated string would append EOF
+             to the buffer for ever, since get() keeps returning it. */
+          if (c == EOF)
+               parse_error("unfinished string at end of file");
+          if (recog_attr && c == '[') {
+               /* Flush the text collected so far, then read the name. */
+               if (buffer_length(&buf)) {
+                    len = buffer_length(&buf);
+                    tail = add_replacement_data(tail, buffer_extract(&buf), len);
+               }
+               scan_name(&buf, ']');
+               tail = add_replacement_attr(tail, buffer_extract(&buf));
+          }
+          else {
+               if (c == '\\') {
+                    c = get();
+                    switch (c) {
+                    case EOF:
+                         parse_error("unfinished string at end of file");
+                    case 's':
+                         buffer_append(&buf, ' ');
+                         break;
+                    case 'n':
+                         buffer_append(&buf, '\n');
+                         break;
+                    case 't':
+                         buffer_append(&buf, '\t');
+                         break;
+                    case 'r':
+                         buffer_append(&buf, '\r');
+                         break;
+                    case 'f':
+                         buffer_append(&buf, '\f');
+                         break;
+                    case '0':
+                    case '1':
+                    case '2':
+                    case '3':
+                    case '4':
+                    case '5':
+                    case '6':
+                    case '7':
+                         {
+                              /* Up to three octal digits. */
+                              int val = c - '0';
+                              c = peek();
+                              if ('0' <= c && c <= '7') {
+                                   (void)get();
+                                   val = val*8 + (c - '0');
+                                   c = peek();
+                                   if ('0' <= c && c <= '7') {
+                                        (void)get();
+                                        val = val*8 + (c - '0');
+                                   }
+                              }
+                              buffer_append(&buf, val);
+                              break;
+                         }
+                    default:
+                         buffer_append(&buf, c);
+                         break;
+                    }
+               }
+               else
+                    buffer_append(&buf, c);
+          }
+     }
+     len = buffer_length(&buf);
+     if (len > 0)
+          tail = add_replacement_data(tail, buffer_extract(&buf), len);
+     return tail;
+}
+
+/* Append a data item holding the n characters at buf to the list whose
+   end is *tail, and return the new end.  The item takes ownership of
+   buf.  With a null tail, buf is freed and null is returned. */
+static
+struct replacement_item **add_replacement_data(tail, buf, n)
+     struct replacement_item **tail;
+     char *buf;
+     unsigned n;
+{
+     struct replacement_item *item;
+
+     if (!tail) {
+          free(buf);
+          return tail;
+     }
+     item = NEW(struct replacement_item);
+     item->type = DATA_REPL;
+     item->u.data.n = n;
+     item->u.data.s = buf;
+     item->next = 0;
+     *tail = item;
+     return &item->next;
+}
+
+/* Append an attribute reference item for name to the list whose end is
+   *tail, and return the new end.  The item takes ownership of name.
+   With a null tail, name is freed and null is returned. */
+static
+struct replacement_item **add_replacement_attr(tail, name)
+     struct replacement_item **tail;
+     char *name;
+{
+     struct replacement_item *item;
+
+     if (!tail) {
+          free(name);
+          return tail;
+     }
+     item = NEW(struct replacement_item);
+     item->type = ATTR_REPL;
+     item->u.attr = name;
+     item->next = 0;
+     *tail = item;
+     return &item->next;
+}
+
+/* Read the next token from the replacement file.
+   White space is skipped, and so are comments, which run from `%' to
+   the end of the line.  Returns PLUS for `+', ETAGO for `</', STAGO
+   for `<' not followed by `/', STRING for an opening `"' (the body of
+   the string is left unread), or EOF.  Any other character is a parse
+   error. */
+static
+int get_token()
+{
+ int c;
+
+ for (;;) {
+ c = get();
+ while (isspace(c))
+ c = get();
+ if (c != '%')
+ break;
+ /* Skip the comment; end of file inside it ends the input. */
+ do {
+ c = get();
+ if (c == EOF)
+ return EOF;
+ } while (c != '\n');
+ }
+ switch (c) {
+ case '+':
+ return PLUS;
+ case '<':
+ c = peek();
+ if (c == '/') {
+ (void)get();
+ return ETAGO;
+ }
+ return STAGO;
+ case '"':
+ return STRING;
+ case EOF:
+ return EOF;
+ default:
+ /* parse_error() is declared NO_RETURN, so no return is needed. */
+ parse_error("bad input character `%c'", c);
+ }
+}
+
+/* Read a name up to the terminator term and append it to buf, followed
+   by a terminating '\0'.  Lower-case letters are converted to upper
+   case when fold_general_names is set.  A newline or end of file before
+   the terminator, or an empty name, is a parse error. */
+static
+void scan_name(buf, term)
+     struct buffer *buf;
+     int term;
+{
+     int c;
+
+     while ((c = get()) != term) {
+          if (c == '\n' || c == EOF)
+               parse_error("missing `%c'", term);
+          if (fold_general_names && islower((unsigned char)c))
+               c = toupper((unsigned char)c);
+          buffer_append(buf, c);
+     }
+     if (buffer_length(buf) == 0)
+          parse_error("empty name");
+     buffer_append(buf, '\0');
+}
+
+/* Read one character from the replacement file, counting each newline
+   in current_lineno.  Returns EOF at end of file. */
+static
+int get()
+{
+     int ch = getc(fp);
+
+     if (ch == '\n')
+          ++current_lineno;
+     return ch;
+}
+
+/* Return the next character of the replacement file without consuming
+   it, or EOF at end of file. */
+static
+int peek()
+{
+     int ch = getc(fp);
+
+     if (ch == EOF)
+          return EOF;
+     ungetc(ch, fp);
+     return ch;
+}
+
+/* Find the replacement defined for the event type and element name.
+   Returns a pointer to it, or 0 if there is none. */
+struct replacement *lookup_replacement(tablep, type, name)
+     struct replacement_table *tablep;
+     enum event_type type;
+     char *name;
+{
+     struct table_entry *entry = tablep->table[hash(type, name)];
+
+     while (entry) {
+          if (entry->type == type && strcmp(name, entry->gi) == 0)
+               return &entry->replacement;
+          entry = entry->next;
+     }
+     return 0;
+}
+
+/* Add an empty replacement for (type, name) to tablep and return it.
+   Return 0 if already defined.
+   name must be heap-allocated and this function takes ownership of it:
+   it is stored in the new entry, or freed when the definition is a
+   duplicate (the same convention as add_replacement_data() and
+   add_replacement_attr(), which free what they do not store). */
+
+static
+struct replacement *define_replacement(tablep, type, name)
+     struct replacement_table *tablep;
+     enum event_type type;
+     char *name;
+{
+     int h = hash(type, name);
+     struct table_entry *p;
+
+     for (p = tablep->table[h]; p; p = p->next)
+          if (strcmp(name, p->gi) == 0 && type == p->type) {
+               /* Nothing will refer to the duplicate's name again. */
+               free(name);
+               return 0;
+          }
+     p = NEW(struct table_entry);
+     p->next = tablep->table[h];
+     tablep->table[h] = p;
+     p->type = type;
+     p->gi = name;
+     p->replacement.flags = 0;
+     p->replacement.items = 0;
+     return &p->replacement;
+}
+
+/* Make buf an empty buffer with no storage allocated. */
+static
+VOID buffer_init(buf)
+     struct buffer *buf;
+{
+     buf->s = 0;
+     buf->len = 0;
+     buf->size = 0;
+}
+
+/* Detach and return the storage of buf, leaving buf empty.  The
+   result is null if nothing was ever appended. */
+static
+char *buffer_extract(buf)
+     struct buffer *buf;
+{
+     char *contents = buf->s;
+
+     buf->s = 0;
+     buf->size = buf->len = 0;
+     return contents;
+}
+
+#if 0
+/* Release the storage of buf and leave it empty.  (Currently unused.) */
+static
+VOID buffer_free(buf)
+     struct buffer *buf;
+{
+     if (buf->s) {
+          free((UNIV)buf->s);
+          buf->s = 0;
+          /* Reset the length as well as the capacity; otherwise the next
+             append would store at a stale offset. */
+          buf->len = buf->size = 0;
+     }
+}
+#endif
+
+/* Append the character c to buf, growing the storage when it is full:
+   the first allocation is 10 bytes and each later one doubles the
+   size. */
+static
+VOID buffer_append(buf, c)
+     struct buffer *buf;
+     int c;
+{
+     if (buf->len >= buf->size) {
+          if (buf->size == 0) {
+               buf->size = 10;
+               buf->s = (char *)xmalloc(buf->size);
+          }
+          else {
+               buf->size *= 2;
+               buf->s = (char *)xrealloc((UNIV)buf->s, buf->size);
+          }
+     }
+     buf->s[buf->len++] = c;
+}
+
+/* Return the hash bucket, in the range 0..TABLE_SIZE-1, for the event
+   type and the string s. */
+static
+int hash(type, s)
+     enum event_type type;
+     char *s;
+{
+     unsigned long h = 0;
+     unsigned long top;
+     char *p;
+
+     for (p = s; *p != 0; p++) {
+          h = (h << 4) + *p;
+          /* Fold bits 28..31 back into the low bits and clear them. */
+          top = h & 0xf0000000;
+          if (top != 0) {
+               h ^= top >> 24;
+               h ^= top;
+          }
+     }
+     h ^= (int)type;
+     return (int)(h % TABLE_SIZE);
+}
+
+/* Allocate n bytes with malloc(); failure is reported through
+   parse_error() as "out of memory". */
+static
+UNIV xmalloc(n)
+     unsigned n;
+{
+     UNIV block = (UNIV)malloc(n);
+
+     if (block == 0)
+          parse_error("out of memory");
+     return block;
+}
+
+/* Resize the block p to size bytes with realloc(); failure is reported
+   through parse_error() as "out of memory". */
+static
+UNIV xrealloc(p, size)
+     UNIV p;
+     unsigned size;
+{
+     UNIV grown = (UNIV)realloc(p, size);
+
+     if (grown == 0)
+          parse_error("out of memory");
+     return grown;
+}
+
+/* Report an error in the replacement file: format the printf-style
+   message and pass it to error() prefixed with the current file name
+   and line number.  Declared NO_RETURN, so error() is presumably
+   expected not to return -- TODO confirm.
+   NOTE(review): the message is formatted into a fixed 512-byte buffer
+   with vsprintf(), which does no bounds checking; every call in this
+   file passes a short format with at most one %c argument. */
+static NO_RETURN
+#ifdef VARARGS
+void parse_error(va_alist) va_dcl
+#else
+void parse_error(char *message,...)
+#endif
+{
+ char buf[512];
+#ifdef VARARGS
+ char *message;
+#endif
+ va_list ap;
+
+#ifdef VARARGS
+ va_start(ap);
+ message = va_arg(ap, char *);
+#else
+ va_start(ap, message);
+#endif
+ vsprintf(buf, message, ap);
+ va_end(ap);
+ error("%s:%d: %s", current_file, current_lineno, buf);
+}
diff --git a/usr.bin/sgmls/sgmlsasp/replace.h b/usr.bin/sgmls/sgmlsasp/replace.h
new file mode 100644
index 0000000..18c9f82
--- /dev/null
+++ b/usr.bin/sgmls/sgmlsasp/replace.h
@@ -0,0 +1,35 @@
+/* replace.h
+ Interface to replacement file parser. */
+
+enum replacement_type { /* tells which member of replacement_item.u is in use */
+ DATA_REPL, /* literal text held in u.data (s, n) */
+ ATTR_REPL /* attribute reference; the attribute name is in u.attr */
+ };
+
+struct replacement_item { /* one piece of a replacement; pieces are chained through next */
+ union {
+ char *attr; /* ATTR_REPL: attribute name, compared with strcmp against the element's attribute names */
+ struct {
+ char *s; /* DATA_REPL: characters to output */
+ unsigned n; /* DATA_REPL: number of characters of s to output */
+ } data;
+ } u;
+ enum replacement_type type; /* selects the member of u */
+ struct replacement_item *next; /* following item; null ends the list */
+};
+
+#define NEWLINE_BEGIN 01 /* flag bit: output_replacement calls output_begin_line before the items */
+#define NEWLINE_END 02 /* flag bit: output_replacement calls output_begin_line after the items */
+
+struct replacement { /* what to output for one element event */
+ unsigned flags; /* tested against NEWLINE_BEGIN and NEWLINE_END */
+ struct replacement_item *items; /* items output in list order; may be null */
+};
+
+enum event_type { START_ELEMENT, END_ELEMENT }; /* lookup key: replacement for an element's start event or for its end event */
+
+struct replacement_table *make_replacement_table P((void)); /* returns the table passed to the two functions below */
+void load_replacement_file P((struct replacement_table *, char *)); /* second argument: replacement file name; main passes each command-line file argument */
+
+struct replacement * /* output_replacement, which consumes this result, tolerates null */
+lookup_replacement P((struct replacement_table *, enum event_type, char *)); /* last argument: element name (the event's gi field) */
diff --git a/usr.bin/sgmls/sgmlsasp/sgmlsasp.1 b/usr.bin/sgmls/sgmlsasp/sgmlsasp.1
new file mode 100644
index 0000000..ab03371
--- /dev/null
+++ b/usr.bin/sgmls/sgmlsasp/sgmlsasp.1
@@ -0,0 +1,30 @@
+.\" -*- nroff -*-
+.TH SGMLSASP 1
+.SH NAME
+sgmlsasp \- translate output of sgmls using ASP replacement files
+.SH SYNOPSIS
+.B sgmlsasp
+.RB [ \-n ]
+.I replacement_file\|.\|.\|.
+.SH DESCRIPTION
+.I sgmlsasp
+translates the standard input using the specification in
+.I replacement_file\|.\|.\|.
+and writes the result to the standard output.
+The standard input must be in the format output by
+.IR sgmls .
+Each replacement file must be in the format of an
+Amsterdam SGML parser (ASP) replacement file;
+this format is described in the ASP documentation.
+Duplicate replacements are silently ignored.
+The
+.B \-n
+option disables upper-case substitution (folding) for names in
+replacement files; this option should be used with concrete syntaxes
+that do not specify upper-case substitution for general names (that
+is, names that are not entity names).
+.SH BUGS
+References to external data entities are ignored.
+(Support for external data entities is not implemented in ASP.)
+.SH "SEE ALSO"
+.IR sgmls (1)
diff --git a/usr.bin/sgmls/sgmlsasp/sgmlsasp.c b/usr.bin/sgmls/sgmlsasp/sgmlsasp.c
new file mode 100644
index 0000000..fdaf113
--- /dev/null
+++ b/usr.bin/sgmls/sgmlsasp/sgmlsasp.c
@@ -0,0 +1,278 @@
+/* sgmlsasp.c
+ Translate sgmls output using ASP replacement file.
+
+ Written by James Clark (jjc@jclark.com). */
+
+#include "sgmlsasp.h"
+#include "sgmls.h"
+#include "replace.h"
+#include "getopt.h"
+
+/* Non-zero if general (non-entity) names should be folded to upper case. */
+int fold_general_names = 1;
+
+static char *program_name; /* argv[0], set in main; used by usage() and error() */
+static char last_char = '\n'; /* last character written through output_char; starts as newline so output_begin_line writes nothing at the start */
+
+static void output_begin_line P((void));
+static void output_data P((struct sgmls_data *, int));
+static void output_pi P((char *, unsigned));
+static void output_token P((char *));
+static void output_attribute P((struct sgmls_attribute *));
+static void output_data_char P((int));
+static void output_replacement
+ P((struct replacement *, struct sgmls_attribute *));
+static void do_file P((FILE *, struct replacement_table *));
+static void usage P((void));
+static void input_error P((int, char *, unsigned long));
+
+#define output_char(c) (last_char = (c), putchar(c)) /* records c in last_char, then writes it to stdout; note that c is evaluated twice */
+
+/* Entry point: handle the -n option, load every replacement file named
+   on the command line into one table, then translate standard input. */
+int main(argc, argv)
+     int argc;
+     char **argv;
+{
+  struct replacement_table *tablep;
+  int opt;
+  int argi;
+
+  program_name = argv[0];
+
+  for (;;) {
+    opt = getopt(argc, argv, "n");
+    if (opt == EOF)
+      break;
+    if (opt == 'n')
+      fold_general_names = 0;
+    else {
+      if (opt == '?')
+        usage();
+      /* Not reached for '?': usage() exits. */
+      assert(0);
+    }
+  }
+  /* At least one replacement file is required. */
+  if (argc - optind <= 0)
+    usage();
+  tablep = make_replacement_table();
+  argi = optind;
+  while (argi < argc) {
+    load_replacement_file(tablep, argv[argi]);
+    argi++;
+  }
+  (void)sgmls_set_errhandler(input_error);
+  do_file(stdin, tablep);
+  exit(0);
+}
+
+/* Write the command-line synopsis to stderr and exit with status 1. */
+static
+void usage()
+{
+  fprintf(stderr,
+          "usage: %s [-n] replacement_file...\n",
+          program_name);
+  exit(1);
+}
+
+static
+void input_error(num, str, lineno)
+ int num;
+ char *str;
+ unsigned long lineno;
+{
+ error("Error at input line %lu: %s", lineno, str);
+}
+
+/* Read events from FP with sgmls_next until it returns zero.  Data and
+   processing instructions are written out; element start and end events
+   are replaced by whatever TABLEP holds for them; the remaining known
+   event types produce no output.  An unknown event type aborts. */
+static
+void do_file(fp, tablep)
+     FILE *fp;
+     struct replacement_table *tablep;
+{
+  struct sgmls *sp = sgmls_create(fp);
+  struct sgmls_event e;
+  struct replacement *repl;
+
+  while (sgmls_next(sp, &e)) {
+    switch (e.type) {
+    case SGMLS_EVENT_START:
+      repl = lookup_replacement(tablep, START_ELEMENT, e.u.start.gi);
+      output_replacement(repl, e.u.start.attributes);
+      sgmls_free_attributes(e.u.start.attributes);
+      break;
+    case SGMLS_EVENT_END:
+      repl = lookup_replacement(tablep, END_ELEMENT, e.u.end.gi);
+      output_replacement(repl, 0);
+      break;
+    case SGMLS_EVENT_DATA:
+      output_data(e.u.data.v, e.u.data.n);
+      break;
+    case SGMLS_EVENT_PI:
+      output_pi(e.u.pi.s, e.u.pi.len);
+      break;
+    case SGMLS_EVENT_ENTITY:
+      /* XXX what should we do here? */
+    case SGMLS_EVENT_SUBSTART:
+    case SGMLS_EVENT_SUBEND:
+    case SGMLS_EVENT_APPINFO:
+    case SGMLS_EVENT_CONFORMING:
+      /* Nothing is written for these events. */
+      break;
+    default:
+      abort();
+    }
+  }
+  sgmls_free(sp);
+}
+
+/* Write the N data chunks in V, passing every character of each chunk
+   through output_data_char. */
+static
+void output_data(v, n)
+     struct sgmls_data *v;
+     int n;
+{
+  int i = 0;
+
+  while (i < n) {
+    char *s = v[i].s;
+    int remaining = v[i].len;
+
+    while (remaining > 0) {
+      output_data_char(*s);
+      s++;
+      remaining--;
+    }
+    i++;
+  }
+}
+
+/* Write the LEN characters of the processing instruction S, passing each
+   one through output_data_char. */
+static
+void output_pi(s, len)
+     char *s;
+     unsigned len;
+{
+  unsigned i;
+
+  for (i = 0; i < len; i++)
+    output_data_char(s[i]);
+}
+
+/* Write the replacement REPL for one element event.
+   REPL may be null, in which case nothing is written.  A DATA_REPL item
+   is copied to the output character by character.  An ATTR_REPL item is
+   replaced by the value of the first attribute in ATTRIBUTES whose name
+   equals the item's attribute name, and by nothing if there is none.
+   The NEWLINE_BEGIN and NEWLINE_END flags call output_begin_line before
+   and after the items respectively.  An unknown item type aborts. */
+static
+void output_replacement(repl, attributes)
+     struct replacement *repl;
+     struct sgmls_attribute *attributes;
+{
+  struct replacement_item *p;
+  struct sgmls_attribute *a;
+  /* Unsigned like u.data.n, so the loop comparison is not mixed-sign and
+     the index cannot overflow before reaching a large count. */
+  unsigned i;
+
+  if (!repl)
+    return;
+  if (repl->flags & NEWLINE_BEGIN)
+    output_begin_line();
+
+  for (p = repl->items; p; p = p->next)
+    switch (p->type) {
+    case DATA_REPL:
+      for (i = 0; i < p->u.data.n; i++)
+        output_char(p->u.data.s[i]);
+      break;
+    case ATTR_REPL:
+      /* Only the first attribute with a matching name is used. */
+      for (a = attributes; a; a = a->next)
+        if (strcmp(a->name, p->u.attr) == 0) {
+          output_attribute(a);
+          break;
+        }
+      break;
+    default:
+      abort();
+    }
+
+  if (repl->flags & NEWLINE_END)
+    output_begin_line();
+}
+
+/* Write the value of the attribute P according to its type:
+   implied attributes write nothing, CDATA goes through output_data,
+   token and entity lists are written as names separated by single
+   spaces, and a notation writes its name when one is present.
+   An unknown attribute type aborts. */
+static
+void output_attribute(p)
+     struct sgmls_attribute *p;
+{
+  int i;
+
+  switch (p->type) {
+  case SGMLS_ATTR_IMPLIED:
+    break;
+  case SGMLS_ATTR_CDATA:
+    output_data(p->value.data.v, p->value.data.n);
+    break;
+  case SGMLS_ATTR_TOKEN:
+    {
+      char **tokens = p->value.token.v;
+      int count = p->value.token.n;
+
+      for (i = 0; i < count; i++) {
+        if (i != 0)
+          output_char(' ');
+        output_token(tokens[i]);
+      }
+    }
+    break;
+  case SGMLS_ATTR_ENTITY:
+    {
+      struct sgmls_entity **ents = p->value.entity.v;
+      int count = p->value.entity.n;
+
+      for (i = 0; i < count; i++) {
+        struct sgmls_entity *ent = ents[i];
+
+        if (i != 0)
+          output_char(' ');
+        /* The name lives in a different union member for internal and
+           external entities. */
+        output_token(ent->is_internal
+                     ? ent->u.internal.name
+                     : ent->u.external.name);
+      }
+    }
+    break;
+  case SGMLS_ATTR_NOTATION:
+    if (p->value.notation != 0)
+      output_token(p->value.notation->name);
+    break;
+  default:
+    abort();
+  }
+}
+
+/* Write the null-terminated string S with output_char, one character at
+   a time and without any translation. */
+static
+void output_token(s)
+     char *s;
+{
+  char *p = s;
+
+  while (*p != '\0') {
+    output_char(*p);
+    p++;
+  }
+}
+
+/* Write one data character: RSCHAR is dropped, RECHAR is written as a
+   newline, and any other character is written unchanged. */
+static
+void output_data_char(c)
+     int c;
+{
+  if (c == RSCHAR)
+    return;
+  if (c == RECHAR)
+    c = '\n';
+  output_char(c);
+}
+
+/* Make sure the output is at the start of a line: write a newline unless
+   the last character written was already one. */
+static
+void output_begin_line()
+{
+  if (last_char == '\n')
+    return;
+  output_char('\n');
+}
+
+/* Print "program_name: " followed by the printf-style MESSAGE and a
+   newline on stderr, flush stderr, and exit with EXIT_FAILURE. */
+NO_RETURN
+#ifdef VARARGS
+void error(va_alist) va_dcl
+#else
+void error(char *message,...)
+#endif
+{
+  va_list ap;
+#ifdef VARARGS
+  char *message;
+
+  va_start(ap);
+  message = va_arg(ap, char *);
+#else
+
+  va_start(ap, message);
+#endif
+  fprintf(stderr, "%s: ", program_name);
+  vfprintf(stderr, message, ap);
+  va_end(ap);
+  putc('\n', stderr);
+  fflush(stderr);
+  exit(EXIT_FAILURE);
+}
diff --git a/usr.bin/sgmls/sgmlsasp/sgmlsasp.h b/usr.bin/sgmls/sgmlsasp/sgmlsasp.h
new file mode 100644
index 0000000..b3ad402
--- /dev/null
+++ b/usr.bin/sgmls/sgmlsasp/sgmlsasp.h
@@ -0,0 +1,26 @@
+/* sgmlsasp.h */
+
+#include "config.h"
+#include "std.h"
+
+#ifdef USE_PROTOTYPES /* P(parms) wraps the parameter list of a function declaration */
+#define P(parms) parms
+#else /* without prototypes the parameter list is dropped, leaving () */
+#define P(parms) ()
+#endif
+
+#ifdef __GNUC__ /* NO_RETURN qualifies functions such as error(), which exits instead of returning */
+#define NO_RETURN volatile
+#else
+#define NO_RETURN /* as nothing */
+#endif
+
+#ifdef VARARGS /* VP is the variant of P used for variadic functions; with VARARGS it always yields () */
+#define VP(parms) ()
+#else
+#define VP(parms) P(parms)
+#endif
+
+NO_RETURN void error VP((char *,...)); /* printf-style message to stderr, then exit(EXIT_FAILURE) */
+
+extern int fold_general_names; /* non-zero if general (non-entity) names should be folded to upper case */
diff --git a/usr.bin/sgmls/unix.cfg b/usr.bin/sgmls/unix.cfg
new file mode 100644
index 0000000..0bc8410
--- /dev/null
+++ b/usr.bin/sgmls/unix.cfg
@@ -0,0 +1,147 @@
+/* unix.cfg: Configuration file for sgmls on Unix. */
+
+/* A list of filename templates to use for searching for external entities.
+The filenames are separated by the character specified in PATH_FILE_SEP.
+See sgmls.man for details. */
+#define DEFAULT_PATH "/usr/local/lib/sgml/%O/%C/%T:%N.%X:%N.%D"
+/* The character that separates the filename templates. */
+#define PATH_FILE_SEP ':'
+/* The character that separates filenames in a system identifier.
+Usually the same as PATH_FILE_SEP. */
+#define SYSID_FILE_SEP ':'
+/* The environment variable that contains the list of filename templates. */
+#define PATH_ENV_VAR "SGML_PATH"
+
+/* MIN_DAT_SUBS_FROM and MIN_DAT_SUBS_TO tell sgmls how to transform a name
+or system identifier into a legal filename. A character in
+MIN_DAT_SUBS_FROM will be transformed into the character in the
+corresponding position in MIN_DAT_SUBS_TO. If there is no such
+position, then the character is removed. */
+/* This says that spaces should be transformed to underscores, and
+slashes to percents. */
+#define MIN_DAT_SUBS_FROM " /"
+#define MIN_DAT_SUBS_TO "_%"
+
+/* Define this to allow tracing. */
+/* #define TRACE 1 */
+
+/* Define this if you want support for subdocuments. This is implemented
+using features that are not part of Standard C, so you might not want
+to define it if you are porting to a new system. Otherwise I suggest
+you leave it defined. */
+#define SUPPORT_SUBDOC 1
+
+/* Define HAVE_EXTENDED_PRINTF if your *printf functions support
+X/Open extensions; if they do, then, for example,
+
+ printf("%2$s%1$s", "bar", "foo")
+
+should print `foobar'. */
+
+/* #define HAVE_EXTENDED_PRINTF 1 */
+
+/* Define HAVE_CAT if your system provides the X/Open message
+catalogue functions catopen() and catgets(), and you want to use them.
+An implementation of these functions is included and will be used if
+you don't define this. On SunOS 4.1.1, if you do define this you
+should set CC=/usr/xpg2bin/cc in the makefile. */
+
+/* #define HAVE_CAT 1 */
+
+#ifdef __STDC__
+/* Define this if your compiler supports prototypes. */
+#define USE_PROTOTYPES 1
+#endif
+
+/* Can't use <stdarg.h> without prototypes. */
+#ifndef USE_PROTOTYPES
+#define VARARGS 1
+#endif
+
+/* If your compiler defines __STDC__ but doesn't provide <stdarg.h>,
+you must define VARARGS yourself here. */
+/* #define VARARGS 1 */
+
+/* Define this if you do not have strerror(). */
+#define STRERROR_MISSING 1
+
+/* Define this unless the character testing functions in ctype.h
+are defined for all values representable as an unsigned char. You do
+not need to define this if your system is ANSI C conformant. You
+should define it for old Unix systems. */
+/* #define USE_ISASCII 1 */
+
+/* Define this if your system provides the BSD style string operations
+rather than ANSI C ones (eg bcopy() rather than memcpy(), and index()
+rather than strchr()). */
+/* #define BSD_STRINGS 1 */
+
+/* Define this if you have getopt(). */
+#define HAVE_GETOPT 1
+
+/* Define this if you have access(). */
+#define HAVE_ACCESS 1
+
+/* Define this if you have <unistd.h>. */
+#define HAVE_UNISTD_H 1
+
+/* Define this if you have <sys/stat.h>. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define this if you have waitpid(). */
+#define HAVE_WAITPID 1
+
+/* Define this if your system is POSIX.1 (ISO 9945-1:1990) compliant. */
+#define POSIX 1
+
+/* Define this if you have the vfork() system call. */
+#define HAVE_VFORK 1
+
+/* Define this if you have <vfork.h>. */
+#define HAVE_VFORK_H 1
+
+/* Define this if you don't have <stdlib.h> */
+/* #define STDLIB_H_MISSING 1 */
+
+/* Define this if you don't have <stddef.h> */
+/* #define STDDEF_H_MISSING 1 */
+
+/* Define this if you don't have <limits.h> */
+/* #define LIMITS_H_MISSING 1 */
+
+/* Define this if you don't have remove(); unlink() will be used instead. */
+#define REMOVE_MISSING 1
+
+/* Define this if you don't have raise(); kill() will be used instead. */
+#define RAISE_MISSING 1
+
+/* Define this if you don't have fsetpos() and fgetpos(). */
+#define FPOS_MISSING 1
+
+/* Universal pointer type. */
+/* If your compiler doesn't fully support void *, change `void' to `char'. */
+typedef void *UNIV;
+
+/* If your compiler doesn't support void as a function return type,
+change `void' to `int'. */
+typedef void VOID;
+
+/* If you don't have an ANSI C conformant <limits.h>, define
+CHAR_SIGNED as 1 or 0 according to whether the `char' type is signed.
+The <limits.h> on some versions of System Release V 3.2 is not ANSI C
+conformant: the value of CHAR_MIN is 0 even though the `char' type is
+signed. */
+
+/* #define CHAR_SIGNED 1 */
+/* #define CHAR_SIGNED 0 */
+#ifndef CHAR_SIGNED
+#include <limits.h>
+#if CHAR_MIN < 0
+#define CHAR_SIGNED 1
+#else
+#define CHAR_SIGNED 0
+#endif
+#endif /* not CHAR_SIGNED */
+
+/* Assume the system character set is ISO Latin-1. */
+#include "latin1.h"
OpenPOWER on IntegriCloud