diff options
author | jfieber <jfieber@FreeBSD.org> | 1995-04-27 16:03:47 +0000 |
---|---|---|
committer | jfieber <jfieber@FreeBSD.org> | 1995-04-27 16:03:47 +0000 |
commit | 86526692868a41d28d14f3fa6f0c7b9df4eb35c7 (patch) | |
tree | 3f304693f0d8eff405d1f2d324f12d865ae6baea /usr.bin | |
parent | d8e135fb5ef707aaf9114bbe25c0363537808fe1 (diff) | |
parent | fb558ed8ecbff94577b9155ee55ebc0cd9777afa (diff) | |
download | FreeBSD-src-86526692868a41d28d14f3fa6f0c7b9df4eb35c7.zip FreeBSD-src-86526692868a41d28d14f3fa6f0c7b9df4eb35c7.tar.gz |
This commit was generated by cvs2svn to compensate for changes in r8100,
which included commits to RCS files with non-trunk default branches.
Diffstat (limited to 'usr.bin')
84 files changed, 22756 insertions, 0 deletions
diff --git a/usr.bin/sgmls/LICENSE b/usr.bin/sgmls/LICENSE new file mode 100644 index 0000000..576ca35 --- /dev/null +++ b/usr.bin/sgmls/LICENSE @@ -0,0 +1,43 @@ + LICENSE AND DISCLAIMER OF WARRANTIES + + Standard Generalized Markup Language Users' Group (SGMLUG) + SGML Parser Materials + + 1. License + +SGMLUG hereby grants to any user: (1) an irrevocable royalty-free, +worldwide, non-exclusive license to use, execute, reproduce, display, +perform and distribute copies of, and to prepare derivative works +based upon these materials; and (2) the right to authorize others to +do any of the foregoing. + + 2. Disclaimer of Warranties + +(a) The SGML Parser Materials are provided "as is" to any USER. USER +assumes responsibility for determining the suitability of the SGML +Parser Materials for its use and for results obtained. SGMLUG makes +no warranty that any errors have been eliminated from the SGML Parser +Materials or that they can be eliminated by USER. SGMLUG shall not +provide any support maintenance or other aid to USER or its licensees +with respect to SGML Parser Materials. SGMLUG shall not be +responsible for losses of any kind resulting from use of the SGML +Parser Materials including (without limitation) any liability for +business expense, machine downtime, or damages caused to USER or third +parties by any deficiency, defect, error, or malfunction. + +(b) SGMLUG DISCLAIMS ALL WARRANTIES, EXPRESSED OR IMPLIED, ARISING OUT +OF OR RELATING TO THE SGML PARSER MATERIALS OR ANY USE THEREOF, +INCLUDING (WITHOUT LIMITATION) ANY WARRANTY WHATSOEVER AS TO THE +FITNESS FOR A PARTICULAR USE OR THE MERCHANTABILITY OF THE SGML PARSER +MATERIALS. + +(c) In no event shall SGMLUG be liable to USER or third parties +licensed by USER for any indirect, special, incidental, or +consequential damages (including lost profits). +(d) SGMLUG has no knowledge of any conditions that would impair its right +to license the SGML Parser Materials. 
Notwithstanding the foregoing, +SGMLUG does not make any warranties or representations that the +SGML Parser Materials are free of claims by third parties of patent, +copyright infringement or the like, nor does SGMLUG assume any +liability in respect of any such infringement of rights of third +parties due to USER's operation under this license. diff --git a/usr.bin/sgmls/Makefile b/usr.bin/sgmls/Makefile new file mode 100644 index 0000000..62c6cea --- /dev/null +++ b/usr.bin/sgmls/Makefile @@ -0,0 +1,9 @@ +# +# Bmake file for sgmls +# $Id:$ +# + +SUBDIR= libsgmls sgmls sgmlsasp rast + +.include <bsd.subdir.mk> + diff --git a/usr.bin/sgmls/Makefile.inc b/usr.bin/sgmls/Makefile.inc new file mode 100644 index 0000000..1e4fc2b --- /dev/null +++ b/usr.bin/sgmls/Makefile.inc @@ -0,0 +1,13 @@ +# +# Bmakefile for rast +# +# $id$ +# + +.include "${.CURDIR}/../../Makefile.inc" + +.if exists(${.CURDIR}/../libsgmls/obj) +LIBSGMLS= ${.CURDIR}/../libsgmls/obj/libsgmls.a +.else +LIBSGMLS= ${.CURDIR}/../libsgmls/libsgmls.a +.endif
\ No newline at end of file diff --git a/usr.bin/sgmls/README b/usr.bin/sgmls/README new file mode 100644 index 0000000..dd6e257 --- /dev/null +++ b/usr.bin/sgmls/README @@ -0,0 +1,138 @@ +$Id:$ + +This is the sgmls release 1.1 SGML parser written by James Clark +jjc@jclark.com, repackaged for FreeBSD. The original source may be +obtained from ftp://ftp.jclark.com/. + +Pieces removed include: + * Test documents: Compiled on FreeBSD, sgmls passes all tests. + * sgml-mode.el: The sole file covered by the GNU GPL. This is not + installed anyway and anyone wishing to do serious SGML editing + would be best to get the psgml package. + * Makefiles and config files for other operating systems (vms, dos, + cms). + * Formatted versions of the man pages. + + +20-Apr-1995 John Fieber <jfieber@freebsd.org> + + +The original README and TODO follow. +---------------------------------------------------------------------- +This is sgmls, an SGML parser derived from the ARCSGML parser +materials which were written by Charles F. Goldfarb. (These are +available for anonymous ftp from ftp.ifi.uio.no [128.240.88.1] in the +directory SIGhyper/SGMLUG/distrib.) + +The version number is given in the file version.c. + +The file INSTALL contains installation instructions. + +The file NEWS describes recent user-visible changes. + +The file sgmls.man contains a Unix manual page; sgmls.txt is the +formatted version of this. + +The file sgml-mode.el contains a very simple SGML mode for GNU Emacs. + +The files sgmls.c and sgmls.h contain a small library for parsing the +output of sgmls. This is used by sgmlsasp, which translates the +output of sgmls using an ASP replacement file, and by rast, which +translates the output of sgmls to the format of a RAST result. The +files sgmlsasp.man and rast.man contain Unix manual pages for sgmlsasp +and rast; sgmlsasp.txt and rast.txt are the formatted versions of +these. 
+ +The file LICENSE contains the license which applies to arcsgml and +accordingly to those parts of sgmls derived from arcsgml. See also +the copyright notice at the beginning of sgmlxtrn.c. The parts that +were written by me are in the public domain (any files that were +written entirely by me contain a comment to that effect.) The file +sgml-mode.el is covered by the GNU GPL. + +Please report any bugs to me. When reporting bugs, please include the +version number, details of your machine, OS and compiler, and a +complete self-contained file that will allow me to reproduce the bug. + +James Clark +jjc@jclark.com + +---------------------------------------------------------------------- +Warn about mixed content models where #PCDATA can't occur everywhere. + +Perhaps there should be a configuration option saying what a control +character is for the purpose of SHUNCHAR CONTROLS. + +Should the current character that is printed in error messages be +taken from the file entity or the current entity? + +Refine SYS_ action. If we distinguish DELNONCH in lexmark, lexgrp, +lexsd, we can have a separate action that ignores the following +character as well. + +Should RSs in CDATA/SDATA entities be ignored as specified in 322:1-2? +Similarly, do the rules on REs in 322:3-11 apply to CDATA/SDATA +entities? (I don't think they count as being `in content'.) + +What should the entity manager do when it encounters code 13 in an +input file? (Currently it treats it as an RE.) + +Document when invalid exclusions are detected. + +Option not to perform capacity checking. + +Give a warning if the recommendation of 422:1-3 is contravened. + +Should an empty CDATA/RCDATA marked section be allowed in the document +type declaration subset? + +Include example of use of SGML_PATH in documentation. + +Try to detect the situation in 310:8-10 (but see 282:1-2). + +Resize hash tables if they become too full. + +Say something in the man page about message catalogues. 
+ +Consider whether support for SHORTREF NONE requires further changes +(other than disallowing short reference mapping declaration). + +Fake /dev/fd/N and /dev/stdin for systems that don't provide it. + +Improve the efficiency of the entity manager by not closing and +reopening files. If we run out of FILEs choose the stream with the +fewest bytes remaining to be read, and read the rest of it into +memory. Each entity level will have its own read buffer. + +Support multi-line error messages: automatically indent after +newline. (We could output to a temporary file first, then copy to +stderr replacing newlines by newline+indent). + +Option that says to output out of context things. + +Divide up formal public identifier errors. Give these errors their +own type code. + +Consider whether, when OMITTAG is NO, we need to change interpretation +of an empty start-tag (7.4.1.1). + +Possibly turn errors 70 and 136 into warnings. + +Make things work with NORMSEP > 2. Would need to keep track of number +of CDATA and SDATA entities in CDATA attributes. + +Handle `SCOPE INSTANCE'. + +In entgen.c, truncate filenames for OSs that don't do this themselves. + +Provide an option that specifies the maximum number of errors; when +this limit is exceeded sgmls would exit. + +Document non-portable assumptions in the code. + +Option to write out SGML declaration. In this case make it write out +APPINFO parameter. + +Allow there to be catalogs mapping public ids to filenames. +Environment variable SGML_CATALOG containing list of filenames of +catalogs. diff --git a/usr.bin/sgmls/configure b/usr.bin/sgmls/configure new file mode 100755 index 0000000..7fd1968 --- /dev/null +++ b/usr.bin/sgmls/configure @@ -0,0 +1,617 @@ +#!/bin/sh +# Generate config.h from unix.cfg. + +trap 'rm -f doit doit.c doit.o doit.log config.out; exit 1' 1 2 3 15 + +on= +off= +CC=${CC-cc} + +# Normally we use VARARGS if __STDC__ is not defined. +# Test whether this assumption is wrong. 
+ +cat >doit.c <<\EOF +#ifdef __STDC__ +#include <stdarg.h> +int foo(char *s,...) +{ + va_list ap; + + va_start(ap, s); + va_end(ap); + return 0; +} +#else +int foo = 0; +#endif +EOF + +$CC $CFLAGS -c doit.c >/dev/null 2>&1 +if test $? -ne 0 +then + on="$on VARARGS" +fi + +cat >doit.c <<\EOF +#include <stddef.h> +int foo = 0; +EOF + +if $CC $CFLAGS -c doit.c >/dev/null 2>&1 +then + off="$off STDDEF_H_MISSING" +else + on="$on STDDEF_H_MISSING" +fi + +cat >doit.c <<\EOF +#include <stdlib.h> +int foo = 0; +EOF + +if $CC $CFLAGS -c doit.c >/dev/null 2>&1 +then + off="$off STDLIB_H_MISSING" +else + on="$on STDLIB_H_MISSING" +fi + +cat >doit.c <<\EOF +#include <limits.h> +int foo = 0; +EOF + +if $CC $CFLAGS -c doit.c >/dev/null 2>&1 +then + off="$off LIMITS_H_MISSING" +else + on="$on LIMITS_H_MISSING" +fi + +cat >doit.c <<\EOF +#include <vfork.h> +int foo = 0; +EOF + +if $CC $CFLAGS -c doit.c >/dev/null 2>&1 +then + on="$on HAVE_VFORK_H" +else + off="$off HAVE_VFORK_H" +fi + +cat >doit.c <<\EOF +#include <unistd.h> +int foo = 0; +EOF + +if $CC $CFLAGS -c doit.c >/dev/null 2>&1 +then + on="$on HAVE_UNISTD_H" +else + off="$off HAVE_UNISTD_H" +fi + +cat >doit.c <<\EOF +#include <sys/types.h> +#include <sys/stat.h> +int foo = 0; +EOF + +if $CC $CFLAGS -c doit.c >/dev/null 2>&1 +then + on="$on HAVE_SYS_STAT_H" +else + off="$off HAVE_SYS_STAT_H" +fi + +cat >doit.c <<\EOF +/* Exit normally unless we need to use isascii. 
*/ + +#include <ctype.h> +#include <signal.h> + +static int whoops() +{ + _exit(1); +} + +main() +{ + int c; +#ifdef isascii +#ifdef SIGSEGV + signal(SIGSEGV, whoops); +#endif +#ifdef SIGBUS + signal(SIGBUS, whoops); +#endif +#ifdef SIGIOT + signal(SIGIOT, whoops); +#endif + + for (c = 128; c < 256; c++) { + if (c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' + || c == '6' || c == '7' || c == '8' || c == '9') { + if (!isdigit(c) || isalpha(c) || iscntrl(c) || isspace(c) || ispunct(c)) + exit(1); + } + else if (isdigit(c)) + exit(1); + else if (isalpha(c)) { + if (iscntrl(c) || isspace(c) || ispunct(c) + || (islower(c) && toupper(c) != c && !isupper(toupper(c))) + || (isupper(c) && tolower(c) != c && !islower(tolower(c)))) + exit(1); + } + else if (islower(c) || isupper(c)) + exit(1); + else if (iscntrl(c)) { + if (ispunct(c)) + exit(1); + } + } +#endif /* isascii */ + exit(0); +} +EOF + +if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null +then + # This tries to find the symbol that looks like the array + # used by <ctype.h>, and sees if its length appears to be 128 + # rather than 256. 
+ if test 1 = `(nm -n doit 2>/dev/null) | awk ' +BEGIN { + weight["0"] = 0; + weight["1"] = 1; + weight["2"] = 2; + weight["3"] = 3; + weight["4"] = 4; + weight["5"] = 5; + weight["6"] = 6; + weight["7"] = 7; + weight["8"] = 8; + weight["9"] = 9; + weight["a"] = weight["A"] = 10; + weight["b"] = weight["B"] = 11; + weight["c"] = weight["C"] = 12; + weight["d"] = weight["D"] = 13; + weight["e"] = weight["E"] = 14; + weight["f"] = weight["F"] = 15; +} + +/^[0-9a-zA-Z]* D .*ctype/ && ctype_nr == 0 { + ctype_nr = NR; + addr = 0; + len = length($1); + for (i = 1; i <= len; i++) + addr = addr*16 + weight[substr($1, i, 1)]; +} + +/^[0-9a-zA-Z]* D / && NR == ctype_nr + 1 { + next_addr = 0; + len = length($1); + for (i = 1; i <= len; i++) + next_addr = next_addr*16 + weight[substr($1, i, 1)]; +} + +END { + size = next_addr - addr; + if (size >= 128 && size < 256) + print "1"; + else + print "0"; +}'` + then + on="$on USE_ISASCII" + else + if ((yes | man 3 ctype) 2>/dev/null) \ + | sed -e 's/.//g' -e 's/ *$//' -e '/de-$/N' \ + -e 's/-\n//g' -e '/defined$/N' -e '/only$/N' \ + -e '/where$/N' -e '/isascii$/N' -e '/is$/N' \ + -e 's/\n/ /g' -e 's/ */ /g' \ + | grep "defined only where isascii is true" >/dev/null + then + on="$on USE_ISASCII" + else + off="$off USE_ISASCII" + fi + fi +else + on="$on USE_ISASCII" +fi + +cat >doit.c <<\EOF +main(argc, argv) +int argc; +char **argv; +{ + if (argc == 0) + remove("foo"); + exit(0); +} +EOF + +if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null +then + off="$off REMOVE_MISSING" +else + on="$on REMOVE_MISSING" +fi + +cat >doit.c <<\EOF +main(argc, argv) +int argc; +char **argv; +{ + if (argc == 0) + getopt(argc, argv, "v"); + exit(0); +} +EOF + +if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null +then + on="$on HAVE_GETOPT" +else + off="$off HAVE_GETOPT" +fi + +cat >doit.c <<\EOF +main(argc, argv) +int argc; +char **argv; +{ + if (argc == 0) + access("foo", 4); + exit(0); +} +EOF + +if $CC 
$CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null +then + on="$on HAVE_ACCESS" +else + off="$off HAVE_ACCESS" +fi + +cat >doit.c <<\EOF +main(argc, argv) +int argc; +char **argv; +{ + if (argc == 0) + vfork(); + exit(0); +} +EOF + +if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null +then + on="$on HAVE_VFORK" +else + off="$off HAVE_VFORK" +fi + +cat >doit.c <<\EOF +main(argc, argv) +int argc; +char **argv; +{ + + if (argc == 0) { + int status; + waitpid(-1, &status, 0); + } + exit(0); +} +EOF + +if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null +then + on="$on HAVE_WAITPID" +else + off="$off HAVE_WAITPID" +fi + +cat >doit.c <<\EOF +#include <string.h> +main(argc, argv) +int argc; +char **argv; +{ + if (argc == 0) + strerror(0); + exit(0); +} +EOF + +if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null +then + off="$off STRERROR_MISSING" +else + on="$on STRERROR_MISSING" +fi + +cat >doit.c <<\EOF +#include <strings.h> +main(argc, argv) +int argc; +char **argv; +{ + if (argc == 0) + bcopy((char *)0, (char *)0, 0); + exit(0); +} +EOF + +if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null +then + # Only use BSD_STRINGS if ANSI string functions don't work. 
+ cat >doit.c <<\EOF +#include <string.h> +main(argc, argv) +int argc; +char **argv; +{ + if (argc == 0) + memcpy((char *)0, (char *)0, 0); + exit(0); +} +EOF + + if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null + then + off="$off BSD_STRINGS" + else + on="$on BSD_STRINGS" + fi +else + off="$off BSD_STRINGS" +fi + +cat >doit.c <<\EOF +#include <signal.h> +main(argc, argv) +int argc; +char **argv; +{ + if (argc == 0) + raise(SIGINT); + exit(0); +} +EOF + +if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null +then + off="$off RAISE_MISSING" +else + on="$on RAISE_MISSING" +fi + +cat >doit.c <<\EOF +#include <stdio.h> +main(argc, argv) +int argc; +char **argv; +{ + if (argc == 0) { + fpos_t pos; + fsetpos(stdin, &pos); + fgetpos(stdin, &pos); + } + exit(0); +} +EOF + +if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null +then + off="$off FPOS_MISSING" +else + on="$on FPOS_MISSING" +fi + +cat >doit.c <<\EOF +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> + +main(argc, argv) +int argc; +char **argv; +{ + if (argc == 0) { + pid_t pid; + int status; + long n = sysconf(_SC_OPEN_MAX); + pid = waitpid(-1, &status, 0); + WIFSTOPPED(status); + WIFSIGNALED(status); + WIFEXITED(status); + WEXITSTATUS(status); + WTERMSIG(status); + WSTOPSIG(status); + } + exit(0); +} +EOF + +if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null +then + on="$on POSIX" +else + off="$off POSIX" +fi + +cat >doit.c <<\EOF +#include <stdio.h> +#include <signal.h> + +static int whoops() +{ + _exit(1); +} + +main() +{ + char buf[30]; +#ifdef SIGSEGV + signal(SIGSEGV, whoops); +#endif +#ifdef SIGBUS + signal(SIGBUS, whoops); +#endif +#ifdef SIGIOT + signal(SIGIOT, whoops); +#endif + sprintf(buf, "%2$s%2$s%1$s%1$s", "bar", "foo"); + exit(!!strcmp(buf, "foofoobarbar")); +} +EOF + +if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null +then + on="$on HAVE_EXTENDED_PRINTF" +else + 
off="$off HAVE_EXTENDED_PRINTF" +fi + +cat >doit.c <<\EOF +#include <nl_types.h> + +main(argc, argv) +int argc; +char **argv; +{ + if (argc == 0) { + nl_catd d = catopen("foo", 0); + catgets(d, 1, 1, "default"); + catclose(d); + } + exit(0); +} +EOF + +if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null +then + on="$on HAVE_CAT" +else + off="$off HAVE_CAT" +fi + +cat >doit.c <<\EOF +#include <limits.h> + +char c = UCHAR_MAX; + +main(argc, argv) +int argc; +char **argv; +{ +#if CHAR_MIN < 0 + exit(!(c < 0)); +#else + exit(!(c > 0)); +#endif +} +EOF + +if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null +then + char_signed= +else + cat >doit.c <<\EOF +main() +{ + int i; + + for (i = 0; i < 512; i++) { + char c = (char)i; + if (c < 0) + exit(1); + } + exit(0); +} +EOF + + if $CC $CFLAGS -o doit doit.c $LIBS >/dev/null 2>&1 && ./doit 2>/dev/null + then + char_signed=0 + else + char_signed=1 + fi +fi + +cat >doit.c <<\EOF + +typedef void VOID; + +extern VOID bar(); + +VOID foo() +{ +} +EOF + +if $CC $CFLAGS -c doit.c >/dev/null 2>&1 +then + void_ret=void +else + void_ret=int +fi + + +cat >doit.c <<\EOF + +void *foo() +{ + static char *buf; + return buf; +} +EOF + +if $CC $CFLAGS -c doit.c >doit.log 2>&1 +then + if test -s doit.log + then + void_star="char \*" + else + void_star="void \*" + fi + +else + void_star="char \*" +fi + +edit= + +rm -f doit.c doit doit.log doit.o + +for var in $on +do + edit="$edit -e 's;^/\\* *\\(#define $var [^/]*\\) *\\*/;\\1;'" +done +for var in $off +do + edit="$edit -e 's;^#define $var [^/]*;/* & */;'" +done + +if test -n "$char_signed" +then + edit="$edit -e 's;^/\\* *\\(#define CHAR_SIGNED $char_signed\\) *\\*/;\\1;'" +fi + +edit="$edit -e 's/^typedef .*VOID;/typedef $void_ret VOID;/'" +edit="$edit -e 's/^typedef .*UNIV;/typedef ${void_star}UNIV;/'" + +if test "X$(PREFIX)" != "X/usr/local" +then + edit="$edit -e '/DEFAULT_PATH/s;/usr/local;$PREFIX;g'" +fi + +eval sed $edit unix.cfg 
">config.out" + +mv config.out config.h + +exit 0 diff --git a/usr.bin/sgmls/libsgmls/Makefile b/usr.bin/sgmls/libsgmls/Makefile new file mode 100644 index 0000000..e94fcc4 --- /dev/null +++ b/usr.bin/sgmls/libsgmls/Makefile @@ -0,0 +1,17 @@ +# +# Bmakefile for libsgmls +# +# $id$ +# + +LIB= sgmls +SRCS= sgmls.c + +CFLAGS+= -I${.CURDIR}/../sgmls + +NOMAN= noman +NOPROFILE= noprofile + +install: + +.include <bsd.lib.mk> diff --git a/usr.bin/sgmls/libsgmls/sgmls.c b/usr.bin/sgmls/libsgmls/sgmls.c new file mode 100644 index 0000000..5af7e5b --- /dev/null +++ b/usr.bin/sgmls/libsgmls/sgmls.c @@ -0,0 +1,1036 @@ +/* sgmls.c: + Library for reading output of sgmls. + + Written by James Clark (jjc@jclark.com). */ + +#include "config.h" +#include "std.h" +#include "sgmls.h" +#include "lineout.h" + +#ifdef __GNUC__ +#define NO_RETURN volatile +#else +#define NO_RETURN /* as nothing */ +#endif + +#ifdef USE_PROTOTYPES +#define P(parms) parms +#else +#define P(parms) () +#endif + +#ifndef __STDC__ +#define const /* as nothing */ +#endif + +typedef struct sgmls_data data_s; +typedef struct sgmls_notation notation_s; +typedef struct sgmls_internal_entity internal_entity_s; +typedef struct sgmls_external_entity external_entity_s; +typedef struct sgmls_entity entity_s; +typedef struct sgmls_attribute attribute_s; +typedef struct sgmls_event event_s; + +/* lists are sorted in reverse order of level */ +struct list { + int subdoc_level; /* -1 if associated with finished subdoc */ + struct list *next; + char *name; +}; + +struct entity_list { + int subdoc_level; + struct entity_list *next; + entity_s entity; +}; + +struct notation_list { + int subdoc_level; + struct notation_list *next; + notation_s notation; +}; + +struct sgmls { + FILE *fp; + char *buf; + unsigned buf_size; + struct entity_list *entities; + struct notation_list *notations; + attribute_s *attributes; + unsigned long lineno; + char *filename; + unsigned filename_size; + unsigned long input_lineno; + int subdoc_level; 
+ char **files; /* from `f' commands */ + int nfiles; + char *sysid; /* from `s' command */ + char *pubid; /* from `p' command */ +}; + +enum error_code { + E_ZERO, /* Not an error */ + E_NOMEM, /* Out of memory */ + E_BADESCAPE, /* Bad escape */ + E_NULESCAPE, /* \000 other than in data */ + E_NUL, /* A null input character */ + E_BADENTITY, /* Reference to undefined entity */ + E_INTERNALENTITY, /* Internal entity when external was needed */ + E_SYSTEM, /* System input error */ + E_COMMAND, /* Bad command letter */ + E_MISSING, /* Missing arguments */ + E_NUMBER, /* Not a number */ + E_ATTR, /* Bad attribute type */ + E_BADNOTATION, /* Reference to undefined notation */ + E_BADINTERNAL, /* Bad internal entity type */ + E_BADEXTERNAL, /* Bad external entity type */ + E_EOF, /* EOF in middle of line */ + E_SDATA, /* \| other than in data */ + E_LINELENGTH /* line longer than UNSIGNED_MAX */ +}; + +static char *errlist[] = { + 0, + "Out of memory", + "Bad escape", + "\\0 escape not in data", + "Nul character in input", + "Reference to undefined entity", + "Internal entity when external was needed", + "System input error", + "Bad command letter", + "Missing arguments", + "Not a number", + "Bad attribute type", + "Reference to undefined notation", + "Bad internal entity type", + "Bad external entity type", + "EOF in middle of line", + "\\| other than in data", + "Too many V commands", + "Input line too long" +}; + +static void NO_RETURN error P((enum error_code)); +static int parse_data P((char *, unsigned long *)); +static void parse_location P((char *, struct sgmls *)); +static void parse_notation P((char *, notation_s *)); +static void parse_internal_entity P((char *, internal_entity_s *)); +static void parse_external_entity + P((char *, struct sgmls *, external_entity_s *)); +static void parse_subdoc_entity P((char *, external_entity_s *)); +static attribute_s *parse_attribute P((struct sgmls *, char *)); +static void grow_datav P((void)); +static char *unescape 
P((char *)); +static char *unescape_file P((char *)); +static int unescape1 P((char *)); +static char *scan_token P((char **)); +static int count_args P((char *)); +static struct list *list_find P((struct list *, char *, int)); +static UNIV xmalloc P((unsigned)); +static UNIV xrealloc P((UNIV , unsigned)); +static char *strsave P((char *)); +static int read_line P((struct sgmls *)); +static notation_s *lookup_notation P((struct sgmls *, char *)); +static entity_s *lookup_entity P((struct sgmls *, char *)); +static external_entity_s *lookup_external_entity P((struct sgmls *, char *)); +static void define_external_entity P((struct sgmls *, external_entity_s *)); +static void define_internal_entity P((struct sgmls *, internal_entity_s *)); +static void define_notation P((struct sgmls *, notation_s *)); +static data_s *copy_data P((data_s *, int)); +static void list_finish_level P((struct list **, int)); +static void add_attribute P((attribute_s **, attribute_s *)); +static void default_errhandler P((int, char *, unsigned long)); + +#define xfree(s) do { if (s) free(s); } while (0) + +static sgmls_errhandler *errhandler = default_errhandler; +static unsigned long input_lineno = 0; + +static data_s *datav = 0; +static int datav_size = 0; + +struct sgmls *sgmls_create(fp) + FILE *fp; +{ + struct sgmls *sp; + + sp = (struct sgmls *)malloc(sizeof(struct sgmls)); + if (!sp) + return 0; + sp->fp = fp; + sp->entities = 0; + sp->notations = 0; + sp->attributes = 0; + sp->lineno = 0; + sp->filename = 0; + sp->filename_size = 0; + sp->input_lineno = 0; + sp->buf_size = 0; + sp->buf = 0; + sp->subdoc_level = 0; + sp->files = 0; + sp->nfiles = 0; + sp->sysid = 0; + sp->pubid = 0; + return sp; +} + +void sgmls_free(sp) + struct sgmls *sp; +{ + struct entity_list *ep; + struct notation_list *np; + + if (!sp) + return; + xfree(sp->filename); + sgmls_free_attributes(sp->attributes); + + for (ep = sp->entities; ep;) { + struct entity_list *tem = ep->next; + if (ep->entity.is_internal) 
{ + xfree(ep->entity.u.internal.data.s); + free(ep->entity.u.internal.name); + } + else { + int i; + for (i = 0; i < ep->entity.u.external.nfilenames; i++) + xfree(ep->entity.u.external.filenames[i]); + xfree(ep->entity.u.external.filenames); + xfree(ep->entity.u.external.sysid); + xfree(ep->entity.u.external.pubid); + sgmls_free_attributes(ep->entity.u.external.attributes); + free(ep->entity.u.internal.name); + } + free(ep); + ep = tem; + } + + for (np = sp->notations; np;) { + struct notation_list *tem = np->next; + xfree(np->notation.sysid); + xfree(np->notation.pubid); + free(np->notation.name); + free(np); + np = tem; + } + + xfree(sp->buf); + xfree(sp->pubid); + xfree(sp->sysid); + if (sp->files) { + int i; + for (i = 0; i < sp->nfiles; i++) + free(sp->files[i]); + free(sp->files); + } + free(sp); + + xfree(datav); + datav = 0; + datav_size = 0; +} + +sgmls_errhandler *sgmls_set_errhandler(handler) + sgmls_errhandler *handler; +{ + sgmls_errhandler *old = errhandler; + if (handler) + errhandler = handler; + return old; +} + +int sgmls_next(sp, e) + struct sgmls *sp; + event_s *e; +{ + while (read_line(sp)) { + char *buf = sp->buf; + + e->filename = sp->filename; + e->lineno = sp->lineno; + + switch (buf[0]) { + case DATA_CODE: + e->u.data.n = parse_data(buf + 1, &sp->lineno); + e->u.data.v = datav; + e->type = SGMLS_EVENT_DATA; + return 1; + case START_CODE: + { + char *p; + e->u.start.attributes = sp->attributes; + sp->attributes = 0; + e->type = SGMLS_EVENT_START; + p = buf + 1; + e->u.start.gi = scan_token(&p); + return 1; + } + case END_CODE: + { + char *p = buf + 1; + e->type = SGMLS_EVENT_END; + e->u.end.gi = scan_token(&p); + return 1; + } + case START_SUBDOC_CODE: + case END_SUBDOC_CODE: + { + char *p = buf + 1; + char *name = scan_token(&p); + if (buf[0] == START_SUBDOC_CODE) { + e->u.entity = lookup_external_entity(sp, name); + sp->subdoc_level++; + e->type = SGMLS_EVENT_SUBSTART; + } + else { + e->type = SGMLS_EVENT_SUBEND; + 
list_finish_level((struct list **)&sp->entities, sp->subdoc_level); + list_finish_level((struct list **)&sp->notations, sp->subdoc_level); + sp->subdoc_level--; + e->u.entity = lookup_external_entity(sp, name); + } + return 1; + } + case ATTRIBUTE_CODE: + add_attribute(&sp->attributes, parse_attribute(sp, buf + 1)); + break; + case DATA_ATTRIBUTE_CODE: + { + char *p = buf + 1; + char *name; + attribute_s *a; + external_entity_s *ext; + + name = scan_token(&p); + a = parse_attribute(sp, p); + ext = lookup_external_entity(sp, name); + add_attribute(&ext->attributes, a); + } + break; + case REFERENCE_ENTITY_CODE: + { + char *p = buf + 1; + char *name; + name = scan_token(&p); + e->u.entity = lookup_external_entity(sp, name); + e->type = SGMLS_EVENT_ENTITY; + return 1; + } + case DEFINE_NOTATION_CODE: + { + notation_s notation; + + parse_notation(buf + 1, &notation); + define_notation(sp, &notation); + } + break; + case DEFINE_EXTERNAL_ENTITY_CODE: + { + external_entity_s external; + + parse_external_entity(buf + 1, sp, &external); + define_external_entity(sp, &external); + } + break; + case DEFINE_SUBDOC_ENTITY_CODE: + { + external_entity_s external; + + parse_subdoc_entity(buf + 1, &external); + define_external_entity(sp, &external); + } + break; + case DEFINE_INTERNAL_ENTITY_CODE: + { + internal_entity_s internal; + + parse_internal_entity(buf + 1, &internal); + define_internal_entity(sp, &internal); + } + break; + case PI_CODE: + e->u.pi.len = unescape1(buf + 1); + e->u.pi.s = buf + 1; + e->type = SGMLS_EVENT_PI; + return 1; + case LOCATION_CODE: + parse_location(buf + 1, sp); + break; + case APPINFO_CODE: + e->u.appinfo = unescape(buf + 1); + e->type = SGMLS_EVENT_APPINFO; + return 1; + case SYSID_CODE: + sp->sysid = strsave(unescape(buf + 1)); + break; + case PUBID_CODE: + sp->pubid = strsave(unescape(buf + 1)); + break; + case FILE_CODE: + sp->files = xrealloc(sp->files, (sp->nfiles + 1)*sizeof(char *)); + sp->files[sp->nfiles] = strsave(unescape_file(buf + 1)); + 
sp->nfiles += 1; + break; + case CONFORMING_CODE: + e->type = SGMLS_EVENT_CONFORMING; + return 1; + default: + error(E_COMMAND); + } + } + + return 0; +} + +static +int parse_data(p, linenop) + char *p; + unsigned long *linenop; +{ + int n = 0; + char *start = p; + char *q; + int is_sdata = 0; + + /* No need to copy before first escape. */ + + for (; *p != '\\' && *p != '\0'; p++) + ; + q = p; + while (*p) { + if (*p == '\\') { + switch (*++p) { + case '\\': + *q++ = *p++; + break; + case 'n': + *q++ = RECHAR; + *linenop += 1; + p++; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + { + int val = *p++ - '0'; + if (*p >= '0' && *p <= '7') { + val = val*8 + (*p++ - '0'); + if (*p >= '0' && *p <= '7') + val = val*8 + (*p++ - '0'); + } + *q++ = (char)val; + } + break; + case '|': + if (q > start || is_sdata) { + if (n >= datav_size) + grow_datav(); + datav[n].s = start; + datav[n].len = q - start; + datav[n].is_sdata = is_sdata; + n++; + } + is_sdata = !is_sdata; + start = q; + p++; + break; + default: + error(E_BADESCAPE); + } + } + else + *q++ = *p++; + } + + if (q > start || is_sdata) { + if (n >= datav_size) + grow_datav(); + datav[n].s = start; + datav[n].len = q - start; + datav[n].is_sdata = is_sdata; + n++; + } + return n; +} + +static +void grow_datav() +{ + unsigned size = datav_size ? 
2*datav_size : 2; + datav = (data_s *)xrealloc((UNIV)datav, size*sizeof(data_s)); + datav_size = size; +} + +static +void parse_location(s, sp) + char *s; + struct sgmls *sp; +{ + unsigned size; + + if (*s < '0' || *s > '9' || sscanf(s, "%lu", &sp->lineno) != 1) + error(E_NUMBER); + do { + ++s; + } while (*s >= '0' && *s <= '9'); + + if (*s != ' ') + return; + s++; + s = unescape_file(s); + size = strlen(s) + 1; + if (size <= sp->filename_size) + strcpy(sp->filename, s); + else { + sp->filename = xrealloc(sp->filename, size); + strcpy(sp->filename, s); + sp->filename_size = size; + } +} + +static +void parse_notation(s, n) + char *s; + notation_s *n; +{ + n->name = strsave(scan_token(&s)); +} + +static +void parse_internal_entity(s, e) + char *s; + internal_entity_s *e; +{ + char *type; + + e->name = strsave(scan_token(&s)); + type = scan_token(&s); + if (strcmp(type, "CDATA") == 0) + e->data.is_sdata = 0; + else if (strcmp(type, "SDATA") == 0) + e->data.is_sdata = 1; + else + error(E_BADINTERNAL); + e->data.len = unescape1(s); + if (e->data.len == 0) + e->data.s = 0; + else { + e->data.s = xmalloc(e->data.len); + memcpy(e->data.s, s, e->data.len); + } +} + +static +void parse_external_entity(s, sp, e) + char *s; + struct sgmls *sp; + external_entity_s *e; +{ + char *type; + char *notation; + + e->name = strsave(scan_token(&s)); + type = scan_token(&s); + if (strcmp(type, "CDATA") == 0) + e->type = SGMLS_ENTITY_CDATA; + else if (strcmp(type, "SDATA") == 0) + e->type = SGMLS_ENTITY_SDATA; + else if (strcmp(type, "NDATA") == 0) + e->type = SGMLS_ENTITY_NDATA; + else + error(E_BADEXTERNAL); + notation = scan_token(&s); + e->notation = lookup_notation(sp, notation); +} + +static +void parse_subdoc_entity(s, e) + char *s; + external_entity_s *e; +{ + e->name = strsave(scan_token(&s)); + e->type = SGMLS_ENTITY_SUBDOC; +} + +static +attribute_s *parse_attribute(sp, s) + struct sgmls *sp; + char *s; +{ + attribute_s *a; + char *type; + + a = (attribute_s 
*)xmalloc(sizeof(*a)); + a->name = strsave(scan_token(&s)); + type = scan_token(&s); + if (strcmp(type, "CDATA") == 0) { + unsigned long lineno = 0; + a->type = SGMLS_ATTR_CDATA; + a->value.data.n = parse_data(s, &lineno); + a->value.data.v = copy_data(datav, a->value.data.n); + } + else if (strcmp(type, "IMPLIED") == 0) { + a->type = SGMLS_ATTR_IMPLIED; + } + else if (strcmp(type, "NOTATION") == 0) { + a->type = SGMLS_ATTR_NOTATION; + a->value.notation = lookup_notation(sp, scan_token(&s)); + } + else if (strcmp(type, "ENTITY") == 0) { + int n, i; + a->type = SGMLS_ATTR_ENTITY; + n = count_args(s); + if (n == 0) + error(E_MISSING); + a->value.entity.v = (entity_s **)xmalloc(n*sizeof(entity_s *)); + a->value.entity.n = n; + for (i = 0; i < n; i++) + a->value.entity.v[i] = lookup_entity(sp, scan_token(&s)); + } + else if (strcmp(type, "TOKEN") == 0) { + int n, i; + a->type = SGMLS_ATTR_TOKEN; + n = count_args(s); + if (n == 0) + error(E_MISSING); + a->value.token.v = (char **)xmalloc(n * sizeof(char *)); + for (i = 0; i < n; i++) + a->value.token.v[i] = strsave(scan_token(&s)); + a->value.token.n = n; + } + else + error(E_ATTR); + return a; +} + +void sgmls_free_attributes(p) + attribute_s *p; +{ + while (p) { + attribute_s *nextp = p->next; + switch (p->type) { + case SGMLS_ATTR_CDATA: + if (p->value.data.v) { + free(p->value.data.v[0].s); + free(p->value.data.v); + } + break; + case SGMLS_ATTR_TOKEN: + { + int i; + for (i = 0; i < p->value.token.n; i++) + free(p->value.token.v[i]); + xfree(p->value.token.v); + } + break; + case SGMLS_ATTR_ENTITY: + xfree(p->value.entity.v); + break; + case SGMLS_ATTR_IMPLIED: + case SGMLS_ATTR_NOTATION: + break; + } + free(p->name); + free(p); + p = nextp; + } +} + +static +data_s *copy_data(v, n) + data_s *v; + int n; +{ + if (n == 0) + return 0; + else { + int i; + unsigned total; + char *p; + data_s *result; + + result = (data_s *)xmalloc(n*sizeof(data_s)); + total = 0; + for (i = 0; i < n; i++) + total += v[i].len; + if 
(!total) + total++; + p = xmalloc(total); + for (i = 0; i < n; i++) { + result[i].s = p; + memcpy(result[i].s, v[i].s, v[i].len); + result[i].len = v[i].len; + p += v[i].len; + result[i].is_sdata = v[i].is_sdata; + } + return result; + } +} + +/* Unescape s, and return nul-terminated data. Give an error +if the data contains 0. */ + +static +char *unescape(s) + char *s; +{ + int len = unescape1(s); + if (memchr(s, '\0', len)) + error(E_NULESCAPE); + s[len] = '\0'; + return s; +} + +/* Like unescape(), but REs are represented by 012 not 015. */ + +static +char *unescape_file(s) + char *s; +{ + char *p; + p = s = unescape(s); + while ((p = strchr(p, RECHAR)) != 0) + *p++ = '\n'; + return s; + +} + +/* Unescape s, and return length of data. The data may contain 0. */ + +static +int unescape1(s) + char *s; +{ + const char *p; + char *q; + + q = strchr(s, '\\'); + if (!q) + return strlen(s); + p = q; + while (*p) { + if (*p == '\\') { + switch (*++p) { + case '\\': + *q++ = *p++; + break; + case 'n': + *q++ = RECHAR; + p++; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + { + int val = *p++ - '0'; + if (*p >= '0' && *p <= '7') { + val = val*8 + (*p++ - '0'); + if (*p >= '0' && *p <= '7') + val = val*8 + (*p++ - '0'); + } + *q++ = (char)val; + } + break; + case '|': + error(E_SDATA); + default: + error(E_BADESCAPE); + } + } + else + *q++ = *p++; + } + return q - s; +} + +static +char *scan_token(pp) + char **pp; +{ + char *start = *pp; + while (**pp != '\0') { + if (**pp == ' ') { + **pp = '\0'; + *pp += 1; + break; + } + *pp += 1; + } + if (!*start) + error(E_MISSING); + return start; +} + +static +int count_args(p) + char *p; +{ + int n = 0; + + while (*p != '\0') { + n++; + do { + ++p; + if (*p == ' ') { + p++; + break; + } + } while (*p != '\0'); + } + return n; +} + +static +int read_line(sp) + struct sgmls *sp; +{ + unsigned i = 0; + FILE *fp = sp->fp; + int c; + char *buf = sp->buf; + unsigned buf_size = 
sp->buf_size; + + c = getc(fp); + if (c == EOF) { + input_lineno = sp->input_lineno; + if (ferror(fp)) + error(E_SYSTEM); + return 0; + } + + sp->input_lineno++; + input_lineno = sp->input_lineno; + for (;;) { + if (i >= buf_size) { + if (buf_size == 0) + buf_size = 24; + else if (buf_size > (unsigned)UINT_MAX/2) { + if (buf_size == (unsigned)UINT_MAX) + error(E_LINELENGTH); + buf_size = (unsigned)UINT_MAX; + } + else + buf_size *= 2; + buf = xrealloc(buf, buf_size); + sp->buf = buf; + sp->buf_size = buf_size; + } + if (c == '\0') + error(E_NUL); + if (c == '\n') { + buf[i] = '\0'; + break; + } + buf[i++] = c; + c = getc(fp); + if (c == EOF) { + if (ferror(fp)) + error(E_SYSTEM); + else + error(E_EOF); + } + } + return 1; +} + +static +notation_s *lookup_notation(sp, name) +struct sgmls *sp; +char *name; +{ + struct notation_list *p + = (struct notation_list *)list_find((struct list *)sp->notations, name, + sp->subdoc_level); + if (!p) + error(E_BADNOTATION); + return &p->notation; +} + +static +entity_s *lookup_entity(sp, name) +struct sgmls *sp; +char *name; +{ + struct entity_list *p + = (struct entity_list *)list_find((struct list *)sp->entities, name, + sp->subdoc_level); + if (!p) + error(E_BADENTITY); + return &p->entity; +} + +static +external_entity_s *lookup_external_entity(sp, name) +struct sgmls *sp; +char *name; +{ + entity_s *p = lookup_entity(sp, name); + if (p->is_internal) + error(E_INTERNALENTITY); + return &p->u.external; +} + +static +void define_external_entity(sp, e) +struct sgmls *sp; +external_entity_s *e; +{ + struct entity_list *p; + e->attributes = 0; + e->filenames = sp->files; + e->nfilenames = sp->nfiles; + sp->files = 0; + sp->nfiles = 0; + e->pubid = sp->pubid; + sp->pubid = 0; + e->sysid = sp->sysid; + sp->sysid = 0; + p = (struct entity_list *)xmalloc(sizeof(struct entity_list)); + memcpy((UNIV)&p->entity.u.external, (UNIV)e, sizeof(*e)); + p->entity.is_internal = 0; + p->subdoc_level = sp->subdoc_level; + p->next = sp->entities; + 
sp->entities = p; +} + +static +void define_internal_entity(sp, e) +struct sgmls *sp; +internal_entity_s *e; +{ + struct entity_list *p; + p = (struct entity_list *)xmalloc(sizeof(struct entity_list)); + memcpy((UNIV)&p->entity.u.internal, (UNIV)e, sizeof(*e)); + p->entity.is_internal = 1; + p->subdoc_level = sp->subdoc_level; + p->next = sp->entities; + sp->entities = p; +} + +static +void define_notation(sp, np) +struct sgmls *sp; +notation_s *np; +{ + struct notation_list *p; + np->sysid = sp->sysid; + sp->sysid = 0; + np->pubid = sp->pubid; + sp->pubid = 0; + p = (struct notation_list *)xmalloc(sizeof(struct notation_list)); + memcpy((UNIV)&p->notation, (UNIV)np, sizeof(*np)); + p->subdoc_level = sp->subdoc_level; + p->next = sp->notations; + sp->notations = p; +} + +static +struct list *list_find(p, name, level) + struct list *p; + char *name; + int level; +{ + for (; p && p->subdoc_level == level; p = p->next) + if (strcmp(p->name, name) == 0) + return p; + return 0; +} + +/* Move all the items in the list whose subdoc level is level to the +end of the list and make their subdoc_level -1. 
*/ + +static +void list_finish_level(listp, level) + struct list **listp; + int level; +{ + struct list **pp, *next_level, *old_level; + for (pp = listp; *pp && (*pp)->subdoc_level == level; pp = &(*pp)->next) + (*pp)->subdoc_level = -1; + next_level = *pp; + *pp = 0; + old_level = *listp; + *listp = next_level; + for (pp = listp; *pp; pp = &(*pp)->next) + ; + *pp = old_level; +} + +static +void add_attribute(pp, a) + attribute_s **pp, *a; +{ + for (; *pp && strcmp((*pp)->name, a->name) < 0; pp = &(*pp)->next) + ; + a->next = *pp; + *pp = a; +} + + +static +char *strsave(s) +char *s; +{ + if (!s) + return s; + else { + char *p = xmalloc(strlen(s) + 1); + strcpy(p, s); + return p; + } +} + +static +UNIV xmalloc(n) + unsigned n; +{ + UNIV p = malloc(n); + if (!p) + error(E_NOMEM); + return p; +} + +/* ANSI C says first argument to realloc can be NULL, but not everybody + appears to support this. */ + +static +UNIV xrealloc(p, n) + UNIV p; + unsigned n; +{ + p = p ? realloc(p, n) : malloc(n); + if (!p) + error(E_NOMEM); + return p; +} + +static NO_RETURN +void error(num) + enum error_code num; +{ + (*errhandler)((int)num, errlist[num], input_lineno); + abort(); +} + +static +void default_errhandler(num, msg, lineno) + int num; + char *msg; + unsigned long lineno; +{ + fprintf(stderr, "Line %lu: %s\n", lineno, msg); + exit(1); +} diff --git a/usr.bin/sgmls/libsgmls/sgmls.h b/usr.bin/sgmls/libsgmls/sgmls.h new file mode 100644 index 0000000..79b2658 --- /dev/null +++ b/usr.bin/sgmls/libsgmls/sgmls.h @@ -0,0 +1,127 @@ +/* sgmls.h + Interface to a library for reading output of sgmls. 
*/ + +struct sgmls_data { + char *s; + unsigned len; + char is_sdata; +}; + +struct sgmls_notation { + char *name; + char *sysid; + char *pubid; +}; + +struct sgmls_internal_entity { + char *name; + struct sgmls_data data; +}; + +enum sgmls_external_entity_type { + SGMLS_ENTITY_CDATA, + SGMLS_ENTITY_SDATA, + SGMLS_ENTITY_NDATA, + SGMLS_ENTITY_SUBDOC + }; + +struct sgmls_external_entity { + char *name; + enum sgmls_external_entity_type type; + char **filenames; + int nfilenames; + char *pubid; + char *sysid; + struct sgmls_attribute *attributes; + struct sgmls_notation *notation; +}; + +struct sgmls_entity { + union { + struct sgmls_internal_entity internal; + struct sgmls_external_entity external; + } u; + char is_internal; +}; + +enum sgmls_attribute_type { + SGMLS_ATTR_IMPLIED, + SGMLS_ATTR_CDATA, + SGMLS_ATTR_TOKEN, + SGMLS_ATTR_ENTITY, + SGMLS_ATTR_NOTATION +}; + +struct sgmls_attribute { + struct sgmls_attribute *next; + char *name; + enum sgmls_attribute_type type; + union { + struct { + struct sgmls_data *v; + int n; + } data; + struct { + struct sgmls_entity **v; + int n; + } entity; + struct { + char **v; + int n; + } token; + struct sgmls_notation *notation; + } value; +}; + +enum sgmls_event_type { + SGMLS_EVENT_DATA, /* data */ + SGMLS_EVENT_ENTITY, /* external entity reference */ + SGMLS_EVENT_PI, /* processing instruction */ + SGMLS_EVENT_START, /* element start */ + SGMLS_EVENT_END, /* element end */ + SGMLS_EVENT_SUBSTART, /* subdocument start */ + SGMLS_EVENT_SUBEND, /* subdocument end */ + SGMLS_EVENT_APPINFO, /* appinfo */ + SGMLS_EVENT_CONFORMING /* the document was conforming */ + }; + +struct sgmls_event { + enum sgmls_event_type type; + union { + struct { + struct sgmls_data *v; + int n; + } data; + struct sgmls_external_entity *entity; + struct { + char *s; + unsigned len; + } pi; + struct { + char *gi; + struct sgmls_attribute *attributes; + } start; + struct { + char *gi; + } end; + char *appinfo; + } u; + char *filename; /* SGML filename 
*/ + unsigned long lineno; /* SGML lineno */ +}; + +#ifdef __STDC__ +void sgmls_free_attributes(struct sgmls_attribute *); +struct sgmls *sgmls_create(FILE *); +int sgmls_next(struct sgmls *, struct sgmls_event *); +void sgmls_free(struct sgmls *); +typedef void sgmls_errhandler(int, char *, unsigned long); +sgmls_errhandler *sgmls_set_errhandler(sgmls_errhandler *); +#else /* not __STDC__ */ +void sgmls_free_attributes(); +struct sgmls *sgmls_create(); +int sgmls_next(); +void sgmls_free(); +typedef void sgmls_errhandler(); +sgmls_errhandler *sgmls_set_errhandler(); +#endif /* not __STDC__ */ diff --git a/usr.bin/sgmls/rast/Makefile b/usr.bin/sgmls/rast/Makefile new file mode 100644 index 0000000..3be6c87 --- /dev/null +++ b/usr.bin/sgmls/rast/Makefile @@ -0,0 +1,18 @@ +# +# Bmakefile for rast +# +# $id$ +# + +PROG= rast + +SRCS+= rast.c + +CFLAGS+= -I${.CURDIR}/../libsgmls -I${.CURDIR}/../sgmls + +LDADD= ${LIBSGMLS} +DPADD= ${LIBSGMLS} + +.include "../Makefile.inc" +.include <bsd.prog.mk> + diff --git a/usr.bin/sgmls/rast/rast.1 b/usr.bin/sgmls/rast/rast.1 new file mode 100644 index 0000000..2d167fc --- /dev/null +++ b/usr.bin/sgmls/rast/rast.1 @@ -0,0 +1,75 @@ +.\" -*- nroff -*- +.tr \(ts" +.TH RAST 1 +.SH NAME +rast \- translate output of sgmls to RAST format +.SH SYNOPSIS +.B rast +[ +.BI \-o output_file +] +[ +.I input_file +] +.SH DESCRIPTION +.I Rast +translates the output of sgmls to the format of a RAST result. +RAST is the Reference Application for SGML Testing defined in the Proposed +American National Standard on Conformance Testing for Standard Generalized +Markup Language (SGML) Systems (X3.190-199X). +.I Rast +reads from +.I input_file +or from standard input if +.I input_file +is not specified. +It writes to +.I output_file +or to standard output if +.I output_file +is not specified; +use of the +.B \-o +option avoids the need for +.I rast +to use a temporary file. 
+.LP +Note that the +.B -c +option of +.I sgmls +can generate a capacity report in RACT format. +.SH BUGS +Production [9] in clause 14.5.5 of the draft standard is clearly wrong; +.I rast +corrects it by appending +.RI `,\ LE '. +An alternative way to correct it would be to delete the +.RB `, \(tsEND-ENTITY\(ts '. +.LP +In production [18] in clause 14.5.9, +.RI ` markup\ data +' +should be +.RI ` markup\ data *' +since internal sdata entities need not contain any characters (14.5.11), +and +.I markup\ data +cannot be empty (14.5.9, 14.5.12). +.LP +The RAST result for the example in Annex B.4 is incorrect. +The line +.B G03-A1= +should be immediately followed by a line +.BR !g03-e1! . +(The problem with production [9] also applies to this example.) +.LP +.I Rast +outputs a newline after +.B #ERROR +in order to avoid producing files with partial lines. +.SH "SEE ALSO" +.IR sgmls (1) +.br +.I +Conformance Testing for Standard Generalized Markup Language (SGML) Systems, +(X3.190-199X), Draft July 1991 diff --git a/usr.bin/sgmls/rast/rast.c b/usr.bin/sgmls/rast/rast.c new file mode 100644 index 0000000..31b48de --- /dev/null +++ b/usr.bin/sgmls/rast/rast.c @@ -0,0 +1,534 @@ +/* rast.c + Translate sgmls output to RAST result format. + + Written by James Clark (jjc@jclark.com). 
*/ + +#include "config.h" +#include "std.h" +#include "sgmls.h" +#include "getopt.h" + +#ifdef USE_PROTOTYPES +#define P(parms) parms +#else +#define P(parms) () +#endif + +#ifdef __GNUC__ +#define NO_RETURN volatile +#else +#define NO_RETURN /* as nothing */ +#endif + +#ifdef VARARGS +#define VP(parms) () +#else +#define VP(parms) P(parms) +#endif + +#ifdef USE_ISASCII +#define ISASCII(c) isascii(c) +#else +#define ISASCII(c) (1) +#endif + +NO_RETURN void error VP((char *,...)); + +static void input_error P((int, char *, unsigned long)); +static int do_file P((FILE *)); +static void usage P((void)); + +static void output_processing_instruction P((char *, unsigned)); +static void output_data P((struct sgmls_data *, int)); +static void output_data_lines P((char *, unsigned)); +static void output_internal_sdata P((char *, unsigned)); +static void output_external_entity P((struct sgmls_external_entity *)); +static void output_external_entity_info P((struct sgmls_external_entity *)); +static void output_element_start P((char *, struct sgmls_attribute *)); +static void output_element_end P((char *)); +static void output_attribute P((struct sgmls_attribute *)); +static void output_tokens P((char **, int)); +static void output_markup_chars P((char *, unsigned)); +static void output_markup_string P((char *)); +static void output_char P((int, int)); +static void output_flush P((int)); +static void output_external_id P((char *, char *)); +static void output_entity P((struct sgmls_entity *)); +static void output_external_entity_info P((struct sgmls_external_entity *)); +static void output_internal_entity P((struct sgmls_internal_entity *)); + +#define output_flush_markup() output_flush('!') +#define output_flush_data() output_flush('|') + +static FILE *outfp; +static int char_count = 0; +static char *program_name; + +int main(argc, argv) + int argc; + char **argv; +{ + int c; + int opt; + char *output_file = 0; + + program_name = argv[0]; + + while ((opt = getopt(argc, argv, 
"o:")) != EOF) + switch (opt) { + case 'o': + output_file = optarg; + break; + case '?': + usage(); + default: + abort(); + } + + if (output_file) { + errno = 0; + outfp = fopen(output_file, "w"); + if (!outfp) + error("couldn't open `%s' for output: %s", strerror(errno)); + } + else { + outfp = tmpfile(); + if (!outfp) + error("couldn't create temporary file: %s", strerror(errno)); + } + + if (argc - optind > 1) + usage(); + + if (argc - optind == 1) { + if (!freopen(argv[optind], "r", stdin)) + error("couldn't open `%s' for input: %s", argv[optind], strerror(errno)); + } + + (void)sgmls_set_errhandler(input_error); + + if (!do_file(stdin)) { + fclose(outfp); + if (output_file) { + if (!freopen(output_file, "w", stdout)) + error("couldn't reopen `%s' for output: %s", strerror(errno)); + } + fputs("#ERROR\n", stdout); + exit(EXIT_FAILURE); + } + + if (output_file) { + errno = 0; + if (fclose(outfp) == EOF) + error("error closing `%s': %s", output_file, strerror(errno)); + } + else { + errno = 0; + if (fseek(outfp, 0L, SEEK_SET)) + error("couldn't rewind temporary file: %s", strerror(errno)); + while ((c = getc(outfp)) != EOF) + if (putchar(c) == EOF) + error("error writing standard output: %s", strerror(errno)); + } + exit(EXIT_SUCCESS); +} + +static +void usage() +{ + fprintf(stderr, "usage: %s [-o output_file] [input_file]\n", program_name); + exit(EXIT_FAILURE); +} + +static +int do_file(fp) + FILE *fp; +{ + struct sgmls *sp; + struct sgmls_event e; + int conforming = 0; + + sp = sgmls_create(fp); + while (sgmls_next(sp, &e)) + switch (e.type) { + case SGMLS_EVENT_DATA: + output_data(e.u.data.v, e.u.data.n); + break; + case SGMLS_EVENT_ENTITY: + output_external_entity(e.u.entity); + break; + case SGMLS_EVENT_PI: + output_processing_instruction(e.u.pi.s, e.u.pi.len); + break; + case SGMLS_EVENT_START: + output_element_start(e.u.start.gi, e.u.start.attributes); + sgmls_free_attributes(e.u.start.attributes); + break; + case SGMLS_EVENT_END: + 
output_element_end(e.u.end.gi); + break; + case SGMLS_EVENT_SUBSTART: + { + int level = 1; + output_external_entity(e.u.entity); + while (level > 0) { + if (!sgmls_next(sp, &e)) + return 0; + switch (e.type) { + case SGMLS_EVENT_SUBSTART: + level++; + break; + case SGMLS_EVENT_SUBEND: + level--; + break; + case SGMLS_EVENT_START: + sgmls_free_attributes(e.u.start.attributes); + break; + default: + /* prevent compiler warnings */ + break; + } + } + } + break; + case SGMLS_EVENT_APPINFO: + break; + case SGMLS_EVENT_CONFORMING: + conforming = 1; + break; + default: + abort(); + } + sgmls_free(sp); + return conforming; +} + +static +void output_processing_instruction(s, len) + char *s; + unsigned len; +{ + fputs("[?", outfp); + if (len > 0) { + putc('\n', outfp); + output_data_lines(s, len); + output_flush_data(); + } + fputs("]\n", outfp); +} + +static +void output_data(v, n) + struct sgmls_data *v; + int n; +{ + int i; + for (i = 0; i < n; i++) { + if (v[i].is_sdata) + output_internal_sdata(v[i].s, v[i].len); + else if (v[i].len > 0) + output_data_lines(v[i].s, v[i].len); + } +} + +static +void output_data_lines(s, n) + char *s; + unsigned n; +{ + assert(n > 0); + for (; n > 0; --n) + output_char((unsigned char)*s++, '|'); + output_flush_data(); +} + +static +void output_internal_sdata(s, n) + char *s; + unsigned n; +{ + fputs("#SDATA-TEXT\n", outfp); + output_markup_chars(s, n); + output_flush_markup(); + fputs("#END-SDATA\n", outfp); +} + +static +void output_external_entity(e) + struct sgmls_external_entity *e; +{ + fprintf(outfp, "[&%s\n", e->name); + output_external_entity_info(e); + fputs("]\n", outfp); +} + +static +void output_element_start(gi, att) + char *gi; + struct sgmls_attribute *att; +{ + fprintf(outfp, "[%s", gi); + if (att) { + struct sgmls_attribute *p; + putc('\n', outfp); + for (p = att; p; p = p->next) + output_attribute(p); + } + fputs("]\n", outfp); +} + +static +void output_element_end(gi) + char *gi; +{ + fprintf(outfp, "[/%s]\n", gi); +} + 
+static +void output_attribute(p) + struct sgmls_attribute *p; +{ + fprintf(outfp, "%s=\n", p->name); + switch (p->type) { + case SGMLS_ATTR_IMPLIED: + fputs("#IMPLIED\n", outfp); + break; + case SGMLS_ATTR_CDATA: + { + struct sgmls_data *v = p->value.data.v; + int n = p->value.data.n; + int i; + for (i = 0; i < n; i++) + if (v[i].is_sdata) + output_internal_sdata(v[i].s, v[i].len); + else { + output_markup_chars(v[i].s, v[i].len); + output_flush_markup(); + } + } + break; + case SGMLS_ATTR_TOKEN: + output_tokens(p->value.token.v, p->value.token.n); + break; + case SGMLS_ATTR_ENTITY: + { + int i; + for (i = 0; i < p->value.entity.n; i++) { + struct sgmls_entity *e = p->value.entity.v[i]; + char *name; + + if (e->is_internal) + name = e->u.internal.name; + else + name = e->u.external.name; + if (i > 0) + output_markup_string(" "); + output_markup_string(name); + } + output_flush_markup(); + for (i = 0; i < p->value.entity.n; i++) + output_entity(p->value.entity.v[i]); + } + break; + case SGMLS_ATTR_NOTATION: + output_tokens(&p->value.notation->name, 1); + output_external_id(p->value.notation->pubid, p->value.notation->sysid); + break; + } +} + +static void output_tokens(v, n) + char **v; + int n; +{ + int i; + assert(n > 0); + output_markup_string(v[0]); + for (i = 1; i < n; i++) { + output_markup_string(" "); + output_markup_string(v[i]); + } + output_flush_markup(); +} + +static +void output_markup_chars(s, n) + char *s; + unsigned n; +{ + for (; n > 0; --n) + output_char((unsigned char)*s++, '!'); +} + +static +void output_markup_string(s) + char *s; +{ + while (*s) + output_char((unsigned char)*s++, '!'); +} + +static +void output_char(c, delim) + int c; + int delim; +{ + if (ISASCII(c) && isprint(c)) { + if (char_count == 0) + putc(delim, outfp); + putc(c, outfp); + char_count++; + if (char_count == 60) { + putc(delim, outfp); + putc('\n', outfp); + char_count = 0; + } + } + else { + output_flush(delim); + switch (c) { + case RECHAR: + fputs("#RE\n", outfp); + 
break; + case RSCHAR: + fputs("#RS\n", outfp); + break; + case TABCHAR: + fputs("#TAB\n", outfp); + break; + default: + fprintf(outfp, "#%d\n", c); + } + } +} + +static +void output_flush(delim) + int delim; +{ + if (char_count > 0) { + putc(delim, outfp); + putc('\n', outfp); + char_count = 0; + } +} + +static +void output_external_id(pubid, sysid) + char *pubid; + char *sysid; +{ + if (!pubid && !sysid) + fputs("#SYSTEM\n#NONE\n", outfp); + else { + if (pubid) { + fputs("#PUBLIC\n", outfp); + if (*pubid) { + output_markup_string(pubid); + output_flush_markup(); + } + else + fputs("#EMPTY\n", outfp); + } + if (sysid) { + fputs("#SYSTEM\n", outfp); + if (*sysid) { + output_markup_string(sysid); + output_flush_markup(); + } + else + fputs("#EMPTY\n", outfp); + } + } +} + +static +void output_entity(e) + struct sgmls_entity *e; +{ + if (e->is_internal) + output_internal_entity(&e->u.internal); + else + output_external_entity_info(&e->u.external); + fputs("#END-ENTITY", outfp); +#ifndef ASIS + putc('\n', outfp); +#endif +} + +static +void output_external_entity_info(e) + struct sgmls_external_entity *e; +{ + switch (e->type) { + case SGMLS_ENTITY_CDATA: + fputs("#CDATA-EXTERNAL", outfp); + break; + case SGMLS_ENTITY_SDATA: + fputs("#SDATA-EXTERNAL", outfp); + break; + case SGMLS_ENTITY_NDATA: + fputs("#NDATA-EXTERNAL", outfp); + break; + case SGMLS_ENTITY_SUBDOC: + fputs("#SUBDOC", outfp); + break; + } + putc('\n', outfp); + output_external_id(e->pubid, e->sysid); + if (e->type != SGMLS_ENTITY_SUBDOC) { + struct sgmls_attribute *p; + fprintf(outfp, "#NOTATION=%s\n", e->notation->name); + output_external_id(e->notation->pubid, e->notation->sysid); + for (p = e->attributes; p; p = p->next) + output_attribute(p); + } +} + +static +void output_internal_entity(e) + struct sgmls_internal_entity *e; +{ + if (e->data.is_sdata) + fputs("#SDATA-INTERNAL", outfp); + else + fputs("#CDATA-INTERNAL", outfp); + putc('\n', outfp); + output_markup_chars(e->data.s, e->data.len); + 
output_flush_markup(); +} + +static +void input_error(num, str, lineno) + int num; + char *str; + unsigned long lineno; +{ + error("Error at input line %lu: %s", lineno, str); +} + +NO_RETURN +#ifdef VARARGS +void error(va_alist) va_dcl +#else +void error(char *message,...) +#endif +{ +#ifdef VARARGS + char *message; +#endif + va_list ap; + + fprintf(stderr, "%s: ", program_name); +#ifdef VARARGS + va_start(ap); + message = va_arg(ap, char *); +#else + va_start(ap, message); +#endif + vfprintf(stderr, message, ap); + va_end(ap); + fputc('\n', stderr); + fflush(stderr); + exit(EXIT_FAILURE); +} diff --git a/usr.bin/sgmls/sgmls.pl b/usr.bin/sgmls/sgmls.pl new file mode 100755 index 0000000..edb9eb6 --- /dev/null +++ b/usr.bin/sgmls/sgmls.pl @@ -0,0 +1,247 @@ +#! /usr/bin/perl + +# This is a skeleton of a perl script for processing the output of +# sgmls. You must change the parts marked with "XXX". + +# XXX This is for troff: in data, turn \ into \e (which prints as \). +# Backslashes in SDATA entities are left as backslashes. 
+ +$backslash_in_data = "\\e"; + +$prog = $0; + +$prog =~ s|.*/||; + +$level = 0; + +while (<STDIN>) { + chop; + $command = substr($_, 0, 1); + substr($_, 0, 1) = ""; + if ($command eq '(') { + &start_element($_); + $level++; + } + elsif ($command eq ')') { + $level--; + &end_element($_); + foreach $key (keys %attribute_value) { + @splitkey = split($;, $key); + if ($splitkey[0] == $level) { + delete $attribute_value{$key}; + delete $attribute_type{$key}; + } + } + } + elsif ($command eq '-') { + &unescape_data($_); + &data($_); + } + elsif ($command eq 'A') { + @field = split(/ /, $_, 3); + $attribute_type{$level,$field[0]} = $field[1]; + &unescape_data($field[2]); + $attribute_value{$level,$field[0]} = $field[2]; + } + elsif ($command eq '&') { + &entity($_); + } + elsif ($command eq 'D') { + @field = split(/ /, $_, 4); + $data_attribute_type{$field[0], $field[1]} = $field[2]; + &unescape_data($field[3]); + $data_attribute_value{$field[0], $field[1]} = $field[3]; + } + elsif ($command eq 'N') { + $notation{$_} = 1; + if (defined($sysid)) { + $notation_sysid{$_} = $sysid; + undef($sysid); + } + if (defined($pubid)) { + $notation_pubid{$_} = $pubid; + undef($pubid); + } + } + elsif ($command eq 'I') { + @field = split(/ /, $_, 3); + $entity_type{$field[0]} = $field[1]; + &unescape($field[2]); + # You may want to substitute \e for \ if the type is CDATA. 
+ $entity_text{$field[0]} = $field[2]; + $entity_code{$field[0]} = 'I'; + } + elsif ($command eq 'E') { + @field = split(/ /, $_); + $entity_code{$field[0]} = 'E'; + $entity_type{$field[0]} = $field[1]; + $entity_notation{$field[0]} = $field[2]; + if (@files) { + foreach $i (0..$#files) { + $entity_filename{$field[0], $i} = $files[$i]; + } + undef(@files); + } + if (defined($sysid)) { + $entity_sysid{$field[0]} = $sysid; + undef($sysid); + } + if (defined($pubid)) { + $entity_pubid{$field[0]} = $pubid; + undef($pubid); + } + } + elsif ($command eq 'S') { + $entity_code{$_} = 'S'; + if (@files) { + foreach $i (0..$#files) { + $entity_filename{$_, $i} = $files[$i]; + } + undef(@files); + } + if (defined($sysid)) { + $entity_sysid{$_} = $sysid; + undef($sysid); + } + if (defined($pubid)) { + $entity_pubid{$_} = $pubid; + undef($pubid); + } + } + elsif ($command eq '?') { + &unescape($_); + &pi($_); + } + elsif ($command eq 'L') { + @field = split(/ /, $_); + $lineno = $field[0]; + if ($#field >= 1) { + &unescape($field[1]); + $filename = $field[1]; + } + } + elsif ($command eq 'V') { + @field = split(/ /, $_, 2); + &unescape($field[1]); + $environment{$field[0]} = $field[1]; + } + elsif ($command eq '{') { + &start_subdoc($_); + } + elsif ($command eq '}') { + &end_subdoc($_); + } + elsif ($command eq 'f') { + &unescape($_); + push(@files, $_); + } + elsif ($command eq 'p') { + &unescape($_); + $pubid = $_; + } + elsif ($command eq 's') { + &unescape($_); + $sysid = $_; + } + elsif ($command eq 'C') { + $conforming = 1; + } + else { + warn "$prog:$ARGV:$.: unrecognized command \`$command'\n"; + } +} + +sub unescape { + $_[0] =~ s/\\([0-7][0-7]?[0-7]?|.)/&esc($1)/eg; +} + +sub esc { + local($_) = $_[0]; + if ($_ eq '012' || $_ eq '12') { + ""; # ignore RS + } + elsif (/^[0-7]/) { + sprintf("%c", oct); + } + elsif ($_ eq 'n') { + "\n"; + } + elsif ($_ eq '|') { + ""; + } + elsif ($_ eq "\\") { + "\\"; + } + else { + $_; + } +} + +sub unescape_data { + 
local($sdata) = 0; + $_[0] =~ s/\\([0-7][0-7]?[0-7]?|.)/&esc_data($1)/eg; +} + +sub esc_data { + local($_) = $_[0]; + if ($_ eq '012' || $_ eq '12') { + ""; # ignore RS + } + elsif (/^[0-7]/) { + sprintf("%c", oct); + } + elsif ($_ eq 'n') { + "\n"; + } + elsif ($_ eq '|') { + $sdata = !$sdata; + ""; + } + elsif ($_ eq "\\") { + $sdata ? "\\" : $backslash_in_data; + } + else { + $_; + } +} + + +sub start_element { + local($gi) = $_[0]; + # XXX +} + +sub end_element { + local($gi) = $_[0]; + # XXX +} + +sub data { + local($data) = $_[0]; + # XXX +} + +# A processing instruction. + +sub pi { + local($data) = $_[0]; + # XXX +} + +# A reference to an external entity. + +sub entity { + local($name) = $_[0]; + # XXX +} + +sub start_subdoc { + local($name) = $_[0]; + # XXX +} + +sub end_subdoc { + local($name) = $_[0]; + # XXX +} + diff --git a/usr.bin/sgmls/sgmls/Makefile b/usr.bin/sgmls/sgmls/Makefile new file mode 100644 index 0000000..3a0a0cf --- /dev/null +++ b/usr.bin/sgmls/sgmls/Makefile @@ -0,0 +1,18 @@ +# +# Bmakefile for sgmls +# +# $id$ +# + +PROG= sgmls + +SRCS+= lexrf.c pcbrf.c synrf.c context.c md1.c md2.c pars1.c pars2.c serv.c +SRCS+= sgml1.c sgml2.c sgmlmsg.c sgmlxtrn.c traceset.c entgen.c sgmlio.c +SRCS+= xfprintf.c main.c unixproc.c sgmldecl.c version.c strerror.c getopt.c +SRCS+= msgcat.c lineout.c ambig.c exclude.c lextaba.c + +CFLAGS+= -I${.CURDIR}/../libsgmls + +.include "../Makefile.inc" +.include <bsd.prog.mk> + diff --git a/usr.bin/sgmls/sgmls/action.h b/usr.bin/sgmls/sgmls/action.h new file mode 100644 index 0000000..08475bf --- /dev/null +++ b/usr.bin/sgmls/sgmls/action.h @@ -0,0 +1,179 @@ +/* ACTION.H: Symbols for all PCB action codes. */ +/* CONACT.H: Symbols for content parse action names (end with '_'). + There must be no conflict with PARSEACT.H, which + uses 0 through 19, or SGMLACT.H, which uses 20 through 32 + (except that 31 - 32 can be defined here because they are + used only by PARSEPRO and do not conflict with SGML.C). 
+*/ +#define CIR_ 31 /* Invalid character(s) ignored in MDS; restarting parse. */ +#define DTD_ 32 /* Process DOCTYPE declaration. */ +#define DTE_ 33 /* End of DOCTYPE declaration. */ +#define PEP_ 34 /* TEMP: Previous character ended prolog. */ +#define DAS_ 35 /* Current character begins data. */ +#define FCE_ 36 /* Process free character (SR12-18, 21-30). */ +#define DCE_ 37 /* Data character in element text; change PCB. */ +#define LAS_ 38 /* Start lookahead buffer with current character. */ +#define LAM_ 39 /* Move character to lookahead buffer. */ +#define LAF_ 40 /* Flush the lookahead buffer; REPEATCC. */ +#define NED_ 41 /* Process null end-tag delimiter. */ +#define NET_ 42 /* Process null end-tag. */ +#define NST_ 43 /* Process null start-tag. */ +#define NLF_ 44 /* Flush lookahead buffer except for trailing NET or SR. */ +#define ETC_ 45 /* End-tag in CDATA or RCDATA; treat as data if invalid. */ +#define SRMIN 46 /* Dummy for SHORT REFERENCES: srn = SRn - SRMIN. */ +#define SR1_ 47 /* TAB */ +#define SR2_ 48 /* RE */ +#define SR3_ 49 /* RS */ +#define SR4_ 50 /* Leading blanks */ +#define SR5_ 51 /* Null record */ +#define DAR_ 52 /* Flush data buffer after repeating current character. */ +#define SR7_ 53 /* Trailing blanks */ +#define SR8_ 54 /* Space */ +#define SR9_ 55 /* Two or more blanks */ +#define SR10 56 /* Quotation mark (first data character) */ +#define SR11 57 /* Number sign */ +#define SR12 58 /* FCE CHARACTERS start here */ +/* _ 59 */ +#define BSQ_ 60 /* Blank sequence begun; find its end. */ +/* 61 In use by PARSEACT.H */ +/* 62 In use by PARSEACT.H */ +/* 63 In use by PARSEACT.H */ +/* 64 In use by PARSEACT.H */ +#define SR19 65 /* Hyphen */ +#define SR20 66 /* Two hyphens */ +#define SR25 71 /* Left bracket */ +#define SR26 72 /* Right bracket */ +#define RBR_ 73 /* Two right brackets. 
*/ +#define GTR_ 74 /* EOB with pending data character */ +#define MSP_ 75 /* Marked section start in prolog outside DTD */ +#define APP_ 76 /* APPINFO (other than NONE) */ +#define STE_ 77 /* Start tag ended prolog */ + +/* GRPACT.H: Symbols for group tokenization action names (all alpha). + There must be no conflict with PARSEACT.H, which + uses 0 - 19. +*/ +#define AND 20 /* AND connector found. */ +#define DTAG 21 /* Data tag token group occurred (treat as #CHARS). */ +#define GRPE 22 /* Group ended. */ +#define GRP_ 23 /* Group started. */ +#define NAS_ 24 /* Name started in content model or name group. */ +#define NMT_ 25 /* Name or name token started in name token group. */ +#define OPT 26 /* OPT occurrence indicator for previous token. */ +#define OR 27 /* OR connector found. */ +#define OREP 28 /* OREP occurrence indicator for previous token. */ +#define REP 29 /* REP occurrence indicator for previous token. */ +#define RNS_ 30 /* Reserved name started (#PCDATA). */ +#define SEQ 31 /* SEQ connector found. */ +/* LITACT.H: Symbols for content parse action names (end with '_'). + There must be no conflict with PARSEACT.H, which + uses 0 through 19. +*/ +#define MLA_ 20 /* Move character to look-aside data buffer. */ +#define LPR_ 21 /* Move previous character to data buffer. */ +#define RSM_ 22 /* Process record start and move it to data buffer. */ +#define FUN_ 23 /* Replace function character with a space. */ +#define LP2_ 24 /* Move previous two characters to data buffer. */ +#define MLE_ 25 /* Minimum literal error: invalid character ignored. */ +#define RPR_ 26 /* Remove previous character from data buffer; terminate. */ +#define TER_ 27 /* Terminate the parse. */ +/* MDACT.H: Symbols for markup declaration parse action names (all alpha). + There must be no conflict with PARSEACT.H, which + uses 0 - 19. +*/ +#define CDR 20 /* CD[1] (MINUS) occurred previously. */ +#define EMD 21 /* End of markup declaration. */ +#define GRPS 22 /* Group started. 
*/ +#define LIT 23 /* Literal started: character data. */ +#define LITE 24 /* Literal started: character data; LITA is delimiter. */ +#define MGRP 25 /* Minus exception group (MINUS,GRPO). */ +#define NAS 26 /* Name started. */ +#define NMT 27 /* Name token started. */ +#define NUM 28 /* Number or number token started. */ +#define PEN 29 /* Parameter entity name being defined (PERO found). */ +#define PGRP 30 /* Plus exception group (PLUS,GRPO). */ +#define RNS 31 /* Reserved name started. */ +#define MDS 32 /* Markup declaration subset start. */ +#define PENR 33 /* REPEATCC; PERO found. */ +/* PARSEACT.H: Symbols for common parse action names (end with '_'). + There must be no conflict with other action name + files, which use numbers greater than 19. +*/ +#define CRA_ 1 /* Character reference: alphabetic. */ +#define CRN_ 2 /* Character reference: numeric; non-char refs o.k.. */ +#define NON_ 3 /* Single byte of non-character data found. */ +#define EOF_ 4 /* Error: illegal entity end; resume old input; return. */ +#define ER_ 5 /* Entity reference; start new input source; continue. */ +#define GET_ 6 /* EOB, EOS, or EE: resume old input source; continue. */ +#define INV_ 7 /* Error: invalid char terminated markup; repeat char. */ +#define LEN_ 8 /* Error: length limit exceeded; end markup; repeat char. */ +#define NOP_ 9 /* No action necessary. */ +#define PCI_ 10 /* Previous character was invalid. */ +#define PER_ 11 /* Parameter reference; start new input source; continue. */ +#define RC2_ 12 /* Back up two characters. */ +#define RCC_ 13 /* Repeat current character. */ +#define RCR_ 14 /* Repeat current character and return to caller. */ +#define EE_ 15 /* EOS or EE: resume old input source; return to caller. */ +#define RS_ 16 /* Record start: ccnt=0; ++rcnt. */ +#define ERX_ 17 /* Entity reference; start new input source; return. */ +#define SYS_ 18 /* Error allowed: SYSCHAR in input stream; replace it. */ +#define EOD_ 19 /* End of document. 
*/ +/* Number way out of order to avoid recompilation. */ +#define NSC_ 58 /* Handle DELNONCH/DELXNONCH when NON_ is allowed */ +#define PEX_ 61 /* Parameter entity ref; start new input source; return. */ +#define DEF_ 62 /* Data entity found. */ +#define PIE_ 63 /* PI entity found (needed in markup). */ +#define LNR_ 64 /* LEN_ error with extra REPEATCC. */ +/* SGMLACT.H: Symbols for content parse action names (end with '_') + that are returned to SGML.C for processing. + There must be no conflict with PARSEACT.H, which + uses 0 through 19, or CONACT.H, which uses 34 and above. + (Note: 31 is also used in CONACT.H, but no conflict + is created because they are tested only in PARSEPRO.C, which + completes before SGML.C starts to examine those codes. + Also, when EOD_ is returned from PARSECON, it is changed + to LOP_.) +*/ +#define CON_ 20 /* Normal content action (one of the following). */ +#define DAF_ 21 /* Data found. */ +#define ETG_ 22 /* Process end-tag. */ +#define MD_ 23 /* Process markup declaration (NAMESTRT found). */ +#define MDC_ 24 /* Process markup declaration comment (CD found). */ +#define MSS_ 25 /* Process marked section start. */ +#define MSE_ 26 /* Process marked section end. */ +#define PIS_ 27 /* Processing instruction (string). */ +#define REF_ 28 /* Record end found. */ +#define STG_ 29 /* Process start-tag. */ +#define RSR_ 30 /* Return RS to effect SGML state transition. */ +#define LOP_ 31 /* Loop for new content without returning anything. */ +/* TAGACT.H: Symbols for tag parse action names (all alpha). + There must be no conflict with PARSEACT.H, which + uses 0 - 19. +*/ +#define AVD 20 /* Delimited attribute value started: normal delimiter. */ +#define AVU 21 /* Undelimited value started. */ +#define ETIC 22 /* Tag closed with ETI. */ +#define NVS 23 /* Name of attribute or value started. */ +#define NASV 24 /* Saved NAS was actually an NTV. */ +#define NTV 25 /* Name token value started; get name and full value. 
*/ +#define TAGC 26 /* Tag closed normally. */ +#define TAGO 27 /* Tag closed implicitly by TAGO character. */ +#define AVDA 28 /* Delimited attribute value started: alternative delim. */ +#define DSC 29 /* Closed by DSC character. */ +/* VALACT.H: Symbols for attribute value tokenization action names (all alpha). +*/ +#define NOPA 0 /* No action necessary. */ +#define INVA 1 /* Invalid character; terminate parse. */ +#define LENA 2 /* Length limit of token exceeded; terminate parse. */ +#define NASA 3 /* Name started. */ +#define NMTA 4 /* Name token started. */ +#define NUMA 5 /* Number or number token started. */ + +/* SGML declaration parsing actions. */ + +#define ESGD 20 /* End of SGML declaration. */ +#define LIT1 21 /* Literal started. */ +#define LIT2 22 /* Literal started with LITA delimiter. */ +#define NUM1 23 /* Number started. */ +#define NAS1 24 /* Name started. */ +#define ISIG 25 /* Insignificant character occurred. */ diff --git a/usr.bin/sgmls/sgmls/adl.h b/usr.bin/sgmls/sgmls/adl.h new file mode 100644 index 0000000..930e1e8 --- /dev/null +++ b/usr.bin/sgmls/sgmls/adl.h @@ -0,0 +1,118 @@ +/* ADL.H: Definitions for attribute descriptor list processing. +*/ +/* N/C/SDATA external entity types for nxetype member of ne structure. */ +#define ESNCDATA 1 /* External character data entity. */ +#define ESNNDATA 2 /* Non-SGML data entity. */ +#define ESNSDATA 3 /* External specific character data entity. */ +#define ESNSUB 4 /* SGML subdocument entity. */ + +/* N/C/SDATA control block for AENTITY attributes and NDATA returns.*/ +struct ne { /* N/C/SDATA entity control block. */ + UNIV neid; /* Files for NDATA entity. */ + UNCH *nepubid; /* Public identifier if specified. */ + UNCH *nesysid; /* System identifier if specified. */ + PDCB nedcn; /* Data content notation control block. */ + struct ad *neal; /* Data attribute list (NULL if none). */ + UNCH *neename; /* Ptr to entity name (length and EOS). 
#define NESZ (sizeof(struct ne))
typedef struct ne *PNE;
/* NDATA entity control block fields.
   Each accessor takes a pointer to an N/C/SDATA entity control block
   (struct ne) and converts it to PNE before use.  The argument is
   parenthesized before the cast so that an expression argument such as
   `base + offset' is converted as a whole; without the parentheses the
   cast would bind to the first operand only.
*/
#define NEID(p) (((PNE)(p))->neid)              /* File ID of NDATA entity. */
#define NESYSID(p) (((PNE)(p))->nesysid)        /* System ID of NDATA entity. */
#define NEPUBID(p) (((PNE)(p))->nepubid)        /* Public ID of NDATA entity. */
#define NEDCN(p) (((PNE)(p))->nedcn->ename)     /* Data content notation name. */
#define NEDCNSYSID(p) (((PNE)(p))->nedcn->sysid) /* Notation system ID.*/
#define NEDCNPUBID(p) (((PNE)(p))->nedcn->pubid) /* Notation public ID.*/
#define NEDCNDEFINED(p) (((PNE)(p))->nedcn->defined) /* Notation defined? */
#define NEDCNADL(p) (((PNE)(p))->nedcn->adl)    /* Data content notation attlist.*/
#define NEENAME(p) (((PNE)(p))->neename)        /* Entity name pointer. */
#define NEXTYPE(p) (((PNE)(p))->nextype)        /* External entity type. */
#define NEAL(p) (((PNE)(p))->neal)              /* Data attributes (if any). */
#define NEDCNMARK(p) DCNMARK(((PNE)(p))->nedcn)
*/ +#define ACURRENT 0x40 /* Attribute: 0=normal; 1=current. */ +#define AFIXED 0x20 /* Attribute: 0=normal; 1=must equal default. */ +#define AGROUP 0x10 /* Attribute: 0=single; 1=group of ad's. */ +#define ACONREF 0x08 /* Attribute: 0=normal; 1=att is CONREF. */ +#define AINVALID 0x04 /* Attribute: 1=value is invalid; 0=o.k. */ +#define AERROR 0x02 /* Attribute: 1=error was specified; 0=o.k. */ +#define ASPEC 0x01 /* Attribute: 1=value was specified; 0=default. */ + +/* Attribute types for adtype. */ +#define ANMTGRP 0x00 /* Attribute: Name token group or member. */ +#define ANOTEGRP 0x01 /* Attribute: Notation (name group). */ +#define ACHARS 0x02 /* Attribute: Character string. */ +#define AENTITY 0x03 /* Attribute: Data entity (name). */ +#define AID 0x04 /* Attribute: ID value (name). */ +#define AIDREF 0x05 /* Attribute: ID reference value (name). */ +#define ANAME 0x06 /* Attribute: Name. */ +#define ANMTOKE 0x07 /* Attribute: Name token. */ +#define ANUMBER 0x08 /* Attribute: Number. */ +#define ANUTOKE 0x09 /* Attribute: Number token. */ +#define ATKNLIST 0x0A /* Attribute: >= means value is a token list. */ +#define AENTITYS 0x0A /* Attribute: Data entities (name list). */ +#define AIDREFS 0x0B /* Attribute: ID reference value (name list). */ +#define ANAMES 0x0C /* Attribute: Name list. */ +#define ANMTOKES 0x0D /* Attribute: Name token list. */ +#define ANUMBERS 0x0E /* Attribute: Number list. */ +#define ANUTOKES 0x0F /* Attribute: Number token list. */ + +/* Field definitions for entries in an attribute list. + The first argument to all of these is the list address. +*/ +/* Attribute list: flags. */ +#define ADLF(a) ((a)[0].adflags) +/* Attribute list: number of list members. */ +#define ADN(a) ((a)[0].adtype) +/* Attribute list: number of attributes. */ +#define AN(a) ((a)[0].adnum) +/* Nth attribute in list: name. */ +#define ADNAME(a, n) (((a)[n].adname+1)) +/* Nth att in list: number of values. 
*/ +#define ADNUM(a, n) ((a)[n].adnum) +/* Nth attribute in list: flags. */ +#define ADFLAGS(a, n) ((a)[n].adflags) +/* Nth attribute in list: type. */ +#define ADTYPE(a, n) ((a)[n].adtype) +/* Nth attribute in list: len of def or val.*/ +#define ADLEN(a, n) ((a)[n].adlen) +/* Nth attribute in list: def or value. */ +#define ADVAL(a, n) ((a)[n].addef) +/* Nth attribute in list: special data. */ +#define ADDATA(a, n) ((a)[n].addata) +/* Nth att: token at Pth pos in value. */ +#define ADTOKEN(a, n, p)(((a)[n].addef+(p))) + +#define IDHASH 101 /* Size of ID hash table. Must be prime. */ +struct id { /* ID attribute control block. */ + struct id *idnext; /* Next ID in chain. */ + UNCH *idname; /* ID name with length prefix and EOS. */ + UNCH iddefed; /* Non-zero if it has been defined. */ + struct fwdref *idrl; /* Chain of forward references to this ID. */ +}; +#define IDSZ sizeof(struct id) +typedef struct id *PID; /* Ptr to ID attribute control block. */ diff --git a/usr.bin/sgmls/sgmls/ambig.c b/usr.bin/sgmls/sgmls/ambig.c new file mode 100644 index 0000000..9da02eb --- /dev/null +++ b/usr.bin/sgmls/sgmls/ambig.c @@ -0,0 +1,438 @@ +/* ambig.c - + Content model ambiguity checking. + + Written by James Clark (jjc@jclark.com). +*/ +/* +This uses the construction in pp8-9 of [1], extended to deal with AND +groups. + +Note that it is not correct for the purposes of ambiguity analysis to +handle AND groups by turning them into an OR group of SEQ groups +(consider (a&b?)). 
+ +We build an automaton for the entire content model by adding the +following case for AND: + +nullable(v) := nullable(left child) and nullable(right child) +if nullable(right child) then + for each x in last(left child) do + follow(v,x) = follow(left child,x) U first(right child); +if nullable(left child) then + for each x in last(right child) do + follow(v,x) = follow(right child,x) U first(left child); +first(v) := first(left child) U first(right child); +last(v) := last(left child) U last(right child); + +We also build an automaton for each AND group by building automata for +each of the members of the AND group using the above procedure and +then combine the members using: + +for each x in last(left child) do + follow(v,x) = follow(left child,x) U first(right child); +for each x in last(right child) do + follow(v,x) = follow(right child,x) U first(left child); +first(v) := first(left child) U first(right child); + +The content model is ambiguous just in case one of these automata is +non-deterministic. (Note that when checking determinism we need to +check the `first' set as well as all the `follow' sets.) + +Why is this correct? Consider a primitive token in a member of an AND +group. There are two worst cases for ambiguity: firstly, when none of +the other members of AND group have been matched; secondly, when just +the nullable members remain to be matched. The first case is not +affected by context of the AND group (unless the first case is +identical to the second case.) + +Note that inclusions are not relevant for the purposes of determining +the ambiguity of content models. Otherwise the case in clause +11.2.5.1: + + An element that can satisfy an element in the content model is + considered to do so, even if the element is also an inclusion. + +could never arise. + +[1] Anne Brueggemann-Klein, Regular Expressions into Finite Automata, +Universitaet Freiburg, Institut fur Informatik, 33 July 1991. 
+*/ + +#include "sgmlincl.h" + +/* Sets of states are represented by 0-terminated, ordered lists of +indexes in gbuf. */ + +#define MAXSTATES (GRPGTCNT+2) +#define listcat(x, y) strcat((char *)(x), (char *)(y)) +#define listcpy(x, y) strcpy((char *)(x), (char *)(y)) + +/* Information about a content token. */ + +struct contoken { + UNCH size; + UNCH nullable; + UNCH *first; + UNCH *last; +}; + +static VOID contoken P((int, int, struct contoken *)); +static VOID andgroup P((int, int, struct contoken *)); +static VOID orgroup P((int, int, struct contoken *)); +static VOID seqgroup P((int, int, struct contoken *)); +static VOID andambig P((int)); +static int listambig P((UNCH *)); +static VOID listmerge P((UNCH *, UNCH *)); +static struct contoken *newcontoken P((void)); +static VOID freecontoken P((struct contoken *)); + + +/* Dynamically allocated vector of follow sets. */ + +static UNCH **follow; +static UNCH *mergebuf; /* for use by listmerge */ + +/* Set to non-zero if the content model is ambiguous. */ + +static int ambigsw; + +/* Check the current content model (in gbuf) for ambiguity. */ + +VOID ambig() +{ + struct contoken *s; + int i; + + if (!follow) { + /* We can't allocate everything in one chunk, because that would + overflow a 16-bit unsigned if GRPGTCNT was 253. */ + UNCH *ptr; + follow = (UNCH **)rmalloc(MAXSTATES*sizeof(UNCH *)); + follow[0] = 0; + ptr = (UNCH *)rmalloc((MAXSTATES - 1)*MAXSTATES); + for (i = 1; i < MAXSTATES; i++) { + follow[i] = ptr; + ptr += MAXSTATES; + } + mergebuf = (UNCH *)rmalloc(MAXSTATES); + } + + for (i = 1; i < MAXSTATES; i++) + follow[i][0] = 0; + + ambigsw = 0; + + s = newcontoken(); + contoken(1, 1, s); + + ambigsw = ambigsw || listambig(s->first); + + freecontoken(s); + + for (i = 1; !ambigsw && i < MAXSTATES; i++) + if (listambig(follow[i])) + ambigsw = 1; + + if (ambigsw) + mderr(137, (UNCH *)0, (UNCH *)0); +} + +/* Free memory used for ambiguity checking. 
*/ + +VOID ambigfree() +{ + if (follow) { + frem((UNIV)follow[1]); + frem((UNIV)follow); + frem((UNIV)mergebuf); + follow = 0; + } +} + +/* Determine whether a list of primitive content tokens (each +represented by its index in gbuf) is ambiguous. */ + +static +int listambig(list) +UNCH *list; +{ + UNCH *p; + int chars = 0; + int rc = 0; + + for (p = list; *p; p++) { + if ((gbuf[*p].ttype & TTMASK) == TTETD) { + struct etd *e = gbuf[*p].tu.thetd; + if (e->mark) { + rc = 1; + break; + } + e->mark = 1; + } + else { + assert((gbuf[*p].ttype & TTMASK) == TTCHARS); + if (chars) { + rc = 1; + break; + } + chars = 1; + } + } + + for (p = list; *p; p++) + if ((gbuf[*p].ttype & TTMASK) == TTETD) + gbuf[*p].tu.thetd->mark = 0; + + return rc; +} + + +/* Analyze a content token. The `checkand' argument is needed to ensure +that the algorithm is not exponential in the AND-group nesting depth. +*/ + +static +VOID contoken(m, checkand, res) +int m; /* Index of content token in gbuf */ +int checkand; /* Non-zero if AND groups should be checked */ +struct contoken *res; /* Result */ +{ + UNCH flags = gbuf[m].ttype; + switch (flags & TTMASK) { + case TTCHARS: + case TTETD: + res->first[0] = m; + res->first[1] = 0; + res->last[0] = m; + res->last[1] = 0; + res->size = 1; + res->nullable = 0; + break; + case TTAND: + if (checkand) + andambig(m); + andgroup(m, checkand, res); + break; + case TTOR: + orgroup(m, checkand, res); + break; + case TTSEQ: + seqgroup(m, checkand, res); + break; + default: + abort(); + } + if (flags & TREP) { + UNCH *p; + for (p = res->last; *p; p++) + listmerge(follow[*p], res->first); + } + if (flags & TOPT) + res->nullable = 1; +} + +/* Check an AND group for ambiguity. 
*/ + +static +VOID andambig(m) +int m; +{ + int i, tnum; + int lim; + struct contoken *curr; + struct contoken *next; + + tnum = gbuf[m].tu.tnum; + assert(tnum > 0); + curr = newcontoken(); + next = newcontoken(); + contoken(m + 1, 0, curr); + i = m + 1 + curr->size; + curr->size += 1; + for (--tnum; tnum > 0; --tnum) { + UNCH *p; + contoken(i, 0, next); + curr->size += next->size; + i += next->size; + for (p = curr->last; *p; p++) + listcat(follow[*p], next->first); + for (p = next->last; *p; p++) + listmerge(follow[*p], curr->first); + listcat(curr->first, next->first); + listcat(curr->last, next->last); + } + lim = m + curr->size; + for (i = m + 1; i < lim; i++) { + if (listambig(follow[i])) + ambigsw = 1; + follow[i][0] = 0; + } + freecontoken(curr); + freecontoken(next); +} + +/* Handle an AND group. */ + +static +VOID andgroup(m, checkand, res) +int m; +int checkand; +struct contoken *res; +{ + int i, tnum; + /* union of the first sets of nullable members of the group */ + UNCH *nullablefirst; + struct contoken *next; + + tnum = gbuf[m].tu.tnum; + assert(tnum > 0); + contoken(m + 1, checkand, res); + nullablefirst = (UNCH *)rmalloc(MAXSTATES); + if (res->nullable) + listcpy(nullablefirst, res->first); + else + nullablefirst[0] = 0; + i = m + 1 + res->size; + res->size += 1; + next = newcontoken(); + for (--tnum; tnum > 0; --tnum) { + UNCH *p; + contoken(i, checkand, next); + res->size += next->size; + i += next->size; + if (next->nullable) + for (p = res->last; *p; p++) + listcat(follow[*p], next->first); + for (p = next->last; *p; p++) + listmerge(follow[*p], nullablefirst); + listcat(res->first, next->first); + if (next->nullable) + listcat(nullablefirst, next->first); + listcat(res->last, next->last); + res->nullable &= next->nullable; + } + frem((UNIV)nullablefirst); + freecontoken(next); +} + +/* Handle a SEQ group. 
*/ + +static +VOID seqgroup(m, checkand, res) +int m; +int checkand; +struct contoken *res; +{ + int i, tnum; + struct contoken *next; + + tnum = gbuf[m].tu.tnum; + assert(tnum > 0); + contoken(m + 1, checkand, res); + i = m + 1 + res->size; + res->size += 1; + next = newcontoken(); + for (--tnum; tnum > 0; --tnum) { + UNCH *p; + contoken(i, checkand, next); + res->size += next->size; + i += next->size; + for (p = res->last; *p; p++) + listcat(follow[*p], next->first); + if (res->nullable) + listcat(res->first, next->first); + if (next->nullable) + listcat(res->last, next->last); + else + listcpy(res->last, next->last); + res->nullable &= next->nullable; + } + freecontoken(next); +} + +/* Handle an OR group. */ + +static +VOID orgroup(m, checkand, res) +int m; +int checkand; +struct contoken *res; +{ + int i, tnum; + struct contoken *next; + + tnum = gbuf[m].tu.tnum; + assert(tnum > 0); + contoken(m + 1, checkand, res); + i = m + 1 + res->size; + res->size += 1; + next = newcontoken(); + for (--tnum; tnum > 0; --tnum) { + contoken(i, checkand, next); + res->size += next->size; + i += next->size; + listcat(res->first, next->first); + listcat(res->last, next->last); + res->nullable |= next->nullable; + } + freecontoken(next); +} + + +/* Merge the second ordered list into the first. 
*/ + +static +VOID listmerge(p, b) +UNCH *p, *b; +{ + UNCH *a = mergebuf; + + strcpy((char *)a, (char *)p); + + for (;;) { + if (*a) { + if (*b) { + if (*a < *b) + *p++ = *a++; + else if (*a > *b) + *p++ = *b++; + else + a++; + } + else + *p++ = *a++; + } + else if (*b) + *p++ = *b++; + else + break; + } + *p = '\0'; +} + +static +struct contoken *newcontoken() +{ + struct contoken *p = (struct contoken *)rmalloc(sizeof(struct contoken) + + MAXSTATES*2); + p->first = (UNCH *)(p + 1); + p->last = p->first + MAXSTATES; + return p; +} + +static +VOID freecontoken(p) +struct contoken *p; +{ + frem((UNIV)p); +} + +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/appl.h b/usr.bin/sgmls/sgmls/appl.h new file mode 100644 index 0000000..404d749 --- /dev/null +++ b/usr.bin/sgmls/sgmls/appl.h @@ -0,0 +1,33 @@ +/* appl.h */ + +enum { + E_NOMEM = 1, + E_DOC, + E_EXEC, + E_FORK, + E_WAIT, + E_SIGNAL, + E_OPEN, + E_CAPBOTCH, + E_SUBDOC +}; + +VOID process_document P((int)); +VOID output_conforming P((void)); + +UNIV xmalloc P((UNS)); +UNIV xrealloc P((UNIV, UNS)); +VOID appl_error VP((int, ...)); + +#ifdef SUPPORT_SUBDOC +int run_process P((char **)); +char **make_argv P((UNIV)); +VOID get_subcaps P((void)); +#endif + +#ifdef SUPPORT_SUBDOC +extern int suberr; +#endif + +extern int suppsw; +extern int locsw; diff --git a/usr.bin/sgmls/sgmls/config.h b/usr.bin/sgmls/sgmls/config.h new file mode 100644 index 0000000..562cdcf --- /dev/null +++ b/usr.bin/sgmls/sgmls/config.h @@ -0,0 +1,147 @@ +/* unix.cfg: Configuration file for sgmls on Unix. */ + +/* A list of filename templates to use for searching for external entities. +The filenames are separated by the character specified in PATH_FILE_SEP. +See sgmls.man for details. */ +#define DEFAULT_PATH "/usr/share/sgml/%O/%C/%T:%N.%X:%N.%D" +/* The character that separates the filenames templates. 
*/ +#define PATH_FILE_SEP ':' +/* The character that separates filenames in a system identifier. +Usually the same as PATH_FILE_SEP. */ +#define SYSID_FILE_SEP ':' +/* The environment variable that contains the list of filename templates. */ +#define PATH_ENV_VAR "SGML_PATH" + +/* MIN_DAT_SUBS_FROM and MIN_DAT_SUBS_TO tell sgmls how to transform a name +or system identifier into a legal filename. A character in +MIN_DAT_SUBS_FROM will be transformed into the character in the +corresponding position in MIN_DAT_SUBS_TO. If there is no such +position, then the character is removed. */ +/* This says that spaces should be transformed to underscores, and +slashes to percents. */ +#define MIN_DAT_SUBS_FROM " /" +#define MIN_DAT_SUBS_TO "_%" + +/* Define this to allow tracing. */ +/* #define TRACE 1 */ + +/* Define this if you want support for subdocuments. This is implemented +using features that are not part of Standard C, so you might not want +to define it if you are porting to a new system. Otherwise I suggest +you leave it defined. */ +#define SUPPORT_SUBDOC 1 + +/* Define HAVE_EXTENDED_PRINTF if your *printf functions support +X/Open extensions; if they do, then, for example, + + printf("%2$s%1$s", "bar", "foo") + +should print `foobar'. */ + +/* #define HAVE_EXTENDED_PRINTF 1 */ + +/* Define HAVE_CAT if your system provides the X/Open message +catalogue functions catopen() and catgets(), and you want to use them. +An implementation of these functions is included and will be used if +you don't define this. On SunOS 4.1.1, if you do define this you +should set CC=/usr/xpg2bin/cc in the makefile. */ + +/* #define HAVE_CAT 1 */ + +#ifdef __STDC__ +/* Define this if your compiler supports prototypes. */ +#define USE_PROTOTYPES 1 +#endif + +/* Can't use <stdarg.h> without prototypes. */ +#ifndef USE_PROTOTYPES +#define VARARGS 1 +#endif + +/* If your compiler defines __STDC__ but doesn't provide <stdarg.h>, +you must define VARARGS yourself here. 
*/ +/* #define VARARGS 1 */ + +/* Define this if you do not have strerror(). */ +/* #define STRERROR_MISSING 1 */ + +/* Define this unless the character testing functions in ctype.h +are defined for all values representable as an unsigned char. You do +not need to define this if your system is ANSI C conformant. You +should define for old Unix systems. */ +/* #define USE_ISASCII 1 */ + +/* Define this if your system provides the BSD style string operations +rather than ANSI C ones (eg bcopy() rather than memcpy(), and index() +rather than strchr()). */ +/* #define BSD_STRINGS 1 */ + +/* Define this if you have getopt(). */ +#define HAVE_GETOPT 1 + +/* Define this if you have access(). */ +#define HAVE_ACCESS 1 + +/* Define this if you have <unistd.h>. */ +#define HAVE_UNISTD_H 1 + +/* Define this if you have <sys/stat.h>. */ +#define HAVE_SYS_STAT_H 1 + +/* Define this if you have waitpid(). */ +#define HAVE_WAITPID 1 + +/* Define this if your system is POSIX.1 (ISO 9945-1:1990) compliant. */ +#define POSIX 1 + +/* Define this if you have the vfork() system call. */ +#define HAVE_VFORK 1 + +/* Define this if you have <vfork.h>. */ +/* #define HAVE_VFORK_H 1 */ + +/* Define this if you don't have <stdlib.h> */ +/* #define STDLIB_H_MISSING 1 */ + +/* Define this if you don't have <stddef.h> */ +/* #define STDDEF_H_MISSING 1 */ + +/* Define this if you don't have <limits.h> */ +/* #define LIMITS_H_MISSING 1 */ + +/* Define this if you don't have remove(); unlink() will be used instead. */ +/* #define REMOVE_MISSING 1 */ + +/* Define this if you don't have raise(); kill() will be used instead. */ +/* #define RAISE_MISSING 1 */ + +/* Define this if you don't have fsetpos() and fgetpos(). */ +/* #define FPOS_MISSING 1 */ + +/* Universal pointer type. */ +/* If your compiler doesn't fully support void *, change `void' to `char'. */ +typedef void *UNIV; + +/* If your compiler doesn't support void as a function return type, +change `void' to `int'. 
*/ +typedef void VOID; + +/* If you don't have an ANSI C conformant <limits.h>, define +CHAR_SIGNED as 1 or 0 according to whether the `char' type is signed. +The <limits.h> on some versions of System Release V 3.2 is not ANSI C +conformant: the value of CHAR_MIN is 0 even though the `char' type is +signed. */ + +/* #define CHAR_SIGNED 1 */ +/* #define CHAR_SIGNED 0 */ +#ifndef CHAR_SIGNED +#include <limits.h> +#if CHAR_MIN < 0 +#define CHAR_SIGNED 1 +#else +#define CHAR_SIGNED 0 +#endif +#endif /* not CHAR_SIGNED */ + +/* Assume the system character set is ISO Latin-1. */ +#include "latin1.h" diff --git a/usr.bin/sgmls/sgmls/context.c b/usr.bin/sgmls/sgmls/context.c new file mode 100644 index 0000000..1eb5a5c --- /dev/null +++ b/usr.bin/sgmls/sgmls/context.c @@ -0,0 +1,444 @@ +#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */ +#include "context.h" + +#define GI (tags[ts].tetd->etdgi+1) /* GI of current element. */ +#define NEWGI (newetd->etdgi+1) /* GI of new tag. */ +#define STATUS (*statuspt) /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/ +#define PEX (-1) /* GI is a plus exception and not a minus. */ + +#define ANYHIT(h) (grplongs == 1 ? ((h)[0] != 0) : anyhit(h)) +#define HITSET(h, n) (h[(unsigned)(n-1)>>LONGPOW] \ + |= (1L<<((n-1)&(LONGBITS-1)))) +#define HITON(h, n) (h[(unsigned)(n-1)>>LONGPOW] & (1L<<((n-1)&(LONGBITS-1)))) + +#define HITOFF(h, n) (!(HITON(h, n))) + +#define TOKENHIT HITON(H,T) + +static +VOID copypos(to, from) +struct mpos *to, *from; +{ + int i; + for (i = 0; i <= (int)from[0].t; i++) { + to[i].g = from[i].g; + to[i].t = from[i].t; + memcpy(to[i].h, from[i].h, grplongs*sizeof(unsigned long)); + } +} + +/* CONTEXT: Determine whether a GI is valid in the present structural context. + Returns RCHIT if valid, RCEND if element has ended, RCREQ if a + different element is required, and RCMISS if it is totally invalid. + On entry, pos points to the model token to be tested against the GI. 
+ TO DO: Save allowed GIs for an error message on an RCMISS. + Support a "query" mode (what is allowed now?) by working + with a copy of pos. + Note: when an exception is being tested (mexts != 0) the code + below can also return RCPEX, RCMEX or RCHITMEX. +*/ +int context(gi, mod, pos, statuspt, mexts) +struct etd *gi; /* ETD of new GI. */ +struct thdr mod[]; /* Model of current open element. */ +struct mpos pos[]; /* Position in open element's model. */ +UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/ +int mexts; /* >0=stack level of minus grp; -1=plus; 0=none.*/ +{ + UNCH toccsv, gtypesv; /* Save token's TOCC and GTYPE in case grp ends.*/ + + if (mexts == -1) { + if (STATUS == RCEND) + return RCPEX; + copypos(savedpos, pos); /* Snapshot pos; restored below if the GI does not hit. */ + } + Tstart = T; /* Save starting token for AND group testing. */ + while (STATUS!=RCMISS && STATUS!=RCEND) { + TRACEGI("CONTEXT", gi, mod, pos, Tstart); + while (TTYPE==TTOR || TTYPE==TTSEQ || TTYPE==TTAND) { + pos[P+1].g = M++; pos[++P].t = 1; HITCLEAR(H); + Tstart = T; /* Save starting token for AND group testing. */ + TRACEGI("OPENGRP", gi, mod, pos, Tstart); + } + STATUS = (UNCH)tokenreq(gi, mod, pos); + TRACEGI("STATUS", gi, mod, pos, Tstart); + if (gi==TOKEN.tu.thetd) { /* Hit in model. */ + STATUS = (UNCH)RCHIT; + gtypesv = GTYPE; toccsv = TOCC; + newtoken(mod, pos, statuspt); + return(mexts<=0 ? RCHIT : (gtypesv==TTOR || BITON(toccsv, TOPT)) + ? RCMEX : RCHITMEX); + } + if (STATUS==RCREQ) { + if (mexts == -1) + break; + STATUS = RCHIT; + nextetd = TOKEN.tu.thetd; + newtoken(mod, pos, statuspt); + return(RCREQ); + } + /* else if (STATUS==RCNREQ) */ + if (mexts>0) return(RCMEX); + newtoken(mod, pos, statuspt); + } + if (mexts == -1) { /* Plus exception that did not hit the model: undo any movement of pos. */ + copypos(pos, savedpos); + return STATUS = RCPEX; + } + return((int)STATUS); +} +/* ECONTEXT: Determine whether the current element can be ended, or whether + non-optional tokens remain at the current level or higher. + Returns 1 if element can be ended, or 0 if tokens remain. + On entry, STATUS==RCEND if there are no tokens left; if not, + pos points to the next model token to be tested. 
+ TO DO: Support a "query" mode (what is required now?) by working + with a copy of pos. +*/ +int econtext(mod, pos, statuspt) +struct thdr mod[]; /* Model of current open element. */ +struct mpos pos[]; /* Position in open element's model. */ +UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/ +{ + unsigned next; /* Position in AND group of next testable token.*/ + + Tstart = T; + TRACEEND("ECONT", mod, pos, 0, 0, Tstart); + if (P<=1) {nextetd = 0; return(TOKENHIT || BITON(TOCC, TOPT));} + nextetd = TTYPE == TTETD ? TOKEN.tu.thetd : 0; + while (STATUS!=RCMISS && STATUS!=RCEND) { + STATUS = (UNCH)testend(mod, pos, 0, 0); + TRACEEND("ECONTEND", mod, pos, 0, 0, Tstart); + nextetd = P<=1 || TTYPE != TTETD ? 0 : TOKEN.tu.thetd; + if (STATUS==RCEND) return(1); + if (P<=1) return(TOKENHIT || BITON(TOCC, TOPT)); + if (STATUS==RCMISS) { + if (BITON(TOCC, TOPT)) nextetd = 0; + return(0); + } + if (!tokenopt(mod, pos)) return(0); + + STATUS = RCNREQ; + if (GTYPE!=TTAND) ++T; /* T!=GNUM or group would have ended. */ + else T = (UNCH)(((next = (UNS)offbit(H, (int)T, GNUM))!=0) ? + next : offbit(H, 0, GNUM)); + + M = G + grpsz(&GHDR, (int)T-1) + 1; + TRACEEND("ECONTNEW", mod, pos, 0, 0, Tstart); + } + if (STATUS==RCMISS) { + if (BITON(TOCC, TOPT)) nextetd = 0; + return(0); + } + return(1); /* STATUS==RCEND */ +} +/* NEWTOKEN: Find the next token to test. Set STATUS to indicate results: + RCEND if element has ended (no more tokens to test); + RCREQ if required new token was found; + RCNREQ if non-required new token was found; + RCHIT if a hit token was repeated (now non-required); + and RCMISS if a new token can't be found because current token + (which was not hit) was neither unconditionally required nor + optional. +*/ +VOID newtoken(mod, pos, statuspt) +struct thdr mod[]; /* Model of current open element. */ +struct mpos pos[]; /* Position in open element's model. 
*/ +UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/ +{ + unsigned nextand = 0; /* Position in AND group of next testable token.*/ + int currhit = (STATUS==RCHIT); /* 1=current GI hit; 0=not. */ + + /* If the GI was a hit, turn on the hit bit and set the status to + assume that the token to be tested against the next GI will + be non-required. If the current token is repeatable, exit so + it will stand as the next token to test. + */ + if (STATUS==RCHIT) { + HITSET(H, T); + STATUS = RCNREQ; + if (BITON(TOCC, TREP)) return; + } + /* At this point, we must determine the next token to test: + either against the next GI, if this one was a hit, or + against the same GI if conditions permit a retry. + To find the next token, we must first end the current group, + if possible, and any we can that contain it. + If the outermost group was a hit and is repeatable, or + if the element has ended, we exit now. + If it hasn't ended, or was optional and ended with a miss, + we can retry the GI against the next token. + */ + if ((STATUS = (UNCH)testend(mod, pos, 1, 1))!=RCNREQ) return; + + /* At this point, the "current token" is either the original one, + or the token for the highest level unhit group that it ended. + We will retry a missed GI, by testing it against the next + token, if the current token: + 1. Is optional; + 2. Was hit (i.e., because it is repeatable and was hit by a + previous GI or because it is a hit group that just ended); + 3. Is in an AND or OR group and is not the last testable token. + + It will be the next sequential one (unhit one, in an AND group); + if there are none left, use the first unhit token in the group. + In either case, set M to correspond to the new T. 
+ */ + retest: + TRACEEND("RETEST", mod, pos, (int)nextand, 1, Tstart); + if (GTYPE==TTAND) { + nextand = offbit(H, (int)T, GNUM); + if (!nextand) + nextand = offbit(H, 0, GNUM); + } + if ( BITON(TOCC, TOPT) + || TOKENHIT + || GTYPE==TTOR /* T!=GNUM or group would have ended. */ + || nextand ) { + if (GTYPE!=TTAND) ++T; /* T!=GNUM or group would have ended. */ + else T = nextand; + M = G + grpsz(&GHDR, (int)T-1) + 1; + if (GTYPE==TTAND) { + /* If AND group wrapped, it can end if all non-optionals were + hit. */ + if (T==Tstart && !currhit) { + UNCH Psave = P; + int rc = testend(mod, pos, 0, 1); + if (Psave!=P) {if ((STATUS = (UNCH)rc)==RCNREQ) goto retest;} + else STATUS = RCMISS; + } + + /* We only test unhit tokens, so we must use an unhit token + as Tstart (which is used to detect when the AND group has + wrapped). */ + else if (HITON(H,Tstart)) Tstart = T; + } + } + else STATUS = RCMISS; + TRACEEND("NEWTOKEN", mod, pos, (int)nextand, 1, Tstart); +} +/* TESTEND: End the current group, if possible, and any that it is nested in. + The current token will either be a group header, or some token + that could not end its group. Returns RCNREQ if no further group can end, + RCHIT if an ended group is repeatable (newtknsw only), or RCEND/RCMISS at the outermost level. +*/ +int testend(mod, pos, andoptsw, newtknsw) +struct thdr mod[]; /* Model of current open element. */ +struct mpos pos[]; /* Position in open element's model. */ +int andoptsw; /* 1=test optional AND members; 0=ignore. */ +int newtknsw; /* 1=new token test; 0=end element test. */ +{ + int rc = 0; /* Return code: RCNREQ RCHIT RCMISS RCEND */ + + while (!rc) { + TRACEEND("TRACEEND", mod, pos, rc, andoptsw, Tstart); + /* TESTMISS: + If we've hit no tokens yet in the current group, and + the current token is the last unhit one in the group we can test, + we will end the group (it may never really have started!) + because we might be able to try the token that follows it. 
+ In any group, a token is the last testable unhit token if it + is the last sequential one, as the GI was already tested against + the preceding unhit tokens. In addition, + in a SEQ group, it is the last testable unhit token if it isn't + optional, because we can't skip past it to the following ones. + If we end the group, before popping the level, set M to G, as this + level's group header will be the next level's current token. + */ + if (!ANYHIT(H) && (T==GNUM + || (GTYPE==TTSEQ && BITOFF(TOCC, TOPT)))) { + M = G; --P; Tstart = T; + if (P<=1) { + if (BITON(TOCC, TOPT) || TOKENHIT) rc = RCEND; + else rc = RCMISS; + } + continue; + } + /* TESTHIT: + See if we've hit all the non-optional tokens in the group. + If so, pop to the previous level and set the group's hit bit. + If we were called from NEWTOKEN we are trying to find the token + to test against the next start-tag, so if the group is repeatable, + process it again. (If not, we were called from ECONTEXT and + are testing whether the element can be ended.) + Otherwise, if we are at the first level, the element is over. + */ + if ((GTYPE==TTOR && TOKENHIT) + || (GTYPE==TTSEQ && T==(UNCH)GNUM + && (TOKENHIT || BITON(TOCC, TOPT))) + || (GTYPE==TTAND && allhit(&GHDR, H, 0, andoptsw))) { + M = G; + --P; + HITSET(H, T); + Tstart = T; + if (newtknsw && BITON(TOCC, TREP)) rc = RCHIT; + else if (P<=1) rc = RCEND; + /* If we are looking for a new token to test against the next + start-tag, then we need to consider optional and members + in this group, even if we didn't need to consider them + in the group that we just ended because that group had + wrapped. */ + else if (newtknsw) andoptsw = 1; + /* Else loop to test new outer group. */ + } + else rc = RCNREQ; /* No group ended this time, so return. */ + } + TRACEEND("ENDFOUND", mod, pos, rc, andoptsw, Tstart); + return(rc); +} +/* TOKENOPT: Return 1 if current token is contextually optional; + otherwise, return 0. 
+*/ +int tokenopt(mod, pos) +struct thdr mod[]; /* Model of current open element. */ +struct mpos pos[]; /* Position in open element's model. */ +{ + TRACEEND("TOKENOPT", mod, pos, 0, 0, Tstart); + return (BITON(TOCC, TOPT) /* Inherently optional. */ + || TOKENHIT /* Was hit (handles "plus" suffix case). */ + || (!ANYHIT(H) && groupopt(mod, pos))); + /* In optional group with no hits. */ +} +/* GROUPOPT: Temporarily makes the current group be the current token so that + TOKENOPT() can be applied to it. Returns the value returned + by TOKENOPT. +*/ +int groupopt(mod, pos) +struct thdr mod[]; /* Model of current open element. */ +struct mpos pos[]; /* Position in open element's model. */ +{ + UNCH saveM; /* Save M when testing if group is not required.*/ + int rc; /* 1=contextually optional; 0=not. */ + + if (P==1) return(BITON(GOCC, TOPT) || TOKENHIT); + saveM = M; M = G; --P; + rc = tokenopt(mod, pos); + ++P; G = M; M = saveM; + return(rc); +} +/* TOKENREQ: Returns RCREQ if the current token is "contextually required". + That is, it is not contextually optional and + 1) it is a member of a "seq" group that is either required + or has at least 1 hit token. + 2) it is a member of an "and" group in which all other + tokens were hit. + Optional tokens are not counted + if GI is ETDCDATA, as we are looking for an + omitted start-tag. Otherwise, they are counted, + as the GI might match one of them. + Returns RCNREQ if the current token is "not required". +*/ +int tokenreq(gi, mod, pos) +struct etd *gi; /* ETD of new GI. */ +struct thdr mod[]; /* Model of current open element. */ +struct mpos pos[]; /* Position in open element's model. */ +{ + TRACEGI("TOKENREQ", gi, mod, pos, Tstart); + return( tokenopt(mod, pos) ? RCNREQ + : ( GTYPE==TTSEQ && (ANYHIT(H) || groupreq(gi, mod, pos)==RCREQ) +#if 0 + || (GTYPE==TTAND && allhit(&GHDR, H, T, \*gi!=ETDCDATA*\ 1)) +#endif + ) + ? 
RCREQ : RCNREQ ); +} +/* GROUPREQ: Temporarily makes the current group be the current token so that + TOKENREQ() can be applied to it. Returns the value returned + by TOKENREQ. +*/ +int groupreq(gi, mod, pos) +struct etd *gi; /* ETD of new GI. */ +struct thdr mod[]; /* Model of current open element. */ +struct mpos pos[]; /* Position in open element's model. */ +{ + UNCH saveM; /* Save M when testing if group is not required.*/ + int rc; /* Return code: RCREQ RCNREQ */ + + if (P==1) return(BITOFF(GOCC, TOPT) ? RCREQ : RCNREQ); + saveM = M; M = G; --P; + rc = tokenreq(gi, mod, pos); + ++P; G = M; M = saveM; + return(rc); +} +/* GRPSZ: Returns the number of tokens spanned by a group in the model (M), + from the group's start (G) to a specified index within the group (T). + M = 0, plus 1 for each token in the group, plus the size of + any subgroups (gotten by calling GRPSZ recursively). On entry, + M must be equal to G at the current level. +*/ +int grpsz(g, t) +struct thdr *g; /* mod[G]: Ptr to group in the model. */ +int t; /* T: Index of last token in the group. */ +{ + struct thdr *p = g; /* Ptr to current token in the model. */ + int m = 0; /* Size of group (including nested groups). */ + int i = 0; /* Number of group members (loop counter). */ + UNS type; /* Token type (without TOREP bits). */ + + while (++i<=t) { + ++p; ++m; + type = GET(p->ttype, TTMASK); + if (type==TTOR || type==TTSEQ || type==TTAND) { + m += grpsz(p, p->tu.tnum); + p = g+m; + } + } + return(m); +} +/* ALLHIT: Returns 1 if all hit bits for the specified group are turned on, + (other than those that correspond to optional tokens if "opt" is + 0) and the "but" bit (all bits if "but" bit is zero). Otherwise, + returns 0. GRPSZ is used to skip past subgroup tokens. +*/ +int allhit(p, hits, but, opt) +struct thdr *p; /* mod[G]: Ptr to group in the model. */ +unsigned long *hits; /* H: Hit bits to be tested. */ +int but; /* Index of bit to ignore; 0=test all. 
*/ +int opt; /* 1=optional tokens must be hit; 0=ignore. */ +{ + int b = 0; /* Index of bit being tested in hits. */ + int e = p->tu.tnum; /* Ending index (number of bits to test). */ + unsigned type; /* Token type (without TOREP bits). */ + + while (++p, ++b<=e) { + if (HITOFF(hits,b) && (opt || BITOFF(p->ttype,TOPT)) && b!=but) + return 0; + if ((type = GET(p->ttype,TTMASK))==TTOR || type==TTSEQ || type==TTAND) + p += grpsz(p, p->tu.tnum); + } + return 1; +} +/* OFFBIT: Returns the index of the first unset bit after (i.e., not including) + the caller's "first" bit. If all bits through the + specified last bit are on, it returns 0. +*/ +int offbit(bits, first, last) +unsigned long *bits; /* Bits to be tested. */ +int first; /* Index of first bit to be tested in bits. */ +int last; /* Index of last bit to be tested in bits. */ +{ + while (++first <= last) + if (HITOFF(bits, first)) + return first; + return 0; +} + +/* ANYHIT: Return 1 if any bit is set. */ + +int anyhit(bits) +unsigned long *bits; +{ + int i; + for (i = 0; i < grplongs; i++) + if (bits[i] != 0) + return 1; + return 0; +} + +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +comment-column: 30 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/context.h b/usr.bin/sgmls/sgmls/context.h new file mode 100644 index 0000000..04350c7 --- /dev/null +++ b/usr.bin/sgmls/sgmls/context.h @@ -0,0 +1,17 @@ +/* context.h */ + +#define M pos[0].g /* Index of current token in model. */ +#ifdef P +#undef P +#endif +#define P pos[0].t /* Index of current group in pos. */ +#define G pos[P].g /* Index of current group in model. */ +#define T pos[P].t /* Index of current token in its group. */ +#define H pos[P].h /* Pointer to hit bits for current group. */ +#define GHDR mod[G] /* Current group header. */ +#define TOKEN mod[M] /* Current token. */ +#define TTYPE (GET(TOKEN.ttype, TTMASK)) /* Token type of current token. 
*/ +#define TOCC (GET(TOKEN.ttype, TOREP)) /* Occurrence for current token. */ +#define GTYPE (GET(GHDR.ttype, TTMASK)) /* Token type of current group. */ +#define GOCC (GET(GHDR.ttype, TOREP)) /* Occurrence for current group. */ +#define GNUM GHDR.tu.tnum /* Number of tokens in current grp. */ diff --git a/usr.bin/sgmls/sgmls/dosproc.c b/usr.bin/sgmls/sgmls/dosproc.c new file mode 100644 index 0000000..99b526d --- /dev/null +++ b/usr.bin/sgmls/sgmls/dosproc.c @@ -0,0 +1,40 @@ +/* dosproc.c - + + MS-DOS implementation of run_process(). + + Written by James Clark (jjc@jclark.com). +*/ + +#include "config.h" + +#ifdef SUPPORT_SUBDOC + +#include "std.h" +#include "entity.h" +#include "appl.h" + +#include <process.h> + +int run_process(argv) +char **argv; +{ + int ret; + fflush(stdout); + fflush(stderr); + ret = spawnvp(P_WAIT, argv[0], argv); + if (ret < 0) + appl_error(E_EXEC, argv[0], strerror(errno)); + return ret; +} + +#endif /* SUPPORT_SUBDOC */ + +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/ebcdic.c b/usr.bin/sgmls/sgmls/ebcdic.c new file mode 100644 index 0000000..b8188c7 --- /dev/null +++ b/usr.bin/sgmls/sgmls/ebcdic.c @@ -0,0 +1,42 @@ +/* ASCII to EBCDIC (ISO 8859-1 to IBM CP 37v2) table. */ +/* Contributed by C. M. Sperberg-McQueen <u35395@uicvm.uic.edu>. */ + +/* The mapping must be 1 to 1. The positions of *CHAR and *CH in the table +must not be changed, although the values in ebcdic.h can be. 
*/ + +#include "ebcdic.h" + +unsigned char charset[] = { + 0, 1, 2, 3, 55, 45, 46, 47, + GENRECHAR, TABCHAR, RSCHAR, 11, 12, RECHAR, 14, 15, + 16, 17, 18, 19, 60, 61, 50, 38, + 24, 25, EOFCHAR, 39, EOBCHAR, DELCDATA, DELSDATA, DELNONCH, + SPCCHAR, 90, 127, 123, 91, 108, 80, 125, + 77, 93, 92, 78, 107, 96, 75, 97, +240, 241, 242, 243, 244, 245, 246, 247, +248, 249, 122, 94, 76, 126, 110, 111, +124, 193, 194, 195, 196, 197, 198, 199, +200, 201, 209, 210, 211, 212, 213, 214, +215, 216, 217, 226, 227, 228, 229, 230, +231, 232, 233, 173, 224, 189, 176, 109, +121, 129, 130, 131, 132, 133, 134, 135, +136, 137, 145, 146, 147, 148, 149, 150, +151, 152, 153, 162, 163, 164, 165, 166, +167, 168, 169, 192, 79, 208, 161, 7, + 4, 6, 8, 9, 10, 20, 21, 23, + 26, 27, 32, 33, 34, 35, 36, 40, + 41, 42, 43, 44, 48, 49, 51, 52, + 53, 54, 56, 57, 58, 59, 62, 255, + 65, 170, 74, 177, 159, 178, 106, 181, +187, 180, 154, 138, 95, 202, 175, 188, +144, 143, 234, 250, 190, 160, 182, 179, +157, 218, 155, 139, 183, 184, 185, 171, +100, 101, 98, 102, 99, 103, 158, 104, +116, 113, 114, 115, 120, 117, 118, 119, +172, 105, 237, 238, 235, 239, 236, 191, +128, 253, 254, 251, 252, 186, 174, 89, + 68, 69, 66, 70, 67, 71, 156, 72, + 84, 81, 82, 83, 88, 85, 86, 87, +140, 73, 205, 206, 203, 207, 204, 225, +112, 221, 222, 219, 220, 141, 142, 223, +}; diff --git a/usr.bin/sgmls/sgmls/ebcdic.h b/usr.bin/sgmls/sgmls/ebcdic.h new file mode 100644 index 0000000..1c35bcb --- /dev/null +++ b/usr.bin/sgmls/sgmls/ebcdic.h @@ -0,0 +1,40 @@ +/* SGML Character Use: EBCDIC +*/ + +#define EOFCHAR '\077' /* FUNCTION: EE (entity end: files). */ +#define EOBCHAR '\034' /* NONCHAR: EOB (file entity: end of buffer. */ +#define RSCHAR '\045' /* FUNCTION: RS (record start). */ +#define RECHAR '\015' /* FUNCTION: RE (record end). */ +#define TABCHAR '\005' /* FUNCTION: TAB (horizontal tab). */ +#define SPCCHAR '\100' /* FUNCTION: SPACE (horizontal space). */ +#define GENRECHAR '\026' /* NONCHAR: Generated RE. 
*/ +#define DELCDATA '\035' /* NONCHAR: Delimiter for CDATA entity in + attribute value. */ +#define DELSDATA '\036' /* NONCHAR: Delimiter for SDATA entity in + attribute value. */ +#define DELNONCH '\037' /* NONCHAR: non-SGML character prefix. */ + +/* This should work for EBCDIC. See comment in latin1.h. */ +#define SHIFTNON(ch) ((UNCH)(ch) | 0200) +#define UNSHIFTNON(ch) ((UNCH)(ch) & ~0200) + +/* See comment in latin1.h. */ +#define CANON_NONSGML 255 + +/* See comment in latin1.h. */ +#define CANON_DATACHAR 254 + +/* Components for a formal public identifier for the whole of the +system character set. Protect with ifndef so that it can be overridden +in config.h. */ + +/* Use a private escape sequence. */ +#ifndef SYSTEM_CHARSET_DESIGNATING_SEQUENCE +#define SYSTEM_CHARSET_DESIGNATING_SEQUENCE "ESC 2/5 2/15 3/0" +#endif +#ifndef SYSTEM_CHARSET_OWNER +#define SYSTEM_CHARSET_OWNER "-//IBM" +#endif +#ifndef SYSTEM_CHARSET_DESCRIPTION +#define SYSTEM_CHARSET_DESCRIPTION "Code Page 1047" +#endif diff --git a/usr.bin/sgmls/sgmls/entgen.c b/usr.bin/sgmls/sgmls/entgen.c new file mode 100644 index 0000000..0829495 --- /dev/null +++ b/usr.bin/sgmls/sgmls/entgen.c @@ -0,0 +1,405 @@ +/* entgen.c - + + Implement entgen() which generates a list of filenames from a struct fpi. + + Written by James Clark (jjc@jclark.com). +*/ + +#include "config.h" + +#ifdef HAVE_ACCESS + +#ifdef HAVE_UNISTD_H +#include <unistd.h> /* For R_OK. */ +#endif /* HAVE_UNISTD_H */ + +#ifndef R_OK +#define R_OK 4 +#endif /* not R_OK */ + +#endif /* HAVE_ACCESS */ + +#include "sgmlaux.h" + +/* Environment variable that contains path. */ +#ifndef PATH_ENV_VAR +#define PATH_ENV_VAR "SGML_PATH" +#endif +/* Default search path. See field() for interpretation of %*. 
*/ +#ifndef DEFAULT_PATH +#define DEFAULT_PATH "/usr/local/lib/sgml/%O/%C/%T:%N.%X:%N.%D" +#endif + +#ifndef PATH_FILE_SEP +#define PATH_FILE_SEP ':' +#endif + +#ifndef SYSID_FILE_SEP +#define SYSID_FILE_SEP ':' +#endif + +/* This says: change space to underscore, slash to percent. */ + +#ifndef MIN_DAT_SUBS_FROM +#define MIN_DAT_SUBS_FROM " /" +#endif +#ifndef MIN_DAT_SUBS_TO +#define MIN_DAT_SUBS_TO "_%" +#endif + +static int field P((struct fpi *, int, char *)); +static int mindatcpy P((char *, char *, int, int)); +static int testopen P((char *)); +static UNIV sysidgen P((char *)); + +static char *path = 0; + +/* Non-zero if searching should be performed when a system identifier +is specified. */ +static int sysidsrch = 0; + +#define EMPTY_VERSION "default" + +static char *classes[] = { + "capacity", + "charset", + "notation", + "syntax", + "document", + "dtd", + "elements", + "entities", + "lpd", + "nonsgml", + "shortref", + "subdoc", + "text" + }; + +/* This is mainly for compatibility with arcsgml. */ + +static char *genext[] = { + "nsd", /* Non-SGML data entity. */ + "gml", /* GML document or text entity. */ + "spe", /* System parameter entity. */ + "dtd", /* Document type definition. */ + "lpd", /* Link process definition. */ + "pns", /* Public non-SGML data entity. */ + "pge", /* Public general entity. */ + "ppe", /* Public parameter entity. */ + "pdt", /* Public document type definition. */ + "plp", /* Public link process definition. */ + "vns", /* Display version non-SGML data entity. */ + "vge", /* Display version general entity. */ + "vpe", /* Display version parameter entity. 
*/ + "vdt", /* Display version document type definition.*/ + "vlp", /* Display version link process definition.*/ +}; + +static char *ext[] = { + "sgml", /* SGML subdocument */ + "data", /* Data */ + "text", /* General text */ + "parm", /* Parameter entity */ + "dtd", /* Document type definition */ + "lpd", /* Link process definition */ +}; + +/* Like memcpy, but substitute, fold to lower case (if fold is +non-zero) and null terminate. This is used both for minimum data and +for names. If p is NULL, do nothing. Return len. */ + +static int mindatcpy(p, q, len, fold) +char *p, *q; +int len; +int fold; +{ + static char subsfrom[] = MIN_DAT_SUBS_FROM; + static char substo[] = MIN_DAT_SUBS_TO; + int n; + + if (!p) + return len; + for (n = len; --n >= 0; q++) { + char *r = strchr(subsfrom, *q); + if (!r) { + if (fold && ISASCII(*q) && isupper((UNCH)*q)) + *p++ = tolower((UNCH)*q); + else + *p++ = *q; + } + else { + int i = r - subsfrom; + if (i < sizeof(substo) - 1) + *p++ = substo[i]; + } + } + *p = '\0'; + return len; +} + + +/* Return length of field. Copy into buf if non-NULL. */ + +static int field(f, c, buf) +struct fpi *f; +int c; +char *buf; +{ + int n; + + switch (c) { + case '%': + if (buf) { + buf[0] = '%'; + buf[1] = '\0'; + } + return 1; + case 'N': /* the entity, document or dcn name */ + return mindatcpy(buf, (char *)f->fpinm, ustrlen(f->fpinm), + (f->fpistore != 1 && f->fpistore != 2 && f->fpistore != 3 + ? NAMECASE + : ENTCASE)); + case 'D': /* dcn name */ + if (f->fpistore != 1) /* not an external data entity */ + return -1; + if (f->fpinedcn == 0) /* it's a SUBDOC */ + return -1; + return mindatcpy(buf, (char *)f->fpinedcn, ustrlen(f->fpinedcn), + NAMECASE); + case 'X': + /* This is for compatibility with arcsgml */ + if (f->fpistore < 1 || f->fpistore > 5) + return -1; + n = (f->fpipubis != 0)*(f->fpiversw > 0 ? 
2 : 1)*5+f->fpistore - 1; + if (buf) + strcpy(buf, genext[n]); + return strlen(genext[n]); + case 'Y': /* tYpe */ + n = f->fpistore; + if (n < 1 || n > 5) + return -1; + if (n == 1 && f->fpinedcn == 0) /* it's a SUBDOC */ + n = 0; + if (buf) + strcpy(buf, ext[n]); + return strlen(ext[n]); + case 'P': /* public identifier */ + if (!f->fpipubis) + return -1; + return mindatcpy(buf, (char *)f->fpipubis, ustrlen(f->fpipubis), 0); + case 'S': /* system identifier */ + if (!f->fpisysis) + return -1; + else { + UNCH *p; + n = 0; + for (p = f->fpisysis; *p; p++) + if (*p != RSCHAR) { + if (buf) + buf[n] = *p == RECHAR ? '\n' : *p; + n++; + } + return n; + } + } + /* Other fields need a formal public identifier. */ + /* return -1 if the formal public identifier was invalid or missing. */ + if (f->fpiversw < 0 || !f->fpipubis) + return -1; + + switch (c) { + case 'A': /* Is it available? */ + return f->fpitt == '+' ? 0 : -1; + case 'I': /* Is it ISO? */ + return f->fpiot == '!' ? 0 : -1; + case 'R': /* Is it registered? */ + return f->fpiot == '+' ? 0 : -1; + case 'U': /* Is it unregistered? */ + return f->fpiot == '-' ? 
0 : -1; + case 'L': /* public text language */ + if (f->fpic == FPICHARS) + return -1; + /* it's entered in all upper case letters */ + return mindatcpy(buf, (char *)f->fpipubis + f->fpil, f->fpill, 1); + case 'O': /* owner identifier */ + return mindatcpy(buf, (char *)f->fpipubis + f->fpio, f->fpiol, 0); + case 'C': /* public text class */ + n = f->fpic - 1; + if (n < 0 || n >= sizeof(classes)/sizeof(classes[0])) + return -1; + if (buf) + strcpy(buf, classes[n]); + return strlen(classes[n]); + case 'T': /* text description */ + return mindatcpy(buf, (char *)f->fpipubis + f->fpit, f->fpitl, 0); + case 'V': + if (f->fpic < FPICMINV) /* class doesn't have version */ + return -1; + if (f->fpiversw > 0) /* no version */ + return -1; + if (f->fpivl == 0) { /* empty version: */ + /* use device-independent version*/ + if (buf) + strcpy(buf, EMPTY_VERSION); + return strlen(EMPTY_VERSION); + } + return mindatcpy(buf, (char *)f->fpipubis + f->fpiv, f->fpivl, 0); + case 'E': /* public text designating (escape) sequence */ + if (f->fpic != FPICHARS) + return -1; + return mindatcpy(buf, (char *)f->fpipubis + f->fpil, f->fpill, 0); + default: + break; + } + return -1; +} + +static int testopen(pathname) +char *pathname; +{ +#ifdef HAVE_ACCESS + return access(pathname, R_OK) >= 0; +#else /* not HAVE_ACCESS */ + FILE *fp; + fp = fopen(pathname, "r"); + if (!fp) + return 0; + fclose(fp); + return 1; +#endif /* not HAVE_ACCESS */ +} + +/* Return a pointer to a dynamically-allocated buffer that contains + the names of the files containing this entity, with each filename + terminated by a '\0', and with the list of filenames terminated by + another '\0'. */ + +UNIV entgen(f) +struct fpi *f; +{ + char *file; + + assert(f->fpistore != 6); /* Mustn't call entgen for a notation. */ + if (!path) { + char *p; + char c; + path = getenv(PATH_ENV_VAR); + if (!path) + path = DEFAULT_PATH; + p = path; + + /* Only search for system identifiers if path uses %S. 
*/ + while ((c = *p++) != '\0') + if (c == '%') { + if (*p == 'S') { + sysidsrch = 1; + break; + } + if (*p != '\0' && *p != PATH_FILE_SEP) + p++; + } + } + if (f->fpisysis + && (!sysidsrch + || strchr((char *)f->fpisysis, SYSID_FILE_SEP) + || strcmp((char *)f->fpisysis, STDINNAME) == 0)) + return sysidgen((char *)f->fpisysis); + + file = path; + + for (;;) { + char *p; + int len = 0; + char *fileend = strchr(file, PATH_FILE_SEP); + if (!fileend) + fileend = strchr(file, '\0'); + + /* Check that all substitutions are non-null, and calculate + the resulting total length of the filename. */ + for (p = file; p < fileend; p++) + if (*p == '%') { + int n; + /* Set len to -1 if a substitution is invalid. */ + if (++p >= fileend) { + len = -1; + break; + } + n = field(f, *p, (char *)0); + if (n < 0) { + len = -1; + break; + } + len += n; + } + else + len++; + + if (len > 0) { + /* We've got a valid non-empty filename. */ + char *s; + char *buf; + + s = buf = (char *)rmalloc(len + 2); + for (p = file; p < fileend; p++) + if (*p == '%') + s += field(f, *++p, s); + else + *s++ = *p; + *s++ = '\0'; + if (testopen(buf)) { + /* Terminate the array of filenames. */ + *s++ = '\0'; + return buf; + } + free((UNIV)buf); + } + if (*fileend == '\0') + break; + file = ++fileend; + } + return 0; +} + +/* Handle a system identifier without searching. */ + +static +UNIV sysidgen(s) +char *s; +{ + char *buf, *p; + + buf = (char *)rmalloc(strlen(s) + 2); + + for (p = buf; *s; s++) { + if (*s == SYSID_FILE_SEP) { + if (p > buf && p[-1] != '\0') + *p++ = '\0'; + } + else if (*s == RECHAR) + *p++ = '\n'; + else if (*s != RSCHAR) + *p++ = *s; + } + /* Terminate this filename. */ + if (p > buf && p[-1] != '\0') + *p++ = '\0'; + if (p == buf) { + /* No filenames. */ + frem((UNIV)buf); + return 0; + } + /* Terminate the list. 
*/ + *p++ = '\0'; + return buf; +} + +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/entity.h b/usr.bin/sgmls/sgmls/entity.h new file mode 100644 index 0000000..d7d3096 --- /dev/null +++ b/usr.bin/sgmls/sgmls/entity.h @@ -0,0 +1,189 @@ +/* Struct dcncb: attribute list added to support data attributes. */ +#ifndef ENTITY_H /* Don't include this file more than once. */ +#define ENTITY_H +/* ENTITY.H: Definitions and control block templates for entity management. +*/ +#include "tools.h" /* Definitions for type declarations, etc. */ +#include "msgcat.h" + +#define STDINNAME "-" /* File name that refers to standard input. */ + +#define EOS '\0' /* NONCHAR: EE (entity end: strings). */ + +#define AVALCASE 2 /* 2=untranslated string of name characters. */ + +#define REFNAMELEN 8 /* reference quantity set NAMELEN */ +#define REFLITLEN 240 /* reference quantity set LITLEN */ + +/* Minimization status of returned tags. +*/ +#define MINNONE 0 /* Minimization: tag not minimized. */ +#define MINNULL 1 /* Minimization: tag was null. */ +#define MINNET 2 /* Minimization: end-tag was NET delimiter. */ +#define MINDATA 3 /* Minimization: end-tag was data tag. */ +#define MINSTAG 4 /* Minimization: tag implied by start-tag. */ +#define MINETAG 5 /* Minimization: end-tag implied by end-tag. */ + +/* Formal public identifier public text classes. +*/ +#define FPICAP 1 +#define FPICHARS 2 +#define FPINOT 3 +#define FPISYN 4 +#define FPICMINV 5 /* Minimum fpic value for versionable text. */ +#define FPIDOC 5 +#define FPIDTD 6 +#define FPIELEM 7 +#define FPIENT 8 +#define FPILPD 9 +#define FPINON 10 +#define FPISHORT 11 +#define FPISUB 12 +#define FPITEXT 13 +struct fpi { /* Formal public identifier. */ + UNCH fpiot; /* Owner type: + or - or ! (for ISO). */ + UNS fpiol; /* Length of owner identifier. 
*/ + UNS fpio; /* Offset in pubis of owner identifier (no EOS).*/ + int fpic; /* Public text class. */ + UNCH fpitt; /* Text type: - or + (for available). */ + UNS fpitl; /* Length of text identifier. */ + UNS fpit; /* Offset in pubis of text identifier (no EOS). */ + UNS fpill; /* Language/designating sequence length. */ + UNS fpil; /* Offset in pubis of language. */ + UNS fpivl; /* Length of display version . */ + UNS fpiv; /* Offset in pubis of display version (no EOS). */ + int fpiversw; /* 1=use best ver; 0=use stated ver; -1=error. */ + UNCH *fpinm; /* Entity/DCN name (EOS, no length). */ + UNCH fpistore; /* 1=NDATA 2=general 3=parm 4=DTD 5=LPD 6=DCN. */ + /* Name of the entity's DCN. Valid only when fpistore == 1. + NULL if it's a SUBDOC. */ + UNCH *fpinedcn; + UNCH *fpipubis; /* Public ID string (EOS). */ + UNCH *fpisysis; /* System ID string (EOS). */ +}; +#define FPISZ sizeof(struct fpi) +typedef struct fpi *PFPI; /* Ptr to FPI control block. */ + +/* General control blocks. +*/ +#define NONONCH 1 /* Character references to non-chars invalid. */ +#define OKNONCH 0 /* Character references to non-chars allowed. */ +struct parse { /* Parse control block. */ + char *pname; /* Parse name; content, tag, etc. */ + UNCH *plex; /* Lexical analysis table. */ + UNCH **ptab; /* State and action table. */ + UNS state; /* State. */ + UNS input; /* Input. */ + UNS action; /* Action. */ + UNS newstate; /* Next state. */ +}; +struct restate { + UNS sstate; /* State. */ + UNS sinput; /* Input. */ + UNS saction; /* Action. */ + UNS snext; /* Next state. */ +}; +struct map { + UNCH *mapnm; /* Name followed by EOS. */ + int mapdata; /* Data associated with that name. */ +}; +struct hash { /* Dummy structure for function arguments. */ + struct hash *enext; /* Next entry in chain. */ + UNCH *ename; /* Entry name with size and EOS. */ +}; +typedef struct hash *PHASH; /* Ptr to hash table entry. */ +typedef struct hash **THASH; /* Ptr to hash table. 
*/ + +struct fwdref { /* A forward id reference. */ + struct fwdref *next; /* Pt to next reference in chain. */ + UNIV msg; /* Ptr to saved error message. */ +}; +#define FWDREFSZ sizeof(struct fwdref) + +struct dcncb { /* Data content notation control block. */ + struct dcncb *enext; /* Next DCN in chain. */ + UNCH *ename; /* Notation name followed by EOS. */ + UNCH mark; /* For use by application. */ + UNCH entsw; /* Entity defined with this notation? */ + UNCH defined; /* Has this notation been defined. */ + UNCH *sysid; /* System identifier of notation. */ + UNCH *pubid; /* Public identifier of notation. */ + struct ad *adl; /* Data attribute list (NULL if none). */ +}; +#define DCBSZ sizeof(struct dcncb) +#define DCNMARK(p) ((p)->mark ? 1 : ((p)->mark = 1, 0)) + +typedef struct dcncb *PDCB; /* Ptr to DCN control block. */ + +/* Number of capacities in a capacity set. */ + +#define NCAPACITY 17 + +struct sgmlcap { + char **name; + UNCH *points; + long *number; + long *limit; +}; + +struct sgmlstat { /* Document statistics. */ + UNS dcncnt; /* Number of data content notations defined. */ + UNS pmexgcnt; /* Number of plus or minus exception groups. */ + UNS etdcnt; /* Number of element types declared. */ + UNS etdercnt; /* Number of element types defined by default. */ + UNS pmexcnt; /* Number of plus/minus exception grp members. */ + UNS modcnt; /* Number of content model tokens defined. */ + UNS attcnt; /* Number of attributes defined. */ + UNS attdef; /* Characters of attribute defaults defined. */ + UNS attgcnt; /* Number of att value grp members (incl dcn). */ + UNS idcnt; /* Number of ID attributes specified. */ + UNS idrcnt; /* Number of ID references specified. */ + UNS ecbcnt; /* Number of entities declared. */ + UNS ecbtext; /* Characters of entity text defined. */ + UNS srcnt; /* Number of short reference tables defined. */ + UNS dcntext; /* Characters of notation identifiers defined. 
*/ +}; +struct switches { /* Parser control switches (1=non-standard). */ + int swdupent; /* 1=msg if duplicate ENTITY def attempted;0=no.*/ + int swcommnt; /* 1=return comment declarations as data; 0=no. */ + int swrefmsg; /* 1=msg if undeclared ref is defaulted; 0=no. */ + UNS swbufsz; /* Size of source file buffer for READ(). */ + int swenttr; /* 1=trace entity stack in error messages; 0=no.*/ + int sweltr; /* 1=trace element stack in error messages; 0=no. */ + int swambig; /* 1=check content model ambiguity */ + int swundef; /* 1=warn about undefined elements and notations. */ + char *prog; /* Program name for error messages. */ +#ifdef TRACE + char *trace; /* What to trace in the body. */ + char *ptrace; /* What to trace in the prolog. */ +#endif /* TRACE */ + nl_catd catd; /* Message catalog descriptor. */ + long nopen; /* Number of open document entities */ + int onlypro; /* Parse only the prolog. */ + char **includes; /* List of parameter entities to be defined + as "INCLUDE"; NULL terminated.*/ + VOID (*die) P((void)); /* Function to call on fatal error. */ +}; +struct markup { /* Delimiter strings for text processor. 
*/ + UNCH *cro; /* LEXCON markup string: CRO */ + UNCH *dso; /* LEXCON markup string: DSO */ + UNCH *ero; /* LEXCON markup string: ERO */ + UNCH *etag; /* LEXMARK markup string: end-tag */ + UNCH *lit; /* LEXMARK markup string: LIT */ + UNCH *lita; /* LEXMARK markup string: LITA */ + UNCH *mdc; /* LEXCON markup string: MDC */ + UNCH *mdo; /* LEXCON markup string: MDO */ + UNCH *mse; /* LEXCON markup string: mse */ + UNCH *mss; /* LEXCON markup string: mss */ + UNCH *mssc; /* LEXCON markup string: mss CDATA */ + UNCH *mssr; /* LEXCON markup string: mss RCDATA */ + UNCH *pic; /* LEXCON markup string: PIC */ + UNCH *pio; /* LEXCON markup string: PIO */ + UNCH *refc; /* LEXGRP markup string: REFC */ + UNCH *stag; /* LEXMARK markup string: start-tag */ + UNCH *tagc; /* LEXMARK markup string: TAGC */ + UNCH *vi; /* LEXMARK markup string: VI */ + int lennet; /* LEXMARK markup string length: null end-tag. */ + int lennst; /* LEXMARK markup string length: null start-tag.*/ +}; +#endif /* ndef ENTITY_H */ diff --git a/usr.bin/sgmls/sgmls/error.h b/usr.bin/sgmls/sgmls/error.h new file mode 100644 index 0000000..d37d493 --- /dev/null +++ b/usr.bin/sgmls/sgmls/error.h @@ -0,0 +1,61 @@ +/* ERROR.H: Symbols for SGML error codes (start with 'E_'). + Numbers 46 - 56 are generated by ERROR.C. + Later numbers are coded directly. +*/ +/* SGMLERR.C: General errors and syntax errors. +*/ +#define E_CONTEXT 1 /* W GI not allowed at this point in structure. */ +#define E_MDNAME 2 /* E Invalid markup declaration name. */ +/*efine E_LEN 3 E Syntax error: length exceeded. */ +#define E_SYS 4 /* W Illegal system character. */ +#define E_ETAG 5 /* E End-tag does not match any open start-tag. */ +#define E_STAGMAX 6 /* E Maximum number of open elements exceeded. */ +/* E_ALLNULL 7 W Start- and end-tag omitted with null content. */ +#define E_EOF 8 /* E/W Illegal entity end in markup or delimited text. */ +/* fine E_INV 9 E Markup error: invalid character. 
*/ +#define E_CHARS 10 /* W Data found in content that doesn't allow it. */ +/* fine E_NOETDE 11 E End-tag GI not defined by element declaration. */ +#define E_BADNM 12 /* E Name is not syntactically valid. */ +#define E_BADATT 13 /* E Attribute was not defined by element declaration. */ +#define E_VALINV 14 /* W Att value/declaration conflict: invalid char. */ +#define E_VALLEN 15 /* W Att value/declaration conflict: token too long. */ +#define E_VALCNT 16 /* W Att value/declaration conflict: too many tokens. */ +#define E_VALTYPE 17 /* W Att value/declaration conflict: wrong token type.*/ +#define E_VALGRP 18 /* W Att value/declaration conflict: token not in grp.*/ +#define E_VALREQ 19 /* W Att value/declaration conflict: req unspecified. */ +/* E_EMIN 20 W End-tag implied by end-tag; not minimizable. */ +/* E_SMIN 21 W Omitted start-tag was not minimizable. */ +#define E_POSSATT 22 /* E Possible att found but not defined; used as data.*/ +/* Late additions numbered out of order to avoid recompilation. */ +/*efine E_ENTSYNC 37 E Entity and group nesting levels out of sync. */ +#define E_BADVAL 25 /* W Att value omitted (null); default used. */ +/* E_ECONTXT 30 W Element ended prematurely (some content omitted).*/ +/* E_EMINST 39 W End-tag implied by start-tag; not minimizable. */ +/* E_MEXTAG 40 W *** In Use *** */ +#define E_MEXERR 41 /* W Attempt to exclude contextually required element.*/ +#define E_DOCTYPE 42 /* W No document type defined; *DOCTYPE assumed. */ +/* E_NOETDS 43 E Start-tag GI not defined by element declaration. */ +#define E_RESTART 44 /* E Invalid chars ignored; trying to restart parse. */ + +/* MDERROR.C: Errors in markup declarations. +*/ +/*efine E_DUP 23 E Duplicate specification. */ +/*efine E_KEY 24 E Incorrect keyword for parameter. */ +/*efine E_MSE 26 E MSE occurred with no corresponding MS. */ +/*efine E_MSS 27 E MSS exceeded maximum nesting level. */ +/*efine E_NUM 28 E Incorrect number of parameters. 
*/ +#define E_TYPE 29 /* E Incorrect parameter type. */ +/* Late additions numbered out of order to avoid recompilation. */ +/*efine E_VAL 38 W Incorrect parameter value. */ + +/* RESERROR.C: Errors in resource routines. +*/ +/* Unused I End of primary source entity. */ +/* fine E_FILBUF 31 E Could not read next buffer. */ +/* fine E_ERFILE 32 E Could not open file. */ +/* fine E_MALLOC 33 T Could not obtain required main storage. */ +/* fine E_ERMAX 34 E Maximum number of open entities exceeded. */ +/* fine E_ERNAME 35 E Referenced entity undeclared. */ +/* fine E_ERLOOP 36 E Entity referenced within itself: ref ignored. */ +/* Late additions numbered out of order to avoid recompilation. */ +/* E_ERDEF 45 E Referenced entity undeclared; SYSTEM assumed. */ diff --git a/usr.bin/sgmls/sgmls/etype.h b/usr.bin/sgmls/sgmls/etype.h new file mode 100644 index 0000000..e4ee1f9 --- /dev/null +++ b/usr.bin/sgmls/sgmls/etype.h @@ -0,0 +1,91 @@ +/* ETYPE.H: Definitions for element type and group processing. +*/ +#define MCHARS 0x80 /* Model: contains #CHARS. */ +#define MGI 0x40 /* Model: contains GI names. */ +#define MPHRASE 0x20 /* Model: first token is #CHARS. */ +#define MKEYWORD 0x1F /* Model: defined with single keyword. */ +#define MNONE 0x10 /* Model: contains no GIs or #CHARS. */ +#define MANY 0x08 /* Model: contains any GIs or #CHARS. */ +#define MRCDATA 0x04 /* Model: contains RCDATA. */ +#define MCDATA 0x02 /* Model: contains CDATA. */ + +#define TOREP (TOPT+TREP) /* 11000000 Optional and repeatable. */ +#define TOPT 0x80 /* Token: 1=optional; 0=required. */ +#define TREP 0x40 /* Token: 1=repeatable; 0=not. */ +#define TXOREP (TXOPT+TXREP) /* * explicitly specified */ +#define TXOPT 0x20 /* ? explicitly specified */ +#define TXREP 0x10 /* + explicitly specified */ +#define TTMASK 0x0F /* 00001111 Mask for testing token type. */ +#define TTETD 4 /* 00000100 Token is an ETD. */ +#define TTAND 3 /* 00000011 Token is an AND group. 
*/ +#define TTSEQ 2 /* 00000010 Token is a sequence group. */ +#define TTOR 1 /* 00000001 Token is an OR group. */ +#define TTCHARS 0 /* 00000000 Token is #CHARS. */ + +struct thdr { /* Token header or model header. */ + UNCH ttype; /* Token type attributes or model content. */ + union { + int tnum; /* Group token: tokens in group. + Model header: content tokens at any level. */ + struct etd *thetd; /* GI token: ptr to etd. */ + } tu; +}; +#define THSZ (sizeof(struct thdr)) + +#define ETDHASH 211 /* Size of element hash table. Must be prime. */ +#define SMO 0x40 /* ETDMIN: Start-tag O minimization. */ +#define EMO 0x04 /* ETDMIN: End-tag O minimization. */ +#define EMM 0x02 /* ETDMIN: End-tag minimization explicitly + specified to be minus */ +#define ETDDCL 0x80 /* ETDMIN: Element was declared. */ +#define ETDUSED 0x20 /* ETDMIN: Element used in another declaration. */ +#define ETDOCC 0x10 /* ETDMIN: Element occurred in document. */ + +struct etd { /* Element type definition. */ + struct etd *etdnext; /* Next element type definition in hash chain. */ + UNCH *etdgi; /* GI preceded by its length, followed by EOS. */ + UNCH etdmin; /* Flag bits: minimization. */ + UNCH mark; /* Mark bit: for ambiguity checking */ + struct thdr *etdmod; /* Content model. */ + struct etd **etdmex; /* Minus exceptions. */ + struct etd **etdpex; /* Plus exceptions. */ + struct ad *adl; /* Attribute descriptor list. */ + struct entity **etdsrm; /* Short reference map. */ +}; +#define ETDSZ (sizeof(struct etd)) +typedef struct etd *PETD; +extern struct etd dumetd[]; + +/* Number of bits in a long must be >= 1<<LONGPOW */ +#define LONGPOW 5 + +#define LONGBITS (1<<LONGPOW) + +struct mpos { /* Position of current element in model. */ + UNCH g; /* Index of this group in the model. */ + UNCH t; /* Index of the current token in this group. */ + unsigned long *h; /* Hit bits of this group's tokens. 
*/ +}; + +#define HITCLEAR(h) MEMZERO((UNIV)(h), grplongs*sizeof(unsigned long)) + +#define TAGCONER 0x01 /* 00000001 (contersw) Tag was out of context. */ +#define TAGNET 0x02 /* 00000010 (etisw) Tag has NET enabled. */ +#define TAGPEX 0x04 /* 00000100 (pexsw) Tag was plus exception. */ +#define TAGREF 0x08 /* 00001000 (conrefsw) Tag had CONREF or EMPTY.*/ +struct tag { /* Tag control block. */ + UNCH status; /* Status of context check. */ + UNCH tflags; /* Flags: TAGCONER TAGNET TAGPEX TAGREF */ + struct etd *tetd; /* Element type definition for tag. */ + struct entity **tsrm; /* Current short reference map. */ + struct mpos *tpos; /* Position of next tag in this model. */ +}; + +#define RCEND 1 /* No more tokens: end element and retry GI. */ +#define RCREQ 2 /* Required GI must precede proposed GI. */ +#define RCMISS 3 /* GI invalid: not element end; no required GI. */ +#define RCHIT 4 /* GI is the one expected next. */ +#define RCMEX 5 /* GI invalid: minus exception. */ +#define RCHITMEX 6 /* RCMEX with invalid attempted minus exclusion.*/ +#define RCPEX 7 /* GI is valid solely because of plus exclusion.*/ +#define RCNREQ 8 /* Token is not required; can retry invalid GI. */ diff --git a/usr.bin/sgmls/sgmls/exclude.c b/usr.bin/sgmls/sgmls/exclude.c new file mode 100644 index 0000000..c3968b4 --- /dev/null +++ b/usr.bin/sgmls/sgmls/exclude.c @@ -0,0 +1,121 @@ +/* exclude.c - + Exclusion checking. + + Written by James Clark (jjc@jclark.com). +*/ + +#include "sgmlincl.h" + +static int excktok P((struct thdr *, int, int *)); +static int exmark P((int)); + +/* Check that the current exclusions are legal for the content model +of the current element. */ + +VOID exclude() +{ + struct thdr *mod = tags[ts].tetd->etdmod; + + if ((mod->ttype & MKEYWORD) == 0 && exmark(1)) { + int excl; + + excktok(mod + 1, 0, &excl); + exmark(0); + } +} + +/* Set the mark field of all current exclusions to val. Return 1 if +there are some current exclusions. 
*/ + +static +int exmark(val) +int val; +{ + int i; + int gotone = 0; + + for (i = ts; i > 0; --i) { + struct etd **p = tags[i].tetd->etdmex; + if (p) { + for (; *p; p++) + (*p)->mark = val; + gotone = 1; + } + } + return gotone; +} + +/* Check exclusions for this token. Return size of token. */ + +static +int excktok(t, orgrp, excl) +struct thdr *t; +int orgrp; /* 1 if token is member of or group */ +int *excl; /* Set to 1 if token is excluded. */ +{ + int size; + struct thdr *tem; + int tnum; + int optional = 0; + int hadopt, hadreq; + + *excl = 0; + + switch (t->ttype & TTMASK) { + case TTETD: + if (t->tu.thetd->mark) { + if (orgrp || (t->ttype & TOPT)) + *excl = 1; + else + sgmlerr(217, &pcbstag, t->tu.thetd->etdgi + 1, + tags[ts].tetd->etdgi + 1); + } + /* fall through */ + case TTCHARS: + size = 1; + break; + case TTOR: + case TTAND: + case TTSEQ: + tem = t + 1; + hadopt = 0; + hadreq = 0; + for (tnum = t->tu.tnum; tnum > 0; --tnum) { + int ex; + int n = excktok(tem, (t->ttype & TTMASK) == TTOR, &ex); + if (!ex) { + if (tem->ttype & TOPT) + hadopt = 1; + else + hadreq = 1; + } + tem += n; + } + size = tem - t; + if ((t->ttype & TTMASK) == TTOR) + optional = hadreq ? hadopt : 1; + else + optional = !hadreq; + break; + default: + abort(); + } + + /* Was required, but exclusions have made it optional. + eg <!element foo - - (a | b) -(a, b)> */ + + if (optional && !(t->ttype & TOPT)) + sgmlerr(216, &pcbstag, tags[ts].tetd->etdgi + 1, (UNCH *)0); + + return size; +} + +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/genlex.c b/usr.bin/sgmls/sgmls/genlex.c new file mode 100644 index 0000000..1e84ecf --- /dev/null +++ b/usr.bin/sgmls/sgmls/genlex.c @@ -0,0 +1,114 @@ +/* genlex: Generate lexical tables for non-ASCII charsets. 
*/ + +#include "config.h" +#include "std.h" +#include "tools.h" + +#define CANON_ASCII_NONSGML 255 /* Canonical non-SGML character in ASCII. */ +#define CANON_ASCII_DATACHAR 254 /* Canonical DATACHAR in ASCII. */ + +extern unsigned char charset[]; +extern UNCH *lextabs[]; +extern UNCH lextran[]; + +static char *lextabnames[] = { + "lexcnm", "lexcon", "lexgrp", "lexlms", "lexmark", "lexsd", "lextoke" +}; + +static VOID print_tab(s, t) + char *s; + UNCH *t; +{ + int i; + printf("UNCH %s[] = {\n", s); + for (i = 0; i < 256; i++) + printf("%2d,%c", t[i], (i + 1) % 16 == 0 ? '\n' : ' '); + fputs("};\n\n", stdout); +} + +int main(argc, argv) + int argc; + char **argv; +{ + int i; + UNCH tab[256]; + char special[256]; + /* Shunned character numbers in the reference concrete syntax. */ + static UNCH refshun[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 255 + }; + char shunned[256]; + char *program_name; + + program_name = strrchr(argv[0], '/'); + if (program_name) + program_name++; + else + program_name = argv[0]; + + /* Check that the mapping is 1-1. */ + for (i = 0; i < 256; i++) + tab[i] = 0; + for (i = 0; i < 256; i++) + tab[charset[i]] = 1; + for (i = 0; i < 256; i++) + if (!tab[i]) { + fprintf(stderr, "%s: bad mapping: no character mapped to %d\n", + program_name, i); + exit(EXIT_FAILURE); + } + + /* Compute special. */ + for (i = 0; i < 256; i++) + special[i] = 0; + for (i = 0; lextabs[i]; i++) { + int j; + for (j = 0; j < 256; j++) + if (lextabs[i][j] != lextabs[i][CANON_ASCII_NONSGML] + && lextabs[i][j] != lextabs[i][CANON_ASCII_DATACHAR]) + special[charset[j]] = 1; + } + + /* Compute shunned. */ + for (i = 0; i < 256; i++) + shunned[i] = 0; + for (i = 0; i < sizeof(refshun); i++) + shunned[refshun[i]] = 1; + + printf("/* This file was automatically generated by %s. Do not edit. 
*/\n\n", + program_name); + fputs("#include \"config.h\"\n#include \"entity.h\"\n#include \"sgmldecl.h\"\n\n", + stdout); + + /* Generate each of the lexical tables. */ + for (i = 0; lextabs[i]; i++) { + int j; + for (j = 0; j < 256; j++) + tab[charset[j]] = lextabs[i][j]; + + for (j = 0; j < 256; j++) + if (!special[j]) { + if (shunned[j]) + tab[j] = lextabs[i][CANON_ASCII_NONSGML]; + else + tab[j] = lextabs[i][CANON_ASCII_DATACHAR]; + } + print_tab(lextabnames[i], tab); + } + + /* Generate lextran. */ + for (i = 0; i < 256; i++) + tab[charset[i]] = charset[lextran[i]]; + print_tab("lextran", tab); + + /* Generate asciicharset. */ + fputs("int asciicharset[] = {\n", stdout); + for (i = 0; i < 128; i++) + printf("%3d,%c", charset[i], (i + 1) % 16 == 0 ? '\n' : ' '); + for (i = 128; i < 256; i++) + printf("UNUSED,%c", (i + 1) % 8 == 0 ? '\n' : ' '); + fputs("};\n", stdout); + + exit(EXIT_SUCCESS); +} diff --git a/usr.bin/sgmls/sgmls/getopt.c b/usr.bin/sgmls/sgmls/getopt.c new file mode 100644 index 0000000..9a218b3 --- /dev/null +++ b/usr.bin/sgmls/sgmls/getopt.c @@ -0,0 +1,166 @@ +/* getopt.c - + getopt() for those systems that don't have it. + + Derived from comp.sources.unix/volume3/att_getopt. + Modified by James Clark (jjc@jclark.com). 
+*/ + +#include "config.h" + +#ifndef HAVE_GETOPT + +#include "std.h" +#include "getopt.h" + +#ifdef SWITCHAR +#include <dos.h> +#endif + +int opterr = 1; +int optind = 1; +int optopt; +char *optarg; + +#ifndef OPTION_CHAR +#define OPTION_CHAR '-' +#endif + +int getopt(argc, argv, opts) +int argc; +char **argv; +char *opts; +{ +#ifdef SWITCHAR + union REGS regs; + static char switchar = '\0'; +#endif + static int sp = 1; + register int c; + register char *cp; + char *message; +#ifdef SWITCHAR + if (switchar == '\0') { + regs.x.ax = 0x3700; + intdos(&regs, &regs); + if (!regs.x.cflag) + switchar = regs.h.dl; + else + switchar = '/'; + } +#endif + if (sp == 1) { + if (optind >= argc) + return EOF; + if (( +#ifdef SWITCHAR + argv[optind][0] != switchar && +#endif + argv[optind][0] != OPTION_CHAR) || argv[optind][1] == '\0') { +#ifdef REORDER_ARGS + int i; + for (i = optind; i < argc; i++) + if (( +#ifdef SWITCHAR + argv[i][0] == switchar || +#endif + argv[i][0] == OPTION_CHAR) && argv[i][1] != '\0') + break; + if (i < argc) { + c = argv[i][1]; +#ifdef CASE_INSENSITIVE_OPTIONS + if (isupper(c)) + c = tolower(c); +#endif + if (c != ':' && c != OPTION_CHAR && (cp = strchr(opts, c)) != NULL + && cp[1] == ':' && argv[i][2] == 0 && i < argc - 1) { + int j; + char *temp1 = argv[i]; + char *temp2 = argv[i+1]; + for (j = i - 1; j >= optind; j--) + argv[j+2] = argv[j]; + argv[optind] = temp1; + argv[optind+1] = temp2; + } + else { + int j; + char *temp = argv[i]; + for (j = i - 1; j >= optind; j--) + argv[j+1] = argv[j]; + argv[optind] = temp; + } + } + else +#endif + return EOF; + } + if ((argv[optind][0] == OPTION_CHAR && argv[optind][1] == OPTION_CHAR + && argv[optind][2] == '\0') +#ifdef SWITCHAR + || (argv[optind][0] == switchar && argv[optind][1] == switchar + && argv[optind][2] == '\0') +#endif + ) { + optind++; + return(EOF); + } + } + optopt = c = argv[optind][sp]; +#ifdef CASE_INSENSITIVE_OPTIONS + if ( +#ifdef USE_ISASCII + isascii(c) && +#endif /* USE_ISASCII */ + 
isupper((unsigned char)c)) + optopt = c = tolower((unsigned char)c); +#endif /* CASE_INSENSITIVE_OPTIONS */ + if (c == ':' || (cp = strchr(opts, c)) == NULL) { + if (argv[optind][++sp] == '\0') { + optind++; + sp = 1; + } + message = ": illegal option -- "; + goto bad; + } + if (*++cp == ':') { + if (argv[optind][sp+1] != '\0') + optarg = &argv[optind++][sp+1]; + else if (++optind >= argc) { + sp = 1; + message = ": option requires an argument -- "; + goto bad; + } + else + optarg = argv[optind++]; + sp = 1; + } + else { + if (argv[optind][++sp] == '\0') { + sp = 1; + optind++; + } + optarg = NULL; + } + return c; +bad: + if (opterr) { + fputs(argv[0], stderr); + fputs(message, stderr); + fputc(optopt, stderr); + fputc('\n', stderr); + } + return '?'; +} + +#endif /* not HAVE_GETOPT */ + +/* +Local Variables: +c-indent-level: 4 +c-continued-statement-offset: 4 +c-brace-offset: 4 +c-argdecl-indent: 4 +c-label-offset: -4 +tab-width: 4 +End: +*/ + diff --git a/usr.bin/sgmls/sgmls/getopt.h b/usr.bin/sgmls/sgmls/getopt.h new file mode 100644 index 0000000..4856560 --- /dev/null +++ b/usr.bin/sgmls/sgmls/getopt.h @@ -0,0 +1,11 @@ +/* Declare getopt() and associated variables. */ + +/* Don't use prototypes in case some system header file has a +conflicting definition. Systems differ on how they declare the second +parameter. */ + +extern int getopt(); + +extern char *optarg; +extern int optind; +extern int opterr; diff --git a/usr.bin/sgmls/sgmls/keyword.h b/usr.bin/sgmls/sgmls/keyword.h new file mode 100644 index 0000000..6c092f0 --- /dev/null +++ b/usr.bin/sgmls/sgmls/keyword.h @@ -0,0 +1,22 @@ +/* KEYWORD.H: Definitions for markup declaration keyword processing. +*/ +/* Default value types for attribute definition list declaration. +*/ +#define DNULL 1 /* Default value: implied attribute. */ +#define DREQ 2 /* Default value: required attribute. */ +#define DCURR 3 /* Default value: current attribute. */ +#define DCONR 4 /* Default value: content reference attribute. 
*/ +#define DFIXED 5 /* Default value: fixed attribute. */ + +/* External identifier types for entity and notation declarations. +*/ +#define EDSYSTEM 1 /* SYSTEM (but not PUBLIC) identifier specified.*/ +#define EDPUBLIC 2 /* PUBLIC (but not SYSTEM) identifier specified.*/ +#define EDBOTH 3 /* PUBLIC and also SYSTEM identifiers specified.*/ + +/* Marked section keywords. +*/ +#define MSTEMP 1 +#define MSRCDATA 2 +#define MSCDATA 3 +#define MSIGNORE 4 diff --git a/usr.bin/sgmls/sgmls/latin1.h b/usr.bin/sgmls/sgmls/latin1.h new file mode 100644 index 0000000..44f43f3 --- /dev/null +++ b/usr.bin/sgmls/sgmls/latin1.h @@ -0,0 +1,51 @@ +/* SGML Character Use: ISO Latin 1. +*/ +#define EOFCHAR '\032' /* FUNCTION: EE (entity end: files). */ +#define EOBCHAR '\034' /* NONCHAR: EOB (file entity: end of buffer). */ +#define RSCHAR '\012' /* FUNCTION: RS (record start). */ +#define RECHAR '\015' /* FUNCTION: RE (record end). */ +#define TABCHAR '\011' /* FUNCTION: TAB (horizontal tab). */ +#define SPCCHAR '\040' /* FUNCTION: SPACE (horizontal space). */ +#define GENRECHAR '\010' /* NONCHAR: Generated RE. */ +#define DELCDATA '\035' /* NONCHAR: Delimiter for CDATA entity in + attribute value. */ +#define DELSDATA '\036' /* NONCHAR: Delimiter for SDATA entity in + attribute value. */ +#define DELNONCH '\037' /* NONCHAR: non-SGML character prefix. */ + +/* These two macros are used to handle non-SGML characters. A non-SGML +character is represented by a DELNONCH character followed by +SHIFTNON(original_character). SHIFTNON must transform any character +in the set 0, EOFCHAR, EOBCHAR, GENRECHAR, DELCDATA, DELSDATA, +DELNONCH into a character that is not one of the set 0, EOFCHAR, +EOBCHAR. Furthermore UNSHIFTNON(SHIFTNON(c)) must be equal to c for +every character c in the former set. */ +/* This is a simple definition that works for ASCII-like character sets. 
*/ +#define SHIFTNON(ch) ((UNCH)(ch) | 0100) +#define UNSHIFTNON(ch) ((UNCH)(ch) & ~0100) + +/* A canonical NONSGML character. The character number that is shunned +in the reference concrete syntax and is not the number of a +significant (in the reference concrete syntax) character nor one of +the above characters nor 0. */ +#define CANON_NONSGML 255 + +/* A canonical DATACHAR character. The character number that is not +shunned in the reference concrete syntax and is not the number of a +significant (in the reference concrete syntax) SGML character nor one +of the above characters. */ +#define CANON_DATACHAR 254 + +/* Components for a formal public identifier for the whole of the +system character set. Protect with ifndef so that it can be overridden +in config.h. */ + +#ifndef SYSTEM_CHARSET_DESIGNATING_SEQUENCE +#define SYSTEM_CHARSET_DESIGNATING_SEQUENCE "ESC 2/13 4/1" +#endif +#ifndef SYSTEM_CHARSET_OWNER +#define SYSTEM_CHARSET_OWNER "ISO Registration Number 100" +#endif +#ifndef SYSTEM_CHARSET_DESCRIPTION +#define SYSTEM_CHARSET_DESCRIPTION "ECMA-94 Right Part of Latin Alphabet Nr. 1" +#endif diff --git a/usr.bin/sgmls/sgmls/lexcode.h b/usr.bin/sgmls/sgmls/lexcode.h new file mode 100644 index 0000000..e4047ba --- /dev/null +++ b/usr.bin/sgmls/sgmls/lexcode.h @@ -0,0 +1,11 @@ +/* Definitions of lexical codes needed by both lextaba.c and lexrf.c. */ + +#define FCE 27 /* FRE Free character in use as an entity reference */ +#define FRE 0 /* FREECHAR that is not in a CON delimiter-in-context. */ +#define LITC 21 /* LIT LITA PIC or EE in use as a literal terminator */ +#define MSC3 15 /* ] Also MSC[2]. */ +#define NET 17 /* / When enabled. */ +#define ETI 16 /* / Actually ETAGO[2] */ +#define SPCR 19 /* Space in use as SR8. 
*/ +#define TGO2 25 /* < TAGO; also MDO[1], PIO[1] */ +#define CDE 11 /* NONSGML delcdata CDATA/SDATA delimiter */ diff --git a/usr.bin/sgmls/sgmls/lexrf.c b/usr.bin/sgmls/sgmls/lexrf.c new file mode 100644 index 0000000..ec3db83 --- /dev/null +++ b/usr.bin/sgmls/sgmls/lexrf.c @@ -0,0 +1,124 @@ +/* LEXRF: Lexical tables for reference concrete syntax. +*/ + +#include "config.h" +#include "entity.h" /* Templates for entity control blocks. */ +#include "synxtrn.h" /* Declarations for concrete syntax constants. */ +#include "action.h" /* Action names for all parsing. */ +#include "lexcode.h" + +static UNCH SRTAB[] = { TABCHAR, '\0' }; +static UNCH SRRE[] = { RECHAR, '\0' }; +static UNCH SRRS[] = { RSCHAR, '\0' }; +static UNCH SRRSB[] = { RSCHAR, 'B', '\0' }; +static UNCH SRRSRE[] = { RSCHAR, RECHAR, '\0' }; +static UNCH SRRSBRE[] = { RSCHAR, 'B', RECHAR, '\0' }; +static UNCH SRBRE[] = { 'B', RECHAR, '\0' }; + +struct lexical lex = { /* Delimiter set constants for parser use. */ + { /* Markup strings for text processor use. */ + (UNCH *)"\4&#", /* LEXCON markup string: CRO */ + (UNCH *)"[", /* LEXCON markup string: DSO */ + (UNCH *)"\3&", /* LEXCON markup string: ERO */ + (UNCH *)"\4</", /* LEXMARK markup string: end-tag */ + (UNCH *)"\3\"", /* LEXMARK markup string: LIT */ + (UNCH *)"\3'", /* LEXMARK markup string: LITA */ + (UNCH *)"\3>", /* LEXCON markup string: MDC */ + (UNCH *)"\4<!", /* LEXCON markup string: MDO */ + (UNCH *)"\5]]>", /* LEXCON markup string: mse */ + (UNCH *)"\5<![", /* LEXCON markup string: mss */ + (UNCH *)"\13<![CDATA[", /* LEXCON markup string: mss CDATA */ + (UNCH *)"\14<![RCDATA[", /* LEXCON markup string: mss RCDATA */ + (UNCH *)"\3>", /* LEXCON markup string: PIC */ + (UNCH *)"\4<?", /* LEXCON markup string: PIO */ + (UNCH *)"\3;", /* LEXGRP markup string: ref close. 
*/ + (UNCH *)"\3<", /* LEXMARK markup string: start-tag */ + (UNCH *)"\3>", /* LEXMARK markup string: TAGC */ + (UNCH *)"\3=", /* LEXMARK markup string: VI */ + 3, /* LEXMARK: length of null end-tag. */ + 2 /* LEXMARK: length of null start-tag. */ + }, + { /* Short reference delimiters. */ + { /* Short reference delimiter table. */ + {(UNCH *)"", SRCT}, /* Dummy entry to store SR count. */ + {SRTAB, 1}, /* TAB */ + {SRRE, 2}, /* RE */ + {SRRS, 3}, /* RS */ + {SRRSB, 4}, /* Leading blanks */ + {SRRSRE, 5}, /* Null record */ + {SRRSBRE, 6}, /* Blank record */ + {SRBRE, 7}, /* Trailing blanks */ + {(UNCH *)" ", 8}, /* Space */ + {(UNCH *)"BB", 9}, /* Two or more blanks */ + {(UNCH *)"\"", 10}, /* Quotation mark (first data character) */ + {(UNCH *)"#", 11}, /* Number sign */ + {(UNCH *)"%", 12}, /* FCE CHARACTERS start here */ + {(UNCH *)"'", 13}, + {(UNCH *)"(", 14}, + {(UNCH *)")", 15}, + {(UNCH *)"*", 16}, + {(UNCH *)"+", 17}, + {(UNCH *)",", 18}, + {(UNCH *)"-", 19}, /* Hyphen */ + {(UNCH *)"--", 20}, /* Two hyphens */ + {(UNCH *)":", 21}, + {(UNCH *)";", 22}, + {(UNCH *)"=", 23}, + {(UNCH *)"@", 24}, + {(UNCH *)"[", 25}, + {(UNCH *)"]", 26}, + {(UNCH *)"^", 27}, + {(UNCH *)"_", 28}, /* Low line */ + {(UNCH *)"{", 29}, + {(UNCH *)"|", 30}, + {(UNCH *)"}", 31}, + {(UNCH *)"~", 32}, + {0, 0} + }, + { /* Printable form of unprintable SR delims.*/ + "", /* Dummy entry to balance s.dtb. */ + "&#TAB;", /* TAB */ + "&#RE;", /* RE */ + "&#RS;", /* RS */ + "&#RS;B", /* Leading blanks */ + "&#RS;&#RE;", /* Null record */ + "&#RS;B&#RE;", /* Blank record */ + "B&#RE;", /* Trailing blanks */ + "&#SPACE;" /* Space */ + }, + 12, /* LEXCNM: Index of first FCE in srdeltab. */ + 20, /*LEXCNM:Index of "two hyphens" in srdeltab*/ + 10, /* LEXCNM: Index of first SR with data char. */ + 19, /* LEXCNM: Index of hyphen in srdeltab. */ + SRNPRT+1, /* LEXCNM: Index of 1st printable SR. */ + 8, /* LEXCNM: Index of space in srdeltab. */ + 25, /* LEXCNM: Index of left bracket in srdeltab. 
*/ + 26, /* LEXCNM: Index of right bracket in srdeltab. */ + }, /* End of short reference delimiters. */ + { /* General delimiter characters. */ + GENRECHAR, /*LEXCNM:(BS)Generated RE; can't be markup.*/ + '"', /* LEXMARK: Char used as LIT delimiter.*/ + '\'', /* LEXMARK: Char used as LITA delimiter.*/ + '>', /* LEXLMS: Char used as MDC delimiter.*/ + ']', /* LEXLMS: Char used as MSC when enabled.*/ + '/', /* LEXCON: Char used as NET when enabled.*/ + '%', /* LEXMARK: Char used as PERO delimiter. */ + '>', /* LEXCON: Char used as PIC delimiter.*/ + '<' /* LEXCON: Char used as TAGO when enabled.*/ + }, + { /* Lexical table code assignments. */ + FCE, /* LEXCNM: FRE char as entity reference.*/ + FRE, /* LEXLMS: Free character not an entity ref.*/ + LITC, /* LEXLMS: Literal close delimiter enabled. */ + MSC3, /* LEXLMS: Marked section close delim enabled. */ + NET, /* LEXCON: Null end-tag delimiter enabled. */ + ETI, /* LEXCON: NET disabled; still used as ETI. */ + SPCR, /* LEXCNM: Space in use as SHORTREF delim. */ + TGO2, /* LEXCON: Tag open delimiter enabled. */ + CDE /* LEXLMS: CDATA/SDATA delimiters. */ + } +}; + +UNCH *lextabs[] = { + lexcnm, lexcon, lexgrp, lexlms, lexmark, lexsd, lextoke, 0 +}; diff --git a/usr.bin/sgmls/sgmls/lextaba.c b/usr.bin/sgmls/sgmls/lextaba.c new file mode 100644 index 0000000..54f9395 --- /dev/null +++ b/usr.bin/sgmls/sgmls/lextaba.c @@ -0,0 +1,559 @@ +/* lextaba.c: lexical tables for ASCII. */ + +/* These tables are munged by setnonsgml(). */ + +#include "config.h" +#include "entity.h" +#include "lexcode.h" +#include "sgmldecl.h" + +/* LEXCNM: Lexical table for mixed content (PCBCONM) parse. +*/ +/* Symbols for SGML character set divisions and function characters. */ +#define NU 1 /* NUMERAL Numerals */ +#define NMC 2 /* LC/UCNMCHAR . 
- Period and hyphen */ +#define NMS 3 /* LC/UCNMSTRT Lower and uppercase letters */ +#define SPC 4 /* SPACE 32 Space */ +#define NON 5 /* NONSGML 0-31 127 255 Unused, except for: */ +#define EE 6 /* NONSGML 00 26 Entity end (end of file) */ +#define EOB 7 /* NONSGML 28 End disk buffer */ +#define RS 8 /* Function 10 Line feed */ +#define RE 9 /* Function 13 Carrier return */ +#define SEP 10 /* SEPCHAR 09 TAB: horizontal tab */ +#define NSC 12 /* NONSGML delnonch Non-SGML character prefix */ + +/* Symbols for SGML delimiter roles in CON and CXT. + ETI and NET must be the same in LEXCNM and LEXCON. + FRE characters are changed to FCE if an FCE entity is declared. + They are changed back to FRE when the entity is canceled. +*/ +#define ERO 13 /* & Also CRO[1] */ +#define NMRE 14 /* 08 Generated non-markup RE */ +#define COM 15 /* - For MDO context; also SR19 and SR20. */ +#undef LIT1 +#define LIT1 18 /* " SR10 */ +#define MDO 20 /* ! Actually MDO[2] */ +#define MSC1 21 /* ] Both MSC[1] and MSC[2]; also SR26. */ +#define MSO 22 /* [ For MDO context; also SR25. */ +#define PIO 23 /* ? Actually PIO[2] */ +#define RNI 24 /* # For CRO[2]; also SR11. */ +#define TGC1 25 /* > For TAGO and MSC context; also MDC, PIC */ +#define TGO1 26 /* < TAGO; also MDO[1], PIO[1] */ + +UNCH lexcnm[256] = { /* +000 001 bs tab lf home ff cr so si */ +EE, NON, NON, NON, NON, NON, NON, NON, NMRE,SEP, RS, NON, NON, RE, NON, NON, /* + eof esc rt left up down */ +NON, NON, NON, NON, NON, NON, NON, NON, NON, NON, EE, NON, EOB, NON, NON, NSC, /* +032 ! " # $ % & ' ( ) * + , - . / */ +SPC, MDO, LIT1,RNI, FRE, FRE ,ERO, FRE, FRE, FRE, FRE, FRE, FRE, COM, NMC, ETI, /* +0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
*/ +NU , NU , NU , NU , NU , NU , NU , NU , NU , NU , FRE, FRE, TGO1,FRE, TGC1,PIO, /* +@ A B C D E F G H I J K L M N O */ +FRE, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /* +P Q R S T U V W X Y Z [ \ ] ^ _ */ +NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, MSO, FRE, MSC1,FRE, FRE, /* +` a b c d e f g h i j k l m n o */ +FRE, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /* +p q r s t u v w x y z { | } ~ 127 */ +NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, FRE, FRE, FRE, FRE, NON, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, NON +}; +/* free nu nmc nms spc non ee eob rs re sep cde nsc ero + nmre com eti lit spcr mdo msc mso net pio rni tagc tago fce */ +#undef ERO +#undef NMRE +#undef COM +#undef LIT1 +/* def SPCR*/ +#undef MDO +#undef MSC1 +#undef MSO +#undef PIO +#undef RNI +#undef TGC1 +/* def TGO1*/ +/* def FCE*/ +/* LEXCON: Lexical table for RCDATA and CDATA content (PCBCON?), + prolog (PCBPRO), and nested declaration set (PCBMDS) parses. + Note: NMC is same as FRE; kept for consistency with LEXCNM and LEXLMS. +*/ +/* Symbols for SGML character set divisions and function characters. */ +/* Same as for LEXCNM. */ + +/* Symbols for SGML delimiter roles in CON, CXT, and DS. + ETI and NET must be the same in LEXCNM and LEXCON. + FRE characters are changed to FCE if an FCE entity is declared. 
+ They are changed back to FRE when the entity is canceled. +*/ +#define ERO 13 /* & Also CRO[1] */ +#define NMRE 14 /* 08 Generated non-markup RE */ +#define COM 15 /* - For MDO context. */ +/*#define ETI 16 / Actually ETAGO[2] */ +/*#define NET 17 / When enabled. */ +#define MDO 18 /* ! Actually MDO[2] */ +#define MSC2 19 /* ] Both MSC[1] and MSC[2]. */ +#define MSO 20 /* [ For MDO context. */ +#define PERO 21 /* % For prolog */ +#define PIO 22 /* ? Actually PIO[2] */ +#define RNI 23 /* # For CRO[2]. */ +#define TGC2 24 /* > For TAGO and MSC context; also MDC, PIC */ + +UNCH lexcon[256] = { /* +000 001 bs tab lf home ff cr so si */ +EE, NON, NON, NON, NON, NON, NON, NON, NMRE,SEP, RS, NON, NON, RE, NON, NON, /* + eof esc rt left up down */ +NON, NON, NON, NON, NON, NON, NON, NON, NON, NON, EE, NON, EOB, NON, NON, NSC, /* +032 ! " # $ % & ' ( ) * + , - . / */ +SPC, MDO, FRE, RNI, FRE, PERO,ERO, FRE, FRE, FRE, FRE, FRE, FRE, COM, NMC, ETI, /* +0 1 2 3 4 5 6 7 8 9 : ; < = > ? */ +NU , NU , NU , NU , NU , NU , NU , NU , NU , NU , FRE, FRE, TGO2,FRE, TGC2,PIO, /* +@ A B C D E F G H I J K L M N O */ +FRE, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /* +P Q R S T U V W X Y Z [ \ ] ^ _ */ +NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, MSO, FRE, MSC2,FRE, FRE, /* +` a b c d e f g h i j k l m n o */ +FRE, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /* +p q r s t u v w x y z { | } ~ 127 */ +NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, FRE, FRE, FRE, FRE, NON, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, 
FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, NON +}; +/* free nu nmc nms spc non ee eob rs re sep cde nsc ero + nmre com eti net mdo msc mso pero pio rni tagc tago */ +#undef FRE +#undef NU +#undef NMC +#undef NMS +#undef SPC +#undef NON +#undef EE +#undef EOB +#undef RS +#undef RE +#undef SEP +#undef NSC +#undef ERO +#undef NMRE +#undef COM +/* def ETI*/ +/* def NET*/ +#undef MDO +#undef MSC2 +#undef MSO +#undef PERO +#undef PIO +#undef RNI +#undef TGC2 +/* LEXGRP: Lexical table for group parses, including PCBREF. +*/ +/* Symbols for SGML character set divisions. */ +#define BIT 0 /* Bit combinations (not NONCHAR) not allowed in a group. */ +#define NMC 1 /* NAMECHAR . - Period, underscore, and numerals */ +#define NMS 2 /* NAMESTRT Lower and uppercase letters */ +#define RE 3 /* Function 13 Carrier return */ +#define SPC 4 /* SPACE 32 09 Space; includes TAB */ +#define NON 5 /* NONCHAR 0-31 127 255 Unused, except for: */ +#define EE 6 /* Function 26 00 EE: entity end (end of file) */ +#define EOB 7 /* NONCHAR 28 End disk buffer. */ +#define RS 8 /* Function 10 RS: record start (line feed) */ + +/* Symbols for SGML delimiter roles in GRP. */ +#define AND1 9 /* & */ +#define GRPC 10 /* ) */ +#define GRPO 11 /* ( */ +#undef LIT2 +#define LIT2 12 /* " For datatags. */ +#define LITA 13 /* ' For datatags. */ +#define DTGC 14 /* ] For datatags. */ +#define DTGO 15 /* [ For datatags. */ +#define OPT1 16 /* ? 
*/ +#define OR1 17 /* | */ +#define PERO 18 /* % */ +#define PLUS 19 /* + */ +#define REP1 20 /* * */ +#define RNI 21 /* # For #CHARS */ +#define SEQ1 22 /* , */ +#define REFC 23 /* ; For references */ + +UNCH lexgrp[256] = { /* +000 001 bs tab lf home ff cr so si */ +EE , NON, NON, NON, NON, NON, NON, NON, NON, SPC, RS, NON, NON, RE, NON, NON, /* + eof esc rt left up down */ +NON, NON, NON, NON, NON, NON, NON, NON, NON, NON, EE , NON, EOB, NON, NON, NON, /* +032 ! " # $ % & ' ( ) * + , - . / */ +SPC, BIT, LIT2,RNI, BIT, PERO,AND1,LITA,GRPO,GRPC,REP1,PLUS,SEQ1,NMC, NMC, BIT, /* +0 1 2 3 4 5 6 7 8 9 : ; < = > ? */ +NMC, NMC, NMC, NMC, NMC, NMC, NMC, NMC, NMC, NMC, BIT, REFC,BIT, BIT, BIT, OPT1,/* +@ A B C D E F G H I J K L M N O */ +BIT, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /* +P Q R S T U V W X Y Z [ \ ] ^ _ */ +NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, DTGO,BIT, DTGC,BIT, BIT, /* +` a b c d e f g h i j k l m n o */ +BIT, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /* +p q r s t u v w x y z { | } ~ 127 */ +NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, BIT, OR1, BIT, BIT, NON, +BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, +BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, +BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, +BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, +BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, +BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, +BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, +BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, NON +}; +/* bit nmc nms re spc non ee eob rs and grpc grpo lit lita + dtgc dtgo opt or pero plus rep rni seq refc */ +#undef BIT +#undef NMC +#undef NMS +#undef RE +#undef 
SPC +#undef NON +#undef EE +#undef EOB +#undef RS +#undef AND1 +#undef GRPC +#undef GRPO +#undef LIT2 +#undef LITA +#undef DTGC +#undef DTGO +#undef OPT1 +#undef OR1 +#undef PERO +#undef PLUS +#undef REP1 +#undef RNI +#undef SEQ1 +#undef REFC +/* LEXLMS: Lexical table for literal parses and marked sections. +*/ +/* Symbols for SGML character set divisions and function characters. +*/ +#define FRE 0 /* Free char: not in a delimiter or minimum literal. */ +#define NU 1 /* Numeral Numerals */ +#undef MIN +#define MIN 2 /* Minimum literal '()+,-./:?= */ +#define NMS 3 /* LC/UCNMSTRT Lower and uppercase letters */ +#define SPC 4 /* SPACE 32 Space */ +#define NON 5 /* NONSGML 0-31 127 255 Unused, except for: */ +#define EE 6 /* NONSGML 00 26 Entity end (end of file) */ +#define EOB 7 /* NONSGML 28 End disk buffer */ +#define RS 8 /* Function 10 Line feed */ +#define RE 9 /* Function 13 Carrier return */ +#define SEP 10 /* SEPCHAR 09 TAB: horizontal tab */ +/*#define CDE 11 NONSGML delcdata CDATA/SDATA delimiter */ +#define NSC 12 /* NONSGML delnonch Non-SGML character prefix */ +/* Symbols for SGML delimiter roles in LIT, PI, and marked sections. + Either LIT, LITA, PIC, or EE, is changed to LITC when a literal is begun. + It is changed back when the LITC occurs (i.e., when the literal ends). +*/ +#define ERO 13 /* & */ +#define MDO 14 /* ! Actually MDO[2] */ +#define MSO 16 /* [ For MDO context. */ +#define PERO 17 /* % For prolog. */ +#define RNI 18 /* # For CRO[2] */ +#define TGC3 19 /* > Also MDC for MSC context. */ +#define TGO3 20 /* < TAGO; also MDO[1] */ + +/* Room has been left in the parse tables in case re-parsing of text + is eventually supported (i.e., saved parsed text is used by the + application to create a new SGML document, but CDATA and SDATA + entities in literals, and non-SGML characters, are left in their + parsed state to avoid the overhead of reconstituting the original + markup). 
In such a case, the two non-SGML characters DELCDATA and + DELSDATA are changed to CDE. + NOTE: The idea is a bad one, because the generated document would + be non-conforming, as it would contain non-SGML characters. +*/ +UNCH lexlms[256] = { /* +000 001 bs tab lf home ff cr so si */ +EE, NON, NON, NON, NON, NON, NON, NON, NON ,SEP, RS, NON, NON, RE, NON, NON, /* + eof esc rt left up down */ +NON, NON, NON, NON, NON, NON, NON, NON, NON, NON, EE, NON, EOB, NON, NON, NSC, /* +032 ! " # $ % & ' ( ) * + , - . / */ +SPC, MDO, FRE, RNI, FRE, PERO,ERO, MIN, MIN, MIN, FRE, MIN, MIN, MIN, MIN, MIN, /* +0 1 2 3 4 5 6 7 8 9 : ; < = > ? */ +NU , NU , NU , NU , NU , NU , NU , NU , NU , NU , MIN, FRE, TGO3,MIN, TGC3,MIN, /* +@ A B C D E F G H I J K L M N O */ +FRE, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /* +P Q R S T U V W X Y Z [ \ ] ^ _ */ +NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, MSO, FRE, MSC3,FRE, FRE, /* +` a b c d e f g h i j k l m n o */ +FRE, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /* +p q r s t u v w x y z { | } ~ 127 */ +NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, FRE, FRE, FRE, FRE, NON, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, NON +}; +/* free nu min nms spc non ee eob rs re sep cde nsc ero + mdo msc mso pero rni tago tagc litc */ +/* def FRE*/ +#undef NU +#undef MIN +#undef NMS 
+#undef SPC +#undef NON +#undef EE +#undef EOB +#undef RS +#undef RE +#undef SEP +/* def CDE*/ +/* def NSC*/ +#undef ERO +#undef MDO +/* def MSC3*/ +#undef MSO +#undef PERO +#undef RNI +#undef TGC3 +#undef TGO3 +/* def LITC*/ +/* LEXMARK: Lexical scan table for markup: PCBMD? and PCB?TAG. +*/ +/* Symbols for SGML character set divisions. */ +#define BIT 0 /* Bit combinations not allowed; includes ESC SO SI */ +#define NMC 1 /* NAMECHAR . _ Period and underscore */ +#define NU 2 /* NUMERAL Numerals */ +#define NMS 3 /* NAMESTRT Lower and uppercase letters */ +#define SPC 4 /* SPACE 32 13 09 Space; includes RE TAB */ +#define NON 5 /* NONCHAR 0-31 127 255 Unused, except for: */ +#define EE 6 /* Function 26 00 EE: entity end (end of file) */ +#define EOB 7 /* NONCHAR 28 End disk buffer. */ +#define RS 8 /* Function 10 RS: record start (line feed) */ + +/* Symbols for SGML delimiter roles in MD and TAG. */ +#define COM1 9 /* - Actually COM[1]; also COM[2], MINUS. */ +#define ETIB 10 /* / ETI; actually ETAGO[2]. */ +#define GRPO 11 /* ( */ +#define LIT3 12 /* " */ +#define LITA 13 /* ' */ +#define DSO 14 /* [ */ +#define DSC1 15 /* ] For data attribute specifications */ +#define PERO 16 /* % */ +#define PLUS 17 /* + */ +#define REFC 18 /* ; For references */ +#define RNI 19 /* # Also CRO[2] */ +#define TGC4 20 /* > Also MDC, PIC */ +#define TGO4 21 /* < TAGO; also MDO[1] */ +#define VI 22 /* = */ + +UNCH lexmark[256] = { /* +000 001 bs tab lf home ff cr so si */ +EE , NON, NON, NON, NON, NON, NON, NON, NON, SPC, RS, NON, NON, SPC, NON, NON, /* + eof esc rt left up down */ +NON, NON, NON, NON, NON, NON, NON, NON, NON, NON, EE , NON, EOB, NON, NON, NON, /* +032 ! " # $ % & ' ( ) * + , - . / */ +SPC, BIT, LIT3,RNI, BIT, PERO,BIT, LITA,GRPO,BIT, BIT, PLUS,BIT, COM1,NMC ,ETIB,/* +0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
*/ +NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, BIT, REFC,TGO4,VI, TGC4,BIT, /* +@ A B C D E F G H I J K L M N O */ +BIT, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /* +P Q R S T U V W X Y Z [ \ ] ^ _ */ +NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, DSO, BIT, DSC1, BIT, BIT, /* +` a b c d e f g h i j k l m n o */ +BIT, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /* +p q r s t u v w x y z { | } ~ 127 */ +NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, BIT, BIT, BIT, BIT, NON, +BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, +BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, +BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, +BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, +BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, +BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, +BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, +BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, BIT, NON +}; +/* bit nmc nu nms spc non ee eob rs com eti grpo lit lita + dso pero plus refc rni tagc tago vi */ +#undef BIT +#undef NMC +#undef NU +#undef NMS +#undef SPC +#undef NON +#undef EE +#undef EOB +#undef RS +#undef COM1 +#undef ETIB +#undef GRPO +#undef LIT3 +#undef LITA +#undef DSO +#undef DSC +#undef PERO +#undef PLUS +#undef REFC +#undef RNI +#undef TGC4 +#undef TGO4 +#undef VI +/* LEXSD: Lexical scan table for SGML declaration. +*/ + +/* Symbols for SGML character set divisions. */ +#define SIG 0 /* Significant SGML characters. */ +#define DAT 1 /* DATACHAR Not significant, and not non-sgml. 
*/ +#define NU 2 /* NUMERAL Numerals */ +#define NMS 3 /* NAMESTRT Lower and uppercase letters */ +#define SPC 4 /* SPACE 32 13 09 Space; includes RE TAB */ +#define NON 5 /* NONCHAR NONSGML */ +#define EE 6 /* Function 26 00 EE: entity end (end of file) */ +#define EOB 7 /* NONCHAR 28 End disk buffer. */ +#define RS 8 /* Function 10 RS: record start (line feed) */ +/* Symbols for SGML delimiter roles in SGML declaration. */ +#define COM1 9 /* - Actually COM[1]; also COM[2]. */ +#define LIT3 10 /* " */ +#define LITA 11 /* ' */ +#define TGC4 12 /* > Also MDC, PIC */ + +UNCH lexsd[256] = { /* +000 001 bs tab lf home ff cr so si */ +EE , NON, NON, NON, NON, NON, NON, NON, NON, SPC, RS, NON, NON, SPC, NON, NON, /* + eof esc rt left up down */ +NON, NON, NON, NON, NON, NON, NON, NON, NON, NON, EE , NON, EOB, NON, NON, NON, /* +032 ! " # $ % & ' ( ) * + , - . / */ +SPC, SIG, LIT3,SIG, DAT, SIG ,SIG, LITA,SIG, SIG, SIG, SIG, SIG, COM1,SIG ,SIG,/* +0 1 2 3 4 5 6 7 8 9 : ; < = > ? */ +NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, SIG, SIG, SIG, SIG, TGC4,SIG, /* +@ A B C D E F G H I J K L M N O */ +SIG, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /* +P Q R S T U V W X Y Z [ \ ] ^ _ */ +NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, SIG, DAT, SIG, SIG, SIG, /* +` a b c d e f g h i j k l m n o */ +DAT, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /* +p q r s t u v w x y z { | } ~ 127 */ +NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, SIG, SIG, SIG, SIG, NON, +DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, +DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, +DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, +DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, +DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, +DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, 
DAT, DAT, DAT, DAT, DAT, DAT, +DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, +DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, NON +}; + +#undef SIG +#undef DAT +#undef NON +#undef NU +#undef NMS +#undef SPC +#undef EE +#undef EOB +#undef RS +#undef COM1 +#undef LIT3 +#undef LITA +#undef TGC4 + +/* LEXTRAN: Translation table for SGML names. +*/ +UNCH lextran[256] = { /* +000 001 bs tab lf home ff cr so si */ +0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , /* + eof esc rt left up down */ +16 , 17 , 18 , 19 , 20 , 21 , 22 , 23 , 24 , 25 , 26 , 27 , 28 , 29 , 30 , 31 , /* +space! " # $ % & ' ( ) * + , - . / */ +32 , 33 , 34 , 35 , 36 , 37 , 38 , 39 , 40 , 41 , 42 , 43 , 44 , 45 , 46 , 47 , /* +0 1 2 3 4 5 6 7 8 9 : ; < = > ? */ +48 , 49 , 50 , 51 , 52 , 53 , 54 , 55 , 56 , 57 , 58 , 59 , 60 , 61 , 62 , 63 , /* +@ A B C D E F G H I J K L M N O */ +64 , 65 , 66 , 67 , 68 , 69 , 70 , 71 , 72 , 73 , 74 , 75 , 76 , 77 , 78 , 79 , /* +P Q R S T U V W X Y Z [ \ ] ^ _ */ +80 , 81 , 82 , 83 , 84 , 85 , 86 , 87 , 88 , 89 , 90 , 91 , 92 , 93 , 94 , 95 , /* +` a b c d e f g h i j k l m n o */ +96 , 65 , 66 , 67 , 68 , 69 , 70 , 71 , 72 , 73 , 74 , 75 , 76 , 77 , 78 , 79 , /* +p q r s t u v w x y z { | } ~ 127 */ +80 , 81 , 82 , 83 , 84 , 85 , 86 , 87 , 88 , 89 , 90 , 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, +176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, +192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, +208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, +224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 
250, 251, 252, 253, 254, 255 +}; +/* LEXTOKE: Lexical class table for tokenization scan. +*/ +#include "lextoke.h" /* Symbols for tokenization lexical classes. */ +UNCH lextoke[256] = { /* + +000 001 bs tab lf home ff cr */ +INV, INV, INV, INV, INV, INV, INV, INV, INV, SEP, REC, INV, INV, REC, INV, INV, /* + eof esc rt left up down */ +INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, EOB, INV, INV, INV, /* +space! " # $ % & ' ( ) * + , - . / */ +SP , INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, NMC, NMC, INV, /* +0 1 2 3 4 5 6 7 8 9 : ; < = > ? */ +NU , NU , NU , NU , NU , NU , NU , NU , NU , NU , INV, INV, INV, INV, INV, INV, /* +@ A B C D E F G H I J K L M N O */ +INV, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /* +P Q R S T U V W X Y Z [ \ ] ^ _ */ +NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, INV, INV, INV, INV, INV, /* +` a b c d e f g h i j k l m n o */ +INV, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /* +p q r s t u v w x y z { | } ~ 127 */ +NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, INV, INV, INV, INV, INV, +INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, +INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, +INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, +INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, +INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, +INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, +INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, +INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV +}; + +/* This table maps ASCII to the system character set. 
*/ +int asciicharset[] = { +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +}; diff --git a/usr.bin/sgmls/sgmls/lextabe.c b/usr.bin/sgmls/sgmls/lextabe.c new file mode 100644 index 0000000..f93af89 --- /dev/null +++ b/usr.bin/sgmls/sgmls/lextabe.c @@ -0,0 +1,184 @@ +/* This file was automatically generated by genlex. Do not edit. 
*/ + +#include "config.h" +#include "entity.h" +#include "sgmldecl.h" + +UNCH lexcnm[] = { + 6, 5, 5, 5, 5, 10, 5, 5, 5, 5, 5, 5, 5, 9, 5, 5, + 5, 5, 5, 5, 5, 5, 14, 5, 5, 5, 5, 5, 7, 5, 5, 12, + 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, + 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 26, 0, 0, 0, +13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, +15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 23, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 18, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 22, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 0, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 5, +}; + +UNCH lexcon[] = { + 6, 5, 5, 5, 5, 10, 5, 5, 5, 5, 5, 5, 5, 9, 5, 5, + 5, 5, 5, 5, 5, 5, 14, 5, 5, 5, 5, 5, 7, 5, 5, 12, + 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, + 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 25, 0, 0, 0, +13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0, 0, 0, 0, 0, +15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 24, 22, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 0, 0, 0, 0, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 20, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 0, 0, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 5, +}; + +UNCH lexgrp[] = { + 6, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 5, 5, 5, + 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, + 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
1, 0, 11, 19, 17, + 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 10, 23, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 18, 0, 0, 16, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 13, 0, 12, + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 15, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 0, + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 5, +}; + +UNCH lexlms[] = { + 6, 5, 5, 5, 5, 10, 5, 5, 5, 5, 5, 5, 5, 9, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 5, 5, 12, + 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, + 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 20, 2, 2, 0, +13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 0, 2, 0, 0, + 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 17, 0, 19, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 18, 0, 2, 2, 0, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 16, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 5, +}; + +UNCH lexmark[] = { + 6, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 5, 5, 5, + 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, + 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 21, 11, 17, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0, + 9, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 20, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 0, 13, 22, 12, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 14, 0, 
0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 5, +}; + +UNCH lexsd[] = { + 6, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 5, 5, 5, + 1, 1, 1, 1, 1, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, + 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, + 9, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 12, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 11, 0, 10, + 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, + 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, + 1, 0, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 0, 1, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, + 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 5, +}; + +UNCH lextoke[] = { + 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, + 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, + 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, + 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, + 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 0, 0, 0, 0, 0, +}; + +UNCH lextran[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 193, 194, 195, 196, 197, 198, 199, 200, 201, 138, 139, 140, 141, 142, 143, +144, 209, 210, 211, 212, 213, 214, 215, 216, 217, 154, 155, 156, 157, 158, 159, +160, 161, 226, 227, 228, 229, 230, 231, 232, 233, 170, 171, 172, 173, 174, 175, +176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, +192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, +208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, +224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, +}; + +int asciicharset[] = { + 0, 1, 2, 3, 55, 45, 46, 47, 22, 5, 37, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, + 64, 90, 127, 123, 91, 108, 80, 125, 77, 93, 92, 78, 107, 96, 75, 97, +240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 122, 94, 76, 126, 110, 111, +124, 193, 194, 195, 196, 197, 198, 199, 200, 201, 209, 210, 211, 212, 213, 214, +215, 216, 217, 226, 227, 228, 229, 230, 231, 232, 233, 173, 224, 189, 176, 109, +121, 129, 130, 131, 132, 133, 134, 135, 136, 137, 145, 146, 147, 148, 149, 150, +151, 152, 153, 162, 163, 164, 165, 166, 167, 168, 169, 192, 79, 208, 161, 7, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, 
UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+};
diff --git a/usr.bin/sgmls/sgmls/lextoke.h b/usr.bin/sgmls/sgmls/lextoke.h
new file mode 100644
index 0000000..d2bcfa0
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/lextoke.h
@@ -0,0 +1,10 @@
+/* LEXTOKE.H: Symbols for tokenization lexical classes.
+   These are the values stored in the lextoke[] tables, which are
+   indexed by character code.
+*/
+#define INV 0 /* Invalid Chars Not allowed in an SGML name. */
+#define REC 1 /* Record Boundary RS and RE. */
+#define SEP 2 /* Separator TAB. */
+#define SP 3 /* SPACE */
+#define NMC 4 /* NAMECHAR . - Period, hyphen (plus NMS, NUM); underscore is INV in lextoke[]. */
+#define NMS 5 /* NAMESTRT Lower and uppercase letters */
+#define NU 6 /* NUMERAL Numerals */
+#define EOB 7 /* NONCHAR 28 End disk buffer. */
diff --git a/usr.bin/sgmls/sgmls/lineout.c b/usr.bin/sgmls/sgmls/lineout.c
new file mode 100644
index 0000000..fd856ce
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/lineout.c
@@ -0,0 +1,653 @@
+/* lineout.c -
+   Implements line-oriented output format.
+
+   Written by James Clark (jjc@jclark.com).
+*/
+
+#include "config.h"
+#include "std.h"
+#include "entity.h" /* Templates for entity control blocks. */
+#include "adl.h" /* Definitions for attribute list processing. */
+#include "sgmlmain.h" /* Main interface to SGML services. 
*/
+#include "lineout.h"
+#include "appl.h"
+/* Forward declarations of the file-local helpers.
+   NOTE(review): P(()) presumably hides the parameter list from pre-ANSI
+   compilers -- confirm where P is defined. */
+static VOID flush_data P((void));
+static VOID define_external_entity P((PNE));
+static VOID define_entity P((UNCH *));
+static VOID handle_attributes P((UNCH *, struct ad *));
+static VOID handle_token_list P((UNCH *, struct ad *, int));
+static VOID handle_single_token P((UNCH *, struct ad *, int));
+static VOID output_notation P((UNCH *, UNCH *, UNCH *));
+static VOID output_internal_entity P((UNCH *, int, UNCH *));
+static VOID output_external_entity P((UNCH *, int, UNIV, UNCH *, UNCH *,
+                                      UNCH *));
+static VOID output_subdoc P((UNCH *, UNIV, UNCH *, UNCH *));
+#ifdef SUPPORT_SUBDOC
+static VOID process_subdoc P((UNCH *, UNIV));
+#endif /* SUPPORT_SUBDOC */
+static VOID output_record_end P((void));
+static VOID output_pcdata P((UNS, UNCH *));
+static VOID output_cdata P((UNS, UNCH *));
+static VOID output_sdata P((UNS, UNCH *));
+static VOID output_entity_reference P((UNCH *));
+static VOID output_start_tag P((UNCH *));
+static VOID output_end_tag P((UNCH *));
+static VOID output_processing_instruction P((UNS, UNCH *));
+static VOID output_implied_attribute P((UNCH *, UNCH *));
+static char *attribute_type_string P((int));
+static VOID output_begin_attribute P((UNCH *, UNCH *, int));
+static VOID output_attribute_token P((UNS, UNCH *));
+static VOID output_end_attribute P((void));
+static VOID print_data P((UNS, UNCH *, int));
+static VOID print_string P((UNS, UNCH *, int));
+static VOID print_id P((UNIV, UNCH *, UNCH *));
+static VOID print_filename P((char *));
+static VOID output_location P((void));
+static VOID output_appinfo P((UNS, UNCH *));
+/* File-scope state, all initially zero/null.
+   NOTE(review): presumably tracks pending data output and the last
+   reported file/line -- confirm against flush_data/output_location. */
+static int have_data = 0;
+static char *current_filename = 0;
+static unsigned long current_lineno = 0;
+/* process_document: fetch events from sgmlnext() until it returns SGMLEOD
+   and pass each one to the matching output_* helper.
+   SUBDOCSW: when nonzero, SGMLAPP (appinfo) events are not output.
+   No ordinary event produces output while suppsw is set, and an event
+   whose CONTERSW() flag is set is dropped (NOTE(review): presumably a
+   context-error marker -- confirm in sgmlmain.h).
+   An event code that is not listed in the switch calls abort(). */
+VOID process_document(subdocsw)
+int subdocsw;
+{
+     enum sgmlevent rc;
+     struct rcbtag rcbtag;
+     struct rcbdata rcbdaf;
+
+     while ((rc = sgmlnext(&rcbdaf, &rcbtag)) != SGMLEOD) {
+#ifdef SUPPORT_SUBDOC
+          if (rc == SGMLDAF && !CONTERSW(rcbdaf) && NDESW(rcbdaf)
+              && NEXTYPE(NEPTR(rcbdaf)) == ESNSUB) { /* data event naming an ESNSUB entity */
+               if (!suppsw && !sgmlment(NEENAME(NEPTR(rcbdaf))))
+                    define_external_entity(NEPTR(rcbdaf));
+               process_subdoc(NEENAME(NEPTR(rcbdaf)) + 1, /* NOTE(review): + 1 presumably skips a length byte -- confirm */
+                              NEID(NEPTR(rcbdaf)));
+               continue; /* the normal dispatch below never sees this event */
+          }
+#endif /* SUPPORT_SUBDOC */
+          if (!suppsw)
+               switch (rc) {
+               case SGMLDAF: /* dispatch on CDESW / SDESW / NDESW, else plain pcdata */
+                    if (CONTERSW(rcbdaf))
+                         break;
+                    if (CDESW(rcbdaf))
+                         output_cdata(CDATALEN(rcbdaf), CDATA(rcbdaf));
+                    else if (SDESW(rcbdaf))
+                         output_sdata(CDATALEN(rcbdaf), CDATA(rcbdaf));
+                    else if (NDESW(rcbdaf)) {
+                         assert(NEXTYPE(NEPTR(rcbdaf)) != ESNSUB);
+                         if (!sgmlment(NEENAME(NEPTR(rcbdaf)))) /* define the entity before referencing it */
+                              define_external_entity(NEPTR(rcbdaf));
+                         output_entity_reference(NEENAME(NEPTR(rcbdaf)) + 1);
+                    }
+                    else
+                         output_pcdata(CDATALEN(rcbdaf), CDATA(rcbdaf));
+                    break;
+               case SGMLSTG: /* attributes, if any, are output before the start tag */
+                    if (CONTERSW(rcbtag))
+                         break;
+                    if (ALPTR(rcbtag))
+                         handle_attributes((UNCH *)NULL, ALPTR(rcbtag));
+                    output_start_tag(CURGI(rcbtag));
+                    break;
+               case SGMLETG:
+                    if (CONTERSW(rcbtag))
+                         break;
+                    output_end_tag(CURGI(rcbtag));
+                    break;
+               case SGMLPIS:
+                    if (CONTERSW(rcbdaf))
+                         break;
+                    output_processing_instruction(PDATALEN(rcbdaf),
+                                                  PDATA(rcbdaf));
+                    break;
+               case SGMLREF:
+                    if (CONTERSW(rcbdaf))
+                         break;
+                    output_record_end();
+                    break;
+               case SGMLAPP:
+                    if (CONTERSW(rcbdaf))
+                         break;
+                    if (!subdocsw)
+                         output_appinfo(ADATALEN(rcbdaf), ADATA(rcbdaf));
+                    break;
+               default:
+                    abort();
+               }
+     }
+}
+
+/* Output an indication that the document was conforming. 
*/ + +VOID output_conforming() +{ + if (!suppsw) + printf("%c\n", CONFORMING_CODE); +} + +static VOID define_external_entity(p) +PNE p; +{ + if (NEXTYPE(p) == ESNSUB) + output_subdoc(NEENAME(p) + 1, NEID(p), NEPUBID(p), NESYSID(p)); + else { + if (!NEDCNMARK(p)) + output_notation(NEDCN(p) + 1, NEDCNPUBID(p), NEDCNSYSID(p)); + output_external_entity(NEENAME(p) + 1, NEXTYPE(p), NEID(p), + NEPUBID(p), NESYSID(p), NEDCN(p) + 1); + if (NEAL(p)) + handle_attributes(NEENAME(p) + 1, NEAL(p)); + } +} + +static VOID define_entity(ename) +UNCH *ename; +{ + int rc; + PNE np; + UNCH *tp; + + if (sgmlment(ename)) /* already defined it */ + return; + rc = sgmlgent(ename, &np, &tp); + switch (rc) { + case 1: + define_external_entity(np); + break; + case 2: + case 3: + output_internal_entity(ename + 1, rc == 3, tp); + break; + } +} + +/* ENT is the name of the entity with which these attributes are associated; +if it's NULL, they're associated with the next start tag. */ + +static VOID handle_attributes(ent, al) +UNCH *ent; +struct ad *al; +{ + int aln; + + for (aln = 1; aln <= ADN(al); aln++) { + if (GET(ADFLAGS(al, aln), AERROR)) + ; + else if (GET(ADFLAGS(al, aln), AINVALID)) + ; + else if (ADVAL(al, aln) == NULL) + output_implied_attribute(ent, ADNAME(al, aln)); + else if (ADTYPE(al, aln) >= ATKNLIST) + handle_token_list(ent, al, aln); + else + handle_single_token(ent, al, aln); + if (BITON(ADFLAGS(al, aln), AGROUP)) + aln += ADNUM(al, aln); + } +} + +static VOID handle_token_list(ent, al, aln) +UNCH *ent; +struct ad *al; +int aln; +{ + UNCH *ptr; + int i; + if (ADTYPE(al, aln) == AENTITYS) { + ptr = ADVAL(al, aln); + for (i = 0; i < ADNUM(al, aln); i++) { + /* Temporarily make token look like normal + name with length and EOS. 
*/ + UNCH c = ptr[*ptr + 1]; + ptr[*ptr + 1] = '\0'; + *ptr += 2; + define_entity(ptr); + *ptr -= 2; + ptr += *ptr + 1; + *ptr = c; + } + } + output_begin_attribute(ent, ADNAME(al, aln), ADTYPE(al, aln)); + ptr = ADVAL(al, aln); + for (i = 0; i < ADNUM(al, aln); i++) { + /* The first byte is a length NOT including the length + byte; the tokens are not EOS terminated. */ + output_attribute_token(*ptr, ptr + 1); + ptr += *ptr + 1; + } + output_end_attribute(); +} + +static VOID handle_single_token(ent, al, aln) +UNCH *ent; +struct ad *al; +int aln; +{ + if (ADTYPE(al, aln) == ANOTEGRP && !DCNMARK(ADDATA(al, aln).x)) + output_notation(ADVAL(al, aln) + 1, + ADDATA(al, aln).x->pubid, + ADDATA(al, aln).x->sysid); + else if (ADTYPE(al, aln) == AENTITY) + define_entity(ADVAL(al, aln)); + output_begin_attribute(ent, ADNAME(al, aln), ADTYPE(al, aln)); + if (ADTYPE(al, aln) == ACHARS) + output_attribute_token(ustrlen(ADVAL(al, aln)), ADVAL(al, aln)); + else + output_attribute_token(*ADVAL(al, aln) - 2, ADVAL(al, aln) + 1); + output_end_attribute(); +} + +static VOID output_notation(name, pubid, sysid) +UNCH *name; +UNCH *pubid, *sysid; +{ + flush_data(); + print_id((UNIV)0, pubid, sysid); + printf("%c%s\n", DEFINE_NOTATION_CODE, name); +} + +static VOID output_internal_entity(ename, is_sdata, text) +UNCH *ename; +int is_sdata; +UNCH *text; +{ + flush_data(); + printf("%c%s %s ", DEFINE_INTERNAL_ENTITY_CODE, ename, + is_sdata ? "SDATA" : "CDATA"); + print_string(text ? 
ustrlen(text) : 0, text, 0); + putchar('\n'); +} + +static VOID output_subdoc(nm, id, pubid, sysid) +UNCH *nm; +UNIV id; +UNCH *pubid, *sysid; +{ + flush_data(); + print_id(id, pubid, sysid); + printf("%c%s\n", DEFINE_SUBDOC_ENTITY_CODE, nm); +} + +#ifdef SUPPORT_SUBDOC + +static VOID process_subdoc(nm, id) +UNCH *nm; +UNIV id; +{ + if (!suppsw) { + flush_data(); + output_location(); + printf("%c%s\n", START_SUBDOC_CODE, nm); + fflush(stdout); + } + fflush(stderr); + + if (id) { + char **argv; + int ret; + + argv = make_argv(id); + ret = run_process(argv); + if (ret != 0) + suberr++; + + current_filename = 0; + free(argv); + if (ret == 0) + get_subcaps(); + } + else { + suberr++; + appl_error(E_SUBDOC, nm); + } + + if (!suppsw) + printf("%c%s\n", END_SUBDOC_CODE, nm); +} + +#endif /* SUPPORT_SUBDOC */ + +static VOID output_external_entity(nm, xtype, id, pubid, sysid, dcn) +UNCH *nm, *dcn; +UNIV id; +UNCH *pubid, *sysid; +int xtype; +{ + char *type; + + flush_data(); + + print_id(id, pubid, sysid); + + switch (xtype) { + case ESNCDATA: + type = "CDATA"; + break; + case ESNNDATA: + type = "NDATA"; + break; + case ESNSDATA: + type = "SDATA"; + break; + default: + return; + } + printf("%c%s %s %s\n", DEFINE_EXTERNAL_ENTITY_CODE, nm, type, dcn); +} + +static VOID output_record_end() +{ + static UNCH re = RECHAR; + print_data(1, &re, 0); +} + +static VOID output_pcdata(n, s) +UNS n; +UNCH *s; +{ + print_data(n, s, 0); +} + +static VOID output_cdata(n, s) +UNS n; +UNCH *s; +{ + print_data(n, s, 0); +} + +static VOID output_sdata(n, s) +UNS n; +UNCH *s; +{ + print_data(n, s, 1); +} + +static VOID output_entity_reference(s) +UNCH *s; +{ + flush_data(); + output_location(); + printf("%c%s\n", REFERENCE_ENTITY_CODE, s); +} + +static VOID output_start_tag(s) +UNCH *s; +{ + flush_data(); + output_location(); + printf("%c%s\n", START_CODE, s); +} + +static VOID output_end_tag(s) +UNCH *s; +{ + flush_data(); + printf("%c%s\n", END_CODE, s); +} + +static VOID 
output_processing_instruction(n, s) +UNS n; +UNCH *s; +{ + flush_data(); + output_location(); + putchar(PI_CODE); + print_string(n, s, 0); + putchar('\n'); +} + +static VOID output_appinfo(n, s) +UNS n; +UNCH *s; +{ + flush_data(); + output_location(); + putchar(APPINFO_CODE); + print_string(n, s, 0); + putchar('\n'); +} + + +static VOID output_implied_attribute(ent, aname) +UNCH *ent, *aname; +{ + flush_data(); + if (ent) + printf("%c%s %s IMPLIED\n", DATA_ATTRIBUTE_CODE, ent, aname); + else + printf("%c%s IMPLIED\n", ATTRIBUTE_CODE, aname); +} + +static char *attribute_type_string(type) +int type; +{ + switch (type) { + case ANMTGRP: + case ANAME: + case ANMTOKE: + case ANUTOKE: + case ANUMBER: + case ANAMES: + case ANMTOKES: + case ANUTOKES: + case ANUMBERS: + case AID: + case AIDREF: + case AIDREFS: + return "TOKEN"; + case ANOTEGRP: + return "NOTATION"; + case ACHARS: + return "CDATA"; + case AENTITY: + case AENTITYS: + return "ENTITY"; + } +#if 0 + fatal("invalid attribute type %d", type); +#endif + return "INVALID"; +} + +static VOID output_begin_attribute(ent, aname, type) +UNCH *ent, *aname; +int type; +{ + flush_data(); + if (ent) + printf("%c%s %s %s", DATA_ATTRIBUTE_CODE, ent, aname, + attribute_type_string(type)); + else + printf("%c%s %s", ATTRIBUTE_CODE, aname, + attribute_type_string(type)); + +} + +static VOID output_attribute_token(vallen, val) +UNS vallen; +UNCH *val; +{ + putchar(' '); + print_string(vallen, val, 0); +} + +static VOID output_end_attribute() +{ + putchar('\n'); +} + +static VOID print_data(n, s, is_sdata) +UNS n; +UNCH *s; +int is_sdata; +{ + if (n > 0 || is_sdata) { + if (n == 1 && *s == RECHAR) + current_lineno++; + else + output_location(); + if (!have_data) + putchar(DATA_CODE); + print_string(n, s, is_sdata); + have_data = 1; + } +} + +static VOID flush_data() +{ + if (have_data) { + putchar('\n'); + have_data = 0; + } +} + +static VOID output_location() +{ + char *filename; + unsigned long lineno; + int filename_changed = 
0; + + if (!locsw) + return; + if (!sgmlloc(&lineno, &filename)) + return; + if (!current_filename || strcmp(filename, current_filename) != 0) + filename_changed = 1; + else if (lineno == current_lineno) + return; + flush_data(); + printf("%c%lu", LOCATION_CODE, lineno); + current_lineno = lineno; + if (filename_changed) { + putchar(' '); + print_filename(filename); + current_filename = filename; + } + putchar('\n'); +} + +static VOID print_string(slen, s, is_sdata) +UNS slen; +UNCH *s; +int is_sdata; +{ + if (is_sdata) + fputs("\\|", stdout); + while (slen > 0) { + UNCH ch = *s++; + slen--; + if (ch == DELSDATA) { + if (is_sdata) + ; /* I don't think this should happen */ + else + fputs("\\|", stdout); + ; + } + else if (ch == DELCDATA) + ; + else { + if (ch == DELNONCH) { + if (!slen) + break; + ch = UNSHIFTNON(*s); + s++; + slen--; + } + switch (ch) { + case RECHAR: + fputs("\\n", stdout); + break; + case '\\': + fputs("\\\\", stdout); + break; + default: + if (ISASCII(ch) && isprint(ch)) + putchar(ch); + else + printf("\\%03o", ch); + break; + } + } + } + if (is_sdata) + fputs("\\|", stdout); +} + + +static VOID print_id(id, pubid, sysid) +UNIV id; +UNCH *pubid; +UNCH *sysid; +{ + + if (pubid) { + putchar(PUBID_CODE); + print_string(ustrlen(pubid), pubid, 0); + putchar('\n'); + } + + if (sysid) { + putchar(SYSID_CODE); + print_string(ustrlen(sysid), sysid, 0); + putchar('\n'); + } + + if (id) { + char *p; + + for (p = id; *p != '\0'; p++) { + putchar(FILE_CODE); + do { + switch (*p) { + case '\\': + fputs("\\\\", stdout); + break; + case '\n': + fputs("\\n", stdout); + break; + default: + if (ISASCII(*p) && isprint((UNCH)*p)) + putchar(*p); + else + printf("\\%03o", (UNCH)*p); + break; + } + } while (*++p); + putchar('\n'); + } + } +} + +static VOID print_filename(s) +char *s; +{ + for (; *s; s++) + switch (*s) { + case '\\': + fputs("\\\\", stdout); + break; + case '\n': + fputs("\\n", stdout); + break; + default: + if (ISASCII(*s) && isprint((UNCH)*s)) + 
putchar(*s); + else + printf("\\%03o", (UNCH)*s); + break; + } +} + +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/lineout.h b/usr.bin/sgmls/sgmls/lineout.h new file mode 100644 index 0000000..f3c4231 --- /dev/null +++ b/usr.bin/sgmls/sgmls/lineout.h @@ -0,0 +1,23 @@ +/* lineout.h */ + +/* Output codes used by sgmls. */ + +#define DATA_CODE '-' +#define START_CODE '(' +#define END_CODE ')' +#define ATTRIBUTE_CODE 'A' +#define DATA_ATTRIBUTE_CODE 'D' +#define REFERENCE_ENTITY_CODE '&' +#define DEFINE_NOTATION_CODE 'N' +#define DEFINE_EXTERNAL_ENTITY_CODE 'E' +#define DEFINE_INTERNAL_ENTITY_CODE 'I' +#define PI_CODE '?' +#define DEFINE_SUBDOC_ENTITY_CODE 'S' +#define START_SUBDOC_CODE '{' +#define END_SUBDOC_CODE '}' +#define LOCATION_CODE 'L' +#define APPINFO_CODE '#' +#define PUBID_CODE 'p' +#define SYSID_CODE 's' +#define FILE_CODE 'f' +#define CONFORMING_CODE 'C' diff --git a/usr.bin/sgmls/sgmls/main.c b/usr.bin/sgmls/sgmls/main.c new file mode 100644 index 0000000..3435dce --- /dev/null +++ b/usr.bin/sgmls/sgmls/main.c @@ -0,0 +1,602 @@ +/* main.c - + Main program for sgmls. + + Written by James Clark (jjc@jclark.com). +*/ + +#include "config.h" +#include "std.h" +#include "getopt.h" +#include "entity.h" /* Templates for entity control blocks. */ +#include "adl.h" /* Definitions for attribute list processing. */ +#include "sgmlmain.h" /* Main interface to SGML services. */ +#include "appl.h" + +#define READCNT 512 + +/* Before using argv[0] in error messages, strip off everything up to and +including the last character in prog that occurs in PROG_PREFIX. */ + +#ifndef PROG_PREFIX +#define PROG_PREFIX "/" +#endif /* not PROG_PREFIX */ + +/* Message catalogue name. */ +#define CAT_NAME "sgmls" +/* Message set to use for application error messages. 
*/ +#define APP_SET 4 + +#ifdef HAVE_EXTENDED_PRINTF +#define xvfprintf vfprintf +#else +extern int xvfprintf P((FILE *, char *, va_list)); +#endif + +static VOID usage P((void)); +static VOID fatal VP((int, ...)); +static VOID do_error P((int, va_list)); +static VOID swinit P((struct switches *)); +static VOID write_caps P((char *, struct sgmlcap *)); + +static UNIV make_docent P((int, char **)); +static char *munge_program_name P((char *, char *)); +static VOID die P((void)); +#ifdef SUPPORT_SUBDOC +static VOID build_subargv P((struct switches *)); +static VOID cleanup P((void)); +static char *create_subcap_file P((void)); +#endif /* SUPPORT_SUBDOC */ + +static char *errlist[] = { + 0, + "Out of memory", + "Cannot open SGML document entity", + "Cannot exec `%s': %s", + "Cannot fork: %s", + "Error waiting for process: %s", + "Program %s got fatal signal %d", + "Cannot open `%s': %s", + "Subdocument capacity botch", + "Non-existent subdocument entity `%s' not processed", +}; + +int suppsw = 0; /* Non-zero means suppress output. */ +int locsw = 0; /* Non-zero means generate location info. */ +static char *prog; /* Program name (for error messages). */ +static nl_catd catd; /* Message catalogue descriptor. */ +static char *capfile = 0; /* File for capacity report. */ +extern char *version_string; + +char options[] = { + 'c', ':', 'd', 'e', 'g', 'i', ':', 'l', 'o', ':', 'p', 'r', 's', 'u', 'v', +#ifdef CANT_REDIRECT_STDERR + 'f', ':', +#endif /* CANT_REDIRECT_STDERR */ +#ifdef TRACE + 'x', ':', 'y', ':', +#endif /* TRACE */ + '\0' +}; + +#ifdef SUPPORT_SUBDOC +int suberr = 0; /* Error in subdocument. 
*/ +static char *subargv[sizeof(options)]; +static int subargc = 0; +static char nopenbuf[sizeof(long)*3 + 1]; +static char sgmldecl_file[L_tmpnam]; +static char subcap_file[L_tmpnam]; +#endif + +int main(argc, argv) +int argc; +char **argv; +{ + static char stderr_buf[BUFSIZ]; + int opt; +#ifdef CANT_REDIRECT_STDERR + char *errfile = 0; +#endif + struct sgmlcap cap; + struct switches sw; + int nincludes = 0; /* number of -i options */ + setbuf(stderr, stderr_buf); + + /* Define MAIN_HOOK in config.h if some function needs to be called here. */ +#ifdef MAIN_HOOK + MAIN_HOOK(argc, argv); +#endif +#ifdef SUPPORT_SUBDOC + subargv[subargc++] = argv[0]; +#endif + + prog = argv[0] = munge_program_name(argv[0], "sgmls"); + + catd = catopen(CAT_NAME, 0); + swinit(&sw); + + while ((opt = getopt(argc, argv, options)) != EOF) { + switch (opt) { + case 'l': /* Generate location information. */ + locsw = 1; + break; + case 'c': /* Print capacity usage. */ + capfile = optarg; + break; + case 's': /* Suppress output. */ + suppsw = 1; + break; + case 'd': /* Report duplicate entity declarations. */ + sw.swdupent = 1; + break; + case 'e': /* Provide entity stack trace in error msg. */ + sw.swenttr = 1; + break; +#ifdef CANT_REDIRECT_STDERR + case 'f': /* Redirect errors. */ + errfile = optarg; + break; +#endif /* CANT_REDIRECT_STDERR */ + case 'g': /* Provide GI stack trace in error messages. */ + sw.sweltr = 1; + break; + case 'p': /* Parse only the prolog. */ + sw.onlypro = 1; + suppsw = 1; + break; + case 'r': /* Give warning for defaulted references. */ + sw.swrefmsg = 1; + break; + case 'u': + sw.swundef = 1; + break; +#ifdef TRACE + case 'x': /* Trace options for the document body. */ + sw.trace = optarg; + break; + case 'y': /* Trace options for the prolog. */ + sw.ptrace = optarg; + break; +#endif /* TRACE */ + case 'v': /* Print the version number. 
*/ + fprintf(stderr, "sgmls version %s\n", version_string); + fflush(stderr); + break; + case 'o': + sw.nopen = atol(optarg); + if (sw.nopen <= 0) + usage(); + break; + case 'i': /* Define parameter entity as "INCLUDE". */ + sw.includes = (char **)xrealloc((UNIV)sw.includes, + (nincludes + 2)*sizeof(char *)); + sw.includes[nincludes++] = optarg; + sw.includes[nincludes] = 0; + break; + case '?': + usage(); + default: + abort(); + } + } + +#ifdef CANT_REDIRECT_STDERR + if (errfile) { + FILE *fp; + errno = 0; + fp = fopen(errfile, "w"); + if (!fp) + fatal(E_OPEN, errfile, strerror(errno)); + fclose(fp); + errno = 0; + if (!freopen(errfile, "w", stderr)) { + /* Can't use fatal() since stderr is now closed */ + printf("%s: ", prog); + printf(errlist[E_OPEN], errfile, strerror(errno)); + putchar('\n'); + exit(EXIT_FAILURE); + } + } +#endif /* CANT_REDIRECT_STDERR */ + + (void)sgmlset(&sw); + +#ifdef SUPPORT_SUBDOC + build_subargv(&sw); +#endif + if (sgmlsdoc(make_docent(argc - optind, argv + optind))) + fatal(E_DOC); + + process_document(sw.nopen > 0); + sgmlend(&cap); + if (capfile) + write_caps(capfile, &cap); +#ifdef SUPPORT_SUBDOC + cleanup(); + if (suberr) + exit(EXIT_FAILURE); +#endif /* SUPPORT_SUBDOC */ + if (sgmlgcnterr() > 0) + exit(EXIT_FAILURE); + if (!sw.nopen) + output_conforming(); + exit(EXIT_SUCCESS); +} + +static char *munge_program_name(arg, dflt) +char *arg, *dflt; +{ + char *p; +#ifdef PROG_STRIP_EXTENSION + char *ext; +#endif + if (!arg || !*arg) + return dflt; + p = strchr(arg, '\0'); + for (;;) { + if (p == arg) + break; + --p; + if (strchr(PROG_PREFIX, *p)) { + p++; + break; + } + } + arg = p; +#ifdef PROG_STRIP_EXTENSION + ext = strrchr(arg, '.'); + if (ext) { + p = (char *)xmalloc(ext - arg + 1); + memcpy(p, arg, ext - arg); + p[ext - arg] = '\0'; + arg = p; + } +#endif /* PROG_STRIP_EXTENSION */ +#ifdef PROG_FOLD +#ifdef PROG_STRIP_EXTENSION + if (!ext) { +#endif + p = xmalloc(strlen(arg) + 1); + strcpy(p, arg); + arg = p; +#ifdef 
PROG_STRIP_EXTENSION + } +#endif + for (p = arg; *p; p++) + if (ISASCII((unsigned char)*p) && isupper((unsigned char)*p)) + *p = tolower((unsigned char)*p); +#endif /* PROG_FOLD */ + return arg; +} + +static UNIV make_docent(argc, argv) +int argc; +char **argv; +{ + UNS len = 1; + int i; + UNIV res; + char *ptr; + static char *stdinname = STDINNAME; + + if (argc == 0) { + argv = &stdinname; + argc = 1; + } + + for (i = 0; i < argc; i++) + len += strlen(argv[i]) + 1; + + res = xmalloc(len); + ptr = (char *)res; + for (i = 0; i < argc; i++) { + strcpy(ptr, argv[i]); + ptr = strchr(ptr, '\0') + 1; + } + *ptr = '\0'; + return res; +} + + +static VOID usage() +{ + /* Don't mention -o since this are for internal use only. */ + fprintf(stderr, "Usage: %s [-deglprsuv]%s [-c file] [-i entity]%s [filename ...]\n", + prog, +#ifdef CANT_REDIRECT_STDERR + " [-f file]", +#else /* not CANT_REDIRECT_STDERR */ + "", +#endif /* not CANT_REDIRECT_STDERR */ +#ifdef TRACE + " [-x flags] [-y flags]" +#else /* not TRACE */ + "" +#endif /* not TRACE */ + ); + exit(EXIT_FAILURE); +} + +static VOID die() +{ +#ifdef SUPPORT_SUBDOC + cleanup(); +#endif /* SUPPORT_SUBDOC */ + exit(EXIT_FAILURE); +} + +static VOID swinit(swp) +struct switches *swp; +{ + swp->swenttr = 0; + swp->sweltr = 0; + swp->swbufsz = READCNT+2; + swp->prog = prog; + swp->swdupent = 0; + swp->swrefmsg = 0; +#ifdef TRACE + swp->trace = 0; + swp->ptrace = 0; +#endif /* TRACE */ + swp->catd = catd; + swp->swambig = 1; /* Always check for ambiguity. 
*/ + swp->swundef = 0; + swp->nopen = 0; + swp->onlypro = 0; + swp->includes = 0; + swp->die = die; +} + +#ifdef SUPPORT_SUBDOC + +static VOID build_subargv(swp) +struct switches *swp; +{ + if (suppsw) + subargv[subargc++] = "-s"; + if (locsw) + subargv[subargc++] = "-l"; + if (swp->swdupent) + subargv[subargc++] = "-d"; + if (swp->swenttr) + subargv[subargc++] = "-e"; + if (swp->sweltr) + subargv[subargc++] = "-g"; + if (swp->swrefmsg) + subargv[subargc++] = "-r"; +#ifdef TRACE + if (swp->trace) { + subargv[subargc++] = "-x"; + subargv[subargc++] = swp->trace; + } + if (swp->ptrace) { + subargv[subargc++] = "-y"; + subargv[subargc++] = swp->ptrace; + } +#endif /* TRACE */ + subargv[subargc++] = "-o"; + sprintf(nopenbuf, "%ld", swp->nopen + 1); + subargv[subargc++] = nopenbuf; +} + + +static +VOID handler(sig) +int sig; +{ + signal(sig, SIG_DFL); + cleanup(); + raise(sig); +} + +static +VOID cleanup() +{ + if (sgmldecl_file[0]) { + (void)remove(sgmldecl_file); + sgmldecl_file[0] = '\0'; + } + if (subcap_file[0]) { + (void)remove(subcap_file); + subcap_file[0] = '\0'; + } +} + +static +char *store_sgmldecl() +{ + if (!sgmldecl_file[0]) { + FILE *fp; + if (signal(SIGINT, SIG_IGN) != SIG_IGN) + signal(SIGINT, handler); +#ifdef SIGTERM + if (signal(SIGTERM, SIG_IGN) != SIG_IGN) + signal(SIGTERM, handler); +#endif /* SIGTERM */ +#ifdef SIGPIPE + if (signal(SIGPIPE, SIG_IGN) != SIG_IGN) + signal(SIGPIPE, handler); +#endif +#ifdef SIGHUP + if (signal(SIGHUP, SIG_IGN) != SIG_IGN) + signal(SIGHUP, handler); +#endif + tmpnam(sgmldecl_file); + errno = 0; + fp = fopen(sgmldecl_file, "w"); + if (!fp) + fatal(E_OPEN, sgmldecl_file, strerror(errno)); + sgmlwrsd(fp); + fclose(fp); + } + return sgmldecl_file; +} + +static +char *create_subcap_file() +{ + if (subcap_file[0] == '\0') { + FILE *fp; + tmpnam(subcap_file); + fp = fopen(subcap_file, "w"); + if (!fp) + fatal(E_OPEN, subcap_file, strerror(errno)); + fclose(fp); + } + return subcap_file; +} + +char **make_argv(id) +UNIV id; 
+{ + int nfiles; + char *p; + char **argv; + int i; + + for (p = (char *)id, nfiles = 0; *p; p = strchr(p, '\0') + 1) + nfiles++; + + argv = (char **)xmalloc((subargc + 2 + 1 + nfiles + 1)*sizeof(char *)); + memcpy((UNIV)argv, (UNIV)subargv, subargc*sizeof(char *)); + + i = subargc; + + argv[i++] = "-c"; + argv[i++] = create_subcap_file(); + + argv[i++] = store_sgmldecl(); + + for (p = (char *)id; *p; p = strchr(p, '\0') + 1) + argv[i++] = p; + argv[i] = 0; + return argv; +} + +VOID get_subcaps() +{ + long cap[NCAPACITY]; + FILE *fp; + int i; + + if (!subcap_file[0]) + return; + errno = 0; + fp = fopen(subcap_file, "r"); + if (!fp) + fatal(E_OPEN, subcap_file, strerror(errno)); + for (i = 0; i < NCAPACITY; i++) + if (fscanf(fp, "%*s %ld", cap + i) != 1) + fatal(E_CAPBOTCH); + fclose(fp); + sgmlsubcap(cap); +} + + +#endif /* SUPPORT_SUBDOC */ + +/* Print capacity statistics.*/ + +static VOID write_caps(name, p) +char *name; +struct sgmlcap *p; +{ + FILE *fp; + int i; + fp = fopen(name, "w"); + if (!fp) + fatal(E_OPEN, name, strerror(errno)); + /* This is in RACT format. */ + for (i = 0; i < NCAPACITY; i++) + fprintf(fp, "%s %ld\n", p->name[i], p->number[i]*p->points[i]); + fclose(fp); +} + +UNIV xmalloc(n) +UNS n; +{ + UNIV p = malloc(n); + if (!p) + fatal(E_NOMEM); + return p; +} + +UNIV xrealloc(s, n) +UNIV s; +UNS n; +{ + s = s ? realloc(s, n) : malloc(n); + if (!s) + fatal(E_NOMEM); + return s; +} + +static +#ifdef VARARGS +VOID fatal(va_alist) va_dcl +#else +VOID fatal(int errnum,...) +#endif +{ +#ifdef VARARGS + int errnum; +#endif + va_list ap; + +#ifdef VARARGS + va_start(ap); + errnum = va_arg(ap, int); +#else + va_start(ap, errnum); +#endif + do_error(errnum, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +#ifdef VARARGS +VOID appl_error(va_alist) va_dcl +#else +VOID appl_error(int errnum,...) 
+#endif +{ +#ifdef VARARGS + int errnum; +#endif + va_list ap; + +#ifdef VARARGS + va_start(ap); + errnum = va_arg(ap, int); +#else + va_start(ap, errnum); +#endif + do_error(errnum, ap); + va_end(ap); +} + +static +VOID do_error(errnum, ap) +int errnum; +va_list ap; +{ + char *text; + fprintf(stderr, "%s: ", prog); + assert(errnum > 0); + assert(errnum < sizeof(errlist)/sizeof(errlist[0])); + text = catgets(catd, APP_SET, errnum, errlist[errnum]); + assert(text != 0); + xvfprintf(stderr, text, ap); + fputc('\n', stderr); + fflush(stderr); +} + +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +comment-column: 30 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/md1.c b/usr.bin/sgmls/sgmls/md1.c new file mode 100644 index 0000000..a2db320 --- /dev/null +++ b/usr.bin/sgmls/sgmls/md1.c @@ -0,0 +1,862 @@ +#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */ +/* MDADL: Process ATTLIST declaration. +*/ +VOID mdadl(tbuf) +UNCH *tbuf; /* Work area for tokenization (tbuf). */ +{ + int i; /* Loop counter; temporary variable. */ + int adlim; /* Number of unused ad slots in al. */ + struct ad *alperm = 0; /* Attribute definition list. */ + int stored = 0; + + mdname = key[KATTLIST]; /* Identify declaration for messages. */ + subdcl = 0; /* No subject as yet. */ + parmno = 0; /* No parameters as yet. */ + mdessv = es; /* Save es level for entity nesting check. */ + reqadn = noteadn = 0; /* No required attributes yet. */ + idadn = conradn = 0; /* No special atts yet.*/ + AN(al) = 0; /* Number of attributes defined. */ + ADN(al) = 0; /* Number of ad's in al (atts + name vals).*/ + /* PARAMETER 1: Element name or a group of them. 
+ */ + parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); + TRACEMD("1: element name or group"); + switch (pcbmd.action) { + case NAS: + nmgrp[0] = etddef(tbuf); + nmgrp[1] = 0; + break; + case GRPS: + parsegrp(nmgrp, &pcbgrnm, tbuf); + break; + case RNS: /* Reserved name started. */ + if (ustrcmp(tbuf+1, key[KNOTATION])) { + mderr(118, tbuf+1, key[KNOTATION]); + return; + } + mdnadl(tbuf); + return; + default: + mderr(121, (UNCH *)0, (UNCH *)0); + return; + } + /* Save first GI for error msgs. */ + if (nmgrp[0]) + subdcl = nmgrp[0]->etdgi+1; + /* PARAMETER 2: Attribute definition list. + */ + parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); + TRACEMD("2: attribute list"); + if (pcbmd.action!=NAS) { + mderr(120, (UNCH *)0, (UNCH *)0); + return; + } + while (pcbmd.action==NAS) { + al[ADN(al)+1].adname = savenm(tbuf); + if ((adlim = ATTCNT-((int)++ADN(al)))<0) { + mderr(111, (UNCH *)0, (UNCH *)0); + adlfree(al, 1); + return; + } + ++AN(al); + if (mdattdef(adlim, 0)) { + adlfree(al, 1); + return; + } + parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); + } + if (AN(al)>0) { /* Save list only if 1 or more good atts. */ + if (reqadn) SET(ADLF(al), ADLREQ); /* Element must have start-tag. */ + if (noteadn) SET(ADLF(al), ADLNOTE); /* Element cannot be EMPTY. */ + if (conradn) SET(ADLF(al), ADLCONR); /* Element cannot be EMPTY. */ + alperm = (struct ad *)rmalloc((1+ADN(al))*ADSZ); + memcpy((UNIV)alperm, (UNIV)al, (1+ADN(al))*ADSZ ); + ds.attcnt += AN(al); /* Number of attributes defined. */ + ds.attgcnt += ADN(al) - AN(al); /* Number of att grp members. */ + TRACEADL(alperm); + } + /* Clear attribute list for next declaration. */ + MEMZERO((UNIV)al, (1+ADN(al))*ADSZ); + + /* PARAMETER 3: End of declaration. + */ + /* Next pcb.action was set during attribute definition loop. */ + TRACEMD(emd); + if (pcbmd.action!=EMD) {mderr(126, (UNCH *)0, (UNCH *)0); return;} + if (es!=mdessv) synerr(37, &pcbmd); + + /* EXECUTE: Store the definition for each element name specified. 
+ */ + TRACEGRP(nmgrp); + for (i = 0; nmgrp[i]; i++) { + if (nmgrp[i]->adl) { /* Error if an ADL exists. */ + mderr(112, (UNCH *)0, (UNCH *)0); + continue; + } + nmgrp[i]->adl = alperm; /* If virgin, store the adl ptr. */ + stored = 1; + if (alperm && nmgrp[i]->etdmod) + etdadl(nmgrp[i]); /* Check for conflicts with ETD. */ + } + if (!stored && alperm) { + adlfree(alperm, 1); + frem((UNIV)alperm); + } +} +/* ETDADL: Check compatibility between ETD and ADL. +*/ +VOID etdadl(p) +struct etd *p; /* Pointer to element type definition. */ +{ + parmno = 0; + /* Minimizable element cannot have required attribute. */ + if (GET(p->etdmin, SMO) && GET(p->adl[0].adflags, ADLREQ)) { + mderr(40, (UNCH *)0, (UNCH *)0); + RESET(p->etdmin, SMO); + } + /* Empty element cannot have NOTATION attribute. + Attribute is not removed (too much trouble), but we trap + attempts to specify it on the start-tag in adlval(). + */ + if (GET(p->etdmod->ttype, MNONE)) { + if (GET(p->adl[0].adflags, ADLNOTE)) + mderr(83, (UNCH *)0, (UNCH *)0); + + /* Empty element cannot have CONREF attribute. + Attribute is not removed because it just acts + like IMPLIED anyway. + */ + if (GET(p->adl[0].adflags, ADLCONR)) + mderr(85, (UNCH *)0, (UNCH *)0); + } + /* "-" should not be specified for the end-tag minimization if + the element has a content reference attribute. */ + if (GET(p->adl[0].adflags, ADLCONR) && BITON(p->etdmin, EMM)) + mderr(153, (UNCH *)0, (UNCH *)0); +} +/* MDNADL: Process ATTLIST declaration for notation. + TO DO: Pass deftab and dvtab as parameters so + that prohibited types can be handled by leaving + them out of the tables. +*/ +VOID mdnadl(tbuf) +UNCH *tbuf; /* Work area for tokenization (tbuf). */ +{ + int i; /* Loop counter; temporary variable. */ + int adlim; /* Number of unused ad slots in al. */ + struct ad *alperm = 0; /* Attribute definition list. */ + int stored = 0; + + /* PARAMETER 1: Notation name or a group of them. 
+ */ + parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); + TRACEMD("1: notation name or group"); + switch (pcbmd.action) { + case NAS: + nnmgrp[0] = dcndef(tbuf); + nnmgrp[1] = 0; + break; + case GRPS: + parsngrp(nnmgrp, &pcbgrnm, tbuf); + break; + default: + mderr(121, (UNCH *)0, (UNCH *)0); + return; + } + subdcl = nnmgrp[0]->ename+1; /* Save first name for error msgs. */ + /* PARAMETER 2: Attribute definition list. + */ + parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); + TRACEMD("2: attribute list"); + if (pcbmd.action!=NAS) { + mderr(120, (UNCH *)0, (UNCH *)0); + return; + } + while (pcbmd.action==NAS) { + al[ADN(al)+1].adname = savenm(tbuf); + if ((adlim = ATTCNT-((int)ADN(al)++))<0) { + mderr(111, (UNCH *)0, (UNCH *)0); + adlfree(al, 1); + return; + } + ++AN(al); + if (mdattdef(adlim, 1)) { + adlfree(al, 1); + return; + } + parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); + } + if (AN(al)>0) { /* Save list only if 1 or more good atts. */ + alperm = (struct ad *)rmalloc((1+ADN(al))*ADSZ); + memcpy((UNIV)alperm, (UNIV)al, (1+ADN(al))*ADSZ ); + ds.attcnt += AN(al); /* Number of attributes defined. */ + ds.attgcnt += ADN(al) - AN(al); /* Number of att grp members. */ + TRACEADL(alperm); + } + /* Clear attribute list for next declaration. */ + MEMZERO((UNIV)al, (1+ADN(al))*ADSZ); + + /* PARAMETER 3: End of declaration. + */ + /* Next pcb.action was set during attribute definition loop. */ + TRACEMD(emd); + if (pcbmd.action!=EMD) {mderr(126, (UNCH *)0, (UNCH *)0); return;} + if (es!=mdessv) synerr(37, &pcbmd); + + /* EXECUTE: Store the definition for each notation name specified. + */ + TRACENGR(nnmgrp); + for (i = 0; nnmgrp[i]; i++) { + if (nnmgrp[i]->adl) { /* Error if an ADL exists. */ + mderr(112, (UNCH *)0, (UNCH *)0); + continue; + } + nnmgrp[i]->adl = alperm; /* If virgin, store the adl ptr. 
*/ + if (nnmgrp[i]->entsw) + fixdatt(nnmgrp[i]); + stored = 1; + TRACEDCN(nnmgrp[i]); + } + if (!stored && alperm) { + adlfree(alperm, 1); + frem((UNIV)alperm); + } +} + +/* Data attributes have been specified for notation p, but entities +have already been declared with notation p. Fix up the definitions of +all entities with notation p. Generate an error for any data +attribute that was required. */ + +VOID fixdatt(p) +struct dcncb *p; +{ + int i; + for (i = 0; i < ENTHASH; i++) { + struct entity *ep; + for (ep = etab[i]; ep; ep = ep->enext) + if (ep->estore == ESN && ep->etx.n && ep->etx.n->nedcn == p) { + int adn; + initatt(p->adl); + /* Don't use adlval because if there were required + attributes the error message wouldn't say what + entity was involved. */ + for (adn = 1; adn <= ADN(al); adn++) { + if (GET(ADFLAGS(al,adn), AREQ)) { + sgmlerr(218, &pcbstag, ADNAME(al,adn), + ep->ename + 1); + SET(ADFLAGS(al,adn), AINVALID); + } + if (BITON(ADFLAGS(al, adn), AGROUP)) + adn += ADNUM(al, adn); + } + storedatt(ep->etx.n); + } + } +} + +/* MDATTDEF: Process an individual attribute definition. + The attribute name is parsed by the caller. + Duplicate attributes are parsed, but removed from list. + Returns 0 if successful, otherwise returns 1. +*/ +int mdattdef(adlim, datt) +int adlim; /* Remaining capacity of al (in tokens).*/ +int datt; /* Non-zero if a data attribute. */ +{ + int deftype; /* Default value type: 0=not keyword. */ + int errsw = 0; /* 1=semantic error; ignore att. */ + int novalsw = 0; /* 1=semantic error; treat as IMPLIED. */ + int attadn = (int)ADN(al); /* Save ad number of this attribute. */ + struct parse *grppcb = NULL; /* PCB for name/token grp parse. */ + int errcode; /* Error type returned by PARSEVAL, ANMTGRP. */ + UNCH *advalsv; /* Save area for permanent value ptr. */ + + /* PARAMETER 1: Attribute name (parsed by caller). 
+ */ + TRACEMD("1: attribute name"); + if (anmget((int)ADN(al)-1, al[attadn].adname)) { + errsw = 1; + mderr(99, ADNAME(al,attadn), (UNCH *)0); + } + ADNUM(al,attadn) = ADFLAGS(al,attadn) = ADLEN(al,attadn) = 0; + ADVAL(al,attadn) = 0; ADDATA(al,attadn).x = 0; ADTYPE(al,attadn) = ANMTGRP; + /* PARAMETER 2: Declared value. + */ + parsemd(lbuf, NAMECASE, &pcblitp, NAMELEN); + TRACEMD("2: declared value"); + switch (pcbmd.action) { + case NAS: /* Keyword for value type. */ + switch (ADTYPE(al,attadn) = (UNCH)mapsrch(dvtab, lbuf+1)) { + case 0: + mderr(100, ADNAME(al,attadn), lbuf+1); + return 1; + case ANOTEGRP: + if (datt) { + errsw = 1; + mderr(156, (UNCH *)0, (UNCH *)0); + } + else if (!noteadn) noteadn = ADN(al); + else { + errsw = 1; + mderr(101, ADNAME(al,attadn), (UNCH *)0); + } + grppcb = &pcbgrnm; /* NOTATION requires name grp. */ + parsemd(lbuf, NAMECASE, &pcblitp, NAMELEN);/* Get GRPO*/ + break; + case AID: + if (datt) { + errsw = 1; + mderr(144, (UNCH *)0, (UNCH *)0); + } + else if (!idadn) + idadn = attadn; + else { + errsw = 1; + mderr(102, ADNAME(al,attadn), (UNCH *)0); + } + break; + case AIDREF: + case AIDREFS: + if (datt) { + errsw = 1; + mderr(155, (UNCH *)0, (UNCH *)0); + } + break; + case AENTITY: + case AENTITYS: + if (datt) { + errsw = 1; + mderr(154, (UNCH *)0, (UNCH *)0); + } + break; + } + break; + case GRPS: + grppcb = &pcbgrnt; /* Normal grp is name token grp. */ + break; + case EMD: + mderr(103, ADNAME(al,attadn), (UNCH *)0); + return 1; + default: + mderr(104, ADNAME(al,attadn), (UNCH *)0); + return 1; + } + /* PARAMETER 2A: Name token group. + */ + if (grppcb != NULL) { + TRACEMD("2A: name group"); + switch (pcbmd.action) { + case GRPS: /* Name token list. */ + SET(ADFLAGS(al,attadn), AGROUP); + /* Call routine to parse group, create ad entries in adl. */ + errcode = anmtgrp(grppcb, al+attadn, + (GRPCNT<adlim ? 
GRPCNT+1 : adlim+1), + &al[attadn].adnum, ADN(al)); + if (errcode<=0) { + if (adlim < GRPCNT) + mderr(111, (UNCH *)0, (UNCH *)0); + else + mderr(105, ADNAME(al,attadn), (UNCH *)0); + return 1; + } + ADN(al) += ADNUM(al,attadn); /* Add grp size to total ad cnt.*/ + break; + default: + mderr(106, ADNAME(al,attadn), (UNCH *)0); + return 1; + } + } + /* PARAMETER 3: Default value keyword. + */ + parsemd(lbuf, AVALCASE, + (ADTYPE(al,attadn)==ACHARS) ? &pcblitr : &pcblitt, LITLEN); + TRACEMD("3: default keyword"); + switch (pcbmd.action) { + case RNS: /* Keyword. */ + deftype = mapsrch(deftab, lbuf+1); + switch (deftype) { + case DFIXED: /* FIXED */ + SET(ADFLAGS(al,attadn), AFIXED); + parsemd(lbuf, AVALCASE, + (ADTYPE(al,attadn)==ACHARS) ? &pcblitr : &pcblitt, + LITLEN); /* Real default. */ + goto parm3x; /* Go process specified value. */ + case DCURR: /* CURRENT: If ID, treat as IMPLIED. */ + if (ADTYPE(al,attadn)==AID) { + mderr(80, ADNAME(al,attadn), (UNCH *)0); + break; + } + if (datt) { + mderr(157, (UNCH *)0, (UNCH *)0); + break; + } + SET(ADFLAGS(al,attadn), ACURRENT); + break; + case DREQ: /* REQUIRED */ + SET(ADFLAGS(al,attadn), AREQ); ++reqadn; + break; + case DCONR: /* CONREF */ + if (ADTYPE(al,attadn)==AID) { + mderr(107, ADNAME(al,attadn), (UNCH *)0); + break; + } + if (datt) { + mderr(158, (UNCH *)0, (UNCH *)0); + break; + } + SET(ADFLAGS(al,attadn), ACONREF); conradn = 1; + case DNULL: /* IMPLIED */ + break; + default: /* Unknown keyword is an error. */ + mderr(108, ADNAME(al,attadn), lbuf+1); + errsw = 1; + } + if (errsw) { + /* Ignore erroneous att. */ + adlfree(al, attadn); + --AN(al); + ADN(al) = (UNCH)attadn-1; + } + return(0); + default: + break; + } + /* PARAMETER 3x: Default value (non-keyword). + */ + parm3x: + TRACEMD("3x: default (non-keyword)"); + if (ADTYPE(al,attadn)==AID) { /* If ID, treat as IMPLIED. */ + mderr(81, ADNAME(al,attadn), (UNCH *)0); + novalsw = 1; /* Keep parsing to keep things straight. 
*/ + } + switch (pcbmd.action) { + case LIT: /* Literal. */ + case LITE: /* Literal. */ + /* Null string (except CDATA) is error: msg and treat as IMPLIED. */ + if (*lbuf == '\0' && ADTYPE(al,attadn)!=ACHARS) { + mderr(82, ADNAME(al,attadn), (UNCH *)0); + novalsw = 1; + } + break; + case NAS: /* Name character string. */ + case NMT: /* Name character string. */ + case NUM: /* Number or number token string. */ + /* The name won't have a length byte because AVALCASE was specified. */ + break; + case CDR: + parsetkn(lbuf, NMC, LITLEN); + break; + case EMD: + mderr(109, ADNAME(al,attadn), (UNCH *)0); + return 1; + default: + mderr(110, ADNAME(al,attadn), (UNCH *)0); + return 1; + } + if (errsw) { + /* Ignore erroneous att. */ + adlfree(al, attadn); + --AN(al); + ADN(al) = (UNCH)attadn-1; + return(0); + } + if (novalsw) return(0); + + /* PARAMETER 3y: Validate and store default value. + */ + if (ADTYPE(al,attadn)==ACHARS) { + UNS len = vallen(ACHARS, 0, lbuf); + if (len > LITLEN) { + /* Treat as implied. */ + sgmlerr(224, &pcbmd, ADNAME(al,attadn), (UNCH *)0); + return 0; + } + /* No more checking for CDATA value. */ + ADNUM(al,attadn) = 0; /* CDATA is 0 tokens. */ + ADVAL(al,attadn) = savestr(lbuf);/* Store default; save ptr. */ + ADLEN(al,attadn) = len; + ds.attdef += len; + return 0; + } + /* Parse value and save token count (GROUP implies 1 token). */ + advalsv = (UNCH *)rmalloc(ustrlen(lbuf)+2); /* Storage for tokenized value. */ + errcode = parseval(lbuf, (UNS)ADTYPE(al,attadn), advalsv); + if (BITOFF(ADFLAGS(al,attadn), AGROUP)) ADNUM(al,attadn) = (UNCH)tokencnt; + + /* If value was invalid, or was a group member that was not in the group, + issue an appropriate message and set the error switch. 
*/ + if (errcode) + {sgmlerr((UNS)errcode, &pcbmd, ADNAME(al,attadn), lbuf); errsw = 1;} + else if ( BITON(ADFLAGS(al,attadn), AGROUP) + && !amemget(&al[attadn], (int)ADNUM(al,attadn), advalsv) ) { + sgmlerr(79, &pcbmd, ADNAME(al,attadn), advalsv+1); + errsw = 1; + } + ADLEN(al,attadn) = vallen(ADTYPE(al,attadn), ADNUM(al,attadn), advalsv); + if (ADLEN(al,attadn) > LITLEN) { + sgmlerr(224, &pcbmd, ADNAME(al,attadn), (UNCH *)0); + ADLEN(al,attadn) = 0; + errsw = 1; + } + /* For valid tokenized value, save it and update statistics. */ + if (!errsw) { + ADVAL(al,attadn) = advalsv; + ds.attdef += ADLEN(al,attadn); + return 0; + } + /* If value was bad, free the value's storage and treat as + IMPLIED or REQUIRED. */ + frem((UNIV)advalsv); /* Release storage for value. */ + ADVAL(al,attadn) = NULL; /* And make value NULL. */ + return 0; +} +/* ANMTGRP: Parse a name or name token group, create attribute descriptors + for its members, and add them to the attribute descriptor list. + The parse either terminates or returns a good token, so no + switch is needed. +*/ +int anmtgrp(pcb, nt, grplim, adn, adsz) +struct parse *pcb; /* PCB for name or name token grp. */ +struct ad nt[]; /* Buffer for creating name token list. */ +int grplim; /* Maximum size of list (plus 1). */ +UNS *adn; /* Ptr to number of names or tokens in grp. */ +int adsz; /* Size of att def list. */ +{ + UNCH adtype = (UNCH)(pcb==&pcbgrnt ? ANMTGRP:ANOTEGRP);/*Attribute type.*/ + int essv = es; /* Entity stack level when grp started. */ + + *adn = 0; /* Group is empty to start. */ + while (parse(pcb)!=GRPE && *adn<grplim) { + switch (pcb->action) { + case NAS_: /* Name or name token (depending on pcb). 
*/ + case NMT_: + parsenm(lbuf, NAMECASE); + nt[*adn+1].adname = savenm(lbuf); + if (antvget((int)(adsz+*adn), nt[*adn+1].adname, (UNCH **)0)) + mderr(98, ntoa((int)*adn+1), nt[*adn+1].adname+1); + nt[++*adn].adtype = adtype; + nt[*adn].addef = NULL; + continue; + + case EE_: /* Entity ended (correctly or incorrectly). */ + if (es<essv) {synerr(37, pcb); essv = es;} + continue; + + case PIE_: /* PI entity reference (invalid). */ + entpisw = 0; /* Reset PI entity indicator. */ + synerr(59, pcb); + continue; + + default: + break; + } + break; + } + if (es!=essv) synerr(37, pcb); + if (*adn==grplim) return -1; + else return *adn; /* Return number of tokens. */ +} +/* MDDTDS: Process start of DOCTYPE declaration (through MSO). +*/ +VOID mddtds(tbuf) +UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */ +{ + struct fpi fpicb; /* Formal public identifier structure. */ + union etext etx; /* Ptr to entity text. */ + UNCH estore = ESD; /* Entity storage class. */ + int emdsw = 0; /* 1=end of declaration found; 0=not yet. */ + + mdname = key[KDOCTYPE]; /* Identify declaration for messages. */ + subdcl = NULL; /* No subject as yet. */ + parmno = 0; /* No parameters as yet. */ + mdessv = es; /* Save es for checking entity nesting. */ + dtdrefsw = 0; /* No external DTD entity as yet. */ + /* PARAMETER 1: Document type name. + */ + pcbmd.newstate = 0; + parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); + TRACEMD("1: doc type name"); + if (pcbmd.action!=NAS) {mderr(120, (UNCH *)0, (UNCH *)0); return;} + dtype = savenm(tbuf); + subdcl = dtype+1; /* Subject of declaration for error msgs. */ + + /* PARAMETER 2: External identifier keyword or MDS. + */ + pcbmd.newstate = 0; + parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); + TRACEMD("2: extid or MDS"); + switch (pcbmd.action) { + case NAS: + if (mdextid(tbuf, &fpicb, dtype+1, &estore, (PNE)0)==0) return; + if ((etx.x = entgen(&fpicb))==0) + mderr(146, dtype+1, (UNCH *)0); + else + dtdrefsw = 1; /* Signal external DTD entity. 
*/ + break; + case MDS: + goto execute; + default: + mderr(128, (UNCH *)0, (UNCH *)0); + return; + } + /* PARAMETER 3: MDS or end of declaration. + */ + TRACEMD("3: MDS or EMD"); + switch (pcbmd.action) { + default: /* Treat as end of declaration. */ + mderr(126, (UNCH *)0, (UNCH *)0); + case EMD: + emdsw = 1; + case MDS: + break; + } + /* EXECUTE: Store entity definition if an external ID was specified. + */ + execute: + if (es!=mdessv) synerr(37, &pcbmd); + propcb = &pcbmds; /* Prepare to parse doc type definition (MDS). */ + if (dtdrefsw) { + /* TO DO: If concurrent DTD's supported, free existing + etext for all but first DTD (or reuse it). */ + entdef(indtdent, estore, &etx); + ++ds.ecbcnt; ds.ecbtext += entlen; + if (emdsw) { + REPEATCC; /* Push back the MDC. */ + *FPOS = lex.d.msc; /* Simulate end of DTD subset. */ + REPEATCC; /* Back up to read MSC next. */ + delmscsw = 1; /* Insert MSC after referenced DTD. */ + } + } + indtdsw = 1; /* Allow "DTD only" parameters. */ + return; +} +/* MDDTDE: Process DOCTYPE declaration end. +*/ +VOID mddtde(tbuf) +UNCH *tbuf; /* Work area for tokenization. */ +{ + mdessv = es; /* Save es for checking entity nesting. */ + propcb = &pcbpro; /* Restore normal prolog parse. */ + indtdsw = 0; /* Prohibit "DTD only" parameters. */ + + mdname = key[KDOCTYPE]; /* Identify declaration for messages. */ + subdcl = dtype+1; /* Subject of declaration for error msgs. */ + parmno = 0; /* No parameters as yet. */ + /* PARAMETER 4: End of declaration. + */ + pcbmd.newstate = 0; + parsemd(tbuf, NAMECASE, &pcblitp, LITLEN); + TRACEMD(emd); + if (pcbmd.action!=EMD) mderr(126, (UNCH *)0, (UNCH *)0); + if (es!=mdessv) synerr(37, &pcbmd); +} +/* MDELEM: Process ELEMENT declaration. +*/ +VOID mdelem(tbuf) +UNCH *tbuf; /* Work area for tokenization (tbuf). */ +{ + UNCH *ranksuff = lbuf; /* Rank suffix. */ + UNS dctype = 0; /* Declared content type (from dctab). */ + UNCH fmin = 0; /* Minimization bit flags. */ + int i; /* Loop counter. 
*/ + UNS u; /* Temporary variable. */ + struct etd **mexgrp, **pexgrp; /* Ptr to model exceptions array. */ + struct thdr *cmod, *cmodsv; /* Ptr to content model. */ + UNCH *etdgi; /* GI of current etd (when going through group).*/ + int minomitted = 0; /* Tag minimization parameters omitted. */ + + mdname = key[KELEMENT]; /* Identify declaration for messages. */ + subdcl = NULL; /* No subject as yet. */ + parmno = 0; /* No parameters as yet. */ + mdessv = es; /* Save es level for entity nesting check. */ + ranksuff[0] = 0; + mexgrp = pexgrp = 0; + + /* PARAMETER 1: Element name or a group of them. + */ + parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); + TRACEMD("1: element name or grp"); + switch (pcbmd.action) { + case NAS: + nmgrp[0] = etddef(tbuf); + nmgrp[1] = 0; + break; + case GRPS: + parsegrp(nmgrp, &pcbgrnm, tbuf); + break; + default: + mderr(121, (UNCH *)0, (UNCH *)0); + return; + } + /* Save first GI for trace and error messages. */ + if (nmgrp[0]) + subdcl = nmgrp[0]->etdgi+1; + + /* PARAMETER 1A: Rank suffix (optional). + */ + parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); + TRACEMD("1A: rank suffix"); + switch (pcbmd.action) { + case NUM: + ustrcpy(ranksuff, tbuf); + parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); + default: + break; + } + /* PARAMETER 2A: Start-tag minimization. + */ + TRACEMD("2A: start min"); + switch (pcbmd.action) { + case CDR: + break; + case NAS: + if (!ustrcmp(tbuf+1, key[KO])) { + if (OMITTAG==YES) SET(fmin, SMO); + break; + } + /* fall through */ + default: + if (OMITTAG==NO) {minomitted=1; break;} + mderr(129, tbuf+1, (UNCH *)0); + return; + } + /* Must omit omitted end-tag minimization, if omitted + start-tag minimization was omitted (because OMITTAG == NO). */ + if (!minomitted) { + /* PARAMETER 2B: End-tag minimization. 
+ */ + parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); + TRACEMD("2B: end min"); + switch (pcbmd.action) { + case NAS: + if (ustrcmp(tbuf+1, key[KO])) {mderr(129, tbuf+1, (UNCH *)0); return;} + if (OMITTAG==YES) SET(fmin, EMO); + break; + case CDR: + SET(fmin, EMM); + break; + default: + mderr(129, tbuf+1, (UNCH *)0); + return; + } + /* PARAMETER 3: Declared content. + */ + parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); + } + TRACEMD("3: declared content"); + switch (pcbmd.action) { + case NAS: + dctype = mapsrch(dctab, tbuf+1); + if (!dctype) {mderr(24, tbuf+1, (UNCH *)0); return;} + /* Eliminate incompatibilities among parameters. */ + if (GET(fmin, SMO) && GET(dctype, MNONE+MCDATA+MRCDATA)) { + mderr(58, (UNCH *)0, (UNCH *)0); + RESET(fmin, SMO); + } + if (GET(dctype, MNONE) && BITON(fmin, EMM)) { + mderr(87, (UNCH *)0, (UNCH *)0); + SET(fmin, EMO); + } + /* If valid, process like a content model. */ + case GRPS: + cmodsv = parsemod((int)(pcbmd.action==GRPS ? 0 : dctype)); + if (cmodsv==0) return; + u = (dctype ? 1 : cmodsv->tu.tnum+2) * THSZ; + cmod = (struct thdr *)rmalloc(u); + memcpy((UNIV)cmod , (UNIV)cmodsv, u ); + ds.modcnt += cmod->tu.tnum; + TRACEMOD(cmod); + break; + default: + mderr(130, (UNCH *)0, (UNCH *)0); + return; + } + /* PARAMETERS 3A, 3B: Exceptions or end. + */ + parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); + if (BITOFF(cmod->ttype, MCDATA+MRCDATA+MNONE)) { + /* PARAMETER 3A: Minus exceptions. + */ + TRACEMD("3A: -grp"); + switch (pcbmd.action) { + case MGRP: + /* We cheat and use nnmgrp for this. */ + mexgrp = copygrp((PETD *)nnmgrp, + u = parsegrp((PETD *)nnmgrp, &pcbgrnm, tbuf)); + ++ds.pmexgcnt; ds.pmexcnt += u-1; + TRACEGRP(mexgrp); + parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); + default: + break; + } + /* PARAMETER 3B: Plus exceptions. 
+ */ + TRACEMD("3B: +grp"); + switch (pcbmd.action) { + case PGRP: + pexgrp = copygrp((PETD *)nnmgrp, + u = parsegrp((PETD *)nnmgrp, &pcbgrnm, tbuf)); + ++ds.pmexgcnt; ds.pmexcnt += u-1; + TRACEGRP(pexgrp); + parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); + default: + break; + } + } + /* PARAMETER 4: End of declaration. + */ + TRACEMD(emd); + if (pcbmd.action!=EMD) mderr(126, (UNCH *)0, (UNCH *)0); + if (es!=mdessv) synerr(37, &pcbmd); + + /* EXECUTE: Store the definition for each element name specified. + */ + TRACEGRP(nmgrp); + for (i = -1; nmgrp[++i];) { + etdgi = nmgrp[i]->etdgi; + if (*ranksuff) { + if ((tbuf[0] = *etdgi + ustrlen(ranksuff)) - 2 > NAMELEN) { + mderr(131, etdgi+1, ranksuff); + continue; + } + memcpy(tbuf+1, etdgi+1, *etdgi-1); + ustrcpy(tbuf+*etdgi-1, ranksuff); + etdcan(etdgi); + nmgrp[i] = etddef(tbuf); + } + if (nmgrp[i]->etdmod) {mderr(56, etdgi+1, (UNCH *)0); continue;} + etdset(nmgrp[i], fmin+ETDDCL, cmod, mexgrp, pexgrp, nmgrp[i]->etdsrm); + ++ds.etdcnt; + if (nmgrp[i]->adl) etdadl(nmgrp[i]); /* Check ETD conflicts. */ + TRACEETD(nmgrp[i]); + } +} + +VOID adlfree(al, aln) +struct ad *al; +int aln; +{ + for (; aln <= ADN(al); aln++) { + frem((UNIV)al[aln].adname); + if (ADVAL(al, aln)) + frem((UNIV)ADVAL(al, aln)); + if (BITON(ADFLAGS(al, aln), AGROUP)) { + int i; + for (i = 0; i < ADNUM(al, aln); i++) + frem((UNIV)al[aln + i + 1].adname); + aln += ADNUM(al, aln); + } + } +} + +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +comment-column: 30 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/md2.c b/usr.bin/sgmls/sgmls/md2.c new file mode 100644 index 0000000..846c555 --- /dev/null +++ b/usr.bin/sgmls/sgmls/md2.c @@ -0,0 +1,801 @@ +#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */ +/* MDENTITY: Process ENTITY declaration. +*/ +VOID mdentity(tbuf) +UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. 
*/ +{ + struct fpi fpicb; /* Formal public identifier structure. */ + struct fpi *fpis = &fpicb; /* Ptr to current or #DEFAULT fpi. */ + union etext etx; /* Ptr to entity text. */ + UNCH estore = ESM; /* Entity storage class. */ + struct entity *ecb; /* Ptr to entity control block. */ + int parmsw = 0; /* 1=parameter entity declaration; 0 = not. */ + int defltsw = 0; /* 1=#DEFAULT declaration; 0=not. */ + PNE pne = 0; /* Ptr to N/C/SDATA entity control block. */ + + mdname = key[KENTITY]; /* Declaration name for messages. */ + subdcl = NULL; /* No subject as yet. */ + parmno = 0; /* No parameters as yet. */ + mdessv = es; /* Save es for checking entity nesting. */ + /* PARAMETER 1: Entity name. + */ + pcbmd.newstate = 0; + parsemd(nmbuf, ENTCASE, &pcblitp, NAMELEN); + TRACEMD("1: entity nm"); + switch (pcbmd.action) { + case PEN: + parsemd(nmbuf + 1, ENTCASE, &pcblitp, NAMELEN); + if (pcbmd.action!=NAS) {mderr(120, (UNCH *)0, (UNCH *)0); return;} + if (nmbuf[1] == NAMELEN + 2) { + /* It was too long. */ + nmbuf[0] = NAMELEN + 2; + nmbuf[NAMELEN + 1] = '\0'; + mderr(65, (UNCH *)0, (UNCH *)0); + } + else + nmbuf[0] = nmbuf[1] + 1; /* Increment length for PERO. */ + nmbuf[1] = lex.d.pero; /* Prefix PERO to name. */ + parmsw = 1; /* Indicate parameter entity. */ + case NAS: + break; + case RNS: /* Reserved name started. */ + if (ustrcmp(nmbuf+1, key[KDEFAULT])) { + mderr(118, nmbuf+1, key[KDEFAULT]); + return; + } + memcpy(nmbuf, indefent, *indefent);/* Copy #DEFAULT to name buffer. */ + fpis = &fpidf; /* Use #DEFAULT fpi if external. */ + defltsw = 1; /* Indicate #DEFAULT is being defined.*/ + break; + default: + mderr(122, (UNCH *)0, (UNCH *)0); + return; + } + subdcl = nmbuf+1; /* Subject name for error messages. */ + /* PARAMETER 2: Entity text keyword (optional). 
+ */ + pcbmd.newstate = 0; + parsemd(tbuf, NAMECASE, &pcblitp, LITLEN); + TRACEMD("2: keyword"); + switch (pcbmd.action) { + case NAS: + if ((estore = (UNCH)mapsrch(enttab, tbuf+1))==0) { + estore = parmsw ? ESP : ESF; + pne = (PNE)rmalloc(NESZ); + if (mdextid(tbuf, fpis, nmbuf+1+parmsw, &estore, pne)==0) + return; + if (defltsw) etx.x = NULL; + else if ((etx.x = entgen(&fpicb))==0) { + if (parmsw) + mderr(148, nmbuf+2, (UNCH *)0); + else + mderr(147, nmbuf+1, (UNCH *)0); + } + goto parm4; + } + if (parmsw && (estore==ESX || estore==ESC)) { + mderr(38, tbuf+1, (UNCH *)0); + estore = ESM; + } + pcbmd.newstate = 0; + parsemd(tbuf, NAMECASE, &pcblitp, LITLEN); + break; + default: + estore = ESM; + break; + } + /* PARAMETER 3: Parameter literal. + */ + TRACEMD("3: literal"); + switch (pcbmd.action) { + case LITE: + case LIT: + switch (estore) { + case ESM: /* LITERAL: parameter literal required. */ + case ESC: /* CDATA: parameter literal required. */ + case ESX: /* SDATA: parameter literal required. */ + case ESI: /* PI: parameter literal required. */ + etx.c = savestr(tbuf); + break; + case ESMD: /* MD: parameter literal required. */ + etx.c = sandwich(tbuf, lex.m.mdo, lex.m.mdc); + goto bcheck; + case ESMS: /* MS: parameter literal required. */ + etx.c = sandwich(tbuf, lex.m.mss, lex.m.mse); + goto bcheck; + case ESS: /* STARTTAG: parameter literal required. */ + etx.c = sandwich(tbuf, lex.m.stag, lex.m.tagc); + goto bcheck; + case ESE: /* ENDTAG: parameter literal required. */ + etx.c = sandwich(tbuf, lex.m.etag, lex.m.tagc); + bcheck: + if (etx.c == 0) { + mderr(225, (UNCH *)0, (UNCH *)0); + return; + } + break; + } + break; + default: + mderr(123, (UNCH *)0, (UNCH *)0); + return; + } + /* PARAMETER 4: End of declaration. 
+ */ + pcbmd.newstate = 0; + parsemd(tbuf, NAMECASE, &pcblitp, LITLEN); + parm4: + TRACEMD(emd); + if (pcbmd.action!=EMD) mderr(126, (UNCH *)0, (UNCH *)0); + if (es!=mdessv) synerr(37, &pcbmd); + + /* EXECUTE: If the entity already exists, ignore the new definition. + If it is a new entity, store the definition. + */ + if ((ecb = entfind(nmbuf))!=0 && ecb->estore) { + if (ecb->dflt) { + mderr(228, nmbuf + 1, (UNCH *)0); + hout((THASH)etab, nmbuf, hash(nmbuf, ENTHASH)); + if (ecb->estore == ESN) { + frem((UNIV)NEID(ecb->etx.n)); + frem((UNIV)ecb->etx.n); + } + else if (ecb->estore >= ESFM) + frem((UNIV)ecb->etx.x); + frem((UNIV)ecb); + } + else { + /* Duplicate definition: not an error. */ + if (sw.swdupent) mderr(68, nmbuf+1, (UNCH *)0); + if (estore<ESFM) frem((UNIV)etx.c); + return; + } + } + ++ds.ecbcnt; /* Do capacity before NOTATION. */ + ds.ecbtext += estore<ESFM ? ustrlen(etx.c) : entlen; + ecb = entdef(nmbuf, estore, &etx); /* Define the entity. */ + if (estore==ESN) { /* If entity is external: */ + NEENAME(pne) = ecb->ename; /* Store entity name in ne. */ + NEID(pne) = etx.x; /* Store system fileid in ne. */ + NESYSID(pne) = fpis->fpisysis ? savestr(fpis->fpisysis) : 0; + NEPUBID(pne) = fpis->fpipubis ? savestr(fpis->fpipubis) : 0; + ecb->etx.n = pne; /* Store ne control block in etx. */ + TRACEESN(pne); + } + else if (pne) + frem((UNIV)pne); + if (defltsw) { + ecbdeflt = ecb; /* If #DEFAULT save ecb. */ + if (fpidf.fpipubis) + fpidf.fpipubis = savestr(fpidf.fpipubis); + if (fpidf.fpisysis) + fpidf.fpisysis = savestr(fpidf.fpisysis); + } +} +/* SANDWICH: Catenate a prefix and suffix to a string. + The result has an EOS but no length. + Return 0 if the result if longer than LITLEN. +*/ +UNCH *sandwich(s, pref, suff) +UNCH *s; /* String, with EOS. */ +UNCH *pref; /* Prefix, with length and EOS. */ +UNCH *suff; /* Suffix, with length and EOS. 
*/ +{ + UNCH *pt; + UNS slen, tlen; + + slen = ustrlen(s); + tlen = slen + (*pref - 2) + (*suff - 2); + if (tlen > LITLEN) + return 0; + pt = (UNCH *)rmalloc(tlen + 1); + memcpy(pt, pref + 1, *pref - 2); + memcpy(pt + (*pref - 2), s, slen); + memcpy(pt + (*pref - 2) + slen, suff + 1, *suff - 1); + return pt; +} +/* MDEXTID: Process external identifier parameter of a markup declaration. + On entry, tbuf contains SYSTEM or PUBLIC if all is well. + NULL is returned if an error, otherwise fpis. If it is a + valid external data entity, the caller's estore is set to ESN + and its nxetype is set to the code for the external entity type. + The event that terminated the parse is preserved in pcb.action, + so the caller should process it before further parsing. +*/ +struct fpi *mdextid(tbuf, fpis, ename, estore, pne) +UNCH *tbuf; /* Work area for tokenization[2*(LITLEN+2)]. */ +struct fpi *fpis; /* FPI structure. */ +UNCH *ename; /* Entity or notation name, with EOS, no length.*/ + /* NOTE: No PERO on parameter entity name. */ +UNCH *estore; /* DTD, general or parameter entity, DCN. */ +PNE pne; /* Caller's external entity ptr. */ +{ + PDCB dcb; /* Ptr to DCN control block. */ + int exidtype; /* External ID type: 0=none 1=system 2=public. */ + int exetype; /* External entity type. */ + + MEMZERO((UNIV)fpis, (UNS)FPISZ); /* Initialize fpi structure. */ + /* Move entity name into fpi (any PERO was stripped by caller). */ + fpis->fpinm = ename; + entlen = 0; /* Initialize external ID length. */ + + /* PARAMETER 1: External identifier keyword or error. + */ + TRACEMD("1: extid keyword"); + if ((exidtype = mapsrch(exttab, tbuf+1))==0) { + mderr(29, (UNCH *)0, (UNCH *)0); + return (struct fpi *)0; + } + if (exidtype==EDSYSTEM) goto parm3; + + /* PARAMETER 2: Public ID literal. + */ + pcbmd.newstate = 0; + /* The length of a minimum literal cannot exceed the value of LITLEN + in the reference quantity set. 
*/ + parsemd(pubibuf, NAMECASE, &pcblitv, REFLITLEN); + TRACEMD("2: pub ID literal"); + switch (pcbmd.action) { + case LITE: /* Use alternative literal delimiter. */ + case LIT: /* Save literal as public ID string. */ + entlen = ustrlen(pubibuf); + fpis->fpipubis = pubibuf; + break; + default: + mderr(117, (UNCH *)0, (UNCH *)0); + return (struct fpi *)0; /* Signal error to caller. */ + } + /* PARAMETER 3: System ID literal. + */ + parm3: + pcbmd.newstate = 0; + parsemd(sysibuf, NAMECASE, &pcblitc, LITLEN); + TRACEMD("3: sys ID literal"); + if (pcbmd.action==LIT || pcbmd.action==LITE) { + entlen += ustrlen(sysibuf); + fpis->fpisysis = sysibuf; + pcbmd.newstate = 0; + parsemd(tbuf, NAMECASE, &pcblitp, LITLEN); + } + else memcpy(tbuf, sysibuf, *sysibuf); + if (*estore!=ESF || pcbmd.action!=NAS) goto genfpi; + + /* PARAMETER 4: Entity type keyword. + */ + TRACEMD("4: Entity type"); + if ((exetype = mapsrch(extettab, tbuf+1))==0) { + mderr(24, tbuf+1, (UNCH *)0); + return (struct fpi *)0; + } + if (exetype==ESNSUB && SUBDOC == NO) { + mderr(90, tbuf+1, (UNCH *)0); + return (struct fpi *)0; + } + + NEXTYPE(pne) = (UNCH)exetype; /* Save entity type in caller's ne. */ + *estore = ESN; /* Signal that entity is a data entity. */ + + if (exetype==ESNSUB) { + pne->nedcn = 0; + pcbmd.newstate = 0; /* Parse next token for caller. */ + parsemd(tbuf, NAMECASE, &pcblitp, LITLEN); + goto genfpi; + } + /* PARAMETER 5: Notation name. + */ + pcbmd.newstate = 0; + parsemd(lbuf, NAMECASE, &pcblitp, NAMELEN); + TRACEMD("5: notation"); + if (pcbmd.action!=NAS) {mderr(119, tbuf+1, (UNCH *)0); return (struct fpi *)0;} + /* Locate the data content notation. */ + pne->nedcn = dcb = dcndef(lbuf); + /* Note that we have defined an entity with this notation. + If attributes are later defined for this notation, we'll + have to fix up this entity. */ + dcb->entsw = 1; + + /* PARAMETER 6: Data attribute specification. 
+ */ + pcbmd.newstate = 0; + parsemd(lbuf, NAMECASE, &pcblitp, NAMELEN); + TRACEMD("6: [att list]"); + if (pcbmd.action!=MDS) { /* No attributes specified. */ + if (dcb->adl == 0) + NEAL(pne) = 0; + else { + initatt(dcb->adl); + adlval((int)ADN(al), (struct etd *)0); + storedatt(pne); + } + goto genfpi; + } + if (dcb->adl==0) { /* Atts specified, but none defined. */ + mderr(22, (UNCH *)0, (UNCH *)0); + return (struct fpi *)0; + } + pcbstag.newstate = pcbstan; /* First separator is optional. */ + if ((parseatt(dcb->adl, tbuf))==0)/* Empty list. */ + mderr(91, (UNCH *)0, (UNCH *)0); + else { + adlval((int)ADN(al), (struct etd *)0); + storedatt(pne); + } + parse(&pcbeal); /* Parse the list ending. */ + pcbmd.newstate = 0; /* Parse next token for caller. */ + parsemd(tbuf, NAMECASE, &pcblitp, LITLEN); + + /* GENFPI: Builds a formal public identifier structure, including the + entity name, offsets of the components of the public ID, and + other data a system might use to identify the actual file. + */ + genfpi: + TRACEMD("7: generate fpi"); + fpis->fpistore = *estore - ESFM + 1; /* External entity type: 1-6. */ + if (*estore == ESN) { + if (NEXTYPE(pne) == ESNSUB) + fpis->fpinedcn = 0; + else + fpis->fpinedcn = NEDCN(pne) + 1; + } + /* Analyze public ID and make structure entries. */ + if (exidtype==EDPUBLIC) { + if (FORMAL==NO) + fpis->fpiversw = -1; + else if (parsefpi(fpis)>0) { + mderr(88, fpis->fpipubis, (UNCH *)0); + fpis->fpiversw = -1; /* Signal bad formal public ID. */ + } + } + return fpis; +} + +/* Store a data attribute. */ + +VOID storedatt(pne) +PNE pne; +{ + int i; + + NEAL(pne) = (struct ad *)rmalloc((1+ADN(al))*ADSZ); + memcpy((UNIV)NEAL(pne), (UNIV)al, (1+ADN(al))*ADSZ); + for (i = 1; i <= (int)ADN(al); i++) { + if (GET(ADFLAGS(al, i), ASPEC)) + ds.attdef += ADLEN(al, i); + if (NEAL(pne)[i].addef != 0) + NEAL(pne)[i].addef = savestr(NEAL(pne)[i].addef); + } + ds.attcnt += AN(al); /* Number of attributes defined. 
*/ +#if 0 + /* I can't see any reason to increase AVGRPCNT here. */ + ds.attgcnt += ADN(al) - AN(al); /* Number of att grp members. */ +#endif +} + +/* PARSEFPI: Parses a formal public identifier and builds a control block. + PARSEFPI returns a positive error code (1-10), or 0 if no errors. + It set fpiversw if no version was specified in the ID and the + public text is in a class that permits display versions. + Note: An empty version ("//") can be specified (usually it is + the non-device-specific form, such as a definitional entity set). +*/ +int parsefpi(f) +PFPI f; /* Ptr to formal public identifier structure. */ +{ + UNCH *l; /* Pointer to EOS of public identifier. */ + UNCH *p, *q; /* Ptrs to current field in public identifier. */ + UNS len; /* Field length */ + + p = f->fpipubis; /* Point to start of identifier. */ + l = p + ustrlen(p); /* Point to EOS of identifier. */ + if (*p=='+' || *p=='-') { /* If owner registered, unregistered. */ + f->fpiot = *p; /* Save owner type. */ + if ((p += 3)>=l) return 1; /* Get to owner ID field. */ + } + else f->fpiot = '!'; /* Indicate ISO owner identifier. */ + if ((q = pubfield(p, l, '/', &len))==0) /* Find end of owner ID field. */ + return 2; + f->fpiol = len; /* Save owner ID length. */ + f->fpio = p - f->fpipubis; /* Save offset in pubis to owner ID. */ + + if ((p = pubfield(q, l, ' ', &len))==0) /* Find end of text class field. */ + return 3; + *(--p) = EOS; /* Temporarily make class a string. */ + f->fpic = mapsrch(pubcltab, q); /* Check for valid uc class name.*/ + *p++ = ' '; /* Restore the SPACE delimiter. */ + if (f->fpic==0) return 4; /* Error if not valid uc class name.*/ + + /* The public text class in a notation identifier must be NOTATION. */ + if (f->fpistore == ESK - ESFM + 1 && f->fpic != FPINOT) return 10; + + if (*p=='-') { /* If text is unavailable public text.*/ + f->fpitt = *p; /* Save text type. */ + if ((p += 3)>=l) return 5; /* Get to text description field. 
*/ + } + else f->fpitt = '+'; /* Indicate available public text. */ + if ((q = pubfield(p, l, '/', &len))==0) /* Find end of text description. */ + return 6; + f->fpitl = len; /* Save text description length. */ + f->fpit = p - f->fpipubis; /* Save ptr to description.*/ + + p = pubfield(q, l, '/', &len); /* Bound language field. */ + if (f->fpic != FPICHARS) { + int i; + /* Language must be all upper-case letters. */ + /* The standard only says that it *should* be two letters, so + don't enforce that. */ + for (i = 0; i < len; i++) { + /* Don't assume ASCII. */ + if (!strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", q[i])) + return 7; + } + } + f->fpill = len; + f->fpil = q - f->fpipubis; + if (p!=0) { /* If there is a version field: */ + if (f->fpic<FPICMINV) /* Error if class prohibits versions. */ + return 8; + if ((pubfield(p, l, '/', &len))!=0) /* Bound version field. */ + return 9; /* Error if yet another field. */ + f->fpivl = len; /* Save version length. */ + f->fpiv = p - f->fpipubis; /* Save ptr (in pubis) to version. */ + } + else if (f->fpic>=FPICMINV) f->fpiversw = 1;/* No version: get the best. */ + return(0); +} +/* PUBFIELD: Returns ptr to next field, or NULL if ID has ended. +*/ +#ifdef USE_PROTOTYPES +UNCH *pubfield(UNCH *p, UNCH *l, UNCH d, UNS *lenp) +#else +UNCH *pubfield(p, l, d, lenp) +UNCH *p; /* Public identifier field (no length or EOS). */ +UNCH *l; /* Pointer to EOS of public identifier. */ +UNCH d; /* Field delimiter: ' ' or '/'. */ +UNS *lenp; /* Gets field length */ +#endif +{ + UNCH *psv = p+1; /* Save starting value of p. */ + + while (p<l) { + if (*p++==d) { /* Test for delimiter character. */ + *lenp = p - psv; /* Save field length (no len or EOS). */ + if (d=='/' && *p++!=d) /* Solidus requires a second one. */ + continue; + return(p); /* Return ptr to next field. */ + } + } + *lenp = p - --psv; /* Save field length (no len or EOS). */ + return NULL; +} +/* MDMS: Process marked section start. 
+      If already in special parse, bump the level counters and return
+      without parsing the declaration.
+*/
+struct parse *mdms(tbuf, pcb)
+UNCH *tbuf;                   /* Work area for tokenization [NAMELEN+2]. */
+struct parse *pcb;            /* Parse control block for this parse. */
+{
+     int key;                 /* Index of keyword in mslist. */
+     int ptype;               /* Parameter token type. */
+     int pcbcode = 0;         /* Parse code: 0=same; 2-4 per defines. */
+
+     if (++mslevel>TAGLVL) {
+          /* Nesting limit exceeded: undo the increment and report error 27.
+             Note that there is no early return; processing continues below. */
+          --mslevel;
+          sgmlerr(27, (struct parse *)0, ntoa(TAGLVL), (UNCH *)0);
+     }
+
+     /* If already in IGNORE mode, return without parsing parameters. */
+     if (msplevel) {++msplevel; return(pcb);}
+
+     parmno = 0;               /* No parameters as yet. */
+     mdessv = es;              /* Save es for checking entity nesting. */
+     pcbmd.newstate = pcbmdtk; /* First separator is optional. */
+
+     /* PARAMETERS: TEMP, RCDATA, CDATA, IGNORE, INCLUDE, or MDS. */
+     while ((ptype = parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN))==NAS){
+          if ((key = mapsrch(mstab, tbuf+1))==0) {
+               sgmlerr(64, (struct parse *)0, ntoa(parmno), tbuf+1);
+               continue;
+          }
+          if (key==MSTEMP) continue;       /* TEMP: for documentation. */
+          msplevel = 1;                    /* Special parse required. */
+          if (key>pcbcode) pcbcode = key;  /* Update if higher priority. */
+     }
+     if (ptype!=MDS) {
+          NEWCC;                           /* Syntax error did REPEATCC. */
+          sgmlerr(97, (struct parse *)0, lex.m.dso, (UNCH *)0);
+          REPEATCC;                        /* 1st char of marked section. */
+     }
+     if (es!=mdessv) synerr(37, pcb);
+     TRACEMS(1, pcbcode, mslevel, msplevel);
+     /* Select the parse for the section content: pcbmsi for IGNORE, pcbmsc
+        for CDATA, pcbmsrc (after saving es in rcessv) for any other non-zero
+        code.  With pcbcode==0 the caller's pcb is returned unchanged. */
+     if (pcbcode==MSIGNORE) pcb = &pcbmsi;
+     else if (pcbcode) {
+          pcb = pcbcode==MSCDATA ? &pcbmsc : (rcessv = es, &pcbmsrc);
+     }
+     return(pcb);              /* Tell caller whether to change the parse. */
+}
+/* MDMSE: Process marked section end.
+          Issue an error if no marked section had started.
+          Returns 1 when this end closes the last level counted in msplevel
+          (the special parse is to be cancelled), otherwise 0.
+*/
+int mdmse()
+{
+     int retcode = 0;         /* Return code: 0=same parse; 1=cancel special. */
+
+     if (mslevel) --mslevel;
+     else sgmlerr(26, (struct parse *)0, (UNCH *)0, (UNCH *)0);
+
+     /* msplevel is only decremented while a special parse is active. */
+     if (msplevel) if (--msplevel==0) retcode = 1;
+     TRACEMS(0, retcode, mslevel, msplevel);
+     return retcode;
+}
+/* MDNOT: Process NOTATION declaration.
+          The notation name is tokenized into lbuf and the external
+          identifier into tbuf; on any parameter error the declaration
+          is abandoned without defining the notation.
+*/
+VOID mdnot(tbuf)
+UNCH *tbuf;                   /* Work area for tokenization[LITLEN+2]. */
+{
+     struct fpi fpicb;        /* Formal public identifier structure. */
+     PDCB dcb;                /* Ptr to notation entity in dcntab. */
+     UNCH estore = ESK;       /* Entity storage class. */
+
+     mdname = key[KNOTATION]; /* Identify declaration for messages. */
+     subdcl = NULL;           /* No subject as yet. */
+     parmno = 0;              /* No parameters as yet. */
+     mdessv = es;             /* Save es for checking entity nesting. */
+
+     /* PARAMETER 1: Notation name.
+     */
+     pcbmd.newstate = 0;
+     parsemd(lbuf, NAMECASE, &pcblitp, NAMELEN);
+     TRACEMD("1: name");
+     if (pcbmd.action!=NAS) {mderr(120, (UNCH *)0, (UNCH *)0); return;}
+     subdcl = lbuf+1;         /* Save notation name for error msgs. */
+
+     /* PARAMETER 2: External identifier keyword.
+     */
+     pcbmd.newstate = 0;
+     parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+     TRACEMD("2: extid");
+     if (pcbmd.action!=NAS) {mderr(29, (UNCH *)0, (UNCH *)0); return;}
+     if (mdextid(tbuf, &fpicb, lbuf+1, &estore, (PNE)0)==0) return;
+
+     /* PARAMETER 3: End of declaration.
+                     Token was parsed by MDEXTID.
+     */
+     TRACEMD(emd);
+     if (pcbmd.action!=EMD) mderr(126, (UNCH *)0, (UNCH *)0);
+     if (es!=mdessv) synerr(37, &pcbmd);
+
+     /* EXECUTE: Store notation name.
+                 A notation that is already marked defined is reported
+                 (error 56) and left untouched.
+     */
+     if ((dcb = dcnfind(lbuf)) != 0 && dcb->defined) {
+          mderr(56, lbuf+1, (UNCH *)0);
+          return;
+     }
+     /* else */
+     dcb = dcndef(lbuf);
+     dcb->defined = 1;
+     /* Identifiers that were not supplied are stored as 0. */
+     dcb->sysid = fpicb.fpisysis ? savestr(fpicb.fpisysis) : 0;
+     dcb->pubid = fpicb.fpipubis ? savestr(fpicb.fpipubis) : 0;
+     ++ds.dcncnt;
+     ds.dcntext += entlen;
+     TRACEDCN(dcb);
+     return;
+}
+/* DCNDEF: Define a notation and return its DCNCB.
+           If caller does not care if it already exists,
+           he should specify NULL for the notation text
+           so we don't clobber the existing text (if any).
+
+           NOTE(review): this function takes only the notation name; the
+           "notation text" argument mentioned above is not part of the
+           signature, so that paragraph looks stale.  Confirm and trim.
+*/
+struct dcncb *dcndef(nname)
+UNCH *nname;                  /* Notation name (with length and EOS). */
+{
+     return((PDCB)hin((THASH)dcntab, nname, 0, DCBSZ));
+}
+/* DCNFIND: If a notation was declared, return its DCNCB.
+            Return NULL if it is not defined.
+*/
+struct dcncb *dcnfind(nname)
+UNCH *nname;                  /* Notation name (with length and EOS). */
+{
+     return((PDCB)hfind((THASH)dcntab, nname, 0));
+}
+#define SRM(i) (srhptr->srhsrm[i]) /* Current entry in SHORTREF map. */
+/* MDSRMDEF: Process short reference mapping declaration.
+             Entry 0 of a map is used as a "map was declared" flag; the
+             remaining entries are indexed by short reference delimiter
+             number.  On a parameter error the partially built map is
+             discarded (or cleared, if it was already referenced).
+*/
+VOID mdsrmdef(tbuf)
+UNCH *tbuf;                   /* Work area for tokenization[LITLEN+2]. */
+{
+     struct entity *entcb;    /* Ptr to defined entity. */
+     PSRH srhptr;             /* Ptr to short reference map hdr (in srhtab).*/
+     int srn;                 /* Short reference delimiter number in srdeltab.*/
+     int mapused = 0;         /* Has map already been used? */
+
+     mdname = key[KSHORTREF]; /* Identify declaration for messages. */
+     subdcl = NULL;           /* No subject as yet. */
+     parmno = 0;              /* No parameters as yet. */
+     if (!sd.shortref) {mderr(198, (UNCH *)0, (UNCH *)0); return;}
+     mdessv = es;             /* Save es for checking entity nesting. */
+     /* PARAMETER 1: SHORTREF map name.
+     */
+     pcbmd.newstate = 0;
+     parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+     TRACEMD("1: map nm");
+     if (pcbmd.action!=NAS) {mderr(120, (UNCH *)0, (UNCH *)0); return;}
+     if ((srhptr = srhfind(tbuf))!=0) {
+          mapused = 1;
+          /* Error if map was declared (not just used). */
+          if (SRM(0)) {mderr(56, tbuf+1, (UNCH *)0); return;}
+     }
+     else srhptr = srhdef(tbuf);  /* Create map with SRs mapped to NULL.*/
+     SRM(0) = (PECB)srhptr;       /* Indicate map was actually declared.*/
+     subdcl = srhptr->ename+1;    /* Save map name for error msgs. */
+
+     /* Each iteration consumes one (delimiter literal, entity name) pair;
+        the loop ends at the first token that is not a literal. */
+     while ( pcbmd.newstate = 0,
+             parsemd(tbuf, NAMECASE, &pcblitp, SRMAXLEN)==LIT
+             || pcbmd.action==LITE ) {
+          /* PARAMETER 2: Delimiter string.
+          */
+          TRACEMD("2: SR string");
+          if ((srn = mapsrch(lex.s.dtb, tbuf))==0) {
+               mderr(124, tbuf, (UNCH *)0);
+               goto cleanup;
+          }
+          /* PARAMETER 3: Entity name.
+          */
+          pcbmd.newstate = 0;
+          parsemd(tbuf, ENTCASE, &pcblitp, NAMELEN);
+          TRACEMD("3: entity");
+          if (pcbmd.action!=NAS) {mderr(120, (UNCH *)0, (UNCH *)0); goto cleanup;}
+          /* An entity that is not known yet is entered with null text. */
+          if ((entcb = entfind(tbuf))==0) {
+               union etext etx;
+               etx.x = 0;
+               entcb = entdef(tbuf, '\0', &etx);
+          }
+          /* Delimiter already mapped in this declaration: report error 56
+             and keep the first mapping. */
+          if (SRM(srn)) {
+               mderr(56, (srn<lex.s.prtmin ? (UNCH *)lex.s.pdtb[srn]
+                                           : lex.s.dtb[srn].mapnm), (UNCH *)0);
+               continue;
+          }
+          SRM(srn) = entcb;
+          if (srn>=lex.s.fce && srn!=lex.s.hyp && srn!=lex.s.hyp2
+                             && srn!=lex.s.lbr && srn!=lex.s.rbr)
+               lexcnm[*lex.s.dtb[srn].mapnm] = lex.l.fce;
+          else if (srn==lex.s.spc) lexcnm[' '] = lex.l.spcr;
+     }
+     /* PARAMETER 4: End of declaration.
+     */
+     TRACEMD(emd);
+     if (parmno==2)
+          {mderr((UNS)(pcbmd.action==EMD ? 28:123), (UNCH *)0, (UNCH *)0); goto cleanup;}
+     if (pcbmd.action!=EMD) mderr(126, (UNCH *)0, (UNCH *)0);
+     if (es!=mdessv) synerr(37, &pcbmd);
+     ++ds.srcnt;
+     TRACESRM("SHORTREF", srhptr->srhsrm, (UNCH *)0);
+     return;
+
+ cleanup:
+     /* Don't free the map if the map was in use (because of a USEMAP
+        declaration) before this declaration. */
+     if (mapused)
+          MEMZERO((UNIV)srhptr->srhsrm, sizeof(PECB)*(lex.s.dtb[0].mapdata+1));
+     else {
+          frem((UNIV)srhptr->srhsrm);
+          hout((THASH)srhtab, srhptr->ename, 0);
+          frem((UNIV)srhptr);
+     }
+}
+/* MDSRMUSE: Activate a short reference map.
+             With an associated element name or name group (allowed only
+             while indtdsw is set) the map is recorded on those element
+             definitions; without one (allowed only while indtdsw is clear)
+             it is stored in tags[ts].
+*/
+VOID mdsrmuse(tbuf)
+UNCH *tbuf;                   /* Work area for tokenization[LITLEN+2]. */
+{
+     PSRH srhptr;             /* Ptr to short reference map hdr (in srhtab).*/
+     TECB srmptr;             /* Ptr to short reference map (in header). */
+     int i;                   /* Loop counter; temporary variable. */
+
+     mdname = key[KUSEMAP];   /* Identify declaration for messages. */
+     subdcl = NULL;           /* No subject as yet. */
+     parmno = 0;              /* No parameters as yet. */
+     mdessv = es;             /* Save es for checking entity nesting. */
+     /* PARAMETER 1: SHORTREF map name or "#EMPTY".
+     */
+     pcbmd.newstate = 0;
+     parsemd(lbuf, NAMECASE, &pcblitp, NAMELEN);
+     TRACEMD("1: map nm");
+     subdcl = lbuf+1;         /* Subject name for error messages. */
+     switch (pcbmd.action) {
+     case RNS:                /* Empty SHORTREF map requested. */
+          if (ustrcmp(lbuf+1, key[KEMPTY])) {
+               mderr(118, lbuf+1, key[KEMPTY]);
+               return;
+          }
+          srmptr = SRMNULL;
+          break;
+     case NAS:                /* Map name specified; save if undefined. */
+          if ((srhptr = srhfind(lbuf))==0) {
+               if (!indtdsw) {mderr(125, (UNCH *)0, (UNCH *)0); return;}
+               /* Unknown map while indtdsw is set: leave srmptr NULL so
+                  that the map is created after parameter 3 is checked. */
+               srmptr = NULL;
+          }
+          else
+               srmptr = srhptr->srhsrm;
+          break;
+     default:
+          mderr(120, (UNCH *)0, (UNCH *)0);
+          return;
+     }
+     /* PARAMETER 2: Element name or a group of them. (In DTD only.)
+     */
+     pcbmd.newstate = 0;
+     parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+     TRACEMD("2: GI or grp");
+     switch (pcbmd.action) {
+     case NAS:
+          if (!indtdsw) {mderr(142, (UNCH *)0, (UNCH *)0); return;}
+          nmgrp[0] = etddef(tbuf);
+          nmgrp[1] = (PETD)NULL;
+          break;
+     case GRPS:
+          if (!indtdsw) {mderr(142, (UNCH *)0, (UNCH *)0); return;}
+          parsegrp(nmgrp, &pcbgrnm, tbuf);
+          break;
+     case EMD:
+          /* No associated element: rejected while indtdsw is set;
+             otherwise the map is stored in tags[ts] (presumably the
+             currently open element -- confirm against the tag stack). */
+          if (indtdsw) {mderr(28, (UNCH *)0, (UNCH *)0); return;}
+          tags[ts].tsrm = srmptr;
+          TRACESRM("USEMAP", tags[ts].tsrm, tags[ts].tetd->etdgi+1);
+          goto realemd;
+     default:
+          mderr(indtdsw ? 121 : 126, (UNCH *)0, (UNCH *)0);
+          return;
+     }
+     /* PARAMETER 3: End of declaration.
+     */
+     pcbmd.newstate = 0;
+     parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
+     TRACEMD(emd);
+     if (pcbmd.action!=EMD) mderr(126, (UNCH *)0, (UNCH *)0);
+     /* If map has not yet been defined, do it and get map pointer. */
+     if (!srmptr) srmptr = (srhdef(lbuf))->srhsrm;
+
+     /* Store the map pointer for each element name specified.
+        An element that already has a map keeps it; the duplicate is
+        reported (message 68) only when sw.swdupent is set.
+     */
+     TRACEGRP(nmgrp);
+     for (i = -1; nmgrp[++i];) {
+          if (!nmgrp[i]->etdsrm) nmgrp[i]->etdsrm = srmptr;
+          else if (sw.swdupent) mderr(68, nmgrp[i]->etdgi+1, (UNCH *)0);
+     }
+ realemd:
+     if (es!=mdessv) synerr(37, &pcbmd);
+}
+/* SRHDEF: Define a SHORTREF map and return ptr to its header.
+           All entries in map are mapped to NULL.
+           Caller must determine whether it already exists.
+           The map array has lex.s.dtb[0].mapdata+1 entries.
+*/
+PSRH srhdef(sname)
+UNCH *sname;                  /* SHORTREF map name (with length and EOS). */
+{
+     PSRH srh;                /* Ptr to SHORTREF map hdr in srhtab. */
+
+     /* Enter the header in srhtab, then attach a freshly allocated map
+        (presumably zero-filled by rmalloc, per the comment above). */
+     (srh = (PSRH)hin((THASH)srhtab, sname, 0, SRHSZ))->srhsrm =
+          (TECB)rmalloc((UNS)(lex.s.dtb[0].mapdata+1)*sizeof(PECB));
+     return(srh);
+}
+/* SRHFIND: If a SHORTREF map was declared, return the ptr to its header.
+            Return NULL if it is not defined.
+*/
+PSRH srhfind(sname)
+UNCH *sname;                  /* SHORTREF map name (with length and EOS). */
+{
+     return((PSRH)hfind((THASH)srhtab, sname, 0));
+}
+#undef SRM
+
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+comment-column: 30
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/msg.h b/usr.bin/sgmls/sgmls/msg.h
new file mode 100644
index 0000000..fa97a4c
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/msg.h
@@ -0,0 +1,252 @@
+/*
+Severity codes:
+I information (not an SGML error at all)
+W warning (an SGML markup error but it knows what you mean)
+E error
+C critical (fatal)
+
+Type codes:
+R resource
+C ?context/content
+M minimization
+Q quantity
+S syntax
+D declaration
+U unsupported feature
+*/
+/* Table of diagnostics, indexed by message number (entry 0 is unused).
+   Message text is a printf-style template; some entries use positional
+   arguments (%1$s, %2$s) to reorder or repeat their two string arguments. */
+struct {
+     char *text;              /* Message template. */
+     char severity;           /* One of the severity codes listed above. */
+     char type;               /* One of the type codes listed above. */
+} messages[] = {
+/* 0 */ {0},
+/* 1 */ {"%s element not allowed at this point in %s element", 'E', 'C'},
+/* 2 */ {"%s markup declaration not permitted here; declaration ended", 'E', 'D'},
+/* 3 */ {"Length of name, number, or token exceeded NAMELEN or LITLEN limit", 'E', 'Q'},
+/* 4 */ {"Non-SGML character ignored", 'E', 'S'},
+/* 5 */ {"%s end-tag ignored: doesn't end any open element (current is %s)", 'E', 'C'},
+/* 6 */ {"%s start-tag exceeds open element limit; possible lies from %s on", 'E', 'Q'},
+/* 7 */ {"Start-tag omitted from %s with empty content", 'E', 'M'},
+/* 8 */ {"Illegal entity end in markup or delimited text", 'E', 'S'},
+/* 9 */ {"Incorrect character in markup; markup terminated", 'E', 'S'},
+/* 10 */ {"Data not allowed at this point in %s element", 'E', 'C'},
+/* 11 */ {"No element declaration for %s end-tag GI; end-tag ignored", 'E', 'C'},
+/* 12 */ {"%s name ignored: not a syntactically valid SGML name", 'E', 'S'},
+/* 13 */ {"%s = \"%s\" attribute ignored: not defined for this element", 'E', 'C'},
+/* 14 */ {"%s = \"%s\" attribute value defaulted: invalid character", 'E', 'S'},
+/* 15 */ {"%s = \"%s\" attribute value defaulted: token too long", 'E', 'Q'},
+/* 16 */ {"%s = \"%s\" attribute value defaulted: too many tokens", 'E', 'C'},
+/* 17 */ {"%s = \"%s\" attribute value defaulted: wrong token type", 'E', 'C'},
+/* 18 */ {"%s = \"%s\" attribute value defaulted: token not in group", 'E', 'C'},
+/* 19 */ {"Required %s attribute was not specified; may affect processing", 'E', 'C'},
+/* 20 */ {"%s end-tag implied by %s end-tag; not minimizable", 'E', 'M'},
+/* 21 */ {"%s start-tag implied by %s start-tag; not minimizable", 'W', 'M'},
+/* 22 */ {"Possible attributes treated as data because none were defined", 'E', 'C'},
+/* 23 */ {"Duplicate specification occurred for \"%s\"; may affect processing", 'E', 'D'},
+/* 24 */ {"\"%s\" keyword invalid; declaration terminated", 'E', 'D'},
+/* 25 */ {"%s = \"%s\" attribute defaulted: empty string not allowed for token", 'E', 'C'},
+/* 26 */ {"Marked section end ignored; not in a marked section", 'E', 'S'},
+/* 27 */ {"Marked section start ignored; %s marked sections open already", 'E', 'Q'},
+/* 28 */ {"One or more parameters missing; declaration ignored", 'E', 'D'},
+/* 29 */ {"\"PUBLIC\" or \"SYSTEM\" required; declaration terminated", 'E', 'D'},
+/* 30 */ {"%s element ended prematurely; required %s omitted", 'E', 'C'},
+/* 31 */ {"Entity \"%s\" terminated: could not read file", 'E', 'R'},
+/* 32 */ {"Could not open file for entity \"%s\"; entity reference ignored", 'E', 'R'},
+/* 33 */ {"Insufficient main memory; unable to continue parsing", 'C', 'R'},
+/* 34 */ {"%s entity reference ignored; exceeded open entity limit (%s)", 'E', 'Q'},
+/* 35 */ {"No declaration for entity \"%s\"; reference ignored", 'E', 'C'},
+/* 36 */ {"%s entity reference occurred within own text; reference ignored", 'E', 'C'},
+/* 37 */ {"Entity nesting level out of sync", 'E', 'S'},
+/* 38 */ {"Parameter entity text cannot have %s keyword; keyword ignored", 'E', 'D'},
+/* 39 */ {"%s end-tag implied by %s start-tag; not minimizable", 'W', 'M'},
+/* 40 */ {"Start-tag minimization ignored; element has required attribute", 'E', 'D'},
+/* 41 */ {"Required %s element cannot be excluded from %s element", 'E', 'C'},
+/* 42 */ {"No DOCTYPE declaration; document type is unknown", 'E', 'C'},
+/* 43 */ {"Undefined %1$s start-tag GI was used in DTD; \"%1$s O O ANY\" assumed", 'E', 'C'},
+/* 44 */ {"Invalid character(s) ignored; attempting to resume DOCTYPE subset", 'E', 'S'},
+/* 45 */ {"No declaration for entity \"%s\"; default definition used", 'I', 'C'},
+/* 46 */ {"%s end-tag implied by NET delimiter; not minimizable", 'W', 'M'},
+/* 47 */ {"%s end-tag implied by data; not minimizable", 'W', 'M'},
+/* 48 */ {"%s end-tag implied by short start-tag (no GI); not minimizable", 'W', 'M'},
+/* 49 */ {"%s start-tag implied by data; not minimizable", 'W', 'M'},
+/* 50 */ {"%s start-tag implied by short start-tag (no GI); not minimizable", 'W', 'M'},
+/* 51 */ {"Short end-tag (no GI) ignored: no open elements", 'E', 'C'},
+/* 52 */ {"No definition for %1$s document type; \"%1$s O O ANY\" assumed", 'E', 'C'},
+/* 53 */ {"No definition for %1$s implied start-tag; \"%1$s O O ANY\" assumed", 'E', 'C'},
+/* 54 */ {"%s element ended prematurely; required subelement omitted", 'E', 'C'},
+/* 55 */ {"Content model token %s: connectors conflict; first was used", 'E', 'D'},
+/* 56 */ {"Duplicate specification occurred for \"%s\"; duplicate ignored", 'E', 'D'},
+/* 57 */ {"Bad end-tag in R/CDATA element; treated as short (no GI) end-tag", 'E', 'S'},
+/* 58 */ {"Start-tag minimization should be \"-\" for element with declared content", 'I', 'D'},
+/* 59 */ {"Reference to PI entity not permitted here; reference ignored", 'E', 'S'},
+/* 60 */ {"Non-SGML character found; should have been character reference", 'W', 'S'},
+/* 61 */ {"Numeric character reference exceeds 255; reference ignored", 'E', 'S'},
+/* 62 */ {"Invalid alphabetic character reference ignored", 'E', 'S'},
+/* 63 */ {"Invalid character in minimum literal; character ignored", 'E', 'S'},
+/* 64 */ {"Keyword %s ignored; \"%s\" is not a valid marked section keyword", 'E', 'D'},
+/* 65 */ {"Parameter entity name longer than (NAMELEN-1); truncated", 'E', 'Q'},
+/* 66 */ {"Start-tag length exceeds TAGLEN limit; parsed correctly", 'W', 'Q'},
+/* 67 */ {"%s attribute defaulted: FIXED attribute must equal default", 'W', 'C'},
+/* 68 */ {"Duplicate specification occurred for \"%s\"; duplicate ignored", 'I', 'D'},
+/* 69 */ {"%s = \"%s\" IDREF attribute ignored: referenced ID does not exist", 'E', 'C'},
+/* 70 */ {"%s = \"%s\" IDREF attribute ignored: number of IDs in list exceeds GRPCNT limit", 'E', 'Q'},
+/* 71 */ {"%s = \"%s\" ID attribute ignored: ID in use for another element", 'E', 'C'},
+/* 72 */ {"%s = \"%s\" ENTITY attribute not general entity; may affect processing", 'E', 'C'},
+/* 73 */ {"%s = \"%s\" attribute ignored: previously specified in same list", 'W', 'C'},
+/* 74 */ {"\"?\" = \"%s\" name token ignored: not in any group in this list", 'E', 'C'},
+/* 75 */ {"Normalized attribute specification length over ATTSPLEN limit", 'E', 'Q'},
+/* 76 */ {"%s = \"%s\" NOTATION ignored: element content is empty", 'E', 'C'},
+/* 77 */ {"%s = \"%s\" NOTATION undefined: may affect processing", 'E', 'C'},
+/* 78 */ {"Entity \"%2$s\" has undefined notation \"%1$s\"", 'E', 'C'},
+/* 79 */ {"%s = \"%s\" default attribute value not in group; #IMPLIED used", 'E', 'C'},
+/* 80 */ {"#CURRENT default value treated as #IMPLIED for %s ID attribute", 'E', 'D'},
+/* 81 */ {"ID attribute %s cannot have a default value; treated as #IMPLIED", 'E', 'D'},
+/* 82 */ {"%s attribute must be token, not empty string; treated as #IMPLIED", 'E', 'D'},
+/* 83 */ {"NOTATION attribute ignored for EMPTY element", 'E', 'D'},
+/* 84 */ {"%s = \"%s\" NOTATION ignored: content reference specified", 'E', 'C'},
+/* 85 */ {"#CONREF default value treated as #IMPLIED for EMPTY element", 'W', 'D'},
+/* 86 */ {"%s = \"%s\" entity not data entity; may affect processing", 'E', 'C'},
+/* 87 */ {"End-tag minimization should be \"O\" for EMPTY element", 'I', 'D'},
+/* 88 */ {"Formal public identifier \"%s\" invalid; treated as informal", 'E', 'S'},
+/* 89 */ {"Out-of-context %2$s start-tag ended %1$s document element (and parse)", 'E', 'C'},
+/* 90 */ {"\"%s\" keyword is for unsupported feature; declaration terminated", 'E', 'D'},
+/* 91 */ {"Attribute specification list in prolog cannot be empty", 'E', 'D'},
+/* 92 */ {"Document ended invalidly within a literal; parsing ended", 'C', 'S'},
+/* 93 */ {"Short ref in map \"%2$s\" to undeclared entity \"%1$s\" treated as data", 'E', 'C'},
+/* 94 */ {"Could not reopen file to continue entity \"%s\"; entity terminated", 'E', 'R'},
+/* 95 */ {"Out-of-context data ended %s document element (and parse)", 'E', 'C'},
+/* 96 */ {"Short start-tag (no GI) ended %s document element (and parse)", 'E', 'C'},
+/* 97 */ {"DSO delimiter (%s) omitted from marked section declaration", 'E', 'D'},
+/* 98 */ {"Group token %s: duplicate name or name token \"%s\" ignored", 'E', 'D'},
+/* 99 */ {"Attempt to redefine %s attribute ignored", 'E', 'D'},
+/* 100 */ {"%s definition ignored: %s is not a valid declared value keyword", 'E', 'D'},
+/* 101 */ {"%s definition ignored: NOTATION attribute already defined", 'E', 'D'},
+/* 102 */ {"%s definition ignored: ID attribute already defined", 'E', 'D'},
+/* 103 */ {"%s definition ignored: no declared value specified", 'E', 'D'},
+/* 104 */ {"%s definition ignored: invalid declared value specified", 'E', 'D'},
+/* 105 */ {"%s definition ignored: number of names or name tokens in group exceeded GRPCNT limit", 'E', 'D'},
+/* 106 */ {"%s definition ignored: name group omitted for NOTATION attribute", 'E', 'D'},
+/* 107 */ {"#CONREF default value treated as #IMPLIED for %s ID attribute", 'E', 'D'},
+/* 108 */ {"%s definition ignored: %s is not a valid default value keyword", 'E', 'D'},
+/* 109 */ {"%s definition ignored: no default value specified", 'E', 'D'},
+/* 110 */ {"%s definition ignored: invalid default value specified", 'E', 'D'},
+/* 111 */ {"More than ATTCNT attribute names and/or name (token) values; terminated", 'E', 'D'},
+/* 112 */ {"Attempted redefinition of attribute definition list ignored", 'E', 'D'},
+/* 113 */ {"Content model token %s: more than GRPCNT model group tokens; terminated", 'E', 'Q'},
+/* 114 */ {"Content model token %s: more than GRPGTCNT content model tokens; terminated", 'E', 'Q'},
+/* 115 */ {"Content model token %s: more than GRPLVL nested model groups; terminated", 'E', 'Q'},
+/* 116 */ {"Content model token %s: %s invalid; declaration terminated", 'E', 'D'},
+/* 117 */ {"\"PUBLIC\" specified without public ID; declaration terminated", 'E', 'D'},
+/* 118 */ {"\"%s\" keyword invalid (only %s permitted); declaration terminated", 'E', 'D'},
+/* 119 */ {"\"%s\" specified without notation name; declaration terminated", 'E', 'D'},
+/* 120 */ {"Parameter must be a name; declaration terminated", 'E', 'D'},
+/* 121 */ {"Parameter must be a GI or a group of them; declaration terminated", 'E', 'D'},
+/* 122 */ {"Parameter must be a name or PERO (%%); declaration terminated", 'E', 'D'},
+/* 123 */ {"Parameter must be a literal; declaration terminated", 'E', 'D'},
+/* 124 */ {"\"%s\" not valid short reference delimiter; declaration terminated", 'E', 'D'},
+/* 125 */ {"Map does not exist; declaration ignored", 'E', 'C'},
+/* 126 */ {"MDC delimiter (>) expected; following text may be misinterpreted", 'E', 'D'},
+/* 127 */ {"Document ended invalidly within prolog; parsing ended", 'C', 'S'},
+/* 128 */ {"\"PUBLIC\" or \"SYSTEM\" or DSO ([) required; declaration terminated", 'E', 'D'},
+/* 129 */ {"Minimization must be \"-\" or \"O\" (not \"%s\"); declaration terminated", 'E', 'D'},
+/* 130 */ {"Content model or keyword expected; declaration terminated", 'E', 'D'},
+/* 131 */ {"Rank stem \"%s\" + suffix \"%s\" more than NAMELEN characters; not defined", 'E', 'D'},
+/* 132 */ {"Undefined %s start-tag GI ignored; not used in DTD", 'E', 'C'},
+/* 133 */ {"Document ended invalidly within a markup declaration; parsing ended", 'C', 'S'},
+/* 134 */ {"Normalized length of literal exceeded %s; markup terminated", 'E', 'Q'},
+/* 135 */ {"R/CDATA marked section in declaration subset; prolog terminated", 'E', 'D'},
+/* 136 */ {"%s = \"%s\" ENTITIES attribute ignored: more than GRPCNT in list", 'E', 'Q'},
+/* 137 */ {"Content model is ambiguous", 'W', 'D'},
+/* 138 */ {"Invalid parameter entity name \"%s\"", 'E', 'S'},
+/* 139 */ {"Document ended invalidly within a marked section; parsing ended", 'C', 'S'},
+/* 140 */ {"Element \"%s\" used in DTD but not defined", 'I', 'D'},
+/* 141 */ {"Invalid NDATA or SUBDOC entity reference occurred; ignored", 'E', 'S'},
+/* 142 */ {"Associated element type not allowed in document instance", 'E', 'C'},
+/* 143 */ {"Illegal DSC character; in different entity from DSO", 'E', 'C'},
+/* 144 */ {"Declared value of data attribute cannot be ID", 'E', 'D' },
+/* 145 */ {"Invalid reference to external CDATA or SDATA entity; ignored", 'E', 'S'},
+/* 146 */ {"Could not find external document type \"%s\"", 'E', 'R'},
+/* 147 */ {"Could not find external general entity \"%s\"", 'I', 'R'},
+/* 148 */ {"Could not find external parameter entity \"%s\"", 'I', 'R'},
+/* 149 */ {"Reference to non-existent general entity \"%s\" ignored", 'E', 'R'},
+/* 150 */ {"Could not find entity \"%s\" using default declaration", 'E', 'R'},
+/* 151 */ {"Could not find entity \"%2$s\" in attribute %1$s using default declaration", 'E', 'R'},
+/* 152 */ {"Short reference map \"%s\" used in DTD but not defined", 'I', 'D'},
+/* 153 */ {"End-tag minimization should be \"O\" for element with CONREF attribute", 'I', 'D'},
+/* 154 */ {"Declared value of data attribute cannot be ENTITY or ENTITIES", 'E', 'D' },
+/* 155 */ {"Declared value of data attribute cannot be IDREF or IDREFS", 'E', 'D' },
+/* 156 */ {"Declared value of data attribute cannot be NOTATION", 'E', 'D' },
+/* 157 */ {"CURRENT cannot be specified for a data attribute", 'E', 'D' },
+/* 158 */ {"CONREF cannot be specified for a data attribute", 'E', 'D' },
+/* 159 */ {"Short reference map for element \"%s\" not defined; ignored", 'E', 'C'},
+/* 160 */ {"Cannot create temporary file", 'C', 'R'},
+/* 161 */ {"Document ended invalidly within SGML declaration", 'C', 'D'},
+/* 162 */ {"Capacity limit %s exceeded by %s points", 'W', 'Q'},
+/* 163 */ {"Amendment 1 requires \"ISO 8879:1986\" instead of \"ISO 8879-1986\"", 'W', 'D'},
+/* 164 */ {"Non-markup, non-minimum data character in SGML declaration", 'E', 'D'},
+/* 165 */ {"Parameter cannot be a literal", 'E', 'D'},
+/* 166 */ {"Invalid concrete syntax scope \"%s\"", 'E', 'D'},
+/* 167 */ {"Parameter must be a number", 'E', 'D'},
+/* 168 */ {"\"%s\" should have been \"%s\"", 'E', 'D'},
+/* 169 */ {"Character number %s is not supported as an additional name character", 'E', 'U'},
+/* 170 */ {"Parameter must be a literal or \"%s\"", 'E', 'D'},
+/* 171 */ {"Bad character description for character %s", 'E', 'D'},
+/* 172 */ {"Character number %s is described more than once", 'W', 'D'},
+/* 173 */ {"Character number plus number of characters exceeds 256", 'E', 'D'},
+/* 174 */ {"No description for upper half of character set: assuming \"128 128 UNUSED\"", 'W', 'D'},
+/* 175 */ {"Character number %s was not described; assuming UNUSED", 'E', 'D'},
+/* 176 */ {"Non-significant shunned character number %s not declared UNUSED", 'E', 'D'},
+/* 177 */ {"Significant character \"%s\" cannot be non-SGML", 'E', 'D'},
+/* 178 */ {"Unknown capacity set \"%s\"", 'E', 'U'},
+/* 179 */ {"No capacities specified." , 'E', 'D'},
+/* 180 */ {"Unknown concrete syntax \"%s\"", 'E', 'U'},
+/* 181 */ {"Character number exceeds 255", 'E', 'D'},
+/* 182 */ {"Concrete syntax SWITCHES not supported", 'E', 'U'},
+/* 183 */ {"\"INSTANCE\" scope not supported", 'E', 'U'},
+/* 184 */ {"Value of \"%s\" feature must be one or more", 'E', 'D'},
+/* 185 */ {"\"%s\" invalid; must be \"YES\" or \"NO\"", 'E', 'D'},
+/* 186 */ {"\"%s\" invalid; must be \"PUBLIC\" or \"SGMLREF\"", 'E', 'D'},
+/* 187 */ {"Feature \"%s\" is not supported", 'E', 'U'},
+/* 188 */ {"Too many open subdocument entities", 'E', 'Q'},
+/* 189 */ {"Invalid formal public identifier", 'I', 'D'},
+/* 190 */ {"Public text class should have been \"%s\"", 'I', 'D'},
+/* 191 */ {"Character number %s must be non-SGML", 'W', 'D'},
+/* 192 */ {"Notation \"%s\" not defined in DTD", 'W', 'D'},
+/* 193 */ {"Unclosed start or end tag requires \"SHORTTAG YES\"", 'W', 'M'},
+/* 194 */ {"Net-enabling start tag requires \"SHORTTAG YES\"", 'W', 'M'},
+/* 195 */ {"Attribute name omission requires \"SHORTTAG YES\"", 'W', 'M'},
+/* 196 */ {"Undelimited attribute value requires \"SHORTTAG YES\"", 'W', 'M'},
+/* 197 */ {"Attribute specification omitted for \"%s\": requires markup minimization", 'W', 'M'},
+/* 198 */ {"Concrete syntax does not have any short reference delimiters", 'E', 'D'},
+/* 199 */ {"Character number %s does not exist in the base character set", 'E', 'D'},
+/* 200 */ {"Character number %s is UNUSED in the syntax reference character set", 'E', 'D'},
+/* 201 */ {"Character number %s was not described in the syntax reference character set", 'E', 'D'},
+/* 202 */ {"Character number %s in the syntax reference character set has no corresponding character in the system character set", 'E', 'D'},
+/* 203 */ {"Character number %s was described using an unknown base set", 'E', 'D'},
+/* 204 */ {"Duplicate specification for added function \"%s\"", 'E', 'D'},
+/* 205 */ {"Added function character cannot be \"%s\"", 'E', 'D'},
+/* 206 */ {"Only reference concrete syntax function characters supported", 'E', 'U'},
+/* 207 */ {"Only reference concrete syntax general delimiters supported", 'E', 'U'},
+/* 208 */ {"Only reference concrete syntax short reference delimiters supported", 'E', 'U'},
+/* 209 */ {"Unrecognized keyword \"%s\"", 'E', 'D'},
+/* 210 */ {"Unrecognized quantity name \"%s\"", 'E', 'D'},
+/* 211 */ {"Interpretation of \"%s\" is not a valid name in the declared concrete syntax", 'E', 'D'},
+/* 212 */ {"Replacement reserved name \"%s\" cannot be reference reserved name", 'E', 'D'},
+/* 213 */ {"Duplicate replacement reserved name \"%s\"", 'E', 'D'},
+/* 214 */ {"Quantity \"%s\" must not be less than %s", 'E', 'D'},
+/* 215 */ {"Only values up to %2$s are supported for quantity \"%1$s\"", 'E', 'U'},
+/* 216 */ {"Exclusions attempt to change required status of group in \"%s\"", 'E', 'C'},
+/* 217 */ {"Exclusion cannot apply to token \"%s\" in content model for \"%s\"", 'E', 'C'},
+/* 218 */ {"Required %s attribute was not specified for entity %s", 'E', 'C'},
+/* 219 */ {"UCNMSTRT must have the same number of characters as LCNMSTRT", 'E', 'D'},
+/* 220 */ {"UCNMCHAR must have the same number of characters as LCNMCHAR", 'E', 'D'},
+/* 221 */ {"Character number %s assigned to both LCNMSTRT or UCNMSTRT and LCNMCHAR or UCNMCHAR", 'E', 'D'},
+/* 222 */ {"Character number %s cannot be an additional name character", 'E', 'D'},
+/* 223 */ {"It is unsupported for \"-\" not to be assigned to UCNMCHAR or LCNMCHAR", 'E', 'U'},
+/* 224 */ {"Normalized length of value of attribute \"%s\" exceeded LITLEN", 'E', 'Q'},
+/* 225 */ {"Length of interpreted parameter literal exceeds LITLEN less the length of the bracketing delimiters", 'E', 'Q'},
+/* 226 */ {"Start tag of document element omitted; not minimizable", 'W', 'M'},
+/* 227 */ {"Unrecognized designating escape sequence \"%s\"", 'I', 'U'},
+/* 228 */ {"Earlier reference to entity \"%s\" used default entity", 'I', 'D'},
+/* 229 */ {"Reference to non-existent parameter entity \"%s\" ignored", 'E', 'R'},
+};
diff --git a/usr.bin/sgmls/sgmls/msgcat.c b/usr.bin/sgmls/sgmls/msgcat.c
new file mode 100644
index 0000000..6b0d9cb
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/msgcat.c
@@ -0,0 +1,833 @@
+/* msgcat.c -
+   X/Open message catalogue functions and gencat utility.
+
+   Written by James Clark (jjc@jclark.com).
+*/
+
+#include "config.h"
+
+#ifndef HAVE_CAT
+
+/* In this implementation the message catalogue format is the same as the
+message text source file format (see pp 42-43 of the X/Open
+Portability Guide, Issue 3, Volume 3.) This means that you don't have
+to use the gencat utility, but it is still useful for checking and
+merging catalogues. */
+
+/* Compile this with -DGENCAT to get the gencat utility. */
+
+#include "std.h"
+#include "msgcat.h"
+
+/* P() wraps a prototype parameter list so that it disappears when
+   USE_PROTOTYPES is not defined. */
+#ifdef USE_PROTOTYPES
+#define P(parms) parms
+#else
+#define P(parms) ()
+#endif
+
+/* Default message set. */
+#define NL_SETD 1
+
+#ifndef PATH_FILE_SEP
+#define PATH_FILE_SEP ':'
+#endif
+
+#ifndef DEFAULT_NLSPATH
+#define DEFAULT_NLSPATH ""
+#endif
+
+#ifndef DEFAULT_LANG
+#define DEFAULT_LANG "default"
+#endif
+
+/* Number of hash chains in a catalogue. */
+#define HASH_TAB_SIZE 251
+
+/* One message; messages that hash alike are chained through next. */
+struct message {
+  struct message *next;
+  unsigned msgnum;
+  unsigned setnum;
+  char *text;                 /* malloc'ed, NUL-terminated message text */
+};
+
+/* An open catalogue. */
+struct cat {
+  char *name;                 /* malloc'ed copy of the name given to catopen */
+  int loaded;                 /* non-zero once a load has been attempted */
+  int bad;                    /* non-zero if the load failed */
+  struct message *table[HASH_TAB_SIZE];
+};
+
+/* Scratch buffer shared by successive parse_text calls, and its size. */
+static char *read_buf = 0;
+static unsigned read_buf_len = 0;
+
+/* Errors that can be generated by read_catalog. */
+
+enum cat_err {
+  E_ZERO,			/* not an error */
+  E_BADARG,
+  E_NOMEM,
+  E_NOSUCHCOMMAND,
+  E_INPUT,
+  E_EOF,
+  E_BADSEP,
+  E_BADLINE
+};
+
+#ifdef GENCAT
+/* These must match enum cat_err.
*/
+static char *cat_errlist[] = {
+  "Error 0",
+  "Invalid argument to command",
+  "Out of memory",
+  "Unrecognized command",
+  "Input error",
+  "Unexpected end of file",
+  "Space or tab expected after message number",
+  "Invalid line",
+};
+#endif /* GENCAT */
+
+#ifndef GENCAT
+/* The value of NLSPATH. */
+static char *nlspath = 0;
+/* The value of LANG. */
+static char *lang = 0;
+#endif /* not GENCAT */
+
+static int current_lineno = -1;
+static enum cat_err cat_errno = E_ZERO;
+
+#ifndef GENCAT
+static void load_catalog P((struct cat *));
+static FILE *find_catalog P((char *, char **));
+#endif
+static int read_catalog P((FILE *, struct message **));
+static void delete_set P((struct message **, unsigned));
+static void delete_message P((struct message **, unsigned, unsigned));
+static int hash P((unsigned setnum, unsigned msgnum));
+static char *parse_text P((FILE *, int));
+
+#ifndef GENCAT
+
+/* Create a catalogue descriptor for NAME.  The catalogue file is not
+   opened here: it is located and read by the first catgets call.
+   OFLAG is accepted for interface compatibility and is not used.
+   Return 0 if NAME is null or memory cannot be allocated. */
+
+nl_catd catopen(name, oflag)
+char *name;
+int oflag;
+{
+  struct cat *catp;
+  int i;
+
+  if (!name)
+    return 0;
+
+  catp = (struct cat *)malloc(sizeof *catp);
+  if (!catp)
+    return 0;
+  catp->name = malloc(strlen(name) + 1);
+  if (!catp->name) {
+    /* Without this check the strcpy below would write through a null
+       pointer; fail cleanly and release the half-built descriptor. */
+    free((char *)catp);
+    return 0;
+  }
+  strcpy(catp->name, name);
+  for (i = 0; i < HASH_TAB_SIZE; i++)
+    catp->table[i] = 0;
+  catp->loaded = 0;
+  catp->bad = 0;
+  return (nl_catd)catp;
+}
+
+/* Release a descriptor returned by catopen together with every message
+   it holds.  A null descriptor is accepted.  Always returns 0. */
+
+int catclose(catd)
+nl_catd catd;
+{
+  int i;
+  struct cat *catp = (struct cat *)catd;
+
+  if (!catp)
+    return 0;
+
+  for (i = 0; i < HASH_TAB_SIZE; i++) {
+    struct message *p, *nextp;
+    /* Fetch the successor before the node is freed. */
+    for (p = catp->table[i]; p; p = nextp) {
+      nextp = p->next;
+      free(p->text);
+      free((char *)p);
+    }
+  }
+  if (catp->name)
+    free(catp->name);
+  free((char *)catp);
+  return 0;
+}
+
+/* Return the text of message MSGNUM in set SETNUM of catalogue CATD.
+   DFLT is returned instead if the descriptor is null, either number is
+   not positive, the catalogue could not be loaded, or the message is
+   not present.  The returned text belongs to the catalogue. */
+
+char *catgets(catd, setnum, msgnum, dflt)
+nl_catd catd;
+int setnum, msgnum;
+char *dflt;
+{
+  struct message *p;
+  struct cat *catp;
+
+  /* setnum and msgnum are required to be >= 1. */
+  if (!catd || setnum <= 0 || msgnum <= 0)
+    return dflt;
+  catp = (struct cat *)catd;
+  if (!catp->loaded)
+    load_catalog(catp);
+  if (catp->bad)
+    return dflt;
+  for (p = catp->table[hash(setnum, msgnum)]; p; p = p->next)
+    if (p->msgnum == msgnum && p->setnum == setnum)
+      break;
+  if (!p)
+    return dflt;
+  return p->text;
+}
+
+/* Locate and read the catalogue file for CATP.  The loaded flag is set
+   before the attempt, so a catalogue that cannot be found or parsed is
+   marked bad once and is not retried on later catgets calls. */
+
+static
+VOID load_catalog(catp)
+struct cat *catp;
+{
+  FILE *fp;
+  char *path;
+
+  catp->loaded = 1;
+  fp = find_catalog(catp->name, &path);
+  if (!fp) {
+    catp->bad = 1;
+    return;
+  }
+  current_lineno = 0;
+  if (read_catalog(fp, catp->table) < 0)
+    catp->bad = 1;
+  fclose(fp);
+  /* The line buffer is only needed while parsing. */
+  if (read_buf) {
+    free(read_buf);
+    read_buf = 0;
+  }
+  read_buf_len = 0;
+  free(path);
+}
+
+/* Search the NLSPATH templates for catalogue NAME and open the first
+   file that can be read.  Templates are separated by PATH_FILE_SEP; in a
+   template %N stands for NAME, %L for the value of LANG and %% for a
+   percent sign, while any other character is copied as it is.  Empty
+   templates are skipped.  On success the open stream is returned and
+   *PATHP is set to the malloc'ed file name, which the caller must free.
+   Return 0 (leaving *PATHP untouched) if no file could be opened or
+   memory ran out. */
+
+static
+FILE *find_catalog(name, pathp)
+char *name;
+char **pathp;
+{
+  char *path;
+
+  if (!name)
+    return 0;
+  /* The environment is consulted only once; the results are cached. */
+  if (!nlspath) {
+    nlspath = getenv("NLSPATH");
+    if (!nlspath)
+      nlspath = DEFAULT_NLSPATH;
+  }
+  if (!lang) {
+    lang = getenv("LANG");
+    if (!lang)
+      lang = DEFAULT_LANG;
+  }
+  path = nlspath;
+  for (;;) {
+    char *p;
+    unsigned len = 0;
+
+    /* First pass: compute the length of the expanded template. */
+    for (p = path; *p != '\0' && *p != PATH_FILE_SEP; p++) {
+      if (*p == '%') {
+        if (p[1] == 'N') {
+          p++;
+          len += strlen(name);
+        }
+        else if (p[1] == 'L') {
+          p++;
+          len += strlen(lang);
+        }
+        else if (p[1] == '%') {
+          p++;
+          len++;
+        }
+        else
+          len++;
+
+      }
+      else
+        len++;
+    }
+    if (len > 0) {
+      char *s, *try;
+      FILE *fp;
+      s = try = malloc(len + 1);
+      if (!s)
+        return 0;
+      /* Second pass: expand the template; this must mirror the first. */
+      for (p = path; *p != '\0' && *p != PATH_FILE_SEP; p++) {
+        if (*p == '%') {
+          if (p[1] == 'N') {
+            p++;
+            strcpy(s, name);
+            s += strlen(name);
+          }
+          else if (p[1] == 'L') {
+            p++;
+            strcpy(s, lang);
+            s += strlen(lang);
+          }
+          else if (p[1] == '%') {
+            p++;
+            *s++ = '%';
+          }
+          else
+            *s++ = *p;
+        }
+        else
+          *s++ = *p;
+      }
+      *s++ = '\0';
+      fp = fopen(try, "r");
+      if (fp) {
+        *pathp = try;
+        return fp;
+      }
+      free(try);
+    }
+    if (*p == '\0')
+      break;
+    path = ++p;
+  }
+  return 0;
+}
+
+#endif /* not GENCAT */
+
+/* 0 success, -1 error */
+
+static
+int parse_message(c, fp, table, setnum, quote)
+int c;
+FILE *fp;
+struct message **table;
+unsigned setnum;
+int quote;
+{
+  /* Parse one message line.  C is the first digit of the message
+     number, which the caller has already read from FP.  A number
+     followed directly by a newline deletes that message from set
+     SETNUM; otherwise the text after the separating space or tab is
+     stored in TABLE, replacing any earlier text for the same message.
+     QUOTE is the current quote character.  On error cat_errno is set. */
+  unsigned msgnum;
+  struct message *msgp;
+  char *text;
+  int hc;
+
+  msgnum = c - '0';
+  for (;;) {
+    c = getc(fp);
+    if (!isdigit(c))
+      break;
+    msgnum = msgnum*10 + (c - '0');
+  }
+  if (c == '\n') {
+    delete_message(table, setnum, msgnum);
+    return 0;
+  }
+  if (c != ' ' && c != '\t') {
+    cat_errno = E_BADSEP;
+    return -1;
+  }
+  text = parse_text(fp, quote);
+  if (!text)
+    return -1;
+  hc = hash(setnum, msgnum);
+  for (msgp = table[hc]; msgp; msgp = msgp->next)
+    if (msgp->setnum == setnum && msgp->msgnum == msgnum)
+      break;
+  if (msgp)
+    free(msgp->text);
+  else {
+    msgp = (struct message *)malloc(sizeof *msgp);
+    if (!msgp) {
+      /* The text has no owner yet, so it must be released here. */
+      free(text);
+      cat_errno = E_NOMEM;
+      return -1;
+    }
+    msgp->next = table[hc];
+    table[hc] = msgp;
+    msgp->msgnum = msgnum;
+    msgp->setnum = setnum;
+  }
+  msgp->text = text;
+  return 0;
+}
+
+/* Read the text of a message from FP up to the end of the line and
+   return it as a malloc'ed string, or return 0 with cat_errno set on a
+   read error or when memory runs out.  If the text starts with the
+   QUOTE character it ends at the next unescaped QUOTE and the rest of
+   the line is discarded.  Backslash escapes \n \b \f \t \v \r \\ and
+   one to three octal digits are translated, backslash-newline continues
+   the text on the next line, and any other escaped character stands for
+   itself.  The characters are collected in the shared read_buf. */
+
+static
+char *parse_text(fp, quote)
+FILE *fp;
+int quote;
+{
+  unsigned i = 0;
+  char *p;
+  int c;
+  int quoted;
+
+  c = getc(fp);
+  if (c == quote) {
+    quoted = 1;
+    c = getc(fp);
+  }
+  else
+    quoted = 0;
+  for (;; c = getc(fp)) {
+    if (c == EOF) {
+      if (ferror(fp)) {
+        cat_errno = E_INPUT;
+        return 0;
+      }
+      break;
+    }
+    if (c == '\n')
+      break;
+    /* XXX
+
+       Can quotes be used in quoted message text if protected by \ ?
+
+       Is it illegal to omit the closing quote if there's an opening
+       quote?
+
+       Is it illegal to have anything after a closing quote?
+
+    */
+
+    if (quoted && c == quote) {
+      /* Skip the rest of the line. */
+      while ((c = getc(fp)) != '\n')
+        if (c == EOF) {
+          if (ferror(fp)) {
+            cat_errno = E_INPUT;
+            return 0;
+          }
+          break;
+        }
+      break;
+    }
+    if (c == '\\') {
+      int d;
+
+      c = getc(fp);
+      if (c == EOF)
+        break;
+      switch (c) {
+      case '\n':
+        /* Line continuation: nothing is stored. */
+        current_lineno++;
+        continue;
+      case 'n':
+        c = '\n';
+        break;
+      case 'b':
+        c = '\b';
+        break;
+      case 'f':
+        c = '\f';
+        break;
+      case 't':
+        c = '\t';
+        break;
+      case 'v':
+        c = '\v';
+        break;
+      case 'r':
+        c = '\r';
+        break;
+      case '\\':
+        c = '\\';
+        break;
+      case '0':
+      case '1':
+      case '2':
+      case '3':
+      case '4':
+      case '5':
+      case '6':
+      case '7':
+        /* Up to three octal digits; a character that is not an octal
+           digit is pushed back. */
+        c -= '0';
+        d = getc(fp);
+        if (d >= '0' && d <= '7') {
+          c = c*8 + d - '0';
+          d = getc(fp);
+          if (d >= '0' && d <= '7')
+            c = c*8 + d - '0';
+          else if (d != EOF)
+            ungetc(d,fp);
+        }
+        else if (d != EOF)
+          ungetc(d, fp);
+        /* A NUL cannot be stored in the text, so it is dropped. */
+        if (c == '\0')
+          continue;	/* XXX */
+        break;
+      default:
+        /* Ignore the quote. */
+        break;
+      }
+    }
+    if (i >= read_buf_len) {
+      /* Grow the shared buffer.  read_buf and read_buf_len are only
+         updated once the allocation has succeeded, so a failure neither
+         loses the old block nor leaves the length describing a buffer
+         that does not exist. */
+      unsigned new_len;
+      char *new_buf;
+
+      if (!read_buf) {
+        new_len = 40;
+        new_buf = malloc(new_len);
+      }
+      else {
+        new_len = read_buf_len * 2;
+        if (new_len <= read_buf_len)
+          new_buf = 0;		/* the size wrapped around */
+        else
+          new_buf = realloc(read_buf, new_len);
+      }
+      if (!new_buf) {
+        cat_errno = E_NOMEM;
+        return 0;
+      }
+      read_buf = new_buf;
+      read_buf_len = new_len;
+    }
+    read_buf[i++] = c;
+  }
+  p = malloc(i + 1);
+  if (!p) {
+    cat_errno = E_NOMEM;
+    return 0;
+  }
+  /* For an empty message read_buf may never have been allocated. */
+  if (i > 0)
+    memcpy(p, read_buf, i);
+  p[i] = '\0';
+  return p;
+}
+
+/* 0 success, -1 error */
+
+/* Parse the remainder of a command line from FP.  A line starting with
+   a space, tab or newline is a comment; "set N" makes N the current set
+   (stored through SETNUMP); "delset N" removes set N from TABLE and
+   makes NL_SETD the current set; "quote C" makes C the quote character
+   (stored through QUOTEP), or disables quoting (-1) if no character is
+   given.  Anything else is an error.  On error cat_errno is set. */
+
+static
+int parse_command(fp, table, setnump, quotep)
+FILE *fp;
+struct message **table;
+unsigned *setnump;
+int *quotep;
+{
+  char buf[128];
+  if (fgets(buf, 128, fp) == NULL) {
+    cat_errno = ferror(fp) ? E_INPUT : E_EOF;
+    return -1;
+  }
+  if (buf[0] == ' ' || buf[0] == '\t' || buf[0] == '\n')
+    /* a comment */;
+  else if (strncmp(buf, "set", 3) == 0) {
+    if (sscanf(buf + 3, "%u", setnump) != 1) {
+      cat_errno = E_BADARG;
+      return -1;
+    }
+
+  }
+  else if (strncmp(buf, "delset", 6) == 0) {
+    unsigned num;
+    if (sscanf(buf + 6, "%u", &num) != 1) {
+      cat_errno = E_BADARG;
+      return -1;
+    }
+    delete_set(table, num);
+    *setnump = NL_SETD;
+  }
+  else if (strncmp(buf, "quote", 5) == 0) {
+    char *p = buf + 5;
+    while (*p == ' ' || *p == '\t')
+      p++;
+    /* XXX should \ be allowed as the quote character? */
+    if (*p == '\0' || *p == '\n')
+      *quotep = -1;
+    else
+      *quotep = *p;
+  }
+  else {
+    cat_errno = E_NOSUCHCOMMAND;
+    return -1;
+  }
+  /* The line was longer than the buffer: discard the rest of it. */
+  if (strchr(buf, '\n') == 0) {
+    int c;
+    while ((c = getc(fp)) != '\n' && c != EOF)
+      ;
+  }
+  return 0;
+}
+
+
+/* Remove every message of set SETNUM from TABLE, releasing both the
+   message text and the node.  Each chain is rebuilt from the messages
+   that are kept, which reverses their order within the chain. */
+
+static
+VOID delete_set(table, setnum)
+struct message **table;
+unsigned setnum;
+{
+  int i;
+
+  for (i = 0; i < HASH_TAB_SIZE; i++) {
+    struct message *p, *nextp;
+    for (p = table[i], table[i] = 0; p; p = nextp) {
+      nextp = p->next;
+      if (p->setnum == setnum) {
+        /* The text is owned by the node (compare delete_message). */
+        free(p->text);
+        free((char *)p);
+      }
+      else {
+        p->next = table[i];
+        table[i] = p;
+      }
+    }
+  }
+}
+
+/* Remove message MSGNUM of set SETNUM from TABLE, if it is present. */
+
+static
+VOID delete_message(table, setnum, msgnum)
+struct message **table;
+unsigned setnum, msgnum;
+{
+  struct message **pp;
+
+  /* pp addresses the link that points at the current node, so the node
+     can be unlinked without treating the head of the chain specially. */
+  for (pp = &table[hash(setnum, msgnum)]; *pp; pp = &(*pp)->next)
+    if ((*pp)->setnum == setnum && (*pp)->msgnum == msgnum) {
+      struct message *p = *pp;
+      *pp = p->next;
+      free(p->text);
+      free((char *)p);
+      break;
+    }
+}
+
+/* 0 success, -1 error.  On error cat_errno is set to the error number.
*/ + +static +int read_catalog(fp, table) +FILE *fp; +struct message **table; +{ + int c; + unsigned setnum = NL_SETD; + int quote_char = -1; + + for (;;) { + /* start of line */ + c = getc(fp); + if (c == EOF) + break; + ++current_lineno; + if (isdigit(c)) { + /* A line starting with a digit is a message in the current set. */ + if (parse_message(c, fp, table, setnum, quote_char) < 0) + return -1; + } + else if (c == '$') { + /* Set number and quote character are passed by address so that a command can change them for the following lines. */ + if (parse_command(fp, table, &setnum, &quote_char) < 0) + return -1; + } + else if (c != '\n') { + /* Any other line may contain only spaces and tabs. */ + while ((c = getc(fp)) != '\n' && c != EOF) + if (c != ' ' && c != '\t') { + cat_errno = E_BADLINE; + return -1; + } + if (c == EOF) + break; + } + } + return 0; +} + +static +int hash(setnum, msgnum) +unsigned setnum, msgnum; +{ + return ((setnum << 8) + msgnum) % HASH_TAB_SIZE; +} + +#ifdef GENCAT + +static char *program_name; + +static int message_compare P((UNIV, UNIV)); +static void print_text P((char *, FILE *)); +static void usage P((void)); + +#ifdef VARARGS +static void fatal(); +#else +static void fatal P((char *,...)); +#endif + +int main(argc, argv) +int argc; +char **argv; +{ + FILE *fp; + int i, j, nmessages; + struct message **list; + unsigned setnum; + struct message *table[HASH_TAB_SIZE]; + + program_name = argv[0]; + + if (argc < 3) + usage(); + + for (i = 0; i < HASH_TAB_SIZE; i++) + table[i] = 0; + for (i = 1; i < argc; i++) { + errno = 0; + fp = fopen(argv[i], "r"); + if (!fp) { + if (i > 1) + fatal("can't open `%s': %s", argv[i], strerror(errno)); + } + else { + current_lineno = 0; + cat_errno = E_ZERO; + if (read_catalog(fp, table) < 0) { + assert(cat_errno != E_ZERO); + assert(cat_errno + < sizeof(cat_errlist)/sizeof(cat_errlist[0])); + fatal("%s:%d: %s", argv[i], current_lineno, + cat_errlist[cat_errno]); + } + fclose(fp); + } + } + + errno = 0; + fp = fopen(argv[1], "w"); + if (!fp) + fatal("can't open `%s' for output: %s", argv[1], strerror(errno)); + nmessages = 0; + for (i = 0; i < HASH_TAB_SIZE; i++) { + struct message *p; + for (p = table[i]; p; p = p->next) + nmessages++; + } + list = 
(struct message **)malloc(nmessages*sizeof(struct message *)); + if (!list) + fatal("out of memory"); + j = 0; + for (i = 0; i < HASH_TAB_SIZE; i++) { + struct message *p; + for (p = table[i]; p; p = p->next) + list[j++] = p; + } + assert(j == nmessages); + + qsort((UNIV)list, nmessages, sizeof(struct message *), message_compare); + + setnum = NL_SETD; + for (i = 0; i < nmessages; i++) { + struct message *p = list[i]; + if (p->setnum != setnum) { + setnum = p->setnum; + fprintf(fp, "$set %u\n", setnum); + } + fprintf(fp, "%u ", p->msgnum); + print_text(p->text, fp); + putc('\n', fp); + } + if (fclose(fp) == EOF) + fatal("error closing `%s'", argv[1]); + return 0; +} + +static +VOID usage() +{ + fprintf(stderr, "usage: %s catfile msgfile...\n", program_name); + exit(1); +} + +static +#ifdef VARARGS +VOID fatal(va_alist) va_dcl +#else /* not VARARGS */ +VOID fatal(char *message,...) +#endif /* not VARARGS */ +{ + va_list ap; + +#ifdef VARARGS + char *message; + va_start(ap); + message = va_arg(ap, char *); +#else /* not VARARGS */ + va_start(ap, message); +#endif /* not VARARGS */ + + fprintf(stderr, "%s: ", program_name); + vfprintf(stderr, message, ap); + putc('\n', stderr); + va_end(ap); + exit(1); +} + +static +int message_compare(p1, p2) +UNIV p1, UNIV p2; +{ + struct message *m1 = *(struct message **)p1; + struct message *m2 = *(struct message **)p2; + + if (m1->setnum < m2->setnum) + return -1; + if (m1->setnum > m2->setnum) + return 1; + if (m1->msgnum < m2->msgnum) + return -1; + if (m1->msgnum > m2->msgnum) + return 1; + return 0; +} + +static +VOID print_text(s, fp) +char *s; +FILE *fp; +{ + for (; *s; s++) { + if (*s == '\\') + fputs("\\\\", fp); + else if (ISASCII(*s) && isprint((UNCH)*s)) + putc(*s, fp); + else { + switch (*s) { + case '\n': + fputs("\\n", fp); + break; + case '\b': + fputs("\\b", fp); + break; + case '\f': + fputs("\\f", fp); + break; + case '\t': + fputs("\\t", fp); + break; + case '\v': + fputs("\\v", fp); + break; + case '\r': + 
fputs("\\r", fp); + break; + default: + fprintf(fp, "\\%03o", (unsigned char)*s); + break; + } + } + } +} + +#endif /* GENCAT */ + +#ifdef TEST + +int main(argc, argv) +int argc; +char **argv; +{ + nl_catd catd; + int msgnum, setnum; + + if (argc != 2) { + fprintf(stderr, "usage: %s catalogue\n", argv[0]); + exit(1); + } + catd = catopen(argv[1], 0); + fprintf(stderr, "Enter set number, message number pairs:\n"); + fflush(stderr); + while (scanf("%d %d", &setnum, &msgnum) == 2) { + char *msg = catgets(catd, setnum, msgnum, "<default>"); + fprintf(stderr, "Returned \"%s\"\n", msg); + fflush(stderr); + } + return 0; +} + +#endif /* TEST */ + +#endif /* not HAVE_CAT */ +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/msgcat.h b/usr.bin/sgmls/sgmls/msgcat.h new file mode 100644 index 0000000..83e998a --- /dev/null +++ b/usr.bin/sgmls/sgmls/msgcat.h @@ -0,0 +1,13 @@ + +#ifdef HAVE_CAT +#include <nl_types.h> +#else +typedef UNIV nl_catd; +#endif + +/* Don't use prototypes here in case nl_types.h declares a conflicting +prototype. */ + +nl_catd catopen(); +int catclose(); +char *catgets(); diff --git a/usr.bin/sgmls/sgmls/pars1.c b/usr.bin/sgmls/sgmls/pars1.c new file mode 100644 index 0000000..7960dc7 --- /dev/null +++ b/usr.bin/sgmls/sgmls/pars1.c @@ -0,0 +1,958 @@ +#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */ +#define GI (tags[ts].tetd->etdgi+1) /* GI of current element. */ +#define NEWGI (newetd->etdgi+1) /* GI of new tag. */ + +static VOID doincludes P((void)); +static int pentname P((char *)); +static struct mpos *newmpos P((void)); +static VOID commbufs P((void)); +static VOID checkdtd P((void)); + +/* PARSECON: Parse content of an element. +*/ +int parsecon(tbuf, pcb) +UNCH *tbuf; /* Work area for tokenization. */ +struct parse *pcb; /* Parse control block for this parse. 
*/ +{ + int srn; /* SHORTREF delimiter number (1-32). */ + int refrc; /* Return code from sentref, stagetd, etc. */ + + TRACECON(etagimct, dostag, datarc, pcb, conrefsw, didreq); + if (eodsw) return(EOD_); + if (didreq && (conrefsw & TAGREF)) {didreq = 0; goto conr;} + if (etagimct>0) {etagimsw = --etagimct ? 1 : 0; destack(); return(ETG_);} + if (dostag) { + conrefsw = conrefsv; + etisw = etiswsv; + if (charmode) {dostag = 0; return datarc;} + return stag(datarc); + } + if (conrefsw) { + conr: + destack(); + conrefsw = 0; + return ETG_; + } + else if (eofsw) return(EOD_); + + datarc = 0; + while (1) { + parse(pcb); + srn = (int)pcb->action - SRMIN; /* Just in case it's a SHORTREF. */ + switch (pcb->action) { + case DCE_: /* Data character in element content. */ + /* The data character might be a non-SGML character so + reprocess it using pcbconm. */ + REPEATCC; + pcb = conpcb = &pcbconm; + pcb->newstate = pcbcnet; + continue; + case DAS_: /* Current character begins data. */ + data = FPOS; + continue; + + case NLF_: /* NET or SR returns data in lookahead buffer. */ + datalen = (UNS)(ptcon - data); REPEATCC; + goto rcc; + + case LAF_: /* Return data in lookahead buffer: mixed. */ + datalen = (UNS)(ptcon+1 - data); + goto rcc; + + case NON_: /* Single nonchar in nonchbuf. */ + datalen = 2; data = nonchbuf; + goto nrcc; + + case DAR_: /* Return data except for last char. */ + REPEATCC; + case DAF_: /* Return data in source entity buffer. */ + datalen = (UNS)(FPOS - data); + rcc: + REPEATCC; + case DEF_: /* Return data in data entity. */ + nrcc: + datarc = DAF_; + if (pcb==&pcbcone) { + pcbconm.newstate = pcbcnet; + conpcb = &pcbconm; + } + if (charmode) return(datarc); + stagmin = MINNONE; stagreal = newetd = ETDCDATA; + return(stag(datarc)); + + case LAS_: /* Start lookahead buffer with current char. */ + *(ptcon = data = tbuf+1) = *FPOS; + continue; + + case LAM_: /* Move character to lookahead buffer. 
*/ + *++ptcon = *FPOS; + continue; + + case STG_: /* Process non-null start-tag. */ + CTRSET(tagctr); /* Start counting tag length. */ + parsenm(tbuf, NAMECASE); /* Get the GI. */ + newetd = etdref(tbuf); + if (newetd && newetd->adl) { + parseatt(newetd->adl, tbuf); + adlval((int)ADN(al), newetd); + } + parsetag(&pcbstag); /* Parse the tag ending. */ + if ((CTRGET(tagctr)-tagdelsw)>=TAGLEN) + sgmlerr(66, &pcbstag, (UNCH *)0, (UNCH *)0); + if (!newetd) { + sgmlerr(132, pcb, tbuf+1, (UNCH *)0); + continue; + } + return(stagetd(&pcbstag)); + + case NST_: /* Process null start-tag. */ + return nstetd(); + + case ETC_: /* End-tag in CDATA or RCDATA. */ + case ETG_: /* Process non-null end-tag. */ + newetd = etdref(parsenm(tbuf, NAMECASE)); /* Get the GI. */ + parsetag(&pcbetag); /* Parse tag end. */ + if (!newetd) /* Error: undefined.*/ + sgmlerr(11, &pcbetag, tbuf+1, (UNCH *)0); + else if (etagetd(&pcbetag)>=0) return ETG_;/* Open element. */ + if (pcb->action!=ETC_) continue; + /* Tag is undefined or not for an open element and we are in + a CDATA or RCDATA element; issue message and treat as + null end-tag (</>). + */ + sgmlerr(57, &pcbetag, (UNCH *)0, (UNCH *)0); + case NET_: /* Process null end-tag. */ + if ((refrc = netetd(conpcb))!=0) return ETG_; + continue; + + case NED_: /* Process null end-tag delimiter. */ + etagmin = MINNET; + newetd = etagreal = ETDNET; + etagimct = etag(); + etagimsw = etagimct ? 1 : 0; destack(); + return ETG_; + case GTR_: + if (entget()!=-1) { + data = FPOS; + continue; + } + /* fall through */ + case EOD_: /* End of primary file. */ + if (ts<1) return(EOD_); /* Normal end: stack is empty. */ + etagimct = ts-1; /* Treat as end-tag for top tag on stack. */ + etagmin = MINETAG; etagreal = tags[0].tetd; + destack(); + eofsw = 1; /* Return EOD_ after destacking all. */ + return ETG_; + + /* Short references ending with blanks: + If the blank sequence is followed by RE, go do SR7 or SR6. 
+ If the entity is undefined and we are in mixed content, + the blanks must be returned as data. If not, they + can be ignored. + */ + case SR9_: /* Process SR9 (two or more blanks). */ + REPEATCC; /* Make first blank the CC. */ + case SR4_: /* Process SR4 (RS, blanks). */ + parseseq(tbuf, BSEQLEN); /* Squeeze out all blanks. */ + if (*FPOS=='\r') {srn = (srn==9) ? 7 : 6; data = tbuf; goto sr6;} + else REPEATCC; + if ((refrc = shortref(srn, pcb))==DEF_) goto nrcc; + if (refrc>0) return refrc; + if (refrc==ENTUNDEF && pcb==&pcbconm) + {data = tbuf; goto nrcc;} + continue; + + /* Short references ending with RE: + If the reference is defined, the RE is ignored. + For RE and RS RE, + no special action is needed if the reference is undefined, + as the RE will be processed immediately as the current character. + For B RE and RS B RE, + the input is primed with a special character that will + be treated as an RE that cannot be a short reference. + */ + case SR7_: /* Process SR7 (blanks, RE). */ + datalen = (UNS)(FPOS - data); + case SR2_: /* Process SR2 (RE). */ + case SR5_: /* Process SR5 (RS, RE). */ + sr6: /* Process SR6 (RS, blanks, RE). */ + if ((refrc = shortref(srn, pcb))!=ENTUNDEF) { + if (refrc==DEF_) goto nrcc; /* Defined: data entity. */ + if (refrc>0) return refrc; /* Defined: tag entity. */ + continue; /* Defined: not tag. */ + } + if (pcb!=&pcbconm) continue; /* Not mixed; ignore chars. */ + if (srn>=6) /* Return blanks as data. */ + {*FPOS = lex.d.genre; REPEATCC; goto nrcc;} + case REF_: /* Undefined SR with RE; return record end. */ + datarc = REF_; + if (charmode) return(datarc); +#if 0 + /* The standard says this situation can force a tag. + See 323:3-6, 412:1-7. */ + /* If RE would be ignored, don't treat it as start-tag + because it could force a required tag; but do change + state to show that an RE was ignored. 
+ */ + if (scbsgml[pss].snext==scbsgmst) { + scbsgml[pss].snext = scbsgmnr; + TRACEGML(scbsgml, pss, conactsw, conact); + continue; + } +#endif + stagmin = MINNONE; stagreal = newetd = ETDCDATA; + return(stag(datarc)); + + case SR3_: /* Process SR3 (RS). */ + REPEATCC; + if ((refrc = shortref(srn, pcb))==DEF_) goto nrcc; + if (refrc>0) return refrc; + continue; + + case RBR_: /* Two right brackets */ + srn = 26; + REPEATCC; + /* fall through */ + case SR1_: /* Process SR1 (TAB). */ + case SR8_: /* Process SR8 (space). */ + case SR19: /* Process SR19 (-). */ + case SR26: /* Process SR26 (]). */ + REPEATCC; + goto srproc; + + case FCE_: /* Process free character (SR11-18, SR21-32). */ + fce[0] = *FPOS; + srn = mapsrch(&lex.s.dtb[lex.s.fce], fce); + case SR10: /* Process SR10 ("). */ + case SR11: /* Process SR11 (#). */ + case SR20: /* Process SR20 (-). */ + case SR25: /* Process SR25 ([). */ + srproc: + if ((refrc = shortref(srn, pcb))==DEF_) goto nrcc; + if (refrc>0) return refrc; + if (refrc==ENTUNDEF) { /* Treat the SR as data. */ + data = FPOS - (srn==lex.s.hyp2);/* Two data chars if SR20.*/ + if (pcb!=&pcbconm) { /* If not in mixed content: */ + if (srn>=lex.s.data) { /* Change PCB. */ + pcb = conpcb = &pcbconm; + pcb->newstate = pcbcnda; + } + } + else pcb->newstate = pcbcnda;/* Now in data found state. */ + } + continue; + + case ERX_: /* Entity ref in RCDATA: cancel ending delims.*/ + lexcon[lex.d.tago] = lex.l.fre; + lexcon[lex.d.net] = lex.l.nonet; + lexlms[lex.d.msc] = lex.l.fre; + continue; + + case EE_: /* Entity end in RCDATA: check nesting. */ + if (es<rcessv) {synerr(37, pcb); rcessv = es;} + /* If back at top level, re-enable the ending delimiters. */ + if (es==rcessv) { + lexcon[lex.d.tago] = lex.l.tago; + lexcon[lex.d.net] = etictr ? lex.l.net : lex.l.nonet; + lexlms[lex.d.msc] = lex.l.msc; + } + continue; + + case PIE_: /* PI entity: same as PIS_. 
*/ + return PIS_; + + case RSR_: /* Record start: ccnt=0; ++rcnt.*/ + ++RCNT; CTRSET(RSCC); + default: + return (int)pcb->action; /* Default (MD_ MDC_ MSS_ MSE_ PIS_). */ + } + } +} +/* STAGETD: Process start-tag etd. +*/ +int stagetd(pcb) +struct parse *pcb; /* Parse control block for this parse. */ +{ + if (!newetd->etdmod) { + sgmlerr(43, pcb, newetd->etdgi+1, (UNCH *)0); + ++ds.etdercnt; + etdset(newetd, (UNCH)SMO+EMO+ETDOCC, &undechdr, + (PETD *)0, (PETD *)0, (PECB *)0); + TRACEETD(newetd); + } + stagmin = MINNONE; stagreal = newetd; + return stag(0); +} +/* NSTETD: Process null start-tag etd. +*/ +int nstetd() +{ + newetd = ts>0 ? tags[ts].tetd + : tags[0].tetd->etdmod[2].tu.thetd; + stagmin = MINNULL; stagreal = ETDNULL; + etisw = 0; + return stag(0); +} +/* ETAGETD: Process end-tag etd. +*/ +int etagetd(pcb) +struct parse *pcb; /* Parse control block for this parse. */ +{ + etagmin = MINNONE; etagreal = newetd; + if ((etagimct = etag())<0) { + sgmlerr(E_ETAG, pcb, NEWGI, tags[ts].tetd->etdgi+1); + return etagimct; + } + etagimsw = etagimct ? 1 : 0; destack(); + return ETG_; +} +/* NETETD: Process null end-tag etd. +*/ +int netetd(pcb) +struct parse *pcb; /* Parse control block for this parse. */ +{ + if (ts<1) { + sgmlerr(51, pcb, (UNCH *)0, (UNCH *)0); + return 0; + } + etagmin = MINNULL; etagreal = ETDNULL; + etagimsw = 0; destack(); + return ETG_; +} +/* SHORTREF: Process a short (alternative) reference to an entity. + Returns ENTUNDEF if entity is not defined, otherwise returns + the return code from stagetd or etagetd if the entity was + a tag, or zero if an error occurred somewhere. +*/ +int shortref(srn, pcb) +int srn; /* Short reference number. */ +struct parse *pcb; /* Parse control block for this parse. */ +{ + int rc; /* Return code from entopen. 
*/ + + if (tags[ts].tsrm==SRMNULL || !tags[ts].tsrm[srn]) return ENTUNDEF; + if (!tags[ts].tsrm[srn]->estore) { + sgmlerr(93, pcb, tags[ts].tsrm[srn]->ename+1, + tags[ts].tsrm[0]->ename+1); + return(ENTUNDEF); + } + rc = entopen(tags[ts].tsrm[srn]); + if (rc==ENTDATA) return DEF_; + if (rc==ENTPI) return PIS_; + return(0); +} +/* PARSEPRO: Parse prolog. + Note: ptpro cannot overrun tbuf (and therefore needn't be + tested), as long as the buffer exceeds the longest + lookahead sequence in the content parse tables. +*/ +int parsepro() +{ + struct parse *oldpcb; + + while (1) { + int rc; /* Return code: DAF MSS DCE */ + switch (parse(propcb)) { + + case LAS_: /* Start lookahead buffer with current char. */ + *(ptpro = data = tbuf+1) = *FPOS; + continue; + case LAM_: /* Move character to lookahead buffer. */ + *++ptpro = *FPOS; + continue; + case LAF_: /* Return data in lookahead buffer. */ + datalen = (UNS)(ptpro+1 - data); + REPEATCC; + rc = DAF_; + break; /* Prolog ended; data pending. */ + + case DTD_: /* Process document type declaration. */ + parsenm(tbuf, NAMECASE); /* Get declaration name. */ + if (!ustrcmp(tbuf+1, sgmlkey) + && !dtdsw && !sgmlsw++) { +#if 0 + parse(&pcbmdi); +#endif + /* If we got some appinfo, return. */ + if (sgmldecl()) + return APP_; + continue; + } + if (!ustrcmp(tbuf+1, key[KDOCTYPE]) && !dtdsw++) { + startdtd(); + mddtds(tbuf); + continue; + } + sgmlerr(E_MDNAME, propcb, tbuf+1, (UNCH *)0); + continue; + case DTE_: /* DOCTYPE declaration (and prolog) ended. */ + REPEATCC; /* Put back char that followed MSC. */ + if (es != 0) + sgmlerr(143, propcb, (UNCH *)0, (UNCH *)0); + else if (dtdrefsw) {/* Process referenced DTD before real DTE. */ + dtdrefsw = 0; /* Keep us from coming through here again. */ + REPEATCC; /* Put back MSC so it follows referenced DTD. */ + entref(indtdent); + } + else mddtde(tbuf); + continue; + + case MD_: + /* Process markup declaration within DTD or LPD. */ + parsenm(tbuf, NAMECASE); /* Get declaration name. 
*/ + if (!ustrcmp(tbuf+1, key[KENTITY])) + mdentity(tbuf); + else if (!ustrcmp(tbuf+1, key[KUSEMAP])) + mdsrmuse(tbuf); + else if (!ustrcmp(tbuf+1, key[KATTLIST])) + mdadl(tbuf); + else if (!ustrcmp(tbuf+1, key[KSHORTREF])) + mdsrmdef(tbuf); + else if (!ustrcmp(tbuf+1, key[KELEMENT])) + mdelem(tbuf); + else if (!ustrcmp(tbuf+1, key[KNOTATION])) + mdnot(tbuf); + else + sgmlerr(E_MDNAME, propcb, tbuf+1, (UNCH *)0); + continue; + case MDC_: /* Process markup declaration comment. */ + sgmlsw++; /* SGML declaration not allowed after comment */ + parsemd(tbuf, NAMECASE, (struct parse *)0, NAMELEN); + continue; + + case MSS_: /* Process marked section start. */ + oldpcb = propcb; + propcb = mdms(tbuf, propcb); + if (propcb==&pcbmsc || propcb==&pcbmsrc) { + if (oldpcb == &pcbmds) + sgmlerr(135, oldpcb, (UNCH *)0, (UNCH *)0); + conpcb = propcb; + rc = DCE_; + break; + } + continue; + case MSE_: /* Process marked section end. */ + if (mdmse()) propcb = &pcbmds; + continue; + case MSP_: /* Marked section start in prolog outside DTD */ + rc = MSS_; + break; + case PIE_: /* PI entity: same as PIS_. */ + return(PIS_); + + case EOD_: /* Return end of primary entity. */ + if (!sw.onlypro || propcb != &pcbpro || !dtdsw) + sgmlerr(127, propcb, (UNCH *)0, (UNCH *)0); + else { + setdtype(); + checkdtd(); + } + return propcb->action; + case PIS_: /* Return processing instruction (string). */ + sgmlsw++; /* SGML declaration not allowed after PI */ + return((int)propcb->action); /* Prolog will continue later. */ + + case CIR_: /* Chars ignored; trying to resume parse. */ + synerr(E_RESTART, propcb); + REPEATCC; + continue; + case STE_: /* Start tag ended prolog */ + REPEATCC; + REPEATCC; + rc = STE_; + break; + case PEP_: /* Previous character ended prolog. */ + REPEATCC; + case DCE_: /* Data character ended prolog. */ + REPEATCC; + rc = DCE_; + break; + case EE_: /* Illegal entity end in ignored marked section. */ + /* An error message has already been given. 
*/ + continue; + default: + abort(); + } /* switch */ + setdtype(); /* First pass only: set document type. */ + checkdtd(); + if (sw.onlypro) + return EOD_; + TRACESET(); /* Set trace switches. */ + endprolog(); + /* *DOC is first element; stack it at level 0. */ + stack(newetd = nextetd = stagreal = etagreal = docetd); + return(rc); + } /* while */ +} + +/* Allocate buffers that are used in the DTD. */ + +VOID startdtd() +{ + nmgrp = (struct etd **)rmalloc((GRPCNT+1)*sizeof(struct etd *)); + nnmgrp = (PDCB *)rmalloc((GRPCNT+1)*sizeof(PDCB)); + gbuf = (struct thdr *)rmalloc((GRPGTCNT+3)*sizeof(struct thdr)); + /* The extra 1 is for parsing the name of a parameter entity in + mdentity(). */ + nmbuf = (UNCH *)rmalloc(NAMELEN+3); + pubibuf = (UNCH *)rmalloc(LITLEN+1); + sysibuf = (UNCH *)rmalloc(LITLEN+1); + commbufs(); + doincludes(); +} + +static +VOID checkdtd() +{ + struct dcncb *np; + + if (sw.swundef) { + int i; + struct etd *ep; + struct srh *sp; + + for (i = 0; i < ETDHASH; i++) + for (ep = etdtab[i]; ep; ep = ep->etdnext) + if (!ep->etdmod) + sgmlerr(140, (struct parse *)0, ep->etdgi + 1, + (UNCH *)0); + for (sp = srhtab[0]; sp; sp = sp->enext) + if (sp->srhsrm[0] == 0) + sgmlerr(152, (struct parse *)0, sp->ename + 1, + (UNCH *)0); + } + for (np = dcntab[0]; np; np = np->enext) + if (!np->defined) + sgmlerr(192, (struct parse *)0, np->ename + 1, (UNCH *)0); +} + +/* Return non-zero if s is a valid parameter entity name. +If so put a transformed name in entbuf. */ + +static +int pentname(s) +char *s; +{ + int i; + if (lextoke[(UNCH)*s] != NMS) + return 0; + entbuf[2] = ENTCASE ? lextran[(UNCH)*s] : (UNCH)*s; + for (i = 1; s[i]; i++) { + if (i > NAMELEN - 1) + return 0; + if (lextoke[(UNCH)s[i]] < NMC || s[i] == EOBCHAR) + return 0; + entbuf[i + 2] = ENTCASE ? lextran[(UNCH)s[i]] : (UNCH)s[i]; + } + entbuf[1] = lex.d.pero; + entbuf[i + 2] = '\0'; + entbuf[0] = (UNCH)(i + 3); /* length byte, PERO and '\0' */ + return 1; +} + +/* Handle sw.includes. 
*/ + +static +VOID doincludes() +{ + char **p; + if (!sw.includes) + return; + for (p = sw.includes; *p; p++) { + if (pentname(*p)) { + if (!entfind(entbuf)) { /* not found by entfind: define it with key[KINCLUDE] as its text */ + union etext etx; + etx.c = savestr(key[KINCLUDE]); + entdef(entbuf, ESM, &etx); + ++ds.ecbcnt; + ds.ecbtext += ustrlen(key[KINCLUDE]); + } + } + else + sgmlerr(138, (struct parse *)0, (UNCH *)*p, (UNCH *)0); + } +} + +/* Allocate buffers that are used both in the DTD and the instance. */ + +static +VOID commbufs() +{ + al = (struct ad *)rmalloc((ATTCNT+2)*sizeof(struct ad)); + lbuf = (UNCH *)rmalloc(LITLEN + 1); +} +/* Allocate an array of GRPLVL+2 mpos entries; their h members point at consecutive grplongs-long slices of one separately allocated array of unsigned long. */ +static +struct mpos *newmpos() +{ + int j; + unsigned long *h; + struct mpos *p = (struct mpos *)rmalloc((GRPLVL+2)*sizeof(struct mpos)); + + assert(grplongs > 0); + h = (unsigned long *)rmalloc((GRPLVL+2)*grplongs*sizeof(unsigned long)); + for (j = 0; j < GRPLVL+2; j++) { + p[j].h = h; + h += grplongs; + } + return p; +} + +/* Perform end of prolog buffer allocation. */ + +VOID endprolog() +{ + int i; + + ambigfree(); + if (dtdsw) { + frem((UNIV)nmgrp); + frem((UNIV)nnmgrp); + frem((UNIV)gbuf); + frem((UNIV)nmbuf); + frem((UNIV)sysibuf); + frem((UNIV)pubibuf); + } + else { + commbufs(); + doincludes(); + } + scbsgml = (struct restate *)rmalloc((TAGLVL+1)*sizeof(struct restate)); + tags = (struct tag *)rmalloc((TAGLVL+1)*sizeof(struct tag)); + grplongs = (GRPCNT + LONGBITS - 1)/LONGBITS; /* GRPCNT divided by LONGBITS, rounded up; must be set before newmpos() is called, which asserts it is positive */ + for (i = 0; i < TAGLVL+1; i++) + tags[i].tpos = newmpos(); + savedpos = newmpos(); +} + +/* SETDTYPE: Establish specified or default document type. +*/ +VOID setdtype() +{ + /* Initialize default model hdr for declared content. */ + undechdr.ttype = MANY+MCHARS+MGI; /* Declared content is ANY. */ + undechdr.tu.tnum = 0; /* No content model. */ + + /* Initialize content model and etd for *DOC. */ + prcon[0].ttype = MGI; /* Model is an element model. */ + prcon[0].tu.tnum = 2; /* A single group with a single GI in it. */ + prcon[1].ttype = TTSEQ; /* Non-repeatable SEQ group. 
*/ + prcon[1].tu.tnum = 1; /* Only one token in group. */ + prcon[2].ttype = TTETD; /* Token is an etd. */ + docetd = etddef(indocetd); /* etd for document as a whole. */ + etdset(docetd, ETDOCC, prcon, (PETD *)0, (PETD *)0, SRMNULL); + + /* Put specified or default document type etd in *DOC model. */ + if (!dtype) { + sgmlerr(E_DOCTYPE, propcb, (UNCH *)0, (UNCH *)0); + dtype = indefetd; + } + prcon[2].tu.thetd = etddef(dtype); + if (!prcon[2].tu.thetd->etdmod) { + if (dtype != indefetd) + sgmlerr(52, propcb, dtype+1, (UNCH *)0); + ++ds.etdercnt; + etdset(prcon[2].tu.thetd, (UNCH)SMO+EMO+ETDUSED+ETDOCC, &undechdr, + (PETD *)0, (PETD *)0, (PECB *)0); + } + TRACEETD(docetd); + TRACEMOD(prcon); + TRACEETD(prcon[2].tu.thetd); + return; +} +/* PARSETAG: Tag end parser for SGML documents. + For start-tags, it + sets etisw to TAGNET if tag ended with ETI; otherwise to 0. +*/ +VOID parsetag(pcb) +struct parse *pcb; /* Parse control block: pcbstag or pcbetag. */ +{ + tagdelsw = 1; /* Assume tag had an ETI or TAGC. */ + switch (parse(pcb)) { + case ETIC: /* Tag closed with ETI. */ + if (!sd.shorttag) synerr(194, pcb); + etisw = TAGNET; /* Set switch for stack entry flag. */ + return; + case DSC: + synerr(9, pcb); + REPEATCC; + etisw = 0; + return; + case NVS: /* Att name or value token found. */ + case NTV: /* Name token value found. */ + synerr(E_POSSATT, pcb); + pcb->newstate = 0; /* Reset parse state. */ + REPEATCC; /* Put it back for next read. */ + tagdelsw = 0; /* Tag had no closing delimiter. */ + etisw = 0; /* Don't flag stack entry. */ + return; + case TAGO: /* Tag closing implied by TAGO. */ + if (!sd.shorttag) synerr(193, pcb); + REPEATCC; /* Put it back for next read. */ + tagdelsw = 0; /* Tag had no closing delimiter. */ + case TAGC: /* Normal close. */ + default: /* Invalid character (msg was sent). */ + etisw = 0; /* Don't flag stack entry. 
*/ + return; + } +} +/* STAG: Check whether a start-tag is valid at this point in the document + structure, or whether other tags must precede it. + Special case processing is done for the fake tag, #CDATA, as + it is never stacked. +*/ +int stag(dataret) +int dataret; /* Data pending: DAF_ REF_ 0=not #PCDATA. */ +{ + int rc, realrc; /* Return code from context or other test. */ + int mexts = 0; /* >0=stack level of minus grp; -1=plus; 0=none.*/ + + badresw = pexsw = 0; + /* If real element (i.e., not #PCDATA) set mexts and test if empty. */ + if (dataret==0) { + mexts = pexmex(newetd); + /* If element is declared empty, it is same as a conref. */ + if (GET(newetd->etdmod->ttype, MNONE)) conrefsw = TAGREF; + } + if (GET(tags[ts].tetd->etdmod->ttype, MANY)) + rc = mexts>0 ? RCMEX : RCHIT; + else rc = context(newetd, tags[ts].tetd->etdmod, tags[ts].tpos, + &tags[ts].status, mexts); + TRACESTG(newetd, dataret, rc, nextetd, mexts); + + switch (rc) { + case RCEND: /* End current element, then retry start-tag. */ + if (ts<1) realrc = RCMISS; + else realrc = RCEND; + break; + case RCREQ: /* Stack compulsory GI, then retry start-tag. */ + realrc = RCREQ; + break; + case RCMISS: /* Start-tag invalid (#PCDATA or real). */ + if (ts>0 && GET(tags[ts].tetd->etdmod->ttype, MANY)) + realrc = RCEND; + else realrc = RCMISS; + break; + case RCMEX: /* Start-tag invalid (minus exception). */ + etagimct = ts - mexts; + realrc = RCEND; + break; + case RCHITMEX: /* Invalid minus exclusion for required element. */ +#if 0 /* This will have been detected by exclude.c. */ + sgmlerr(E_MEXERR, &pcbstag, NEWGI, tags[mexts].tetd->etdgi+1); +#endif + case RCHIT: /* Start-tag was valid. */ + realrc = RCHIT; + break; + case RCPEX: /* Start-tag valid only because of plus exception. */ + pexsw = TAGPEX; + realrc = RCHIT; + break; + default: + abort(); + } + + switch (realrc) { + case RCEND: /* End current element, then retry start-tag. 
*/ + if (didreq) sgmlerr(07, &pcbstag, nextetd->etdgi+1, (UNCH *)0); + didreq = 0; /* No required start-tag done. */ + dostag = 1; etiswsv = etisw; /* Save real start-tag status. */ + conrefsv = conrefsw; /* Save real start-tag conref. */ + conrefsw = 0; /* Current element is not empty. */ + etagmin = MINSTAG; destack(); /* Process omitted end-tag. */ + return ETG_; + case RCREQ: /* Stack compulsory GI, then retry start-tag. */ + if (!BADPTR(nextetd)) { +#if 0 /* This will have been detected in exclude.c. */ + if ((mexts = pexmex(nextetd))>0) + sgmlerr(E_MEXERR, &pcbstag, nextetd->etdgi+1, + tags[mexts].tetd->etdgi+1); +#endif + if (!nextetd->etdmod) { + sgmlerr(53, &pcbstag, nextetd->etdgi+1, (UNCH *)0); + etdset(nextetd, (UNCH)SMO+EMO+ETDOCC, &undechdr, + (PETD *)0, (PETD *)0, (PECB *)0); + ++ds.etdercnt; + TRACEETD(nextetd); + } + } + if (BITOFF(nextetd->etdmin, SMO)) { + if (!BADPTR(stagreal)) + sgmlerr(21, &pcbstag, nextetd->etdgi+1, stagreal->etdgi+1); + else if (stagreal==ETDCDATA) + sgmlerr(49, &pcbstag, nextetd->etdgi+1, (UNCH *)0); + else sgmlerr(50, &pcbstag, nextetd->etdgi+1, (UNCH *)0); + } + didreq = 1; /* Required start-tag done. */ + dostag = 1; etiswsv = etisw; /* Save real start-tag status. */ + etisw = 0; conrefsv = conrefsw; /* Save real start-tag conref. */ + /* If element is declared empty, it is same as a conref. */ + conrefsw = (GET(nextetd->etdmod->ttype, MNONE)) ? TAGREF : 0; + stack(nextetd); /* Process omitted start-tag. */ + return STG_; + case RCMISS: /* Start-tag invalid (#PCDATA or actual). */ + dostag = 0; contersw |= 1; didreq = 0; + if (dataret) { + if (dataret==REF_) badresw = 1; + else sgmlerr(E_CHARS, conpcb, tags[ts].tetd->etdgi+1, (UNCH *)0); + return dataret; + } + sgmlerr(E_CONTEXT, &pcbstag, NEWGI, tags[ts].tetd->etdgi+1); + if (stagmin!=MINNULL) stagmin = MINNONE; stack(newetd); + return STG_; + case RCHIT: /* Start-tag was valid. 
*/ + dostag = 0; didreq = 0; + if (dataret) return dataret; + stack(newetd); + return STG_; + } + return NOP_; /* To avoid Borland C++ warning */ +} +/* PEXMEX: See if a GI is in a plus or minus exception group on the stack. + If in a minus, returns stack level of minus group; otherwise, + returns -1 if in a plus and not a minus, and zero if in neither. +*/ +int pexmex(curetd) +struct etd *curetd; /* The etd for this GI. */ +{ + int tsl; /* Temporary stack level for looping. */ + int pex = 0; /* 1=found in plus grp; 0=not. */ + + for (tsl = ts; tsl>0; --tsl) { + if (tags[tsl].tetd->etdmex && ingrp(tags[tsl].tetd->etdmex, curetd)) + return(tsl); + if (tags[tsl].tetd->etdpex && ingrp(tags[tsl].tetd->etdpex, curetd)) + pex = -1; + } + return(pex); +} +/* STACK: Add a new entry to the tag stack. + If there is no room, issue a message and reuse last position. +*/ +VOID stack(curetd) +struct etd *curetd; /* The etd for this entry. */ +{ + /* Stack the new element type definition (error if no room). */ + if (++ts>TAGLVL) + sgmlerr(E_STAGMAX, conpcb, curetd->etdgi+1, tags[--ts].tetd->etdgi+1); + tags[ts].tetd = curetd; + + /* Set flags: plus exception + tag had ETI + context error + empty. */ + tags[ts].tflags = (UNCH)pexsw + etisw + contersw + conrefsw; contersw = 0; + + /* If tag had ETI, update ETI counter and enable NET if first ETI. */ + if (etisw && ++etictr==1) lexcon[lex.d.net] = lexcnm[lex.d.net] = lex.l.net; + + /* If etd has ALT table, use it; otherwise, use last element's ALT. */ + if (curetd->etdsrm) { + if (curetd->etdsrm != SRMNULL && curetd->etdsrm[0] == NULL) { + /* Map hasn't been defined. Ignore it. */ + sgmlerr(159, &pcbstag, curetd->etdgi + 1, (UNCH *)0); + curetd->etdsrm = 0; + tags[ts].tsrm = tags[ts-1].tsrm; + } + else + tags[ts].tsrm = curetd->etdsrm; + } + else + tags[ts].tsrm = tags[ts-1].tsrm; + + /* Initialize rest of stack entry. 
*/ + tags[ts].status = 0; + tags[ts].tpos[0].g = 1; /* M: Index in model of next token to test.*/ + tags[ts].tpos[0].t = 1; /* P: Index in tpos of current group. */ + HITCLEAR(tags[ts].tpos[0].h); + tags[ts].tpos[1].g = 1; /* Index of group in model (dummy grp). */ + tags[ts].tpos[1].t = 1; /* 1st token is next in grp to be tested. */ + HITCLEAR(tags[ts].tpos[1].h); /* No hits yet as yet. */ + TRACESTK(&tags[ts], ts, etictr); + + exclude(); + return; +} +/* ETAG: Check validity of an end-tag by seeing if it matches any tag + on the stack. If so, return the offset of the match from the + current entry (0=current). If there is no match, issue a message + and return an error code (-1). + If the newetd is ETDNET, a NET delimiter was found, so check for + a tag that ended with ETI instead of a matching GI. +*/ +int etag() +{ + int tsl = ts+1; /* Temporary stack level for looping. */ + + /* See if end-tag is anywhere on stack, starting at current entry. */ + while (--tsl) { + if (newetd!=ETDNET ? newetd==tags[tsl].tetd : tags[tsl].tflags) { + TRACEETG(&tags[ts], newetd, tsl, ts-tsl); + return(ts-tsl); + } + } + return (-1); /* End-tag didn't match any start-tag. */ +} +/* DESTACK: + Call ECONTEXT to see if element can be ended at this point. + and issue message if there are required tags left. + Remove the current entry from the tag stack. + Issue an error if the destacked element was not minimizable + and its end-tag was omitted. +*/ +VOID destack() +{ + register int ecode = 0; /* Error code (0=o.k.). */ + UNCH *eparm2 = NULL; /* Second parameter of error message. */ + register int minmsgsw; /* 1=message if tag omitted; 0=no message. */ + + /* If element has a content model (i.e., not a keyword) and there + are required tags left, and no CONREF attribute was specified, + issue an error message. 
+ */ + if (!GET(tags[ts].tetd->etdmod->ttype, MKEYWORD) + && !conrefsw + && !econtext(tags[ts].tetd->etdmod, tags[ts].tpos, &tags[ts].status)) { + if (BADPTR(nextetd)) + sgmlerr(54, conpcb, tags[ts].tetd->etdgi+1, (UNCH *)0); + else + sgmlerr(30, conpcb, tags[ts].tetd->etdgi+1, nextetd->etdgi+1); + } + /* If the current tag ended with ETI, decrement the etictr. + If etictr is now zero, disable the NET delimiter. + */ + if (GET(tags[ts--].tflags, TAGNET) && --etictr==0) + lexcon[lex.d.net] = lexcnm[lex.d.net] = lex.l.nonet; + + minmsgsw = BITOFF(tags[ts+1].tetd->etdmin, EMO); + if (!conrefsw && minmsgsw && (etagimsw || etagmin==MINETAG)) { + /* Minimization caused by NET delimiter. */ + if (BADPTR(etagreal)) ecode = 46; + /* Minimization caused by a containing end-tag. */ + else {ecode = 20; eparm2 = etagreal->etdgi+1;} + } + else if (!conrefsw && etagmin==MINSTAG && (minmsgsw || ts<=0)) { + /* Minimization caused by out-of-context start-tag. */ + if (!BADPTR(stagreal)) { + ecode = ts>0 ? 39 : 89; + eparm2 = stagreal->etdgi+1; + } + /* Minimization caused by out-of-context data. */ + else if (stagreal==ETDCDATA) ecode = ts>0 ? 47 : 95; + /* Minimization caused by out-of-context short start-tag. */ + else ecode = ts>0 ? 48 : 96; + if (ts<=0 && ecode) eodsw = 1; + } + if (ecode) sgmlerr((UNS)ecode, conpcb, tags[ts+1].tetd->etdgi+1, eparm2); + /* TEMP: See if parser bug caused stack to go below zero. */ + else if (ts<0) {sgmlerr(64, conpcb, (UNCH *)0, (UNCH *)0); ts = 0;} + TRACEDSK(&tags[ts], &tags[ts+1], ts, etictr); +} +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +comment-column: 30 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/pars2.c b/usr.bin/sgmls/sgmls/pars2.c new file mode 100644 index 0000000..8c97ec6 --- /dev/null +++ b/usr.bin/sgmls/sgmls/pars2.c @@ -0,0 +1,1308 @@ +#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. 
*/ +/* PARSE: Parse a source input stream with specified lexical and state tables. + Return to caller with action code. +*/ +int parse(pcb) +struct parse *pcb; /* Current parse control block. */ +{ + int rc; /* Return code from ENTREF. */ + + while (1) { + NEWCC; + pcb->input = pcb->plex[*FPOS]; + pcb->state = pcb->newstate; + pcb->newstate = (*(pcb->ptab + pcb->state)) [pcb->input]; + pcb->action = (*(pcb->ptab + pcb->state + 1)) [pcb->input]; + TRACEPCB(pcb); + switch (pcb->action) { + case RC2_: /* Back up two characters. */ + REPEATCC; + case RCC_: /* Repeat current character. */ + REPEATCC; + case NOP_: /* No action necessary.*/ + continue; + + case RS_: /* Record start: ccnt=0; ++rcnt.*/ + ++RCNT; CTRSET(RSCC); + continue; + + case GET_: /* EOB or dull EOS or EE found: keep going.*/ + if (entget()==-1) {pcb->action = EOD_; break;}/* Signal if EOD.*/ + continue; + + case EOF_: /* Illegal entity end; return EE_. */ + synerr(E_EOF, pcb); + pcb->action = EE_; + case EE_: /* Important EOS or EE found: return to caller.*/ + if (entget()==-1) pcb->action = EOD_; /* Signal if EOD. */ + break; + + case PER_: /* Parameter entity reference. */ + REPEATCC; /* Use PERO as 1st char of entity name. */ + parsenm(entbuf, ENTCASE); + parse(&pcbref); /* Handle REFC or other terminator. */ + rc = entref(entbuf); + if (rc==ENTPI) {pcb->action = PIE_; break;} + continue; + + case ER_: /* General entity reference; continue. */ + parsenm(entbuf, ENTCASE); + parse(&pcbref); /* Handle REFC or other terminator. */ + rc = entref(entbuf); + if (rc==ENTDATA) {pcb->action = DEF_; break;} + if (rc==ENTPI) {pcb->action = PIE_; break;} + continue; + + + case PEX_: /* Parameter entity reference; return. */ + REPEATCC; /* Use PERO as 1st char of entity name. */ + case ERX_: /* General entity reference; return. */ + parsenm(entbuf, ENTCASE); + parse(&pcbref); /* Handle REFC or other terminator. 
*/ + rc = entref(entbuf); + if (rc == ENTDATA){ + /* Reference to external data/subdoc entity in replaceable + character data. */ + if (BITON(entdatsw, NDECONT)) { + switch (((PNE)data)->nextype) { + case ESNCDATA: + case ESNSDATA: + /* The standard says `non-SGML data entity' + but the amendment should have changed it + to `external data entity'. */ + synerr(145, pcb); + break; + case ESNNDATA: + case ESNSUB: + /* This is definitely illegal. */ + synerr(141, pcb); + break; + } + entdatsw = 0; + continue; + } + pcb->action = DEF_; + } + else if (rc == ENTPI) { + /* Reference to PI entity not allowed in replaceable + character data. */ + synerr(59, pcb); + entpisw = 0; + continue; + } + else if (rc) pcb->action = EE_; + break; + + case CRN_: /* Character reference: numeric. */ + parsetkn(entbuf, NU, NAMELEN); + parse(&pcbref); /* Handle reference terminator. */ + pcb->action = charrefn(entbuf, pcb); + if (pcb->action==CRN_) continue; /* Invalid reference */ + break; + + case CRA_: /* Character reference: alphabetic. */ + parsenm(entbuf, NAMECASE); + parse(&pcbref); /* Handle reference terminator. */ + charrefa(entbuf); + continue; + + case SYS_: /* Invalid NONCHAR: send msg and ignore. */ + synerr(E_SYS, pcb); + if (*FPOS == DELNONCH) NEWCC; + continue; + + case NON_: /* Valid NONCHAR: prefix and shift encoding. */ + synerr(60, pcb); + pcb->action = datachar(*FPOS, pcb); + break; + case NSC_: + synerr(60, pcb); + NEWCC; + nonchbuf[1] = *FPOS; + pcb->action = NON_; + break; + case PCI_: /* Previous character was invalid (INV_). */ + REPEATCC; + case INV_: /* Markup ended by invalid char; repeat char. */ + synerr(9, pcb); + REPEATCC; + break; + + case LNR_: /* Previous char exceeded len; back up to it. */ + REPEATCC; + case LEN_: /* Token too long; ignore excess character. */ + synerr(3, pcb); + continue; + + case RCR_: /* Repeat current char and return to caller. */ + REPEATCC; + default: /* Actions for specific parse. 
*/ + break; + } + return (int)pcb->action; + } +} +/* CHARREFA: Resolve an alphabetical reference to a function character + and put the character in the read buffer. + If reference is bad, issue an error message. +*/ +VOID charrefa(r) +UNCH *r; /* Undelimited char ref (with length and EOS). */ +{ + UNCH thechar; + + thechar = mapsrch(funtab, r+1); + if (thechar == 0) + synerr(62, &pcbref); + else { + /* This isn't ideal, because the character position will still + be wrong for one line. */ + if (thechar == RSCHAR) RCNT--; + setcurchar(thechar); + REPEATCC; + } +} + +/* Make the current character ch. */ + +VOID setcurchar(ch) +int ch; +{ + /* If we're reading directly from an internal entity, we can't + change the entity, since the entity might be referenced again. + So in this case we copy the entity. This is inefficient, but + it will only happen in a case like this: + + <!entity % amp "&"> + <!entity e "x%amp;#SPACE;"> + + Usually character references will have been processed while the + entity was being defined. */ + if (*FPOS != ch) { + if (!FILESW && !COPIEDSW) { + UNCH *s = savestr(FBUF + 1); + FPOS = s + (FPOS - FBUF - 1); + FBUF = s - 1; + COPIEDSW = 1; + } + *FPOS = ch; + } +} + +/* CHARREFN: Resolve a numeric character reference. + If reference is bad, issue an error message. +*/ + +int charrefn(r, pcb) +UNCH *r; /* Undelimited character reference. */ +struct parse *pcb; /* Current parse control block. */ +{ + int thechar; + + thechar = atoi((char *)r); + if (thechar<0 || thechar>255) { + synerr(61, &pcbref); + return((int)pcb->action); + } + return datachar(thechar, pcb); +} + +/* Return ch as a datachar. If this a non-SGML character which might +confuse the parser, shift it to a code that won't and place it in a +special buffer which has DELNONCH in the preceding byte. Otherwise +put it the read buffer. 
*/ + +int datachar(ch, pcb) +int ch; +struct parse *pcb; +{ + switch (ch) { + case EOS: + case EOFCHAR: + case EOBCHAR: + case GENRECHAR: + case DELCDATA: + case DELSDATA: + case DELNONCH: + /* A potentially confusing character which must be prefixed + with DELNONCH. */ + nonchbuf[1] = SHIFTNON((UNCH)ch); + return NON_; + } + setcurchar(ch); + /* If in content, return DCE_ for element content, DAF_ for mixed. */ + /* If not content, it must be a literal parse, so return MLA_. */ + if (pcb == conpcb) { + if (pcb == &pcbcone) + return DCE_; + else { + data = FPOS; + /* Action for DAF_ will do REPEATCC. */ + NEWCC; + return DAF_; + } + } + else + return MLA_; +} +/* INITATT: Initialize al with adl. */ + +VOID initatt(adl) +struct ad *adl; +{ + notadn = 0; /* No NOTATION attribute yet. */ + conrefsw = 0; /* Assume no content reference att. */ + /* Copy attribute definition list as a template. */ + memcpy((UNIV)al, (UNIV)adl, (1+ADN(adl))*ADSZ); +} + +/* PARSEATT: Parse attribute specification list. + Make a current copy of the attribute definition list + and update it with the user's specifications. + Indicate each attribute that was specified in the + list (as opposed to defaulted) by setting the ASPEC flag. + If no attributes were specified, return NULL. Otherwise, + if in the prolog, make a permanent copy of the list and + return its pointer. If not in the prolog, return al. +*/ +struct ad *parseatt(adl, pt) +struct ad *adl; /* Attribute definition list. */ +UNCH *pt; /* Tokenization area: tbuf[TAGLEN+ATTSPLEN]. */ +{ + UNCH *antvptr; + UNCH *nm = 0; /* Pointer to saved name in tbuf (with length). */ + int adn = -1; /* Position of attribute in list (-1=empty). */ + UNCH *tbuflim = pt + ATTSPLEN; + mdessv = es; /* Save es for checking entity nesting. */ + initatt(adl); + while (pt<=tbuflim) { + parse(&pcbstag); + switch (pcbstag.action) { + case NVS: /* Att name or value token found. */ + parsenm(pt, NAMECASE); /* Case translation wanted on name. 
*/ + pt += *(nm = pt); /* Save name while pointing past it. */ + continue; + + case AVD: /* Delimited value found. */ + case AVDA: /* Delimited value found (alternate delimiter). */ + /* Find position (adn) of saved attribute name in list. */ + adn = anmget((int)ADN(al), nm); + parselit(pt, + (adn == 0 || ADTYPE(al, adn) == ACHARS) + ? &pcblitr + : &pcblitt, + LITLEN, + (pcbstag.action==AVD) ? lex.d.lit : lex.d.lita); + if (adn == 0) { + /* Error: unrecognized attribute name. */ + sgmlerr(13, &pcbstag, nm+1, pt); + continue; + } + /* Tokenize and validate value; let it default if an error. */ + /* Put value in list and bump ptr by the normalized length + (which is always >= the actual length). */ + if (!attval(1, pt, adn, adl)) pt += ADLEN(al,adn); + continue; + case AVU: /* Attribute value found: undelimited. */ + if (!sd.shorttag) sgmlerr(196, &pcbstag, (UNCH *)0, (UNCH *)0); + parsetkn(pt, NMC, LITLEN); + /* Find position (adn) of saved attribute name in list. */ + if ((adn = anmget((int)ADN(al), nm))==0) { + /* Error: unrecognized attribute name. */ + sgmlerr(13, &pcbstag, nm+1, pt); + continue; + } + /* Tokenize and validate value; let it default if an error. */ + /* Put value in list and bump ptr by the normalized length + (which is always >= the actual length). */ + if (!attval(1, pt, adn, adl)) pt += ADLEN(al,adn); + continue; + + case NASV: /* Saved NVS was really an NTV. */ + REPEATCC; /* Put back next token starter. */ + pt = nm; /* Back up to NVS. */ + case NTV: /* Name token value found. */ + if (!sd.shorttag) sgmlerr(195, &pcbstag, (UNCH *)0, (UNCH *)0); + if (pcbstag.action==NTV) parsenm(pt, NAMECASE); + if ((adn = antvget((int)ADN(al), pt, &antvptr))==0) { + /* Error: unrecognized name token value. */ + sgmlerr(74, &pcbstag, pt+1, (UNCH *)0); + continue; + } + /* Validate value; let it default if an error. */ + /* Put value in list and bump ptr by the normalized length + (which is always >= the actual length). 
*/ + if (!attval(0, antvptr+1, adn, adl)) pt += ADLEN(al,adn); + continue; + + default: /* All attributes have been parsed. */ + REPEATCC; /* Put next char back for tag close parse. */ + break; + } + break; + } + if (pt>tbuflim) synerr(75, &pcbstag); + if (es!=mdessv) synerr(37, &pcbstag); + if (adn<0) return((struct ad *)0); /* List was empty. */ + TRACEADL(al); + return al; +} +/* ATTVAL: Validate a specified attribute value. Issue a message if it is + the wrong type (or otherwise is not up to spec), and use the default. + Call PARSEVAL to tokenize the value, unless it is a CDATA string. + If the attribute is a group, the value is a string. + For other types, the token count is set by PARSEVAL if the value + is syntactically correct. If incorrect (or if CDATA) the token + count is zero (i.e., the value is a string). + The length of a token does not include the length byte, and + there is no EOS. A string length (as always) includes both + the length byte and the EOS. + If it is a CONREF attribute, set a switch for STAG(). + If it is a CURRENT attribute, store the value as the new default. +*/ +#define DEFVAL adl[adn].addef /* Default value of current attribute. */ +#define DEFNUM adl[adn].adnum /* Default group size of current attribute. */ +#define DEFLEN adl[adn].adlen /* Length of default value of current attribute.*/ +int attval(mtvsw, adval, adn, adl) +int mtvsw; /* Must tokenize value: 1=yes; 0=no. */ +UNCH *adval; /* Untokenized attribute value. */ +int adn; /* Attribute's position in list. */ +struct ad *adl; /* Element's master att def list. */ +{ + int errcode; /* Value/declaration conflict error code. */ + + if (GET(ADFLAGS(al,adn), ASPEC)) /* Can't respecify same attribute. */ + {sgmlerr(73, &pcbstag, ADNAME(al,adn), adval); return(1);} + SET(ADFLAGS(al,adn), ASPEC); /* Indicate att was specified. */ + if (GET(ADFLAGS(al,adn), ACONREF)) /* If attribute is content reference: */ + conrefsw = TAGREF; /* Set switch for STAG(). 
*/ + if (mtvsw && ADTYPE(al,adn)!=ACHARS) { + /* If no syntax errors, check for proper group membership. */ + if ( ((errcode = parseval(adval, ADTYPE(al,adn), lbuf))==0) + && GET(ADFLAGS(al,adn), AGROUP) + && !amemget(&al[adn], ADNUM(al,adn), lbuf) ) errcode = 18; + /* If syntax or group membership error, send message and exit. */ + if (errcode) { + sgmlerr(errcode, &pcbstag, ADNAME(al,adn), adval); + SET(ADFLAGS(al,adn), AERROR); + return(1); + } + /* Replace specified value in adval with tokenized in lbuf. */ + ustrcpy(adval, lbuf); + if (BITOFF(ADFLAGS(al,adn), AGROUP)) ADNUM(al,adn) = (UNCH)tokencnt; + } + if (!mtvsw) + adval--; + /* If attribute is FIXED, specified value must equal default. */ + if (BITON(ADFLAGS(al,adn), AFIXED) && ustrcmp(adval, DEFVAL)) { + /* Since the value has been tokenized, don't use it in the + error message. */ + sgmlerr(67, &pcbstag, ADNAME(al,adn), (UNCH *)0); + SET(ADFLAGS(al,adn), AERROR); + return(1); + } + ADLEN(al,adn) = vallen(ADTYPE(al,adn), ADNUM(al,adn), adval); + if (ADLEN(al,adn) > LITLEN) { + sgmlerr(224, &pcbstag, ADNAME(al,adn), (UNCH *)0); + SET(ADFLAGS(al,adn), AERROR); + return 1; + } + ADVAL(al,adn) = adval; + /* If attribute is CURRENT, value is new default.*/ + if (GET(ADFLAGS(al,adn), ACURRENT)) { + if (ADLEN(al,adn)>DEFLEN) { + ds.attdef += (ADLEN(al,adn) - DEFLEN); + DEFLEN = ADLEN(al,adn); + } + DEFVAL = replace(DEFVAL, ADVAL(al,adn)); + DEFNUM = ADNUM(al,adn); + } + return(0); /* Indicate value was valid. */ +} +/* ADLVAL: Validate the completed attribute definition list (defaults plus + specified values). Issue a message if an + attribute is required or current and its value is NULL. +*/ +VOID adlval(adsz, newetd) +int adsz; /* Size of list. */ +struct etd *newetd; /* Element type definition for this element. */ +{ + int adn = 1; /* Position in list. */ + UNCH *npt, *pt; /* Ptr save areas. */ + UNCH nptsv; /* Save area for ptr value (length?). */ + struct dcncb *dpt; /* Save area for dcncb ptr. 
*/ + + aentctr = 0; /* Number of AENTITY tokens in this att list. */ + idrctr = 0; /* Number of IDREF tokens in this att list. */ + do { + if (ADVAL(al,adn)==NULL) { /* NULL value */ + if (GET(ADFLAGS(al,adn), AREQ+ACURRENT)) { /*Error if REQ, CURRENT*/ + sgmlerr(19, &pcbstag, ADNAME(al,adn), (UNCH *)0); + SET(ADFLAGS(al,adn), AINVALID); + } + } + else switch (ADTYPE(al,adn)) { + case AENTITY: /* Return data ecb pointer if valid entity. */ + aenttst(adn, ADVAL(al,adn)); + break; + case AENTITYS: /* Return data ecb pointers if valid entities. */ + pt = ADVAL(al,adn); + tokencnt = (int)ADNUM(al,adn); + while (tokencnt--) { + nptsv = *(npt = pt + *pt+1); + *pt += 2; *npt = EOS; + aenttst(adn, pt); + *pt -= 2; *(pt = npt) = nptsv; + } + break; + case AID: + /* Define ID; msg if it already exists. */ + if (iddef(ADVAL(al,adn))) { + sgmlerr(71, &pcbstag, ADNAME(al,adn), ADVAL(al,adn)+1); + SET(ADFLAGS(al,adn), AINVALID); + continue; + } + ++ds.idcnt; + break; + case AIDREF: + idreftst(adn, ADVAL(al,adn)); + break; + case AIDREFS: + pt = ADVAL(al,adn); + tokencnt = (int)ADNUM(al,adn); + while (tokencnt--) { + nptsv = *(npt = pt + *pt+1); + *pt += 2; *npt = EOS; + idreftst(adn, pt); + *pt -= 2; *(pt = npt) = nptsv; + } + break; + case ANOTEGRP: /* Return notation identifier. */ + if (GET(ADFLAGS(al,adn), ASPEC)) notadn = adn;/*NOTATION specified*/ + if ((dpt = dcnfind(ADVAL(al,adn)))==0) { + sgmlerr(77, &pcbstag, ADNAME(al,adn), ADVAL(al,adn)+1); + SET(ADFLAGS(al,adn), AINVALID); + } + else ADDATA(al,adn).x = dpt; + break; + } + if (!sd.shorttag && !sd.omittag && ADVAL(al,adn)!=NULL + && !GET(ADFLAGS(al,adn), ASPEC+AINVALID)) + sgmlerr(197, &pcbstag, ADNAME(al,adn), (UNCH *)0); + } while ((adn+=BITON(ADFLAGS(al,adn),AGROUP) ? (int)ADNUM(al,adn)+1 : 1)<=adsz); + + /* Error if NOTATION specified with CONREF attribute or EMPTY element. */ + if (notadn && (conrefsw + || (newetd && GET(newetd->etdmod->ttype, MNONE)))) { + sgmlerr((UNS)(conrefsw ? 
84 : 76), &pcbstag, + ADNAME(al,notadn), ADVAL(al,notadn)+1); + SET(ADFLAGS(al,notadn), AINVALID); + } +} +/* AENTTST: Validate an individual ENTITY token in AENTITY or AENTITYS value. +*/ +VOID aenttst(adn, pt) +int adn; /* Position in list. */ +UNCH *pt; /* Ptr to current ENTITY token in value. */ +{ + struct entity *ept; /* Save area for ecb ptr. */ + + if (++aentctr>GRPCNT) { + sgmlerr(136, &pcbstag, ADNAME(al,adn), pt+1); + SET(ADFLAGS(al,adn), AINVALID); + return; + } + if ( (ept = entfind(pt))==0 + && (ecbdeflt==0 || (ept = usedef(pt))==0) ) { + sgmlerr(ecbdeflt ? 151 : 72, &pcbstag, ADNAME(al,adn), pt+1); + SET(ADFLAGS(al,adn), AINVALID); + return; + } + if (ept->estore==ESX || ept->estore==ESC || ept->estore==ESN) { + /* Error if DCN has no notation identifier. */ + if (ept->estore==ESN && NEXTYPE(ept->etx.n)!=ESNSUB + && !NEDCNDEFINED(ept->etx.n)) { + sgmlerr(78, &pcbstag, NEDCN(ept->etx.n)+1, + pt+1); + SET(ADFLAGS(al,adn), AINVALID); + } + } + else { + sgmlerr(86, &pcbstag, ADNAME(al,adn), pt+1); + SET(ADFLAGS(al,adn), AINVALID); + } +} +/* IDREFTST: Validate an individual IDREF token in an IDREF or IDREFS value. +*/ +VOID idreftst(adn, pt) +int adn; /* Position in list. */ +UNCH *pt; /* Ptr to current IDREF token in value. */ +{ + struct fwdref *rp; + if (++idrctr>GRPCNT) { + sgmlerr(70, &pcbstag, ADNAME(al,adn), pt+1); + SET(ADFLAGS(al,adn), AINVALID); + return; + } + /* Note IDREF; indicate if ID exists. */ + if ((rp = idref(pt)) != 0) + rp->msg = saverr(69, &pcbstag, ADNAME(al,adn), pt+1); + ++ds.idrcnt; +} +/* ANMGET: Locate an attribute name in an attribute definition list. +*/ +int anmget(adsz, nm) +int adsz; /* Size of list. */ +UNCH *nm; /* Value to be found (with length byte). */ +{ + int adn = 0; /* Position in list. */ + + while (++adn <= adsz && ustrcmp(nm+1, ADNAME(al,adn))) { + if (BITON(ADFLAGS(al,adn), AGROUP)) adn += (int)ADNUM(al,adn); + } + return (adn > adsz) ? 
0 : adn; +} +/* ANTVGET: Find the position of a name token value in an attribute list. + Return the position of the attribute definition, or zero + if none was found. Set pp to the value, if non-NULL. +*/ +int antvget(adsz, nm, pp) +int adsz; /* Size of list. */ +UNCH *nm; /* Value to be found (with length byte). */ +UNCH **pp; /* Store value here */ +{ + int adn = 0; /* Position in list. */ + + while (++adn<=adsz) { + /* Test only name group members. */ + if (BITON(ADFLAGS(al,adn), AGROUP)) { + int advn; /* Position of value in sub-list. */ + if ((advn = amemget(&al[adn], (int)ADNUM(al,adn), nm))!=0) { + if (pp) + *pp = al[adn+advn].adname; + return adn; + } + adn += (int)ADNUM(al,adn); + } + } + return 0; +} +/* AMEMGET: Get the position of a member in an attribute name token group. + Returns the position, or zero if not found. + The length byte is ignored in the comparison so that final + form tokens from ATTVAL can be compared to group members. +*/ +int amemget(anmtgrp, adsz, nm) +struct ad anmtgrp[]; /* Name token group. */ +int adsz; /* Size of group. */ +UNCH *nm; /* Name to be found (with length byte). */ +{ + int adn = 0; /* Position in group. */ + + while ( ++adn<=adsz && ustrncmp(nm+1, anmtgrp[adn].adname+1, (UNS)*nm-1)) ; + return (adn>adsz) ? 0 : adn; +} +/* VALLEN: Returns the length of an attribute value for capacity + calculations. Normally, the length is NORMSEP plus the number + of characters. For tokenized lists, it is NORMSEP, + plus the number of characters in the tokens, plus + NORMSEP for each token. + ACHARS and tokenized lists don't have a length byte. + +*/ +UNS vallen(type, num, def) +int type; /* ADTYPE(al,adn) */ +int num; /* ADNUM(al,adn) */ +UNCH *def; /* ADVAL(al,adn) */ +{ + if (type == ACHARS) + return ustrlen(def) + NORMSEP; + if (type < ATKNLIST) + return *def - 2 + NORMSEP; + return ustrlen(def) + num * (NORMSEP - 1) + NORMSEP; +} +/* PARSEGRP: Parse GI names, get their etds, and form an array of pointers + to them. 
The array is terminated by a NULL pointer. + The number of pointers (including the NULL) is returned. + The grp buffer must have room for GRPCNT+1 etds. +*/ +UNS parsegrp(grp, pcb, tbuf) +struct etd *grp[]; /* Buffer for building the group. */ +struct parse *pcb; /* Current parse control block. */ +UNCH *tbuf; +{ + int grpcnt = 0; /* Number of etds in the group. */ + int i; + int essv = es; /* Entity stack level when grp started. */ + + while (parse(pcb)!=GRPE && grpcnt<GRPCNT) { + switch (pcb->action) { + case NAS_: /* GI name: get its etd for the group. */ + grp[grpcnt] = etddef(parsenm(tbuf, NAMECASE)); + for (i = 0; i < grpcnt; i++) + if (grp[i] == grp[grpcnt]) { + mderr(98, ntoa(grpcnt + 1), grp[grpcnt]->etdgi + 1); + break; + } + if (i == grpcnt) + grpcnt++; + continue; + + case EE_: /* Entity ended (correctly or incorrectly). */ + if (es<essv) {synerr(37, pcb); essv = es;} + continue; + + case PIE_: /* PI entity reference (invalid). */ + entpisw = 0; /* Reset PI entity indicator. */ + synerr(59, pcb); + continue; + + default: + break; + } + break; + } + grp[grpcnt++] = 0; /* NULL pointer indicates end of group. */ + if (es!=essv) synerr(37, pcb); + return grpcnt; /* Return number of ptrs in group. */ +} +/* PARSNGRP: Parse notation names, get their dcncbs, and form an array of + pointers to them. The array is terminated by a NULL pointer. + The number of pointers (including the NULL) is returned. + The grp buffer must have room for GRPCNT+1 members. +*/ +UNS parsngrp(grp, pcb, tbuf) +struct dcncb *grp[]; /* Buffer for building the group. */ +struct parse *pcb; /* Current parse control block. */ +UNCH *tbuf; +{ + int grpcnt = 0; /* Number of members in the group. */ + int i; + int essv = es; /* Entity stack level when grp started. */ + + while (parse(pcb)!=GRPE && grpcnt<GRPCNT) { + switch (pcb->action) { + case NAS_: /* Member name: get its control block. 
*/ + grp[grpcnt] = dcndef(parsenm(tbuf, NAMECASE)); + for (i = 0; i < grpcnt; i++) + if (grp[i] == grp[grpcnt]) { + mderr(98, ntoa(grpcnt + 1), grp[grpcnt]->ename + 1); + break; + } + if (i == grpcnt) + grpcnt++; + continue; + + case EE_: /* Entity ended (correctly or incorrectly). */ + if (es<essv) {synerr(37, pcb); essv = es;} + continue; + + case PIE_: /* PI entity reference (invalid). */ + entpisw = 0; /* Reset PI entity indicator. */ + synerr(59, pcb); + continue; + + default: + break; + } + break; + } + grp[grpcnt++] = 0; /* NULL pointer indicates end of group. */ + if (es!=essv) synerr(37, pcb); + return grpcnt; /* Return number of ptrs in group. */ +} +/* COPYGRP: Allocate storage for a group and copy the group into it. +*/ +PETD *copygrp(pg, grpsz) +PETD pg[]; /* Pointer to a group (array of etd ptrs). */ +UNS grpsz; /* Number of ptrs in grp, including final NULL. */ +{ + UNS glen; /* Group length in characters. */ + PETD *gnm; /* Ptr to permanent name group. */ + + if (pg==0) return (PETD *)0; + glen = grpsz * sizeof(struct etd *); + memcpy( (UNIV)(gnm = (struct etd **)rmalloc(glen)) , (UNIV)pg, glen ); + return gnm; +} +/* INGRP: Locate an etd in a name group and return its index+1 (or zero + if not found). +*/ +int ingrp(pg, ketd) +PETD pg[]; /* Array of pointers to etds. */ +PETD ketd; /* Pointer to etd to be found in group. */ +{ + int i = 0; /* Array index. */ + + while (pg[i]) if (pg[i++]==ketd) return i; + return 0; +} +/* PARSELIT: Parse a delimited string and collect it into a token. + Caller supplies buffer, which must be 1 longer than + maximum string allowed. + Caller also supplies character that delimits the string. + TODO: Return 1 if CDATA, SDATA or NONSGML occurred. +*/ +#ifdef USE_PROTOTYPES +VOID parselit(UNCH *tbuf, struct parse *pcb, UNS maxlen, UNCH del) +#else +VOID parselit(tbuf, pcb, maxlen, del) +UNCH *tbuf; /* Work area for tokenization (parmlen+1). */ +struct parse *pcb; /* Current parse control block. 
*/ +UNS maxlen; /* Maximum length of token. */ +UNCH del; /* Literal delimiter: LIT LITA PIC EOS */ +#endif +{ + UNCH *pt = tbuf; /* Current pointer into tbuf. */ + UNCH lexsv = lexlms[del];/* Saved lexlms value of delimiter. */ + int essv = es; /* Entity stack level when literal started. */ + UNCH datadel; /* Delimiter for CDATA/SDATA entity. */ + int parmlen = (int)maxlen; /* Working limit (to be decremented). */ + + lexlms[del] = lex.l.litc; /* Set delimiter to act as literal close. */ + do { + switch (parse(pcb)) { + case LP2_: /* Move 2nd char back to buffer; redo prev.*/ + REPEATCC; + case LPR_: /* Move previous char to buffer; REPEATCC; */ + REPEATCC; + case MLA_: /* Move character to buffer. */ + *pt++ = *FPOS; --parmlen; + continue; + + case FUN_: /* Function char found; replace with space.*/ + *pt++ = ' '; --parmlen; + continue; + + case RSM_: /* Record start: ccnt=0; ++rcnt.*/ + ++RCNT; CTRSET(RSCC); *pt++ = *FPOS; --parmlen; + continue; + + case ERX_: /* Entity reference: cancel LITC delim. */ + case PEX_: /* Parameter entity ref: cancel LITC delim.*/ + lexlms[del] = lexsv; + continue; + + case EE_: + if (es<essv) { + synerr(37, pcb); + essv = es; + } + /* If back at top level, re-enable the LITC delimiter. */ + if (es==essv) lexlms[del] = lex.l.litc; + continue; + + case MLE_: /* Char not allowed in minimum literal. */ + synerr(63, pcb); + continue; + + case DEF_: /* Data entity: add it to buffer. */ + if (pcb == &pcblitt) { + int parmlensv = parmlen; + entdatsw = 0; + parmlen = tokdata(pt, parmlen); + if (parmlen < 0) + break; + pt += parmlensv - parmlen; + continue; + } + if ((parmlen -= (int)datalen+2)<0) {entdatsw = 0; break;} + *pt++ = datadel = + BITON(entdatsw, CDECONT) ? DELCDATA : DELSDATA; + entdatsw = 0; + memcpy( pt , data, datalen ); + pt += datalen; + *pt++ = datadel; + continue; + + case NON_: /* Non-SGML char (delimited and shifted). 
*/ + if ((parmlen -= 2)<0) break; + memcpy( pt , nonchbuf, 2 ); + pt += 2; + continue; + + case RPR_: /* Remove character from buffer. */ + --pt; ++parmlen; + break; + + case EOD_: + exiterr(92, pcb); + + default: + break; + } + break; + } while (parmlen>=0 && pcb->action!=TER_); + + if (parmlen<0) {--pt; sgmlerr(134, pcb, ntoa((int)maxlen),(UNCH *)0); REPEATCC;} + datalen = (UNS)(pt-tbuf);/* To return PI string to text processor. */ + *pt++ = EOS; + lexlms[del] = lexsv; /* Restore normal delimiter handling. */ + if (es!=essv) synerr(37, pcb); + return; +} + +/* Handle a data entity in a tokenized attribute value literal. +Parmlen is amount of space left. Return new parmlen. If there's not +enough space return -1, and copy up to parmlen + 1 characters. */ + +int tokdata(pt, parmlen) +UNCH *pt; +int parmlen; +{ + int skip = (pcblitt.newstate == 0); + int i; + + for (i = 0; parmlen >= 0 && i < datalen; i++) { + switch (data[i]) { + case RSCHAR: + /* ignore it */ + break; + case RECHAR: + case TABCHAR: + case SPCCHAR: + if (!skip) { + *pt++ = data[i]; + parmlen--; + skip = 1; + } + break; + default: + if (data[i] == DELNONCH) { + assert(i + 1 < datalen); + if ((parmlen -= 2) < 0) + break; + *pt++ = DELNONCH; + *pt++ = data[++i]; + skip = 0; + } + else { + *pt++ = data[i]; + parmlen--; + skip = 0; + } + break; + } + } + pcblitt.newstate = skip ? 0 : pcblittda; + return parmlen; +} + + +/* PARSEMD: Parser for markup declarations. + It returns a token each time it is called. + +*/ +int parsemd(pt, namecase, lpcb, tokenlen) +UNCH *pt; /* Token buffer: >=tokenlen+2. */ +int namecase; /* Case translation: ENTCASE NAMECASE AVALCASE. */ +struct parse *lpcb; /* Parse control block for literal parse. */ +UNS tokenlen; /* Max length of expected token: NAMELEN LITLEN */ +{ + struct parse *pcb; /* Current parse control block. */ + + pcb = (lpcb) ? &pcbmd : &pcbmdc; /* If no literal pcb, dcl is comment. 
*/ + + doparse: while (parse(pcb)==EE_) + if (es<mdessv) {synerr(37, pcb); mdessv = es;} + if (pcb->action==PIE_) { /* PI entity reference not allowed. */ + entpisw = 0; /* Reset PI entity indicator. */ + synerr(59, pcb); + goto doparse; + } + ++parmno; /* Increment parameter counter. */ + switch (pcb->action) { + case CDR: /* COM[1] (MINUS) occurred previously. */ + REPEATCC; + return (int)pcb->action; + case LIT: /* Literal: CDATA with LIT delimiter. */ + parselit(pt, lpcb, tokenlen, lex.d.lit); + return (int)pcb->action; + case LITE: /* Literal: CDATA with LITA delimiter. */ + parselit(pt, lpcb, tokenlen, lex.d.lita); + return((int)(pcb->action = LIT)); + case RNS: /* Reserved name started (after RNI). */ + parsenm(pt, NAMECASE); + return (int)pcb->action; + case NAS: /* Name started. */ + if (namecase!=AVALCASE) { + parsenm(pt, namecase); + return (int)pcb->action; + } + /* Treat attribute value as name character string. */ + case NMT: /* Name token string. */ + parsetkn(pt, NMC, (int)tokenlen); /* Get undelimited value. */ + return (int)pcb->action; + case NUM: /* Number or number token string. */ + parsetkn(pt, (UNCH)((int)tokenlen<=NAMELEN ? NU:NMC), (int)tokenlen); + return (int)pcb->action; + case PENR: + REPEATCC; + return (pcb->action = PEN); + case EOD_: + exiterr(133, pcb); + /* EXIT */ + default: /* End of declaration. */ + return (int)pcb->action; /* EMD GRPS MGRP PEN PGRP */ + } +} +/* PARSEMOD: If the declared content was a keyword, the token count is zero + and it is only necessary to save the type. Otherwise, + collect the outermost token count and model type bytes for a model. + The count includes tokens found in nested groups also. + After building the model, parse for its occurrence indicator. +*/ +struct thdr *parsemod(dctype) +int dctype; /* Content type (0=model). */ +{ + gbuf[0].ttype = (UNCH)dctype; /* Initialize content flags byte. */ + if (dctype) {gbuf[0].tu.tnum = 0; return gbuf;} /* Return if not model. 
*/ + + gbuf[0].tu.tnum = 0; /* Don't count 1st group or model header. */ + gbuf[1].ttype = 0; /* Initialize 1st group type ... */ + gbuf[1].tu.tnum = 0; /* and count. */ + grplvl = 1; /* Content model is 1st level group. */ + pcbgrcm.newstate = 0; /* Go parse the model group. */ + /* Empty group is trapped during syntax parse; other errors return NULL. */ + if (!parsegcm(&pcbgrcm, &gbuf[1], &gbuf[0])) return (struct thdr *)0; + parse(&pcbgrcs); /* Get the model suffix, if there is one. */ + switch(pcbgrcs.action) { + case OPT: /* OPT occurrence indicator for model. */ + SET(gbuf[1].ttype, TOPT|TXOPT); + break; + case REP: /* REP occurrence indicator for model. */ + SET(gbuf[1].ttype, TREP|TXREP); + break; + case OREP: /* OREP occurrence indicator for model. */ + SET(gbuf[1].ttype, TOREP|TXOREP); + break; + default: /* RCR_: Repeat char and return. */ + break; + } + if (sw.swambig) ambig(); /* Check content model for ambiguity. */ + return gbuf; +} +/* PARSEGCM: Collect token headers (struct thdr) into a group (array). + An etd is defined for each GI (if none exists) and its pointer is + stored in the header. The function is called recursively. +*/ +struct thdr *parsegcm(pcb, pgh, gbuf) +struct parse *pcb; /* Current parse control block. */ +struct thdr *pgh; /* Current group header in group buffer. */ +struct thdr *gbuf; /* Header for outermost group (model). */ +{ +#define MCON gbuf->ttype /* Model type (content attributes). */ + struct thdr *pg=pgh; /* Current group token. */ + struct thdr *pgsv=pgh; /* Saved current token for occ indicator. */ + int optcnt = 0; /* Count of optional tokens in group. */ + int essv = es; /* Entity stack level when grp started. */ + + while (gbuf->tu.tnum<=GRPGTCNT && pgh->tu.tnum<=GRPCNT && parse(pcb)!=GRPE) + switch (pcb->action) { + + case NAS_: /* GI name: get its etd and store it. 
*/ + ++gbuf->tu.tnum; ++pgh->tu.tnum; + (pgsv = ++pg)->ttype = TTETD; + pg->tu.thetd = etddef(parsenm(tbuf, NAMECASE)); + SET(MCON, MGI); + continue; + + case RNS_: /* Reserved name started (#PCDATA). */ + parsenm(tbuf, NAMECASE); + if (ustrcmp(tbuf+1, key[KPCDATA])) { + mderr(116, ntoa(gbuf->tu.tnum), tbuf+1); + return (struct thdr *)0; + } + /* If #PCDATA is the first non-group token, model is a phrase. */ + if (!MCON) SET(MCON, MPHRASE); + case DTAG: /* Data tag template ignored; treat as #PCDATA. */ + if (pcb->action==DTAG) SET(pgh->ttype, TTSEQ); /* DTAG is SEQ grp. */ + ++gbuf->tu.tnum; ++pgh->tu.tnum; + (++pg)->ttype = TTCHARS+TOREP;/* #PCDATA is OPT and REP. */ + pg->tu.thetd = ETDCDATA; + ++optcnt; /* Ct opt tokens to see if grp is opt.*/ + SET(MCON, MCHARS); + continue; + + case GRP_: /* Group started. */ + ++gbuf->tu.tnum; ++pgh->tu.tnum; + (pgsv = ++pg)->ttype = 0; /* Type will be set by connector. */ + pg->tu.tnum = 0; /* Group has number instead of etd. */ + if (++grplvl>GRPLVL) { + mderr(115, ntoa(gbuf->tu.tnum), (UNCH *)0); + return (struct thdr *)0; + } + pg = parsegcm(pcb, pg, gbuf); + if (!pg) return (struct thdr *)0; + if (GET(pgsv->ttype, TOPT)) ++optcnt; /* Indicate nested opt grp. */ + --grplvl; + continue; + + case OREP: /* OREP occurrence indicator for current token.*/ + SET(pgsv->ttype, TREP|TXREP); + /* Now treat like OPT. */ + case OPT: /* OPT occurrence indicator for current token. */ + SET(pgsv->ttype, TXOPT); + if (GET(pgsv->ttype, TOPT)) continue; /* Exit if nested opt grp. */ + SET(pgsv->ttype, TOPT); + ++optcnt; /* Count opt tokens to see if grp is optional. */ + continue; + case REP: /* REP occurrence indicator for current token. */ + SET(pgsv->ttype, TREP|TXREP); + continue; + + case OR: /* OR connector found. */ + if BITOFF(pgh->ttype, TTAND) SET(pgh->ttype, TTOR); + else if (GET(pgh->ttype, TTAND)!=TTOR) + mderr(55, ntoa(gbuf->tu.tnum), (UNCH *)0); + continue; + case AND: /* AND connector found. 
*/ + if BITOFF(pgh->ttype, TTAND) SET(pgh->ttype, TTAND); + else if (GET(pgh->ttype, TTAND)!=TTAND) + mderr(55, ntoa(gbuf->tu.tnum), (UNCH *)0); + continue; + case SEQ: /* SEQ connector found. */ + if BITOFF(pgh->ttype, TTAND) SET(pgh->ttype, TTSEQ); + else if (GET(pgh->ttype, TTAND)!=TTSEQ) + mderr(55, ntoa(gbuf->tu.tnum), (UNCH *)0); + continue; + + case EE_: /* Entity ended (correctly or incorrectly). */ + if (es<essv) {synerr(37, pcb); essv = es;} + continue; + + case PIE_: /* PI entity reference (not permitted). */ + entpisw = 0; /* Reset PI entity indicator. */ + synerr(59, pcb); + continue; + + default: /* Syntax errors return in disgrace. */ + synerr(37, pcb); + return (struct thdr *)0; + } + if (pgh->tu.tnum>GRPCNT) { + mderr(113, ntoa(gbuf->tu.tnum), (UNCH *)0); + return (struct thdr *)0; + } + if (gbuf->tu.tnum>GRPGTCNT) { + mderr(114, ntoa(gbuf->tu.tnum), (UNCH *)0); + return (struct thdr *)0; + } + if (pgh->tu.tnum==1) SET(pgh->ttype, TTSEQ); /* Unit grp is SEQ. */ + /* An optional token in an OR group makes the group optional. */ + if (GET(pgh->ttype, TTMASK)==TTOR && optcnt) SET(pgh->ttype, TOPT); + /* If all tokens in any group are optional, so is the group. */ + if (pgh->tu.tnum<=optcnt) SET(pgh->ttype, TOPT); + + if (es!=essv) synerr(37, pcb); + return pg; /* Return pointer to GRPS token. */ +} +/* PARSENM: Parser for SGML names, which can be translated with LEXTRAN. + The input is read from the entity stack. CC is 1st char of name. + Returns a pointer to the parsed name. +*/ +UNCH *parsenm(tbuf, nc) +UNCH *tbuf; /* Buffer for name: >=NAMELEN+2. */ +int nc; /* Namecase translation: 1=yes; 0=no. */ +{ + UNCH len; /* Length of name (incl EOS & length byte). */ + + *(tbuf + (len = 1) ) = nc ? lextran[*FPOS] : *FPOS; + while ((NEWCC, (int)lextoke[*FPOS]>=NMC) && (len<NAMELEN)) { + TRACETKN(NMC, lextoke); + if (lextoke[*(tbuf + ++len) = (nc ? lextran[*FPOS] : *FPOS)]==EOB) { + --len; + entget(); + } + } + REPEATCC; /* Put back the non-token character. 
*/ + *(tbuf + ++len) = EOS; /* Terminate name with standard EOS. */ + *tbuf = ++len; /* Store length ahead of name. */ + return tbuf; +} +/* PARSETKN: Parser for start-tag attribute value tokens. + First character of token is already in *FPOS. + Returns a pointer to the parsed token. + Parsed token has EOS but no length byte. +*/ +#ifdef USE_PROTOTYPES +UNCH *parsetkn(UNCH *tbuf, UNCH scope, int maxlen) +#else +UNCH *parsetkn(tbuf, scope, maxlen) +UNCH *tbuf; /* Buffer for token: >=maxlen+1. */ +UNCH scope; /* Minimum lexical class allowed. */ +int maxlen; /* Maximum length of a token. */ +#endif +{ + int i = 1; + tbuf[0] = *FPOS; + while (i < maxlen) { + NEWCC; + if (lextoke[*FPOS] < scope) { + REPEATCC; + break; + } + TRACETKN(scope, lextoke); + if (*FPOS == EOBCHAR) + entget(); + else + tbuf[i++] = *FPOS; + } + tbuf[i] = EOS; + return tbuf; +} +/* PARSESEQ: Parser for blank sequences (i.e., space and TAB characters ). + First character of sequence is already in *FPOS. +*/ +VOID parseseq(tbuf, maxlen) +UNCH *tbuf; /* Buffer for storing found sequence. */ +int maxlen; /* Maximum length of a blank sequence. */ +{ + tbuf[0] = *FPOS; + datalen = 1; + for (;;) { + NEWCC; + if (*FPOS == EOBCHAR) { + entget(); + continue; + } + if ((lextoke[*FPOS] != SEP && *FPOS != SPCCHAR) + || datalen >= maxlen) + break; + tbuf[datalen++] = *FPOS; + TRACETKN(SEP, lextoke); + } +} +/* S2VALNM: Parser for attribute values that are tokenized like names. + The input is read from a string (hence S ("string") 2 ("to") VALNM). + It stops at the first bad character. + Returns a pointer to the created name. +*/ +#ifdef USE_PROTOTYPES +UNCH *s2valnm(UNCH *nm, UNCH *s, UNCH scope, int translate) +#else +UNCH *s2valnm(nm, s, scope, translate) +UNCH *nm; /* Name to be created. */ +UNCH *s; /* Source string to be parsed as name. */ +UNCH scope; /* Minimum lexical class allowed. */ +int translate; /* Namecase translation: 1=yes; 0=no. 
*/ +#endif +{ + UNCH len = 0; /* Length of name (incl EOS and length). */ + + for (; (int)lextoke[*s] >= scope && len < NAMELEN; s++) + nm[++len] = translate ? lextran[*s] : *s; + nm[++len] = EOS; /* Terminate name with standard EOS. */ + *nm = ++len; /* Store length ahead of name. */ + return nm; +} +/* PARSEVAL: Parser for attribute values. + The input is read from a string and tokenized in a buffer. + The input is terminated by EOS. + Each token is preceded by its actual length; there is no EOS. + If an error occurs while parsing, or + if a token doesn't conform, set the token count to 0 to show that + value was not tokenized and return the error code. + After successful parse, return buffer length and 0 error code. + The number of tokens found is set in external variable tokencnt. +*/ +int parseval(s, atype, tbuf) +UNCH *s; /* Source string to be parsed as token list. */ +UNS atype; /* Type of token list expected. */ +UNCH *tbuf; /* Work area for tokenization. */ +{ + int t; + UNCH *pt = tbuf; + + pcbval.newstate = 0; tokencnt = 0; + while (1) { + for (;;) { + pcbval.input = lextoke[*s]; + pcbval.state = pcbval.newstate; + pcbval.newstate = (*(pcbval.ptab + pcbval.state)) [pcbval.input]; + pcbval.action = (*(pcbval.ptab + pcbval.state+1)) [pcbval.input]; + TRACEVAL(&pcbval, atype, s, tokencnt); + if (pcbval.action != NOPA) + break; + s++; + } + + + switch (pcbval.action) { + case INVA: /* Invalid character; terminate parse. */ + if (*s == '\0') goto alldone; /* Normal termination. */ + tokencnt = 0; /* Value was not tokenized. */ + return(14); + case LENA: /* Length limit of token exceeded; end parse. */ + tokencnt = 0; /* Value was not tokenized. */ + return(15); + default: /* Token begun: NUMA, NASA, or NMTA. */ + break; + } + + ++tokencnt; /* One token per iteration. 
*/ + switch (atype) { + case AENTITY: + if (tokencnt>1) {tokencnt = 0; return(16);} + case AENTITYS: + if (pcbval.action!=NASA) {tokencnt = 0; return(17);} + s2valnm(pt, s, NMC, ENTCASE); + break; + + case AID: + case AIDREF: + case ANAME: + case ANOTEGRP: + if (tokencnt>1) {tokencnt = 0; return(16);} + case AIDREFS: + case ANAMES: + if (pcbval.action!=NASA) {tokencnt = 0; return(17);} + s2valnm(pt, s, NMC, NAMECASE); + break; + + case ANMTGRP: + case ANMTOKE: + if (tokencnt>1) {tokencnt = 0; return(16);} + case ANMTOKES: + /* No test needed because NMTA, NUMA and NASA are all valid. */ + s2valnm(pt, s, NMC, NAMECASE); + break; + + case ANUMBER: + if (tokencnt>1) {tokencnt = 0; return(16);} + case ANUMBERS: + if (pcbval.action!=NUMA) {tokencnt = 0; return(17);} + s2valnm(pt, s, NU, NAMECASE); + t = lextoke[s[*pt - 2]]; + if (t == NMS || t == NMC) {tokencnt = 0; return(17);} + break; + + case ANUTOKE: + if (tokencnt>1) {tokencnt = 0; return(16);} + case ANUTOKES: + if (pcbval.action!=NUMA) {tokencnt = 0; return(17);} + s2valnm(pt, s, NMC, NAMECASE); + break; + } + *pt -= 2; + s += *pt; + pt += *pt + 1; + } + alldone: + *pt++ = EOS; + if (*tbuf == '\0') + return 25; + if (atype < ATKNLIST) + *tbuf += 2; /* include length and EOS */ + return 0; +} +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +comment-column: 30 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/pcbrf.c b/usr.bin/sgmls/sgmls/pcbrf.c new file mode 100644 index 0000000..16786e5 --- /dev/null +++ b/usr.bin/sgmls/sgmls/pcbrf.c @@ -0,0 +1,1344 @@ +/* PCBRF: Parse tables for reference concrete syntax. +*/ +#include "config.h" +#include "entity.h" /* Templates for entity control blocks. */ +#include "action.h" /* Action names for all parsing. */ +#include "synxtrn.h" /* Declarations for concrete syntax constants. */ +#include "adl.h" /* Definitions for attribute list processing. 
*/ +/* PCBCONM: State and action table for content parse of mixed content. + Initial state assumes a start-tag was just processed. +*/ +/* Symbols for state names (end with a number). */ +#define ET0 0 /* Markup found or buffer flushed; no data. */ +#define DA0 2 /* Data in buffer. */ +#define DA1 4 /* Data and space in buffer. */ +#define ER0 6 /* ERO found; start lookahead buffer. */ +#define CR0 8 /* CRO found (ERO, RNI). */ +#define RS0 10 /* RS found; possible SR 3-6. */ +#define ME0 12 /* MSC found; possible SR26. */ +#define ME1 14 /* MSC, MSC found. */ +#define ES0 16 /* TAGO found; start lookahead buffer. */ +#define EE0 18 /* End-tag start (TAGO,ETI); move to lookahead buffer. */ +#define NE0 20 /* End-tag start (TAGO,NET); process NET if not end-tag. */ +#define MD0 22 /* MDO found (TAGO, MDO[2]). */ +#define MC0 24 /* MDO, COM found. */ +#define SC0 26 /* COM found; possible SR19-20. */ +#define SP0 28 /* Space found; data pending; possible SR7 or SR9. */ +#define SR0 30 /* SPCR found; possible SR7 or SR9. */ +#define TB0 32 /* TAB found; possible SR7 or SR9. */ + +int pcbcnet = ET0; /* PCBCONM: markup found or data buffer flushed.*/ +int pcbcnda = DA0; /* PCBCONM: data in buffer. 
*/ + +static UNCH +/* free nu nmc nms spc non ee eob rs re sep cde nsc ero + nmre com eti net lit spcr mdo msc mso pio rni tagc tago fce */ +et0 []={DA0 ,DA0 ,DA0 ,DA0 ,SP0 ,ET0 ,ET0 ,ET0 ,RS0 ,ET0 ,TB0 ,DA0 ,ET0 ,ER0 , + ET0 ,SC0 ,DA0 ,ET0 ,ET0 ,SR0 ,DA0 ,ME0 ,ET0 ,DA0 ,ET0 ,DA0 ,ES0 ,ET0 },/*et0*/ +et0a[]={DAS_,DAS_,DAS_,DAS_,DAS_,NON_,GET_,GET_,RSR_,SR2_,DAS_,DAS_,NSC_,LAS_, + REF_,NOP_,DAS_,NED_,SR10,DAS_,DAS_,NOP_,SR25,DAS_,SR11,DAS_,LAS_,FCE_}, + +da0 []={DA0 ,DA0 ,DA0 ,DA0 ,DA1 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 , + ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 },/*da0*/ +da0a[]={NOP_,NOP_,NOP_,NOP_,NOP_,DAF_,DAF_,DAF_,DAF_,DAF_,DAF_,NOP_,DAF_,DAF_, + DAF_,DAF_,NOP_,DAF_,DAF_,DAF_,NOP_,DAF_,DAF_,NOP_,NOP_,NOP_,DAF_,DAF_}, + +da1 []={DA0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 , + ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 },/*da1*/ +da1a[]={NOP_,NOP_,NOP_,NOP_,DAR_,DAF_,DAF_,DAR_,DAF_,DAR_,DAR_,NOP_,DAF_,DAF_, + DAF_,DAF_,NOP_,DAF_,DAF_,DAR_,NOP_,DAF_,DAF_,NOP_,NOP_,NOP_,DAF_,DAF_}, + +er0 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ER0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 , + ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,CR0 ,ET0 ,ET0 ,ET0 },/*er0*/ +er0a[]={LAF_,LAF_,LAF_,ER_ ,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_, + LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAM_,LAF_,LAF_,LAF_}, + +/* free nu nmc nms spc non ee eob rs re sep cde nsc ero + nmre com eti net lit spcr mdo msc mso pio rni tagc tago fce */ +cr0 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,CR0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 , + ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 },/*cr0*/ +cr0a[]={NLF_,CRN_,NLF_,CRA_,NLF_,NLF_,NLF_,GET_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_, + NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_}, + +rs0 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,RS0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 , + ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 
},/*rs0*/ +rs0a[]={SR3_,SR3_,SR3_,SR3_,SR4_,SR3_,SR3_,GET_,SR3_,SR5_,SR4_,SR3_,SR3_,SR3_, + SR3_,SR3_,SR3_,NED_,SR3_,SR4_,SR3_,SR3_,SR3_,SR3_,SR3_,SR3_,SR3_,SR3_}, + +me0 []={ET0, ET0, ET0, ET0, ET0 ,ET0, ET0, ME0, ET0 ,ET0 ,ET0 ,ET0, ET0, ET0, + ET0 ,ET0 ,ET0 ,ET0, ET0, ET0, ET0, ME1 ,ET0, ET0, ET0 ,ET0, ET0, ET0 },/*me0*/ +me0a[]={SR26,SR26,SR26,SR26,SR26,SR26,SR26,GET_,SR26,SR26,SR26,SR26,SR26,SR26, + SR26,SR26,SR26,SR26,SR26,SR26,SR26,NOP_,SR26,SR26,SR26,SR26,SR26,SR26}, + +me1 []={ET0, ET0, ET0, ET0, ET0 ,ET0, ET0, ME1, ET0 ,ET0 ,ET0 ,ET0, ET0, ET0, + ET0 ,ET0 ,ET0 ,ET0, ET0, ET0, ET0, ET0 ,ET0, ET0, ET0 ,ET0, ET0, ET0 },/*me1*/ +me1a[]={RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,GET_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_, + RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,MSE_,RBR_,RBR_}, + +/* free nu nmc nms spc non ee eob rs re sep cde nsc ero + nmre com eti net lit spcr mdo msc mso pio rni tagc tago fce */ +es0 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ES0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 , + ET0 ,ET0 ,EE0 ,NE0 ,ET0 ,ET0 ,MD0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 },/*es0*/ +es0a[]={LAF_,LAF_,LAF_,STG_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_, + LAF_,LAF_,LAM_,LAM_,LAF_,LAF_,LAM_,LAF_,LAF_,PIS_,LAF_,NST_,LAF_,LAF_}, + +ee0 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,EE0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 , + ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 },/*ee0*/ +ee0a[]={LAF_,LAF_,LAF_,ETG_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_, + LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,NET_,LAF_,LAF_}, + +ne0 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,NE0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 , + ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 },/*ne0*/ +ne0a[]={NLF_,NLF_,NLF_,ETG_,NLF_,NLF_,NLF_,GET_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_, + NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NET_,NLF_,NLF_}, + +/* free nu nmc nms spc non ee eob rs re sep cde nsc ero + nmre com eti net lit spcr mdo msc mso pio rni tagc tago fce */ +md0 
[]={ET0, ET0, ET0, ET0, ET0 ,ET0, ET0, MD0, ET0 ,ET0 ,ET0 ,ET0, ET0, ET0, + ET0 ,MC0 ,ET0 ,ET0, ET0, ET0, ET0, ET0 ,ET0, ET0, ET0 ,ET0, ET0, ET0 },/*md0*/ +md0a[]={LAF_,LAF_,LAF_,MD_ ,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_, + LAF_,LAM_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,MSS_,LAF_,LAF_,MDC_,LAF_,LAF_}, + +mc0 []={ET0, ET0, ET0, ET0, ET0, ET0 ,ET0, MC0, ET0 ,ET0, ET0 ,ET0, ET0, ET0, + ET0 ,ET0 ,ET0 ,ET0, ET0, ET0, ET0, ET0 ,ET0 ,ET0 ,ET0 ,ET0, ET0, ET0 },/*mc0*/ +mc0a[]={NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,GET_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_, + NLF_,MDC_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_}, + +sc0 []={ET0, ET0, ET0, ET0, ET0, ET0 ,ET0, SC0, ET0 ,ET0, ET0 ,ET0, ET0, ET0, + ET0 ,ET0 ,ET0 ,ET0, ET0, ET0, ET0, ET0 ,ET0 ,ET0 ,ET0 ,ET0, ET0, ET0 },/*sc0*/ +sc0a[]={SR19,SR19,SR19,SR19,SR19,SR19,SR19,GET_,SR19,SR19,SR19,SR19,SR19,SR19, + SR19,SR20,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19}, + +/* free nu nmc nms spc non ee eob rs re sep cde nsc ero + nmre com eti net lit spcr mdo msc mso pio rni tagc tago fce */ +sp0 []={DA0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 ,ET0 ,SP0 ,ET0 ,ET0 ,ET0 ,DA0 ,DA0 ,ET0 , + ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 },/*sp0*/ +sp0a[]={NOP_,NOP_,NOP_,NOP_,SR9_,DAF_,DAF_,GTR_,DAF_,SR7_,SR9_,NOP_,NOP_,DAF_, + DAF_,DAF_,NOP_,DAF_,DAF_,SR9_,NOP_,DAF_,DAF_,NOP_,NOP_,NOP_,DAF_,DAF_}, + +sr0 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,SR0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 , + ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 },/*sr0*/ +sr0a[]={SR8_,SR8_,SR8_,SR8_,SR9_,SR8_,SR8_,GET_,SR8_,SR7_,SR9_,SR8_,SR8_,SR8_, + SR8_,SR8_,SR8_,SR8_,SR8_,SR9_,SR8_,SR8_,SR8_,SR8_,SR8_,SR8_,SR8_,SR8_}, + +tb0 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,TB0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 , + ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 },/*tb0*/ +tb0a[]={SR1_,SR1_,SR1_,SR1_,SR9_,SR1_,SR1_,GET_,SR1_,SR7_,SR9_,SR1_,SR1_,SR1_, + 
SR1_,SR1_,SR1_,SR1_,SR1_,SR9_,SR1_,SR1_,SR1_,SR1_,SR1_,SR1_,SR1_,SR1_}, + +/* free nu nmc nms spc non ee eob rs re sep cde nsc ero + nmre com eti net lit spcr mdo msc mso pio rni tagc tago fce */ + +*conmtab[] = {et0, et0a, da0, da0a, da1, da1a, er0, er0a, cr0, cr0a, rs0, rs0a, + me0, me0a, me1, me1a, es0, es0a, ee0, ee0a, ne0, ne0a, md0, md0a, + mc0, mc0a, sc0, sc0a, sp0, sp0a, sr0, sr0a, tb0, tb0a }; +struct parse pcbconm = {"CONM", lexcnm, conmtab, 0, 0, 0, 0}; +#undef ET0 +#undef DA0 +#undef DA1 +#undef ER0 +#undef CR0 +#undef RS0 +#undef ME0 +#undef ME1 +#undef ES0 +#undef EE0 +#undef NE0 +#undef MD0 +#undef MC0 +#undef SC0 +#undef SP0 +#undef SR0 +#undef TB0 +/* PCBCONE: State and action table for content parse of element content. + Initial state assumes a start-tag was just processed. +*/ +/* Symbols for state names (end with a number). */ +#define ET2 0 /* Markup found. */ +#define ER2 2 /* ERO found; start lookahead buffer. */ +#define CR2 4 /* CRO found (ERO, RNI). */ +#define RS2 6 /* RS found; possible SR 3-6 if they were declared. */ +#define ME2 8 /* MSC found. */ +#define ME3 10 /* MSC, MSC found. */ +#define ES2 12 /* TAGO found; start lookahead buffer. */ +#define EE2 14 /* End-tag start (TAGO,ETI); move to lookahead buffer. */ +#define NE2 16 /* End-tag start (TAGO,NET); process NET if not end-tag. */ +#define MD2 18 /* MDO found (TAGO, MDO[2]). */ +#define MC2 20 /* MDO, COM found. */ +#define SC2 22 /* COM found; possible SR19-20 if they were mapped. */ +#define SP2 24 /* Space found; possible SR7 or SR9. */ +#define SR2 26 /* SPCR found; possible SR7 or SR9. */ +#define TB2 28 /* TAB found; possible SR7 or SR9. 
*/ + +static UNCH +/* free nu nmc nms spc non ee eob rs re sep cde nsc ero + nmre com eti net lit spcr mdo msc mso pio rni tagc tago fce */ +et2 []={ET2 ,ET2 ,ET2 ,ET2 ,SP2 ,ET2 ,ET2 ,ET2 ,RS2 ,ET2 ,TB2 ,ET2 ,ET2 ,ER2 , + ET2 ,SC2 ,ET2 ,ET2 ,ET2 ,SR2 ,ET2 ,ME2 ,ET2 ,ET2 ,ET2 ,ET2 ,ES2 ,ET2 },/*et2*/ +et2a[]={DCE_,DCE_,DCE_,DCE_,NOP_,DCE_,GET_,GET_,RS_ ,SR2_,NOP_,DCE_,DCE_,LAS_, + NOP_,NOP_,DCE_,NED_,SR10,NOP_,DCE_,NOP_,DCE_,DCE_,SR11,DCE_,LAS_,DCE_}, + +er2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ER2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 , + ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,CR2 ,ET2 ,ET2 ,ET2 },/*er2*/ +er2a[]={LAF_,LAF_,LAF_,ER_ ,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_, + LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAM_,LAF_,LAF_,LAF_}, + +cr2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,CR2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 , + ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 },/*cr2*/ +cr2a[]={NLF_,CRN_,NLF_,CRA_,NLF_,NLF_,NLF_,GET_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_, + NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_}, + +rs2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,RS2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 , + ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 },/*rs2*/ +rs2a[]={SR3_,SR3_,SR3_,SR3_,SR4_,SR3_,SR3_,GET_,SR3_,SR5_,SR4_,SR3_,SR3_,SR3_, + SR3_,SR3_,SR3_,NED_,SR3_,SR4_,SR3_,SR3_,SR3_,SR3_,SR3_,SR3_,SR3_,SR3_}, + +/* free nu nmc nms spc non ee eob rs re sep cde nsc ero + nmre com eti net lit spcr mdo msc mso pio rni tagc tago fce */ +me2 []={ET2, ET2, ET2, ET2, ET2 ,ET2, ET2, ME2, ET2 ,ET2 ,ET2 ,ET2, ET2, ET2, + ET2 ,ET2, ET2 ,ET2, ET2, ET2, ET2, ME3 ,ET2, ET2, ET2 ,ET2, ET2, ET2 },/*me2*/ +me2a[]={SR26,SR26,SR26,SR26,SR26,SR26,SR26,GET_,SR26,SR26,SR26,SR26,SR26,SR26, + SR26,SR26,SR26,SR26,SR26,SR26,SR26,NOP_,SR26,SR26,SR26,SR26,SR26,SR26}, + +me3 []={ET2, ET2, ET2, ET2, ET2 ,ET2, ET2, ME3, ET2 ,ET2 ,ET2 ,ET2, ET2, ET2, + ET2 ,ET2, ET2 ,ET2, ET2, ET2, ET2, ET2 ,ET2, ET2, ET2 ,ET2, ET2, ET2 
},/*me3*/ +me3a[]={RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,GET_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_, + RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,RBR_,MSE_,RBR_,RBR_}, + +es2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ES2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 , + ET2 ,ET2 ,EE2 ,NE2 ,ET2 ,ET2 ,MD2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 },/*es2*/ +es2a[]={LAF_,LAF_,LAF_,STG_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_, + LAF_,LAF_,LAM_,LAM_,LAF_,LAF_,LAM_,LAF_,LAF_,PIS_,LAF_,NST_,LAF_,LAF_}, + +ee2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,EE2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 , + ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 },/*ee2*/ +ee2a[]={LAF_,LAF_,LAF_,ETG_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_, + LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,NET_,LAF_,LAF_}, + +/* free nu nmc nms spc non ee eob rs re sep cde nsc ero + nmre com eti net lit spc mdo msc mso pio rni tagc tago fce */ +ne2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,NE2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 , + ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 },/*ne2*/ +ne2a[]={NLF_,NLF_,NLF_,ETG_,NLF_,NLF_,NLF_,GET_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_, + NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NET_,NLF_,NLF_}, + +md2 []={ET2, ET2, ET2, ET2, ET2 ,ET2, ET2, MD2, ET2 ,ET2 ,ET2 ,ET2, ET2, ET2, + ET2 ,MC2, ET2 ,ET2, ET2, ET2, ET2, ET2 ,ET2, ET2, ET2 ,ET2, ET2, ET2 },/*md2*/ +md2a[]={LAF_,LAF_,LAF_,MD_ ,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_, + LAF_,LAM_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,MSS_,LAF_,LAF_,MDC_,LAF_,LAF_}, + +mc2 []={ET2, ET2, ET2, ET2, ET2, ET2 ,ET2, MC2, ET2 ,ET2, ET2 ,ET2, ET2, ET2, + ET2 ,ET2, ET2 ,ET2, ET2, ET2, ET2, ET2 ,ET2 ,ET2 ,ET2 ,ET2, ET2, ET2 },/*mc2*/ +mc2a[]={NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,GET_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_, + NLF_,MDC_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_,NLF_}, + +sc2 []={ET2, ET2, ET2, ET2, ET2, ET2 ,ET2, SC2, ET2 ,ET2, ET2 ,ET2, ET2, ET2, + ET2 ,ET2 ,ET2 ,ET2, ET2, ET2, ET2, ET2 ,ET2 ,ET2 
,ET2 ,ET2, ET2, ET2 },/*sc2*/ +sc2a[]={SR19,SR19,SR19,SR19,SR19,SR19,SR19,GET_,SR19,SR19,SR19,SR19,SR19,SR19, + SR19,SR20,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19,SR19}, + +/* free nu nmc nms spc non ee eob rs re sep cde nsc ero + nmre com eti net lit spc mdo msc mso pio rni tagc tago fce */ +sp2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,SP2 ,RS2 ,ET2 ,ET2 ,ET2 ,ET2 ,ER2 , + ET2 ,SC2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ME2 ,ET2 ,ET2 ,ET2 ,ET2 ,ES2 ,ET2 },/*sp2*/ +sp2a[]={DCE_,DCE_,DCE_,DCE_,SR9_,DCE_,GET_,GET_,RS_ ,SR7_,SR9_,DCE_,DCE_,LAS_, + NOP_,NOP_,DCE_,NED_,SR10,SR9_,DCE_,LAS_,DCE_,DCE_,SR11,DCE_,LAS_,DCE_}, + +sr2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,SR2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 , + ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 },/*sr2*/ +sr2a[]={SR8_,SR8_,SR8_,SR8_,SR9_,SR8_,SR8_,GET_,SR8_,SR7_,SR9_,SR8_,SR8_,SR8_, + SR8_,SR8_,SR8_,SR8_,SR8_,SR9_,SR8_,SR8_,SR8_,SR8_,SR8_,SR8_,SR8_,SR8_}, + +tb2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,TB2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 , + ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 },/*tb2*/ +tb2a[]={SR1_,SR1_,SR1_,SR1_,SR9_,SR1_,SR1_,GET_,SR1_,SR7_,SR9_,SR1_,SR1_,SR1_, + SR1_,SR1_,SR1_,SR1_,SR1_,SR9_,SR1_,SR1_,SR1_,SR1_,SR1_,SR1_,SR1_,SR1_}, + +*conetab[] = {et2, et2a, er2, er2a, cr2, cr2a, rs2, rs2a, me2, me2a, me3, me3a, + es2, es2a, ee2, ee2a, ne2, ne2a, md2, md2a, mc2, mc2a, sc2, sc2a, + sp2, sp2a, sr2, sr2a, tb2, tb2a }; +struct parse pcbcone = {"CONE", lexcnm, conetab, 0, 0, 0, 0}; +#undef ET2 +#undef ER2 +#undef CR2 +#undef RS2 +#undef ME2 +#undef ME3 +#undef ES2 +#undef EE2 +#undef NE2 +#undef MD2 +#undef MC2 +#undef SC2 +#undef SP2 +#undef SR2 +#undef TB2 +/* PCBCONR: State and action table for content parse of replaceable character + data. Initial state assumes a start-tag was just processed. + Only entity references and character references are recognized. +*/ +/* Symbols for state names (end with a number). 
*/ +#define ET4 0 /* Markup found or buffer flushed; no data. */ +#define DA4 2 /* Data in buffer. */ +#define ER4 4 /* ERO found; start lookahead buffer. */ +#define CR4 6 /* CRO found (ER2, RNI). */ +#define ES4 8 /* TAGO found; start lookahead buffer. */ +#define EE4 10 /* End-tag start (TAGO,ETI); move to lookahead buffer. */ +#define NE4 12 /* End-tag start (TAGO,NET); process NET if not end-tag. */ + +static UNCH +/* free nu nmc nms spc non ee eob rs re sep cde nsc ero + nmre com eti net mdo msc mso pero pio rni tagc tago */ +et4 []={DA4 ,DA4 ,DA4 ,DA4 ,DA4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,DA4 ,DA4 ,ET4 ,ER4 , + ET4 ,DA4 ,DA4 ,ET4 ,DA4 ,DA4 ,DA4 ,DA4 ,DA4 ,DA4 ,DA4 ,ES4 },/*et4*/ +et4a[]={DAS_,DAS_,DAS_,DAS_,DAS_,NON_,EE_ ,GET_,RS_ ,REF_,DAS_,DAS_,NSC_,LAS_, + REF_,DAS_,DAS_,NED_,DAS_,DAS_,DAS_,DAS_,DAS_,DAS_,DAS_,LAS_}, + +da4 []={DA4 ,DA4 ,DA4 ,DA4 ,DA4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,DA4 ,DA4 ,ET4 ,ET4 , + ET4 ,DA4 ,DA4 ,ET4 ,DA4 ,DA4 ,DA4 ,DA4 ,DA4 ,DA4 ,DA4 ,ET4 },/*da4*/ +da4a[]={NOP_,NOP_,NOP_,NOP_,NOP_,DAF_,DAF_,DAF_,DAF_,DAF_,NOP_,NOP_,DAF_,DAF_, + DAF_,NOP_,NOP_,DAF_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,DAF_}, + +er4 []={ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ER4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 , + ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,CR4 ,ET4 ,ET4 },/*er4*/ +er4a[]={LAF_,LAF_,LAF_,ERX_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_, + LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAM_,LAF_,LAF_}, + +cr4 []={ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,CR4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 , + ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 },/*cr4*/ +cr4a[]={LAF_,CRN_,LAF_,CRA_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_, + LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_}, + +/* free nu nmc nms spc non ee eob rs re sep cde nsc ero + nmre com eti net mdo msc mso pero pio rni tagc tago */ +es4 []={ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ES4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 , + ET4 ,ET4 ,EE4 ,NE4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 },/*es4*/ 
+es4a[]={LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_, + LAF_,LAF_,LAM_,LAM_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_}, + +ee4 []={ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,EE4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 , + ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 },/*ee4*/ +ee4a[]={LAF_,LAF_,LAF_,ETC_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_, + LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,NET_,LAF_}, + +ne4 []={EE4 ,EE4 ,EE4 ,ET4 ,EE4 ,EE4 ,EE4 ,NE4 ,EE4 ,EE4 ,EE4 ,EE4 ,EE4 ,EE4 , + EE4 ,EE4 ,EE4 ,EE4 ,EE4 ,EE4 ,EE4 ,EE4 ,EE4 ,EE4 ,ET4 ,EE4 },/*ne4*/ +ne4a[]={RC2_,RC2_,RC2_,ETC_,RC2_,RC2_,RC2_,GET_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_, + RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,NET_,RC2_}, + +*conrtab[] = {et4, et4a, da4, da4a, er4, er4a, cr4, cr4a, + es4, es4a, ee4, ee4a, ne4, ne4a}; +struct parse pcbconr = {"CONR", lexcon, conrtab, 0, 0, 0, 0}; +#undef ET4 +#undef DA4 +#undef ER4 +#undef CR4 +#undef ES4 +#undef EE4 +#undef NE4 +/* PCBCONC: State and action table for content parse of character data. + Initial state assumes a start-tag was just processed. +*/ +/* Symbols for state names (end with a number). */ +#define ET6 0 /* Markup found or buffer flushed; no data. */ +#define DA6 2 /* Data in buffer. */ +#define ES6 4 /* TAGO found; start lookahead buffer. */ +#define EE6 6 /* End-tag start (TAGO,ETI); move to lookahead buffer. */ +#define NE6 8 /* End-tag start (TAGO,NET); process NET if not end-tag. 
*/ + +static UNCH +/* free nu nmc nms spc non ee eob rs re sep cde nsc ero + nmre com eti net mdo msc mso pero pio rni tagc tago */ +et6 []={DA6 ,DA6 ,DA6 ,DA6 ,DA6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,DA6 ,DA6 ,ET6 ,DA6 , + ET6 ,DA6 ,DA6 ,ET6 ,DA6 ,DA6 ,DA6 ,DA6 ,DA6 ,DA6 ,DA6 ,ES6 },/*et6*/ +et6a[]={DAS_,DAS_,DAS_,DAS_,DAS_,NON_,EOF_,GET_,RS_ ,REF_,DAS_,DAS_,NSC_,DAS_, + REF_,DAS_,DAS_,NED_,DAS_,DAS_,DAS_,DAS_,DAS_,DAS_,DAS_,LAS_}, + +da6 []={DA6 ,DA6 ,DA6 ,DA6 ,DA6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,DA6 ,DA6 ,ET6 ,ET6 , + ET6 ,DA6 ,DA6 ,ET6 ,DA6 ,DA6 ,DA6 ,DA6 ,DA6 ,DA6 ,DA6 ,ET6 },/*da6*/ +da6a[]={NOP_,NOP_,NOP_,NOP_,NOP_,DAF_,DAF_,DAF_,DAF_,DAF_,NOP_,NOP_,DAF_,DAF_, + DAF_,NOP_,NOP_,DAF_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,DAF_}, + +/* free nu nmc nms spc non ee eob rs re sep cde nsc ero + nmre com eti net mdo msc mso pero pio rni tagc tago */ +es6 []={ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ES6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 , + ET6 ,ET6 ,EE6 ,NE6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 },/*es6*/ +es6a[]={LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_, + LAF_,LAF_,LAM_,LAM_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_}, + +ee6 []={ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,EE6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 , + ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 ,ET6 },/*ee6*/ +ee6a[]={LAF_,LAF_,LAF_,ETC_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_, + LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,NET_,LAF_}, + +ne6 []={EE6 ,EE6 ,EE6 ,ET6 ,EE6 ,EE6 ,EE6 ,NE6 ,EE6 ,EE6 ,EE6 ,EE6 ,EE6 ,EE6 , + EE6 ,EE6 ,EE6 ,EE6 ,EE6 ,EE6 ,EE6 ,EE6 ,EE6 ,EE6 ,ET6 ,EE6 },/*ne6*/ +ne6a[]={RC2_,RC2_,RC2_,ETC_,RC2_,RC2_,RC2_,GET_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_, + RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,RC2_,NET_,RC2_}, + +*conctab[] = {et6, et6a, da6, da6a, es6, es6a, ee6, ee6a, ne6, ne6a}; +struct parse pcbconc = {"CONC", lexcon, conctab, 0, 0, 0, 0}; +#undef ET6 +#undef DA6 +#undef ES6 +#undef EE6 +#undef NE6 +/* PCBPRO: State and action table for prolog parse. 
+ Initial state assumes document just began. +*/ +/* Symbols for state names (end with a number). */ +#define ET7 0 /* Markup found. */ +#define ES7 2 /* TAGO found; start lookahead buffer. */ +#define MD7 4 /* MDO found (TAGO, MDO[2]). */ +#define MC7 6 /* MDO, COM found. */ + +static UNCH +/* free nu nmc nms spc non ee eob rs re sep cde nsc ero + nmre com eti net mdo msc mso pero pio rni tagc tago */ +et7 []={ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 , + ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ES7 },/*et7*/ +et7a[]={DCE_,DCE_,DCE_,DCE_,NOP_,DCE_,EE_ ,GET_,RS_ ,NOP_,NOP_,DCE_,DCE_,DCE_, + DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,LAS_}, + +es7 []={ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ES7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 , + ET7 ,ET7 ,ET7 ,ET7 ,MD7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 },/*es7*/ +es7a[]={PEP_,PEP_,PEP_,STE_,PEP_,PEP_,PEP_,GET_,PEP_,PEP_,PEP_,PEP_,PEP_,PEP_, + PEP_,PEP_,PEP_,PEP_,LAM_,PEP_,PEP_,PEP_,PIS_,PEP_,STE_,PEP_}, + +md7 []={ET7, ET7, ET7, ET7, ET7 ,ET7, ET7, MD7, ET7 ,ET7 ,ET7 ,ET7, ET7, ET7, + ET7, MC7, ET7, ET7, ET7, ET7 ,ET7, ET7, ET7, ET7 ,ET7, ET7 },/*md7*/ +md7a[]={LAF_,LAF_,LAF_,DTD_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_, + LAF_,LAM_,LAF_,LAF_,LAF_,LAF_,MSP_,LAF_,LAF_,LAF_,NOP_,LAF_}, + +mc7 []={ET7, ET7, ET7, ET7, ET7, ET7 ,ET7, MC7, ET7 ,ET7, ET7 ,ET7, ET7, ET7, + ET7, ET7, ET7, ET7, ET7, ET7 ,ET7 ,ET7, ET7 ,ET7 ,ET7, ET7 },/*mc7*/ +mc7a[]={LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_, + LAF_,MDC_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_}, + +*protab[] = {et7, et7a, es7, es7a, md7, md7a, mc7, mc7a}; +struct parse pcbpro = {"PRO", lexcon, protab, 0, 0, 0, 0}; +#undef ET7 +#undef ES7 +#undef MD7 +#undef MC7 +/* PCBMDS: State and action table for parse of markup declaration subset. + Initial state assumes subset just began (MSO found). +*/ +/* Symbols for state names (end with a number). */ +#define ET8 0 /* Markup found. 
*/ +#define ER8 2 /* PERO found; start lookahead buffer. */ +#define ME8 4 /* MSC found. */ +#define ME9 6 /* MSC, MSC found. */ +#define ES8 8 /* TAGO found; start lookahead buffer. */ +#define MD8 10 /* MDO found (TAGO, MDO[2]). */ +#define MC8 12 /* MDO, CD found. */ +#define DC8 14 /* Data characters found (erroneously). */ + +static UNCH +/* free nu nmc nms spc non ee eob rs re sep cde nsc ero + nmre com eti net mdo msc mso pero pio rni tagc tago */ +et8 []={DC8 ,DC8 ,DC8 ,DC8 ,ET8 ,DC8 ,ET8 ,ET8 ,ET8 ,ET8 ,ET8 ,DC8 ,DC8 ,DC8 , + DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,ME8 ,DC8 ,ER8 ,DC8 ,DC8 ,DC8 ,ES8 },/*et8*/ +et8a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,GET_,GET_,RS_ ,NOP_,NOP_,NOP_,SYS_,NOP_, + NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_}, + +er8 []={DC8 ,DC8 ,DC8 ,ET8 ,DC8 ,DC8 ,DC8 ,ER8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 , + DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 },/*er8*/ +er8a[]={NOP_,NOP_,NOP_,PER_,NOP_,SYS_,NOP_,GET_,NOP_,NOP_,NOP_,NOP_,SYS_,NOP_, + NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_}, + +me8 []={ET8, ET8, ET8, ET8, ET8 ,ET8, ET8, ME8, ET8 ,ET8 ,ET8 ,ET8, ET8, ET8, + ET8 ,ET8, ET8 ,ET8, ET8, ME9 ,ET8, ET8, ET8, ET8 ,ET8, ET8 },/*me8*/ +me8a[]={DTE_,DTE_,DTE_,DTE_,DTE_,DTE_,DTE_,GET_,DTE_,DTE_,DTE_,DTE_,DTE_,DTE_, + DTE_,DTE_,DTE_,DTE_,DTE_,NOP_,DTE_,DTE_,DTE_,DTE_,DTE_,DTE_}, + +me9 []={DC8, DC8, DC8, DC8, DC8 ,DC8, DC8, ME9, DC8 ,DC8 ,DC8 ,DC8, DC8, DC8, + DC8 ,DC8, DC8 ,DC8, DC8, DC8 ,DC8, DC8, DC8, DC8 ,ET8, DC8 },/*me9*/ +me9a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,NOP_,GET_,NOP_,NOP_,NOP_,NOP_,SYS_,NOP_, + NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,MSE_,NOP_}, + +/* free nu nmc nms spc non ee eob rs re sep cde nsc ero + nmre com eti net mdo msc mso pero pio rni tagc tago */ +es8 []={DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,ES8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 , + DC8 ,DC8 ,DC8 ,DC8 ,MD8 ,DC8 ,DC8 ,DC8 ,ET8 ,DC8 ,DC8 ,DC8 },/*es8*/ +es8a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,NOP_,GET_,NOP_,NOP_,NOP_,NOP_,SYS_,NOP_, + 
NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,PIS_,NOP_,NOP_,NOP_}, + +md8 []={DC8, DC8, DC8, ET8, DC8 ,DC8, DC8, MD8, DC8 ,DC8 ,DC8 ,DC8, DC8, DC8, + DC8 ,MC8, DC8 ,DC8, DC8, DC8 ,ET8, DC8, DC8, DC8 ,ET8, DC8 },/*md8*/ +md8a[]={NOP_,NOP_,NOP_,MD_ ,NOP_,SYS_,NOP_,GET_,NOP_,NOP_,NOP_,NOP_,SYS_,NOP_, + NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,MSS_,NOP_,NOP_,NOP_,NOP_,NOP_}, + +mc8 []={DC8, DC8, DC8, DC8, DC8, DC8 ,DC8, MC8, DC8 ,DC8, DC8 ,DC8, DC8, DC8, + DC8 ,ET8, DC8 ,DC8, DC8, DC8 ,DC8 ,DC8, DC8 ,DC8 ,DC8, DC8 },/*mc8*/ +mc8a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,NOP_,GET_,NOP_,NOP_,NOP_,NOP_,SYS_,NOP_, + NOP_,MDC_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_}, + +dc8 []={DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,DC8 , + DC8 ,DC8 ,DC8 ,DC8 ,DC8 ,ET8 ,DC8 ,ET8 ,DC8 ,DC8 ,DC8 ,ET8 },/*dc8*/ +dc8a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,GET_,GET_,RS_ ,NOP_,NOP_,NOP_,SYS_,NOP_, + NOP_,NOP_,NOP_,NOP_,NOP_,CIR_,NOP_,CIR_,NOP_,NOP_,NOP_,CIR_}, + +*mdstab[] = {et8, et8a, er8, er8a, me8, me8a, me9, me9a, + es8, es8a, md8, md8a, mc8, mc8a, dc8, dc8a}; +struct parse pcbmds = {"MDS", lexcon, mdstab, 0, 0, 0, 0}; +#undef ET8 +#undef ER8 +#undef ME8 +#undef ME9 +#undef ES8 +#undef MD8 +#undef MC8 +#undef DC8 +/* PCBGRCM: State and action table for content model group. + Groups can nest. Reserved names are allowed. + Data tag token groups are allowed. + A non-reserved name or model group can have a suffix. + Columns are based on LEXGRP.C. +*/ +/* Symbols for state names (end with a number). */ +#define TK1 0 /* Token expected: name, #CHARS, data tag grp, model. */ +#define CO1 2 /* Connector between tokens expected. */ +#define ER1 4 /* PERO found when token was expected. */ +#define SP1 6 /* Name or model: suffix or connector expected. */ +#define RN1 8 /* RNI found; possible #PCDATA. */ +#define DG1 10 /* Data tag: group begun; name expected. */ +#define DN1 12 /* Data tag: name found; SEQ connector expected. 
*/ +#define DT1 14 /* Data tag: ignore template and pattern; MSC expected. */ +#define DR1 16 /* PERO found when data tag name was expected. */ +#define LI1 18 /* Literal in data tag group; search for LIT. */ +#define LA1 20 /* Literal in data tag group; search for LITA. */ + +static UNCH +/* bit nmc nms re spc non ee eob rs and grpc grpo lit lita + dtgc dtgo opt or pero plus rep rni seq refc */ +tk01 []={TK1 ,TK1 ,SP1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 , + TK1 ,DG1 ,TK1 ,TK1 ,ER1 ,TK1 ,TK1 ,RN1 ,TK1 ,TK1 },/*tk1*/ +tk01a[]={INV_,INV_,NAS_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,INV_,GRP_,INV_,INV_, + INV_,GRP_,INV_,INV_,NOP_,INV_,INV_,NOP_,INV_,INV_}, + +co01 []={TK1 ,TK1 ,TK1 ,CO1 ,CO1 ,CO1 ,CO1 ,CO1 ,CO1 ,TK1 ,SP1 ,TK1 ,TK1 ,TK1 , + TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*co1*/ +co01a[]={INV_,INV_,INV_,NOP_,NOP_,SYS_,EE_ ,GET_,RS_ ,AND ,GRPE,INV_,INV_,INV_, + INV_,INV_,INV_,OR ,INV_,INV_,INV_,INV_,SEQ ,INV_}, + +er01 []={TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,ER1 ,TK1 ,ER1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 , + TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*er1*/ +er01a[]={PCI_,PCI_,PER_,PCI_,PCI_,SYS_,PCI_,GET_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_, + PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_}, + +sp01 []={TK1 ,TK1 ,TK1 ,CO1 ,CO1 ,SP1 ,CO1 ,SP1 ,CO1 ,TK1 ,SP1 ,TK1 ,TK1 ,TK1 , + TK1 ,TK1 ,CO1 ,TK1 ,TK1 ,CO1 ,CO1 ,TK1 ,TK1 ,TK1 },/*sp1*/ +sp01a[]={INV_,LEN_,LEN_,NOP_,NOP_,SYS_,EE_ ,GET_,RS_ ,AND ,GRPE,INV_,INV_,INV_, + INV_,INV_,OPT ,OR ,INV_,REP ,OREP,INV_,SEQ ,LEN_}, + +/* bit nmc nms spc spc non ee eob rs and grpc grpo lit lita + dtgc dtgo opt or pero plus rep rni seq refc */ +rn01 []={TK1 ,TK1 ,CO1 ,TK1 ,TK1 ,RN1 ,TK1 ,RN1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 , + TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*rn1*/ +rn01a[]={PCI_,PCI_,RNS_,PCI_,PCI_,SYS_,PCI_,GET_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_, + PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_}, + +dg01 []={TK1 ,TK1 ,DN1 ,DG1 ,DG1 ,DG1 ,DG1 ,DG1 ,DG1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 , + TK1 ,TK1 ,TK1 
,TK1 ,DR1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*dg1*/ +dg01a[]={INV_,INV_,NAS_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,INV_,INV_,INV_,INV_, + INV_,INV_,INV_,INV_,NOP_,INV_,INV_,INV_,INV_,INV_}, + +dn01 []={TK1 ,TK1 ,TK1 ,DN1 ,DN1 ,DN1 ,DN1 ,DN1 ,DN1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 , + TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,DT1 ,TK1 },/*dn1*/ +dn01a[]={INV_,INV_,INV_,NOP_,NOP_,SYS_,EE_ ,GET_,RS_ ,INV_,INV_,INV_,INV_,INV_, + INV_,INV_,INV_,INV_,INV_,INV_,INV_,INV_,DTAG,INV_}, + +dt01 []={TK1 ,TK1 ,TK1 ,DT1 ,DT1 ,DT1 ,DT1 ,DT1 ,DT1 ,TK1 ,DT1 ,DT1 ,LI1 ,LA1 , + CO1 ,TK1 ,TK1 ,DT1 ,DT1 ,TK1 ,TK1 ,TK1 ,DT1 ,TK1 },/*dt1*/ +dt01a[]={INV_,INV_,INV_,NOP_,NOP_,SYS_,EE_ ,GET_,RS_ ,INV_,NOP_,NOP_,NOP_,NOP_, + GRPE,INV_,INV_,NOP_,NOP_,INV_,INV_,INV_,NOP_,INV_}, + +/* bit nmc nms spc spc non ee eob rs and grpc grpo lit lita + dtgc dtgo opt or pero plus rep rni seq refc */ +dr01 []={TK1 ,TK1 ,DG1 ,TK1 ,TK1 ,DR1 ,TK1 ,DR1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 , + TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*dr1*/ +dr01a[]={PCI_,PCI_,PER_,PCI_,PCI_,SYS_,PCI_,GET_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_, + PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_}, + +li01 []={LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,DT1 ,LI1 , + LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 },/*li1*/ +li01a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_, + NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_}, + +la01 []={LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,DT1 , + LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 },/*la1*/ +la01a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_, + NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_}, + +*grcmtab[] = {tk01, tk01a, co01, co01a, er01, er01a, sp01, sp01a, + rn01, rn01a, dg01, dg01a, dn01, dn01a, dt01, dt01a, + dr01, dr01a, li01, li01a, la01, la01a}; +struct parse pcbgrcm = {"GRCM", lexgrp, grcmtab, 0, 0, 0, 0}; +#undef TK1 +#undef CO1 +#undef ER1 +#undef SP1 +#undef RN1 +#undef DG1 +#undef DN1 
+#undef DT1 +#undef DR1 +#undef LI1 +#undef LA1 +/* PCBGRCS: State and action table for content model suffix. + If suffix occurs, process it. Otherwise, put character + back for the next parse. +*/ +/* Symbols for state names (end with a number). */ +#define SP4 0 /* Suffix expected. */ + +static UNCH +/* bit nmc nms re spc non ee eob rs and grpc grpo lit lita + dtgc dtgo opt or pero plus rep rni seq refc */ +sp04 []={SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 , + SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 ,SP4 },/*sp4*/ +sp04a[]={RCR_,RCR_,RCR_,RCR_,RCR_,SYS_,EE_ ,GET_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_, + RCR_,RCR_,OPT ,RCR_,RCR_,REP ,OREP,RCR_,RCR_,RCR_}, + +*grcstab[] = {sp04, sp04a}; +struct parse pcbgrcs = {"GRCS", lexgrp, grcstab, 0, 0, 0, 0}; +#undef SP4 +/* PCBGRNT: State and action table for name token group parse. + Groups cannot nest. Reserved names are not allowed. + No suffixes or data tag pattern groups. +*/ +/* Symbols for state names (end with a number). */ +#define TK1 0 /* Token expected: name, #CHARS, data tag grp, model. */ +#define CO1 2 /* Connector between tokens expected. */ +#define ER1 4 /* PERO found when token was expected. 
*/ + +static UNCH +/* bit nmc nms re spc non ee eob rs and grpc grpo lit lita + dtgc dtgo opt or pero plus rep rni seq refc */ +tk02 []={TK1 ,CO1 ,CO1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 , + TK1 ,TK1 ,TK1 ,TK1 ,ER1 ,TK1 ,TK1 ,TK1 ,TK1 ,CO1 },/*tk1*/ +tk02a[]={INV_,NMT_,NMT_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,INV_,INV_,INV_,INV_, + INV_,INV_,INV_,INV_,NOP_,INV_,INV_,INV_,INV_,NMT_}, + +co02 []={TK1 ,TK1 ,TK1 ,CO1 ,CO1 ,CO1 ,CO1 ,CO1 ,CO1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 , + TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*co1*/ +co02a[]={INV_,INV_,INV_,NOP_,NOP_,SYS_,EE_ ,GET_,RS_ ,NOP_,GRPE,INV_,INV_,INV_, + INV_,INV_,INV_,NOP_,INV_,INV_,INV_,INV_,NOP_,INV_}, + +er02 []={TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,ER1 ,TK1 ,ER1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 , + TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*er1*/ +er02a[]={PCI_,PCI_,PER_,PCI_,PCI_,SYS_,PCI_,GET_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_, + PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_}, + +*grnttab[] = {tk02, tk02a, co02, co02a, er02, er02a}; +struct parse pcbgrnt = {"GRNT", lexgrp, grnttab, 0, 0, 0, 0}; +#undef TK1 +#undef CO1 +#undef ER1 +/* PCBGRNM: State and action table for name group parse. + Groups cannot nest. Reserved names are not allowed. + No suffixes or data tag pattern groups. +*/ +/* Symbols for state names (end with a number). */ +#define TK1 0 /* Token expected: name, #CHARS, data tag grp, model. */ +#define CO1 2 /* Connector between tokens expected. */ +#define ER1 4 /* PERO found when token was expected. 
*/ + +static UNCH +/* bit nmc nms re spc non ee eob rs and grpc grpo lit lita + dtgc dtgo opt or pero plus rep rni seq refc */ +tk03 []={TK1 ,TK1 ,CO1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 , + TK1 ,TK1 ,TK1 ,TK1 ,ER1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*tk1*/ +tk03a[]={INV_,INV_,NAS_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,INV_,INV_,INV_,INV_, + INV_,INV_,INV_,INV_,NOP_,INV_,INV_,INV_,INV_,INV_}, + +co03 []={TK1 ,TK1 ,TK1 ,CO1 ,CO1 ,CO1 ,CO1 ,CO1 ,CO1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 , + TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*co1*/ +co03a[]={INV_,INV_,INV_,NOP_,NOP_,SYS_,EE_ ,GET_,RS_ ,NOP_,GRPE,INV_,INV_,INV_, + INV_,INV_,INV_,NOP_,INV_,INV_,INV_,INV_,NOP_,INV_}, + +er03 []={TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,ER1 ,TK1 ,ER1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 , + TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*er1*/ +er03a[]={PCI_,PCI_,PER_,PCI_,PCI_,SYS_,PCI_,GET_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_, + PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_}, + +*grnmtab[] = {tk03, tk03a, co03, co03a, er03, er03a}; +struct parse pcbgrnm = {"GRNM", lexgrp, grnmtab, 0, 0, 0, 0}; +#undef TK1 +#undef CO1 +#undef ER1 +/* PCBREF: State and action table to find the end of entity, parameter entity, + and character references. The opening delimiter and name + have already been found; the parse determines whether the + tokenization of the name ended normally and processes the REFC. +*/ +/* Symbols for state names (end with a number). */ +#define ER5 0 /* Handle REFC or other entity reference termination. */ +#define ER6 2 /* Return to caller and reset state for next call. 
*/ + +static UNCH +/* bit nmc nms re spc non ee eob rs and grpc grpo lit lita + dtgc dtgo opt or pero plus rep rni seq refc */ +er05 []={ER5 ,ER6 ,ER6 ,ER6 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 , + ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER6 },/*er5*/ +er05a[]={RCR_,LEN_,LEN_,NOP_,RCR_,SYS_,RCR_,GET_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_, + RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,NOP_}, + +er06 []={ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 , + ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 ,ER5 },/*er6*/ +er06a[]={RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_, + RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_,RCR_}, + +*reftab[]={er05, er05a, er06, er06a}; +struct parse pcbref = {"ENTREF", lexgrp, reftab, 0, 0, 0, 0}; +#undef ER5 +#undef ER6 +/* +Use (typical) Name Ending Chsw References RS RE SEP +Parameter literal LITPC LIT/A OK Parm,Char RSM_ LAM_ LAM_ + Data tag template NO +System ID LITC LIT/A n/a none RSM_ LAM_ LAM_ + Processing instruction PIC +Attribute value LITRV LIT/A NO Gen,Char RS_ FUN_ FUN_ +Minimum literal LITV LIT/A n/a none RS_ FUN_ MLE_ +*/ +/* PCBLITP: Literal parse with parameter and character references; + no function character translation. +*/ +/* Symbols for state names (end with a number). */ +#define DA0 0 /* Data in buffer. */ +#define ER0 2 /* ERO found. */ +#define CR0 4 /* CRO found (ER0, RNI). */ +#define PR0 6 /* PRO found (for PCBLITP). 
*/ + +static UNCH +/* free num min nms spc non ee eob rs re sep cde nsc ero + mdo msc mso pero rni tagc tago litc */ +da13 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,ER0 , + DA0 ,DA0 ,DA0 ,PR0 ,DA0 ,DA0 ,DA0 ,DA0 },/*da3*/ +da13a[]={MLA_,MLA_,MLA_,MLA_,MLA_,NON_,EE_ ,GET_,RSM_,MLA_,MLA_,MLA_,NSC_,NOP_, + MLA_,MLA_,MLA_,NOP_,MLA_,MLA_,MLA_,TER_}, + +er13 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,ER0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 , + DA0 ,DA0 ,DA0 ,DA0 ,CR0 ,DA0 ,DA0 ,DA0 },/*er3*/ +er13a[]={LPR_,LPR_,LPR_,LPR_,LPR_,LPR_,LPR_,GET_,LPR_,LPR_,LPR_,LPR_,LPR_,LPR_, + LPR_,LPR_,LPR_,LPR_,NOP_,LPR_,LPR_,LPR_}, + +cr13 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,CR0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 , + DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*cr3*/ +cr13a[]={LP2_,CRN_,LP2_,CRA_,LP2_,LP2_,LP2_,GET_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_, + LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_}, + +pr13 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,PR0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 , + DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*pr3*/ +pr13a[]={LPR_,LPR_,LPR_,PEX_,LPR_,LPR_,LPR_,GET_,LPR_,LPR_,LPR_,LPR_,LPR_,LPR_, + LPR_,LPR_,LPR_,LPR_,LPR_,LPR_,LPR_,LPR_}, + +*litptab[] = {da13, da13a, er13, er13a, cr13, cr13a, pr13, pr13a}; +struct parse pcblitp = {"LITP", lexlms, litptab, 0, 0, 0, 0}; +#undef DA0 +#undef ER0 +#undef CR0 +#undef PR0 +/* PCBLITC: Literal parse; no references; no function char translation. + Used for character data (system data). +*/ +/* Symbols for state names (end with a number). */ +#define DA0 0 /* Data in buffer. 
*/ + +static UNCH +/* free num min nms spc non ee eob rs re sep cde nsc ero + mdo msc mso pero rni tagc tago litc */ +da2 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 , + DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*da2*/ +da2a[]={MLA_,MLA_,MLA_,MLA_,MLA_,SYS_,EOF_,GET_,RSM_,MLA_,MLA_,MLA_,SYS_,MLA_, + MLA_,MLA_,MLA_,MLA_,MLA_,MLA_,MLA_,TER_}, + +*litctab[] = {da2, da2a}; +struct parse pcblitc = {"LITC", lexlms, litctab, 0, 0, 0, 0}; +#undef DA0 +/* PCBLITR: Attribute value parse; general and character references; + function chars are translated. +*/ +/* Symbols for state names (end with a number). */ +#define DA0 0 /* Data in buffer. */ +#define ER0 2 /* ERO found. */ +#define CR0 4 /* CRO found (ER0, RNI). */ + +static UNCH +/* free num min nms spc non ee eob rs re sep cde nsc ero + mdo msc mso pero rni tagc tago litc */ +da11 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,ER0 , + DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*da1*/ +da11a[]={MLA_,MLA_,MLA_,MLA_,MLA_,NON_,EE_ ,GET_,RS_ ,FUN_,FUN_,MLA_,NSC_,NOP_, + MLA_,MLA_,MLA_,MLA_,MLA_,MLA_,MLA_,TER_}, + +er11 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,ER0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 , + DA0 ,DA0 ,DA0 ,DA0 ,CR0 ,DA0 ,DA0 ,DA0 },/*er1*/ +er11a[]={LPR_,LPR_,LPR_,ERX_,LPR_,LPR_,LPR_,GET_,LPR_,LPR_,LPR_,LPR_,LPR_,LPR_, + LPR_,LPR_,LPR_,LPR_,NOP_,LPR_,LPR_,LPR_}, + +cr11 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,CR0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 , + DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*cr1*/ +cr11a[]={LP2_,CRN_,LP2_,CRA_,LP2_,LP2_,LP2_,GET_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_, + LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_}, + +*litrtab[] = {da11, da11a, er11, er11a, cr11, cr11a}; +struct parse pcblitr = {"LITR", lexlms, litrtab, 0, 0, 0, 0}; +#undef DA0 +#undef ER0 +#undef CR0 +/* PCBLITV: Literal parse; no references; RS ignored; RE/SPACE sequences + become single SPACE. Only minimum data characters allowed. +*/ +/* Symbols for state names (end with a number). 
*/ +#define LS0 0 /* Leading SPACE or RE found. */ +#define VA0 2 /* Valid character found. */ +#define SP0 4 /* SPACE/RE sequence begun. */ + +static UNCH +/* free num min nms spc non ee eob rs re sep cde nsc ero + mdo msc mso pero rni tagc tago litc */ +ls10 []={VA0 ,VA0 ,VA0 ,VA0 ,LS0 ,VA0 ,LS0 ,LS0 ,LS0 ,LS0 ,LS0 ,VA0 ,VA0 ,VA0 , + VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,LS0 },/*ls0*/ +ls10a[]={MLE_,MLA_,MLA_,MLA_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,MLE_,SYS_,SYS_,MLE_, + MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,TER_}, +va10 []={VA0 ,VA0 ,VA0 ,VA0 ,SP0 ,VA0 ,VA0 ,VA0 ,VA0 ,SP0 ,SP0 ,VA0 ,VA0 ,VA0 , + VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,LS0 },/*va0*/ +da10a[]={MLE_,MLA_,MLA_,MLA_,MLA_,SYS_,EOF_,GET_,RS_ ,FUN_,MLE_,SYS_,SYS_,MLE_, + MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,TER_}, +sp10 []={VA0 ,VA0 ,VA0 ,VA0 ,SP0 ,VA0 ,VA0 ,SP0 ,SP0 ,SP0 ,SP0 ,VA0 ,VA0 ,VA0 , + VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,LS0 },/*sp0*/ +sp10a[]={MLE_,MLA_,MLA_,MLA_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,MLE_,SYS_,SYS_,MLE_, + MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,RPR_}, + +*litvtab[] = {ls10, ls10a, va10, da10a, sp10, sp10a}; +struct parse pcblitv = {"LITV", lexlms, litvtab, 0, 0, 0, 0}; +#undef LS0 +#undef VA0 +#undef SP0 +/* PCBLITT: Tokenized attribute value parse. +*/ + +/* PCBLITT: Attribute value parse; general and character references; + function chars are translated. +*/ +/* Symbols for state names (end with a number). 
*/ +#define SP0 0 /* Ignore spaces */ +#define DA0 2 /* Data character */ +#define ER0 4 /* ERO found; ignore space */ +#define ER1 6 /* ERO found; don't ignore space */ +#define CR0 8 /* CRO found (ER0, RNI); ignore space */ +#define CR1 10 /* CR0 found; don't ignore space */ + +int pcblittda = DA0; + +static UNCH +/* free num min nms spc non ee eob rs re sep cde nsc ero + mdo msc mso pero rni tagc tago litc */ + +sp14 []={DA0 ,DA0 ,DA0 ,DA0 ,SP0 ,DA0 ,DA0 ,SP0 ,SP0 ,SP0 ,SP0 ,DA0 ,DA0 ,ER0 , + DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*sp0*/ +sp14a[]={MLA_,MLA_,MLA_,MLA_,NOP_,NON_,EE_ ,GET_,RS_ ,NOP_,NOP_,MLA_,NSC_,NOP_, + MLA_,MLA_,MLA_,MLA_,MLA_,MLA_,MLA_,TER_}, + +da14 []={DA0 ,DA0 ,DA0 ,DA0 ,SP0 ,DA0 ,DA0 ,DA0 ,DA0 ,SP0 ,SP0 ,DA0 ,DA0 ,ER1 , + DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,SP0 },/*da0*/ +da14a[]={MLA_,MLA_,MLA_,MLA_,MLA_,NON_,EE_ ,GET_,RS_ ,FUN_,FUN_,MLA_,NSC_,NOP_, + MLA_,MLA_,MLA_,MLA_,MLA_,MLA_,MLA_,TER_}, + +er14 []={DA0 ,DA0 ,DA0 ,SP0 ,DA0 ,DA0 ,DA0 ,ER0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 , + DA0 ,DA0 ,DA0 ,DA0 ,CR0 ,DA0 ,DA0 ,DA0 },/*er0*/ +er14a[]={LPR_,LPR_,LPR_,ERX_,LPR_,LPR_,LPR_,GET_,LPR_,LPR_,LPR_,LPR_,LPR_,LPR_, + LPR_,LPR_,LPR_,LPR_,NOP_,LPR_,LPR_,LPR_}, + +er15 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,ER1 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 , + DA0 ,DA0 ,DA0 ,DA0 ,CR1 ,DA0 ,DA0 ,DA0 },/*er1*/ +er15a[]={LPR_,LPR_,LPR_,ERX_,LPR_,LPR_,LPR_,GET_,LPR_,LPR_,LPR_,LPR_,LPR_,LPR_, + LPR_,LPR_,LPR_,LPR_,NOP_,LPR_,LPR_,LPR_}, + +cr14 []={DA0 ,DA0 ,DA0 ,SP0 ,DA0 ,DA0 ,DA0 ,CR0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 , + DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*cr0*/ +cr14a[]={LP2_,CRN_,LP2_,CRA_,LP2_,LP2_,LP2_,GET_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_, + LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_}, + +cr15 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,CR1 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 , + DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*cr1*/ +cr15a[]={LP2_,CRN_,LP2_,CRA_,LP2_,LP2_,LP2_,GET_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_, + LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_,LP2_}, + +*litttab[] = {sp14, sp14a, 
da14, da14a, er14, er14a, er15, er15a, cr14, cr14a, + cr15, cr15a}; +struct parse pcblitt = {"LITT", lexlms, litttab, 0, 0, 0, 0}; +#undef SP0 +#undef DA0 +#undef ER0 +#undef ER1 +#undef CR0 +#undef CR1 +/* PCBMD: State and action table for markup declaration tokenization. + Columns are based on LEXMARK.C. +*/ +/* Symbols for state names (end with a number). */ +#define SP1 0 /* Separator before token expected. */ +#define TK1 2 /* Token expected. */ +#define CM0 4 /* COM[1] found when sep expected: possible comment, MGRP.*/ +#define CM1 6 /* COM[1] found: possible comment, MGRP, or minus.*/ +#define CM2 8 /* COM[2] found; in comment. */ +#define CM3 10 /* Ending COM[1] found; end comment or continue it. */ +#define PR1 12 /* PERO found when token was expected. */ +#define PX1 14 /* PLUS found: PGRP or error. */ +#define RN1 16 /* RNI found; possible reserved name start. */ + +int pcbmdtk = TK1; /* PCBMD: token expected. */ + +static UNCH +/* bit nmc num nms spc non ee eob rs com eti grpo lit lita + dso dsc pero plus refc rni tagc tago vi */ +sp21 []={SP1 ,SP1 ,SP1 ,SP1 ,TK1 ,SP1 ,TK1 ,SP1 ,TK1 ,CM0 ,SP1 ,TK1 ,TK1 ,TK1 , + TK1 ,SP1 ,PR1 ,PX1 ,SP1 ,RN1 ,SP1 ,SP1 ,SP1 }, +sp21a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EE_ ,GET_,RS_ ,NOP_,INV_,GRPS,LIT ,LITE, + MDS ,INV_,NOP_,NOP_,INV_,NOP_,EMD ,INV_,INV_}, + +tk21 []={SP1 ,SP1 ,SP1 ,SP1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,CM1 ,SP1 ,TK1 ,TK1 ,TK1 , + TK1 ,SP1 ,PR1 ,PX1 ,SP1 ,RN1 ,SP1 ,SP1 ,SP1 }, +tk21a[]={INV_,NMT ,NUM ,NAS ,NOP_,SYS_,EE_ ,GET_,RS_ ,NOP_,INV_,GRPS,LIT ,LITE, + MDS ,INV_,NOP_,NOP_,INV_,NOP_,EMD ,INV_,INV_}, + +/* bit nmc num nms spc non ee eob rs com eti grpo lit lita + dso dsc pero plus refc rni tagc tago vi */ +cm20 []={SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,CM0 ,SP1 ,CM0 ,SP1 ,CM2 ,SP1 ,SP1 ,SP1 ,SP1 , + SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 }, +cm20a[]={LNR_,LNR_,LNR_,LNR_,LNR_,SYS_,LNR_,GET_,LNR_,NOP_,LNR_,LNR_,LNR_,LNR_, + LNR_,LNR_,LNR_,LNR_,LNR_,LNR_,LNR_,LNR_,LNR_}, + +cm21 []={TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,CM1 ,TK1 ,CM1 
,TK1 ,CM2 ,TK1 ,TK1 ,TK1 ,TK1 , + TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 }, +cm21a[]={CDR ,CDR ,CDR ,CDR ,CDR ,SYS_,CDR ,GET_,CDR ,NOP_,CDR ,MGRP,CDR ,CDR , + CDR ,CDR ,CDR ,CDR ,CDR ,CDR ,CDR ,CDR ,CDR }, + +cm22 []={CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,TK1 ,CM2 ,CM2 ,CM3 ,CM2 ,CM2 ,CM2 ,CM2 , + CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 }, +cm22a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_, + NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_}, + +/* bit nmc num nms spc non ee eob rs com eti grpo lit lita + dso dsc pero plus refc rni tagc tago vi */ +cm23 []={CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM3 ,TK1 ,CM3 ,CM2 ,TK1 ,CM2 ,CM2 ,CM2 ,CM2 , + CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 }, +cm23a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_, + NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_}, + +pr21 []={SP1 ,SP1 ,SP1 ,TK1 ,TK1 ,PR1 ,SP1 ,PR1 ,TK1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 , + SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,TK1 ,SP1 ,SP1 ,SP1 }, +pr21a[]={PCI_,PCI_,PCI_,PER_,PEN ,SYS_,PENR,GET_,PEN ,PENR,PCI_,PCI_,PCI_,PCI_, + PCI_,PCI_,PENR,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_}, + +px21 []={SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,PX1 ,SP1 ,PX1 ,SP1 ,SP1 ,SP1 ,TK1 ,SP1 ,SP1 , + SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 }, +px21a[]={PCI_,PCI_,PCI_,PCI_,PCI_,SYS_,PCI_,GET_,PCI_,PCI_,PCI_,PGRP,PCI_,PCI_, + PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_}, + +rn21 []={TK1 ,TK1 ,TK1 ,SP1 ,TK1 ,RN1 ,TK1 ,RN1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 , + TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 }, +rn21a[]={PCI_,PCI_,PCI_,RNS ,PCI_,SYS_,PCI_,GET_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_, + PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_}, + +*mdtab[] = {sp21, sp21a, tk21, tk21a, cm20, cm20a, cm21, cm21a, cm22, cm22a, + cm23, cm23a, pr21, pr21a, px21, px21a, rn21, rn21a}; +struct parse pcbmd = {"MD", lexmark, mdtab, 0, 0, 0, 0}; +#undef SP1 +#undef TK1 +#undef CM0 +#undef CM1 +#undef CM2 +#undef CM3 +#undef PR1 +#undef PX1 +#undef RN1 +/* PCBMDC: State and action table for comment declaration. 
+*/ +/* Symbols for state names (end with a number). */ +#define CD2 0 /* COM[2] found; in comment. */ +#define CD3 2 /* Ending COM[1] found; end comment or continue it. */ +#define EM1 4 /* Ending COM[2] found; start new comment or end. */ +#define CD1 6 /* COM[1] found; new comment or error. */ + +static UNCH +/* bit nmc num nms spc non ee eob rs com eti grpo lit lita + dso dsc pero plus refc rni tagc tago vi */ +cd22 []={CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD3 ,CD2 ,CD2 ,CD2 ,CD2 , + CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 }, +cd22a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_, + NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_}, + +cd23 []={CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD3 ,CD2 ,CD3 ,CD2 ,EM1 ,CD2 ,CD2 ,CD2 ,CD2 , + CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 }, +cd23a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_, + NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_}, + +em21 []={CD2 ,CD2 ,CD2 ,CD2 ,EM1 ,EM1 ,CD2 ,EM1 ,EM1 ,CD1 ,CD2 ,CD2 ,CD2 ,CD2 , + CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 }, +em21a[]={INV_,INV_,INV_,INV_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,INV_,INV_,INV_,INV_, + INV_,INV_,INV_,INV_,INV_,INV_,EMD ,INV_,INV_}, + +cd21 []={CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD1 ,CD2 ,CD1 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 , + CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 ,CD2 }, +cd21a[]={PCI_,PCI_,PCI_,PCI_,PCI_,SYS_,EOF_,GET_,PCI_,NOP_,PCI_,PCI_,PCI_,PCI_, + PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_}, + +*mdctab[] = {cd22, cd22a, cd23, cd23a, em21, em21a, cd21, cd21a}; +struct parse pcbmdc = {"MDC", lexmark, mdctab, 0, 0, 0, 0}; +#undef CD2 +#undef CD3 +#undef EM1 +#undef CD1 +/* PCBMDI: State and action table for ignoring markup declarations. + Literals are handled properly so a TAGC won't end the declaration. + An error is noted if the entity ends within a declaration that + is being ignored. + TO DO: Handle nested declaration sets. +*/ +/* Symbols for state names (end with a number). 
*/ +#define NC1 0 /* Not in a comment; TAGC ends declaration. */ +#define IC1 2 /* COM[1] found; possible comment. */ +#define IC2 4 /* COM[2] found; in comment. */ +#define IC3 6 /* Ending COM[1] found; end comment or continue it. */ +#define LI1 8 /* Literal parameter; search for LIT. */ +#define LA1 10 /* Literal parameter; search for LITA. */ + +static UNCH +/* bit nmc num nms spc non ee eob rs com eti grpo lit lita + dso dsc pero plus refc rni tagc tago vi */ +nc21 []={NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,IC1 ,NC1 ,NC1 ,LI1 ,LA1 , + NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 }, +nc21a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_, + NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,EMD ,NOP_,NOP_}, + +ic21 []={NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,IC1 ,NC1 ,IC1 ,NC1 ,IC2 ,NC1 ,NC1 ,LI1 ,LA1 , + NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 ,NC1 }, +ic21a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_, + NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,EMD ,NOP_,NOP_}, + +ic22 []={IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,NC1 ,IC2 ,IC2 ,IC3 ,IC2 ,IC2 ,IC2 ,IC2 , + IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC2 }, +ic22a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_, + NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_}, + +ic23 []={IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC3 ,NC1 ,IC3 ,IC2 ,NC1 ,IC2 ,IC2 ,IC2 ,IC2 , + IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC2 ,IC2 },/*ic3*/ +ic23a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_, + NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_}, + +/* bit nmc num nms spc non ee eob rs com eti grpo lit lita + dso dsc pero plus refc rni tagc tago vi */ +li21 []={LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,NC1 ,LI1 , + LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 ,LI1 },/*li1*/ +li21a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_, + NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_}, + +la21 []={LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,LA1 ,NC1 , + LA1 ,LA1 ,LA1 ,LA1 
/* PCBMSRC: State and action table for marked section in RCDATA mode.
   Nested marked sections are not recognized; the first MSE ends it.
   Initial state assumes an MS declaration was processed.
   Columns are based on LEXLMS.C but LITC column needn't exist.

   Layout: every state contributes two consecutive entries to msrctab[]:
   a row of next states (xxNN) followed by a row of action codes (xxNNa)
   for the same input columns.  The state symbols below are the index of
   the state's next-state row in msrctab[], which is why they step by 2.
*/
/* Symbols for state names (end with a number). */
#define ET0     0   /* MSS processed or buffer flushed; no data. */
#define DA0     2   /* Data in buffer. */
#define ER0     4   /* ERO found; start lookahead buffer. */
#define CR0     6   /* CRO found (ER0, RNI). */
#define ME0     8   /* MSC found. */
#define ME1     10  /* MSC, MSC found. */

static UNCH
/*       free nu   min  nms  spc  non  ee   eob  rs   re   sep  cde  nsc  ero
         mdo  msc  mso  pero rni  tagc tago litc */
et30 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,DA0 ,DA0 ,ET0 ,ER0 ,
         DA0 ,ME0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*et0*/
et30a[]={DAS_,DAS_,DAS_,DAS_,DAS_,NON_,EE_ ,GET_,RS_ ,REF_,DAS_,DAS_,NSC_,LAS_,
         DAS_,LAS_,DAS_,DAS_,DAS_,DAS_,DAS_,DAS_},

da30 []={DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,DA0 ,DA0 ,ET0 ,ET0 ,
         DA0 ,ET0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 ,DA0 },/*da0*/
da30a[]={NOP_,NOP_,NOP_,NOP_,NOP_,DAF_,DAF_,DAF_,DAF_,DAF_,NOP_,NOP_,DAF_,DAF_,
         NOP_,DAF_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},

er30 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ER0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,
         ET0 ,ET0 ,ET0 ,ET0 ,CR0 ,ET0 ,ET0 ,ET0 },/*er0*/
er30a[]={LAF_,LAF_,LAF_,ERX_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
         LAF_,LAF_,LAF_,LAF_,LAM_,LAF_,LAF_,LAF_},

/*       free nu   min  nms  spc  non  ee   eob  rs   re   sep  cde  nsc  ero
         mdo  msc  mso  pero rni  tagc tago litc */
cr30 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,CR0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,
         ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 },/*cr0*/
cr30a[]={LAF_,CRN_,LAF_,CRA_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
         LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_},

/* Two MSC in a row (me30 -> me31) followed by TAGC yield MSE_. */
me30 []={ET0, ET0, ET0, ET0, ET0 ,ET0, ET0, ME0, ET0 ,ET0 ,ET0 ,ET0, ET0 ,ET0 ,
         ET0, ME1, ET0 ,ET0, ET0 ,ET0, ET0 ,ET0 },/*me0*/
me30a[]={LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
         LAF_,LAM_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_},

me31 []={ET0, ET0, ET0, ET0, ET0 ,ET0, ET0, ME1, ET0 ,ET0 ,ET0 ,ET0, ET0 ,ET0 ,
         ET0, ET0, ET0 ,ET0, ET0 ,ET0, ET0 ,ET0,},/*me1*/
me31a[]={LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
         LAF_,LAF_,LAF_,LAF_,LAF_,MSE_,LAF_,LAF_},

/* Row pairs in state-symbol order: ET0, DA0, ER0, CR0, ME0, ME1. */
*msrctab[]={et30, et30a, da30, da30a, er30, er30a, cr30, cr30a,
            me30, me30a, me31, me31a};
struct parse pcbmsrc = {"MSRCDATA", lexlms, msrctab, 0, 0, 0, 0};
#undef ET0
#undef DA0
#undef ER0
#undef CR0
#undef ME0
#undef ME1
/* Tables for PCBMSC (marked section in CDATA mode).
   Each state has a next-state row (xxNN) and an action row (xxNNa) for
   the same input columns; msctab[] lists the pairs in the order of the
   state symbols ET2 (0), DA2 (2), ME2 (4), ME3 (6).
*/
static UNCH
/*       free nu   min  nms  spc  non  ee   eob  rs   re   sep  cde  nsc  ero
         mdo  msc  mso  pero rni  tagc tago litc */
et32 []={DA2 ,DA2 ,DA2 ,DA2 ,DA2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,DA2 ,DA2 ,ET2 ,DA2 ,
         DA2 ,ME2 ,DA2 ,DA2 ,DA2 ,DA2 ,DA2 ,DA2 },/*et2*/
et32a[]={DAS_,DAS_,DAS_,DAS_,DAS_,NON_,EOF_,GET_,RS_ ,REF_,DAS_,DAS_,NSC_,DAS_,
         DAS_,LAS_,DAS_,DAS_,DAS_,DAS_,DAS_,DAS_},

da32 []={DA2 ,DA2 ,DA2 ,DA2 ,DA2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,DA2 ,DA2 ,ET2 ,DA2 ,
         DA2 ,ET2 ,DA2 ,DA2 ,DA2 ,DA2 ,DA2 ,DA2 },/*da2*/
da32a[]={NOP_,NOP_,NOP_,NOP_,NOP_,DAF_,DAF_,DAF_,DAF_,DAF_,NOP_,NOP_,DAF_,NOP_,
         NOP_,DAF_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},

/* Two MSC in a row (me32 -> me33) followed by TAGC yield MSE_. */
me32 []={ET2, ET2, ET2, ET2, ET2 ,ET2, ET2, ME2, ET2 ,ET2 ,ET2 ,ET2, ET2 ,ET2 ,
         ET2, ME3, ET2 ,ET2, ET2 ,ET2, ET2, ET2,},/*me2*/
me32a[]={LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
         LAF_,LAM_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_},

me33 []={ET2, ET2, ET2, ET2, ET2 ,ET2, ET2, ME3, ET2 ,ET2 ,ET2 ,ET2, ET2 ,ET2 ,
         ET2, ET2, ET2 ,ET2, ET2 ,ET2, ET2, ET2,},/*me3*/
me33a[]={LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
         LAF_,LAF_,LAF_,LAF_,LAF_,MSE_,LAF_,LAF_},

*msctab[]={et32, et32a, da32, da32a, me32, me32a, me33, me33a};
struct parse pcbmsc = {"MSCDATA", lexlms, msctab, 0, 0, 0, 0};
/* Tables for PCBMSI (marked section in IGNORE mode).
   Each state has a next-state row (xxNN) and an action row (xxNNa) for
   the same input columns; msitab[] lists the pairs in the order of the
   state symbols ET4 (0), ME4 (2), ME5 (4), ES4 (6), MD4 (8).

   NOTE(review): the et34/me34/me35 rows carry 23 entries (a trailing
   refc column), while the es34/md34 rows carry only 22.  Whether lexlms
   can ever produce the 23rd column is not visible here -- confirm, and
   pad es34/es34a/md34/md34a if it can.
*/
static UNCH
/*       free nu   min  nms  spc  non  ee   eob  rs   re   sep  cde  nsc  ero
         mdo  msc  mso  pero rni  tagc tago litc refc */
et34 []={ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,
         ET4 ,ME4 ,ET4 ,ET4 ,ET4 ,ET4 ,ES4 ,ET4 ,ET4 },/*et4*/
et34a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,SYS_,NOP_,
         NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},

me34 []={ET4, ET4, ET4, ET4, ET4 ,ET4, ET4, ME4, ET4 ,ET4 ,ET4 ,ET4, ET4, ET4 ,
         ET4, ME5 ,ET4, ET4, ET4 ,ET4, ET4, ET4, ET4,},/*me4*/
me34a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,SYS_,NOP_,
         NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},

/* MSC, MSC, TAGC: the only place this table produces MSE_. */
me35 []={ET4, ET4, ET4, ET4, ET4 ,ET4, ET4, ME5, ET4 ,ET4 ,ET4 ,ET4, ET4, ET4 ,
         ET4, ET4 ,ET4, ET4, ET4 ,ET4, ET4, ET4, ET4,},/*me5*/
me35a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,SYS_,NOP_,
         NOP_,NOP_,NOP_,NOP_,NOP_,MSE_,NOP_,NOP_,NOP_},

/*       free nu   min  nms  spc  non  ee   eob  rs   re   sep  cde  nsc  ero
         mdo  msc  mso  pero rni  tagc tago litc */
es34 []={ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ES4 ,ET4 ,ES4 ,ET4 ,ET4 ,ET4 ,ET4 ,ES4 ,ET4 ,
         MD4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 ,ET4 },/*es4*/
es34a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,SYS_,NOP_,
         NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},

/* TAGO, MDO, MSO: the only place this table produces MSS_. */
md34 []={ET4, ET4, ET4, ET4, ET4 ,MD4, ET4, MD4, ET4 ,ET4 ,ET4 ,ET4, ET4, ET4 ,
         ET4, ET4 ,ET4, ET4, ET4 ,ET4, ET4, ET4,},/*md4*/
md34a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,SYS_,NOP_,
         NOP_,NOP_,MSS_,NOP_,NOP_,NOP_,NOP_,NOP_},

*msitab[]={et34, et34a, me34, me34a, me35, me35a, es34, es34a, md34, md34a};
struct parse pcbmsi = {"MSIGNORE", lexlms, msitab, 0, 0, 0, 0};
/* Exported copy of the AN1 state number, so that code outside this file
   can name the "attribute name expected" state after AN1 is #undef'd. */
int pcbstan = AN1;            /* PCBSTAG: attribute name expected. */

/* Tables for PCBSTAG (start-tag parse).
   Each state has a next-state row and an action row (suffix "a") for the
   same input columns; stagtab[] lists the pairs in the order of the
   state symbols SP1 (0), AN1 (2), SP2 (4), VI1 (6), AV1 (8).
*/
static UNCH
/*       bit  nmc  num  nms  spc  non  ee   eob  rs   com  eti  grpo lit  lita
         dso  dsc  pero plus refc rni  tagc tago vi   */
sp41 []={SP1 ,SP1 ,SP1 ,SP1 ,AN1 ,SP1 ,SP1 ,SP1 ,AN1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,
         SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 },
sp41a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,ETIC,INV_,INV_,INV_,
         INV_,DSC ,INV_,INV_,INV_,INV_,TAGC,TAGO,INV_},

an41 []={SP1 ,SP1 ,SP1 ,SP2 ,AN1 ,AN1 ,AN1 ,AN1 ,AN1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,
         SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 },
an41a[]={INV_,NTV ,NTV ,NVS ,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,ETIC,INV_,INV_,INV_,
         INV_,DSC ,INV_,INV_,INV_,INV_,TAGC,TAGO,INV_},

sp42 []={SP1 ,SP1 ,SP1 ,SP1 ,VI1 ,SP2 ,SP2 ,SP2 ,VI1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,
         SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,AV1 },
sp42a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,NASV,INV_,INV_,INV_,
         INV_,NASV,INV_,INV_,INV_,INV_,NASV,NASV,NOP_},

/*       bit  nmc  num  nms  spc  non  ee   eob  rs   com  eti  grpo lit  lita
         dso  dsc  pero plus refc rni  tagc tago vi   */
vi41 []={SP1 ,AN1 ,AN1 ,AN1 ,VI1 ,VI1 ,VI1 ,VI1 ,VI1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,
         SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,AV1 },
vi41a[]={INV_,NASV,NASV,NASV,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,NASV,INV_,INV_,INV_,
         INV_,NASV,INV_,INV_,INV_,INV_,NASV,NASV,NOP_},

/* After VI: an unquoted value (AVU) or a LIT/LITA-delimited one (AVD/AVDA). */
av41 []={SP1 ,SP1 ,SP1 ,SP1 ,AV1 ,AV1 ,AV1 ,AV1 ,AV1 ,SP1 ,SP1 ,SP1 ,AN1 ,AN1 ,
         SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 },
av41a[]={INV_,AVU ,AVU ,AVU ,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,INV_,INV_,AVD ,AVDA,
         INV_,INV_,INV_,INV_,INV_,INV_,INV_,INV_,INV_},

*stagtab[] = {sp41, sp41a, an41, an41a, sp42, sp42a, vi41, vi41a, av41, av41a};
struct parse pcbstag = {"STAG", lexmark, stagtab, 0, 0, 0, 0};
/* PCBVAL: State and action table for tokenizing attribute values.
   Columns are based on lextoke (but EOB cannot occur).

   Each state has a next-state row and an action row (suffix "a");
   valtab[] lists the pairs in the order of the state symbols TK1 (0)
   and SP1 (2).  Because EOB cannot occur, the rows hold seven entries:
   the "eob" column named in the heading below has no entry.
*/
/* Symbols for state names (end with a number). */
#define TK1     0   /* Token expected. */
#define SP1     2   /* Separator before token expected. */

static UNCH
/*       inv  rec  sep  sp   nmc  nms  nu   eob  */
tk51 []={TK1 ,TK1 ,TK1 ,TK1 ,SP1 ,SP1 ,SP1 },/*tk1*/
tk51a[]={INVA,INVA,INVA,NOPA,NMTA,NASA,NUMA},

/* A second name/number character class without a separator gets LENA. */
sp51 []={TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },/*sp1*/
sp51a[]={INVA,INVA,INVA,NOPA,LENA,LENA,LENA},

*valtab[] = {tk51, tk51a, sp51, sp51a};
struct parse pcbval = {"VAL", lextoke, valtab, 0, 0, 0, 0};
#undef TK1
#undef SP1
/* Tables for PCBEAL (end of attribute specification list).
   There is a single state, AL0: al00 is its next-state row and al00a its
   action row.  The dtgc column is the only one with a delimiter action
   (GRPE); apart from the SYS_, EE_ and GET_ housekeeping columns, every
   other input maps to INV_.
*/
static UNCH
/*       bit  nmc  nms  re   spc  non  ee   eob  rs   and  grpc grpo lit  lita
         dtgc dtgo opt  or   pero plus rep  rni  seq  refc */
al00 []={AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,
         AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 ,AL0 },/*al0*/
al00a[]={INV_,INV_,INV_,INV_,INV_,SYS_,EE_ ,GET_,INV_,INV_,INV_,INV_,INV_,INV_,
         GRPE,INV_,INV_,INV_,INV_,INV_,INV_,INV_,INV_,INV_},

*ealtab[] = {al00, al00a};
struct parse pcbeal = {"EAL", lexgrp, ealtab, 0, 0, 0, 0};
#undef AL0
cm30a, cm31, cm31a, cm32, cm32a, + cm33, cm33a}; + +struct parse pcbsd = {"SD", lexsd, sdtab, 0, 0, 0, 0}; + +#undef SP1 +#undef TK1 +#undef CM0 +#undef CM1 +#undef CM2 +#undef CM3 diff --git a/usr.bin/sgmls/sgmls/portproc.c b/usr.bin/sgmls/sgmls/portproc.c new file mode 100644 index 0000000..0bb2431 --- /dev/null +++ b/usr.bin/sgmls/sgmls/portproc.c @@ -0,0 +1,104 @@ +/* portproc.c - + + Semi-portable implementation of run_process(). + + Written by James Clark (jjc@jclark.com). +*/ + +#include "config.h" + +#ifdef SUPPORT_SUBDOC + +#include "std.h" +#include "entity.h" +#include "appl.h" + +/* This code shows how you might use system() to implement run_process(). +ANSI C says very little about the behaviour of system(), and so this +is necessarily system dependent. */ + +/* Characters that are significant to the shell and so need quoting. */ +#define SHELL_MAGIC "$\\\"';&()|<>^ \t\n" +/* Character with which to quote shell arguments. */ +#define SHELL_QUOTE_CHAR '\'' +/* String that can be used to get SHELL_QUOTE_CHAR into a quoted argument. */ +#define SHELL_ESCAPE_QUOTE "'\\''" +/* Character that can be used to separate arguments to the shell. */ +#define SHELL_ARG_SEP ' ' + +static UNS shell_quote P((char *, char *)); + +int run_process(argv) +char **argv; +{ + char **p; + char *s, *command; + int ret; + UNS len = 0; + + for (p = argv; *p; p++) + len += shell_quote(*p, (char *)0); + len += p - argv; + s = command = xmalloc(len); + for (p = argv; *p; ++p) { + if (s > command) + *s++ = SHELL_ARG_SEP; + s += shell_quote(*p, s); + } + *s++ = '\0'; + errno = 0; + ret = system(command); + if (ret < 0) + appl_error(E_EXEC, argv[0], strerror(errno)); + free(command); + return ret; +} + +/* Quote a string so that it appears as a single argument to the +shell (as used for system()). Store the quoted argument in result, if +result is not NULL. Return the length. 
*/ + +static +UNS shell_quote(s, result) +char *s, *result; +{ + UNS len = 0; + int quoted = 0; + + if (strpbrk(s, SHELL_MAGIC)) { + quoted = 1; + len++; + if (result) + result[0] = SHELL_QUOTE_CHAR; + } + for (; *s; s++) { + if (*s == SHELL_QUOTE_CHAR) { + if (result) + strcpy(result + len, SHELL_ESCAPE_QUOTE); + len += strlen(SHELL_ESCAPE_QUOTE); + } + else { + if (result) + result[len] = *s; + len++; + } + } + if (quoted) { + if (result) + result[len] = SHELL_QUOTE_CHAR; + len++; + } + return len; +} + +#endif /* SUPPORT_SUBDOC */ + +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/serv.c b/usr.bin/sgmls/sgmls/serv.c new file mode 100644 index 0000000..b9699d2 --- /dev/null +++ b/usr.bin/sgmls/sgmls/serv.c @@ -0,0 +1,299 @@ +#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */ +/* ETDDEF: Define an element type definition. + Use an existing one if there is one; otherwise create one, which + rmalloc initializes to zero which shows it is a virgin etd. +*/ +PETD etddef(ename) +UNCH *ename; /* Element name (GI) with length byte. */ +{ + PETD p; /* Pointer to an etd. */ + int hnum; /* Hash number for ename. */ + + if ((p = (PETD)hfind((THASH)etdtab,ename,hnum = hash(ename, ETDHASH)))==0){ + p = (PETD)hin((THASH)etdtab, ename, hnum, ETDSZ); + } + return p; +} +/* ETDSET: Store data in an element type definition. + The etd must be valid and virgin (except for adl and etdmin). + As an etd cannot be modified, there is no checking for existing + pointers and no freeing of their storage. +*/ +#ifdef USE_PROTOTYPES +PETD etdset(PETD p, UNCH fmin, struct thdr *cmod, PETD *mexgrp, PETD *pexgrp, + struct entity **srm) +#else +PETD etdset(p, fmin, cmod, mexgrp, pexgrp, srm) +PETD p; /* Pointer to an etd. */ +UNCH fmin; /* Minimization bit flags. */ +struct thdr *cmod; /* Pointer to content model. 
*/ +PETD *mexgrp; /* Pointers to minus and plus exception lists. */ +PETD *pexgrp; /* Pointers to minus and plus exception lists. */ +struct entity **srm; /* Short reference map. */ +#endif +{ + p->etdmin |= fmin; + p->etdmod = cmod; + p->etdmex = mexgrp; + p->etdpex = pexgrp; + p->etdsrm = srm; + return p; +} +/* ETDREF: Retrieve the pointer to an element type definition. +*/ +PETD etdref(ename) +UNCH *ename; /* Element name (GI) with length byte.. */ +{ + + return (PETD)hfind((THASH)etdtab, ename, hash(ename, ETDHASH)); +} +/* ETDCAN: Cancel an element definition. The etd is freed and is removed + from the hash table, but its model and other pointers are not freed. +*/ +VOID etdcan(ename) +UNCH *ename; /* GI name (with length and EOS). */ +{ + PETD p; + + if ((p = (PETD)hout((THASH)etdtab, ename, hash(ename, ETDHASH)))!=0) + frem((UNIV)p); +} +/* SYMBOL TABLE FUNCTIONS: These functions manage hash tables that are used + for entities, element type definitions, IDs, and other purposes. The + interface will be expanded in the future to include multiple environments, + probably by creating arrays of the present hash tables with each table + in the array corresponding to an environment level. +*/ +/* HASH: Form hash value for a string. + From the Dragon Book, p436. +*/ +int hash(s, hashsize) +UNCH *s; /* String to be hashed. */ +int hashsize; /* Size of hash table array. */ +{ + unsigned long h = 0, g; + + while (*s != 0) { + h <<= 4; + h += *s++; + if ((g = h & 0xf0000000) != 0) { + h ^= g >> 24; + h ^= g; + } + } + return (int)(h % hashsize); +} +/* HFIND: Look for a name in a hash table. +*/ +struct hash *hfind(htab, s, h) +struct hash *htab[]; /* Hash table. */ +UNCH *s; /* Entity name. */ +int h; /* Hash value for entity name. */ +{ + struct hash *np; + + for (np = htab[h]; np != 0; np = np->enext) + if (ustrcmp(s, np->ename) == 0) return np; /* Found it. */ + return (struct hash *)0; /* Not found. 
*/ +} +/* HIN: Locates an entry in a hash table, or allocates a new one. + Returns a pointer to a structure containing a name + and a pointer to the next entry. Other data in the + structure must be maintained by the caller. +*/ +struct hash *hin(htab, name, h, size) +struct hash *htab[]; /* Hash table. */ +UNCH *name; /* Entity name. */ +int h; /* Hash value for entity name. */ +UNS size; /* Size of structures pointed to by table. */ +{ + struct hash *np; + + if ((np = hfind(htab, name, h))!=0) return np; /* Return if name found. */ + /* Allocate space for structure and name. */ + np = (struct hash *)rmalloc(size + name[0]); + np->ename = (UNCH *)np + size; + memcpy(np->ename, name, name[0]); /* Store name in it. */ + np->enext = htab[h]; /* 1st entry is now 2nd.*/ + htab[h] = np; /* New entry is now 1st.*/ + return np; /* Return new entry ptr. */ +} +/* HOUT: Remove an entry from a hash table and return its pointer. + The caller must free any pointers in the entry and then + free the entry itself if that is what is desired; this + routine does not free any storage. +*/ +struct hash *hout(htab, s, h) +struct hash *htab[]; /* Hash table. */ +UNCH *s; /* Search argument entry name. */ +int h; /* Hash value for search entry name. */ +{ + struct hash **pp; + + for (pp = &htab[h]; *pp != 0; pp = &(*pp)->enext) + if (ustrcmp(s, (*pp)->ename) == 0) { /* Found it. */ + struct hash *tem = *pp; + *pp = (*pp)->enext; + return tem; + } + return 0; /* NULL if not found; else ptr. */ +} +/* SAVESTR: Save a null-terminated string +*/ +UNCH *savestr(s) +UNCH *s; +{ + UNCH *rp; + + rp = (UNCH *)rmalloc(ustrlen(s) + 1); + ustrcpy(rp, s); + return rp; +} +/* SAVENM: Save a name (with length and EOS) +*/ +UNCH *savenm(s) +UNCH *s; +{ + UNCH *p; + p = (UNCH *)rmalloc(*s); + memcpy(p, s, *s); + return p; +} +/* REPLACE: Free the storage for the old string (p) and store the new (s). + If the specified ptr is NULL, don't free it. 
+*/ +UNCH *replace(p, s) +UNCH *p; +UNCH *s; +{ + if (p) frem((UNIV)p); /* Free old storage (if any). */ + if (!s) return(s); /* Return NULL if new string is NULL. */ + return savestr(s); +} +/* RMALLOC: Interface to memory allocation with error handling. + If storage is not available, fatal error message is issued. + Storage is initialized to zeros. +*/ +UNIV rmalloc(size) +unsigned size; /* Number of bytes of initialized storage. */ +{ + UNIV p = (UNIV)calloc(size, 1); + if (!p) exiterr(33, (struct parse *)0); + return p; +} +UNIV rrealloc(p, n) +UNIV p; +UNS n; +{ + UNIV r = realloc(p, n); + if (!r) + exiterr(33, (struct parse *)0); + return r; +} + +UNCH *pt; +/* FREM: Free specified memory area gotten with rmalloc(). +*/ +VOID frem(ptr) +UNIV ptr; /* Memory area to be freed. */ +{ + free(ptr); +} +/* MAPSRCH: Find a string in a table and return its associated value. + The last entry must be a dummy consisting of a NULL pointer for + the string and whatever return code is desired if the + string is not found in the table. +*/ +int mapsrch(maptab, name) +struct map maptab[]; +UNCH *name; +{ + int i = 0; + + do { + UNCH *mapnm, *nm; + for (mapnm = maptab[i].mapnm, nm=name; *nm==*mapnm; mapnm++) { + if (!*nm++) return maptab[i].mapdata; + } + } while (maptab[++i].mapnm); + return maptab[i].mapdata; +} +/* IDDEF: Define an ID control block; return -1 if it already exists. +*/ +int iddef(iname) +UNCH *iname; /* ID name (with length and EOS). */ +{ + PID p; + struct fwdref *r; + + p = (PID)hin((THASH)itab, iname, hash(iname, IDHASH), IDSZ); + if (p->iddefed) return(-1); + p->iddefed = 1; + TRACEID("IDDEF", p); + /* Delete any forward references. */ + r = p->idrl; + p->idrl = 0; + while (r) { + struct fwdref *tem = r->next; + if (r->msg) + msgsfree(r->msg); + frem((UNIV)r); + r = tem; + } + return(0); +} +/* IDREF: Store a reference to an ID and define the ID if it doesn't yet exist. + Return 0 if already defined, otherwise pointer to a fwdref. 
/* SGML: Main SGML driver routine.
   Runs the parser until there is an event to hand back to the caller and
   returns its code (SGMLPIS, SGMLAPP, SGMLETG, SGMLSTG, SGMLDAF, SGMLREF
   or SGMLEOD).  Data, RE, processing instruction and APPINFO events fill
   in *rcbdafp; start-tag and end-tag events fill in *rcbtagp.

   While prologsw is set (and no action is pending in conactsw) the
   prolog is parsed with parsepro().  After that, content is parsed with
   parsecon() and each action is passed through sgmlact(); an action
   saved in conact with conactsw set is processed first, bypassing
   sgmlact().
*/
enum sgmlevent sgmlnext(rcbdafp, rcbtagp)
struct rcbdata *rcbdafp;
struct rcbtag *rcbtagp;
{
     while (prologsw && !conactsw) {
          int oconact;
          conact = parsepro();
          conactsw = 0;       /* Assume sgmlact() will not be skipped. */
          switch(conact) {

          case PIS_:
          case EOD_:
          case APP_:          /* APPINFO */
               conactsw = 1;  /* We can skip sgmlact() in opening state. */
               break;

          case DAF_:
               newetd = stagreal = ETDCDATA;
               conact = stag(datarc = DAF_);
               conactsw = 1;  /* We can skip sgmlact() in opening state. */
               prologsw = 0;  /* End the prolog. */
               break;
          case DCE_:
          case MSS_:
               /* prcon[2].tu.thetd holds the etd for the document element. */
               newetd = stagreal = prcon[2].tu.thetd;
               stagmin = MINSTAG; /* This tag was minimized. */
               /* It's an error if the start tag of the document element
                  is not minimizable. */
               if (BITOFF(newetd->etdmin, SMO))
                    sgmlerr(226, conpcb, (UNCH *)0, (UNCH *)0);
               oconact = conact; /* Save conact. */
               conact = stag(0); /* Start the document element. */
               conactsw = 1;  /* conact needs processing. */
               prologsw = 0;  /* The prolog is finished. */
               if (oconact == MSS_) {
                    if (msplevel==0) conpcb = getpcb((int)ETDCON);
                    conpcb = mdms(tbuf, conpcb); /* Parse the marked section
                                                    start. */
               }
               break;
          default:            /* STE_: not defined in SGMLACT.H. */
               if (msplevel==0) conpcb = getpcb((int)ETDCON);
               prologsw = 0;  /* End the prolog. */
               break;
          }
     }
     for (;;) {
          unsigned swact;     /* Switch action: saved conact, new, or sgmlact.*/

          if (conactsw) {
               /* A saved action is pending: use it as is and restore the
                  contersw value saved with it. */
               conactsw = 0;
               swact = conact;
               contersw = contersv;
          }
          else {
               conact = parsecon(tbuf, conpcb);
               /* End of document is presented to sgmlact() as LOP_. */
               swact = sgmlact((UNCH)(conact != EOD_ ? conact : LOP_));
          }

          switch (swact) {

          case MD_:           /* Process markup declaration. */
               parsenm(tbuf, NAMECASE); /* Get declaration name. */
               if (!ustrcmp(tbuf+1, key[KUSEMAP])) mdsrmuse(tbuf);
               else sgmlerr(E_MDNAME, conpcb, tbuf+1, (UNCH *)0);
               continue;
          case MDC_:          /* Process markup declaration comment. */
               if (*FPOS!=lex.d.mdc)
                    parsemd(tbuf, NAMECASE, (struct parse *)0, NAMELEN);
               continue;

          case MSS_:          /* Process marked section start. */
               conpcb = mdms(tbuf, conpcb);
               continue;
          case MSE_:          /* Process marked section end (drop to LOP_). */
               if (mdmse()) conpcb = getpcb((int)ETDCON);
               continue;

          case PIS_:          /* Return processing instruction (string). */
               if (entpisw) rcbdafp->data = data;
               else {
                    parselit(tbuf, &pcblitc, PILEN, lex.d.pic);
                    rcbdafp->data = tbuf;
               }
               rcbdafp->datalen = datalen;
               rcbdafp->contersw = entpisw;
               entpisw = 0;   /* Reset for next time.*/
               scbset();      /* Update location in current scb. */
               return SGMLPIS;

          case APP_:          /* Return APPINFO text held in tbuf. */
               rcbdafp->data = tbuf;
               rcbdafp->datalen = ustrlen(tbuf);
               rcbdafp->contersw = 0;
               scbset();
               return SGMLAPP;
          case ETG_:          /* Return end-tag. */
               charmode = 0;  /* Not in char mode unless CDATA or RCDATA.*/
               if (msplevel==0) conpcb = getpcb((int)ETDCON);
               /* tags[ts+1] is the element being ended, tags[ts] the one
                  being resumed (see curgi / ru.oldgi below). */
               rcbtagp->contersw = tags[ts+1].tflags;
               rcbtagp->tagmin = etagimsw ? MINETAG : etagmin;
               rcbtagp->curgi = tags[ts+1].tetd->etdgi;
               rcbtagp->ru.oldgi = tags[ts].tetd->etdgi;
               if (etagmin==MINSTAG) rcbtagp->tagreal =
                    BADPTR(stagreal) ? stagreal : (PETD)stagreal->etdgi;
               else rcbtagp->tagreal =
                    BADPTR(etagreal) ? etagreal : (PETD)etagreal->etdgi;
               rcbtagp->etictr = etictr;
               rcbtagp->srmnm = tags[ts].tsrm!=SRMNULL ? tags[ts].tsrm[0]->ename
                                                        : 0;
               scbset();      /* Update location in current scb. */
               return SGMLETG;

          case STG_:          /* Return start-tag. */
               charmode = 0;  /* Not in char mode unless CDATA or RCDATA.*/
               if (!conrefsw && msplevel==0) conpcb = getpcb((int)ETDCON);
               rcbtagp->contersw = tags[ts].tflags;
               rcbtagp->tagmin = dostag ? MINSTAG : stagmin;
               rcbtagp->curgi = tags[ts].tetd->etdgi;
               /* Get attribute list if one was defined for this element. */
               rcbtagp->ru.al = !tags[ts].tetd->adl ? 0 :
                    rcbtagp->tagmin==MINNONE ? al : tags[ts].tetd->adl;
               rcbtagp->tagreal = BADPTR(stagreal)?stagreal:(PETD)stagreal->etdgi;
               rcbtagp->etictr = etictr;
               rcbtagp->srmnm = tags[ts].tsrm!=SRMNULL ? tags[ts].tsrm[0]->ename
                                                        : 0;
               scbset();      /* Update location in current scb. */
               return SGMLSTG;

          case DAF_:          /* Return data in source entity buffer. */
               charmode = 1;
               rcbdafp->datalen = datalen;
               rcbdafp->data = data;
               rcbdafp->contersw = contersw | entdatsw;
               contersw = entdatsw = 0;/* Reset for next time.*/
               scbset();      /* Update location in current scb. */
               return SGMLDAF;

          case CON_:          /* Process conact after returning REF_. */
               conactsw = 1;
               contersv = contersw;
               /* Intentional fall through: deal with the pending RE now;
                  the saved conact is picked up by the conactsw test at
                  the top of the loop. */
          case REF_:          /* Return RE found. */
               if (badresw) {
                    /* The RE is reported as an error, not returned. */
                    badresw = 0;
                    sgmlerr(E_CHARS, &pcbconm, tags[ts].tetd->etdgi+1, (UNCH *)0);
                    continue;
               }
               charmode = 1;
               rcbdafp->contersw = contersw;
               contersw = 0;  /* Reset for next time.*/
               scbset();      /* Update location in current scb. */
               return SGMLREF;

          case EOD_:          /* End of source document entity. */
               if (mslevel != 0) sgmlerr(139, conpcb, (UNCH *)0, (UNCH *)0);
               idrck();       /* Check idrefs. */
               scbset();      /* Update location in current scb. */
               return SGMLEOD;

          default:            /* LOP_: Loop again with no action. */
               continue;
          }
     }
}
/* SGMLTAB: state/action table consulted by sgmlact().
   Rows come in pairs, one pair per state: the first row of a pair gives
   the next state and the second row the action code, both indexed by
   the input code minus DAF_ (sgmlact() reads sgmltab[state][input] and
   sgmltab[state+1][input]).  The state symbols ST1, NR1, DA1, NR2 and
   ST2 are the index of the first row of each pair.
*/
static UNCH sgmltab[][11] = {
/*daf_ etg_ md_  mdc_ mss_ mse_ pis_ ref_ stg_ rsr_ eod  */
 {DA1 ,DA1 ,ST1 ,ST1 ,ST1 ,ST1 ,ST1 ,NR1 ,ST1 ,NR1 ,ST1 },/*st1*/
 {DAF_,ETG_,MD_ ,MDC_,MSS_,MSE_,PIS_,LOP_,STG_,LOP_,EOD_},

 {DA1 ,DA1 ,ST1 ,ST1 ,ST1 ,ST1 ,ST1 ,NR2 ,ST1 ,NR1 ,ST1 },/*nr1*/
 {DAF_,ETG_,MD_ ,MDC_,MSS_,MSE_,PIS_,LOP_,STG_,LOP_,EOD_},

 {DA1 ,DA1 ,DA1 ,DA1 ,DA1 ,DA1 ,DA1 ,NR2 ,ST1 ,NR1 ,ST1 },/*da1*/
 {DAF_,ETG_,MD_ ,MDC_,MSS_,MSE_,PIS_,LOP_,STG_,LOP_,EOD_},

 /* In the two "RE pending" states, data or a start-tag yields CON_
    instead of DAF_/STG_, and a further RE yields REF_. */
 {DA1 ,DA1 ,ST2 ,ST2 ,ST2 ,ST2 ,ST2 ,NR2 ,ST1 ,NR2 ,ST1 },/*nr2*/
 {CON_,ETG_,MD_ ,MDC_,MSS_,MSE_,PIS_,REF_,CON_,LOP_,EOD_},

 {DA1 ,DA1 ,ST2 ,ST2 ,ST2 ,ST2 ,ST2 ,NR1 ,ST1 ,NR2 ,ST1 },/*st2*/
 {CON_,ETG_,MD_ ,MDC_,MSS_,MSE_,PIS_,REF_,CON_,LOP_,EOD_},
};
*/ + scbsgml[pss].sstate = scbsgml[pss].snext; + scbsgml[pss].snext = sgmltab[scbsgml[pss].sstate][PIS_ - DAF_]; + } + return action; +} +/* GETPCB: Choose pcb for new or resumed element. +*/ +struct parse *getpcb(etdcon) +int etdcon; /* Content type of new or resumed element. */ +{ + if (BITON(etdcon, MGI)) { + return(BITON(etdcon, MCHARS) ? &pcbconm : &pcbcone); + } + if (BITON(etdcon, MCDATA) || BITON(etdcon, MRCDATA)) { + charmode = 1; + return(BITON(etdcon, MCDATA) ? &pcbconc : (rcessv = es, &pcbconr)); + } + return(&pcbconm); +} + +struct markup *sgmlset(swp) +struct switches *swp; +{ + /* Initialize variables based on switches structure members. */ + sw = *swp; + rbufs = (UNCH *)rmalloc((UNS)3+sw.swbufsz) + 3; /* DOS file read area. */ + TRACEPRO(); /* Set trace switches for prolog. */ + msginit(swp); + ioinit(swp); + sdinit(); + return &lex.m; +} + +/* Points for each capacity, indexed by *CAP in sgmldecl.h. We'll replace +2 with the real NAMELEN at run time. */ + +static UNCH cappoints[] = { + 1, + 2, + 1, + 2, + 2, + 2, + 2, + 2, + 1, + 2, + 2, + 1, + 2, + 2, + 2, + 2, + 2 +}; + +static long capnumber[NCAPACITY]; +static long maxsubcap[NCAPACITY]; + +VOID sgmlend(p) +struct sgmlcap *p; +{ + int i; + for (; es >= 0; --es) + if (FILESW) + fileclos(); + + capnumber[NOTCAP] = ds.dcncnt; + capnumber[EXGRPCAP] = ds.pmexgcnt; + capnumber[ELEMCAP] = ds.etdcnt+ds.etdercnt; + capnumber[EXNMCAP] = ds.pmexcnt; + capnumber[GRPCAP] = ds.modcnt; + capnumber[ATTCAP] = ds.attcnt; + capnumber[ATTCHCAP] = ds.attdef; + capnumber[AVGRPCAP] = ds.attgcnt; + capnumber[IDCAP] = ds.idcnt; + capnumber[IDREFCAP] = ds.idrcnt; + capnumber[ENTCAP] = ds.ecbcnt; + capnumber[ENTCHCAP] = ds.ecbtext; + capnumber[MAPCAP] = ds.srcnt + ds.srcnt*lex.s.dtb[0].mapdata; + capnumber[NOTCHCAP] = ds.dcntext; + + capnumber[TOTALCAP] = 0; + + for (i = 1; i < NCAPACITY; i++) { + if (cappoints[i] > 1) + cappoints[i] = NAMELEN; + capnumber[i] += maxsubcap[i]/cappoints[i]; + capnumber[TOTALCAP] += 
(long)capnumber[i] * cappoints[i]; + } + p->number = capnumber; + p->points = cappoints; + p->limit = sd.capacity; + p->name = captab; + + for (i = 0; i < NCAPACITY; i++) { + long excess = capnumber[i]*cappoints[i] - sd.capacity[i]; + if (excess > 0) { + char buf[sizeof(long)*3 + 1]; + sprintf(buf, "%ld", excess); + sgmlerr(162, (struct parse *)0, + (UNCH *)captab[i], (UNCH *)buf); + } + } +} + +VOID sgmlsubcap(v) +long *v; +{ + int i; + for (i = 0; i < NCAPACITY; i++) + if (v[i] > maxsubcap[i]) + maxsubcap[i] = v[i]; +} + +int sgmlsdoc(ptr) +UNIV ptr; +{ + struct entity *e; + union etext etx; + etx.x = ptr; + + e = entdef(indocent, ESF, &etx); + if (!e) + return -1; + return entopen(e); +} + +/* SGMLGENT: Get a data entity. + Returns: + -1 if the entity does not exist + -2 if it is not a data entity + 1 if it is an external entity + 2 if it is an internal cdata entity + 3 if it is an internal sdata entity +*/ +int sgmlgent(iname, np, tp) +UNCH *iname; +PNE *np; +UNCH **tp; +{ + PECB ep; /* Pointer to an entity control block. */ + + ep = entfind(iname); + if (!ep) + return -1; + switch (ep->estore) { + case ESN: + if (np) + *np = ep->etx.n; + return 1; + case ESC: + if (tp) + *tp = ep->etx.c; + return 2; + case ESX: + if (tp) + *tp = ep->etx.c; + return 3; + } + return -2; +} + +/* Mark an entity. */ + +int sgmlment(iname) +UNCH *iname; +{ + PECB ep; + int rc; + + ep = entfind(iname); + if (!ep) + return -1; + rc = ep->mark; + ep->mark = 1; + return rc; +} + +int sgmlgcnterr() +{ + return msgcnterr(); +} + +/* This is for error handling functions that want to print a gi backtrace. */ + +UNCH *getgi(i) +int i; +{ + return i >= 0 && i <= ts ? tags[i].tetd->etdgi + 1 : NULL; +} + +/* Returns the value of prologsw for the use by error handling functions. */ + +int inprolog() +{ + return prologsw; +} + +/* Used by the error handling functions to access scbs. 
*/ + +int getlocation(level, locp) +int level; +struct location *locp; +{ + if (level < 0 || level > es) + return 0; + if (locp) { + int es = level; + /* source macros access a variable called `es' */ + + locp->filesw = FILESW; + locp->rcnt = RCNT; + locp->ccnt = CCNT; + locp->ename = ENTITY + 1; + locp->fcb = SCBFCB; + locp->curchar = CC; + locp->nextchar = NEXTC; + } + return 1; +} + +int sgmlloc(linenop, filenamep) +unsigned long *linenop; +char **filenamep; +{ + int level = es; + int es; + + for (es = level; es >= 0 && !FILESW; es--) + ; + if (es < 0) + return 0; + *linenop = RCNT; + *filenamep = ioflid(SCBFCB); + return 1; +} + +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/sgml2.c b/usr.bin/sgmls/sgmls/sgml2.c new file mode 100644 index 0000000..e202f84 --- /dev/null +++ b/usr.bin/sgmls/sgmls/sgml2.c @@ -0,0 +1,499 @@ +/* Added exiterr() for terminal errors to prevent SGML.MSG errors. */ +#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */ +static int iorc; /* Return code from io* functions */ +/* ENTDEF: Process an entity definition and return the pointer to it. + The entity text must be in permanent storage. + There is no checking to see if the entity already exists; + the caller must have done that. +*/ +#ifdef USE_PROTOTYPES +PECB entdef(UNCH *ename, UNCH estore, union etext *petx) +#else +PECB entdef(ename, estore, petx) +UNCH *ename; /* Entity name (with length and EOS). */ +UNCH estore; /* Entity storage class. */ +union etext *petx; /* Ptr to entity text union. */ +#endif +{ + PECB p; + + p = (PECB)hin((THASH)etab, ename, hash(ename, ENTHASH), ENTSZ); + memcpy((UNIV)&p->etx, (UNIV)petx, ETEXTSZ); + p->estore = estore; + TRACEECB("ENTDEF", p); + return(p); +} +/* ENTFIND: If an entity exists, return ptr to its ecb. + Return NULL if it is not defined. 
+*/ +PECB entfind(ename) +UNCH *ename; /* Entity name (with length and EOS). */ +{ + PECB p; + + p = (PECB)hfind((THASH)etab, ename, hash(ename, ENTHASH)); + TRACEECB("ENTFIND", p); + return p; +} +/* ENTREF: Process a general or parameter entity reference. + If the entity is defined it returns the return code from ENTOPEN. + It returns ENTUNDEF for undefined parameter entity references + and for general entity references when defaulting is not allowed. + Otherwise, it uses the default entity text. +*/ +int entref(ename) +UNCH *ename; /* Entity name (with length and EOS). */ +{ + PECB ecb; /* Entity control block. */ + + /* Get the entity control block, if the entity has been defined. */ + if ((ecb = (PECB)hfind((THASH)etab, ename, hash(ename, ENTHASH)))==0 + || ecb->estore == 0) { + if ( ename[1]==lex.d.pero + || ecbdeflt==0 + || (ecb = usedef(ename))==0 ) { + sgmlerr(ename[1] == lex.d.pero || ecbdeflt == 0 ? 35 : 150, + (struct parse *)0, ename+1, (UNCH *)0); + return(ENTUNDEF); + } + } + return(entopen(ecb)); +} +/* ENTOPEN: Open a newly referenced entity. + Increment the stack pointer (es) and initialize the new entry. + ENTDATA if entity is CDATA or SDATA, ENTPI if it is PI, + 0 if normal and all o.k.; <0 if not. +*/ +int entopen(ecb) +struct entity *ecb; /* Entity control block. */ +{ + int i; /* Loop counter. */ + + /* See if we have exceeded the entity nesting level. */ + if (es>=ENTLVL) { + sgmlerr(34, (struct parse *)0, ecb->ename+1, ntoa(ENTLVL)); + return(ENTMAX); + } + /* If entity is an etd, pi, or data, return it without creating an scb. 
*/ + switch (ecb->estore) { + case ESN: + if (NEXTYPE(ecb->etx.n)!=ESNSUB) { + if (!NEDCNDEFINED(ecb->etx.n)) + sgmlerr(78, (struct parse *)0, NEDCN(ecb->etx.n)+1, + ecb->ename+1); + } + else { +#if 0 + if (!NEID(ecb->etx.n)) { + sgmlerr(149, (struct parse *)0, ecb->ename + 1, (UNCH *)0); + return ENTFILE; + } +#endif + if (sw.nopen >= sd.subdoc) + sgmlerr(188, (struct parse *)0, + (UNCH *)NULL, (UNCH *)NULL); + } + data = (UNCH *)ecb->etx.n; + entdatsw = NDECONT; + return(ENTDATA); + case ESC: + case ESX: + datalen = ustrlen(ecb->etx.c); + data = ecb->etx.c; + entdatsw = (ecb->estore==ESC) ? CDECONT : SDECONT; + return(ENTDATA); + case ESI: + datalen = ustrlen(ecb->etx.c); + data = ecb->etx.c; + entpisw = 4; + return(ENTPI); + } + /* If the same entity is already open, send msg and ignore it. + Level 0 needn't be tested, as its entity name is always *DOC. + */ + for (i = 0; ++i<=es;) if (scbs[i].ecb.enext==ecb) { + sgmlerr(36, (struct parse *)0, ecb->ename+1, (UNCH *)0); + return(ENTLOOP); + } + /* Update SCB if entity trace is wanted in messages or entity is a file. + (Avoid this at start when es==-1 or memory will be corrupted.) + */ + if (es >= 0 && (sw.swenttr || FILESW)) scbset(); + + /* Stack the new source control block (we know there is room). */ + ++es; /* Increment scbs index. */ + RCNT = CCO = RSCC = 0; /* No records or chars yet. */ + COPIEDSW = 0; + memcpy((UNIV)&ECB, (UNIV)ecb, (UNS)ENTSZ); /* Copy the ecb into the scb. */ + ECBPTR = ecb; /* Save the ecb pointer in scb.ecb.enext. */ + TRACEECB("ENTOPEN", ECBPTR); + + /* For memory entities, the read buffer is the entity text. + The text starts at FBUF, so FPOS should be FBUF-1 + because it is bumped before each character is read. + */ + if (ECB.estore<ESFM) {FPOS = (FBUF = ECB.etx.c)-1; return 0;} + + /* For file entities, suspend any open file and do first read. 
*/ + if (ECB.etx.x == 0) { + --es; + switch (ecb->estore) { + case ESF: + sgmlerr(149, (struct parse *)0, ecb->ename + 1, (UNCH *)0); + break; + case ESP: + sgmlerr(229, (struct parse *)0, ecb->ename + 2, (UNCH *)0); + break; + default: + abort(); + } + return ENTFILE; + } + fileopen(); /* Open new external file. */ + if (iorc<0) { /* If open not successful: */ + FPOS = FBUF-1; /* Clean CCNT for OPEN error msg.*/ + filerr(32, ecb->ename+1); + --es; /* Pop the stack. */ + return(ENTFILE); + } + filepend(es); /* Suspend any open file. */ + fileread(); /* First read of file must be ok.*/ + return 0; +} +/* ENTGET: Get next record of entity (if there is one). + Otherwise, close the file (if entity is a file) and + pop the entity stack. If nothing else is on the stack, + return -1 to advise the caller. +*/ +int entget() +{ + RSCC += (CCO = FPOS-FBUF); + /* Characters-in-record (ignore EOB/EOF). */ + tagctr += CCO; /* Update tag length counter. */ + switch (*FPOS) { + case EOBCHAR: /* End of file buffer: refill it. */ + rbufs[-2] = FPOS[-2]; + rbufs[-1] = FPOS[-1]; + fileread(); /* Read the file. */ + if (iorc > 0) break; + readerr: + filerr(31, ENTITY+1); /* Treat error as EOF. */ + case EOFCHAR: /* End of file: close it. */ + fileclos(); /* Call SGMLIO to close file. */ + conterr: + if (es==0) { /* Report if it is primary file. */ + FPOS = FBUF-1; /* Preserve CCNT for omitted end-tags. */ + return -1; + } + case EOS: /* End of memory entity: pop the stack. */ + TRACEECB("ENTPOP", ECBPTR); + if (COPIEDSW) { + frem((UNIV)(FBUF + 1)); + COPIEDSW = 0; + } + --es; /* Pop the SCB stack. */ + if (FBUF) break; /* Not a PEND file. */ + filecont(); /* Resume previous file. */ + if (iorc<0) { /* If CONT not successful: */ + filerr(94, ENTITY+1); + goto conterr; + } + fileread(); /* Read the file. */ + if (iorc<=0) goto readerr; /* If READ not successful: */ + rbufs[-1] = SCB.pushback; + FPOS += CCO; + CCO = 0; + if (delmscsw && es==0) { /* End of DTD. 
*/ + delmscsw = 0; + *rbufs = lex.d.msc; + } + break; + } + return 0; +} +/* USEDEF: Use the default value for an entity reference. + Returns the ECB for the defaulted entity. +*/ +PECB usedef(ename) +UNCH *ename; /* Entity name (with length and EOS). */ +{ + union etext etx; /* Save return from entgen. */ + PECB ecb; /* Entity control block. */ + PNE pne = 0; /* Ptr to NDATA entity control block. */ + UNCH estore; /* Default entity storage type. */ + + if ((estore = ecbdeflt->estore)<ESFM) /* Default is an internal string. */ + etx.c = ecbdeflt->etx.c; + else { + /* Move entity name into fpi. */ + fpidf.fpinm = ename + 1; + if ((etx.x = entgen(&fpidf))==0) return (PECB)0; + if (estore==ESN) { + memcpy((UNIV)(pne=(PNE)rmalloc((UNS)NESZ)),(UNIV)ecbdeflt->etx.n,(UNS)NESZ); + NEID(pne) = etx.x; + etx.n = pne; + } + } + if (sw.swrefmsg) sgmlerr(45, (struct parse *)0, ename+1, (UNCH *)0); + ++ds.ecbcnt; + ecb = entdef(ename, estore, &etx); + ecb->dflt = 1; + if (pne) NEENAME(pne) = ecb->ename; + return(ecb); +} +/* SCBSET: Set source control block to current location in the current entity. + This routine is called by SGML when it returns to the text + processor and by ERROR when it reports an error. +*/ +VOID scbset() +{ + if (es >= 0 && FBUF) { + CC = *FPOS; + if (*FPOS == DELNONCH) + NEXTC = FPOS[1]; + else + NEXTC = 0; + CCO = FPOS + 1 - FBUF; + } +} +/* FILEOPEN: Call IOOPEN to open an external entity (file). +*/ +VOID fileopen() /* Open an external entity's file. */ +{ + iorc = ioopen(ECB.etx.x, &SCBFCB); +} +/* FILEREAD: Call IOREAD to read an open external entity (file). +*/ +VOID fileread() /* Read the current external entity's file. */ +{ + int newfile; + iorc = ioread(SCBFCB, rbufs, &newfile); + FPOS = (FBUF = rbufs) - 1; /* Actual read buffer. */ + if (newfile) RCNT = 0; +} +/* FILEPEND: Call IOPEND to close an open external entity (file) temporarily. +*/ +VOID filepend(es) /* Close the current external entity's file. */ +int es; /* Local index to scbs. 
*/ +{ + while (--es>=0) { /* Find last external file on stack. */ + int off; + if (!FILESW) continue; /* Not an external file. */ + if (!FBUF) continue; /* Already suspended. */ + off = CCO; + assert(off >= -1); + if (off < 0) off = 0; + else CCO = 0; + FPOS -= CCO; + SCB.pushback = FPOS[-1]; + FBUF = 0; /* Indicate pending file. */ + RSCC += off; /* Update characters-in-record counter. */ + tagctr += off; /* Update tag length counter. */ + iopend(SCBFCB, off, rbufs); + return; + } +} +/* FILECONT: Call IOCONT to reopen an external entity (file). +*/ +VOID filecont() /* Open an external entity's file. */ +{ + iorc = iocont(SCBFCB); +} +/* FILECLOS: Call IOCLOSE to close an open external entity (file). +*/ +VOID fileclos() /* Close the current external entity's file. */ +{ + if (!SCBFCB) + return; + ioclose(SCBFCB); + /* The fcb will have been freed by sgmlio. + Make sure we don't access it again. */ + SCBFCB = NULL; +} +/* ERROR: Interface to text processor SGML I/O services for error handling. +*/ +VOID error(e) +struct error *e; +{ + scbset(); /* Update location in source control block. */ + msgprint(e); +} +/* PTRSRCH: Find a pointer in a list and return its index. + Search key must be on list as there is no limit test. + This routine is internal only -- not for user data. 
+*/ +UNIV mdnmtab[] = { + (UNIV)key[KATTLIST], + (UNIV)key[KDOCTYPE], + (UNIV)key[KELEMENT], + (UNIV)key[KENTITY], + (UNIV)key[KLINKTYPE], + (UNIV)key[KLINK], + (UNIV)key[KNOTATION], + (UNIV)sgmlkey, + (UNIV)key[KSHORTREF], + (UNIV)key[KUSELINK], + (UNIV)key[KUSEMAP] +}; +UNIV pcbtab[] = { + (UNIV)&pcbconc, + (UNIV)&pcbcone, + (UNIV)&pcbconm, + (UNIV)&pcbconr, + (UNIV)&pcbetag, + (UNIV)&pcbgrcm, + (UNIV)&pcbgrcs, + (UNIV)&pcbgrnm, + (UNIV)&pcbgrnt, + (UNIV)&pcblitc, + (UNIV)&pcblitp, + (UNIV)&pcblitr, + (UNIV)&pcblitt, + (UNIV)&pcblitv, + (UNIV)&pcbmd, + (UNIV)&pcbmdc, + (UNIV)&pcbmdi, + (UNIV)&pcbmds, + (UNIV)&pcbmsc, + (UNIV)&pcbmsi, + (UNIV)&pcbmsrc, + (UNIV)&pcbpro, + (UNIV)&pcbref, + (UNIV)&pcbstag, + (UNIV)&pcbval, + (UNIV)&pcbeal, + (UNIV)&pcbsd, +}; +UNS ptrsrch(ptrtab, ptr) +UNIV ptrtab[]; +UNIV ptr; +{ + UNS i; + + for (i = 0; ; ++i) + if (ptrtab[i] == ptr) + break; + return i; +} +/* MDERR: Process errors for markup declarations. + Prepare the special parameters that only exist for + markup declaration errors. +*/ +VOID mderr(number, parm1, parm2) +UNS number; /* Error number. */ +UNCH *parm1; /* Additional parameters (or NULL). */ +UNCH *parm2; /* Additional parameters (or NULL). */ +{ + struct error err; + errorinit(&err, subdcl ? MDERR : MDERR2, number); + err.parmno = parmno; + err.subdcl = subdcl; + err.eparm[0] = (UNIV)parm1; + err.eparm[1] = (UNIV)parm2; + err.errsp = (sizeof(pcbtab)/sizeof(pcbtab[0])) + ptrsrch(mdnmtab, + (UNIV)mdname); + error(&err); +} +/* SGMLERR: Process errors for SGML parser. +*/ +VOID sgmlerr(number, pcb, parm1, parm2) +UNS number; /* Error number. */ +struct parse *pcb; /* Current parse control block. */ +UNCH *parm1; /* Error message parameters. */ +UNCH *parm2; /* Error message parameters. */ +{ + struct error err; + errorinit(&err, DOCERR, number); + if (!pcb) pcb = prologsw ? 
propcb : conpcb; + err.errsp = ptrsrch(pcbtab, (UNIV)pcb); + err.eparm[0] = (UNIV)parm1; + err.eparm[1] = (UNIV)parm2; + error(&err); +} +/* SAVERR: Save an error for possible later use. +*/ +UNIV saverr(number, pcb, parm1, parm2) +UNS number; /* Error number. */ +struct parse *pcb; /* Current parse control block. */ +UNCH *parm1; /* Error message parameters. */ +UNCH *parm2; /* Error message parameters. */ +{ + struct error err; + errorinit(&err, DOCERR, number); + if (!pcb) pcb = prologsw ? propcb : conpcb; + err.errsp = ptrsrch(pcbtab, (UNIV)pcb); + err.eparm[0] = (UNIV)parm1; + err.eparm[1] = (UNIV)parm2; + scbset(); + return msgsave(&err); +} +/* SVDERR: Print a saved error. +*/ +VOID svderr(p) +UNIV p; +{ + msgsprint(p); +} +/* EXITERR: Process terminal errors for SGML parser. +*/ +VOID exiterr(number, pcb) +UNS number; /* Error number. */ +struct parse *pcb; /* Current parse control block. */ +{ + struct error err; + errorinit(&err, EXITERR, number); + if (!pcb) pcb = prologsw ? propcb : conpcb; + err.errsp = ptrsrch(pcbtab, (UNIV)pcb); + error(&err); + /* The error handler should have exited. */ + abort(); +} +/* SYNERR: Process syntax errors for SGML parser. +*/ +VOID synerr(number, pcb) +UNS number; /* Error number. */ +struct parse *pcb; /* Current parse control block. */ +{ + struct error err; + errorinit(&err, DOCERR, number); + err.errsp = ptrsrch(pcbtab, (UNIV)pcb); + error(&err); +} +/* FILERR: Process a file access error. +*/ +VOID filerr(number, parm) +UNS number; +UNCH *parm; +{ + struct error err; + errorinit(&err, FILERR, number); + err.eparm[0] = (UNIV)parm; + err.sverrno = errno; + error(&err); +} +/* ERRORINIT: Constructor for struct error. 
+*/ +VOID errorinit(e, type, number) +struct error *e; +UNS type; +UNS number; +{ + int i; + e->errtype = type; + e->errnum = number; + e->errsp = 0; + for (i = 0; i < MAXARGS; i++) + e->eparm[i] = 0; + e->parmno = 0; + e->subdcl = 0; +} +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +comment-column: 30 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/sgmlaux.h b/usr.bin/sgmls/sgmls/sgmlaux.h new file mode 100644 index 0000000..f87ac8b --- /dev/null +++ b/usr.bin/sgmls/sgmls/sgmlaux.h @@ -0,0 +1,70 @@ +/* This file controls the interface between the parser core and the auxiliary +functions in entgen.c, sgmlio.c, and sgmlmsg.c */ + +#include "std.h" +#include "entity.h" +#include "sgmldecl.h" + +/* Error types (ERRTYPE) for calls to error-handling services + performed for SGML by the text processor (SGMLIO). + NOTE: Strings in these blocks have no lengths, but cannot exceed + NAMELEN (plus 1 more byte for the zero terminator). +*/ +#define FILERR 0 /* Error: file access. */ +#define DOCERR 1 /* Error: in document markup. */ +#define MDERR 2 /* Error: in markup declaration with subdcl. */ +#define MDERR2 3 /* Error: in markup declaration with no subdcl. */ +#define EXITERR 4 /* Error: terminal error in document markup. */ +/* Quantities affecting error messages and their arguments. +*/ +#define MAXARGS 2 /* Maximum number of arguments in a msg. */ + +/* NOTE: Error handler must return, or next call to SGML must be RSET or END, + except for EXITERR errors which must not return. +*/ +struct error { /* IPB for error messages. */ + UNS errtype; /* Type of error: DOC, MD, MD2, FIL. */ + UNS errnum; /* Error number. */ + UNS errsp; /* Special parameter index in message file. */ + int sverrno; /* Saved value of errno. */ + int parmno; /* MDERROR: declaration parameter number. */ + UNCH *subdcl; /* MDERROR: subject of declaration. 
*/ + UNIV eparm[MAXARGS]; /* Ptrs to arguments (no length, but EOS). */ +}; + +struct location { + int filesw; + unsigned long rcnt; + int ccnt; + UNCH curchar; + UNCH nextchar; + UNCH *ename; + UNIV fcb; +}; + +int ioopen P((UNIV, UNIV*)); +VOID ioclose P((UNIV)); +int ioread P((UNIV, UNCH *, int *)); +VOID iopend P((UNIV, int, UNCH *)); +int iocont P((UNIV)); +VOID ioinit P((struct switches *)); +char *ioflid P((UNIV)); + +UNIV entgen P((struct fpi *)); + +VOID msgprint P((struct error *)); +VOID msginit P((struct switches *)); +UNIV msgsave P((struct error *)); +VOID msgsprint P((UNIV)); +VOID msgsfree P((UNIV)); +int msgcnterr P((void)); + + +int inprolog P((void)); +UNCH *getgi P((int)); + +int getlocation P((int, struct location *)); +UNIV rmalloc P((unsigned int)); +UNIV rrealloc P((UNIV, UNS)); +VOID frem P((UNIV)); +VOID exiterr P((unsigned int,struct parse *)); diff --git a/usr.bin/sgmls/sgmls/sgmldecl.c b/usr.bin/sgmls/sgmls/sgmldecl.c new file mode 100644 index 0000000..aab66e9 --- /dev/null +++ b/usr.bin/sgmls/sgmls/sgmldecl.c @@ -0,0 +1,1741 @@ +/* sgmldecl.c - + SGML declaration parsing. + + Written by James Clark (jjc@jclark.com). +*/ + +#include "sgmlincl.h" + +/* Symbolic names for the error numbers that are be generated only by +this module. 
*/ + +#define E_STANDARD 163 +#define E_SIGNIFICANT 164 +#define E_BADLIT 165 +#define E_SCOPE 166 +#define E_XNUM 167 +#define E_BADVERSION 168 +#define E_NMUNSUP 169 +#define E_XNMLIT 170 +#define E_CHARDESC 171 +#define E_CHARDUP 172 +#define E_CHARRANGE 173 +#define E_7BIT 174 +#define E_CHARMISSING 175 +#define E_SHUNNED 176 +#define E_NONSGML 177 +#define E_CAPSET 178 +#define E_CAPMISSING 179 +#define E_SYNTAX 180 +#define E_CHARNUM 181 +#define E_SWITCHES 182 +#define E_INSTANCE 183 +#define E_ZEROFEATURE 184 +#define E_YESNO 185 +#define E_CAPACITY 186 +#define E_NOTSUPPORTED 187 +#define E_FORMAL 189 +#define E_BADCLASS 190 +#define E_MUSTBENON 191 +#define E_BADBASECHAR 199 +#define E_SYNREFUNUSED 200 +#define E_SYNREFUNDESC 201 +#define E_SYNREFUNKNOWN 202 +#define E_SYNREFUNKNOWNSET 203 +#define E_FUNDUP 204 +#define E_BADFUN 205 +#define E_FUNCHAR 206 +#define E_GENDELIM 207 +#define E_SRDELIM 208 +#define E_BADKEY 209 +#define E_BADQUANTITY 210 +#define E_BADNAME 211 +#define E_REFNAME 212 +#define E_DUPNAME 213 +#define E_QUANTITY 214 +#define E_QTOOBIG 215 +#define E_NMSTRTCNT 219 +#define E_NMCHARCNT 220 +#define E_NMDUP 221 +#define E_NMBAD 222 +#define E_NMMINUS 223 +#define E_UNKNOWNSET 227 + +#define CANON_NMC '.' /* Canonical name character. */ +#define CANON_NMS 'A' /* Canonical name start character. */ +#define CANON_MIN ':' /* Canonical minimum data character. */ + +#define SUCCESS 1 +#define FAIL 0 +#define SIZEOF(v) (sizeof(v)/sizeof(v[0])) +#define matches(tok, str) (ustrcmp((tok)+1, (str)) == 0) + +static UNCH standard[] = "ISO 8879:1986"; + +#define REFERENCE_SYNTAX "ISO 8879:1986//SYNTAX Reference//EN" +#define CORE_SYNTAX "ISO 8879:1986//SYNTAX Core//EN" + +static UNCH (*newkey)[REFNAMELEN+1] = 0; + +struct pmap { + char *name; + UNIV value; +}; + +/* The reference capacity set. 
*/ +#define REFCAPSET \ +{ 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, \ +35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L } + +long refcapset[NCAPACITY] = REFCAPSET; + +/* A pmap of known capacity sets. */ + +static struct pmap capset_map[] = { + { "ISO 8879:1986//CAPACITY Reference//EN", (UNIV)refcapset }, + { 0 }, +}; + +/* Table of capacity names. Must match *CAP in sgmldecl.h. */ + +char *captab[] = { + "TOTALCAP", + "ENTCAP", + "ENTCHCAP", + "ELEMCAP", + "GRPCAP", + "EXGRPCAP", + "EXNMCAP", + "ATTCAP", + "ATTCHCAP", + "AVGRPCAP", + "NOTCAP", + "NOTCHCAP", + "IDCAP", + "IDREFCAP", + "MAPCAP", + "LKSETCAP", + "LKNMCAP", +}; + +/* The default SGML declaration. */ +#define MAXNUMBER 99999999L + +/* Reference quantity set */ + +#define REFATTCNT 40 +#define REFATTSPLEN 960 +#define REFBSEQLEN 960 +#define REFDTAGLEN 16 +#define REFDTEMPLEN 16 +#define REFENTLVL 16 +#define REFGRPCNT 32 +#define REFGRPGTCNT 96 +#define REFGRPLVL 16 +#define REFNORMSEP 2 +#define REFPILEN 240 +#define REFTAGLEN 960 +#define REFTAGLVL 24 + +#define ALLOC_MAX 65534 + +#define BIGINT 30000 + +#define MAXATTCNT ((ALLOC_MAX/sizeof(struct ad)) - 2) +#define MAXATTSPLEN BIGINT +#define MAXBSEQLEN BIGINT +#define MAXDTAGLEN 16 +#define MAXDTEMPLEN 16 +#define MAXENTLVL ((ALLOC_MAX/sizeof(struct source)) - 1) +#define MAXGRPCNT MAXGRPGTCNT +/* Must be between 96 and 253 */ +#define MAXGRPGTCNT 253 +#define MAXGRPLVL MAXGRPGTCNT +#define MAXLITLEN BIGINT +/* This guarantees that NAMELEN < LITLEN (ie there's always space for a name +in a buffer intended for a literal.) */ +#define MAXNAMELEN (REFLITLEN - 1) +#define MAXNORMSEP 2 +#define MAXPILEN BIGINT +#define MAXTAGLEN BIGINT +#define MAXTAGLVL ((ALLOC_MAX/sizeof(struct tag)) - 1) + +/* Table of quantity names. Must match Q* in sgmldecl.h. 
*/ + +static char *quantity_names[] = { + "ATTCNT", + "ATTSPLEN", + "BSEQLEN", + "DTAGLEN", + "DTEMPLEN", + "ENTLVL", + "GRPCNT", + "GRPGTCNT", + "GRPLVL", + "LITLEN", + "NAMELEN", + "NORMSEP", + "PILEN", + "TAGLEN", + "TAGLVL", +}; + +static int max_quantity[] = { + MAXATTCNT, + MAXATTSPLEN, + MAXBSEQLEN, + MAXDTAGLEN, + MAXDTEMPLEN, + MAXENTLVL, + MAXGRPCNT, + MAXGRPGTCNT, + MAXGRPLVL, + MAXLITLEN, + MAXNAMELEN, + MAXNORMSEP, + MAXPILEN, + MAXTAGLEN, + MAXTAGLVL, +}; + +static char *quantity_changed; + +/* Non-zero means the APPINFO parameter was not NONE. */ +static int appinfosw = 0; + +struct sgmldecl sd = { + REFCAPSET, /* capacity */ +#ifdef SUPPORT_SUBDOC + MAXNUMBER, /* subdoc */ +#else /* not SUPPORT_SUBDOC */ + 0, /* subdoc */ +#endif /* not SUPPORT_SUBDOC */ + 1, /* formal */ + 1, /* omittag */ + 1, /* shorttag */ + 1, /* shortref */ + { 1, 0 }, /* general/entity name case translation */ + { /* reference quantity set */ + REFATTCNT, + REFATTSPLEN, + REFBSEQLEN, + REFDTAGLEN, + REFDTEMPLEN, + REFENTLVL, + REFGRPCNT, + REFGRPGTCNT, + REFGRPLVL, + REFLITLEN, + REFNAMELEN, + REFNORMSEP, + REFPILEN, + REFTAGLEN, + REFTAGLVL, + }, +}; + +static int systemcharset[] = { +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, +176, 177, 178, 179, 180, 
181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, +192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, +208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, +224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, +}; + +static struct pmap charset_map[] = { + { "ESC 2/5 4/0", (UNIV)asciicharset }, /* ISO 646 IRV */ + { "ESC 2/8 4/2", (UNIV)asciicharset }, /* ISO Registration Number 6, ASCII */ + { SYSTEM_CHARSET_DESIGNATING_SEQUENCE, (UNIV)systemcharset }, + /* system character set */ + { 0 } +}; + +static int synrefcharset[256]; /* the syntax reference character set */ + +#define CHAR_NONSGML 01 +#define CHAR_SIGNIFICANT 02 +#define CHAR_MAGIC 04 +#define CHAR_SHUNNED 010 + +static UNCH char_flags[256]; +static int done_nonsgml = 0; +static UNCH *nlextoke = 0; /* new lextoke */ +static UNCH *nlextran = 0; /* new lextran */ + + +static UNCH kcharset[] = "CHARSET"; +static UNCH kbaseset[] = "BASESET"; +static UNCH kdescset[] = "DESCSET"; +static UNCH kunused[] = "UNUSED"; +static UNCH kcapacity[] = "CAPACITY"; +static UNCH kpublic[] = "PUBLIC"; +static UNCH ksgmlref[] = "SGMLREF"; +static UNCH kscope[] = "SCOPE"; +static UNCH kdocument[] = "DOCUMENT"; +static UNCH kinstance[] = "INSTANCE"; +static UNCH ksyntax[] = "SYNTAX"; +static UNCH kswitches[] = "SWITCHES"; +static UNCH kfeatures[] = "FEATURES"; +static UNCH kminimize[] = "MINIMIZE"; +static UNCH kdatatag[] = "DATATAG"; +static UNCH komittag[] = "OMITTAG"; +static UNCH krank[] = "RANK"; +static UNCH kshorttag[] = "SHORTTAG"; +static UNCH klink[] = "LINK"; +static UNCH ksimple[] = "SIMPLE"; +static UNCH kimplicit[] = "IMPLICIT"; +static UNCH kexplicit[] = "EXPLICIT"; +static UNCH kother[] = "OTHER"; +static UNCH kconcur[] = "CONCUR"; +static UNCH ksubdoc[] = "SUBDOC"; +static UNCH kformal[] = "FORMAL"; +static UNCH kyes[] = "YES"; +static UNCH kno[] 
= "NO"; +static UNCH kappinfo[] = "APPINFO"; +static UNCH knone[] = "NONE"; +static UNCH kshunchar[] = "SHUNCHAR"; +static UNCH kcontrols[] = "CONTROLS"; +static UNCH kfunction[] = "FUNCTION"; +static UNCH krs[] = "RS"; +static UNCH kre[] = "RE"; +static UNCH kspace[] = "SPACE"; +static UNCH knaming[] = "NAMING"; +static UNCH klcnmstrt[] = "LCNMSTRT"; +static UNCH kucnmstrt[] = "UCNMSTRT"; +static UNCH klcnmchar[] = "LCNMCHAR"; +static UNCH kucnmchar[] = "UCNMCHAR"; +static UNCH knamecase[] = "NAMECASE"; +static UNCH kdelim[] = "DELIM"; +static UNCH kgeneral[] = "GENERAL"; +static UNCH kentity[] = "ENTITY"; +static UNCH kshortref[] = "SHORTREF"; +static UNCH knames[] = "NAMES"; +static UNCH kquantity[] = "QUANTITY"; + +#define sderr mderr + +static UNIV pmaplookup P((struct pmap *, char *)); +static UNCH *ltous P((long)); +static VOID sdfixstandard P((UNCH *)); +static int sdparm P((UNCH *, struct parse *)); +static int sdname P((UNCH *, UNCH *)); +static int sdckname P((UNCH *, UNCH *)); +static int sdversion P((UNCH *)); +static int sdcharset P((UNCH *)); +static int sdcsdesc P((UNCH *, int *)); +static int sdpubcapacity P((UNCH *)); +static int sdcapacity P((UNCH *)); +static int sdscope P((UNCH *)); +static VOID setlexical P((void)); +static VOID noemptytag P((void)); +static int sdpubsyntax P((UNCH *)); +static int sdsyntax P((UNCH *)); +static int sdxsyntax P((UNCH *)); +static int sdtranscharnum P((UNCH *)); +static int sdtranschar P((int)); +static int sdshunchar P((UNCH *)); +static int sdsynref P((UNCH *)); +static int sdfunction P((UNCH *)); +static int sdnaming P((UNCH *)); +static int sddelim P((UNCH *)); +static int sdnames P((UNCH *)); +static int sdquantity P((UNCH *)); +static int sdfeatures P((UNCH *)); +static int sdappinfo P((UNCH *)); + +static VOID bufsalloc P((void)); +static VOID bufsrealloc P((void)); + +/* Parse the SGML declaration. Return non-zero if there was some appinfo. 
*/ + +int sgmldecl() +{ + int i; + int errsw = 0; + UNCH endbuf[REFNAMELEN+2]; /* buffer for parsing terminating > */ + static int (*section[]) P((UNCH *)) = { + sdversion, + sdcharset, + sdcapacity, + sdscope, + sdsyntax, + sdfeatures, + sdappinfo, + }; + /* These are needed if we use mderr. */ + parmno = 0; + mdname = sgmlkey; + subdcl = NULL; + for (i = 0; i < SIZEOF(section); i++) + if ((*section[i])(tbuf) == FAIL) { + errsw = 1; + break; + } + if (!errsw) + setlexical(); + bufsrealloc(); + /* Parse the >. Don't overwrite the appinfo. */ + if (!errsw) + sdparm(endbuf, 0); + /* We must exit if we hit end of document. */ + if (pcbsd.action == EOD_) + exiterr(161, &pcbsd); + if (!errsw && pcbsd.action != ESGD) + sderr(126, (UNCH *)0, (UNCH *)0); + return appinfosw; +} + +/* Parse the literal (which should contain the version of the +standard) at the beginning of a SGML declaration. */ + +static int sdversion(tbuf) +UNCH *tbuf; +{ + if (sdparm(tbuf, &pcblitv) != LIT1) { + sderr(123, (UNCH *)0, (UNCH *)0); + return FAIL; + } + sdfixstandard(tbuf); + if (ustrcmp(tbuf, standard) != 0) + sderr(E_BADVERSION, tbuf, standard); + return SUCCESS; +} + +/* Parse the CHARSET section. Use one token lookahead. */ + +static int sdcharset(tbuf) +UNCH *tbuf; +{ + int i; + int status[256]; + + if (sdname(tbuf, kcharset) == FAIL) return FAIL; + (void)sdparm(tbuf, 0); + + if (sdcsdesc(tbuf, status) == FAIL) + return FAIL; + + for (i = 128; i < 256; i++) + if (status[i] != UNDESC) + break; + if (i >= 256) { + /* Only a 7-bit character set was described. Fill it out to 8-bits. */ + for (i = 128; i < 256; i++) + status[i] = UNUSED; +#if 0 + sderr(E_7BIT, (UNCH *)0, (UNCH *)0); +#endif + } + /* Characters that are declared UNUSED in the document character set + are assigned to non-SGML. 
*/ + for (i = 0; i < 256; i++) { + if (status[i] == UNDESC) { + sderr(E_CHARMISSING, ltous((long)i), (UNCH *)0); + char_flags[i] |= CHAR_NONSGML; + } + else if (status[i] == UNUSED) + char_flags[i] |= CHAR_NONSGML; + } + done_nonsgml = 1; + return SUCCESS; +} + +/* Parse a character set description. Uses one character lookahead. */ + +static int sdcsdesc(tbuf, status) +UNCH *tbuf; +int *status; +{ + int i; + int nsets = 0; + struct fpi fpi; + + for (i = 0; i < 256; i++) + status[i] = UNDESC; + + for (;;) { + int nchars; + int *baseset = 0; + + if (pcbsd.action != NAS1) { + if (nsets == 0) { + sderr(120, (UNCH *)0, (UNCH *)0); + return FAIL; + } + break; + } + if (!matches(tbuf, kbaseset)) { + if (nsets == 0) { + sderr(118, tbuf+1, kbaseset); + return FAIL; + } + break; + } + nsets++; + MEMZERO((UNIV)&fpi, FPISZ); + if (sdparm(tbuf, &pcblitv) != LIT1) { + sderr(123, (UNCH *)0, (UNCH *)0); + return FAIL; + } + fpi.fpipubis = tbuf; + /* Give a warning if it is not a CHARSET fpi. */ + if (parsefpi(&fpi)) + sderr(E_FORMAL, (UNCH *)0, (UNCH *)0); + else if (fpi.fpic != FPICHARS) + sderr(E_BADCLASS, kcharset, (UNCH *)0); + else { + fpi.fpipubis[fpi.fpil + fpi.fpill] = '\0'; + baseset = (int *)pmaplookup(charset_map, + (char *)fpi.fpipubis + fpi.fpil); + if (!baseset) + sderr(E_UNKNOWNSET, fpi.fpipubis + fpi.fpil, (UNCH *)0); + } + if (sdname(tbuf, kdescset) == FAIL) return FAIL; + nchars = 0; + for (;;) { + long start, count; + long basenum; + if (sdparm(tbuf, 0) != NUM1) + break; + start = atol((char *)tbuf); + if (sdparm(tbuf, 0) != NUM1) { + sderr(E_XNUM, (UNCH *)0, (UNCH *)0); + return FAIL; + } + count = atol((char *)tbuf); + switch (sdparm(tbuf, &pcblitv)) { + case NUM1: + basenum = atol((char *)tbuf); + break; + case LIT1: + basenum = UNKNOWN; + break; + case NAS1: + if (matches(tbuf, kunused)) { + basenum = UNUSED; + break; + } + /* fall through */ + default: + sderr(E_CHARDESC, ltous(start), (UNCH *)0); + return FAIL; + } + if (start + count > 256) + 
sderr(E_CHARRANGE, (UNCH *)0, (UNCH *)0); + else { + int i; + int lim = (int)start + count; + for (i = (int)start; i < lim; i++) { + if (status[i] != UNDESC) + sderr(E_CHARDUP, ltous((long)i), (UNCH *)0); + else if (basenum == UNUSED || basenum == UNKNOWN) + status[i] = (int)basenum; + else if (baseset == 0) + status[i] = UNKNOWN_SET; + else { + int n = basenum + (i - start); + if (n < 0 || n > 255) + sderr(E_CHARRANGE, (UNCH *)0, (UNCH *)0); + else if (baseset[n] == UNUSED) + sderr(E_BADBASECHAR, ltous((long)n), (UNCH *)0); + else + status[i] = baseset[n]; + } + } + } + nchars++; + } + if (nchars == 0) { + sderr(E_XNUM, (UNCH *)0, (UNCH *)0); + return FAIL; + } + } + return SUCCESS; +} + +/* Parse the CAPACITY section. Uses one token lookahead. */ + +static int sdcapacity(tbuf) +UNCH *tbuf; +{ + int ncap; + + if (sdckname(tbuf, kcapacity) == FAIL) + return FAIL; + if (sdparm(tbuf, 0) != NAS1) { + sderr(120, (UNCH *)0, (UNCH *)0); + return FAIL; + } + if (matches(tbuf, kpublic)) + return sdpubcapacity(tbuf); + if (!matches(tbuf, ksgmlref)) { + sderr(E_CAPACITY, tbuf+1, (UNCH *)0); + return FAIL; + } + memcpy((UNIV)sd.capacity, (UNIV)refcapset, sizeof(sd.capacity)); + ncap = 0; + for (;;) { + int capno = -1; + int i; + + if (sdparm(tbuf, 0) != NAS1) + break; + for (i = 0; i < SIZEOF(captab); i++) + if (matches(tbuf, captab[i])) { + capno = i; + break; + } + if (capno < 0) + break; + if (sdparm(tbuf, 0) != NUM1) { + sderr(E_XNUM, (UNCH *)0, (UNCH *)0); + return FAIL; + } + sd.capacity[capno] = atol((char *)tbuf); + ncap++; + } + if (ncap == 0) { + sderr(E_CAPMISSING, (UNCH *)0, (UNCH *)0); + return FAIL; + } + + return SUCCESS; +} + +/* Parse a CAPACITY section that started with PUBLIC. Must do one +token lookahead, since sdcapacity() also does. 
*/ + +static int sdpubcapacity(tbuf) +UNCH *tbuf; +{ + UNIV ptr; + if (sdparm(tbuf, &pcblitv) != LIT1) { + sderr(123, (UNCH *)0, (UNCH *)0); + return FAIL; + } + sdfixstandard(tbuf); + ptr = pmaplookup(capset_map, (char *)tbuf); + if (!ptr) + sderr(E_CAPSET, tbuf, (UNCH *)0); + else + memcpy((UNIV)sd.capacity, (UNIV)ptr, sizeof(sd.capacity)); + (void)sdparm(tbuf, 0); + return SUCCESS; +} + +/* Parse the SCOPE section. Uses no lookahead. */ + +static int sdscope(tbuf) +UNCH *tbuf; +{ + if (sdckname(tbuf, kscope) == FAIL) + return FAIL; + if (sdparm(tbuf, 0) != NAS1) { + sderr(120, (UNCH *)0, (UNCH *)0); + return FAIL; + } + if (matches(tbuf, kdocument)) + ; + else if (matches(tbuf, kinstance)) + sderr(E_INSTANCE, (UNCH *)0, (UNCH *)0); + else { + sderr(E_SCOPE, tbuf+1, (UNCH *)0); + return FAIL; + } + return SUCCESS; +} + +/* Parse the SYNTAX section. Uses one token lookahead. */ + +static int sdsyntax(tbuf) +UNCH *tbuf; +{ + if (sdname(tbuf, ksyntax) == FAIL) return FAIL; + if (sdparm(tbuf, 0) != NAS1) { + sderr(120, (UNCH *)0, (UNCH *)0); + return FAIL; + } + if (matches(tbuf, kpublic)) + return sdpubsyntax(tbuf); + return sdxsyntax(tbuf); +} + +/* Parse the SYNTAX section which starts with PUBLIC. Uses one token +lookahead. 
*/ + +static int sdpubsyntax(tbuf) +UNCH *tbuf; +{ + int nswitches; + if (sdparm(tbuf, &pcblitv) != LIT1) + return FAIL; + sdfixstandard(tbuf); + if (ustrcmp(tbuf, CORE_SYNTAX) == 0) + sd.shortref = 0; + else if (ustrcmp(tbuf, REFERENCE_SYNTAX) == 0) + sd.shortref = 1; + else + sderr(E_SYNTAX, tbuf, (UNCH *)0); + if (sdparm(tbuf, 0) != NAS1) + return SUCCESS; + if (!matches(tbuf, kswitches)) + return SUCCESS; + nswitches = 0; + for (;;) { + int errsw = 0; + + if (sdparm(tbuf, 0) != NUM1) + break; + if (atol((char *)tbuf) > 255) { + sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0); + errsw = 1; + } + if (sdparm(tbuf, 0) != NUM1) { + sderr(E_XNUM, (UNCH *)0, (UNCH *)0); + return FAIL; + } + if (!errsw) { + if (atol((char *)tbuf) > 255) + sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0); + } + nswitches++; + } + if (nswitches == 0) { + sderr(E_XNUM, (UNCH *)0, (UNCH *)0); + return FAIL; + } + sderr(E_SWITCHES, (UNCH *)0, (UNCH *)0); + return SUCCESS; +} + +/* Parse an explicit concrete syntax. Uses one token lookahead. */ + +static +int sdxsyntax(tbuf) +UNCH *tbuf; +{ + static int (*section[]) P((UNCH *)) = { + sdshunchar, + sdsynref, + sdfunction, + sdnaming, + sddelim, + sdnames, + sdquantity, + }; + int i; + + for (i = 0; i < SIZEOF(section); i++) + if ((*section[i])(tbuf) == FAIL) + return FAIL; + return SUCCESS; +} + +/* Parse the SHUNCHAR section. Uses one token lookahead. 
*/ + +static +int sdshunchar(tbuf) +UNCH *tbuf; +{ + int i; + for (i = 0; i < 256; i++) + char_flags[i] &= ~CHAR_SHUNNED; + + if (sdckname(tbuf, kshunchar) == FAIL) + return FAIL; + + if (sdparm(tbuf, 0) == NAS1) { + if (matches(tbuf, knone)) { + (void)sdparm(tbuf, 0); + return SUCCESS; + } + if (matches(tbuf, kcontrols)) { + for (i = 0; i < 256; i++) + if (ISASCII(i) && iscntrl(i)) + char_flags[i] |= CHAR_SHUNNED; + if (sdparm(tbuf, 0) != NUM1) + return SUCCESS; + } + } + if (pcbsd.action != NUM1) { + sderr(E_XNUM, (UNCH *)0, (UNCH *)0); + return FAIL; + } + do { + long n = atol((char *)tbuf); + if (n > 255) + sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0); + else + char_flags[(int)n] |= CHAR_SHUNNED; + } while (sdparm(tbuf, 0) == NUM1); + return SUCCESS; +} + +/* Parse the syntax reference character set. Uses one token lookahead. */ + +static +int sdsynref(tbuf) +UNCH *tbuf; +{ + return sdcsdesc(tbuf, synrefcharset); +} + +/* Translate a character number from the syntax reference character set +to the system character set. If it can't be done, give an error message +and return -1. */ + +static +int sdtranscharnum(tbuf) +UNCH *tbuf; +{ + long n = atol((char *)tbuf); + if (n > 255) { + sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0); + return -1; + } + return sdtranschar((int)n); +} + + +static +int sdtranschar(n) +int n; +{ + int ch = synrefcharset[n]; + if (ch >= 0) + return ch; + switch (ch) { + case UNUSED: + sderr(E_SYNREFUNUSED, ltous((long)n), (UNCH *)0); + break; + case UNDESC: + sderr(E_SYNREFUNDESC, ltous((long)n), (UNCH *)0); + break; + case UNKNOWN: + sderr(E_SYNREFUNKNOWN, ltous((long)n), (UNCH *)0); + break; + case UNKNOWN_SET: + sderr(E_SYNREFUNKNOWNSET, ltous((long)n), (UNCH *)0); + break; + default: + abort(); + } + return -1; +} + + +/* Parse the function section. Uses two tokens lookahead. "NAMING" +could be a function name. 
*/ + +static +int sdfunction(tbuf) +UNCH *tbuf; +{ + static UNCH *fun[] = { kre, krs, kspace }; + static int funval[] = { RECHAR, RSCHAR, ' ' }; + int i; + int had_tab = 0; + int changed = 0; /* attempted to change reference syntax */ + + if (sdckname(tbuf, kfunction) == FAIL) + return FAIL; + for (i = 0; i < SIZEOF(fun); i++) { + int ch; + if (sdname(tbuf, fun[i]) == FAIL) + return FAIL; + if (sdparm(tbuf, 0) != NUM1) { + sderr(E_XNUM, (UNCH *)0, (UNCH *)0); + return FAIL; + } + ch = sdtranscharnum(tbuf); + if (ch >= 0 && ch != funval[i]) + changed = 1; + } + for (;;) { + int tabsw = 0; + int namingsw = 0; + if (sdparm(tbuf, 0) != NAS1) { + sderr(120, (UNCH *)0, (UNCH *)0); + return FAIL; + } + if (matches(tbuf, (UNCH *)"TAB")) { + tabsw = 1; + if (had_tab) + sderr(E_FUNDUP, (UNCH *)0, (UNCH *)0); + } + else { + for (i = 0; i < SIZEOF(fun); i++) + if (matches(tbuf, fun[i])) + sderr(E_BADFUN, fun[i], (UNCH *)0); + if (matches(tbuf, knaming)) + namingsw = 1; + else + changed = 1; + } + if (sdparm(tbuf, 0) != NAS1) { + sderr(120, (UNCH *)0, (UNCH *)0); + return FAIL; + } + if (namingsw) { + if (matches(tbuf, klcnmstrt)) + break; + changed = 1; + } + if (sdparm(tbuf, 0) != NUM1) { + sderr(E_XNUM, (UNCH *)0, (UNCH *)0); + return FAIL; + } + if (tabsw && !had_tab) { + int ch = sdtranscharnum(tbuf); + if (ch >= 0 && ch != TABCHAR) + changed = 1; + had_tab = 1; + } + + } + if (!had_tab) + changed = 1; + if (changed) + sderr(E_FUNCHAR, (UNCH *)0, (UNCH *)0); + return SUCCESS; +} + +/* Parse the NAMING section. Uses no lookahead. 
*/ + +static +int sdnaming(tbuf) +UNCH *tbuf; +{ + int i; + int bad = 0; + static UNCH *classes[] = { klcnmstrt, kucnmstrt, klcnmchar, kucnmchar }; + static UNCH *types[] = { kgeneral, kentity }; + +#define NCLASSES SIZEOF(classes) + + int bufsize = 4; /* allocated size of buf */ + UNCH *buf = (UNCH *)rmalloc(bufsize); /* holds characters + in naming classes */ + int bufi = 0; /* next index into buf */ + int start[NCLASSES]; /* index of first character for each class */ + int count[NCLASSES]; /* number of characters for each class */ + + for (i = 0; i < NCLASSES; i++) { + UNCH *s; + + if (sdckname(tbuf, classes[i]) == FAIL) { + frem((UNIV)buf); + return FAIL; + } + if (sdparm(tbuf, &pcblitp) != LIT1) { + sderr(123, (UNCH *)0, (UNCH *)0); + frem((UNIV)buf); + return FAIL; + } + start[i] = bufi; + + for (s = tbuf; *s; s++) { + int c = *s; + if (c == DELNONCH) { + c = UNSHIFTNON(*s); + s++; + } + c = sdtranschar(c); + if (c < 0) + bad = 1; + else if ((char_flags[c] & (CHAR_SIGNIFICANT | CHAR_MAGIC)) + && c != '.' 
&& c != '-') { + int class = lextoke[c]; + if (class == SEP || class == SP || class == NMC + || class == NMS || class == NU) + sderr(E_NMBAD, ltous((long)c), (UNCH *)0); + else + sderr(E_NMUNSUP, ltous((long)c), (UNCH *)0); + bad = 1; + } + if (bufi >= bufsize) + buf = (UNCH *)rrealloc((UNIV)buf, bufsize *= 2); + buf[bufi++] = c; + } + + count[i] = bufi - start[i]; + (void)sdparm(tbuf, 0); + } + if (!bad && count[0] != count[1]) { + sderr(E_NMSTRTCNT, (UNCH *)0, (UNCH *)0); + bad = 1; + } + if (!bad && count[2] != count[3]) { + sderr(E_NMCHARCNT, (UNCH *)0, (UNCH *)0); + bad = 1; + } + if (!bad) { + nlextoke = (UNCH *)rmalloc(256); + memcpy((UNIV)nlextoke, lextoke, 256); + nlextoke['.'] = nlextoke['-'] = INV; + + nlextran = (UNCH *)rmalloc(256); + memcpy((UNIV)nlextran, lextran, 256); + + for (i = 0; i < count[0]; i++) { + UNCH lc = buf[start[0] + i]; + UNCH uc = buf[start[1] + i]; + nlextoke[lc] = NMS; + nlextoke[uc] = NMS; + nlextran[lc] = uc; + } + + for (i = 0; i < count[2]; i++) { + UNCH lc = buf[start[2] + i]; + UNCH uc = buf[start[3] + i]; + if (nlextoke[lc] == NMS) { + sderr(E_NMDUP, ltous((long)lc), (UNCH *)0); + bad = 1; + } + else if (nlextoke[uc] == NMS) { + sderr(E_NMDUP, ltous((long)uc), (UNCH *)0); + bad = 1; + } + else { + nlextoke[lc] = NMC; + nlextoke[uc] = NMC; + nlextran[lc] = uc; + } + } + if (nlextoke['-'] != NMC) { + sderr(E_NMMINUS, (UNCH *)0, (UNCH *)0); + bad = 1; + } + if (bad) { + if (nlextoke) { + frem((UNIV)nlextoke); + nlextoke = 0; + } + if (nlextran) { + frem((UNIV)nlextran); + nlextran = 0; + } + } + } + + frem((UNIV)buf); + + if (sdckname(tbuf, knamecase) == FAIL) + return FAIL; + for (i = 0; i < SIZEOF(types); ++i) { + if (sdname(tbuf, types[i]) == FAIL) + return FAIL; + if (sdparm(tbuf, 0) != NAS1) { + sderr(120, (UNCH *)0, (UNCH *)0); + return FAIL; + } + if (matches(tbuf, kyes)) + sd.namecase[i] = 1; + else if (matches(tbuf, kno)) + sd.namecase[i] = 0; + else { + sderr(E_YESNO, tbuf+1, (UNCH *)0); + return FAIL; + } + } + 
return SUCCESS; +} + +/* Parse the DELIM section. Uses one token lookahead. */ + +static +int sddelim(tbuf) +UNCH *tbuf; +{ + int changed = 0; + if (sdname(tbuf, kdelim) == FAIL + || sdname(tbuf, kgeneral) == FAIL + || sdname(tbuf, ksgmlref) == FAIL) + return FAIL; + for (;;) { + if (sdparm(tbuf, 0) != NAS1) { + sderr(120, (UNCH *)0, (UNCH *)0); + return FAIL; + } + if (matches(tbuf, kshortref)) + break; + if (sdparm(tbuf, &pcblitp) != LIT1) { + sderr(123, (UNCH *)0, (UNCH *)0); + return FAIL; + } + changed = 1; + } + if (changed) { + sderr(E_GENDELIM, (UNCH *)0,(UNCH *)0); + changed = 0; + } + if (sdparm(tbuf, 0) != NAS1) { + sderr(120, (UNCH *)0, (UNCH *)0); + return FAIL; + } + if (matches(tbuf, ksgmlref)) + sd.shortref = 1; + else if (matches(tbuf, knone)) + sd.shortref = 0; + else { + sderr(118, tbuf+1, ksgmlref); /* probably they forgot SGMLREF */ + return FAIL; + } + while (sdparm(tbuf, &pcblitp) == LIT1) + changed = 1; + if (changed) + sderr(E_SRDELIM, (UNCH *)0, (UNCH *)0); + return SUCCESS; +} + +/* Parse the NAMES section. Uses one token lookahead. */ + +static +int sdnames(tbuf) +UNCH *tbuf; +{ + int i; + if (sdckname(tbuf, knames) == FAIL) + return FAIL; + if (sdname(tbuf, ksgmlref) == FAIL) + return FAIL; + + while (sdparm(tbuf, 0) == NAS1) { + int j; + if (matches(tbuf, kquantity)) + break; + for (i = 0; i < NKEYS; i++) + if (matches(tbuf, key[i])) + break; + if (i >= NKEYS) { + sderr(E_BADKEY, tbuf+1, (UNCH *)0); + return FAIL; + } + if (sdparm(tbuf, &pcblitp) != NAS1) { + sderr(120, (UNCH *)0, (UNCH *)0); + return FAIL; + } + if (!newkey) { + newkey = (UNCH (*)[REFNAMELEN+1])rmalloc((REFNAMELEN+1)*NKEYS); + MEMZERO((UNIV)newkey, (REFNAMELEN+1)*NKEYS); + } + for (j = 0; j < NKEYS; j++) { + if (matches(tbuf, key[j])) { + sderr(E_REFNAME, tbuf + 1, (UNCH *)0); + break; + } + if (matches(tbuf, newkey[j])) { + sderr(E_DUPNAME, tbuf + 1, (UNCH *)0); + break; + } + } + if (j >= NKEYS) + ustrcpy(newkey[i], tbuf + 1); + } + /* Now install the new keys. 
*/ + if (newkey) { + for (i = 0; i < NKEYS; i++) + if (newkey[i][0] != '\0') { + UNCH temp[REFNAMELEN + 1]; + + ustrcpy(temp, key[i]); + ustrcpy(key[i], newkey[i]); + ustrcpy(newkey[i], temp); + } + } + return SUCCESS; +} + +/* Parse the QUANTITY section. Uses one token lookahead. */ + +static int sdquantity(tbuf) +UNCH *tbuf; +{ + int quantity[NQUANTITY]; + int i; + + for (i = 0; i < NQUANTITY; i++) + quantity[i] = -1; + if (sdckname(tbuf, kquantity) == FAIL) + return FAIL; + if (sdname(tbuf, ksgmlref) == FAIL) + return FAIL; + while (sdparm(tbuf, 0) == NAS1 && !matches(tbuf, kfeatures)) { + long n; + for (i = 0; i < SIZEOF(quantity_names); i++) + if (matches(tbuf, quantity_names[i])) + break; + if (i >= SIZEOF(quantity_names)) { + sderr(E_BADQUANTITY, tbuf + 1, (UNCH *)0); + return FAIL; + } + if (sdparm(tbuf, 0) != NUM1) { + sderr(E_XNUM, (UNCH *)0, (UNCH *)0); + return FAIL; + } + n = atol((char *)tbuf); + if (n < sd.quantity[i]) + sderr(E_QUANTITY, (UNCH *)quantity_names[i], + ltous((long)sd.quantity[i])); + else if (n > max_quantity[i]) { + sderr(E_QTOOBIG, (UNCH *)quantity_names[i], + ltous((long)max_quantity[i])); + quantity[i] = max_quantity[i]; + } + else + quantity[i] = (int)n; + } + for (i = 0; i < NQUANTITY; i++) + if (quantity[i] > 0) { + sd.quantity[i] = quantity[i]; + if (!quantity_changed) + quantity_changed = (char *)rmalloc(NQUANTITY); + quantity_changed[i] = 1; + } + return SUCCESS; +} + +/* Parse the FEATURES section. Uses no lookahead. */ + +static int sdfeatures(tbuf) +UNCH *tbuf; +{ + static struct { + UNCH *name; + UNCH argtype; /* 0 = no argument, 1 = boolean, 2 = numeric */ + UNIV valp; /* UNCH * if boolean, long * if numeric. 
*/ + } features[] = { + { kminimize, 0, 0 }, + { kdatatag, 1, 0 }, + { komittag, 1, (UNIV)&sd.omittag }, + { krank, 1, 0 }, + { kshorttag, 1, (UNIV)&sd.shorttag }, + { klink, 0, 0 }, + { ksimple, 2, 0 }, + { kimplicit, 1, 0 }, + { kexplicit, 2, 0 }, + { kother, 0, 0 }, + { kconcur, 2, 0 }, + { ksubdoc, 2, (UNIV)&sd.subdoc }, + { kformal, 1, (UNIV)&sd.formal }, + }; + + int i; + + if (sdckname(tbuf, kfeatures) == FAIL) + return FAIL; + for (i = 0; i < SIZEOF(features); i++) { + if (sdname(tbuf, features[i].name) == FAIL) return FAIL; + if (features[i].argtype > 0) { + long n; + if (sdparm(tbuf, 0) != NAS1) { + sderr(120, (UNCH *)0, (UNCH *)0); + return FAIL; + } + if (matches(tbuf, kyes)) { + if (features[i].argtype > 1) { + if (sdparm(tbuf, 0) != NUM1) { + sderr(E_XNUM, (UNCH *)0, (UNCH *)0); + return FAIL; + } + n = atol((char *)tbuf); + if (n == 0) + sderr(E_ZEROFEATURE, features[i].name, (UNCH *)0); + } + else + n = 1; + } + else if (matches(tbuf, kno)) + n = 0; + else { + sderr(E_YESNO, tbuf+1, (UNCH *)0); + return FAIL; + } + if (features[i].valp == 0) { + if (n > 0) + sderr(E_NOTSUPPORTED, features[i].name, + (UNCH *)0); + } + else if (features[i].argtype > 1) + *(long *)features[i].valp = n; + else + *(UNCH *)features[i].valp = (UNCH)n; + } + } + if (!sd.shorttag) + noemptytag(); + return SUCCESS; +} + +/* Parse the APPINFO section. Uses no lookahead. */ + +static int sdappinfo(tbuf) +UNCH *tbuf; +{ + if (sdname(tbuf, kappinfo) == FAIL) return FAIL; + switch (sdparm(tbuf, &pcblitv)) { + case LIT1: + appinfosw = 1; + break; + case NAS1: + if (matches(tbuf, knone)) + break; + sderr(118, tbuf+1, knone); + return FAIL; + default: + sderr(E_XNMLIT, knone, (UNCH *)0); + return FAIL; + } + return SUCCESS; +} + +/* Change a prefix of ISO 8879-1986 to ISO 8879:1986. Amendment 1 to +the standard requires the latter. 
*/ + +static VOID sdfixstandard(tbuf) +UNCH *tbuf; +{ + if (strncmp((char *)tbuf, "ISO 8879-1986", 13) == 0) { + sderr(E_STANDARD, (UNCH *)0, (UNCH *)0); + tbuf[8] = ':'; + } +} + +static int sdname(tbuf, key) +UNCH *tbuf; +UNCH *key; +{ + if (sdparm(tbuf, 0) != NAS1) { + sderr(120, (UNCH *)0, (UNCH *)0); + return FAIL; + } + if (!matches(tbuf, key)) { + sderr(118, tbuf+1, key); + return FAIL; + } + return SUCCESS; +} + +static int sdckname(tbuf, key) +UNCH *tbuf; +UNCH *key; +{ + if (pcbsd.action != NAS1) { + sderr(120, (UNCH *)0, (UNCH *)0); + return FAIL; + } + if (!matches(tbuf, key)) { + sderr(118, tbuf+1, key); + return FAIL; + } + return SUCCESS; +} + +/* Parse a SGML declaration parameter. If lpcb is NULL, pt must be +REFNAMELEN+2 characters long, otherwise at least LITLEN+2 characters +long. LPCB should be NULL if a literal is not allowed. */ + +static int sdparm(pt, lpcb) +UNCH *pt; /* Token buffer. */ +struct parse *lpcb; /* PCB for literal parse. */ +{ + for (;;) { + parse(&pcbsd); + if (pcbsd.action != ISIG) + break; + sderr(E_SIGNIFICANT, (UNCH *)0, (UNCH *)0); + } + ++parmno; + switch (pcbsd.action) { + case LIT1: + if (!lpcb) { + sderr(E_BADLIT, (UNCH *)0, (UNCH *)0); + REPEATCC; + return pcbsd.action = INV_; + } + parselit(pt, lpcb, REFLITLEN, lex.d.lit); + return pcbsd.action; + case LIT2: + if (!lpcb) { + sderr(E_BADLIT, (UNCH *)0, (UNCH *)0); + REPEATCC; + return pcbsd.action = INV_; + } + parselit(pt, lpcb, REFLITLEN, lex.d.lita); + return pcbsd.action = LIT1; + case NAS1: + parsenm(pt, 1); + return pcbsd.action; + case NUM1: + parsetkn(pt, NU, REFNAMELEN); + return pcbsd.action; + } + return pcbsd.action; +} + +VOID sdinit() +{ + int i; + /* Shunned character numbers in the reference concrete syntax. 
*/ + static UNCH refshun[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 255 + }; + UNCH **p; + /* A character is magic if it is a non-SGML character used for + some internal purpose in the parser. */ + char_flags[EOS] |= CHAR_MAGIC; + char_flags[EOBCHAR] |= CHAR_MAGIC; + char_flags[EOFCHAR] |= CHAR_MAGIC; + char_flags[GENRECHAR] |= CHAR_MAGIC; + char_flags[DELNONCH] |= CHAR_MAGIC; + char_flags[DELCDATA] |= CHAR_MAGIC; + char_flags[DELSDATA] |= CHAR_MAGIC; + + /* Figure out the significant SGML characters. */ + for (p = lextabs; *p; p++) { + UNCH datclass = (*p)[CANON_DATACHAR]; + UNCH nonclass = (*p)[CANON_NONSGML]; + for (i = 0; i < 256; i++) + if (!(char_flags[i] & CHAR_MAGIC) + && (*p)[i] != datclass && (*p)[i] != nonclass) + char_flags[i] |= CHAR_SIGNIFICANT; + } + for (i = 0; i < SIZEOF(refshun); i++) + char_flags[refshun[i]] |= CHAR_SHUNNED; + for (i = 0; i < 256; i++) + if (ISASCII(i) && iscntrl(i)) + char_flags[i] |= CHAR_SHUNNED; + bufsalloc(); +} + + +static +VOID bufsalloc() +{ + scbs = (struct source *)rmalloc((REFENTLVL+1)*sizeof(struct source)); + tbuf = (UNCH *)rmalloc(REFATTSPLEN+REFLITLEN+1); + /* entbuf is used for parsing numeric character references */ + entbuf = (UNCH *)rmalloc(REFNAMELEN + 2); +} + +static +VOID bufsrealloc() +{ + UNS size; + + if (ENTLVL != REFENTLVL) + scbs = (struct source *)rrealloc((UNIV)scbs, + (ENTLVL+1)*sizeof(struct source)); + /* Calculate the size for tbuf. */ + size = LITLEN + ATTSPLEN; + if (PILEN > size) + size = PILEN; + if (BSEQLEN > size) + size = BSEQLEN; + if (size != REFATTSPLEN + REFLITLEN) + tbuf = (UNCH *)rrealloc((UNIV)tbuf, size + 1); + if (NAMELEN != REFNAMELEN) + entbuf = (UNCH *)rrealloc((UNIV)entbuf, NAMELEN + 2); +} + + +/* Check that the non-SGML characters are compatible with the concrete +syntax and munge the lexical tables accordingly. 
If IMPLIED is +non-zero, then the SGML declaration was implied; in this case, don't +give error messages about shunned characters not being declared +non-SGML. Also make any changes that are required by the NAMING section. +*/ + +static VOID setlexical() +{ + int i; + UNCH **p; + + if (nlextoke) { + /* Handle characters that were made significant by the + NAMING section. */ + for (i = 0; i < 256; i++) + if (nlextoke[i] == NMC || nlextoke[i] == NMS) + char_flags[i] |= CHAR_SIGNIFICANT; + } + + for (i = 0; i < 256; i++) + if (char_flags[i] & CHAR_SIGNIFICANT) { + /* Significant SGML characters musn't be non-SGML. */ + if (char_flags[i] & CHAR_NONSGML) { + UNCH buf[2]; + buf[0] = i; + buf[1] = '\0'; + sderr(E_NONSGML, buf, (UNCH *)0); + char_flags[i] &= ~CHAR_NONSGML; + } + } + else { + /* Shunned characters that are not significant SGML characters + must be non-SGML. */ + if ((char_flags[i] & (CHAR_SHUNNED | CHAR_NONSGML)) + == CHAR_SHUNNED) { + sderr(E_SHUNNED, ltous((long)i), (UNCH *)0); + char_flags[i] |= CHAR_NONSGML; + } + } + + + /* Now munge the lexical tables. */ + for (p = lextabs; *p; p++) { + UNCH nonclass = (*p)[CANON_NONSGML]; + UNCH datclass = (*p)[CANON_DATACHAR]; + UNCH nmcclass = (*p)[CANON_NMC]; + UNCH nmsclass = (*p)[CANON_NMS]; + UNCH minclass = (*p)[CANON_MIN]; + for (i = 0; i < 256; i++) { + if (char_flags[i] & CHAR_NONSGML) { + /* We already know that it's not significant. */ + if (!(char_flags[i] & CHAR_MAGIC)) + (*p)[i] = nonclass; + } + else { + if (char_flags[i] & CHAR_MAGIC) { + sderr(E_MUSTBENON, ltous((long)i), (UNCH *)0); + } + else if (!(char_flags[i] & CHAR_SIGNIFICANT)) + (*p)[i] = datclass; + else if (nlextoke + /* This relies on the fact that lextoke + occurs last in lextabs. */ + && lextoke[i] != nlextoke[i]) { + switch (nlextoke[i]) { + case NMC: + (*p)[i] = nmcclass; + break; + case NMS: + (*p)[i] = nmsclass; + break; + case INV: + /* This will happen if period is not a + name character. 
*/ + (*p)[i] = minclass; + break; + default: + abort(); + } + } + } + } + } + if (nlextran) { + memcpy((UNIV)lextran, (UNIV)nlextran, 256); + frem((UNIV)nlextran); + } + if (nlextoke) { + frem((UNIV)nlextoke); + nlextoke = 0; + } + +} + +/* Munge parse tables so that empty start and end tags are not recognized. */ + +static VOID noemptytag() +{ + static struct parse *pcbs[] = { &pcbconm, &pcbcone, &pcbconr, &pcbconc }; + int i; + + for (i = 0; i < SIZEOF(pcbs); i++) { + int maxclass, maxstate; + int j, k, act; + UNCH *plex = pcbs[i]->plex; + UNCH **ptab = pcbs[i]->ptab; + + /* Figure out the maximum lexical class. */ + maxclass = 0; + for (j = 0; j < 256; j++) + if (plex[j] > maxclass) + maxclass = plex[j]; + + /* Now figure out the maximum state number and at the same time + change actions. */ + + maxstate = 0; + + for (j = 0; j <= maxstate; j += 2) { + for (k = 0; k <= maxclass; k++) + if (ptab[j][k] > maxstate) + maxstate = ptab[j][k]; + /* If the '>' class has an empty start or end tag action, + change it to the action that the NMC class has. */ + act = ptab[j + 1][plex['>']]; + if (act == NET_ || act == NST_) + ptab[j + 1][plex['>']] = ptab[j + 1][plex['_']]; + } + } +} + +/* Lookup the value of the entry in pmap PTR whose key is KEY. */ + +static UNIV pmaplookup(ptr, key) +struct pmap *ptr; +char *key; +{ + for (; ptr->name; ptr++) + if (strcmp(key, ptr->name) == 0) + return ptr->value; + return 0; +} + +/* Return an ASCII representation of N. 
*/ + +static UNCH *ltous(n) +long n; +{ + static char buf[sizeof(long)*3 + 2]; + sprintf(buf, "%ld", n); + return (UNCH *)buf; +} + +VOID sgmlwrsd(fp) +FILE *fp; +{ + int i; + int changed; + char *p; + char uc[256]; /* upper case characters (with different lower + case characters) */ + char lcletter[256]; /* LC letters: a-z */ + + fprintf(fp, "<!SGML \"%s\"\n", standard); + fprintf(fp, "CHARSET\nBASESET \"%s//CHARSET %s//%s\"\nDESCSET\n", + SYSTEM_CHARSET_OWNER, + SYSTEM_CHARSET_DESCRIPTION, + SYSTEM_CHARSET_DESIGNATING_SEQUENCE); + + if (!done_nonsgml) { + done_nonsgml = 1; + for (i = 0; i < 256; i++) + if ((char_flags[i] & (CHAR_SIGNIFICANT | CHAR_SHUNNED)) + == CHAR_SHUNNED) + char_flags[i] |= CHAR_NONSGML; + } + i = 0; + while (i < 256) { + int j; + for (j = i + 1; j < 256; j++) + if ((char_flags[j] & CHAR_NONSGML) + != (char_flags[i] & CHAR_NONSGML)) + break; + if (char_flags[i] & CHAR_NONSGML) + fprintf(fp, "%d %d UNUSED\n", i, j - i); + else + fprintf(fp, "%d %d %d\n", i, j - i, i); + i = j; + } + fprintf(fp, "CAPACITY\n"); + changed = 0; + for (i = 0; i < NCAPACITY; i++) + if (refcapset[i] != sd.capacity[i]) { + if (!changed) { + fprintf(fp, "SGMLREF\n"); + changed = 1; + } + fprintf(fp, "%s %ld\n", captab[i], sd.capacity[i]); + } + if (!changed) + fprintf(fp, "PUBLIC \"%s\"\n", capset_map[0].name); + fprintf(fp, "SCOPE DOCUMENT\n"); + + fprintf(fp, "SYNTAX\nSHUNCHAR"); + for (i = 0; i < 256; i++) + if (char_flags[i] & CHAR_SHUNNED) + fprintf(fp, " %d", i); + fprintf(fp, "\n"); + fprintf(fp, "BASESET \"%s//CHARSET %s//%s\"\nDESCSET 0 256 0\n", + SYSTEM_CHARSET_OWNER, + SYSTEM_CHARSET_DESCRIPTION, + SYSTEM_CHARSET_DESIGNATING_SEQUENCE); + + fprintf(fp, "FUNCTION\nRE 13\nRS 10\nSPACE 32\nTAB SEPCHAR 9\n"); + + MEMZERO((UNIV)uc, 256); + for (i = 0; i < 256; i++) + if (lextran[i] != i) + uc[lextran[i]] = 1; + + MEMZERO((UNIV)lcletter, 256); + for (p = "abcdefghijklmnopqrstuvwxyz"; *p; p++) + lcletter[(unsigned char)*p]= 1; + + fprintf(fp, "NAMING\n"); + 
fputs("LCNMSTRT \"", fp); + for (i = 0; i < 256; i++) + if (lextoke[i] == NMS && !uc[i] && !lcletter[i]) + fprintf(fp, "&#%d;", i); + fputs("\"\n", fp); + fputs("UCNMSTRT \"", fp); + for (i = 0; i < 256; i++) + if (lextoke[i] == NMS && !uc[i] && !lcletter[i]) + fprintf(fp, "&#%d;", lextran[i]); + fputs("\"\n", fp); + fputs("LCNMCHAR \"", fp); + for (i = 0; i < 256; i++) + if (lextoke[i] == NMC && !uc[i]) + fprintf(fp, "&#%d;", i); + fputs("\"\n", fp); + fputs("UCNMCHAR \"", fp); + for (i = 0; i < 256; i++) + if (lextoke[i] == NMC && !uc[i]) + fprintf(fp, "&#%d;", lextran[i]); + fputs("\"\n", fp); + + fprintf(fp, "NAMECASE\nGENERAL %s\nENTITY %s\n", + sd.namecase[0] ? "YES" : "NO", + sd.namecase[1] ? "YES" : "NO"); + fprintf(fp, "DELIM\nGENERAL SGMLREF\nSHORTREF %s\n", + sd.shortref ? "SGMLREF" : "NONE"); + fprintf(fp, "NAMES SGMLREF\n"); + if (newkey) { + /* The reference key was saved in newkey. */ + for (i = 0; i < NKEYS; i++) + if (newkey[i][0]) + fprintf(fp, "%s %s\n", newkey[i], key[i]); + } + fprintf(fp, "QUANTITY SGMLREF\n"); + if (quantity_changed) + for (i = 0; i < NQUANTITY; i++) + if (quantity_changed[i]) + fprintf(fp, "%s %d\n", quantity_names[i], sd.quantity[i]); + fprintf(fp, + "FEATURES\nMINIMIZE\nDATATAG NO OMITTAG %s RANK NO SHORTTAG %s\n", + sd.omittag ? "YES" : "NO", + sd.shorttag ? "YES" : "NO"); + fprintf(fp, "LINK SIMPLE NO IMPLICIT NO EXPLICIT NO\n"); + fprintf(fp, "OTHER CONCUR NO "); + if (sd.subdoc > 0) + fprintf(fp, "SUBDOC YES %ld ", sd.subdoc); + else + fprintf(fp, "SUBDOC NO "); + fprintf(fp, "FORMAL %s\n", sd.formal ? 
"YES" : "NO"); + fprintf(fp, "APPINFO NONE"); + fprintf(fp, ">\n"); +} + +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/sgmldecl.h b/usr.bin/sgmls/sgmls/sgmldecl.h new file mode 100644 index 0000000..d5d0466 --- /dev/null +++ b/usr.bin/sgmls/sgmls/sgmldecl.h @@ -0,0 +1,84 @@ +/* sgmldecl.h: SGML declaration parsing. */ + +#define QATTCNT 0 +#define QATTSPLEN 1 +#define QBSEQLEN 2 +#define QDTAGLEN 3 +#define QDTEMPLEN 4 +#define QENTLVL 5 +#define QGRPCNT 6 +#define QGRPGTCNT 7 +#define QGRPLVL 8 +#define QLITLEN 9 +#define QNAMELEN 10 +#define QNORMSEP 11 +#define QPILEN 12 +#define QTAGLEN 13 +#define QTAGLVL 14 + +#define NQUANTITY (QTAGLVL+1) + +#define TOTALCAP 0 +#define ENTCAP 1 +#define ENTCHCAP 2 +#define ELEMCAP 3 +#define GRPCAP 4 +#define EXGRPCAP 5 +#define EXNMCAP 6 +#define ATTCAP 7 +#define ATTCHCAP 8 +#define AVGRPCAP 9 +#define NOTCAP 10 +#define NOTCHCAP 11 +#define IDCAP 12 +#define IDREFCAP 13 +#define MAPCAP 14 +#define LKSETCAP 15 +#define LKNMCAP 16 + +extern char *captab[]; + +struct sgmldecl { + long capacity[NCAPACITY]; + long subdoc; + UNCH formal; + UNCH omittag; + UNCH shorttag; + UNCH shortref; + UNCH namecase[2]; /* case translation of general/entity names */ + int quantity[NQUANTITY]; +}; + +extern struct sgmldecl sd; + +#define OMITTAG (sd.omittag) +#define SUBDOC (sd.subdoc) +#define SHORTTAG (sd.shorttag) +#define FORMAL (sd.formal) + +#define ATTCNT (sd.quantity[QATTCNT]) +#define ATTSPLEN (sd.quantity[QATTSPLEN]) +#define BSEQLEN (sd.quantity[QBSEQLEN]) +#define ENTLVL (sd.quantity[QENTLVL]) +#define GRPGTCNT (sd.quantity[QGRPGTCNT]) +#define GRPCNT (sd.quantity[QGRPCNT]) +#define GRPLVL (sd.quantity[QGRPLVL]) +#define LITLEN (sd.quantity[QLITLEN]) +#define NAMELEN (sd.quantity[QNAMELEN]) +#define NORMSEP (sd.quantity[QNORMSEP]) +#define PILEN (sd.quantity[QPILEN]) +#define TAGLEN 
(sd.quantity[QTAGLEN]) +#define TAGLVL (sd.quantity[QTAGLVL]) + +#define NAMECASE (sd.namecase[0]) +#define ENTCASE (sd.namecase[1]) + +#define YES 1 +#define NO 0 + +#define UNUSED -1 +#define UNKNOWN -2 +#define UNDESC -3 +#define UNKNOWN_SET -4 + +extern int asciicharset[]; diff --git a/usr.bin/sgmls/sgmls/sgmlfnsm.h b/usr.bin/sgmls/sgmls/sgmlfnsm.h new file mode 100644 index 0000000..0d617fb --- /dev/null +++ b/usr.bin/sgmls/sgmls/sgmlfnsm.h @@ -0,0 +1,129 @@ +/* SGMLFNSM.H: SGML function declarations (ANSI prototypes). */ +VOID adlfree P((struct ad *, int)); +VOID adlval P((int,struct etd *)); +VOID aenttst P((int, UNCH *)); +int allhit P((struct thdr *,unsigned long *,int,int)); +VOID ambig P((void)); +VOID ambigfree P((void)); +int amemget P((struct ad *,int,UNCH *)); +int anmget P((int,UNCH *)); +int anmtgrp P((struct parse *,struct ad *,int,UNS *,int)); +int antvget P((int,UNCH *,UNCH **)); +int anyhit P((unsigned long *)); +int attval P((int,UNCH *,int,struct ad *)); +VOID charrefa P((UNCH *)); +int charrefn P((UNCH *, struct parse *)); +int context P((struct etd *,struct thdr *,struct mpos *,UNCH *,int)); +struct etd **copygrp P((struct etd **,unsigned int)); +int datachar P((int, struct parse *)); +struct dcncb *dcnfind P((UNCH *)); +VOID destack P((void)); +int econtext P((struct thdr *,struct mpos *,UNCH *)); +VOID endprolog P((void)); +struct entity *entfind P((UNCH *)); +int entopen P((struct entity *)); +/* VOID eposset P((void)); NOT YET IN USE. 
*/ +VOID error P((struct error *)); +VOID errorinit P((struct error *, unsigned, unsigned)); +int etag P((void)); +int etagetd P((struct parse *)); +VOID etdadl P((struct etd *)); +VOID etdcan P((UNCH *)); +struct etd *etddef P((UNCH *)); +struct etd *etdref P((UNCH *)); +VOID exclude P((void)); +VOID fileclos P((void)); +VOID filecont P((void)); +VOID fileopen P((void)); +VOID filepend P((int)); +VOID fileread P((void)); +VOID filerr P((unsigned, UNCH *)); +VOID fixdatt P((struct dcncb *)); +struct parse *getpcb P((int)); +int groupopt P((struct thdr *,struct mpos *)); +int groupreq P((struct etd *,struct thdr *,struct mpos *)); +int grpsz P((struct thdr *,int)); +int hash P((UNCH *,int)); +struct hash *hfind P((struct hash **,UNCH *,int)); +struct hash *hin P((struct hash **,UNCH *,int,unsigned int)); +int iddef P((UNCH *)); +VOID idrck P((void)); +struct fwdref *idref P((UNCH *)); +VOID idreftst P((int,UNCH *)); +int ingrp P((struct etd **,struct etd *)); +VOID initatt P((struct ad *)); +int mapsrch P((struct map *,UNCH *)); +VOID mdadl P((UNCH *)); +int mdattdef P((int, int)); +VOID mddtde P((UNCH *)); +VOID mddtds P((UNCH *)); +VOID mdelem P((UNCH *)); +VOID mdentity P((UNCH *)); +VOID mderr P((unsigned int,UNCH *,UNCH *)); +struct parse *mdms P((UNCH *,struct parse *)); +int mdmse P((void)); +VOID mdnadl P((UNCH *)); +VOID mdnot P((UNCH *)); +VOID mdsrmdef P((UNCH *)); +VOID mdsrmuse P((UNCH *)); +int netetd P((struct parse *)); +VOID newtoken P((struct thdr *,struct mpos *,UNCH *)); +int nstetd P((void)); +UNCH *ntoa P((int)); +int offbit P((unsigned long *,int,int)); +int parsecon P((UNCH *,struct parse *)); +int parsefpi P((struct fpi *)); +struct thdr *parsegcm P((struct parse *,struct thdr *,struct thdr *)); +VOID parselit P((UNCH *,struct parse *,unsigned int,UNCH)); +struct thdr *parsemod P((int)); +int parsepro P((void)); +VOID parseseq P((UNCH *,int)); +VOID parsetag P((struct parse *)); +int parseval P((UNCH *,unsigned int,UNCH *)); +int pexmex 
P((struct etd *)); +unsigned int ptrsrch P((UNIV *,UNIV)); +UNCH *pubfield P((UNCH *,UNCH *,UNCH,UNS *)); +UNCH *replace P((UNCH *,UNCH *)); +UNCH *sandwich P((UNCH *,UNCH *,UNCH *)); +UNIV saverr P((unsigned int,struct parse *,UNCH *,UNCH *)); +VOID scbset P((void)); +VOID sdinit P((void)); +VOID setcurchar P((int)); +VOID setdtype P((void)); +int sgmlact P((UNCH)); +int sgmldecl P((void)); +VOID sgmlerr P((unsigned int,struct parse *,UNCH *,UNCH *)); +int shortref P((int,struct parse *)); +struct srh *srhfind P((UNCH *)); +VOID stack P((struct etd *)); +int stag P((int)); +int stagetd P((struct parse *)); +VOID startdtd P((void)); +UNCH *savenm P((UNCH *)); +UNCH *savestr P((UNCH *)); +VOID storedatt P((PNE)); +VOID svderr P((UNIV)); +VOID synerr P((unsigned int,struct parse *)); +int testend P((struct thdr *,struct mpos *,int,int)); +int tokenopt P((struct thdr *,struct mpos *)); +int tokenreq P((struct etd *,struct thdr *,struct mpos *)); +UNS vallen P((int,int,UNCH *)); +struct dcncb *dcndef P((UNCH *)); +struct entity *entdef P((UNCH *,UNCH,union etext *)); +int entget P((void)); +int entref P((UNCH *)); +struct etd *etdset P((struct etd *,UNCH,struct thdr *,struct etd **, + struct etd **, struct entity **)); +struct hash *hout P((struct hash **,UNCH *,int)); +struct fpi *mdextid P((UNCH *,struct fpi *,UNCH *,UNCH *,struct ne *)); +int parse P((struct parse *)); +struct ad *parseatt P((struct ad *,UNCH *)); +unsigned int parsegrp P((struct etd **,struct parse *, UNCH *)); +unsigned int parsngrp P((struct dcncb **,struct parse *, UNCH *)); +int parsemd P((UNCH *,int,struct parse *,unsigned int)); +UNCH *parsenm P((UNCH *,int)); +UNCH *parsetkn P((UNCH *,UNCH,int)); +UNCH *s2valnm P((UNCH *,UNCH *,UNCH,int)); +struct srh *srhdef P((UNCH *)); +int tokdata P((UNCH *, int)); +struct entity *usedef P((UNCH *)); diff --git a/usr.bin/sgmls/sgmls/sgmlincl.h b/usr.bin/sgmls/sgmls/sgmlincl.h new file mode 100644 index 0000000..c4eb5cc --- /dev/null +++ 
b/usr.bin/sgmls/sgmls/sgmlincl.h @@ -0,0 +1,20 @@ +/* SGMLINCL.H: Include file for parser core. */ +#ifndef SGMLINCL /* Don't include this file more than once. */ +#define SGMLINCL 1 +#include "config.h" +#include "std.h" +#include "entity.h" /* Templates for entity control blocks. */ +#include "action.h" /* Action names for all parsing. */ +#include "adl.h" /* Definitions for attribute list processing. */ +#include "error.h" /* Symbols for error codes. */ +#include "etype.h" /* Definitions for element type processing. */ +#include "keyword.h" /* Definitions for keyword processing. */ +#include "lextoke.h" /* Symbols for tokenization lexical classes. */ +#include "source.h" /* Templates for source entity control blocks. */ +#include "synxtrn.h" /* Declarations for concrete syntax constants. */ +#include "sgmlxtrn.h" /* External variable declarations. */ +#include "trace.h" /* Declarations for internal trace functions. */ +#include "sgmlmain.h" +#include "sgmlaux.h" +#include "sgmlfnsm.h" /* ANSI C: Declarations for SGML functions. */ +#endif /* ndef SGMLINCL */ diff --git a/usr.bin/sgmls/sgmls/sgmlio.c b/usr.bin/sgmls/sgmls/sgmlio.c new file mode 100644 index 0000000..c78bb7a --- /dev/null +++ b/usr.bin/sgmls/sgmls/sgmlio.c @@ -0,0 +1,384 @@ +/* sgmlio.c - + IO functions for core parser. + + Written by James Clark (jjc@jclark.com). +*/ + +/* SGML must see a file in which records start with RS and end with + RE, and EOFCHAR (Ctl-Z) is present at the end. This module must + supply these characters if they are not naturally present in the + file. SGML will open two files at a time: when an entity is + nested, the new file is opened before closing the old in order to + make sure the open is successful. If it is, the original open file + is closed temporarily (IOPEND); when the stack is popped, the new + file is closed and the original file is re-opened (IOCONT). 
SGML + will check error returns for the initial open of a file and all + reads, and for re-openings when the stack is popped, but not for + closes. Returning <0 indicates an error; 0 or more is a successful + operation, except for IOREAD where the return value is the number + of characters read, and must exceed 0 to be successful. The first + READ must always be successful, and normally consists of just + priming the buffer with EOBCHAR (or RS EOBCHAR). SGMLIO must + assure that there is an EOBCHAR at the end of each block read, + except for the last block of the entity, which must have an + EOFCHAR. + + SGML views an entity as a contiguous whole, without regard to its + actual form of storage. SGMLIO supports entities that are + equivalent to a single file of one or more records, or to a + concatenation of files. +*/ + +/* Uses only stream I/O. This module should be portable to most ANSI + systems. */ +/* We try to ensure that if an IO operation fails, then errno will contain + a meaningful value (although it may be zero.) */ + +#include "config.h" +#ifdef HAVE_O_NOINHERIT +#include <fcntl.h> +#include <io.h> +#endif /* HAVE_O_NOINHERIT */ + +#include "sgmlaux.h" /* Include files for auxiliary functions.. */ + +#ifdef HAVE_O_NOINHERIT +#define FOPENR(file) nifopen(file) +FILE *nifopen P((char *)); +#else /* not HAVE_O_NOINHERIT */ +#define FOPENR(file) fopen((file), "r") +#endif /* not HAVE_O_NOINHERIT */ + +struct iofcb { /* I/O file control block. */ + FILE *fp; /* File handle. */ + fpos_t off; /* Offset in file of current read block. */ + char *next; /* Next file (NULL if no more). */ + char *file; /* Current file (no length byte). */ + int pendoff; /* Offset into line when file suspended. */ + char bol; /* Non-zero if currently at beginning of line. */ + char first; /* Non-zero if the first read. */ + char wasbol; /* Non-zero if current block was at beginning of line. 
*/ + char canseek; + UNCH *pendbuf; /* Saved partial buffer for suspended file + that can't be closed and reopened. */ +}; + +static char *lastfile; /* The name of the last file closed. */ +static int bufsize; /* Size of buffer passed to ioread(). */ +static char ismagic[256]; /* Table of magic chars that need to be prefixed + by DELNONCH. */ +static int stdinused = 0; + +static char *nextstr P((char *)); /* Iterate over list of strings. */ +static FILE *openfile P((char *, char *)); +static int closefile P((FILE *)); +static int isreg P((FILE *)); + +VOID ioinit(swp) +struct switches *swp; +{ + ismagic[EOBCHAR] = 1; + ismagic[EOFCHAR] = 1; + ismagic[EOS] = 1; + ismagic[(UNCH)DELNONCH] = 1; + ismagic[(UNCH)GENRECHAR] = 1; + bufsize = swp->swbufsz; +} + +int ioopen(id, pp) +UNIV id; +UNIV *pp; +{ + struct iofcb *f; + char *s; + errno = 0; + if (!id) + return -1; + s = id; + if (!*s) + return -1; + f = (struct iofcb *)rmalloc((UNS)sizeof(struct iofcb)); + f->file = s; + f->next = nextstr(s); + errno = 0; + f->fp = openfile(f->file, &f->canseek); + f->bol = 1; + f->first = 1; + f->pendbuf = 0; + *pp = (UNIV)f; + return f->fp ? 
1 : -1; +} + +VOID ioclose(p) +UNIV p; +{ + struct iofcb *f = (struct iofcb *)p; + if (f->fp) + closefile(f->fp); + lastfile = f->file; + frem((UNIV)f); +} + +VOID iopend(p, off, buf) +UNIV p; +int off; +UNCH *buf; +{ + struct iofcb *f = (struct iofcb *)p; + if (!f->canseek) { + UNCH *s; + for (s = buf + off; *s != EOFCHAR && *s != EOBCHAR; s++) + ; + s++; + f->pendbuf = (UNCH *)rmalloc((UNS)(s - buf - off)); + memcpy((UNIV)f->pendbuf, (UNIV)(buf + off), (UNS)(s - buf - off)); + return; + } + f->bol = 0; + if (f->wasbol) { + if (off == 0) + f->bol = 1; + else + off--; + } + f->pendoff = off; + if (f->fp) { + fclose(f->fp); + f->fp = 0; + } +} + +int iocont(p) +UNIV p; +{ + struct iofcb *f = (struct iofcb *)p; + int c = EOF; + int off = f->pendoff; + + if (!f->canseek) + return 0; + + errno = 0; + f->fp = FOPENR(f->file); + if (!f->fp) + return -1; + if (fsetpos(f->fp, &f->off)) + return -1; + while (--off >= 0) { + c = getc(f->fp); + if (c != EOF && ismagic[c]) + off--; + } + if (c == '\n') + f->bol = 1; + if (ferror(f->fp)) + return -1; + return 0; +} + +/* Return -1 on error, otherwise the number of bytes read. The +strategy is to concatenate the files, insert a RS at the beginning of +each line, and change each '\n' into a RE. The returned data +shouldn't cross a file boundary, otherwise error messages might be +inaccurate. The first read must always succeed. 
*/ + +int ioread(p, buf, newfilep) +UNIV p; +UNCH *buf; +int *newfilep; +{ + int i = 0; + struct iofcb *f = (struct iofcb *)p; + FILE *fp; + int c; + + *newfilep = 0; + if (f->first) { + buf[i] = EOBCHAR; + f->first = 0; + return 1; + } + if (f->pendbuf) { + for (i = 0; + (buf[i] = f->pendbuf[i]) != EOBCHAR && buf[i] != EOFCHAR; + i++) + ; + frem((UNIV)f->pendbuf); + f->pendbuf = 0; + return i + 1; + } + fp = f->fp; + for (;;) { + errno = 0; + if (f->canseek && fgetpos(fp, &f->off)) + f->canseek = 0; + errno = 0; + c = getc(fp); + if (c != EOF) + break; + if (ferror(fp)) + return -1; + if (closefile(fp) == EOF) + return -1; + if (!f->next){ + f->fp = 0; + buf[0] = EOFCHAR; + return 1; + } + f->file = f->next; + f->next = nextstr(f->next); + *newfilep = 1; + errno = 0; + fp = f->fp = openfile(f->file, &f->canseek); + if (!fp) + return -1; + f->bol = 1; + } + if (f->bol) { + f->bol = 0; + buf[i++] = RSCHAR; + f->wasbol = 1; + } + else + f->wasbol = 0; + errno = 0; + for (;;) { + if (c == '\n') { + f->bol = 1; + buf[i++] = RECHAR; + break; + } + if (ismagic[c]) { + buf[i++] = DELNONCH; + buf[i++] = SHIFTNON(c); + } + else + buf[i++] = c; + if (i >= bufsize - 2) + break; + c = getc(fp); + if (c == EOF) { + if (ferror(fp)) + return -1; + /* This is in the middle of a line. */ + break; + } + } + buf[i++] = EOBCHAR; + return i; +} + +static char *nextstr(p) +char *p; +{ + p = strchr(p, '\0'); + return *++p ? p : 0; +} + +/* Return the filename associated with p. If p is NULL, return the filename +of the last file closed. */ + +char *ioflid(p) +UNIV p; +{ + if (!p) + return lastfile; + return ((struct iofcb *)p)->file; +} + +static +FILE *openfile(name, seekp) +char *name; +char *seekp; +{ + FILE *fp; + if (strcmp(name, STDINNAME) == 0) { + if (stdinused) + return 0; + stdinused = 1; + *seekp = 0; + return stdin; + } + fp = FOPENR(name); + if (fp) + *seekp = isreg(fp); + return fp; +} + +/* Return -1 on error, 0 otherwise. 
*/ + +static +int closefile(fp) +FILE *fp; +{ + if (fp == stdin) { + stdinused = 0; + clearerr(fp); + return 0; + } + else + return fclose(fp); +} + +#ifdef HAVE_O_NOINHERIT + +/* This is the same as fopen(name, "r") except that it tells DOS that +the file descriptor should not be inherited by child processes. */ + +FILE *nifopen(name) +char *name; +{ + int fd = open(name, O_RDONLY|O_NOINHERIT|O_TEXT); + if (fd < 0) + return 0; + return fdopen(fd, "r"); +} + +#endif /* HAVE_O_NOINHERIT */ + +#ifdef HAVE_SYS_STAT_H + +#include <sys/types.h> +#include <sys/stat.h> + +#ifndef S_ISREG +#ifdef S_IFMT +#ifdef S_IFREG +#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#endif /* S_IFREG */ +#endif /* S_IFMT */ +#endif /* not S_ISREG */ + +#endif /* HAVE_SYS_STAT_H */ + +/* Return 1 if fp might be associated with a regular file. 0 +otherwise. We check this because on many Unix systems lseek() will +succeed on a (pseudo-)terminal although terminals aren't seekable in +the way we need. */ + +static +int isreg(fp) +FILE *fp; +{ +#ifdef S_ISREG + struct stat sb; + + /* This assumes that a system that has S_ISREG will also have + fstat() and fileno(). */ + if (fstat(fileno(fp), &sb) == 0) + return S_ISREG(sb.st_mode); +#endif /* S_ISREG */ + return 1; +} + + +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +comment-column: 30 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/sgmlmain.h b/usr.bin/sgmls/sgmls/sgmlmain.h new file mode 100644 index 0000000..3911f76 --- /dev/null +++ b/usr.bin/sgmls/sgmls/sgmlmain.h @@ -0,0 +1,101 @@ +/* SGMLMAIN: Main interface to SGML services. + +Preprocessor variable names are the only supported interface +to data maintained by SGML. They are defined in this file or in adl.h. +*/ +/* Return control block types (RCBTYPE) from calls to parser (SGML): + Names and strings follow the convention for the IPBs. +*/ +enum sgmlevent { + SGMLEOD, /* End of document. 
*/ + SGMLDAF, /* Data found. */ + SGMLSTG, /* Start-tag found. */ + SGMLETG, /* End-tag found. */ + SGMLREF, /* Record end found. */ + SGMLPIS, /* Processing instruction (string). */ + SGMLAPP /* APPINFO (string) */ +}; + +struct rcbdata { /* Return control block: DAF EOD REF PIS APP. */ + UNS contersw; /* 1=context error; 2,4,8=data type; 0=not. */ + UNS datalen; /* Length of data or PI (0=single nonchar). */ + UNCH *data; /* Data, PI, single nonSGML, or NDATA ecb ptr. */ +}; + +struct rcbtag { /* Return control block for STG and ETG. */ + UNS contersw; /* 1=context error; 2=NET enabled; 0/0=not. */ + UNS tagmin; /* Minim: NONE NULL NET DATA; implied by S/ETAG */ + UNCH *curgi; /* Start-tag (or end-tag) GI. */ + union { + struct ad *al; /* Start-tag: attribute list. */ + UNCH *oldgi; /* End-tag: resumed GI. */ + } ru; + struct ad *lal; /* Start-tag: link attribute list (UNUSED). */ + UNS format; /* Format class for default processing. */ + struct etd *tagreal; /* Dummy etd or ptr to GI that implied this tag.*/ + int etictr; /* Number of elements on stack with NET enabled.*/ + UNCH *srmnm; /* Current SHORTREF map name (NULL=#EMPTY). */ +}; + +/* Accessors for rcbdata and rcbtag. */ +/* Datatype abbreviations: C=unsigned char S=string U=unsigned int L=4 bytes + A=array P=ptr to structure N=name (see sgmlcb.h) +*/ +/* Data control block fields: processing instructions (SGMLPIS). +*/ +#define PDATA(d) ((d).data) /*S PI string. */ +#define PDATALEN(d) ((d).datalen) /*U Length of PI string. */ +#define PIESW(d) (((d).contersw & 4)) /*U 1=PIDATA entity returned. */ +/* Data control block fields: other data types. +*/ +#define CDATA(d) ((d).data) /*S CDATA content string. */ +#define CDATALEN(d) ((d).datalen) /*U Length of CDATA content string. */ +#define CONTERSW(d) (((d).contersw &1))/*U 1=CDATA or TAG out of context. */ +#define CDESW(d) (((d).contersw & 2)) /*U 1=CDATA entity returned. */ +#define SDESW(d) (((d).contersw & 4)) /*U 1=SDATA entity returned. 
*/ +#define NDESW(d) (((d).contersw & 8)) /*U 1=NDATA entity returned. */ +#define NEPTR(d) ((PNE)(d).data) /*P Ptr to NDATA control block. */ +#define MARKUP(d) ((d).data) /*A Markup delimiter strings. */ +#define DTYPELEN(d) ((d).datalen) /*U Length of doc type name +len+EOS. */ +#define DOCTYPE(d) ((d).data) /*S Document type name (with len+EOS). */ +#define ADATA(d) ((d).data) /*S APPINFO */ +#define ADATALEN(d) ((d).datalen) /*U Length of APPINFO string. */ +/* Tag control block fields. +*/ +#define ALPTR(t) ((t).ru.al) /*P Ptr to SGML attribute list. */ +#define CURGI(t) ((t).curgi+1) /*N GI of started or ended element. */ +#define OLDGI(t) ((t).ru.oldgi) /*S GI of resumed element. */ +#define TAGMIN(t) (t).tagmin /*U Minimization for current tag. */ +#define TAGREAL(t) ((t).tagreal) /*P Dummy etd that implied this tag. */ +#define TAGRLNM(t) ((UNCH *)(t).tagreal) /*P GI of tag that implied this tag.*/ +#define ETISW(t) (((t).contersw & 2)) /*U 1=NET delimiter enabled by ETI. */ +#define PEXSW(t) (((t).contersw & 4)) /*U 1=Element was plus exception. */ +#define MTYSW(t) (((t).contersw & 8)) /*U 1=Element is empty. */ +#define ETICTR(t) ((t).etictr) /*U Number of active NET delimiters. */ +#define SRMNM(t) ((t).srmnm) /*S Name of current SHORTREF map. */ +#define SRMCNT(t) ((t).contersw) /*U Number of SHORTREF maps defined. */ +#define FORMAT(t) ((t).format) /*U Format class.*/ + +/* These function names are chosen so as to be distinct in the first 6 +letters. */ + +/* Initialize. */ +struct markup *sgmlset P((struct switches *)); +/* Cleanup and return capacity usage statistics. */ +VOID sgmlend P((struct sgmlcap *)); +/* Set document entity. */ +int sgmlsdoc P((UNIV)); +/* Get entity. */ +int sgmlgent P((UNCH *, PNE *, UNCH **)); +/* Mark an entity. Return is non-zero if already marked.*/ +int sgmlment P((UNCH *)); +/* Get the next sgml event. */ +enum sgmlevent sgmlnext P((struct rcbdata *, struct rcbtag *)); +/* Get the error count. 
*/ +int sgmlgcnterr P((void)); +/* Get the current location. */ +int sgmlloc P((unsigned long *, char **)); +/* Write out the SGML declaration. */ +VOID sgmlwrsd P((FILE *)); +/* Note subdocument capacity usage. */ +VOID sgmlsubcap P((long *)); diff --git a/usr.bin/sgmls/sgmls/sgmlmsg.c b/usr.bin/sgmls/sgmls/sgmlmsg.c new file mode 100644 index 0000000..a35cb1b --- /dev/null +++ b/usr.bin/sgmls/sgmls/sgmlmsg.c @@ -0,0 +1,514 @@ +/* sgmlmsg.c - + message handling for core parser + + Written by James Clark (jjc@jclark.com). +*/ + +#include "config.h" +#include "sgmlaux.h" +#include "msg.h" + +static nl_catd catd; + +#define TEXT_SET 1 /* message set number for text of messages */ +#define HEADER_SET 2 /* message set number for header strings */ +#define PARM_SET 3 /* message set number for special parameters */ + +#ifdef HAVE_EXTENDED_PRINTF +#define xfprintf fprintf +#else +extern int xfprintf VP((FILE *, char *,...)); +#endif + +#define SIZEOF(v) (sizeof(v)/sizeof(v[0])) + +static char *gettext P((int)); +static char *getheader P((int)); +static char *getparm P((int)); +static VOID elttrace P((FILE *, int)); +static int printit P((FILE *, struct error *)); +static char *transparm P((UNCH *, char *)); +static VOID spaces P((FILE *, int)); + +#define PARMBUFSIZ 50 +static char parmbuf[PARMBUFSIZ*2]; +static char *parmbuf1 = parmbuf; +static char *parmbuf2 = parmbuf + PARMBUFSIZ; + +static char *prog; /* program name */ +static int sweltr; /* non-zero means print an element trace */ +static int swenttr; /* non-zero means print an entity trace */ +static int cnterr = 0; +static VOID (*die) P((void)); + +static char *headers[] = { +"In file included", +"SGML error", /* parameters: type, severity, number */ +"Unsupported feature", /* type U errors */ +"Error", /* for type R errors */ +"Warning", /* severity type I */ +" at %s, %.0sline %lu", /* ignore entity name and ccnt */ +" at entity %s, line %lu", +"%.0s%.0s in declaration parameter %d", /* ignore first two 
parameters */ +"%.0s in declaration parameter %d", /* ignore first parameter */ +"%.0s", /* parse mode */ +" at end of file", +" at end of entity", +" at record start", +" at record end", +" at \"%c\"", +" at \"\\%03o\"", +" accessing \"%s\"", +"Element structure:" +}; + +/* Indexes into headers[] */ + +#define HDRPFX 0 +#define HDRALL 1 +#define HDRUNSUP 2 +#define HDRSYS 3 +#define HDRWARN 4 +#define HDRLOC 5 +#define HDRELOC 6 +#define HDRMD 7 +#define HDRMD2 8 +#define HDRMODE 9 +#define HDREOF 10 +#define HDREE 11 +#define HDRRS 12 +#define HDRRE 13 +#define HDRPRT 14 +#define HDRCTL 15 +#define HDRFIL 16 +#define HDRELT 17 + +/* Special parameters (error::errsp) */ +static char *parms[] = { +"character data", +"element content", +"mixed content", +"replaceable character data", +"tag close", +"content model group", +"content model occurrence indicator", +"name group", +"name token group", +"system data", +"parameter literal", +"attribute value literal", +"tokenized attribute value literal", +"minimum literal", +"markup declaration", +"markup declaration comment", +"ignored markup declaration", +"declaration subset", +"CDATA marked section", +"IGNORE marked section", +"RCDATA marked section", +"prolog", +"reference", +"attribute specification list", +"tokenized attribute value", +"attribute specification list close", +"SGML declaration", +"attribute definition list", +"document type", +"element", +"entity", +"link type", +"link set", +"notation", +"SGML", +"short reference mapping", +"link set use", +"short reference use", +}; + +static FILE *tfp; /* temporary file for saved messages */ + +struct saved { + long start; + long end; + char exiterr; + char countit; +}; + +VOID msgprint(e) +struct error *e; +{ + if (printit(stderr, e)) + ++cnterr; + fflush(stderr); + if (e->errtype == EXITERR) { + if (die) { + (*die)(); + abort(); + } + else + exit(EXIT_FAILURE); + } +} + +/* Save an error message. 
*/ + +UNIV msgsave(e) +struct error *e; +{ + struct saved *sv; + + sv = (struct saved *)rmalloc(sizeof(struct saved)); + if (!tfp) { + tfp = tmpfile(); + if (!tfp) + exiterr(160, (struct parse *)0); + } + sv->start = ftell(tfp); + sv->countit = (char)printit(tfp, e); + sv->end = ftell(tfp); + sv->exiterr = (char)(e->errtype == EXITERR); + return (UNIV)sv; +} + +/* Print a saved error message. */ + +VOID msgsprint(p) +UNIV p; +{ + struct saved *sv = (struct saved *)p; + long cnt; + + assert(p != 0); + assert(tfp != 0); + if (fseek(tfp, sv->start, SEEK_SET) < 0) + return; + /* Temporary files are opened in binary mode, so this is portable. */ + cnt = sv->end - sv->start; + while (--cnt >= 0) { + int c = getc(tfp); + if (c == EOF) + break; + putc(c, stderr); + } + fflush(stderr); + if (sv->countit) + ++cnterr; + if (sv->exiterr) + exit(EXIT_FAILURE); +} + +/* Free a sved error message. */ + +VOID msgsfree(p) +UNIV p; +{ + frem(p); +} + +/* Return 1 if it should be counted as an error. */ + +static int printit(efp, e) +FILE *efp; +struct error *e; +{ + int indent; + int countit; + int hdrcode; + int filelevel = -1, prevfilelevel = -1, toplevel; + struct location loc; + char type[2], severity[2]; + + assert(e->errnum < SIZEOF(messages)); + assert(messages[e->errnum].text != NULL); + if (prog) { + fprintf(efp, "%s: ", prog); + indent = strlen(prog) + 2; /* don't rely on return value of fprintf */ + /* Don't want to waste too much space on indenting. 
*/ + if (indent > 10) + indent = 4; + } + else + indent = 4; + + for (toplevel = 0; getlocation(toplevel, &loc); toplevel++) + if (loc.filesw) { + prevfilelevel = filelevel; + filelevel = toplevel; + } + toplevel--; + + if (e->errtype == FILERR) { + toplevel--; + filelevel = prevfilelevel; + } + if (swenttr && filelevel > 0) { + int level = 0; + int middle = 0; /* in the middle of a line */ + do { + (void)getlocation(level, &loc); + if (loc.filesw) { + if (middle) { + fputs(":\n", efp); + spaces(efp, indent); + } + else + middle = 1; + xfprintf(efp, getheader(HDRPFX)); + xfprintf(efp, getheader(HDRLOC), ioflid(loc.fcb), + loc.ename, loc.rcnt, loc.ccnt); + } + else if (middle) + xfprintf(efp, getheader(HDRELOC), + loc.ename, loc.rcnt + 1, loc.ccnt); + } + while (++level != filelevel); + if (middle) { + fputs(":\n", efp); + spaces(efp, indent); + } + } + + /* We use strings for the type and severity, + so that the format can use %.0s to ignore them. */ + + type[0] = messages[e->errnum].type; + type[1] = '\0'; + severity[0] = messages[e->errnum].severity; + severity[1] = '\0'; + + countit = (severity[0] != 'I'); + if (!countit) + hdrcode = HDRWARN; + else if (type[0] == 'R') + hdrcode = HDRSYS; + else if (type[0] == 'U') + hdrcode = HDRUNSUP; + else + hdrcode = HDRALL; + + xfprintf(efp, getheader(hdrcode), type, severity, e->errnum); + + if (filelevel >= 0) { + (void)getlocation(filelevel, &loc); + xfprintf(efp, getheader(HDRLOC), + ioflid(loc.fcb), loc.ename, loc.rcnt, loc.ccnt); + while (filelevel < toplevel) { + ++filelevel; + if (swenttr) { + (void)getlocation(filelevel, &loc); + xfprintf(efp, getheader(HDRELOC), + loc.ename, loc.rcnt + 1, loc.ccnt); + } + } + } + + /* It is necessary to copy the result of getparm() because + the specification of catgets() says in can return a + pointer to a static buffer which may get overwritten + by the next call to catgets(). 
*/ + + switch (e->errtype) { + case MDERR: + strncpy(parmbuf, getparm(e->errsp), PARMBUFSIZ*2 - 1); + xfprintf(efp, getheader(HDRMD), parmbuf, + (e->subdcl ? e->subdcl : (UNCH *)""), e->parmno); + break; + case MDERR2: + /* no subdcl parameter */ + strncpy(parmbuf, getparm(e->errsp), PARMBUFSIZ*2 - 1); + xfprintf(efp, getheader(HDRMD2), parmbuf, e->parmno); + break; + case DOCERR: + case EXITERR: + if (toplevel < 0) + break; + strncpy(parmbuf, getparm(e->errsp), PARMBUFSIZ*2 - 1); + xfprintf(efp, getheader(HDRMODE), parmbuf); + switch (loc.curchar) { + case EOFCHAR: + xfprintf(efp, getheader(HDREOF)); + break; + case RSCHAR: + xfprintf(efp, getheader(HDRRS)); + break; + case RECHAR: + xfprintf(efp, getheader(HDRRE)); + break; + case DELNONCH: + xfprintf(efp, getheader(HDRCTL), UNSHIFTNON(loc.nextchar)); + break; + case EOS: + xfprintf(efp, getheader(HDREE)); + break; + case EOBCHAR: + break; + default: + if (ISASCII(loc.curchar) && isprint(loc.curchar)) + xfprintf(efp, getheader(HDRPRT), loc.curchar); + else + xfprintf(efp, getheader(HDRCTL), loc.curchar); + break; + } + break; + case FILERR: + if (getlocation(toplevel + 1, &loc)) + xfprintf(efp, getheader(HDRFIL), ioflid(loc.fcb)); + break; + } + fputs(":\n", efp); + + if (e->errtype == FILERR && e->sverrno != 0) { + char *errstr = strerror(e->sverrno); + UNS len = strlen(errstr); + /* Strip a trailing newline if there is one. */ + if (len > 0 && errstr[len - 1] == '\n') + len--; + spaces(efp, indent); + for (; len > 0; len--, errstr++) + putc(*errstr, efp); + fputs(":\n", efp); + } + + spaces(efp, indent); + + xfprintf(efp, gettext(e->errnum), + transparm((UNCH *)e->eparm[0], parmbuf1), + transparm((UNCH *)e->eparm[1], parmbuf2)); + putc('\n', efp); + + if (sweltr) + elttrace(efp, indent); + return countit; +} + +/* Print an element trace. 
*/ +static VOID elttrace(efp, indent) +FILE *efp; +int indent; +{ + int i = 1; + UNCH *gi; + + gi = getgi(i); + if (!gi) + return; + spaces(efp, indent); + xfprintf(efp, getheader(HDRELT)); + do { + fprintf(efp, " %s", gi); + gi = getgi(++i); + } while (gi); + putc('\n', efp); +} + +static VOID spaces(efp, indent) +FILE *efp; +int indent; +{ + while (--indent >= 0) + putc(' ', efp); +} + +VOID msginit(swp) +struct switches *swp; +{ + catd = swp->catd; + prog = swp->prog; + sweltr = swp->sweltr; + swenttr = swp->swenttr; + die = swp->die; +} + +/* Return the error count. */ + +int msgcnterr() +{ + return cnterr; +} + +/* Transform a parameter into a form suitable for printing. */ + +static char *transparm(s, buf) +UNCH *s; +char *buf; +{ + char *ptr; + int cnt; + + if (!s) + return 0; + + ptr = buf; + cnt = PARMBUFSIZ - 4; /* space for `...\0' */ + + while (*s) { + UNCH ch = *s++; + if (ch == DELNONCH) { + if (*s == '\0') + break; + ch = UNSHIFTNON(*s); + s++; + } + if (ch == DELCDATA || ch == DELSDATA) + ; + else if (ch == '\\') { + if (cnt < 2) + break; + *ptr++ = '\\'; + *ptr++ = '\\'; + cnt -= 2; + } + else if (ISASCII(ch) && isprint(ch)) { + if (cnt < 1) + break; + *ptr++ = ch; + cnt--; + } + else { + if (cnt < 4) + break; + sprintf(ptr, "\\%03o", ch); + ptr += 4; + cnt -= 4; + } + } + if (!*s) + *ptr = '\0'; + else + strcpy(ptr, "..."); + return buf; +} + +/* The message and set numbers in the catgets function must be > 0. 
*/ + +static char *gettext(n) +int n; +{ + assert(n > 0 && n < SIZEOF(messages)); + assert(messages[n].text != 0); + return catgets(catd, TEXT_SET, n, messages[n].text); +} + +static char *getheader(n) +int n; +{ + assert(n >= 0 && n < SIZEOF(headers)); + return catgets(catd, HEADER_SET, n + 1, headers[n]); +} + +static char *getparm(n) +int n; +{ + assert(n >= 0 && n < SIZEOF(parms)); + return catgets(catd, PARM_SET, n + 1, parms[n]); +} + +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/sgmls.1 b/usr.bin/sgmls/sgmls/sgmls.1 new file mode 100644 index 0000000..b9967a0 --- /dev/null +++ b/usr.bin/sgmls/sgmls/sgmls.1 @@ -0,0 +1,871 @@ +'\" t +.\" Uncomment the next line to get a man page accurate for MS-DOS +.\"nr Os 1 +.\" Uncomment the next line if tracing is enabled. +.\"nr Tr 1 +.if \n(.g .if !r Os .nr Os 0 +.tr \(ts" +.ds S \s-1SGML\s0 +.de TS +.br +.sp .5 +.. +.de TE +.br +.sp .5 +.. +.de TQ +.br +.ns +.TP \\$1 +.. +.TH SGMLS 1 +.SH NAME +sgmls \- a validating SGML parser +.sp +An \*S System Conforming to +.if n .br +International Standard ISO 8879 \(em +.br +Standard Generalized Markup Language +.SH SYNOPSIS +.B sgmls +[ +.B \-deglprsuv +] +[ +.BI \-c file +] +.if \n(Os=1 \{\ +[ +.BI \-f file +] +.\} +[ +.BI \-i name +] +.if \n(Tr \{\ +[ +.BI \-x flags +] +[ +.BI \-y flags +] +.\} +[ +.I filename\|.\|.\|. +] +.SH DESCRIPTION +.I Sgmls +parses and validates +the \*S document entity in +.I filename\|.\|.\|. +and prints on the standard output a simple \s-1ASCII\s0 representation of its +Element Structure Information Set. +(This is the information set which a structure-controlled +conforming \*S application should act upon.) +Note that the document entity may be spread amongst several files; +for example, the SGML declaration, document type declaration and document +instance set could each be in a separate file. 
+If no filenames are specified, then +.I sgmls +will read the document entity from the standard input. +A filename of +.B \- +can also be used to refer to the standard input. +.LP +The following options are available: +.TP +.BI \-c file +Write a report of capacity usage to +.IR file . +The report is in the format of a RACT result. +RACT is the Reference Application for Capacity Testing defined in the +Proposed American National Standard +.I +Conformance Testing for Standard Generalized Markup Language (SGL) Systems +(X3.190-199X), +Draft July 1991. +.TP +.B \-d +Warn about duplicate entity declarations. +.TP +.B \-e +Describe open entities in error messages. +Error messages always include the position of the most recently +opened external entity. +.if \n(Os=1 \{\ +.TP +.BI \-f file +Redirect errors to +.IR file . +.\} +.TP +.B \-g +Show the \s-1GI\s0s of open elements in error messages. +.TP +.BI \-i name +Pretend that +.RS +.IP +.BI <!ENTITY\ %\ name\ \(tsINCLUDE\(ts> +.LP +occurs at the start of the document type declaration subset +in the \*S document entity. +Since repeated definitions of an entity are ignored, +this definition will take precedence over any other definitions +of this entity in the document type declaration. +Multiple +.B \-i +options are allowed. +If the \*S declaration replaces the reserved name +.B INCLUDE +then the new reserved name will be the replacement text of the entity. +Typically the document type declaration will contain +.IP +.BI <!ENTITY\ %\ name\ \(tsIGNORE\(ts> +.LP +and will use +.BI % name ; +in the status keyword specification of a marked section declaration. +In this case the effect of the option will be to cause the marked +section not to be ignored. +.RE +.TP +.B \-l +Output +.B L +commands giving the current line number and filename. +.TP +.B \-p +Parse only the prolog. +.I Sgmls +will exit after parsing the document type declaration. +Implies +.BR \-s . +.TP +.B \-r +Warn about defaulted references. 
+.TP +.B \-s +Suppress output. +Error messages will still be printed. +.TP +.B \-u +Warn about undefined elements: elements used in the DTD but not defined. +Also warn about undefined short reference maps. +.TP +.B \-v +Print the version number. +.if \n(Tr \{\ +.TP +.BI \-x flags +.br +.ns +.TP +.BI \-y flags +Enable debugging output; +.B \-x +applies to the document body, +.B \-y +to the prolog. +Each character in the +.I flags +argument enables tracing of a particular activity. +.RS +.TP +.B t +Trace state transitions. +.TP +.B a +Trace attribute activity. +.TP +.B c +Trace context checking. +.TP +.B d +Trace declaration parsing. +.TP +.B e +Trace entities. +.TP +.B g +Trace groups. +.TP +.B i +Trace \s-1ID\s0s. +.TP +.B m +Trace marked sections. +.TP +.B n +Trace notations. +.RE +.\} +.SS "Entity Manager" +An external entity resides in one or more files. +The entity manager component of +.I sgmls +maps a sequence of files into an entity in three sequential stages: +.IP 1. +each carriage return character is turned into a non-SGML character; +.IP 2. +each newline character is turned into a record end character, +and at the same time +a record start character is inserted at the beginning of each line; +.IP 3. +the files are concatenated. +.LP +A system identifier is +interpreted as a list of filenames separated by +.if \n(Os=0 colons. +.if \n(Os=1 semi-colons. +A filename of +.B \- +can be used to refer to the standard input. +If no system identifier is supplied, then the entity manager will +attempt to generate a filename using the public identifier +(if there is one) and other information available to it. +Notation identifiers are not subject to this treatment. +This process is controlled by the environment variable +.BR \s-1SGML_PATH\s0 ; +this contains a +.if \n(Os=0 colon-separated +.if \n(Os=1 semicolon-separated +list of filename templates. 
+A filename template is a filename that may contain +substitution fields; a substitution field is a +.B % +character followed by a single letter that indicates the value +of the substitution. +If +.B \s-1SGML_PATH\s0 +uses the +.B %S +field (the value of which is the system identifier), +then the entity manager will also use +.B \s-1SGML_PATH\s0 +to generate a filename +when a system identifier that does not contain any +.if \n(Os=0 colons +.if \n(Os=1 semi-colons +is supplied. +The value of a substitution can either be a string +or it can be +.IR null . +The entity manager transforms the list of +filename templates into a list of filenames by substituting for each +substitution field and discarding any template +that contained a substitution field whose value was null. +It then uses the first resulting filename that exists and is readable. +Substitution values are transformed before being used for substitution: +firstly, any names that were subject to upper case substitution +are folded to lower case; +secondly, +.if \n(Os=0 \{\ +.\" Unix +space characters are mapped to underscores +and slashes are mapped to percents. +.\} +.if \n(Os=1 \{\ +.\" MS-DOS +the characters +.B +,./:=? +and space characters are deleted. +.\} +The value of the +.B %S +field is not transformed. +The values of substitution fields are as follows: +.TP +.B %% +A single +.BR % . +.TP +.B %D +The entity's data content notation. +This substitution will succeed only for external data entities. +.TP +.B %N +The entity, notation or document type name. +.TP +.B %P +The public identifier if there was a public identifier, +otherwise null. +.TP +.B %S +The system identifier if there was a system identifier +otherwise null. +.TP +.B %X +(This is provided mainly for compatibility with \s-1ARCSGML\s0.) +A three-letter string chosen as follows: +.LP +.RS +.ne 11 +.TS +tab(&); +c|c|c s +c|c|c s +c|c|c|c +c|c|c|c +l|lB|lB|lB. 
+&&With public identifier +&&_ +&No public&Device&Device +&identifier&independent&dependent +_ +Data or subdocument entity&nsd&pns&vns +General SGML text entity&gml&pge&vge +Parameter entity&spe&ppe&vpe +Document type definition&dtd&pdt&vdt +Link process definition&lpd&plp&vlp +.TE +.LP +The device dependent version is selected if the public text class +allows a public text display version but no public text display +version was specified. +.RE +.TP +.B %Y +The type of thing for which the filename is being generated: +.TS +tab(&); +l lB. +SGML subdocument entity&sgml +Data entity&data +General text entity&text +Parameter entity&parm +Document type definition&dtd +Link process definition&lpd +.TE +.LP +The value of the following substitution fields will be null +unless a valid formal public identifier was supplied. +.TP +.B %A +Null if the text identifier in the +formal public identifier contains an unavailable text indicator, +otherwise the empty string. +.TP +.B %C +The public text class, mapped to lower case. +.TP +.B %E +The public text designating sequence (escape sequence) +if the public text class is +.BR \s-1CHARSET\s0 , +otherwise null. +.TP +.B %I +The empty string if the owner identifier in the formal public identifier +is an \s-1ISO\s0 owner identifier, +otherwise null. +.TP +.B %L +The public text language, mapped to lower case, +unless the public text class is +.BR \s-1CHARSET\s0 , +in which case null. +.TP +.B %O +The owner identifier (with the +.B +// +or +.B \-// +prefix stripped.) +.TP +.B %R +The empty string if the owner identifier in the formal public identifier +is a registered owner identifier, +otherwise null. +.TP +.B %T +The public text description. +.TP +.B %U +The empty string if the owner identifier in the formal public identifier +is an unregistered owner identifier, +otherwise null. +.TP +.B %V +The public text display version. 
+This substitution will be null if the public text class +does not allow a display version or if no version was specified. +If an empty version was specified, a value of +.B default +will be used. +.br +.ne 18 +.SS "System declaration" +The system declaration for +.I sgmls +is as follows: +.LP +.TS +tab(&); +c1 s1 s1 s1 s1 s1 s1 s1 s +c s s s s s s s s +l l s s s s s s s +l l s s s s s s s +l l s s s s s s s +l l l s s s s s s +c s s s s s s s s +l l l l l l l l l +l l l l l l l l l +l l l l l l l l l +l l s s s s s s s +l l l s s s s s s +l l l s s s s s s +c s s s s s s s s +l l l l l l l l l. +SYSTEM "ISO 8879:1986" +CHARSET +BASESET&"ISO 646-1983//CHARSET +&\h'\w'"'u'International Reference Version (IRV)//ESC 2/5 4/0" +DESCSET&0\0128\00 +CAPACITY&PUBLIC&"ISO 8879:1986//CAPACITY Reference//EN" +FEATURES +MINIMIZE&DATATAG&NO&OMITTAG&YES&RANK&NO&SHORTTAG&YES +LINK&SIMPLE&NO&IMPLICIT&NO&EXPLICIT&NO +OTHER&CONCUR&NO&SUBDOC&YES 1&FORMAL&YES +SCOPE&DOCUMENT +SYNTAX&PUBLIC&"ISO 8879:1986//SYNTAX Reference//EN" +SYNTAX&PUBLIC&"ISO 8879:1986//SYNTAX Core//EN" +VALIDATE +&GENERAL&YES&MODEL&YES&EXCLUDE&YES&CAPACITY&YES +&NONSGML&YES&SGML&YES&FORMAL&YES +.T& +c s s s s s s s s +l l l l l l l l l. +SDIF +&PACK&NO&UNPACK&NO +.TE +.LP +The memory usage of +.I sgmls +is not a function of the capacity points used by a document; +however, +.I sgmls +can handle capacities significantly greater than the reference capacity set. +.LP +In some environments, +higher values may be supported for the \s-1SUBDOC\s0 parameter. +.LP +Documents that do not use optional features are also supported. +For example, if +.B FORMAL\ NO +is specified in the \*S declaration, +public identifiers will not be required to be valid formal public identifiers. +.LP +Certain parts of the concrete syntax may be changed: +.RS +.LP +The shunned character numbers can be changed. +.LP +Eight bit characters can be assigned to +\s-1LCNMSTRT\s0, \s-1UCNMSTRT\s0, \s-1LCNMCHAR\s0 and \s-1UCNMCHAR\s0. 
+Declaring this requires that the syntax reference character set be declared +like this: +.RS +.ne 3 +.TS +tab(&); +l l. +BASESET&"ISO Registration Number 100//CHARSET +&\h'\w'"'u'ECMA-94 Right Part of Latin Alphabet Nr. 1//ESC 2/13 4/1" +DESCSET&0\0256\00 +.TE +.RE +.LP +Uppercase substitution can be performed or not performed +both for entity names and for other names. +.LP +Either short reference delimiters assigned by the reference delimiter set +or no short reference delimiters are supported. +.LP +The reserved names can be changed. +.LP +The quantity set can be increased within certain limits +subject to there being sufficient memory available. +The upper limit on \s-1\%NAMELEN\s0 is 239. +The upper limits on +\s-1\%ATTCNT\s0, \s-1\%ATTSPLEN\s0, \s-1\%BSEQLEN\s0, \s-1\%ENTLVL\s0, +\s-1\%LITLEN\s0, \s-1\%PILEN\s0, \s-1\%TAGLEN\s0, and \s-1\%TAGLVL\s0 +are more than thirty times greater than the reference limits. +The upper limit on +\s-1\%GRPCNT\s0, \s-1\%GRPGTCNT\s0, and \s-1\%GRPLVL\s0 is 253. +\s-1\%NORMSEP\s0 +cannot be changed. +\s-1\%DTAGLEN\s0 +and +\s-1\%DTEMPLEN\s0 +are irrelevant since +.I sgmls +does not support the +\s-1\%DATATAG\s0 +feature. +.RE +.SS "\*S declaration" +The \*S declaration may be omitted, +the following declaration will be implied: +.TS +tab(&); +c1 s1 s1 s1 s1 s1 s1 s1 s +c s s s s s s s s +l l s s s s s s s. +<!SGML "ISO 8879:1986" +CHARSET +BASESET&"ISO 646-1983//CHARSET +&\h'\w'"'u'International Reference Version (IRV)//ESC 2/5 4/0" +DESCSET&\0\00\0\09\0UNUSED +&\0\09\0\02\0\09 +&\011\0\02\0UNUSED +&\013\0\01\013 +&\014\018\0UNUSED +&\032\095\032 +&127\0\01\0UNUSED +.T& +l l l s s s s s s +l l s s s s s s s +l l l s s s s s s +c s s s s s s s s +l l l l l l l l l. 
+CAPACITY&PUBLIC&"ISO 8879:1986//CAPACITY Reference//EN" +SCOPE&DOCUMENT +SYNTAX&PUBLIC&"ISO 8879:1986//SYNTAX Reference//EN" +FEATURES +MINIMIZE&DATATAG&NO&OMITTAG&YES&RANK&NO&SHORTTAG&YES +LINK&SIMPLE&NO&IMPLICIT&NO&EXPLICIT&NO +OTHER&CONCUR&NO&SUBDOC&YES 99999999&FORMAL&YES +.T& +c s s s s s s s s. +APPINFO NONE> +.TE +with the exception that characters 128 through 254 will be assigned to +\s-1DATACHAR\s0. +When exporting documents that use characters in this range, +an accurate description of the upper half of the document character set +should be added to this declaration. +For ISO Latin-1, an appropriate description would be: +.br +.ne 5 +.TS +tab(&); +l l. +BASESET&"ISO Registration Number 100//CHARSET +&\h'\w'"'u'ECMA-94 Right Part of Latin Alphabet Nr. 1//ESC 2/13 4/1" +DESCSET&128\032\0UNUSED +&160\095\032 +&255\0\01\0UNUSED +.TE +.SS "Output format" +The output is a series of lines. +Lines can be arbitrarily long. +Each line consists of an initial command character +and one or more arguments. +Arguments are separated by a single space, +but when a command takes a fixed number of arguments +the last argument can contain spaces. +There is no space between the command character and the first argument. +Arguments can contain the following escape sequences. +.TP +.B \e\e +A +.BR \e . +.TP +.B \en +A record end character. +.TP +.B \e| +Internal \s-1SDATA\s0 entities are bracketed by these. +.TP +.BI \e nnn +The character whose code is +.I nnn +octal. +.LP +A record start character will be represented by +.BR \e012 . +Most applications will need to ignore +.B \e012 +and translate +.B \en +into newline. +.LP +The possible command characters and arguments are as follows: +.TP +.BI ( gi +The start of an element whose generic identifier is +.IR gi . +Any attributes for this element +will have been specified with +.B A +commands. +.TP +.BI ) gi +The end of an element whose generic identifier is +.IR gi . +.TP +.BI \- data +Data. 
+.TP +.BI & name +A reference to an external data entity +.IR name ; +.I name +will have been defined using an +.B E +command. +.TP +.BI ? pi +A processing instruction with data +.IR pi . +.TP +.BI A name\ val +The next element to start has an attribute +.I name +with value +.I val +which takes one of the following forms: +.RS +.TP +.B IMPLIED +The value of the attribute is implied. +.TP +.BI CDATA\ data +The attribute is character data. +This is used for attributes whose declared value is +.BR \s-1CDATA\s0 . +.TP +.BI NOTATION\ nname +The attribute is a notation name; +.I nname +will have been defined using a +.B N +command. +This is used for attributes whose declared value is +.BR \s-1NOTATION\s0 . +.TP +.BI ENTITY\ name\|.\|.\|. +The attribute is a list of general entity names. +Each entity name will have been defined using an +.BR I , +.B E +or +.B S +command. +This is used for attributes whose declared value is +.B \s-1ENTITY\s0 +or +.BR \s-1ENTITIES\s0 . +.TP +.BI TOKEN\ token\|.\|.\|. +The attribute is a list of tokens. +This is used for attributes whose declared value is anything else. +.RE +.TP +.BI D ename\ name\ val +This is the same as the +.B A +command, except that it specifies a data attribute for an +external entity named +.IR ename . +Any +.B D +commands will come after the +.B E +command that defines the entity to which they apply, but +before any +.B & +or +.B A +commands that reference the entity. +.TP +.BI N nname +Define a notation +.IR nname . +This command will be preceded by a +.B p +command if the notation was declared with a public identifier, +and by a +.B s +command if the notation was declared with a system identifier. +A notation will only be defined if it is to be referenced +in an +.B E +command or in an +.B A +command for an attribute with a declared value of +.BR \s-1NOTATION\s0 . 
+.TP +.BI E ename\ typ\ nname +Define an external data entity named +.I ename +with type +.I typ +.RB ( \s-1CDATA\s0 , +.B \s-1NDATA\s0 +or +.BR \s-1SDATA\s0 ) +and notation +.IR nname . +This command will be preceded by one or more +.B f +commands giving the filenames generated by the entity manager from the system +and public identifiers, +by a +.B p +command if a public identifier was declared for the entity, +and by a +.B s +command if a system identifier was declared for the entity. +.I nname +will have been defined using a +.B N +command. +Data attributes may be specified for the entity using +.B D +commands. +An external data entity will only be defined if it is to be referenced in a +.B & +command or in an +.B A +command for an attribute whose declared value is +.B \s-1ENTITY\s0 +or +.BR \s-1ENTITIES\s0 . +.TP +.BI I ename\ typ\ text +Define an internal data entity named +.I ename +with type +.I typ +.RB ( \s-1CDATA\s0 +or +.BR \s-1SDATA\s0 ) +and entity text +.IR text . +An internal data entity will only be defined if it is referenced in an +.B A +command for an attribute whose declared value is +.B \s-1ENTITY\s0 +or +.BR \s-1ENTITIES\s0 . +.TP +.BI S ename +Define a subdocument entity named +.IR ename . +This command will be preceded by one or more +.B f +commands giving the filenames generated by the entity manager from the system +and public identifiers, +by a +.B p +command if a public identifier was declared for the entity, +and by a +.B s +command if a system identifier was declared for the entity. +A subdocument entity will only be defined if it is referenced +in a +.B { +command +or in an +.B A +command for an attribute whose declared value is +.B \s-1ENTITY\s0 +or +.BR \s-1ENTITIES\s0 . +.TP +.BI s sysid +This command applies to the next +.BR E , +.B S +or +.B N +command and specifies the associated system identifier. +.TP +.BI p pubid +This command applies to the next +.BR E , +.B S +or +.B N +command and specifies the associated public identifier. 
+.TP +.BI f filename +This command applies to the next +.B E +or +.B S +command and specifies an associated filename. +There will be more than one +.B f +command for a single +.B E +or +.B S +command if the system identifier used a +.if \n(Os=0 colon. +.if \n(Os=1 semi-colon. +.TP +.BI { ename +The start of the \*S subdocument entity +.IR ename ; +.I ename +will have been defined using a +.B S +command. +.TP +.BI } ename +The end of the \*S subdocument entity +.IR ename . +.TP +.BI L lineno\ file +.TQ +.BI L lineno +Set the current line number and filename. +The +.I filename +argument will be omitted if only the line number has changed. +This will be output only if the +.B \-l +option has been given. +.TP +.BI # text +An \s-1APPINFO\s0 parameter of +.I text +was specified in the \*S declaration. +This is not strictly part of the ESIS, but a structure-controlled +application is permitted to act on it. +No +.B # +command will be output if +.B \s-1APPINFO\s0\ \s-1NONE\s0 +was specified. +A +.B # +command will occur at most once, +and may be preceded only by a single +.B L +command. +.TP +.B C +This command indicates that the document was a conforming \*S document. +If this command is output, it will be the last command. +An \*S document is not conforming if it references a subdocument entity +that is not conforming. +.SH BUGS +Some non-SGML characters in literals are counted as two characters for the +purposes of quantity and capacity calculations. +.SH "SEE ALSO" +The \*S Handbook, Charles F. Goldfarb +.br +\s-1ISO\s0 8879 (Standard Generalized Markup Language), +International Organization for Standardization +.SH ORIGIN +\s-1ARCSGML\s0 was written by Charles F. Goldfarb. +.LP +.I Sgmls +was derived from \s-1ARCSGML\s0 by James Clark (jjc@jclark.com), +to whom bugs should be reported. 
diff --git a/usr.bin/sgmls/sgmls/sgmlxtrn.c b/usr.bin/sgmls/sgmls/sgmlxtrn.c new file mode 100644 index 0000000..d27eb66 --- /dev/null +++ b/usr.bin/sgmls/sgmls/sgmlxtrn.c @@ -0,0 +1,223 @@ +/* Standard Generalized Markup Language Users' Group (SGMLUG) + SGML Parser Materials (ARCSGML 1.0) + +(C) 1983-1988 Charles F. Goldfarb (assigned to IBM Corporation) +(C) 1988-1991 IBM Corporation + +Licensed to the SGML Users' Group for distribution under the terms of +the following license: */ + +char license[] = +"SGMLUG hereby grants to any user: (1) an irrevocable royalty-free,\n\ +worldwide, non-exclusive license to use, execute, reproduce, display,\n\ +perform and distribute copies of, and to prepare derivative works\n\ +based upon these materials; and (2) the right to authorize others to\n\ +do any of the foregoing.\n"; + +#include "sgmlincl.h" + +/* SGMLXTRN: Storage allocation and initialization for all public variables. + Exceptions: Constants lex????? and del????? are defined in + LEX?????.C modules; constants pcb????? are defined in PCB?????.c. +*/ +int badresw = 0; /* 1=REF_ out of context; 0=valid. */ +int charmode = 0; /* >0=in #CHARS; 0=not. */ +int conactsw = 0; /* 1=return saved content action 0=get new one.*/ +int conrefsw = 0; /* 1=content reference att specified; 0=no. */ +int contersv = 0; /* Save contersw while processing pending REF. */ +int contersw = 0; /* 1=element or #CHARS out of context; 0=valid. */ +int datarc = 0; /* Return code for data: DAF_ or REF_. */ +int delmscsw = 0; /* 1=DELMSC must be read on return to es==0. */ +int didreq = 0; /* 1=required implied tag processed; 0=no. */ +int dostag = 0; /* 1=retry newetd instead of parsing; 0=parse. */ +int dtdsw = 0; /* DOCTYPE declaration found: 1=yes; 0=no. */ +int entdatsw = 0; /* 2=CDATA entity; 4=SDATA; 8=NDATA; 0=none. */ +int entpisw = 0; /* 4=PI entity occurred; 0=not. */ +int eodsw = 0; /* 1=eod found in error; 0=not yet. */ +int eofsw = 0; /* 1=eof found in body of document; 0=not yet. 
*/ +int es = -1; /* Index of current source in stack. */ +int etagimct = 0; /* Implicitly ended elements left on stack. */ +int etagimsw = 0; /* 1=end-tag implied by other end-tag; 0=not. */ +int etagmin = MINNONE; /* Minim: NONE NULL NET DATA; implied by S/ETAG*/ +int etictr = 0; /* Number of "NET enabled" tags on stack. */ +int etisw = 0; /* 1=tag ended with eti; 0=did not. */ +int indtdsw = 0; /* Are we in the DTD? 1=yes; 0=no. */ +int mslevel = 0; /* Nesting level of marked sections. */ +int msplevel = 0; /* Nested MS levels subject to special parse. */ +int prologsw = 1; /* 1=in prolog; 0=not. */ +int pss = 0; /* SGMLACT: scbsgml stack level. */ +int sgmlsw = 0; /* SGML declaration found: 1=yes; 0=no. */ +int stagmin = MINNONE; /* Minimization: NONE, NULL tag, implied by STAG*/ +int tagctr = 0; /* Tag source chars read. */ +int ts = -1; /* Index of current tag in stack. */ +struct parse *propcb = &pcbpro; /* Current PCB for prolog parse. */ +int aentctr = 0; /* Number of ENTITY tokens in this att list. */ +int conact = 0; /* Return code from content parse. */ +int conrefsv = 0; /* Save conrefsw when doing implied start-tag.*/ +int dtdrefsw = 0; /* External DTD? 1=yes; 0=no. */ +int etiswsv = 0; /* Save etisw when processing implied start-tag.*/ +int grplvl = 0; /* Current level of nested grps in model. */ +int idrctr = 0; /* Number of IDREF tokens in this att list. */ +int mdessv = 0; /* ES level at start of markup declaration. */ +int notadn = 0; /* Position of NOTATION attribute in list. */ +int parmno = 0; /* Current markup declaration parameter number. */ +int pexsw = 0; /* 1=tag valid solely because of plus exception.*/ +int rcessv = 0; /* ES level at start of RCDATA content. */ +int tagdelsw = 0; /* 1=tag ended with delimiter; 0=no delimiter. */ +int tokencnt = 0; /* Number of tokens found in attribute value. */ +struct entity *ecbdeflt = 0; /* #DEFAULT ecb (NULL if no default entity). */ +struct etd *docetd = 0; /* The etd for the document as a whole. 
*/ +struct etd *etagreal = 0; /* Actual or dummy etd that implied this tag. */ +struct etd *newetd = 0; /* The etd for a start- or end-tag recognized. */ +struct etd *nextetd = 0; /* ETD that must come next (only one choice). */ +struct etd *stagreal = 0; /* Actual or dummy etd that implied this tag. */ +struct parse *conpcb = 0; /* Current PCB for content parse. */ +UNCH *data = 0; /* Pointer to returned data in buffer. */ +UNCH *mdname = 0; /* Name of current markup declaration. */ +UNCH *ptcon = 0; /* Current pointer into tbuf. */ +UNCH *ptpro = 0; /* Current pointer into tbuf. */ +UNCH *rbufs = 0; /* DOS file read area: start position for read. */ +UNCH *subdcl = 0; /* Subject of markup declaration (e.g., GI). */ +int Tstart = 0; /* Save starting token for AND group testing. */ +UNS conradn = 0; /* 1=CONREF attribute in list (0=no). */ +UNS datalen = 0; /* Length of returned data in buffer. */ +UNS entlen = 0; /* Length of TAG or EXTERNAL entity text. */ +UNS idadn = 0; /* Number of ID attribute (0 if none). */ +UNS noteadn = 0; /* Number of NOTATION attribute (0 if none). */ +UNS reqadn = 0; /* Num of atts with REQUIRED default (0=none). */ +int grplongs; /* Number of longs for GRPCNT bitvector. */ + +/* Variable arrays and structures. +*/ +struct ad *al = 0; /* Current attribute list work area. */ +struct dcncb *dcntab[1]; /* List of data content notation names. */ +struct entity *etab[ENTHASH]; /* Entity hash table. */ +struct etd *etdtab[ETDHASH]; /* Element type definition hash table. */ +struct fpi fpidf; /* Fpi for #DEFAULT entity. */ +struct id *itab[IDHASH]; /* Unique identifier hash table. */ +struct etd **nmgrp = 0; /* Element name group */ +PDCB *nnmgrp = 0; /* Notation name group */ +struct restate *scbsgml = 0; /* SGMLACT: return action state stack. */ +struct source *scbs = 0; /* Stack of open sources ("SCB stack"). */ +struct srh *srhtab[1]; /* List of SHORTREF table headers. */ +struct sgmlstat ds; /* Document statistics. 
*/ +struct switches sw; /* Parser control switches set by text proc. */ +struct tag *tags = 0; /* Stack of open elements ("tag stack"). */ +struct thdr *gbuf = 0; /* Buffer for creating group. */ +struct thdr prcon[3]; /* 0-2: Model for *DOC content. */ +struct thdr undechdr; /* 0:Default model hdr for undeclared content.*/ +UNCH *dtype = 0; /* Document type name. */ +UNCH *entbuf = 0; /* Buffer for entity reference name. */ +UNCH fce[2]; /* String form of FCE char. + (fce[1] must be EOS).*/ +/* Buffer for non-SGML character reference.*/ +UNCH nonchbuf[2] = { DELNONCH }; +UNCH *tbuf; /* Work area for tokenization. */ +UNCH *lbuf = 0; /* In tbuf: Literal parse area.*/ +UNCH *sysibuf = 0; /* Buffer for system identifiers. */ +UNCH *pubibuf = 0; /* Buffer for public identifiers. */ +UNCH *nmbuf = 0; /* Name buffer used by mdentity. */ +struct mpos *savedpos; + +/* Constants. +*/ +struct map dctab[] = { /* Keywords for declared content parameter.*/ + { key[KRCDATA], MRCDATA+MPHRASE }, + { key[KCDATA], MCDATA+MPHRASE }, + { key[KANY], MANY+MCHARS+MGI }, + { key[KEMPTY], MNONE+MPHRASE }, + { NULL, 0 } +}; +struct map deftab[] = { /* Default value keywords. 
*/ + { key[KIMPLIED], DNULL }, + { key[KREQUIRED], DREQ }, + { key[KCURRENT], DCURR }, + { key[KCONREF], DCONR }, + { key[KFIXED], DFIXED}, + { NULL, 0} +}; +struct map dvtab[] = { /* Declared value: keywords and type codes.*/ +/* TYPE NUMBER */ +/* grp ANMTGRP Case 1 0 Grp size */ +/* grp member ANMTGRP Case 0 Position */ +/* grp ANOTEGRP Case 1 1 Grp size */ + { key[KNOTATION], ANOTEGRP}, /* Case 1 Position */ + { key[KCDATA], ACHARS }, /* Case 2 Always 0 */ + { key[KENTITY], AENTITY }, /* Case 3 Normal 1 */ + { key[KID], AID }, /* Case 4 Normal 1 */ + { key[KIDREF], AIDREF }, /* Case 5 Normal 1 */ + { key[KNAME], ANAME }, /* Case 6 Normal 1 */ + { key[KNMTOKEN], ANMTOKE }, /* Case 7 Normal 1 */ + { key[KNUMBER], ANUMBER }, /* Case 8 Normal 1 */ + { key[KNUTOKEN], ANUTOKE }, /* Case 9 Normal 1 */ + { key[KENTITIES], AENTITYS}, /* Case A Normal 1 */ + { key[KIDREFS], AIDREFS }, /* Case B # tokens */ + { key[KNAMES], ANAMES }, /* Case C # tokens */ + { key[KNMTOKENS], ANMTOKES}, /* Case D # tokens */ + { key[KNUMBERS], ANUMBERS}, /* Case E # tokens */ + { key[KNUTOKENS], ANUTOKES}, /* Case F # tokens */ + { NULL, 0 } /* Case 0 ERROR */ +}; +struct map enttab[] = { /* Entity declaration second parameter. */ + { key[KCDATA], ESC }, + { key[KSDATA], ESX }, + { key[KMS], ESMS}, + { key[KPI], ESI }, + { key[KSTARTTAG], ESS }, + { key[KENDTAG], ESE }, + { key[KMD], ESMD}, + { NULL, 0 } +}; +struct map exttab[] = { /* Keywords for external identifier. */ + { key[KSYSTEM], EDSYSTEM }, + { key[KPUBLIC], EDPUBLIC }, + { NULL, 0 } +}; +struct map extettab[] = { /* Keywords for external entity type. */ + { key[KCDATA], ESNCDATA }, + { key[KNDATA], ESNNDATA }, + { key[KSDATA], ESNSDATA }, + { key[KSUBDOC], ESNSUB }, + { NULL, 0 } +}; +struct map funtab[] = { /* Function character reference names. */ + { key[KRE], RECHAR }, + { key[KRS], RSCHAR }, + { key[KSPACE], SPCCHAR }, + /* We should use an extra table for added functions. 
*/ + { (UNCH *)"TAB", TABCHAR }, + { NULL, 0 } +}; +struct map mstab[] = { /* Marked section keywords. */ + { key[KTEMP], MSTEMP }, + { key[KINCLUDE], MSTEMP }, /* Treat INCLUDE like TEMP; both are NOPs.*/ + { key[KRCDATA], MSRCDATA}, + { key[KCDATA], MSCDATA }, + { key[KIGNORE], MSIGNORE}, + { NULL, 0 } +}; +struct map pubcltab[] = { /* Names for public text class. */ + { (UNCH *)"CAPACITY", FPICAP }, + { (UNCH *)"CHARSET", FPICHARS}, + { (UNCH *)"DOCUMENT", FPIDOC }, + { (UNCH *)"DTD", FPIDTD }, + { (UNCH *)"ELEMENTS", FPIELEM }, + { (UNCH *)"ENTITIES", FPIENT }, + { (UNCH *)"LPD", FPILPD }, + { (UNCH *)"NONSGML", FPINON }, + { (UNCH *)"NOTATION", FPINOT }, + { (UNCH *)"SHORTREF", FPISHORT}, + { (UNCH *)"SUBDOC", FPISUB }, + { (UNCH *)"SYNTAX", FPISYN }, + { (UNCH *)"TEXT", FPITEXT }, + { NULL, 0 } +}; +UNCH indefent[] = "\12#DEFAULT"; /* Internal name: default entity name. */ +UNCH indefetd[] = "\12*DOCTYPE"; /* Internal name: default document type. */ +UNCH indocent[] = "\12*SGMLDOC"; /* Internal name: SGML document entity. */ +UNCH indocetd[] = "\6*DOC"; /* Internal name: document level etd. */ +UNCH indtdent[] = "\11*DTDENT"; /* Internal name: external DTD entity. */ + +struct etd dumetd[3]; +struct entity *dumpecb; +UNCH sgmlkey[] = "SGML"; diff --git a/usr.bin/sgmls/sgmls/sgmlxtrn.h b/usr.bin/sgmls/sgmls/sgmlxtrn.h new file mode 100644 index 0000000..f1b0b4b --- /dev/null +++ b/usr.bin/sgmls/sgmls/sgmlxtrn.h @@ -0,0 +1,121 @@ +/* SGMLXTRN.H: External declarations for SGML public variables. + Exceptions: Constants lex????? and del????? are defined in + LEX?????.C modules; constants pcb????? are defined in PCB?????.c. +*/ +#ifndef SGMLXTRN /* Don't include this file more than once. */ +#define SGMLXTRN +extern int badresw; /* 1=REF_ out of context; 0=valid. */ +extern int charmode; /* >0=in #CHARS; 0=not. */ +extern int conactsw; /* 1=return saved content action 0=get new one.*/ +extern int conrefsw; /* 1=content reference att specified; 0=no. 
*/ +extern int contersv; /* Save contersw while processing pending REF. */ +extern int contersw; /* 1=element or #CHARS out of context; 0=valid. */ +extern int datarc; /* Return code for data: DAF_ or REF_. */ +extern int delmscsw; /* 1=DELMSC must be read on return to es==0. */ +extern int didreq; /* 1=required implied tag processed; 0=no. */ +extern int dostag; /* 1=retry newetd instead of parsing; 0=parse. */ +extern int dtdsw; /* DOCTYPE declaration found: 1=yes; 0=no. */ +extern int entdatsw; /* 2=CDATA entity; 4=SDATA; 8=NDATA; 0=none. */ +extern int entpisw; /* 4=PI entity occurred; 0=not. */ +extern int eodsw; /* 1=eod found in error; 0=not yet. */ +extern int eofsw; /* 1=eof found in body of document; 0=not yet. */ +extern int etagimct; /* Implicitly ended elements left on stack. */ +extern int etagimsw; /* 1=end-tag implied by other end-tag; 0=not. */ +extern int etagmin; /* Minim: NONE NULL NET DATA; implied by S/ETAG*/ +extern int etictr; /* Number of "NET enabled" tags on stack. */ +extern int etisw; /* 1=tag ended with eti; 0=did not. */ +extern int indtdsw; /* Are we in the DTD? 1=yes; 0=no. */ +extern int mslevel; /* Nesting level of marked sections. */ +extern int msplevel; /* Nested MS levels subject to special parse. */ +extern int prologsw; /* 1=in prolog; 0=not. */ +extern int pss; /* SGMLACT: scbsgml stack level. */ +extern int sgmlsw; /* SGML declaration found: 1=yes; 0=no. */ +extern int stagmin; /* Minimization: NONE, NULL tag, implied by STAG*/ +extern int tagctr; /* Tag source chars read. */ +extern int ts; /* Index of current tag in stack. */ +extern struct parse *propcb; /* Current PCB for prolog parse. */ +extern int aentctr; /* Number of ENTITY tokens in this att list. */ +extern int conact; /* Return code from content parse. */ +extern int conrefsv; /* Save conrefsw when doing implied start-tag.*/ +extern int dtdrefsw; /* External DTD? 1=yes; 0=no. 
*/ +extern int etiswsv; /* Save etisw when processing implied start-tag.*/ +extern int grplvl; /* Current level of nested grps in model. */ +extern int idrctr; /* Number of IDREF tokens in this att list. */ +extern int mdessv; /* ES level at start of markup declaration. */ +extern int notadn; /* Position of NOTATION attribute in list. */ +extern int parmno; /* Current markup declaration parameter number. */ +extern int pexsw; /* 1=tag valid solely because of plus exception.*/ +extern int rcessv; /* ES level at start of RCDATA content. */ +extern int tagdelsw; /* 1=tag ended with delimiter; 0=no delimiter. */ +extern int tokencnt; /* Number of tokens found in attribute value. */ +extern struct entity *ecbdeflt; /* #DEFAULT ecb (NULL if no default entity). */ +extern struct etd *docetd; /* The etd for the document as a whole. */ +extern struct etd *etagreal; /* Actual or dummy etd that implied this tag. */ +extern struct etd *newetd; /* The etd for a start- or end-tag recognized. */ +extern struct etd *nextetd; /* ETD that must come next (only one choice). */ +extern struct etd *stagreal; /* Actual or dummy etd that implied this tag. */ +extern struct parse *conpcb; /* Current PCB for content parse. */ +extern UNCH *data; /* Pointer to returned data in buffer. */ +extern UNCH *mdname; /* Name of current markup declaration. */ +extern UNCH *ptcon; /* Current pointer into tbuf. */ +extern UNCH *ptpro; /* Current pointer into tbuf. */ +extern UNCH *rbufs; /* DOS file read area: start position for read. */ +extern UNCH *subdcl; /* Subject of markup declaration (e.g., GI). */ +extern int Tstart; /* Save starting token for AND group testing. */ +extern UNS conradn; /* 1=CONREF attribute in list (0=no). */ +extern UNS datalen; /* Length of returned data in buffer. */ +extern UNS entlen; /* Length of TAG or EXTERNAL entity text. */ +extern UNS idadn; /* Number of ID attribute (0 if none). */ +extern UNS noteadn; /* Number of NOTATION attribute (0 if none). 
*/ +extern UNS reqadn; /* Num of atts with REQUIRED default (0=none). */ +extern int grplongs; /* Number of longs for GRPCNT bitvector. */ +/* Variable arrays and structures. +*/ +extern struct ad *al; /* Current attribute list work area. */ +extern struct dcncb *dcntab[];/* List of data content notation names. */ +extern struct entity *etab[]; /* Entity hash table. */ +extern struct etd *etdtab[]; /* Element type definition hash table. */ +extern struct fpi fpidf; /* Fpi for #DEFAULT entity. */ +extern struct id *itab[]; /* Unique identifier hash table. */ +extern struct etd **nmgrp; /* Element name group */ +extern PDCB *nnmgrp; /* Notation name group */ +extern struct restate *scbsgml; /* SGMLACT: return action state stack. */ +extern struct srh *srhtab[]; /* List of SHORTREF table headers. */ +extern struct sgmlstat ds; /* Document statistics. */ +extern struct switches sw; /* Parser control switches set by text proc. */ +extern struct tag *tags; /* Stack of open elements ("tag stack"). */ +extern struct thdr *gbuf; /* Buffer for creating group. */ +extern struct thdr prcon[]; /* 0-2: Model for *DOC content. */ +extern struct thdr undechdr; /* 0: Default model hdr for undeclared content. */ +extern UNCH *dtype; /* Document type name. */ +extern UNCH *entbuf; /* Buffer for entity reference name. */ +extern UNCH fce[]; /* String form of FCE char (fce[1] must be EOS).*/ +extern UNCH nonchbuf[]; /* Buffer for valid nonchar character reference.*/ +extern UNCH *tbuf; /* Work area for tokenization. */ +extern UNCH *lbuf; /* In tbuf: Literal parse area; TAGLEN limit.*/ +extern struct entity *dumpecb; /* SRMNULL points to this. */ +extern UNCH *sysibuf; +extern UNCH *pubibuf; +extern UNCH *nmbuf; /* Name buffer used by mdentity. */ +extern struct mpos *savedpos; + +/* Constants. +*/ +extern int scbsgmnr; /* SCBSGML: new record; do not ignore RE. */ +extern int scbsgmst; /* SCBSGML: trailing stag or markup; ignore RE. 
*/ +extern struct map dctab[]; /* Keywords for declared content parameter. */ +extern struct map deftab[]; /* Default value keywords. */ +extern struct map dvtab[]; /* Declared value: keywords and type codes.*/ +extern struct map enttab[]; /* Entity declaration second parameter. */ +extern struct map exttab[]; /* Keywords for external identifier. */ +extern struct map extettab[]; /* Keywords for external entity type. */ +extern struct map funtab[]; /* Function character reference names. */ +extern struct map mstab[]; /* Marked section keywords. */ +extern struct map pubcltab[]; /* Keywords for public text class. */ +extern UNCH indefent[]; /* Internal name: default entity name. */ +extern UNCH indefetd[]; /* Internal name: default document type. */ +extern UNCH indocent[]; /* Internal name: SGML document entity. */ +extern UNCH indocetd[]; /* Internal name: etd for document as a whole. */ +extern UNCH indtdent[]; /* Internal name: external DTD entity. */ +extern char license[]; /* SGML Users' Group free license. */ +#endif /* ndef SGMLXTRN */ diff --git a/usr.bin/sgmls/sgmls/source.h b/usr.bin/sgmls/sgmls/source.h new file mode 100644 index 0000000..32cc85a --- /dev/null +++ b/usr.bin/sgmls/sgmls/source.h @@ -0,0 +1,114 @@ +/* SOURCE.H: Entity and source control block structures and definitions. +*/ +#define ENTHASH 503 /* Size of entity hash table. Must be prime. */ +/* Entity storage class values for estore member of entity structure. */ +#define EST 1 /* String: Tag (usually a fixed STARTGI). */ + /* <MDENTITY sets these:> */ +#define ESMD 2 /* String: Markup declaration. */ +#define ESMS 3 /* String: Marked section. */ +#define ESM 4 /* String: ordinary text. */ + /* <ENTOPEN treats these specially:> */ +#define ESS 5 /* ETD: Start-tag. */ +#define ESE 6 /* ETD: End-tag. */ +#define ESI 7 /* String: PI. */ +#define ESX 8 /* String: SDATA general entity. */ +#define ESC 9 /* String: CDATA general entity. 
*/
+                        /* </MDENTITY> <MDEXTID sets these:> */
+#define ESFM 10 /* LPU: minimum external (file) storage class. */
+#define ESN 10 /* XCB: N/C/SDATA or SUBDOC control block (same value as ESFM). */
+                        /* </ENTOPEN> */
+#define ESF 11 /* LPU: General entity. */
+#define ESP 12 /* LPU: Parameter entity. */
+#define ESD 13 /* LPU: Document type definition. */
+#define ESL 14 /* LPU: Link process definition. */
+#define ESK 15 /* LPU: Data content notation. */
+                        /* </MDEXTID> */
+
+union etext { /* Entity text. */
+     UNIV x; /* External ID generated by system. */
+     UNCH *c; /* Character string. */
+     struct ne *n; /* N/C/SDATA or SUBDOC entity control block. */
+};
+#define ETEXTSZ sizeof(union etext) /* Size in bytes of the entity text union. */
+struct entity { /* Entity control block. */
+     struct entity *enext; /* Next entity in chain. */
+     UNCH *ename; /* Entity name with length and EOS. */
+     UNCH estore; /* Storage class (see values above). */
+     UNCH dflt; /* Declared as default entity. */
+     UNCH mark; /* For use by sgmlment. */
+     union etext etx; /* Entity text. */
+};
+#define ENTSZ sizeof(struct entity) /* Size in bytes of an entity control block. */
+typedef struct entity *PECB; /* Ptr to entity control block. */
+typedef struct entity **TECB; /* Table of entity control blocks. */
+
+struct source { /* Source control block. */
+     struct entity ecb; /* Entity control block. */
+     unsigned long rcnt; /* Source record number. */
+     int ccnt; /* Source record chars since last RS. */
+     int curoff; /* Offset of curchar (chars read in this block).*/
+     UNCH curchar; /* Current character. */
+     UNCH nextchar; /* If curchar was DELNONCH, next character. */
+     UNIV fcb; /* SGMLIO fcb ptr returned by OPEN. */
+     UNCH *fbuf; /* 1st char in buffer (0=PEND) or entity text. */
+     UNCH *fpos; /* Current char in buffer or entity text. */
+     UNCH pushback; /* Character before pend position */
+     char copied; /* Is this a copy of the internal entity? */
+};
+#define SCBSZ sizeof(struct source) /* Size in bytes of a source control block. */
+typedef struct source *PSCB; /* Ptr to source control block.
*/
+
+extern int es; /* Index of current source in stack. */
+extern struct source *scbs; /* Stack of open sources ("SCB stack"). */
+
+/* Member definitions for source and entity control blocks.
+*/
+#define SCB (scbs[es]) /* Current source control block (the struct itself, not a pointer). */
+
+#define ECB SCB.ecb /* Current entity control block (embedded struct, not a pointer). */
+#define FBUF SCB.fbuf /* Pointer to start of entity buffer. */
+#define FPOS SCB.fpos /* Pointer to current char of current source. */
+#define RSCC SCB.ccnt /* CCNT at start of block (across EOB/EOS/EOF). */
+#define CCO SCB.curoff /* Offset in read buffer of current char. */
+#define CC SCB.curchar /* Current character of current source entity. */
+#define NEXTC SCB.nextchar /* Next character in current source entity. */
+#define CCNT (SCB.ccnt+CCO) /* Position of CC in current record (RS=0). */
+#define RCNT SCB.rcnt /* Position of record in entity (origin=1). */
+#define SCBFCB SCB.fcb /* Current file control block (if FILESW). */
+#define ECBPTR ((ECB.enext)) /* Pointer to this entity's ECB. */
+#define ENTITY ((ECB.ename)) /* Current entity name. */
+#define FILESW (ECB.estore>=ESFM) /* 1=Entity is external file; 0=internal. */
+#define NEWCC (++FPOS) /* Advance FPOS to the next character (yields the new pointer). */
+#define REPEATCC (--FPOS) /* Back FPOS up one character (yields the new pointer). */
+#define COPIEDSW SCB.copied /* Non-zero means entity was copied. */
+
+struct srh { /* Short reference map header. */
+     struct srh *enext; /* Next short reference map in chain. */
+     UNCH *ename; /* Short reference map name. */
+     TECB srhsrm; /* Ptr to short reference map. */
+};
+#define SRHSZ (sizeof(struct srh))
+typedef struct srh *PSRH; /* Ptr to short reference map header. */
+#define SRMNULL (&dumpecb) /* Dummy ptr to empty short reference map. */
+
+/* Definitions for ENTOPEN/ENTREF return codes.
+*/
+#define ENTUNDEF -1 /* Callers of ENTOPEN: entity undefined. */
+#define ENTLOOP -2 /* ENTOPEN: endless loop entity. */
+#define ENTMAX -3 /* ENTOPEN: too many open entities.
*/ +#define ENTFILE -4 /* ENTOPEN: file I/O error. */ +#define ENTDATA -5 /* ENTOPEN: CDATA or SDATA entity. */ +#define ENTPI -6 /* ENTOPEN: PI entity. */ + +/* Definitions for ENTDATA switches set in contersw. +*/ +#define CDECONT 2 /* 0010 CDATA entity referenced. */ +#define SDECONT 4 /* 0100 SDATA entity referenced. */ +#define NDECONT 8 /* 1000 NDATA entity referenced. */ + +/* Definitions for manipulating signed source character counters. +*/ +#define CTRSET(CTR) (CTR = (int) -(FPOS+1-FBUF)) /* Init source char ctr. */ +#define CTRGET(CTR) (CTR + (int) (FPOS+1-FBUF)) /* Read source char ctr. */ + + diff --git a/usr.bin/sgmls/sgmls/std.h b/usr.bin/sgmls/sgmls/std.h new file mode 100644 index 0000000..3a9ab4b --- /dev/null +++ b/usr.bin/sgmls/sgmls/std.h @@ -0,0 +1,116 @@ +/* std.h - + Include standard header files. +*/ + +#ifndef STD_H +#define STD_H 1 + +#include <stdio.h> +#include <ctype.h> +#include <errno.h> +#include <assert.h> +#ifdef SUPPORT_SUBDOC +#include <signal.h> +#endif /* SUPPORT_SUBDOC */ + +#ifndef STDDEF_H_MISSING +#include <stddef.h> +#endif /* not STDDEF_H_MISSING */ + +#ifndef LIMITS_H_MISSING +#include <limits.h> +#endif /* not LIMITS_H_MISSING */ + +#ifndef UINT_MAX +#define UINT_MAX (sizeof(unsigned int) == 2 ? 0x7fff : \ + (sizeof(unsigned int) == 4 ? 
0x7fffffff : cant_guess_UINT_MAX)) +#endif + +#ifdef VARARGS +#include <varargs.h> +#else +#include <stdarg.h> +#endif + +#ifdef BSD_STRINGS +#include <strings.h> +#define memcpy(to, from, n) bcopy(from, to, n) +#define memcmp(p, q, n) bcmp(p, q, n) +#define strchr(s, c) index(s, c) +#define strrchr(s, c) rindex(s, c) +#else /* not BSD_STRINGS */ +#include <string.h> +#endif /* not BSD_STRINGS */ + +#ifdef STRERROR_MISSING +#ifdef USE_PROTOTYPES +extern char *strerror(int); +#else +extern char *strerror(); +#endif +#endif /* STRERROR_MISSING */ + +#ifdef STDLIB_H_MISSING +UNIV malloc(); +UNIV calloc(); +UNIV realloc(); +char *getenv(); +long atol(); +#else /* not STDLIB_H_MISSING */ +#include <stdlib.h> +#endif /* not STDLIB_H_MISSING */ + +#ifdef REMOVE_MISSING +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif /* HAVE_UNISTD_H */ +#define remove unlink +#endif /* REMOVE_MISSING */ + +#ifdef RAISE_MISSING +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif /* HAVE_UNISTD_H */ +#define raise(sig) kill(getpid(), sig) +#endif /* RAISE_MISSING */ + +#ifndef offsetof +#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) +#endif + +#ifndef EXIT_FAILURE +#define EXIT_FAILURE 1 +#endif +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#endif + +#ifndef SEEK_SET +#define SEEK_SET 0 +#define SEEK_CUR 1 +#define SEEK_END 2 +#endif + +#ifdef FPOS_MISSING +typedef long fpos_t; +#define fsetpos(stream, pos) fseek(stream, *(pos), SEEK_SET) +#define fgetpos(stream, pos) ((*(pos) = ftell(stream)) == -1L) +#endif /* FPOS_MISSING */ + +/* Old BSD systems lack L_tmpnam and tmpnam(). This is a partial +emulation using mktemp(). It requires that the argument to tmpnam() +be non-NULL. */ + +#ifndef L_tmpnam +#define tmpnam_template "/tmp/sgmlsXXXXXX" +#define L_tmpnam (sizeof(tmpnam_template)) +#undef tmpnam +#define tmpnam(buf) \ + (mktemp(strcpy(buf, tmpnam_template)) == 0 || (buf)[0] == '\0' ? 
0 : (buf)) +#endif /* not L_tmpnam */ + +#ifndef errno +extern int errno; +#endif + +#endif /* not STD_H */ diff --git a/usr.bin/sgmls/sgmls/stklen.c b/usr.bin/sgmls/sgmls/stklen.c new file mode 100644 index 0000000..43af5dd --- /dev/null +++ b/usr.bin/sgmls/sgmls/stklen.c @@ -0,0 +1,2 @@ +/* This tells Borland C++ to allocate a 14k stack. */ +unsigned _stklen = 14*1024; diff --git a/usr.bin/sgmls/sgmls/strerror.c b/usr.bin/sgmls/sgmls/strerror.c new file mode 100644 index 0000000..f5679c0 --- /dev/null +++ b/usr.bin/sgmls/sgmls/strerror.c @@ -0,0 +1,36 @@ +/* strerror.c - + ANSI C strerror() function. + + Written by James Clark (jjc@jclark.com). +*/ + +#include "config.h" + +#ifdef STRERROR_MISSING +#include <stdio.h> + +char *strerror(n) +int n; +{ + extern int sys_nerr; + extern char *sys_errlist[]; + static char buf[sizeof("Error ") + 1 + 3*sizeof(int)]; + + if (n >= 0 && n < sys_nerr && sys_errlist[n] != 0) + return sys_errlist[n]; + else { + sprintf(buf, "Error %d", n); + return buf; + } +} + +#endif /* STRERROR_MISSING */ +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/synrf.c b/usr.bin/sgmls/sgmls/synrf.c new file mode 100644 index 0000000..2076107 --- /dev/null +++ b/usr.bin/sgmls/sgmls/synrf.c @@ -0,0 +1,72 @@ +/* SYNRF: Reserved names and other constants for reference concrete syntax. +*/ +#include "config.h" +#include "entity.h" /* Templates for entity control blocks. */ +#include "synxtrn.h" /* Declarations for concrete syntax constants. */ +#include "adl.h" /* Definitions for attribute list processing. 
*/
+UNCH key[NKEYS][REFNAMELEN+1] = { /* Reference keywords; row order must match the K* indices (KANY..KUSEMAP). */
+     "ANY",
+     "ATTLIST",
+     "CDATA",
+     "CONREF",
+     "CURRENT",
+     "DEFAULT",
+     "DOCTYPE",
+     "ELEMENT",
+     "EMPTY",
+     "ENDTAG",
+     "ENTITIES",
+     "ENTITY",
+     "FIXED",
+     "ID",
+     "IDLINK",
+     "IDREF",
+     "IDREFS",
+     "IGNORE",
+     "IMPLIED",
+     "INCLUDE",
+     "INITIAL",
+     "LINK",
+     "LINKTYPE",
+     "MD",
+     "MS",
+     "NAME",
+     "NAMES",
+     "NDATA",
+     "NMTOKEN",
+     "NMTOKENS",
+     "NOTATION",
+     "NUMBER",
+     "NUMBERS",
+     "NUTOKEN",
+     "NUTOKENS",
+     "O",
+     "PCDATA",
+     "PI",
+     "POSTLINK",
+     "PUBLIC",
+     "RCDATA",
+     "RE",
+     "REQUIRED",
+     "RESTORE",
+     "RS",
+     "SDATA",
+     "SHORTREF",
+     "SIMPLE",
+     "SPACE",
+     "STARTTAG",
+     "SUBDOC",
+     "SYSTEM",
+     "TEMP",
+     "USELINK",
+     "USEMAP"
+};
+/*
+Local Variables:
+c-indent-level: 5
+c-continued-statement-offset: 5
+c-brace-offset: -5
+c-argdecl-indent: 0
+c-label-offset: -5
+End:
+*/
diff --git a/usr.bin/sgmls/sgmls/synxtrn.h b/usr.bin/sgmls/sgmls/synxtrn.h
new file mode 100644
index 0000000..75b6471
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/synxtrn.h
@@ -0,0 +1,152 @@
+/* SYNXTRN.H: External declarations for concrete syntax constants.
+*/
+/* Short References
+*/
+#define SRCT 32 /* Number of short reference delimiters. */
+#define SRMAXLEN 3 /* Maximum length of a SHORTREF delimiter. */
+#define SRNPRT 8 /* Number of non-printable SHORTREF delimiters. */
+struct srdel { /* Short reference delimiter table plus indices of notable entries. */
+     struct map dtb[SRCT+2]; /* LEXCNM: Short reference delimiters. */
+     char *pdtb[SRNPRT+1]; /* LEXCNM: Printable form of unprintable SRs. */
+     int fce; /* LEXCNM: Index of first FCE in srdeltab. */
+     int hyp2; /* LEXCNM: Index of "two hyphens" in srdeltab. */
+     int data; /* LEXCNM: Index of first SR with data char. */
+     int hyp; /* LEXCNM: Index of hyphen in srdeltab. */
+     int prtmin; /* LEXCNM: Index of 1st printable SR. */
+     int spc; /* LEXCNM: Index of space in srdeltab. */
+     int lbr; /* LEXCNM: Index of left bracket in srdeltab. */
+     int rbr; /* LEXCNM: Index of right bracket in srdeltab. */
+};
+struct delim { /* Delimiter characters, one member per delimiter role. */
+     UNCH genre; /* LEXCON: Generated RE; cannot be markup. */
+     UNCH lit; /* LEXMARK: Char used as LIT delimiter.*/
+     UNCH lita; /* LEXMARK: Char used as LITA delimiter.*/
+     UNCH mdc; /* LEXLMS: Char used as MDC delimiter.*/
+     UNCH msc; /* LEXCON: Char used as MSC delimiter. */
+     UNCH net; /* LEXCON: Char used as NET when enabled.*/
+     UNCH pero; /* LEXMARK: Char used as PERO delimiter. */
+     UNCH pic; /* LEXCON: Char used as PIC delimiter.*/
+     UNCH tago; /* LEXCON: Char used as TAGO when enabled.*/
+};
+struct lexcode { /* Codes assigned in the lexical tables named in each member comment. */
+     UNCH fce; /* LEXCNM: FRE character as entity reference. */
+     UNCH fre; /* LEXCON: Free character not an entity ref. */
+     UNCH litc; /* LEXLMS: Literal close delimiter enabled. */
+     UNCH msc; /* LEXLMS: Marked section close delim enabled. */
+     UNCH net; /* LEXCON: Null end-tag delimiter enabled. */
+     UNCH nonet; /* LEXCON: NET disabled; still used as ETI. */
+     UNCH spcr; /* LEXCNM: Space in use as SHORTREF delimiter. */
+     UNCH tago; /* LEXCON: Tag open delimiter enabled. */
+     UNCH cde; /* LEXLMS: CDATA/SDATA delimiters. */
+};
+struct lexical { /* All delimiter data gathered in one object (declared below as lex). */
+     struct markup m; /* Markup strings for text processor. */
+     struct srdel s; /* Short reference delimiters. */
+     struct delim d; /* General delimiter characters. */
+     struct lexcode l; /* Lexical table code assignments. */
+};
+extern struct lexical lex; /* Delimiter set constants. */
+extern UNCH lexcnm[]; /* Lexical table: mixed content. */
+extern UNCH lexcon[]; /* Lexical table for content (except mixed). */
+extern UNCH lexgrp[]; /* Lexical table for groups. */
+extern UNCH lexlms[]; /* Lexical table: literals and marked sections. */
+extern UNCH lexmark[]; /* Lexical table for markup. */
+extern UNCH lexsd[]; /* Lexical table for SGML declaration. */
+extern UNCH lextran[]; /* Case translation table for SGML names. */
+extern UNCH lextoke[]; /* Lexical table for tokenization. */
+extern UNCH *lextabs[]; /* List of all lexical tables. */
+extern struct parse pcbconc; /* PCB: character data. */
+extern struct parse pcbcone; /* PCB: element content (no data allowed). */
+extern struct parse pcbconm; /* PCB: mixed content (data allowed). */
+extern struct parse pcbconr; /* PCB: replaceable character data. */
+extern struct parse pcbetag; /* PCB: end-tags. */
+extern struct parse pcbgrcm; /* PCB: content model group. */
+extern struct parse pcbgrcs; /* PCB: content model suffix. */
+extern struct parse pcbgrnm; /* PCB: name group. */
+extern struct parse pcbgrnt; /* PCB: name token group. */
+extern struct parse pcblitc; /* PCB: literal with CDATA. */
+extern struct parse pcblitp; /* PCB: literal with CDATA, parm & char refs. */
+extern struct parse pcblitr; /* PCB: attribute value with general refs. */
+extern struct parse pcblitt; /* PCB: tokenized attribute value. */
+extern struct parse pcblitv; /* PCB: literal with CDATA, function char trans.*/
+extern struct parse pcbmd; /* PCB: markup declaration. */
+extern struct parse pcbmdc; /* PCB: comment declaration. */
+extern struct parse pcbmdi; /* PCB: markup declaration (ignored). */
+extern struct parse pcbmds; /* PCB: markup declaration subset. */
+extern struct parse pcbmsc; /* PCB: marked section in CDATA mode. */
+extern struct parse pcbmsi; /* PCB: marked section in IGNORE mode. */
+extern struct parse pcbmsrc; /* PCB: marked section in RCDATA mode. */
+extern struct parse pcbpro; /* PCB: prolog. */
+extern struct parse pcbref; /* PCB: reference. */
+extern struct parse pcbstag; /* PCB: start-tag. */
+extern struct parse pcbval; /* PCB: attribute value. */
+extern struct parse pcbeal; /* PCB: end of attribute list. */
+extern struct parse pcbsd; /* PCB: SGML declaration. */
+extern int pcbcnda; /* PCBCONM: data in buffer. */
+extern int pcbcnet; /* PCBCONM: markup found or data buffer flushed.*/
+extern int pcbmdtk; /* PCBMD: token expected. */
+extern int pcbstan; /* PCBSTAG: attribute name expected.
*/
+extern int pcblittda; /* PCBLITT: data character found */
+
+#define KANY 0 /* First of the K* constants; each is a row index into key[]. */
+#define KATTLIST 1
+#define KCDATA 2
+#define KCONREF 3
+#define KCURRENT 4
+#define KDEFAULT 5
+#define KDOCTYPE 6
+#define KELEMENT 7
+#define KEMPTY 8
+#define KENDTAG 9
+#define KENTITIES 10
+#define KENTITY 11
+#define KFIXED 12
+#define KID 13
+#define KIDLINK 14
+#define KIDREF 15
+#define KIDREFS 16
+#define KIGNORE 17
+#define KIMPLIED 18
+#define KINCLUDE 19
+#define KINITIAL 20
+#define KLINK 21
+#define KLINKTYPE 22
+#define KMD 23
+#define KMS 24
+#define KNAME 25
+#define KNAMES 26
+#define KNDATA 27
+#define KNMTOKEN 28
+#define KNMTOKENS 29
+#define KNOTATION 30
+#define KNUMBER 31
+#define KNUMBERS 32
+#define KNUTOKEN 33
+#define KNUTOKENS 34
+#define KO 35
+#define KPCDATA 36
+#define KPI 37
+#define KPOSTLINK 38
+#define KPUBLIC 39
+#define KRCDATA 40
+#define KRE 41
+#define KREQUIRED 42
+#define KRESTORE 43
+#define KRS 44
+#define KSDATA 45
+#define KSHORTREF 46
+#define KSIMPLE 47
+#define KSPACE 48
+#define KSTARTTAG 49
+#define KSUBDOC 50
+#define KSYSTEM 51
+#define KTEMP 52
+#define KUSELINK 53
+#define KUSEMAP 54
+
+#define NKEYS (KUSEMAP+1) /* Number of keywords, i.e. rows in key[]. */
+
+extern UNCH key[NKEYS][REFNAMELEN+1]; /* Keyword strings, one row per K* constant. */
+
+/* Holds the SGML keyword (not alterable by concrete syntax). */
+extern UNCH sgmlkey[];
diff --git a/usr.bin/sgmls/sgmls/tools.h b/usr.bin/sgmls/sgmls/tools.h
new file mode 100644
index 0000000..57ce45a
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/tools.h
@@ -0,0 +1,76 @@
+/* TOOLS.H: Definitions for type declarations, printing, bit handling, etc.
+*/ + +#if CHAR_SIGNED +typedef unsigned char UNCH; +#else +typedef char UNCH; +#endif + +#if CHAR_SIGNED +#define ustrcmp(s1, s2) strcmp((char *)(s1), (char *)(s2)) +#define ustrcpy(s1, s2) strcpy((char *)(s1), (char *)(s2)) +#define ustrchr(s, c) (UNCH *)strchr((char *)(s), c) +#define ustrncmp(s1, s2, n) strncmp((char *)(s1), (char *)(s2), n) +#define ustrncpy(s1, s2, n) strncpy((char *)(s1), (char *)(s2), n) +#define ustrlen(s1) strlen((char *)(s1)) +#else +#define ustrcmp strcmp +#define ustrcpy strcpy +#define ustrchr strchr +#define ustrncmp strncmp +#define ustrncpy strncpy +#define ustrlen strlen +#endif + +#if 0 +int ustrcmp(UNCH *, UNCH *); +UNCH *ustrchr(UNCH *, int); +int ustrncmp(UNCH *, UNCH *, UNS); +int ustrncpy(UNCH *, UNCH *, UNS); +int ustrlen(UNCH *); +#endif + +typedef unsigned UNS; + +#ifdef USE_ISASCII +#define ISASCII(c) isascii(c) +#else +#define ISASCII(c) (1) +#endif + +#ifdef BSD_STRINGS +#define MEMZERO(s, n) bzero(s, n) +#else /* not BSD_STRINGS */ +#define MEMZERO(s, n) memset(s, '\0', n) +#endif /* not BSD_STRINGS */ + +/* Macros for bit manipulation. +*/ +#define SET(word, bits) ((word) |= (bits)) /* Turn bits on */ +#define RESET(word, bits) ((word) &= ~(bits)) /* Turn bits off */ +#define GET(word, bits) ((word) & (bits)) /* 1=any bit on */ +#define BITOFF(word, bits) (GET(word, bits)==0) /* 1=no bits on */ +#define BITON(word, bits) ((word) & (bits)) /* 1=any bit on */ + +#define ETDCDATA (dumetd) /* Dummy etd pointer for #PCDATA. */ +#define ETDNULL (dumetd + 1) /* Dummy etd pointer for null tag. */ +#define ETDNET (dumetd + 2) /* Dummy etd pointer for NET delimiter. */ +#define BADPTR(p) \ + ((p) == NULL || (p) == ETDCDATA || (p) == ETDNULL || (p) == ETDNET) +#define PTRNUM(p) ((p) == NULL ? 0 : ((p) - dumetd) + 1) + +#ifdef USE_PROTOTYPES +#define P(parms) parms +#else +#define P(parms) () +#endif + +/* VP is used for prototypes of varargs functions. 
You can't have a +prototype if the function is defined using varargs.h rather than +stdarg.h. */ +#ifdef VARARGS +#define VP(parms) () +#else +#define VP(parms) P(parms) +#endif diff --git a/usr.bin/sgmls/sgmls/trace.h b/usr.bin/sgmls/sgmls/trace.h new file mode 100644 index 0000000..56362be --- /dev/null +++ b/usr.bin/sgmls/sgmls/trace.h @@ -0,0 +1,113 @@ +/* TRACE.H: Declarations for internal trace functions. */ + +#ifdef TRACE + +/* Trace variables. +*/ +extern int trace; /* Switch: 1=trace state transitions; 0=don't. */ +extern int atrace; /* Switch: 1=trace attribute activity; 0=don't. */ +extern int ctrace; /* Switch: 1=trace context checking; 0=don't. */ +extern int dtrace; /* Switch: 1=trace declaration parsing; 0=don't.*/ +extern int etrace; /* Switch: 1=trace entity activity; 0=don't.*/ +extern int gtrace; /* Switch: 1=trace group creations; 0=don't. */ +extern int itrace; /* Switch: 1=trace ID activity; 0=don't. */ +extern int mtrace; /* Switch: 1=trace MS activity; 0=don't. */ +extern int ntrace; /* Switch: 1=trace data notation activity. */ +extern char emd[]; /* For "EMD" parameter type in dtrace calls. 
*/
+
+VOID traceadl P((struct ad *));
+VOID tracecon P((int,int,int,struct parse *,int,int));
+VOID tracedcn P((struct dcncb *));
+VOID tracedsk P((struct tag *,struct tag *,int,int));
+VOID traceecb P((char *,struct entity *));
+VOID traceend P((char *,struct thdr *,struct mpos *,int,int,int));
+VOID traceesn P((struct ne *));
+VOID traceetd P((struct etd *));
+VOID traceetg P((struct tag *,struct etd *,int,int));
+VOID tracegi P((char *,struct etd *,struct thdr *,struct mpos *,int));
+VOID tracegml P((struct restate *,int,int,int));
+VOID tracegrp P((struct etd **));
+VOID traceid P((char *,struct id *));
+VOID tracemd P((char *));
+VOID tracemod P((struct thdr *));
+VOID tracems P((int,int,int,int));
+VOID tracengr P((struct dcncb **));
+VOID tracepcb P((struct parse *));
+VOID tracepro P((void));
+VOID traceset P((void));
+VOID tracesrm P((char *,struct entity **,UNCH *));
+VOID tracestg P((struct etd *,int,int,struct etd *,int));
+VOID tracestk P((struct tag *,int,int));
+VOID tracetkn P((int,UNCH *));
+VOID traceval P((struct parse *,unsigned int,UNCH *,int));
+
+#define TRACEADL(al) ((void)(atrace && (traceadl(al), 1))) /* Pattern used below: the call runs only when its switch is non-zero; ", 1" gives && an operand with a value. */
+#define TRACECON(etagimct, dostag, datarc, pcb, conrefsw, didreq) \
+     ((void)(gtrace \
+          && (tracecon(etagimct, dostag, datarc, pcb, conrefsw, didreq), 1)))
+#define TRACEDCN(dcn) ((void)(ntrace && (tracedcn(dcn), 1)))
+#define TRACEDSK(pts, ptso, ts3, etictr) \
+     ((void)(gtrace && (tracedsk(pts, ptso, ts3, etictr), 1)))
+#define TRACEECB(action, p) \
+     ((void)(etrace && (traceecb(action, p), 1)))
+#define TRACEEND(stagenm, mod, pos, rc, opt, Tstart) \
+     ((void)(ctrace && (traceend(stagenm, mod, pos, rc, opt, Tstart), 1)))
+#define TRACEESN(p) \
+     ((void)((etrace || atrace || ntrace) && (traceesn(p), 1)))
+#define TRACEETD(p) ((void)(gtrace && (traceetd(p), 1)))
+#define TRACEETG(pts, curetd, tsl, etagimct) \
+     ((void)(gtrace && (traceetg(pts, curetd, tsl, etagimct), 1)))
+#define TRACEGI(stagenm, gi, mod, pos, Tstart) \
+     ((void)(ctrace && (tracegi(stagenm, gi, mod, pos, Tstart), 1)))
+#define TRACEGML(scb, pss, conactsw, conact) \
+     ((void)(trace && (tracegml(scb, pss, conactsw, conact), 1)))
+#define TRACEGRP(p) ((void)(gtrace && (tracegrp(p), 1)))
+#define TRACEID(action, p) ((void)(itrace && (traceid(action, p), 1)))
+#define TRACEMD(p) ((void)(dtrace && (tracemd(p), 1)))
+#define TRACEMOD(p) ((void)(gtrace && (tracemod(p), 1)))
+#define TRACEMS(action, code, mslevel, msplevel) \
+     ((void)(mtrace && (tracems(action, code, mslevel, msplevel), 1)))
+#define TRACENGR(p) ((void)(gtrace && (tracengr(p), 1)))
+#define TRACEPCB(p) ((void)(trace && (tracepcb(p), 1)))
+#define TRACEPRO() (tracepro()) /* Unconditional: tracepro itself sets the switches. */
+#define TRACESET() (traceset()) /* Unconditional: traceset itself sets the switches. */
+#define TRACESRM(action, pg, gi) \
+     ((void)(etrace && (tracesrm(action, pg, gi), 1)))
+#define TRACESTG(curetd, dataret, rc, nextetd, mexts) \
+     ((void)(gtrace && (tracestg(curetd, dataret, rc, nextetd, mexts), 1)))
+#define TRACESTK(pts, ts2, etictr) \
+     ((void)(gtrace && (tracestk(pts, ts2, etictr), 1)))
+#define TRACETKN(scope, lextoke) \
+     ((void)(trace && (tracetkn(scope, lextoke), 1)))
+#define TRACEVAL(pcb, atype, aval, tokencnt) \
+     ((void)(atrace && (traceval(pcb, atype, aval, tokencnt), 1)))
+
+#else /* not TRACE: every macro below expands to nothing. */
+
+#define TRACEADL(al) /* empty */
+#define TRACECON(etagimct, dostag, datarc, pcb, conrefsw, didreq) /* empty */
+#define TRACEDCN(dcn) /* empty */
+#define TRACEDSK(pts, ptso, ts3, etictr) /* empty */
+#define TRACEECB(action, p) /* empty */
+#define TRACEEND(stagenm, mod, pos, rc, opt, Tstart) /* empty */
+#define TRACEESN(p) /* empty */
+#define TRACEETG(pts, curetd, tsl, etagimct) /* empty */
+#define TRACEETD(p) /* empty */
+#define TRACEGI(stagenm, gi, mod, pos, Tstart) /* empty */
+#define TRACEGML(scb, pss, conactsw, conact) /* empty */
+#define TRACEGRP(p) /* empty */
+#define TRACEID(action, p) /* empty */
+#define TRACEMD(p) /* empty */
+#define TRACEMOD(p) /* empty */
+#define TRACEMS(action, code, mslevel, msplevel) /* empty */
+#define TRACENGR(p) /* empty */
+#define TRACEPCB(p) /* empty */
+#define TRACEPRO() /* empty */
+#define TRACESET() /* empty */
+#define TRACESRM(action, pg, gi) /* empty */
+#define TRACESTG(curetd, dataret, rc, nextetd, mexts) /* empty */
+#define TRACESTK(pts, ts2, etictr) /* empty */
+#define TRACETKN(scope, lextoke) /* empty */
+#define TRACEVAL(pcb, atype, aval, tokencnt) /* empty */
+
+#endif /* not TRACE */
diff --git a/usr.bin/sgmls/sgmls/traceset.c b/usr.bin/sgmls/sgmls/traceset.c
new file mode 100644
index 0000000..df18cbe
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/traceset.c
@@ -0,0 +1,465 @@
+#include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */
+
+#ifdef TRACE
+
+#include "context.h"
+
+/* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ */
+#define STATUX tags[ts].status
+
+/* Trace variables.
+*/
+int trace = 0; /* Switch: 1=trace state transitions; 0=don't. */
+int atrace = 0; /* Switch: 1=trace attribute activity; 0=don't. */
+int ctrace = 0; /* Switch: 1=trace context checking; 0=don't. */
+int dtrace = 0; /* Switch: 1=trace declaration parsing; 0=don't.*/
+int etrace = 0; /* Switch: 1=trace entity activity; 0=don't.*/
+int gtrace = 0; /* Switch: 1=trace group creations; 0=don't. */
+int itrace = 0; /* Switch: 1=trace ID activity; 0=don't. */
+int mtrace = 0; /* Switch: 1=trace MS activity; 0=don't. */
+int ntrace = 0; /* Switch: 1=trace notation activity; 0=don't. */
+char emd[] = "EMD"; /* For "EMD" parameter type in dtrace calls. */
+
+/* Return a printable representation of c.
+*/ +static +char *printable(c) +int c; +{ + static char buf[5]; + if (c >= 040 && c < 0177) { + buf[0] = c; + buf[1] = '\0'; + } + else + sprintf(buf, "\\%03o", (UNCH)c); + return buf; +} + +static +VOID dotrace(s) +char *s; +{ + trace = (s && strchr(s, 't') != 0); + atrace = (s && strchr(s, 'a') != 0); + ctrace = (s && strchr(s, 'c') != 0); + dtrace = (s && strchr(s, 'd') != 0); + etrace = (s && strchr(s, 'e') != 0); + gtrace = (s && strchr(s, 'g') != 0); + itrace = (s && strchr(s, 'i') != 0); + mtrace = (s && strchr(s, 'm') != 0); + ntrace = (s && strchr(s, 'n') != 0); +} +/* TRACESET: Set switches for tracing body of document. +*/ +VOID traceset() +{ + dotrace(sw.trace); + + if (trace||atrace||ctrace||dtrace||etrace||gtrace||itrace||mtrace||ntrace) + fprintf(stderr, +"TRACESET: state=%d;att=%d;con=%d;dcl=%d;ent=%d;grp=%d;id=%d;ms=%d;dcn=%d.\n", + trace, atrace, ctrace, dtrace, etrace, gtrace, itrace, + mtrace, ntrace); +} +/* TRACEPRO: Set switches for tracing prolog. + */ +VOID tracepro() +{ + dotrace(sw.ptrace); + + if (trace||atrace||dtrace||etrace||gtrace||mtrace||ntrace) + fprintf(stderr, + "TRACEPRO: state=%d; att=%d; dcl=%d; ent=%d; grp=%d; ms=%d; dcn=%d.\n", + trace, atrace, dtrace, etrace, gtrace, mtrace, ntrace); +} +/* TRACEPCB: Trace character just parsed and other pcb data. + */ +VOID tracepcb(pcb) +struct parse *pcb; +{ + fprintf(stderr, "%-8s %2u-%2u-%2u-%2u from %s [%3d] in %s, %d:%d.\n", + pcb->pname, pcb->state, pcb->input, pcb->action, + pcb->newstate, printable(*FPOS), *FPOS, ENTITY+1, RCNT, + RSCC+FPOS+1-FBUF); +} +/* TRACETKN: Trace character just read during token parse. + */ +VOID tracetkn(scope, lextoke) +int scope; +UNCH lextoke[]; /* Lexical table for token and name parses. */ +{ + fprintf(stderr, "TOKEN %2d-%2d from %s [%3d] in %s, %d:%d.\n", + scope, lextoke[*FPOS], + printable(*FPOS), *FPOS, ENTITY+1, RCNT, + RSCC+FPOS+1-FBUF); +} +/* TRACEGML: Trace state of main SGML driver routine. 
+ */
+VOID tracegml(scb, pss, conactsw, conact)
+struct restate *scb; /* State table; entry pss is the one reported. */
+int pss, conactsw, conact; /* pss indexes scb; the other two are printed as given. */
+{
+     fprintf(stderr,
+             "SGML%02d %2d-%2d-%2d-%2d in main driver; conactsw=%d; conact=%d.\n",
+             pss, scb[pss].sstate, scb[pss].sinput, scb[pss].saction,
+             scb[pss].snext, conactsw, conact);
+}
+/* TRACEVAL: Trace parse of an attribute value that is a token list.
+ */
+VOID traceval(pcb, atype, aval, tokencnt)
+struct parse *pcb;
+UNS atype; /* Type of token list expected. */
+UNCH *aval; /* Value string to be parsed as token list. */
+int tokencnt; /* Number of tokens found in attribute value. */
+{
+     fprintf(stderr,
+             "%-8s %2d-%2d-%2d-%2d at %p, atype=%02x, tokencnt=%d: ",
+             pcb->pname, pcb->state, pcb->input, pcb->action,
+             pcb->newstate, (UNIV)aval, atype, tokencnt);
+     fprintf(stderr, "%s\n", aval); /* The value string itself completes the line. */
+}
+/* TRACESTK: Trace entry just placed on tag stack.
+ */
+VOID tracestk(pts, ts2, etictr)
+struct tag *pts; /* Stack entry for this tag. */
+int ts2; /* Stack depth. */
+int etictr; /* Number of "netok" tags on stack. */
+{
+     fprintf(stderr,
+             "STACK %s begun; stack depth %d; tflag=%02x; etictr=%d",
+             pts->tetd->etdgi+1, ts2, pts->tflags, etictr);
+     fprintf(stderr, " srm=%s.\n", /* "#EMPTY" for SRMNULL, else the name held in entry 0 of the map. */
+             pts->tsrm!=SRMNULL ? (char *)(pts->tsrm[0]->ename+1) : "#EMPTY");
+}
+/* TRACEDSK: Trace entry just removed from tag stack.
+ */
+VOID tracedsk(pts, ptso, ts3, etictr)
+struct tag *pts; /* Stack entry for new open tag. */
+struct tag *ptso; /* Stack entry for tag just ended. */
+int ts3; /* Stack depth. */
+int etictr; /* Number of "netok" tags on stack. */
+{
+     fprintf(stderr,
+ "DESTACK %s ended; otflag=%02x; %s resumed; depth=%d; tflag=%02x; etictr=%d",
+             ptso->tetd->etdgi+1, ptso->tflags,
+             pts->tetd->etdgi+1, ts3, pts->tflags, etictr);
+     fprintf(stderr, " srm=%s.\n", /* Map of the resumed element, shown as in TRACESTK. */
+             pts->tsrm!=SRMNULL ? (char *)(pts->tsrm[0]->ename+1) : "#EMPTY");
+}
+/* TRACECON: Trace interactions between content parse and stag/context
+             processing.
+ */
+VOID tracecon(etagimct, dostag, datarc, pcb, conrefsw, didreq)
+int etagimct; /* Implicitly ended elements left on stack. */
+int dostag; /* 1=retry newetd instead of parsing; 0=parse. */
+int datarc; /* Return code for data: DAF_ or REF_ or zero. */
+struct parse *pcb; /* Parse control block for this parse. */
+int conrefsw; /* 1=content reference att specified; 0=no. */
+int didreq; /* 1=required implied empty tag processed; 0=no.*/
+{
+     fprintf(stderr, /* The format string is continued with a backslash-newline. */
+             "CONTENT etagimct=%d dostag=%d datarc=%d pname=%s action=%d \
+conrefsw=%d didreq=%d\n",
+             etagimct, dostag, datarc, pcb->pname, pcb->action,
+             conrefsw, didreq);
+}
+/* TRACESTG: Trace start-tag context validation input and results.
+ */
+VOID tracestg(curetd, dataret, rc, nextetd, mexts)
+struct etd *curetd; /* The etd for this tag. */
+int dataret; /* Data pending: DAF_ REF_ 0=not #PCDATA. */
+int rc; /* Return code from context or other test. */
+struct etd *nextetd; /* The etd for a forced start-tag (if rc==2). */
+int mexts; /* >0=stack level of minus grp; -1=plus; 0=none.*/
+{
+     fprintf(stderr,
+             "STARTTAG newetd=%p; dataret=%d; rc=%d; nextetd=%p; mexts=%d.\n",
+             (UNIV)curetd, dataret, rc, (UNIV)nextetd, mexts); /* Both etds are shown as addresses only. */
+}
+/* TRACEETG: Trace end-tag matching test on stack.
+ */
+VOID traceetg(pts, curetd, tsl, etagimct)
+struct tag *pts; /* Stack entry for this tag. */
+struct etd *curetd; /* The etd for this tag. */
+int tsl; /* Temporary stack level for looping. */
+int etagimct; /* Num of implicitly ended tags left on stack. */
+{
+     fprintf(stderr,
+             "ENDTAG tsl=%d; newetd=%p; stacketd=%p; tflags=%02x; etagimct=%d.\n",
+             tsl, (UNIV)curetd, (UNIV)pts->tetd, pts->tflags, etagimct); /* newetd is curetd; stacketd is the stack entry's etd. */
+}
+/* TRACEECB: Trace entity control block activity.
+ */ +VOID traceecb(action, p) +char *action; +struct entity *p; +{ + static char estype1[] = " TMMMSEIXCNFPDLK"; + static char estype2[] = " DS "; + if (!p) + return; + fprintf(stderr, + "%-8s (es=%d) type %c%c entity %s at %p containing ", + action, es, estype1[p->estore], estype2[p->estore], p->ename+1, + (UNIV)p); + if (p->estore==ESN && strcmp(action, "ENTDEF")) + traceesn(p->etx.n); + else if (p->etx.x==0) + fprintf(stderr, "[NOTHING]"); + else + fprintf(stderr, "%s", + p->etx.c[0] ? (char *)p->etx.c : "[EMPTY]"); + putc('\n', stderr); +} +/* TRACEDCN: Trace data content notation activity. + */ +VOID tracedcn(p) +struct dcncb *p; +{ + fprintf(stderr, + "DCN dcn=%p; adl=%p; notation is %s\n", + (UNIV)p, (UNIV)p->adl, p->ename+1); + if (p->adl) + traceadl(p->adl); +} +/* TRACEESN: Print a data entity control block. + */ +VOID traceesn(p) +PNE p; +{ + fprintf(stderr, "ESN Entity name is %s; entity type is %s.\n", + (NEENAME(p)!=0) ? ((char *)NEENAME(p))+1 : "[UNDEFINED]", + /* NEXTYPE(p)); */ + (NEXTYPE(p)==1 ? "CDATA" : (NEXTYPE(p)==2 ? "NDATA" : "SDATA"))); + fprintf(stderr, " System ID is %s\n", + (NEID(p)!=0) ? (char *)NEID(p) : "[UNDEFINED]"); + if (p->nedcn!=0) + tracedcn(p->nedcn); +} +/* TRACESRM: Print the members of a short reference map. + */ +VOID tracesrm(action, pg, gi) +char *action; +TECB pg; +UNCH *gi; +{ + int i = 0; /* Loop counter. */ + + if (pg==SRMNULL) + fprintf(stderr, "%-8s SHORTREF table empty for %s.\n", action, gi); + else { + fprintf(stderr, "%-8s %s at %p mapped for %s.\n", + action, pg[0]->ename+1, (UNIV)pg, + gi ? (char *)gi : "definition"); + while (++i<=lex.s.dtb[0].mapdata) + if (pg[i]) + fprintf(stderr, "%14s%02u %p %s\n", + "SR", i, (UNIV)pg[i], pg[i]->ename+1); + } +} +/* TRACEADL: Print an attribute definition list. + */ +VOID traceadl(al) +struct ad al[]; +{ + int i=0; + + fprintf(stderr, "ADLIST %p %d membe%s; %d attribut%s\n", + (UNIV)al, ADN(al), ADN(al)==1 ? "r" : "rs", AN(al), + AN(al)==1 ? 
"e" : "es"); + while (++i<=ADN(al)) { + fprintf(stderr, + (BITOFF(ADFLAGS(al,i), AGROUP) && ADTYPE(al,i)<=ANOTEGRP) + ? " %p %-8s %02x %02x %2d %2d %p %p\n" + : " %p %-8s %02x %02x %2d %2d %p %p\n", + &al[i], ADNAME(al,i), ADFLAGS(al,i), ADTYPE(al,i), ADNUM(al,i), + ADLEN(al,i), ADVAL(al,i), ADDATA(al,i).x); + if (ADVAL(al,i)) { + fprintf(stderr, "%s", ADVAL(al,i)); + if (ADTYPE(al,i)==AENTITY && ADDATA(al,i).n!=0) { + fprintf(stderr, "=>"); + traceesn(ADDATA(al,i).n); + } + else if (ADTYPE(al,i)==ANOTEGRP) + fprintf(stderr, "=>%s", + (ADDATA(al,i).x->dcnid!=0) + ? (char *)ADDATA(al,i).x->dcnid + : "[UNDEFINED]"); + } + else + fprintf(stderr, "[%s]", + GET(ADFLAGS(al,i), AREQ) + ? "REQUIRED" + : (GET(ADFLAGS(al,i), ACURRENT) ? "CURRENT" : "NULL")); + } + fprintf(stderr, "\n"); +} +/* TRACEMOD: Print the members of a model. + */ +VOID tracemod(pg) +struct thdr pg[]; +{ + fprintf(stderr, "MODEL %p %02x %d\n", + (UNIV)&pg[0], pg[0].ttype, pg[0].tu.tnum); + if ((pg[0].ttype & MKEYWORD) == 0) { + int i; + + for (i = 1; i < pg[0].tu.tnum + 2; i++) { + if (GET(pg[i].ttype, TTMASK) == TTETD) + fprintf(stderr, " %p %02x %s\n", + (UNIV)&pg[i], pg[i].ttype, pg[i].tu.thetd->etdgi+1); + else if (GET(pg[i].ttype, TTMASK) == TTCHARS) + fprintf(stderr, " %p %02x %s\n", + (UNIV)&pg[i], pg[i].ttype, "#PCDATA"); + else + fprintf(stderr, " %p %02x %d\n", + (UNIV)&pg[i], pg[i].ttype, pg[i].tu.tnum); + } + } + fprintf(stderr, "\n"); +} +/* TRACEGRP: Print the members of a name (i.e., etd) group. + */ +VOID tracegrp(pg) +struct etd *pg[]; +{ + int i = -1; /* Loop counter. */ + + fprintf(stderr, "ETDGRP %p\n", (UNIV)pg); + while (pg[++i]!=0) + fprintf(stderr, " %p %s\n", (UNIV)pg[i], pg[i]->etdgi+1); +} +/* TRACENGR: Print the members of a notation (i.e., dcncb) group. + */ +VOID tracengr(pg) +struct dcncb *pg[]; +{ + int i = -1; /* Loop counter. 
*/ + + fprintf(stderr, "DCNGRP %p\n", (UNIV)pg); + while (pg[++i]!=0) + fprintf(stderr, " %p %s\n", (UNIV)pg[i], pg[i]->ename+1); +} +/* TRACEETD: Print an element type definition. + */ +VOID traceetd(p) +struct etd *p; /* Pointer to an etd. */ +{ + fprintf(stderr, +"ETD etd=%p %s min=%02x cmod=%p ttype=%02x mex=%p, pex=%p, ", + (UNIV)p, p->etdgi+1, p->etdmin, (UNIV)p->etdmod, + p->etdmod->ttype, (UNIV)p->etdmex, (UNIV)p->etdpex); + fprintf(stderr, "adl=%p, srm=%s.\n", + (UNIV)p->adl, + (p->etdsrm==SRMNULL) + ? "#EMPTY" + : (p->etdsrm) ? (char *)(p->etdsrm[0]->ename+1) : "#CURRENT"); +} +/* TRACEID: Print an ID control block. + */ +VOID traceid(action, p) +char *action; +struct id *p; /* Pointer to an ID. */ +{ + fprintf(stderr, "%-8s %s at %p is %s; ", action, p->idname+1, (UNIV)p, + p->iddefed ? "defined" : "undefined"); + fprintf(stderr, "last ref=%p\n", (UNIV)p->idrl); +} +/* TRACEMD: Trace a markup declaration parameter. + */ +VOID tracemd(parmid) +char *parmid; /* Parameter identifier. */ +{ + fprintf(stderr, "MDPARM %-8s for %-8s, token %02d, type %02u, %s.\n", + mdname, subdcl ? (char *)subdcl : "[NONE]", parmno, pcbmd.action, parmid); +} +/* TRACEMS: Trace marked section activity. + */ +VOID tracems(action, code, mslevel, msplevel) +int action; /* 1=began new level; 0=resumed previous. */ +int code; +int mslevel; /* Nesting level of marked sections. */ +int msplevel; /* Nested MS levels subject to special parse. */ +{ + fprintf(stderr, + "MS%c %2d %s nesting level %d (msp %d).\n", + (action ? ' ' : 'E'), code, (action ? "began" : "resumed"), + mslevel, msplevel); +} + +static +VOID tracehits(h) +unsigned long *h; +{ + int i; + fprintf(stderr, " H="); + for (i = grplongs - 1; i >= 0; --i) + fprintf(stderr, "%0*lx", LONGBITS/4, h[i]); +} + +/* TRACEGI: Trace GI testing stages in CONTEXT.C processing. + */ +VOID tracegi(stagenm, gi, mod, pos, Tstart) +char *stagenm; +struct etd *gi; /* ETD of new GI. */ +struct thdr mod[]; /* Model of current open element. 
*/ +struct mpos pos[]; /* Position in open element's model. */ +int Tstart; /* Initial T for this group. */ +{ + int i = 0; /* Loop counter. */ + + fprintf(stderr, "%-10s %d:", stagenm, P); + while (++i<=P) + fprintf(stderr, " %d-%d", pos[i].g, pos[i].t); + fprintf(stderr, " (%u) gocc=%02x gtype=%02x gnum=%d", + M, GOCC, GTYPE, GNUM); + tracehits(H); + fprintf(stderr, " status=%d Tstart=%d\n", STATUX, Tstart); + fprintf(stderr, + "=>%-8s tocc=%02x ttype=%02x thetd=%p (%s) gietd=%p (%s)\n", + tags[ts].tetd->etdgi+1, TOCC, TTYPE, (UNIV)TOKEN.tu.thetd, + (TTYPE + ? (TTYPE==TTETD ? (char *)(TOKEN.tu.thetd->etdgi+1) : "#GROUP") + : "#PCDATA"), + (UNIV)gi, + (gi==ETDCDATA ? "#PCDATA" : (char *)(gi->etdgi+1))); +} +/* TRACEEND: Trace testing for end of group in CONTEXT.C processing. + */ +VOID traceend(stagenm, mod, pos, rc, opt, Tstart) +char *stagenm; +struct thdr mod[]; /* Model of current open element. */ +struct mpos pos[]; /* Position in open element's model. */ +int rc; /* Return code: RCNREQ RCHIT RCMISS RCEND */ +int opt; /* ALLHIT parm: 1=test optionals; 0=ignore. */ +int Tstart; /* Initial T for this group. */ +{ + int i = 0; /* Loop counter. */ + + fprintf(stderr, "%-10s %d:", stagenm, P); + while (++i<=P) + fprintf(stderr, " %d-%d", pos[i].g, pos[i].t); + fprintf(stderr, " (%u) gocc=%02x gtype=%02x gnum=%d", + M, GOCC, GTYPE, GNUM); + tracehits(H); + fprintf(stderr, " status=%d Tstart=%d\n", STATUX, Tstart); + fprintf(stderr, "=>%-8s tocc=%02x ttype=%02x thetd=%p (%s)", + tags[ts].tetd->etdgi+1, TOCC, TTYPE, (UNIV)TOKEN.tu.thetd, + (TTYPE + ? (TTYPE==TTETD ? 
(char *)(TOKEN.tu.thetd->etdgi+1) : "#GROUP") + : "#PCDATA")); + fprintf(stderr, " rc=%d offbitT=%d allhit=%d\n", + rc, offbit(H, (int)T, GNUM), allhit(&GHDR, H, 0, opt)); +} + +#endif /* TRACE */ +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/unix.cfg b/usr.bin/sgmls/sgmls/unix.cfg new file mode 100644 index 0000000..0bc8410 --- /dev/null +++ b/usr.bin/sgmls/sgmls/unix.cfg @@ -0,0 +1,147 @@ +/* unix.cfg: Configuration file for sgmls on Unix. */ + +/* A list of filename templates to use for searching for external entities. +The filenames are separated by the character specified in PATH_FILE_SEP. +See sgmls.man for details. */ +#define DEFAULT_PATH "/usr/local/lib/sgml/%O/%C/%T:%N.%X:%N.%D" +/* The character that separates the filename templates. */ +#define PATH_FILE_SEP ':' +/* The character that separates filenames in a system identifier. +Usually the same as PATH_FILE_SEP. */ +#define SYSID_FILE_SEP ':' +/* The environment variable that contains the list of filename templates. */ +#define PATH_ENV_VAR "SGML_PATH" + +/* MIN_DAT_SUBS_FROM and MIN_DAT_SUBS_TO tell sgmls how to transform a name +or system identifier into a legal filename. A character in +MIN_DAT_SUBS_FROM will be transformed into the character in the +corresponding position in MIN_DAT_SUBS_TO. If there is no such +position, then the character is removed. */ +/* This says that spaces should be transformed to underscores, and +slashes to percents. */ +#define MIN_DAT_SUBS_FROM " /" +#define MIN_DAT_SUBS_TO "_%" + +/* Define this to allow tracing. */ +/* #define TRACE 1 */ + +/* Define this if you want support for subdocuments. This is implemented +using features that are not part of Standard C, so you might not want +to define it if you are porting to a new system. Otherwise I suggest +you leave it defined. 
+ */ +#define SUPPORT_SUBDOC 1 + +/* Define HAVE_EXTENDED_PRINTF if your *printf functions support +X/Open extensions; if they do, then, for example, + + printf("%2$s%1$s", "bar", "foo") + +should print `foobar'. */ + +/* #define HAVE_EXTENDED_PRINTF 1 */ + +/* Define HAVE_CAT if your system provides the X/Open message +catalogue functions catopen() and catgets(), and you want to use them. +An implementation of these functions is included and will be used if +you don't define this. On SunOS 4.1.1, if you do define this you +should set CC=/usr/xpg2bin/cc in the makefile. */ + +/* #define HAVE_CAT 1 */ + +#ifdef __STDC__ +/* Define this if your compiler supports prototypes. */ +#define USE_PROTOTYPES 1 +#endif + +/* Can't use <stdarg.h> without prototypes. */ +#ifndef USE_PROTOTYPES +#define VARARGS 1 +#endif + +/* If your compiler defines __STDC__ but doesn't provide <stdarg.h>, +you must define VARARGS yourself here. */ +/* #define VARARGS 1 */ + +/* Define this if you do not have strerror(). */ +#define STRERROR_MISSING 1 + +/* Define this unless the character testing functions in ctype.h +are defined for all values representable as an unsigned char. You do +not need to define this if your system is ANSI C conformant. You +should define it for old Unix systems. */ +/* #define USE_ISASCII 1 */ + +/* Define this if your system provides the BSD style string operations +rather than ANSI C ones (eg bcopy() rather than memcpy(), and index() +rather than strchr()). */ +/* #define BSD_STRINGS 1 */ + +/* Define this if you have getopt(). */ +#define HAVE_GETOPT 1 + +/* Define this if you have access(). */ +#define HAVE_ACCESS 1 + +/* Define this if you have <unistd.h>. */ +#define HAVE_UNISTD_H 1 + +/* Define this if you have <sys/stat.h>. */ +#define HAVE_SYS_STAT_H 1 + +/* Define this if you have waitpid(). */ +#define HAVE_WAITPID 1 + +/* Define this if your system is POSIX.1 (ISO 9945-1:1990) compliant. 
*/ +#define POSIX 1 + +/* Define this if you have the vfork() system call. */ +#define HAVE_VFORK 1 + +/* Define this if you have <vfork.h>. */ +#define HAVE_VFORK_H 1 + +/* Define this if you don't have <stdlib.h> */ +/* #define STDLIB_H_MISSING 1 */ + +/* Define this if you don't have <stddef.h> */ +/* #define STDDEF_H_MISSING 1 */ + +/* Define this if you don't have <limits.h> */ +/* #define LIMITS_H_MISSING 1 */ + +/* Define this if you don't have remove(); unlink() will be used instead. */ +#define REMOVE_MISSING 1 + +/* Define this if you don't have raise(); kill() will be used instead. */ +#define RAISE_MISSING 1 + +/* Define this if you don't have fsetpos() and fgetpos(). */ +#define FPOS_MISSING 1 + +/* Universal pointer type. */ +/* If your compiler doesn't fully support void *, change `void' to `char'. */ +typedef void *UNIV; + +/* If your compiler doesn't support void as a function return type, +change `void' to `int'. */ +typedef void VOID; + +/* If you don't have an ANSI C conformant <limits.h>, define +CHAR_SIGNED as 1 or 0 according to whether the `char' type is signed. +The <limits.h> on some versions of System Release V 3.2 is not ANSI C +conformant: the value of CHAR_MIN is 0 even though the `char' type is +signed. */ + +/* #define CHAR_SIGNED 1 */ +/* #define CHAR_SIGNED 0 */ +#ifndef CHAR_SIGNED +#include <limits.h> +#if CHAR_MIN < 0 +#define CHAR_SIGNED 1 +#else +#define CHAR_SIGNED 0 +#endif +#endif /* not CHAR_SIGNED */ + +/* Assume the system character set is ISO Latin-1. */ +#include "latin1.h" diff --git a/usr.bin/sgmls/sgmls/unixproc.c b/usr.bin/sgmls/sgmls/unixproc.c new file mode 100644 index 0000000..9e79d62 --- /dev/null +++ b/usr.bin/sgmls/sgmls/unixproc.c @@ -0,0 +1,98 @@ +/* unixproc.c - + + Unix implementation of run_process(). + + Written by James Clark (jjc@jclark.com). 
+*/ + +#include "config.h" + +#ifdef SUPPORT_SUBDOC + +#ifdef POSIX + +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> + +#endif /* POSIX */ + +#include "std.h" +#include "entity.h" +#include "appl.h" + +#ifndef POSIX + +/* Fallback decoding of a traditional wait() status word: low byte 0 means a normal exit, low byte 0177 means stopped, any other low byte means death by a signal. */ +#define WIFSTOPPED(s) (((s) & 0377) == 0177) +/* Each masked comparison is parenthesized: `!=' binds tighter than `&', so `(s) & 0377 != 0177' would test `(s) & 1'. */ +#define WIFSIGNALED(s) ((((s) & 0377) != 0) && (((s) & 0377) != 0177)) +#define WIFEXITED(s) (((s) & 0377) == 0) +#define WEXITSTATUS(s) (((s) >> 8) & 0377) +#define WTERMSIG(s) ((s) & 0177) +#define WSTOPSIG(s) (((s) >> 8) & 0377) +#define _SC_OPEN_MAX 0 +#define sysconf(name) (20) +typedef int pid_t; + +#endif /* not POSIX */ + +#ifndef HAVE_VFORK +#define vfork() fork() +#endif /* not HAVE_VFORK */ + +#ifdef HAVE_VFORK_H +#include <vfork.h> +#endif /* HAVE_VFORK_H */ + +int run_process(argv) +char **argv; +{ + pid_t pid; + int status; + int ret; + + /* Can't trust Unix implementations to support fflush(NULL). */ + fflush(stderr); + fflush(stdout); + + pid = vfork(); + if (pid == 0) { + /* child */ + int i; + int open_max = (int)sysconf(_SC_OPEN_MAX); + + for (i = 3; i < open_max; i++) + (void)close(i); + execvp(argv[0], argv); + appl_error(E_EXEC, argv[0], strerror(errno)); + fflush(stderr); + _exit(127); + } + if (pid < 0) { + appl_error(E_FORK, strerror(errno)); + return -1; + } + /* parent */ + while ((ret = wait(&status)) != pid) + if (ret < 0) { + appl_error(E_WAIT, strerror(errno)); + return -1; + } + if (WIFSIGNALED(status)) { + appl_error(E_SIGNAL, argv[0], WTERMSIG(status)); + return -1; + } + /* Must have exited normally. 
*/ + return WEXITSTATUS(status); +} + +#endif /* SUPPORT_SUBDOC */ + +/* +Local Variables: +c-indent-level: 5 +c-continued-statement-offset: 5 +c-brace-offset: -5 +c-argdecl-indent: 0 +c-label-offset: -5 +End: +*/ diff --git a/usr.bin/sgmls/sgmls/version.c b/usr.bin/sgmls/sgmls/version.c new file mode 100644 index 0000000..7144593 --- /dev/null +++ b/usr.bin/sgmls/sgmls/version.c @@ -0,0 +1 @@ +char *version_string = "1.1"; diff --git a/usr.bin/sgmls/sgmls/xfprintf.c b/usr.bin/sgmls/sgmls/xfprintf.c new file mode 100644 index 0000000..1780795 --- /dev/null +++ b/usr.bin/sgmls/sgmls/xfprintf.c @@ -0,0 +1,568 @@ +/* xfprintf.c - + X/Open extended v?fprintf implemented in terms of v?fprintf. + + Written by James Clark (jjc@jclark.com). +*/ + +/* Compile with: + + -DVARARGS to use varargs.h instead of stdarg.h + -DLONG_DOUBLE_MISSING if your compiler doesn't like `long double' + -DFP_SUPPORT to include floating point stuff +*/ + +#include "config.h" + +#ifndef HAVE_EXTENDED_PRINTF + +#include "std.h" + +#ifdef lint +/* avoid stupid lint warnings */ +#undef va_arg +#define va_arg(ap, type) (ap, (type)0) +#endif + +#ifdef FP_SUPPORT +#ifdef LONG_DOUBLE_MISSING +typedef double long_double; +#else +typedef long double long_double; +#endif +#endif /* FP_SUPPORT */ + +#ifndef __STDC__ +#define const /* as nothing */ +#endif + +#ifdef USE_PROTOTYPES +#define P(parms) parms +#else +#define P(parms) () +#endif + +#ifdef VARARGS +typedef int (*printer)(); +#else +typedef int (*printer)(UNIV, const char *, ...); +#endif + +enum arg_type { + NONE, + INT, + UNSIGNED, + LONG, + UNSIGNED_LONG, +#ifdef FP_SUPPORT + DOUBLE, + LONG_DOUBLE, +#endif /* FP_SUPPORT */ + PCHAR, + PINT, + PLONG, + PSHORT +}; + +union arg { + int i; + unsigned u; + long l; + unsigned long ul; +#ifdef FP_SUPPORT + double d; + long_double ld; +#endif /* FP_SUPPORT */ + char *pc; + UNIV pv; + int *pi; + short *ps; + long *pl; +}; + +#define NEXT 0 +#define MISSING 10 + +struct spec { + enum arg_type type; + char 
pos; + char field_width; + char precision; +}; + +#define FLAG_CHARS "-+ #0" + +static int parse_spec P((const char **, struct spec *)); +static int find_arg_types P((const char *, enum arg_type *)); +static void get_arg P((enum arg_type, va_list *, union arg *)); +static int do_arg P((UNIV, printer, const char *, enum arg_type, union arg *)); +static int xdoprt P((UNIV, printer, const char *, va_list)); +static int printit P((UNIV, printer, const char *, va_list, int, union arg *)); +static int maybe_positional P((const char *)); + +/* Return 1 if sucessful, 0 otherwise. **pp points to character after % */ + +static int parse_spec(pp, sp) +const char **pp; +struct spec *sp; +{ + char modifier = 0; + sp->pos = NEXT; + if (isdigit((unsigned char)(**pp)) && (*pp)[1] == '$') { + if (**pp == '0') + return 0; + sp->pos = **pp - '0'; + *pp += 2; + } + + while (**pp != '\0' && strchr(FLAG_CHARS, **pp)) + *pp += 1; + + /* handle the field width */ + + sp->field_width = MISSING; + if (**pp == '*') { + *pp += 1; + if (isdigit((unsigned char)**pp) && (*pp)[1] == '$') { + if (**pp == '0') + return 0; + sp->field_width = **pp - '0'; + *pp += 2; + } + else + sp->field_width = NEXT; + } + else { + while (isdigit((unsigned char)**pp)) + *pp += 1; + } + + /* handle the precision */ + sp->precision = MISSING; + if (**pp == '.') { + *pp += 1; + if (**pp == '*') { + *pp += 1; + if (isdigit((unsigned char)**pp) && (*pp)[1] == '$') { + if (**pp == '0') + return 0; + sp->precision = **pp - '0'; + *pp += 2; + } + else + sp->precision = NEXT; + } + else { + while (isdigit((unsigned char)**pp)) + *pp += 1; + } + } + /* handle h l or L */ + + if (**pp == 'h' || **pp == 'l' || **pp == 'L') { + modifier = **pp; + *pp += 1; + } + + switch (**pp) { + case 'd': + case 'i': + sp->type = modifier == 'l' ? LONG : INT; + break; + case 'o': + case 'u': + case 'x': + case 'X': + sp->type = modifier == 'l' ? 
UNSIGNED_LONG : UNSIGNED; + break; +#ifdef FP_SUPPORT + case 'e': + case 'E': + case 'f': + case 'g': + case 'G': + sp->type = modifier == 'L' ? LONG_DOUBLE : DOUBLE; + break; +#endif /* FP_SUPPORT */ + case 'c': + sp->type = INT; + break; + case 's': + sp->type = PCHAR; + break; + case 'p': + /* a pointer to void has the same representation as a pointer to char */ + sp->type = PCHAR; + break; + case 'n': + if (modifier == 'h') + sp->type = PSHORT; + else if (modifier == 'l') + sp->type = PLONG; + else + sp->type = PINT; + break; + case '%': + sp->type = NONE; + break; + default: + return 0; + } + *pp += 1; + return 1; +} + + +static int find_arg_types(format, arg_type) + const char *format; + enum arg_type *arg_type; +{ + int i, pos; + const char *p; + struct spec spec; + + for (i = 0; i < 9; i++) + arg_type[i] = NONE; + + pos = 0; + + p = format; + while (*p) + if (*p == '%') { + p++; + if (!parse_spec(&p, &spec)) + return 0; + if (spec.type != NONE) { + int n; + if (spec.pos == NEXT) + n = pos++; + else + n = spec.pos - 1; + if (n < 9) { + enum arg_type t = arg_type[n]; + if (t != NONE && t != spec.type) + return 0; + arg_type[n] = spec.type; + } + } + if (spec.field_width != MISSING) { + int n; + if (spec.field_width == NEXT) + n = pos++; + else + n = spec.field_width - 1; + if (n < 9) { + enum arg_type t = arg_type[n]; + if (t != NONE && t != INT) + return 0; + arg_type[n] = INT; + } + } + if (spec.precision != MISSING) { + int n; + if (spec.precision == NEXT) + n = pos++; + else + n = spec.precision - 1; + if (n < 9) { + enum arg_type t = arg_type[n]; + if (t != NONE && t != INT) + return 0; + arg_type[n] = INT; + } + } + } + else + p++; + return 1; +} + +static void get_arg(arg_type, app, argp) + enum arg_type arg_type; + va_list *app; + union arg *argp; +{ + switch (arg_type) { + case NONE: + break; + case INT: + argp->i = va_arg(*app, int); + break; + case UNSIGNED: + argp->u = va_arg(*app, unsigned); + break; + case LONG: + argp->l = va_arg(*app, long); + 
break; + case UNSIGNED_LONG: + argp->ul = va_arg(*app, unsigned long); + break; +#ifdef FP_SUPPORT + case DOUBLE: + argp->d = va_arg(*app, double); + break; + case LONG_DOUBLE: + argp->ld = va_arg(*app, long_double); + break; +#endif /* FP_SUPPORT */ + case PCHAR: + argp->pc = va_arg(*app, char *); + break; + case PINT: + argp->pi = va_arg(*app, int *); + break; + case PSHORT: + argp->ps = va_arg(*app, short *); + break; + case PLONG: + argp->pl = va_arg(*app, long *); + break; + default: + abort(); + } +} + +static int do_arg(handle, func, buf, arg_type, argp) + UNIV handle; + printer func; + const char *buf; + enum arg_type arg_type; + union arg *argp; +{ + switch (arg_type) { + case NONE: + return (*func)(handle, buf); + case INT: + return (*func)(handle, buf, argp->i); + case UNSIGNED: + return (*func)(handle, buf, argp->u); + case LONG: + return (*func)(handle, buf, argp->l); + case UNSIGNED_LONG: + return (*func)(handle, buf, argp->ul); +#ifdef FP_SUPPORT + case DOUBLE: + return (*func)(handle, buf, argp->d); + case LONG_DOUBLE: + return (*func)(handle, buf, argp->ld); +#endif /* FP_SUPPORT */ + case PCHAR: + return (*func)(handle, buf, argp->pc); + case PINT: + return (*func)(handle, buf, argp->pi); + case PSHORT: + return (*func)(handle, buf, argp->ps); + case PLONG: + return (*func)(handle, buf, argp->pl); + default: + abort(); + } + /* NOTREACHED */ +} + +static int printit(handle, func, p, ap, nargs, arg) + UNIV handle; + printer func; + const char *p; + va_list ap; + int nargs; + union arg *arg; +{ + char buf[512]; /* enough for a spec */ + int count = 0; + int pos = 0; + + while (*p) + if (*p == '%') { + char *q; + struct spec spec; + const char *start; + int had_field_width; + union arg *argp; + union arg a; + int res; + + start = ++p; + if (!parse_spec(&p, &spec)) + abort(); /* should have caught it in find_arg_types */ + + buf[0] = '%'; + q = buf + 1; + + if (spec.pos != NEXT) + start += 2; + + /* substitute in precision and field width if necessary 
*/ + had_field_width = 0; + while (start < p) { + if (*start == '*') { + char c; + int n, val; + + start++; + if (!had_field_width && spec.field_width != MISSING) { + c = spec.field_width; + had_field_width = 1; + } + else + c = spec.precision; + if (c == NEXT) + n = pos++; + else { + start += 2; + n = c - 1; + } + if (n >= nargs) + val = va_arg(ap, int); + else + val = arg[n].i; + + /* ignore negative precision */ + if (val >= 0 || q[-1] != '.') { + (void)sprintf(q, "%d", val); + q = strchr(q, '\0'); + } + } + else + *q++ = *start++; + } + *q++ = '\0'; + + argp = 0; + if (spec.type != NONE) { + int n = spec.pos == NEXT ? pos++ : spec.pos - 1; + if (n >= nargs) { + get_arg(spec.type, &ap, &a); + argp = &a; + } + else + argp = arg + n; + } + + res = do_arg(handle, func, buf, spec.type, argp); + if (res < 0) + return -1; + count += res; + } + else { + if ((*func)(handle, "%c", *p++) < 0) + return -1; + count++; + } + return count; +} + +/* Do a quick check to see if it may contain any positional thingies. Return 1 if FORMAT has a `$' preceded by a digit that itself follows `%' or `*', 0 otherwise. */ + +static int maybe_positional(format) + const char *format; +{ + const char *p; + + p = format; + /* The p++ steps past a `$' that was not positional; without it strchr() would find the same `$' forever. */ + for (;; p++) { + p = strchr(p, '$'); + if (!p) + return 0; + if (p - format >= 2 + && isdigit((unsigned char)p[-1]) + && (p[-2] == '%' || p[-2] == '*')) + break; /* might be a positional thingy */ + } + return 1; +} + +static int xdoprt(handle, func, format, ap) + UNIV handle; + printer func; + const char *format; + va_list ap; +{ + enum arg_type arg_type[9]; + union arg arg[9]; + int nargs, i; + + if (!find_arg_types(format, arg_type)) + return -1; + + for (nargs = 0; nargs < 9; nargs++) + if (arg_type[nargs] == NONE) + break; + + for (i = nargs; i < 9; i++) + if (arg_type[i] != NONE) + return -1; + + for (i = 0; i < nargs; i++) + get_arg(arg_type[i], &ap, arg + i); + + return printit(handle, func, format, ap, nargs, arg); +} + +#ifdef VARARGS +static int do_fprintf(va_alist) va_dcl +#else +static int do_fprintf(UNIV p, const char *format,...) 
+#endif +{ +#ifdef VARARGS + UNIV p; + const char *format; +#endif + va_list ap; + int res; + +#ifdef VARARGS + va_start(ap); + p = va_arg(ap, UNIV); + format = va_arg(ap, char *); +#else + va_start(ap, format); +#endif + + res = vfprintf((FILE *)p, format, ap); + va_end(ap); + return res; +} + +#ifdef VARARGS +int xfprintf(va_alist) va_dcl +#else +int xfprintf(FILE *fp, const char *format, ...) +#endif +{ +#ifdef VARARGS + FILE *fp; + char *format; +#endif + va_list ap; + int res; + +#ifdef VARARGS + va_start(ap); + fp = va_arg(ap, FILE *); + format = va_arg(ap, char *); +#else + va_start(ap, format); +#endif + if (maybe_positional(format)) + res = xdoprt((UNIV)fp, do_fprintf, format, ap); + else + res = vfprintf(fp, format, ap); + va_end(ap); + return res; +} + +int xvfprintf(fp, format, ap) + FILE *fp; + const char *format; + va_list ap; +{ + int res; + if (maybe_positional(format)) + res = xdoprt((UNIV)fp, do_fprintf, format, ap); + else + res = vfprintf(fp, format, ap); + return res; +} + +#endif /* not HAVE_EXTENDED_PRINTF */ diff --git a/usr.bin/sgmls/sgmlsasp/Makefile b/usr.bin/sgmls/sgmlsasp/Makefile new file mode 100644 index 0000000..1d60f29 --- /dev/null +++ b/usr.bin/sgmls/sgmlsasp/Makefile @@ -0,0 +1,18 @@ +# +# Bmakefile for sgmlsasp +# +# $id$ +# + +PROG= sgmlsasp + +SRCS+= sgmlsasp.c replace.c + +CFLAGS+= -I${.CURDIR}/../libsgmls -I${.CURDIR}/../sgmls + +LDADD= ${LIBSGMLS} +DPADD= ${LIBSGMLS} + +.include "../Makefile.inc" +.include <bsd.prog.mk> + diff --git a/usr.bin/sgmls/sgmlsasp/replace.c b/usr.bin/sgmls/sgmlsasp/replace.c new file mode 100644 index 0000000..95fa113 --- /dev/null +++ b/usr.bin/sgmls/sgmlsasp/replace.c @@ -0,0 +1,467 @@ +/* replace.c + Parse ASP style replacement file. + + Written by James Clark (jjc@jclark.com). 
*/ + +#include "sgmlsasp.h" +#include "replace.h" + +#define TABLE_SIZE 251 + +struct table_entry { + enum event_type type; + char *gi; + struct replacement replacement; + struct table_entry *next; +}; + +struct replacement_table { + struct table_entry *table[TABLE_SIZE]; +}; + +struct buffer { + char *s; + unsigned len; + unsigned size; +}; + +/* Tokens returned by get_token(). */ + +#define STRING 1 +#define STAGO 2 +#define ETAGO 3 +#define PLUS 4 + +static int get P((void)); +static int peek P((void)); +static int get_token P((void)); +static void scan_name P((struct buffer *, int)); +static struct replacement *define_replacement + P((struct replacement_table *, enum event_type, char *)); +static struct replacement_item **parse_string + P((struct replacement_item **, int)); +static UNIV xmalloc P((unsigned)); +static UNIV xrealloc P((UNIV, unsigned)); +static struct replacement_item **add_replacement_data + P((struct replacement_item **, char *, unsigned)); +static struct replacement_item **add_replacement_attr + P((struct replacement_item **, char *)); +static int hash P((enum event_type, char *)); +static NO_RETURN void parse_error VP((char *,...)); +static VOID buffer_init P((struct buffer *)); +static VOID buffer_append P((struct buffer *, int)); +static char *buffer_extract P((struct buffer *)); +#if 0 +static VOID buffer_free P((struct buffer *)); +#endif + +#define buffer_length(buf) ((buf)->len) + +#define NEW(type) ((type *)xmalloc(sizeof(type))) + +static int current_lineno; +static char *current_file; +static FILE *fp; + +struct replacement_table *make_replacement_table() +{ + int i; + struct replacement_table *tablep; + + tablep = NEW(struct replacement_table); + for (i = 0; i < TABLE_SIZE; i++) + tablep->table[i] = 0; + return tablep; +} + +void load_replacement_file(tablep, file) + struct replacement_table *tablep; + char *file; +{ + int tok; + struct buffer name; + + buffer_init(&name); + errno = 0; + fp = fopen(file, "r"); + if (!fp) { + if 
(errno) + error("can't open `%s': %s", file, strerror(errno)); + else + error("can't open `%s'", file); + } + + current_lineno = 1; + current_file = file; + tok = get_token(); + while (tok != EOF) { + struct replacement *p; + struct replacement_item **tail; + enum event_type type; + + if (tok != STAGO && tok != ETAGO) + parse_error("syntax error"); + type = tok == STAGO ? START_ELEMENT : END_ELEMENT; + scan_name(&name, '>'); + p = define_replacement(tablep, type, buffer_extract(&name)); + tok = get_token(); + if (tok == PLUS) { + if (p) + p->flags |= NEWLINE_BEGIN; + tok = get_token(); + } + tail = p ? &p->items : 0; + while (tok == STRING) { + tail = parse_string(tail, type == START_ELEMENT); + tok = get_token(); + } + if (tok == PLUS) { + if (p) + p->flags |= NEWLINE_END; + tok = get_token(); + } + } + fclose(fp); +} + +/* Parse the rest of a quoted string (the opening quote has already been read), appending data and, when RECOG_ATTR is non-zero, [attribute] items at TAIL. Returns the new tail. */ +static +struct replacement_item **parse_string(tail, recog_attr) + struct replacement_item **tail; + int recog_attr; +{ + struct buffer buf; + unsigned len; + + buffer_init(&buf); + for (;;) { + int c = get(); + if (c == '\"') + break; + /* An unterminated string must not be appended to forever once the file ends. */ + if (c == EOF) + parse_error("unfinished string at end of file"); + if (recog_attr && c == '[') { + if (buffer_length(&buf)) { + len = buffer_length(&buf); + tail = add_replacement_data(tail, buffer_extract(&buf), len); + } + scan_name(&buf, ']'); + tail = add_replacement_attr(tail, buffer_extract(&buf)); + } + else { + if (c == '\\') { + c = get(); + switch (c) { + case EOF: + parse_error("unfinished string at end of file"); + case 's': + buffer_append(&buf, ' '); + break; + case 'n': + buffer_append(&buf, '\n'); + break; + case 't': + buffer_append(&buf, '\t'); + break; + case 'r': + buffer_append(&buf, '\r'); + break; + case 'f': + buffer_append(&buf, '\f'); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + { + int val = c - '0'; + c = peek(); + if ('0' <= c && c <= '7') { + (void)get(); + val = val*8 + (c - '0'); + c = peek(); + if ('0' <= c && c <= '7') { + (void)get(); + val = val*8 + (c - '0'); + } + } + 
buffer_append(&buf, val); + break; + } + default: + buffer_append(&buf, c); + break; + } + } + else + buffer_append(&buf, c); + } + } + len = buffer_length(&buf); + if (len > 0) + tail = add_replacement_data(tail, buffer_extract(&buf), len); + return tail; +} + +static +struct replacement_item **add_replacement_data(tail, buf, n) + struct replacement_item **tail; + char *buf; + unsigned n; +{ + if (!tail) + free(buf); + else { + *tail = NEW(struct replacement_item); + (*tail)->type = DATA_REPL; + (*tail)->u.data.n = n; + (*tail)->next = 0; + (*tail)->u.data.s = buf; + tail = &(*tail)->next; + } + return tail; +} + +static +struct replacement_item **add_replacement_attr(tail, name) + struct replacement_item **tail; + char *name; +{ + if (!tail) + free(name); + else { + *tail = NEW(struct replacement_item); + (*tail)->type = ATTR_REPL; + (*tail)->next = 0; + (*tail)->u.attr = name; + tail = &(*tail)->next; + } + return tail; +} + +static +int get_token() +{ + int c; + + for (;;) { + c = get(); + while (isspace(c)) + c = get(); + if (c != '%') + break; + do { + c = get(); + if (c == EOF) + return EOF; + } while (c != '\n'); + } + switch (c) { + case '+': + return PLUS; + case '<': + c = peek(); + if (c == '/') { + (void)get(); + return ETAGO; + } + return STAGO; + case '"': + return STRING; + case EOF: + return EOF; + default: + parse_error("bad input character `%c'", c); + } +} + +static +void scan_name(buf, term) + struct buffer *buf; + int term; +{ + int c; + for (;;) { + c = get(); + if (c == term) + break; + if (c == '\n' || c == EOF) + parse_error("missing `%c'", term); + if (fold_general_names) { + if (islower((unsigned char)c)) + c = toupper((unsigned char)c); + } + buffer_append(buf, c); + } + if (buffer_length(buf) == 0) + parse_error("empty name"); + buffer_append(buf, '\0'); +} + +static +int get() +{ + int c = getc(fp); + if (c == '\n') + current_lineno++; + return c; +} + +static +int peek() +{ + int c = getc(fp); + if (c != EOF) + ungetc(c, fp); + 
return c; +} + +struct replacement *lookup_replacement(tablep, type, name) + struct replacement_table *tablep; + enum event_type type; + char *name; +{ + int h = hash(type, name); + struct table_entry *p; + + for (p = tablep->table[h]; p; p = p->next) + if (strcmp(name, p->gi) == 0 && type == p->type) + return &p->replacement; + return 0; +} + +/* Return 0 if already defined. */ + +static +struct replacement *define_replacement(tablep, type, name) + struct replacement_table *tablep; + enum event_type type; + char *name; +{ + int h = hash(type, name); + struct table_entry *p; + + for (p = tablep->table[h]; p; p = p->next) + if (strcmp(name, p->gi) == 0 && type == p->type) + return 0; + p = NEW(struct table_entry); + p->next = tablep->table[h]; + tablep->table[h] = p; + p->type = type; + p->gi = name; + p->replacement.flags = 0; + p->replacement.items = 0; + return &p->replacement; +} + +static +VOID buffer_init(buf) + struct buffer *buf; +{ + buf->size = buf->len = 0; + buf->s = 0; +} + +static +char *buffer_extract(buf) + struct buffer *buf; +{ + char *s = buf->s; + buf->s = 0; + buf->len = 0; + buf->size = 0; + return s; +} + +#if 0 +static +VOID buffer_free(buf) + struct buffer *buf; +{ + if (buf->s) { + free((UNIV)buf->s); + buf->s = 0; + buf->size = buf->size = 0; + } +} +#endif + +static +VOID buffer_append(buf, c) + struct buffer *buf; + int c; +{ + if (buf->len >= buf->size) { + if (!buf->size) + buf->s = (char *)xmalloc(buf->size = 10); + else + buf->s = (char *)xrealloc((UNIV)buf->s, buf->size *= 2); + } + buf->s[buf->len] = c; + buf->len += 1; +} + +static +int hash(type, s) + enum event_type type; + char *s; +{ + unsigned long h = 0, g; + + while (*s != 0) { + h <<= 4; + h += *s++; + if ((g = h & 0xf0000000) != 0) { + h ^= g >> 24; + h ^= g; + } + } + h ^= (int)type; + return (int)(h % TABLE_SIZE); +} + +static +UNIV xmalloc(n) + unsigned n; +{ + UNIV p = (UNIV)malloc(n); + if (!p) + parse_error("out of memory"); + return p; +} + +static +UNIV 
xrealloc(p, size) + UNIV p; + unsigned size; +{ + p = (UNIV)realloc(p, size); + if (!p) + parse_error("out of memory"); + return p; +} + +static NO_RETURN +#ifdef VARARGS +void parse_error(va_alist) va_dcl +#else +void parse_error(char *message,...) +#endif +{ + char buf[512]; +#ifdef VARARGS + char *message; +#endif + va_list ap; + +#ifdef VARARGS + va_start(ap); + message = va_arg(ap, char *); +#else + va_start(ap, message); +#endif + vsprintf(buf, message, ap); + va_end(ap); + error("%s:%d: %s", current_file, current_lineno, buf); +} diff --git a/usr.bin/sgmls/sgmlsasp/replace.h b/usr.bin/sgmls/sgmlsasp/replace.h new file mode 100644 index 0000000..18c9f82 --- /dev/null +++ b/usr.bin/sgmls/sgmlsasp/replace.h @@ -0,0 +1,35 @@ +/* replace.h + Interface to replacement file parser. */ + +enum replacement_type { + DATA_REPL, + ATTR_REPL + }; + +struct replacement_item { + union { + char *attr; + struct { + char *s; + unsigned n; + } data; + } u; + enum replacement_type type; + struct replacement_item *next; +}; + +#define NEWLINE_BEGIN 01 +#define NEWLINE_END 02 + +struct replacement { + unsigned flags; + struct replacement_item *items; +}; + +enum event_type { START_ELEMENT, END_ELEMENT }; + +struct replacement_table *make_replacement_table P((void)); +void load_replacement_file P((struct replacement_table *, char *)); + +struct replacement * +lookup_replacement P((struct replacement_table *, enum event_type, char *)); diff --git a/usr.bin/sgmls/sgmlsasp/sgmlsasp.1 b/usr.bin/sgmls/sgmlsasp/sgmlsasp.1 new file mode 100644 index 0000000..ab03371 --- /dev/null +++ b/usr.bin/sgmls/sgmlsasp/sgmlsasp.1 @@ -0,0 +1,30 @@ +.\" -*- nroff -*- +.TH SGMLSASP 1 +.SH NAME +sgmlsasp \- translate output of sgmls using ASP replacement files +.SH SYNOPSIS +.B sgmlsasp +.RB [ \-n ] +.I replacement_file\|.\|.\|. +.SH DESCRIPTION +.I sgmlsasp +translates the standard input using the specification in +.I replacement_file\|.\|.\|. +and writes the result to the standard output. 
+The standard input must be in the format output by +.IR sgmls . +Each replacement file must be in the format of an +Amsterdam SGML parser (ASP) replacement file; +this format is described in the ASP documentation. +Duplicate replacements are silently ignored. +The +.B \-n +option disables upper-case substitution (folding) for names in +replacement files; this option should be used with concrete syntaxes +that do not specify upper-case substitution for general names (that +is, names that are not entity names). +.SH BUGS +References to external data entities are ignored. +(Support for external data entities is not implemented in ASP.) +.SH "SEE ALSO" +.IR sgmls (1) diff --git a/usr.bin/sgmls/sgmlsasp/sgmlsasp.c b/usr.bin/sgmls/sgmlsasp/sgmlsasp.c new file mode 100644 index 0000000..fdaf113 --- /dev/null +++ b/usr.bin/sgmls/sgmlsasp/sgmlsasp.c @@ -0,0 +1,278 @@ +/* sgmlsasp.c + Translate sgmls output using ASP replacement file. + + Written by James Clark (jjc@jclark.com). */ + +#include "sgmlsasp.h" +#include "sgmls.h" +#include "replace.h" +#include "getopt.h" + +/* Non-zero if general (non-entity) names should be folded to upper case. 
*/ +int fold_general_names = 1; + +static char *program_name; +static char last_char = '\n'; + +static void output_begin_line P((void)); +static void output_data P((struct sgmls_data *, int)); +static void output_pi P((char *, unsigned)); +static void output_token P((char *)); +static void output_attribute P((struct sgmls_attribute *)); +static void output_data_char P((int)); +static void output_replacement + P((struct replacement *, struct sgmls_attribute *)); +static void do_file P((FILE *, struct replacement_table *)); +static void usage P((void)); +static void input_error P((int, char *, unsigned long)); + +#define output_char(c) (last_char = (c), putchar(c)) + +int main(argc, argv) + int argc; + char **argv; +{ + struct replacement_table *tablep; + int i; + int opt; + program_name = argv[0]; + + while ((opt = getopt(argc, argv, "n")) != EOF) + switch (opt) { + case 'n': + fold_general_names = 0; + break; + case '?': + usage(); + default: + assert(0); + } + if (argc - optind <= 0) + usage(); + tablep = make_replacement_table(); + for (i = optind; i < argc; i++) + load_replacement_file(tablep, argv[i]); + (void)sgmls_set_errhandler(input_error); + do_file(stdin, tablep); + exit(0); +} + +static +void usage() +{ + fprintf(stderr, "usage: %s [-n] replacement_file...\n", program_name); + exit(1); +} + +static +void input_error(num, str, lineno) + int num; + char *str; + unsigned long lineno; +{ + error("Error at input line %lu: %s", lineno, str); +} + +static +void do_file(fp, tablep) + FILE *fp; + struct replacement_table *tablep; +{ + struct sgmls *sp; + struct sgmls_event e; + + sp = sgmls_create(fp); + while (sgmls_next(sp, &e)) + switch (e.type) { + case SGMLS_EVENT_DATA: + output_data(e.u.data.v, e.u.data.n); + break; + case SGMLS_EVENT_ENTITY: + /* XXX what should we do here? 
*/ + break; + case SGMLS_EVENT_PI: + output_pi(e.u.pi.s, e.u.pi.len); + break; + case SGMLS_EVENT_START: + output_replacement(lookup_replacement(tablep, + START_ELEMENT, e.u.start.gi), + e.u.start.attributes); + sgmls_free_attributes(e.u.start.attributes); + break; + case SGMLS_EVENT_END: + output_replacement(lookup_replacement(tablep, END_ELEMENT, e.u.end.gi), + 0); + break; + case SGMLS_EVENT_SUBSTART: + break; + case SGMLS_EVENT_SUBEND: + break; + case SGMLS_EVENT_APPINFO: + break; + case SGMLS_EVENT_CONFORMING: + break; + default: + abort(); + } + sgmls_free(sp); +} + +static +void output_data(v, n) +struct sgmls_data *v; +int n; +{ + int i; + + for (i = 0; i < n; i++) { + char *s = v[i].s; + int len = v[i].len; + for (; len > 0; len--, s++) + output_data_char(*s); + } +} + +static +void output_pi(s, len) + char *s; + unsigned len; +{ + for (; len > 0; len--, s++) + output_data_char(*s); +} + +static +void output_replacement(repl, attributes) +struct replacement *repl; +struct sgmls_attribute *attributes; +{ + struct replacement_item *p; + struct sgmls_attribute *a; + int i; + + if (!repl) + return; + if (repl->flags & NEWLINE_BEGIN) + output_begin_line(); + + for (p = repl->items; p; p = p->next) + switch (p->type) { + case DATA_REPL: + for (i = 0; i < p->u.data.n; i++) + output_char(p->u.data.s[i]); + break; + case ATTR_REPL: + for (a = attributes; a; a = a->next) + if (strcmp(a->name, p->u.attr) == 0) { + output_attribute(a); + break; + } + break; + default: + abort(); + } + + if (repl->flags & NEWLINE_END) + output_begin_line(); +} + +static +void output_attribute(p) +struct sgmls_attribute *p; +{ + switch (p->type) { + case SGMLS_ATTR_IMPLIED: + break; + case SGMLS_ATTR_CDATA: + output_data(p->value.data.v, p->value.data.n); + break; + case SGMLS_ATTR_TOKEN: + { + char **token = p->value.token.v; + int n = p->value.token.n; + + if (n > 0) { + int i; + output_token(token[0]); + for (i = 1; i < n; i++) { + output_char(' '); + output_token(token[i]); + } + } 
+ } + break; + case SGMLS_ATTR_ENTITY: + { + struct sgmls_entity **v = p->value.entity.v; + int n = p->value.entity.n; + int i; + + for (i = 0; i < n; i++) { + if (i > 0) + output_char(' '); + output_token(v[i]->is_internal + ? v[i]->u.internal.name + : v[i]->u.external.name); + } + } + break; + case SGMLS_ATTR_NOTATION: + if (p->value.notation) + output_token(p->value.notation->name); + break; + default: + abort(); + } +} + +static +void output_token(s) + char *s; +{ + for (; *s; s++) + output_char(*s); +} + +static +void output_data_char(c) + int c; +{ + if (c != RSCHAR) { + if (c == RECHAR) + c = '\n'; + output_char(c); + } +} + +static +void output_begin_line() +{ + if (last_char != '\n') + output_char('\n'); +} + +NO_RETURN +#ifdef VARARGS +void error(va_alist) va_dcl +#else +void error(char *message,...) +#endif +{ +#ifdef VARARGS + char *message; +#endif + va_list ap; + + fprintf(stderr, "%s: ", program_name); +#ifdef VARARGS + va_start(ap); + message = va_arg(ap, char *); +#else + va_start(ap, message); +#endif + vfprintf(stderr, message, ap); + va_end(ap); + fputc('\n', stderr); + fflush(stderr); + exit(EXIT_FAILURE); +} diff --git a/usr.bin/sgmls/sgmlsasp/sgmlsasp.h b/usr.bin/sgmls/sgmlsasp/sgmlsasp.h new file mode 100644 index 0000000..b3ad402 --- /dev/null +++ b/usr.bin/sgmls/sgmlsasp/sgmlsasp.h @@ -0,0 +1,26 @@ +/* sgmlsasp.h */ + +#include "config.h" +#include "std.h" + +#ifdef USE_PROTOTYPES +#define P(parms) parms +#else +#define P(parms) () +#endif + +#ifdef __GNUC__ +#define NO_RETURN volatile +#else +#define NO_RETURN /* as nothing */ +#endif + +#ifdef VARARGS +#define VP(parms) () +#else +#define VP(parms) P(parms) +#endif + +NO_RETURN void error VP((char *,...)); + +extern int fold_general_names; diff --git a/usr.bin/sgmls/unix.cfg b/usr.bin/sgmls/unix.cfg new file mode 100644 index 0000000..0bc8410 --- /dev/null +++ b/usr.bin/sgmls/unix.cfg @@ -0,0 +1,147 @@ +/* unix.cfg: Configuration file for sgmls on Unix. 
*/ + +/* A list of filename templates to use for searching for external entities. +The filenames are separated by the character specified in PATH_FILE_SEP. +See sgmls.man for details. */ +#define DEFAULT_PATH "/usr/local/lib/sgml/%O/%C/%T:%N.%X:%N.%D" +/* The character that separates the filename templates. */ +#define PATH_FILE_SEP ':' +/* The character that separates filenames in a system identifier. +Usually the same as PATH_FILE_SEP. */ +#define SYSID_FILE_SEP ':' +/* The environment variable that contains the list of filename templates. */ +#define PATH_ENV_VAR "SGML_PATH" + +/* MIN_DAT_SUBS_FROM and MIN_DAT_SUBS_TO tell sgmls how to transform a name +or system identifier into a legal filename. A character in +MIN_DAT_SUBS_FROM will be transformed into the character in the +corresponding position in MIN_DAT_SUBS_TO. If there is no such +position, then the character is removed. */ +/* This says that spaces should be transformed to underscores, and +slashes to percents. */ +#define MIN_DAT_SUBS_FROM " /" +#define MIN_DAT_SUBS_TO "_%" + +/* Define this to allow tracing. */ +/* #define TRACE 1 */ + +/* Define this if you want support for subdocuments. This is implemented +using features that are not part of Standard C, so you might not want +to define it if you are porting to a new system. Otherwise I suggest +you leave it defined. */ +#define SUPPORT_SUBDOC 1 + +/* Define HAVE_EXTENDED_PRINTF if your *printf functions support +X/Open extensions; if they do, then, for example, + + printf("%2$s%1$s", "bar", "foo") + +should print `foobar'. */ + +/* #define HAVE_EXTENDED_PRINTF 1 */ + +/* Define HAVE_CAT if your system provides the X/Open message +catalogue functions catopen() and catgets(), and you want to use them. +An implementation of these functions is included and will be used if +you don't define this. On SunOS 4.1.1, if you do define this you +should set CC=/usr/xpg2bin/cc in the makefile. 
*/ + +/* #define HAVE_CAT 1 */ + +#ifdef __STDC__ +/* Define this if your compiler supports prototypes. */ +#define USE_PROTOTYPES 1 +#endif + +/* Can't use <stdarg.h> without prototypes. */ +#ifndef USE_PROTOTYPES +#define VARARGS 1 +#endif + +/* If your compiler defines __STDC__ but doesn't provide <stdarg.h>, +you must define VARARGS yourself here. */ +/* #define VARARGS 1 */ + +/* Define this if you do not have strerror(). */ +#define STRERROR_MISSING 1 + +/* Define this unless the character testing functions in ctype.h +are defined for all values representable as an unsigned char. You do +not need to define this if your system is ANSI C conformant. You +should define it for old Unix systems. */ +/* #define USE_ISASCII 1 */ + +/* Define this if your system provides the BSD style string operations +rather than ANSI C ones (e.g. bcopy() rather than memcpy(), and index() +rather than strchr()). */ +/* #define BSD_STRINGS 1 */ + +/* Define this if you have getopt(). */ +#define HAVE_GETOPT 1 + +/* Define this if you have access(). */ +#define HAVE_ACCESS 1 + +/* Define this if you have <unistd.h>. */ +#define HAVE_UNISTD_H 1 + +/* Define this if you have <sys/stat.h>. */ +#define HAVE_SYS_STAT_H 1 + +/* Define this if you have waitpid(). */ +#define HAVE_WAITPID 1 + +/* Define this if your system is POSIX.1 (ISO 9945-1:1990) compliant. */ +#define POSIX 1 + +/* Define this if you have the vfork() system call. */ +#define HAVE_VFORK 1 + +/* Define this if you have <vfork.h>. */ +#define HAVE_VFORK_H 1 + +/* Define this if you don't have <stdlib.h> */ +/* #define STDLIB_H_MISSING 1 */ + +/* Define this if you don't have <stddef.h> */ +/* #define STDDEF_H_MISSING 1 */ + +/* Define this if you don't have <limits.h> */ +/* #define LIMITS_H_MISSING 1 */ + +/* Define this if you don't have remove(); unlink() will be used instead. */ +#define REMOVE_MISSING 1 + +/* Define this if you don't have raise(); kill() will be used instead. 
*/ +#define RAISE_MISSING 1 + +/* Define this if you don't have fsetpos() and fgetpos(). */ +#define FPOS_MISSING 1 + +/* Universal pointer type. */ +/* If your compiler doesn't fully support void *, change `void' to `char'. */ +typedef void *UNIV; + +/* If your compiler doesn't support void as a function return type, +change `void' to `int'. */ +typedef void VOID; + +/* If you don't have an ANSI C conformant <limits.h>, define +CHAR_SIGNED as 1 or 0 according to whether the `char' type is signed. +The <limits.h> on some versions of System V Release 3.2 is not ANSI C +conformant: the value of CHAR_MIN is 0 even though the `char' type is +signed. */ + +/* #define CHAR_SIGNED 1 */ +/* #define CHAR_SIGNED 0 */ +#ifndef CHAR_SIGNED +#include <limits.h> +#if CHAR_MIN < 0 +#define CHAR_SIGNED 1 +#else +#define CHAR_SIGNED 0 +#endif +#endif /* not CHAR_SIGNED */ + +/* Assume the system character set is ISO Latin-1. */ +#include "latin1.h" |