summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjfieber <jfieber@FreeBSD.org>1996-06-04 19:09:50 +0000
committerjfieber <jfieber@FreeBSD.org>1996-06-04 19:09:50 +0000
commit71c5c555bf3338d51e0fc54ac132576058a896ff (patch)
tree389b5323f1f35df66551347e1645047f7423249d
parentdc83deeffb10bd9c4b55a1f5af2313d0428f3af4 (diff)
downloadFreeBSD-src-71c5c555bf3338d51e0fc54ac132576058a896ff.zip
FreeBSD-src-71c5c555bf3338d51e0fc54ac132576058a896ff.tar.gz
Upgrade from 1.1 to 1.1.91. Unknown to me, the latter version was
actually available at the time I brought in the former. Lots of assorted bug fixes and much needed support for catalogs.
-rw-r--r--usr.bin/sgmls/Makefile.inc6
-rw-r--r--usr.bin/sgmls/libsgmls/Makefile2
-rw-r--r--usr.bin/sgmls/libsgmls/sgmls.c32
-rw-r--r--usr.bin/sgmls/libsgmls/sgmls.h2
-rw-r--r--usr.bin/sgmls/rast/Makefile2
-rw-r--r--usr.bin/sgmls/rast/rast.c80
-rw-r--r--usr.bin/sgmls/sgmls/Makefile13
-rw-r--r--usr.bin/sgmls/sgmls/action.h1
-rw-r--r--usr.bin/sgmls/sgmls/alloc.h8
-rw-r--r--usr.bin/sgmls/sgmls/ambig.c2
-rw-r--r--usr.bin/sgmls/sgmls/appl.h2
-rw-r--r--usr.bin/sgmls/sgmls/catalog.c925
-rw-r--r--usr.bin/sgmls/sgmls/catalog.h45
-rw-r--r--usr.bin/sgmls/sgmls/config.h13
-rw-r--r--usr.bin/sgmls/sgmls/context.c41
-rw-r--r--usr.bin/sgmls/sgmls/context.h2
-rw-r--r--usr.bin/sgmls/sgmls/ebcdic.h15
-rw-r--r--usr.bin/sgmls/sgmls/entgen.c126
-rw-r--r--usr.bin/sgmls/sgmls/entity.h5
-rw-r--r--usr.bin/sgmls/sgmls/etype.h4
-rw-r--r--usr.bin/sgmls/sgmls/genlex.c46
-rw-r--r--usr.bin/sgmls/sgmls/getopt.c2
-rw-r--r--usr.bin/sgmls/sgmls/latin1.h14
-rw-r--r--usr.bin/sgmls/sgmls/lexcode.h1
-rw-r--r--usr.bin/sgmls/sgmls/lexrf.c3
-rw-r--r--usr.bin/sgmls/sgmls/lextaba.c211
-rw-r--r--usr.bin/sgmls/sgmls/lextabe.c175
-rw-r--r--usr.bin/sgmls/sgmls/lineout.c19
-rw-r--r--usr.bin/sgmls/sgmls/main.c62
-rw-r--r--usr.bin/sgmls/sgmls/md1.c10
-rw-r--r--usr.bin/sgmls/sgmls/md2.c47
-rw-r--r--usr.bin/sgmls/sgmls/msg.h28
-rw-r--r--usr.bin/sgmls/sgmls/msgcat.c33
-rw-r--r--usr.bin/sgmls/sgmls/pars1.c78
-rw-r--r--usr.bin/sgmls/sgmls/pars2.c63
-rw-r--r--usr.bin/sgmls/sgmls/pcbrf.c129
-rw-r--r--usr.bin/sgmls/sgmls/portproc.c1
-rw-r--r--usr.bin/sgmls/sgmls/serv.c2
-rw-r--r--usr.bin/sgmls/sgmls/sgml1.c28
-rw-r--r--usr.bin/sgmls/sgmls/sgml2.c43
-rw-r--r--usr.bin/sgmls/sgmls/sgmlaux.h2
-rw-r--r--usr.bin/sgmls/sgmls/sgmldecl.c168
-rw-r--r--usr.bin/sgmls/sgmls/sgmldecl.h48
-rw-r--r--usr.bin/sgmls/sgmls/sgmlfnsm.h1
-rw-r--r--usr.bin/sgmls/sgmls/sgmlio.c2
-rw-r--r--usr.bin/sgmls/sgmls/sgmlmsg.c32
-rw-r--r--usr.bin/sgmls/sgmls/sgmls.1184
-rw-r--r--usr.bin/sgmls/sgmls/sgmlxtrn.c4
-rw-r--r--usr.bin/sgmls/sgmls/sgmlxtrn.h4
-rw-r--r--usr.bin/sgmls/sgmls/std.h6
-rw-r--r--usr.bin/sgmls/sgmls/synxtrn.h2
-rw-r--r--usr.bin/sgmls/sgmls/trace.h16
-rw-r--r--usr.bin/sgmls/sgmls/traceset.c41
-rw-r--r--usr.bin/sgmls/sgmls/version.c2
-rw-r--r--usr.bin/sgmls/sgmls/xfprintf.c20
-rw-r--r--usr.bin/sgmls/sgmlsasp/Makefile2
-rw-r--r--usr.bin/sgmls/sgmlsasp/replace.c15
-rw-r--r--usr.bin/sgmls/sgmlsasp/replace.h4
-rw-r--r--usr.bin/sgmls/sgmlsasp/sgmlsasp.12
-rw-r--r--usr.bin/sgmls/sgmlsasp/sgmlsasp.c6
-rw-r--r--usr.bin/sgmls/unix.cfg18
61 files changed, 2385 insertions, 515 deletions
diff --git a/usr.bin/sgmls/Makefile.inc b/usr.bin/sgmls/Makefile.inc
index 1e4fc2b..0faf511 100644
--- a/usr.bin/sgmls/Makefile.inc
+++ b/usr.bin/sgmls/Makefile.inc
@@ -1,13 +1,15 @@
#
# Bmakefile for rast
#
-# $id$
+# $Id$
#
+.if exists(${.CURDIR}/../../Makefile.inc)
.include "${.CURDIR}/../../Makefile.inc"
+.endif
.if exists(${.CURDIR}/../libsgmls/obj)
LIBSGMLS= ${.CURDIR}/../libsgmls/obj/libsgmls.a
.else
LIBSGMLS= ${.CURDIR}/../libsgmls/libsgmls.a
-.endif \ No newline at end of file
+.endif
diff --git a/usr.bin/sgmls/libsgmls/Makefile b/usr.bin/sgmls/libsgmls/Makefile
index e94fcc4..0d058f3 100644
--- a/usr.bin/sgmls/libsgmls/Makefile
+++ b/usr.bin/sgmls/libsgmls/Makefile
@@ -1,7 +1,7 @@
#
# Bmakefile for libsgmls
#
-# $id$
+# $Id$
#
LIB= sgmls
diff --git a/usr.bin/sgmls/libsgmls/sgmls.c b/usr.bin/sgmls/libsgmls/sgmls.c
index cbb03f1..4e25957 100644
--- a/usr.bin/sgmls/libsgmls/sgmls.c
+++ b/usr.bin/sgmls/libsgmls/sgmls.c
@@ -8,22 +8,12 @@
#include "sgmls.h"
#include "lineout.h"
-#ifdef __GNUC__
-#define NO_RETURN volatile
-#else
-#define NO_RETURN /* as nothing */
-#endif
-
#ifdef USE_PROTOTYPES
#define P(parms) parms
#else
#define P(parms) ()
#endif
-#ifndef __STDC__
-#define const /* as nothing */
-#endif
-
typedef struct sgmls_data data_s;
typedef struct sgmls_notation notation_s;
typedef struct sgmls_internal_entity internal_entity_s;
@@ -112,7 +102,7 @@ static char *errlist[] = {
"Input line too long"
};
-static void NO_RETURN error P((enum error_code));
+static void error P((enum error_code));
static int parse_data P((char *, unsigned long *));
static void parse_location P((char *, struct sgmls *));
static void parse_notation P((char *, notation_s *));
@@ -303,7 +293,7 @@ int sgmls_next(sp, e)
char *name;
attribute_s *a;
external_entity_s *ext;
-
+
name = scan_token(&p);
a = parse_attribute(sp, p);
ext = lookup_external_entity(sp, name);
@@ -449,7 +439,7 @@ int parse_data(p, linenop)
else
*q++ = *p++;
}
-
+
if (q > start || is_sdata) {
if (n >= datav_size)
grow_datav();
@@ -656,7 +646,7 @@ data_s *copy_data(v, n)
unsigned total;
char *p;
data_s *result;
-
+
result = (data_s *)xmalloc(n*sizeof(data_s));
total = 0;
for (i = 0; i < n; i++)
@@ -683,7 +673,11 @@ char *unescape(s)
char *s;
{
int len = unescape1(s);
- if (memchr(s, '\0', len))
+ if (
+#ifdef __BORLANDC__
+ len > 0 &&
+#endif
+ memchr(s, '\0', len))
error(E_NULESCAPE);
s[len] = '\0';
return s;
@@ -810,7 +804,7 @@ int read_line(sp)
error(E_SYSTEM);
return 0;
}
-
+
sp->input_lineno++;
input_lineno = sp->input_lineno;
for (;;) {
@@ -973,13 +967,15 @@ static
void add_attribute(pp, a)
attribute_s **pp, *a;
{
+#if 0
for (; *pp && strcmp((*pp)->name, a->name) < 0; pp = &(*pp)->next)
;
+#endif
a->next = *pp;
*pp = a;
}
-
+
static
char *strsave(s)
char *s;
@@ -1017,7 +1013,7 @@ UNIV xrealloc(p, n)
return p;
}
-static NO_RETURN
+static
void error(num)
enum error_code num;
{
diff --git a/usr.bin/sgmls/libsgmls/sgmls.h b/usr.bin/sgmls/libsgmls/sgmls.h
index c327f15..79b2658 100644
--- a/usr.bin/sgmls/libsgmls/sgmls.h
+++ b/usr.bin/sgmls/libsgmls/sgmls.h
@@ -35,7 +35,7 @@ struct sgmls_external_entity {
struct sgmls_attribute *attributes;
struct sgmls_notation *notation;
};
-
+
struct sgmls_entity {
union {
struct sgmls_internal_entity internal;
diff --git a/usr.bin/sgmls/rast/Makefile b/usr.bin/sgmls/rast/Makefile
index 4c8a7c2..214286a 100644
--- a/usr.bin/sgmls/rast/Makefile
+++ b/usr.bin/sgmls/rast/Makefile
@@ -1,7 +1,7 @@
#
# Bmakefile for rast
#
-# $id$
+# $Id$
#
PROG= rast
diff --git a/usr.bin/sgmls/rast/rast.c b/usr.bin/sgmls/rast/rast.c
index 2634679..f957187 100644
--- a/usr.bin/sgmls/rast/rast.c
+++ b/usr.bin/sgmls/rast/rast.c
@@ -37,6 +37,7 @@ NO_RETURN void error VP((char *,...));
static void input_error P((int, char *, unsigned long));
static int do_file P((FILE *));
static void usage P((void));
+static void init_sort_code P((void));
static void output_processing_instruction P((char *, unsigned));
static void output_data P((struct sgmls_data *, int));
@@ -47,6 +48,7 @@ static void output_external_entity_info P((struct sgmls_external_entity *));
static void output_element_start P((char *, struct sgmls_attribute *));
static void output_element_end P((char *));
static void output_attribute P((struct sgmls_attribute *));
+static void output_attribute_list P((struct sgmls_attribute *));
static void output_tokens P((char **, int));
static void output_markup_chars P((char *, unsigned));
static void output_markup_string P((char *));
@@ -56,6 +58,8 @@ static void output_external_id P((char *, char *));
static void output_entity P((struct sgmls_entity *));
static void output_external_entity_info P((struct sgmls_external_entity *));
static void output_internal_entity P((struct sgmls_internal_entity *));
+/* Don't use a prototype here to avoid problems with qsort. */
+static int compare_attributes();
#define output_flush_markup() output_flush('!')
#define output_flush_data() output_flush('|')
@@ -64,6 +68,10 @@ static FILE *outfp;
static int char_count = 0;
static char *program_name;
+static short sort_code[256];
+static struct sgmls_attribute **attribute_vector = 0;
+static int attribute_vector_length = 0;
+
int main(argc, argv)
int argc;
char **argv;
@@ -107,6 +115,8 @@ int main(argc, argv)
(void)sgmls_set_errhandler(input_error);
+ init_sort_code();
+
if (!do_file(stdin)) {
fclose(outfp);
if (output_file) {
@@ -141,6 +151,18 @@ void usage()
}
static
+void init_sort_code()
+{
+  /* Build the sort_code[] table used by compare_attributes(): every
+     byte value first gets the code (value + 128); the characters
+     listed in print[] are then given their position in that string,
+     so they all rank below the unlisted ones. */
+  static char print[] = "!\"#$%&'()*+,-./0123456789:;<=>?\
+@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";
+  int code;
+  int rank;
+
+  code = 0;
+  while (code < 256) {
+    sort_code[code] = code + 128;
+    code++;
+  }
+  rank = 0;
+  while (print[rank] != '\0') {
+    sort_code[(unsigned char)print[rank]] = rank;
+    rank++;
+  }
+}
+
+static
int do_file(fp)
FILE *fp;
{
@@ -269,10 +291,8 @@ void output_element_start(gi, att)
{
fprintf(outfp, "[%s", gi);
if (att) {
- struct sgmls_attribute *p;
putc('\n', outfp);
- for (p = att; p; p = p->next)
- output_attribute(p);
+ output_attribute_list(att);
}
fputs("]\n", outfp);
}
@@ -285,6 +305,54 @@ void output_element_end(gi)
}
static
+void output_attribute_list(att)
+     struct sgmls_attribute *att;
+{
+  /* Output every attribute in the linked list ATT through
+     output_attribute(), ordered by name with compare_attributes().
+     The list itself is not modified: pointers to its nodes are copied
+     into the file-level attribute_vector, which is grown on demand and
+     reused between calls.  ATT may be null, in which case nothing is
+     output. */
+  struct sgmls_attribute *p;
+  int n = 0;
+  int i;
+
+  for (p = att; p; p = p->next)
+    n++;
+  /* An empty list needs no work.  Returning here also keeps qsort()
+     from being handed a null base pointer when no vector has been
+     allocated yet. */
+  if (n == 0)
+    return;
+  if (attribute_vector_length < n) {
+    struct sgmls_attribute **grown;
+
+    if (attribute_vector_length == 0)
+      grown = (struct sgmls_attribute **)malloc(n*sizeof(*attribute_vector));
+    else
+      grown = (struct sgmls_attribute **)realloc((UNIV)attribute_vector,
+                                                 n*sizeof(*attribute_vector));
+    if (!grown)
+      error("Out of memory");
+    /* Record the new vector and its length only once the allocation
+       is known to have succeeded. */
+    attribute_vector = grown;
+    attribute_vector_length = n;
+  }
+  i = 0;
+  for (p = att; p; p = p->next)
+    attribute_vector[i++] = p;
+  qsort(attribute_vector, n, sizeof(attribute_vector[0]), compare_attributes);
+  for (i = 0; i < n; i++)
+    output_attribute(attribute_vector[i]);
+}
+
+/* qsort() comparison function for two elements of attribute_vector.
+   Orders the attributes by name, ranking characters by sort_code[]
+   rather than by their raw values; a name that is a proper prefix of
+   the other sorts first. */
+static
+int compare_attributes(p1, p2)
+     UNIV p1, p2;
+{
+  char *left = (*(struct sgmls_attribute **)p1)->name;
+  char *right = (*(struct sgmls_attribute **)p2)->name;
+
+  while (*left != '\0' && *right != '\0') {
+    if (*left != *right)
+      return (sort_code[(unsigned char)*left]
+              - sort_code[(unsigned char)*right]);
+    left++;
+    right++;
+  }
+  if (*left != '\0')
+    return 1;
+  return *right != '\0' ? -1 : 0;
+}
+
+static
void output_attribute(p)
struct sgmls_attribute *p;
{
@@ -477,11 +545,9 @@ void output_external_entity_info(e)
putc('\n', outfp);
output_external_id(e->pubid, e->sysid);
if (e->type != SGMLS_ENTITY_SUBDOC) {
- struct sgmls_attribute *p;
fprintf(outfp, "#NOTATION=%s\n", e->notation->name);
output_external_id(e->notation->pubid, e->notation->sysid);
- for (p = e->attributes; p; p = p->next)
- output_attribute(p);
+ output_attribute_list(e->attributes);
}
}
@@ -518,7 +584,7 @@ void error(char *message,...)
char *message;
#endif
va_list ap;
-
+
fprintf(stderr, "%s: ", program_name);
#ifdef VARARGS
va_start(ap);
diff --git a/usr.bin/sgmls/sgmls/Makefile b/usr.bin/sgmls/sgmls/Makefile
index 3a0a0cf..b46e9f6 100644
--- a/usr.bin/sgmls/sgmls/Makefile
+++ b/usr.bin/sgmls/sgmls/Makefile
@@ -1,18 +1,19 @@
#
# Bmakefile for sgmls
#
-# $id$
+# $Id$
#
-PROG= sgmls
+PROG= sgmls
-SRCS+= lexrf.c pcbrf.c synrf.c context.c md1.c md2.c pars1.c pars2.c serv.c
-SRCS+= sgml1.c sgml2.c sgmlmsg.c sgmlxtrn.c traceset.c entgen.c sgmlio.c
-SRCS+= xfprintf.c main.c unixproc.c sgmldecl.c version.c strerror.c getopt.c
-SRCS+= msgcat.c lineout.c ambig.c exclude.c lextaba.c
+SRCS= lexrf.c pcbrf.c synrf.c context.c md1.c md2.c pars1.c pars2.c serv.c
+SRCS+= sgml1.c sgml2.c sgmlmsg.c sgmlxtrn.c traceset.c entgen.c sgmlio.c
+SRCS+= xfprintf.c main.c unixproc.c sgmldecl.c version.c strerror.c getopt.c
+SRCS+= lineout.c ambig.c lextaba.c catalog.c
CFLAGS+= -I${.CURDIR}/../libsgmls
.include "../Makefile.inc"
.include <bsd.prog.mk>
+
diff --git a/usr.bin/sgmls/sgmls/action.h b/usr.bin/sgmls/sgmls/action.h
index 08475bf..03bf478 100644
--- a/usr.bin/sgmls/sgmls/action.h
+++ b/usr.bin/sgmls/sgmls/action.h
@@ -48,6 +48,7 @@
#define MSP_ 75 /* Marked section start in prolog outside DTD */
#define APP_ 76 /* APPINFO (other than NONE) */
#define STE_ 77 /* Start tag ended prolog */
+#define ETE_ 78 /* End tag ended prolog */
/* GRPACT.H: Symbols for group tokenization action names (all alpha).
There must be no conflict with PARSEACT.H, which
diff --git a/usr.bin/sgmls/sgmls/alloc.h b/usr.bin/sgmls/sgmls/alloc.h
new file mode 100644
index 0000000..d732178
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/alloc.h
@@ -0,0 +1,8 @@
+/* alloc.h */
+
+typedef unsigned SIZE_T;
+
+/* Like malloc and realloc, but don't return if no memory is available. */
+
+extern UNIV xmalloc P((SIZE_T));
+extern UNIV xrealloc P((UNIV, SIZE_T));
diff --git a/usr.bin/sgmls/sgmls/ambig.c b/usr.bin/sgmls/sgmls/ambig.c
index 942aa5d..9da02eb 100644
--- a/usr.bin/sgmls/sgmls/ambig.c
+++ b/usr.bin/sgmls/sgmls/ambig.c
@@ -102,7 +102,7 @@ VOID ambig()
{
struct contoken *s;
int i;
-
+
if (!follow) {
/* We can't allocate everything in one chunk, because that would
overflow a 16-bit unsigned if GRPGTCNT was 253. */
diff --git a/usr.bin/sgmls/sgmls/appl.h b/usr.bin/sgmls/sgmls/appl.h
index 404d749..2513c98 100644
--- a/usr.bin/sgmls/sgmls/appl.h
+++ b/usr.bin/sgmls/sgmls/appl.h
@@ -15,8 +15,6 @@ enum {
VOID process_document P((int));
VOID output_conforming P((void));
-UNIV xmalloc P((UNS));
-UNIV xrealloc P((UNIV, UNS));
VOID appl_error VP((int, ...));
#ifdef SUPPORT_SUBDOC
diff --git a/usr.bin/sgmls/sgmls/catalog.c b/usr.bin/sgmls/sgmls/catalog.c
new file mode 100644
index 0000000..164b97d
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/catalog.c
@@ -0,0 +1,925 @@
+/* Normalize public identifiers to handle ISO 8879[-:]1986 problem.
+What should happen if there's a duplicate in a single catalog entry file? */
+
+#include "config.h"
+#include "std.h"
+#include "catalog.h"
+
+#ifdef USE_PROTOTYPES
+#define P(parms) parms
+#else
+#define P(parms) ()
+#endif
+
+#include "alloc.h"
+
+#define MINIMUM_DATA_CHARS \
+"abcdefghijklmnopqrstuvwxyz\
+ABCDEFGHIJKLMNOPQRSTUVWXYZ\
+0123456789-.'()+,/:=?"
+
+#define N_DECL_TYPE 3
+#define PUBLIC_ID_MAP N_DECL_TYPE
+#define N_TABLES (N_DECL_TYPE + 1)
+
+enum literal_type {
+ NORMAL_LITERAL,
+ MINIMUM_LITERAL
+};
+
+typedef enum {
+ EOF_PARAM,
+ NAME_PARAM,
+ LITERAL_PARAM
+} PARAM_TYPE;
+
+/* Error codes handed to the catalog error handler by error().  The
+   handler receives the value as its error_number argument, and
+   catalog_error_text() uses it as an index into its message table, so
+   the order here must match the order of that table. */
+enum catalog_error {
+ E_NAME_EXPECTED,
+ E_LITERAL_EXPECTED,
+ E_ARG_EXPECTED,
+ E_MINIMUM_DATA,
+ E_EOF_COMMENT,
+ E_EOF_LITERAL,
+ E_NUL_CHAR,
+ /* Codes from here on are reported with CATALOG_SYSTEM_ERROR and the
+    current errno (see FIRST_SYSTEM_ERROR). */
+ E_CANNOT_OPEN,
+ E_GETC,
+ E_FCLOSE
+};
+
+#define FIRST_SYSTEM_ERROR E_CANNOT_OPEN
+
+#define HASH_TABLE_INITIAL_SIZE 8
+#define HASH_TABLE_MAX_SIZE (((SIZE_T)-1)/sizeof(struct hash_table_entry *))
+
+/* One mapping read from a catalog entry file. */
+struct hash_table_entry {
+ /* Index into the catalog's files[] array of the file that supplied
+    this entry. */
+ int file_index;
+ /* String the entry is looked up by (compared with strcmp() or
+    subst_equal()). */
+ const char *key;
+ /* System identifier returned to the caller on a match. */
+ const char *system_id;
+};
+
+/* Number of bytes per string block. */
+#define BLOCK_SIZE 1000
+
+/* Bytes follow the struct. */
+
+struct string_block {
+ struct string_block *next;
+};
+
+/* Hash table of pointers to entries; a null slot is empty. */
+struct hash_table {
+ struct hash_table_entry **v; /* slot array, null until the first add */
+ SIZE_T size; /* must be power of 2 */
+ SIZE_T used; /* number of occupied slots */
+ SIZE_T used_limit; /* hash_table_add() enlarges the table once used
+ reaches this value */
+};
+
+/* The object behind a CATALOG handle. */
+struct catalog {
+ /* One table per declaration type (indices 0 .. N_DECL_TYPE - 1) plus
+    the public identifier table at index PUBLIC_ID_MAP. */
+ struct hash_table tables[N_TABLES];
+ /* Names of the catalog entry files, in the order they were added. */
+ char **files;
+ int n_files;
+ /* Linked list of string blocks handed out by alloc_bytes(). */
+ struct string_block *blocks;
+ /* Next free byte in the newest block and the number of bytes left
+    in it. */
+ char *block_ptr;
+ SIZE_T block_spare;
+ /* Called by error() for every problem found while loading. */
+ CATALOG_ERROR_HANDLER error_handler;
+ /* Set to 1 by load(); catalog_lookup_entity() calls load() while it
+    is still 0. */
+ int loaded;
+};
+
+/* State used while reading the catalog entry files; load() reuses one
+   instance for every file. */
+struct parser {
+ FILE *fp; /* file currently being read */
+ struct catalog *cat; /* catalog the entries are added to */
+ /* Buffer holding the parameter parsed most recently.  It is not
+    nul-terminated: param_length bytes are valid and param_alloc bytes
+    are allocated. */
+ char *param;
+ SIZE_T param_length;
+ SIZE_T param_alloc;
+ int file_index; /* index of the current file in cat->files */
+ const char *filename; /* name of the current file, for error reports */
+ /* Newlines read so far in the current file; error() reports
+    newline_count + 1 as the line number. */
+ unsigned long newline_count;
+ /* Non-zero for each character listed in MINIMUM_DATA_CHARS (filled
+    in by load()). */
+ char minimum_data[256];
+};
+
+static
+VOID add_catalog_file P((struct catalog *cat, const char *filename,
+ SIZE_T length));
+static
+VOID load P((struct catalog *cat));
+static
+VOID parse_file P((struct parser *parser));
+static
+VOID parse_public P((struct parser *parser));
+static
+VOID parse_name_map P((struct parser *parser,
+ int decl_type));
+static
+int parse_arg P((struct parser *parser));
+static
+PARAM_TYPE parse_param P((struct parser *parser, enum literal_type));
+static
+VOID skip_comment P((struct parser *parser));
+static
+PARAM_TYPE parse_literal P((struct parser *parser, int lit,
+ enum literal_type));
+static
+PARAM_TYPE parse_name P((struct parser *parser, int first_char));
+static
+VOID param_grow P((struct parser *parser));
+static
+const char *param_save P((struct parser *parser));
+static
+char *alloc_bytes P((struct catalog *catalog, SIZE_T n));
+static
+int param_equal P((struct parser *parser, const char *key));
+static
+int hash_table_add P((struct hash_table *table, const char *s,
+ const char *system_id, int file_index));
+static
+struct hash_table_entry *hash_table_lookup P((struct hash_table *table,
+ const char *s));
+static
+struct hash_table_entry *hash_table_lookup_subst P((struct hash_table *table,
+ const char *subst_table,
+ const char *s));
+static
+VOID hash_table_init P((struct hash_table *p));
+static
+VOID hash_table_delete P((struct hash_table *p));
+static
+SIZE_T hash_table_start_index P((struct hash_table *p, const char *s));
+static
+int subst_equal P((const char *subst_table, const char *s1, const char *s2));
+static
+VOID error P((struct parser *parser, enum catalog_error err));
+
+/* Append the character c to the parser's parameter buffer, calling
+   param_grow() first when the buffer is full.  This is an expression,
+   not a statement, and it evaluates its parser argument several
+   times. */
+#define param_char(parser, c) \
+ ((((parser)->param_length < (parser)->param_alloc) \
+ || (param_grow(parser), 1)), \
+ ((parser)->param[(parser)->param_length] = (c)), \
+ ((parser)->param_length += 1))
+
+/* Empty the parameter buffer; the allocation is kept. */
+#define param_init(parser) ((parser)->param_length = 0)
+/* Drop the last character of the parameter buffer.  The length is not
+   checked, so the buffer must not be empty. */
+#define param_chop(parser) \
+ ((parser)->param_length = (parser)->param_length - 1)
+
+/* Return the message text for the error number that was passed to a
+   catalog error handler.  The texts for the system errors are format
+   strings with two %s conversions.  A number outside the table yields
+   a fixed placeholder string. */
+const char *catalog_error_text(error_number)
+     int error_number;
+{
+  static const char *messages[] = {
+    "Name expected",
+    "Literal expected",
+    "Missing argument",
+    "Only minimum data characters allowed in a public identifier",
+    "End of file in comment",
+    "End of file in literal",
+    "Nul character is not allowed",
+    "Cannot open `%s': %s",
+    "Error reading `%s': %s",
+    "Error closing `%s': %s"
+  };
+  int n_messages = (int)(sizeof(messages)/sizeof(messages[0]));
+
+  if (error_number < 0 || error_number >= n_messages)
+    return "(invalid error number)";
+  return messages[error_number];
+}
+
+
+/* Allocate an empty catalog that reports problems through
+   error_handler.  No file is read here.  The result is released with
+   catalog_delete(). */
+CATALOG catalog_create(error_handler)
+     CATALOG_ERROR_HANDLER error_handler;
+{
+  struct catalog *cat = (struct catalog *)xmalloc(sizeof(struct catalog));
+  struct hash_table *table;
+
+  cat->error_handler = error_handler;
+  cat->files = 0;
+  cat->n_files = 0;
+  cat->loaded = 0;
+  cat->blocks = 0;
+  cat->block_ptr = 0;
+  cat->block_spare = 0;
+  for (table = cat->tables; table != cat->tables + N_TABLES; table++)
+    hash_table_init(table);
+  return (CATALOG)cat;
+}
+
+/* Release everything owned by the catalog CAT: the hash tables and
+   their entries, the file name vector, the string blocks and the
+   catalog object itself.  CAT must not be used afterwards.  A null
+   CAT is ignored. */
+VOID catalog_delete(cat)
+     CATALOG cat;
+{
+  int i;
+  struct string_block *block;
+  struct catalog *catalog = (struct catalog *)cat;
+
+  if (!catalog)
+    return;
+  /* Use N_TABLES, as catalog_create() does, so that the two stay in
+     step if the number of tables ever changes. */
+  for (i = 0; i < N_TABLES; i++)
+    hash_table_delete(catalog->tables + i);
+  if (catalog->files)
+    free((UNIV)catalog->files);
+  /* The file names, keys and system identifiers all live inside the
+     string blocks, so freeing the blocks releases them too. */
+  block = catalog->blocks;
+  while (block) {
+    struct string_block *tem = block;
+    block = block->next;
+    free((UNIV)tem);
+  }
+  catalog->blocks = 0;
+  free((UNIV)catalog);
+}
+
+/* Add FILENAME to the list of catalog entry files of P.  The file is
+   only recorded here; load() opens and parses the recorded files, and
+   catalog_lookup_entity() calls load() on the first lookup. */
+VOID catalog_load_file(p, filename)
+     CATALOG p;
+     const char *filename;
+{
+  struct catalog *cat = (struct catalog *)p;
+
+  add_catalog_file(cat, filename, strlen(filename));
+}
+
+/* Look up an entity in the catalog, loading the catalog files first if
+   that has not happened yet.
+
+   PUBLIC_ID, when not null, is looked up in the public identifier
+   table.  NAME, when not null and DECL_TYPE is one of the declaration
+   types, is looked up in the table for that type; if SUBST_TABLE is
+   not null the stored names are compared after being mapped through
+   it.  When both lookups succeed, the name entry is preferred only if
+   it comes from a file with a lower index than the public identifier
+   entry.
+
+   Returns 1 and stores the system identifier and the name of the
+   catalog file it came from through SYSTEM_ID and CATALOG_FILE;
+   returns 0, leaving both untouched, when nothing matches. */
+int catalog_lookup_entity(cat, public_id, name, decl_type, subst_table,
+                          system_id, catalog_file)
+     CATALOG cat;
+     const char *public_id;
+     const char *name;
+     enum catalog_decl_type decl_type;
+     const char *subst_table;
+     const char **system_id;
+     const char **catalog_file;
+{
+  struct catalog *catalog = (struct catalog *)cat;
+  const struct hash_table_entry *best = 0;
+
+  if (!catalog->loaded)
+    load(catalog);
+  if (public_id)
+    best = hash_table_lookup(catalog->tables + PUBLIC_ID_MAP, public_id);
+  /* A public identifier match from file 0 cannot be beaten, so the
+     name lookup is skipped in that case. */
+  if (name
+      && decl_type >= 0
+      && decl_type < N_DECL_TYPE
+      && (!best || best->file_index > 0)) {
+    struct hash_table *table = catalog->tables + decl_type;
+    const struct hash_table_entry *by_name;
+
+    if (subst_table)
+      by_name = hash_table_lookup_subst(table, subst_table, name);
+    else
+      by_name = hash_table_lookup(table, name);
+    if (!best)
+      best = by_name;
+    else if (by_name && by_name->file_index < best->file_index)
+      best = by_name;
+  }
+  if (best) {
+    *system_id = best->system_id;
+    *catalog_file = catalog->files[best->file_index];
+    return 1;
+  }
+  return 0;
+}
+
+/* Append a copy of the first LENGTH bytes of FILENAME, nul-terminated,
+   to the catalog's list of files.  The copy is taken from the
+   catalog's string blocks. */
+static
+VOID add_catalog_file(cat, filename, length)
+     struct catalog *cat;
+     const char *filename;
+     SIZE_T length;
+{
+  char *copy;
+
+  /* Make room for one more pointer. */
+  if (cat->files)
+    cat->files
+      = (char **)xrealloc(cat->files, (cat->n_files + 1)*sizeof(char *));
+  else
+    cat->files = (char **)xmalloc(sizeof(char *));
+  copy = alloc_bytes(cat, length + 1);
+  memcpy(copy, filename, length);
+  copy[length] = '\0';
+  cat->files[cat->n_files] = copy;
+  cat->n_files = cat->n_files + 1;
+}
+
+/* Read all catalog entry files into the tables of CAT.
+
+   The files already on the list when this is called are mandatory:
+   failing to open one is reported with E_CANNOT_OPEN.  The files named
+   in the CATALOG_FILES_ENV_VAR environment variable (or in
+   DEFAULT_CATALOG_FILES when that is unset or empty), separated by
+   PATH_FILE_SEP, are then appended; those are optional and are
+   skipped silently when they cannot be opened. */
+static
+VOID load(cat)
+     struct catalog *cat;
+{
+  struct parser parser;
+  const char *list;
+  const char *start;
+  const char *end;
+  const char *p;
+  int first_optional = cat->n_files;
+  int i;
+
+  cat->loaded = 1;
+  parser.cat = cat;
+  parser.param = 0;
+  parser.param_alloc = 0;
+
+  /* Flag the characters allowed in a minimum literal. */
+  i = 0;
+  while (i < 256)
+    parser.minimum_data[i++] = 0;
+  for (p = MINIMUM_DATA_CHARS; *p != '\0'; p++)
+    parser.minimum_data[(unsigned char)*p] = 1;
+
+  /* Split the file list at the separators; empty items are ignored. */
+  list = getenv(CATALOG_FILES_ENV_VAR);
+  if (list == 0 || list[0] == '\0')
+    list = DEFAULT_CATALOG_FILES;
+  start = list;
+  for (;;) {
+    end = start;
+    while (*end != '\0' && *end != PATH_FILE_SEP)
+      end++;
+    if (end != start)
+      add_catalog_file(cat, start, end - start);
+    if (*end == '\0')
+      break;
+    start = end + 1;
+  }
+
+  for (i = 0; i < cat->n_files; i++) {
+    parser.filename = cat->files[i];
+    parser.newline_count = 0;
+    parser.fp = fopen(cat->files[i], "r");
+    if (parser.fp) {
+      parser.file_index = i;
+      parse_file(&parser);
+      errno = 0;
+      if (fclose(parser.fp) < 0)
+        error(&parser, E_FCLOSE);
+    }
+    else if (i < first_optional)
+      error(&parser, E_CANNOT_OPEN);
+  }
+  if (parser.param)
+    free(parser.param);
+}
+
+/* Parse the catalog entry file open on parser->fp up to end of file.
+
+   Entries introduced by the keywords PUBLIC, ENTITY, DOCTYPE and
+   LINKTYPE (matched without regard to case) are added to the catalog.
+   Any other name is taken to start an entry of an unknown kind: it and
+   the literals that follow it are ignored without comment.  A literal
+   found where a keyword is wanted is reported with E_NAME_EXPECTED;
+   only the first literal of such a run is reported. */
+static
+VOID parse_file(parser)
+     struct parser *parser;
+{
+  /* Non-zero while the parameters of an unknown or erroneous entry are
+     being ignored. */
+  int skipping = 0;
+  for (;;) {
+    PARAM_TYPE type = parse_param(parser, NORMAL_LITERAL);
+    if (type == EOF_PARAM)
+      break;
+    if (type == NAME_PARAM) {
+      /* Every name starts a new entry, so stop ignoring things here.
+         Without this reset a single unknown keyword or stray literal
+         would hide every later stray literal in the file. */
+      skipping = 0;
+      if (param_equal(parser, "PUBLIC"))
+        parse_public(parser);
+      else if (param_equal(parser, "ENTITY"))
+        parse_name_map(parser, CATALOG_ENTITY_DECL);
+      else if (param_equal(parser, "DOCTYPE"))
+        parse_name_map(parser, CATALOG_DOCTYPE_DECL);
+      else if (param_equal(parser, "LINKTYPE"))
+        parse_name_map(parser, CATALOG_LINKTYPE_DECL);
+      else
+        skipping = 1;
+    }
+    else if (!skipping) {
+      skipping = 1;
+      error(parser, E_NAME_EXPECTED);
+    }
+  }
+}
+
+/* Parse the two parameters of a PUBLIC entry, a public identifier
+   given as a minimum literal followed by a system identifier, and add
+   the mapping to the public identifier table.
+
+   If the first parameter is not a literal, E_LITERAL_EXPECTED is
+   reported and no mapping is added.  Previously the contents of the
+   parameter buffer (a name, or at end of file the keyword itself) were
+   registered as the public identifier anyway. */
+static
+VOID parse_public(parser)
+     struct parser *parser;
+{
+  const char *public_id;
+  PARAM_TYPE type = parse_param(parser, MINIMUM_LITERAL);
+
+  if (type != LITERAL_PARAM) {
+    error(parser, E_LITERAL_EXPECTED);
+    /* Still consume the system identifier, so that the caller resumes
+       at the next keyword just as it did before this check existed. */
+    if (type != EOF_PARAM)
+      (void)parse_arg(parser);
+    return;
+  }
+  public_id = param_save(parser);
+  if (!parse_arg(parser))
+    return;
+  hash_table_add(parser->cat->tables + PUBLIC_ID_MAP,
+                 public_id, param_save(parser), parser->file_index);
+}
+
+/* Parse the two parameters of an ENTITY, DOCTYPE or LINKTYPE entry (a
+   name and a system identifier) and add the mapping to the table for
+   DECL_TYPE.  Nothing is added if either parameter is missing;
+   parse_arg() has reported the error by then. */
+static
+VOID parse_name_map(parser, decl_type)
+     struct parser *parser;
+     int decl_type;
+{
+  const char *name;
+  const char *system_id;
+
+  if (parse_arg(parser)) {
+    name = param_save(parser);
+    if (parse_arg(parser)) {
+      system_id = param_save(parser);
+      hash_table_add(parser->cat->tables + decl_type,
+                     name, system_id, parser->file_index);
+    }
+  }
+}
+
+/* Read the next parameter, which may be a name or a literal.  Returns
+   1 on success; otherwise reports E_ARG_EXPECTED and returns 0. */
+static
+int parse_arg(parser)
+     struct parser *parser;
+{
+  switch (parse_param(parser, NORMAL_LITERAL)) {
+  case NAME_PARAM:
+  case LITERAL_PARAM:
+    return 1;
+  default:
+    error(parser, E_ARG_EXPECTED);
+    return 0;
+  }
+}
+
+/* Read the next parameter from the file into the parameter buffer and
+   return its type.  Spaces, tabs, newlines and comments (delimited by
+   "--") in front of it are skipped; a nul character is reported with
+   E_NUL_CHAR and skipped as well.  A quote character starts a literal,
+   parsed according to LIT_TYPE; anything else starts a name.  Returns
+   EOF_PARAM at end of file, after reporting E_GETC if the stream is in
+   error. */
+static
+PARAM_TYPE parse_param(parser, lit_type)
+     struct parser *parser;
+     enum literal_type lit_type;
+{
+  for (;;) {
+    int c = getc(parser->fp);
+
+    if (c == EOF) {
+      if (ferror(parser->fp))
+        error(parser, E_GETC);
+      return EOF_PARAM;
+    }
+    if (c == '"' || c == '\'')
+      return parse_literal(parser, c, lit_type);
+    if (c == '\n') {
+      parser->newline_count += 1;
+      continue;
+    }
+    if (c == '\t' || c == ' ')
+      continue;
+    if (c == '\0') {
+      error(parser, E_NUL_CHAR);
+      continue;
+    }
+    if (c == '-') {
+      int next = getc(parser->fp);
+      if (next == '-') {
+        skip_comment(parser);
+        continue;
+      }
+      /* A single '-' is the first character of a name. */
+      ungetc(next, parser->fp);
+    }
+    return parse_name(parser, c);
+  }
+}
+
+/* Skip the rest of a comment whose opening "--" has been read, up to
+   and including the closing "--".  Newlines inside the comment are
+   counted.  Reaching end of file first is reported with E_EOF_COMMENT
+   (preceded by E_GETC if the stream is in error). */
+static
+VOID skip_comment(parser)
+     struct parser *parser;
+{
+  FILE *in = parser->fp;
+  int c;
+
+  for (;;) {
+    c = getc(in);
+    /* After a '-' look at the following character straight away; if
+       it does not close the comment it is examined below. */
+    if (c == '-' && (c = getc(in)) == '-')
+      return;
+    switch (c) {
+    case EOF:
+      if (ferror(in))
+        error(parser, E_GETC);
+      error(parser, E_EOF_COMMENT);
+      return;
+    case '\n':
+      parser->newline_count += 1;
+      break;
+    default:
+      break;
+    }
+  }
+}
+
+/* Read the body of a literal whose opening delimiter LIT (a quote
+   character) has already been consumed, collecting it in the parser's
+   parameter buffer.  Always returns LITERAL_PARAM, even when end of
+   file cuts the literal short; E_EOF_LITERAL is reported in that case.
+
+   For a MINIMUM_LITERAL, runs of spaces and newlines are collapsed to
+   a single space, those at the start are dropped, and one left at the
+   end is removed when the closing delimiter is reached; any other
+   character not flagged in parser->minimum_data is reported with
+   E_MINIMUM_DATA and left out.  For a NORMAL_LITERAL every character
+   is copied unchanged.  In both cases a nul character is reported with
+   E_NUL_CHAR and left out. */
+static
+PARAM_TYPE parse_literal(parser, lit, lit_type)
+ struct parser *parser;
+ int lit;
+ enum literal_type lit_type;
+{
+ /* Space handling state for minimum literals: yes_begin until the
+    first non-space character has been stored, yes_middle right after
+    a separating space has been stored, no otherwise. */
+ enum { no, yes_begin, yes_middle } skipping = yes_begin;
+ FILE *fp = parser->fp;
+ param_init(parser);
+ for (;;) {
+ int c = getc(fp);
+ if (c == lit)
+ break;
+ switch (c) {
+ case '\0':
+ error(parser, E_NUL_CHAR);
+ break;
+ case EOF:
+ if (ferror(fp))
+ error(parser, E_GETC);
+ error(parser, E_EOF_LITERAL);
+ return LITERAL_PARAM;
+ case '\n':
+ parser->newline_count += 1;
+ /* fall through */
+ case ' ':
+ if (lit_type == MINIMUM_LITERAL) {
+ /* Store one space for the whole run, and none at the start. */
+ if (skipping == no) {
+ param_char(parser, ' ');
+ skipping = yes_middle;
+ }
+ }
+ else
+ param_char(parser, c);
+ break;
+ default:
+ if (lit_type == MINIMUM_LITERAL) {
+ if (!parser->minimum_data[c])
+ error(parser, E_MINIMUM_DATA);
+ else {
+ skipping = no;
+ param_char(parser, c);
+ }
+ }
+ else
+ param_char(parser, c);
+ break;
+ }
+ }
+ /* The literal ended right after a collapsed space: remove it. */
+ if (skipping == yes_middle)
+ param_chop(parser);
+ return LITERAL_PARAM;
+}
+
+/* Read a name into the parameter buffer.  FIRST_CHAR, which the caller
+   has already read, is its first character.  The name ends at a space,
+   tab or newline (which is consumed), at a quote character (which is
+   pushed back for the next parameter) or at end of file.  A nul
+   character is reported with E_NUL_CHAR and left out.  Always returns
+   NAME_PARAM. */
+static
+PARAM_TYPE parse_name(parser, first_char)
+     struct parser *parser;
+     int first_char;
+{
+  FILE *in = parser->fp;
+
+  param_init(parser);
+  param_char(parser, first_char);
+  for (;;) {
+    int c = getc(in);
+
+    if (c == '\0') {
+      error(parser, E_NUL_CHAR);
+      continue;
+    }
+    if (c == EOF) {
+      if (ferror(in))
+        error(parser, E_GETC);
+      break;
+    }
+    if (c == '\n') {
+      parser->newline_count += 1;
+      break;
+    }
+    if (c == ' ' || c == '\t')
+      break;
+    if (c == '"' || c == '\'') {
+      ungetc(c, in);
+      break;
+    }
+    param_char(parser, c);
+  }
+  return NAME_PARAM;
+}
+
+/* Enlarge the parameter buffer: 256 bytes on the first call, twice the
+   previous size after that.  Called by the param_char() macro when the
+   buffer is full. */
+static
+VOID param_grow(parser)
+     struct parser *parser;
+{
+  if (parser->param_alloc != 0) {
+    SIZE_T doubled = parser->param_alloc * 2;
+
+    parser->param = xrealloc(parser->param, doubled);
+    parser->param_alloc = doubled;
+    return;
+  }
+  parser->param = xmalloc(256);
+  parser->param_alloc = 256;
+}
+
+/* Return a nul-terminated copy of the current parameter.  The copy is
+   taken from the catalog's string blocks, so it remains valid after
+   the parameter buffer is reused. */
+static
+const char *param_save(parser)
+     struct parser *parser;
+{
+  SIZE_T length = parser->param_length;
+  char *copy = alloc_bytes(parser->cat, length + 1);
+
+  memcpy(copy, parser->param, length);
+  copy[length] = '\0';
+  return copy;
+}
+
+/* Return N bytes of storage taken from the catalog's string blocks.
+   When the newest block has fewer than N bytes left, a new block
+   holding max(N, BLOCK_SIZE) bytes is put at the head of the block
+   list and used instead; what was left of the previous block is not
+   used again. */
+static
+char *alloc_bytes(catalog, n)
+     struct catalog *catalog;
+     SIZE_T n;
+{
+  char *result;
+
+  if (catalog->block_spare < n) {
+    SIZE_T payload = BLOCK_SIZE;
+    struct string_block *fresh;
+
+    if (n > BLOCK_SIZE)
+      payload = n;
+    fresh = (struct string_block *)xmalloc(sizeof(struct string_block)
+                                           + payload);
+    fresh->next = catalog->blocks;
+    catalog->blocks = fresh;
+    /* The bytes handed out start right after the block header. */
+    catalog->block_ptr = (char *)(fresh + 1);
+    catalog->block_spare = payload;
+  }
+  result = catalog->block_ptr;
+  catalog->block_spare -= n;
+  catalog->block_ptr = result + n;
+  return result;
+}
+
+
+/* Compare the current parameter with KEY, treating lower-case letters
+   in the parameter as their upper-case equivalents (KEY is compared as
+   given).  Returns 1 when they are equal and 0 otherwise. */
+
+static
+int param_equal(parser, key)
+     struct parser *parser;
+     const char *key;
+{
+  const char *cur = parser->param;
+  SIZE_T left = parser->param_length;
+
+  while (left > 0) {
+    unsigned char c;
+
+    /* KEY is shorter than the parameter. */
+    if (*key == '\0')
+      return 0;
+    c = *cur;
+    if (islower(c))
+      c = toupper(c);
+    if (c != (unsigned char)*key)
+      return 0;
+    cur++;
+    key++;
+    left--;
+  }
+  /* Equal only if KEY has been used up as well. */
+  return *key == '\0';
+}
+
+/* Add an entry mapping the key S to SYSTEM_ID, recorded as coming from
+   file FILE_INDEX.  The strings are not copied; the entry keeps the
+   pointers it is given.
+
+   Return 0 if it was a duplicate (the existing entry is kept) or if
+   the table cannot be enlarged any further; return 1 if the entry was
+   added.
+
+   Collisions are resolved by stepping to the next lower slot, wrapping
+   from slot 0 to the last slot. */
+
+static
+int hash_table_add(table, s, system_id, file_index)
+ struct hash_table *table;
+ const char *s;
+ const char *system_id;
+ int file_index;
+{
+ SIZE_T i;
+ struct hash_table_entry *p;
+
+ /* Look for an existing entry with this key.  If there is none, the
+    loop leaves i at the free slot the new entry belongs in. */
+ if (table->size > 0) {
+ i = hash_table_start_index(table, s);
+ while (table->v[i] != 0) {
+ if (strcmp(table->v[i]->key, s) == 0)
+ return 0;
+ if (i == 0)
+ i = table->size;
+ i--;
+ }
+ }
+ /* Enlarge the table first if it has reached its limit.  A table
+    set up by hash_table_init() has both counts at 0, so the first
+    add always takes this branch and i is always set below. */
+ if (table->used >= table->used_limit) {
+ SIZE_T j;
+ struct hash_table_entry **old_table = table->v;
+ SIZE_T old_size = table->size;
+ if (old_size == 0) {
+ table->size = HASH_TABLE_INITIAL_SIZE;
+ table->used_limit = table->size/2;
+ }
+ else {
+ /* NOTE(review): HASH_TABLE_MAX_SIZE is computed by a division and
+    does not look like a power of 2, which hash_table_start_index()
+    relies on when masking with size - 1 -- confirm before trusting
+    this branch. */
+ if (old_size > HASH_TABLE_MAX_SIZE/2) {
+ if (old_size == HASH_TABLE_MAX_SIZE)
+ return 0; /* FIXME: give an error? */
+ table->size = HASH_TABLE_MAX_SIZE;
+ table->used_limit = HASH_TABLE_MAX_SIZE - 1;
+ }
+ else {
+ table->size = (old_size << 1);
+ table->used_limit = table->size/2;
+ }
+ }
+ table->v
+ = (struct hash_table_entry **)xmalloc(sizeof(struct hash_table_entry *)
+ * table->size);
+ for (j = 0; j < table->size; j++)
+ table->v[j] = 0;
+ /* Move the existing entries to their slots in the new array. */
+ for (j = 0; j < old_size; j++)
+ if (old_table[j]) {
+ SIZE_T k = hash_table_start_index(table, old_table[j]->key);
+ while (table->v[k] != 0) {
+ if (k == 0)
+ k = table->size;
+ k--;
+ }
+ table->v[k] = old_table[j];
+ }
+ if (old_table)
+ free((UNIV)old_table);
+ /* The slot found earlier belonged to the old array; find the free
+    slot for S again. */
+ i = hash_table_start_index(table, s);
+ while (table->v[i] != 0) {
+ if (i == 0)
+ i = table->size;
+ i--;
+ }
+ }
+ p = (struct hash_table_entry *)xmalloc(sizeof(struct hash_table_entry));
+ p->key = s;
+ p->system_id = system_id;
+ p->file_index = file_index;
+ table->v[i] = p;
+ table->used += 1;
+ return 1;
+}
+
+/* Return the entry whose key is exactly S, or a null pointer if the
+   table has none.  The probe sequence is the one hash_table_add()
+   uses: start at the hashed slot and step downwards, wrapping at slot
+   0, until an empty slot is met. */
+static
+struct hash_table_entry *hash_table_lookup(table, s)
+     struct hash_table *table;
+     const char *s;
+{
+  SIZE_T i;
+  struct hash_table_entry *candidate;
+
+  if (table->size == 0)
+    return 0;
+  i = hash_table_start_index(table, s);
+  for (;;) {
+    candidate = table->v[i];
+    if (candidate == 0)
+      return 0;
+    if (strcmp(candidate->key, s) == 0)
+      return candidate;
+    if (i == 0)
+      i = table->size;
+    i--;
+  }
+}
+
+/* Return the first entry, in slot order, whose key equals S once the
+   key has been mapped through SUBST_TABLE (see subst_equal()), or a
+   null pointer if there is none.  The mapped keys are not hashed, so
+   every slot is examined in turn. */
+static
+struct hash_table_entry *hash_table_lookup_subst(table, subst_table, s)
+     struct hash_table *table;
+     const char *subst_table;
+     const char *s;
+{
+  struct hash_table_entry **slot = table->v;
+  SIZE_T left = table->size;
+
+  while (left > 0) {
+    struct hash_table_entry *candidate = *slot;
+
+    if (candidate != 0 && subst_equal(subst_table, s, candidate->key))
+      return candidate;
+    slot++;
+    left--;
+  }
+  return 0;
+}
+
+/* Make P an empty table with no slot array; hash_table_add()
+   allocates the array when the first entry is added. */
+static
+VOID hash_table_init(p)
+     struct hash_table *p;
+{
+  p->used_limit = 0;
+  p->used = 0;
+  p->size = 0;
+  p->v = 0;
+}
+
+/* Free the entries of P and its slot array.  The strings the entries
+   point to are not freed here, and the fields of P are left as they
+   were. */
+static
+VOID hash_table_delete(p)
+     struct hash_table *p;
+{
+  SIZE_T i;
+
+  if (!p->v)
+    return;
+  i = 0;
+  while (i < p->size) {
+    if (p->v[i])
+      free(p->v[i]);
+    i++;
+  }
+  free(p->v);
+}
+
+/* Return the slot at which the probe sequence for the string S starts:
+   a multiply-by-33 hash of its bytes, masked with size - 1.  The
+   callers in this file only use it when the table size is not 0. */
+static
+SIZE_T hash_table_start_index(p, s)
+     struct hash_table *p;
+     const char *s;
+{
+  unsigned long hash = 0;
+  const char *cur;
+
+  for (cur = s; *cur != '\0'; cur++)
+    hash = hash * 33 + (unsigned char)*cur;
+  return (hash & (p->size - 1));
+}
+
+/* Return 1 if S1 equals S2 after each character of S2 has been mapped
+   through SUBST_TABLE, and 0 otherwise.  s1 has already been
+   substituted; s2 has not.  The terminating nul of S2 is mapped as
+   well, so the strings only compare equal if the table maps nul to
+   nul. */
+
+static
+int subst_equal(subst_table, s1, s2)
+     const char *subst_table;
+     const char *s1;
+     const char *s2;
+{
+  while (*s1 == subst_table[(unsigned char)*s2]) {
+    if (*s1 == '\0')
+      return 1;
+    s1++;
+    s2++;
+  }
+  return 0;
+}
+
+/* Report ERR through the catalog's error handler, together with the
+   name of the current file and the current line number (newlines read
+   so far plus one).  For the system errors, those from
+   FIRST_SYSTEM_ERROR on, the CATALOG_SYSTEM_ERROR flag and the current
+   value of errno are passed too; otherwise both are 0. */
+static
+VOID error(parser, err)
+     struct parser *parser;
+     enum catalog_error err;
+{
+  int flags = 0;
+  int sys_errno = 0;
+
+  if (err >= FIRST_SYSTEM_ERROR) {
+    flags = CATALOG_SYSTEM_ERROR;
+    sys_errno = errno;
+  }
+  (*parser->cat->error_handler)(parser->filename,
+                                parser->newline_count + 1,
+                                err,
+                                flags,
+                                sys_errno);
+}
+
+#ifdef MAIN
+
+static const char *program_name;
+
+#include "getopt.h"
+
+static VOID usage P((void));
+static VOID out_of_memory P((void));
+static VOID handle_catalog_error P((const char *filename,
+ unsigned long lineno,
+ int error_number,
+ unsigned flags,
+ int sys_errno));
+
+/* Test driver, compiled only when MAIN is defined.
+   Usage: prog [-e] [-d] [-l] id file ...
+   Without an option ID is looked up as a public identifier; with -e,
+   -d or -l it is looked up as an entity, doctype or linktype name.
+   The listed files are added to the catalog ahead of those named in
+   the environment.  The result is written to stderr; the exit status
+   is 0 if a mapping was found and 1 if not. */
+int main(argc, argv)
+     int argc;
+     char **argv;
+{
+  enum catalog_decl_type entity_type = CATALOG_NO_DECL;
+  int entity_flag = 0;
+  char *public_id = 0;
+  char *name = 0;
+  const char *system_id;
+  const char *file;
+  CATALOG catalog;
+  int exit_status;
+  int opt;
+  int i;
+
+  program_name = argv[0];
+
+  while ((opt = getopt(argc, argv, "edl")) != EOF) {
+    if (opt == '?')
+      usage();
+    else if (opt == 'e') {
+      entity_flag = 1;
+      entity_type = CATALOG_ENTITY_DECL;
+    }
+    else if (opt == 'd') {
+      entity_flag = 1;
+      entity_type = CATALOG_DOCTYPE_DECL;
+    }
+    else if (opt == 'l') {
+      entity_flag = 1;
+      entity_type = CATALOG_LINKTYPE_DECL;
+    }
+  }
+  /* An identifier and at least one file are required. */
+  if (argc - optind < 2)
+    usage();
+  if (!entity_flag)
+    public_id = argv[optind];
+  else
+    name = argv[optind];
+
+  catalog = catalog_create(handle_catalog_error);
+  i = optind + 1;
+  while (i < argc) {
+    catalog_load_file(catalog, argv[i]);
+    i++;
+  }
+  if (!catalog_lookup_entity(catalog, public_id, name, entity_type, (char *)0,
+                             &system_id, &file)) {
+    fprintf(stderr, "not found\n");
+    exit_status = 1;
+  }
+  else {
+    exit_status = 0;
+    fprintf(stderr, "%s (%s)\n", system_id, file);
+  }
+  catalog_delete(catalog);
+  return exit_status;
+}
+
+/* Print the usage message of the test driver on stderr and exit with
+   status 1. */
+static
+VOID usage()
+{
+  static const char format[] = "usage: %s [-e] [-d] [-l] id file ...\n";
+
+  fprintf(stderr, format, program_name);
+  exit(1);
+}
+
+/* Catalog error handler of the test driver: print the error on stderr,
+   prefixed with the program name.
+
+   For a system error (CATALOG_SYSTEM_ERROR set in FLAGS) the text
+   returned by catalog_error_text() is a format string with two %s
+   conversions, the file name and the description of SYS_ERRNO, so both
+   are supplied; passing the file name alone left the second conversion
+   without an argument.  Any other error is printed as
+   "file:line: message".
+
+   strerror() is assumed to be declared by the headers this file
+   already includes. */
+static
+VOID handle_catalog_error(filename, lineno, error_number, flags, sys_errno)
+     const char *filename;
+     unsigned long lineno;
+     int error_number;
+     unsigned flags;
+     int sys_errno;
+{
+  fprintf(stderr, "%s:", program_name);
+  if (flags & CATALOG_SYSTEM_ERROR) {
+    putc(' ', stderr);
+    fprintf(stderr, catalog_error_text(error_number), filename,
+            strerror(sys_errno));
+    putc('\n', stderr);
+  }
+  else
+    fprintf(stderr, "%s:%lu: %s\n", filename, lineno,
+            catalog_error_text(error_number));
+  fflush(stderr);
+}
+
+/* malloc() for the test driver: return N bytes of storage, or print a
+   message and exit via out_of_memory() if malloc() returns a null
+   pointer. */
+UNIV xmalloc(n)
+     SIZE_T n;
+{
+  UNIV mem;
+
+  mem = malloc(n);
+  if (mem)
+    return mem;
+  out_of_memory();
+  return mem;
+}
+
+/* realloc() for the test driver: resize the storage at P to N bytes,
+   or print a message and exit via out_of_memory() if realloc() returns
+   a null pointer. */
+UNIV xrealloc(p, n)
+     UNIV p;
+     SIZE_T n;
+{
+  UNIV resized = realloc(p, n);
+
+  if (!resized)
+    out_of_memory();
+  return resized;
+}
+
+/* Report memory exhaustion on stderr and exit with status 1. */
+static
+VOID out_of_memory()
+{
+  static const char format[] = "%s: out of memory\n";
+
+  fprintf(stderr, format, program_name);
+  exit(1);
+}
+
+#endif /* MAIN */
diff --git a/usr.bin/sgmls/sgmls/catalog.h b/usr.bin/sgmls/sgmls/catalog.h
new file mode 100644
index 0000000..b9509a5
--- /dev/null
+++ b/usr.bin/sgmls/sgmls/catalog.h
@@ -0,0 +1,45 @@
+#ifndef CATALOG_H
+#define CATALOG_H 1
+
+enum catalog_decl_type { /* Declaration kind passed to catalog_lookup_entity(). */
+ CATALOG_NO_DECL = -1, /* Used when none of the kinds below applies. */
+ CATALOG_ENTITY_DECL,
+ CATALOG_DOCTYPE_DECL,
+ CATALOG_LINKTYPE_DECL
+};
+
+#define CATALOG_SYSTEM_ERROR 1 /* Bit tested in the error handler's flags argument. */
+
+#ifdef __STDC__
+
+typedef void *CATALOG; /* Opaque catalog handle returned by catalog_create(). */
+typedef void (*CATALOG_ERROR_HANDLER)(const char *filename, /* Error callback type given to catalog_create(). */
+ unsigned long lineno,
+ int error_number, /* Can be turned into text with catalog_error_text(). */
+ unsigned flags, /* May have CATALOG_SYSTEM_ERROR set. */
+ int sys_errno);
+CATALOG catalog_create(CATALOG_ERROR_HANDLER);
+void catalog_load_file(CATALOG, const char *); /* Second argument is the name of a catalog entry file. */
+void catalog_delete(CATALOG);
+int catalog_lookup_entity(CATALOG, /* Callers treat a non-zero result as "found". */
+ const char *public_id,
+ const char *name,
+ enum catalog_decl_type,
+ const char *subst_table, /* Callers pass a null pointer when no substitution applies. */
+ const char **system_id, /* Out: system identifier of the matching entry. */
+ const char **catalog_file); /* Out: name of the catalog file holding the entry. */
+const char *catalog_error_text(int error_number);
+
+#else /* not __STDC__ */
+
+typedef char *CATALOG; /* Same handle for compilers without void *. */
+typedef void (*CATALOG_ERROR_HANDLER)();
+CATALOG catalog_create();
+void catalog_load_file();
+void catalog_delete();
+int catalog_lookup_entity();
+char *catalog_error_text();
+
+#endif /* not __STDC__ */
+
+#endif /* not CATALOG_H */
diff --git a/usr.bin/sgmls/sgmls/config.h b/usr.bin/sgmls/sgmls/config.h
index 562cdcf..a7fa92c 100644
--- a/usr.bin/sgmls/sgmls/config.h
+++ b/usr.bin/sgmls/sgmls/config.h
@@ -11,6 +11,17 @@ Usually the same as PATH_FILE_SEP. */
#define SYSID_FILE_SEP ':'
/* The environment variable that contains the list of filename templates. */
#define PATH_ENV_VAR "SGML_PATH"
+/* A macro that returns non-zero if the filename is relative to the
+ current directory. */
+#define FILE_IS_RELATIVE(p) ((p)[0] != '/')
+/* A string containing the characters that can separate the directory
+ part of a filename from the basename. */
+#define DIR_BASE_SEP "/"
+/* The environment variable that contains the list of catalog entry files.
+ Filenames are separated by PATH_FILE_SEP. */
+#define CATALOG_FILES_ENV_VAR "SGML_CATALOG_FILES"
+/* Default list of catalog entry files. */
+#define DEFAULT_CATALOG_FILES "CATALOG:/usr/share/sgml/CATALOG"
/* MIN_DAT_SUBS_FROM and MIN_DATS_SUBS_TO tell sgmls how to transform a name
or system identifier into a legal filename. A character in
@@ -46,7 +57,7 @@ An implementations of these functions is included and will be used if
you don't define this. On SunOS 4.1.1, if you do define this you
should set CC=/usr/xpg2bin/cc in the makefile. */
-/* #define HAVE_CAT 1 */
+#define HAVE_CAT 1
#ifdef __STDC__
/* Define this if your compiler supports prototypes. */
diff --git a/usr.bin/sgmls/sgmls/context.c b/usr.bin/sgmls/sgmls/context.c
index 1eb5a5c..10a123a 100644
--- a/usr.bin/sgmls/sgmls/context.c
+++ b/usr.bin/sgmls/sgmls/context.c
@@ -44,27 +44,34 @@ int mexts; /* >0=stack level of minus grp; -1=plus; 0=none.*/
{
UNCH toccsv, gtypesv; /* Save token's TOCC and GTYPE in case grp ends.*/
- if (mexts == -1) {
- if (STATUS == RCEND)
+ if (mexts != 0) {
+ if (mexts == -1 && STATUS == RCEND)
return RCPEX;
copypos(savedpos, pos);
}
Tstart = T; /* Save starting token for AND group testing. */
while (STATUS!=RCMISS && STATUS!=RCEND) {
- TRACEGI("CONTEXT", gi, mod, pos, Tstart);
+ TRACEGI("CONTEXT", gi, mod, pos);
while (TTYPE==TTOR || TTYPE==TTSEQ || TTYPE==TTAND) {
pos[P+1].g = M++; pos[++P].t = 1; HITCLEAR(H);
Tstart = T; /* Save starting token for AND group testing. */
- TRACEGI("OPENGRP", gi, mod, pos, Tstart);
+ TRACEGI("OPENGRP", gi, mod, pos);
}
STATUS = (UNCH)tokenreq(gi, mod, pos);
- TRACEGI("STATUS", gi, mod, pos, Tstart);
+ TRACEGI("STATUS", gi, mod, pos);
if (gi==TOKEN.tu.thetd) { /* Hit in model. */
STATUS = (UNCH)RCHIT;
gtypesv = GTYPE; toccsv = TOCC;
newtoken(mod, pos, statuspt);
- return(mexts<=0 ? RCHIT : (gtypesv==TTOR || BITON(toccsv, TOPT))
- ? RCMEX : RCHITMEX);
+ if (mexts <= 0)
+ return RCHIT;
+ else if (gtypesv==TTOR || BITON(toccsv, TOPT)) {
+ /* restore position */
+ copypos(pos, savedpos);
+ return RCMEX;
+ }
+ else
+ return RCHITMEX;
}
if (STATUS==RCREQ) {
if (mexts == -1)
@@ -100,12 +107,12 @@ UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
unsigned next; /* Position in AND group of next testable token.*/
Tstart = T;
- TRACEEND("ECONT", mod, pos, 0, 0, Tstart);
+ TRACEEND("ECONT", mod, pos, 0, 0);
if (P<=1) {nextetd = 0; return(TOKENHIT || BITON(TOCC, TOPT));}
nextetd = TTYPE == TTETD ? TOKEN.tu.thetd : 0;
while (STATUS!=RCMISS && STATUS!=RCEND) {
STATUS = (UNCH)testend(mod, pos, 0, 0);
- TRACEEND("ECONTEND", mod, pos, 0, 0, Tstart);
+ TRACEEND("ECONTEND", mod, pos, 0, 0);
nextetd = P<=1 || TTYPE != TTETD ? 0 : TOKEN.tu.thetd;
if (STATUS==RCEND) return(1);
if (P<=1) return(TOKENHIT || BITON(TOCC, TOPT));
@@ -121,7 +128,7 @@ UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
next : offbit(H, 0, GNUM));
M = G + grpsz(&GHDR, (int)T-1) + 1;
- TRACEEND("ECONTNEW", mod, pos, 0, 0, Tstart);
+ TRACEEND("ECONTNEW", mod, pos, 0, 0);
}
if (STATUS==RCMISS) {
if (BITON(TOCC, TOPT)) nextetd = 0;
@@ -182,7 +189,7 @@ UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
In either case, set M to correspond to the new T.
*/
retest:
- TRACEEND("RETEST", mod, pos, (int)nextand, 1, Tstart);
+ TRACEEND("RETEST", mod, pos, (int)nextand, 1);
if (GTYPE==TTAND) {
nextand = offbit(H, (int)T, GNUM);
if (!nextand)
@@ -212,7 +219,7 @@ UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
}
}
else STATUS = RCMISS;
- TRACEEND("NEWTOKEN", mod, pos, (int)nextand, 1, Tstart);
+ TRACEEND("NEWTOKEN", mod, pos, (int)nextand, 1);
}
/* TESTEND: End the current group, if possible, and any that it is nested in.
The current token will either be a group header, or some token
@@ -228,7 +235,7 @@ int newtknsw; /* 1=new token test; 0=end element test. */
int rc = 0; /* Return code: RCNREQ RCHIT RCMISS RCEND */
while (!rc) {
- TRACEEND("TRACEEND", mod, pos, rc, andoptsw, Tstart);
+ TRACEEND("TRACEEND", mod, pos, rc, andoptsw);
/* TESTMISS:
If we've hit no tokens yet in the current group, and
the current token is the last unhit one in the group we can test,
@@ -244,7 +251,7 @@ int newtknsw; /* 1=new token test; 0=end element test. */
*/
if (!ANYHIT(H) && (T==GNUM
|| (GTYPE==TTSEQ && BITOFF(TOCC, TOPT)))) {
- M = G; --P; Tstart = T;
+ M = G; --P;
if (P<=1) {
if (BITON(TOCC, TOPT) || TOKENHIT) rc = RCEND;
else rc = RCMISS;
@@ -280,7 +287,7 @@ int newtknsw; /* 1=new token test; 0=end element test. */
}
else rc = RCNREQ; /* No group ended this time, so return. */
}
- TRACEEND("ENDFOUND", mod, pos, rc, andoptsw, Tstart);
+ TRACEEND("ENDFOUND", mod, pos, rc, andoptsw);
return(rc);
}
/* TOKENOPT: Return 1 if current token is contextually optional;
@@ -290,7 +297,7 @@ int tokenopt(mod, pos)
struct thdr mod[]; /* Model of current open element. */
struct mpos pos[]; /* Position in open element's model. */
{
- TRACEEND("TOKENOPT", mod, pos, 0, 0, Tstart);
+ TRACEEND("TOKENOPT", mod, pos, 0, 0);
return (BITON(TOCC, TOPT) /* Inherently optional. */
|| TOKENHIT /* Was hit (handles "plus" suffix case). */
|| (!ANYHIT(H) && groupopt(mod, pos)));
@@ -330,7 +337,7 @@ struct etd *gi; /* ETD of new GI. */
struct thdr mod[]; /* Model of current open element. */
struct mpos pos[]; /* Position in open element's model. */
{
- TRACEGI("TOKENREQ", gi, mod, pos, Tstart);
+ TRACEGI("TOKENREQ", gi, mod, pos);
return( tokenopt(mod, pos) ? RCNREQ
: ( GTYPE==TTSEQ && (ANYHIT(H) || groupreq(gi, mod, pos)==RCREQ)
#if 0
diff --git a/usr.bin/sgmls/sgmls/context.h b/usr.bin/sgmls/sgmls/context.h
index 04350c7..01f4383 100644
--- a/usr.bin/sgmls/sgmls/context.h
+++ b/usr.bin/sgmls/sgmls/context.h
@@ -7,6 +7,8 @@
#define P pos[0].t /* Index of current group in pos. */
#define G pos[P].g /* Index of current group in model. */
#define T pos[P].t /* Index of current token in its group. */
+#define Tstart pos[P].tstart /* Index of starting token in its group
+ for AND group testing. */
#define H pos[P].h /* Pointer to hit bits for current group. */
#define GHDR mod[G] /* Current group header. */
#define TOKEN mod[M] /* Current token. */
diff --git a/usr.bin/sgmls/sgmls/ebcdic.h b/usr.bin/sgmls/sgmls/ebcdic.h
index 1c35bcb..3e0f3bd 100644
--- a/usr.bin/sgmls/sgmls/ebcdic.h
+++ b/usr.bin/sgmls/sgmls/ebcdic.h
@@ -23,18 +23,3 @@
/* See comment in latin1.h. */
#define CANON_DATACHAR 254
-
-/* Components for a formal public identifier for the whole of the
-system character set. Protect with ifndef so that it can be overriden
-in config.h. */
-
-/* Use a private escape sequence. */
-#ifndef SYSTEM_CHARSET_DESIGNATING_SEQUENCE
-#define SYSTEM_CHARSET_DESIGNATING_SEQUENCE "ESC 2/5 2/15 3/0"
-#endif
-#ifndef SYSTEM_CHARSET_OWNER
-#define SYSTEM_CHARSET_OWNER "-//IBM"
-#endif
-#ifndef SYSTEM_CHARSET_DESCRIPTION
-#define SYSTEM_CHARSET_DESCRIPTION "Code Page 1047"
-#endif
diff --git a/usr.bin/sgmls/sgmls/entgen.c b/usr.bin/sgmls/sgmls/entgen.c
index e08e9f0..2146829 100644
--- a/usr.bin/sgmls/sgmls/entgen.c
+++ b/usr.bin/sgmls/sgmls/entgen.c
@@ -1,7 +1,7 @@
/* entgen.c -
Implement entgen() which generates a list of filenames from a struct fpi.
-
+
Written by James Clark (jjc@jclark.com).
*/
@@ -51,6 +51,8 @@ static int field P((struct fpi *, int, char *));
static int mindatcpy P((char *, char *, int, int));
static int testopen P((char *));
static UNIV sysidgen P((char *));
+static UNIV catsysidgen P((const char *, const char *));
+static const char *basename P((const char *));
static char *path = 0;
@@ -105,6 +107,14 @@ static char *ext[] = {
"lpd", /* Link process definition */
};
+static CATALOG catalog; /* Catalog passed to catalog_lookup_entity() by entgen(); set by entginit(). */
+
+VOID entginit(swp) /* Remember the catalog stored in the parser switches. */
+struct switches *swp;
+{
+ catalog = swp->catalog;
+}
+
/* Like memcpy, but substitute, fold to lower case (if fold is
non-zero) and null terminate. This is used both for minimum data and
for names. If p is NULL, do nothing. Return len. */
@@ -207,7 +217,7 @@ char *buf;
/* return -1 if the formal public identifier was invalid or missing. */
if (f->fpiversw < 0 || !f->fpipubis)
return -1;
-
+
switch (c) {
case 'A': /* Is it available? */
return f->fpitt == '+' ? 0 : -1;
@@ -278,7 +288,12 @@ char *pathname;
UNIV entgen(f)
struct fpi *f;
{
+ char *qname;
char *file;
+ enum catalog_decl_type dtype;
+ char *subst = 0;
+ const char *sysid;
+ const char *catfile;
assert(f->fpistore != 6); /* Musn't call entgen for a notation. */
if (!path) {
@@ -300,14 +315,46 @@ struct fpi *f;
p++;
}
}
+
+ if (f->fpisysis && !sysidsrch)
+ return sysidgen((char *)f->fpisysis);
+
+ qname = (char *)f->fpinm;
+
+ switch (f->fpistore) {
+ case 3:
+ /* fall through */
+ qname--; /* hack */
+ case 1:
+ case 2:
+ dtype = CATALOG_ENTITY_DECL;
+ if (ENTCASE)
+ subst = getsubst();
+ break;
+ case 4:
+ dtype = CATALOG_DOCTYPE_DECL;
+ if (NAMECASE)
+ subst = getsubst();
+ break;
+ default:
+ dtype = CATALOG_NO_DECL;
+ }
+
+ if (catalog_lookup_entity(catalog,
+ (char *)f->fpipubis,
+ qname,
+ dtype,
+ (char *)subst,
+ &sysid,
+ &catfile))
+ return catsysidgen(sysid, catfile);
if (f->fpisysis
- && (!sysidsrch
- || strchr((char *)f->fpisysis, SYSID_FILE_SEP)
+ && (strchr((char *)f->fpisysis, SYSID_FILE_SEP)
|| strcmp((char *)f->fpisysis, STDINNAME) == 0))
return sysidgen((char *)f->fpisysis);
file = path;
-
+
for (;;) {
char *p;
int len = 0;
@@ -334,7 +381,7 @@ struct fpi *f;
}
else
len++;
-
+
if (len > 0) {
/* We've got a valid non-empty filename. */
char *s;
@@ -368,7 +415,7 @@ UNIV sysidgen(s)
char *s;
{
char *buf, *p;
-
+
buf = (char *)rmalloc(strlen(s) + 2);
for (p = buf; *s; s++) {
@@ -394,6 +441,71 @@ char *s;
return buf;
}
+/* Handle a system id in a catalog entry file.
+   s is a list of filenames separated by SYSID_FILE_SEP; catfile is the
+   name of the catalog file the entry came from.  Each relative filename
+   is prefixed with the directory part of catfile (everything before its
+   basename).  Returns a buffer obtained from rmalloc() that holds the
+   filenames one after another, each NUL-terminated, with one more NUL
+   after the last; returns 0 if s yields no filename at all. */
+static
+UNIV catsysidgen(s, catfile)
+const char *s;
+const char *catfile;
+{
+ const char *p;
+ char *bufp;
+ char *buf;
+ int nrelative = 0; /* upper bound on the number of directory prefixes needed */
+ int catdirlen = 0; /* length of the directory part of catfile */
+ if (FILE_IS_RELATIVE(s))
+ nrelative++;
+ for (p = s; *p; p++)
+ if (*p == SYSID_FILE_SEP
+ && FILE_IS_RELATIVE(p + 1))
+ nrelative++;
+ if (nrelative) {
+ const char *base = basename(catfile);
+ catdirlen = base - catfile;
+ }
+ buf = (char *)rmalloc(p - s + 2 + nrelative*catdirlen); /* p is at the end of s: length of s, two NULs, room for the prefixes */
+ bufp = buf;
+ for (;;) {
+ if (!*s)
+ break;
+ if (*s != SYSID_FILE_SEP && FILE_IS_RELATIVE(s)) { /* non-empty relative filename: copy the directory first */
+ memcpy(bufp, catfile, catdirlen);
+ bufp += catdirlen;
+ }
+ for (;;) { /* copy one filename, consuming the separator that ends it */
+ if (*s == SYSID_FILE_SEP) {
+ s++;
+ break;
+ }
+ *bufp++ = *s++;
+ if (*s == '\0')
+ break;
+ }
+ if (bufp > buf && bufp[-1] != '\0') /* terminate the filename just copied; empty fields add nothing */
+ *bufp++ = '\0';
+ }
+ if (bufp == buf) { /* nothing was copied: s was empty or held only separators */
+ frem((UNIV)buf);
+ return 0;
+ }
+ *bufp++ = '\0'; /* extra NUL marks the end of the list */
+ return buf;
+}
+
+/* Return a pointer to the basename part of s: the character following
+   the last character of s that appears in DIR_BASE_SEP, or s itself when
+   s contains no such character.  The first character of s is examined
+   too, so that for a file in the root directory (e.g. "/CATALOG") the
+   directory part is "/" rather than empty. */
+static
+const char *basename(s)
+const char *s;
+{
+  const char *p = s;
+
+  while (*p)
+    p++;
+  /* Scan backwards from the last character down to and including s[0]. */
+  while (p > s) {
+    --p;
+    if (strchr(DIR_BASE_SEP, *p))
+      return p + 1;
+  }
+  return s;
+}
+
/*
Local Variables:
c-indent-level: 5
diff --git a/usr.bin/sgmls/sgmls/entity.h b/usr.bin/sgmls/sgmls/entity.h
index d7d3096..84a3515 100644
--- a/usr.bin/sgmls/sgmls/entity.h
+++ b/usr.bin/sgmls/sgmls/entity.h
@@ -5,6 +5,7 @@
*/
#include "tools.h" /* Definitions for type declarations, etc. */
#include "msgcat.h"
+#include "catalog.h"
#define STDINNAME "-" /* File name that refers to standard input. */
@@ -151,7 +152,8 @@ struct switches { /* Parser control switches (1=non-standard). */
int swenttr; /* 1=trace entity stack in error messages; 0=no.*/
int sweltr; /* 1=trace element stack in error messages; 0=no. */
int swambig; /* 1=check content model ambiguity */
- int swundef; /* 1=warn about undefined elements and notations. */
+ int swundef; /* 1=warn about undefined elements. */
+ int swcap; /* 1=report capcity errors */
char *prog; /* Program name for error messages. */
#ifdef TRACE
char *trace; /* What to trace in the body. */
@@ -163,6 +165,7 @@ struct switches { /* Parser control switches (1=non-standard). */
char **includes; /* List of parameter entities to be defined
as "INCLUDE"; NULL terminated.*/
VOID (*die) P((void)); /* Function to call on fatal error. */
+ CATALOG catalog; /* Catalog for generating system identifiers. */
};
struct markup { /* Delimiter strings for text processor. */
UNCH *cro; /* LEXCON markup string: CRO */
diff --git a/usr.bin/sgmls/sgmls/etype.h b/usr.bin/sgmls/sgmls/etype.h
index 707f602..8ec64c1 100644
--- a/usr.bin/sgmls/sgmls/etype.h
+++ b/usr.bin/sgmls/sgmls/etype.h
@@ -25,7 +25,7 @@
struct thdr { /* Token header or model header. */
UNCH ttype; /* Token type attributes or model content. */
union {
- int tnum; /* Group token: tokens in group.
+ int tnum; /* Group token: tokens in group.
Model header: content tokens at any level. */
struct etd *thetd; /* GI token: ptr to etd. */
} tu;
@@ -64,6 +64,8 @@ extern struct etd dumetd[];
struct mpos { /* Position of current element in model. */
UNCH g; /* Index of this group in the model. */
UNCH t; /* Index of the current token in this group. */
+ UNCH tstart; /* Index of starting token for AND group
+ testing. */
unsigned long *h; /* Hit bits of this group's tokens. */
};
diff --git a/usr.bin/sgmls/sgmls/genlex.c b/usr.bin/sgmls/sgmls/genlex.c
index 2a0d3a6..b653d14 100644
--- a/usr.bin/sgmls/sgmls/genlex.c
+++ b/usr.bin/sgmls/sgmls/genlex.c
@@ -12,7 +12,28 @@ extern UNCH *lextabs[];
extern UNCH lextran[];
static char *lextabnames[] = {
- "lexcnm", "lexcon", "lexgrp", "lexlms", "lexmark", "lexsd", "lextoke"
+ "lexcnm", "lexcon", "lexgrp", "lexlms", "lexmark", "lexsd", "lextoke",
+ "lexmin"
+};
+
+#define UNUSED -1
+
+extern int iso646charset[];
+extern int iso646G0charset[];
+extern int iso646C0charset[];
+extern int iso8859_1charset[];
+extern int iso6429C1charset[];
+
+/* Character set tables emitted by main(): `name' is the identifier the
+   generated array is given and `map' is the built-in table it is
+   generated from.  Each table is listed exactly once, because every
+   entry produces one array definition in the output and a repeated
+   entry would define the same array twice. */
+static struct {
+  char *name;
+  int *map;
+} charsets[] = {
+  { "iso646charset", iso646charset },
+  { "iso646G0charset", iso646G0charset },
+  { "iso8859_1charset", iso8859_1charset },
+  { "iso646C0charset", iso646C0charset },
+  { "iso6429C1charset", iso6429C1charset },
+};
static VOID print_tab(s, t)
@@ -34,7 +55,7 @@ int main(argc, argv)
UNCH tab[256];
char special[256];
/* Shunned character numbers in the reference concrete syntax. */
- static UNCH refshun[] = {
+ static UNCH refshun[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 255
};
@@ -89,7 +110,7 @@ int main(argc, argv)
for (j = 0; j < 256; j++)
if (!special[j]) {
- if (shunned[j])
+ if (shunned[j])
tab[j] = lextabs[i][CANON_ASCII_NONSGML];
else
tab[j] = lextabs[i][CANON_ASCII_DATACHAR];
@@ -102,13 +123,18 @@ int main(argc, argv)
tab[charset[i]] = charset[lextran[i]];
print_tab("lextran", tab);
- /* Generate asciicharset. */
- fputs("int asciicharset[] = {\n", stdout);
- for (i = 0; i < 128; i++)
- printf("%3d,%c", charset[i], (i + 1) % 16 == 0 ? '\n' : ' ');
- for (i = 128; i < 256; i++)
- printf("UNUSED,%c", (i + 1) % 8 == 0 ? '\n' : ' ');
- fputs("};\n", stdout);
+ /* Generate charsets. */
+ for (i = 0; i < sizeof(charsets)/sizeof(charsets[0]); i++) {
+ int j;
+ int *map = charsets[i].map;
+ printf("\nint %s[] = {\n", charsets[i].name);
+ for (j = 0; j < 256; j++)
+ if (map[j] == UNUSED)
+ printf("UNUSED,%c", (j + 1) % 8 == 0 ? '\n' : ' ');
+ else
+ printf("%3d,%c", charset[map[j]], (j + 1) % 16 == 0 ? '\n' : ' ');
+ fputs("};\n", stdout);
+ }
exit(EXIT_SUCCESS);
}
diff --git a/usr.bin/sgmls/sgmls/getopt.c b/usr.bin/sgmls/sgmls/getopt.c
index bc8edeb..9a218b3 100644
--- a/usr.bin/sgmls/sgmls/getopt.c
+++ b/usr.bin/sgmls/sgmls/getopt.c
@@ -132,7 +132,7 @@ char *opts;
else
optarg = argv[optind++];
sp = 1;
- }
+ }
else {
if (argv[optind][++sp] == '\0') {
sp = 1;
diff --git a/usr.bin/sgmls/sgmls/latin1.h b/usr.bin/sgmls/sgmls/latin1.h
index 44f43f3..c6df696 100644
--- a/usr.bin/sgmls/sgmls/latin1.h
+++ b/usr.bin/sgmls/sgmls/latin1.h
@@ -35,17 +35,3 @@ shunned in the reference concrete syntax and is not the number of a
significant (in the reference concrete syntax) SGML character nor one
of the above characters. */
#define CANON_DATACHAR 254
-
-/* Components for a formal public identifier for the whole of the
-system character set. Protect with ifndef so that it can be overriden
-in config.h. */
-
-#ifndef SYSTEM_CHARSET_DESIGNATING_SEQUENCE
-#define SYSTEM_CHARSET_DESIGNATING_SEQUENCE "ESC 2/13 4/1"
-#endif
-#ifndef SYSTEM_CHARSET_OWNER
-#define SYSTEM_CHARSET_OWNER "ISO Registration Number 100"
-#endif
-#ifndef SYSTEM_CHARSET_DESCRIPTION
-#define SYSTEM_CHARSET_DESCRIPTION "ECMA-94 Right Part of Latin Alphabet Nr. 1"
-#endif
diff --git a/usr.bin/sgmls/sgmls/lexcode.h b/usr.bin/sgmls/sgmls/lexcode.h
index e4047ba..d34e3e6 100644
--- a/usr.bin/sgmls/sgmls/lexcode.h
+++ b/usr.bin/sgmls/sgmls/lexcode.h
@@ -3,6 +3,7 @@
#define FCE 27 /* FRE Free character in use as an entity reference */
#define FRE 0 /* FREECHAR that is not in a CON delimiter-in-context. */
#define LITC 21 /* LIT LITA PIC or EE in use as a literal terminator */
+#define MINLITC 13 /* LIT LITA as literal terminator in minimum data */
#define MSC3 15 /* ] Also MSC[2]. */
#define NET 17 /* / When enabled. */
#define ETI 16 /* / Actually ETAGO[2] */
diff --git a/usr.bin/sgmls/sgmls/lexrf.c b/usr.bin/sgmls/sgmls/lexrf.c
index ec3db83..643b336 100644
--- a/usr.bin/sgmls/sgmls/lexrf.c
+++ b/usr.bin/sgmls/sgmls/lexrf.c
@@ -110,6 +110,7 @@ struct lexical lex = { /* Delimiter set constants for parser use. */
FCE, /* LEXCNM: FRE char as entity reference.*/
FRE, /* LEXLMS: Free character not an entity ref.*/
LITC, /* LEXLMS: Literal close delimiter enabled. */
+ MINLITC, /* LEXMIN: Literal close delimiter enabled. */
MSC3, /* LEXLMS: Marked section close delim enabled. */
NET, /* LEXCON: Null end-tag delimiter enabled. */
ETI, /* LEXCON: NET disabled; still used as ETI. */
@@ -120,5 +121,5 @@ struct lexical lex = { /* Delimiter set constants for parser use. */
};
UNCH *lextabs[] = {
- lexcnm, lexcon, lexgrp, lexlms, lexmark, lexsd, lextoke, 0
+ lexcnm, lexcon, lexgrp, lexlms, lexmark, lexsd, lextoke, lexmin, 0
};
diff --git a/usr.bin/sgmls/sgmls/lextaba.c b/usr.bin/sgmls/sgmls/lextaba.c
index 38a2fd1..a851d85 100644
--- a/usr.bin/sgmls/sgmls/lextaba.c
+++ b/usr.bin/sgmls/sgmls/lextaba.c
@@ -331,6 +331,69 @@ FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, NON
#undef TGC3
#undef TGO3
/* def LITC*/
+/* LEXMIN: Lexical table for minimum data literals.
+*/
+/* Symbols for SGML character set divisions and function characters.
+*/
+#define FRE 0 /* Free char: not in a delimiter or minimum literal. */
+#define NU 1 /* Numeral Numerals */
+#undef MIN
+#define MIN 2 /* Minimum literal '()+,-./:?= */
+#define NMS 3 /* LC/UCNMSTRT Lower and uppercase letters */
+#define SPC 4 /* SPACE 32 Space */
+#define NON 5 /* NONSGML 0-31 127 255 Unused, except for: */
+#define EE 6 /* NONSGML 00 26 Entity end (end of file) */
+#define EOB 7 /* NONSGML 28 End disk buffer */
+#define RS 8 /* Function 10 Line feed */
+#define RE 9 /* Function 13 Carrier return */
+#define SEP 10 /* SEPCHAR 09 TAB: horizontal tab */
+/*#define CDE 11 NONSGML delcdata CDATA/SDATA delimiter */
+#define NSC 12 /* NONSGML delnonch Non-SGML character prefix */
+/* Either LIT or LITA changed to LITC when a literal is begun.
+ It is changed back when the LITC occurs (i.e., when the literal ends).
+*/
+UNCH lexmin[256] = { /*
+000 001       bs tab lf home ff cr so si */
+EE, NON, NON, NON, NON, NON, NON, NON, NON ,SEP, RS, NON, NON, RE, NON, NON, /*
+          eof esc rt left up down */
+NON, NON, NON, NON, NON, NON, NON, NON, NON, NON, EE, NON, EOB, NON, NON, NSC, /*
+032 ! " # $ % & ' ( ) * + , - . / */
+SPC, FRE, FRE, FRE, FRE, FRE, FRE, MIN, MIN, MIN, FRE, MIN, MIN, MIN, MIN, MIN, /*
+0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
+NU , NU , NU , NU , NU , NU , NU , NU , NU , NU , MIN, FRE, FRE, MIN, FRE, MIN, /*
+@ A B C D E F G H I J K L M N O */
+FRE, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /*
+P Q R S T U V W X Y Z [ \ ] ^ _ */
+NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, FRE, FRE, FRE, FRE, FRE, /*
+` a b c d e f g h i j k l m n o */
+FRE, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /*
+p q r s t u v w x y z { | } ~ 127 */
+NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, FRE, FRE, FRE, FRE, NON,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE,
+FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, NON
+};
+/* free nu min nms spc non ee eob rs re sep cde nsc ero
+ mdo msc mso pero rni tago tagc litc */
+/* def FRE*/
+#undef NU
+#undef MIN
+#undef NMS
+#undef SPC
+#undef NON
+#undef EE
+#undef EOB
+#undef RS
+#undef RE
+#undef SEP
+/* def CDE*/
+/* def NSC*/
+/* def LITC*/
/* LEXMARK: Lexical scan table for markup: PCBMD? and PCB?TAG.
*/
/* Symbols for SGML character set divisions. */
@@ -457,15 +520,15 @@ DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT,
DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, NON
};
-#undef SIG
-#undef DAT
-#undef NON
-#undef NU
-#undef NMS
-#undef SPC
-#undef EE
-#undef EOB
-#undef RS
+#undef SIG
+#undef DAT
+#undef NON
+#undef NU
+#undef NMS
+#undef SPC
+#undef EE
+#undef EOB
+#undef RS
#undef COM1
#undef LIT3
#undef LITA
@@ -531,7 +594,7 @@ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV
};
/* This table maps ASCII to the system character set. */
-int asciicharset[] = {
+int iso646charset[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
@@ -557,3 +620,131 @@ UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
};
+
+/* This table maps the C0 part of ISO646 to the system character set. */
+/* We throw in 32 and 127 for free, since ISO 2022 maps them in
+automatically. */
+int iso646C0charset[] = {
+0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+32, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, 127,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+};
+
+/* This table maps the G0 part of ISO646 to the system character set. */
+int iso646G0charset[] = {
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
+112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+};
+
+int iso8859_1charset[] = { /* Positions 32-127 map to 160-255; every other position is UNUSED.  Presumably the right-hand part of ISO 8859-1 -- confirm against the users of this table. */
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
+176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
+192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
+208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
+224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+};
+
+int iso6429C1charset[] = { /* Positions 0-31 map to 128-159; every other position is UNUSED. */
+128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
+144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+};
diff --git a/usr.bin/sgmls/sgmls/lextabe.c b/usr.bin/sgmls/sgmls/lextabe.c
index f93af89..5cfe0de 100644
--- a/usr.bin/sgmls/sgmls/lextabe.c
+++ b/usr.bin/sgmls/sgmls/lextabe.c
@@ -137,6 +137,25 @@ UNCH lextoke[] = {
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 0, 0, 0, 0, 0,
};
+UNCH lexmin[] = { /* 256 small class numbers (0-12), one per character code, 16 per row. NOTE(review): presumably the lexical classes used when scanning minimum data, laid out for EBCDIC (codes 240-249 -> 1, the letter ranges -> 3, code 64 -> 4) - confirm against the parser. */
+ 6, 5, 5, 5, 5, 10, 5, 5, 5, 5, 5, 5, 5, 9, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 5, 5, 12,
+ 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
+ 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 2, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
+ 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 5,
+};
+
UNCH lextran[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
@@ -156,7 +175,8 @@ UNCH lextran[] = {
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
};
-int asciicharset[] = {
+
+int iso646charset[] = {
0, 1, 2, 3, 55, 45, 46, 47, 22, 5, 37, 11, 12, 13, 14, 15,
16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31,
64, 90, 127, 123, 91, 108, 80, 125, 77, 93, 92, 78, 107, 96, 75, 97,
@@ -182,3 +202,156 @@ UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
};
+
+int iso646G0charset[] = { /* 256 entries indexed by character number: only positions 32-127 are mapped, every other position is UNUSED. The mapped values look like EBCDIC code points (48-57 -> 240-249). NOTE(review): "G0" presumably names the ISO 646 graphic set - confirm against the code that reads this table. */
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+ 64, 90, 127, 123, 91, 108, 80, 125, 77, 93, 92, 78, 107, 96, 75, 97,
+240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 122, 94, 76, 126, 110, 111,
+124, 193, 194, 195, 196, 197, 198, 199, 200, 201, 209, 210, 211, 212, 213, 214,
+215, 216, 217, 226, 227, 228, 229, 230, 231, 232, 233, 173, 224, 189, 176, 109,
+121, 129, 130, 131, 132, 133, 134, 135, 136, 137, 145, 146, 147, 148, 149, 150,
+151, 152, 153, 162, 163, 164, 165, 166, 167, 168, 169, 192, 79, 208, 161, 7,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+};
+
+int iso646G0charset[] = {
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+ 64, 90, 127, 123, 91, 108, 80, 125, 77, 93, 92, 78, 107, 96, 75, 97,
+240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 122, 94, 76, 126, 110, 111,
+124, 193, 194, 195, 196, 197, 198, 199, 200, 201, 209, 210, 211, 212, 213, 214,
+215, 216, 217, 226, 227, 228, 229, 230, 231, 232, 233, 173, 224, 189, 176, 109,
+121, 129, 130, 131, 132, 133, 134, 135, 136, 137, 145, 146, 147, 148, 149, 150,
+151, 152, 153, 162, 163, 164, 165, 166, 167, 168, 169, 192, 79, 208, 161, 7,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+};
+
+int iso8859_1charset[] = { /* 256 entries indexed by character number: only positions 32-127 are mapped, every other position is UNUSED. NOTE(review): the 96 mapped positions presumably stand for the right-hand (0xA0-0xFF) half of ISO 8859-1 and the values for the matching EBCDIC codes - confirm against the code that reads this table. */
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+ 65, 170, 74, 177, 159, 178, 106, 181, 187, 180, 154, 138, 95, 202, 175, 188,
+144, 143, 234, 250, 190, 160, 182, 179, 157, 218, 155, 139, 183, 184, 185, 171,
+100, 101, 98, 102, 99, 103, 158, 104, 116, 113, 114, 115, 120, 117, 118, 119,
+172, 105, 237, 238, 235, 239, 236, 191, 128, 253, 254, 251, 252, 186, 174, 89,
+ 68, 69, 66, 70, 67, 71, 156, 72, 84, 81, 82, 83, 88, 85, 86, 87,
+140, 73, 205, 206, 203, 207, 204, 225, 112, 221, 222, 219, 220, 141, 142, 223,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+};
+
+int iso646C0charset[] = { /* 256 entries indexed by character number: positions 0-31, 32 and 127 are mapped, every other position is UNUSED. NOTE(review): "C0" presumably names the ISO 646 control set (plus space and delete), with EBCDIC codes as values - confirm against the code that reads this table. */
+ 0, 1, 2, 3, 55, 45, 46, 47, 22, 5, 37, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31,
+ 64, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, 7,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+};
+
+int iso6429C1charset[] = { /* 256 entries indexed by character number: positions 0-31 are mapped, positions 32-255 are UNUSED. NOTE(review): "C1" presumably names the ISO 6429 additional control set, with EBCDIC codes as values - confirm against the code that reads this table. */
+ 4, 6, 8, 9, 10, 20, 21, 23, 26, 27, 32, 33, 34, 35, 36, 40,
+ 41, 42, 43, 44, 48, 49, 51, 52, 53, 54, 56, 57, 58, 59, 62, 255,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED,
+};
diff --git a/usr.bin/sgmls/sgmls/lineout.c b/usr.bin/sgmls/sgmls/lineout.c
index 553c835..794eff8 100644
--- a/usr.bin/sgmls/sgmls/lineout.c
+++ b/usr.bin/sgmls/sgmls/lineout.c
@@ -150,7 +150,7 @@ UNCH *ename;
int rc;
PNE np;
UNCH *tp;
-
+
if (sgmlment(ename)) /* already defined it */
return;
rc = sgmlgent(ename, &np, &tp);
@@ -234,8 +234,10 @@ int aln;
else if (ADTYPE(al, aln) == AENTITY)
define_entity(ADVAL(al, aln));
output_begin_attribute(ent, ADNAME(al, aln), ADTYPE(al, aln));
- if (ADTYPE(al, aln) == ACHARS)
- output_attribute_token(ustrlen(ADVAL(al, aln)), ADVAL(al, aln));
+ if (ADTYPE(al, aln) == ACHARS) {
+ putchar(' ');
+ print_string(ustrlen(ADVAL(al, aln)), ADVAL(al, aln), 0);
+ }
else
output_attribute_token(*ADVAL(al, aln) - 2, ADVAL(al, aln) + 1);
output_end_attribute();
@@ -294,7 +296,7 @@ UNIV id;
ret = run_process(argv);
if (ret != 0)
suberr++;
-
+
current_filename = 0;
free(argv);
if (ret == 0)
@@ -410,7 +412,7 @@ UNCH *s;
print_string(n, s, 0);
putchar('\n');
}
-
+
static VOID output_implied_attribute(ent, aname)
UNCH *ent, *aname;
@@ -451,7 +453,7 @@ int type;
fatal("invalid attribute type %d", type);
#endif
return "INVALID";
-}
+}
static VOID output_begin_attribute(ent, aname, type)
UNCH *ent, *aname;
@@ -472,7 +474,8 @@ UNS vallen;
UNCH *val;
{
putchar(' ');
- print_string(vallen, val, 0);
+ for (; vallen > 0; --vallen, ++val)
+ putchar(*val);
}
static VOID output_end_attribute()
@@ -576,7 +579,7 @@ int is_sdata;
if (is_sdata)
fputs("\\|", stdout);
}
-
+
static VOID print_id(id, pubid, sysid)
UNIV id;
diff --git a/usr.bin/sgmls/sgmls/main.c b/usr.bin/sgmls/sgmls/main.c
index 4c8bbb3..fb2d303 100644
--- a/usr.bin/sgmls/sgmls/main.c
+++ b/usr.bin/sgmls/sgmls/main.c
@@ -11,6 +11,7 @@
#include "adl.h" /* Definitions for attribute list processing. */
#include "sgmlmain.h" /* Main interface to SGML services. */
#include "appl.h"
+#include "alloc.h"
#define READCNT 512
@@ -25,6 +26,10 @@ including the last character in prog that occurs in PROG_PREFIX. */
#define CAT_NAME "sgmls"
/* Message set to use for application error messages. */
#define APP_SET 4
+/* Message set to use for error messages from catalog.c. */
+#define CAT_SET 5
+#define CATALOG_ERROR_HEADER_MSGNO 20
+#define CATALOG_ERROR_HEADER_TEXT "Catalog error at %s, line %lu"
#ifdef HAVE_EXTENDED_PRINTF
#define xvfprintf vfprintf
@@ -37,6 +42,7 @@ static VOID fatal VP((int, ...));
static VOID do_error P((int, va_list));
static VOID swinit P((struct switches *));
static VOID write_caps P((char *, struct sgmlcap *));
+static VOID do_catalog_error();
static UNIV make_docent P((int, char **));
static char *munge_program_name P((char *, char *));
@@ -66,9 +72,11 @@ static char *prog; /* Program name (for error messages). */
static nl_catd catd; /* Message catalogue descriptor. */
static char *capfile = 0; /* File for capacity report. */
extern char *version_string;
+static CATALOG catalog; /* Entity catalog. */
char options[] = {
'c', ':', 'd', 'e', 'g', 'i', ':', 'l', 'o', ':', 'p', 'r', 's', 'u', 'v',
+ 'm', ':',
#ifdef CANT_REDIRECT_STDERR
'f', ':',
#endif /* CANT_REDIRECT_STDERR */
@@ -112,14 +120,19 @@ char **argv;
prog = argv[0] = munge_program_name(argv[0], "sgmls");
catd = catopen(CAT_NAME, 0);
+ catalog = catalog_create(do_catalog_error);
swinit(&sw);
while ((opt = getopt(argc, argv, options)) != EOF) {
switch (opt) {
+ case 'm':
+ catalog_load_file(catalog, optarg);
+ break;
case 'l': /* Generate location information. */
locsw = 1;
break;
case 'c': /* Print capacity usage. */
+ sw.swcap = 1;
capfile = optarg;
break;
case 's': /* Suppress output. */
@@ -178,7 +191,7 @@ char **argv;
abort();
}
}
-
+
#ifdef CANT_REDIRECT_STDERR
if (errfile) {
FILE *fp;
@@ -285,7 +298,7 @@ char **argv;
for (i = 0; i < argc; i++)
len += strlen(argv[i]) + 1;
-
+
res = xmalloc(len);
ptr = (char *)res;
for (i = 0; i < argc; i++) {
@@ -300,7 +313,7 @@ char **argv;
static VOID usage()
{
/* Don't mention -o since this are for internal use only. */
- fprintf(stderr, "Usage: %s [-deglprsuv]%s [-c file] [-i entity]%s [filename ...]\n",
+ fprintf(stderr, "Usage: %s [-deglprsuv]%s [-c file] [-i entity] [-m file]%s [filename ...]\n",
prog,
#ifdef CANT_REDIRECT_STDERR
" [-f file]",
@@ -338,8 +351,10 @@ struct switches *swp;
swp->ptrace = 0;
#endif /* TRACE */
swp->catd = catd;
+ swp->catalog = catalog;
swp->swambig = 1; /* Always check for ambiguity. */
swp->swundef = 0;
+ swp->swcap = 0; /* Don't check capacities. */
swp->nopen = 0;
swp->onlypro = 0;
swp->includes = 0;
@@ -455,10 +470,10 @@ UNIV id;
for (p = (char *)id, nfiles = 0; *p; p = strchr(p, '\0') + 1)
nfiles++;
-
+
argv = (char **)xmalloc((subargc + 2 + 1 + nfiles + 1)*sizeof(char *));
memcpy((UNIV)argv, (UNIV)subargv, subargc*sizeof(char *));
-
+
i = subargc;
argv[i++] = "-c";
@@ -541,7 +556,7 @@ VOID fatal(int errnum,...)
int errnum;
#endif
va_list ap;
-
+
#ifdef VARARGS
va_start(ap);
errnum = va_arg(ap, int);
@@ -563,7 +578,7 @@ VOID appl_error(int errnum,...)
int errnum;
#endif
va_list ap;
-
+
#ifdef VARARGS
va_start(ap);
errnum = va_arg(ap, int);
@@ -590,6 +605,39 @@ va_list ap;
fflush(stderr);
}
+static /* Error callback handed to catalog_create(): writes one catalog diagnostic to stderr, prefixed with the program name. */
+VOID do_catalog_error(filename, lineno, error_number, flags, sys_errno)
+char *filename; /* File name printed in the message. */
+unsigned long lineno; /* Line number; printed only for non-system errors. */
+int error_number; /* Message number looked up in message set CAT_SET. */
+unsigned flags; /* CATALOG_SYSTEM_ERROR bit selects the system-error form. */
+int sys_errno; /* Error code turned into text with strerror() for system errors. */
+{
+ char *text; /* Message from the message catalogue, or the built-in default text. */
+ unsigned indent; /* Width of the "prog: " prefix. */
+ text = catgets(catd, CAT_SET, error_number,
+ (char *)catalog_error_text(error_number)); /* XXX */
+ assert(text != 0);
+ fprintf(stderr, "%s: ", prog);
+ indent = strlen(prog) + 2; /* +2 for the ": " just printed. */
+ if (flags & CATALOG_SYSTEM_ERROR)
+ fprintf(stderr, text, filename, strerror(sys_errno)); /* text itself is the format; it receives the file name and the strerror() string. */
+ else {
+ unsigned i;
+ fprintf(stderr, /* Header: "Catalog error at <file>, line <n>", or the catalogue's replacement for it. */
+ catgets(catd, APP_SET,
+ CATALOG_ERROR_HEADER_MSGNO,
+ CATALOG_ERROR_HEADER_TEXT),
+ filename, lineno);
+ fputs(":\n", stderr);
+ for (i = 0; i < indent; i++) /* Pad the next line by the prefix width before the message text. */
+ putc(' ', stderr);
+ fputs(text, stderr);
+ }
+ putc('\n', stderr);
+ fflush(stderr);
+}
+
/*
Local Variables:
c-indent-level: 5
diff --git a/usr.bin/sgmls/sgmls/md1.c b/usr.bin/sgmls/sgmls/md1.c
index 9a294e3..66c476d 100644
--- a/usr.bin/sgmls/sgmls/md1.c
+++ b/usr.bin/sgmls/sgmls/md1.c
@@ -129,10 +129,12 @@ struct etd *p; /* Pointer to element type definition. */
if (GET(p->adl[0].adflags, ADLCONR))
mderr(85, (UNCH *)0, (UNCH *)0);
}
+#if 0
/* "-" should not be specified for the end-tag minimization if
the element has a content reference attribute. */
if (GET(p->adl[0].adflags, ADLCONR) && BITON(p->etdmin, EMM))
mderr(153, (UNCH *)0, (UNCH *)0);
+#endif
}
/* MDNADL: Process ATTLIST declaration for notation.
TO DO: Pass deftab and dvtab as parameters so
@@ -583,7 +585,6 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */
/* PARAMETER 2: External identifier keyword or MDS.
*/
- pcbmd.newstate = 0;
parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
TRACEMD("2: extid or MDS");
switch (pcbmd.action) {
@@ -645,7 +646,7 @@ UNCH *tbuf; /* Work area for tokenization. */
parmno = 0; /* No parameters as yet. */
/* PARAMETER 4: End of declaration.
*/
- pcbmd.newstate = 0;
+ pcbmd.newstate = pcbmdtk;
parsemd(tbuf, NAMECASE, &pcblitp, LITLEN);
TRACEMD(emd);
if (pcbmd.action!=EMD) mderr(126, (UNCH *)0, (UNCH *)0);
@@ -721,7 +722,7 @@ UNCH *tbuf; /* Work area for tokenization (tbuf). */
mderr(129, tbuf+1, (UNCH *)0);
return;
}
- /* Must omit omitted end-tag minimization, if omitted
+ /* Must omit omitted end-tag minimization, if omitted
start-tag minimization was omitted (because OMITTAG == NO). */
if (!minomitted) {
/* PARAMETER 2B: End-tag minimization.
@@ -733,6 +734,9 @@ UNCH *tbuf; /* Work area for tokenization (tbuf). */
if (ustrcmp(tbuf+1, key[KO])) {mderr(129, tbuf+1, (UNCH *)0); return;}
if (OMITTAG==YES) SET(fmin, EMO);
break;
+ case MGRP:
+ REPEATCC;
+ /* fall through */
case CDR:
SET(fmin, EMM);
break;
diff --git a/usr.bin/sgmls/sgmls/md2.c b/usr.bin/sgmls/sgmls/md2.c
index 94dc4d3..df7e57e 100644
--- a/usr.bin/sgmls/sgmls/md2.c
+++ b/usr.bin/sgmls/sgmls/md2.c
@@ -54,7 +54,6 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */
subdcl = nmbuf+1; /* Subject name for error messages. */
/* PARAMETER 2: Entity text keyword (optional).
*/
- pcbmd.newstate = 0;
parsemd(tbuf, NAMECASE, &pcblitp, LITLEN);
TRACEMD("2: keyword");
switch (pcbmd.action) {
@@ -77,7 +76,6 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */
mderr(38, tbuf+1, (UNCH *)0);
estore = ESM;
}
- pcbmd.newstate = 0;
parsemd(tbuf, NAMECASE, &pcblitp, LITLEN);
break;
default:
@@ -98,7 +96,7 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */
etx.c = savestr(tbuf);
break;
case ESMD: /* MD: parameter literal required. */
- etx.c = sandwich(tbuf, lex.m.mdo, lex.m.mdc);
+ etx.c = sandwich(tbuf, lex.m.mdo, lex.m.mdc);
goto bcheck;
case ESMS: /* MS: parameter literal required. */
etx.c = sandwich(tbuf, lex.m.mss, lex.m.mse);
@@ -122,7 +120,6 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */
}
/* PARAMETER 4: End of declaration.
*/
- pcbmd.newstate = 0;
parsemd(tbuf, NAMECASE, &pcblitp, LITLEN);
parm4:
TRACEMD(emd);
@@ -230,7 +227,6 @@ PNE pne; /* Caller's external entity ptr. */
/* PARAMETER 2: Public ID literal.
*/
- pcbmd.newstate = 0;
/* The length of a minimum literal cannot exceed the value of LITLEN
in the reference quantity set. */
parsemd(pubibuf, NAMECASE, &pcblitv, REFLITLEN);
@@ -248,13 +244,11 @@ PNE pne; /* Caller's external entity ptr. */
/* PARAMETER 3: System ID literal.
*/
parm3:
- pcbmd.newstate = 0;
parsemd(sysibuf, NAMECASE, &pcblitc, LITLEN);
TRACEMD("3: sys ID literal");
if (pcbmd.action==LIT || pcbmd.action==LITE) {
entlen += ustrlen(sysibuf);
fpis->fpisysis = sysibuf;
- pcbmd.newstate = 0;
parsemd(tbuf, NAMECASE, &pcblitp, LITLEN);
}
else memcpy(tbuf, sysibuf, *sysibuf);
@@ -277,13 +271,11 @@ PNE pne; /* Caller's external entity ptr. */
if (exetype==ESNSUB) {
pne->nedcn = 0;
- pcbmd.newstate = 0; /* Parse next token for caller. */
parsemd(tbuf, NAMECASE, &pcblitp, LITLEN);
goto genfpi;
}
/* PARAMETER 5: Notation name.
*/
- pcbmd.newstate = 0;
parsemd(lbuf, NAMECASE, &pcblitp, NAMELEN);
TRACEMD("5: notation");
if (pcbmd.action!=NAS) {mderr(119, tbuf+1, (UNCH *)0); return (struct fpi *)0;}
@@ -296,7 +288,6 @@ PNE pne; /* Caller's external entity ptr. */
/* PARAMETER 6: Data attribute specification.
*/
- pcbmd.newstate = 0;
parsemd(lbuf, NAMECASE, &pcblitp, NAMELEN);
TRACEMD("6: [att list]");
if (pcbmd.action!=MDS) { /* No attributes specified. */
@@ -321,7 +312,6 @@ PNE pne; /* Caller's external entity ptr. */
storedatt(pne);
}
parse(&pcbeal); /* Parse the list ending. */
- pcbmd.newstate = 0; /* Parse next token for caller. */
parsemd(tbuf, NAMECASE, &pcblitp, LITLEN);
/* GENFPI: Builds a formal public identifier structure, including the
@@ -339,10 +329,9 @@ PNE pne; /* Caller's external entity ptr. */
}
/* Analyze public ID and make structure entries. */
if (exidtype==EDPUBLIC) {
- if (FORMAL==NO)
- fpis->fpiversw = -1;
- else if (parsefpi(fpis)>0) {
- mderr(88, fpis->fpipubis, (UNCH *)0);
+ if (parsefpi(fpis)>0) {
+ if (FORMAL==YES)
+ mderr(88, fpis->fpipubis, (UNCH *)0);
fpis->fpiversw = -1; /* Signal bad formal public ID. */
}
}
@@ -355,7 +344,7 @@ VOID storedatt(pne)
PNE pne;
{
int i;
-
+
NEAL(pne) = (struct ad *)rmalloc((1+ADN(al))*ADSZ);
memcpy((UNIV)NEAL(pne), (UNIV)al, (1+ADN(al))*ADSZ);
for (i = 1; i <= (int)ADN(al); i++) {
@@ -387,9 +376,11 @@ PFPI f; /* Ptr to formal public identifier structure. */
p = f->fpipubis; /* Point to start of identifier. */
l = p + ustrlen(p); /* Point to EOS of identifier. */
- if (*p=='+' || *p=='-') { /* If owner registered, unregistered. */
+ if ((*p=='+' || *p=='-')
+ && p[1] == '/' && p[2] == '/') { /* If owner registered,
+ unregistered. */
f->fpiot = *p; /* Save owner type. */
- if ((p += 3)>=l) return 1; /* Get to owner ID field. */
+ p += 3;
}
else f->fpiot = '!'; /* Indicate ISO owner identifier. */
if ((q = pubfield(p, l, '/', &len))==0) /* Find end of owner ID field. */
@@ -407,9 +398,10 @@ PFPI f; /* Ptr to formal public identifier structure. */
/* The public text class in a notation identifier must be NOTATION. */
if (f->fpistore == ESK - ESFM + 1 && f->fpic != FPINOT) return 10;
- if (*p=='-') { /* If text is unavailable public text.*/
+ if (*p=='-' && p[1] == '/' && p[2] == '/') { /* If text is unavailable
+ public text.*/
f->fpitt = *p; /* Save text type. */
- if ((p += 3)>=l) return 5; /* Get to text description field. */
+ p += 3;
}
else f->fpitt = '+'; /* Indicate available public text. */
if ((q = pubfield(p, l, '/', &len))==0) /* Find end of text description. */
@@ -423,8 +415,11 @@ PFPI f; /* Ptr to formal public identifier structure. */
/* Language must be all upper-case letters. */
/* The standard only says that it *should* be two letters, so
don't enforce that. */
+ /* Language must be a name, which means it can't be empty. */
+ if (len == 0)
+ return 7;
for (i = 0; i < len; i++) {
- /* Don't assume ASCII. */
+ /* Don't assume ASCII. */
if (!strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", q[i]))
return 7;
}
@@ -552,7 +547,6 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */
/* PARAMETER 2: External identifier keyword.
*/
- pcbmd.newstate = 0;
parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
TRACEMD("2: extid");
if (pcbmd.action!=NAS) {mderr(29, (UNCH *)0, (UNCH *)0); return;}
@@ -630,9 +624,8 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */
SRM(0) = (PECB)srhptr; /* Indicate map was actually declared.*/
subdcl = srhptr->ename+1; /* Save map name for error msgs. */
- while ( pcbmd.newstate = 0,
- parsemd(tbuf, NAMECASE, &pcblitp, SRMAXLEN)==LIT
- || pcbmd.action==LITE ) {
+ while (parsemd(tbuf, NAMECASE, &pcblitp, SRMAXLEN) == LIT
+ || pcbmd.action==LITE ) {
/* PARAMETER 2: Delimiter string.
*/
TRACEMD("2: SR string");
@@ -642,7 +635,6 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */
}
/* PARAMETER 3: Entity name.
*/
- pcbmd.newstate = 0;
parsemd(tbuf, ENTCASE, &pcblitp, NAMELEN);
TRACEMD("3: entity");
if (pcbmd.action!=NAS) {mderr(120, (UNCH *)0, (UNCH *)0); goto cleanup;}
@@ -725,7 +717,6 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */
}
/* PARAMETER 2: Element name or a group of them. (In DTD only.)
*/
- pcbmd.newstate = 0;
parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
TRACEMD("2: GI or grp");
switch (pcbmd.action) {
@@ -740,6 +731,7 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */
break;
case EMD:
if (indtdsw) {mderr(28, (UNCH *)0, (UNCH *)0); return;}
+ if (docelsw) {mderr(233, (UNCH *)0, (UNCH *)0); return;}
tags[ts].tsrm = srmptr;
TRACESRM("USEMAP", tags[ts].tsrm, tags[ts].tetd->etdgi+1);
goto realemd;
@@ -749,7 +741,6 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */
}
/* PARAMETER 3: End of declaration.
*/
- pcbmd.newstate = 0;
parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN);
TRACEMD(emd);
if (pcbmd.action!=EMD) mderr(126, (UNCH *)0, (UNCH *)0);
diff --git a/usr.bin/sgmls/sgmls/msg.h b/usr.bin/sgmls/sgmls/msg.h
index fa97a4c..5526337 100644
--- a/usr.bin/sgmls/sgmls/msg.h
+++ b/usr.bin/sgmls/sgmls/msg.h
@@ -112,7 +112,7 @@ struct {
/* 90 */ {"\"%s\" keyword is for unsupported feature; declaration terminated", 'E', 'D'},
/* 91 */ {"Attribute specification list in prolog cannot be empty", 'E', 'D'},
/* 92 */ {"Document ended invalidly within a literal; parsing ended", 'C', 'S'},
-/* 93 */ {"Short ref in map \"%2$s\" to undeclared entity \"%1$s\" treated as data", 'E', 'C'},
+/* 93 */ {"General entity \"%s\" in short reference map \"%s\" undeclared", 'E', 'D'},
/* 94 */ {"Could not reopen file to continue entity \"%s\"; entity terminated", 'E', 'R'},
/* 95 */ {"Out-of-context data ended %s document element (and parse)", 'E', 'C'},
/* 96 */ {"Short start-tag (no GI) ended %s document element (and parse)", 'E', 'C'},
@@ -169,16 +169,16 @@ struct {
/* 147 */ {"Could not find external general entity \"%s\"", 'I', 'R'},
/* 148 */ {"Could not find external parameter entity \"%s\"", 'I', 'R'},
/* 149 */ {"Reference to non-existent general entity \"%s\" ignored", 'E', 'R'},
-/* 150 */ {"Could not find entity \"%s\" using default declaration", 'E', 'R'},
+/* 150 */ {"Could not find entity \"%s\" using default declaration", 'I', 'R'},
/* 151 */ {"Could not find entity \"%2$s\" in attribute %1$s using default declaration", 'E', 'R'},
-/* 152 */ {"Short reference map \"%s\" used in DTD but not defined", 'I', 'D'},
+/* 152 */ {"Short reference map \"%s\" used in USEMAP declaration but not defined; declaration will be ignored", 'E', 'D'},
/* 153 */ {"End-tag minimization should be \"O\" for element with CONREF attribute", 'I', 'D'},
/* 154 */ {"Declared value of data attribute cannot be ENTITY or ENTITIES", 'E', 'D' },
/* 155 */ {"Declared value of data attribute cannot be IDREF or IDREFS", 'E', 'D' },
/* 156 */ {"Declared value of data attribute cannot be NOTATION", 'E', 'D' },
/* 157 */ {"CURRENT cannot be specified for a data attribute", 'E', 'D' },
/* 158 */ {"CONREF cannot be specified for a data attribute", 'E', 'D' },
-/* 159 */ {"Short reference map for element \"%s\" not defined; ignored", 'E', 'C'},
+/* 159 */ {"Parameter must be a number or CONTROLS or NONE", 'E', 'D'},
/* 160 */ {"Cannot create temporary file", 'C', 'R'},
/* 161 */ {"Document ended invalidly within SGML declaration", 'C', 'D'},
/* 162 */ {"Capacity limit %s exceeded by %s points", 'W', 'Q'},
@@ -208,9 +208,9 @@ struct {
/* 186 */ {"\"%s\" invalid; must be \"PUBLIC\" or \"SGMLREF\"", 'E', 'D'},
/* 187 */ {"Feature \"%s\" is not supported", 'E', 'U'},
/* 188 */ {"Too many open subdocument entities", 'E', 'Q'},
-/* 189 */ {"Invalid formal public identifier", 'I', 'D'},
-/* 190 */ {"Public text class should have been \"%s\"", 'I', 'D'},
-/* 191 */ {"Character number %s must be non-SGML", 'W', 'D'},
+/* 189 */ {"Invalid formal public identifier", 'W', 'D'},
+/* 190 */ {"Public text class must be \"%s\"", 'W', 'D'},
+/* 191 */ {"Use of character number %s as an SGML character is not supported", 'W', 'U'},
/* 192 */ {"Notation \"%s\" not defined in DTD", 'W', 'D'},
/* 193 */ {"Unclosed start or end tag requires \"SHORTTAG YES\"", 'W', 'M'},
/* 194 */ {"Net-enabling start tag requires \"SHORTTAG YES\"", 'W', 'M'},
@@ -218,12 +218,12 @@ struct {
/* 196 */ {"Undelimited attribute value requires \"SHORTTAG YES\"", 'W', 'M'},
/* 197 */ {"Attribute specification omitted for \"%s\": requires markup minimization", 'W', 'M'},
/* 198 */ {"Concrete syntax does not have any short reference delimiters", 'E', 'D'},
-/* 199 */ {"Character number %s does not exist in the base character set", 'E', 'D'},
+/* 199 */ {"Character number %s not in the base character set; assuming UNUSED", 'E', 'D'},
/* 200 */ {"Character number %s is UNUSED in the syntax reference character set", 'E', 'D'},
/* 201 */ {"Character number %s was not described in the syntax reference character set", 'E', 'D'},
/* 202 */ {"Character number %s in the syntax reference character set has no corresponding character in the system character set", 'E', 'D'},
/* 203 */ {"Character number %s was described using an unknown base set", 'E', 'D'},
-/* 204 */ {"Duplication specification for added funtion \"%s\"", 'E', 'D'},
+/* 204 */ {"Duplication specification for added function \"%s\"", 'E', 'D'},
/* 205 */ {"Added function character cannot be \"%s\"", 'E', 'D'},
/* 206 */ {"Only reference concrete syntax function characters supported", 'E', 'U'},
/* 207 */ {"Only reference concrete syntax general delimiters supported", 'E', 'U'},
@@ -235,8 +235,8 @@ struct {
/* 213 */ {"Duplicate replacement reserved name \"%s\"", 'E', 'D'},
/* 214 */ {"Quantity \"%s\" must not be less than %s", 'E', 'D'},
/* 215 */ {"Only values up to %2$s are supported for quantity \"%1$s\"", 'E', 'U'},
-/* 216 */ {"Exclusions attempt to change required status of group in \"%s\"", 'E', 'C'},
-/* 217 */ {"Exclusion cannot apply to token \"%s\" in content model for \"%s\"", 'E', 'C'},
+/* 216 */ {"%s element cannot be excluded from %s element because it is neither inherently optional nor a member of an or group", 'E', 'C'},
+/* 217 */ {"Marked section not allowed in other prolog", 'E', 'C'},
/* 218 */ {"Required %s attribute was not specified for entity %s", 'E', 'C'},
/* 219 */ {"UCNMSTRT must have the same number of characters as LCNMSTRT", 'E', 'D'},
/* 220 */ {"UCNMCHAR must have the same number of characters as LCNMCHAR", 'E', 'D'},
@@ -249,4 +249,10 @@ struct {
/* 227 */ {"Unrecognized designating escape sequence \"%s\"", 'I', 'U'},
/* 228 */ {"Earlier reference to entity \"%s\" used default entity", 'I', 'D'},
/* 229 */ {"Reference to non-existent parameter entity \"%s\" ignored", 'E', 'R'},
+/* 230 */ {"DSC within marked section; marked section terminated", 'E', 'C'},
+/* 231 */ {"Document element end tag can only occur in document element because entity end not allowed in other prolog", 'E', 'C'},
+/* 232 */ {"Character reference not allowed in other prolog", 'E', 'C'},
+/* 233 */ {"USEMAP declaration not allowed in other prolog", 'E', 'D'},
+/* 234 */ {"Entity reference not allowed in other prolog", 'E', 'C'},
+/* 235 */ {"Value assigned to capacity %s exceeds value assigned to TOTALCAP", 'W', 'D'},
};
diff --git a/usr.bin/sgmls/sgmls/msgcat.c b/usr.bin/sgmls/sgmls/msgcat.c
index ec6a8b5..5c7ee9f 100644
--- a/usr.bin/sgmls/sgmls/msgcat.c
+++ b/usr.bin/sgmls/sgmls/msgcat.c
@@ -25,6 +25,12 @@ merging catalogues. */
#define P(parms) ()
#endif
+#ifdef USE_ISASCII
+#define ISASCII(c) isascii(c)
+#else
+#define ISASCII(c) (1)
+#endif
+
/* Default message set. */
#define NL_SETD 1
@@ -48,7 +54,7 @@ struct message {
unsigned setnum;
char *text;
};
-
+
struct cat {
char *name;
int loaded;
@@ -117,7 +123,7 @@ int oflag;
if (!name)
return 0;
-
+
catp = (struct cat *)malloc(sizeof *catp);
if (!catp)
return 0;
@@ -476,7 +482,7 @@ int quote;
p[i] = '\0';
return p;
}
-
+
/* 0 success, -1 error */
static
@@ -559,7 +565,7 @@ struct message **table;
unsigned setnum, msgnum;
{
struct message **pp;
-
+
for (pp = &table[hash(setnum, msgnum)]; *pp; pp = &(*pp)->next)
if ((*pp)->setnum == setnum && (*pp)->msgnum == msgnum) {
struct message *p = *pp;
@@ -638,9 +644,9 @@ char **argv;
struct message **list;
unsigned setnum;
struct message *table[HASH_TAB_SIZE];
-
+
program_name = argv[0];
-
+
if (argc < 3)
usage();
@@ -666,7 +672,7 @@ char **argv;
fclose(fp);
}
}
-
+
errno = 0;
fp = fopen(argv[1], "w");
if (!fp)
@@ -687,7 +693,7 @@ char **argv;
list[j++] = p;
}
assert(j == nmessages);
-
+
qsort((UNIV)list, nmessages, sizeof(struct message *), message_compare);
setnum = NL_SETD;
@@ -728,8 +734,8 @@ VOID fatal(char *message,...)
message = va_arg(ap, char *);
#else /* not VARARGS */
va_start(ap, message);
-#endif /* not VARARGS */
-
+#endif /* not VARARGS */
+
fprintf(stderr, "%s: ", program_name);
vfprintf(stderr, message, ap);
putc('\n', stderr);
@@ -739,7 +745,8 @@ VOID fatal(char *message,...)
static
int message_compare(p1, p2)
-UNIV p1, UNIV p2;
+UNIV p1;
+UNIV p2;
{
struct message *m1 = *(struct message **)p1;
struct message *m2 = *(struct message **)p2;
@@ -763,7 +770,7 @@ FILE *fp;
for (; *s; s++) {
if (*s == '\\')
fputs("\\\\", fp);
- else if (ISASCII(*s) && isprint((UNCH)*s))
+ else if (ISASCII(*s) && isprint((unsigned char)*s))
putc(*s, fp);
else {
switch (*s) {
@@ -803,7 +810,7 @@ char **argv;
{
nl_catd catd;
int msgnum, setnum;
-
+
if (argc != 2) {
fprintf(stderr, "usage: %s catalogue\n", argv[0]);
exit(1);
diff --git a/usr.bin/sgmls/sgmls/pars1.c b/usr.bin/sgmls/sgmls/pars1.c
index 8616107..0a67cbc 100644
--- a/usr.bin/sgmls/sgmls/pars1.c
+++ b/usr.bin/sgmls/sgmls/pars1.c
@@ -90,6 +90,7 @@ struct parse *pcb; /* Parse control block for this parse. */
case STG_: /* Process non-null start-tag. */
CTRSET(tagctr); /* Start counting tag length. */
+ tages = es;
parsenm(tbuf, NAMECASE); /* Get the GI. */
newetd = etdref(tbuf);
if (newetd && newetd->adl) {
@@ -264,6 +265,10 @@ struct parse *pcb; /* Parse control block for this parse. */
case RSR_: /* Record start: ccnt=0; ++rcnt.*/
++RCNT; CTRSET(RSCC);
+ return RSR_;
+ case MSS_:
+ if (ts == 0) synerr(217, pcb);
+ return MSS_;
default:
return (int)pcb->action; /* Default (MD_ MDC_ MSS_ MSE_ PIS_). */
}
@@ -288,8 +293,12 @@ struct parse *pcb; /* Parse control block for this parse. */
*/
int nstetd()
{
- newetd = ts>0 ? tags[ts].tetd
- : tags[0].tetd->etdmod[2].tu.thetd;
+ if (sd.omittag && ts > 0)
+ newetd = tags[ts].tetd;
+ else if (!sd.omittag && lastetd != 0)
+ newetd = lastetd;
+ else
+ newetd = tags[0].tetd->etdmod[2].tu.thetd;
stagmin = MINNULL; stagreal = ETDNULL;
etisw = 0;
return stag(0);
@@ -332,11 +341,6 @@ struct parse *pcb; /* Parse control block for this parse. */
int rc; /* Return code from entopen. */
if (tags[ts].tsrm==SRMNULL || !tags[ts].tsrm[srn]) return ENTUNDEF;
- if (!tags[ts].tsrm[srn]->estore) {
- sgmlerr(93, pcb, tags[ts].tsrm[srn]->ename+1,
- tags[ts].tsrm[0]->ename+1);
- return(ENTUNDEF);
- }
rc = entopen(tags[ts].tsrm[srn]);
if (rc==ENTDATA) return DEF_;
if (rc==ENTPI) return PIS_;
@@ -395,7 +399,14 @@ int parsepro()
REPEATCC; /* Put back MSC so it follows referenced DTD. */
entref(indtdent);
}
- else mddtde(tbuf);
+ else {
+ if (mslevel > 0) {
+ sgmlerr(230, propcb, (UNCH *)0, (UNCH *)0);
+ mslevel = 0;
+ msplevel = 0;
+ }
+ mddtde(tbuf);
+ }
continue;
case MD_:
@@ -442,12 +453,13 @@ int parsepro()
return(PIS_);
case EOD_: /* Return end of primary entity. */
- if (!sw.onlypro || propcb != &pcbpro || !dtdsw)
- sgmlerr(127, propcb, (UNCH *)0, (UNCH *)0);
- else {
+ if (dtdsw && propcb == &pcbpro) {
+ /* We've had a DTD, so check it. */
setdtype();
checkdtd();
}
+ if (!sw.onlypro || propcb != &pcbpro || !dtdsw)
+ sgmlerr(127, propcb, (UNCH *)0, (UNCH *)0);
return propcb->action;
case PIS_: /* Return processing instruction (string). */
sgmlsw++; /* SGML declaration not allowed after PI */
@@ -457,6 +469,9 @@ int parsepro()
synerr(E_RESTART, propcb);
REPEATCC;
continue;
+ case ETE_: /* End tag ended prolog */
+ REPEATCC;
+ /* fall through */
case STE_: /* Start tag ended prolog */
REPEATCC;
REPEATCC;
@@ -506,22 +521,33 @@ static
VOID checkdtd()
{
struct dcncb *np;
+ struct srh *sp;
if (sw.swundef) {
int i;
struct etd *ep;
- struct srh *sp;
for (i = 0; i < ETDHASH; i++)
for (ep = etdtab[i]; ep; ep = ep->etdnext)
if (!ep->etdmod)
sgmlerr(140, (struct parse *)0, ep->etdgi + 1,
(UNCH *)0);
- for (sp = srhtab[0]; sp; sp = sp->enext)
- if (sp->srhsrm[0] == 0)
- sgmlerr(152, (struct parse *)0, sp->ename + 1,
- (UNCH *)0);
}
+ for (sp = srhtab[0]; sp; sp = sp->enext)
+ if (sp->srhsrm[0] == 0)
+ sgmlerr(152, (struct parse *)0, sp->ename + 1, (UNCH *)0);
+ else {
+ int i;
+ for (i = 1; i < lex.s.dtb[0].mapdata + 1; i++) {
+ struct entity *ecb = sp->srhsrm[i];
+ if (ecb && !ecb->estore) {
+ sgmlerr(93, (struct parse *)0,
+ ecb->ename + 1,
+ sp->srhsrm[0]->ename + 1);
+ sp->srhsrm[i] = 0;
+ }
+ }
+ }
for (np = dcntab[0]; np; np = np->enext)
if (!np->defined)
sgmlerr(192, (struct parse *)0, np->ename + 1, (UNCH *)0);
@@ -604,7 +630,7 @@ struct mpos *newmpos()
VOID endprolog()
{
int i;
-
+
ambigfree();
if (dtdsw) {
frem((UNIV)nmgrp);
@@ -739,9 +765,8 @@ int dataret; /* Data pending: DAF_ REF_ 0=not #PCDATA. */
realrc = RCEND;
break;
case RCHITMEX: /* Invalid minus exclusion for required element. */
-#if 0 /* This will have been detected by exclude.c. */
- sgmlerr(E_MEXERR, &pcbstag, NEWGI, tags[mexts].tetd->etdgi+1);
-#endif
+ sgmlerr(216, &pcbstag, NEWGI, tags[mexts].tetd->etdgi+1);
+ /* fall through */
case RCHIT: /* Start-tag was valid. */
realrc = RCHIT;
break;
@@ -764,11 +789,9 @@ int dataret; /* Data pending: DAF_ REF_ 0=not #PCDATA. */
return ETG_;
case RCREQ: /* Stack compulsory GI, then retry start-tag. */
if (!BADPTR(nextetd)) {
-#if 0 /* This will have been detected in exclude.c. */
if ((mexts = pexmex(nextetd))>0)
sgmlerr(E_MEXERR, &pcbstag, nextetd->etdgi+1,
tags[mexts].tetd->etdgi+1);
-#endif
if (!nextetd->etdmod) {
sgmlerr(53, &pcbstag, nextetd->etdgi+1, (UNCH *)0);
etdset(nextetd, (UNCH)SMO+EMO+ETDOCC, &undechdr,
@@ -847,8 +870,8 @@ struct etd *curetd; /* The etd for this entry. */
/* If etd has ALT table, use it; otherwise, use last element's ALT. */
if (curetd->etdsrm) {
if (curetd->etdsrm != SRMNULL && curetd->etdsrm[0] == NULL) {
- /* Map hasn't been defined. Ignore it. */
- sgmlerr(159, &pcbstag, curetd->etdgi + 1, (UNCH *)0);
+ /* Map hasn't been defined. Ignore it.
+ We already gave an error. */
curetd->etdsrm = 0;
tags[ts].tsrm = tags[ts-1].tsrm;
}
@@ -867,8 +890,6 @@ struct etd *curetd; /* The etd for this entry. */
tags[ts].tpos[1].t = 1; /* 1st token is next in grp to be tested. */
HITCLEAR(tags[ts].tpos[1].h); /* No hits yet as yet. */
TRACESTK(&tags[ts], ts, etictr);
-
- exclude();
return;
}
/* ETAG: Check validity of an end-tag by seeing if it matches any tag
@@ -908,6 +929,7 @@ VOID destack()
are required tags left, and no CONREF attribute was specified,
issue an error message.
*/
+ lastetd = tags[ts].tetd;
if (!GET(tags[ts].tetd->etdmod->ttype, MKEYWORD)
&& !conrefsw
&& !econtext(tags[ts].tetd->etdmod, tags[ts].tpos, &tags[ts].status)) {
@@ -945,6 +967,10 @@ VOID destack()
/* TEMP: See if parser bug caused stack to go below zero. */
else if (ts<0) {sgmlerr(64, conpcb, (UNCH *)0, (UNCH *)0); ts = 0;}
TRACEDSK(&tags[ts], &tags[ts+1], ts, etictr);
+ if (ts == 0) {
+ docelsw = 1; /* Finished document element. */
+ if (es > 0) sgmlerr(231, conpcb, (UNCH *)0, (UNCH *)0);
+ }
}
/*
Local Variables:
diff --git a/usr.bin/sgmls/sgmls/pars2.c b/usr.bin/sgmls/sgmls/pars2.c
index cc4c4ec..4249797 100644
--- a/usr.bin/sgmls/sgmls/pars2.c
+++ b/usr.bin/sgmls/sgmls/pars2.c
@@ -104,6 +104,7 @@ struct parse *pcb; /* Current parse control block. */
parsenm(entbuf, NAMECASE);
parse(&pcbref); /* Handle reference terminator. */
charrefa(entbuf);
+ if (docelsw) synerr(232, pcb);
continue;
case SYS_: /* Invalid NONCHAR: send msg and ignore. */
@@ -172,10 +173,10 @@ int ch;
change the entity, since the entity might be referenced again.
So in this case we copy the entity. This is inefficient, but
it will only happen in a case like this:
-
+
<!entity % amp "&">
<!entity e "x%amp;#SPACE;">
-
+
Usually character references will have been processed while the
entity was being defined. */
if (*FPOS != ch) {
@@ -754,12 +755,19 @@ UNCH del; /* Literal delimiter: LIT LITA PIC EOS */
#endif
{
UNCH *pt = tbuf; /* Current pointer into tbuf. */
- UNCH lexsv = lexlms[del];/* Saved lexlms value of delimiter. */
+ UNCH lexsv = pcb->plex[del];/* Saved value of delimiter in lexical table. */
int essv = es; /* Entity stack level when literal started. */
UNCH datadel; /* Delimiter for CDATA/SDATA entity. */
- int parmlen = (int)maxlen; /* Working limit (to be decremented). */
+ int parmlen = (int)maxlen + 1; /* Working limit (to be decremented). */
+ int overflow = 0; /* Did the buffer overflow? */
+
+ pcb->plex[del] = pcb->plex == lexlms ? lex.l.litc : lex.l.minlitc;
+
+ /* The RPR_ action may cause the length of the literal to decrease by
+ 1 (this discards a final space in a minimum literal); so while
+ building the literal, the length must be allowed to grow to
+ maxlen + 1. */
- lexlms[del] = lex.l.litc; /* Set delimiter to act as literal close. */
do {
switch (parse(pcb)) {
case LP2_: /* Move 2nd char back to buffer; redo prev.*/
@@ -767,15 +775,19 @@ UNCH del; /* Literal delimiter: LIT LITA PIC EOS */
case LPR_: /* Move previous char to buffer; REPEATCC; */
REPEATCC;
case MLA_: /* Move character to buffer. */
+ if (parmlen <= 0) { overflow = 1; break; }
*pt++ = *FPOS; --parmlen;
continue;
case FUN_: /* Function char found; replace with space.*/
+ if (parmlen <= 0) { overflow = 1; break; }
*pt++ = ' '; --parmlen;
continue;
case RSM_: /* Record start: ccnt=0; ++rcnt.*/
- ++RCNT; CTRSET(RSCC); *pt++ = *FPOS; --parmlen;
+ ++RCNT; CTRSET(RSCC);
+ if (parmlen <= 0) { overflow = 1; break; }
+ *pt++ = *FPOS; --parmlen;
continue;
case ERX_: /* Entity reference: cancel LITC delim. */
@@ -806,7 +818,12 @@ UNCH del; /* Literal delimiter: LIT LITA PIC EOS */
pt += parmlensv - parmlen;
continue;
}
- if ((parmlen -= (int)datalen+2)<0) {entdatsw = 0; break;}
+ if (parmlen < datalen + 2) {
+ entdatsw = 0;
+ overflow = 1;
+ break;
+ }
+ parmlen -= datalen + 2;
*pt++ = datadel =
BITON(entdatsw, CDECONT) ? DELCDATA : DELSDATA;
entdatsw = 0;
@@ -816,7 +833,8 @@ UNCH del; /* Literal delimiter: LIT LITA PIC EOS */
continue;
case NON_: /* Non-SGML char (delimited and shifted). */
- if ((parmlen -= 2)<0) break;
+ if (parmlen < 2) { overflow = 1; break; }
+ parmlen -= 2;
memcpy( pt , nonchbuf, 2 );
pt += 2;
continue;
@@ -832,19 +850,25 @@ UNCH del; /* Literal delimiter: LIT LITA PIC EOS */
break;
}
break;
- } while (parmlen>=0 && pcb->action!=TER_);
+ } while (!overflow && pcb->action!=TER_);
+
+ if (parmlen <= 0) {
+ --pt;
+ overflow = 1;
+ }
+ if (overflow)
+ sgmlerr(134, pcb, ntoa((int)maxlen),(UNCH *)0);
- if (parmlen<0) {--pt; sgmlerr(134, pcb, ntoa((int)maxlen),(UNCH *)0); REPEATCC;}
datalen = (UNS)(pt-tbuf);/* To return PI string to text processor. */
*pt++ = EOS;
- lexlms[del] = lexsv; /* Restore normal delimiter handling. */
+ pcb->plex[del] = lexsv; /* Restore normal delimiter handling. */
if (es!=essv) synerr(37, pcb);
- return;
}
/* Handle a data entity in a tokenized attribute value literal.
Parmlen is amount of space left. Return new parmlen. If there's not
-enough space return -1, and copy up to parmlen + 1 characters. */
+enough space return -1, and copy up to parmlen + 1 characters. Only
+tokenization should be done, not attribute value interpretation. */
int tokdata(pt, parmlen)
UNCH *pt;
@@ -852,14 +876,9 @@ int parmlen;
{
int skip = (pcblitt.newstate == 0);
int i;
-
+
for (i = 0; parmlen >= 0 && i < datalen; i++) {
switch (data[i]) {
- case RSCHAR:
- /* ignore it */
- break;
- case RECHAR:
- case TABCHAR:
case SPCCHAR:
if (!skip) {
*pt++ = data[i];
@@ -935,6 +954,7 @@ UNS tokenlen; /* Max length of expected token: NAMELEN LITLEN */
return (int)pcb->action;
case NUM: /* Number or number token string. */
parsetkn(pt, (UNCH)((int)tokenlen<=NAMELEN ? NU:NMC), (int)tokenlen);
+ if (tokenlen > NAMELEN) pcb->newstate = 0;
return (int)pcb->action;
case PENR:
REPEATCC;
@@ -976,6 +996,11 @@ int dctype; /* Content type (0=model). */
case OREP: /* OREP occurrence indicator for model. */
SET(gbuf[1].ttype, TOREP|TXOREP);
break;
+ case EE_:
+ if (es < mdessv) {
+ synerr(37, &pcbmd);
+ mdessv = es;
+ }
default: /* RCR_: Repeat char and return. */
break;
}
diff --git a/usr.bin/sgmls/sgmls/pcbrf.c b/usr.bin/sgmls/sgmls/pcbrf.c
index a18617e..554fdfb 100644
--- a/usr.bin/sgmls/sgmls/pcbrf.c
+++ b/usr.bin/sgmls/sgmls/pcbrf.c
@@ -39,14 +39,14 @@ et0a[]={DAS_,DAS_,DAS_,DAS_,DAS_,NON_,GET_,GET_,RSR_,SR2_,DAS_,DAS_,NSC_,LAS_,
REF_,NOP_,DAS_,NED_,SR10,DAS_,DAS_,NOP_,SR25,DAS_,SR11,DAS_,LAS_,FCE_},
da0 []={DA0 ,DA0 ,DA0 ,DA0 ,DA1 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,
- ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 },/*da0*/
+ ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,DA0 ,ET0 ,DA0 ,ET0 ,ET0 },/*da0*/
da0a[]={NOP_,NOP_,NOP_,NOP_,NOP_,DAF_,DAF_,DAF_,DAF_,DAF_,DAF_,NOP_,DAF_,DAF_,
- DAF_,DAF_,NOP_,DAF_,DAF_,DAF_,NOP_,DAF_,DAF_,NOP_,NOP_,NOP_,DAF_,DAF_},
+ DAF_,DAF_,NOP_,DAF_,DAF_,DAF_,NOP_,DAF_,DAF_,NOP_,DAF_,NOP_,DAF_,DAF_},
da1 []={DA0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,
- ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 },/*da1*/
+ ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,DA0 ,ET0 ,DA0 ,ET0 ,ET0 },/*da1*/
da1a[]={NOP_,NOP_,NOP_,NOP_,DAR_,DAF_,DAF_,DAR_,DAF_,DAR_,DAR_,NOP_,DAF_,DAF_,
- DAF_,DAF_,NOP_,DAF_,DAF_,DAR_,NOP_,DAF_,DAF_,NOP_,NOP_,NOP_,DAF_,DAF_},
+ DAF_,DAF_,NOP_,DAF_,DAF_,DAR_,NOP_,DAF_,DAF_,NOP_,DAF_,NOP_,DAF_,DAF_},
er0 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ER0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,
ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,CR0 ,ET0 ,ET0 ,ET0 },/*er0*/
@@ -176,7 +176,7 @@ static UNCH
et2 []={ET2 ,ET2 ,ET2 ,ET2 ,SP2 ,ET2 ,ET2 ,ET2 ,RS2 ,ET2 ,TB2 ,ET2 ,ET2 ,ER2 ,
ET2 ,SC2 ,ET2 ,ET2 ,ET2 ,SR2 ,ET2 ,ME2 ,ET2 ,ET2 ,ET2 ,ET2 ,ES2 ,ET2 },/*et2*/
et2a[]={DCE_,DCE_,DCE_,DCE_,NOP_,DCE_,GET_,GET_,RS_ ,SR2_,NOP_,DCE_,DCE_,LAS_,
- NOP_,NOP_,DCE_,NED_,SR10,NOP_,DCE_,NOP_,DCE_,DCE_,SR11,DCE_,LAS_,DCE_},
+ NOP_,NOP_,DCE_,NED_,SR10,NOP_,DCE_,NOP_,SR25,DCE_,SR11,DCE_,LAS_,FCE_},
er2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ER2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,
ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,CR2 ,ET2 ,ET2 ,ET2 },/*er2*/
@@ -391,6 +391,7 @@ struct parse pcbconc = {"CONC", lexcon, conctab, 0, 0, 0, 0};
#define ES7 2 /* TAGO found; start lookahead buffer. */
#define MD7 4 /* MDO found (TAGO, MDO[2]). */
#define MC7 6 /* MDO, COM found. */
+#define EE7 8 /* TAGO, ETI found */
static UNCH
/* free nu nmc nms spc non ee eob rs re sep cde nsc ero
@@ -401,9 +402,9 @@ et7a[]={DCE_,DCE_,DCE_,DCE_,NOP_,DCE_,EE_ ,GET_,RS_ ,NOP_,NOP_,DCE_,DCE_,DCE_,
DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,LAS_},
es7 []={ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ES7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,
- ET7 ,ET7 ,ET7 ,ET7 ,MD7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 },/*es7*/
+ ET7 ,ET7 ,EE7 ,ET7 ,MD7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 },/*es7*/
es7a[]={PEP_,PEP_,PEP_,STE_,PEP_,PEP_,PEP_,GET_,PEP_,PEP_,PEP_,PEP_,PEP_,PEP_,
- PEP_,PEP_,PEP_,PEP_,LAM_,PEP_,PEP_,PEP_,PIS_,PEP_,STE_,PEP_},
+ PEP_,PEP_,LAM_,PEP_,LAM_,PEP_,PEP_,PEP_,PIS_,PEP_,STE_,PEP_},
md7 []={ET7, ET7, ET7, ET7, ET7 ,ET7, ET7, MD7, ET7 ,ET7 ,ET7 ,ET7, ET7, ET7,
ET7, MC7, ET7, ET7, ET7, ET7 ,ET7, ET7, ET7, ET7 ,ET7, ET7 },/*md7*/
@@ -415,12 +416,18 @@ mc7 []={ET7, ET7, ET7, ET7, ET7, ET7 ,ET7, MC7, ET7 ,ET7, ET7 ,ET7, ET7, ET7,
mc7a[]={LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
LAF_,MDC_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_},
-*protab[] = {et7, et7a, es7, es7a, md7, md7a, mc7, mc7a};
+ee7 []={ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,EE7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,
+ ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 },/*ee7*/
+ee7a[]={LAF_,LAF_,LAF_,ETE_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,
+ LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,ETE_,LAF_},
+
+*protab[] = {et7, et7a, es7, es7a, md7, md7a, mc7, mc7a, ee7, ee7a};
struct parse pcbpro = {"PRO", lexcon, protab, 0, 0, 0, 0};
#undef ET7
#undef ES7
#undef MD7
#undef MC7
+#undef EE7
/* PCBMDS: State and action table for parse of markup declaration subset.
Initial state assumes subset just began (MSO found).
*/
@@ -550,7 +557,7 @@ dn01a[]={INV_,INV_,INV_,NOP_,NOP_,SYS_,EE_ ,GET_,RS_ ,INV_,INV_,INV_,INV_,INV_,
INV_,INV_,INV_,INV_,INV_,INV_,INV_,INV_,DTAG,INV_},
dt01 []={TK1 ,TK1 ,TK1 ,DT1 ,DT1 ,DT1 ,DT1 ,DT1 ,DT1 ,TK1 ,DT1 ,DT1 ,LI1 ,LA1 ,
- CO1 ,TK1 ,TK1 ,DT1 ,DT1 ,TK1 ,TK1 ,TK1 ,DT1 ,TK1 },/*dt1*/
+ SP1 ,TK1 ,TK1 ,DT1 ,DT1 ,TK1 ,TK1 ,TK1 ,DT1 ,TK1 },/*dt1*/
dt01a[]={INV_,INV_,INV_,NOP_,NOP_,SYS_,EE_ ,GET_,RS_ ,INV_,NOP_,NOP_,NOP_,NOP_,
GRPE,INV_,INV_,NOP_,NOP_,INV_,INV_,INV_,NOP_,INV_},
@@ -798,23 +805,23 @@ struct parse pcblitr = {"LITR", lexlms, litrtab, 0, 0, 0, 0};
#define SP0 4 /* SPACE/RE sequence begun. */
static UNCH
-/* free num min nms spc non ee eob rs re sep cde nsc ero
- mdo msc mso pero rni tagc tago litc */
-ls10 []={VA0 ,VA0 ,VA0 ,VA0 ,LS0 ,VA0 ,LS0 ,LS0 ,LS0 ,LS0 ,LS0 ,VA0 ,VA0 ,VA0 ,
- VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,LS0 },/*ls0*/
-ls10a[]={MLE_,MLA_,MLA_,MLA_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,MLE_,SYS_,SYS_,MLE_,
- MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,TER_},
-va10 []={VA0 ,VA0 ,VA0 ,VA0 ,SP0 ,VA0 ,VA0 ,VA0 ,VA0 ,SP0 ,SP0 ,VA0 ,VA0 ,VA0 ,
- VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,LS0 },/*va0*/
-da10a[]={MLE_,MLA_,MLA_,MLA_,MLA_,SYS_,EOF_,GET_,RS_ ,FUN_,MLE_,SYS_,SYS_,MLE_,
- MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,TER_},
-sp10 []={VA0 ,VA0 ,VA0 ,VA0 ,SP0 ,VA0 ,VA0 ,SP0 ,SP0 ,SP0 ,SP0 ,VA0 ,VA0 ,VA0 ,
- VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,LS0 },/*sp0*/
-sp10a[]={MLE_,MLA_,MLA_,MLA_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,MLE_,SYS_,SYS_,MLE_,
- MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,RPR_},
+/* free num min nms spc non ee eob rs re sep cde nsc
+ litc */
+ls10 []={VA0 ,VA0 ,VA0 ,VA0 ,LS0 ,VA0 ,LS0 ,LS0 ,LS0 ,LS0 ,LS0 ,VA0 ,VA0 ,
+ LS0 },/*ls0*/
+ls10a[]={MLE_,MLA_,MLA_,MLA_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,MLE_,SYS_,SYS_,
+ TER_},
+va10 []={VA0 ,VA0 ,VA0 ,VA0 ,SP0 ,VA0 ,VA0 ,VA0 ,VA0 ,SP0 ,SP0 ,VA0 ,VA0 ,
+ LS0 },/*va0*/
+da10a[]={MLE_,MLA_,MLA_,MLA_,MLA_,SYS_,EOF_,GET_,RS_ ,FUN_,MLE_,SYS_,SYS_,
+ TER_},
+sp10 []={VA0 ,VA0 ,VA0 ,VA0 ,SP0 ,VA0 ,VA0 ,SP0 ,SP0 ,SP0 ,SP0 ,VA0 ,VA0 ,
+ LS0 },/*sp0*/
+sp10a[]={MLE_,MLA_,MLA_,MLA_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,MLE_,SYS_,SYS_,
+ RPR_},
*litvtab[] = {ls10, ls10a, va10, da10a, sp10, sp10a};
-struct parse pcblitv = {"LITV", lexlms, litvtab, 0, 0, 0, 0};
+struct parse pcblitv = {"LITV", lexmin, litvtab, 0, 0, 0, 0};
#undef LS0
#undef VA0
#undef SP0
@@ -881,9 +888,9 @@ struct parse pcblitt = {"LITT", lexlms, litttab, 0, 0, 0, 0};
Columns are based on LEXMARK.C.
*/
/* Symbols for state names (end with a number). */
-#define SP1 0 /* Separator before token expected. */
-#define TK1 2 /* Token expected. */
-#define CM0 4 /* COM[1] found when sep expected: possible comment, MGRP.*/
+#define SP1 0 /* Separator before token expected (but not -). */
+#define SP2 2 /* Separator before token expected. */
+#define TK1 4 /* Token expected. */
#define CM1 6 /* COM[1] found: possible comment, MGRP, or minus.*/
#define CM2 8 /* COM[2] found; in comment. */
#define CM3 10 /* Ending COM[1] found; end comment or continue it. */
@@ -896,22 +903,23 @@ int pcbmdtk = TK1; /* PCBMD: token expected. */
static UNCH
/* bit nmc num nms spc non ee eob rs com eti grpo lit lita
dso dsc pero plus refc rni tagc tago vi */
-sp21 []={SP1 ,SP1 ,SP1 ,SP1 ,TK1 ,SP1 ,TK1 ,SP1 ,TK1 ,CM0 ,SP1 ,TK1 ,TK1 ,TK1 ,
+sp21 []={SP1 ,SP1 ,SP1 ,SP1 ,TK1 ,SP1 ,TK1 ,SP1 ,TK1 ,SP1 ,SP1 ,TK1 ,TK1 ,TK1 ,
TK1 ,SP1 ,PR1 ,PX1 ,SP1 ,RN1 ,SP1 ,SP1 ,SP1 },
-sp21a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EE_ ,GET_,RS_ ,NOP_,INV_,GRPS,LIT ,LITE,
+sp21a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EE_ ,GET_,RS_ ,LEN_,INV_,GRPS,LIT ,LITE,
+ MDS ,INV_,NOP_,NOP_,INV_,NOP_,EMD ,INV_,INV_},
+
+sp22 []={SP2 ,SP2 ,SP2 ,SP2 ,TK1 ,SP2 ,TK1 ,SP2 ,TK1 ,CM1 ,SP2 ,TK1 ,TK1 ,TK1 ,
+ TK1 ,SP2 ,PR1 ,PX1 ,SP2 ,RN1 ,SP2 ,SP2 ,SP2 },
+sp22a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EE_ ,GET_,RS_ ,NOP_,INV_,GRPS,LIT ,LITE,
MDS ,INV_,NOP_,NOP_,INV_,NOP_,EMD ,INV_,INV_},
-tk21 []={SP1 ,SP1 ,SP1 ,SP1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,CM1 ,SP1 ,TK1 ,TK1 ,TK1 ,
+tk21 []={SP1 ,SP1 ,SP2 ,SP1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,CM1 ,SP1 ,TK1 ,TK1 ,TK1 ,
TK1 ,SP1 ,PR1 ,PX1 ,SP1 ,RN1 ,SP1 ,SP1 ,SP1 },
tk21a[]={INV_,NMT ,NUM ,NAS ,NOP_,SYS_,EE_ ,GET_,RS_ ,NOP_,INV_,GRPS,LIT ,LITE,
MDS ,INV_,NOP_,NOP_,INV_,NOP_,EMD ,INV_,INV_},
/* bit nmc num nms spc non ee eob rs com eti grpo lit lita
dso dsc pero plus refc rni tagc tago vi */
-cm20 []={SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,CM0 ,SP1 ,CM0 ,SP1 ,CM2 ,SP1 ,SP1 ,SP1 ,SP1 ,
- SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 },
-cm20a[]={LNR_,LNR_,LNR_,LNR_,LNR_,SYS_,LNR_,GET_,LNR_,NOP_,LNR_,LNR_,LNR_,LNR_,
- LNR_,LNR_,LNR_,LNR_,LNR_,LNR_,LNR_,LNR_,LNR_},
cm21 []={TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,CM1 ,TK1 ,CM1 ,TK1 ,CM2 ,TK1 ,TK1 ,TK1 ,TK1 ,
TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 },
@@ -930,8 +938,8 @@ cm23 []={CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM3 ,TK1 ,CM3 ,CM2 ,TK1 ,CM2 ,CM2 ,CM2 ,CM2 ,
cm23a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_,
NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_},
-pr21 []={SP1 ,SP1 ,SP1 ,TK1 ,TK1 ,PR1 ,SP1 ,PR1 ,TK1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,
- SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,TK1 ,SP1 ,SP1 ,SP1 },
+pr21 []={SP1 ,SP1 ,SP1 ,TK1 ,TK1 ,PR1 ,SP2 ,PR1 ,TK1 ,SP2 ,SP1 ,SP1 ,SP1 ,SP1 ,
+ SP1 ,SP1 ,SP2 ,SP1 ,SP1 ,TK1 ,SP1 ,SP1 ,SP1 },
pr21a[]={PCI_,PCI_,PCI_,PER_,PEN ,SYS_,PENR,GET_,PEN ,PENR,PCI_,PCI_,PCI_,PCI_,
PCI_,PCI_,PENR,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_},
@@ -945,12 +953,12 @@ rn21 []={TK1 ,TK1 ,TK1 ,SP1 ,TK1 ,RN1 ,TK1 ,RN1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,
rn21a[]={PCI_,PCI_,PCI_,RNS ,PCI_,SYS_,PCI_,GET_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,
PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_},
-*mdtab[] = {sp21, sp21a, tk21, tk21a, cm20, cm20a, cm21, cm21a, cm22, cm22a,
+*mdtab[] = {sp21, sp21a, sp22, sp22a, tk21, tk21a, cm21, cm21a, cm22, cm22a,
cm23, cm23a, pr21, pr21a, px21, px21a, rn21, rn21a};
struct parse pcbmd = {"MD", lexmark, mdtab, 0, 0, 0, 0};
#undef SP1
+#undef SP2
#undef TK1
-#undef CM0
#undef CM1
#undef CM2
#undef CM3
@@ -1214,29 +1222,29 @@ static UNCH
dso dsc pero plus refc rni tagc tago vi */
sp41 []={SP1 ,SP1 ,SP1 ,SP1 ,AN1 ,SP1 ,SP1 ,SP1 ,AN1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,
SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 },
-sp41a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,ETIC,INV_,INV_,INV_,
+sp41a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EOF_,GET_,RS_ ,LEN_,ETIC,INV_,INV_,INV_,
INV_,DSC ,INV_,INV_,INV_,INV_,TAGC,TAGO,INV_},
an41 []={SP1 ,SP1 ,SP1 ,SP2 ,AN1 ,AN1 ,AN1 ,AN1 ,AN1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,
SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 },
-an41a[]={INV_,NTV ,NTV ,NVS ,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,ETIC,INV_,INV_,INV_,
+an41a[]={INV_,NTV ,NTV ,NVS ,NOP_,SYS_,EOF_,GET_,RS_ ,NTV ,ETIC,INV_,INV_,INV_,
INV_,DSC ,INV_,INV_,INV_,INV_,TAGC,TAGO,INV_},
sp42 []={SP1 ,SP1 ,SP1 ,SP1 ,VI1 ,SP2 ,SP2 ,SP2 ,VI1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,
SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,AV1 },
-sp42a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,NASV,INV_,INV_,INV_,
+sp42a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EOF_,GET_,RS_ ,LEN_,NASV,INV_,INV_,INV_,
INV_,NASV,INV_,INV_,INV_,INV_,NASV,NASV,NOP_},
/* bit nmc num nms spc non ee eob rs com eti grpo lit lita
dso dsc pero plus refc rni tagc tago vi */
-vi41 []={SP1 ,AN1 ,AN1 ,AN1 ,VI1 ,VI1 ,VI1 ,VI1 ,VI1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,
+vi41 []={SP1 ,AN1 ,AN1 ,AN1 ,VI1 ,VI1 ,VI1 ,VI1 ,VI1 ,AN1 ,SP1 ,SP1 ,SP1 ,SP1 ,
SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,AV1 },
-vi41a[]={INV_,NASV,NASV,NASV,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,NASV,INV_,INV_,INV_,
+vi41a[]={INV_,NASV,NASV,NASV,NOP_,SYS_,EOF_,GET_,RS_ ,NASV,NASV,INV_,INV_,INV_,
INV_,NASV,INV_,INV_,INV_,INV_,NASV,NASV,NOP_},
av41 []={SP1 ,SP1 ,SP1 ,SP1 ,AV1 ,AV1 ,AV1 ,AV1 ,AV1 ,SP1 ,SP1 ,SP1 ,AN1 ,AN1 ,
SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 },
-av41a[]={INV_,AVU ,AVU ,AVU ,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,INV_,INV_,AVD ,AVDA,
+av41a[]={INV_,AVU ,AVU ,AVU ,NOP_,SYS_,EOF_,GET_,RS_ ,AVU ,INV_,INV_,AVD ,AVDA,
INV_,INV_,INV_,INV_,INV_,INV_,INV_,INV_,INV_},
*stagtab[] = {sp41, sp41a, an41, an41a, sp42, sp42a, vi41, vi41a, av41, av41a};
@@ -1303,42 +1311,41 @@ struct parse pcbeal = {"EAL", lexgrp, ealtab, 0, 0, 0, 0};
/* Symbols for state names. */
-#define SP1 0 /* Separator before token expected. */
-#define TK1 2 /* Token expected. */
-#define CM0 4 /* COM[1] found when sep expected: possible comment.*/
+#define SP1 0 /* Separator before token expected (but not -) */
+#define SP2 2 /* Separator before token expected. */
+#define TK1 4 /* Token expected. */
#define CM1 6 /* COM[1] found: possible comment.*/
#define CM2 8 /* COM[2] found; in comment. */
#define CM3 10 /* Ending COM[1] found; end comment or continue it. */
-
static UNCH
/* sig dat num nms spc non ee eob rs com lit lita tagc */
-
-sp31 []={SP1 ,SP1 ,SP1 ,SP1 ,TK1 ,SP1 ,SP1 ,SP1 ,TK1 ,CM0 ,TK1 ,TK1 ,SP1 },
-sp31a[]={INV_,ISIG,LEN_,LEN_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,LIT1,LIT2,ESGD},
-
-tk31 []={TK1 ,TK1 ,SP1 ,SP1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,CM1 ,TK1 ,TK1 ,SP1 },
+
+sp31 []={SP1 ,SP1 ,SP1 ,SP1 ,TK1 ,SP1 ,SP1 ,SP1 ,TK1 ,SP1 ,TK1 ,TK1 ,SP1 },
+sp31a[]={INV_,ISIG,LEN_,LEN_,NOP_,SYS_,EOF_,GET_,RS_ ,LEN_,LIT1,LIT2,ESGD},
+
+sp32 []={SP2 ,SP2 ,SP2 ,SP2 ,TK1 ,SP2 ,SP2 ,SP2 ,TK1 ,CM1 ,TK1 ,TK1 ,SP2 },
+sp32a[]={INV_,ISIG,LEN_,LEN_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,LIT1,LIT2,ESGD},
+
+tk31 []={TK1 ,TK1 ,SP2 ,SP1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,CM1 ,TK1 ,TK1 ,SP1 },
tk31a[]={INV_,ISIG,NUM1,NAS1,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,LIT1,LIT2,ESGD},
-
-cm30 []={SP1 ,CM0 ,SP1 ,SP1 ,SP1 ,CM0 ,SP1 ,CM0 ,SP1 ,CM2 ,SP1 ,SP1 ,SP1 },
-cm30a[]={PCI_,ISIG,PCI_,PCI_,PCI_,SYS_,PCI_,GET_,PCI_,NOP_,PCI_,PCI_,PCI_},
-
+
cm31 []={TK1 ,CM1 ,TK1 ,TK1 ,TK1 ,CM1 ,TK1 ,CM1 ,TK1 ,CM2 ,TK1 ,TK1 ,TK1 },
cm31a[]={PCI_,ISIG,PCI_,PCI_,PCI_,SYS_,PCI_,GET_,PCI_,NOP_,PCI_,PCI_,PCI_},
-
+
cm32 []={CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,TK1 ,CM2 ,CM2 ,CM3 ,CM2 ,CM2 ,CM2 },
cm32a[]={NOP_,ISIG,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_},
-
+
cm33 []={CM2 ,CM3 ,CM2 ,CM2 ,CM2 ,CM3 ,TK1 ,CM3 ,CM2 ,TK1 ,CM2 ,CM2 ,CM2 },
cm33a[]={NOP_,ISIG,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_},
-*sdtab[]={sp31, sp31a, tk31, tk31a, cm30, cm30a, cm31, cm31a, cm32, cm32a,
+*sdtab[]={sp31, sp31a, sp32, sp32a, tk31, tk31a, cm31, cm31a, cm32, cm32a,
cm33, cm33a};
struct parse pcbsd = {"SD", lexsd, sdtab, 0, 0, 0, 0};
#undef SP1
+#undef SP2
#undef TK1
-#undef CM0
#undef CM1
#undef CM2
#undef CM3
diff --git a/usr.bin/sgmls/sgmls/portproc.c b/usr.bin/sgmls/sgmls/portproc.c
index 0bb2431..a057d24 100644
--- a/usr.bin/sgmls/sgmls/portproc.c
+++ b/usr.bin/sgmls/sgmls/portproc.c
@@ -12,6 +12,7 @@
#include "std.h"
#include "entity.h"
#include "appl.h"
+#include "alloc.h"
/* This code shows how you might use system() to implement run_process().
ANSI C says very little about the behaviour of system(), and so this
diff --git a/usr.bin/sgmls/sgmls/serv.c b/usr.bin/sgmls/sgmls/serv.c
index 68b5fe1..b9699d2 100644
--- a/usr.bin/sgmls/sgmls/serv.c
+++ b/usr.bin/sgmls/sgmls/serv.c
@@ -72,7 +72,7 @@ UNCH *s; /* String to be hashed. */
int hashsize; /* Size of hash table array. */
{
unsigned long h = 0, g;
-
+
while (*s != 0) {
h <<= 4;
h += *s++;
diff --git a/usr.bin/sgmls/sgmls/sgml1.c b/usr.bin/sgmls/sgmls/sgml1.c
index a2808f4..c138c9f 100644
--- a/usr.bin/sgmls/sgmls/sgml1.c
+++ b/usr.bin/sgmls/sgmls/sgml1.c
@@ -195,8 +195,8 @@ static UNCH sgmltab[][11] = {
{DA1 ,DA1 ,ST2 ,ST2 ,ST2 ,ST2 ,ST2 ,NR2 ,ST1 ,NR2 ,ST1 },/*nr2*/
{CON_,ETG_,MD_ ,MDC_,MSS_,MSE_,PIS_,REF_,CON_,LOP_,EOD_},
- {DA1 ,DA1 ,ST2 ,ST2 ,ST2 ,ST2 ,ST2 ,NR1 ,ST1 ,NR2 ,ST1 },/*st2*/
- {CON_,ETG_,MD_ ,MDC_,MSS_,MSE_,PIS_,REF_,CON_,LOP_,EOD_},
+ {DA1 ,DA1 ,ST2 ,ST2 ,ST2 ,ST2 ,ST2 ,NR2 ,ST1 ,NR2 ,ST1 },/*st2*/
+ {CON_,ETG_,MD_ ,MDC_,MSS_,MSE_,PIS_,LOP_,CON_,LOP_,EOD_},
};
int scbsgmst = ST1; /* SCBSGML: trailing stag or markup; ignore RE. */
int scbsgmnr = NR1; /* SCBSGML: new record; do not ignore RE. */
@@ -257,6 +257,7 @@ struct switches *swp;
TRACEPRO(); /* Set trace switches for prolog. */
msginit(swp);
ioinit(swp);
+ entginit(swp);
sdinit();
return &lex.m;
}
@@ -323,13 +324,15 @@ struct sgmlcap *p;
p->limit = sd.capacity;
p->name = captab;
- for (i = 0; i < NCAPACITY; i++) {
- long excess = capnumber[i]*cappoints[i] - sd.capacity[i];
- if (excess > 0) {
- char buf[sizeof(long)*3 + 1];
- sprintf(buf, "%ld", excess);
- sgmlerr(162, (struct parse *)0,
- (UNCH *)captab[i], (UNCH *)buf);
+ if (sw.swcap) {
+ for (i = 0; i < NCAPACITY; i++) {
+ long excess = capnumber[i]*cappoints[i] - sd.capacity[i];
+ if (excess > 0) {
+ char buf[sizeof(long)*3 + 1];
+ sprintf(buf, "%ld", excess);
+ sgmlerr(162, (struct parse *)0,
+ (UNCH *)captab[i], (UNCH *)buf);
+ }
}
}
}
@@ -370,7 +373,7 @@ PNE *np;
UNCH **tp;
{
PECB ep; /* Pointer to an entity control block. */
-
+
ep = entfind(iname);
if (!ep)
return -1;
@@ -412,6 +415,11 @@ int sgmlgcnterr()
return msgcnterr();
}
+char *getsubst()
+{
+ return (char *)lextran;
+}
+
/* This is for error handling functions that want to print a gi backtrace. */
UNCH *getgi(i)
diff --git a/usr.bin/sgmls/sgmls/sgml2.c b/usr.bin/sgmls/sgmls/sgml2.c
index 83bccbd..df75b6a 100644
--- a/usr.bin/sgmls/sgmls/sgml2.c
+++ b/usr.bin/sgmls/sgmls/sgml2.c
@@ -49,13 +49,12 @@ UNCH *ename; /* Entity name (with length and EOS). */
/* Get the entity control block, if the entity has been defined. */
if ((ecb = (PECB)hfind((THASH)etab, ename, hash(ename, ENTHASH)))==0
|| ecb->estore == 0) {
- if ( ename[1]==lex.d.pero
- || ecbdeflt==0
- || (ecb = usedef(ename))==0 ) {
- sgmlerr(ename[1] == lex.d.pero || ecbdeflt == 0 ? 35 : 150,
- (struct parse *)0, ename+1, (UNCH *)0);
+ if (ename[1] == lex.d.pero || ecbdeflt == 0) {
+ sgmlerr(35, (struct parse *)0, ename+1, (UNCH *)0);
return(ENTUNDEF);
}
+ else
+ ecb = usedef(ename);
}
return(entopen(ecb));
}
@@ -74,6 +73,7 @@ struct entity *ecb; /* Entity control block. */
sgmlerr(34, (struct parse *)0, ecb->ename+1, ntoa(ENTLVL));
return(ENTMAX);
}
+ if (docelsw) sgmlerr(234, (struct parse *)0, (UNCH *)0, (UNCH *)0);
/* If entity is an etd, pi, or data, return it without creating an scb. */
switch (ecb->estore) {
case ESN:
@@ -99,6 +99,8 @@ struct entity *ecb; /* Entity control block. */
case ESC:
case ESX:
datalen = ustrlen(ecb->etx.c);
+ /* Ignore reference to empty CDATA entity. */
+ if (datalen == 0 && ecb->estore == ESC) return(0);
data = ecb->etx.c;
entdatsw = (ecb->estore==ESC) ? CDECONT : SDECONT;
return(ENTDATA);
@@ -169,7 +171,8 @@ int entget()
{
RSCC += (CCO = FPOS-FBUF);
/* Characters-in-record (ignore EOB/EOF). */
- tagctr += CCO; /* Update tag length counter. */
+ if (es == tages)
+ tagctr += CCO; /* Update tag length counter. */
switch (*FPOS) {
case EOBCHAR: /* End of file buffer: refill it. */
rbufs[-2] = FPOS[-2];
@@ -227,9 +230,10 @@ UNCH *ename; /* Entity name (with length and EOS). */
else {
/* Move entity name into fpi. */
fpidf.fpinm = ename + 1;
- if ((etx.x = entgen(&fpidf))==0) return (PECB)0;
+ if ((etx.x = entgen(&fpidf))==0)
+ sgmlerr(150, (struct parse *)0, ename + 1, (UNCH *)0);
if (estore==ESN) {
- memcpy((UNIV)(pne=(PNE)rmalloc((UNS)NESZ)),(UNIV)ecbdeflt->etx.n,(UNS)NESZ);
+ memcpy((UNIV)(pne=(PNE)rmalloc((UNS)NESZ)),(UNIV)ecbdeflt->etx.n,(UNS)NESZ);
NEID(pne) = etx.x;
etx.n = pne;
}
@@ -288,7 +292,8 @@ int es; /* Local index to scbs. */
SCB.pushback = FPOS[-1];
FBUF = 0; /* Indicate pending file. */
RSCC += off; /* Update characters-in-record counter. */
- tagctr += off; /* Update tag length counter. */
+ if (es == tages)
+ tagctr += off; /* Update tag length counter. */
iopend(SCBFCB, off, rbufs);
return;
}
@@ -386,7 +391,7 @@ UNCH *parm2; /* Additional parameters (or NULL). */
{
struct error err;
errorinit(&err, subdcl ? MDERR : MDERR2, number);
- err.parmno = parmno;
+ err.parmno = parmno;
err.subdcl = subdcl;
err.eparm[0] = (UNIV)parm1;
err.eparm[1] = (UNIV)parm2;
@@ -427,6 +432,24 @@ UNCH *parm2; /* Error message parameters. */
scbset();
return msgsave(&err);
}
+/* SAVMDERR: Save an md error for possible later use.
+*/
+UNIV savmderr(number, parm1, parm2)
+UNS number; /* Error number. */
+UNCH *parm1; /* Additional parameters (or NULL). */
+UNCH *parm2; /* Additional parameters (or NULL). */
+{
+ struct error err;
+ errorinit(&err, subdcl ? MDERR : MDERR2, number);
+ err.parmno = parmno;
+ err.subdcl = subdcl;
+ err.eparm[0] = (UNIV)parm1;
+ err.eparm[1] = (UNIV)parm2;
+ err.errsp = (sizeof(pcbtab)/sizeof(pcbtab[0])) + ptrsrch(mdnmtab,
+ (UNIV)mdname);
+ scbset();
+ return msgsave(&err);
+}
/* SVDERR: Print a saved error.
*/
VOID svderr(p)
diff --git a/usr.bin/sgmls/sgmls/sgmlaux.h b/usr.bin/sgmls/sgmls/sgmlaux.h
index f87ac8b..6073e66 100644
--- a/usr.bin/sgmls/sgmls/sgmlaux.h
+++ b/usr.bin/sgmls/sgmls/sgmlaux.h
@@ -51,6 +51,7 @@ VOID ioinit P((struct switches *));
char *ioflid P((UNIV));
UNIV entgen P((struct fpi *));
+VOID entginit P((struct switches *));
VOID msgprint P((struct error *));
VOID msginit P((struct switches *));
@@ -68,3 +69,4 @@ UNIV rmalloc P((unsigned int));
UNIV rrealloc P((UNIV, UNS));
VOID frem P((UNIV));
VOID exiterr P((unsigned int,struct parse *));
+char *getsubst P((void));
diff --git a/usr.bin/sgmls/sgmls/sgmldecl.c b/usr.bin/sgmls/sgmls/sgmldecl.c
index d9f06b3..6ef6b68 100644
--- a/usr.bin/sgmls/sgmls/sgmldecl.c
+++ b/usr.bin/sgmls/sgmls/sgmldecl.c
@@ -9,6 +9,7 @@
/* Symbolic names for the error numbers that are be generated only by
this module. */
+#define E_SHUNCHAR 159
#define E_STANDARD 163
#define E_SIGNIFICANT 164
#define E_BADLIT 165
@@ -60,6 +61,7 @@ this module. */
#define E_NMBAD 222
#define E_NMMINUS 223
#define E_UNKNOWNSET 227
+#define E_TOTALCAP 235
#define CANON_NMC '.' /* Canonical name character. */
#define CANON_NMS 'A' /* Canonical name start character. */
@@ -163,21 +165,21 @@ in a buffer intended for a literal.) */
/* Table of quantity names. Must match Q* in sgmldecl.h. */
static char *quantity_names[] = {
- "ATTCNT",
- "ATTSPLEN",
- "BSEQLEN",
- "DTAGLEN",
- "DTEMPLEN",
- "ENTLVL",
- "GRPCNT",
- "GRPGTCNT",
- "GRPLVL",
- "LITLEN",
- "NAMELEN",
- "NORMSEP",
- "PILEN",
- "TAGLEN",
- "TAGLVL",
+ "ATTCNT",
+ "ATTSPLEN",
+ "BSEQLEN",
+ "DTAGLEN",
+ "DTEMPLEN",
+ "ENTLVL",
+ "GRPCNT",
+ "GRPGTCNT",
+ "GRPLVL",
+ "LITLEN",
+ "NAMELEN",
+ "NORMSEP",
+ "PILEN",
+ "TAGLEN",
+ "TAGLVL",
};
static int max_quantity[] = {
@@ -253,9 +255,18 @@ static int systemcharset[] = {
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
};
+/* This is a private use designating sequence that by convention
+refers to the whole system character set whatever it is. */
+
+#define SYSTEM_CHARSET_DESIGNATING_SEQUENCE "ESC 2/5 2/15 3/0"
+
static struct pmap charset_map[] = {
- { "ESC 2/5 4/0", (UNIV)asciicharset }, /* ISO 646 IRV */
- { "ESC 2/8 4/2", (UNIV)asciicharset }, /* ISO Registration Number 6, ASCII */
+ { "ESC 2/5 4/0", (UNIV)iso646charset }, /* ISO 646 IRV */
+ { "ESC 2/8 4/2", (UNIV)iso646G0charset }, /* ISO Registration Number 6, ASCII */
+ { "ESC 2/8 4/0", (UNIV)iso646G0charset }, /* ISO Registration Number 2, G0 set of ISO 646 IRV */
+ { "ESC 2/13 4/1", (UNIV)iso8859_1charset }, /* Latin 1 */
+ { "ESC 2/1 4/0", (UNIV)iso646C0charset }, /* ISO 646, C0 */
+ { "ESC 2/2 4/3", (UNIV)iso6429C1charset }, /* ISO 6429, C1 */
{ SYSTEM_CHARSET_DESIGNATING_SEQUENCE, (UNIV)systemcharset },
/* system character set */
{ 0 }
@@ -272,7 +283,9 @@ static UNCH char_flags[256];
static int done_nonsgml = 0;
static UNCH *nlextoke = 0; /* new lextoke */
static UNCH *nlextran = 0; /* new lextran */
-
+#define MAX_SAVED_ERRS 4
+static UNIV saved_errs[MAX_SAVED_ERRS];
+static int nsaved_errs = 0;
static UNCH kcharset[] = "CHARSET";
static UNCH kbaseset[] = "BASESET";
@@ -327,7 +340,7 @@ static UNCH kquantity[] = "QUANTITY";
static UNIV pmaplookup P((struct pmap *, char *));
static UNCH *ltous P((long));
-static VOID sdfixstandard P((UNCH *));
+static VOID sdfixstandard P((UNCH *, int));
static int sdparm P((UNCH *, struct parse *));
static int sdname P((UNCH *, UNCH *));
static int sdckname P((UNCH *, UNCH *));
@@ -353,6 +366,7 @@ static int sdnames P((UNCH *));
static int sdquantity P((UNCH *));
static int sdfeatures P((UNCH *));
static int sdappinfo P((UNCH *));
+static VOID sdsaverr P((UNS, UNCH *, UNCH *));
static VOID bufsalloc P((void));
static VOID bufsrealloc P((void));
@@ -377,11 +391,25 @@ int sgmldecl()
parmno = 0;
mdname = sgmlkey;
subdcl = NULL;
+ nsaved_errs = 0;
for (i = 0; i < SIZEOF(section); i++)
if ((*section[i])(tbuf) == FAIL) {
errsw = 1;
break;
}
+ if (sd.formal) {
+ /* print saved errors */
+ int i;
+ for (i = 0; i < nsaved_errs; i++)
+ svderr(saved_errs[i]);
+ }
+ else {
+ /* free saved errors */
+ int i;
+ for (i = 0; i < nsaved_errs; i++)
+ msgsfree(saved_errs[i]);
+ }
+
if (!errsw)
setlexical();
bufsrealloc();
@@ -406,7 +434,7 @@ UNCH *tbuf;
sderr(123, (UNCH *)0, (UNCH *)0);
return FAIL;
}
- sdfixstandard(tbuf);
+ sdfixstandard(tbuf, 0);
if (ustrcmp(tbuf, standard) != 0)
sderr(E_BADVERSION, tbuf, standard);
return SUCCESS;
@@ -426,6 +454,7 @@ UNCH *tbuf;
if (sdcsdesc(tbuf, status) == FAIL)
return FAIL;
+#if 0
for (i = 128; i < 256; i++)
if (status[i] != UNDESC)
break;
@@ -437,11 +466,14 @@ UNCH *tbuf;
sderr(E_7BIT, (UNCH *)0, (UNCH *)0);
#endif
}
+#endif
/* Characters that are declared UNUSED in the document character set
are assigned to non-SGML. */
for (i = 0; i < 256; i++) {
if (status[i] == UNDESC) {
+#if 0
sderr(E_CHARMISSING, ltous((long)i), (UNCH *)0);
+#endif
char_flags[i] |= CHAR_NONSGML;
}
else if (status[i] == UNUSED)
@@ -491,9 +523,9 @@ int *status;
fpi.fpipubis = tbuf;
/* Give a warning if it is not a CHARSET fpi. */
if (parsefpi(&fpi))
- sderr(E_FORMAL, (UNCH *)0, (UNCH *)0);
+ sdsaverr(E_FORMAL, (UNCH *)0, (UNCH *)0);
else if (fpi.fpic != FPICHARS)
- sderr(E_BADCLASS, kcharset, (UNCH *)0);
+ sdsaverr(E_BADCLASS, kcharset, (UNCH *)0);
else {
fpi.fpipubis[fpi.fpil + fpi.fpill] = '\0';
baseset = (int *)pmaplookup(charset_map,
@@ -547,10 +579,12 @@ int *status;
int n = basenum + (i - start);
if (n < 0 || n > 255)
sderr(E_CHARRANGE, (UNCH *)0, (UNCH *)0);
- else if (baseset[n] == UNUSED)
- sderr(E_BADBASECHAR, ltous((long)n), (UNCH *)0);
- else
+ else {
+ if (baseset[n] == UNUSED)
+ sderr(E_BADBASECHAR, ltous((long)n),
+ (UNCH *)0);
status[i] = baseset[n];
+ }
}
}
}
@@ -570,6 +604,7 @@ static int sdcapacity(tbuf)
UNCH *tbuf;
{
int ncap;
+ int i;
if (sdckname(tbuf, kcapacity) == FAIL)
return FAIL;
@@ -609,7 +644,9 @@ UNCH *tbuf;
sderr(E_CAPMISSING, (UNCH *)0, (UNCH *)0);
return FAIL;
}
-
+ for (i = 1; i < NCAPACITY; i++)
+ if (sd.capacity[i] > sd.capacity[0])
+ sderr(E_TOTALCAP, (UNCH *)captab[i], (UNCH *)0);
return SUCCESS;
}
@@ -624,7 +661,7 @@ UNCH *tbuf;
sderr(123, (UNCH *)0, (UNCH *)0);
return FAIL;
}
- sdfixstandard(tbuf);
+ sdfixstandard(tbuf, 1);
ptr = pmaplookup(capset_map, (char *)tbuf);
if (!ptr)
sderr(E_CAPSET, tbuf, (UNCH *)0);
@@ -680,7 +717,7 @@ UNCH *tbuf;
int nswitches;
if (sdparm(tbuf, &pcblitv) != LIT1)
return FAIL;
- sdfixstandard(tbuf);
+ sdfixstandard(tbuf, 1);
if (ustrcmp(tbuf, CORE_SYNTAX) == 0)
sd.shortref = 0;
else if (ustrcmp(tbuf, REFERENCE_SYNTAX) == 0)
@@ -769,7 +806,7 @@ UNCH *tbuf;
}
}
if (pcbsd.action != NUM1) {
- sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
+ sderr(E_SHUNCHAR, (UNCH *)0, (UNCH *)0);
return FAIL;
}
do {
@@ -944,7 +981,7 @@ UNCH *tbuf;
return FAIL;
}
start[i] = bufi;
-
+
for (s = tbuf; *s; s++) {
int c = *s;
if (c == DELNONCH) {
@@ -955,8 +992,7 @@ UNCH *tbuf;
if (c < 0)
bad = 1;
else if ((char_flags[c] & (CHAR_SIGNIFICANT | CHAR_MAGIC))
- && c != '.' && c != '-'
- && !(c == '_' && i >= 2)) {
+ && c != '.' && c != '-') {
int class = lextoke[c];
if (class == SEP || class == SP || class == NMC
|| class == NMS || class == NU)
@@ -996,7 +1032,7 @@ UNCH *tbuf;
nlextoke[uc] = NMS;
nlextran[lc] = uc;
}
-
+
for (i = 0; i < count[2]; i++) {
UNCH lc = buf[start[2] + i];
UNCH uc = buf[start[3] + i];
@@ -1149,7 +1185,7 @@ UNCH *tbuf;
for (i = 0; i < NKEYS; i++)
if (newkey[i][0] != '\0') {
UNCH temp[REFNAMELEN + 1];
-
+
ustrcpy(temp, key[i]);
ustrcpy(key[i], newkey[i]);
ustrcpy(newkey[i], temp);
@@ -1304,11 +1340,13 @@ UNCH *tbuf;
/* Change a prefix of ISO 8879-1986 to ISO 8879:1986. Amendment 1 to
the standard requires the latter. */
-static VOID sdfixstandard(tbuf)
+static VOID sdfixstandard(tbuf, silently)
UNCH *tbuf;
+int silently;
{
if (strncmp((char *)tbuf, "ISO 8879-1986", 13) == 0) {
- sderr(E_STANDARD, (UNCH *)0, (UNCH *)0);
+ if (!silently)
+ sderr(E_STANDARD, (UNCH *)0, (UNCH *)0);
tbuf[8] = ':';
}
}
@@ -1389,7 +1427,7 @@ VOID sdinit()
{
int i;
/* Shunned character numbers in the reference concrete syntax. */
- static UNCH refshun[] = {
+ static UNCH refshun[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 255
};
@@ -1435,7 +1473,7 @@ static
VOID bufsrealloc()
{
UNS size;
-
+
if (ENTLVL != REFENTLVL)
scbs = (struct source *)rrealloc((UNIV)scbs,
(ENTLVL+1)*sizeof(struct source));
@@ -1463,7 +1501,7 @@ static VOID setlexical()
{
int i;
UNCH **p;
-
+
if (nlextoke) {
/* Handle characters that were made significant by the
NAMING section. */
@@ -1493,7 +1531,7 @@ static VOID setlexical()
}
}
-
+
/* Now munge the lexical tables. */
for (p = lextabs; *p; p++) {
UNCH nonclass = (*p)[CANON_NONSGML];
@@ -1513,6 +1551,12 @@ static VOID setlexical()
}
else if (!(char_flags[i] & CHAR_SIGNIFICANT))
(*p)[i] = datclass;
+ else if (*p == lexmin) {
+ /* If it used to be NONSGML, but it's now significant,
+ treat it like a datachar. */
+ if ((*p)[i] == nonclass)
+ (*p)[i] = datclass;
+ }
else if (nlextoke
/* This relies on the fact that lextoke
occurs last in lextabs. */
@@ -1544,7 +1588,7 @@ static VOID setlexical()
frem((UNIV)nlextoke);
nlextoke = 0;
}
-
+
}
/* Munge parse tables so that empty start and end tags are not recognized. */
@@ -1553,7 +1597,7 @@ static VOID noemptytag()
{
static struct parse *pcbs[] = { &pcbconm, &pcbcone, &pcbconr, &pcbconc };
int i;
-
+
for (i = 0; i < SIZEOF(pcbs); i++) {
int maxclass, maxstate;
int j, k, act;
@@ -1617,11 +1661,10 @@ FILE *fp;
char lcletter[256]; /* LC letters: a-z */
fprintf(fp, "<!SGML \"%s\"\n", standard);
- fprintf(fp, "CHARSET\nBASESET \"%s//CHARSET %s//%s\"\nDESCSET\n",
- SYSTEM_CHARSET_OWNER,
- SYSTEM_CHARSET_DESCRIPTION,
+ fprintf(fp,
+ "CHARSET\nBASESET \"-//Dummy//CHARSET Dummy//%s\"\nDESCSET\n",
SYSTEM_CHARSET_DESIGNATING_SEQUENCE);
-
+
if (!done_nonsgml) {
done_nonsgml = 1;
for (i = 0; i < 256; i++)
@@ -1655,18 +1698,26 @@ FILE *fp;
if (!changed)
fprintf(fp, "PUBLIC \"%s\"\n", capset_map[0].name);
fprintf(fp, "SCOPE DOCUMENT\n");
-
+
fprintf(fp, "SYNTAX\nSHUNCHAR");
for (i = 0; i < 256; i++)
if (char_flags[i] & CHAR_SHUNNED)
- fprintf(fp, " %d", i);
- fprintf(fp, "\n");
- fprintf(fp, "BASESET \"%s//CHARSET %s//%s\"\nDESCSET 0 256 0\n",
- SYSTEM_CHARSET_OWNER,
- SYSTEM_CHARSET_DESCRIPTION,
+ break;
+ if (i == 256)
+ fprintf(fp, " NONE\n");
+ else {
+ for (; i < 256; i++)
+ if (char_flags[i] & CHAR_SHUNNED)
+ fprintf(fp, " %d", i);
+ fprintf(fp, "\n");
+ }
+
+ fprintf(fp,
+ "BASESET \"-//Dummy//CHARSET Dummy//%s\"\nDESCSET 0 256 0\n",
SYSTEM_CHARSET_DESIGNATING_SEQUENCE);
- fprintf(fp, "FUNCTION\nRE 13\nRS 10\nSPACE 32\nTAB SEPCHAR 9\n");
+ fprintf(fp, "FUNCTION\nRE %d\nRS %d\nSPACE %d\nTAB SEPCHAR %d\n",
+ RECHAR, RSCHAR, ' ', TABCHAR);
MEMZERO((UNIV)uc, 256);
for (i = 0; i < 256; i++)
@@ -1731,6 +1782,17 @@ FILE *fp;
fprintf(fp, ">\n");
}
+/* Save an error to be printed only if FORMAL is declared as YES. */
+
+static
+VOID sdsaverr(number, parm1, parm2)
+UNS number;
+UNCH *parm1;
+UNCH *parm2;
+{
+ saved_errs[nsaved_errs++] = savmderr(number, parm1, parm2);
+}
+
/*
Local Variables:
c-indent-level: 5
diff --git a/usr.bin/sgmls/sgmls/sgmldecl.h b/usr.bin/sgmls/sgmls/sgmldecl.h
index 296bdb8..1111f72 100644
--- a/usr.bin/sgmls/sgmls/sgmldecl.h
+++ b/usr.bin/sgmls/sgmls/sgmldecl.h
@@ -1,15 +1,15 @@
/* sgmldecl.h: SGML declaration parsing. */
-#define QATTCNT 0
-#define QATTSPLEN 1
-#define QBSEQLEN 2
-#define QDTAGLEN 3
-#define QDTEMPLEN 4
-#define QENTLVL 5
-#define QGRPCNT 6
-#define QGRPGTCNT 7
-#define QGRPLVL 8
-#define QLITLEN 9
+#define QATTCNT 0
+#define QATTSPLEN 1
+#define QBSEQLEN 2
+#define QDTAGLEN 3
+#define QDTEMPLEN 4
+#define QENTLVL 5
+#define QGRPCNT 6
+#define QGRPGTCNT 7
+#define QGRPLVL 8
+#define QLITLEN 9
#define QNAMELEN 10
#define QNORMSEP 11
#define QPILEN 12
@@ -18,16 +18,16 @@
#define NQUANTITY (QTAGLVL+1)
-#define TOTALCAP 0
-#define ENTCAP 1
-#define ENTCHCAP 2
-#define ELEMCAP 3
-#define GRPCAP 4
-#define EXGRPCAP 5
-#define EXNMCAP 6
-#define ATTCAP 7
-#define ATTCHCAP 8
-#define AVGRPCAP 9
+#define TOTALCAP 0
+#define ENTCAP 1
+#define ENTCHCAP 2
+#define ELEMCAP 3
+#define GRPCAP 4
+#define EXGRPCAP 5
+#define EXNMCAP 6
+#define ATTCAP 7
+#define ATTCHCAP 8
+#define AVGRPCAP 9
#define NOTCAP 10
#define NOTCHCAP 11
#define IDCAP 12
@@ -81,4 +81,10 @@ extern struct sgmldecl sd;
#define UNDESC -3
#define UNKNOWN_SET -4
-extern int asciicharset[];
+extern int iso646charset[];
+extern int iso646G0charset[];
+extern int iso646C0charset[];
+extern int iso8859_1charset[];
+extern int iso6429C1charset[];
+
+
diff --git a/usr.bin/sgmls/sgmls/sgmlfnsm.h b/usr.bin/sgmls/sgmls/sgmlfnsm.h
index 0d617fb..3003d67 100644
--- a/usr.bin/sgmls/sgmls/sgmlfnsm.h
+++ b/usr.bin/sgmls/sgmls/sgmlfnsm.h
@@ -86,6 +86,7 @@ UNCH *pubfield P((UNCH *,UNCH *,UNCH,UNS *));
UNCH *replace P((UNCH *,UNCH *));
UNCH *sandwich P((UNCH *,UNCH *,UNCH *));
UNIV saverr P((unsigned int,struct parse *,UNCH *,UNCH *));
+UNIV savmderr P((unsigned int,UNCH *,UNCH *));
VOID scbset P((void));
VOID sdinit P((void));
VOID setcurchar P((int));
diff --git a/usr.bin/sgmls/sgmls/sgmlio.c b/usr.bin/sgmls/sgmls/sgmlio.c
index 3db1d0d..c78bb7a 100644
--- a/usr.bin/sgmls/sgmls/sgmlio.c
+++ b/usr.bin/sgmls/sgmls/sgmlio.c
@@ -191,7 +191,7 @@ int *newfilep;
struct iofcb *f = (struct iofcb *)p;
FILE *fp;
int c;
-
+
*newfilep = 0;
if (f->first) {
buf[i] = EOBCHAR;
diff --git a/usr.bin/sgmls/sgmls/sgmlmsg.c b/usr.bin/sgmls/sgmls/sgmlmsg.c
index 454bc3e..4d98c55 100644
--- a/usr.bin/sgmls/sgmls/sgmlmsg.c
+++ b/usr.bin/sgmls/sgmls/sgmlmsg.c
@@ -64,16 +64,16 @@ static char *headers[] = {
/* Indexes into headers[] */
-#define HDRPFX 0
-#define HDRALL 1
-#define HDRUNSUP 2
-#define HDRSYS 3
-#define HDRWARN 4
-#define HDRLOC 5
-#define HDRELOC 6
-#define HDRMD 7
-#define HDRMD2 8
-#define HDRMODE 9
+#define HDRPFX 0
+#define HDRALL 1
+#define HDRUNSUP 2
+#define HDRSYS 3
+#define HDRWARN 4
+#define HDRLOC 5
+#define HDRELOC 6
+#define HDRMD 7
+#define HDRMD2 8
+#define HDRMODE 9
#define HDREOF 10
#define HDREE 11
#define HDRRS 12
@@ -229,7 +229,7 @@ struct error *e;
}
else
indent = 4;
-
+
for (toplevel = 0; getlocation(toplevel, &loc); toplevel++)
if (loc.filesw) {
prevfilelevel = filelevel;
@@ -285,7 +285,7 @@ struct error *e;
hdrcode = HDRUNSUP;
else
hdrcode = HDRALL;
-
+
xfprintf(efp, getheader(hdrcode), type, severity, e->errnum);
if (filelevel >= 0) {
@@ -301,9 +301,9 @@ struct error *e;
}
}
}
-
+
/* It is necessary to copy the result of getparm() because
- the specification of catgets() says in can return a
+ the specification of catgets() says it can return a
pointer to a static buffer which may get overwritten
by the next call to catgets(). */
@@ -388,14 +388,14 @@ int indent;
{
int i = 1;
UNCH *gi;
-
+
gi = getgi(i);
if (!gi)
return;
spaces(efp, indent);
xfprintf(efp, getheader(HDRELT));
do {
- fprintf(efp, " %s", gi);
+ fprintf(efp, " %s", (char *)gi);
gi = getgi(++i);
} while (gi);
putc('\n', efp);
diff --git a/usr.bin/sgmls/sgmls/sgmls.1 b/usr.bin/sgmls/sgmls/sgmls.1
index b9967a0..634601b 100644
--- a/usr.bin/sgmls/sgmls/sgmls.1
+++ b/usr.bin/sgmls/sgmls/sgmls.1
@@ -44,6 +44,9 @@ Standard Generalized Markup Language
[
.BI \-i name
]
+[
+.BI \-m file
+]
.if \n(Tr \{\
[
.BI \-x flags
@@ -77,7 +80,8 @@ can also be used to refer to the standard input.
The following options are available:
.TP
.BI \-c file
-Write a report of capacity usage to
+Report any capacity limits that are exceeded
+and write a report of capacity usage to
.IR file .
The report is in the format of a RACT result.
RACT is the Reference Application for Capacity Testing defined in the
@@ -137,6 +141,17 @@ Output
.B L
commands giving the current line number and filename.
.TP
+.BI \-m file
+Map public identifiers and entity names to system identifiers
+using the catalog entry file
+.IR file .
+Multiple
+.B \-m
+options are allowed.
+Catalog entry files specified with the
+.B \-m
+option will be searched before the defaults.
+.TP
.B \-p
Parse only the prolog.
.I Sgmls
@@ -153,7 +168,6 @@ Error messages will still be printed.
.TP
.B \-u
Warn about undefined elements: elements used in the DTD but not defined.
-Also warn about undefined short reference maps.
.TP
.B \-v
Print the version number.
@@ -223,11 +237,80 @@ interpreted as a list of filenames separated by
A filename of
.B \-
can be used to refer to the standard input.
-If no system identifier is supplied, then the entity manager will
-attempt to generate a filename using the public identifier
-(if there is one) and other information available to it.
-Notation identifiers are not subject to this treatment.
-This process is controlled by the environment variable
+.LP
+If a system identifier is not specified,
+then the entity manager can generate one using catalog
+entry files in the format defined in the SGML Open Draft Technical
+Resolution on Entity Management. A catalog entry file contains a
+sequence of entries in one of the following four forms:
+.TP
+.BI PUBLIC\ pubid\ sysid
+This specifies that
+.I sysid
+should be used as the system identifier if the public
+identifier is
+.IR pubid .
+.I Sysid
+is a system identifier as defined in ISO 8879 and
+.I pubid
+is a public identifier as defined in ISO 8879.
+.TP
+.BI ENTITY\ name\ sysid
+This specifies that
+.I sysid
+should be used as the system identifier if the entity is a general
+entity whose name is
+.IR name .
+.TP
+.BI ENTITY\ % name\ sysid
+This specifies that
+.I sysid
+should be used as the system identifier if the entity is a parameter
+entity whose name is
+.IR name .
+Note that there is no space between the
+.B %
+and the
+.IR name .
+.TP
+.BI DOCTYPE\ name\ sysid
+This specifies that
+.I sysid
+should be used as the system identifier if the entity is an
+entity declared in a document type declaration whose document type name is
+.IR name .
+.LP
+The last two forms are extensions to the SGML Open format.
+The delimiters can be omitted from the
+.I sysid
+provided it does not contain any white space.
+Comments are allowed between parameters delimited by
+.B --
+as in SGML.
+The environment variable
+.B \s-1SGML_CATALOG_FILES\s0
+contains a
+.if \n(Os=0 colon-separated
+.if \n(Os=1 semicolon-separated
+list of catalog entry files.
+These will be searched after any catalog entry files specified
+using the
+.B \-m
+option.
+If this environment variable is not set,
+then a system dependent list of catalog entry files will be used.
+A match in a catalog entry file for a PUBLIC entry will take
+precedence over a match in the same file for an ENTITY
+or DOCTYPE entry.
+A filename in a system identifier in a catalog entry file
+is interpreted relative to the directory containing the catalog
+entry file.
+.LP
+If no match can be found in a catalog entry file, then the entity
+manager will attempt to generate a filename using the public
+identifier (if there is one) and other information available to it.
+Notation identifiers are not subject to this treatment. This process
+is controlled by the environment variable
.BR \s-1SGML_PATH\s0 ;
this contains a
.if \n(Os=0 colon-separated
@@ -238,18 +321,6 @@ substitution fields; a substitution field is a
.B %
character followed by a single letter that indicates the value
of the substitution.
-If
-.B \s-1SGML_PATH\s0
-uses the
-.B %S
-field (the value of which is the system identifier),
-then the entity manager will also use
-.B \s-1SGML_PATH\s0
-to generate a filename
-when a system identifier that does not contain any
-.if \n(Os=0 colons
-.if \n(Os=1 semi-colons
-is supplied.
The value of a substitution can either be a string
or it can be
.IR null .
@@ -395,6 +466,27 @@ does not allow a display version or if no version was specified.
If an empty version was specified, a value of
.B default
will be used.
+.LP
+Normally if the external identifier for an entity includes a system
+identifier, the entity manager will use the specified system
+identifier and not attempt to generate one.
+If, however,
+.B \s-1SGML_PATH\s0
+uses the
+.B %S
+field,
+then the entity manager will first search for a matching
+entry in the catalog entry files.
+If a match is found, then this will be used instead of the
+specified system identifier.
+Otherwise,
+if the specified system identifier does not contain any
+.if \n(Os=0 colons,
+.if \n(Os=1 semi-colons,
+the entity manager will use
+.B \s-1SGML_PATH\s0
+to generate a filename.
+Otherwise the entity manager will use the specified system identifier.
.br
.ne 18
.SS "System declaration"
@@ -442,6 +534,10 @@ SDIF
&PACK&NO&UNPACK&NO
.TE
.LP
+Exceeding a capacity limit will be ignored unless the
+.B \-c
+option is given.
+.LP
The memory usage of
.I sgmls
is not a function of the capacity points used by a document;
@@ -465,18 +561,6 @@ The shunned character numbers can be changed.
.LP
Eight bit characters can be assigned to
\s-1LCNMSTRT\s0, \s-1UCNMSTRT\s0, \s-1LCNMCHAR\s0 and \s-1UCNMCHAR\s0.
-Declaring this requires that the syntax reference character set be declared
-like this:
-.RS
-.ne 3
-.TS
-tab(&);
-l l.
-BASESET&"ISO Registration Number 100//CHARSET
-&\h'\w'"'u'ECMA-94 Right Part of Latin Alphabet Nr. 1//ESC 2/13 4/1"
-DESCSET&0\0256\00
-.TE
-.RE
.LP
Uppercase substitution can be performed or not performed
both for entity names and for other names.
@@ -544,21 +628,35 @@ APPINFO NONE>
.TE
with the exception that characters 128 through 254 will be assigned to
\s-1DATACHAR\s0.
-When exporting documents that use characters in this range,
-an accurate description of the upper half of the document character set
-should be added to this declaration.
-For ISO Latin-1, an appropriate description would be:
-.br
-.ne 5
+.LP
+.I Sgmls
+identifies base character sets using the designating sequence in the
+public identifier. The following designating sequences are
+recognized:
.TS
tab(&);
-l l.
-BASESET&"ISO Registration Number 100//CHARSET
-&\h'\w'"'u'ECMA-94 Right Part of Latin Alphabet Nr. 1//ESC 2/13 4/1"
-DESCSET&128\032\0UNUSED
-&160\095\032
-&255\0\01\0UNUSED
+c c c c c
+c c c c ^
+c c c c ^
+l n n n l.
+Designating&ISO&Minimum&Number&Description
+Escape&Registration&Character&of&
+Sequence&Number&Number&Characters&
+_
+ESC 2/5 4/0&-&0&128&full set of ISO 646 IRV
+ESC 2/8 4/0&2&33&94&G0 set of ISO 646 IRV
+ESC 2/8 4/2&6&33&94&G0 set of ASCII
+ESC 2/13 4/1&100&32&96&G1 set of ISO 8859-1
+ESC 2/1 4/0&1&0&32&C0 set of ISO 646
+ESC 2/2 4/3&77&0&32&C1 set of ISO 6429
+ESC 2/5 2/15 3/0&-&0&256&the system character set
.TE
+.LP
+When one of the G0 sets is used as a base set, the characters SPACE
+and DELETE are treated as occurring at positions 32 and 127
+respectively; although these characters are not part of the character
+sets designated by the escape sequences, this mimics the behaviour of
+ISO 2022 with respect to these code positions.
.SS "Output format"
The output is a series of lines.
Lines can be arbitrarily long.
diff --git a/usr.bin/sgmls/sgmls/sgmlxtrn.c b/usr.bin/sgmls/sgmls/sgmlxtrn.c
index d27eb66..74d7894 100644
--- a/usr.bin/sgmls/sgmls/sgmlxtrn.c
+++ b/usr.bin/sgmls/sgmls/sgmlxtrn.c
@@ -29,6 +29,7 @@ int contersw = 0; /* 1=element or #CHARS out of context; 0=valid. */
int datarc = 0; /* Return code for data: DAF_ or REF_. */
int delmscsw = 0; /* 1=DELMSC must be read on return to es==0. */
int didreq = 0; /* 1=required implied tag processed; 0=no. */
+int docelsw = 0; /* 1=had document element; 0=no */
int dostag = 0; /* 1=retry newetd instead of parsing; 0=parse. */
int dtdsw = 0; /* DOCTYPE declaration found: 1=yes; 0=no. */
int entdatsw = 0; /* 2=CDATA entity; 4=SDATA; 8=NDATA; 0=none. */
@@ -49,6 +50,7 @@ int pss = 0; /* SGMLACT: scbsgml stack level. */
int sgmlsw = 0; /* SGML declaration found: 1=yes; 0=no. */
int stagmin = MINNONE; /* Minimization: NONE, NULL tag, implied by STAG*/
int tagctr = 0; /* Tag source chars read. */
+int tages = -1; /* ES level at start of tag. */
int ts = -1; /* Index of current tag in stack. */
struct parse *propcb = &pcbpro; /* Current PCB for prolog parse. */
int aentctr = 0; /* Number of ENTITY tokens in this att list. */
@@ -70,6 +72,7 @@ struct etd *docetd = 0; /* The etd for the document as a whole. */
struct etd *etagreal = 0; /* Actual or dummy etd that implied this tag. */
struct etd *newetd = 0; /* The etd for a start- or end-tag recognized. */
struct etd *nextetd = 0; /* ETD that must come next (only one choice). */
+struct etd *lastetd = 0; /* most recently ended ETD. */
struct etd *stagreal = 0; /* Actual or dummy etd that implied this tag. */
struct parse *conpcb = 0; /* Current PCB for content parse. */
UNCH *data = 0; /* Pointer to returned data in buffer. */
@@ -78,7 +81,6 @@ UNCH *ptcon = 0; /* Current pointer into tbuf. */
UNCH *ptpro = 0; /* Current pointer into tbuf. */
UNCH *rbufs = 0; /* DOS file read area: start position for read. */
UNCH *subdcl = 0; /* Subject of markup declaration (e.g., GI). */
-int Tstart = 0; /* Save starting token for AND group testing. */
UNS conradn = 0; /* 1=CONREF attribute in list (0=no). */
UNS datalen = 0; /* Length of returned data in buffer. */
UNS entlen = 0; /* Length of TAG or EXTERNAL entity text. */
diff --git a/usr.bin/sgmls/sgmls/sgmlxtrn.h b/usr.bin/sgmls/sgmls/sgmlxtrn.h
index f1b0b4b..e551200 100644
--- a/usr.bin/sgmls/sgmls/sgmlxtrn.h
+++ b/usr.bin/sgmls/sgmls/sgmlxtrn.h
@@ -13,6 +13,7 @@ extern int contersw; /* 1=element or #CHARS out of context; 0=valid. */
extern int datarc; /* Return code for data: DAF_ or REF_. */
extern int delmscsw; /* 1=DELMSC must be read on return to es==0. */
extern int didreq; /* 1=required implied tag processed; 0=no. */
+extern int docelsw; /* 1=had document element; 0=no */
extern int dostag; /* 1=retry newetd instead of parsing; 0=parse. */
extern int dtdsw; /* DOCTYPE declaration found: 1=yes; 0=no. */
extern int entdatsw; /* 2=CDATA entity; 4=SDATA; 8=NDATA; 0=none. */
@@ -32,6 +33,7 @@ extern int pss; /* SGMLACT: scbsgml stack level. */
extern int sgmlsw; /* SGML declaration found: 1=yes; 0=no. */
extern int stagmin; /* Minimization: NONE, NULL tag, implied by STAG*/
extern int tagctr; /* Tag source chars read. */
+extern int tages; /* ES level at start of tag. */
extern int ts; /* Index of current tag in stack. */
extern struct parse *propcb; /* Current PCB for prolog parse. */
extern int aentctr; /* Number of ENTITY tokens in this att list. */
@@ -53,6 +55,7 @@ extern struct etd *docetd; /* The etd for the document as a whole. */
extern struct etd *etagreal; /* Actual or dummy etd that implied this tag. */
extern struct etd *newetd; /* The etd for a start- or end-tag recognized. */
extern struct etd *nextetd; /* ETD that must come next (only one choice). */
+extern struct etd *lastetd; /* Most recently ended ETD. */
extern struct etd *stagreal; /* Actual or dummy etd that implied this tag. */
extern struct parse *conpcb; /* Current PCB for content parse. */
extern UNCH *data; /* Pointer to returned data in buffer. */
@@ -61,7 +64,6 @@ extern UNCH *ptcon; /* Current pointer into tbuf. */
extern UNCH *ptpro; /* Current pointer into tbuf. */
extern UNCH *rbufs; /* DOS file read area: start position for read. */
extern UNCH *subdcl; /* Subject of markup declaration (e.g., GI). */
-extern int Tstart; /* Save starting token for AND group testing. */
extern UNS conradn; /* 1=CONREF attribute in list (0=no). */
extern UNS datalen; /* Length of returned data in buffer. */
extern UNS entlen; /* Length of TAG or EXTERNAL entity text. */
diff --git a/usr.bin/sgmls/sgmls/std.h b/usr.bin/sgmls/sgmls/std.h
index 3a9ab4b..4e6e856 100644
--- a/usr.bin/sgmls/sgmls/std.h
+++ b/usr.bin/sgmls/sgmls/std.h
@@ -42,13 +42,7 @@
#include <string.h>
#endif /* not BSD_STRINGS */
-#ifdef STRERROR_MISSING
-#ifdef USE_PROTOTYPES
-extern char *strerror(int);
-#else
extern char *strerror();
-#endif
-#endif /* STRERROR_MISSING */
#ifdef STDLIB_H_MISSING
UNIV malloc();
diff --git a/usr.bin/sgmls/sgmls/synxtrn.h b/usr.bin/sgmls/sgmls/synxtrn.h
index 75b6471..1cdf9a0 100644
--- a/usr.bin/sgmls/sgmls/synxtrn.h
+++ b/usr.bin/sgmls/sgmls/synxtrn.h
@@ -32,6 +32,7 @@ struct lexcode {
UNCH fce; /* LEXCNM: FRE character as entity reference. */
UNCH fre; /* LEXCON: Free character not an entity ref. */
UNCH litc; /* LEXLMS: Literal close delimiter enabled. */
+ UNCH minlitc; /* LEXMIN: Literal close delimiter enabled. */
UNCH msc; /* LEXLMS: Marked section close delim enabled. */
UNCH net; /* LEXCON: Null end-tag delimiter enabled. */
UNCH nonet; /* LEXCON: NET disabled; still used as ETI. */
@@ -50,6 +51,7 @@ extern UNCH lexcnm[]; /* Lexical table: mixed content. */
extern UNCH lexcon[]; /* Lexical table for content (except mixed). */
extern UNCH lexgrp[]; /* Lexical table for groups. */
extern UNCH lexlms[]; /* Lexical table: literals and marked sections. */
+extern UNCH lexmin[]; /* Lexical table: minimum data literal. */
extern UNCH lexmark[]; /* Lexical table for markup. */
extern UNCH lexsd[]; /* Lexical table for SGML declaration. */
extern UNCH lextran[]; /* Case translation table for SGML names. */
diff --git a/usr.bin/sgmls/sgmls/trace.h b/usr.bin/sgmls/sgmls/trace.h
index 56362be..f917a26 100644
--- a/usr.bin/sgmls/sgmls/trace.h
+++ b/usr.bin/sgmls/sgmls/trace.h
@@ -20,11 +20,11 @@ VOID tracecon P((int,int,int,struct parse *,int,int));
VOID tracedcn P((struct dcncb *));
VOID tracedsk P((struct tag *,struct tag *,int,int));
VOID traceecb P((char *,struct entity *));
-VOID traceend P((char *,struct thdr *,struct mpos *,int,int,int));
+VOID traceend P((char *,struct thdr *,struct mpos *,int,int));
VOID traceesn P((struct ne *));
VOID traceetd P((struct etd *));
VOID traceetg P((struct tag *,struct etd *,int,int));
-VOID tracegi P((char *,struct etd *,struct thdr *,struct mpos *,int));
+VOID tracegi P((char *,struct etd *,struct thdr *,struct mpos *));
VOID tracegml P((struct restate *,int,int,int));
VOID tracegrp P((struct etd **));
VOID traceid P((char *,struct id *));
@@ -50,15 +50,15 @@ VOID traceval P((struct parse *,unsigned int,UNCH *,int));
((void)(gtrace && (tracedsk(pts, ptso, ts3, etictr), 1)))
#define TRACEECB(action, p) \
((void)(etrace && (traceecb(action, p), 1)))
-#define TRACEEND(stagenm, mod, pos, rc, opt, Tstart) \
- ((void)(ctrace && (traceend(stagenm, mod, pos, rc, opt, Tstart), 1)))
+#define TRACEEND(stagenm, mod, pos, rc, opt) \
+ ((void)(ctrace && (traceend(stagenm, mod, pos, rc, opt), 1)))
#define TRACEESN(p) \
((void)((etrace || atrace || ntrace) && (traceesn(p), 1)))
#define TRACEETD(p) ((void)(gtrace && (traceetd(p), 1)))
#define TRACEETG(pts, curetd, tsl, etagimct) \
((void)(gtrace && (traceetg(pts, curetd, tsl, etagimct), 1)))
-#define TRACEGI(stagenm, gi, mod, pos, Tstart) \
- ((void)(ctrace && (tracegi(stagenm, gi, mod, pos, Tstart), 1)))
+#define TRACEGI(stagenm, gi, mod, pos) \
+ ((void)(ctrace && (tracegi(stagenm, gi, mod, pos), 1)))
#define TRACEGML(scb, pss, conactsw, conact) \
((void)(trace && (tracegml(scb, pss, conactsw, conact), 1)))
#define TRACEGRP(p) ((void)(gtrace && (tracegrp(p), 1)))
@@ -89,11 +89,11 @@ VOID traceval P((struct parse *,unsigned int,UNCH *,int));
#define TRACEDCN(dcn) /* empty */
#define TRACEDSK(pts, ptso, ts3, etictr) /* empty */
#define TRACEECB(action, p) /* empty */
-#define TRACEEND(stagenm, mod, pos, rc, opt, Tstart) /* empty */
+#define TRACEEND(stagenm, mod, pos, rc, opt) /* empty */
#define TRACEESN(p) /* empty */
#define TRACEETG(pts, curetd, tsl, etagimct) /* empty */
#define TRACEETD(p) /* empty */
-#define TRACEGI(stagenm, gi, mod, pos, Tstart) /* empty */
+#define TRACEGI(stagenm, gi, mod, pos) /* empty */
#define TRACEGML(scb, pss, conactsw, conact) /* empty */
#define TRACEGRP(p) /* empty */
#define TRACEID(action, p) /* empty */
diff --git a/usr.bin/sgmls/sgmls/traceset.c b/usr.bin/sgmls/sgmls/traceset.c
index 64ebd48..e57003f 100644
--- a/usr.bin/sgmls/sgmls/traceset.c
+++ b/usr.bin/sgmls/sgmls/traceset.c
@@ -55,7 +55,7 @@ char *s;
VOID traceset()
{
dotrace(sw.trace);
-
+
if (trace||atrace||ctrace||dtrace||etrace||gtrace||itrace||mtrace||ntrace)
fprintf(stderr,
"TRACESET: state=%d;att=%d;con=%d;dcl=%d;ent=%d;grp=%d;id=%d;ms=%d;dcn=%d.\n",
@@ -67,7 +67,7 @@ VOID traceset()
VOID tracepro()
{
dotrace(sw.ptrace);
-
+
if (trace||atrace||dtrace||etrace||gtrace||mtrace||ntrace)
fprintf(stderr,
"TRACEPRO: state=%d; att=%d; dcl=%d; ent=%d; grp=%d; ms=%d; dcn=%d.\n",
@@ -78,7 +78,7 @@ VOID tracepro()
VOID tracepcb(pcb)
struct parse *pcb;
{
- fprintf(stderr, "%-8s %2u-%2u-%2u-%2u from %s [%3d] in %s, %d:%d.\n",
+ fprintf(stderr, "%-8s %2u-%2u-%2u-%2u from %s [%3d] in %s, %lu:%d.\n",
pcb->pname, pcb->state, pcb->input, pcb->action,
pcb->newstate, printable(*FPOS), *FPOS, ENTITY+1, RCNT,
RSCC+FPOS+1-FBUF);
@@ -89,7 +89,7 @@ VOID tracetkn(scope, lextoke)
int scope;
UNCH lextoke[]; /* Lexical table for token and name parses. */
{
- fprintf(stderr, "TOKEN %2d-%2d from %s [%3d] in %s, %d:%d.\n",
+ fprintf(stderr, "TOKEN %2d-%2d from %s [%3d] in %s, %lu:%d.\n",
scope, lextoke[*FPOS],
printable(*FPOS), *FPOS, ENTITY+1, RCNT,
RSCC+FPOS+1-FBUF);
@@ -217,7 +217,7 @@ struct entity *p;
VOID tracedcn(p)
struct dcncb *p;
{
- fprintf(stderr,
+ fprintf(stderr,
"DCN dcn=%p; adl=%p; notation is %s\n",
(UNIV)p, (UNIV)p->adl, p->ename+1);
if (p->adl)
@@ -245,7 +245,7 @@ TECB pg;
UNCH *gi;
{
int i = 0; /* Loop counter. */
-
+
if (pg==SRMNULL)
fprintf(stderr, "%-8s SHORTREF table empty for %s.\n", action, gi);
else {
@@ -264,12 +264,12 @@ VOID traceadl(al)
struct ad al[];
{
int i=0;
-
+
fprintf(stderr, "ADLIST %p %d membe%s; %d attribut%s\n",
(UNIV)al, ADN(al), ADN(al)==1 ? "r" : "rs", AN(al),
AN(al)==1 ? "e" : "es");
while (++i<=ADN(al)) {
- fprintf(stderr,
+ fprintf(stderr,
(BITOFF(ADFLAGS(al,i), AGROUP) && ADTYPE(al,i)<=ANOTEGRP)
? " %p %-8s %02x %02x %2d %2d %p %p\n"
: " %p %-8s %02x %02x %2d %2d %p %p\n",
@@ -281,11 +281,10 @@ struct ad al[];
fprintf(stderr, "=>");
traceesn(ADDATA(al,i).n);
}
- else if (ADTYPE(al,i)==ANOTEGRP)
- fprintf(stderr, "=>%s",
- (ADDATA(al,i).x->dcnid!=0)
- ? (char *)ADDATA(al,i).x->dcnid
- : "[UNDEFINED]");
+ else if (ADTYPE(al,i)==ANOTEGRP) {
+ fprintf(stderr, "=>");
+ tracedcn(ADDATA(al,i).x);
+ }
}
else
fprintf(stderr, "[%s]",
@@ -325,7 +324,7 @@ VOID tracegrp(pg)
struct etd *pg[];
{
int i = -1; /* Loop counter. */
-
+
fprintf(stderr, "ETDGRP %p\n", (UNIV)pg);
while (pg[++i]!=0)
fprintf(stderr, " %p %s\n", (UNIV)pg[i], pg[i]->etdgi+1);
@@ -336,7 +335,7 @@ VOID tracengr(pg)
struct dcncb *pg[];
{
int i = -1; /* Loop counter. */
-
+
fprintf(stderr, "DCNGRP %p\n", (UNIV)pg);
while (pg[++i]!=0)
fprintf(stderr, " %p %s\n", (UNIV)pg[i], pg[i]->ename+1);
@@ -346,7 +345,7 @@ struct dcncb *pg[];
VOID traceetd(p)
struct etd *p; /* Pointer to an etd. */
{
- fprintf(stderr,
+ fprintf(stderr,
"ETD etd=%p %s min=%02x cmod=%p ttype=%02x mex=%p, pex=%p, ",
(UNIV)p, p->etdgi+1, p->etdmin, (UNIV)p->etdmod,
p->etdmod->ttype, (UNIV)p->etdmex, (UNIV)p->etdpex);
@@ -400,15 +399,14 @@ unsigned long *h;
/* TRACEGI: Trace GI testing stages in CONTEXT.C processing.
*/
-VOID tracegi(stagenm, gi, mod, pos, Tstart)
+VOID tracegi(stagenm, gi, mod, pos)
char *stagenm;
struct etd *gi; /* ETD of new GI. */
struct thdr mod[]; /* Model of current open element. */
struct mpos pos[]; /* Position in open element's model. */
-int Tstart; /* Initial T for this group. */
{
int i = 0; /* Loop counter. */
-
+
fprintf(stderr, "%-10s %d:", stagenm, P);
while (++i<=P)
fprintf(stderr, " %d-%d", pos[i].g, pos[i].t);
@@ -427,16 +425,15 @@ int Tstart; /* Initial T for this group. */
}
/* TRACEEND: Trace testing for end of group in CONTEXT.C processing.
*/
-VOID traceend(stagenm, mod, pos, rc, opt, Tstart)
+VOID traceend(stagenm, mod, pos, rc, opt)
char *stagenm;
struct thdr mod[]; /* Model of current open element. */
struct mpos pos[]; /* Position in open element's model. */
int rc; /* Return code: RCNREQ RCHIT RCMISS RCEND */
int opt; /* ALLHIT parm: 1=test optionals; 0=ignore. */
-int Tstart; /* Initial T for this group. */
{
int i = 0; /* Loop counter. */
-
+
fprintf(stderr, "%-10s %d:", stagenm, P);
while (++i<=P)
fprintf(stderr, " %d-%d", pos[i].g, pos[i].t);
diff --git a/usr.bin/sgmls/sgmls/version.c b/usr.bin/sgmls/sgmls/version.c
index 7144593..f3b2d7c 100644
--- a/usr.bin/sgmls/sgmls/version.c
+++ b/usr.bin/sgmls/sgmls/version.c
@@ -1 +1 @@
-char *version_string = "1.1";
+char *version_string = "1.1.91";
diff --git a/usr.bin/sgmls/sgmls/xfprintf.c b/usr.bin/sgmls/sgmls/xfprintf.c
index f544faa..1c50469 100644
--- a/usr.bin/sgmls/sgmls/xfprintf.c
+++ b/usr.bin/sgmls/sgmls/xfprintf.c
@@ -31,10 +31,6 @@ typedef long double long_double;
#endif
#endif /* FP_SUPPORT */
-#ifndef __STDC__
-#define const /* as nothing */
-#endif
-
#ifdef USE_PROTOTYPES
#define P(parms) parms
#else
@@ -113,10 +109,10 @@ struct spec *sp;
sp->pos = **pp - '0';
*pp += 2;
}
-
+
while (**pp != '\0' && strchr(FLAG_CHARS, **pp))
*pp += 1;
-
+
/* handle the field width */
sp->field_width = MISSING;
@@ -162,7 +158,7 @@ struct spec *sp;
modifier = **pp;
*pp += 1;
}
-
+
switch (**pp) {
case 'd':
case 'i':
@@ -219,7 +215,7 @@ static int find_arg_types(format, arg_type)
int i, pos;
const char *p;
struct spec spec;
-
+
for (i = 0; i < 9; i++)
arg_type[i] = NONE;
@@ -384,7 +380,7 @@ static int printit(handle, func, p, ap, nargs, arg)
start = ++p;
if (!parse_spec(&p, &spec))
abort(); /* should have caught it in find_arg_types */
-
+
buf[0] = '%';
q = buf + 1;
@@ -470,7 +466,7 @@ static int maybe_positional(format)
}
return 1;
}
-
+
static int xdoprt(handle, func, format, ap)
UNIV handle;
printer func;
@@ -483,7 +479,7 @@ static int xdoprt(handle, func, format, ap)
if (!find_arg_types(format, arg_type))
return -1;
-
+
for (nargs = 0; nargs < 9; nargs++)
if (arg_type[nargs] == NONE)
break;
@@ -491,7 +487,7 @@ static int xdoprt(handle, func, format, ap)
for (i = nargs; i < 9; i++)
if (arg_type[i] != NONE)
return -1;
-
+
for (i = 0; i < nargs; i++)
get_arg(arg_type[i], &ap, arg + i);
diff --git a/usr.bin/sgmls/sgmlsasp/Makefile b/usr.bin/sgmls/sgmlsasp/Makefile
index 69bfdab..17da01e 100644
--- a/usr.bin/sgmls/sgmlsasp/Makefile
+++ b/usr.bin/sgmls/sgmlsasp/Makefile
@@ -1,7 +1,7 @@
#
# Bmakefile for sgmlsasp
#
-# $id$
+# $Id$
#
PROG= sgmlsasp
diff --git a/usr.bin/sgmls/sgmlsasp/replace.c b/usr.bin/sgmls/sgmlsasp/replace.c
index a37086b..95fa113 100644
--- a/usr.bin/sgmls/sgmlsasp/replace.c
+++ b/usr.bin/sgmls/sgmlsasp/replace.c
@@ -90,7 +90,7 @@ void load_replacement_file(tablep, file)
else
error("can't open `%s'", file);
}
-
+
current_lineno = 1;
current_file = file;
tok = get_token();
@@ -131,7 +131,7 @@ struct replacement_item **parse_string(tail, recog_attr)
{
struct buffer buf;
unsigned len;
-
+
buffer_init(&buf);
for (;;) {
int c = get();
@@ -274,7 +274,6 @@ int get_token()
default:
parse_error("bad input character `%c'", c);
}
- return EOF;
}
static
@@ -325,7 +324,7 @@ struct replacement *lookup_replacement(tablep, type, name)
{
int h = hash(type, name);
struct table_entry *p;
-
+
for (p = tablep->table[h]; p; p = p->next)
if (strcmp(name, p->gi) == 0 && type == p->type)
return &p->replacement;
@@ -342,7 +341,7 @@ struct replacement *define_replacement(tablep, type, name)
{
int h = hash(type, name);
struct table_entry *p;
-
+
for (p = tablep->table[h]; p; p = p->next)
if (strcmp(name, p->gi) == 0 && type == p->type)
return 0;
@@ -409,7 +408,7 @@ int hash(type, s)
char *s;
{
unsigned long h = 0, g;
-
+
while (*s != 0) {
h <<= 4;
h += *s++;
@@ -442,7 +441,7 @@ UNIV xrealloc(p, size)
parse_error("out of memory");
return p;
}
-
+
static NO_RETURN
#ifdef VARARGS
void parse_error(va_alist) va_dcl
@@ -455,7 +454,7 @@ void parse_error(char *message,...)
char *message;
#endif
va_list ap;
-
+
#ifdef VARARGS
va_start(ap);
message = va_arg(ap, char *);
diff --git a/usr.bin/sgmls/sgmlsasp/replace.h b/usr.bin/sgmls/sgmlsasp/replace.h
index be2bbcd..18c9f82 100644
--- a/usr.bin/sgmls/sgmlsasp/replace.h
+++ b/usr.bin/sgmls/sgmlsasp/replace.h
@@ -5,7 +5,7 @@ enum replacement_type {
DATA_REPL,
ATTR_REPL
};
-
+
struct replacement_item {
union {
char *attr;
@@ -30,6 +30,6 @@ enum event_type { START_ELEMENT, END_ELEMENT };
struct replacement_table *make_replacement_table P((void));
void load_replacement_file P((struct replacement_table *, char *));
-
+
struct replacement *
lookup_replacement P((struct replacement_table *, enum event_type, char *));
diff --git a/usr.bin/sgmls/sgmlsasp/sgmlsasp.1 b/usr.bin/sgmls/sgmlsasp/sgmlsasp.1
index ab03371..5033744 100644
--- a/usr.bin/sgmls/sgmlsasp/sgmlsasp.1
+++ b/usr.bin/sgmls/sgmlsasp/sgmlsasp.1
@@ -3,7 +3,7 @@
.SH NAME
sgmlsasp \- translate output of sgmls using ASP replacement files
.SH SYNOPSIS
-.B sgmls
+.B sgmlsasp
.RB [ \-n ]
.I replacement_file\|.\|.\|.
.SH DESCRIPTION
diff --git a/usr.bin/sgmls/sgmlsasp/sgmlsasp.c b/usr.bin/sgmls/sgmlsasp/sgmlsasp.c
index eacf1c1..fdaf113 100644
--- a/usr.bin/sgmls/sgmlsasp/sgmlsasp.c
+++ b/usr.bin/sgmls/sgmlsasp/sgmlsasp.c
@@ -154,7 +154,7 @@ struct sgmls_attribute *attributes;
return;
if (repl->flags & NEWLINE_BEGIN)
output_begin_line();
-
+
for (p = repl->items; p; p = p->next)
switch (p->type) {
case DATA_REPL:
@@ -190,7 +190,7 @@ struct sgmls_attribute *p;
{
char **token = p->value.token.v;
int n = p->value.token.n;
-
+
if (n > 0) {
int i;
output_token(token[0]);
@@ -262,7 +262,7 @@ void error(char *message,...)
char *message;
#endif
va_list ap;
-
+
fprintf(stderr, "%s: ", program_name);
#ifdef VARARGS
va_start(ap);
diff --git a/usr.bin/sgmls/unix.cfg b/usr.bin/sgmls/unix.cfg
index 0bc8410..4245511 100644
--- a/usr.bin/sgmls/unix.cfg
+++ b/usr.bin/sgmls/unix.cfg
@@ -11,6 +11,17 @@ Usually the same as PATH_FILE_SEP. */
#define SYSID_FILE_SEP ':'
/* The environment variable that contains the list of filename templates. */
#define PATH_ENV_VAR "SGML_PATH"
+/* A macro that returns non-zero if the filename is relative to the
+ current directory. */
+#define FILE_IS_RELATIVE(p) ((p)[0] != '/')
+/* A string containing the characters that can separate the directory
+ part of a filename from the basename. */
+#define DIR_BASE_SEP "/"
+/* The environment variable that contains the list of catalog entry files.
+ Filenames are separated by PATH_FILE_SEP. */
+#define CATALOG_FILES_ENV_VAR "SGML_CATALOG_FILES"
+/* Default list of catalog entry files. */
+#define DEFAULT_CATALOG_FILES "CATALOG:/usr/local/lib/sgml/CATALOG"
/* MIN_DAT_SUBS_FROM and MIN_DATS_SUBS_TO tell sgmls how to transform a name
or system identifier into a legal filename. A character in
@@ -126,6 +137,13 @@ typedef void *UNIV;
change `void' to `int'. */
typedef void VOID;
+/* If your compiler doesn't understand const, define it to be nothing. */
+#ifndef __STDC__
+#ifndef const
+#define const /* as nothing */
+#endif
+#endif
+
/* If you don't have an ANSI C conformant <limits.h>, define
CHAR_SIGNED as 1 or 0 according to whether the `char' type is signed.
The <limits.h> on some versions of System Release V 3.2 is not ANSI C
OpenPOWER on IntegriCloud