From 3352e1a4abfc0a4e4dee5832345b657cf6794ad3 Mon Sep 17 00:00:00 2001 From: bapt Date: Sat, 22 Nov 2014 20:18:38 +0000 Subject: Add missing bits svn merge did not catch --- contrib/mdocml/demandoc.1 | 108 +++++++++ contrib/mdocml/man-cgi.css | 13 + contrib/mdocml/preconv.1 | 157 +++++++++++++ contrib/mdocml/preconv.c | 523 +++++++++++++++++++++++++++++++++++++++++ contrib/mdocml/tbl.3 | 295 +++++++++++++++++++++++ contrib/mdocml/test-strptime.c | 14 ++ 6 files changed, 1110 insertions(+) create mode 100644 contrib/mdocml/demandoc.1 create mode 100644 contrib/mdocml/man-cgi.css create mode 100644 contrib/mdocml/preconv.1 create mode 100644 contrib/mdocml/preconv.c create mode 100644 contrib/mdocml/tbl.3 create mode 100644 contrib/mdocml/test-strptime.c (limited to 'contrib/mdocml') diff --git a/contrib/mdocml/demandoc.1 b/contrib/mdocml/demandoc.1 new file mode 100644 index 0000000..84571ba --- /dev/null +++ b/contrib/mdocml/demandoc.1 @@ -0,0 +1,108 @@ +.\" $Id: demandoc.1,v 1.7 2013/07/13 19:41:16 schwarze Exp $ +.\" +.\" Copyright (c) 2011 Kristaps Dzonsons +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate: July 13 2013 $ +.Dt DEMANDOC 1 +.Os +.Sh NAME +.Nm demandoc +.Nd emit only text of UNIX manuals +.Sh SYNOPSIS +.Nm demandoc +.Op Fl w +.Op Ar +.Sh DESCRIPTION +The +.Nm +utility emits only the text portions of well-formed +.Xr mdoc 7 +and +.Xr man 7 +.Ux +manual files. +.Pp +By default, +.Nm +parses standard input and outputs only text nodes, preserving line +and column position. +Escape sequences are omitted from the output. +.Pp +Its arguments are as follows: +.Bl -tag -width Ds +.It Fl w +Output a word list. +This outputs each word of text on its own line. +A +.Qq word , +in this case, refers to whitespace-delimited terms beginning with at +least two letters and not consisting of any escape sequences. +Words have their leading and trailing punctuation +.Pq double-quotes, sentence punctuation, etc. +stripped. +.It Ar +The input files. +.El +.Pp +If a document is not well-formed, it is skipped. +.Pp +The +.Fl i , +.Fl k , +.Fl m , +and +.Fl p +flags are silently discarded for calling compatibility with the +historical deroff. +.Sh EXIT STATUS +The +.Nm +utility exits with one of the following values: +.Pp +.Bl -tag -width Ds -compact +.It 0 +No errors occurred. +.It 6 +An operating system error occurred, for example memory exhaustion or an +error accessing input files. +Such errors cause +.Nm +to exit at once, possibly in the middle of parsing or formatting a file. +The output databases are corrupt and should be removed. +.El +.Sh EXAMPLES +The traditional usage of +.Nm +is for spell-checking manuals on +.Bx . +This is accomplished as follows (assuming British spelling): +.Pp +.Dl $ demandoc -w file.1 | spell -b +.Sh SEE ALSO +.Xr mandoc 1 , +.Xr man 7 , +.Xr mdoc 7 +.Sh HISTORY +.Nm +replaces the historical deroff utility for handling modern +.Xr man 7 +and +.Xr mdoc 7 +documents. +.Sh AUTHORS +The +.Nm +utility was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv . 
diff --git a/contrib/mdocml/man-cgi.css b/contrib/mdocml/man-cgi.css new file mode 100644 index 0000000..5300267 --- /dev/null +++ b/contrib/mdocml/man-cgi.css @@ -0,0 +1,13 @@ +body { font-family: Helvetica, Arial, sans-serif; } +body > div { padding-left: 2em; + padding-top: 1em; } +body > div#mancgi { padding-left: 0em; + padding-top: 0em; } +body > div.results { font-size: smaller; } +#mancgi fieldset { text-align: center; + border: thin solid silver; + border-radius: 1em; + font-size: small; } +#mancgi input[name=expr] { width: 25%; } +.results td.title { vertical-align: top; + padding-right: 1em; } diff --git a/contrib/mdocml/preconv.1 b/contrib/mdocml/preconv.1 new file mode 100644 index 0000000..8daee30 --- /dev/null +++ b/contrib/mdocml/preconv.1 @@ -0,0 +1,157 @@ +.\" $Id: preconv.1,v 1.7 2013/07/13 19:41:16 schwarze Exp $ +.\" +.\" Copyright (c) 2011 Kristaps Dzonsons +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate: July 13 2013 $ +.Dt PRECONV 1 +.Os +.Sh NAME +.Nm preconv +.Nd recode multibyte UNIX manuals +.Sh SYNOPSIS +.Nm preconv +.Op Fl D Ar enc +.Op Fl e Ar enc +.Op Ar file +.Sh DESCRIPTION +The +.Nm +utility recodes multibyte +.Ux +manual files into +.Xr mandoc 1 +.Po +or other troff system supporting the +.Sq \e[uNNNN] +escape sequence +.Pc +input. +.Pp +By default, it parses from standard input, determining encoding as +described in +.Sx Algorithm . +.Pp +Its arguments are as follows: +.Bl -tag -width Ds +.It Fl D Ar enc +The default encoding. +.It Fl e Ar enc +The document's encoding. +.It Ar file +The input file. +.El +.Pp +The recoded input is written to standard output: Unicode characters in +the ASCII range are printed as regular ASCII characters, while those +above this range are printed using the +.Sq \e[uNNNN] +format documented in +.Xr mandoc_char 7 . +.Pp +If input bytes are improperly formed in the current encoding, they're +passed unmodified to standard output. +For some encodings, such as UTF-8, unrecoverable input sequences will +cause +.Nm +to stop processing and exit. +.Ss Algorithm +An encoding is chosen according to the following steps: +.Bl -enum +.It +From the argument passed to +.Fl e Ar enc . +.It +If a BOM exists, UTF\-8 encoding is selected. +.It +From the coding tags parsed from +.Qq File Variables +on the first two lines of input. +A file variable is an input line of the form +.Pp +.Dl \%.\e\(dq -*- key: val [; key: val ]* -*- +.Pp +A coding tag variable is where +.Cm key +is +.Qq coding +and +.Cm val +is the name of the encoding. +A typical file variable with a coding tag is +.Pp +.Dl \%.\e\(dq -*- mode: troff; coding: utf-8 -*- +.It +From the argument passed to +.Fl D Ar enc . +.It +If all else fails, Latin\-1 is used. +.El +.Pp +The +.Nm +utility recognises the UTF\-8, us\-ascii, and latin\-1 encodings as +passed to the +.Fl e +and +.Fl D +arguments, or as coding tags. +Encodings are matched case-insensitively. 
+.\" .Sh IMPLEMENTATION NOTES +.\" Not used in OpenBSD. +.\" .Sh RETURN VALUES +.\" For sections 2, 3, & 9 only. +.\" .Sh ENVIRONMENT +.\" For sections 1, 6, 7, & 8 only. +.\" .Sh FILES +.Sh EXIT STATUS +.Ex -std +.Sh EXAMPLES +Explicitly page a UTF\-8 manual +.Pa foo.1 +in the current locale: +.Pp +.Dl $ preconv \-e utf\-8 foo.1 | mandoc -Tlocale | less +.\" .Sh DIAGNOSTICS +.\" For sections 1, 4, 6, 7, & 8 only. +.\" .Sh ERRORS +.\" For sections 2, 3, & 9 only. +.Sh SEE ALSO +.Xr mandoc 1 , +.Xr mandoc_char 7 +.Sh STANDARDS +The +.Nm +utility references the US-ASCII character set standard, ANSI_X3.4\-1968; +the Latin\-1 character set standard, ISO/IEC 8859\-1:1998; the UTF\-8 +character set standard; and UCS (Unicode), ISO/IEC 10646. +.Sh HISTORY +The +.Nm +utility first appeared in the GNU troff +.Pq Dq groff +system in December 2005, authored by Tomohiro Kubota and Werner +Lemberg. +The implementation that is part of the +.Xr mandoc 1 +utility appeared in May 2011. +.Sh AUTHORS +The +.Nm +utility was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv . +.\" .Sh CAVEATS +.\" .Sh BUGS +.\" .Sh SECURITY CONSIDERATIONS +.\" Not used in OpenBSD. diff --git a/contrib/mdocml/preconv.c b/contrib/mdocml/preconv.c new file mode 100644 index 0000000..7595887 --- /dev/null +++ b/contrib/mdocml/preconv.c @@ -0,0 +1,523 @@ +/* $Id: preconv.c,v 1.6 2013/06/02 03:52:21 schwarze Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_MMAP +#include +#include +#endif + +#include +#include +#include +#include +#include +#include + +/* + * The read_whole_file() and resize_buf() functions are copied from + * read.c, including all dependency code. + */ + +enum enc { + ENC_UTF_8, /* UTF-8 */ + ENC_US_ASCII, /* US-ASCII */ + ENC_LATIN_1, /* Latin-1 */ + ENC__MAX +}; + +struct buf { + char *buf; /* binary input buffer */ + size_t sz; /* size of binary buffer */ + size_t offs; /* starting buffer offset */ +}; + +struct encode { + const char *name; + int (*conv)(const struct buf *); +}; + +static int cue_enc(const struct buf *, size_t *, enum enc *); +static int conv_latin_1(const struct buf *); +static int conv_us_ascii(const struct buf *); +static int conv_utf_8(const struct buf *); +static int read_whole_file(const char *, int, + struct buf *, int *); +static void resize_buf(struct buf *, size_t); +static void usage(void); + +static const struct encode encs[ENC__MAX] = { + { "utf-8", conv_utf_8 }, /* ENC_UTF_8 */ + { "us-ascii", conv_us_ascii }, /* ENC_US_ASCII */ + { "latin-1", conv_latin_1 }, /* ENC_LATIN_1 */ +}; + +static const char *progname; + +static void +usage(void) +{ + + fprintf(stderr, "usage: %s " + "[-D enc] " + "[-e ENC] " + "[file]\n", progname); +} + +static int +conv_latin_1(const struct buf *b) +{ + size_t i; + unsigned char cu; + const char *cp; + + cp = b->buf + (int)b->offs; + + /* + * Latin-1 falls into the first 256 code-points of Unicode, so + * there's no need for any sort of translation. Just make the + * 8-bit characters use the Unicode escape. 
+ * Note that binary values 128 < v < 160 are passed through + * unmodified to mandoc. + */ + + for (i = b->offs; i < b->sz; i++) { + cu = (unsigned char)*cp++; + cu < 160U ? putchar(cu) : printf("\\[u%.4X]", cu); + } + + return(1); +} + +static int +conv_us_ascii(const struct buf *b) +{ + + /* + * US-ASCII has no conversion since it falls into the first 128 + * bytes of Unicode. + */ + + fwrite(b->buf, 1, b->sz, stdout); + return(1); +} + +static int +conv_utf_8(const struct buf *b) +{ + int state, be; + unsigned int accum; + size_t i; + unsigned char cu; + const char *cp; + const long one = 1L; + + cp = b->buf + (int)b->offs; + state = 0; + accum = 0U; + be = 0; + + /* Quick test for big-endian value. */ + + if ( ! (*((const char *)(&one)))) + be = 1; + + for (i = b->offs; i < b->sz; i++) { + cu = (unsigned char)*cp++; + if (state) { + if ( ! (cu & 128) || (cu & 64)) { + /* Bad sequence header. */ + return(0); + } + + /* Accept only legitimate bit patterns. */ + + if (cu > 191 || cu < 128) { + /* Bad in-sequence bits. */ + return(0); + } + + accum |= (cu & 63) << --state * 6; + + /* + * Accum is held in little-endian order as + * stipulated by the UTF-8 sequence coding. We + * need to convert to a native big-endian if our + * architecture requires it. + */ + + if (0 == state && be) + accum = (accum >> 24) | + ((accum << 8) & 0x00FF0000) | + ((accum >> 8) & 0x0000FF00) | + (accum << 24); + + if (0 == state) { + accum < 128U ? putchar(accum) : + printf("\\[u%.4X]", accum); + accum = 0U; + } + } else if (cu & (1 << 7)) { + /* + * Entering a UTF-8 state: if we encounter a + * UTF-8 bitmask, calculate the expected UTF-8 + * state from it. + */ + for (state = 0; state < 7; state++) + if ( ! (cu & (1 << (7 - state)))) + break; + + /* Accept only legitimate bit patterns. */ + + switch (state) { + case (4): + if (cu <= 244 && cu >= 240) { + accum = (cu & 7) << 18; + break; + } + /* Bad 4-sequence start bits. 
*/ + return(0); + case (3): + if (cu <= 239 && cu >= 224) { + accum = (cu & 15) << 12; + break; + } + /* Bad 3-sequence start bits. */ + return(0); + case (2): + if (cu <= 223 && cu >= 194) { + accum = (cu & 31) << 6; + break; + } + /* Bad 2-sequence start bits. */ + return(0); + default: + /* Bad sequence bit mask. */ + return(0); + } + state--; + } else + putchar(cu); + } + + if (0 != state) { + /* Bad trailing bits. */ + return(0); + } + + return(1); +} + +static void +resize_buf(struct buf *buf, size_t initial) +{ + + buf->sz = buf->sz > initial / 2 ? + 2 * buf->sz : initial; + + buf->buf = realloc(buf->buf, buf->sz); + if (NULL == buf->buf) { + perror(NULL); + exit(EXIT_FAILURE); + } +} + +static int +read_whole_file(const char *f, int fd, + struct buf *fb, int *with_mmap) +{ + size_t off; + ssize_t ssz; + +#ifdef HAVE_MMAP + struct stat st; + if (-1 == fstat(fd, &st)) { + perror(f); + return(0); + } + + /* + * If we're a regular file, try just reading in the whole entry + * via mmap(). This is faster than reading it into blocks, and + * since each file is only a few bytes to begin with, I'm not + * concerned that this is going to tank any machines. + */ + + if (S_ISREG(st.st_mode) && st.st_size >= (1U << 31)) { + fprintf(stderr, "%s: input too large\n", f); + return(0); + } + + if (S_ISREG(st.st_mode)) { + *with_mmap = 1; + fb->sz = (size_t)st.st_size; + fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); + if (fb->buf != MAP_FAILED) + return(1); + } +#endif + + /* + * If this isn't a regular file (like, say, stdin), then we must + * go the old way and just read things in bit by bit. 
+ */ + + *with_mmap = 0; + off = 0; + fb->sz = 0; + fb->buf = NULL; + for (;;) { + if (off == fb->sz && fb->sz == (1U << 31)) { + fprintf(stderr, "%s: input too large\n", f); + break; + } + + if (off == fb->sz) + resize_buf(fb, 65536); + + ssz = read(fd, fb->buf + (int)off, fb->sz - off); + if (ssz == 0) { + fb->sz = off; + return(1); + } + if (ssz == -1) { + perror(f); + break; + } + off += (size_t)ssz; + } + + free(fb->buf); + fb->buf = NULL; + return(0); +} + +static int +cue_enc(const struct buf *b, size_t *offs, enum enc *enc) +{ + const char *ln, *eoln, *eoph; + size_t sz, phsz, nsz; + int i; + + ln = b->buf + (int)*offs; + sz = b->sz - *offs; + + /* Look for the end-of-line. */ + + if (NULL == (eoln = memchr(ln, '\n', sz))) + return(-1); + + /* Set next-line marker. */ + + *offs = (size_t)((eoln + 1) - b->buf); + + /* Check if we have the correct header/trailer. */ + + if ((sz = (size_t)(eoln - ln)) < 10 || + memcmp(ln, ".\\\" -*-", 7) || + memcmp(eoln - 3, "-*-", 3)) + return(0); + + /* Move after the header and adjust for the trailer. */ + + ln += 7; + sz -= 10; + + while (sz > 0) { + while (sz > 0 && ' ' == *ln) { + ln++; + sz--; + } + if (0 == sz) + break; + + /* Find the end-of-phrase marker (or eoln). */ + + if (NULL == (eoph = memchr(ln, ';', sz))) + eoph = eoln - 3; + else + eoph++; + + /* Only account for the "coding" phrase. */ + + if ((phsz = (size_t)(eoph - ln)) < 7 || + strncasecmp(ln, "coding:", 7)) { + sz -= phsz; + ln += phsz; + continue; + } + + sz -= 7; + ln += 7; + + while (sz > 0 && ' ' == *ln) { + ln++; + sz--; + } + if (0 == sz) + break; + + /* Check us against known encodings. */ + + for (i = 0; i < (int)ENC__MAX; i++) { + nsz = strlen(encs[i].name); + if (phsz < nsz) + continue; + if (strncasecmp(ln, encs[i].name, nsz)) + continue; + + *enc = (enum enc)i; + return(1); + } + + /* Unknown encoding. 
*/ + + *enc = ENC__MAX; + return(1); + } + + return(0); +} + +int +main(int argc, char *argv[]) +{ + int i, ch, map, fd, rc; + struct buf b; + const char *fn; + enum enc enc, def; + unsigned char bom[3] = { 0xEF, 0xBB, 0xBF }; + size_t offs; + extern int optind; + extern char *optarg; + + progname = strrchr(argv[0], '/'); + if (progname == NULL) + progname = argv[0]; + else + ++progname; + + fn = ""; + fd = STDIN_FILENO; + rc = EXIT_FAILURE; + enc = def = ENC__MAX; + map = 0; + + memset(&b, 0, sizeof(struct buf)); + + while (-1 != (ch = getopt(argc, argv, "D:e:rdvh"))) + switch (ch) { + case ('D'): + /* FALLTHROUGH */ + case ('e'): + for (i = 0; i < (int)ENC__MAX; i++) { + if (strcasecmp(optarg, encs[i].name)) + continue; + break; + } + if (i < (int)ENC__MAX) { + if ('D' == ch) + def = (enum enc)i; + else + enc = (enum enc)i; + break; + } + + fprintf(stderr, "%s: Bad encoding\n", optarg); + return(EXIT_FAILURE); + case ('r'): + /* FALLTHROUGH */ + case ('d'): + /* FALLTHROUGH */ + case ('v'): + /* Compatibility with GNU preconv. */ + break; + case ('h'): + /* Compatibility with GNU preconv. */ + /* FALLTHROUGH */ + default: + usage(); + return(EXIT_FAILURE); + } + + argc -= optind; + argv += optind; + + /* + * Open and read the first argument on the command-line. + * If we don't have one, we default to stdin. + */ + + if (argc > 0) { + fn = *argv; + fd = open(fn, O_RDONLY, 0); + if (-1 == fd) { + perror(fn); + return(EXIT_FAILURE); + } + } + + if ( ! read_whole_file(fn, fd, &b, &map)) + goto out; + + /* Try to read the UTF-8 BOM. */ + + if (ENC__MAX == enc) + if (b.sz > 3 && 0 == memcmp(b.buf, bom, 3)) { + b.offs = 3; + enc = ENC_UTF_8; + } + + /* Try reading from the "-*-" cue. */ + + if (ENC__MAX == enc) { + offs = b.offs; + ch = cue_enc(&b, &offs, &enc); + if (0 == ch) + ch = cue_enc(&b, &offs, &enc); + } + + /* + * No encoding has been detected. + * Thus, we either fall into our default encoder, if specified, + * or use Latin-1 if all else fails. 
+ */ + + if (ENC__MAX == enc) + enc = ENC__MAX == def ? ENC_LATIN_1 : def; + + if ( ! (*encs[(int)enc].conv)(&b)) { + fprintf(stderr, "%s: Bad encoding\n", fn); + goto out; + } + + rc = EXIT_SUCCESS; +out: +#ifdef HAVE_MMAP + if (map) + munmap(b.buf, b.sz); + else +#endif + free(b.buf); + + if (fd > STDIN_FILENO) + close(fd); + + return(rc); +} diff --git a/contrib/mdocml/tbl.3 b/contrib/mdocml/tbl.3 new file mode 100644 index 0000000..05e423f --- /dev/null +++ b/contrib/mdocml/tbl.3 @@ -0,0 +1,295 @@ +.\" $Id: tbl.3,v 1.1 2013/06/01 05:44:39 schwarze Exp $ +.\" +.\" Copyright (c) 2013 Ingo Schwarze +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate: June 1 2013 $ +.Dt TBL 3 +.Os +.Sh NAME +.Nm tbl_alloc , +.Nm tbl_read , +.Nm tbl_restart , +.Nm tbl_span , +.Nm tbl_end , +.Nm tbl_free +.Nd roff table parser library for mandoc +.Sh SYNOPSIS +.In mandoc.h +.In libmandoc.h +.In libroff.h +.Ft struct tbl_node * +.Fo tbl_alloc +.Fa "int pos" +.Fa "int line" +.Fa "struct mparse *parse" +.Fc +.Ft enum rofferr +.Fo tbl_read +.Fa "struct tbl_node *tbl" +.Fa "int ln" +.Fa "const char *p" +.Fa "int offs" +.Fc +.Ft void +.Fo tbl_restart +.Fa "int line" +.Fa "int pos" +.Fa "struct tbl_node *tbl" +.Fc +.Ft const struct tbl_span * +.Fo tbl_span +.Fa "struct tbl_node *tbl" +.Fc +.Ft void +.Fo tbl_end +.Fa "struct tbl_node **tblp" +.Fc +.Ft void +.Fo tbl_free +.Fa "struct tbl_node *tbl" +.Fc +.Sh DESCRIPTION +This library is tightly integrated into the +.Xr mandoc 1 +utility and not designed for stand-alone use. +The present manual is intended as a reference for developers working on +.Xr mandoc 1 . +.Ss Data structures +Unless otherwise noted, all of the following data structures are defined in +.In mandoc.h +and are deleted in +.Fn tbl_free . +.Bl -tag -width Ds +.It Vt struct tbl_node +This structure describes a complete table. +It is defined in +.In libroff.h , +created in +.Fn tbl_alloc , +and stored in the members +.Va first_tbl , +.Va last_tbl , +and +.Va tbl +of +.Vt struct roff Bq Pa roff.c . +.It Vt struct tbl_opts +This structure describes the options of one table. +It is used as a substructure of +.Vt struct tbl_node +and thus created and deleted together with it. +It is filled in +.Fn tbl_options . +.It Vt struct tbl_head +This structure describes one layout column in a table, +in particular the vertical line to its left. +It is allocated and filled in +.Fn cell_alloc Bq Pa tbl_layout.c +and referenced from the +.Va first_head +and +.Va last_head +members of +.Vt struct tbl_node . 
+.It Vt struct tbl_row +This structure describes one layout line in a table +by maintaining a list of all the cells in that line. +It is allocated and filled in +.Fn row Bq Pa tbl_layout.c +and referenced from the +.Va layout +member of +.Vt struct tbl_node . +.It Vt struct tbl_cell +This structure describes one layout cell in a table, +in particular its alignment, membership in spans, and +usage for lines. +It is allocated and filled in +.Fn cell_alloc Bq Pa tbl_layout.c +and referenced from the +.Va first +and +.Va last +members of +.Vt struct tbl_row . +.It Vt struct tbl_span +This structure describes one data line in a table +by maintaining a list of all data cells in that line +or by specifying that it is a horizontal line. +It is allocated and filled in +.Fn newspan Bq Pa tbl_data.c +which is called from +.Fn tbl_data +and referenced from the +.Va first_span , +.Va current_span , +and +.Va last_span +members of +.Vt struct tbl_node , +and from the +.Va span +members of +.Vt struct man_node +and +.Vt struct mdoc_node +from +.In man.h +and +.In mdoc.h . +.It Vt struct tbl_dat +This structure describes one data cell in a table by specifying +whether it contains a line or data, whether it spans additional +layout cells, and by storing the data. +It is allocated and filled in +.Fn data +and referenced from the +.Va first +and +.Va last +members of +.Vt struct tbl_span . +.El +.Ss Interface functions +The following functions are implemented in +.Pa tbl.c , +and all callers in +.Pa roff.c . +.Bl -tag -width Ds +.It Fn tbl_alloc +Allocates, initializes, and returns a new +.Vt struct tbl_node . +Called from +.Fn roff_TS . +.It Fn tbl_read +Dispatches to +.Fn tbl_option , +.Fn tbl_layout , +.Fn tbl_cdata , +and +.Fn tbl_data , +see below. +Called from +.Fn roff_parseln . +.It Fn tbl_restart +Resets the +.Va part +member of +.Vt struct tbl_node +to +.Dv TBL_PART_LAYOUT . +Called from +.Fn roff_T_ . 
+.It Fn tbl_span +On the first call, return the first +.Vt struct tbl_span ; +for later calls, return the next one or +.Dv NULL . +Called from +.Fn roff_span . +.It Fn tbl_end +Flags the last span as +.Dv TBL_SPAN_LAST +and clears the pointer passed as an argument. +Called from +.Fn roff_TE +and +.Fn roff_endparse . +.It Fn tbl_free +Frees the specified +.Vt struct tbl_node +and all the tbl_row, tbl_cell, tbl_span, tbl_dat and tbl_head structures +referenced from it. +Called from +.Fn roff_free +and +.Fn roff_reset . +.El +.Ss Private functions +.Bl -tag -width Ds +.It Ft int Fn tbl_options "struct tbl_node *tbl" "int ln" "const char *p" +Parses the options line into +.Vt struct tbl_opts . +Implemented in +.Pa tbl_opts.c , +called from +.Fn tbl_read . +.It Ft int Fn tbl_layout "struct tbl_node *tbl" "int ln" "const char *p" +Allocates and fills one +.Vt struct tbl_head +for each layout column, one +.Vt struct tbl_row +for each layout line, and one +.Vt struct tbl_cell +for each layout cell. +Implemented in +.Pa tbl_layout.c , +called from +.Fn tbl_read . +.It Ft int Fn tbl_data "struct tbl_node *tbl" "int ln" "const char *p" +Allocates one +.Vt struct tbl_span +for each data line and calls +.Fn data +on that line. +Implemented in +.Pa tbl_data.c , +called from +.Fn tbl_read . +.It Ft int Fn tbl_cdata "struct tbl_node *tbl" "int ln" "const char *p" +Continues parsing a data line: +When finding +.Sq T} , +switches back to +.Dv TBL_PART_DATA +mode and calls +.Fn data +if there are more data cells on the line. +Otherwise, appends the data to the current data cell. +Implemented in +.Pa tbl_data.c , +called from +.Fn tbl_read . +.It Xo +.Ft int +.Fo data +.Fa "struct tbl_node *tbl" +.Fa "struct tbl_span *dp" +.Fa "int ln" +.Fa "const char *p" +.Fa "int *pos" +.Fc +.Xc +Parses one data cell into one +.Vt struct tbl_dat . +Implemented in +.Pa tbl_data.c , +called from +.Fn tbl_data +and +.Fn tbl_cdata . 
+.El +.Sh SEE ALSO +.Xr mandoc 1 , +.Xr mandoc 3 , +.Xr tbl 7 +.Sh AUTHORS +.An -nosplit +The +.Nm tbl +library was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +with contributions from +.An Ingo Schwarze Aq Mt schwarze@openbsd.org . diff --git a/contrib/mdocml/test-strptime.c b/contrib/mdocml/test-strptime.c new file mode 100644 index 0000000..bedc775 --- /dev/null +++ b/contrib/mdocml/test-strptime.c @@ -0,0 +1,14 @@ +#if defined(__linux__) || defined(__MINT__) +# define _GNU_SOURCE /* strptime() */ +#endif + +#include + +int +main(void) +{ + struct tm tm; + const char input[] = "2014-01-04"; + return( ! (input+10 == strptime(input, "%Y-%m-%d", &tm) && + 114 == tm.tm_year && 0 == tm.tm_mon && 4 == tm.tm_mday)); +} -- cgit v1.1