diff options
Diffstat (limited to 'contrib/mdocml')
157 files changed, 57520 insertions, 0 deletions
diff --git a/contrib/mdocml/INSTALL b/contrib/mdocml/INSTALL new file mode 100644 index 0000000..085fb3e --- /dev/null +++ b/contrib/mdocml/INSTALL @@ -0,0 +1,160 @@ +$Id: INSTALL,v 1.13 2015/11/07 14:01:16 schwarze Exp $ + +About mdocml, the portable mandoc distribution +---------------------------------------------- +The mandoc manpage compiler toolset is a suite of tools compiling +mdoc(7), the roff(7) macro language of choice for BSD manual pages, +and man(7), the predominant historical language for UNIX manuals. +It includes a man(1) manual viewer and additional tools. +For general information, see <http://mdocml.bsd.lv/>. + +In case you have questions or want to provide feedback, read +<http://mdocml.bsd.lv/contact.html>. Consider subscribing to the +discuss@ mailing list mentioned on that page. If you intend to +help with the development of mandoc, consider subscribing to the +tech@ mailing list, too. + +Enjoy using the mandoc toolset! + +Ingo Schwarze, Karlsruhe, March 2015 + + +Installation +------------ +Before manually installing mandoc on your system, please check +whether the newest version of mandoc is already installed by default +or available via a binary package or a ports system. A list of the +latest bundled and ported versions of mandoc for various operating +systems is maintained at <http://mdocml.bsd.lv/ports.html>. + +Regarding how packages and ports are maintained for your operating +system, please consult your operating system documentation. +To install mandoc manually, the following steps are needed: + +1. If you want to build the CGI program, man.cgi(8), too, run the +command "echo BUILD_CGI=1 > configure.local". Then run "cp +cgi.h.examples cgi.h" and edit cgi.h as desired. + +2. Run "./configure". +This script attempts autoconfiguration of mandoc for your system. +Read both its standard output and the file "Makefile.local" it +generates. 
If anything looks wrong or different from what you +wish, read the file "configure.local.example", create and edit +a file "configure.local", and re-run "./configure" until the +result seems right to you. +On Solaris 10 and earlier, you may have to run "ksh ./configure" +because the native /bin/sh lacks some POSIX features. + +3. Run "make". +Any POSIX-compatible make, in particular both BSD make and GNU make, +should work. If the build fails, look at "configure.local.example" +and go back to step 2. + +4. Run "make -n install" and check whether everything will be +installed to the intended places. Otherwise, put some *DIR or *NM* +variables into "configure.local" and go back to step 2. + +5. Run "sudo make install". If you intend to build a binary +package using some kind of fake root mechanism, you may need a +command like "make DESTDIR=... install". Read the *-install targets +in the "Makefile" to understand how DESTDIR is used. + +6. If you want to use the integrated man(1) and your system uses +manpath(1), make sure it is configured correctly, in particular, +it returns all directory trees where manual pages are installed. +Otherwise, if your system uses man.conf(5), make sure it contains +a "_whatdb" line for each directory tree, and the order of these +lines meets your wishes. + +7. If you compiled with database support, run the command "sudo +makewhatis" to build mandoc.db(5) databases in all the directory +trees configured in step 6. Whenever installing new manual pages, +re-run makewhatis(8) to update the databases, or apropos(1) will +not find the new pages. + +8. To set up a man.cgi(8) server, read its manual page. + +Note that some man(7) pages may contain low-level roff(7) markup +that mandoc does not yet understand. On some BSD systems using +mandoc, third-party software is vetted on whether it may be formatted +with mandoc. 
If not, groff(1) is pulled in as a dependency and +used to install a pre-formatted "catpage" instead of directly as +manual page source. + + +Understanding mandoc dependencies +--------------------------------- +The mandoc(1), man(1), and demandoc(1) utilities only depend +on the zlib library for decompressing gzipped manual pages, +but makewhatis(8) and apropos(1) depend on the following +additional software: + +1. The SQLite database system, see <http://sqlite.org/>. +The recommended version of SQLite is 3.8.4.3 or newer. The mandoc +toolset is known to work with version 3.7.5 or newer. Versions +older than 3.8.3 may not achieve full performance due to the +missing SQLITE_DETERMINISTIC optimization flag. Versions older +than 3.8.0 may not show full error information if opening a database +fails due to the missing sqlite3_errstr() API. Both are very minor +problems, apropos(1) is fully usable with SQLite 3.7.5. Versions +older than 3.7.5 may or may not work, they have not been tested. + +2. The fts(3) directory traversion functions. +If your system does not have them, the bundled compatibility version +will be used, so you need not worry in that case. But be careful: the +glibc version of fts(3) is known to be broken on 32bit platforms, +see <https://sourceware.org/bugzilla/show_bug.cgi?id=15838>. +If you run into that problem, set "HAVE_FTS=0" in configure.local. + +3. Marc Espie's ohash(3) library. +If your system does not have it, the bundled compatibility version +will be used, so you probably need not worry about it. + +One of the chief design goals of the mandoc toolbox is to make +sure that nothing related to documentation requires C++. +Consequently, linking mandoc against any kind of C++ program +would defeat the purpose and is not supported. 
+ + +Checking autoconfiguration quality +---------------------------------- +If you want to check whether automatic configuration works well +on your platform, consider the following: + +The mandoc package intentionally does not use GNU autoconf because +we consider that toolset a blatant example of overengineering that +is obsolete nowadays, since all modern operating systems are now +reasonably close to POSIX and do not need arcane shell magic any +longer. If your system does need such magic, consider upgrading +to reasonably modern POSIX-compliant tools rather than asking for +autoconf-style workarounds. + +As far as mandoc is using any features not mandated by ANSI X3.159-1989 +("ANSI C") or IEEE Std 1003.1-2008 ("POSIX") that some modern systems +do not have, we intend to provide autoconfiguration tests and +compat_*.c implementations. Please report any that turn out to be +missing. Note that while we do strive to produce portable code, +we do not slavishly restrict ourselves to POSIX-only interfaces. +For improved security and readability, we do use well-designed, +modern interfaces like reallocarray(3) even if they are still rather +uncommon, of course bundling compat_*.c implementations as needed. + +Where mandoc is using ANSI C or POSIX features that some systems +still lack and that compat_*.c implementations can be provided for +without too much hassle, we will consider adding them, too, so +please report whatever is missing on your platform. + +The following steps can be used to manually check the automatic +configuration on your platform: + +1. Run "make distclean". + +2. Run "./configure" + +3. Read the file "config.log". It shows the compiler commands used +to test the libraries installed on your system and the standard +output and standard error output these commands produce. Watch out +for unexpected failures. Those are most likely to happen if headers +or libraries are installed in unusual places or interfaces defined +in unusual headers. 
You can also look at the file "config.h" and +check that no "#define HAVE_*" differ from your expectations. diff --git a/contrib/mdocml/LICENSE b/contrib/mdocml/LICENSE new file mode 100644 index 0000000..ad3cd4b --- /dev/null +++ b/contrib/mdocml/LICENSE @@ -0,0 +1,52 @@ +$Id: LICENSE,v 1.11 2015/11/07 17:58:55 schwarze Exp $ + +With the exceptions noted below, all code and documentation +contained in the mdocml toolkit is protected by the Copyright +of the following developers: + +Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> +Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org> +Copyright (c) 2009, 2010, 2011, 2012 Joerg Sonnenberger <joerg@netbsd.org> +Copyright (c) 2013 Franco Fichtner <franco@lastsummer.de> +Copyright (c) 2014 Baptiste Daroussin <bapt@FreeBSD.org> +Copyright (c) 1999, 2004 Marc Espie <espie@openbsd.org> +Copyright (c) 1998, 2004, 2010 Todd C. Miller <Todd.Miller@courtesan.com> +Copyright (c) 2008 Otto Moerbeek <otto@drijf.net> +Copyright (c) 2004 Ted Unangst <tedu@openbsd.org> +Copyright (c) 1994 Christos Zoulas <christos@netbsd.org> +Copyright (c) 2003, 2007, 2008, 2014 Jason McIntyre <jmc@openbsd.org> + +See the individual source files for information about who contributed +to which file during which years. + + +The mdocml distribution as a whole is distributed by its developers +under the following license: + +Permission to use, copy, modify, and distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + +The following files included from outside sources are protected by +other people's Copyright and are distributed under various 2-clause +and 3-clause BSD licenses; see these individual files for details. + +soelim.c, soelim.1: +Copyright (c) 2014 Baptiste Daroussin <bapt@FreeBSD.org> + +compat_err.c, compat_fts.c, compat_fts.h, +compat_getsubopt.c, compat_strcasestr.c, compat_strsep.c, +man.1: +Copyright (c) 1989,1990,1993,1994 The Regents of the University of California + +compat_stringlist.c, compat_stringlist.h: +Copyright (c) 1994 Christos Zoulas <christos@netbsd.org> diff --git a/contrib/mdocml/Makefile b/contrib/mdocml/Makefile new file mode 100644 index 0000000..9be2bd9 --- /dev/null +++ b/contrib/mdocml/Makefile @@ -0,0 +1,462 @@ +# $Id: Makefile,v 1.480 2015/11/07 21:53:14 schwarze Exp $ +# +# Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> +# Copyright (c) 2011, 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +VERSION = 1.13.3 + +# === LIST OF FILES ==================================================== + +TESTSRCS = test-dirent-namlen.c \ + test-err.c \ + test-fts.c \ + test-getline.c \ + test-getsubopt.c \ + test-isblank.c \ + test-mkdtemp.c \ + test-mmap.c \ + test-ohash.c \ + test-pledge.c \ + test-progname.c \ + test-reallocarray.c \ + test-sqlite3.c \ + test-sqlite3_errstr.c \ + test-strcasestr.c \ + test-stringlist.c \ + test-strlcat.c \ + test-strlcpy.c \ + test-strptime.c \ + test-strsep.c \ + test-strtonum.c \ + test-vasprintf.c \ + test-wchar.c + +SRCS = att.c \ + cgi.c \ + chars.c \ + compat_err.c \ + compat_fts.c \ + compat_getline.c \ + compat_getsubopt.c \ + compat_isblank.c \ + compat_mkdtemp.c \ + compat_ohash.c \ + compat_progname.c \ + compat_reallocarray.c \ + compat_sqlite3_errstr.c \ + compat_strcasestr.c \ + compat_stringlist.c \ + compat_strlcat.c \ + compat_strlcpy.c \ + compat_strsep.c \ + compat_strtonum.c \ + compat_vasprintf.c \ + demandoc.c \ + eqn.c \ + eqn_html.c \ + eqn_term.c \ + html.c \ + lib.c \ + main.c \ + man.c \ + man_hash.c \ + man_html.c \ + man_macro.c \ + man_term.c \ + man_validate.c \ + mandoc.c \ + mandoc_aux.c \ + mandoc_ohash.c \ + mandocdb.c \ + manpage.c \ + manpath.c \ + mansearch.c \ + mansearch_const.c \ + mdoc.c \ + mdoc_argv.c \ + mdoc_hash.c \ + mdoc_html.c \ + mdoc_macro.c \ + mdoc_man.c \ + mdoc_state.c \ + mdoc_term.c \ + mdoc_validate.c \ + msec.c \ + out.c \ + preconv.c \ + read.c \ + roff.c \ + soelim.c \ + st.c \ + tag.c \ + tbl.c \ + tbl_data.c \ + tbl_html.c \ + tbl_layout.c \ + tbl_opts.c \ + tbl_term.c \ + term.c \ + term_ascii.c \ + term_ps.c \ + tree.c + +DISTFILES = INSTALL 
\ + LICENSE \ + Makefile \ + Makefile.depend \ + NEWS \ + TODO \ + apropos.1 \ + cgi.h.example \ + compat_fts.h \ + compat_ohash.h \ + compat_stringlist.h \ + configure \ + configure.local.example \ + demandoc.1 \ + eqn.7 \ + gmdiff \ + html.h \ + lib.in \ + libman.h \ + libmandoc.h \ + libmdoc.h \ + libroff.h \ + main.h \ + makewhatis.8 \ + man.1 \ + man.7 \ + man.cgi.8 \ + man.conf.5 \ + man.h \ + manconf.h \ + mandoc.1 \ + mandoc.3 \ + mandoc.css \ + mandoc.db.5 \ + mandoc.h \ + mandoc_aux.h \ + mandoc_char.7 \ + mandoc_escape.3 \ + mandoc_headers.3 \ + mandoc_html.3 \ + mandoc_malloc.3 \ + mandoc_ohash.h \ + mansearch.3 \ + mansearch.h \ + mchars_alloc.3 \ + mdoc.7 \ + mdoc.h \ + msec.in \ + out.h \ + predefs.in \ + roff.7 \ + roff.h \ + soelim.1 \ + st.in \ + tag.h \ + tbl.3 \ + tbl.7 \ + term.h \ + $(SRCS) \ + $(TESTSRCS) + +LIBMAN_OBJS = man.o \ + man_hash.o \ + man_macro.o \ + man_validate.o + +LIBMDOC_OBJS = att.o \ + lib.o \ + mdoc.o \ + mdoc_argv.o \ + mdoc_hash.o \ + mdoc_macro.o \ + mdoc_state.o \ + mdoc_validate.o \ + st.o + +LIBROFF_OBJS = eqn.o \ + roff.o \ + tbl.o \ + tbl_data.o \ + tbl_layout.o \ + tbl_opts.o + +LIBMANDOC_OBJS = $(LIBMAN_OBJS) \ + $(LIBMDOC_OBJS) \ + $(LIBROFF_OBJS) \ + chars.o \ + mandoc.o \ + mandoc_aux.o \ + mandoc_ohash.o \ + msec.o \ + preconv.o \ + read.o + +COMPAT_OBJS = compat_err.o \ + compat_fts.o \ + compat_getline.o \ + compat_getsubopt.o \ + compat_isblank.o \ + compat_mkdtemp.o \ + compat_ohash.o \ + compat_progname.o \ + compat_reallocarray.o \ + compat_sqlite3_errstr.o \ + compat_strcasestr.o \ + compat_strlcat.o \ + compat_strlcpy.o \ + compat_strsep.o \ + compat_strtonum.o \ + compat_vasprintf.o + +MANDOC_HTML_OBJS = eqn_html.o \ + html.o \ + man_html.o \ + mdoc_html.o \ + tbl_html.o + +MANDOC_MAN_OBJS = mdoc_man.o + +MANDOC_TERM_OBJS = eqn_term.o \ + man_term.o \ + mdoc_term.o \ + term.o \ + term_ascii.o \ + term_ps.o \ + tbl_term.o + +BASE_OBJS = $(MANDOC_HTML_OBJS) \ + $(MANDOC_MAN_OBJS) \ + 
$(MANDOC_TERM_OBJS) \ + main.o \ + manpath.o \ + out.o \ + tag.o \ + tree.o + +MAIN_OBJS = $(BASE_OBJS) + +DB_OBJS = mandocdb.o \ + mansearch.o \ + mansearch_const.o + +CGI_OBJS = $(MANDOC_HTML_OBJS) \ + cgi.o \ + mansearch.o \ + mansearch_const.o \ + out.o + +MANPAGE_OBJS = manpage.o mansearch.o mansearch_const.o manpath.o + +DEMANDOC_OBJS = demandoc.o + +SOELIM_OBJS = soelim.o \ + compat_err.o \ + compat_getline.o \ + compat_progname.o \ + compat_reallocarray.o \ + compat_stringlist.o + +WWW_MANS = apropos.1.html \ + demandoc.1.html \ + man.1.html \ + mandoc.1.html \ + soelim.1.html \ + mandoc.3.html \ + mandoc_escape.3.html \ + mandoc_headers.3.html \ + mandoc_html.3.html \ + mandoc_malloc.3.html \ + mansearch.3.html \ + mchars_alloc.3.html \ + tbl.3.html \ + man.conf.5.html \ + mandoc.db.5.html \ + eqn.7.html \ + man.7.html \ + mandoc_char.7.html \ + mdoc.7.html \ + roff.7.html \ + tbl.7.html \ + makewhatis.8.html \ + man.cgi.8.html \ + man.h.html \ + manconf.h.html \ + mandoc.h.html \ + mandoc_aux.h.html \ + mansearch.h.html \ + mdoc.h.html \ + roff.h.html + +WWW_OBJS = mdocml.tar.gz \ + mdocml.sha256 + +# === USER CONFIGURATION =============================================== + +include Makefile.local + +# === DEPENDENCY HANDLING ============================================== + +all: base-build $(BUILD_TARGETS) Makefile.local + +base-build: mandoc demandoc soelim + +cgi-build: man.cgi + +install: base-install $(INSTALL_TARGETS) + +www: $(WWW_OBJS) $(WWW_MANS) + +$(WWW_MANS): mandoc + +.PHONY: base-install cgi-install db-install install www-install +.PHONY: clean distclean depend + +include Makefile.depend + +# === TARGETS CONTAINING SHELL COMMANDS ================================ + +distclean: clean + rm -f Makefile.local config.h config.h.old config.log config.log.old + +clean: + rm -f libmandoc.a $(LIBMANDOC_OBJS) $(COMPAT_OBJS) + rm -f mandoc $(BASE_OBJS) $(DB_OBJS) + rm -f man.cgi $(CGI_OBJS) + rm -f manpage $(MANPAGE_OBJS) + rm -f demandoc 
$(DEMANDOC_OBJS) + rm -f soelim $(SOELIM_OBJS) + rm -f $(WWW_MANS) $(WWW_OBJS) + rm -rf *.dSYM + +base-install: base-build + mkdir -p $(DESTDIR)$(BINDIR) + mkdir -p $(DESTDIR)$(LIBDIR) + mkdir -p $(DESTDIR)$(INCLUDEDIR) + mkdir -p $(DESTDIR)$(MANDIR)/man1 + mkdir -p $(DESTDIR)$(MANDIR)/man3 + mkdir -p $(DESTDIR)$(MANDIR)/man5 + mkdir -p $(DESTDIR)$(MANDIR)/man7 + $(INSTALL_PROGRAM) mandoc demandoc $(DESTDIR)$(BINDIR) + $(INSTALL_PROGRAM) soelim $(DESTDIR)$(BINDIR)/$(BINM_SOELIM) + ln -f $(DESTDIR)$(BINDIR)/mandoc $(DESTDIR)$(BINDIR)/$(BINM_MAN) + $(INSTALL_LIB) libmandoc.a $(DESTDIR)$(LIBDIR) + $(INSTALL_LIB) man.h mandoc.h mandoc_aux.h mdoc.h roff.h \ + $(DESTDIR)$(INCLUDEDIR) + $(INSTALL_MAN) mandoc.1 demandoc.1 $(DESTDIR)$(MANDIR)/man1 + $(INSTALL_MAN) soelim.1 $(DESTDIR)$(MANDIR)/man1/$(BINM_SOELIM).1 + $(INSTALL_MAN) man.1 $(DESTDIR)$(MANDIR)/man1/$(BINM_MAN).1 + $(INSTALL_MAN) mandoc.3 mandoc_escape.3 mandoc_malloc.3 \ + mchars_alloc.3 tbl.3 $(DESTDIR)$(MANDIR)/man3 + $(INSTALL_MAN) man.conf.5 $(DESTDIR)$(MANDIR)/man5/${MANM_MANCONF}.5 + $(INSTALL_MAN) man.7 $(DESTDIR)$(MANDIR)/man7/${MANM_MAN}.7 + $(INSTALL_MAN) mdoc.7 $(DESTDIR)$(MANDIR)/man7/${MANM_MDOC}.7 + $(INSTALL_MAN) roff.7 $(DESTDIR)$(MANDIR)/man7/${MANM_ROFF}.7 + $(INSTALL_MAN) eqn.7 $(DESTDIR)$(MANDIR)/man7/${MANM_EQN}.7 + $(INSTALL_MAN) tbl.7 $(DESTDIR)$(MANDIR)/man7/${MANM_TBL}.7 + $(INSTALL_MAN) mandoc_char.7 $(DESTDIR)$(MANDIR)/man7 + +db-install: base-build + mkdir -p $(DESTDIR)$(BINDIR) + mkdir -p $(DESTDIR)$(SBINDIR) + mkdir -p $(DESTDIR)$(MANDIR)/man1 + mkdir -p $(DESTDIR)$(MANDIR)/man3 + mkdir -p $(DESTDIR)$(MANDIR)/man5 + mkdir -p $(DESTDIR)$(MANDIR)/man8 + ln -f $(DESTDIR)$(BINDIR)/mandoc $(DESTDIR)$(BINDIR)/$(BINM_APROPOS) + ln -f $(DESTDIR)$(BINDIR)/mandoc $(DESTDIR)$(BINDIR)/$(BINM_WHATIS) + ln -f $(DESTDIR)$(BINDIR)/mandoc \ + $(DESTDIR)$(SBINDIR)/$(BINM_MAKEWHATIS) + $(INSTALL_MAN) apropos.1 $(DESTDIR)$(MANDIR)/man1/$(BINM_APROPOS).1 + ln -f 
$(DESTDIR)$(MANDIR)/man1/$(BINM_APROPOS).1 \ + $(DESTDIR)$(MANDIR)/man1/$(BINM_WHATIS).1 + $(INSTALL_MAN) mansearch.3 $(DESTDIR)$(MANDIR)/man3 + $(INSTALL_MAN) mandoc.db.5 $(DESTDIR)$(MANDIR)/man5 + $(INSTALL_MAN) makewhatis.8 \ + $(DESTDIR)$(MANDIR)/man8/$(BINM_MAKEWHATIS).8 + +cgi-install: cgi-build + mkdir -p $(DESTDIR)$(CGIBINDIR) + mkdir -p $(DESTDIR)$(HTDOCDIR) + mkdir -p $(DESTDIR)$(WWWPREFIX)/man/mandoc/man1 + mkdir -p $(DESTDIR)$(WWWPREFIX)/man/mandoc/man8 + $(INSTALL_PROGRAM) man.cgi $(DESTDIR)$(CGIBINDIR) + $(INSTALL_DATA) mandoc.css $(DESTDIR)$(HTDOCDIR) + $(INSTALL_MAN) apropos.1 $(DESTDIR)$(WWWPREFIX)/man/mandoc/man1/ + $(INSTALL_MAN) man.cgi.8 $(DESTDIR)$(WWWPREFIX)/man/mandoc/man8/ + +Makefile.local config.h: configure ${TESTSRCS} + @echo "$@ is out of date; please run ./configure" + @exit 1 + +libmandoc.a: $(COMPAT_OBJS) $(LIBMANDOC_OBJS) + $(AR) rs $@ $(COMPAT_OBJS) $(LIBMANDOC_OBJS) + +mandoc: $(MAIN_OBJS) libmandoc.a + $(CC) $(LDFLAGS) -o $@ $(MAIN_OBJS) libmandoc.a $(DBLIB) + +manpage: $(MANPAGE_OBJS) libmandoc.a + $(CC) $(LDFLAGS) -o $@ $(MANPAGE_OBJS) libmandoc.a $(DBLIB) + +man.cgi: $(CGI_OBJS) libmandoc.a + $(CC) $(LDFLAGS) $(STATIC) -o $@ $(CGI_OBJS) libmandoc.a $(DBLIB) + +demandoc: $(DEMANDOC_OBJS) libmandoc.a + $(CC) $(LDFLAGS) -o $@ $(DEMANDOC_OBJS) libmandoc.a $(DBLIB) + +soelim: $(SOELIM_OBJS) + $(CC) $(LDFLAGS) -o $@ $(SOELIM_OBJS) + +# --- maintainer targets --- + +www-install: www + mkdir -p $(HTDOCDIR)/snapshots + $(INSTALL_DATA) $(WWW_MANS) mandoc.css $(HTDOCDIR) + $(INSTALL_DATA) $(WWW_OBJS) $(HTDOCDIR)/snapshots + $(INSTALL_DATA) mdocml.tar.gz \ + $(HTDOCDIR)/snapshots/mdocml-$(VERSION).tar.gz + $(INSTALL_DATA) mdocml.sha256 \ + $(HTDOCDIR)/snapshots/mdocml-$(VERSION).sha256 + +depend: config.h + mkdep -f Makefile.depend $(CFLAGS) $(SRCS) + perl -e 'undef $$/; $$_ = <>; s|/usr/include/\S+||g; \ + s|\\\n||g; s| +| |g; s| $$||mg; print;' \ + Makefile.depend > Makefile.tmp + mv Makefile.tmp Makefile.depend + +mdocml.sha256: 
mdocml.tar.gz + sha256 mdocml.tar.gz > $@ + +mdocml.tar.gz: $(DISTFILES) + mkdir -p .dist/mdocml-$(VERSION)/ + $(INSTALL) -m 0644 $(DISTFILES) .dist/mdocml-$(VERSION) + chmod 755 .dist/mdocml-$(VERSION)/configure + ( cd .dist/ && tar zcf ../$@ mdocml-$(VERSION) ) + rm -rf .dist/ + +# === SUFFIX RULES ===================================================== + +.SUFFIXES: .1 .3 .5 .7 .8 .h +.SUFFIXES: .1.html .3.html .5.html .7.html .8.html .h.html + +.h.h.html: + highlight -I $< > $@ + +.1.1.html .3.3.html .5.5.html .7.7.html .8.8.html: mandoc + ./mandoc -Thtml -Wall,stop \ + -Ostyle=mandoc.css,man=%N.%S.html,includes=%I.html $< > $@ diff --git a/contrib/mdocml/Makefile.depend b/contrib/mdocml/Makefile.depend new file mode 100644 index 0000000..98a1928 --- /dev/null +++ b/contrib/mdocml/Makefile.depend @@ -0,0 +1,68 @@ +att.o: att.c config.h roff.h mdoc.h libmdoc.h +cgi.o: cgi.c config.h mandoc_aux.h mandoc.h roff.h mdoc.h man.h main.h manconf.h mansearch.h cgi.h +chars.o: chars.c config.h mandoc.h mandoc_aux.h mandoc_ohash.h compat_ohash.h libmandoc.h +compat_err.o: compat_err.c config.h +compat_fts.o: compat_fts.c config.h compat_fts.h +compat_getline.o: compat_getline.c config.h +compat_getsubopt.o: compat_getsubopt.c config.h +compat_isblank.o: compat_isblank.c config.h +compat_mkdtemp.o: compat_mkdtemp.c config.h +compat_ohash.o: compat_ohash.c config.h compat_ohash.h +compat_progname.o: compat_progname.c config.h +compat_reallocarray.o: compat_reallocarray.c config.h +compat_sqlite3_errstr.o: compat_sqlite3_errstr.c config.h +compat_strcasestr.o: compat_strcasestr.c config.h +compat_stringlist.o: compat_stringlist.c config.h compat_stringlist.h +compat_strlcat.o: compat_strlcat.c config.h +compat_strlcpy.o: compat_strlcpy.c config.h +compat_strsep.o: compat_strsep.c config.h +compat_strtonum.o: compat_strtonum.c config.h +compat_vasprintf.o: compat_vasprintf.c config.h +demandoc.o: demandoc.c config.h roff.h man.h mdoc.h mandoc.h +eqn.o: eqn.c config.h mandoc.h 
mandoc_aux.h libmandoc.h libroff.h +eqn_html.o: eqn_html.c config.h mandoc.h out.h html.h +eqn_term.o: eqn_term.c config.h mandoc.h out.h term.h +html.o: html.c config.h mandoc.h mandoc_aux.h out.h html.h manconf.h main.h +lib.o: lib.c config.h roff.h mdoc.h libmdoc.h lib.in +main.o: main.c config.h mandoc_aux.h mandoc.h roff.h mdoc.h man.h tag.h main.h manconf.h mansearch.h +man.o: man.c config.h mandoc_aux.h mandoc.h roff.h man.h libmandoc.h roff_int.h libman.h +man_hash.o: man_hash.c config.h roff.h man.h libman.h +man_html.o: man_html.c config.h mandoc_aux.h roff.h man.h out.h html.h main.h +man_macro.o: man_macro.c config.h mandoc.h roff.h man.h libmandoc.h roff_int.h libman.h +man_term.o: man_term.c config.h mandoc_aux.h mandoc.h roff.h man.h out.h term.h main.h +man_validate.o: man_validate.c config.h mandoc_aux.h mandoc.h roff.h man.h libmandoc.h roff_int.h libman.h +mandoc.o: mandoc.c config.h mandoc.h mandoc_aux.h libmandoc.h +mandoc_aux.o: mandoc_aux.c config.h mandoc.h mandoc_aux.h +mandoc_ohash.o: mandoc_ohash.c mandoc_aux.h mandoc_ohash.h compat_ohash.h +mandocdb.o: mandocdb.c config.h compat_fts.h mandoc_aux.h mandoc_ohash.h compat_ohash.h mandoc.h roff.h mdoc.h man.h manconf.h mansearch.h +manpage.o: manpage.c config.h manconf.h mansearch.h +manpath.o: manpath.c config.h mandoc_aux.h manconf.h +mansearch.o: mansearch.c config.h mandoc.h mandoc_aux.h mandoc_ohash.h compat_ohash.h manconf.h mansearch.h +mansearch_const.o: mansearch_const.c config.h mansearch.h +mdoc.o: mdoc.c config.h mandoc_aux.h mandoc.h roff.h mdoc.h libmandoc.h roff_int.h libmdoc.h +mdoc_argv.o: mdoc_argv.c config.h mandoc_aux.h mandoc.h roff.h mdoc.h libmandoc.h libmdoc.h +mdoc_hash.o: mdoc_hash.c config.h roff.h mdoc.h libmdoc.h +mdoc_html.o: mdoc_html.c config.h mandoc_aux.h roff.h mdoc.h out.h html.h main.h +mdoc_macro.o: mdoc_macro.c config.h mandoc.h roff.h mdoc.h libmandoc.h roff_int.h libmdoc.h +mdoc_man.o: mdoc_man.c config.h mandoc_aux.h mandoc.h roff.h mdoc.h man.h 
out.h main.h +mdoc_state.o: mdoc_state.c mandoc.h roff.h mdoc.h libmandoc.h libmdoc.h +mdoc_term.o: mdoc_term.c config.h mandoc_aux.h mandoc.h roff.h mdoc.h out.h term.h tag.h main.h +mdoc_validate.o: mdoc_validate.c config.h mandoc_aux.h mandoc.h roff.h mdoc.h libmandoc.h roff_int.h libmdoc.h +msec.o: msec.c config.h mandoc.h libmandoc.h msec.in +out.o: out.c config.h mandoc_aux.h mandoc.h out.h +preconv.o: preconv.c config.h mandoc.h libmandoc.h +read.o: read.c config.h mandoc_aux.h mandoc.h roff.h mdoc.h man.h libmandoc.h roff_int.h +roff.o: roff.c config.h mandoc.h mandoc_aux.h roff.h libmandoc.h roff_int.h libroff.h predefs.in +soelim.o: soelim.c config.h compat_stringlist.h +st.o: st.c config.h roff.h mdoc.h libmdoc.h st.in +tag.o: tag.c config.h mandoc_aux.h mandoc_ohash.h compat_ohash.h tag.h +tbl.o: tbl.c config.h mandoc.h mandoc_aux.h libmandoc.h libroff.h +tbl_data.o: tbl_data.c config.h mandoc.h mandoc_aux.h libmandoc.h libroff.h +tbl_html.o: tbl_html.c config.h mandoc.h out.h html.h +tbl_layout.o: tbl_layout.c config.h mandoc.h mandoc_aux.h libmandoc.h libroff.h +tbl_opts.o: tbl_opts.c config.h mandoc.h libmandoc.h libroff.h +tbl_term.o: tbl_term.c config.h mandoc.h out.h term.h +term.o: term.c config.h mandoc.h mandoc_aux.h out.h term.h main.h +term_ascii.o: term_ascii.c config.h mandoc.h mandoc_aux.h out.h term.h manconf.h main.h +term_ps.o: term_ps.c config.h mandoc_aux.h out.h term.h manconf.h main.h +tree.o: tree.c config.h mandoc.h roff.h mdoc.h man.h main.h diff --git a/contrib/mdocml/NEWS b/contrib/mdocml/NEWS new file mode 100644 index 0000000..985f265 --- /dev/null +++ b/contrib/mdocml/NEWS @@ -0,0 +1,631 @@ +$Id: NEWS,v 1.10 2015/11/05 16:58:20 schwarze Exp $ + +This file lists the most important changes in the mdocml.bsd.lv distribution. 
+ +Changes in version 1.13.3, released on March 13, 2015 + + --- MAJOR NEW FEATURES --- + * When a manual is missing from an outdated database, let man(1) + show it anyway, using a KISS file system lookup as a fallback. + * Use this to always provide man(1), even without database support. + * Fatal errors no longer exist. If a file can be opened, mandoc + will produce some output; at worst, the output may be almost empty. + * New -Wunsupp message level. + --- POTENTIONALLY SECURITY RELEVANT BUGFIXES --- + * Fix a potential write buffer overrun on incomplete string conditionals. + http://mdocml.bsd.lv/cgi-bin/cvsweb/roff.c#rev1.241 + * Fix a potential write buffer overrun on backslash at EOF in a conditional. + http://mdocml.bsd.lv/cgi-bin/cvsweb/roff.c#rev1.247 + * Fix a use after free sometimes hit when validation deletes a block. + http://mdocml.bsd.lv/cgi-bin/cvsweb/mdoc_macro.c#rev1.180 + --- MAJOR FUNCTIONALLY RELEVANT BUGFIXES --- + * Let man(1) show manuals for the current architecture by default, + and support the MACHINE environment variable. + * Fix the man(1) and apropos(1) -m option, it didn't work at all. + * Do not spawn a pager when there is no output. + * In makewhatis(8), fix detection of hardlinked manuals on platforms + having padding in struct inodev (typically 64bit platforms). + --- PORTABILITY IMPROVEMENTS --- + * Ignore O_CLOEXEC when the operating system doesn't provide it. + * Avoid forward reference to enum type which violates ISO C99. + * Support homebrew-style linking on Mac OS X. + --- MINOR NEW FEATURES --- + * lookup: Accept digit+letter and "n" as section names in man(1), + and consistently handle digit+letter in file name extensions. + * lookup: Speed up -s/-S by using the "mlinks" rather than the "keys" table. + * output: Insert horizontal lines between formatted manual pages. + * input: New stricter and more resilient UTF-8 parser. + * mdoc(7): Refactor block rewinding for simpler and more robust parsing. 
+ * man(7): Use the -Ios option when .TH has less than four arguments. + * tbl(7): Implement the "center" option. + * tbl(7): New option and format parsers, improved in many respects. + * roff(7): Basic implementation of the \o escape sequence (overstrike), + and improved rendering of overstrikes in PostScript and PDF output. + * Message improvements, in particular for, but not restricted to, + eqn(7), tbl(7), and wrong numbers of arguments in mdoc(7) and man(7), + in various cases also improving output generated by invalid input. + * Delete the -V option. It serves no purpose but keeps confusing people. + * gmdiff: Minimal support for Heirloom roff. + --- RELIABILITY BUGFIXES --- + * tbl(7): Fix a read buffer overrun on 'f' at EOL in a layout. + * roff(7): Fix a read buffer overrun on incomplete numerical conditions. + * mdoc(7): Fix a NULL pointer access on .Nd followed by an explicit block. + * mdoc(7): Fix a NULL pointer access on .It Xo without .Xc. + * mdoc(7): Fix a NULL pointer access on .Eo without a tail. + * mdoc(7): Fix a NULL pointer access in the validation of empty .St macros. + * man(7)/tbl(7): Fix a NULL pointer access on .TS right after .TP. + * tbl(7): Fix a NULL pointer access on layout lines without any cells. + * eqn(7): Fix NULL pointer accesses in the terminal formatter. + * roff(7): Fix a NULL pointer access on trailing \s-/\s+ without an argument. + * gz: Fix a potential NULL pointer access after waitpid() failure. + * roff(7): Don't let the modulo operator divide by zero. + * input: Fix an assertion failure on certain invalid UTF-8 input. + * terminal output: Allow arbitrary depth of the font stack (assertion fix). + * mdoc(7): Fix assertion failures and endless loops on invalid block closing. + * mdoc(7): Fix an assertion failure on .Bl .Sm not followed by .It. + * mdoc(7): Fix an assertion failure on .Bl -column ... .El .Ta. + * tbl(7): Fix assertion failures by macros inside table data, + but do not throw away the macro arguments. 
+ * Prevent certain kinds of unreasonable input from producing excessive + output, in one case caused by unsigned integer underflow. + * Fix a potential memory leak in makewhatis(8) on very long filenames. + --- MINOR BUGFIXES --- + * mdoc(7): Fix parsing of badly nested blocks with multiple identical blocks. + * mdoc(7): Support negative indentations for displays and lists. + * mdoc(7): Don't mistreat negative .sp arguments as large positive ones. + * mdoc(7): Some spacing fixes for .Eo/.Ec. + * man(7): Support negative horizontal widths. + * man(7): Do not print out invalid .IP arguments. + * man(7): Correctly handle scaling units after .PD. + * man(7): Support .RE with an argument. + * man(7): Fix restoring indentation after .RS with large negative arguments. + * tbl(7): Prevent tables from breaking the filling of preceding text. + * tbl(7): Fix vertical spacing at the beginning of tables. + * tbl(7): Parser and formatter fixes for line drawing and font modifiers. + * tbl(7): Correct handling of blank data lines. + * eqn(7): Add sometimes missing whitespace before equation output. + * roff(7): Fix vertical scaling, most of it was wrong. + * roff(7): Slightly improve \w width measurements. + * roff(7): Accept the historic aliases \s10 to \s39 for \s(10 to \s(39. + * roff(7): Correctly escape quotes when expanding macro arguments. + * roff(7): Correctly handle scaling units in numerical expressions, + and some other improvements to the parsing of numerical expressions. + * roff(7): Three minor fixes with respect to evaluation of conditionals. + * roff(7): Let .it accept numerical expressions, not just constants. + * mandoc_char(7): Correct some character names and renderings. + * If earlier files set a non-zero exit status, never reset it to zero. + --- THANKS TO --- + * Jonathan Gray (OpenBSD) for yet more testing with afl (the American + Fuzzy Lop security fuzzer), again resulting in many bug reports. 
+ * Theo de Raadt (OpenBSD) for suggesting the main new feature (man(1) file + system lookup) and for reporting an important bug (pager without output). + * Theo Buehler for an important bug report (-s/-S slowness) + and for proposing a nice new feature (lines between pages). + * Jason McIntyre for an important bug report (hardlink detection) + and multiple documentation patches. + * Pascal Stumpf (OpenBSD) and Alessandro de Laurenzis for + important bug reports (architecture and man -m, respectively). + * Thomas Klausner (NetBSD) for proposing a new feature (man(7) -Ios), + a bug report, and release testing. + * Anthony Bentley, Daniel Dickman, Ted Unangst (OpenBSD) and + Kristaps Dzonsons (bsd.lv) for source code patches and bug reports. + * Christian Weisgerber (OpenBSD) for more than half a dozen bug reports. + * Carsten Kunze (Heirloom troff) for bug reports and release testing. + * Antoine Jacoutot (OpenBSD) for release testing. + * Alexis Hildebrandt (Homebrew), Baptiste Daroussin (FreeBSD), + Jonathan Perkin (SmartOS), Pedro Giffuni (FreeBSD), Svyatoslav + Mishyn (Crux Linux), Ulrich Spoerlein (FreeBSD), Jan Stary, Patrick + Keshishian, Sebastien Marie, and Steffen Nurpmeso for bug reports. + +Changes in version 1.13.2, released on December 13, 2014 + + --- MAJOR NEW FEATURES --- + * Include an implementation of man(1), the manual page viewer. + * Unified set of command line options, each one supported by all + command names, including new options -a (format all), -c (no + pager), -h (synopsis only), and -w (list filenames). + * Support the MANPAGER and PAGER environment variables. + * Support gzip'ed manuals by the whole toolset, even as .so targets. + * Support UTF-8 and Latin-1 input by the whole toolset, delete preconv(1). + * Switch the default output mode from -Tascii to -Tlocale. + * Improve -Tascii output for Unicode escape sequences. + * Let the -Thtml output mode produce polyglot HTML5.
+ * Many improvements for eqn(7), in particular in-line equations, + MathML output in -Thtml mode, and much improved terminal formatting. + --- PORTABILITY IMPROVEMENTS --- + * Change the build sequence to the usual ./configure; make; make install. + * Support ./configure.local for build customizations. + * Autodetect wchar, sqlite3, and manpath support. + * Provide a fallback version of fts(3) for systems lacking it. + * Support choosing alternative binary and manual names. + --- MINOR NEW FEATURES --- + * Rudimentary implementation of the e, x, and z tbl(7) layout + modifiers to equalize, maximize, and ignore the width of columns. + * Implement font modifiers in tbl(7) layouts. + * Allow comma-separated options in the tbl(7) options line. + * Parse and ignore the .pl (page length) roff(7) request. + * Implement .An -[no]split for the mdoc(7) -Thtml output mode. + * Support bold italic font in PostScript and PDF output. + * Warn about commas in function arguments and parentheses in function names. + * Warn about botched .Xr ordering and punctuation below SEE ALSO. + * Warn about AUTHORS sections without .An macros. + * Warn about attempts to call non-callable macros. + * New developer documentation manual page mandoc_headers(3). + --- BUGFIXES --- + * Fix read buffer overrun sometimes triggered by trailing whitespace. + * Fix read buffer overrun triggered by certain invalid \H sequences. + * Fix NULL pointer access triggered by .Bl without any arguments. + * Fix NULL pointer access triggered by .It Nm Fo without .Fc. + * Fix NULL pointer access triggered by .Sh Xo .Sh without .Xc. + * Fix NULL pointer access triggered by missing .Nm. + * Fix an assertion triggered by .It right after .El. + * Fix an assertion triggered by .Ec without preceding .Eo. + * Fix an assertion triggered by .Sm or .Db with multiple arguments. + * Fix assertion failures triggered by very large width arguments. + * Fix a division by zero in the roff(7) parser. 
+ * Prevent negative arguments to .ll from causing integer underflow. + * Correctly autodetect source format even when .Dd is preceded by .ll. + * Multiple fixes with respect to .Bd and .Bl -offset and -width. + * Many bugfixes with respect to scaling units. + * Multiple fixes with respect to delimiter handling by in-line macros. + * Multiple fixes with respect to .Pf. + * Make \c work properly in no-fill mode. + * Stricter syntax checking of Unicode character names. + --- THANKS TO --- + * Kristaps Dzonsons for rewriting the eqn(7) parser, implementing + HTML5 and MathML output, and various other code contributions. + * Jonathan Gray (OpenBSD) for extensive testing with afl (the + American Fuzzy Lop security fuzzer) resulting in many bug reports. + * Anthony Bentley (OpenBSD), Baptiste Daroussin (FreeBSD), Daniel + Dickman, Doug Hogan, Jason McIntyre, Theo de Raadt (OpenBSD), + and Martin Natano for source code patches. + * Carsten Kunze (Heirloom troff), Daniel Levai (Slackware), + Garrett D'Amore (illumos), Giovanni Becchis, Matthew Dempsky, + Stuart Henderson, Ted Unangst, Todd Miller (OpenBSD), Thomas + Klausner (NetBSD), Ulrich Spoerlein (FreeBSD), Justin Haynes, + Marcus Merighi, Sebastien Marie, Steffen Nurpmeso and Theo Buehler + for bug reports. + +Changes in version 1.13.1, released on August 10, 2014 + + --- MAJOR NEW FEATURES --- + * A complete apropos(1)/makewhatis(8)/man.cgi(8) suite + based on SQLite3 is now included. + * The roff(7) parser now provides an almost complete implementation + of numerical expressions. + * Warning and error messages have been improved in many ways. + Almost all fatal errors were downgraded to normal errors and some + even to warnings. Almost all messages now mention the macro where + the issue is detected and many indicate the workaround employed. + The mandoc(1) manual now includes a list explaining all messages. 
+ --- MINOR NEW FEATURES --- + * The roff(7) parser now supports the .ami (append to macro with + indirectly specified name), .as (append to user-defined + string), .dei (define macro with indirectly specified name), + .ll (line length), and .rr (remove register) requests. + * The roff(7) parser now supports string comparison and numerical + conditionals in the .if and .ie requests. + * The roff parser now fully supports the \B (validate numerical + expression) and partially supports the \w (measure text width) + escape sequences. + * The terminal formatter now supports the \: (optional line break) + escape sequence. + * The roff parser now supports expansion of user-defined strings + involving indirect references. + * The roff(7) parser now handles some pre-defined read-only + number registers that occur in the pod2man(1) preamble. + * For backward compatibility, the mdoc(7) parser and formatters + now support the obsolete macros .En, .Es, .Fr, and .Ot. + * The mdoc(7) formatter now partially supports .Bd -centered. + * tbl(7) now handles leading and trailing vertical lines. + * The build system now provides fallback versions of strcasestr(3) + and strsep(3) for systems lacking them. + * The mdoc(7) manual now explains how various standards + supported by the .St macro are related to each other. + --- BUGFIXES --- + * In the roff(7) parser, several bugs were fixed with respect + to closing conditional blocks on macro lines. + * Parsing of roff(7) identifiers and escape sequences was improved + in multiple respects. + * In the mdoc(7) parser, the handling of defective document + prologues was improved in multiple ways. + * The mdoc(7) parser no longer skips content before the first section + header, and it no longer deletes non-.% content from .Rs blocks. + * In the mdoc(7) parser, a crash was fixed related to weird .Sh headers. + * In the mdoc(7) parser, handling of .Sm with missing or invalid + arguments was corrected.
+ * In the mdoc(7) parser, trailing punctuation at the end of partial + implicit macros no longer triggers end-of-sentence spacing. + * In the terminal formatter, two crashes were fixed: one triggered by + excessive indentation and another by excessively long .Nm arguments. + * In the terminal formatter, a floating point rounding bug was + fixed that sometimes caused an off-by-one error in indentation. + * In the UTF-8 formatter, rendering of accents, breakable hyphens, + and non-breakable spaces was corrected. + * In the HTML formatter, encoding of special characters was + corrected in multiple respects. + * In the mdoc(7) formatter, rendering of .Ex and .Rv was + improved for various edge cases. + * In the mdoc(7) formatter, handling of empty .Bl -inset item + heads was improved. + * In the man(7) formatter, some bugs were fixed with respect + to same-line detection in the context of .TP and .nf macros, + and the indentation of .IP and .TP blocks was improved. + * The mandoc(3) library no longer prints to stderr. + --- THANKS TO --- + Abhinav Upadhyay (NetBSD), Andreas Voegele, Anthony Bentley (OpenBSD), + Christian Weisgerber (OpenBSD), Havard Eidnes (NetBSD), Jan Stary, + Jason McIntyre (OpenBSD), Jeremie Courreges-Anglas (OpenBSD), + Joerg Sonnenberger (NetBSD), Juan Francisco Cantero Hurtado (OpenBSD), + Marc Espie (OpenBSD), Matthias Scheler (NetBSD), Pascal Stumpf (OpenBSD), + Paul Onyschuk (Alpine Linux), Sebastien Marie, Steffen Nurpmeso, + Stuart Henderson (OpenBSD), Ted Unangst (OpenBSD), Theo de Raadt (OpenBSD), + Thomas Klausner (NetBSD), and Ulrich Spoerlein (FreeBSD) + for reporting bugs and missing features. + +Changes in version 1.12.3, released on December 31, 2013 + + * In the mdoc(7) SYNOPSIS, line breaks and hanging indentation + now work correctly for .Fo/.Fa/.Fc and .Fn blocks. + Thanks to Franco Fichtner for doing part of the work. + * The mdoc(7) .Bk macro got some additional bugfixes.
+ * In mdoc(7) macro arguments, double quotes can now be quoted + by doubling them, just like in man(7). + Thanks to Tsugutomo ENAMI for the patch. + * At the end of man(7) macro lines, end-of-sentence spacing + now works. Thanks to Franco Fichtner for the patch. + * For backward compatibility, the man(7) parser now supports the + man-ext .UR/.UE (uniform resource identifier) block macros. + * The man(7) parser now handles closing blocks that are not open + more gracefully. + * The man(7) parser now ignores blank lines right after .SH and .SS. + * In the man(7) formatter, reset indentation when leaving a block, + not just when entering the next one. + * The roff(7) .nr request now supports incrementing and decrementing + number registers and stops parsing the number right before the + first non-digit character. + * The roff(7) parser now supports the alternative escape sequence + syntax \C'uXXXX' for Unicode characters. + * The roff(7) parser now parses and ignores the .fam (font family) + and .hw (hyphenation points) requests and the \d and \u escape + sequences. + * The roff(7) manual got a new ESCAPE SEQUENCE REFERENCE. + +Changes in version 1.12.2, released on October 5, 2013 + + * The mdoc(7) to man(7) converter, to be called as mandoc -Tman, + is now fully functional. + * The mandoc(1) utility now supports the -Ios (default operating system) + input option, and the -Tutf8 output mode now actually works. + * The mandocdb(8) utility no longer truncates existing databases when + starting to build new ones, but only replaces them when the build + actually succeeds. + * The man(7) parser now supports the PD macro (paragraph distance), + and (for GNU man-ext compatibility only) EX (example block) and EE + (example end). Plus several bugfixes regarding indentation, line + breaks, and vertical spacing, and regarding RS following TP.
+ * The roff(7) parser now supports the \f(BI (bold+italic) font escape, + the \z (zero cursor advance) escape and the cc (change control + character) and it (input line trap) requests. Plus bugfixes regarding + the \t (tab) escape, nested escape sequences, and conditional requests. + * In mdoc(7), several bugs were fixed related to UTF-8 output of quoting + enclosures, delimiter handling, list indentation and horizontal and + vertical spacing, formatting of the Lk, %U, and %C macros, plus some + bugfixes related to the handling of syntax errors like badly nested + font blocks, stray Ta macros outside column lists, unterminated It Xo + blocks, and non-text children of Nm blocks. + * In tbl(7), the width of horizontal spans and the vertical spacing + around tables was corrected, and in man(7) files, a crash was fixed + that was triggered by some particular unclosed T{ macros. + * For mandoc developers, we now provide a tbl(3) library manual and + gmdiff, a very small, very simplistic groff-versus-mandoc output + comparison tool. + * Provide this NEWS file. + +Changes in version 1.12.1, released on March 23, 2012 + + * Significant work on apropos(1) and mandocdb(8). These tools are now + much more robust. A whatis(1) implementation is now handled as an + apropos(1) mode. These tools are also able to minimally handle + pre-formatted pages, that is, those already formatted by another + utility such as GNU troff. + * The man.cgi(7) script is also now available for wider testing. + It interfaces with mandocdb(8) manuals cached by catman(8). + HTML output is generated on-the-fly by libmandoc or internal + methods to convert pre-formatted pages. + * The mailing list archives for the discuss and tech lists are being + hosted by Gmane at gmane.comp.tools.mdocml.user and + gmane.comp.tools.mdocml.devel, respectively. + +Changes in version 1.12.0, released on October 8, 2011 + + * This version features a new, work-in-progress mandoc(1) output mode: + -Tman.
This mode allows a system maintainer to distribute man(7) + media for older systems that may not natively support mdoc(7), such + as old Solaris systems. + * The -Ofragment option was added to mandoc(1)'s -Thtml and -Txhtml modes. + * While adding features, an apropos(1) utility has been merged from the + mandoc-tools sandbox. This interfaces with mandocdb(8) for semantic + search of manual content. apropos(1) is different from the traditional + apropos primarily in allowing keyword search (such as for functions, + utilities, etc.) and regular expressions. Note that the calling + syntax for apropos is likely to change as it settles down. + * In documentation news, the mdoc(7) and man(7) manuals have been + made considerably more readable by adding MACRO OVERVIEW sections, by + moving the gory details of the LANGUAGE SYNTAX to the roff(7) manual, + and by moving the very technical MACRO SYNTAX sections down to the + bottom of the page. + * Furthermore, for tbl(7), the -Tascii mode horizontal spacing of tables + was rewritten completely. It is now compatible with groff(1), both + with and without frames and rulers. + * Nesting of indented blocks is now supported in man(7), and several + bugs were fixed regarding indentation and alignment. + * The page headers in mdoc(7) are now nicer for very long titles. + +Changes in version 1.11.7, released on September 2, 2011 + + * Added demandoc(1) utility for stripping away macros and escapes. + This replaces the historical deroff(1) utility. + * Also improved the mdoc(7) and man(7) manuals. + +Changes in version 1.11.6, released on August 16, 2011 + + * Handling of tr macro in roff(7) implemented. This makes Perl + documentation much more readable. Hyphenation is also now enabled in + man(7) format documents. Many other general improvements have been + implemented. + +Changes in version 1.11.5, released on July 24, 2011 + + * Significant eqn(7) improvements. 
mdocml can now parse arbitrary eqn + input (although few GNU extensions are accepted, nor is mixing + low-level roff with eqn). See the eqn(7) manual for details. + For the time being, equations are rendered as simple in-line text. + The equation parser satisfies the language specified in the + Second Edition User's Guide: + http://www.kohala.com/start/troff/v7man/eqn/eqn2e.ps + +Changes in version 1.11.4, released on July 12, 2011 + + * Bug-fixes and clean-ups across all systems, especially in mandocdb(8) + and the man(7) parser. This release was significantly assisted by + participants in OpenBSD's c2k11. Thanks! + +Changes in version 1.11.3, released on May 26, 2011 + + * Introduce locale-encoding of output with the -Tlocale output option and + Unicode escaped-character input. See mandoc(1) and mandoc_char(7), + respectively, for details. This allows for non-ASCII characters (e.g., + \[u5000]) to be rendered in the locale's encoding, if said environment + supports wide-character encoding (if it does not, -Tascii is used + instead). Locale support can be turned off at compile time by removing + -DUSE_WCHAR in the Makefile, in which case -Tlocale is always a synonym + for -Tascii. + * Furthermore, multibyte-encoded documents, such as those in UTF-8, may + be on-the-fly recoded into mandoc(1) input by using the newly-added + preconv(1) utility. Note: in the future, this feature may be + integrated into mandoc(1). + +Changes in version 1.11.2, released on May 12, 2011 + + * Corrected some installation issues in version 1.11.1. + * Further migration to libmandoc. + * Initial public release (this utility is very much under development) + of mandocdb(8). This utility produces keyword databases of manual + content, which features semantic querying of manual content. 
+ +Changes in version 1.11.1, released on April 4, 2011 + + * The earlier libroff, libmdoc, and libman soup have been merged into + a single library, libmandoc, which manages all aspects of parsing + real manuals, from line-handling to tbl(7) parsing. + * As usual, many general fixes and improvements have also occurred. + In particular, a great deal of redundancy and superfluous code has + been removed with the merging of the backend libraries. + * see also the changes in 1.10.10 + +Changes in version 1.10.10, March 20, 2011, NOT released + + * Initial eqn(7) functionality is in place. For the time being, + this is limited to the recognition of equation blocks; + future versions of mdocml will expand upon this framework. + +Changes in version 1.10.9, released on January 7, 2011 + + * Many back-end fixes have been implemented: argument handling (quoting), + man(7) improvements, error/warning classes, and many more. + * Initial tbl(7) functionality (see the "TS", "TE", and "T&" macros in + the roff(7) manual) has been merged from tbl.bsd.lv. Output is still + minimal, especially for -Thtml and -Txhtml, but manages to at least + display data. This means that mandoc(1) now has built-in support + for two troff preprocessors via libroff: soelim(1) and tbl(1). + +Changes in version 1.10.8, released on December 24, 2010 + + * Overhauled the -Thtml and -Txhtml output modes. They now display + readable output in arbitrary browsers, including text-based ones like + lynx(1). See HTML and XHTML manuals in the DOCUMENTATION section + for examples. Attention: available style-sheet classes have been + considerably changed! See the example.style.css file for details. + Lastly, libmdoc and libman have been cleaned up and reduced in size + and complexity.
+ * see also the changes in 1.10.7 + +Changes in version 1.10.7, December 6, 2010, NOT released + + Significant improvements merged from OpenBSD downstream, including: + * many new roff(7) components, + * in-line implementation of troff's soelim(1), + * broken-block handling, + * overhauled error classifications, and + * cleaned up handling of error conditions. + +Changes in version 1.10.6, released on September 27, 2010 + + * Calling conventions for mandoc(1) have changed: -W improved and -f + deprecated. + * Non-ASCII characters are also now uniformly discarded. + * Lots of documentation improvements. + * Many incremental fixes accommodating for groff's more interesting + productions. + * Lastly, pod2man(1) preambles are now fully accepted after some + considerable roff(7) and special character support. + +Changes in version 1.10.5, released on July 27, 2010 + + * Primarily a bug-fix and polish release, but including -Tpdf support + in mandoc(1) by way of "Summer of Code". Highlights: + * fix "Sm" and "Bd" handling + * fix end-of-sentence handling for embedded sentences + * polish man(7) documentation + * document all mdoc(7) macros + * polish mandoc(1) -Tps output + * lots of internal clean-ups in character escapes + * un-break literal contexts in man(7) documents + * improve -Thtml output for -man + * add mandoc(1) -Tpdf support + +Changes in version 1.10.4, released on July 12, 2010 + + * Lots of features developed during both "Summer of Code" and the + OpenBSD c2k10 hackathon: + * minimal "ds" roff(7) symbols are supported + * beautified SYNOPSIS section output + * acceptance of scope-block breakage in mdoc(7) + * clarify error message status + * many minor bug-fixes and formatting issues resolved + * see also changes in 1.10.3 + +Changes in version 1.10.3, June 29, 2010, NOT released + + * variable font-width and paper-size support in mandoc(1) -Tps output + * "Bk" mdoc(7) support + +Changes in version 1.10.2, released on June 19, 2010 + + * Small release
featuring text-decoration in -Tps output, + a few minor relaxations of errors, and some optimisations. + +Changes in version 1.10.1, released on June 7, 2010 + + * This primarily focusses on the "Bl" and "It" macros described in + mdoc(7). Multi-line column support is now fully compatible with groff, + as are implicit list entries for columns. + * Removed manuals(7) in favour of http://manpages.bsd.lv. + * The way we handle the SYNOPSIS section (see the SYNOPSIS documentation + in MANUAL STRUCTURE) has also been considerably simplified compared + to groff's method. + * Furthermore, the -Owidth=width output option has been added to -Tascii, + see mandoc(1). + * Lastly, initial PostScript output has been added with the -Tps option + to mandoc(1). It's brutally simple at the moment: fixed-font, with no + font decorations. + +Changes in version 1.10.0, released on May 29, 2010 + + * Release consisting of the results from the m2k10 hackathon and up-merge + from OpenBSD. This requires a significant note of thanks to Ingo + Schwarze (OpenBSD) and Joerg Sonnenberger (NetBSD) for their hard work, + and again to Joerg for hosting m2k10. 
Highlights (mostly cribbed from + Ingo's m2k10 report) follow in no particular order: + * a libroff preprocessor in front of libmdoc and libman stripping out + roff(7) instructions; + * end-of-sentence (EOS) detection in free-form and macro lines; + * correct handling of tab-separated columnar lists in mdoc(7); + * improved main calling routines to optionally use mmap(3) for better + performance; + * cleaned up exiting when invoked as -Tlint or over multiple files + with -fign-errors; + * error and warning message handling re-written to be unified for + libroff, libmdoc, and libman; + * handling of badly-nested explicit-scoped macros; + * improved free-form text parsing in libman and libmdoc; + * significant GNU troff compatibility improvements in -Tascii, + largely in terms of spacing; + * a regression framework for making sure the many fragilities of GNU + troff aren't trampled in subsequent work; + * support for -Tascii breaking at hyphens encountered in free-form text; + * and many more minor fixes and improvements + +Changes in version 1.9.25, released on May 13, 2010 + + * Fixed handling of "\*(Ba" escape. + * Backed out -fno-ign-chars (pointless complexity). + * Fixed erroneous breaking of literal lines. + * Fixed SYNOPSIS breaking lines before non-initial macros. + * Changed default section ordering. + * Most importantly, the framework for end-of-sentence double-spacing is + in place, now implemented for the "end-of-sentence, end-of-line" rule. + * This is a stable roll-back point before the mandoc hackathon in Rostock! + +Changes in version 1.9.24, released on May 9, 2010 + + * Rolled back break-at-hyphen. + * -DUGLY is now the default (no feature splits!). + * Free-form text is not de-chunked any more: lines are passed + whole-sale into the front-end, including whitespace. + * Added mailing lists. + +Changes in version 1.9.23, released on April 7, 2010 + + * mdocml has been linked to the OpenBSD build. 
+ * This version incorporates many small changes, mostly from patches + by OpenBSD, allowing crufty manuals to slip by with warnings instead + of erroring-out. + * Some subtle semantic issues, such as punctuation scope, have also + been fixed. + * Lastly, some issues with -Thtml have been fixed, which prompted an + update to the online manual pages style layout. + +Changes in version 1.9.22, released on March 31, 2010 + + * Adjusted merge of the significant work by Ingo Schwarze + in getting "Xo" blocks (block full implicit, e.g., "It" + for non-columnar lists) to work properly. This isn't + enabled by default: you must specify -DUGLY as a compiler + flag (see the Makefile for details). + +Changes in version 1.9.20, released on March 30, 2010 + + * More efforts to get roff instructions in man(7) documents under + control. Note that roff instructions embedded in line-scoped, + next-line macros (e.g. "B") are not supported. + * Leading punctuation for mdoc(7) macros, such as "Fl ( ( a", + are now correctly handled. + +Changes in version 1.9.18, released on March 27, 2010 + + * Many fixes (largely pertaining to scope) + and improvements (e.g., handling of apostrophe-control macros, + which fixes the strange "BR" seen in some macro output) + to handling roff instructions in man(7) documents. + +Changes in version 1.9.17, released on March 25, 2010 + + * Accept perlpod(1) standard preamble. + * Also accept (and discard) "de", "dei", "am", "ami", and "ig" + roff macro blocks. + +Changes in version 1.9.16, released on March 22, 2010 + + * Inspired by patches and bug reports by Ingo Schwarze, + allowed man(7) to accept non-printing elements to be nested + within next-line scopes, such as "br" within "B" or "TH", + which is valid roff. + * Loongson architecture also noted and Makefile cleaned up. + +Changes in version 1.9.15, released on February 18, 2010 + + * Moved to our new BSD.lv home.
+ * XHTML is now an acceptable output mode for mandoc(1); + * "Xr" made more compatible with groff; + * "Vt" fixed when invoked in SYNOPSIS; + * "\\" escape removed; + * end-of-line white-space detected for all lines; + * subtle bug fixed in list display for some modes; + * compatibility layer checked in for compilation in diverse + UNIX systems; + * and column lengths handled correctly. + +For older releases, see the ChangeLog files +in http://mdocml.bsd.lv/snapshots/ . diff --git a/contrib/mdocml/TODO b/contrib/mdocml/TODO new file mode 100644 index 0000000..d24c939 --- /dev/null +++ b/contrib/mdocml/TODO @@ -0,0 +1,673 @@ +************************************************************************ +* Official mandoc TODO. +* $Id: TODO,v 1.216 2016/01/08 01:37:32 schwarze Exp $ +************************************************************************ + +Many issues are annotated for difficulty as follows: + + - loc = locality of the issue + * single file issue, affects file only, or very few + ** single module issue, affects several files of one module + *** cross-module issue, significantly impacts multiple modules + and may require substantial changes to internal interfaces + - exist = difficulty of the existing code in this area + * affected code is straightforward and easy to read and change + ** affected code is somewhat complex, but once you understand + the design, not particularly difficult to understand + *** affected code uses a special, exceptionally tricky design + - algo = difficulty of the new algorithm to be written + * the required logic and code is straightforward + ** the required logic is somewhat complex and needs a careful design + *** the required logic is exceptionally tricky, + maybe an approach to solve that is not even known yet + - size = the amount of code to be written or changed + * a small number of lines (at most 100, usually much less) + ** a considerable amount of code (several dozen to a few hundred) + *** a large amount of code 
(many hundreds, maybe thousands) + - imp = importance of the issue + * mostly for completeness + ** would be nice to have + *** issue causes considerable inconvenience + +Obviously, as the issues have not been solved yet, these annotations +are mere guesses, and some may be wrong. + +************************************************************************ +* crashes +************************************************************************ + +- The abort() in bufcat(), html.c, can be triggered via buffmt_includes() + by running -Thtml -Oincludes on a file containing a long .In argument. + Fixing this will probably require reworking the whole bufcat() concept. + loc ** exist * algo * size ** imp ** + +************************************************************************ +* missing features +************************************************************************ + +--- missing roff features ---------------------------------------------- + +- .ad (adjust margins) + .ad l -- adjust left margin only (flush left) + .ad r -- adjust right margin only (flush right) + .ad c -- center text on line + .ad b -- adjust both margins (alias: .ad n) + .na -- temporarily disable adjustment without changing the mode + .ad -- re-enable adjustment without changing the mode + Adjustment mode is ignored while in no-fill mode (.nf). 
+ loc *** exist *** algo ** size ** imp ** (parser reorg would help) + +- .fc (field control) + found by naddy@ in xloadimage(1) + loc ** exist *** algo * size * imp * + +- .nr third argument (auto-increment step size, requires \n+) + found by bentley@ in sbcl(1) Mon, 9 Dec 2013 18:36:57 -0700 + loc * exist * algo * size * imp ** + +- .ns (no-space mode) occurs in xine-config(1) + when implementing this, also let .TH set it + reported by brad@ Sat, 15 Jan 2011 15:45:23 -0500 + loc *** exist *** algo *** size ** imp * + +- .ta (tab settings) + #1 most important issue naddy@ Mon, 16 Feb 2015 20:59:17 +0100 + ircbug(1) gnats(1) reported by brad@ Sat, 15 Jan 2011 15:50:51 -0500 + also Tcl_NewStringObj(3) via wiz@ Wed, 5 Mar 2014 22:27:43 +0100 + also posix2time(3) Carsten Kunze Mon, 1 Dec 2014 13:03:10 +0100 + loc ** exist *** algo ** size ** imp *** + +- .ti (temporary indent) + found by naddy@ in xloadimage(1) [devel/libvstr] vstr(3) + found by bentley@ in nmh(1) Mon, 23 Apr 2012 13:38:28 -0600 + loc ** exist ** algo ** size * imp ** (parser reorg helps a lot) + +- .while and .shift + found by jca@ in ratpoison(1) Sun, 30 Jun 2013 12:01:09 +0200 + loc * exist ** algo ** size ** imp ** + +- \h horizontal move + #2 most important issue naddy@ Mon, 16 Feb 2015 20:59:17 +0100 + found in cclive(1) nasm(1) bogofilter(1) asciidoc/DocBook output + bentley@ on discuss@ Sat, 21 Sep 2013 22:29:34 -0600 + naddy@ Thu, 4 Dec 2014 16:26:41 +0100 + loc ** exist ** algo ** size * imp *** (parser reorg helps a lot) + +- \n+ and \n- numerical register increment and decrement + found by bentley@ in sbcl(1) Mon, 9 Dec 2013 18:36:57 -0700 + loc * exist * algo * size * imp ** + +- \n(.$ macro argument count number register; ocserv(8) by autogen + found by sthen@ Thu, 19 Feb 2015 22:03:01 +0000 + loc * exist ** algo * size * imp ** + +- \w'' improve width measurements + would not be very useful without an expression parser, see below + needed for Tcl_NewStringObj(3) via wiz@ Wed, 5 Mar 2014 
22:27:43 +0100 + loc ** exist *** algo *** size * imp *** + +- \\ in high-level macro arguments + Currently, \\ is expanded in two situations: + 1) macro and string definition (roff.c setstrn()) + 2) macro argument parsing (mandoc.c mandoc_getarg()) + For user defined macros, the second happens in time because of ROFF_REPARSE. + But for standard high-level macros, it only happens after entering the + high level parsers, which is too late because the code doesn't get + back to roff.c roff_res() from that point. Because this requires + distinguishing requests, user-defined macros and standard macros + on the roff_res() level, it is hard to solve without the parser reorg. + Found by naddy@ in devel/cutils cobfusc(1) Mon, 16 Feb 2015 19:10:52 +0100 + loc *** exist *** algo *** size ** imp * + +- using undefined strings or macros defines them to be empty + wl@ Mon, 14 Nov 2011 14:37:01 +0000 + loc * exist * algo * size * imp * + +--- missing mdoc features ---------------------------------------------- + +- .Bl -column .Xo support is missing + ultimate goal: + restore .Xr and .Dv to + lib/libc/compat-43/sigvec.3 + lib/libc/gen/signal.3 + lib/libc/sys/sigaction.2 + loc * exist *** algo *** size * imp ** + +- edge case: decide how to deal with blk_full bad nesting, e.g. + .Sh .Nm .Bk .Nm .Ek .Sh found by jmc@ in ssh-keygen(1) + from jmc@ Wed, 14 Jul 2010 18:10:32 +0100 + loc * exist *** algo *** size ** imp ** + +- .Bd -centered implies -filled, not -unfilled, which is not + easy to implement; it requires code similar to .ce, which + we don't have either. + Besides, groff has a bug causing text right *before* .Bd -centered + to be centered as well. + loc *** exist *** algo ** size ** imp ** (parser reorg would help) + +- .Bd -filled should not be the same as .Bd -ragged, but align both + the left and right margin. In groff, it is implemented in terms + of .ad b, which we don't have either. Found in cksum(1).
+ loc *** exist *** algo ** size ** imp ** (parser reorg would help) + +- implement blank `Bl -column', such as + .Bl -column + .It foo Ta bar + .El + loc * exist *** algo *** size * imp * + +- explicitly disallow nested `Bl -column', which would clobber internal + flags defined for struct mdoc_macro + loc * exist * algo * size * imp ** + +- In .Bl -column .It, the end of the line probably has to be regarded + as an implicit .Ta, if there could be one, see the following mildly + ugly code from login.conf(5): + .Bl -column minpasswordlen program xetcxmotd + .It path Ta path Ta value of Dv _PATH_DEFPATH + .br + Default search path. + reported by Michal Mazurek <akfaew at jasminek dot net> + via jmc@ Thu, 7 Apr 2011 16:00:53 +0059 + loc * exist *** algo ** size * imp ** + +- inside `.Bl -column' phrases, punctuation is handled like normal + text, e.g. `.Bl -column .It Fl x . Ta ...' should give "-x -." + +- inside `.Bl -column' phrases, TERMP_IGNDELIM handling by `Pf' + is not safe, e.g. `.Bl -column .It Pf a b .' gives "ab." + but should give "ab ." + +- check whether it is correct that `D1' uses INDENT+1; + does it need its own constant? + loc * exist ** algo ** size * imp ** + +- prohibit `Nm' from having non-text HEAD children + (e.g., NetBSD mDNSShared/dns-sd.1) + (mdoc_html.c and mdoc_term.c `Nm' handlers can be slightly simplified) + +- support translated section names + e.g. x11/scrotwm scrotwm_es.1:21:2: error: NAME section must be first + that one uses NOMBRE because it is spanish... + deraadt tends to think that section-dependent macro behaviour + is a bad idea in the first place, so this may be irrelevant + loc ** exist ** algo ** size * imp ** + +- When there is free text in the SYNOPSIS and that free text contains + the .Nm macro, groff somehow understands to treat the .Nm as an in-line + macro, while mandoc treats it as a block macro and breaks the line. 
+ No idea how the logic for distinguishing in-line and block instances + should be, needs investigation. + uqs@ Thu, 2 Jun 2011 11:03:51 +0200 + uqs@ Thu, 2 Jun 2011 11:33:35 +0200 + loc * exist ** algo *** size * imp ** + +--- missing man features ----------------------------------------------- + +- -T[x]html doesn't stipulate non-collapsing spaces in literal mode + +--- missing tbl features ----------------------------------------------- + +- horizontal lines in the layout still consume data cells + and can be mixed with actual data on the same table line + synaptics(4) found by tedu@ Mon, 17 Aug 2015 21:17:42 -0400 + loc ** exist ** algo ** size ** imp *** + +- the "w" layout option is ignored + synaptics(4) found by tedu@ Mon, 17 Aug 2015 21:17:42 -0400 + loc * exist * algo * size * imp ** + +- the "s" layout column specifier is used for placement of data + into columns, but ignored during column width calculations + synaptics(4) found by tedu@ Mon, 17 Aug 2015 21:17:42 -0400 + loc * exist ** algo *** size * imp ** + +- support mdoc(7) and man(7) macros inside tbl(7) code; + probably requires the parser reorg and letting tbl(7) + use roff_node such that macro sets can mix; + informed by bapt@ that FreeBSD needs this. + loc *** exist ** algo *** size ** imp *** + +- look at the POSIX manuals in the books/man-pages-posix port, + they use some unsupported tbl(7) features. + loc * exist ** algo ** size ** imp *** + +- use Unicode U+2500 to U+256C for table borders + in tbl(7) -Tutf-8 output + suggested by bentley@ Tue, 14 Oct 2014 04:10:55 -0600 + loc * exist ** algo * size * imp ** + +--- missing eqn features ----------------------------------------------- + +- In a matrix, break the output line after each matrix line. + Found in the discussion at CDBUG 2015. + Suggested by Avi Weinstock. + loc * exist * algo * size * imp ** + +- The "size" keyword is parsed, but ignored by the formatter. 
+ loc * exist * algo * size * imp * + +- The spacing characters `~', `^', and tab are currently ignored, + see User's Guide (Second Edition) page 2 section 4. + loc * exist * algo ** size * imp ** + +- Mark and lineup are parsed and ignored, + see User's Guide (Second Edition) page 5 section 15. + loc ** exist ** algo ** size ** imp ** + +--- missing misc features ---------------------------------------------- + +- italic correction (\/) in PostScript mode + Werner LEMBERG on groff at gnu dot org Sun, 10 Nov 2013 12:47:46 + loc ** exist ** algo * size * imp * + +- change the default PAGER to more -Es and use the pager + even for apropos title line output; req by bapt@ + loc * exist * algo * size * imp *** + +- makewhatis(8) for preformatted pages: + parse the section number from the header line + and compare to the section number from the directory name + loc * exist * algo * size * imp ** + +- Does makewhatis(8) detect missing NAME sections, missing names, + and missing descriptions in all the file formats? + loc * exist * algo * size * imp *** + +- clean up escape sequence handling, creating three classes: + (1) fully implemented, or parsed and ignored without loss of content + (2) unimplemented, potentially causing loss of content + or serious mangling of formatting (e.g. \n) -> ERROR + see textproc/mgdiff(1) for nice examples + (3) undefined, just output the character -> perhaps WARNING + loc *** exist ** algo ** size ** imp *** (parser reorg helps) + +- kettenis wants base roff, ms, and me Fri, 1 Jan 2010 22:13:15 +0100 (CET) + loc ** exist ** algo ** size *** imp * + +- Vsevolod Stakhov (FreeBSD) needs either a markdown output formatter + for mandoc -mdoc or a markdown to mdoc converter because they + have to maintain manuals needed both in markdown and mdoc format. 
+ Look at the libsoldout (markdown -> whatever) + loc * exist * algo * size ** imp ** + +--- compatibility checks ----------------------------------------------- + +- is .Bk implemented correctly in modern groff? + sobrado@ Tue, 19 Apr 2011 22:12:55 +0200 + +- compare output to Heirloom roff, Solaris roff, and + http://repo.or.cz/w/neatroff.git http://litcave.rudi.ir/ + +- look at AT&T DWB http://www2.research.att.com/sw/download + Carsten Kunze <carsten dot kunze at arcor dot de> has patches + Mon, 4 Aug 2014 17:01:28 +0200 + +- look at pages generated from reStructuredText, e.g. devel/mercurial hg(1) + These are a weird mixture of man(7) and custom autogenerated low-level + roff stuff. Figure out to what extent we can cope. + For details, see http://docutils.sourceforge.net/rst.html + noted by stsp@ Sat, 24 Apr 2010 09:17:55 +0200 + reminded by nicm@ Mon, 3 May 2010 09:52:41 +0100 + +- look at pages generated from ronn(1) github.com/rtomayko/ronn + (based on markdown) + +- look at pages generated from Texinfo source by yat2m, e.g. security/gnupg + First impression is not that bad. + +- look at pages generated by pandoc; see + https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/Writers/Man.hs + porting planned by kili@ Thu, 19 Jun 2014 19:46:28 +0200 + +- check compatibility with Plan9: + http://swtch.com/usr/local/plan9/tmac/tmac.an + http://swtch.com/plan9port/man/man7/man.html + "Anthony J.
Bentley" <anthonyjbentley@gmail.com> 28 Dec 2010 21:58:40 -0700 + +- check compatibility with COHERENT troff: + http://www.nesssoftware.com/home/mwc/source.php + +- check compatibility with the man(7) formatter + https://raw.githubusercontent.com/rofl0r/hardcore-utils/master/man.c + +- check compatibility with + http://ikiwiki.info/plugins/contrib/mandoc/ + https://github.com/schmonz/ikiwiki/compare/mandoc + Amitai Schlair Mon, 19 May 2014 14:05:53 -0400 + +************************************************************************ +* formatting issues: ugly output +************************************************************************ + +- revisit empty in-line macros + look at the difference between "Em x Em ." and "Sq x Em ." + Carsten Kunze Fri, 12 Dec 2014 00:15:41 +0100 + loc *** exist *** algo *** size * imp ** + +- a column list with blank `Ta' cells triggers a spurious + start-with-whitespace printing of a newline + +- In .Bl -column, .It a<tab>"b<tab>c" + shows the quotes in groff, but not in mandoc + loc * exist *** algo ** size * imp ** + +- In .Bl -column, + .It Em Authentication<tab>Key Length + ought to render "Key Length" with emphasis, too, + see OpenBSD iked.conf(5). + reported again Nicolas Joly via wiz@ Wed, 12 Oct 2011 00:20:00 +0200 + loc * exist *** algo *** size ** imp *** + +- empty phrases in .Bl column produce too few blanks + try e.g. .Bl -column It Ta Ta + reported by millert Fri, 02 Apr 2010 16:13:46 -0400 + loc * exist *** algo *** size * imp ** + +- .%T can have trailing punctuation. Currently, it puts the trailing + punctuation into a trailing MDOC_TEXT element inside its own scope. + That element should rather be outside its scope, such that the + punctuation does not get underlines. This is not trivial to + implement because .%T then needs some features of in_line_eoln() - + slurp all arguments into one single text element - and one feature + of in_line() - put trailing punctuation out of scope. 
+ Found in mount_nfs(8) and exports(5), search for "Appendix". + loc ** exist ** algo *** size * imp ** + +- Trailing punctuation after .%T triggers EOS spacing, at least + outside .Rs (eek!). Simply setting ARGSFL_DELIM for .%T is not + the right solution, it sends mandoc into an endless loop. + reported by Nicolas Joly Sat, 17 Nov 2012 11:49:54 +0100 + loc * exist ** algo ** size * imp ** + +- global variables in the SYNOPSIS of section 3 pages + .Vt vs .Vt/.Va vs .Ft/.Va vs .Ft/.Fa ... + from kristaps@ Tue, 08 Jun 2010 11:13:32 +0200 + +- in enclosures, mandoc sometimes fancies a bogus end of sentence + reminded by jmc@ Thu, 23 Sep 2010 18:13:39 +0059 + loc * exist ** algo *** size * imp *** + +- a line starting with "\fB something" counts as starting with whitespace + and triggers a line break; found in audio/normalize-mp3(1) + loc ** exist * algo ** size * imp ** + +- formatting /usr/local/man/man1/latex2man.1 with groff and mandoc + reveals lots of bugs both in groff and mandoc... + reported by bentley@ Wed, 22 May 2013 23:49:30 -0600 + +--- PDF issues --------------------------------------------------------- + +- PDF output doesn't use a monospaced font for .Bd -literal + Example: "mandoc -Tpdf afterboot.8 > output.pdf && pdfviewer output.pdf". + Search the text "Routing tables". + Also check what PostScript mode does when fixing this. + reported by juanfra@ Wed, 04 Jun 2014 21:44:58 +0200 + instructions from juanfra@ Wed, 11 Jun 2014 02:21:01 +0200 + add a new <</Type /Font>> block to the PDF files with /BaseFont /Courier + and change the /Name from /F0 to the new font (/F5 (?)). + loc * exist ** algo ** size * imp ** + +--- HTML issues -------------------------------------------------------- + +- <dl><dt><dd> formatting is ugly + hints are easy to find on the web, e.g. 
+ http://stackoverflow.com/questions/1713048/ + see also matthew@ Fri, 18 Jul 2014 19:25:12 -0700 + loc * exist * algo ** size * imp *** + +- .Bf at the beginning of a paragraph inserts a bogus 1ex horizontal + space, see for example random(3). Introduced in + http://mdocml.bsd.lv/cgi-bin/cvsweb/mdoc_html.c.diff?r1=1.91&r2=1.92 + reported by deraadt@ Mon, 28 Sep 2015 20:14:13 -0600 (MDT) + loc ** exist ** algo ** size * imp * + +- jsg on icb, Nov 3, 2014: + try to guess Xr in man(7) for hyperlinking + +- The tables used to render the three-part page headers actually force + the width of the <body> to the max-width given for <html>. + Not yet sure how to fix that... + Observed by an Anonymous Coward on undeadly.org: + http://undeadly.org/cgi?action=article&sid=20140925064244&pid=1 + loc * exist * algo ** size * imp *** + +- consider whether <var> can be used for Ar Dv Er Ev Fa Va. + from bentley@ Wed, 13 Aug 2014 09:17:55 -0600 + +- generate <img> tags in HTML + idea from florian@ Tue, 7 Apr 2015 00:26:28 +0000 + may be possible to implement with .Lk img://something.png alt_text + +- check https://github.com/trentm/mdocml + +************************************************************************ +* formatting issues: gratuitous differences +************************************************************************ + +- .Fn reopens a new scope after punctuation in mandoc, + but closes its scope for good in groff. + Do we want to change mandoc or groff? + Steffen Nurpmeso Sat, 08 Nov 2014 13:34:59 +0100 + loc * exist ** algo ** size * imp ** + +- In .Bl -enum -width 0n, groff continues on the same line after + the number, mandoc breaks the line. + mail to kristaps@ Mon, 20 Jul 2009 02:21:39 +0200 + loc * exist ** algo ** size * imp ** + +- .Pp between two .It in .Bl -column should produce one, + not two blank lines, see e.g. login.conf(5).
+ reported by jmc@ Sun, 17 Apr 2011 14:04:58 +0059 + reported again by sthen@ Wed, 18 Jan 2012 02:09:39 +0000 (UTC) + loc * exist *** algo ** size * imp ** + +- If the *first* line after .It is .Pp, break the line right after + the tag, do not pad with space characters before breaking. + See the description of the a, c, and i commands in sed(1). + loc * exist ** algo ** size * imp ** + +- If the first line after .It is .D1, do not assert a blank line + in between, see for example tmux(1). + reported by nicm@ 13 Jan 2011 00:18:57 +0000 + loc * exist ** algo ** size * imp ** + +- Trailing punctuation after .It should trigger EOS spacing. + reported by Nicolas Joly Sat, 17 Nov 2012 11:49:54 +0100 + Probably, this should be fixed somewhere in termp_it_pre(), not sure. + loc * exist ** algo ** size * imp ** + +- .Nx 1.0a + should be "NetBSD 1.0A", not "NetBSD 1.0a", + see OpenBSD ccdconfig(8). + loc * exist * algo * size * imp ** + +- In .Bl -tag, if a tag exceeds the right margin and must be continued + on the next line, it must be indented by -width, not width+1; + see "rule block|pass" in OpenBSD ifconfig(8). + loc * exist *** algo ** size * imp ** + +- When the -width string contains macros, the macros must be rendered + before measuring the width, for example + .Bl -tag -width ".Dv message" + in magic(5), located in src/usr.bin/file, is the same + as -width 7n, not -width 11n. + The same applies to .Bl -column column widths; + reported again by Nicolas Joly Thu, 1 Mar 2012 13:41:26 +0100 via wiz@ 5 Mar + reported again by Franco Fichtner Fri, 27 Sep 2013 21:02:28 +0200 + loc *** exist *** algo *** size ** imp *** + An easy partial fix would be to just skip the first word if it starts + with a dot, including any following white space, when measuring. + loc * exist * algo * size * imp *** + +- The \& zero-width character counts as output. + That is, when it is alone on a line between two .Pp, + we want three blank lines, not two as in mandoc. 
+ loc ** exist ** algo ** size * imp ** + +- Header lines of excessive length: + Port OpenBSD man_term.c rev. 1.25 to mdoc_term.c + and document it in mdoc(7) and man(7) COMPATIBILITY + found while talking to Chris Bennett + loc * exist * algo * size * imp * + +- Sequences of multiple man(7) paragraphs (.PP, .IP) interspersed + with .ps and .nf/.fi produce excessive blank lines, see libJudy + and graphics/dcmtk. The parser reorg may help with this. + +- trailing whitespace must be ignored even when followed by a font escape, + see for example + makes + \fBdig \fR + operate in batch mode + in dig(1). + loc ** exist ** algo ** size * imp ** + +************************************************************************ +* portability +************************************************************************ + +- systems having UTF-8 but not en_US.UTF-8 + call locale(1) from ./configure, select a UTF-8-locale, + and use that for test-wchar.c and term_ascii.c + to Markus Waldeck Sat, 18 Jul 2015 01:55:37 +0200 + loc * exist * algo * size * imp * + +************************************************************************ +* warning issues +************************************************************************ + +- provide a way in mandoc(1) to warn about broken .Xr links; + probably cannot be on by default in -Tlint because it needs + to access the manpath and mandoc.db(3) after parsing. + asked for by jmc@ Fri, 4 Dec 2015 22:39:40 +0000 + +- Report errors in -O suboption parsing. + loc * exist * algo * size * imp ** + +- warn when .Sh or .Ss contain other macros + Steffen Nurpmeso, savannah.gnu.org/bugs/index.php?45034 + loc * exist * algo * size * imp ** + +- check that MANDOCERR_BADTAB is thrown in the right cases, + i.e.
when finding a literal tab character in fill mode, + and possibly change the wording of the warning message + to refer to fill mode, not literal mode + See the mail from Werner LEMBERG on the groff list, + Fri, 14 Feb 2014 18:54:42 +0100 (CET) + loc * exist ** algo ** size * imp ** + +- warn about attempts to call non-callable macros + Steffen Nurpmeso Tue, 11 Nov 2014 22:55:16 +0100 + Note that formatting is inconsistent in groff. + .Fn Po prints "Po()", .Ar Sh prints "file ..." and no "Sh". + Relatively hard because the relevant code is scattered + all over mdoc_macro.c and all subtly different. + loc ** exist ** algo ** size ** imp ** + +- warn about "new sentence, new line" + loc ** exist ** algo *** size * imp ** + +- mandoc_special does not really check the escape sequence, + but just the overall format + loc ** exist ** algo *** size ** imp ** + +- integrate mdoclint into mandoc ("end-of-line whitespace" thread) + from jmc@ Mon, 13 Jul 2009 17:12:09 +0100 + from kristaps@ Mon, 13 Jul 2009 18:34:53 +0200 + from jmc@ Mon, 13 Jul 2009 17:45:37 +0059 + from kristaps@ Mon, 13 Jul 2009 19:02:03 +0200 + (mostly done, check what remains) + +- -Tlint parser errors and warnings to stdout + to tech@mdocml, naddy@ Wed, 28 Sep 2011 11:21:46 +0200 + wait! kristaps@ Sun, 02 Oct 2011 17:12:52 +0200 + +- for system errors, use errno/strerror/warn/err + +************************************************************************ +* documentation issues +************************************************************************ + +- mention hyphenation rules: + breaking at letter-letter in text mode (not macro args) + proper hyphenation is unimplemented + +- talk about spacing around delimiters + to jmc@, kristaps@ Sat, 23 Apr 2011 17:41:27 +0200 + +- mark macros as: page structure domain, manual domain, general text domain + is this useful? + +- mention /usr/share/misc/mdoc.template in mdoc(7)? 
+ +- Is all the content from http://www.std.com/obi/BSD/doc/usd/28.tbl/tbl + covered in tbl(7)? + +************************************************************************ +* performance issues +************************************************************************ + +- Why are we using MAP_SHARED, not MAP_PRIVATE for mmap(2)? + How does SQLITE_CONFIG_PAGECACHE actually work? Document it! + from kristaps@ Sat, 09 Aug 2014 13:51:36 +0200 + +Several areas can be cleaned up to make mandoc even faster. These are + +- improve hashing mechanism for macros (quite important: performance) + +- improve hashing mechanism for characters (not as important) + +- the PDF file is HUGE: this can be reduced by using relative offsets + +- instead of re-initialising the roff predefined-strings set before each + parse, create a read-only version the first time and copy it + loc * exist ** algo ** size * imp ** + +************************************************************************ +* structural issues +************************************************************************ + +- Use libz directly instead of forking gunzip(1). + Suggested by bapt at FreeBSD among others. + +- We use the input line number at several places to distinguish + same-line from different-line input. That plainly doesn't work + with user-defined macros, leading to random breakage. + +- Find better ways to prevent endless loops + in roff(7) macro and string expansion. + +- Finish cleanup of date handling. + Decide which formats should be recognized where. + Update both mdoc(7) and man(7) documentation. + Triggered by Tim van der Molen Tue, 22 Feb 2011 20:30:45 +0100 + +- struct mparse refactoring + Steffen Nurpmeso Thu, 04 Sep 2014 12:50:00 +0200 + +- Consider creating some views that will make the database more + readable from the sqlite3 shell. Consider using them to + abstract from the database structure, too. 
+ suggested by espie@ Sat, 19 Apr 2014 14:52:57 +0200 + +************************************************************************ +* CGI issues +************************************************************************ + + - Enable HTTP compression by detecting gzip encoding and filtering + output through libz. + - Sandbox (see OpenSSH). + - Enable caching support via HTTP 304 and If-Modified-Since. + - Allow for cgi.h to be overridden by CGI environment variables. + Otherwise, binary distributions will inherit the compile-time + behaviour, which is not optimal. + - Have Mac OSX systems automatically disable -static compilation of the + CGI: -static isn't supported. + +************************************************************************ +* to improve in the groff_mdoc(7) macros +************************************************************************ + +- use uname(1) to set doc-default-operating-system at install time + tobimensch Mon, 1 Dec 2014 00:25:07 +0100 diff --git a/contrib/mdocml/apropos.1 b/contrib/mdocml/apropos.1 new file mode 100644 index 0000000..10ba3c6 --- /dev/null +++ b/contrib/mdocml/apropos.1 @@ -0,0 +1,485 @@ +.\" $Id: apropos.1,v 1.39 2015/04/03 08:46:17 schwarze Exp $ +.\" +.\" Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> +.\" Copyright (c) 2011, 2012, 2014 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: April 3 2015 $ +.Dt APROPOS 1 +.Os +.Sh NAME +.Nm apropos , +.Nm whatis +.Nd search manual page databases +.Sh SYNOPSIS +.Nm +.Op Fl acfhklw +.Op Fl C Ar file +.Op Fl M Ar path +.Op Fl m Ar path +.Op Fl O Ar outkey +.Op Fl S Ar arch +.Op Fl s Ar section +.Ar expression ... +.Sh DESCRIPTION +The +.Nm apropos +and +.Nm whatis +utilities query manual page databases generated by +.Xr makewhatis 8 , +evaluating +.Ar expression +for each file in each database. +By default, they display the names, section numbers, and description lines +of all matching manuals. +.Pp +By default, +.Nm +searches for +.Xr makewhatis 8 +databases in the default paths stipulated by +.Xr man 1 +and uses case-insensitive substring matching +.Pq the Cm = No operator +over manual names and descriptions +.Pq the Li \&Nm No and Li \&Nd No macro keys . +Multiple terms imply pairwise +.Fl o . +.Pp +.Nm whatis +is a synonym for +.Nm +.Fl f . +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl a +Instead of showing only the title lines, show the complete manual pages, +just like +.Xr man 1 +.Fl a +would. +If the standard output is a terminal device and +.Fl c +is not specified, use +.Xr more 1 +to paginate them. +In +.Fl a +mode, the options +.Fl IKOTW +described in the +.Xr mandoc 1 +manual are also available. +.It Fl C Ar file +Specify an alternative configuration +.Ar file +in +.Xr man.conf 5 +format. +.It Fl c +In +.Fl a +mode, copy the formatted manual pages to the standard output without using +.Xr more 1 +to paginate them. +.It Fl f +Search for all words in +.Ar expression +in manual page names only. 
+The search is case insensitive and matches whole words only. +In this mode, macro keys, comparison operators, and logical operators +are not available. +This overrides any earlier +.Fl k +and +.Fl l +options. +.It Fl h +Instead of showing the title lines, show the SYNOPSIS sections, just like +.Xr man 1 +.Fl h +would. +.It Fl k +Support the full +.Ar expression +syntax. +This overrides any earlier +.Fl f +and +.Fl l +options. +It is the default for +.Nm . +.It Fl l +An alias for +.Xr mandoc 1 +.Fl a . +This overrides any earlier +.Fl f , +.Fl k , +and +.Fl w +options. +.It Fl M Ar path +Use the colon-separated path instead of the default list of paths +searched for +.Xr makewhatis 8 +databases. +Invalid paths, or paths without manual databases, are ignored. +.It Fl m Ar path +Prepend the colon-separated paths to the list of paths searched +for +.Xr makewhatis 8 +databases. +Invalid paths, or paths without manual databases, are ignored. +.It Fl O Ar outkey +Show the values associated with the key +.Ar outkey +instead of the manual descriptions. +.It Fl S Ar arch +Restrict the search to pages for the specified +.Xr machine 1 +architecture. +.Ar arch +is case insensitive. +By default, pages for all architectures are shown. +.It Fl s Ar section +Restrict the search to the specified section of the manual. +By default, pages from all sections are shown. +See +.Xr man 1 +for a listing of sections. +.It Fl w +Instead of showing title lines, show the pathnames of the matching +manual pages, just like +.Xr man 1 +.Fl w +would. +.El +.Pp +An +.Ar expression +consists of search terms joined by logical operators +.Fl a +.Pq and +and +.Fl o +.Pq or . +The +.Fl a +operator has precedence over +.Fl o +and both are evaluated left-to-right. +.Bl -tag -width Ds +.It \&( Ar expr No \&) +True if the subexpression +.Ar expr +is true. +.It Ar expr1 Fl a Ar expr2 +True if both +.Ar expr1 +and +.Ar expr2 +are true (logical +.Sq and ) . 
+.It Ar expr1 Oo Fl o Oc Ar expr2 +True if +.Ar expr1 +and/or +.Ar expr2 +evaluate to true (logical +.Sq or ) . +.It Ar term +True if +.Ar term +is satisfied. +This has syntax +.Sm off +.Oo +.Op Ar key Op , Ar key ... +.Pq Cm = | \(ti +.Oc +.Ar val , +.Sm on +where +.Ar key +is an +.Xr mdoc 7 +macro to query and +.Ar val +is its value. +See +.Sx Macro Keys +for a list of available keys. +Operator +.Cm = +evaluates a substring, while +.Cm \(ti +evaluates a regular expression. +.It Fl i Ar term +If +.Ar term +is a regular expression, it +is evaluated case-insensitively. +Has no effect on substring terms. +.El +.Pp +Results are sorted by manual sections and names, with output formatted as +.Pp +.D1 name[, name...](sec) \- description +.Pp +Where +.Dq name +is the manual's name, +.Dq sec +is the manual section, and +.Dq description +is the manual's short description. +If an architecture is specified for the manual, it is displayed as +.Pp +.D1 name(sec/arch) \- description +.Pp +Resulting manuals may be accessed as +.Pp +.Dl $ man \-s sec name +.Pp +If an architecture is specified in the output, use +.Pp +.Dl $ man \-s sec \-S arch name +.Ss Macro Keys +Queries evaluate over a subset of +.Xr mdoc 7 +macros indexed by +.Xr makewhatis 8 . +In addition to the macro keys listed below, the special key +.Cm any +may be used to match any available macro key. 
+.Pp +Names and description: +.Bl -column "xLix" description -offset indent -compact +.It Li \&Nm Ta manual name +.It Li \&Nd Ta one-line manual description +.It Li arch Ta machine architecture (case-insensitive) +.It Li sec Ta manual section number +.El +.Pp +Sections and cross references: +.Bl -column "xLix" description -offset indent -compact +.It Li \&Sh Ta section header (excluding standard sections) +.It Li \&Ss Ta subsection header +.It Li \&Xr Ta cross reference to another manual page +.It Li \&Rs Ta bibliographic reference +.El +.Pp +Semantic markup for command line utilities: +.Bl -column "xLix" description -offset indent -compact +.It Li \&Fl Ta command line options (flags) +.It Li \&Cm Ta command modifier +.It Li \&Ar Ta command argument +.It Li \&Ic Ta internal or interactive command +.It Li \&Ev Ta environmental variable +.It Li \&Pa Ta file system path +.El +.Pp +Semantic markup for function libraries: +.Bl -column "xLix" description -offset indent -compact +.It Li \&Lb Ta function library name +.It Li \&In Ta include file +.It Li \&Ft Ta function return type +.It Li \&Fn Ta function name +.It Li \&Fa Ta function argument type and name +.It Li \&Vt Ta variable type +.It Li \&Va Ta variable name +.It Li \&Dv Ta defined variable or preprocessor constant +.It Li \&Er Ta error constant +.It Li \&Ev Ta environmental variable +.El +.Pp +Various semantic markup: +.Bl -column "xLix" description -offset indent -compact +.It Li \&An Ta author name +.It Li \&Lk Ta hyperlink +.It Li \&Mt Ta Do mailto Dc hyperlink +.It Li \&Cd Ta kernel configuration declaration +.It Li \&Ms Ta mathematical symbol +.It Li \&Tn Ta tradename +.El +.Pp +Physical markup: +.Bl -column "xLix" description -offset indent -compact +.It Li \&Em Ta italic font or underline +.It Li \&Sy Ta boldface font +.It Li \&Li Ta typewriter font +.El +.Pp +Text production: +.Bl -column "xLix" description -offset indent -compact +.It Li \&St Ta reference to a standards document +.It Li \&At Ta At No 
version reference +.It Li \&Bx Ta Bx No version reference +.It Li \&Bsx Ta Bsx No version reference +.It Li \&Nx Ta Nx No version reference +.It Li \&Fx Ta Fx No version reference +.It Li \&Ox Ta Ox No version reference +.It Li \&Dx Ta Dx No version reference +.El +.Sh ENVIRONMENT +.Bl -tag -width MANPAGER +.It Ev MANPAGER +Any non-empty value of the environment variable +.Ev MANPAGER +will be used instead of the standard pagination program, +.Xr more 1 . +.It Ev MANPATH +The standard search path used by +.Xr man 1 +may be changed by specifying a path in the +.Ev MANPATH +environment variable. +Invalid paths, or paths without manual databases, are ignored. +Overridden by +.Fl M . +If +.Ev MANPATH +begins with a colon, it is appended to the default list; +if it ends with a colon, it is prepended to the default list; +or if it contains two adjacent colons, +the standard search path is inserted between the colons. +If none of these conditions are met, it overrides the +standard search path. +.It Ev PAGER +Specifies the pagination program to use when +.Ev MANPAGER +is not defined. +If neither PAGER nor MANPAGER is defined, +.Xr more 1 +.Fl s +will be used. 
+.El +.Sh FILES +.Bl -tag -width "/etc/man.conf" -compact +.It Pa mandoc.db +name of the +.Xr makewhatis 8 +keyword database +.It Pa /etc/man.conf +default +.Xr man 1 +configuration file +.El +.Sh EXIT STATUS +.Ex -std +.Sh EXAMPLES +Search for +.Qq .cf +as a substring of manual names and descriptions: +.Pp +.Dl $ apropos .cf +.Pp +Include matches for +.Qq .cnf +and +.Qq .conf +as well: +.Pp +.Dl $ apropos .cf .cnf .conf +.Pp +Search in names and descriptions using a regular expression: +.Pp +.Dl $ apropos \(aq\(tiset.?[ug]id\(aq +.Pp +Search for manuals in the library section mentioning both the +.Qq optind +and the +.Qq optarg +variables: +.Pp +.Dl $ apropos \-s 3 Va=optind \-a Va=optarg +.Pp +Do exactly the same as calling +.Xr whatis 1 +with the argument +.Qq ssh : +.Pp +.Dl $ apropos \-\- \-i \(aqNm\(ti[[:<:]]ssh[[:>:]]\(aq +.Pp +The following two invocations are equivalent: +.Pp +.D1 Li $ apropos -S Ar arch Li -s Ar section expression +.Bd -ragged -offset indent +.Li $ apropos \e( Ar expression Li \e) +.Li -a arch\(ti^( Ns Ar arch Ns Li |any)$ +.Li -a sec\(ti^ Ns Ar section Ns Li $ +.Ed +.Sh SEE ALSO +.Xr man 1 , +.Xr re_format 7 , +.Xr makewhatis 8 +.Sh HISTORY +Part of the functionality of +.Nm whatis +was already provided by the former +.Nm manwhere +utility in +.Bx 1 . +The +.Nm +and +.Nm whatis +utilities first appeared in +.Bx 2 . +They were rewritten from scratch for +.Ox 5.6 . +.Pp +The +.Fl M +option and the +.Ev MANPATH +variable first appeared in +.Bx 4.3 ; +.Fl m +in +.Bx 4.3 Reno ; +.Fl C +in +.Bx 4.4 Lite1 ; +and +.Fl S +and +.Fl s +in +.Ox 4.5 +for +.Nm +and in +.Ox 5.6 +for +.Nm whatis . +.Sh AUTHORS +.An -nosplit +.An Bill Joy +wrote +.Nm manwhere +in 1977 and the original +.Bx +.Nm +and +.Nm whatis +in February 1979. +The current version was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +and +.An Ingo Schwarze Aq Mt schwarze@openbsd.org . 
diff --git a/contrib/mdocml/att.c b/contrib/mdocml/att.c new file mode 100644 index 0000000..872f982 --- /dev/null +++ b/contrib/mdocml/att.c @@ -0,0 +1,50 @@ +/* $Id: att.c,v 1.15 2015/10/06 18:32:19 schwarze Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> +#include <string.h> + +#include "roff.h" +#include "mdoc.h" +#include "libmdoc.h" + +#define LINE(x, y) \ + if (0 == strcmp(p, x)) return(y) + + +const char * +mdoc_a2att(const char *p) +{ + + LINE("v1", "Version\\~1 AT&T UNIX"); + LINE("v2", "Version\\~2 AT&T UNIX"); + LINE("v3", "Version\\~3 AT&T UNIX"); + LINE("v4", "Version\\~4 AT&T UNIX"); + LINE("v5", "Version\\~5 AT&T UNIX"); + LINE("v6", "Version\\~6 AT&T UNIX"); + LINE("v7", "Version\\~7 AT&T UNIX"); + LINE("32v", "Version\\~32V AT&T UNIX"); + LINE("III", "AT&T System\\~III UNIX"); + LINE("V", "AT&T System\\~V UNIX"); + LINE("V.1", "AT&T System\\~V Release\\~1 UNIX"); + LINE("V.2", "AT&T System\\~V Release\\~2 UNIX"); + LINE("V.3", "AT&T System\\~V Release\\~3 UNIX"); + LINE("V.4", "AT&T System\\~V Release\\~4 UNIX"); + + return NULL; +} diff --git a/contrib/mdocml/cgi.c b/contrib/mdocml/cgi.c new file mode 100644 index 0000000..0b01d06 --- /dev/null +++ b/contrib/mdocml/cgi.c @@ -0,0 +1,1129 @@ +/* $Id: cgi.c,v 1.116 2016/01/04 12:36:26 schwarze Exp $ */ +/* + * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@usta.de> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> +#include <sys/time.h> + +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "man.h" +#include "main.h" +#include "manconf.h" +#include "mansearch.h" +#include "cgi.h" + +/* + * A query as passed to the search function. + */ +struct query { + char *manpath; /* desired manual directory */ + char *arch; /* architecture */ + char *sec; /* manual section */ + char *query; /* unparsed query expression */ + int equal; /* match whole names, not substrings */ +}; + +struct req { + struct query q; + char **p; /* array of available manpaths */ + size_t psz; /* number of available manpaths */ +}; + +static void catman(const struct req *, const char *); +static void format(const struct req *, const char *); +static void html_print(const char *); +static void html_putchar(char); +static int http_decode(char *); +static void http_parse(struct req *, const char *); +static void pathgen(struct req *); +static void pg_error_badrequest(const char *); +static void pg_error_internal(void); +static void pg_index(const struct req *); +static void pg_noresult(const struct req *, const char *); +static void pg_search(const struct req *); +static void pg_searchres(const struct req *, + struct manpage *, size_t); +static void pg_show(struct req *, const char *); +static void resp_begin_html(int, const char *); +static void resp_begin_http(int, const char *); +static void resp_copy(const char *); +static void resp_end_html(void); +static void resp_searchform(const struct req *); +static void resp_show(const struct req *, const char *); +static void set_query_attr(char **, char **); +static int validate_filename(const char *); +static int validate_manpath(const struct req *, const char *); +static int 
validate_urifrag(const char *); + +static const char *scriptname; /* CGI script name */ + +static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; +static const char *const sec_numbers[] = { + "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" +}; +static const char *const sec_names[] = { + "All Sections", + "1 - General Commands", + "2 - System Calls", + "3 - Library Functions", + "3p - Perl Library", + "4 - Device Drivers", + "5 - File Formats", + "6 - Games", + "7 - Miscellaneous Information", + "8 - System Manager\'s Manual", + "9 - Kernel Developer\'s Manual" +}; +static const int sec_MAX = sizeof(sec_names) / sizeof(char *); + +static const char *const arch_names[] = { + "amd64", "alpha", "armish", "armv7", + "aviion", "hppa", "hppa64", "i386", + "ia64", "landisk", "loongson", "luna88k", + "macppc", "mips64", "octeon", "sgi", + "socppc", "solbourne", "sparc", "sparc64", + "vax", "zaurus", + "amiga", "arc", "arm32", "atari", + "beagle", "cats", "hp300", "mac68k", + "mvme68k", "mvme88k", "mvmeppc", "palm", + "pc532", "pegasos", "pmax", "powerpc", + "sun3", "wgrisc", "x68k" +}; +static const int arch_MAX = sizeof(arch_names) / sizeof(char *); + +/* + * Print a character, escaping HTML along the way. + * Double quotes, ampersands and angle brackets are written as + * named character references; every other byte is copied as is. + * This will pass non-ASCII straight to output: be warned! + */ +static void +html_putchar(char c) +{ + + switch (c) { + case ('"'): + printf("&quot;"); + break; + case ('&'): + printf("&amp;"); + break; + case ('>'): + printf("&gt;"); + break; + case ('<'): + printf("&lt;"); + break; + default: + putchar((unsigned char)c); + break; + } +} + +/* + * Call through to html_putchar(). + * Accepts NULL strings. + */ +static void +html_print(const char *p) +{ + + if (NULL == p) + return; + while ('\0' != *p) + html_putchar(*p++); +} + +/* + * Transfer the responsibility for the allocated string *val + * to the query structure. 
+ */ +static void +set_query_attr(char **attr, char **val) +{ + + free(*attr); + if (**val == '\0') { + *attr = NULL; + free(*val); + } else + *attr = *val; + *val = NULL; +} + +/* + * Parse the QUERY_STRING for key-value pairs + * and store the values into the query structure. + */ +static void +http_parse(struct req *req, const char *qs) +{ + char *key, *val; + size_t keysz, valsz; + + req->q.manpath = NULL; + req->q.arch = NULL; + req->q.sec = NULL; + req->q.query = NULL; + req->q.equal = 1; + + key = val = NULL; + while (*qs != '\0') { + + /* Parse one key. */ + + keysz = strcspn(qs, "=;&"); + key = mandoc_strndup(qs, keysz); + qs += keysz; + if (*qs != '=') + goto next; + + /* Parse one value. */ + + valsz = strcspn(++qs, ";&"); + val = mandoc_strndup(qs, valsz); + qs += valsz; + + /* Decode and catch encoding errors. */ + + if ( ! (http_decode(key) && http_decode(val))) + goto next; + + /* Handle key-value pairs. */ + + if ( ! strcmp(key, "query")) + set_query_attr(&req->q.query, &val); + + else if ( ! strcmp(key, "apropos")) + req->q.equal = !strcmp(val, "0"); + + else if ( ! strcmp(key, "manpath")) { +#ifdef COMPAT_OLDURI + if ( ! strncmp(val, "OpenBSD ", 8)) { + val[7] = '-'; + if ('C' == val[8]) + val[8] = 'c'; + } +#endif + set_query_attr(&req->q.manpath, &val); + } + + else if ( ! (strcmp(key, "sec") +#ifdef COMPAT_OLDURI + && strcmp(key, "sektion") +#endif + )) { + if ( ! strcmp(val, "0")) + *val = '\0'; + set_query_attr(&req->q.sec, &val); + } + + else if ( ! strcmp(key, "arch")) { + if ( ! strcmp(val, "default")) + *val = '\0'; + set_query_attr(&req->q.arch, &val); + } + + /* + * The key must be freed in any case. + * The val may have been handed over to the query + * structure, in which case it is now NULL. + */ +next: + free(key); + key = NULL; + free(val); + val = NULL; + + if (*qs != '\0') + qs++; + } +} + +/* + * HTTP-decode a string. The standard explanation is that this turns + * "%4e+foo" into "n foo" in the regular way. 
This is done in-place + * over the allocated string. + * Returns 1 on success, or 0 if an escape is truncated, is not + * made of two hexadecimal digits, or decodes to a NUL byte. + */ +static int +http_decode(char *p) +{ + char hex[3]; + char *q; + unsigned int c; /* %x stores through an unsigned int pointer */ + + hex[2] = '\0'; + + q = p; + for ( ; '\0' != *p; p++, q++) { + if ('%' == *p) { + if ('\0' == (hex[0] = *(p + 1))) + return 0; + if ('\0' == (hex[1] = *(p + 2))) + return 0; + /* sscanf() alone would accept a sign or one digit. */ + if ( ! (isxdigit((unsigned char)hex[0]) && + isxdigit((unsigned char)hex[1]))) + return 0; + if (1 != sscanf(hex, "%x", &c)) + return 0; + if ('\0' == c) + return 0; + + *q = (char)c; + p += 2; + } else + *q = '+' == *p ? ' ' : *p; + } + + *q = '\0'; + return 1; +} + +static void +resp_begin_http(int code, const char *msg) +{ + + if (200 != code) + printf("Status: %d %s\r\n", code, msg); + + printf("Content-Type: text/html; charset=utf-8\r\n" + "Cache-Control: no-cache\r\n" + "Pragma: no-cache\r\n" + "\r\n"); + + fflush(stdout); +} + +/* + * Copy the content of the named file to standard output. + * A file that cannot be opened is silently skipped. + */ +static void +resp_copy(const char *filename) +{ + char buf[4096]; + ssize_t sz; + int fd; + + if ((fd = open(filename, O_RDONLY)) != -1) { + fflush(stdout); + while ((sz = read(fd, buf, sizeof(buf))) > 0) + if (write(STDOUT_FILENO, buf, sz) == -1) + break; + close(fd); + } +} + +static void +resp_begin_html(int code, const char *msg) +{ + + resp_begin_http(code, msg); + + printf("<!DOCTYPE html>\n" + "<HTML>\n" + "<HEAD>\n" + "<META CHARSET=\"UTF-8\" />\n" + "<LINK REL=\"stylesheet\" HREF=\"%s/mandoc.css\"" + " TYPE=\"text/css\" media=\"all\">\n" + "<TITLE>%s</TITLE>\n" + "</HEAD>\n" + "<BODY>\n" + "<!-- Begin page content. //-->\n", + CSS_DIR, CUSTOMIZE_TITLE); + + resp_copy(MAN_DIR "/header.html"); +} + +static void +resp_end_html(void) +{ + + resp_copy(MAN_DIR "/footer.html"); + + puts("</BODY>\n" + "</HTML>"); +} + +static void +resp_searchform(const struct req *req) +{ + int i; + + puts("<!-- Begin search form. //-->"); + printf("<DIV ID=\"mancgi\">\n" + "<FORM ACTION=\"%s\" METHOD=\"get\">\n" + "<FIELDSET>\n" + "<LEGEND>Manual Page Search Parameters</LEGEND>\n", + scriptname); + + /* Write query input box. 
*/ + + printf( "<TABLE><TR><TD>\n" + "<INPUT TYPE=\"text\" NAME=\"query\" VALUE=\""); + if (NULL != req->q.query) + html_print(req->q.query); + puts("\" SIZE=\"40\">"); + + /* Write submission and reset buttons. */ + + printf( "<INPUT TYPE=\"submit\" VALUE=\"Submit\">\n" + "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n"); + + /* Write show radio button */ + + printf( "</TD><TD>\n" + "<INPUT TYPE=\"radio\" "); + if (req->q.equal) + printf("CHECKED=\"checked\" "); + printf( "NAME=\"apropos\" ID=\"show\" VALUE=\"0\">\n" + "<LABEL FOR=\"show\">Show named manual page</LABEL>\n"); + + /* Write section selector. */ + + puts( "</TD></TR><TR><TD>\n" + "<SELECT NAME=\"sec\">"); + for (i = 0; i < sec_MAX; i++) { + printf("<OPTION VALUE=\"%s\"", sec_numbers[i]); + if (NULL != req->q.sec && + 0 == strcmp(sec_numbers[i], req->q.sec)) + printf(" SELECTED=\"selected\""); + printf(">%s</OPTION>\n", sec_names[i]); + } + puts("</SELECT>"); + + /* Write architecture selector. */ + + printf( "<SELECT NAME=\"arch\">\n" + "<OPTION VALUE=\"default\""); + if (NULL == req->q.arch) + printf(" SELECTED=\"selected\""); + puts(">All Architectures</OPTION>"); + for (i = 0; i < arch_MAX; i++) { + printf("<OPTION VALUE=\"%s\"", arch_names[i]); + if (NULL != req->q.arch && + 0 == strcmp(arch_names[i], req->q.arch)) + printf(" SELECTED=\"selected\""); + printf(">%s</OPTION>\n", arch_names[i]); + } + puts("</SELECT>"); + + /* Write manpath selector. 
*/ + + if (req->psz > 1) { + puts("<SELECT NAME=\"manpath\">"); + for (i = 0; i < (int)req->psz; i++) { + printf("<OPTION "); + if (strcmp(req->q.manpath, req->p[i]) == 0) + printf("SELECTED=\"selected\" "); + printf("VALUE=\""); + html_print(req->p[i]); + printf("\">"); + html_print(req->p[i]); + puts("</OPTION>"); + } + puts("</SELECT>"); + } + + /* Write search radio button */ + + printf( "</TD><TD>\n" + "<INPUT TYPE=\"radio\" "); + if (0 == req->q.equal) + printf("CHECKED=\"checked\" "); + printf( "NAME=\"apropos\" ID=\"search\" VALUE=\"1\">\n" + "<LABEL FOR=\"search\">Search with apropos query</LABEL>\n"); + + puts("</TD></TR></TABLE>\n" + "</FIELDSET>\n" + "</FORM>\n" + "</DIV>"); + puts("<!-- End search form. //-->"); +} + +static int +validate_urifrag(const char *frag) +{ + + while ('\0' != *frag) { + if ( ! (isalnum((unsigned char)*frag) || + '-' == *frag || '.' == *frag || + '/' == *frag || '_' == *frag)) + return 0; + frag++; + } + return 1; +} + +static int +validate_manpath(const struct req *req, const char* manpath) +{ + size_t i; + + if ( ! strcmp(manpath, "mandoc")) + return 1; + + for (i = 0; i < req->psz; i++) + if ( ! strcmp(manpath, req->p[i])) + return 1; + + return 0; +} + +static int +validate_filename(const char *file) +{ + + if ('.' == file[0] && '/' == file[1]) + file += 2; + + return ! 
(strstr(file, "../") || strstr(file, "/..") || + (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); +} + +static void +pg_index(const struct req *req) +{ + + resp_begin_html(200, NULL); + resp_searchform(req); + printf("<P>\n" + "This web interface is documented in the\n" + "<A HREF=\"%s/mandoc/man8/man.cgi.8\">man.cgi</A>\n" + "manual, and the\n" + "<A HREF=\"%s/mandoc/man1/apropos.1\">apropos</A>\n" + "manual explains the query syntax.\n" + "</P>\n", + scriptname, scriptname); + resp_end_html(); +} + +static void +pg_noresult(const struct req *req, const char *msg) +{ + resp_begin_html(200, NULL); + resp_searchform(req); + puts("<P>"); + puts(msg); + puts("</P>"); + resp_end_html(); +} + +static void +pg_error_badrequest(const char *msg) +{ + + resp_begin_html(400, "Bad Request"); + puts("<H1>Bad Request</H1>\n" + "<P>\n"); + puts(msg); + printf("Try again from the\n" + "<A HREF=\"%s\">main page</A>.\n" + "</P>", scriptname); + resp_end_html(); +} + +static void +pg_error_internal(void) +{ + resp_begin_html(500, "Internal Server Error"); + puts("<P>Internal Server Error</P>"); + resp_end_html(); +} + +static void +pg_searchres(const struct req *req, struct manpage *r, size_t sz) +{ + char *arch, *archend; + size_t i, iuse, isec; + int archprio, archpriouse; + int prio, priouse; + char sec; + + for (i = 0; i < sz; i++) { + if (validate_filename(r[i].file)) + continue; + fprintf(stderr, "invalid filename %s in %s database\n", + r[i].file, req->q.manpath); + pg_error_internal(); + return; + } + + if (1 == sz) { + /* + * If we have just one result, then jump there now + * without any delay. 
+ */ + printf("Status: 303 See Other\r\n"); + printf("Location: http://%s%s/%s/%s", + HTTP_HOST, scriptname, req->q.manpath, r[0].file); + printf("\r\n" + "Content-Type: text/html; charset=utf-8\r\n" + "\r\n"); + return; + } + + resp_begin_html(200, NULL); + resp_searchform(req); + puts("<DIV CLASS=\"results\">"); + puts("<TABLE>"); + + for (i = 0; i < sz; i++) { + printf("<TR>\n" + "<TD CLASS=\"title\">\n" + "<A HREF=\"%s/%s/%s", + scriptname, req->q.manpath, r[i].file); + printf("\">"); + html_print(r[i].names); + printf("</A>\n" + "</TD>\n" + "<TD CLASS=\"desc\">"); + html_print(r[i].output); + puts("</TD>\n" + "</TR>"); + } + + puts("</TABLE>\n" + "</DIV>"); + + /* + * In man(1) mode, show one of the pages + * even if more than one is found. + */ + + if (req->q.equal) { + puts("<HR>"); + iuse = 0; + priouse = 10; + archpriouse = 3; + for (i = 0; i < sz; i++) { + isec = strcspn(r[i].file, "123456789"); + sec = r[i].file[isec]; + if ('\0' == sec) + continue; + prio = sec_prios[sec - '1']; + if (NULL == req->q.arch) { + archprio = + (NULL == (arch = strchr( + r[i].file + isec, '/'))) ? 3 : + (NULL == (archend = strchr( + arch + 1, '/'))) ? 0 : + strncmp(arch, "amd64/", + archend - arch) ? 2 : 1; + if (archprio < archpriouse) { + archpriouse = archprio; + priouse = prio; + iuse = i; + continue; + } + if (archprio > archpriouse) + continue; + } + if (prio >= priouse) + continue; + priouse = prio; + iuse = i; + } + resp_show(req, r[iuse].file); + } + + resp_end_html(); +} + +static void +catman(const struct req *req, const char *file) +{ + FILE *f; + char *p; + size_t sz; + ssize_t len; + int i; + int italic, bold; + + if ((f = fopen(file, "r")) == NULL) { + puts("<P>You specified an invalid manual file.</P>"); + return; + } + + puts("<DIV CLASS=\"catman\">\n" + "<PRE>"); + + p = NULL; + sz = 0; + + while ((len = getline(&p, &sz, f)) != -1) { + bold = italic = 0; + for (i = 0; i < len - 1; i++) { + /* + * This means that the catpage is out of state. 
+ * Ignore it and keep going (although the + * catpage is bogus). + */ + + if ('\b' == p[i] || '\n' == p[i]) + continue; + + /* + * Print a regular character. + * Close out any bold/italic scopes. + * If we're in back-space mode, make sure we'll + * have something to enter when we backspace. + */ + + if ('\b' != p[i + 1]) { + if (italic) + printf("</I>"); + if (bold) + printf("</B>"); + italic = bold = 0; + html_putchar(p[i]); + continue; + } else if (i + 2 >= len) + continue; + + /* Italic mode. */ + + if ('_' == p[i]) { + if (bold) + printf("</B>"); + if ( ! italic) + printf("<I>"); + bold = 0; + italic = 1; + i += 2; + html_putchar(p[i]); + continue; + } + + /* + * Handle funny behaviour troff-isms. + * These grok'd from the original man2html.c. + * p[i + 1] is known to be a backspace at this point, + * so the overstruck character is always p[i + 2]. + */ + + if (('+' == p[i] && 'o' == p[i + 2]) || + ('o' == p[i] && '+' == p[i + 2]) || + ('|' == p[i] && '=' == p[i + 2]) || + ('=' == p[i] && '|' == p[i + 2]) || + ('*' == p[i] && '=' == p[i + 2]) || + ('=' == p[i] && '*' == p[i + 2]) || + ('*' == p[i] && '|' == p[i + 2]) || + ('|' == p[i] && '*' == p[i + 2])) { + if (italic) + printf("</I>"); + if (bold) + printf("</B>"); + italic = bold = 0; + putchar('*'); + i += 2; + continue; + } else if (('|' == p[i] && '-' == p[i + 2]) || + ('-' == p[i] && '|' == p[i + 2]) || + ('+' == p[i] && '-' == p[i + 2]) || + ('-' == p[i] && '+' == p[i + 2]) || + ('+' == p[i] && '|' == p[i + 2]) || + ('|' == p[i] && '+' == p[i + 2])) { + if (italic) + printf("</I>"); + if (bold) + printf("</B>"); + italic = bold = 0; + putchar('+'); + i += 2; + continue; + } + + /* Bold mode. */ + + if (italic) + printf("</I>"); + if ( ! bold) + printf("<B>"); + bold = 1; + italic = 0; + i += 2; + html_putchar(p[i]); + } + + /* + * Clean up the last character. + * We can get to a newline; don't print that. 
+ */ + + if (italic) + printf("</I>"); + if (bold) + printf("</B>"); + + if (i == len - 1 && p[i] != '\n') + html_putchar(p[i]); + + putchar('\n'); + } + free(p); + + puts("</PRE>\n" + "</DIV>"); + + fclose(f); +} + +static void +format(const struct req *req, const char *file) +{ + struct manoutput conf; + struct mparse *mp; + struct roff_man *man; + void *vp; + int fd; + int usepath; + + if (-1 == (fd = open(file, O_RDONLY, 0))) { + puts("<P>You specified an invalid manual file.</P>"); + return; + } + + mchars_alloc(); + mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, req->q.manpath); + mparse_readfd(mp, fd, file); + close(fd); + + memset(&conf, 0, sizeof(conf)); + conf.fragment = 1; + usepath = strcmp(req->q.manpath, req->p[0]); + mandoc_asprintf(&conf.man, "%s?query=%%N&sec=%%S%s%s%s%s", + scriptname, + req->q.arch ? "&arch=" : "", + req->q.arch ? req->q.arch : "", + usepath ? "&manpath=" : "", + usepath ? req->q.manpath : ""); + + mparse_result(mp, &man, NULL); + if (man == NULL) { + fprintf(stderr, "fatal mandoc error: %s/%s\n", + req->q.manpath, file); + pg_error_internal(); + mparse_free(mp); + mchars_free(); + return; + } + + vp = html_alloc(&conf); + + if (man->macroset == MACROSET_MDOC) { + mdoc_validate(man); + html_mdoc(vp, man); + } else { + man_validate(man); + html_man(vp, man); + } + + html_free(vp); + mparse_free(mp); + mchars_free(); + free(conf.man); +} + +static void +resp_show(const struct req *req, const char *file) +{ + + if ('.' == file[0] && '/' == file[1]) + file += 2; + + if ('c' == *file) + catman(req, file); + else + format(req, file); +} + +static void +pg_show(struct req *req, const char *fullpath) +{ + char *manpath; + const char *file; + + if ((file = strchr(fullpath, '/')) == NULL) { + pg_error_badrequest( + "You did not specify a page to show."); + return; + } + manpath = mandoc_strndup(fullpath, file - fullpath); + file++; + + if ( ! 
validate_manpath(req, manpath)) { + pg_error_badrequest( + "You specified an invalid manpath."); + free(manpath); + return; + } + + /* + * Begin by chdir()ing into the manpath. + * This way we can pick up the database files, which are + * relative to the manpath root. + */ + + if (chdir(manpath) == -1) { + fprintf(stderr, "chdir %s: %s\n", + manpath, strerror(errno)); + pg_error_internal(); + free(manpath); + return; + } + + if (strcmp(manpath, "mandoc")) { + free(req->q.manpath); + req->q.manpath = manpath; + } else + free(manpath); + + if ( ! validate_filename(file)) { + pg_error_badrequest( + "You specified an invalid manual file."); + return; + } + + resp_begin_html(200, NULL); + resp_searchform(req); + resp_show(req, file); + resp_end_html(); +} + +static void +pg_search(const struct req *req) +{ + struct mansearch search; + struct manpaths paths; + struct manpage *res; + char **argv; + char *query, *rp, *wp; + size_t ressz; + int argc; + + /* + * Begin by chdir()ing into the root of the manpath. + * This way we can pick up the database files, which are + * relative to the manpath root. + */ + + if (-1 == (chdir(req->q.manpath))) { + fprintf(stderr, "chdir %s: %s\n", + req->q.manpath, strerror(errno)); + pg_error_internal(); + return; + } + + search.arch = req->q.arch; + search.sec = req->q.sec; + search.outkey = "Nd"; + search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; + search.firstmatch = 1; + + paths.sz = 1; + paths.paths = mandoc_malloc(sizeof(char *)); + paths.paths[0] = mandoc_strdup("."); + + /* + * Break apart at spaces with backslash-escaping. 
+ */ + + argc = 0; + argv = NULL; + rp = query = mandoc_strdup(req->q.query); + for (;;) { + while (isspace((unsigned char)*rp)) + rp++; + if (*rp == '\0') + break; + argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); + argv[argc++] = wp = rp; + for (;;) { + if (isspace((unsigned char)*rp)) { + *wp = '\0'; + rp++; + break; + } + if (rp[0] == '\\' && rp[1] != '\0') + rp++; + if (wp != rp) + *wp = *rp; + if (*rp == '\0') + break; + wp++; + rp++; + } + } + + if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz)) + pg_noresult(req, "You entered an invalid query."); + else if (0 == ressz) + pg_noresult(req, "No results found."); + else + pg_searchres(req, res, ressz); + + free(query); + mansearch_free(res, ressz); + free(paths.paths[0]); + free(paths.paths); +} + +int +main(void) +{ + struct req req; + struct itimerval itimer; + const char *path; + const char *querystring; + int i; + + /* Poor man's ReDoS mitigation. */ + + itimer.it_value.tv_sec = 2; + itimer.it_value.tv_usec = 0; + itimer.it_interval.tv_sec = 2; + itimer.it_interval.tv_usec = 0; + if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { + fprintf(stderr, "setitimer: %s\n", strerror(errno)); + pg_error_internal(); + return EXIT_FAILURE; + } + + /* Scan our run-time environment. */ + + if (NULL == (scriptname = getenv("SCRIPT_NAME"))) + scriptname = ""; + + if ( ! validate_urifrag(scriptname)) { + fprintf(stderr, "unsafe SCRIPT_NAME \"%s\"\n", + scriptname); + pg_error_internal(); + return EXIT_FAILURE; + } + + /* + * First we change directory into the MAN_DIR so that + * subsequent scanning for manpath directories is rooted + * relative to the same position. + */ + + if (-1 == chdir(MAN_DIR)) { + fprintf(stderr, "MAN_DIR: %s: %s\n", + MAN_DIR, strerror(errno)); + pg_error_internal(); + return EXIT_FAILURE; + } + + memset(&req, 0, sizeof(struct req)); + pathgen(&req); + + /* Next parse out the query string. 
*/ + + if (NULL != (querystring = getenv("QUERY_STRING"))) + http_parse(&req, querystring); + + if (req.q.manpath == NULL) + req.q.manpath = mandoc_strdup(req.p[0]); + else if ( ! validate_manpath(&req, req.q.manpath)) { + pg_error_badrequest( + "You specified an invalid manpath."); + return EXIT_FAILURE; + } + + if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) { + pg_error_badrequest( + "You specified an invalid architecture."); + return EXIT_FAILURE; + } + + /* Dispatch to the three different pages. */ + + path = getenv("PATH_INFO"); + if (NULL == path) + path = ""; + else if ('/' == *path) + path++; + + if ('\0' != *path) + pg_show(&req, path); + else if (NULL != req.q.query) + pg_search(&req); + else + pg_index(&req); + + free(req.q.manpath); + free(req.q.arch); + free(req.q.sec); + free(req.q.query); + for (i = 0; i < (int)req.psz; i++) + free(req.p[i]); + free(req.p); + return EXIT_SUCCESS; +} + +/* + * Scan for indexable paths. + */ +static void +pathgen(struct req *req) +{ + FILE *fp; + char *dp; + size_t dpsz; + ssize_t len; + + if (NULL == (fp = fopen("manpath.conf", "r"))) { + fprintf(stderr, "%s/manpath.conf: %s\n", + MAN_DIR, strerror(errno)); + pg_error_internal(); + exit(EXIT_FAILURE); + } + + dp = NULL; + dpsz = 0; + + while ((len = getline(&dp, &dpsz, fp)) != -1) { + if (dp[len - 1] == '\n') + dp[--len] = '\0'; + req->p = mandoc_realloc(req->p, + (req->psz + 1) * sizeof(char *)); + if ( ! 
validate_urifrag(dp)) { + fprintf(stderr, "%s/manpath.conf contains " + "unsafe path \"%s\"\n", MAN_DIR, dp); + pg_error_internal(); + exit(EXIT_FAILURE); + } + if (NULL != strchr(dp, '/')) { + fprintf(stderr, "%s/manpath.conf contains " + "path with slash \"%s\"\n", MAN_DIR, dp); + pg_error_internal(); + exit(EXIT_FAILURE); + } + req->p[req->psz++] = dp; + dp = NULL; + dpsz = 0; + } + free(dp); + + if ( req->p == NULL ) { + fprintf(stderr, "%s/manpath.conf is empty\n", MAN_DIR); + pg_error_internal(); + exit(EXIT_FAILURE); + } +} diff --git a/contrib/mdocml/cgi.h.example b/contrib/mdocml/cgi.h.example new file mode 100644 index 0000000..c4878d3 --- /dev/null +++ b/contrib/mdocml/cgi.h.example @@ -0,0 +1,7 @@ +/* Example compile-time configuration file for man.cgi(8). */ + +#define HTTP_HOST "mdocml.bsd.lv" +#define MAN_DIR "/var/www/man" +#define CSS_DIR "" +#define CUSTOMIZE_TITLE "Manual pages with mandoc" +#define COMPAT_OLDURI Yes diff --git a/contrib/mdocml/chars.c b/contrib/mdocml/chars.c new file mode 100644 index 0000000..c2cfaf8 --- /dev/null +++ b/contrib/mdocml/chars.c @@ -0,0 +1,494 @@ +/* $Id: chars.c,v 1.68 2015/10/13 22:59:54 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "mandoc_aux.h" +#include "mandoc_ohash.h" +#include "libmandoc.h" + +struct ln { + const char roffcode[16]; + const char *ascii; + int unicode; +}; + +/* Special break control characters. */ +static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' }; +static const char ascii_break[2] = { ASCII_BREAK, '\0' }; + +static struct ln lines[] = { + + /* Spacing. */ + { " ", ascii_nbrsp, 0x00a0 }, + { "~", ascii_nbrsp, 0x00a0 }, + { "0", " ", 0x2002 }, + { "|", "", 0 }, + { "^", "", 0 }, + { "&", "", 0 }, + { "%", "", 0 }, + { ":", ascii_break, 0 }, + /* XXX The following three do not really belong here. */ + { "t", "", 0 }, + { "c", "", 0 }, + { "}", "", 0 }, + + /* Lines. */ + { "ba", "|", 0x007c }, + { "br", "|", 0x2502 }, + { "ul", "_", 0x005f }, + { "rn", "-", 0x203e }, + { "bb", "|", 0x00a6 }, + { "sl", "/", 0x002f }, + { "rs", "\\", 0x005c }, + + /* Text markers. */ + { "ci", "O", 0x25cb }, + { "bu", "+\bo", 0x2022 }, + { "dd", "|\b=", 0x2021 }, + { "dg", "|\b-", 0x2020 }, + { "lz", "<>", 0x25ca }, + { "sq", "[]", 0x25a1 }, + { "ps", "<par>", 0x00b6 }, + { "sc", "<sec>", 0x00a7 }, + { "lh", "<=", 0x261c }, + { "rh", "=>", 0x261e }, + { "at", "@", 0x0040 }, + { "sh", "#", 0x0023 }, + { "CR", "_|", 0x21b5 }, + { "OK", "\\/", 0x2713 }, + + /* Legal symbols. */ + { "co", "(C)", 0x00a9 }, + { "rg", "(R)", 0x00ae }, + { "tm", "tm", 0x2122 }, + + /* Punctuation. 
*/ + { "em", "--", 0x2014 }, + { "en", "-", 0x2013 }, + { "hy", "-", 0x2010 }, + { "e", "\\", 0x005c }, + { ".", ".", 0x002e }, + { "r!", "!", 0x00a1 }, + { "r?", "?", 0x00bf }, + + /* Quotes. */ + { "Bq", ",,", 0x201e }, + { "bq", ",", 0x201a }, + { "lq", "\"", 0x201c }, + { "rq", "\"", 0x201d }, + { "Lq", "``", 0x201c }, + { "Rq", "''", 0x201d }, + { "oq", "`", 0x2018 }, + { "cq", "\'", 0x2019 }, + { "aq", "\'", 0x0027 }, + { "dq", "\"", 0x0022 }, + { "Fo", "<<", 0x00ab }, + { "Fc", ">>", 0x00bb }, + { "fo", "<", 0x2039 }, + { "fc", ">", 0x203a }, + + /* Brackets. */ + { "lB", "[", 0x005b }, + { "rB", "]", 0x005d }, + { "lC", "{", 0x007b }, + { "rC", "}", 0x007d }, + { "la", "<", 0x27e8 }, + { "ra", ">", 0x27e9 }, + { "bv", "|", 0x23aa }, + { "braceex", "|", 0x23aa }, + { "bracketlefttp", "|", 0x23a1 }, + { "bracketleftbt", "|", 0x23a3 }, + { "bracketleftex", "|", 0x23a2 }, + { "bracketrighttp", "|", 0x23a4 }, + { "bracketrightbt", "|", 0x23a6 }, + { "bracketrightex", "|", 0x23a5 }, + { "lt", ",-", 0x23a7 }, + { "bracelefttp", ",-", 0x23a7 }, + { "lk", "{", 0x23a8 }, + { "braceleftmid", "{", 0x23a8 }, + { "lb", "`-", 0x23a9 }, + { "braceleftbt", "`-", 0x23a9 }, + { "braceleftex", "|", 0x23aa }, + { "rt", "-.", 0x23ab }, + { "bracerighttp", "-.", 0x23ab }, + { "rk", "}", 0x23ac }, + { "bracerightmid", "}", 0x23ac }, + { "rb", "-\'", 0x23ad }, + { "bracerightbt", "-\'", 0x23ad }, + { "bracerightex", "|", 0x23aa }, + { "parenlefttp", "/", 0x239b }, + { "parenleftbt", "\\", 0x239d }, + { "parenleftex", "|", 0x239c }, + { "parenrighttp", "\\", 0x239e }, + { "parenrightbt", "/", 0x23a0 }, + { "parenrightex", "|", 0x239f }, + + /* Arrows and lines. 
*/ + { "<-", "<-", 0x2190 }, + { "->", "->", 0x2192 }, + { "<>", "<->", 0x2194 }, + { "da", "|\bv", 0x2193 }, + { "ua", "|\b^", 0x2191 }, + { "va", "^v", 0x2195 }, + { "lA", "<=", 0x21d0 }, + { "rA", "=>", 0x21d2 }, + { "hA", "<=>", 0x21d4 }, + { "uA", "=\b^", 0x21d1 }, + { "dA", "=\bv", 0x21d3 }, + { "vA", "^=v", 0x21d5 }, + + /* Logic. */ + { "AN", "^", 0x2227 }, + { "OR", "v", 0x2228 }, + { "no", "~", 0x00ac }, + { "tno", "~", 0x00ac }, + { "te", "3", 0x2203 }, + { "fa", "-\bV", 0x2200 }, + { "st", "-)", 0x220b }, + { "tf", ".:.", 0x2234 }, + { "3d", ".:.", 0x2234 }, + { "or", "|", 0x007c }, + + /* Mathematicals. */ + { "pl", "+", 0x002b }, + { "mi", "-", 0x2212 }, + { "-", "-", 0x002d }, + { "-+", "-+", 0x2213 }, + { "+-", "+-", 0x00b1 }, + { "t+-", "+-", 0x00b1 }, + { "pc", ".", 0x00b7 }, + { "md", ".", 0x22c5 }, + { "mu", "x", 0x00d7 }, + { "tmu", "x", 0x00d7 }, + { "c*", "O\bx", 0x2297 }, + { "c+", "O\b+", 0x2295 }, + { "di", "-:-", 0x00f7 }, + { "tdi", "-:-", 0x00f7 }, + { "f/", "/", 0x2044 }, + { "**", "*", 0x2217 }, + { "<=", "<=", 0x2264 }, + { ">=", ">=", 0x2265 }, + { "<<", "<<", 0x226a }, + { ">>", ">>", 0x226b }, + { "eq", "=", 0x003d }, + { "!=", "!=", 0x2260 }, + { "==", "==", 0x2261 }, + { "ne", "!==", 0x2262 }, + { "ap", "~", 0x223c }, + { "|=", "-~", 0x2243 }, + { "=~", "=~", 0x2245 }, + { "~~", "~~", 0x2248 }, + { "~=", "~=", 0x2248 }, + { "pt", "oc", 0x221d }, + { "es", "{}", 0x2205 }, + { "mo", "E", 0x2208 }, + { "nm", "!E", 0x2209 }, + { "sb", "(=", 0x2282 }, + { "nb", "(!=", 0x2284 }, + { "sp", "=)", 0x2283 }, + { "nc", "!=)", 0x2285 }, + { "ib", "(=\b_", 0x2286 }, + { "ip", "=\b_)", 0x2287 }, + { "ca", "(^)", 0x2229 }, + { "cu", "U", 0x222a }, + { "/_", "_\b/", 0x2220 }, + { "pp", "_\b|", 0x22a5 }, + { "is", "'\b,\bI", 0x222b }, + { "integral", "'\b,\bI", 0x222b }, + { "sum", "E", 0x2211 }, + { "product", "TT", 0x220f }, + { "coproduct", "U", 0x2210 }, + { "gr", "V", 0x2207 }, + { "sr", "\\/", 0x221a }, + { "sqrt", "\\/", 0x221a }, + { 
"lc", "|~", 0x2308 }, + { "rc", "~|", 0x2309 }, + { "lf", "|_", 0x230a }, + { "rf", "_|", 0x230b }, + { "if", "oo", 0x221e }, + { "Ah", "N", 0x2135 }, + { "Im", "I", 0x2111 }, + { "Re", "R", 0x211c }, + { "pd", "a", 0x2202 }, + { "-h", "/h", 0x210f }, + { "12", "1/2", 0x00bd }, + { "14", "1/4", 0x00bc }, + { "34", "3/4", 0x00be }, + + /* Ligatures. */ + { "ff", "ff", 0xfb00 }, + { "fi", "fi", 0xfb01 }, + { "fl", "fl", 0xfb02 }, + { "Fi", "ffi", 0xfb03 }, + { "Fl", "ffl", 0xfb04 }, + { "AE", "AE", 0x00c6 }, + { "ae", "ae", 0x00e6 }, + { "OE", "OE", 0x0152 }, + { "oe", "oe", 0x0153 }, + { "ss", "ss", 0x00df }, + { "IJ", "IJ", 0x0132 }, + { "ij", "ij", 0x0133 }, + + /* Accents. */ + { "a\"", "\"", 0x02dd }, + { "a-", "-", 0x00af }, + { "a.", ".", 0x02d9 }, + { "a^", "^", 0x005e }, + { "aa", "\'", 0x00b4 }, + { "\'", "\'", 0x00b4 }, + { "ga", "`", 0x0060 }, + { "`", "`", 0x0060 }, + { "ab", "'\b`", 0x02d8 }, + { "ac", ",", 0x00b8 }, + { "ad", "\"", 0x00a8 }, + { "ah", "v", 0x02c7 }, + { "ao", "o", 0x02da }, + { "a~", "~", 0x007e }, + { "ho", ",", 0x02db }, + { "ha", "^", 0x005e }, + { "ti", "~", 0x007e }, + + /* Accented letters. 
*/ + { "'A", "'\bA", 0x00c1 }, + { "'E", "'\bE", 0x00c9 }, + { "'I", "'\bI", 0x00cd }, + { "'O", "'\bO", 0x00d3 }, + { "'U", "'\bU", 0x00da }, + { "'a", "'\ba", 0x00e1 }, + { "'e", "'\be", 0x00e9 }, + { "'i", "'\bi", 0x00ed }, + { "'o", "'\bo", 0x00f3 }, + { "'u", "'\bu", 0x00fa }, + { "`A", "`\bA", 0x00c0 }, + { "`E", "`\bE", 0x00c8 }, + { "`I", "`\bI", 0x00cc }, + { "`O", "`\bO", 0x00d2 }, + { "`U", "`\bU", 0x00d9 }, + { "`a", "`\ba", 0x00e0 }, + { "`e", "`\be", 0x00e8 }, + { "`i", "`\bi", 0x00ec }, + { "`o", "`\bo", 0x00f2 }, + { "`u", "`\bu", 0x00f9 }, + { "~A", "~\bA", 0x00c3 }, + { "~N", "~\bN", 0x00d1 }, + { "~O", "~\bO", 0x00d5 }, + { "~a", "~\ba", 0x00e3 }, + { "~n", "~\bn", 0x00f1 }, + { "~o", "~\bo", 0x00f5 }, + { ":A", "\"\bA", 0x00c4 }, + { ":E", "\"\bE", 0x00cb }, + { ":I", "\"\bI", 0x00cf }, + { ":O", "\"\bO", 0x00d6 }, + { ":U", "\"\bU", 0x00dc }, + { ":a", "\"\ba", 0x00e4 }, + { ":e", "\"\be", 0x00eb }, + { ":i", "\"\bi", 0x00ef }, + { ":o", "\"\bo", 0x00f6 }, + { ":u", "\"\bu", 0x00fc }, + { ":y", "\"\by", 0x00ff }, + { "^A", "^\bA", 0x00c2 }, + { "^E", "^\bE", 0x00ca }, + { "^I", "^\bI", 0x00ce }, + { "^O", "^\bO", 0x00d4 }, + { "^U", "^\bU", 0x00db }, + { "^a", "^\ba", 0x00e2 }, + { "^e", "^\be", 0x00ea }, + { "^i", "^\bi", 0x00ee }, + { "^o", "^\bo", 0x00f4 }, + { "^u", "^\bu", 0x00fb }, + { ",C", ",\bC", 0x00c7 }, + { ",c", ",\bc", 0x00e7 }, + { "/L", "/\bL", 0x0141 }, + { "/l", "/\bl", 0x0142 }, + { "/O", "/\bO", 0x00d8 }, + { "/o", "/\bo", 0x00f8 }, + { "oA", "o\bA", 0x00c5 }, + { "oa", "o\ba", 0x00e5 }, + + /* Special letters. */ + { "-D", "-\bD", 0x00d0 }, + { "Sd", "d", 0x00f0 }, + { "TP", "Th", 0x00de }, + { "Tp", "th", 0x00fe }, + { ".i", "i", 0x0131 }, + { ".j", "j", 0x0237 }, + + /* Currency. */ + { "Do", "$", 0x0024 }, + { "ct", "/\bc", 0x00a2 }, + { "Eu", "EUR", 0x20ac }, + { "eu", "EUR", 0x20ac }, + { "Ye", "=\bY", 0x00a5 }, + { "Po", "GBP", 0x00a3 }, + { "Cs", "o\bx", 0x00a4 }, + { "Fn", ",\bf", 0x0192 }, + + /* Units. 
*/ + { "de", "<deg>", 0x00b0 }, + { "%0", "%o", 0x2030 }, + { "fm", "\'", 0x2032 }, + { "sd", "''", 0x2033 }, + { "mc", ",\bu", 0x00b5 }, + + /* Greek characters. */ + { "*A", "A", 0x0391 }, + { "*B", "B", 0x0392 }, + { "*G", "G", 0x0393 }, + { "*D", "_\b/_\b\\", 0x0394 }, + { "*E", "E", 0x0395 }, + { "*Z", "Z", 0x0396 }, + { "*Y", "H", 0x0397 }, + { "*H", "-\bO", 0x0398 }, + { "*I", "I", 0x0399 }, + { "*K", "K", 0x039a }, + { "*L", "/\\", 0x039b }, + { "*M", "M", 0x039c }, + { "*N", "N", 0x039d }, + { "*C", "_\bH", 0x039e }, + { "*O", "O", 0x039f }, + { "*P", "TT", 0x03a0 }, + { "*R", "P", 0x03a1 }, + { "*S", "S", 0x03a3 }, + { "*T", "T", 0x03a4 }, + { "*U", "Y", 0x03a5 }, + { "*F", "I\bO", 0x03a6 }, + { "*X", "X", 0x03a7 }, + { "*Q", "I\bY", 0x03a8 }, + { "*W", "_\bO", 0x03a9 }, + { "*a", "a", 0x03b1 }, + { "*b", "B", 0x03b2 }, + { "*g", "y", 0x03b3 }, + { "*d", "d", 0x03b4 }, + { "*e", "e", 0x03b5 }, + { "*z", ",\bC", 0x03b6 }, + { "*y", "n", 0x03b7 }, + { "*h", "-\b0", 0x03b8 }, + { "*i", "i", 0x03b9 }, + { "*k", "k", 0x03ba }, + { "*l", ">\b\\", 0x03bb }, + { "*m", ",\bu", 0x03bc }, + { "*n", "v", 0x03bd }, + { "*c", ",\bE", 0x03be }, + { "*o", "o", 0x03bf }, + { "*p", "-\bn", 0x03c0 }, + { "*r", "p", 0x03c1 }, + { "*s", "-\bo", 0x03c3 }, + { "*t", "~\bt", 0x03c4 }, + { "*u", "u", 0x03c5 }, + { "*f", "|\bo", 0x03d5 }, + { "*x", "x", 0x03c7 }, + { "*q", "|\bu", 0x03c8 }, + { "*w", "w", 0x03c9 }, + { "+h", "-\b0", 0x03d1 }, + { "+f", "|\bo", 0x03c6 }, + { "+p", "-\bw", 0x03d6 }, + { "+e", "e", 0x03f5 }, + { "ts", "s", 0x03c2 }, +}; + +static struct ohash mchars; + + +void +mchars_free(void) +{ + + ohash_delete(&mchars); +} + +void +mchars_alloc(void) +{ + size_t i; + unsigned int slot; + + mandoc_ohash_init(&mchars, 9, offsetof(struct ln, roffcode)); + for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++) { + slot = ohash_qlookup(&mchars, lines[i].roffcode); + assert(ohash_find(&mchars, slot) == NULL); + ohash_insert(&mchars, slot, lines + i); + } +} + +int 
+mchars_spec2cp(const char *p, size_t sz) +{ + const struct ln *ln; + const char *end; + + end = p + sz; + ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end)); + return ln != NULL ? ln->unicode : sz == 1 ? (unsigned char)*p : -1; +} + +int +mchars_num2char(const char *p, size_t sz) +{ + int i; + + i = mandoc_strntoi(p, sz, 10); + return i >= 0 && i < 256 ? i : -1; +} + +int +mchars_num2uc(const char *p, size_t sz) +{ + int i; + + i = mandoc_strntoi(p, sz, 16); + assert(i >= 0 && i <= 0x10FFFF); + return i; +} + +const char * +mchars_spec2str(const char *p, size_t sz, size_t *rsz) +{ + const struct ln *ln; + const char *end; + + end = p + sz; + ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end)); + if (ln == NULL) { + *rsz = 1; + return sz == 1 ? p : NULL; + } + + *rsz = strlen(ln->ascii); + return ln->ascii; +} + +const char * +mchars_uc2str(int uc) +{ + size_t i; + + for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++) + if (uc == lines[i].unicode) + return lines[i].ascii; + return "<?>"; +} diff --git a/contrib/mdocml/compat_err.c b/contrib/mdocml/compat_err.c new file mode 100644 index 0000000..d8b09cb --- /dev/null +++ b/contrib/mdocml/compat_err.c @@ -0,0 +1,112 @@ +#include "config.h" + +#if HAVE_ERR + +int dummy; + +#else + +/* $Id: compat_err.c,v 1.4 2015/11/26 07:42:11 schwarze Exp $ */ +/* + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +static void vwarni(const char *, va_list); +static void vwarnxi(const char *, va_list); + +static void +vwarnxi(const char *fmt, va_list ap) +{ + fprintf(stderr, "%s: ", getprogname()); + if (fmt != NULL) + vfprintf(stderr, fmt, ap); +} + +static void +vwarni(const char *fmt, va_list ap) +{ + int sverrno; + + sverrno = errno; + vwarnxi(fmt, ap); + if (fmt != NULL) + fputs(": ", stderr); + fprintf(stderr, "%s\n", strerror(sverrno)); +} + +void +err(int eval, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vwarni(fmt, ap); + va_end(ap); + exit(eval); +} + +void +errx(int eval, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vwarnxi(fmt, ap); + va_end(ap); + fputc('\n', stderr); + exit(eval); +} + +void +warn(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vwarni(fmt, ap); + va_end(ap); +} + +void +warnx(const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vwarnxi(fmt, ap); + va_end(ap); + fputc('\n', stderr); +} + +#endif diff --git a/contrib/mdocml/compat_fts.c b/contrib/mdocml/compat_fts.c new file mode 100644 index 0000000..ed95854 --- /dev/null +++ b/contrib/mdocml/compat_fts.c @@ -0,0 +1,657 @@ +#include "config.h" + +#if HAVE_FTS + +int dummy; + +#else + +/* $Id: compat_fts.c,v 1.9 2015/03/18 19:29:48 schwarze Exp $ */ +/* $OpenBSD: fts.c,v 1.50 2015/01/16 16:48:51 deraadt Exp $ */ + +/*- + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/stat.h> +#include <sys/types.h> + +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include "compat_fts.h" + +#define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b)) + +static FTSENT *fts_alloc(FTS *, const char *, size_t); +static FTSENT *fts_build(FTS *); +static void fts_lfree(FTSENT *); +static void fts_load(FTS *, FTSENT *); +static size_t fts_maxarglen(char * const *); +static void fts_padjust(FTS *, FTSENT *); +static int fts_palloc(FTS *, size_t); +static unsigned short fts_stat(FTS *, FTSENT *); + +#define ISDOT(a) (a[0] == '.' && (!a[1] || (a[1] == '.' && !a[2]))) +#ifndef O_DIRECTORY +#define O_DIRECTORY 0 +#endif +#ifndef O_CLOEXEC +#define O_CLOEXEC 0 +#endif + +#define CLR(opt) (sp->fts_options &= ~(opt)) +#define ISSET(opt) (sp->fts_options & (opt)) +#define SET(opt) (sp->fts_options |= (opt)) + +FTS * +fts_open(char * const *argv, int options, void *dummy) +{ + FTS *sp; + FTSENT *p, *root; + int nitems; + FTSENT *parent, *tmp; + size_t len; + + /* Options check. */ + if (options & ~FTS_OPTIONMASK) { + errno = EINVAL; + return (NULL); + } + + /* Allocate/initialize the stream */ + if ((sp = calloc(1, sizeof(FTS))) == NULL) + return (NULL); + sp->fts_options = options; + + /* + * Start out with 1K of path space, and enough, in any case, + * to hold the user's paths. 
+ */ + if (fts_palloc(sp, MAXIMUM(fts_maxarglen(argv), PATH_MAX))) + goto mem1; + + /* Allocate/initialize root's parent. */ + if ((parent = fts_alloc(sp, "", 0)) == NULL) + goto mem2; + parent->fts_level = FTS_ROOTPARENTLEVEL; + + /* Allocate/initialize root(s). */ + for (root = NULL, nitems = 0; *argv; ++argv, ++nitems) { + /* Don't allow zero-length paths. */ + if ((len = strlen(*argv)) == 0) { + errno = ENOENT; + goto mem3; + } + + if ((p = fts_alloc(sp, *argv, len)) == NULL) + goto mem3; + p->fts_level = FTS_ROOTLEVEL; + p->fts_parent = parent; + p->fts_accpath = p->fts_name; + p->fts_info = fts_stat(sp, p); + + /* Command-line "." and ".." are real directories. */ + if (p->fts_info == FTS_DOT) + p->fts_info = FTS_D; + + p->fts_link = NULL; + if (root == NULL) + tmp = root = p; + else { + tmp->fts_link = p; + tmp = p; + } + } + + /* + * Allocate a dummy pointer and make fts_read think that we've just + * finished the node before the root(s); set p->fts_info to FTS_INIT + * so that everything about the "current" node is ignored. + */ + if ((sp->fts_cur = fts_alloc(sp, "", 0)) == NULL) + goto mem3; + sp->fts_cur->fts_link = root; + sp->fts_cur->fts_info = FTS_INIT; + + if (nitems == 0) + free(parent); + + return (sp); + +mem3: fts_lfree(root); + free(parent); +mem2: free(sp->fts_path); +mem1: free(sp); + return (NULL); +} + +static void +fts_load(FTS *sp, FTSENT *p) +{ + size_t len; + char *cp; + + /* + * Load the stream structure for the next traversal. Since we don't + * actually enter the directory until after the preorder visit, set + * the fts_accpath field specially so the chdir gets done to the right + * place and the user can access the first node. From fts_open it's + * known that the path will fit. 
+ */ + len = p->fts_pathlen = p->fts_namelen; + memmove(sp->fts_path, p->fts_name, len + 1); + if ((cp = strrchr(p->fts_name, '/')) && (cp != p->fts_name || cp[1])) { + len = strlen(++cp); + memmove(p->fts_name, cp, len + 1); + p->fts_namelen = len; + } + p->fts_accpath = p->fts_path = sp->fts_path; + sp->fts_dev = p->fts_dev; +} + +int +fts_close(FTS *sp) +{ + FTSENT *freep, *p; + + /* + * This still works if we haven't read anything -- the dummy structure + * points to the root list, so we step through to the end of the root + * list which has a valid parent pointer. + */ + if (sp->fts_cur) { + for (p = sp->fts_cur; p->fts_level >= FTS_ROOTLEVEL;) { + freep = p; + p = p->fts_link ? p->fts_link : p->fts_parent; + free(freep); + } + free(p); + } + + /* Free up child linked list, sort array, path buffer, stream ptr.*/ + if (sp->fts_child) + fts_lfree(sp->fts_child); + free(sp->fts_path); + free(sp); + + return (0); +} + +/* + * Special case of "/" at the end of the path so that slashes aren't + * appended which would cause paths to be written as "....//foo". + */ +#define NAPPEND(p) \ + (p->fts_path[p->fts_pathlen - 1] == '/' \ + ? p->fts_pathlen - 1 : p->fts_pathlen) + +FTSENT * +fts_read(FTS *sp) +{ + FTSENT *p, *tmp; + int instr; + char *t; + + /* If finished or unrecoverable error, return NULL. */ + if (sp->fts_cur == NULL || ISSET(FTS_STOP)) + return (NULL); + + /* Set current node pointer. */ + p = sp->fts_cur; + + /* Save and zero out user instructions. */ + instr = p->fts_instr; + p->fts_instr = FTS_NOINSTR; + + /* Directory in pre-order. */ + if (p->fts_info == FTS_D) { + /* If skipped or crossed mount point, do post-order visit. */ + if (instr == FTS_SKIP || + (ISSET(FTS_XDEV) && p->fts_dev != sp->fts_dev)) { + if (sp->fts_child) { + fts_lfree(sp->fts_child); + sp->fts_child = NULL; + } + p->fts_info = FTS_DP; + return (p); + } + + /* + * If haven't read do so. If the read fails, fts_build sets + * FTS_STOP or the fts_info field of the node. 
+ */ + if (sp->fts_child) { + /* nothing */ + } else if ((sp->fts_child = fts_build(sp)) == NULL) { + if (ISSET(FTS_STOP)) + return (NULL); + return (p); + } + p = sp->fts_child; + sp->fts_child = NULL; + goto name; + } + + /* Move to the next node on this level. */ +next: tmp = p; + if ((p = p->fts_link)) { + free(tmp); + + /* + * If reached the top, return to the original directory (or + * the root of the tree), and load the paths for the next root. + */ + if (p->fts_level == FTS_ROOTLEVEL) { + fts_load(sp, p); + return (sp->fts_cur = p); + } + + /* + * User may have called fts_set on the node. If skipped, + * ignore. If followed, get a file descriptor so we can + * get back if necessary. + */ + if (p->fts_instr == FTS_SKIP) + goto next; + +name: t = sp->fts_path + NAPPEND(p->fts_parent); + *t++ = '/'; + memmove(t, p->fts_name, p->fts_namelen + 1); + return (sp->fts_cur = p); + } + + /* Move up to the parent node. */ + p = tmp->fts_parent; + free(tmp); + + if (p->fts_level == FTS_ROOTPARENTLEVEL) { + /* + * Done; free everything up and set errno to 0 so the user + * can distinguish between error and EOF. + */ + free(p); + errno = 0; + return (sp->fts_cur = NULL); + } + + /* NUL terminate the pathname. */ + sp->fts_path[p->fts_pathlen] = '\0'; + + p->fts_info = p->fts_errno ? FTS_ERR : FTS_DP; + return (sp->fts_cur = p); +} + +/* + * Fts_set takes the stream as an argument although it's not used in this + * implementation; it would be necessary if anyone wanted to add global + * semantics to fts using fts_set. An error return is allowed for similar + * reasons. + */ +/* ARGSUSED */ +int +fts_set(FTS *sp, FTSENT *p, int instr) +{ + if (instr && instr != FTS_NOINSTR && instr != FTS_SKIP) { + errno = EINVAL; + return (1); + } + p->fts_instr = instr; + return (0); +} + +/* + * This is the tricky part -- do not casually change *anything* in here. The + * idea is to build the linked list of entries that are used by fts_children + * and fts_read. 
There are lots of special cases. + * + * The real slowdown in walking the tree is the stat calls. If FTS_NOSTAT is + * set and it's a physical walk (so that symbolic links can't be directories), + * we can do things quickly. First, if it's a 4.4BSD file system, the type + * of the file is in the directory entry. Otherwise, we assume that the number + * of subdirectories in a node is equal to the number of links to the parent. + * The former skips all stat calls. The latter skips stat calls in any leaf + * directories and for any files after the subdirectories in the directory have + * been found, cutting the stat calls by about 2/3. + */ +static FTSENT * +fts_build(FTS *sp) +{ + struct dirent *dp; + FTSENT *p, *head; + FTSENT *cur, *tail; + DIR *dirp; + void *oldaddr; + size_t dlen, len, maxlen; + int nitems, level, doadjust; + int saved_errno; + char *cp; + + /* Set current node pointer. */ + cur = sp->fts_cur; + + /* + * Open the directory for reading. If this fails, we're done. + * If being called from fts_read, set the fts_info field. + */ + if ((dirp = opendir(cur->fts_accpath)) == NULL) { + cur->fts_info = FTS_DNR; + cur->fts_errno = errno; + return (NULL); + } + + /* + * Figure out the max file name length that can be stored in the + * current path -- the inner loop allocates more path as necessary. + * We really wouldn't have to do the maxlen calculations here, we + * could do them in fts_read before returning the path, but it's a + * lot easier here since the length is part of the dirent structure. + * + * If not changing directories set a pointer so that can just append + * each new name into the path. + */ + len = NAPPEND(cur); + cp = sp->fts_path + len; + *cp++ = '/'; + len++; + maxlen = sp->fts_pathlen - len; + + /* + * fts_level is signed so we must prevent it from wrapping + * around to FTS_ROOTLEVEL and FTS_ROOTPARENTLEVEL. 
+ */ + level = cur->fts_level; + if (level < FTS_MAXLEVEL) + level++; + + /* Read the directory, attaching each entry to the `link' pointer. */ + doadjust = 0; + for (head = tail = NULL, nitems = 0; dirp && (dp = readdir(dirp));) { + if (ISDOT(dp->d_name)) + continue; + +#if HAVE_DIRENT_NAMLEN + dlen = dp->d_namlen; +#else + dlen = strlen(dp->d_name); +#endif + + if (!(p = fts_alloc(sp, dp->d_name, dlen))) + goto mem1; + if (dlen >= maxlen) { /* include space for NUL */ + oldaddr = sp->fts_path; + if (fts_palloc(sp, dlen + len + 1)) { + /* + * No more memory for path or structures. Save + * errno, free up the current structure and the + * structures already allocated. + */ +mem1: saved_errno = errno; + if (p) + free(p); + fts_lfree(head); + (void)closedir(dirp); + cur->fts_info = FTS_ERR; + SET(FTS_STOP); + errno = saved_errno; + return (NULL); + } + /* Did realloc() change the pointer? */ + if (oldaddr != sp->fts_path) { + doadjust = 1; + cp = sp->fts_path + len; + } + maxlen = sp->fts_pathlen - len; + } + + p->fts_level = level; + p->fts_parent = sp->fts_cur; + p->fts_pathlen = len + dlen; + if (p->fts_pathlen < len) { + /* + * If we wrap, free up the current structure and + * the structures already allocated, then error + * out with ENAMETOOLONG. + */ + free(p); + fts_lfree(head); + (void)closedir(dirp); + cur->fts_info = FTS_ERR; + SET(FTS_STOP); + errno = ENAMETOOLONG; + return (NULL); + } + + /* Build a file name for fts_stat to stat. */ + p->fts_accpath = p->fts_path; + memmove(cp, p->fts_name, p->fts_namelen + 1); + /* Stat it. */ + p->fts_info = fts_stat(sp, p); + + /* We walk in directory order so "ls -f" doesn't get upset. */ + p->fts_link = NULL; + if (head == NULL) + head = tail = p; + else { + tail->fts_link = p; + tail = p; + } + ++nitems; + } + if (dirp) + (void)closedir(dirp); + + /* + * If realloc() changed the address of the path, adjust the + * addresses for the rest of the tree and the dir list. 
+ */ + if (doadjust) + fts_padjust(sp, head); + + /* + * If not changing directories, reset the path back to original + * state. + */ + if (len == sp->fts_pathlen || nitems == 0) + --cp; + *cp = '\0'; + + /* If didn't find anything, return NULL. */ + if (!nitems) { + cur->fts_info = FTS_DP; + return (NULL); + } + return (head); +} + +static unsigned short +fts_stat(FTS *sp, FTSENT *p) +{ + FTSENT *t; + dev_t dev; + ino_t ino; + struct stat *sbp; + + /* If user needs stat info, stat buffer already allocated. */ + sbp = p->fts_statp; + + if (lstat(p->fts_accpath, sbp)) { + p->fts_errno = errno; + memset(sbp, 0, sizeof(struct stat)); + return (FTS_NS); + } + + if (S_ISDIR(sbp->st_mode)) { + /* + * Set the device/inode. Used to find cycles and check for + * crossing mount points. Also remember the link count, used + * in fts_build to limit the number of stat calls. It is + * understood that these fields are only referenced if fts_info + * is set to FTS_D. + */ + dev = p->fts_dev = sbp->st_dev; + ino = p->fts_ino = sbp->st_ino; + p->fts_nlink = sbp->st_nlink; + + if (ISDOT(p->fts_name)) + return (FTS_DOT); + + /* + * Cycle detection is done by brute force when the directory + * is first encountered. If the tree gets deep enough or the + * number of symbolic links to directories is high enough, + * something faster might be worthwhile. 
+ */ + for (t = p->fts_parent; + t->fts_level >= FTS_ROOTLEVEL; t = t->fts_parent) + if (ino == t->fts_ino && dev == t->fts_dev) { + p->fts_cycle = t; + return (FTS_DC); + } + return (FTS_D); + } + if (S_ISLNK(sbp->st_mode)) + return (FTS_SL); + if (S_ISREG(sbp->st_mode)) + return (FTS_F); + return (FTS_DEFAULT); +} + +static FTSENT * +fts_alloc(FTS *sp, const char *name, size_t namelen) +{ + FTSENT *p; + size_t len; + + len = sizeof(FTSENT) + namelen; + if ((p = calloc(1, len)) == NULL) + return (NULL); + + p->fts_path = sp->fts_path; + p->fts_namelen = namelen; + p->fts_instr = FTS_NOINSTR; + p->fts_statp = malloc(sizeof(struct stat)); + if (p->fts_statp == NULL) { + free(p); + return (NULL); + } + memcpy(p->fts_name, name, namelen); + + return (p); +} + +static void +fts_lfree(FTSENT *head) +{ + FTSENT *p; + + /* Free a linked list of structures. */ + while ((p = head)) { + head = head->fts_link; + free(p); + } +} + +/* + * Allow essentially unlimited paths; find, rm, ls should all work on any tree. + * Most systems will allow creation of paths much longer than PATH_MAX, even + * though the kernel won't resolve them. Add the size (not just what's needed) + * plus 256 bytes so don't realloc the path 2 bytes at a time. + */ +static int +fts_palloc(FTS *sp, size_t more) +{ + char *p; + + /* + * Check for possible wraparound. + */ + more += 256; + if (sp->fts_pathlen + more < sp->fts_pathlen) { + if (sp->fts_path) + free(sp->fts_path); + sp->fts_path = NULL; + errno = ENAMETOOLONG; + return (1); + } + sp->fts_pathlen += more; + p = realloc(sp->fts_path, sp->fts_pathlen); + if (p == NULL) { + if (sp->fts_path) + free(sp->fts_path); + sp->fts_path = NULL; + return (1); + } + sp->fts_path = p; + return (0); +} + +/* + * When the path is realloc'd, have to fix all of the pointers in structures + * already returned. 
+ */ +static void +fts_padjust(FTS *sp, FTSENT *head) +{ + FTSENT *p; + char *addr = sp->fts_path; + +#define ADJUST(p) { \ + if ((p)->fts_accpath != (p)->fts_name) { \ + (p)->fts_accpath = \ + (char *)addr + ((p)->fts_accpath - (p)->fts_path); \ + } \ + (p)->fts_path = addr; \ +} + /* Adjust the current set of children. */ + for (p = sp->fts_child; p; p = p->fts_link) + ADJUST(p); + + /* Adjust the rest of the tree, including the current level. */ + for (p = head; p->fts_level >= FTS_ROOTLEVEL;) { + ADJUST(p); + p = p->fts_link ? p->fts_link : p->fts_parent; + } +} + +static size_t +fts_maxarglen(char * const *argv) +{ + size_t len, max; + + for (max = 0; *argv; ++argv) + if ((len = strlen(*argv)) > max) + max = len; + return (max + 1); +} + +#endif diff --git a/contrib/mdocml/compat_fts.h b/contrib/mdocml/compat_fts.h new file mode 100644 index 0000000..1eed2ae --- /dev/null +++ b/contrib/mdocml/compat_fts.h @@ -0,0 +1,101 @@ +/* $OpenBSD: fts.h,v 1.14 2012/12/05 23:19:57 deraadt Exp $ */ +/* $NetBSD: fts.h,v 1.7 2012/03/01 16:18:51 hans Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)fts.h 8.3 (Berkeley) 8/14/94 + */ + +#ifndef _FTS_H_ +#define _FTS_H_ + +typedef struct { + struct _ftsent *fts_cur; /* current node */ + struct _ftsent *fts_child; /* linked list of children */ + dev_t fts_dev; /* starting device # */ + char *fts_path; /* path for this descent */ + size_t fts_pathlen; /* sizeof(path) */ + +#define FTS_NOCHDIR 0x0004 /* don't change directories */ +#define FTS_PHYSICAL 0x0010 /* physical walk */ +#define FTS_XDEV 0x0040 /* don't cross devices */ +#define FTS_OPTIONMASK 0x0054 /* valid user option mask */ + +#define FTS_STOP 0x2000 /* (private) unrecoverable error */ + int fts_options; /* fts_open options, global flags */ +} FTS; + +typedef struct _ftsent { + struct _ftsent *fts_cycle; /* cycle node */ + struct _ftsent *fts_parent; /* parent directory */ + struct _ftsent *fts_link; /* next file in directory */ + char *fts_accpath; /* access path */ + char *fts_path; /* root path */ + int fts_errno; /* errno for this node */ + size_t fts_pathlen; /* strlen(fts_path) */ + size_t fts_namelen; /* strlen(fts_name) */ + + ino_t fts_ino; /* inode */ + dev_t fts_dev; /* device */ + nlink_t fts_nlink; /* link count */ + +#define FTS_ROOTPARENTLEVEL -1 
+#define FTS_ROOTLEVEL 0 +#define FTS_MAXLEVEL 0x7fffffff + int fts_level; /* depth (-1 to N) */ + +#define FTS_D 1 /* preorder directory */ +#define FTS_DC 2 /* directory that causes cycles */ +#define FTS_DEFAULT 3 /* none of the above */ +#define FTS_DNR 4 /* unreadable directory */ +#define FTS_DOT 5 /* dot or dot-dot */ +#define FTS_DP 6 /* postorder directory */ +#define FTS_ERR 7 /* error; errno is set */ +#define FTS_F 8 /* regular file */ +#define FTS_INIT 9 /* initialized only */ +#define FTS_NS 10 /* stat(2) failed */ +#define FTS_NSOK 11 /* no stat(2) requested */ +#define FTS_SL 12 /* symbolic link */ + unsigned short fts_info; /* user flags for FTSENT structure */ + +#define FTS_NOINSTR 3 /* no instructions */ +#define FTS_SKIP 4 /* discard node */ + unsigned short fts_instr; /* fts_set() instructions */ + + struct stat *fts_statp; /* stat(2) information */ + char fts_name[1]; /* file name */ +} FTSENT; + + +int fts_close(FTS *); +FTS *fts_open(char * const *, int, void *); +FTSENT *fts_read(FTS *); +int fts_set(FTS *, FTSENT *, int); + +#endif /* !_FTS_H_ */ diff --git a/contrib/mdocml/compat_getline.c b/contrib/mdocml/compat_getline.c new file mode 100644 index 0000000..aed4754 --- /dev/null +++ b/contrib/mdocml/compat_getline.c @@ -0,0 +1,68 @@ +#include "config.h" + +#if HAVE_GETLINE + +int dummy; + +#else + +/* $Id: compat_getline.c,v 1.1 2015/11/07 20:52:52 schwarze Exp $ */ +/* + * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> + +ssize_t +getline(char **buf, size_t *bufsz, FILE *fp) +{ + char *nbuf; + size_t nbufsz, pos; + int c; + + if (buf == NULL || bufsz == NULL) { + errno = EINVAL; + return -1; + } + + if (*buf == NULL) + *bufsz = 0; + else + **buf = '\0'; + + pos = 0; + for (;;) { + if (pos + 1 >= *bufsz) { + nbufsz = *bufsz ? *bufsz * 2 : BUFSIZ; + if ((nbuf = realloc(*buf, nbufsz)) == NULL) + return -1; + *buf = nbuf; + *bufsz = nbufsz; + } + if ((c = fgetc(fp)) == EOF) { + (*buf)[pos] = '\0'; + return pos > 0 && feof(fp) ? (ssize_t)pos : -1; + } + (*buf)[pos++] = c; + (*buf)[pos] = '\0'; + if (c == '\n') + return pos; + } +} + +#endif diff --git a/contrib/mdocml/compat_getsubopt.c b/contrib/mdocml/compat_getsubopt.c new file mode 100644 index 0000000..880f2f7 --- /dev/null +++ b/contrib/mdocml/compat_getsubopt.c @@ -0,0 +1,96 @@ +#include "config.h" + +#if HAVE_GETSUBOPT + +int dummy; + +#else + +/* $Id: compat_getsubopt.c,v 1.5 2014/08/17 20:53:50 schwarze Exp $ */ +/* $OpenBSD: getsubopt.c,v 1.4 2005/08/08 08:05:36 espie Exp $ */ + +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +/* Fallback getsubopt(3): take the next token from the comma, space or tab separated list at *optionp. The list is edited in place (token and optional =value are NUL-terminated), *valuep is set to the value or NULL, and *optionp is advanced past the token. Returns the index of the token in the NULL-terminated tokens array, or -1 if there is no token or it is not in the array. */ +int +getsubopt(char **optionp, char * const *tokens, char **valuep) +{ + int cnt; + char *suboptarg; + char *p; + + suboptarg = *valuep = NULL; + + if (!optionp || !*optionp) + return(-1); + + /* skip leading white-space, commas */ + for (p = *optionp; *p && (*p == ',' || *p == ' ' || *p == '\t'); ++p); + + if (!*p) { + *optionp = p; + return(-1); + } + + /* save the start of the token, and skip the rest of the token. */ + for (suboptarg = p; + *++p && *p != ',' && *p != '=' && *p != ' ' && *p != '\t';); + + if (*p) { + /* + * If there's an equals sign, set the value pointer, and + * skip over the value part of the token. Terminate the + * token.
+ */ + if (*p == '=') { + *p = '\0'; + for (*valuep = ++p; + *p && *p != ',' && *p != ' ' && *p != '\t'; ++p); + if (*p) + *p++ = '\0'; + } else + *p++ = '\0'; + /* Skip any whitespace or commas after this token. */ + for (; *p && (*p == ',' || *p == ' ' || *p == '\t'); ++p); + } + + /* set optionp for next round. */ + *optionp = p; + /* look the token up in the caller-supplied table; *optionp has already been advanced even if the lookup fails */ + for (cnt = 0; *tokens; ++tokens, ++cnt) + if (!strcmp(suboptarg, *tokens)) + return(cnt); + return(-1); +} + +#endif diff --git a/contrib/mdocml/compat_isblank.c b/contrib/mdocml/compat_isblank.c new file mode 100644 index 0000000..9e3c747 --- /dev/null +++ b/contrib/mdocml/compat_isblank.c @@ -0,0 +1,33 @@ +#include "config.h" + +#if HAVE_ISBLANK + +int dummy; + +#else + +/* $Id: compat_isblank.c,v 1.2 2015/10/06 18:32:19 schwarze Exp $ */ +/* + * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */ +/* Fallback isblank(3): returns nonzero only for space and horizontal tab; no locale is consulted. */ +int +isblank(int c) +{ + + return c == ' ' || c == '\t'; +} + +#endif diff --git a/contrib/mdocml/compat_mkdtemp.c b/contrib/mdocml/compat_mkdtemp.c new file mode 100644 index 0000000..1fcb325 --- /dev/null +++ b/contrib/mdocml/compat_mkdtemp.c @@ -0,0 +1,61 @@ +#include "config.h" + +#if HAVE_MKDTEMP + +int dummy; + +#else + +/* $Id: compat_mkdtemp.c,v 1.2 2015/10/06 18:32:19 schwarze Exp $ */ +/* + * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * The algorithm of this function is inspired by OpenBSD mkdtemp(3) + * by Theo de Raadt and Todd Miller, but the code differs.
+ */ + +#include <sys/stat.h> +#include <errno.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> +/* Fallback mkdtemp(3): path is a template ending in a run of X characters. Repeatedly lets mktemp(3) fill in a name and tries to mkdir(2) it with mode 0700. Returns path on success; returns NULL with errno set on failure (EEXIST when mktemp fails or every attempt collided, otherwise the errno left by mkdir). */ +char * +mkdtemp(char *path) +{ + char *start, *cp; + unsigned int tries; + + start = strchr(path, '\0'); /* end of the string */ + while (start > path && start[-1] == 'X') /* back up to the first of the trailing X characters */ + start--; + + for (tries = INT_MAX; tries; tries--) { + if (mktemp(path) == NULL) { + errno = EEXIST; + return NULL; + } + if (mkdir(path, S_IRUSR | S_IWUSR | S_IXUSR) == 0) + return path; + if (errno != EEXIST) + return NULL; + for (cp = start; *cp != '\0'; cp++) /* name was taken: restore the X run so mktemp can pick another */ + *cp = 'X'; + } + errno = EEXIST; + return NULL; +} + +#endif diff --git a/contrib/mdocml/compat_ohash.c b/contrib/mdocml/compat_ohash.c new file mode 100644 index 0000000..cbd6052 --- /dev/null +++ b/contrib/mdocml/compat_ohash.c @@ -0,0 +1,339 @@ +#include "config.h" + +#if HAVE_OHASH + +int dummy; + +#else + +/* $OpenBSD: ohash.c,v 1.1 2014/06/02 18:52:03 deraadt Exp $ */ + +/* Copyright (c) 1999, 2004 Marc Espie <espie@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */ + +#include <sys/types.h> + +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include "compat_ohash.h" + +struct _ohash_record { /* one table slot */ + uint32_t hv; /* hash value stored for the slot */ + const char *p; /* client record; NULL when the slot was never used */ +}; + +#define DELETED ((const char *)h) /* marker stored in a slot whose entry was removed; needs a variable h in scope */ +#define NONE (h->size) /* out-of-range index meaning that no reusable slot has been seen */ + +/* Don't bother changing the hash table if the change is small enough. */ +#define MINSIZE (1UL << 4) +#define MINDELETED 4 + +static void ohash_resize(struct ohash *); + + +/* This handles the common case of variable length keys, where the + * key is stored at the end of the record. + */ +void * +ohash_create_entry(struct ohash_info *i, const char *start, const char **end) +{ + char *p; + + if (!*end) /* no end given: the key runs to the NUL of start */ + *end = start + strlen(start); + p = (i->alloc)(i->key_offset + (*end - start) + 1, i->data); /* key_offset bytes of client data, then the key and its NUL */ + if (p) { + memcpy(p+i->key_offset, start, *end-start); + p[i->key_offset + (*end - start)] = '\0'; + } + return (void *)p; /* NULL if the alloc callback failed */ +} + +/* hash_delete only frees the hash structure. Use hash_first/hash_next + * to free entries as well.
*/ +void +ohash_delete(struct ohash *h) +{ + (h->info.free)(h->t, h->info.data); +#ifndef NDEBUG + h->t = NULL; +#endif +} +/* Rebuild the slot array. The size doubles when fewer than a quarter of the used slots are deleted, halves when more than two thirds are, and otherwise stays the same, never dropping below MINSIZE. Live entries are rehashed and deleted markers are dropped. If the calloc callback fails the table is left untouched. */ +static void +ohash_resize(struct ohash *h) +{ + struct _ohash_record *n; + size_t ns; + unsigned int j; + unsigned int i, incr; + + if (4 * h->deleted < h->total) { + if (h->size >= (UINT_MAX >> 1U)) + ns = UINT_MAX; + else + ns = h->size << 1U; + } else if (3 * h->deleted > 2 * h->total) + ns = h->size >> 1U; + else + ns = h->size; + if (ns < MINSIZE) + ns = MINSIZE; +#ifdef STATS_HASH + STAT_HASH_EXPAND++; + STAT_HASH_SIZE += ns - h->size; +#endif + + n = (h->info.calloc)(ns, sizeof(struct _ohash_record), h->info.data); + if (!n) + return; + + for (j = 0; j < h->size; j++) { + if (h->t[j].p != NULL && h->t[j].p != DELETED) { + i = h->t[j].hv % ns; + incr = ((h->t[j].hv % (ns - 2)) & ~1) + 1; /* odd probe step derived from the hash value */ + while (n[i].p != NULL) { + i += incr; + if (i >= ns) + i -= ns; + } + n[i].hv = h->t[j].hv; + n[i].p = h->t[j].p; + } + } + (h->info.free)(h->t, h->info.data); + h->t = n; + h->size = ns; + h->total -= h->deleted; + h->deleted = 0; +} +/* Remove and return the entry in slot i, or return NULL if the slot holds no entry. The slot is marked DELETED; the table is resized once at least MINDELETED slots, and more than a quarter of the used ones, are deleted. */ +void * +ohash_remove(struct ohash *h, unsigned int i) +{ + void *result = (void *)h->t[i].p; + + if (result == NULL || result == DELETED) + return NULL; + +#ifdef STATS_HASH + STAT_HASH_ENTRIES--; +#endif + h->t[i].p = DELETED; + h->deleted++; + if (h->deleted >= MINDELETED && 4 * h->deleted > h->total) + ohash_resize(h); + return result; +} +/* Return the entry stored in slot i, or NULL if the slot is unused or deleted. */ +void * +ohash_find(struct ohash *h, unsigned int i) +{ + if (h->t[i].p == DELETED) + return NULL; + else + return (void *)h->t[i].p; +} +/* Store p in slot i and return p. Reusing a deleted slot only lowers the deleted count; filling a fresh slot raises the used count and may trigger a resize. */ +void * +ohash_insert(struct ohash *h, unsigned int i, void *p) +{ +#ifdef STATS_HASH + STAT_HASH_ENTRIES++; +#endif + if (h->t[i].p == DELETED) { + h->deleted--; + h->t[i].p = p; + } else { + h->t[i].p = p; + /* Arbitrary resize boundary. Tweak if not efficient enough.
*/ + if (++h->total * 4 > h->size * 3) + ohash_resize(h); + } + return p; +} +/* Number of live entries: used slots minus deleted ones. */ +unsigned int +ohash_entries(struct ohash *h) +{ + return h->total - h->deleted; +} +/* Start an iteration: reset *pos and return the first live entry, or NULL if there is none. */ +void * +ohash_first(struct ohash *h, unsigned int *pos) +{ + *pos = 0; + return ohash_next(h, pos); +} +/* Return the next live entry at or after slot *pos and leave *pos just past it; return NULL once the end of the table is reached. */ +void * +ohash_next(struct ohash *h, unsigned int *pos) +{ + for (; *pos < h->size; (*pos)++) + if (h->t[*pos].p != DELETED && h->t[*pos].p != NULL) + return (void *)h->t[(*pos)++].p; + return NULL; +} +/* Initialise h with 1 << size slots (at least MINSIZE), copying the callbacks from info. The result of the calloc callback is stored in h->t without being checked here. */ +void +ohash_init(struct ohash *h, unsigned int size, struct ohash_info *info) +{ + h->size = 1UL << size; + if (h->size < MINSIZE) + h->size = MINSIZE; +#ifdef STATS_HASH + STAT_HASH_CREATION++; + STAT_HASH_SIZE += h->size; +#endif + /* Copy info so that caller may free it. */ + h->info.key_offset = info->key_offset; + h->info.calloc = info->calloc; + h->info.free = info->free; + h->info.alloc = info->alloc; + h->info.data = info->data; + h->t = (h->info.calloc)(h->size, sizeof(struct _ohash_record), + h->info.data); + h->total = h->deleted = 0; +} +/* Hash the bytes from s up to but not including *e. If *e is NULL it is first set to the terminating NUL of s. An empty interval hashes to 0. */ +uint32_t +ohash_interval(const char *s, const char **e) +{ + uint32_t k; + + if (!*e) + *e = s + strlen(s); + if (s == *e) + k = 0; + else + k = *s++; + while (s != *e) + k = ((k << 2) | (k >> 30)) ^ *s++; /* rotate left by two bits, then mix in the next byte */ + return k; +} +/* Look up the key given by the bytes from start up to end, with hash value hv. Returns the slot of the matching entry, or else the slot where such an entry should be inserted (the first deleted slot met on the probe path if any, otherwise the unused slot that ended the search); in that case hv is stored in the slot. A match found after a deleted slot is moved into that deleted slot. */ +unsigned int +ohash_lookup_interval(struct ohash *h, const char *start, const char *end, + uint32_t hv) +{ + unsigned int i, incr; + unsigned int empty; + +#ifdef STATS_HASH + STAT_HASH_LOOKUP++; +#endif + empty = NONE; + i = hv % h->size; + incr = ((hv % (h->size-2)) & ~1) + 1; + while (h->t[i].p != NULL) { +#ifdef STATS_HASH + STAT_HASH_LENGTH++; +#endif + if (h->t[i].p == DELETED) { + if (empty == NONE) + empty = i; + } else if (h->t[i].hv == hv && + strncmp(h->t[i].p+h->info.key_offset, start, + end - start) == 0 && + (h->t[i].p+h->info.key_offset)[end-start] == '\0') { + if (empty != NONE) { + h->t[empty].hv = hv; + h->t[empty].p = h->t[i].p; + h->t[i].p = DELETED; + return empty; + } else { +#ifdef STATS_HASH +
STAT_HASH_POSITIVE++; +#endif + return i; + } + } + i += incr; + if (i >= h->size) + i -= h->size; + } + + /* Found an empty position. */ + if (empty != NONE) + i = empty; + h->t[i].hv = hv; + return i; +} +/* Same search as ohash_lookup_interval, except that the stored key is compared with memcmp against the size bytes at k. */ +unsigned int +ohash_lookup_memory(struct ohash *h, const char *k, size_t size, uint32_t hv) +{ + unsigned int i, incr; + unsigned int empty; + +#ifdef STATS_HASH + STAT_HASH_LOOKUP++; +#endif + empty = NONE; + i = hv % h->size; + incr = ((hv % (h->size-2)) & ~1) + 1; + while (h->t[i].p != NULL) { +#ifdef STATS_HASH + STAT_HASH_LENGTH++; +#endif + if (h->t[i].p == DELETED) { + if (empty == NONE) + empty = i; + } else if (h->t[i].hv == hv && + memcmp(h->t[i].p+h->info.key_offset, k, size) == 0) { + if (empty != NONE) { + h->t[empty].hv = hv; + h->t[empty].p = h->t[i].p; + h->t[i].p = DELETED; + return empty; + } else { +#ifdef STATS_HASH + STAT_HASH_POSITIVE++; +#endif + } return i; + } + i += incr; + if (i >= h->size) + i -= h->size; + } + + /* Found an empty position. */ + if (empty != NONE) + i = empty; + h->t[i].hv = hv; + return i; +} +/* Hash the NUL-terminated string s and look it up. */ +unsigned int +ohash_qlookup(struct ohash *h, const char *s) +{ + const char *e = NULL; + return ohash_qlookupi(h, s, &e); +} +/* Hash the bytes from s up to *e (set to the end of s when NULL) and look that key up. */ +unsigned int +ohash_qlookupi(struct ohash *h, const char *s, const char **e) +{ + uint32_t hv; + + hv = ohash_interval(s, e); + return ohash_lookup_interval(h, s, *e, hv); +} + +#endif /*!HAVE_OHASH*/ diff --git a/contrib/mdocml/compat_ohash.h b/contrib/mdocml/compat_ohash.h new file mode 100644 index 0000000..58fb220 --- /dev/null +++ b/contrib/mdocml/compat_ohash.h @@ -0,0 +1,72 @@ +/* $OpenBSD: ohash.h,v 1.2 2014/06/02 18:52:03 deraadt Exp $ */ + +/* Copyright (c) 1999, 2004 Marc Espie <espie@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies.
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef OHASH_H +#define OHASH_H + +/* Open hashing support. + * Open hashing was chosen because it is much lighter than other hash + * techniques, and more efficient in most cases. + */ + +/* user-visible data structure */ +struct ohash_info { + ptrdiff_t key_offset; /* byte offset of the key inside a client record */ + void *data; /* user data */ + void *(*calloc)(size_t, size_t, void *); /* allocates the slot array; last argument is data */ + void (*free)(void *, void *); /* releases the slot array; last argument is data */ + void *(*alloc)(size_t, void *); /* allocates one record in ohash_create_entry; last argument is data */ +}; + +struct _ohash_record; + +/* private structure. It's there just so you can do a sizeof */ +struct ohash { + struct _ohash_record *t; /* slot array */ + struct ohash_info info; /* private copy of the client callbacks */ + unsigned int size; /* number of slots in t */ + unsigned int total; /* used slots, deleted ones included */ + unsigned int deleted; /* slots holding the deleted marker */ +}; + +/* For this to be tweakable, we use small primitives, and leave part of the + * logic to the client application. e.g., hashing is left to the client + * application. We also provide a simple table entry lookup that yields + * a hashing table index (opaque) to be used in find/insert/remove. + * The keys are stored at a known position in the client data.
+ */ +void ohash_init(struct ohash *, unsigned, struct ohash_info *); +void ohash_delete(struct ohash *); + +unsigned int ohash_lookup_interval(struct ohash *, const char *, + const char *, uint32_t); +unsigned int ohash_lookup_memory(struct ohash *, const char *, + size_t, uint32_t); +void *ohash_find(struct ohash *, unsigned int); +void *ohash_remove(struct ohash *, unsigned int); +void *ohash_insert(struct ohash *, unsigned int, void *); +void *ohash_first(struct ohash *, unsigned int *); +void *ohash_next(struct ohash *, unsigned int *); +unsigned int ohash_entries(struct ohash *); + +void *ohash_create_entry(struct ohash_info *, const char *, const char **); +uint32_t ohash_interval(const char *, const char **); + +unsigned int ohash_qlookupi(struct ohash *, const char *, const char **); +unsigned int ohash_qlookup(struct ohash *, const char *); + +#endif diff --git a/contrib/mdocml/compat_progname.c b/contrib/mdocml/compat_progname.c new file mode 100644 index 0000000..9840cc7 --- /dev/null +++ b/contrib/mdocml/compat_progname.c @@ -0,0 +1,42 @@ +#include "config.h" + +#if HAVE_PROGNAME + +int dummy; + +#else + +/* $Id: compat_progname.c,v 1.1 2015/11/06 16:30:33 schwarze Exp $ */ +/* + * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +static const char *progname; /* pointer recorded by setprogname(); NULL until it is called */ +/* Fallback setprogname(3): remembers the pointer itself, not a copy, so name must stay valid for as long as getprogname() is used. */ +void +setprogname(const char *name) +{ + + progname = name; +} +/* Fallback getprogname(3): returns whatever was last passed to setprogname(), unmodified. */ +const char * +getprogname(void) +{ + + return progname; +} + +#endif diff --git a/contrib/mdocml/compat_reallocarray.c b/contrib/mdocml/compat_reallocarray.c new file mode 100644 index 0000000..6615190 --- /dev/null +++ b/contrib/mdocml/compat_reallocarray.c @@ -0,0 +1,49 @@ +#include "config.h" + +#if HAVE_REALLOCARRAY + +int dummy; + +#else + +/* $Id: compat_reallocarray.c,v 1.4 2014/12/11 09:05:01 schwarze Exp $ */ +/* $OpenBSD: reallocarray.c,v 1.2 2014/12/08 03:45:00 bcook Exp $ */ +/* + * Copyright (c) 2008 Otto Moerbeek <otto@drijf.net> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */ + +#include <sys/types.h> +#include <errno.h> +#include <stdint.h> +#include <stdlib.h> + +/* + * This is sqrt(SIZE_MAX+1), as s1*s2 <= SIZE_MAX + * if both s1 < MUL_NO_OVERFLOW and s2 < MUL_NO_OVERFLOW + */ +#define MUL_NO_OVERFLOW ((size_t)1 << (sizeof(size_t) * 4)) +/* Fallback reallocarray(3): realloc(optr, nmemb * size), except that a product that would overflow size_t fails with NULL and errno set to ENOMEM, leaving optr untouched. */ +void * +reallocarray(void *optr, size_t nmemb, size_t size) +{ + if ((nmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && /* the division is only needed when one factor is large */ + nmemb > 0 && SIZE_MAX / nmemb < size) { + errno = ENOMEM; + return NULL; + } + return realloc(optr, size * nmemb); +} + +#endif /*!HAVE_REALLOCARRAY*/ diff --git a/contrib/mdocml/compat_sqlite3_errstr.c b/contrib/mdocml/compat_sqlite3_errstr.c new file mode 100644 index 0000000..8a6ace2 --- /dev/null +++ b/contrib/mdocml/compat_sqlite3_errstr.c @@ -0,0 +1,16 @@ +#include "config.h" + +#if HAVE_SQLITE3_ERRSTR + +int dummy; + +#else +/* Fallback sqlite3_errstr(): only tells zero from nonzero result codes; the specific code is not described. */ +const char * +sqlite3_errstr(int rc) +{ + + return rc ? "unknown error" : "not an error"; +} + +#endif diff --git a/contrib/mdocml/compat_strcasestr.c b/contrib/mdocml/compat_strcasestr.c new file mode 100644 index 0000000..62c0ff7 --- /dev/null +++ b/contrib/mdocml/compat_strcasestr.c @@ -0,0 +1,73 @@ +#include "config.h" + +#if HAVE_STRCASESTR + +int dummy; + +#else + +/* $Id: compat_strcasestr.c,v 1.4 2014/12/11 09:19:32 schwarze Exp $ */ +/* $NetBSD: strcasestr.c,v 1.3 2005/11/29 03:12:00 christos Exp $ */ + +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Chris Torek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <ctype.h> +#include <string.h> + +#define __UNCONST(a) ((void *)(unsigned long)(const void *)(a)) + +/* + * Find the first occurrence of find in s, ignore case. 
+ */ +char * +strcasestr(const char *s, const char *find) +{ + char c, sc; + size_t len; + + if ((c = *find++) != 0) { /* an empty find matches at the start of s */ + c = tolower((unsigned char)c); + len = strlen(find); /* find now points past its first byte, so this is the length of the remainder */ + do { + do { /* advance s to just past the next byte equal, ignoring case, to the first byte of find */ + if ((sc = *s++) == 0) + return (NULL); + } while ((char)tolower((unsigned char)sc) != c); + } while (strncasecmp(s, find, len) != 0); + s--; /* step back onto the first byte of the match */ + } + return __UNCONST(s); +} + +#endif diff --git a/contrib/mdocml/compat_stringlist.c b/contrib/mdocml/compat_stringlist.c new file mode 100644 index 0000000..17eba77 --- /dev/null +++ b/contrib/mdocml/compat_stringlist.c @@ -0,0 +1,119 @@ +#include "config.h" + +#if HAVE_STRINGLIST + +int dummy; + +#else + +/* $Id: compat_stringlist.c,v 1.6 2015/11/07 14:22:29 schwarze Exp $ */ +/* + * Copyright (c) 1994 Christos Zoulas <christos@netbsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_ERR +#include <err.h> +#endif +#include <stdlib.h> +#include <string.h> +#include "compat_stringlist.h" + +#define _SL_CHUNKSIZE 20 + +/* + * sl_init(): Initialize a string list; exits via err(3) if memory cannot be allocated + */ +StringList * +sl_init(void) +{ + StringList *sl; + + sl = malloc(sizeof(StringList)); + if (sl == NULL) + err(1, "stringlist"); + + sl->sl_cur = 0; + sl->sl_max = _SL_CHUNKSIZE; + sl->sl_str = reallocarray(NULL, sl->sl_max, sizeof(char *)); + if (sl->sl_str == NULL) + err(1, "stringlist"); + return sl; +} + + +/* + * sl_add(): Add an item to the string list; the pointer is stored, not copied. Returns 0, or -1 on allocation failure with the list left unchanged + */ +int +sl_add(StringList *sl, char *name) +{ + if (sl->sl_cur == sl->sl_max - 1) { /* grow one slot early, as the original code did */ + char **nstr = reallocarray(sl->sl_str, + sl->sl_max + _SL_CHUNKSIZE, sizeof(char *)); + if (nstr == NULL) /* old array and sl_max are still valid, so sl_find and sl_free keep working */ + return (-1); + sl->sl_str = nstr; sl->sl_max += _SL_CHUNKSIZE; + } + sl->sl_str[sl->sl_cur++] = name; + return (0); +} + + +/* + * sl_free(): Free a stringlist; if all is nonzero the stored strings are freed too + */ +void +sl_free(StringList *sl, int all) +{ + size_t i; + + if (sl == NULL) + return; + if (sl->sl_str) { + if (all) + for (i = 0; i < sl->sl_cur; i++) + free(sl->sl_str[i]); + free(sl->sl_str); + } + free(sl); +} + + +/* + * sl_find(): Find a name in the string list; returns the stored pointer or NULL + */ +char * +sl_find(StringList *sl, const char *name) +{ + size_t i; + + for (i = 0; i < sl->sl_cur; i++) + if (strcmp(sl->sl_str[i], name) == 0) + return sl->sl_str[i]; + + return NULL; +} + +#endif diff --git a/contrib/mdocml/compat_stringlist.h b/contrib/mdocml/compat_stringlist.h new file mode 100644 index
0000000..f04e843 --- /dev/null +++ b/contrib/mdocml/compat_stringlist.h @@ -0,0 +1,45 @@ +/* $Id: compat_stringlist.h,v 1.4 2015/11/07 14:01:16 schwarze Exp $ */ +/* $NetBSD: stringlist.h,v 1.2 1997/01/17 06:11:36 lukem Exp $ */ + +/* + * Copyright (c) 1994 Christos Zoulas <christos@netbsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/types.h> + +/* + * Simple string list + */ +typedef struct _stringlist { + char **sl_str; /* array of stored string pointers */ + size_t sl_max; /* number of slots allocated in sl_str */ + size_t sl_cur; /* number of slots in use */ +} StringList; + + +StringList *sl_init(void); +int sl_add(StringList *, char *); +void sl_free(StringList *, int); +char *sl_find(StringList *, const char *); diff --git a/contrib/mdocml/compat_strlcat.c b/contrib/mdocml/compat_strlcat.c new file mode 100644 index 0000000..acaae4f --- /dev/null +++ b/contrib/mdocml/compat_strlcat.c @@ -0,0 +1,65 @@ +#include "config.h" + +#if HAVE_STRLCAT + +int dummy; + +#else + +/* $OpenBSD: strlcat.c,v 1.13 2005/08/08 08:05:37 espie Exp $ */ + +/* + * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <string.h> + +/* + * Appends src to string dst of size siz (unlike strncat, siz is the + * full size of dst, not space left). At most siz-1 characters + * will be copied. Always NUL terminates (unless siz <= strlen(dst)). + * Returns strlen(src) + MIN(siz, strlen(initial dst)). + * If retval >= siz, truncation occurred.
+ */ +size_t +strlcat(char *dst, const char *src, size_t siz) +{ + char *d = dst; + const char *s = src; + size_t n = siz; + size_t dlen; + + /* Find the end of dst and adjust bytes left but don't go past end */ + while (n-- != 0 && *d != '\0') + d++; + dlen = d - dst; + n = siz - dlen; /* bytes left in dst, including the one for the NUL */ + + if (n == 0) /* no NUL within the first siz bytes of dst: nothing is written */ + return(dlen + strlen(s)); + while (*s != '\0') { + if (n != 1) { /* keep the last byte for the NUL; src is still walked to its end to measure it */ + *d++ = *s; + n--; + } + s++; + } + *d = '\0'; + + return(dlen + (s - src)); /* count does not include NUL */ +} + +#endif diff --git a/contrib/mdocml/compat_strlcpy.c b/contrib/mdocml/compat_strlcpy.c new file mode 100644 index 0000000..a00d511 --- /dev/null +++ b/contrib/mdocml/compat_strlcpy.c @@ -0,0 +1,61 @@ +#include "config.h" + +#if HAVE_STRLCPY + +int dummy; + +#else + +/* $OpenBSD: strlcpy.c,v 1.11 2006/05/05 15:27:38 millert Exp $ */ + +/* + * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <string.h> + +/* + * Copy src to string dst of size siz. At most siz-1 characters + * will be copied. Always NUL terminates (unless siz == 0). + * Returns strlen(src); if retval >= siz, truncation occurred.
+ */ +size_t +strlcpy(char *dst, const char *src, size_t siz) +{ + char *d = dst; + const char *s = src; + size_t n = siz; + + /* Copy as many bytes as will fit */ + if (n != 0) { + while (--n != 0) { /* n reaches 0 only if dst ran out before the NUL of src was copied */ + if ((*d++ = *s++) == '\0') + break; + } + } + + /* Not enough room in dst, add NUL and traverse rest of src */ + if (n == 0) { + if (siz != 0) + *d = '\0'; /* NUL-terminate dst */ + while (*s++) /* leaves s one past the NUL of src, as the copy loop does when it breaks */ + ; + } + + return(s - src - 1); /* count does not include NUL */ +} + +#endif diff --git a/contrib/mdocml/compat_strsep.c b/contrib/mdocml/compat_strsep.c new file mode 100644 index 0000000..1df5758 --- /dev/null +++ b/contrib/mdocml/compat_strsep.c @@ -0,0 +1,79 @@ +#include "config.h" + +#if HAVE_STRSEP + +int dummy; + +#else + +/* $Id: compat_strsep.c,v 1.4 2014/12/11 09:05:01 schwarze Exp $ */ +/* $OpenBSD: strsep.c,v 1.7 2014/02/05 20:42:32 stsp Exp $ */ + +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
/*
 * Split the next token off the string at *stringp.
 *
 * The token ends at the first byte that also occurs in delim; that byte
 * is overwritten with NUL and *stringp is advanced to the byte after it.
 * If no delimiter is found, the rest of the string is the token and
 * *stringp is set to NULL.  Adjacent delimiters yield empty tokens.
 * Returns the start of the token, or NULL if *stringp was already NULL.
 */
char *
strsep(char **stringp, const char *delim)
{
	char *token = *stringp;
	char *cur;
	const char *dp;

	if (token == NULL)
		return NULL;

	for (cur = token; *cur != '\0'; cur++) {
		for (dp = delim; *dp != '\0'; dp++) {
			if (*dp == *cur) {
				/* Cut the token here; resume after the cut. */
				*cur = '\0';
				*stringp = cur + 1;
				return token;
			}
		}
	}

	/* Reached the end of the input: this was the last token. */
	*stringp = NULL;
	return token;
}
/*
 * Parse numstr as a base-10 integer that must lie within [minval, maxval].
 *
 * On success the value is returned, *errstrp (if errstrp is non-NULL) is
 * set to NULL and errno is left as it was on entry.
 * On failure 0 is returned, *errstrp is set to "invalid", "too small" or
 * "too large", and errno is set to EINVAL (invalid) or ERANGE (others).
 * An empty string, trailing garbage and minval > maxval are "invalid".
 */
long long
strtonum(const char *numstr, long long minval, long long maxval,
    const char **errstrp)
{
	const int saved_errno = errno;
	const char *msg = NULL;
	long long val = 0;
	int code = 0;		/* errno to report; 0 means success */
	char *end;

	/* Clear errno so that an ERANGE from strtoll() can be recognized. */
	errno = 0;
	if (minval > maxval) {
		msg = "invalid";
		code = EINVAL;
	} else {
		val = strtoll(numstr, &end, 10);
		if (end == numstr || *end != '\0') {
			msg = "invalid";
			code = EINVAL;
		} else if (val < minval ||
		    (errno == ERANGE && val == LLONG_MIN)) {
			msg = "too small";
			code = ERANGE;
		} else if (val > maxval ||
		    (errno == ERANGE && val == LLONG_MAX)) {
			msg = "too large";
			code = ERANGE;
		}
	}

	if (errstrp != NULL)
		*errstrp = msg;

	if (code != 0) {
		errno = code;
		return 0;
	}

	errno = saved_errno;
	return val;
}
/*
 * Fallback vasprintf(3): format into a freshly malloc(3)ed string.
 *
 * On success, *ret points to the NUL-terminated result, which the caller
 * must free(3), and the number of bytes written (excluding the NUL) is
 * returned.  On any failure, *ret is set to NULL and -1 is returned.
 *
 * The formatting is done twice: once into a tiny scratch buffer just to
 * learn the required length, then again into the real allocation.
 */
int
vasprintf(char **ret, const char *format, va_list ap)
{
	char buf[2];
	va_list ap2;
	size_t len;
	int sz;

	/* ap is needed again for the second pass, so measure with a copy. */
	va_copy(ap2, ap);
	sz = vsnprintf(buf, sizeof(buf), format, ap2);
	va_end(ap2);

	/*
	 * vsnprintf() reports errors with any negative value, not only -1.
	 * The length is computed as size_t so that sz == INT_MAX cannot
	 * overflow a signed int.
	 */
	if (sz >= 0) {
		len = (size_t)sz + 1;
		if ((*ret = malloc(len)) != NULL) {
			if (vsnprintf(*ret, len, format, ap) == sz)
				return sz;
			free(*ret);
		}
	}
	*ret = NULL;
	return -1;
}
*/ +#endif + +#include <sys/types.h> +#include <stdio.h> + +#define MAN_CONF_FILE "/etc/man.conf" +#define HAVE_DIRENT_NAMLEN 1 +#define HAVE_ERR 1 +#define HAVE_FTS 1 +#define HAVE_GETLINE 1 +#define HAVE_GETSUBOPT 1 +#define HAVE_ISBLANK 1 +#define HAVE_MKDTEMP 1 +#define HAVE_MMAP 1 +#define HAVE_PLEDGE 0 +#define HAVE_PROGNAME 1 +#define HAVE_REALLOCARRAY 1 +#define HAVE_REWB_BSD 0 +#define HAVE_REWB_SYSV 0 +#define HAVE_STRCASESTR 1 +#define HAVE_STRINGLIST 1 +#define HAVE_STRLCAT 1 +#define HAVE_STRLCPY 1 +#define HAVE_STRPTIME 1 +#define HAVE_STRSEP 1 +#define HAVE_STRTONUM 1 +#define HAVE_VASPRINTF 1 +#define HAVE_WCHAR 1 +#define HAVE_SQLITE3 1 +#define HAVE_SQLITE3_ERRSTR 0 +#define HAVE_OHASH 1 +#define HAVE_MANPATH 1 + +#define BINM_APROPOS "apropos" +#define BINM_MAKEWHATIS "makewhatis" +#define BINM_MAN "man" +#define BINM_SOELIM "soelim" +#define BINM_WHATIS "whatis" + +extern ssize_t getline(char **, size_t *, FILE *); +extern const char *sqlite3_errstr(int); + +#endif /* MANDOC_CONFIG_H */ diff --git a/contrib/mdocml/config.log b/contrib/mdocml/config.log new file mode 100644 index 0000000..1f7e116 --- /dev/null +++ b/contrib/mdocml/config.log @@ -0,0 +1,210 @@ +configure.local: no (fully automatic configuration) + +dirent-namlen: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-dirent-namlen test-dirent-namlen.c +dirent-namlen: cc succeeded +dirent-namlen: yes + +err: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-err test-err.c +err: cc succeeded +test-err: 1. warnx +test-err: 2. warn: No error: 0 +test-err: 3. err: No error: 0 +err: yes + +fts: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-fts test-fts.c +fts: cc succeeded +fts: yes + +getline: testing... 
+cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-getline test-getline.c +test-getline.c:12:9: error: implicit declaration of function 'getline' is invalid in C99 [-Werror,-Wimplicit-function-declaration] + return getline(&line, &linesz, stdin) != -1; + ^ +1 error generated. +getline: cc failed with 1 + +getsubopt: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-getsubopt test-getsubopt.c +getsubopt: cc succeeded +getsubopt: yes + +isblank: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-isblank test-isblank.c +isblank: cc succeeded +isblank: yes + +mkdtemp: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-mkdtemp test-mkdtemp.c +mkdtemp: cc succeeded +mkdtemp: yes + +mmap: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-mmap test-mmap.c +mmap: cc succeeded +mmap: yes + +pledge: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-pledge test-pledge.c +test-pledge.c:6:11: error: implicit declaration of function 'pledge' is invalid in C99 [-Werror,-Wimplicit-function-declaration] + return !!pledge("stdio", NULL); + ^ +1 error generated. +pledge: cc failed with 1 + +progname: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-progname test-progname.c +progname: cc succeeded +progname: yes + +reallocarray: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-reallocarray test-reallocarray.c +reallocarray: cc succeeded +reallocarray: yes + +rewb-bsd: testing... 
+cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-rewb-bsd test-rewb-bsd.c +test-rewb-bsd.c:11:42: error: use of undeclared identifier 'NULL' + if (regexec(&re, "the word is here", 0, NULL, 0)) + ^ +test-rewb-bsd.c:13:35: error: use of undeclared identifier 'NULL' + if (regexec(&re, "same word", 0, NULL, 0)) + ^ +test-rewb-bsd.c:15:36: error: use of undeclared identifier 'NULL' + if (regexec(&re, "word again", 0, NULL, 0)) + ^ +test-rewb-bsd.c:17:30: error: use of undeclared identifier 'NULL' + if (regexec(&re, "word", 0, NULL, 0)) + ^ +test-rewb-bsd.c:19:31: error: use of undeclared identifier 'NULL' + if (regexec(&re, "wordy", 0, NULL, 0) != REG_NOMATCH) + ^ +test-rewb-bsd.c:21:31: error: use of undeclared identifier 'NULL' + if (regexec(&re, "sword", 0, NULL, 0) != REG_NOMATCH) + ^ +test-rewb-bsd.c:23:34: error: use of undeclared identifier 'NULL' + if (regexec(&re, "reworded", 0, NULL, 0) != REG_NOMATCH) + ^ +7 errors generated. +rewb-bsd: cc failed with 1 + +rewb-sysv: testing... 
+cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-rewb-sysv test-rewb-sysv.c +test-rewb-sysv.c:11:42: error: use of undeclared identifier 'NULL' + if (regexec(&re, "the word is here", 0, NULL, 0)) + ^ +test-rewb-sysv.c:13:35: error: use of undeclared identifier 'NULL' + if (regexec(&re, "same word", 0, NULL, 0)) + ^ +test-rewb-sysv.c:15:36: error: use of undeclared identifier 'NULL' + if (regexec(&re, "word again", 0, NULL, 0)) + ^ +test-rewb-sysv.c:17:30: error: use of undeclared identifier 'NULL' + if (regexec(&re, "word", 0, NULL, 0)) + ^ +test-rewb-sysv.c:19:31: error: use of undeclared identifier 'NULL' + if (regexec(&re, "wordy", 0, NULL, 0) != REG_NOMATCH) + ^ +test-rewb-sysv.c:21:31: error: use of undeclared identifier 'NULL' + if (regexec(&re, "sword", 0, NULL, 0) != REG_NOMATCH) + ^ +test-rewb-sysv.c:23:34: error: use of undeclared identifier 'NULL' + if (regexec(&re, "reworded", 0, NULL, 0) != REG_NOMATCH) + ^ +7 errors generated. +rewb-sysv: cc failed with 1 + +strcasestr: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-strcasestr test-strcasestr.c +strcasestr: cc succeeded +strcasestr: yes + +stringlist: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-stringlist test-stringlist.c +test-stringlist.c:26:26: error: use of undeclared identifier 'NULL' + if ((sl = sl_init()) == NULL) + ^ +1 error generated. +stringlist: cc failed with 1 + +strlcat: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-strlcat test-strlcat.c +strlcat: cc succeeded +strlcat: yes + +strlcpy: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-strlcpy test-strlcpy.c +strlcpy: cc succeeded +strlcpy: yes + +strptime: testing... 
+cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-strptime test-strptime.c +strptime: cc succeeded +strptime: yes + +strsep: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-strsep test-strsep.c +strsep: cc succeeded +strsep: yes + +strtonum: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-strtonum test-strtonum.c +strtonum: cc succeeded +strtonum: yes + +vasprintf: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-vasprintf test-vasprintf.c +vasprintf: cc succeeded +vasprintf: yes + +wchar: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-wchar test-wchar.c +wchar: cc succeeded +*wchar: yes + +sqlite3: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -lsqlite3 -o test-sqlite3 test-sqlite3.c +test-sqlite3.c:20:10: fatal error: 'sqlite3.h' file not found +#include <sqlite3.h> + ^ +1 error generated. +sqlite3: cc failed with 1 + +sqlite3: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -I/usr/local/include -L/usr/local/lib -lsqlite3 -o test-sqlite3 test-sqlite3.c +sqlite3: cc succeeded +sqlite3: yes + +sqlite3_errstr: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -L/usr/local/lib -lsqlite3 -o test-sqlite3_errstr test-sqlite3_errstr.c +test-sqlite3_errstr.c:2:10: fatal error: 'sqlite3.h' file not found +#include <sqlite3.h> + ^ +1 error generated. +sqlite3_errstr: cc failed with 1 + +ohash: testing... 
+cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -o test-ohash test-ohash.c +test-ohash.c:4:10: fatal error: 'ohash.h' file not found +#include <ohash.h> + ^ +1 error generated. +ohash: cc failed with 1 + +ohash: testing... +cc -g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings -Wno-unused -Werror -lutil -o test-ohash test-ohash.c +test-ohash.c:4:10: fatal error: 'ohash.h' file not found +#include <ohash.h> + ^ +1 error generated. +ohash: cc failed with 1 + +DBLIB="-L/usr/local/lib -lsqlite3 -lz" + +/usr/share/man:/usr/local/man:/usr/share/openssl/man:/usr/local/lib/perl5/site_perl/man:/usr/local/lib/perl5/5.20/perl/man:/usr/local/share/xpdf/man +manpath: yes + +config.h: written +Makefile.local: written diff --git a/contrib/mdocml/configure b/contrib/mdocml/configure new file mode 100755 index 0000000..21997fc --- /dev/null +++ b/contrib/mdocml/configure @@ -0,0 +1,464 @@ +#!/bin/sh +# +# Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ +set -e + +[ -w config.log ] && mv config.log config.log.old +[ -w config.h ] && mv config.h config.h.old + +# Output file descriptor usage: +# 1 (stdout): config.h, Makefile.local +# 2 (stderr): original stderr, usually to the console +# 3: config.log + +exec 3> config.log +echo "config.log: writing..." + +# --- default settings ------------------------------------------------- +# Initialize all variables here, +# such that nothing can leak in from the environment. + +MANPATH_DEFAULT="/usr/share/man:/usr/X11R6/man:/usr/local/man" +OSNAME= + +CC=`printf "all:\\n\\t@echo \\\$(CC)\\n" | make -f -` +CFLAGS="-g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings" +DBLIB= +STATIC="-static" + +BUILD_DB=1 +BUILD_CGI=0 + +HAVE_DIRENT_NAMLEN= +HAVE_ERR= +HAVE_FTS= +HAVE_GETLINE= +HAVE_GETSUBOPT= +HAVE_ISBLANK= +HAVE_MKDTEMP= +HAVE_MMAP= +HAVE_PLEDGE= +HAVE_PROGNAME= +HAVE_REALLOCARRAY= +HAVE_REWB_BSD= +HAVE_REWB_SYSV= +HAVE_STRCASESTR= +HAVE_STRINGLIST= +HAVE_STRLCAT= +HAVE_STRLCPY= +HAVE_STRPTIME= +HAVE_STRSEP= +HAVE_STRTONUM= +HAVE_VASPRINTF= +HAVE_WCHAR= + +HAVE_SQLITE3= +HAVE_SQLITE3_ERRSTR= +HAVE_OHASH= +HAVE_MANPATH= + +PREFIX="/usr/local" +BINDIR= +SBINDIR= +INCLUDEDIR= +LIBDIR= +MANDIR= +HOMEBREWDIR= + +WWWPREFIX="/var/www" +HTDOCDIR= +CGIBINDIR= + +BINM_APROPOS="apropos" +BINM_MAKEWHATIS="makewhatis" +BINM_MAN="man" +BINM_SOELIM="soelim" +BINM_WHATIS="whatis" +MANM_MAN="man" +MANM_MANCONF="man.conf" +MANM_MDOC="mdoc" +MANM_ROFF="roff" +MANM_EQN="eqn" +MANM_TBL="tbl" + +INSTALL="install" +INSTALL_PROGRAM= +INSTALL_LIB= +INSTALL_MAN= +INSTALL_DATA= + +# --- manual settings from configure.local ----------------------------- + +if [ -r ./configure.local ]; then + echo "configure.local: reading..." 1>&2 + echo "configure.local: reading..." 1>&3 + cat ./configure.local 1>&3 + . 
./configure.local +else + echo "configure.local: no (fully automatic configuration)" 1>&2 + echo "configure.local: no (fully automatic configuration)" 1>&3 +fi +echo 1>&3 + +# --- tests for config.h ---------------------------------------------- + +COMP="${CC} ${CFLAGS} -Wno-unused -Werror" + +# Check whether this HAVE_ setting is manually overridden. +# If yes, use the override, if no, do not decide anything yet. +# Arguments: lower-case test name, manual value +# Returns 0 if a manual value is set (after reporting it on stderr and +# on fd 3, i.e. config.log), 1 if the value is empty. +ismanual() { + [ -z "${2}" ] && return 1 + echo "${1}: manual (${2})" 1>&2 + echo "${1}: manual (${2})" 1>&3 + echo 1>&3 + return 0 +} + +# Run a single autoconfiguration test. +# In case of success, enable the feature. +# In case of failure, do not decide anything yet. +# Arguments: lower-case test name, upper-case test name, additional CFLAGS +# Returns 0 only if test-<name>.c compiles and the resulting program +# exits successfully, 1 otherwise. Compiler and program output goes to +# fd 3. The test binary is removed once it has been run. +singletest() { + cat 1>&3 << __HEREDOC__ +${1}: testing... +${COMP} ${3} -o test-${1} test-${1}.c +__HEREDOC__ + + if ${COMP} ${3} -o "test-${1}" "test-${1}.c" 1>&3 2>&3; then + echo "${1}: ${CC} succeeded" 1>&3 + else + echo "${1}: ${CC} failed with $?" 1>&3 + echo 1>&3 + return 1 + fi + + if ./test-${1} 1>&3 2>&3; then + echo "${1}: yes" 1>&2 + echo "${1}: yes" 1>&3 + echo 1>&3 + eval HAVE_${2}=1 + rm "test-${1}" + return 0 + else + echo "${1}: execution failed with $?" 1>&3 + echo 1>&3 + rm "test-${1}" + return 1 + fi +} + +# Run a complete autoconfiguration test, including the check for +# a manual override and disabling the feature on failure. 
+# Arguments: lower case name, upper case name, additional CFLAGS +# Returns 0 if a manual override exists or the test succeeds, +# 1 after setting HAVE_<name>=0. +runtest() { + eval _manual=\${HAVE_${2}} + ismanual "${1}" "${_manual}" && return 0 + singletest "${1}" "${2}" "${3}" && return 0 + echo "${1}: no" 1>&2 + eval HAVE_${2}=0 + return 1 +} + +# --- library functions --- +runtest dirent-namlen DIRENT_NAMLEN || true +runtest err ERR || true +runtest fts FTS || true +runtest getline GETLINE || true +runtest getsubopt GETSUBOPT || true +runtest isblank ISBLANK || true +runtest mkdtemp MKDTEMP || true +runtest mmap MMAP || true +runtest pledge PLEDGE || true +runtest progname PROGNAME || true +runtest reallocarray REALLOCARRAY || true +runtest rewb-bsd REWB_BSD || true +runtest rewb-sysv REWB_SYSV || true +runtest strcasestr STRCASESTR || true +runtest stringlist STRINGLIST || true +runtest strlcat STRLCAT || true +runtest strlcpy STRLCPY || true +runtest strptime STRPTIME || true +runtest strsep STRSEP || true +runtest strtonum STRTONUM || true +runtest vasprintf VASPRINTF || true +runtest wchar WCHAR || true + +# --- sqlite3 --- +DETECTLIB= +if [ ${BUILD_DB} -eq 0 ]; then + echo "BUILD_DB=0 (manual)" 1>&2 + echo "BUILD_DB=0 (manual)" 1>&3 + echo 1>&3 + HAVE_SQLITE3=0 +elif ismanual sqlite3 "${HAVE_SQLITE3}"; then + DETECTLIB="-lsqlite3" +elif [ -n "${DBLIB}" ]; then + runtest sqlite3 SQLITE3 "${DBLIB}" || true +elif singletest sqlite3 SQLITE3 "-lsqlite3"; then + DETECTLIB="-lsqlite3" +elif runtest sqlite3 SQLITE3 \ + "-I/usr/local/include -L/usr/local/lib -lsqlite3"; then + DETECTLIB="-L/usr/local/lib -lsqlite3" + CFLAGS="${CFLAGS} -I/usr/local/include" + # COMP was assembled before CFLAGS changed; rebuild it so that the + # remaining tests can find <sqlite3.h>, too. + COMP="${CC} ${CFLAGS} -Wno-unused -Werror" +fi +if [ ${BUILD_DB} -gt 0 -a ${HAVE_SQLITE3} -eq 0 ]; then + echo "BUILD_DB=0 (no sqlite3)" 1>&2 + echo "BUILD_DB=0 (no sqlite3)" 1>&3 + echo 1>&3 + BUILD_DB=0 +fi + +# --- sqlite3_errstr --- +if [ ${BUILD_DB} -eq 0 ]; then + HAVE_SQLITE3_ERRSTR=1 +elif ismanual sqlite3_errstr "${HAVE_SQLITE3_ERRSTR}"; then + : +elif [ -n "${DBLIB}" ]; then + runtest sqlite3_errstr SQLITE3_ERRSTR "${DBLIB}" || true +else 
+ runtest sqlite3_errstr SQLITE3_ERRSTR "${DETECTLIB}" || true +fi + +# --- ohash --- +if [ ${BUILD_DB} -eq 0 ]; then + HAVE_OHASH=1 +elif ismanual ohash "${HAVE_OHASH}"; then + : +elif [ -n "${DBLIB}" ]; then + runtest ohash OHASH "${DBLIB}" || true +elif singletest ohash OHASH; then + : +elif runtest ohash OHASH "-lutil"; then + DETECTLIB="${DETECTLIB} -lutil" +fi + +# --- DBLIB --- +if [ ${BUILD_DB} -eq 0 ]; then + DBLIB="-lz" +elif [ -z "${DBLIB}" ]; then + DBLIB="${DETECTLIB} -lz" + echo "DBLIB=\"${DBLIB}\"" 1>&2 + echo "DBLIB=\"${DBLIB}\"" 1>&3 + echo 1>&3 +fi + +# --- manpath --- +if ismanual manpath "${HAVE_MANPATH}"; then + : +elif manpath 1>&3 2>&3; then + echo "manpath: yes" 1>&2 + echo "manpath: yes" 1>&3 + echo 1>&3 + HAVE_MANPATH=1 +else + echo "manpath: no" 1>&2 + echo "manpath: no" 1>&3 + echo 1>&3 + HAVE_MANPATH=0 +fi + +# --- write config.h --- + +exec > config.h + +cat << __HEREDOC__ +#ifdef __cplusplus +#error "Do not use C++. See the INSTALL file." +#endif + +#ifndef MANDOC_CONFIG_H +#define MANDOC_CONFIG_H + +#if defined(__linux__) || defined(__MINT__) +#define _GNU_SOURCE /* See test-*.c what needs this. 
*/ +#endif + +__HEREDOC__ + +[ ${HAVE_GETLINE} -eq 0 -o ${HAVE_REALLOCARRAY} -eq 0 -o \ + ${HAVE_STRLCAT} -eq 0 -o ${HAVE_STRLCPY} -eq 0 ] \ + && echo "#include <sys/types.h>" +[ ${HAVE_VASPRINTF} -eq 0 ] && echo "#include <stdarg.h>" +[ ${HAVE_GETLINE} -eq 0 ] && echo "#include <stdio.h>" + +echo +echo "#define MAN_CONF_FILE \"/etc/${MANM_MANCONF}\"" +echo "#define MANPATH_DEFAULT \"${MANPATH_DEFAULT}\"" +[ -n "${OSNAME}" ] && echo "#define OSNAME \"${OSNAME}\"" +[ -n "${HOMEBREWDIR}" ] && echo "#define HOMEBREWDIR \"${HOMEBREWDIR}\"" + +cat << __HEREDOC__ +#define HAVE_DIRENT_NAMLEN ${HAVE_DIRENT_NAMLEN} +#define HAVE_ERR ${HAVE_ERR} +#define HAVE_FTS ${HAVE_FTS} +#define HAVE_GETLINE ${HAVE_GETLINE} +#define HAVE_GETSUBOPT ${HAVE_GETSUBOPT} +#define HAVE_ISBLANK ${HAVE_ISBLANK} +#define HAVE_MKDTEMP ${HAVE_MKDTEMP} +#define HAVE_MMAP ${HAVE_MMAP} +#define HAVE_PLEDGE ${HAVE_PLEDGE} +#define HAVE_PROGNAME ${HAVE_PROGNAME} +#define HAVE_REALLOCARRAY ${HAVE_REALLOCARRAY} +#define HAVE_REWB_BSD ${HAVE_REWB_BSD} +#define HAVE_REWB_SYSV ${HAVE_REWB_SYSV} +#define HAVE_STRCASESTR ${HAVE_STRCASESTR} +#define HAVE_STRINGLIST ${HAVE_STRINGLIST} +#define HAVE_STRLCAT ${HAVE_STRLCAT} +#define HAVE_STRLCPY ${HAVE_STRLCPY} +#define HAVE_STRPTIME ${HAVE_STRPTIME} +#define HAVE_STRSEP ${HAVE_STRSEP} +#define HAVE_STRTONUM ${HAVE_STRTONUM} +#define HAVE_VASPRINTF ${HAVE_VASPRINTF} +#define HAVE_WCHAR ${HAVE_WCHAR} +#define HAVE_SQLITE3 ${HAVE_SQLITE3} +#define HAVE_SQLITE3_ERRSTR ${HAVE_SQLITE3_ERRSTR} +#define HAVE_OHASH ${HAVE_OHASH} +#define HAVE_MANPATH ${HAVE_MANPATH} + +#define BINM_APROPOS "${BINM_APROPOS}" +#define BINM_MAKEWHATIS "${BINM_MAKEWHATIS}" +#define BINM_MAN "${BINM_MAN}" +#define BINM_SOELIM "${BINM_SOELIM}" +#define BINM_WHATIS "${BINM_WHATIS}" + +__HEREDOC__ + +if [ ${HAVE_ERR} -eq 0 ]; then + echo "extern void err(int, const char *, ...);" + echo "extern void errx(int, const char *, ...);" + echo "extern void warn(const char *, ...);" + echo "extern void 
warnx(const char *, ...);" +fi + +[ ${HAVE_GETLINE} -eq 0 ] && \ + echo "extern ssize_t getline(char **, size_t *, FILE *);" + +[ ${HAVE_GETSUBOPT} -eq 0 ] && \ + echo "extern int getsubopt(char **, char * const *, char **);" + +[ ${HAVE_ISBLANK} -eq 0 ] && \ + echo "extern int isblank(int);" + +[ ${HAVE_MKDTEMP} -eq 0 ] && \ + echo "extern char *mkdtemp(char *);" + +if [ ${HAVE_PROGNAME} -eq 0 ]; then + echo "extern const char *getprogname(void);" + echo "extern void setprogname(const char *);" +fi + +[ ${HAVE_REALLOCARRAY} -eq 0 ] && \ + echo "extern void *reallocarray(void *, size_t, size_t);" + +[ ${BUILD_DB} -gt 0 -a ${HAVE_SQLITE3_ERRSTR} -eq 0 ] && + echo "extern const char *sqlite3_errstr(int);" + +[ ${HAVE_STRCASESTR} -eq 0 ] && \ + echo "extern char *strcasestr(const char *, const char *);" + +[ ${HAVE_STRLCAT} -eq 0 ] && \ + echo "extern size_t strlcat(char *, const char *, size_t);" + +[ ${HAVE_STRLCPY} -eq 0 ] && \ + echo "extern size_t strlcpy(char *, const char *, size_t);" + +[ ${HAVE_STRSEP} -eq 0 ] && \ + echo "extern char *strsep(char **, const char *);" + +[ ${HAVE_STRTONUM} -eq 0 ] && \ + echo "extern long long strtonum(const char *, long long, long long, const char **);" + +[ ${HAVE_VASPRINTF} -eq 0 ] && \ + echo "extern int vasprintf(char **, const char *, va_list);" + +echo +echo "#endif /* MANDOC_CONFIG_H */" + +echo "config.h: written" 1>&2 +echo "config.h: written" 1>&3 + +# --- tests for Makefile.local ----------------------------------------- + +exec > Makefile.local + +[ -z "${BINDIR}" ] && BINDIR="${PREFIX}/bin" +[ -z "${SBINDIR}" ] && SBINDIR="${PREFIX}/sbin" +[ -z "${INCLUDEDIR}" ] && INCLUDEDIR="${PREFIX}/include/mandoc" +[ -z "${LIBDIR}" ] && LIBDIR="${PREFIX}/lib/mandoc" +[ -z "${MANDIR}" ] && MANDIR="${PREFIX}/man" + +[ -z "${HTDOCDIR}" ] && HTDOCDIR="${WWWPREFIX}/htdocs" +[ -z "${CGIBINDIR}" ] && CGIBINDIR="${WWWPREFIX}/cgi-bin" + +[ -z "${INSTALL_PROGRAM}" ] && INSTALL_PROGRAM="${INSTALL} -m 0555" +[ -z "${INSTALL_LIB}" ] && 
INSTALL_LIB="${INSTALL} -m 0444" +[ -z "${INSTALL_MAN}" ] && INSTALL_MAN="${INSTALL} -m 0444" +[ -z "${INSTALL_DATA}" ] && INSTALL_DATA="${INSTALL} -m 0444" + +if [ ${BUILD_DB} -eq 0 -a ${BUILD_CGI} -gt 0 ]; then + echo "BUILD_CGI=0 (no BUILD_DB)" 1>&2 + echo "BUILD_CGI=0 (no BUILD_DB)" 1>&3 + BUILD_CGI=0 +fi + +BUILD_TARGETS="base-build" +[ ${BUILD_CGI} -gt 0 ] && BUILD_TARGETS="${BUILD_TARGETS} cgi-build" +INSTALL_TARGETS="base-install" +[ ${BUILD_DB} -gt 0 ] && INSTALL_TARGETS="${INSTALL_TARGETS} db-install" +[ ${BUILD_CGI} -gt 0 ] && INSTALL_TARGETS="${INSTALL_TARGETS} cgi-install" + +cat << __HEREDOC__ +BUILD_TARGETS = ${BUILD_TARGETS} +INSTALL_TARGETS = ${INSTALL_TARGETS} +CC = ${CC} +CFLAGS = ${CFLAGS} +DBLIB = ${DBLIB} +STATIC = ${STATIC} +PREFIX = ${PREFIX} +BINDIR = ${BINDIR} +SBINDIR = ${SBINDIR} +INCLUDEDIR = ${INCLUDEDIR} +LIBDIR = ${LIBDIR} +MANDIR = ${MANDIR} +WWWPREFIX = ${WWWPREFIX} +HTDOCDIR = ${HTDOCDIR} +CGIBINDIR = ${CGIBINDIR} +BINM_APROPOS = ${BINM_APROPOS} +BINM_MAKEWHATIS = ${BINM_MAKEWHATIS} +BINM_MAN = ${BINM_MAN} +BINM_SOELIM = ${BINM_SOELIM} +BINM_WHATIS = ${BINM_WHATIS} +MANM_MAN = ${MANM_MAN} +MANM_MANCONF = ${MANM_MANCONF} +MANM_MDOC = ${MANM_MDOC} +MANM_ROFF = ${MANM_ROFF} +MANM_EQN = ${MANM_EQN} +MANM_TBL = ${MANM_TBL} +INSTALL = ${INSTALL} +INSTALL_PROGRAM = ${INSTALL_PROGRAM} +INSTALL_LIB = ${INSTALL_LIB} +INSTALL_MAN = ${INSTALL_MAN} +INSTALL_DATA = ${INSTALL_DATA} +__HEREDOC__ + +[ ${BUILD_DB} -gt 0 ] && \ + echo "MAIN_OBJS = \$(BASE_OBJS) \$(DB_OBJS)" + +echo "Makefile.local: written" 1>&2 +echo "Makefile.local: written" 1>&3 + +exit 0 diff --git a/contrib/mdocml/configure.local.example b/contrib/mdocml/configure.local.example new file mode 100644 index 0000000..de9f08f --- /dev/null +++ b/contrib/mdocml/configure.local.example @@ -0,0 +1,245 @@ +# $Id: configure.local.example,v 1.10 2015/11/07 13:14:21 schwarze Exp $ +# +# Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> +# +# Permission to use, copy, modify, and 
distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +# For all settings documented in this file, there are reasonable +# defaults and/or the ./configure script attempts autodetection. +# Consequently, you only need to create a file ./configure.local +# and put any of these settings into it if ./configure autodetection +# fails or if you want to make different choices for other reasons. + +# If autodetection fails, please tell <tech@mdocml.bsd.lv>. + +# We recommend that you write ./configure.local from scratch and +# only put the lines there you need. This file contains examples. +# It is not intended as a template to be copied as a whole. + +# --- user settings relevant for all builds ---------------------------- + +# For -Tutf8 and -Tlocale operation, mandoc(1) requires <locale.h> +# providing setlocale(3) and <wchar.h> providing wcwidth(3) and +# putwchar(3) with a wchar_t storing UCS-4 values. Theoretically, +# the latter should be tested with the __STDC_ISO_10646__ feature +# macro. In practice, many <wchar.h> headers do not provide that +# macro even though they treat wchar_t as UCS-4. So the automatic +# test only checks that wchar_t is wide enough, that is, at least +# four bytes. + +# The following line forces multi-byte support. +# If your C library does not treat wchar_t as UCS-4, the UTF-8 output +# mode will print garbage. 
+ +HAVE_WCHAR=1 + +# The following line disables multi-byte support. +# The output modes -Tutf8 and -Tlocale will be the same as -Tascii. + +HAVE_WCHAR=0 + +# When man(1) or apropos(1) is called without -m and -M options, +# MANPATH is not set in the environment, man.conf(5) is not available +# and manpath(1) not used, manuals are searched for in the following +# directory trees by default. + +MANPATH_DEFAULT="/usr/share/man:/usr/X11R6/man:/usr/local/man" + +# In manual pages written in the mdoc(7) language, the operating system +# version is displayed in the page footer line. If an operating system +# is specified as an argument to the .Os macro, that is always used. +# If the .Os macro has no argument and an operating system is specified +# with the mandoc(1) -Ios= command line option, that is used. +# Otherwise, the uname(3) library function is called at runtime to find +# the name of the operating system. +# If you do not want uname(3) to be called but instead want a fixed +# string to be used, use the following line: + +OSNAME="OpenBSD 5.6" + +# The following installation directories are used. +# It is possible to set only one or a few of these variables, +# there is no need to copy the whole block. +# Even if you set PREFIX to something else, the other variables +# pick it up without copying them all over. + +PREFIX="/usr/local" +BINDIR="${PREFIX}/bin" +SBINDIR="${PREFIX}/sbin" +INCLUDEDIR="${PREFIX}/include/mandoc" +LIBDIR="${PREFIX}/lib/mandoc" +MANDIR="${PREFIX}/man" + +# The man(1) utility needs to know where the manuals reside. +# We know of two ways to tell it: via manpath(1) or man.conf(5). +# The latter is used by OpenBSD and NetBSD, the former by most +# other systems. + +# Force usage of manpath(1). +# If it is not installed or not operational, +# man(1), makewhatis(8), and apropos(1) will not work properly. +HAVE_MANPATH=1 + +# Force usage of man.conf(5). 
+# If it does not exist or contains no valid configuration, +# man(1), makewhatis(8), and apropos(1) will not work properly. +HAVE_MANPATH=0 + +# Some distributions may want to avoid naming conflicts +# with the configuration files of other man(1) implementations. +# This changes the name of the installed section 5 manual page as well. +MANM_MANCONF="mandoc.conf" # default is "man.conf" + +# Some distributions may want to avoid naming conflicts among manuals. +# If you want to change the names of installed section 7 manual pages, +# the following alternative names are suggested. +# The suffix ".7" will automatically be appended. +# It is possible to set only one or a few of these variables, +# there is no need to copy the whole block. + +MANM_MAN="mandoc_man" # default is "man" +MANM_MDOC="mandoc_mdoc" # default is "mdoc" +MANM_ROFF="mandoc_roff" # default is "roff" +MANM_EQN="mandoc_eqn" # default is "eqn" +MANM_TBL="mandoc_tbl" # default is "tbl" + +# Some distributions may want to avoid naming conflicts +# with other man(1) and soelim(1) utilities. +# If you want to change the names of binary programs, +# the following alternative names are suggested. +# Using different names is possible as well. +# This changes the names of the installed section 1 manual pages as well. + +BINM_MAN=mman # default is "man" +BINM_SOELIM=msoelim # default is "soelim" + +# It is possible to change the utility program used for installation +# and the modes files are installed with. The defaults are: + +INSTALL="install" +INSTALL_PROGRAM="${INSTALL} -m 0555" +INSTALL_LIB="${INSTALL} -m 0444" +INSTALL_MAN="${INSTALL} -m 0444" +INSTALL_DATA="${INSTALL} -m 0444" + +# --- user settings related to database support ------------------------ + +# By default, building makewhatis(8) and apropos(1) is enabled. +# To disable it, for example to avoid the dependency on SQLite3, +# use the following line. In that case, the remaining settings +# in this section are irrelevant. 
+ +BUILD_DB=0 + +# Two libraries are needed: SQLite3 and ohash(3). +# Autoconfiguration tries the following linker flags to find them. +# If none of these work, add a working DBLIB line to configure.local, +# disabling autodetection for library directories. + +DBLIB="-lsqlite3" +DBLIB="-lsqlite3 -lutil" +DBLIB="-L/usr/local/lib -lsqlite3" + +# When library autodetection decides to use -L/usr/local/lib, +# -I/usr/local/include is automatically added to CFLAGS. +# If you manually set DBLIB to something including -L/usr/local/lib, +# chances are you will also need the following line: + +CFLAGS="${CFLAGS} -I/usr/local/include" + +# Some distributions may want to avoid naming conflicts +# with another implementation of apropos(1) and makewhatis(8). +# If you want to change the names of the binary programs, +# the following alternative names are suggested. +# Using other names is possible as well. +# This changes the names of the installed section 1 and section 8 +# manual pages as well. +# It is possible to set only one or two of these variables, +# there is no need to copy the whole block. + +BINM_APROPOS=mapropos # default is "apropos" +BINM_WHATIS=mwhatis # default is "whatis" +BINM_MAKEWHATIS=mandocdb # default is "makewhatis" + +# When using the "homebrew" package manager on Mac OS X, the actual +# manuals are located in a so-called "cellar" and only symlinked +# into the manual trees. To allow mandoc to follow such symlinks, +# you have to specify the physical location of the cellar as returned +# by realpath(3), for example: + +PREFIX="/usr/local" +HOMEBREWDIR="${PREFIX}/Cellar" + +# --- user settings related to man.cgi --------------------------------- + +# By default, building man.cgi(8) is disabled. To enable it, copy +# cgi.h.example to cgi.h, edit it, and use the following line. +# Obviously, this requires that BUILD_DB is enabled, too. + +BUILD_CGI=1 + +# The remaining settings in this section are only relevant if BUILD_CGI +# is enabled. 
Otherwise, they have no effect either way. + +# By default, man.cgi(8) is linked statically. +# Some systems do not support static linking, for example Mac OS X. +# In that case, use the following line: + +STATIC= + +# Some systems, for example Linux, require -pthread for static linking: + +STATIC="-static -pthread" + +# Some directories. +# This works just like PREFIX, see above. + +WWWPREFIX="/var/www" +HTDOCDIR="${WWWPREFIX}/htdocs" +CGIBINDIR="${WWWPREFIX}/cgi-bin" + +# --- settings that rarely need to be touched -------------------------- + +# Do not set these variables unless you really need to. + +# You can manually override the compiler to be used. +# But that's rarely useful because ./configure asks your make(1) +# which compiler to use, and that answer will hardly be wrong. + +CC=cc + +# The default compiler flags are: + +CFLAGS="-g -W -Wall -Wstrict-prototypes -Wno-unused-parameter -Wwrite-strings" + +# In rare cases, it may be required to skip individual automatic tests. +# Each of the following variables can be set to 0 (test will not be run +# and will be regarded as failed) or 1 (test will not be run and will +# be regarded as successful). + +HAVE_DIRENT_NAMLEN=0 +HAVE_FGETLN=0 +HAVE_FTS=0 +HAVE_GETSUBOPT=0 +HAVE_MMAP=0 +HAVE_REALLOCARRAY=0 +HAVE_STRCASESTR=0 +HAVE_STRLCAT=0 +HAVE_STRLCPY=0 +HAVE_STRPTIME=0 +HAVE_STRSEP=0 +HAVE_STRTONUM=0 + +HAVE_SQLITE3=0 +HAVE_SQLITE3_ERRSTR=0 +HAVE_OHASH=0 diff --git a/contrib/mdocml/demandoc.1 b/contrib/mdocml/demandoc.1 new file mode 100644 index 0000000..bca69ef --- /dev/null +++ b/contrib/mdocml/demandoc.1 @@ -0,0 +1,108 @@ +.\" $Id: demandoc.1,v 1.8 2014/09/12 00:10:26 schwarze Exp $ +.\" +.\" Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. 
+.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: September 12 2014 $ +.Dt DEMANDOC 1 +.Os +.Sh NAME +.Nm demandoc +.Nd emit only text of UNIX manuals +.Sh SYNOPSIS +.Nm demandoc +.Op Fl w +.Op Ar +.Sh DESCRIPTION +The +.Nm +utility emits only the text portions of well-formed +.Xr mdoc 7 +and +.Xr man 7 +.Ux +manual files. +.Pp +By default, +.Nm +parses standard input and outputs only text nodes, preserving line +and column position. +Escape sequences are omitted from the output. +.Pp +Its arguments are as follows: +.Bl -tag -width Ds +.It Fl w +Output a word list. +This outputs each word of text on its own line. +A +.Qq word , +in this case, refers to whitespace-delimited terms beginning with at +least two letters and not consisting of any escape sequences. +Words have their leading and trailing punctuation +.Pq double-quotes, sentence punctuation, etc. +stripped. +.It Ar +The input files. +.El +.Pp +If a document is not well-formed, it is skipped. +.Pp +The +.Fl i , +.Fl k , +.Fl m , +and +.Fl p +flags are silently discarded for calling compatibility with the +historical deroff. +.Sh EXIT STATUS +The +.Nm +utility exits with one of the following values: +.Pp +.Bl -tag -width Ds -compact +.It 0 +No errors occurred. +.It 6 +An operating system error occurred, for example memory exhaustion or an +error accessing input files. +Such errors cause +.Nm +to exit at once, possibly in the middle of parsing or formatting a file. +The output databases are corrupt and should be removed. 
+.El +.Sh EXAMPLES +The traditional usage of +.Nm +is for spell-checking manuals on +.Bx . +This is accomplished as follows (assuming British spelling): +.Pp +.Dl $ demandoc -w file.1 | spell -b +.Sh SEE ALSO +.Xr mandoc 1 , +.Xr man 7 , +.Xr mdoc 7 +.Sh HISTORY +.Nm +replaces the historical deroff utility for handling modern +.Xr man 7 +and +.Xr mdoc 7 +documents. +.Sh AUTHORS +The +.Nm +utility was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv . diff --git a/contrib/mdocml/demandoc.c b/contrib/mdocml/demandoc.c new file mode 100644 index 0000000..13b7890 --- /dev/null +++ b/contrib/mdocml/demandoc.c @@ -0,0 +1,264 @@ +/* $Id: demandoc.c,v 1.26 2016/01/08 02:53:13 schwarze Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "roff.h" +#include "man.h" +#include "mdoc.h" +#include "mandoc.h" + +static void pline(int, int *, int *, int); +static void pman(const struct roff_node *, int *, int *, int); +static void pmandoc(struct mparse *, int, const char *, int); +static void pmdoc(const struct roff_node *, int *, int *, int); +static void pstring(const char *, int, int *, int); +static void usage(void); + +static const char *progname; + +int +main(int argc, char *argv[]) +{ + struct mparse *mp; + int ch, fd, i, list; + extern int optind; + + if (argc < 1) + progname = "demandoc"; + else if ((progname = strrchr(argv[0], '/')) == NULL) + progname = argv[0]; + else + ++progname; + + mp = NULL; + list = 0; + + while (-1 != (ch = getopt(argc, argv, "ikm:pw"))) + switch (ch) { + case ('i'): + /* FALLTHROUGH */ + case ('k'): + /* FALLTHROUGH */ + case ('m'): + /* FALLTHROUGH */ + case ('p'): + break; + case ('w'): + list = 1; + break; + default: + usage(); + return (int)MANDOCLEVEL_BADARG; + } + + argc -= optind; + argv += optind; + + mchars_alloc(); + mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, NULL); + assert(mp); + + if (argc < 1) + pmandoc(mp, STDIN_FILENO, "<stdin>", list); + + for (i = 0; i < argc; i++) { + mparse_reset(mp); + if ((fd = mparse_open(mp, argv[i])) == -1) { + perror(argv[i]); + continue; + } + pmandoc(mp, fd, argv[i], list); + } + + mparse_free(mp); + mchars_free(); + return (int)MANDOCLEVEL_OK; +} + +static void +usage(void) +{ + + fprintf(stderr, "usage: %s [-w] [files...]\n", progname); +} + +static void +pmandoc(struct mparse *mp, int fd, const char *fn, int list) +{ + struct roff_man *man; + int line, col; + + mparse_readfd(mp, fd, fn); + close(fd); + mparse_result(mp, &man, NULL); + line = 1; + col = 0; + + if (man == NULL) + return; + if 
(man->macroset == MACROSET_MDOC) { + mdoc_validate(man); + pmdoc(man->first->child, &line, &col, list); + } else { + man_validate(man); + pman(man->first->child, &line, &col, list); + } + + if ( ! list) + putchar('\n'); +} + +/* + * Strip the escapes out of a string, emitting the results. + */ +static void +pstring(const char *p, int col, int *colp, int list) +{ + enum mandoc_esc esc; + const char *start, *end; + int emit; + + /* + * Print as many column spaces til we achieve parity with the + * input document. + */ + +again: + if (list && '\0' != *p) { + while (isspace((unsigned char)*p)) + p++; + + while ('\'' == *p || '(' == *p || '"' == *p) + p++; + + emit = isalpha((unsigned char)p[0]) && + isalpha((unsigned char)p[1]); + + for (start = p; '\0' != *p; p++) + if ('\\' == *p) { + p++; + esc = mandoc_escape(&p, NULL, NULL); + if (ESCAPE_ERROR == esc) + return; + emit = 0; + } else if (isspace((unsigned char)*p)) + break; + + end = p - 1; + + while (end > start) + if ('.' == *end || ',' == *end || + '\'' == *end || '"' == *end || + ')' == *end || '!' == *end || + '?' == *end || ':' == *end || + ';' == *end) + end--; + else + break; + + if (emit && end - start >= 1) { + for ( ; start <= end; start++) + if (ASCII_HYPH == *start) + putchar('-'); + else + putchar((unsigned char)*start); + putchar('\n'); + } + + if (isspace((unsigned char)*p)) + goto again; + + return; + } + + while (*colp < col) { + putchar(' '); + (*colp)++; + } + + /* + * Print the input word, skipping any special characters. + */ + while ('\0' != *p) + if ('\\' == *p) { + p++; + esc = mandoc_escape(&p, NULL, NULL); + if (ESCAPE_ERROR == esc) + break; + } else { + putchar((unsigned char )*p++); + (*colp)++; + } +} + +static void +pline(int line, int *linep, int *col, int list) +{ + + if (list) + return; + + /* + * Print out as many lines as needed to reach parity with the + * original input. 
+ */ + + while (*linep < line) { + putchar('\n'); + (*linep)++; + } + + *col = 0; +} + +static void +pmdoc(const struct roff_node *p, int *line, int *col, int list) +{ + + for ( ; p; p = p->next) { + if (MDOC_LINE & p->flags) + pline(p->line, line, col, list); + if (ROFFT_TEXT == p->type) + pstring(p->string, p->pos, col, list); + if (p->child) + pmdoc(p->child, line, col, list); + } +} + +static void +pman(const struct roff_node *p, int *line, int *col, int list) +{ + + for ( ; p; p = p->next) { + if (MAN_LINE & p->flags) + pline(p->line, line, col, list); + if (ROFFT_TEXT == p->type) + pstring(p->string, p->pos, col, list); + if (p->child) + pman(p->child, line, col, list); + } +} diff --git a/contrib/mdocml/eqn.7 b/contrib/mdocml/eqn.7 new file mode 100644 index 0000000..e3aad72 --- /dev/null +++ b/contrib/mdocml/eqn.7 @@ -0,0 +1,500 @@ +.\" $Id: eqn.7,v 1.35 2015/03/30 16:06:14 schwarze Exp $ +.\" +.\" Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> +.\" Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: March 30 2015 $ +.Dt EQN 7 +.Os +.Sh NAME +.Nm eqn +.Nd eqn language reference for mandoc +.Sh DESCRIPTION +The +.Nm eqn +language is an equation-formatting language. 
+It is used within +.Xr mdoc 7 +and +.Xr man 7 +.Ux +manual pages. +It describes the +.Em structure +of an equation, not its mathematical meaning. +This manual describes the +.Nm +language accepted by the +.Xr mandoc 1 +utility, which corresponds to the Second Edition +.Nm +specification (see +.Sx SEE ALSO +for references). +.Pp +Equations within +.Xr mdoc 7 +or +.Xr man 7 +documents are enclosed by the standalone +.Sq \&.EQ +and +.Sq \&.EN +tags. +Equations are multi-line blocks consisting of formulas and control +statements. +.Sh EQUATION STRUCTURE +Each equation is bracketed by +.Sq \&.EQ +and +.Sq \&.EN +strings. +.Em Note : +these are not the same as +.Xr roff 7 +macros, and may only be invoked as +.Sq \&.EQ . +.Pp +The equation grammar is as follows, where quoted strings are +case-sensitive literals in the input: +.Bd -literal -offset indent +eqn : box | eqn box +box : text + | \(dq{\(dq eqn \(dq}\(dq + | \(dqdefine\(dq text text + | \(dqndefine\(dq text text + | \(dqtdefine\(dq text text + | \(dqgfont\(dq text + | \(dqgsize\(dq text + | \(dqset\(dq text text + | \(dqundef\(dq text + | \(dqsqrt\(dq box + | box pos box + | box mark + | \(dqmatrix\(dq \(dq{\(dq [col \(dq{\(dq list \(dq}\(dq ]* + | pile \(dq{\(dq list \(dq}\(dq + | font box + | \(dqsize\(dq text box + | \(dqleft\(dq text eqn [\(dqright\(dq text] +col : \(dqlcol\(dq | \(dqrcol\(dq | \(dqccol\(dq | \(dqcol\(dq +text : [^space\e\(dq]+ | \e\(dq.*\e\(dq +pile : \(dqlpile\(dq | \(dqcpile\(dq | \(dqrpile\(dq | \(dqpile\(dq +pos : \(dqover\(dq | \(dqsup\(dq | \(dqsub\(dq | \(dqto\(dq | \(dqfrom\(dq +mark : \(dqdot\(dq | \(dqdotdot\(dq | \(dqhat\(dq | \(dqtilde\(dq | \(dqvec\(dq + | \(dqdyad\(dq | \(dqbar\(dq | \(dqunder\(dq +font : \(dqroman\(dq | \(dqitalic\(dq | \(dqbold\(dq | \(dqfat\(dq +list : eqn + | list \(dqabove\(dq eqn +space : [\e^~ \et] +.Ed +.Pp +White-space consists of the space, tab, circumflex, and tilde +characters. 
+It is required to delimit tokens consisting of alphabetic characters +and it is ignored at other places. +Braces and quotes also delimit tokens. +If within a quoted string, these space characters are retained. +Quoted strings are also not scanned for keywords, glyph names, +and expansion of definitions. +To print a literal quote character, it can be prepended with a +backslash or expressed with the \e(dq escape sequence. +.Pp +Subequations can be enclosed in braces to pass them as arguments +to operation keywords, overriding standard operation precedence. +Braces can be nested. +To set a brace verbatim, it needs to be enclosed in quotes. +.Pp +The following text terms are translated into a rendered glyph, if +available: alpha, beta, chi, delta, epsilon, eta, gamma, iota, kappa, +lambda, mu, nu, omega, omicron, phi, pi, psi, rho, sigma, tau, theta, +upsilon, xi, zeta, DELTA, GAMMA, LAMBDA, OMEGA, PHI, PI, PSI, SIGMA, +THETA, UPSILON, XI, inter (intersection), union (union), prod (product), +int (integral), sum (summation), grad (gradient), del (vector +differential), times (multiply), cdot (center-dot), nothing (zero-width +space), approx (approximately equals), prime (prime), half (one-half), +partial (partial differential), inf (infinity), >> (much greater), << +(much less), \-> (right arrow), <\- (left arrow), +\- (plus-minus), != +(not equal), == (equivalence), <= (less-than-equal), and >= +(more-than-equal). +The character escape sequences documented in +.Xr mandoc_char 7 +can be used, too. +.Pp +The following control statements are available: +.Bl -tag -width Ds +.It Cm define +Replace all occurrences of a key with a value. +Its syntax is as follows: +.Pp +.D1 Cm define Ar key cvalc +.Pp +The first character of the value string, +.Ar c , +is used as the delimiter for the value +.Ar val . 
+This allows for arbitrary enclosure of terms (not just quotes), such as +.Pp +.D1 Cm define Ar foo \(aqbar baz\(aq +.D1 Cm define Ar foo cbar bazc +.Pp +It is an error to have an empty +.Ar key +or +.Ar val . +Note that a quoted +.Ar key +causes errors in some +.Nm +implementations and should not be considered portable. +It is not expanded for replacements. +Definitions may refer to other definitions; these are evaluated +recursively when text replacement occurs and not when the definition is +created. +.Pp +Definitions can create arbitrary strings, for example, the following is +a legal construction. +.Bd -literal -offset indent +define foo \(aqdefine\(aq +foo bar \(aqbaz\(aq +.Ed +.Pp +Self-referencing definitions will raise an error. +The +.Cm ndefine +statement is a synonym for +.Cm define , +while +.Cm tdefine +is discarded. +.It Cm gfont +Set the default font of subsequent output. +Its syntax is as follows: +.Pp +.D1 Cm gfont Ar font +.Pp +In mandoc, this value is discarded. +.It Cm gsize +Set the default size of subsequent output. +Its syntax is as follows: +.Pp +.D1 Cm gsize Oo +|\- Oc Ns Ar size +.Pp +The +.Ar size +value should be an integer. +If prepended by a sign, +the font size is changed relative to the current size. +.It Cm set +Set an equation mode. +In mandoc, both arguments are thrown away. +Its syntax is as follows: +.Pp +.D1 Cm set Ar key val +.Pp +The +.Ar key +and +.Ar val +are not expanded for replacements. +This statement is a GNU extension. +.It Cm undef +Unset a previously-defined key. +Its syntax is as follows: +.Pp +.D1 Cm undef Ar key +.Pp +Once invoked, the definition for +.Ar key +is discarded. +The +.Ar key +is not expanded for replacements. +This statement is a GNU extension. +.El +.Pp +Operation keywords have the following semantics: +.Bl -tag -width Ds +.It Cm above +See +.Cm pile . +.It Cm bar +Draw a line over the preceding box. +.It Cm bold +Set the following box using bold font. 
+.It Cm ccol +Like +.Cm cpile , +but for use in +.Cm matrix . +.It Cm cpile +Like +.Cm pile , +but with slightly increased vertical spacing. +.It Cm dot +Set a single dot over the preceding box. +.It Cm dotdot +Set two dots (dieresis) over the preceding box. +.It Cm dyad +Set a dyad symbol (left-right arrow) over the preceding box. +.It Cm fat +A synonym for +.Cm bold . +.It Cm font +Set the second argument using the font specified by the first argument; +currently not recognized by the +.Xr mandoc 1 +.Nm +parser. +.It Cm from +Set the following box below the preceding box, +using a slightly smaller font. +Used for sums, integrals, limits, and the like. +.It Cm hat +Set a hat (circumflex) over the preceding box. +.It Cm italic +Set the following box using italic font. +.It Cm lcol +Like +.Cm lpile , +but for use in +.Cm matrix . +.It Cm left +Set the first argument as a big left delimiter before the second argument. +As an optional third argument, +.Cm right +can follow. +In that case, the fourth argument is set as a big right delimiter after +the second argument. +.It Cm lpile +Like +.Cm cpile , +but subequations are left-justified. +.It Cm matrix +Followed by a list of columns enclosed in braces. +All columns need to have the same number of subequations. +The columns are set as a matrix. +The difference compared to multiple subsequent +.Cm pile +operators is that in a +.Cm matrix , +corresponding subequations in all columns line up horizontally, +while each +.Cm pile +does vertical spacing independently. +.It Cm over +Set a fraction. +The preceding box is the numerator, the following box is the denominator. +.It Cm pile +Followed by a list of subequations enclosed in braces, +the subequations being separated by +.Cm above +keywords. +Sets the subequations one above the other, each of them centered. +Typically used to represent vectors in coordinate representation. +.It Cm rcol +Like +.Cm rpile , +but for use in +.Cm matrix . 
+.It Cm right +See +.Cm left ; +.Cm right +cannot be used without +.Cm left . +To set a big right delimiter without a big left delimiter, the following +construction can be used: +.Pp +.D1 Cm left No \(dq\(dq Ar box Cm right Ar delimiter +.It Cm roman +Set the following box using the default font. +.It Cm rpile +Like +.Cm cpile , +but subequations are right-justified. +.It Cm size +Set the second argument with the font size specified by the first +argument; currently ignored by +.Xr mandoc 1 . +By prepending a plus or minus sign to the first argument, +the font size can be selected relative to the current size. +.It Cm sqrt +Set the square root of the following box. +.It Cm sub +Set the following box as a subscript to the preceding box. +.It Cm sup +Set the following box as a superscript to the preceding box. +As a special case, if a +.Cm sup +clause immediately follows a +.Cm sub +clause as in +.Pp +.D1 Ar mainbox Cm sub Ar subbox Cm sup Ar supbox +.Pp +both are set with respect to the same +.Ar mainbox , +that is, +.Ar supbox +is set above +.Ar subbox . +.It Cm tilde +Set a tilde over the preceding box. +.It Cm to +Set the following box above the preceding box, +using a slightly smaller font. +Used for sums and integrals and the like. +As a special case, if a +.Cm to +clause immediately follows a +.Cm from +clause as in +.Pp +.D1 Ar mainbox Cm from Ar frombox Cm to Ar tobox +.Pp +both are set below and above the same +.Ar mainbox . +.It Cm under +Underline the preceding box. +.It Cm vec +Set a vector symbol (right arrow) over the preceding box. +.El +.Pp +The binary operations +.Cm from , +.Cm to , +.Cm sub , +and +.Cm sup +group to the right, that is, +.Pp +.D1 Ar mainbox Cm sup Ar supbox Cm sub Ar subbox +.Pp +is the same as +.Pp +.D1 Ar mainbox Cm sup Brq Ar supbox Cm sub Ar subbox +.Pp +and different from +.Pp +.D1 Bro Ar mainbox Cm sup Ar supbox Brc Cm sub Ar subbox . +.Pp +By contrast, +.Cm over +groups to the left. 
+.Pp +In the following list, earlier operations bind more tightly than +later operations: +.Pp +.Bl -enum -compact +.It +.Cm dyad , +.Cm vec , +.Cm under , +.Cm bar , +.Cm tilde , +.Cm hat , +.Cm dot , +.Cm dotdot +.It +.Cm fat , +.Cm roman , +.Cm italic , +.Cm bold , +.Cm size +.It +.Cm sub , +.Cm sup +.It +.Cm sqrt +.It +.Cm over +.It +.Cm from , +.Cm to +.El +.Sh COMPATIBILITY +This section documents the compatibility of mandoc +.Nm +and the troff +.Nm +implementation (including GNU troff). +.Pp +.Bl -dash -compact +.It +The text string +.Sq \e\(dq +is interpreted as a literal quote in troff. +In mandoc, this is interpreted as a comment. +.It +In troff, the circumflex and tilde white-space symbols map to +fixed-width spaces. +In mandoc, these characters are synonyms for the space character. +.It +The troff implementation of +.Nm +allows for equation alignment with the +.Cm mark +and +.Cm lineup +tokens. +mandoc discards these tokens. +The +.Cm back Ar n , +.Cm fwd Ar n , +.Cm up Ar n , +and +.Cm down Ar n +commands are also ignored. +.El +.Sh SEE ALSO +.Xr mandoc 1 , +.Xr man 7 , +.Xr mandoc_char 7 , +.Xr mdoc 7 , +.Xr roff 7 +.Rs +.%A Brian W. Kernighan +.%A Lorinda L. Cherry +.%T System for Typesetting Mathematics +.%J Communications of the ACM +.%V 18 +.%P 151\(en157 +.%D March, 1975 +.Re +.Rs +.%A Brian W. Kernighan +.%A Lorinda L. Cherry +.%T Typesetting Mathematics, User's Guide +.%D 1976 +.Re +.Rs +.%A Brian W. Kernighan +.%A Lorinda L. Cherry +.%T Typesetting Mathematics, User's Guide (Second Edition) +.%D 1978 +.Re +.Sh HISTORY +The eqn utility, a preprocessor for troff, was originally written by +Brian W. Kernighan and Lorinda L. Cherry in 1975. +The GNU reimplementation of eqn, part of the GNU troff package, was +released in 1989 by James Clark. +The eqn component of +.Xr mandoc 1 +was added in 2011. +.Sh AUTHORS +This +.Nm +reference was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv . 
diff --git a/contrib/mdocml/eqn.c b/contrib/mdocml/eqn.c new file mode 100644 index 0000000..e9fbdec --- /dev/null +++ b/contrib/mdocml/eqn.c @@ -0,0 +1,1126 @@ +/* $Id: eqn.c,v 1.61 2016/01/08 00:50:45 schwarze Exp $ */ +/* + * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc.h" +#include "mandoc_aux.h" +#include "libmandoc.h" +#include "libroff.h" + +#define EQN_NEST_MAX 128 /* maximum nesting of defines */ +#define STRNEQ(p1, sz1, p2, sz2) \ + ((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1))) + +enum eqn_tok { + EQN_TOK_DYAD = 0, + EQN_TOK_VEC, + EQN_TOK_UNDER, + EQN_TOK_BAR, + EQN_TOK_TILDE, + EQN_TOK_HAT, + EQN_TOK_DOT, + EQN_TOK_DOTDOT, + EQN_TOK_FWD, + EQN_TOK_BACK, + EQN_TOK_DOWN, + EQN_TOK_UP, + EQN_TOK_FAT, + EQN_TOK_ROMAN, + EQN_TOK_ITALIC, + EQN_TOK_BOLD, + EQN_TOK_SIZE, + EQN_TOK_SUB, + EQN_TOK_SUP, + EQN_TOK_SQRT, + EQN_TOK_OVER, + EQN_TOK_FROM, + EQN_TOK_TO, + EQN_TOK_BRACE_OPEN, + EQN_TOK_BRACE_CLOSE, + EQN_TOK_GSIZE, + EQN_TOK_GFONT, + EQN_TOK_MARK, + EQN_TOK_LINEUP, + EQN_TOK_LEFT, + EQN_TOK_RIGHT, + EQN_TOK_PILE, + EQN_TOK_LPILE, + EQN_TOK_RPILE, + EQN_TOK_CPILE, + EQN_TOK_MATRIX, + EQN_TOK_CCOL, + EQN_TOK_LCOL, + EQN_TOK_RCOL, + EQN_TOK_DELIM, + EQN_TOK_DEFINE, + EQN_TOK_TDEFINE, + EQN_TOK_NDEFINE, + EQN_TOK_UNDEF, + EQN_TOK_EOF, + EQN_TOK_ABOVE, + EQN_TOK__MAX +}; + +static const char *eqn_toks[EQN_TOK__MAX] = { + "dyad", /* EQN_TOK_DYAD */ + "vec", /* EQN_TOK_VEC */ + "under", /* EQN_TOK_UNDER */ + "bar", /* EQN_TOK_BAR */ + "tilde", /* EQN_TOK_TILDE */ + "hat", /* EQN_TOK_HAT */ + "dot", /* EQN_TOK_DOT */ + "dotdot", /* EQN_TOK_DOTDOT */ + "fwd", /* EQN_TOK_FWD * */ + "back", /* EQN_TOK_BACK */ + "down", /* EQN_TOK_DOWN */ + "up", /* EQN_TOK_UP */ + "fat", /* EQN_TOK_FAT */ + "roman", /* EQN_TOK_ROMAN */ + "italic", /* EQN_TOK_ITALIC */ + "bold", /* EQN_TOK_BOLD */ + "size", /* EQN_TOK_SIZE */ + "sub", /* EQN_TOK_SUB */ + "sup", /* EQN_TOK_SUP */ + "sqrt", /* EQN_TOK_SQRT */ + "over", /* EQN_TOK_OVER */ + "from", /* EQN_TOK_FROM */ + "to", /* EQN_TOK_TO */ + "{", /* EQN_TOK_BRACE_OPEN */ + "}", /* EQN_TOK_BRACE_CLOSE */ 
+ "gsize", /* EQN_TOK_GSIZE */ + "gfont", /* EQN_TOK_GFONT */ + "mark", /* EQN_TOK_MARK */ + "lineup", /* EQN_TOK_LINEUP */ + "left", /* EQN_TOK_LEFT */ + "right", /* EQN_TOK_RIGHT */ + "pile", /* EQN_TOK_PILE */ + "lpile", /* EQN_TOK_LPILE */ + "rpile", /* EQN_TOK_RPILE */ + "cpile", /* EQN_TOK_CPILE */ + "matrix", /* EQN_TOK_MATRIX */ + "ccol", /* EQN_TOK_CCOL */ + "lcol", /* EQN_TOK_LCOL */ + "rcol", /* EQN_TOK_RCOL */ + "delim", /* EQN_TOK_DELIM */ + "define", /* EQN_TOK_DEFINE */ + "tdefine", /* EQN_TOK_TDEFINE */ + "ndefine", /* EQN_TOK_NDEFINE */ + "undef", /* EQN_TOK_UNDEF */ + NULL, /* EQN_TOK_EOF */ + "above", /* EQN_TOK_ABOVE */ +}; + +enum eqn_symt { + EQNSYM_alpha, + EQNSYM_beta, + EQNSYM_chi, + EQNSYM_delta, + EQNSYM_epsilon, + EQNSYM_eta, + EQNSYM_gamma, + EQNSYM_iota, + EQNSYM_kappa, + EQNSYM_lambda, + EQNSYM_mu, + EQNSYM_nu, + EQNSYM_omega, + EQNSYM_omicron, + EQNSYM_phi, + EQNSYM_pi, + EQNSYM_ps, + EQNSYM_rho, + EQNSYM_sigma, + EQNSYM_tau, + EQNSYM_theta, + EQNSYM_upsilon, + EQNSYM_xi, + EQNSYM_zeta, + EQNSYM_DELTA, + EQNSYM_GAMMA, + EQNSYM_LAMBDA, + EQNSYM_OMEGA, + EQNSYM_PHI, + EQNSYM_PI, + EQNSYM_PSI, + EQNSYM_SIGMA, + EQNSYM_THETA, + EQNSYM_UPSILON, + EQNSYM_XI, + EQNSYM_inter, + EQNSYM_union, + EQNSYM_prod, + EQNSYM_int, + EQNSYM_sum, + EQNSYM_grad, + EQNSYM_del, + EQNSYM_times, + EQNSYM_cdot, + EQNSYM_nothing, + EQNSYM_approx, + EQNSYM_prime, + EQNSYM_half, + EQNSYM_partial, + EQNSYM_inf, + EQNSYM_muchgreat, + EQNSYM_muchless, + EQNSYM_larrow, + EQNSYM_rarrow, + EQNSYM_pm, + EQNSYM_nequal, + EQNSYM_equiv, + EQNSYM_lessequal, + EQNSYM_moreequal, + EQNSYM_minus, + EQNSYM__MAX +}; + +struct eqnsym { + const char *str; + const char *sym; +}; + +static const struct eqnsym eqnsyms[EQNSYM__MAX] = { + { "alpha", "*a" }, /* EQNSYM_alpha */ + { "beta", "*b" }, /* EQNSYM_beta */ + { "chi", "*x" }, /* EQNSYM_chi */ + { "delta", "*d" }, /* EQNSYM_delta */ + { "epsilon", "*e" }, /* EQNSYM_epsilon */ + { "eta", "*y" }, /* EQNSYM_eta */ + { "gamma", "*g" }, 
/* EQNSYM_gamma */ + { "iota", "*i" }, /* EQNSYM_iota */ + { "kappa", "*k" }, /* EQNSYM_kappa */ + { "lambda", "*l" }, /* EQNSYM_lambda */ + { "mu", "*m" }, /* EQNSYM_mu */ + { "nu", "*n" }, /* EQNSYM_nu */ + { "omega", "*w" }, /* EQNSYM_omega */ + { "omicron", "*o" }, /* EQNSYM_omicron */ + { "phi", "*f" }, /* EQNSYM_phi */ + { "pi", "*p" }, /* EQNSYM_pi */ + { "psi", "*q" }, /* EQNSYM_psi */ + { "rho", "*r" }, /* EQNSYM_rho */ + { "sigma", "*s" }, /* EQNSYM_sigma */ + { "tau", "*t" }, /* EQNSYM_tau */ + { "theta", "*h" }, /* EQNSYM_theta */ + { "upsilon", "*u" }, /* EQNSYM_upsilon */ + { "xi", "*c" }, /* EQNSYM_xi */ + { "zeta", "*z" }, /* EQNSYM_zeta */ + { "DELTA", "*D" }, /* EQNSYM_DELTA */ + { "GAMMA", "*G" }, /* EQNSYM_GAMMA */ + { "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */ + { "OMEGA", "*W" }, /* EQNSYM_OMEGA */ + { "PHI", "*F" }, /* EQNSYM_PHI */ + { "PI", "*P" }, /* EQNSYM_PI */ + { "PSI", "*Q" }, /* EQNSYM_PSI */ + { "SIGMA", "*S" }, /* EQNSYM_SIGMA */ + { "THETA", "*H" }, /* EQNSYM_THETA */ + { "UPSILON", "*U" }, /* EQNSYM_UPSILON */ + { "XI", "*C" }, /* EQNSYM_XI */ + { "inter", "ca" }, /* EQNSYM_inter */ + { "union", "cu" }, /* EQNSYM_union */ + { "prod", "product" }, /* EQNSYM_prod */ + { "int", "integral" }, /* EQNSYM_int */ + { "sum", "sum" }, /* EQNSYM_sum */ + { "grad", "gr" }, /* EQNSYM_grad */ + { "del", "gr" }, /* EQNSYM_del */ + { "times", "mu" }, /* EQNSYM_times */ + { "cdot", "pc" }, /* EQNSYM_cdot */ + { "nothing", "&" }, /* EQNSYM_nothing */ + { "approx", "~~" }, /* EQNSYM_approx */ + { "prime", "fm" }, /* EQNSYM_prime */ + { "half", "12" }, /* EQNSYM_half */ + { "partial", "pd" }, /* EQNSYM_partial */ + { "inf", "if" }, /* EQNSYM_inf */ + { ">>", ">>" }, /* EQNSYM_muchgreat */ + { "<<", "<<" }, /* EQNSYM_muchless */ + { "<-", "<-" }, /* EQNSYM_larrow */ + { "->", "->" }, /* EQNSYM_rarrow */ + { "+-", "+-" }, /* EQNSYM_pm */ + { "!=", "!=" }, /* EQNSYM_nequal */ + { "==", "==" }, /* EQNSYM_equiv */ + { "<=", "<=" }, /* EQNSYM_lessequal */ + { 
">=", ">=" }, /* EQNSYM_moreequal */ + { "-", "mi" }, /* EQNSYM_minus */ +}; + +static struct eqn_box *eqn_box_alloc(struct eqn_node *, struct eqn_box *); +static void eqn_box_free(struct eqn_box *); +static struct eqn_box *eqn_box_makebinary(struct eqn_node *, + enum eqn_post, struct eqn_box *); +static void eqn_def(struct eqn_node *); +static struct eqn_def *eqn_def_find(struct eqn_node *, const char *, size_t); +static void eqn_delim(struct eqn_node *); +static const char *eqn_next(struct eqn_node *, char, size_t *, int); +static const char *eqn_nextrawtok(struct eqn_node *, size_t *); +static const char *eqn_nexttok(struct eqn_node *, size_t *); +static enum rofferr eqn_parse(struct eqn_node *, struct eqn_box *); +static enum eqn_tok eqn_tok_parse(struct eqn_node *, char **); +static void eqn_undef(struct eqn_node *); + + +enum rofferr +eqn_read(struct eqn_node **epp, int ln, + const char *p, int pos, int *offs) +{ + size_t sz; + struct eqn_node *ep; + enum rofferr er; + + ep = *epp; + + /* + * If we're the terminating mark, unset our equation status and + * validate the full equation. + */ + + if (0 == strncmp(p, ".EN", 3)) { + er = eqn_end(epp); + p += 3; + while (' ' == *p || '\t' == *p) + p++; + if ('\0' == *p) + return er; + mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse, + ln, pos, "EN %s", p); + return er; + } + + /* + * Build up the full string, replacing all newlines with regular + * whitespace. + */ + + sz = strlen(p + pos) + 1; + ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1); + + /* First invocation: nil terminate the string. 
*/ + + if (0 == ep->sz) + *ep->data = '\0'; + + ep->sz += sz; + strlcat(ep->data, p + pos, ep->sz + 1); + strlcat(ep->data, " ", ep->sz + 1); + return ROFF_IGN; +} + +struct eqn_node * +eqn_alloc(int pos, int line, struct mparse *parse) +{ + struct eqn_node *p; + + p = mandoc_calloc(1, sizeof(struct eqn_node)); + + p->parse = parse; + p->eqn.ln = line; + p->eqn.pos = pos; + p->gsize = EQN_DEFSIZE; + + return p; +} + +/* + * Find the key "key" of the give size within our eqn-defined values. + */ +static struct eqn_def * +eqn_def_find(struct eqn_node *ep, const char *key, size_t sz) +{ + int i; + + for (i = 0; i < (int)ep->defsz; i++) + if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key, + ep->defs[i].keysz, key, sz)) + return &ep->defs[i]; + + return NULL; +} + +/* + * Get the next token from the input stream using the given quote + * character. + * Optionally make any replacements. + */ +static const char * +eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl) +{ + char *start, *next; + int q, diff, lim; + size_t ssz, dummy; + struct eqn_def *def; + + if (NULL == sz) + sz = &dummy; + + lim = 0; + ep->rew = ep->cur; +again: + /* Prevent self-definitions. */ + + if (lim >= EQN_NEST_MAX) { + mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse, + ep->eqn.ln, ep->eqn.pos, NULL); + return NULL; + } + + ep->cur = ep->rew; + start = &ep->data[(int)ep->cur]; + q = 0; + + if ('\0' == *start) + return NULL; + + if (quote == *start) { + ep->cur++; + q = 1; + } + + start = &ep->data[(int)ep->cur]; + + if ( ! 
q) { + if ('{' == *start || '}' == *start) + ssz = 1; + else + ssz = strcspn(start + 1, " ^~\"{}\t") + 1; + next = start + (int)ssz; + if ('\0' == *next) + next = NULL; + } else + next = strchr(start, quote); + + if (NULL != next) { + *sz = (size_t)(next - start); + ep->cur += *sz; + if (q) + ep->cur++; + while (' ' == ep->data[(int)ep->cur] || + '\t' == ep->data[(int)ep->cur] || + '^' == ep->data[(int)ep->cur] || + '~' == ep->data[(int)ep->cur]) + ep->cur++; + } else { + if (q) + mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse, + ep->eqn.ln, ep->eqn.pos, NULL); + next = strchr(start, '\0'); + *sz = (size_t)(next - start); + ep->cur += *sz; + } + + /* Quotes aren't expanded for values. */ + + if (q || ! repl) + return start; + + if (NULL != (def = eqn_def_find(ep, start, *sz))) { + diff = def->valsz - *sz; + + if (def->valsz > *sz) { + ep->sz += diff; + ep->data = mandoc_realloc(ep->data, ep->sz + 1); + ep->data[ep->sz] = '\0'; + start = &ep->data[(int)ep->rew]; + } + + diff = def->valsz - *sz; + memmove(start + *sz + diff, start + *sz, + (strlen(start) - *sz) + 1); + memcpy(start, def->val, def->valsz); + lim++; + goto again; + } + + return start; +} + +/* + * Get the next delimited token using the default current quote + * character. + */ +static const char * +eqn_nexttok(struct eqn_node *ep, size_t *sz) +{ + + return eqn_next(ep, '"', sz, 1); +} + +/* + * Get next token without replacement. + */ +static const char * +eqn_nextrawtok(struct eqn_node *ep, size_t *sz) +{ + + return eqn_next(ep, '"', sz, 0); +} + +/* + * Parse a token from the stream of text. + * A token consists of one of the recognised eqn(7) strings. + * Strings are separated by delimiting marks. + * This returns EQN_TOK_EOF when there are no more tokens. + * If the token is an unrecognised string literal, then it returns + * EQN_TOK__MAX and sets the "p" pointer to an allocated, nil-terminated + * string. + * This must be later freed with free(3). 
+ */ +static enum eqn_tok +eqn_tok_parse(struct eqn_node *ep, char **p) +{ + const char *start; + size_t i, sz; + int quoted; + + if (NULL != p) + *p = NULL; + + quoted = ep->data[ep->cur] == '"'; + + if (NULL == (start = eqn_nexttok(ep, &sz))) + return EQN_TOK_EOF; + + if (quoted) { + if (p != NULL) + *p = mandoc_strndup(start, sz); + return EQN_TOK__MAX; + } + + for (i = 0; i < EQN_TOK__MAX; i++) { + if (NULL == eqn_toks[i]) + continue; + if (STRNEQ(start, sz, eqn_toks[i], strlen(eqn_toks[i]))) + break; + } + + if (i == EQN_TOK__MAX && NULL != p) + *p = mandoc_strndup(start, sz); + + return i; +} + +static void +eqn_box_free(struct eqn_box *bp) +{ + + if (bp->first) + eqn_box_free(bp->first); + if (bp->next) + eqn_box_free(bp->next); + + free(bp->text); + free(bp->left); + free(bp->right); + free(bp->top); + free(bp->bottom); + free(bp); +} + +/* + * Allocate a box as the last child of the parent node. + */ +static struct eqn_box * +eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent) +{ + struct eqn_box *bp; + + bp = mandoc_calloc(1, sizeof(struct eqn_box)); + bp->parent = parent; + bp->parent->args++; + bp->expectargs = UINT_MAX; + bp->size = ep->gsize; + + if (NULL != parent->first) { + parent->last->next = bp; + bp->prev = parent->last; + } else + parent->first = bp; + + parent->last = bp; + return bp; +} + +/* + * Reparent the current last node (of the current parent) under a new + * EQN_SUBEXPR as the first element. + * Then return the new parent. + * The new EQN_SUBEXPR will have a two-child limit. 
+ */ +static struct eqn_box * +eqn_box_makebinary(struct eqn_node *ep, + enum eqn_post pos, struct eqn_box *parent) +{ + struct eqn_box *b, *newb; + + assert(NULL != parent->last); + b = parent->last; + if (parent->last == parent->first) + parent->first = NULL; + parent->args--; + parent->last = b->prev; + b->prev = NULL; + newb = eqn_box_alloc(ep, parent); + newb->pos = pos; + newb->type = EQN_SUBEXPR; + newb->expectargs = 2; + newb->args = 1; + newb->first = newb->last = b; + newb->first->next = NULL; + b->parent = newb; + return newb; +} + +/* + * Parse the "delim" control statement. + */ +static void +eqn_delim(struct eqn_node *ep) +{ + const char *start; + size_t sz; + + if ((start = eqn_nextrawtok(ep, &sz)) == NULL) + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, + ep->eqn.ln, ep->eqn.pos, "delim"); + else if (strncmp(start, "off", 3) == 0) + ep->delim = 0; + else if (strncmp(start, "on", 2) == 0) { + if (ep->odelim && ep->cdelim) + ep->delim = 1; + } else if (start[1] != '\0') { + ep->odelim = start[0]; + ep->cdelim = start[1]; + ep->delim = 1; + } +} + +/* + * Undefine a previously-defined string. + */ +static void +eqn_undef(struct eqn_node *ep) +{ + const char *start; + struct eqn_def *def; + size_t sz; + + if ((start = eqn_nextrawtok(ep, &sz)) == NULL) { + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, + ep->eqn.ln, ep->eqn.pos, "undef"); + return; + } + if ((def = eqn_def_find(ep, start, sz)) == NULL) + return; + free(def->key); + free(def->val); + def->key = def->val = NULL; + def->keysz = def->valsz = 0; +} + +static void +eqn_def(struct eqn_node *ep) +{ + const char *start; + size_t sz; + struct eqn_def *def; + int i; + + if ((start = eqn_nextrawtok(ep, &sz)) == NULL) { + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, + ep->eqn.ln, ep->eqn.pos, "define"); + return; + } + + /* + * Search for a key that already exists. + * Create a new key if none is found. + */ + if (NULL == (def = eqn_def_find(ep, start, sz))) { + /* Find holes in string array. 
*/ + for (i = 0; i < (int)ep->defsz; i++) + if (0 == ep->defs[i].keysz) + break; + + if (i == (int)ep->defsz) { + ep->defsz++; + ep->defs = mandoc_reallocarray(ep->defs, + ep->defsz, sizeof(struct eqn_def)); + ep->defs[i].key = ep->defs[i].val = NULL; + } + + def = ep->defs + i; + free(def->key); + def->key = mandoc_strndup(start, sz); + def->keysz = sz; + } + + start = eqn_next(ep, ep->data[(int)ep->cur], &sz, 0); + if (start == NULL) { + mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse, + ep->eqn.ln, ep->eqn.pos, "define %s", def->key); + free(def->key); + free(def->val); + def->key = def->val = NULL; + def->keysz = def->valsz = 0; + return; + } + free(def->val); + def->val = mandoc_strndup(start, sz); + def->valsz = sz; +} + +/* + * Recursively parse an eqn(7) expression. + */ +static enum rofferr +eqn_parse(struct eqn_node *ep, struct eqn_box *parent) +{ + char sym[64]; + struct eqn_box *cur; + const char *start; + char *p; + size_t i, sz; + enum eqn_tok tok, subtok; + enum eqn_post pos; + int size; + + assert(parent != NULL); + + /* + * Empty equation. + * Do not add it to the high-level syntax tree. + */ + + if (ep->data == NULL) + return ROFF_IGN; + +next_tok: + tok = eqn_tok_parse(ep, &p); + +this_tok: + switch (tok) { + case (EQN_TOK_UNDEF): + eqn_undef(ep); + break; + case (EQN_TOK_NDEFINE): + case (EQN_TOK_DEFINE): + eqn_def(ep); + break; + case (EQN_TOK_TDEFINE): + if (eqn_nextrawtok(ep, NULL) == NULL || + eqn_next(ep, ep->data[(int)ep->cur], NULL, 0) == NULL) + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, + ep->eqn.ln, ep->eqn.pos, "tdefine"); + break; + case (EQN_TOK_DELIM): + eqn_delim(ep); + break; + case (EQN_TOK_GFONT): + if (eqn_nextrawtok(ep, NULL) == NULL) + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + break; + case (EQN_TOK_MARK): + case (EQN_TOK_LINEUP): + /* Ignore these. 
*/ + break; + case (EQN_TOK_DYAD): + case (EQN_TOK_VEC): + case (EQN_TOK_UNDER): + case (EQN_TOK_BAR): + case (EQN_TOK_TILDE): + case (EQN_TOK_HAT): + case (EQN_TOK_DOT): + case (EQN_TOK_DOTDOT): + if (parent->last == NULL) { + mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + cur = eqn_box_alloc(ep, parent); + cur->type = EQN_TEXT; + cur->text = mandoc_strdup(""); + } + parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent); + parent->type = EQN_LISTONE; + parent->expectargs = 1; + switch (tok) { + case (EQN_TOK_DOTDOT): + strlcpy(sym, "\\[ad]", sizeof(sym)); + break; + case (EQN_TOK_VEC): + strlcpy(sym, "\\[->]", sizeof(sym)); + break; + case (EQN_TOK_DYAD): + strlcpy(sym, "\\[<>]", sizeof(sym)); + break; + case (EQN_TOK_TILDE): + strlcpy(sym, "\\[a~]", sizeof(sym)); + break; + case (EQN_TOK_UNDER): + strlcpy(sym, "\\[ul]", sizeof(sym)); + break; + case (EQN_TOK_BAR): + strlcpy(sym, "\\[rl]", sizeof(sym)); + break; + case (EQN_TOK_DOT): + strlcpy(sym, "\\[a.]", sizeof(sym)); + break; + case (EQN_TOK_HAT): + strlcpy(sym, "\\[ha]", sizeof(sym)); + break; + default: + abort(); + } + + switch (tok) { + case (EQN_TOK_DOTDOT): + case (EQN_TOK_VEC): + case (EQN_TOK_DYAD): + case (EQN_TOK_TILDE): + case (EQN_TOK_BAR): + case (EQN_TOK_DOT): + case (EQN_TOK_HAT): + parent->top = mandoc_strdup(sym); + break; + case (EQN_TOK_UNDER): + parent->bottom = mandoc_strdup(sym); + break; + default: + abort(); + } + parent = parent->parent; + break; + case (EQN_TOK_FWD): + case (EQN_TOK_BACK): + case (EQN_TOK_DOWN): + case (EQN_TOK_UP): + subtok = eqn_tok_parse(ep, NULL); + if (subtok != EQN_TOK__MAX) { + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + tok = subtok; + goto this_tok; + } + break; + case (EQN_TOK_FAT): + case (EQN_TOK_ROMAN): + case (EQN_TOK_ITALIC): + case (EQN_TOK_BOLD): + while (parent->args == parent->expectargs) + parent = parent->parent; + /* + * These values apply to the next word or 
sequence of + * words; thus, we mark that we'll have a child with + * exactly one of those. + */ + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_LISTONE; + parent->expectargs = 1; + switch (tok) { + case (EQN_TOK_FAT): + parent->font = EQNFONT_FAT; + break; + case (EQN_TOK_ROMAN): + parent->font = EQNFONT_ROMAN; + break; + case (EQN_TOK_ITALIC): + parent->font = EQNFONT_ITALIC; + break; + case (EQN_TOK_BOLD): + parent->font = EQNFONT_BOLD; + break; + default: + abort(); + } + break; + case (EQN_TOK_SIZE): + case (EQN_TOK_GSIZE): + /* Accept two values: integral size and a single. */ + if (NULL == (start = eqn_nexttok(ep, &sz))) { + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + break; + } + size = mandoc_strntoi(start, sz, 10); + if (-1 == size) { + mandoc_msg(MANDOCERR_IT_NONUM, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + break; + } + if (EQN_TOK_GSIZE == tok) { + ep->gsize = size; + break; + } + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_LISTONE; + parent->expectargs = 1; + parent->size = size; + break; + case (EQN_TOK_FROM): + case (EQN_TOK_TO): + case (EQN_TOK_SUB): + case (EQN_TOK_SUP): + /* + * We have a left-right-associative expression. + * Repivot under a positional node, open a child scope + * and keep on reading. + */ + if (parent->last == NULL) { + mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + cur = eqn_box_alloc(ep, parent); + cur->type = EQN_TEXT; + cur->text = mandoc_strdup(""); + } + /* Handle the "subsup" and "fromto" positions. 
*/ + if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) { + parent->expectargs = 3; + parent->pos = EQNPOS_SUBSUP; + break; + } + if (EQN_TOK_TO == tok && parent->pos == EQNPOS_FROM) { + parent->expectargs = 3; + parent->pos = EQNPOS_FROMTO; + break; + } + switch (tok) { + case (EQN_TOK_FROM): + pos = EQNPOS_FROM; + break; + case (EQN_TOK_TO): + pos = EQNPOS_TO; + break; + case (EQN_TOK_SUP): + pos = EQNPOS_SUP; + break; + case (EQN_TOK_SUB): + pos = EQNPOS_SUB; + break; + default: + abort(); + } + parent = eqn_box_makebinary(ep, pos, parent); + break; + case (EQN_TOK_SQRT): + while (parent->args == parent->expectargs) + parent = parent->parent; + /* + * Accept a left-right-associative set of arguments just + * like sub and sup and friends but without rebalancing + * under a pivot. + */ + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_SUBEXPR; + parent->pos = EQNPOS_SQRT; + parent->expectargs = 1; + break; + case (EQN_TOK_OVER): + /* + * We have a right-left-associative fraction. + * Close out anything that's currently open, then + * rebalance and continue reading. + */ + if (parent->last == NULL) { + mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + cur = eqn_box_alloc(ep, parent); + cur->type = EQN_TEXT; + cur->text = mandoc_strdup(""); + } + while (EQN_SUBEXPR == parent->type) + parent = parent->parent; + parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent); + break; + case (EQN_TOK_RIGHT): + case (EQN_TOK_BRACE_CLOSE): + /* + * Close out the existing brace. + * FIXME: this is a shitty sentinel: we should really + * have a native EQN_BRACE type or whatnot. 
+ */ + for (cur = parent; cur != NULL; cur = cur->parent) + if (cur->type == EQN_LIST && + (tok == EQN_TOK_BRACE_CLOSE || + cur->left != NULL)) + break; + if (cur == NULL) { + mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + break; + } + parent = cur; + if (EQN_TOK_RIGHT == tok) { + if (NULL == (start = eqn_nexttok(ep, &sz))) { + mandoc_msg(MANDOCERR_REQ_EMPTY, + ep->parse, ep->eqn.ln, + ep->eqn.pos, eqn_toks[tok]); + break; + } + /* Handling depends on right/left. */ + if (STRNEQ(start, sz, "ceiling", 7)) { + strlcpy(sym, "\\[rc]", sizeof(sym)); + parent->right = mandoc_strdup(sym); + } else if (STRNEQ(start, sz, "floor", 5)) { + strlcpy(sym, "\\[rf]", sizeof(sym)); + parent->right = mandoc_strdup(sym); + } else + parent->right = mandoc_strndup(start, sz); + } + parent = parent->parent; + if (tok == EQN_TOK_BRACE_CLOSE && + (parent->type == EQN_PILE || + parent->type == EQN_MATRIX)) + parent = parent->parent; + /* Close out any "singleton" lists. */ + while (parent->type == EQN_LISTONE && + parent->args == parent->expectargs) + parent = parent->parent; + break; + case (EQN_TOK_BRACE_OPEN): + case (EQN_TOK_LEFT): + /* + * If we already have something in the stack and we're + * in an expression, then rewind til we're not any more + * (just like with the text node). 
+ */ + while (parent->args == parent->expectargs) + parent = parent->parent; + if (EQN_TOK_LEFT == tok && + (start = eqn_nexttok(ep, &sz)) == NULL) { + mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + break; + } + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_LIST; + if (EQN_TOK_LEFT == tok) { + if (STRNEQ(start, sz, "ceiling", 7)) { + strlcpy(sym, "\\[lc]", sizeof(sym)); + parent->left = mandoc_strdup(sym); + } else if (STRNEQ(start, sz, "floor", 5)) { + strlcpy(sym, "\\[lf]", sizeof(sym)); + parent->left = mandoc_strdup(sym); + } else + parent->left = mandoc_strndup(start, sz); + } + break; + case (EQN_TOK_PILE): + case (EQN_TOK_LPILE): + case (EQN_TOK_RPILE): + case (EQN_TOK_CPILE): + case (EQN_TOK_CCOL): + case (EQN_TOK_LCOL): + case (EQN_TOK_RCOL): + while (parent->args == parent->expectargs) + parent = parent->parent; + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_PILE; + parent->expectargs = 1; + break; + case (EQN_TOK_ABOVE): + for (cur = parent; cur != NULL; cur = cur->parent) + if (cur->type == EQN_PILE) + break; + if (cur == NULL) { + mandoc_msg(MANDOCERR_IT_STRAY, ep->parse, + ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + break; + } + parent = eqn_box_alloc(ep, cur); + parent->type = EQN_LIST; + break; + case (EQN_TOK_MATRIX): + while (parent->args == parent->expectargs) + parent = parent->parent; + parent = eqn_box_alloc(ep, parent); + parent->type = EQN_MATRIX; + parent->expectargs = 1; + break; + case (EQN_TOK_EOF): + /* + * End of file! + * TODO: make sure we're not in an open subexpression. + */ + return ROFF_EQN; + default: + assert(tok == EQN_TOK__MAX); + assert(NULL != p); + /* + * If we already have something in the stack and we're + * in an expression, then rewind til we're not any more. 
+ */ + while (parent->args == parent->expectargs) + parent = parent->parent; + cur = eqn_box_alloc(ep, parent); + cur->type = EQN_TEXT; + for (i = 0; i < EQNSYM__MAX; i++) + if (0 == strcmp(eqnsyms[i].str, p)) { + (void)snprintf(sym, sizeof(sym), + "\\[%s]", eqnsyms[i].sym); + cur->text = mandoc_strdup(sym); + free(p); + break; + } + + if (i == EQNSYM__MAX) + cur->text = p; + /* + * Post-process list status. + */ + while (parent->type == EQN_LISTONE && + parent->args == parent->expectargs) + parent = parent->parent; + break; + } + goto next_tok; +} + +enum rofferr +eqn_end(struct eqn_node **epp) +{ + struct eqn_node *ep; + + ep = *epp; + *epp = NULL; + + ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box)); + ep->eqn.root->expectargs = UINT_MAX; + return eqn_parse(ep, ep->eqn.root); +} + +void +eqn_free(struct eqn_node *p) +{ + int i; + + eqn_box_free(p->eqn.root); + + for (i = 0; i < (int)p->defsz; i++) { + free(p->defs[i].key); + free(p->defs[i].val); + } + + free(p->data); + free(p->defs); + free(p); +} diff --git a/contrib/mdocml/eqn_html.c b/contrib/mdocml/eqn_html.c new file mode 100644 index 0000000..f297336 --- /dev/null +++ b/contrib/mdocml/eqn_html.c @@ -0,0 +1,192 @@ +/* $Id: eqn_html.c,v 1.10 2014/10/12 19:31:41 schwarze Exp $ */ +/* + * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "out.h" +#include "html.h" + +static void +eqn_box(struct html *p, const struct eqn_box *bp) +{ + struct tag *post, *row, *cell, *t; + struct htmlpair tag[2]; + const struct eqn_box *child, *parent; + size_t i, j, rows; + + if (NULL == bp) + return; + + post = NULL; + + /* + * Special handling for a matrix, which is presented to us in + * column order, but must be printed in row-order. + */ + if (EQN_MATRIX == bp->type) { + if (NULL == bp->first) + goto out; + if (EQN_LIST != bp->first->type) { + eqn_box(p, bp->first); + goto out; + } + if (NULL == (parent = bp->first->first)) + goto out; + /* Estimate the number of rows, first. */ + if (NULL == (child = parent->first)) + goto out; + for (rows = 0; NULL != child; rows++) + child = child->next; + /* Print row-by-row. */ + post = print_otag(p, TAG_MTABLE, 0, NULL); + for (i = 0; i < rows; i++) { + parent = bp->first->first; + row = print_otag(p, TAG_MTR, 0, NULL); + while (NULL != parent) { + child = parent->first; + for (j = 0; j < i; j++) { + if (NULL == child) + break; + child = child->next; + } + cell = print_otag + (p, TAG_MTD, 0, NULL); + /* + * If we have no data for this + * particular cell, then print a + * placeholder and continue--don't puke. 
+ */ + if (NULL != child) + eqn_box(p, child->first); + print_tagq(p, cell); + parent = parent->next; + } + print_tagq(p, row); + } + goto out; + } + + switch (bp->pos) { + case (EQNPOS_TO): + post = print_otag(p, TAG_MOVER, 0, NULL); + break; + case (EQNPOS_SUP): + post = print_otag(p, TAG_MSUP, 0, NULL); + break; + case (EQNPOS_FROM): + post = print_otag(p, TAG_MUNDER, 0, NULL); + break; + case (EQNPOS_SUB): + post = print_otag(p, TAG_MSUB, 0, NULL); + break; + case (EQNPOS_OVER): + post = print_otag(p, TAG_MFRAC, 0, NULL); + break; + case (EQNPOS_FROMTO): + post = print_otag(p, TAG_MUNDEROVER, 0, NULL); + break; + case (EQNPOS_SUBSUP): + post = print_otag(p, TAG_MSUBSUP, 0, NULL); + break; + case (EQNPOS_SQRT): + post = print_otag(p, TAG_MSQRT, 0, NULL); + break; + default: + break; + } + + if (bp->top || bp->bottom) { + assert(NULL == post); + if (bp->top && NULL == bp->bottom) + post = print_otag(p, TAG_MOVER, 0, NULL); + else if (bp->top && bp->bottom) + post = print_otag(p, TAG_MUNDEROVER, 0, NULL); + else if (bp->bottom) + post = print_otag(p, TAG_MUNDER, 0, NULL); + } + + if (EQN_PILE == bp->type) { + assert(NULL == post); + if (bp->first != NULL && bp->first->type == EQN_LIST) + post = print_otag(p, TAG_MTABLE, 0, NULL); + } else if (bp->type == EQN_LIST && + bp->parent && bp->parent->type == EQN_PILE) { + assert(NULL == post); + post = print_otag(p, TAG_MTR, 0, NULL); + print_otag(p, TAG_MTD, 0, NULL); + } + + if (NULL != bp->text) { + assert(NULL == post); + post = print_otag(p, TAG_MI, 0, NULL); + print_text(p, bp->text); + } else if (NULL == post) { + if (NULL != bp->left || NULL != bp->right) { + PAIR_INIT(&tag[0], ATTR_OPEN, + NULL == bp->left ? "" : bp->left); + PAIR_INIT(&tag[1], ATTR_CLOSE, + NULL == bp->right ? 
"" : bp->right); + post = print_otag(p, TAG_MFENCED, 2, tag); + } + if (NULL == post) + post = print_otag(p, TAG_MROW, 0, NULL); + else + print_otag(p, TAG_MROW, 0, NULL); + } + + eqn_box(p, bp->first); + +out: + if (NULL != bp->bottom) { + t = print_otag(p, TAG_MO, 0, NULL); + print_text(p, bp->bottom); + print_tagq(p, t); + } + if (NULL != bp->top) { + t = print_otag(p, TAG_MO, 0, NULL); + print_text(p, bp->top); + print_tagq(p, t); + } + + if (NULL != post) + print_tagq(p, post); + + eqn_box(p, bp->next); +} + +void +print_eqn(struct html *p, const struct eqn *ep) +{ + struct htmlpair tag; + struct tag *t; + + PAIR_CLASS_INIT(&tag, "eqn"); + t = print_otag(p, TAG_MATH, 1, &tag); + + p->flags |= HTML_NONOSPACE; + eqn_box(p, ep->root); + p->flags &= ~HTML_NONOSPACE; + + print_tagq(p, t); +} diff --git a/contrib/mdocml/eqn_term.c b/contrib/mdocml/eqn_term.c new file mode 100644 index 0000000..5f2818b --- /dev/null +++ b/contrib/mdocml/eqn_term.c @@ -0,0 +1,127 @@ +/* $Id: eqn_term.c,v 1.8 2015/01/01 15:36:08 schwarze Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "out.h" +#include "term.h" + +static const enum termfont fontmap[EQNFONT__MAX] = { + TERMFONT_NONE, /* EQNFONT_NONE */ + TERMFONT_NONE, /* EQNFONT_ROMAN */ + TERMFONT_BOLD, /* EQNFONT_BOLD */ + TERMFONT_BOLD, /* EQNFONT_FAT */ + TERMFONT_UNDER /* EQNFONT_ITALIC */ +}; + +static void eqn_box(struct termp *, const struct eqn_box *); + + +void +term_eqn(struct termp *p, const struct eqn *ep) +{ + + eqn_box(p, ep->root); + p->flags &= ~TERMP_NOSPACE; +} + +static void +eqn_box(struct termp *p, const struct eqn_box *bp) +{ + const struct eqn_box *child; + + if (bp->type == EQN_LIST || + (bp->type == EQN_PILE && (bp->prev || bp->next)) || + (bp->parent != NULL && bp->parent->pos == EQNPOS_SQRT)) { + if (bp->parent->type == EQN_SUBEXPR && bp->prev != NULL) + p->flags |= TERMP_NOSPACE; + term_word(p, bp->left != NULL ? bp->left : "("); + p->flags |= TERMP_NOSPACE; + } + if (bp->font != EQNFONT_NONE) + term_fontpush(p, fontmap[(int)bp->font]); + + if (bp->text != NULL) + term_word(p, bp->text); + + if (bp->pos == EQNPOS_SQRT) { + term_word(p, "sqrt"); + p->flags |= TERMP_NOSPACE; + eqn_box(p, bp->first); + } else if (bp->type == EQN_SUBEXPR) { + child = bp->first; + eqn_box(p, child); + p->flags |= TERMP_NOSPACE; + term_word(p, bp->pos == EQNPOS_OVER ? "/" : + (bp->pos == EQNPOS_SUP || + bp->pos == EQNPOS_TO) ? 
"^" : "_"); + p->flags |= TERMP_NOSPACE; + child = child->next; + if (child != NULL) { + eqn_box(p, child); + if (bp->pos == EQNPOS_FROMTO || + bp->pos == EQNPOS_SUBSUP) { + p->flags |= TERMP_NOSPACE; + term_word(p, "^"); + p->flags |= TERMP_NOSPACE; + child = child->next; + if (child != NULL) + eqn_box(p, child); + } + } + } else { + child = bp->first; + if (bp->type == EQN_MATRIX && child->type == EQN_LIST) + child = child->first; + while (child != NULL) { + eqn_box(p, + bp->type == EQN_PILE && + child->type == EQN_LIST && + child->args == 1 ? + child->first : child); + child = child->next; + } + } + + if (bp->font != EQNFONT_NONE) + term_fontpop(p); + if (bp->type == EQN_LIST || + (bp->type == EQN_PILE && (bp->prev || bp->next)) || + (bp->parent != NULL && bp->parent->pos == EQNPOS_SQRT)) { + p->flags |= TERMP_NOSPACE; + term_word(p, bp->right != NULL ? bp->right : ")"); + if (bp->parent->type == EQN_SUBEXPR && bp->next != NULL) + p->flags |= TERMP_NOSPACE; + } + + if (bp->top != NULL) { + p->flags |= TERMP_NOSPACE; + term_word(p, bp->top); + } + if (bp->bottom != NULL) { + p->flags |= TERMP_NOSPACE; + term_word(p, "_"); + } +} diff --git a/contrib/mdocml/gmdiff b/contrib/mdocml/gmdiff new file mode 100644 index 0000000..8d24fa7 --- /dev/null +++ b/contrib/mdocml/gmdiff @@ -0,0 +1,51 @@ +#!/bin/sh +# Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +if [ `id -u` -eq 0 ]; then + echo "$0: do not run me as root" + exit 1 +fi + +if [ $# -eq 0 ]; then + echo "usage: $0 -h manual_source_file ..." + exit 1 +fi + +if [ "X$1" = "X-h" ]; then + shift + export PATH="/usr/local/heirloom-doctools/bin:$PATH" + EQN="neqn" + ROFF="nroff" + MOPT="-Omdoc $MOPT" +else + EQN="eqn -Tascii" + ROFF="groff -ww -Tascii -P -c" +fi +MOPT="-Werror $MOPT" + +while [ -n "$1" ]; do + file=$1 + shift + echo " ========== $file ========== " + tbl $file | $EQN | $ROFF -mandoc 2> /tmp/roff.err > /tmp/roff.out + ${MANDOC:=mandoc} -Ios='OpenBSD ports' $MOPT $file \ + 2> /tmp/mandoc.err > /tmp/mandoc.out + for i in roff mandoc; do + [[ -s /tmp/$i.err ]] && echo "$i errors:" && cat /tmp/$i.err + done + diff -au /tmp/roff.out /tmp/mandoc.out 2>&1 +done + +exit 0 diff --git a/contrib/mdocml/html.c b/contrib/mdocml/html.c new file mode 100644 index 0000000..adff053 --- /dev/null +++ b/contrib/mdocml/html.c @@ -0,0 +1,727 @@ +/* $Id: html.c,v 1.192 2016/01/04 12:45:29 schwarze Exp $ */ +/* + * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011-2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "mandoc.h" +#include "mandoc_aux.h" +#include "out.h" +#include "html.h" +#include "manconf.h" +#include "main.h" + +struct htmldata { + const char *name; + int flags; +#define HTML_CLRLINE (1 << 0) +#define HTML_NOSTACK (1 << 1) +#define HTML_AUTOCLOSE (1 << 2) /* Tag has auto-closure. */ +}; + +static const struct htmldata htmltags[TAG_MAX] = { + {"html", HTML_CLRLINE}, /* TAG_HTML */ + {"head", HTML_CLRLINE}, /* TAG_HEAD */ + {"body", HTML_CLRLINE}, /* TAG_BODY */ + {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */ + {"title", HTML_CLRLINE}, /* TAG_TITLE */ + {"div", HTML_CLRLINE}, /* TAG_DIV */ + {"h1", 0}, /* TAG_H1 */ + {"h2", 0}, /* TAG_H2 */ + {"span", 0}, /* TAG_SPAN */ + {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */ + {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */ + {"a", 0}, /* TAG_A */ + {"table", HTML_CLRLINE}, /* TAG_TABLE */ + {"tbody", HTML_CLRLINE}, /* TAG_TBODY */ + {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */ + {"tr", HTML_CLRLINE}, /* TAG_TR */ + {"td", HTML_CLRLINE}, /* TAG_TD */ + {"li", HTML_CLRLINE}, /* TAG_LI */ + {"ul", HTML_CLRLINE}, /* TAG_UL */ + {"ol", HTML_CLRLINE}, /* TAG_OL */ + {"dl", HTML_CLRLINE}, /* TAG_DL */ + {"dt", HTML_CLRLINE}, /* TAG_DT */ + {"dd", HTML_CLRLINE}, /* TAG_DD */ + {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */ + {"pre", HTML_CLRLINE }, /* TAG_PRE */ + {"b", 0 
}, /* TAG_B */ + {"i", 0 }, /* TAG_I */ + {"code", 0 }, /* TAG_CODE */ + {"small", 0 }, /* TAG_SMALL */ + {"style", HTML_CLRLINE}, /* TAG_STYLE */ + {"math", HTML_CLRLINE}, /* TAG_MATH */ + {"mrow", 0}, /* TAG_MROW */ + {"mi", 0}, /* TAG_MI */ + {"mo", 0}, /* TAG_MO */ + {"msup", 0}, /* TAG_MSUP */ + {"msub", 0}, /* TAG_MSUB */ + {"msubsup", 0}, /* TAG_MSUBSUP */ + {"mfrac", 0}, /* TAG_MFRAC */ + {"msqrt", 0}, /* TAG_MSQRT */ + {"mfenced", 0}, /* TAG_MFENCED */ + {"mtable", 0}, /* TAG_MTABLE */ + {"mtr", 0}, /* TAG_MTR */ + {"mtd", 0}, /* TAG_MTD */ + {"munderover", 0}, /* TAG_MUNDEROVER */ + {"munder", 0}, /* TAG_MUNDER*/ + {"mover", 0}, /* TAG_MOVER*/ +}; + +static const char *const htmlattrs[ATTR_MAX] = { + "name", /* ATTR_NAME */ + "rel", /* ATTR_REL */ + "href", /* ATTR_HREF */ + "type", /* ATTR_TYPE */ + "media", /* ATTR_MEDIA */ + "class", /* ATTR_CLASS */ + "style", /* ATTR_STYLE */ + "id", /* ATTR_ID */ + "colspan", /* ATTR_COLSPAN */ + "charset", /* ATTR_CHARSET */ + "open", /* ATTR_OPEN */ + "close", /* ATTR_CLOSE */ + "mathvariant", /* ATTR_MATHVARIANT */ +}; + +static const char *const roffscales[SCALE_MAX] = { + "cm", /* SCALE_CM */ + "in", /* SCALE_IN */ + "pc", /* SCALE_PC */ + "pt", /* SCALE_PT */ + "em", /* SCALE_EM */ + "em", /* SCALE_MM */ + "ex", /* SCALE_EN */ + "ex", /* SCALE_BU */ + "em", /* SCALE_VS */ + "ex", /* SCALE_FS */ +}; + +static void bufncat(struct html *, const char *, size_t); +static void print_ctag(struct html *, struct tag *); +static int print_escape(char); +static int print_encode(struct html *, const char *, int); +static void print_metaf(struct html *, enum mandoc_esc); +static void print_attr(struct html *, const char *, const char *); + + +void * +html_alloc(const struct manoutput *outopts) +{ + struct html *h; + + h = mandoc_calloc(1, sizeof(struct html)); + + h->tags.head = NULL; + h->style = outopts->style; + h->base_man = outopts->man; + h->base_includes = outopts->includes; + if (outopts->fragment) + h->oflags |= 
HTML_FRAGMENT; + + return h; +} + +void +html_free(void *p) +{ + struct tag *tag; + struct html *h; + + h = (struct html *)p; + + while ((tag = h->tags.head) != NULL) { + h->tags.head = tag->next; + free(tag); + } + + free(h); +} + +void +print_gen_head(struct html *h) +{ + struct htmlpair tag[4]; + struct tag *t; + + tag[0].key = ATTR_CHARSET; + tag[0].val = "utf-8"; + print_otag(h, TAG_META, 1, tag); + + /* + * Print a default style-sheet. + */ + t = print_otag(h, TAG_STYLE, 0, NULL); + print_text(h, "table.head, table.foot { width: 100%; }\n" + "td.head-rtitle, td.foot-os { text-align: right; }\n" + "td.head-vol { text-align: center; }\n" + "table.foot td { width: 50%; }\n" + "table.head td { width: 33%; }\n" + "div.spacer { margin: 1em 0; }\n"); + print_tagq(h, t); + + if (h->style) { + tag[0].key = ATTR_REL; + tag[0].val = "stylesheet"; + tag[1].key = ATTR_HREF; + tag[1].val = h->style; + tag[2].key = ATTR_TYPE; + tag[2].val = "text/css"; + tag[3].key = ATTR_MEDIA; + tag[3].val = "all"; + print_otag(h, TAG_LINK, 4, tag); + } +} + +static void +print_metaf(struct html *h, enum mandoc_esc deco) +{ + enum htmlfont font; + + switch (deco) { + case ESCAPE_FONTPREV: + font = h->metal; + break; + case ESCAPE_FONTITALIC: + font = HTMLFONT_ITALIC; + break; + case ESCAPE_FONTBOLD: + font = HTMLFONT_BOLD; + break; + case ESCAPE_FONTBI: + font = HTMLFONT_BI; + break; + case ESCAPE_FONT: + case ESCAPE_FONTROMAN: + font = HTMLFONT_NONE; + break; + default: + abort(); + } + + if (h->metaf) { + print_tagq(h, h->metaf); + h->metaf = NULL; + } + + h->metal = h->metac; + h->metac = font; + + switch (font) { + case HTMLFONT_ITALIC: + h->metaf = print_otag(h, TAG_I, 0, NULL); + break; + case HTMLFONT_BOLD: + h->metaf = print_otag(h, TAG_B, 0, NULL); + break; + case HTMLFONT_BI: + h->metaf = print_otag(h, TAG_B, 0, NULL); + print_otag(h, TAG_I, 0, NULL); + break; + default: + break; + } +} + +int +html_strlen(const char *cp) +{ + size_t rsz; + int skip, sz; + + /* + * Account for 
escaped sequences within string length + * calculations. This follows the logic in term_strlen() as we + * must calculate the width of produced strings. + * Assume that characters are always width of "1". This is + * hacky, but it gets the job done for approximation of widths. + */ + + sz = 0; + skip = 0; + while (1) { + rsz = strcspn(cp, "\\"); + if (rsz) { + cp += rsz; + if (skip) { + skip = 0; + rsz--; + } + sz += rsz; + } + if ('\0' == *cp) + break; + cp++; + switch (mandoc_escape(&cp, NULL, NULL)) { + case ESCAPE_ERROR: + return sz; + case ESCAPE_UNICODE: + case ESCAPE_NUMBERED: + case ESCAPE_SPECIAL: + case ESCAPE_OVERSTRIKE: + if (skip) + skip = 0; + else + sz++; + break; + case ESCAPE_SKIPCHAR: + skip = 1; + break; + default: + break; + } + } + return sz; +} + +static int +print_escape(char c) +{ + + switch (c) { + case '<': + printf("&lt;"); + break; + case '>': + printf("&gt;"); + break; + case '&': + printf("&amp;"); + break; + case '"': + printf("&quot;"); + break; + case ASCII_NBRSP: + printf("&nbsp;"); + break; + case ASCII_HYPH: + putchar('-'); + break; + case ASCII_BREAK: + break; + default: + return 0; + } + return 1; +} + +static int +print_encode(struct html *h, const char *p, int norecurse) +{ + size_t sz; + int c, len, nospace; + const char *seq; + enum mandoc_esc esc; + static const char rejs[9] = { '\\', '<', '>', '&', '"', + ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' }; + + nospace = 0; + + while ('\0' != *p) { + if (HTML_SKIPCHAR & h->flags && '\\' != *p) { + h->flags &= ~HTML_SKIPCHAR; + p++; + continue; + } + + sz = strcspn(p, rejs); + + fwrite(p, 1, sz, stdout); + p += (int)sz; + + if ('\0' == *p) + break; + + if (print_escape(*p++)) + continue; + + esc = mandoc_escape(&p, &seq, &len); + if (ESCAPE_ERROR == esc) + break; + + switch (esc) { + case ESCAPE_FONT: + case ESCAPE_FONTPREV: + case ESCAPE_FONTBOLD: + case ESCAPE_FONTITALIC: + case ESCAPE_FONTBI: + case ESCAPE_FONTROMAN: + if (0 == norecurse) + print_metaf(h, esc); + continue; + case ESCAPE_SKIPCHAR: 
+ h->flags |= HTML_SKIPCHAR; + continue; + default: + break; + } + + if (h->flags & HTML_SKIPCHAR) { + h->flags &= ~HTML_SKIPCHAR; + continue; + } + + switch (esc) { + case ESCAPE_UNICODE: + /* Skip past "u" header. */ + c = mchars_num2uc(seq + 1, len - 1); + break; + case ESCAPE_NUMBERED: + c = mchars_num2char(seq, len); + if (c < 0) + continue; + break; + case ESCAPE_SPECIAL: + c = mchars_spec2cp(seq, len); + if (c <= 0) + continue; + break; + case ESCAPE_NOSPACE: + if ('\0' == *p) + nospace = 1; + continue; + case ESCAPE_OVERSTRIKE: + if (len == 0) + continue; + c = seq[len - 1]; + break; + default: + continue; + } + if ((c < 0x20 && c != 0x09) || + (c > 0x7E && c < 0xA0)) + c = 0xFFFD; + if (c > 0x7E) + printf("&#%d;", c); + else if ( ! print_escape(c)) + putchar(c); + } + + return nospace; +} + +static void +print_attr(struct html *h, const char *key, const char *val) +{ + printf(" %s=\"", key); + (void)print_encode(h, val, 1); + putchar('\"'); +} + +struct tag * +print_otag(struct html *h, enum htmltag tag, + int sz, const struct htmlpair *p) +{ + int i; + struct tag *t; + + /* Push this tag onto the stack of open scopes. */ + + if ( ! (HTML_NOSTACK & htmltags[tag].flags)) { + t = mandoc_malloc(sizeof(struct tag)); + t->tag = tag; + t->next = h->tags.head; + h->tags.head = t; + } else + t = NULL; + + if ( ! (HTML_NOSPACE & h->flags)) + if ( ! (HTML_CLRLINE & htmltags[tag].flags)) { + /* Manage keeps! */ + if ( ! (HTML_KEEP & h->flags)) { + if (HTML_PREKEEP & h->flags) + h->flags |= HTML_KEEP; + putchar(' '); + } else + printf("&#160;"); + } + + if ( ! (h->flags & HTML_NONOSPACE)) + h->flags &= ~HTML_NOSPACE; + else + h->flags |= HTML_NOSPACE; + + /* Print out the tag name and attributes. */ + + printf("<%s", htmltags[tag].name); + for (i = 0; i < sz; i++) + print_attr(h, htmlattrs[p[i].key], p[i].val); + + /* Accommodate for "well-formed" singleton escaping. 
*/ + + if (HTML_AUTOCLOSE & htmltags[tag].flags) + putchar('/'); + + putchar('>'); + + h->flags |= HTML_NOSPACE; + + if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags) + putchar('\n'); + + return t; +} + +static void +print_ctag(struct html *h, struct tag *tag) +{ + + /* + * Remember to close out and nullify the current + * meta-font and table, if applicable. + */ + if (tag == h->metaf) + h->metaf = NULL; + if (tag == h->tblt) + h->tblt = NULL; + + printf("</%s>", htmltags[tag->tag].name); + if (HTML_CLRLINE & htmltags[tag->tag].flags) { + h->flags |= HTML_NOSPACE; + putchar('\n'); + } + + h->tags.head = tag->next; + free(tag); +} + +void +print_gen_decls(struct html *h) +{ + + puts("<!DOCTYPE html>"); +} + +void +print_text(struct html *h, const char *word) +{ + + if ( ! (HTML_NOSPACE & h->flags)) { + /* Manage keeps! */ + if ( ! (HTML_KEEP & h->flags)) { + if (HTML_PREKEEP & h->flags) + h->flags |= HTML_KEEP; + putchar(' '); + } else + printf("&#160;"); + } + + assert(NULL == h->metaf); + switch (h->metac) { + case HTMLFONT_ITALIC: + h->metaf = print_otag(h, TAG_I, 0, NULL); + break; + case HTMLFONT_BOLD: + h->metaf = print_otag(h, TAG_B, 0, NULL); + break; + case HTMLFONT_BI: + h->metaf = print_otag(h, TAG_B, 0, NULL); + print_otag(h, TAG_I, 0, NULL); + break; + default: + break; + } + + assert(word); + if ( ! print_encode(h, word, 0)) { + if ( ! 
(h->flags & HTML_NONOSPACE)) + h->flags &= ~HTML_NOSPACE; + h->flags &= ~HTML_NONEWLINE; + } else + h->flags |= HTML_NOSPACE | HTML_NONEWLINE; + + if (h->metaf) { + print_tagq(h, h->metaf); + h->metaf = NULL; + } + + h->flags &= ~HTML_IGNDELIM; +} + +void +print_tagq(struct html *h, const struct tag *until) +{ + struct tag *tag; + + while ((tag = h->tags.head) != NULL) { + print_ctag(h, tag); + if (until && tag == until) + return; + } +} + +void +print_stagq(struct html *h, const struct tag *suntil) +{ + struct tag *tag; + + while ((tag = h->tags.head) != NULL) { + if (suntil && tag == suntil) + return; + print_ctag(h, tag); + } +} + +void +print_paragraph(struct html *h) +{ + struct tag *t; + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "spacer"); + t = print_otag(h, TAG_DIV, 1, &tag); + print_tagq(h, t); +} + + +void +bufinit(struct html *h) +{ + + h->buf[0] = '\0'; + h->buflen = 0; +} + +void +bufcat_style(struct html *h, const char *key, const char *val) +{ + + bufcat(h, key); + bufcat(h, ":"); + bufcat(h, val); + bufcat(h, ";"); +} + +void +bufcat(struct html *h, const char *p) +{ + + /* + * XXX This is broken and not easy to fix. + * When using the -Oincludes option, buffmt_includes() + * may pass in strings overrunning BUFSIZ, causing a crash. + */ + + h->buflen = strlcat(h->buf, p, BUFSIZ); + assert(h->buflen < BUFSIZ); +} + +void +bufcat_fmt(struct html *h, const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + (void)vsnprintf(h->buf + (int)h->buflen, + BUFSIZ - h->buflen - 1, fmt, ap); + va_end(ap); + h->buflen = strlen(h->buf); +} + +static void +bufncat(struct html *h, const char *p, size_t sz) +{ + + assert(h->buflen + sz + 1 < BUFSIZ); + strncat(h->buf, p, sz); + h->buflen += sz; +} + +void +buffmt_includes(struct html *h, const char *name) +{ + const char *p, *pp; + + pp = h->base_includes; + + bufinit(h); + while (NULL != (p = strchr(pp, '%'))) { + bufncat(h, pp, (size_t)(p - pp)); + switch (*(p + 1)) { + case'I': + bufcat(h, name); + break; + default: + bufncat(h, p, 2); + break; + } + pp = p + 2; + } + if (pp) + bufcat(h, pp); +} + +void +buffmt_man(struct html *h, const char *name, const char *sec) +{ + const char *p, *pp; + + pp = h->base_man; + + bufinit(h); + while (NULL != (p = strchr(pp, '%'))) { + bufncat(h, pp, (size_t)(p - pp)); + switch (*(p + 1)) { + case 'S': + bufcat(h, sec ? sec : "1"); + break; + case 'N': + bufcat_fmt(h, "%s", name); + break; + default: + bufncat(h, p, 2); + break; + } + pp = p + 2; + } + if (pp) + bufcat(h, pp); +} + +void +bufcat_su(struct html *h, const char *p, const struct roffsu *su) +{ + double v; + + v = su->scale; + if (SCALE_MM == su->unit && 0.0 == (v /= 100.0)) + v = 1.0; + else if (SCALE_BU == su->unit) + v /= 24.0; + + bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]); +} + +void +bufcat_id(struct html *h, const char *src) +{ + + /* Cf. <http://www.w3.org/TR/html5/dom.html#the-id-attribute>. */ + + for (; '\0' != *src; src++) + bufncat(h, *src == ' ' ? 
"_" : src, 1); +} diff --git a/contrib/mdocml/html.h b/contrib/mdocml/html.h new file mode 100644 index 0000000..27dc140 --- /dev/null +++ b/contrib/mdocml/html.h @@ -0,0 +1,176 @@ +/* $Id: html.h,v 1.72 2015/11/07 14:01:16 schwarze Exp $ */ +/* + * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +enum htmltag { + TAG_HTML, + TAG_HEAD, + TAG_BODY, + TAG_META, + TAG_TITLE, + TAG_DIV, + TAG_H1, + TAG_H2, + TAG_SPAN, + TAG_LINK, + TAG_BR, + TAG_A, + TAG_TABLE, + TAG_TBODY, + TAG_COL, + TAG_TR, + TAG_TD, + TAG_LI, + TAG_UL, + TAG_OL, + TAG_DL, + TAG_DT, + TAG_DD, + TAG_BLOCKQUOTE, + TAG_PRE, + TAG_B, + TAG_I, + TAG_CODE, + TAG_SMALL, + TAG_STYLE, + TAG_MATH, + TAG_MROW, + TAG_MI, + TAG_MO, + TAG_MSUP, + TAG_MSUB, + TAG_MSUBSUP, + TAG_MFRAC, + TAG_MSQRT, + TAG_MFENCED, + TAG_MTABLE, + TAG_MTR, + TAG_MTD, + TAG_MUNDEROVER, + TAG_MUNDER, + TAG_MOVER, + TAG_MAX +}; + +enum htmlattr { + ATTR_NAME, + ATTR_REL, + ATTR_HREF, + ATTR_TYPE, + ATTR_MEDIA, + ATTR_CLASS, + ATTR_STYLE, + ATTR_ID, + ATTR_COLSPAN, + ATTR_CHARSET, + ATTR_OPEN, + ATTR_CLOSE, + ATTR_MATHVARIANT, + ATTR_MAX +}; + +enum htmlfont { + HTMLFONT_NONE = 0, + HTMLFONT_BOLD, + HTMLFONT_ITALIC, + HTMLFONT_BI, + HTMLFONT_MAX +}; + +struct tag { + struct tag *next; + enum htmltag tag; +}; + +struct tagq { + struct tag *head; +}; + +struct htmlpair { + enum htmlattr key; + const char *val; +}; + +#define PAIR_INIT(p, t, v) \ + do { \ + (p)->key = (t); \ + (p)->val = (v); \ + } while (/* CONSTCOND */ 0) + +#define PAIR_ID_INIT(p, v) PAIR_INIT(p, ATTR_ID, v) +#define PAIR_CLASS_INIT(p, v) PAIR_INIT(p, ATTR_CLASS, v) +#define PAIR_HREF_INIT(p, v) PAIR_INIT(p, ATTR_HREF, v) +#define PAIR_STYLE_INIT(p, h) PAIR_INIT(p, ATTR_STYLE, (h)->buf) + +struct html { + int flags; +#define HTML_NOSPACE (1 << 0) /* suppress next space */ +#define HTML_IGNDELIM (1 << 1) +#define HTML_KEEP (1 << 2) +#define HTML_PREKEEP (1 << 3) +#define HTML_NONOSPACE (1 << 4) /* never add spaces */ +#define HTML_LITERAL (1 << 5) /* literal (e.g., <PRE>) context */ +#define HTML_SKIPCHAR (1 << 6) /* skip the next character */ +#define HTML_NOSPLIT (1 << 7) /* do not break line before .An */ +#define HTML_SPLIT (1 << 8) /* break line before .An */ +#define HTML_NONEWLINE (1 << 9) /* No line break in nofill mode. 
*/ + struct tagq tags; /* stack of open tags */ + struct rofftbl tbl; /* current table */ + struct tag *tblt; /* current open table scope */ + char *base_man; /* base for manpage href */ + char *base_includes; /* base for include href */ + char *style; /* style-sheet URI */ + char buf[BUFSIZ]; /* see bufcat and friends */ + size_t buflen; + struct tag *metaf; /* current open font scope */ + enum htmlfont metal; /* last used font */ + enum htmlfont metac; /* current font mode */ + int oflags; /* output options */ +#define HTML_FRAGMENT (1 << 0) /* don't emit HTML/HEAD/BODY */ +}; + + +struct tbl_span; +struct eqn; + +void print_gen_decls(struct html *); +void print_gen_head(struct html *); +struct tag *print_otag(struct html *, enum htmltag, + int, const struct htmlpair *); +void print_tagq(struct html *, const struct tag *); +void print_stagq(struct html *, const struct tag *); +void print_text(struct html *, const char *); +void print_tblclose(struct html *); +void print_tbl(struct html *, const struct tbl_span *); +void print_eqn(struct html *, const struct eqn *); +void print_paragraph(struct html *); + +#if __GNUC__ - 0 >= 4 +__attribute__((__format__ (__printf__, 2, 3))) +#endif +void bufcat_fmt(struct html *, const char *, ...); +void bufcat(struct html *, const char *); +void bufcat_id(struct html *, const char *); +void bufcat_style(struct html *, + const char *, const char *); +void bufcat_su(struct html *, const char *, + const struct roffsu *); +void bufinit(struct html *); +void buffmt_man(struct html *, + const char *, const char *); +void buffmt_includes(struct html *, const char *); + +int html_strlen(const char *); diff --git a/contrib/mdocml/lib.c b/contrib/mdocml/lib.c new file mode 100644 index 0000000..5295950 --- /dev/null +++ b/contrib/mdocml/lib.c @@ -0,0 +1,37 @@ +/* $Id: lib.c,v 1.13 2015/10/06 18:32:19 schwarze Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute 
this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> + +#include <string.h> + +#include "roff.h" +#include "mdoc.h" +#include "libmdoc.h" + +#define LINE(x, y) \ + if (0 == strcmp(p, x)) return(y); + +const char * +mdoc_a2lib(const char *p) +{ + +#include "lib.in" + + return NULL; +} diff --git a/contrib/mdocml/lib.in b/contrib/mdocml/lib.in new file mode 100644 index 0000000..ca04e94 --- /dev/null +++ b/contrib/mdocml/lib.in @@ -0,0 +1,128 @@ +/* $Id: lib.in,v 1.18 2014/01/06 00:53:33 schwarze Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2009, 2012 Joerg Sonnenberger <joerg@netbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * These are all possible .Lb strings. When a new library is added, add + * its short-string to the left-hand side and formatted string to the + * right-hand side. + * + * Be sure to escape strings. + */ + +LINE("lib80211", "802.11 Wireless Network Management Library (lib80211, \\-l80211)") +LINE("libarchive", "Streaming Archive Library (libarchive, \\-larchive)") +LINE("libarm", "ARM Architecture Library (libarm, \\-larm)") +LINE("libarm32", "ARM32 Architecture Library (libarm32, \\-larm32)") +LINE("libbluetooth", "Bluetooth Library (libbluetooth, \\-lbluetooth)") +LINE("libbsm", "Basic Security Module Library (libbsm, \\-lbsm)") +LINE("libc", "Standard C\\~Library (libc, \\-lc)") +LINE("libc_r", "Reentrant C\\~Library (libc_r, \\-lc_r)") +LINE("libcalendar", "Calendar Arithmetic Library (libcalendar, \\-lcalendar)") +LINE("libcam", "Common Access Method User Library (libcam, \\-lcam)") +LINE("libcapsicum", "Capsicum Library (libcapsicum, \\-lcapsicum)") +LINE("libcdk", "Curses Development Kit Library (libcdk, \\-lcdk)") +LINE("libcipher", "FreeSec Crypt Library (libcipher, \\-lcipher)") +LINE("libcompat", "Compatibility Library (libcompat, \\-lcompat)") +LINE("libcrypt", "Crypt Library (libcrypt, \\-lcrypt)") +LINE("libcurses", "Curses Library (libcurses, \\-lcurses)") +LINE("libcuse", "Userland Character Device Library (libcuse, \\-lcuse)") +LINE("libdevattr", "Device attribute and event library (libdevattr, \\-ldevattr)") +LINE("libdevctl", "Device Control Library (libdevctl, \\-ldevctl)") +LINE("libdevinfo", "Device and Resource Information Utility Library (libdevinfo, \\-ldevinfo)") +LINE("libdevstat", "Device 
Statistics Library (libdevstat, \\-ldevstat)") +LINE("libdisk", "Interface to Slice and Partition Labels Library (libdisk, \\-ldisk)") +LINE("libdm", "Device Mapper Library (libdm, \\-ldm)") +LINE("libdwarf", "DWARF Access Library (libdwarf, \\-ldwarf)") +LINE("libedit", "Command Line Editor Library (libedit, \\-ledit)") +LINE("libefi", "EFI Runtime Services Library (libefi, \\-lefi)") +LINE("libelf", "ELF Access Library (libelf, \\-lelf)") +LINE("libevent", "Event Notification Library (libevent, \\-levent)") +LINE("libexecinfo", "Backtrace Information Library (libexecinfo, \\-lexecinfo)") +LINE("libfetch", "File Transfer Library (libfetch, \\-lfetch)") +LINE("libfsid", "Filesystem Identification Library (libfsid, \\-lfsid)") +LINE("libftpio", "FTP Connection Management Library (libftpio, \\-lftpio)") +LINE("libform", "Curses Form Library (libform, \\-lform)") +LINE("libgeom", "Userland API Library for Kernel GEOM subsystem (libgeom, \\-lgeom)") +LINE("libgpio", "General-Purpose Input Output (GPIO) library (libgpio, \\-lgpio)") +LINE("libhammer", "HAMMER Filesystem Userland Library (libhammer, \\-lhammer)") +LINE("libi386", "i386 Architecture Library (libi386, \\-li386)") +LINE("libintl", "Internationalized Message Handling Library (libintl, \\-lintl)") +LINE("libipsec", "IPsec Policy Control Library (libipsec, \\-lipsec)") +LINE("libiscsi", "iSCSI protocol library (libiscsi, \\-liscsi)") +LINE("libisns", "Internet Storage Name Service Library (libisns, \\-lisns)") +LINE("libjail", "Jail Library (libjail, \\-ljail)") +LINE("libkcore", "Kernel Memory Core Access Library (libkcore, \\-lkcore)") +LINE("libkiconv", "Kernel-side iconv Library (libkiconv, \\-lkiconv)") +LINE("libkse", "N:M Threading Library (libkse, \\-lkse)") +LINE("libkvm", "Kernel Data Access Library (libkvm, \\-lkvm)") +LINE("libm", "Math Library (libm, \\-lm)") +LINE("libm68k", "m68k Architecture Library (libm68k, \\-lm68k)") +LINE("libmagic", "Magic Number Recognition Library (libmagic, 
\\-lmagic)") +LINE("libmandoc", "Mandoc Macro Compiler Library (libmandoc, \\-lmandoc)") +LINE("libmd", "Message Digest (MD4, MD5, etc.) Support Library (libmd, \\-lmd)") +LINE("libmemstat", "Kernel Memory Allocator Statistics Library (libmemstat, \\-lmemstat)") +LINE("libmenu", "Curses Menu Library (libmenu, \\-lmenu)") +LINE("libmj", "Minimalist JSON library (libmj, \\-lmj)") +LINE("libnetgraph", "Netgraph User Library (libnetgraph, \\-lnetgraph)") +LINE("libnetpgp", "Netpgp Signing, Verification, Encryption and Decryption (libnetpgp, \\-lnetpgp)") +LINE("libnetpgpverify", "Netpgp Verification (libnetpgpverify, \\-lnetpgpverify)") +LINE("libnpf", "NPF Packet Filter Library (libnpf, \\-lnpf)") +LINE("libnv", "Name/value pairs library (libnv, \\-lnv)") +LINE("libossaudio", "OSS Audio Emulation Library (libossaudio, \\-lossaudio)") +LINE("libpam", "Pluggable Authentication Module Library (libpam, \\-lpam)") +LINE("libpcap", "Packet Capture Library (libpcap, \\-lpcap)") +LINE("libpci", "PCI Bus Access Library (libpci, \\-lpci)") +LINE("libpmc", "Performance Counters Library (libpmc, \\-lpmc)") +LINE("libppath", "Property-List Paths Library (libppath, \\-lppath)") +LINE("libposix", "POSIX Compatibility Library (libposix, \\-lposix)") +LINE("libposix1e", "POSIX.1e Security API Library (libposix1e, \\-lposix1e)") +LINE("libppath", "Property-List Paths Library (libppath, \\-lppath)") +LINE("libproc", "Processor Monitoring and Analysis Library (libproc, \\-lproc)") +LINE("libprocstat", "Process and Files Information Retrieval (libprocstat, \\-lprocstat)") +LINE("libprop", "Property Container Object Library (libprop, \\-lprop)") +LINE("libpthread", "POSIX Threads Library (libpthread, \\-lpthread)") +LINE("libpuffs", "puffs Convenience Library (libpuffs, \\-lpuffs)") +LINE("libquota", "Disk Quota Access and Control Library (libquota, \\-lquota)") +LINE("libradius", "RADIUS Client Library (libradius, \\-lradius)") +LINE("librefuse", "File System in Userspace Convenience 
Library (librefuse, \\-lrefuse)") +LINE("libresolv", "DNS Resolver Library (libresolv, \\-lresolv)") +LINE("librpcsec_gss", "RPC GSS-API Authentication Library (librpcsec_gss, \\-lrpcsec_gss)") +LINE("librpcsvc", "RPC Service Library (librpcsvc, \\-lrpcsvc)") +LINE("librt", "POSIX Real\\-time Library (librt, \\-lrt)") +LINE("librtld_db", "Run-time Linker Debugging Library (librtld_db, \\-lrtld_db)") +LINE("libsaslc", "Simple Authentication and Security Layer client library (libsaslc, \\-lsaslc)") +LINE("libsbuf", "Safe String Composition Library (libsbuf, \\-lsbuf)") +LINE("libsdp", "Bluetooth Service Discovery Protocol User Library (libsdp, \\-lsdp)") +LINE("libssp", "Buffer Overflow Protection Library (libssp, \\-lssp)") +LINE("libstdthreads", "C11 Threads Library (libstdthreads, \\-lstdthreads)") +LINE("libSystem", "System Library (libSystem, \\-lSystem)") +LINE("libsysdecode", "System Argument Decoding Library (libsysdecode, \\-lsysdecode)") +LINE("libtacplus", "TACACS+ Client Library (libtacplus, \\-ltacplus)") +LINE("libtcplay", "TrueCrypt-compatible API library (libtcplay, \\-ltcplay)") +LINE("libtermcap", "Termcap Access Library (libtermcap, \\-ltermcap)") +LINE("libterminfo", "Terminal Information Library (libterminfo, \\-lterminfo)") +LINE("libthr", "1:1 Threading Library (libthr, \\-lthr)") +LINE("libufs", "UFS File System Access Library (libufs, \\-lufs)") +LINE("libugidfw", "File System Firewall Interface Library (libugidfw, \\-lugidfw)") +LINE("libulog", "User Login Record Library (libulog, \\-lulog)") +LINE("libusbhid", "USB Human Interface Devices Library (libusbhid, \\-lusbhid)") +LINE("libutil", "System Utilities Library (libutil, \\-lutil)") +LINE("libvgl", "Video Graphics Library (libvgl, \\-lvgl)") +LINE("libx86_64", "x86_64 Architecture Library (libx86_64, \\-lx86_64)") +LINE("libxo", "Text, XML, JSON, and HTML Output Emission Library (libxo, \\-lxo)") +LINE("libz", "Compression Library (libz, \\-lz)") diff --git a/contrib/mdocml/libman.h 
b/contrib/mdocml/libman.h new file mode 100644 index 0000000..6584960 --- /dev/null +++ b/contrib/mdocml/libman.h @@ -0,0 +1,41 @@ +/* $Id: libman.h,v 1.79 2015/11/07 14:01:16 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#define MACRO_PROT_ARGS struct roff_man *man, \ + int tok, \ + int line, \ + int ppos, \ + int *pos, \ + char *buf + +struct man_macro { + void (*fp)(MACRO_PROT_ARGS); + int flags; +#define MAN_SCOPED (1 << 0) /* Optional next-line scope. */ +#define MAN_NSCOPED (1 << 1) /* Allowed in next-line element scope. */ +#define MAN_BSCOPE (1 << 2) /* Break next-line block scope. */ +#define MAN_JOIN (1 << 3) /* Join arguments together. 
*/ +}; + +extern const struct man_macro *const man_macros; + + +int man_hash_find(const char *); +void man_node_validate(struct roff_man *); +void man_state(struct roff_man *, struct roff_node *); +void man_unscope(struct roff_man *, const struct roff_node *); diff --git a/contrib/mdocml/libmandoc.h b/contrib/mdocml/libmandoc.h new file mode 100644 index 0000000..939ec83 --- /dev/null +++ b/contrib/mdocml/libmandoc.h @@ -0,0 +1,85 @@ +/* $Id: libmandoc.h,v 1.62 2015/11/07 14:01:16 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +enum rofferr { + ROFF_CONT, /* continue processing line */ + ROFF_RERUN, /* re-run roff interpreter with offset */ + ROFF_APPEND, /* re-run main parser, appending next line */ + ROFF_REPARSE, /* re-run main parser on the result */ + ROFF_SO, /* include another file */ + ROFF_IGN, /* ignore current line */ + ROFF_TBL, /* a table row was successfully parsed */ + ROFF_EQN /* an equation was successfully parsed */ +}; + +struct buf { + char *buf; + size_t sz; +}; + + +struct mparse; +struct tbl_span; +struct eqn; +struct roff; +struct roff_man; +struct roff_node; + +void mandoc_msg(enum mandocerr, struct mparse *, + int, int, const char *); +#if __GNUC__ - 0 >= 4 +__attribute__((__format__ (__printf__, 5, 6))) +#endif +void mandoc_vmsg(enum mandocerr, struct mparse *, + int, int, const char *, ...); +char *mandoc_getarg(struct mparse *, char **, int, int *); +char *mandoc_normdate(struct mparse *, char *, int, int); +int mandoc_eos(const char *, size_t); +int mandoc_strntoi(const char *, size_t, int); +const char *mandoc_a2msec(const char*); + +void mdoc_hash_init(void); +int mdoc_parseln(struct roff_man *, int, char *, int); +void mdoc_endparse(struct roff_man *); + +void man_hash_init(void); +int man_parseln(struct roff_man *, int, char *, int); +void man_endparse(struct roff_man *); + +int preconv_cue(const struct buf *, size_t); +int preconv_encode(struct buf *, size_t *, + struct buf *, size_t *, int *); + +void roff_free(struct roff *); +struct roff *roff_alloc(struct mparse *, int); +void roff_reset(struct roff *); +void roff_man_free(struct roff_man *); +struct roff_man *roff_man_alloc(struct roff *, struct mparse *, + const char *, int); +void roff_man_reset(struct roff_man *); +enum rofferr roff_parseln(struct roff *, int, struct buf *, int *); +void roff_endparse(struct roff *); +void roff_setreg(struct roff *, const char *, int, char sign); +int roff_getreg(const struct roff *, const char *); +char *roff_strdup(const struct roff *, const char *); 
+int roff_getcontrol(const struct roff *, + const char *, int *); +int roff_getformat(const struct roff *); + +const struct tbl_span *roff_span(const struct roff *); +const struct eqn *roff_eqn(const struct roff *); diff --git a/contrib/mdocml/libmdoc.h b/contrib/mdocml/libmdoc.h new file mode 100644 index 0000000..5a6cc3e --- /dev/null +++ b/contrib/mdocml/libmdoc.h @@ -0,0 +1,88 @@ +/* $Id: libmdoc.h,v 1.108 2015/11/07 14:01:16 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +/* Parameter list shared by the handler callback in struct mdoc_macro and by mdoc_macro() itself. */ +#define MACRO_PROT_ARGS struct roff_man *mdoc, \ + int tok, \ + int line, \ + int ppos, \ + int *pos, \ + char *buf + +/* One macro description: its handler function and a mask of MDOC_* flag bits. */ +struct mdoc_macro { + void (*fp)(MACRO_PROT_ARGS); + int flags; +#define MDOC_CALLABLE (1 << 0) +#define MDOC_PARSED (1 << 1) +#define MDOC_EXPLICIT (1 << 2) +#define MDOC_PROLOGUE (1 << 3) +#define MDOC_IGNDELIM (1 << 4) +#define MDOC_JOIN (1 << 5) +}; + +/* Result of argument scanning; this is the return type of mdoc_args(). */ +enum margserr { + ARGS_ERROR, + ARGS_EOLN, /* end-of-line */ + ARGS_WORD, /* normal word */ + ARGS_PUNCT, /* series of punctuation */ + ARGS_QWORD, /* quoted word */ + ARGS_PHRASE /* Bl -column phrase */ +}; + +/* + * A punctuation delimiter is opening, closing, or "middle mark" + * punctuation. These govern spacing. + * Opening punctuation (e.g., the opening parenthesis) suppresses the + * following space; closing punctuation (e.g., the closing parenthesis) + * suppresses the leading space; middle punctuation (e.g., the vertical + * bar) can do either. The middle punctuation delimiter bends the rules + * depending on usage. 
+ */ +enum mdelim { + DELIM_NONE = 0, + DELIM_OPEN, + DELIM_MIDDLE, + DELIM_CLOSE, + DELIM_MAX +}; + +/* Table of struct mdoc_macro entries; only declared here, the definition lives in another translation unit. */ +extern const struct mdoc_macro *const mdoc_macros; + + +void mdoc_macro(MACRO_PROT_ARGS); +void mdoc_elem_alloc(struct roff_man *, int, int, + int, struct mdoc_arg *); +struct roff_node *mdoc_block_alloc(struct roff_man *, int, int, + int, struct mdoc_arg *); +void mdoc_tail_alloc(struct roff_man *, int, int, int); +struct roff_node *mdoc_endbody_alloc(struct roff_man *, int, int, int, + struct roff_node *, enum mdoc_endbody); +void mdoc_node_relink(struct roff_man *, struct roff_node *); +void mdoc_node_validate(struct roff_man *); +void mdoc_state(struct roff_man *, struct roff_node *); +void mdoc_state_reset(struct roff_man *); +int mdoc_hash_find(const char *); +const char *mdoc_a2arch(const char *); +const char *mdoc_a2att(const char *); +const char *mdoc_a2lib(const char *); +enum roff_sec mdoc_a2sec(const char *); +const char *mdoc_a2st(const char *); +void mdoc_argv(struct roff_man *, int, int, + struct mdoc_arg **, int *, char *); +enum margserr mdoc_args(struct roff_man *, int, + int *, char *, int, char **); +enum mdelim mdoc_isdelim(const char *); diff --git a/contrib/mdocml/libroff.h b/contrib/mdocml/libroff.h new file mode 100644 index 0000000..897a55a --- /dev/null +++ b/contrib/mdocml/libroff.h @@ -0,0 +1,79 @@ +/* $Id: libroff.h,v 1.39 2015/11/07 14:01:16 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* The part of a table that is currently being read; stored in struct tbl_node. */ +enum tbl_part { + TBL_PART_OPTS, /* in options (first line) */ + TBL_PART_LAYOUT, /* describing layout */ + TBL_PART_DATA, /* creating data rows */ + TBL_PART_CDATA /* continue previous row */ +}; + +/* State of one table: where it was invoked, its options, rows and spans; nodes are chained through the next pointer. */ +struct tbl_node { + struct mparse *parse; /* parse point */ + int pos; /* invocation column */ + int line; /* invocation line */ + enum tbl_part part; + struct tbl_opts opts; + struct tbl_row *first_row; + struct tbl_row *last_row; + struct tbl_span *first_span; + struct tbl_span *current_span; + struct tbl_span *last_span; + struct tbl_node *next; +}; + +struct eqn_node { + struct eqn eqn; /* syntax tree of this equation */ + struct mparse *parse; /* main parser, for error reporting */ + struct eqn_node *next; /* singly linked list of equations */ + struct eqn_def *defs; /* array of definitions */ + char *data; /* source code of this equation */ + size_t defsz; /* number of definitions */ + size_t sz; /* length of the source code */ + size_t cur; /* parse point in the source code */ + size_t rew; /* beginning of the current token */ + int gsize; /* default point size */ + int delim; /* in-line delimiters enabled */ + char odelim; /* in-line opening delimiter */ + char cdelim; /* in-line closing delimiter */ +}; + +/* One definition: a key and a value, each stored together with a size; struct eqn_node keeps an array of these in its defs member. */ +struct eqn_def { + char *key; + size_t keysz; + char *val; + size_t valsz; +}; + + +struct tbl_node *tbl_alloc(int, int, struct mparse *); +void tbl_restart(int, int, struct tbl_node *); +void tbl_free(struct tbl_node *); +void tbl_reset(struct tbl_node *); +enum rofferr tbl_read(struct tbl_node *, int, const char *, int); +void tbl_option(struct tbl_node *, int, const char *, int *); +void tbl_layout(struct tbl_node *, 
int, const char *, int); +void tbl_data(struct tbl_node *, int, const char *, int); +int tbl_cdata(struct tbl_node *, int, const char *, int); +const struct tbl_span *tbl_span(struct tbl_node *); +int tbl_end(struct tbl_node **); +struct eqn_node *eqn_alloc(int, int, struct mparse *); +enum rofferr eqn_end(struct eqn_node **); +void eqn_free(struct eqn_node *); +enum rofferr eqn_read(struct eqn_node **, int, + const char *, int, int *); diff --git a/contrib/mdocml/main.c b/contrib/mdocml/main.c new file mode 100644 index 0000000..3c4ff2a --- /dev/null +++ b/contrib/mdocml/main.c @@ -0,0 +1,1092 @@ +/* $Id: main.c,v 1.262 2016/01/08 02:53:13 schwarze Exp $ */ +/* + * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010-2012, 2014-2016 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2010 Joerg Sonnenberger <joerg@netbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> +#include <sys/param.h> /* MACHINE */ +#include <sys/wait.h> + +#include <assert.h> +#include <ctype.h> +#if HAVE_ERR +#include <err.h> +#endif +#include <errno.h> +#include <fcntl.h> +#include <glob.h> +#include <signal.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "man.h" +#include "tag.h" +#include "main.h" +#include "manconf.h" +#include "mansearch.h" + +#if !defined(__GNUC__) || (__GNUC__ < 2) +# if !defined(lint) +# define __attribute__(x) +# endif +#endif /* !defined(__GNUC__) || (__GNUC__ < 2) */ + +enum outmode { + OUTMODE_DEF = 0, + OUTMODE_FLN, + OUTMODE_LST, + OUTMODE_ALL, + OUTMODE_INT, + OUTMODE_ONE +}; + +enum outt { + OUTT_ASCII = 0, /* -Tascii */ + OUTT_LOCALE, /* -Tlocale */ + OUTT_UTF8, /* -Tutf8 */ + OUTT_TREE, /* -Ttree */ + OUTT_MAN, /* -Tman */ + OUTT_HTML, /* -Thtml */ + OUTT_LINT, /* -Tlint */ + OUTT_PS, /* -Tps */ + OUTT_PDF /* -Tpdf */ +}; + +struct curparse { + struct mparse *mp; + enum mandoclevel wlevel; /* ignore messages below this */ + int wstop; /* stop after a file with a warning */ + enum outt outtype; /* which output to use */ + void *outdata; /* data for output */ + struct manoutput *outopts; /* output options */ +}; + +static int fs_lookup(const struct manpaths *, + size_t ipath, const char *, + const char *, const char *, + struct manpage **, size_t *); +static void fs_search(const struct mansearch *, + const struct manpaths *, int, char**, + struct manpage **, size_t *); +static int koptions(int *, char *); +#if HAVE_SQLITE3 +int mandocdb(int, char**); +#endif +static int moptions(int *, char *); +static void mmsg(enum mandocerr, enum mandoclevel, + const char *, int, int, const char *); +static void parse(struct curparse *, int, const char *); +static void passthrough(const char *, int, int); +static pid_t 
spawn_pager(struct tag_files *); +static int toptions(struct curparse *, char *); +static void usage(enum argmode) __attribute__((noreturn)); +static int woptions(struct curparse *, char *); + +static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; +static char help_arg[] = "help"; +static char *help_argv[] = {help_arg, NULL}; +static enum mandoclevel rc; + + +int +main(int argc, char *argv[]) +{ + struct manconf conf; + struct curparse curp; + struct mansearch search; + struct tag_files *tag_files; + const char *progname; + char *auxpaths; + char *defos; + unsigned char *uc; + struct manpage *res, *resp; + char *conf_file, *defpaths; + size_t isec, i, sz; + int prio, best_prio; + char sec; + enum outmode outmode; + int fd; + int show_usage; + int options; + int use_pager; + int status, signum; + int c; + pid_t pager_pid, tc_pgid, man_pgid, pid; + +#if HAVE_PROGNAME + progname = getprogname(); +#else + if (argc < 1) + progname = mandoc_strdup("mandoc"); + else if ((progname = strrchr(argv[0], '/')) == NULL) + progname = argv[0]; + else + ++progname; + setprogname(progname); +#endif + +#if HAVE_SQLITE3 + if (strncmp(progname, "mandocdb", 8) == 0 || + strcmp(progname, BINM_MAKEWHATIS) == 0) + return mandocdb(argc, argv); +#endif + +#if HAVE_PLEDGE + if (pledge("stdio rpath tmppath tty proc exec flock", NULL) == -1) + err((int)MANDOCLEVEL_SYSERR, "pledge"); +#endif + + /* Search options. */ + + memset(&conf, 0, sizeof(conf)); + conf_file = defpaths = NULL; + auxpaths = NULL; + + memset(&search, 0, sizeof(struct mansearch)); + search.outkey = "Nd"; + + if (strcmp(progname, BINM_MAN) == 0) + search.argmode = ARG_NAME; + else if (strcmp(progname, BINM_APROPOS) == 0) + search.argmode = ARG_EXPR; + else if (strcmp(progname, BINM_WHATIS) == 0) + search.argmode = ARG_WORD; + else if (strncmp(progname, "help", 4) == 0) + search.argmode = ARG_NAME; + else + search.argmode = ARG_FILE; + + /* Parser and formatter options. 
*/ + + memset(&curp, 0, sizeof(struct curparse)); + curp.outtype = OUTT_LOCALE; + curp.wlevel = MANDOCLEVEL_BADARG; + curp.outopts = &conf.output; + options = MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1; + defos = NULL; + + use_pager = 1; + tag_files = NULL; + show_usage = 0; + outmode = OUTMODE_DEF; + + while (-1 != (c = getopt(argc, argv, + "aC:cfhI:iK:klM:m:O:S:s:T:VW:w"))) { + switch (c) { + case 'a': + outmode = OUTMODE_ALL; + break; + case 'C': + conf_file = optarg; + break; + case 'c': + use_pager = 0; + break; + case 'f': + search.argmode = ARG_WORD; + break; + case 'h': + conf.output.synopsisonly = 1; + use_pager = 0; + outmode = OUTMODE_ALL; + break; + case 'I': + if (strncmp(optarg, "os=", 3)) { + warnx("-I %s: Bad argument", optarg); + return (int)MANDOCLEVEL_BADARG; + } + if (defos) { + warnx("-I %s: Duplicate argument", optarg); + return (int)MANDOCLEVEL_BADARG; + } + defos = mandoc_strdup(optarg + 3); + break; + case 'i': + outmode = OUTMODE_INT; + break; + case 'K': + if ( ! koptions(&options, optarg)) + return (int)MANDOCLEVEL_BADARG; + break; + case 'k': + search.argmode = ARG_EXPR; + break; + case 'l': + search.argmode = ARG_FILE; + outmode = OUTMODE_ALL; + break; + case 'M': + defpaths = optarg; + break; + case 'm': + auxpaths = optarg; + break; + case 'O': + search.outkey = optarg; + while (optarg != NULL) + manconf_output(&conf.output, + strsep(&optarg, ",")); + break; + case 'S': + search.arch = optarg; + break; + case 's': + search.sec = optarg; + break; + case 'T': + if ( ! toptions(&curp, optarg)) + return (int)MANDOCLEVEL_BADARG; + break; + case 'W': + if ( ! woptions(&curp, optarg)) + return (int)MANDOCLEVEL_BADARG; + break; + case 'w': + outmode = OUTMODE_FLN; + break; + default: + show_usage = 1; + break; + } + } + + if (show_usage) + usage(search.argmode); + + /* Postprocess options. 
*/ + + if (outmode == OUTMODE_DEF) { + switch (search.argmode) { + case ARG_FILE: + outmode = OUTMODE_ALL; + use_pager = 0; + break; + case ARG_NAME: + outmode = OUTMODE_ONE; + break; + default: + outmode = OUTMODE_LST; + break; + } + } + + if (outmode == OUTMODE_FLN || + outmode == OUTMODE_LST || + !isatty(STDOUT_FILENO)) + use_pager = 0; + +#if HAVE_PLEDGE + if (!use_pager) + if (pledge("stdio rpath flock", NULL) == -1) + err((int)MANDOCLEVEL_SYSERR, "pledge"); +#endif + + /* Parse arguments. */ + + if (argc > 0) { + argc -= optind; + argv += optind; + } + resp = NULL; + + /* + * Quirks for help(1) + * and for a man(1) section argument without -s. + */ + + if (search.argmode == ARG_NAME) { + if (*progname == 'h') { + if (argc == 0) { + argv = help_argv; + argc = 1; + } + } else if (argc > 1 && + ((uc = (unsigned char *)argv[0]) != NULL) && + ((isdigit(uc[0]) && (uc[1] == '\0' || + (isalpha(uc[1]) && uc[2] == '\0'))) || + (uc[0] == 'n' && uc[1] == '\0'))) { + search.sec = (char *)uc; + argv++; + argc--; + } + if (search.arch == NULL) + search.arch = getenv("MACHINE"); +#ifdef MACHINE + if (search.arch == NULL) + search.arch = MACHINE; +#endif + } + + rc = MANDOCLEVEL_OK; + + /* man(1), whatis(1), apropos(1) */ + + if (search.argmode != ARG_FILE) { + if (argc == 0) + usage(search.argmode); + + if (search.argmode == ARG_NAME && + outmode == OUTMODE_ONE) + search.firstmatch = 1; + + /* Access the mandoc database. */ + + manconf_parse(&conf, conf_file, defpaths, auxpaths); +#if HAVE_SQLITE3 + mansearch_setup(1); + if ( ! 
mansearch(&search, &conf.manpath, + argc, argv, &res, &sz)) + usage(search.argmode); +#else + if (search.argmode != ARG_NAME) { + fputs("mandoc: database support not compiled in\n", + stderr); + return (int)MANDOCLEVEL_BADARG; + } + sz = 0; +#endif + + if (sz == 0) { + if (search.argmode == ARG_NAME) + fs_search(&search, &conf.manpath, + argc, argv, &res, &sz); + else + warnx("nothing appropriate"); + } + + if (sz == 0) { + rc = MANDOCLEVEL_BADARG; + goto out; + } + + /* + * For standard man(1) and -a output mode, + * prepare for copying filename pointers + * into the program parameter array. + */ + + if (outmode == OUTMODE_ONE) { + argc = 1; + best_prio = 10; + } else if (outmode == OUTMODE_ALL) + argc = (int)sz; + + /* Iterate all matching manuals. */ + + resp = res; + for (i = 0; i < sz; i++) { + if (outmode == OUTMODE_FLN) + puts(res[i].file); + else if (outmode == OUTMODE_LST) + printf("%s - %s\n", res[i].names, + res[i].output == NULL ? "" : + res[i].output); + else if (outmode == OUTMODE_ONE) { + /* Search for the best section. */ + isec = strcspn(res[i].file, "123456789"); + sec = res[i].file[isec]; + if ('\0' == sec) + continue; + prio = sec_prios[sec - '1']; + if (prio >= best_prio) + continue; + best_prio = prio; + resp = res + i; + } + } + + /* + * For man(1), -a and -i output mode, fall through + * to the main mandoc(1) code iterating files + * and running the parsers on each of them. + */ + + if (outmode == OUTMODE_FLN || outmode == OUTMODE_LST) + goto out; + } + + /* mandoc(1) */ + +#if HAVE_PLEDGE + if (use_pager) { + if (pledge("stdio rpath tmppath tty proc exec", NULL) == -1) + err((int)MANDOCLEVEL_SYSERR, "pledge"); + } else { + if (pledge("stdio rpath", NULL) == -1) + err((int)MANDOCLEVEL_SYSERR, "pledge"); + } +#endif + + if (search.argmode == ARG_FILE && ! 
moptions(&options, auxpaths)) + return (int)MANDOCLEVEL_BADARG; + + mchars_alloc(); + curp.mp = mparse_alloc(options, curp.wlevel, mmsg, defos); + + /* + * Conditionally start up the lookaside buffer before parsing. + */ + if (OUTT_MAN == curp.outtype) + mparse_keep(curp.mp); + + if (argc < 1) { + if (use_pager) + tag_files = tag_init(); + parse(&curp, STDIN_FILENO, "<stdin>"); + } + + while (argc > 0) { + fd = mparse_open(curp.mp, resp != NULL ? resp->file : *argv); + if (fd != -1) { + if (use_pager) { + tag_files = tag_init(); + use_pager = 0; + } + + if (resp == NULL) + parse(&curp, fd, *argv); + else if (resp->form & FORM_SRC) { + /* For .so only; ignore failure. */ + chdir(conf.manpath.paths[resp->ipath]); + parse(&curp, fd, resp->file); + } else + passthrough(resp->file, fd, + conf.output.synopsisonly); + + if (argc > 1 && curp.outtype <= OUTT_UTF8) + ascii_sepline(curp.outdata); + } else if (rc < MANDOCLEVEL_ERROR) + rc = MANDOCLEVEL_ERROR; + + if (MANDOCLEVEL_OK != rc && curp.wstop) + break; + + if (resp != NULL) + resp++; + else + argv++; + if (--argc) + mparse_reset(curp.mp); + } + + if (curp.outdata != NULL) { + switch (curp.outtype) { + case OUTT_HTML: + html_free(curp.outdata); + break; + case OUTT_UTF8: + case OUTT_LOCALE: + case OUTT_ASCII: + ascii_free(curp.outdata); + break; + case OUTT_PDF: + case OUTT_PS: + pspdf_free(curp.outdata); + break; + default: + break; + } + } + mparse_free(curp.mp); + mchars_free(); + +out: + if (search.argmode != ARG_FILE) { + manconf_free(&conf); +#if HAVE_SQLITE3 + mansearch_free(res, sz); + mansearch_setup(0); +#endif + } + + free(defos); + + /* + * When using a pager, finish writing both temporary files, + * fork it, wait for the user to close it, and clean up. + */ + + if (tag_files != NULL) { + fclose(stdout); + tag_write(); + man_pgid = getpgid(0); + tag_files->tcpgid = man_pgid == getpid() ? 
+ getpgid(getppid()) : man_pgid; + pager_pid = 0; + signum = SIGSTOP; + for (;;) { + + /* Stop here until moved to the foreground. */ + + tc_pgid = tcgetpgrp(STDIN_FILENO); + if (tc_pgid != man_pgid) { + if (tc_pgid == pager_pid) { + (void)tcsetpgrp(STDIN_FILENO, + man_pgid); + if (signum == SIGTTIN) + continue; + } else + tag_files->tcpgid = tc_pgid; + kill(0, signum); + continue; + } + + /* Once in the foreground, activate the pager. */ + + if (pager_pid) { + (void)tcsetpgrp(STDIN_FILENO, pager_pid); + kill(pager_pid, SIGCONT); + } else + pager_pid = spawn_pager(tag_files); + + /* Wait for the pager to stop or exit. */ + + while ((pid = waitpid(pager_pid, &status, + WUNTRACED)) == -1 && errno == EINTR) + continue; + + if (pid == -1) { + warn("wait"); + rc = MANDOCLEVEL_SYSERR; + break; + } + if (!WIFSTOPPED(status)) + break; + + signum = WSTOPSIG(status); + } + tag_unlink(); + } + + return (int)rc; +} + +static void +usage(enum argmode argmode) +{ + + switch (argmode) { + case ARG_FILE: + fputs("usage: mandoc [-acfhkl] [-I os=name] " + "[-K encoding] [-mformat] [-O option]\n" + "\t [-T output] [-W level] [file ...]\n", stderr); + break; + case ARG_NAME: + fputs("usage: man [-acfhklw] [-C file] [-I os=name] " + "[-K encoding] [-M path] [-m path]\n" + "\t [-O option=value] [-S subsection] [-s section] " + "[-T output] [-W level]\n" + "\t [section] name ...\n", stderr); + break; + case ARG_WORD: + fputs("usage: whatis [-acfhklw] [-C file] " + "[-M path] [-m path] [-O outkey] [-S arch]\n" + "\t [-s section] name ...\n", stderr); + break; + case ARG_EXPR: + fputs("usage: apropos [-acfhklw] [-C file] " + "[-M path] [-m path] [-O outkey] [-S arch]\n" + "\t [-s section] expression ...\n", stderr); + break; + } + exit((int)MANDOCLEVEL_BADARG); +} + +static int +fs_lookup(const struct manpaths *paths, size_t ipath, + const char *sec, const char *arch, const char *name, + struct manpage **res, size_t *ressz) +{ + glob_t globinfo; + struct manpage *page; + char *file; + int 
form, globres; + + form = FORM_SRC; + mandoc_asprintf(&file, "%s/man%s/%s.%s", + paths->paths[ipath], sec, name, sec); + if (access(file, R_OK) != -1) + goto found; + free(file); + + mandoc_asprintf(&file, "%s/cat%s/%s.0", + paths->paths[ipath], sec, name); + if (access(file, R_OK) != -1) { + form = FORM_CAT; + goto found; + } + free(file); + + if (arch != NULL) { + mandoc_asprintf(&file, "%s/man%s/%s/%s.%s", + paths->paths[ipath], sec, arch, name, sec); + if (access(file, R_OK) != -1) + goto found; + free(file); + } + + mandoc_asprintf(&file, "%s/man%s/%s.[01-9]*", + paths->paths[ipath], sec, name); + globres = glob(file, 0, NULL, &globinfo); + if (globres != 0 && globres != GLOB_NOMATCH) + warn("%s: glob", file); + free(file); + if (globres == 0) + file = mandoc_strdup(*globinfo.gl_pathv); + globfree(&globinfo); + if (globres != 0) + return 0; + +found: +#if HAVE_SQLITE3 + warnx("outdated mandoc.db lacks %s(%s) entry, run makewhatis %s", + name, sec, paths->paths[ipath]); +#endif + *res = mandoc_reallocarray(*res, ++*ressz, sizeof(struct manpage)); + page = *res + (*ressz - 1); + page->file = file; + page->names = NULL; + page->output = NULL; + page->ipath = ipath; + page->bits = NAME_FILE & NAME_MASK; + page->sec = (*sec >= '1' && *sec <= '9') ? 
*sec - '1' + 1 : 10; + page->form = form; + return 1; +} + +static void +fs_search(const struct mansearch *cfg, const struct manpaths *paths, + int argc, char **argv, struct manpage **res, size_t *ressz) +{ + const char *const sections[] = + {"1", "8", "6", "2", "3", "3p", "5", "7", "4", "9"}; + const size_t nsec = sizeof(sections)/sizeof(sections[0]); + + size_t ipath, isec, lastsz; + + assert(cfg->argmode == ARG_NAME); + + *res = NULL; + *ressz = lastsz = 0; + while (argc) { + for (ipath = 0; ipath < paths->sz; ipath++) { + if (cfg->sec != NULL) { + if (fs_lookup(paths, ipath, cfg->sec, + cfg->arch, *argv, res, ressz) && + cfg->firstmatch) + return; + } else for (isec = 0; isec < nsec; isec++) + if (fs_lookup(paths, ipath, sections[isec], + cfg->arch, *argv, res, ressz) && + cfg->firstmatch) + return; + } + if (*ressz == lastsz) + warnx("No entry for %s in the manual.", *argv); + lastsz = *ressz; + argv++; + argc--; + } +} + +static void +parse(struct curparse *curp, int fd, const char *file) +{ + enum mandoclevel rctmp; + struct roff_man *man; + + /* Begin by parsing the file itself. */ + + assert(file); + assert(fd > 0); + + rctmp = mparse_readfd(curp->mp, fd, file); + if (fd != STDIN_FILENO) + close(fd); + if (rc < rctmp) + rc = rctmp; + + /* + * With -Wstop and warnings or errors of at least the requested + * level, do not produce output. + */ + + if (rctmp != MANDOCLEVEL_OK && curp->wstop) + return; + + /* If unset, allocate output dev now (if applicable). 
*/ + + if (curp->outdata == NULL) { + switch (curp->outtype) { + case OUTT_HTML: + curp->outdata = html_alloc(curp->outopts); + break; + case OUTT_UTF8: + curp->outdata = utf8_alloc(curp->outopts); + break; + case OUTT_LOCALE: + curp->outdata = locale_alloc(curp->outopts); + break; + case OUTT_ASCII: + curp->outdata = ascii_alloc(curp->outopts); + break; + case OUTT_PDF: + curp->outdata = pdf_alloc(curp->outopts); + break; + case OUTT_PS: + curp->outdata = ps_alloc(curp->outopts); + break; + default: + break; + } + } + + mparse_result(curp->mp, &man, NULL); + + /* Execute the out device, if it exists. */ + + if (man == NULL) + return; + if (man->macroset == MACROSET_MDOC) { + mdoc_validate(man); + switch (curp->outtype) { + case OUTT_HTML: + html_mdoc(curp->outdata, man); + break; + case OUTT_TREE: + tree_mdoc(curp->outdata, man); + break; + case OUTT_MAN: + man_mdoc(curp->outdata, man); + break; + case OUTT_PDF: + case OUTT_ASCII: + case OUTT_UTF8: + case OUTT_LOCALE: + case OUTT_PS: + terminal_mdoc(curp->outdata, man); + break; + default: + break; + } + } + if (man->macroset == MACROSET_MAN) { + man_validate(man); + switch (curp->outtype) { + case OUTT_HTML: + html_man(curp->outdata, man); + break; + case OUTT_TREE: + tree_man(curp->outdata, man); + break; + case OUTT_MAN: + man_man(curp->outdata, man); + break; + case OUTT_PDF: + case OUTT_ASCII: + case OUTT_UTF8: + case OUTT_LOCALE: + case OUTT_PS: + terminal_man(curp->outdata, man); + break; + default: + break; + } + } +} + +static void +passthrough(const char *file, int fd, int synopsis_only) +{ + const char synb[] = "S\bSY\bYN\bNO\bOP\bPS\bSI\bIS\bS"; + const char synr[] = "SYNOPSIS"; + + FILE *stream; + const char *syscall; + char *line, *cp; + size_t linesz; + int print; + + line = NULL; + linesz = 0; + + if ((stream = fdopen(fd, "r")) == NULL) { + close(fd); + syscall = "fdopen"; + goto fail; + } + + print = 0; + while (getline(&line, &linesz, stream) != -1) { + cp = line; + if (synopsis_only) { + if 
(print) { + if ( ! isspace((unsigned char)*cp)) + goto done; + while (isspace((unsigned char)*cp)) + cp++; + } else { + if (strcmp(cp, synb) == 0 || + strcmp(cp, synr) == 0) + print = 1; + continue; + } + } + if (fputs(cp, stdout)) { + fclose(stream); + syscall = "fputs"; + goto fail; + } + } + + if (ferror(stream)) { + fclose(stream); + syscall = "getline"; + goto fail; + } + +done: + free(line); + fclose(stream); + return; + +fail: + free(line); + warn("%s: SYSERR: %s", file, syscall); + if (rc < MANDOCLEVEL_SYSERR) + rc = MANDOCLEVEL_SYSERR; +} + +static int +koptions(int *options, char *arg) +{ + + if ( ! strcmp(arg, "utf-8")) { + *options |= MPARSE_UTF8; + *options &= ~MPARSE_LATIN1; + } else if ( ! strcmp(arg, "iso-8859-1")) { + *options |= MPARSE_LATIN1; + *options &= ~MPARSE_UTF8; + } else if ( ! strcmp(arg, "us-ascii")) { + *options &= ~(MPARSE_UTF8 | MPARSE_LATIN1); + } else { + warnx("-K %s: Bad argument", arg); + return 0; + } + return 1; +} + +static int +moptions(int *options, char *arg) +{ + + if (arg == NULL) + /* nothing to do */; + else if (0 == strcmp(arg, "doc")) + *options |= MPARSE_MDOC; + else if (0 == strcmp(arg, "andoc")) + /* nothing to do */; + else if (0 == strcmp(arg, "an")) + *options |= MPARSE_MAN; + else { + warnx("-m %s: Bad argument", arg); + return 0; + } + + return 1; +} + +static int +toptions(struct curparse *curp, char *arg) +{ + + if (0 == strcmp(arg, "ascii")) + curp->outtype = OUTT_ASCII; + else if (0 == strcmp(arg, "lint")) { + curp->outtype = OUTT_LINT; + curp->wlevel = MANDOCLEVEL_WARNING; + } else if (0 == strcmp(arg, "tree")) + curp->outtype = OUTT_TREE; + else if (0 == strcmp(arg, "man")) + curp->outtype = OUTT_MAN; + else if (0 == strcmp(arg, "html")) + curp->outtype = OUTT_HTML; + else if (0 == strcmp(arg, "utf8")) + curp->outtype = OUTT_UTF8; + else if (0 == strcmp(arg, "locale")) + curp->outtype = OUTT_LOCALE; + else if (0 == strcmp(arg, "xhtml")) + curp->outtype = OUTT_HTML; + else if (0 == strcmp(arg, "ps")) + 
curp->outtype = OUTT_PS; + else if (0 == strcmp(arg, "pdf")) + curp->outtype = OUTT_PDF; + else { + warnx("-T %s: Bad argument", arg); + return 0; + } + + return 1; +} + +static int +woptions(struct curparse *curp, char *arg) +{ + char *v, *o; + const char *toks[7]; + + toks[0] = "stop"; + toks[1] = "all"; + toks[2] = "warning"; + toks[3] = "error"; + toks[4] = "unsupp"; + toks[5] = "fatal"; + toks[6] = NULL; + + while (*arg) { + o = arg; + switch (getsubopt(&arg, UNCONST(toks), &v)) { + case 0: + curp->wstop = 1; + break; + case 1: + case 2: + curp->wlevel = MANDOCLEVEL_WARNING; + break; + case 3: + curp->wlevel = MANDOCLEVEL_ERROR; + break; + case 4: + curp->wlevel = MANDOCLEVEL_UNSUPP; + break; + case 5: + curp->wlevel = MANDOCLEVEL_BADARG; + break; + default: + warnx("-W %s: Bad argument", o); + return 0; + } + } + + return 1; +} + +static void +mmsg(enum mandocerr t, enum mandoclevel lvl, + const char *file, int line, int col, const char *msg) +{ + const char *mparse_msg; + + fprintf(stderr, "%s: %s:", getprogname(), file); + + if (line) + fprintf(stderr, "%d:%d:", line, col + 1); + + fprintf(stderr, " %s", mparse_strlevel(lvl)); + + if (NULL != (mparse_msg = mparse_strerror(t))) + fprintf(stderr, ": %s", mparse_msg); + + if (msg) + fprintf(stderr, ": %s", msg); + + fputc('\n', stderr); +} + +static pid_t +spawn_pager(struct tag_files *tag_files) +{ +#define MAX_PAGER_ARGS 16 + char *argv[MAX_PAGER_ARGS]; + const char *pager; + char *cp; + size_t cmdlen; + int argc; + pid_t pager_pid; + + pager = getenv("MANPAGER"); + if (pager == NULL || *pager == '\0') + pager = getenv("PAGER"); + if (pager == NULL || *pager == '\0') + pager = "more -s"; + cp = mandoc_strdup(pager); + + /* + * Parse the pager command into words. + * Intentionally do not do anything fancy here. 
+ */ + + argc = 0; + while (argc + 4 < MAX_PAGER_ARGS) { + argv[argc++] = cp; + cp = strchr(cp, ' '); + if (cp == NULL) + break; + *cp++ = '\0'; + while (*cp == ' ') + cp++; + if (*cp == '\0') + break; + } + + /* For more(1) and less(1), use the tag file. */ + + if ((cmdlen = strlen(argv[0])) >= 4) { + cp = argv[0] + cmdlen - 4; + if (strcmp(cp, "less") == 0 || strcmp(cp, "more") == 0) { + argv[argc++] = mandoc_strdup("-T"); + argv[argc++] = tag_files->tfn; + } + } + argv[argc++] = tag_files->ofn; + argv[argc] = NULL; + + switch (pager_pid = fork()) { + case -1: + err((int)MANDOCLEVEL_SYSERR, "fork"); + case 0: + /* Set pgrp in both parent and child to avoid racing exec. */ + (void)setpgid(0, 0); + break; + default: + (void)setpgid(pager_pid, 0); + (void)tcsetpgrp(STDIN_FILENO, pager_pid); +#if HAVE_PLEDGE + if (pledge("stdio rpath tmppath tty proc", NULL) == -1) + err((int)MANDOCLEVEL_SYSERR, "pledge"); +#endif + tag_files->pager_pid = pager_pid; + return pager_pid; + } + + /* The child process becomes the pager. */ + + if (dup2(tag_files->ofd, STDOUT_FILENO) == -1) + err((int)MANDOCLEVEL_SYSERR, "pager stdout"); + close(tag_files->ofd); + close(tag_files->tfd); + execvp(argv[0], argv); + err((int)MANDOCLEVEL_SYSERR, "exec %s", argv[0]); +} diff --git a/contrib/mdocml/main.h b/contrib/mdocml/main.h new file mode 100644 index 0000000..e9e7e86 --- /dev/null +++ b/contrib/mdocml/main.h @@ -0,0 +1,53 @@ +/* $Id: main.h,v 1.24 2015/11/07 14:01:16 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* Cast away const: convert to const void *, then through uintptr_t, and finally to plain void *. Needs uintptr_t, i.e. <stdint.h>, to be in scope where the macro is used. */ +#define UNCONST(a) ((void *)(uintptr_t)(const void *)(a)) + +struct roff_man; +struct manoutput; + +/* + * Definitions for main.c-visible output device functions, e.g., -Thtml + * and -Tascii. Note that ascii_alloc() is named as such in + * anticipation of latin1_alloc() and so on, all of which map into the + * terminal output routines with different character settings. + */ + +void *html_alloc(const struct manoutput *); +void html_mdoc(void *, const struct roff_man *); +void html_man(void *, const struct roff_man *); +void html_free(void *); + +void tree_mdoc(void *, const struct roff_man *); +void tree_man(void *, const struct roff_man *); + +void man_mdoc(void *, const struct roff_man *); +void man_man(void *, const struct roff_man *); + +void *locale_alloc(const struct manoutput *); +void *utf8_alloc(const struct manoutput *); +void *ascii_alloc(const struct manoutput *); +void ascii_free(void *); +void ascii_sepline(void *); + +void *pdf_alloc(const struct manoutput *); +void *ps_alloc(const struct manoutput *); +void pspdf_free(void *); + +void terminal_mdoc(void *, const struct roff_man *); +void terminal_man(void *, const struct roff_man *); diff --git a/contrib/mdocml/makewhatis.8 b/contrib/mdocml/makewhatis.8 new file mode 100644 index 0000000..8a5de93 --- /dev/null +++ b/contrib/mdocml/makewhatis.8 @@ -0,0 +1,217 @@ +.\" $Id: makewhatis.8,v 1.3 2014/08/17 21:03:06 schwarze Exp $ +.\" +.\" Copyright (c) 2011, 2012 Kristaps Dzonsons 
<kristaps@bsd.lv> +.\" Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: August 17 2014 $ +.Dt MAKEWHATIS 8 +.Os +.Sh NAME +.Nm makewhatis +.Nd index UNIX manuals +.Sh SYNOPSIS +.Nm +.Op Fl aDnpQ +.Op Fl T Cm utf8 +.Op Fl C Ar file +.Nm +.Op Fl aDnpQ +.Op Fl T Cm utf8 +.Ar dir ... +.Nm +.Op Fl DnpQ +.Op Fl T Cm utf8 +.Fl d Ar dir +.Op Ar +.Nm +.Op Fl Dnp +.Op Fl T Cm utf8 +.Fl u Ar dir +.Op Ar +.Nm +.Op Fl DQ +.Fl t Ar +.Sh DESCRIPTION +The +.Nm +utility extracts keywords from +.Ux +manuals and indexes them in a database for fast retrieval by +.Xr apropos 1 , +.Xr whatis 1 , +and +.Xr man 1 Ns 's +.Fl k +option. +.Pp +By default, +.Nm +creates a database in each +.Ar dir +using the files +.Sm off +.Sy man Ar section Li / +.Op Ar arch Li / +.Ar title . section +.Sm on +and +.Sm off +.Sy cat Ar section Li / +.Op Ar arch Li / +.Ar title . Sy 0 +.Sm on +in that directory. +Existing databases are replaced. +If +.Ar dir +is not provided, +.Nm +uses the default paths stipulated by +.Xr manpath 1 , +or +.Xr man.conf 5 . +.Pp +The arguments are as follows: +.Bl -tag -width "-C file" +.It Fl a +Use all directories and files found below +.Ar dir ... . 
+.It Fl C Ar file +Specify an alternative configuration +.Ar file +in +.Xr man.conf 5 +format. +.It Fl D +Display all files added to or removed from the index. +With a second +.Fl D , +also show all keywords added for each file. +.It Fl d Ar dir +Merge (remove and re-add) +.Ar +to the database in +.Ar dir . +.It Fl n +Do not create or modify any database; scan and parse only, +and print manual page names and descriptions to standard output. +.It Fl p +Print warnings about potential problems with manual pages +to the standard error output. +.It Fl Q +Quickly build reduced-size databases +by reading only the NAME sections of manuals. +The resulting databases will usually contain names and descriptions only. +.It Fl T Cm utf8 +Use UTF-8 encoding instead of ASCII for strings stored in the databases. +.It Fl t Ar +Check the given +.Ar files +for potential problems. +Implies +.Fl a , +.Fl n , +and +.Fl p . +All diagnostic messages are printed to the standard output; +the standard error output is not used. +.It Fl u Ar dir +Remove +.Ar +from the database in +.Ar dir . +.El +.Pp +If fatal parse errors are encountered while parsing, the offending file +is printed to stderr, omitted from the index, and the parse continues +with the next input file. +.Sh FILES +.Bl -tag -width Ds +.It Pa mandoc.db +A database of manpages relative to the directory of the file. +This file is portable across architectures and systems, so long as the +manpage hierarchy it indexes does not change. +.It Pa /etc/man.conf +The default +.Xr man 1 +configuration file. +.El +.Sh EXIT STATUS +The +.Nm +utility exits with one of the following values: +.Pp +.Bl -tag -width Ds -compact +.It 0 +No errors occurred. +.It 5 +Invalid command line arguments were specified. +No input files have been read. +.It 6 +An operating system error occurred, for example memory exhaustion or an +error accessing input files. +Such errors cause +.Nm +to exit at once, possibly in the middle of parsing or formatting a file. 
+The output databases are corrupt and should be removed. +.El +.Sh SEE ALSO +.Xr apropos 1 , +.Xr man 1 , +.Xr whatis 1 , +.Xr man.conf 5 +.Sh HISTORY +A +.Nm +utility first appeared in +.Bx 2 . +It was rewritten in +.Xr perl 1 +for +.Ox 2.7 +and in C for +.Ox 5.6 . +.Pp +The +.Ar dir +argument first appeared in +.Nx 1.0 ; +the options +.Fl dpt +in +.Ox 2.7 ; +the option +.Fl u +in +.Ox 3.4 ; +and the options +.Fl aCDnQT +in +.Ox 5.6 . +.Sh AUTHORS +.An -nosplit +.An Bill Joy +wrote the original +.Bx +.Nm +in February 1979, +.An Marc Espie +started the Perl version in 2000, +and the current version of +.Nm +was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +and +.An Ingo Schwarze Aq Mt schwarze@openbsd.org . diff --git a/contrib/mdocml/man-cgi.css b/contrib/mdocml/man-cgi.css new file mode 100644 index 0000000..256e8c6 --- /dev/null +++ b/contrib/mdocml/man-cgi.css @@ -0,0 +1,13 @@ +body { font-family: Helvetica, Arial, sans-serif; } +body > div { padding-left: 2em; + padding-top: 1em; } +body > div#mancgi { padding-left: 0em; + padding-top: 0em; } +body > div.results { font-size: smaller; } +#mancgi fieldset { text-align: center; + border: thin solid silver; + border-radius: 1em; + font-size: small; } +#mancgi input[name=expr] { width: 25%; } +.results td.title { vertical-align: top; + padding-right: 1em; } diff --git a/contrib/mdocml/man.1 b/contrib/mdocml/man.1 new file mode 100644 index 0000000..f29360b --- /dev/null +++ b/contrib/mdocml/man.1 @@ -0,0 +1,451 @@ +.\" $Id: man.1,v 1.16 2015/09/21 09:59:02 schwarze Exp $ +.\" +.\" Copyright (c) 1989, 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" Copyright (c) 2003, 2007, 2008, 2014 Jason McIntyre <jmc@openbsd.org> +.\" Copyright (c) 2010, 2011, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)man.1 8.2 (Berkeley) 1/2/94 +.\" +.Dd $Mdocdate: September 21 2015 $ +.Dt MAN 1 +.Os +.Sh NAME +.Nm man +.Nd display manual pages +.Sh SYNOPSIS +.Nm man +.Op Fl acfhklw +.Op Fl C Ar file +.Op Fl I Cm os Ns = Ns Ar name +.Op Fl K Ar encoding +.Op Fl M Ar path +.Op Fl m Ar path +.Op Fl O Ar option Ns = Ns Ar value +.Op Fl S Ar subsection +.Op Fl s Ar section +.Op Fl T Ar output +.Op Fl W Ar level +.Op Ar section +.Ar name ... +.Sh DESCRIPTION +The +.Nm +utility +displays the +manual pages entitled +.Ar name . +Pages may be selected according to +a specific category +.Pq Ar section +or +machine architecture +.Pq Ar subsection . 
+.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl a +Display all of the manual pages for a specified +.Ar section +and +.Ar name +combination. +Normally, only the first manual page found is displayed. +.It Fl C Ar file +Use the specified +.Ar file +instead of the default configuration file. +This permits users to configure their own manual environment. +See +.Xr man.conf 5 +for a description of the contents of this file. +.It Fl c +Copy the manual page to the standard output instead of using +.Xr more 1 +to paginate it. +This is done by default if the standard output is not a terminal device. +.It Fl f +A synonym for +.Xr whatis 1 . +It searches for +.Ar name +in manual page names and displays the header lines from all matching pages. +The search is case insensitive and matches whole words only. +This overrides any earlier +.Fl k +and +.Fl l +options. +.It Fl I Cm os Ns = Ns Ar name +Override the default operating system +.Ar name +for the +.Xr mdoc 7 +.Ic \&Os +and for the +.Xr man 7 +.Ic \&TH +macro. +.It Fl h +Display only the SYNOPSIS lines of the requested manual pages. +Implies +.Fl a +and +.Fl c . +.It Fl K Ar encoding +Specify the input encoding. +The supported +.Ar encoding +arguments are +.Cm us-ascii , +.Cm iso-8859-1 , +and +.Cm utf-8 . +By default, the encoding is automatically detected as described in the +.Xr mandoc 1 +manual. +.It Fl k +A synonym for +.Xr apropos 1 . +Instead of +.Ar name , +an expression can be provided using the syntax described in the +.Xr apropos 1 +manual. +By default, it displays the header lines of all matching pages. +This overrides any earlier +.Fl f +and +.Fl l +options. +.It Fl l +A synonym for +.Xr mandoc 1 +.Fl a . +The +.Ar name +arguments are interpreted as filenames. +No search is done and +.Ar file , +.Ar path , +.Ar section , +and +.Ar subsection +are ignored. +This overrides any earlier +.Fl f , +.Fl k , +and +.Fl w +options. 
+.It Fl M Ar path +Override the list of standard directories which +.Nm +searches for manual pages. +The supplied +.Ar path +must be a colon +.Pq Ql \&: +separated list of directories. +This search path may also be set using the environment variable +.Ev MANPATH . +.It Fl m Ar path +Augment the list of standard directories which +.Nm +searches for manual pages. +The supplied +.Ar path +must be a colon +.Pq Ql \&: +separated list of directories. +These directories will be searched before the standard directories or +the directories specified using the +.Fl M +option or the +.Ev MANPATH +environment variable. +.It Fl O Ar option Ns = Ns Ar value +Comma-separated output options. +For each output format, the available options are described in the +.Xr mandoc 1 +manual. +.It Fl S Ar subsection +Restricts the directories that +.Nm +will search to those of a specific +.Xr machine 1 +architecture. +.Ar subsection +is case insensitive. +.Pp +By default manual pages for all architectures are installed. +Therefore this option can be used to view pages for one +architecture whilst using another. +.Pp +This option overrides the +.Ev MACHINE +environment variable. +.It Xo +.Op Fl s +.Ar section +.Xc +Restricts the directories that +.Nm +will search to a specific section. +The currently available sections are: +.Pp +.Bl -tag -width "localXXX" -offset indent -compact +.It 1 +General commands +.Pq tools and utilities . +.It 2 +System calls and error numbers. +.It 3 +Libraries. +.It 3f +Fortran programmer's reference guide. +.It 3p +.Xr perl 1 +programmer's reference guide. +.It 4 +Device drivers. +.It 5 +File formats. +.It 6 +Games. +.It 7 +Miscellaneous. +.It 8 +System maintenance and operation commands. +.It 9 +Kernel internals. +.It X11 +An alias for X11R6. +.It X11R6 +X Window System. +.It local +Pages located in +.Pa /usr/local . +.It n +Tcl/Tk commands. 
+.El +.Pp +The +.Nm +configuration file, +.Xr man.conf 5 , +specifies the possible +.Ar section +values, and their search order. +Additional sections may be specified. +.It Fl T Ar output +Select the output format. +The default is +.Cm locale . +The other output modes +.Cm ascii , +.Cm html , +.Cm lint , +.Cm man , +.Cm pdf , +.Cm ps , +.Cm tree , +and +.Cm utf8 +are described in the +.Xr mandoc 1 +manual. +.It Fl W Ar level +Specify the minimum message +.Ar level +to be reported on the standard error output and to affect the exit status. +The +.Ar level +can be +.Cm warning , +.Cm error , +or +.Cm unsupp ; +.Cm all +is an alias for +.Cm warning . +By default, +.Nm +is silent. +See the +.Xr mandoc 1 +manual for details. +.It Fl w +List the pathnames of the manual pages which +.Nm +would display for the specified +.Ar section +and +.Ar name +combination. +.El +.Pp +Guidelines for writing +man pages can be found in +.Xr mdoc 7 . +.Pp +If both a formatted and an unformatted version of the same manual page, +for example +.Pa cat1/foo.0 +and +.Pa man1/foo.1 , +exist in the same directory, and at least one of them is selected, +only the newer one is used. +However, if both the +.Fl a +and the +.Fl w +options are specified, both file names are printed. +.Sh ENVIRONMENT +.Bl -tag -width MANPATHX +.It Ev MACHINE +As some manual pages are intended only for specific architectures, +.Nm +searches any subdirectories, +with the same name as the current architecture, +in every directory which it searches. +Machine specific areas are checked before general areas. +The current machine type may be overridden by setting the environment +variable +.Ev MACHINE +to the name of a specific architecture, +or with the +.Fl S +option. +.Ev MACHINE +is case insensitive. +.It Ev MANPAGER +Any non-empty value of the environment variable +.Ev MANPAGER +will be used instead of the standard pagination program, +.Xr more 1 . 
+If +.Xr less 1 +is used, the interactive +.Ic :t +command can be used to go to the definitions of various terms, for +example command line options, command modifiers, internal commands, +and environment variables. +.It Ev MANPATH +The standard search path used by +.Nm +may be overridden by specifying a path in the +.Ev MANPATH +environment +variable. +The format of the path is a colon +.Pq Ql \&: +separated list of directories. +.It Ev PAGER +Specifies the pagination program to use when +.Ev MANPAGER +is not defined. +If neither PAGER nor MANPAGER is defined, +.Xr more 1 +.Fl s +will be used. +.El +.Sh FILES +.Bl -tag -width /etc/man.conf -compact +.It Pa /etc/man.conf +default man configuration file +.El +.Sh EXIT STATUS +.Ex -std man +.Sh SEE ALSO +.Xr apropos 1 , +.Xr intro 1 , +.Xr whatis 1 , +.Xr whereis 1 , +.Xr intro 2 , +.Xr intro 3 , +.Xr intro 4 , +.Xr intro 5 , +.Xr man.conf 5 , +.Xr intro 6 , +.Xr intro 7 , +.Xr mdoc 7 , +.Xr intro 8 , +.Xr intro 9 +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification. +.Pp +The flags +.Op Fl aCcfhIKlMmOSsTWw , +as well as the environment variables +.Ev MACHINE , +.Ev MANPAGER , +and +.Ev MANPATH , +are extensions to that specification. +.Sh HISTORY +A +.Nm +command first appeared in +.At v3 . +.Pp +The +.Fl w +option first appeared in +.At v7 ; +.Fl f +and +.Fl k +in +.Bx 4 ; +.Fl M +in +.Bx 4.3 ; +.Fl a +in +.Bx 4.3 Tahoe ; +.Fl c +and +.Fl m +in +.Bx 4.3 Reno ; +.Fl h +in +.Bx 4.3 Net/2 ; +.Fl C +in +.Nx 1.0 ; +and +.Fl s +and +.Fl S +in +.Ox 2.3 . 
diff --git a/contrib/mdocml/man.7 b/contrib/mdocml/man.7 new file mode 100644 index 0000000..bfeec51 --- /dev/null +++ b/contrib/mdocml/man.7 @@ -0,0 +1,928 @@ +.\" $Id: man.7,v 1.132 2015/01/29 00:33:57 schwarze Exp $ +.\" +.\" Copyright (c) 2009, 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> +.\" Copyright (c) 2011-2015 Ingo Schwarze <schwarze@openbsd.org> +.\" Copyright (c) 2010 Joerg Sonnenberger <joerg@netbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: January 29 2015 $ +.Dt MAN 7 +.Os +.Sh NAME +.Nm man +.Nd legacy formatting language for manual pages +.Sh DESCRIPTION +Traditionally, the +.Nm man +language has been used to write +.Ux +manuals for the +.Xr man 1 +utility. +It supports limited control of presentational details like fonts, +indentation and spacing. +This reference document describes the structure of manual pages +and the syntax and usage of the man language. +.Pp +.Bf -emphasis +Do not use +.Nm +to write your manuals: +.Ef +It lacks support for semantic markup. +Use the +.Xr mdoc 7 +language, instead. +.Pp +In a +.Nm +document, lines beginning with the control character +.Sq \&. +are called +.Dq macro lines . +The first word is the macro name. +It usually consists of two capital letters. 
+For a list of available macros, see +.Sx MACRO OVERVIEW . +The words following the macro name are arguments to the macro. +.Pp +Lines not beginning with the control character are called +.Dq text lines . +They provide free-form text to be printed; the formatting of the text +depends on the respective processing context: +.Bd -literal -offset indent +\&.SH Macro lines change control state. +Text lines are interpreted within the current state. +.Ed +.Pp +Many aspects of the basic syntax of the +.Nm +language are based on the +.Xr roff 7 +language; see the +.Em LANGUAGE SYNTAX +and +.Em MACRO SYNTAX +sections in the +.Xr roff 7 +manual for details, in particular regarding +comments, escape sequences, whitespace, and quoting. +.Sh MANUAL STRUCTURE +Each +.Nm +document must contain the +.Sx \&TH +macro describing the document's section and title. +It may occur anywhere in the document, although conventionally it +appears as the first macro. +.Pp +Beyond +.Sx \&TH , +at least one macro or text line must appear in the document. +.Pp +The following is a well-formed skeleton +.Nm +file for a utility +.Qq progname : +.Bd -literal -offset indent +\&.TH PROGNAME 1 2009-10-10 +\&.SH NAME +\efBprogname\efR \e(en one line about what it does +\&.\e\(dq .SH LIBRARY +\&.\e\(dq For sections 2, 3, and 9 only. +\&.\e\(dq Not used in OpenBSD. +\&.SH SYNOPSIS +\efBprogname\efR [\efB\e-options\efR] \efIfile ...\efR +\&.SH DESCRIPTION +The \efBprogname\efR utility processes files ... +\&.\e\(dq .SH CONTEXT +\&.\e\(dq For section 9 functions only. +\&.\e\(dq .SH IMPLEMENTATION NOTES +\&.\e\(dq Not used in OpenBSD. +\&.\e\(dq .SH RETURN VALUES +\&.\e\(dq For sections 2, 3, and 9 function return values only. +\&.\e\(dq .SH ENVIRONMENT +\&.\e\(dq For sections 1, 6, 7, and 8 only. +\&.\e\(dq .SH FILES +\&.\e\(dq .SH EXIT STATUS +\&.\e\(dq For sections 1, 6, and 8 only. +\&.\e\(dq .SH EXAMPLES +\&.\e\(dq .SH DIAGNOSTICS +\&.\e\(dq For sections 1, 4, 6, 7, 8, and 9 printf/stderr messages only. 
+\&.\e\(dq .SH ERRORS +\&.\e\(dq For sections 2, 3, 4, and 9 errno settings only. +\&.\e\(dq .SH SEE ALSO +\&.\e\(dq .BR foobar ( 1 ) +\&.\e\(dq .SH STANDARDS +\&.\e\(dq .SH HISTORY +\&.\e\(dq .SH AUTHORS +\&.\e\(dq .SH CAVEATS +\&.\e\(dq .SH BUGS +\&.\e\(dq .SH SECURITY CONSIDERATIONS +\&.\e\(dq Not used in OpenBSD. +.Ed +.Pp +The sections in a +.Nm +document are conventionally ordered as they appear above. +Sections should be composed as follows: +.Bl -ohang -offset indent +.It Em NAME +The name(s) and a short description of the documented material. +The syntax for this is generally as follows: +.Pp +.D1 \efBname\efR \e(en description +.It Em LIBRARY +The name of the library containing the documented material, which is +assumed to be a function in a section 2 or 3 manual. +For functions in the C library, this may be as follows: +.Pp +.D1 Standard C Library (libc, -lc) +.It Em SYNOPSIS +Documents the utility invocation syntax, function call syntax, or device +configuration. +.Pp +For the first, utilities (sections 1, 6, and 8), this is +generally structured as follows: +.Pp +.D1 \efBname\efR [-\efBab\efR] [-\efBc\efR\efIarg\efR] \efBpath\efR... +.Pp +For the second, function calls (sections 2, 3, 9): +.Pp +.D1 \&.B char *name(char *\efIarg\efR); +.Pp +And for the third, configurations (section 4): +.Pp +.D1 \&.B name* at cardbus ? function ? +.Pp +Manuals not in these sections generally don't need a +.Em SYNOPSIS . +.It Em DESCRIPTION +This expands upon the brief, one-line description in +.Em NAME . +It usually contains a break-down of the options (if documenting a +command). +.It Em CONTEXT +This section lists the contexts in which functions can be called in section 9. +The contexts are autoconf, process, or interrupt. +.It Em IMPLEMENTATION NOTES +Implementation-specific notes should be kept here. +This is useful when implementing standard functions that may have side +effects or notable algorithmic implications. 
+.It Em RETURN VALUES +This section documents the return values of functions in sections 2, 3, and 9. +.It Em ENVIRONMENT +Documents any usages of environment variables, e.g., +.Xr environ 7 . +.It Em FILES +Documents files used. +It's helpful to document both the file name and a short description of how +the file is used (created, modified, etc.). +.It Em EXIT STATUS +This section documents the command exit status for +section 1, 6, and 8 utilities. +Historically, this information was described in +.Em DIAGNOSTICS , +a practise that is now discouraged. +.It Em EXAMPLES +Example usages. +This often contains snippets of well-formed, +well-tested invocations. +Make sure that examples work properly! +.It Em DIAGNOSTICS +Documents error conditions. +In section 4 and 9 manuals, these are usually messages +printed by the kernel to the console and to the kernel log. +In section 1, 6, 7, and 8, these are usually messages +printed by userland programs to the standard error output. +.Pp +Historically, this section was used in place of +.Em EXIT STATUS +for manuals in sections 1, 6, and 8; however, this practise is +discouraged. +.It Em ERRORS +Documents +.Xr errno 2 +settings in sections 2, 3, 4, and 9. +.It Em SEE ALSO +References other manuals with related topics. +This section should exist for most manuals. +.Pp +.D1 \&.BR bar \&( 1 \&), +.Pp +Cross-references should conventionally be ordered +first by section, then alphabetically. +.It Em STANDARDS +References any standards implemented or used, such as +.Pp +.D1 IEEE Std 1003.2 (\e(lqPOSIX.2\e(rq) +.Pp +If not adhering to any standards, the +.Em HISTORY +section should be used. +.It Em HISTORY +A brief history of the subject, including where support first appeared. +.It Em AUTHORS +Credits to the person or persons who wrote the code and/or documentation. +Authors should generally be noted by both name and email address. +.It Em CAVEATS +Common misuses and misunderstandings should be explained +in this section. 
+.It Em BUGS +Known bugs, limitations, and work-arounds should be described +in this section. +.It Em SECURITY CONSIDERATIONS +Documents any security precautions that operators should consider. +.El +.Sh MACRO OVERVIEW +This overview is sorted such that macros of similar purpose are listed +together, to help find the best macro for any given purpose. +Deprecated macros are not included in the overview, but can be found +in the alphabetical reference below. +.Ss Page header and footer meta-data +.Bl -column "PP, LP, P" description +.It Sx TH Ta set the title: Ar title section date Op Ar source Op Ar volume +.It Sx AT Ta display AT&T UNIX version in the page footer (<= 1 argument) +.It Sx UC Ta display BSD version in the page footer (<= 1 argument) +.El +.Ss Sections and paragraphs +.Bl -column "PP, LP, P" description +.It Sx SH Ta section header (one line) +.It Sx SS Ta subsection header (one line) +.It Sx PP , LP , P Ta start an undecorated paragraph (no arguments) +.It Sx RS , RE Ta reset the left margin: Op Ar width +.It Sx IP Ta indented paragraph: Op Ar head Op Ar width +.It Sx TP Ta tagged paragraph: Op Ar width +.It Sx HP Ta hanged paragraph: Op Ar width +.It Sx PD Ta set vertical paragraph distance: Op Ar height +.It Sx \&br Ta force output line break in text mode (no arguments) +.It Sx \&sp Ta force vertical space: Op Ar height +.It Sx fi , nf Ta fill mode and no-fill mode (no arguments) +.It Sx in Ta additional indent: Op Ar width +.El +.Ss Physical markup +.Bl -column "PP, LP, P" description +.It Sx B Ta boldface font +.It Sx I Ta italic font +.It Sx R Ta roman (default) font +.It Sx SB Ta small boldface font +.It Sx SM Ta small roman font +.It Sx BI Ta alternate between boldface and italic fonts +.It Sx BR Ta alternate between boldface and roman fonts +.It Sx IB Ta alternate between italic and boldface fonts +.It Sx IR Ta alternate between italic and roman fonts +.It Sx RB Ta alternate between roman and boldface fonts +.It Sx RI Ta alternate between 
roman and italic fonts +.El +.Sh MACRO REFERENCE +This section is a canonical reference to all macros, arranged +alphabetically. +For the scoping of individual macros, see +.Sx MACRO SYNTAX . +.Ss \&AT +Sets the volume for the footer for compatibility with man pages from +.At +releases. +The optional arguments specify which release it is from. +.Ss \&B +Text is rendered in bold face. +.Pp +See also +.Sx \&I +and +.Sx \&R . +.Ss \&BI +Text is rendered alternately in bold face and italic. +Thus, +.Sq .BI this word and that +causes +.Sq this +and +.Sq and +to render in bold face, while +.Sq word +and +.Sq that +render in italics. +Whitespace between arguments is omitted in output. +.Pp +Examples: +.Pp +.Dl \&.BI bold italic bold italic +.Pp +The output of this example will be emboldened +.Dq bold +and italicised +.Dq italic , +with spaces stripped between arguments. +.Pp +See also +.Sx \&IB , +.Sx \&BR , +.Sx \&RB , +.Sx \&RI , +and +.Sx \&IR . +.Ss \&BR +Text is rendered alternately in bold face and roman (the default font). +Whitespace between arguments is omitted in output. +.Pp +See +.Sx \&BI +for an equivalent example. +.Pp +See also +.Sx \&BI , +.Sx \&IB , +.Sx \&RB , +.Sx \&RI , +and +.Sx \&IR . +.Ss \&DT +Has no effect. +Included for compatibility. +.Ss \&EE +This is a non-standard GNU extension, included only for compatibility. +In +.Xr mandoc 1 , +it does the same as +.Sx \&fi . +.Ss \&EX +This is a non-standard GNU extension, included only for compatibility. +In +.Xr mandoc 1 , +it does the same as +.Sx \&nf . +.Ss \&HP +Begin a paragraph whose initial output line is left-justified, but +subsequent output lines are indented, with the following syntax: +.Bd -filled -offset indent +.Pf \. Sx \&HP +.Op Ar width +.Ed +.Pp +The +.Ar width +argument is a +.Xr roff 7 +scaling width. +If specified, it's saved for later paragraph left-margins; if unspecified, the +saved or default width is used. 
+.Pp +See also +.Sx \&IP , +.Sx \&LP , +.Sx \&P , +.Sx \&PP , +and +.Sx \&TP . +.Ss \&I +Text is rendered in italics. +.Pp +See also +.Sx \&B +and +.Sx \&R . +.Ss \&IB +Text is rendered alternately in italics and bold face. +Whitespace between arguments is omitted in output. +.Pp +See +.Sx \&BI +for an equivalent example. +.Pp +See also +.Sx \&BI , +.Sx \&BR , +.Sx \&RB , +.Sx \&RI , +and +.Sx \&IR . +.Ss \&IP +Begin an indented paragraph with the following syntax: +.Bd -filled -offset indent +.Pf \. Sx \&IP +.Op Ar head Op Ar width +.Ed +.Pp +The +.Ar width +argument is a +.Xr roff 7 +scaling width defining the left margin. +It's saved for later paragraph left-margins; if unspecified, the saved or +default width is used. +.Pp +The +.Ar head +argument is used as a leading term, flushed to the left margin. +This is useful for bulleted paragraphs and so on. +.Pp +See also +.Sx \&HP , +.Sx \&LP , +.Sx \&P , +.Sx \&PP , +and +.Sx \&TP . +.Ss \&IR +Text is rendered alternately in italics and roman (the default font). +Whitespace between arguments is omitted in output. +.Pp +See +.Sx \&BI +for an equivalent example. +.Pp +See also +.Sx \&BI , +.Sx \&IB , +.Sx \&BR , +.Sx \&RB , +and +.Sx \&RI . +.Ss \&LP +Begin an undecorated paragraph. +The scope of a paragraph is closed by a subsequent paragraph, +sub-section, section, or end of file. +The saved paragraph left-margin width is reset to the default. +.Pp +See also +.Sx \&HP , +.Sx \&IP , +.Sx \&P , +.Sx \&PP , +and +.Sx \&TP . +.Ss \&OP +Optional command-line argument. +This is a non-standard GNU extension, included only for compatibility. +It has the following syntax: +.Bd -filled -offset indent +.Pf \. Sx \&OP +.Ar key Op Ar value +.Ed +.Pp +The +.Ar key +is usually a command-line flag and +.Ar value +its argument. +.Ss \&P +Synonym for +.Sx \&LP . +.Pp +See also +.Sx \&HP , +.Sx \&IP , +.Sx \&LP , +.Sx \&PP , +and +.Sx \&TP . +.Ss \&PD +Specify the vertical space to be inserted before each new paragraph. 
+.br +The syntax is as follows: +.Bd -filled -offset indent +.Pf \. Sx \&PD +.Op Ar height +.Ed +.Pp +The +.Ar height +argument is a +.Xr roff 7 +scaling width. +It defaults to +.Cm 1v . +If the unit is omitted, +.Cm v +is assumed. +.Pp +This macro affects the spacing before any subsequent instances of +.Sx \&HP , +.Sx \&IP , +.Sx \&LP , +.Sx \&P , +.Sx \&PP , +.Sx \&SH , +.Sx \&SS , +and +.Sx \&TP . +.Ss \&PP +Synonym for +.Sx \&LP . +.Pp +See also +.Sx \&HP , +.Sx \&IP , +.Sx \&LP , +.Sx \&P , +and +.Sx \&TP . +.Ss \&R +Text is rendered in roman (the default font). +.Pp +See also +.Sx \&I +and +.Sx \&B . +.Ss \&RB +Text is rendered alternately in roman (the default font) and bold face. +Whitespace between arguments is omitted in output. +.Pp +See +.Sx \&BI +for an equivalent example. +.Pp +See also +.Sx \&BI , +.Sx \&IB , +.Sx \&BR , +.Sx \&RI , +and +.Sx \&IR . +.Ss \&RE +Explicitly close out the scope of a prior +.Sx \&RS . +The default left margin is restored to the state before that +.Sx \&RS +invocation. +.Pp +The syntax is as follows: +.Bd -filled -offset indent +.Pf \. Sx \&RE +.Op Ar level +.Ed +.Pp +Without an argument, the most recent +.Sx \&RS +block is closed out. +If +.Ar level +is 1, all open +.Sx \&RS +blocks are closed out. +Otherwise, +.Ar level No \(mi 1 +nested +.Sx \&RS +blocks remain open. +.Ss \&RI +Text is rendered alternately in roman (the default font) and italics. +Whitespace between arguments is omitted in output. +.Pp +See +.Sx \&BI +for an equivalent example. +.Pp +See also +.Sx \&BI , +.Sx \&IB , +.Sx \&BR , +.Sx \&RB , +and +.Sx \&IR . +.Ss \&RS +Temporarily reset the default left margin. +This has the following syntax: +.Bd -filled -offset indent +.Pf \. Sx \&RS +.Op Ar width +.Ed +.Pp +The +.Ar width +argument is a +.Xr roff 7 +scaling width. +If not specified, the saved or default width is used. +.Pp +See also +.Sx \&RE . +.Ss \&SB +Text is rendered in small size (one point smaller than the default font) +bold face. 
+.Ss \&SH +Begin a section. +The scope of a section is only closed by another section or the end of +file. +The paragraph left-margin width is reset to the default. +.Ss \&SM +Text is rendered in small size (one point smaller than the default +font). +.Ss \&SS +Begin a sub-section. +The scope of a sub-section is closed by a subsequent sub-section, +section, or end of file. +The paragraph left-margin width is reset to the default. +.Ss \&TH +Sets the title of the manual page for use in the page header +and footer with the following syntax: +.Bd -filled -offset indent +.Pf \. Sx \&TH +.Ar title section date +.Op Ar source Op Ar volume +.Ed +.Pp +Conventionally, the document +.Ar title +is given in all caps. +The recommended +.Ar date +format is +.Sy YYYY-MM-DD +as specified in the ISO-8601 standard; +if the argument does not conform, it is printed verbatim. +If the +.Ar date +is empty or not specified, the current date is used. +The optional +.Ar source +string specifies the organisation providing the utility. +When unspecified, +.Xr mandoc 1 +uses its +.Fl Ios +argument. +The +.Ar volume +string replaces the default rendered volume, which is dictated by the +manual section. +.Pp +Examples: +.Pp +.Dl \&.TH CVS 5 "1992-02-12" GNU +.Ss \&TP +Begin a paragraph where the head, if exceeding the indentation width, is +followed by a newline; if not, the body follows on the same line after a +buffer to the indentation width. +Subsequent output lines are indented. +The syntax is as follows: +.Bd -filled -offset indent +.Pf \. Sx \&TP +.Op Ar width +.Ed +.Pp +The +.Ar width +argument is a +.Xr roff 7 +scaling width. +If specified, it's saved for later paragraph left-margins; if +unspecified, the saved or default width is used. +.Pp +See also +.Sx \&HP , +.Sx \&IP , +.Sx \&LP , +.Sx \&P , +and +.Sx \&PP . +.Ss \&UC +Sets the volume for the footer for compatibility with man pages from +.Bx +releases. +The optional first argument specifies which release it is from. 
+.Ss \&UE +End a uniform resource identifier block. +This is a non-standard GNU extension, included only for compatibility. +See +.Sx \&UR . +.Ss \&UR +Begin a uniform resource identifier block. +This is a non-standard GNU extension, included only for compatibility. +It has the following syntax: +.Bd -literal -offset indent +.Pf \. Sx \&UR Ar uri +link description to be shown +.Pf \. Sx UE +.Ed +.Ss \&br +Breaks the current line. +Consecutive invocations have no further effect. +.Pp +See also +.Sx \&sp . +.Ss \&fi +End literal mode begun by +.Sx \&nf . +.Ss \&in +Indent relative to the current indentation: +.Pp +.D1 Pf \. Sx \&in Op Ar width +.Pp +If +.Ar width +is signed, the new offset is relative. +Otherwise, it is absolute. +This value is reset upon the next paragraph, section, or sub-section. +.Ss \&nf +Begin literal mode: all subsequent free-form lines have their end of +line boundaries preserved. +May be ended by +.Sx \&fi . +Literal mode is implicitly ended by +.Sx \&SH +or +.Sx \&SS . +.Ss \&sp +Insert vertical spaces into output with the following syntax: +.Bd -filled -offset indent +.Pf \. Sx \&sp +.Op Ar height +.Ed +.Pp +The +.Ar height +argument is a scaling width as described in +.Xr roff 7 . +If 0, this is equivalent to the +.Sx \&br +macro. +Defaults to 1, if unspecified. +.Pp +See also +.Sx \&br . +.Sh MACRO SYNTAX +The +.Nm +macros are classified by scope: line scope or block scope. +Line macros are only scoped to the current line (and, in some +situations, the subsequent line). +Block macros are scoped to the current line and subsequent lines until +closed by another block macro. +.Ss Line Macros +Line macros are generally scoped to the current line, with the body +consisting of zero or more arguments. +If a macro is scoped to the next line and the line arguments are empty, +the next line, which must be text, is used instead. +Thus: +.Bd -literal -offset indent +\&.I +foo +.Ed +.Pp +is equivalent to +.Sq \&.I foo . 
+If next-line macros are invoked consecutively, only the last is used. +If a next-line macro is followed by a non-next-line macro, an error is +raised, except for +.Sx \&br +and +.Sx \&sp . +.Pp +The syntax is as follows: +.Bd -literal -offset indent +\&.YO \(lBbody...\(rB +\(lBbody...\(rB +.Ed +.Bl -column "MacroX" "ArgumentsX" "ScopeXXXXX" "CompatX" -offset indent +.It Em Macro Ta Em Arguments Ta Em Scope Ta Em Notes +.It Sx \&AT Ta <=1 Ta current Ta \& +.It Sx \&B Ta n Ta next-line Ta \& +.It Sx \&BI Ta n Ta current Ta \& +.It Sx \&BR Ta n Ta current Ta \& +.It Sx \&DT Ta 0 Ta current Ta \& +.It Sx \&EE Ta 0 Ta current Ta compat +.It Sx \&EX Ta 0 Ta current Ta compat +.It Sx \&I Ta n Ta next-line Ta \& +.It Sx \&IB Ta n Ta current Ta \& +.It Sx \&IR Ta n Ta current Ta \& +.It Sx \&OP Ta 0, 1 Ta current Ta compat +.It Sx \&PD Ta 1 Ta current Ta \& +.It Sx \&R Ta n Ta next-line Ta \& +.It Sx \&RB Ta n Ta current Ta \& +.It Sx \&RI Ta n Ta current Ta \& +.It Sx \&SB Ta n Ta next-line Ta \& +.It Sx \&SM Ta n Ta next-line Ta \& +.It Sx \&TH Ta >1, <6 Ta current Ta \& +.It Sx \&UC Ta <=1 Ta current Ta \& +.It Sx \&br Ta 0 Ta current Ta compat +.It Sx \&fi Ta 0 Ta current Ta compat +.It Sx \&in Ta 1 Ta current Ta compat +.It Sx \&nf Ta 0 Ta current Ta compat +.It Sx \&sp Ta 1 Ta current Ta compat +.El +.Pp +Macros marked as +.Qq compat +are included for compatibility with the significant corpus of existing +manuals that mix dialects of roff. +These macros should not be used for portable +.Nm +manuals. +.Ss Block Macros +Block macros comprise a head and body. +As with in-line macros, the head is scoped to the current line and, in +one circumstance, the next line (the next-line stipulations as in +.Sx Line Macros +apply here as well). 
+.Pp +The syntax is as follows: +.Bd -literal -offset indent +\&.YO \(lBhead...\(rB +\(lBhead...\(rB +\(lBbody...\(rB +.Ed +.Pp +The closure of body scope may be to the section, where a macro is closed +by +.Sx \&SH ; +sub-section, closed by a section or +.Sx \&SS ; +part, closed by a section, sub-section, or +.Sx \&RE ; +or paragraph, closed by a section, sub-section, part, +.Sx \&HP , +.Sx \&IP , +.Sx \&LP , +.Sx \&P , +.Sx \&PP , +or +.Sx \&TP . +No closure refers to an explicit block closing macro. +.Pp +As a rule, block macros may not be nested; thus, calling a block macro +while another block macro scope is open, and the open scope is not +implicitly closed, is syntactically incorrect. +.Bl -column "MacroX" "ArgumentsX" "Head ScopeX" "sub-sectionX" "compatX" -offset indent +.It Em Macro Ta Em Arguments Ta Em Head Scope Ta Em Body Scope Ta Em Notes +.It Sx \&HP Ta <2 Ta current Ta paragraph Ta \& +.It Sx \&IP Ta <3 Ta current Ta paragraph Ta \& +.It Sx \&LP Ta 0 Ta current Ta paragraph Ta \& +.It Sx \&P Ta 0 Ta current Ta paragraph Ta \& +.It Sx \&PP Ta 0 Ta current Ta paragraph Ta \& +.It Sx \&RE Ta 0 Ta current Ta none Ta compat +.It Sx \&RS Ta 1 Ta current Ta part Ta compat +.It Sx \&SH Ta >0 Ta next-line Ta section Ta \& +.It Sx \&SS Ta >0 Ta next-line Ta sub-section Ta \& +.It Sx \&TP Ta n Ta next-line Ta paragraph Ta \& +.It Sx \&UE Ta 0 Ta current Ta none Ta compat +.It Sx \&UR Ta 1 Ta current Ta part Ta compat +.El +.Pp +Macros marked +.Qq compat +are as mentioned in +.Sx Line Macros . +.Pp +If a block macro is next-line scoped, it may only be followed by in-line +macros for decorating text. +.Ss Font handling +In +.Nm +documents, both +.Sx Physical markup +macros and +.Xr roff 7 +.Ql \ef +font escape sequences can be used to choose fonts. +In text lines, the effect of manual font selection by escape sequences +only lasts until the next macro invocation; in macro lines, it only lasts +until the end of the macro scope. 
+Note that macros like +.Sx \&BR +open and close a font scope for each argument. +.Sh SEE ALSO +.Xr man 1 , +.Xr mandoc 1 , +.Xr eqn 7 , +.Xr mandoc_char 7 , +.Xr mdoc 7 , +.Xr roff 7 , +.Xr tbl 7 +.Sh HISTORY +The +.Nm +language first appeared as a macro package for the roff typesetting +system in +.At v7 . +It was later rewritten by James Clark as a macro package for groff. +Eric S. Raymond wrote the extended +.Nm +macros for groff in 2007. +The stand-alone implementation that is part of the +.Xr mandoc 1 +utility written by Kristaps Dzonsons appeared in +.Ox 4.6 . +.Sh AUTHORS +This +.Nm +reference was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv . +.Sh CAVEATS +Do not use this language. +Use +.Xr mdoc 7 , +instead. diff --git a/contrib/mdocml/man.c b/contrib/mdocml/man.c new file mode 100644 index 0000000..31c094e --- /dev/null +++ b/contrib/mdocml/man.c @@ -0,0 +1,369 @@ +/* $Id: man.c,v 1.166 2015/10/22 21:54:23 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "man.h" +#include "libmandoc.h" +#include "roff_int.h" +#include "libman.h" + +const char *const __man_macronames[MAN_MAX] = { + "br", "TH", "SH", "SS", + "TP", "LP", "PP", "P", + "IP", "HP", "SM", "SB", + "BI", "IB", "BR", "RB", + "R", "B", "I", "IR", + "RI", "sp", "nf", + "fi", "RE", "RS", "DT", + "UC", "PD", "AT", "in", + "ft", "OP", "EX", "EE", + "UR", "UE", "ll" + }; + +const char * const *man_macronames = __man_macronames; + +static void man_descope(struct roff_man *, int, int); +static int man_ptext(struct roff_man *, int, char *, int); +static int man_pmacro(struct roff_man *, int, char *, int); + + +int +man_parseln(struct roff_man *man, int ln, char *buf, int offs) +{ + + if (man->last->type != ROFFT_EQN || ln > man->last->line) + man->flags |= MAN_NEWLINE; + + return roff_getcontrol(man->roff, buf, &offs) ? + man_pmacro(man, ln, buf, offs) : + man_ptext(man, ln, buf, offs); +} + +static void +man_descope(struct roff_man *man, int line, int offs) +{ + /* + * Co-ordinate what happens with having a next-line scope open: + * first close out the element scope (if applicable), then close + * out the block scope (also if applicable). + */ + + if (man->flags & MAN_ELINE) { + man->flags &= ~MAN_ELINE; + man_unscope(man, man->last->parent); + } + if ( ! (man->flags & MAN_BLINE)) + return; + man->flags &= ~MAN_BLINE; + man_unscope(man, man->last->parent); + roff_body_alloc(man, line, offs, man->last->tok); +} + +static int +man_ptext(struct roff_man *man, int line, char *buf, int offs) +{ + int i; + + /* Literal free-form text whitespace is preserved. 
*/ + + if (man->flags & MAN_LITERAL) { + roff_word_alloc(man, line, offs, buf + offs); + man_descope(man, line, offs); + return 1; + } + + for (i = offs; buf[i] == ' '; i++) + /* Skip leading whitespace. */ ; + + /* + * Blank lines are ignored right after headings + * but add a single vertical space elsewhere. + */ + + if (buf[i] == '\0') { + /* Allocate a blank entry. */ + if (man->last->tok != MAN_SH && + man->last->tok != MAN_SS) { + roff_elem_alloc(man, line, offs, MAN_sp); + man->next = ROFF_NEXT_SIBLING; + } + return 1; + } + + /* + * Warn if the last un-escaped character is whitespace. Then + * strip away the remaining spaces (tabs stay!). + */ + + i = (int)strlen(buf); + assert(i); + + if (' ' == buf[i - 1] || '\t' == buf[i - 1]) { + if (i > 1 && '\\' != buf[i - 2]) + mandoc_msg(MANDOCERR_SPACE_EOL, man->parse, + line, i - 1, NULL); + + for (--i; i && ' ' == buf[i]; i--) + /* Spin back to non-space. */ ; + + /* Jump ahead of escaped whitespace. */ + i += '\\' == buf[i] ? 2 : 1; + + buf[i] = '\0'; + } + roff_word_alloc(man, line, offs, buf + offs); + + /* + * End-of-sentence check. If the last character is an unescaped + * EOS character, then flag the node as being the end of a + * sentence. The front-end will know how to interpret this. + */ + + assert(i); + if (mandoc_eos(buf, (size_t)i)) + man->last->flags |= MAN_EOS; + + man_descope(man, line, offs); + return 1; +} + +static int +man_pmacro(struct roff_man *man, int ln, char *buf, int offs) +{ + struct roff_node *n; + const char *cp; + int tok; + int i, ppos; + int bline; + char mac[5]; + + ppos = offs; + + /* + * Copy the first word into a nil-terminated buffer. + * Stop when a space, tab, escape, or eoln is encountered. + */ + + i = 0; + while (i < 4 && strchr(" \t\\", buf[offs]) == NULL) + mac[i++] = buf[offs++]; + + mac[i] = '\0'; + + tok = (i > 0 && i < 4) ? 
man_hash_find(mac) : TOKEN_NONE; + + if (tok == TOKEN_NONE) { + mandoc_msg(MANDOCERR_MACRO, man->parse, + ln, ppos, buf + ppos - 1); + return 1; + } + + /* Skip a leading escape sequence or tab. */ + + switch (buf[offs]) { + case '\\': + cp = buf + offs + 1; + mandoc_escape(&cp, NULL, NULL); + offs = cp - buf; + break; + case '\t': + offs++; + break; + default: + break; + } + + /* Jump to the next non-whitespace word. */ + + while (buf[offs] && buf[offs] == ' ') + offs++; + + /* + * Trailing whitespace. Note that tabs are allowed to be passed + * into the parser as "text", so we only warn about spaces here. + */ + + if (buf[offs] == '\0' && buf[offs - 1] == ' ') + mandoc_msg(MANDOCERR_SPACE_EOL, man->parse, + ln, offs - 1, NULL); + + /* + * Some macros break next-line scopes; otherwise, remember + * whether we are in next-line scope for a block head. + */ + + man_breakscope(man, tok); + bline = man->flags & MAN_BLINE; + + /* Call to handler... */ + + assert(man_macros[tok].fp); + (*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf); + + /* In quick mode (for mandocdb), abort after the NAME section. */ + + if (man->quick && tok == MAN_SH) { + n = man->last; + if (n->type == ROFFT_BODY && + strcmp(n->prev->child->string, "NAME")) + return 2; + } + + /* + * If we are in a next-line scope for a block head, + * close it out now and switch to the body, + * unless the next-line scope is allowed to continue. + */ + + if ( ! bline || man->flags & MAN_ELINE || + man_macros[tok].flags & MAN_NSCOPED) + return 1; + + assert(man->flags & MAN_BLINE); + man->flags &= ~MAN_BLINE; + + man_unscope(man, man->last->parent); + roff_body_alloc(man, ln, ppos, man->last->tok); + return 1; +} + +void +man_breakscope(struct roff_man *man, int tok) +{ + struct roff_node *n; + + /* + * An element next line scope is open, + * and the new macro is not allowed inside elements. + * Delete the element that is being broken. + */ + + if (man->flags & MAN_ELINE && (tok == TOKEN_NONE || + ! 
(man_macros[tok].flags & MAN_NSCOPED))) { + n = man->last; + assert(n->type != ROFFT_TEXT); + if (man_macros[n->tok].flags & MAN_NSCOPED) + n = n->parent; + + mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse, + n->line, n->pos, "%s breaks %s", + tok == TOKEN_NONE ? "TS" : man_macronames[tok], + man_macronames[n->tok]); + + roff_node_delete(man, n); + man->flags &= ~MAN_ELINE; + } + + /* + * Weird special case: + * Switching fill mode closes section headers. + */ + + if (man->flags & MAN_BLINE && + (tok == MAN_nf || tok == MAN_fi) && + (man->last->tok == MAN_SH || man->last->tok == MAN_SS)) { + n = man->last; + man_unscope(man, n); + roff_body_alloc(man, n->line, n->pos, n->tok); + man->flags &= ~MAN_BLINE; + } + + /* + * A block header next line scope is open, + * and the new macro is not allowed inside block headers. + * Delete the block that is being broken. + */ + + if (man->flags & MAN_BLINE && (tok == TOKEN_NONE || + man_macros[tok].flags & MAN_BSCOPE)) { + n = man->last; + if (n->type == ROFFT_TEXT) + n = n->parent; + if ( ! (man_macros[n->tok].flags & MAN_BSCOPE)) + n = n->parent; + + assert(n->type == ROFFT_HEAD); + n = n->parent; + assert(n->type == ROFFT_BLOCK); + assert(man_macros[n->tok].flags & MAN_SCOPED); + + mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse, + n->line, n->pos, "%s breaks %s", + tok == TOKEN_NONE ? "TS" : man_macronames[tok], + man_macronames[n->tok]); + + roff_node_delete(man, n); + man->flags &= ~MAN_BLINE; + } +} + +const struct mparse * +man_mparse(const struct roff_man *man) +{ + + assert(man && man->parse); + return man->parse; +} + +void +man_state(struct roff_man *man, struct roff_node *n) +{ + + switch(n->tok) { + case MAN_nf: + case MAN_EX: + if (man->flags & MAN_LITERAL && ! (n->flags & MAN_VALID)) + mandoc_msg(MANDOCERR_NF_SKIP, man->parse, + n->line, n->pos, "nf"); + man->flags |= MAN_LITERAL; + break; + case MAN_fi: + case MAN_EE: + if ( ! (man->flags & MAN_LITERAL) && + ! 
(n->flags & MAN_VALID)) + mandoc_msg(MANDOCERR_FI_SKIP, man->parse, + n->line, n->pos, "fi"); + man->flags &= ~MAN_LITERAL; + break; + default: + break; + } + man->last->flags |= MAN_VALID; +} + +void +man_validate(struct roff_man *man) +{ + + man->last = man->first; + man_node_validate(man); + man->flags &= ~MAN_LITERAL; +} diff --git a/contrib/mdocml/man.cgi.8 b/contrib/mdocml/man.cgi.8 new file mode 100644 index 0000000..2e54dbf --- /dev/null +++ b/contrib/mdocml/man.cgi.8 @@ -0,0 +1,413 @@ +.\" $Id: man.cgi.8,v 1.13 2015/11/05 20:55:41 schwarze Exp $ +.\" +.\" Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: November 5 2015 $ +.Dt MAN.CGI 8 +.Os +.Sh NAME +.Nm man.cgi +.Nd CGI program to search and display manual pages +.Sh DESCRIPTION +The +.Nm +CGI program searches for manual pages on a WWW server +and displays them to HTTP clients, +providing functionality equivalent to the +.Xr apropos 1 +and +.Xr man 1 +utilities. +It can use multiple manual trees in parallel. 
+.Ss HTML search interface +At the top of each generated HTML page, +.Nm +displays a search form containing these elements: +.Bl -enum +.It +An input box for search queries, expecting +either a name of a manual page or an +.Ar expression +using the syntax described in the +.Xr apropos 1 +manual; filling this in is required for each search. +.Pp +The expression is broken into words at whitespace. +Whitespace characters and backslashes can be escaped +by prepending a backslash. +The effect of prepending a backslash to another character is undefined; +in the current implementation, it has no effect. +.It +A +.Dq Submit +button to send a search request from the client to the server. +.It +A +.Dq Reset +button to undo any changes to the input boxes and the dropdown menus +and reset them to the values contained in the +.Ev QUERY_STRING . +.It +Radio buttons to select pages either by name like in +.Xr man 1 +or using +.Xr apropos 1 +queries. +.It +A dropdown menu to optionally select a manual section. +If one is provided, it has the same effect as the +.Xr man 1 +and +.Xr apropos 1 +.Fl s +option. +Otherwise, pages from all sections are shown. +.It +A dropdown menu to optionally select an architecture. +If one is provided, it has the same effect as the +.Xr man 1 +and +.Xr apropos 1 +.Fl S +option. +By default, pages for all architectures are shown. +.It +A dropdown menu to select a manual tree. +If the configuration file +.Pa /var/www/man/manpath.conf +contains only one manpath, the dropdown menu is not shown. +By default, the first manpath given in the file is used. +.El +.Ss Program output +The +.Nm +program generates five kinds of output pages: +.Bl -tag -width Ds +.It The index page. +This is returned when calling +.Nm +without +.Ev PATH_INFO +and without a +.Ev QUERY_STRING . +It serves as a starting point for using the program +and shows the search form only. +.It A list page. +Lists are returned when searches match more than one manual page. 
+The first column shows the names and section numbers of manuals +as clickable links. +The second column shows the one-line descriptions of the manuals. +.It A manual page. +This output format is used when a search matches exactly one +manual page, or when a link on a list page or an +.Ic \&Xr +link on another manual page is followed. +.It A no-result page. +This is shown when a search request returns no results - +either because it violates the query syntax, or because +the search does not match any manual pages. +.It \&An error page. +This cannot happen by merely clicking the +.Dq Search +button, but only by manually entering an invalid URI. +It does not show the search form, but only an error message +and a link back to the index page. +.El +.Ss Setup +For each manual tree, create one first-level subdirectory below +.Pa /var/www/man . +The name of one of these directories is called a +.Dq manpath +in the context of +.Nm . +Create a single ASCII text file +.Pa /var/www/man/manpath.conf +containing the names of these directories, one per line. +The directory given first is used as the default manpath. +.Pp +Inside each of these directories, use the same directory and file +structure as found below +.Pa /usr/share/man , +that is, second-level subdirectories +.Pa /var/www/man/*/man1 , /var/www/man/*/man2 +etc. containing source +.Xr mdoc 7 +and +.Xr man 7 +manuals with file name extensions matching the section numbers, +second-level subdirectories +.Pa /var/www/man/*/cat1 , /var/www/man/*/cat2 +etc. containing preformatted manuals with the file name extension +.Sq 0 , +and optional third-level subdirectories for architectures. +Use +.Xr makewhatis 8 +to create a +.Xr mandoc.db 5 +database inside each manpath. +.Pp +Configure your web server to execute CGI programs located in +.Pa /cgi-bin . +When using +.Ox +.Xr httpd 8 +or +.Xr nginx 8 , +the +.Xr slowcgi 8 +proxy daemon is needed to translate FastCGI requests to plain old CGI. 
+.Pp +To compile +.Nm , +first copy +.Pa cgi.h.example +to +.Pa cgi.h +and edit it according to your needs. +It contains the following compile-time definitions: +.Bl -tag -width Ds +.It Ev COMPAT_OLDURI +Only useful for running on www.openbsd.org to deal with old URIs containing +.Qq "manpath=OpenBSD " +where the blank character has to be translated to a hyphen. +When compiling for other sites, this definition can be deleted. +.It Ev CSS_DIR +An optional path to the directory containing the CSS files, +to be specified relative to the server's document root, +and to be specified without a trailing slash. +When not specified, the CSS files +are assumed to be in the document root. +This is used in generated HTML code. +.It Ev CUSTOMIZE_TITLE +An ASCII string to be used for the HTML <TITLE> element. +.It Ev HTTP_HOST +The FQDN of the (possibly virtual) host the HTTP server is running on. +This is used for +.Ic Location: +headers in HTTP 303 responses. +.It Ev MAN_DIR +A path to the +.Nm +data directory to be used instead of +.Pa /var/www/man , +relative to the web server +.Xr chroot 2 +directory, to be specified without a trailing slash. +This is prepended to the manpath when opening +.Xr mandoc.db 5 +and manual page files. +.El +.Pp +After editing +.Pa cgi.h , +run +.Pp +.Dl make man.cgi +.Pp +and copy the files to the proper locations. +Reading the +.Cm installcgi +target in the +.Pa Makefile +can help with that, but do not run it without carefully checking it +because the directory layouts of web servers vary greatly. +.Ss URI interface +.Nm +uniform resource identifiers are not needed for interactive use, +but can be useful for deep linking. +They consist of: +.Bl -enum +.It +The +.Cm http:// +protocol specifier. +.It +The host name and a following slash. +.It +The path to the program, normally +.Pa cgi-bin/man.cgi/ . 
+.It +To show a single page, a slash, the manpath, another slash, +and the name of the requested file, for example +.Pa /OpenBSD-current/man1/mandoc.1 . +.It +For searches, a query string starting with a question mark +and consisting of +.Ar key Ns = Ns Ar value +pairs, separated by ampersands, for example +.Pa ?manpath=OpenBSD-current&query=mandoc . +Supported keys are +.Cm manpath , +.Cm query , +.Cm sec , +.Cm arch , +corresponding to +.Xr apropos 1 +.Fl M , +.Ar expression , +.Fl s , +.Fl S , +respectively, and +.Cm apropos , +which is a boolean parameter to select or deselect the +.Xr apropos 1 +query mode. +For backward compatibility with the traditional +.Nm , +.Cm sektion +is supported as an alias for +.Cm sec . +.El +.Ss Restricted character set +For security reasons, in particular to prevent cross site scripting +attacks, some strings used by +.Nm +can only contain the following characters: +.Pp +.Bl -dash -compact -offset indent +.It +lower case and upper case ASCII letters +.It +the ten decimal digits +.It +the dash +.Pq Sq - +.It +the dot +.Pq Sq \&. +.It +the slash +.Pq Sq / +.It +the underscore +.Pq Sq _ +.El +.Pp +In particular, this applies to the +.Ev SCRIPT_NAME , +to all manpaths, and to all architecture names. +.Sh ENVIRONMENT +The web server may pass the following CGI variables to +.Nm : +.Bl -tag -width Ds +.It Ev PATH_INFO +The final part of the URI path passed from the client to the server, +starting after the +.Ev SCRIPT_NAME +and ending before the +.Ev QUERY_STRING . +It is used by the +.Cm show +page to acquire the manpath and filename it needs. +.It Ev QUERY_STRING +The HTTP query string passed from the client to the server. +It is the final part of the URI, after the question mark. +It is used by the +.Cm search +page to acquire the named parameters it needs. +.It Ev SCRIPT_NAME +The path to the +.Nm +binary relative to the server root, usually +.Pa /cgi-bin/man.cgi . 
+This is used for generating URIs to be embedded +in generated HTML code and HTTP headers. +If this contains any character not contained in the +.Sx Restricted character set , +.Nm +reports an internal server error and exits without doing anything. +.El +.Sh FILES +.Bl -tag -width Ds +.It Pa /var/www +Default web server +.Xr chroot 2 +directory. +All the following paths are specified relative to this directory. +.It Pa /cgi-bin/man.cgi +The path to the +.Nm +program relative to the server root. +Can be overridden by +.Ev SCRIPT_NAME . +.It Pa /htdocs +The path to the server document root relative to the server root. +This is part of the web server configuration and not specific to +.Nm . +.It Pa /htdocs/mandoc.css +A style sheet for +.Xr mandoc 1 +HTML styling, referenced from each generated HTML page. +.It Pa /man +Default +.Nm +data directory containing all the manual trees. +Can be overridden by +.Ev MAN_DIR . +.It Pa /man/mandoc/man1/apropos.1 , /man/mandoc/man8/man.cgi.8 +Manual pages documenting +.Nm +itself, linked from the index page. +.It Pa /man/manpath.conf +The list of available manpaths, one per line. +If any of the lines in this file contains a slash +.Pq Sq / +or any character not contained in the +.Sx Restricted character set , +.Nm +reports an internal server error and exits without doing anything. +.It Pa /man/header.html +An optional file containing static HTML code to be inserted right +after opening the <BODY> element. +.It Pa /man/footer.html +An optional file containing static HTML code to be inserted right +before closing the <BODY> element. +.It Pa /man/OpenBSD-current/man1/mandoc.1 +An example +.Xr mdoc 7 +source file located below the +.Dq OpenBSD-current +manpath. +.El +.Sh COMPATIBILITY +The +.Nm +CGI program is call-compatible with queries from the traditional +.Pa man.cgi +script by Wolfram Schneider. +However, the output may not be quite the same. 
+.Sh SEE ALSO +.Xr apropos 1 , +.Xr mandoc.db 5 , +.Xr makewhatis 8 , +.Xr slowcgi 8 +.Sh HISTORY +A version of +.Nm +based on +.Xr mandoc 1 +first appeared in mdocml-1.12.1 (March 2012). +The current SQLite3-based version first appeared in +.Ox 5.6 . +.Sh AUTHORS +.An -nosplit +The +.Nm +program was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +and ported to the SQLite3-based +.Xr mandoc.db 5 +backend by +.An Ingo Schwarze Aq Mt schwarze@openbsd.org . diff --git a/contrib/mdocml/man.conf.5 b/contrib/mdocml/man.conf.5 new file mode 100644 index 0000000..9cfeca7 --- /dev/null +++ b/contrib/mdocml/man.conf.5 @@ -0,0 +1,131 @@ +.\" $Id: man.conf.5,v 1.3 2015/03/27 21:33:20 schwarze Exp $ +.\" +.\" Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: March 27 2015 $ +.Dt MAN.CONF 5 +.Os +.Sh NAME +.Nm man.conf +.Nd configuration file for man +.Sh DESCRIPTION +This is the configuration file +for the +.Xr man 1 , +.Xr apropos 1 , +and +.Xr makewhatis 8 +utilities. +Its presence, and all directives, are optional. +.Pp +This file is an ASCII text file. +Leading whitespace on lines, lines starting with +.Sq # , +and blank lines are ignored. +Words are separated by whitespace. 
+The first word on each line is the name of a configuration directive. +.Pp +The following directives are supported: +.Bl -tag -width Ds +.It Ic manpath Ar path +Override the default search +.Ar path +for +.Xr man 1 , +.Xr apropos 1 , +and +.Xr makewhatis 8 . +It can be used multiple times to specify multiple paths, +with the order determining the manual page search order. +.Pp +Each path is a tree containing subdirectories +whose names consist of the strings +.Sq man +and/or +.Sq cat +followed by the names of sections, usually single digits. +The former are supposed to contain unformatted manual pages in +.Xr mdoc 7 +and/or +.Xr man 7 +format; file names should end with the name of the section +preceded by a dot. +The latter should contain preformatted manual pages; +file names should end with +.Ql .0 . +.Pp +Creating a +.Xr mandoc.db 5 +database with +.Xr makewhatis 8 +in each directory configured with +.Ic manpath +is recommended and necessary for +.Xr apropos 1 +to work, but not strictly required for +.Xr man 1 . +.It Ic output Ar option Op Ar value +Configure the default value of an output option. +These directives are overridden by the +.Fl O +command line options of the same names. +For details, see the +.Xr mandoc 1 +manual. +.Pp +.Bl -column fragment integer "ascii, utf8" -compact +.It Ar option Ta Ar value Ta used by Fl T Ta purpose +.It Ta Ta Ta +.It Ic fragment Ta none Ta Cm html Ta print only body +.It Ic includes Ta string Ta Cm html Ta path to header files +.It Ic indent Ta integer Ta Cm ascii , utf8 Ta left margin +.It Ic man Ta string Ta Cm html Ta path for Xr links +.It Ic paper Ta string Ta Cm ps , pdf Ta paper size +.It Ic style Ta string Ta Cm html Ta CSS file +.It Ic width Ta integer Ta Cm ascii , utf8 Ta right margin +.El +.It Ic _whatdb Ar path Ns Cm /whatis.db +This directive provides the same functionality as +.Ic manpath , +but using a historic and misleading syntax. 
+It is kept for backward compatibility for now, +but will eventually be removed. +.El +.Sh FILES +.Pa /etc/man.conf +.Sh EXAMPLES +The following configuration file reproduces the defaults: +installing it is equivalent to not having a +.Nm +file at all. +.Bd -literal -offset indent +manpath /usr/share/man +manpath /usr/X11R6/man +manpath /usr/local/man +.Ed +.Sh SEE ALSO +.Xr apropos 1 , +.Xr man 1 , +.Xr makewhatis 8 +.Sh HISTORY +A relatively complicated +.Nm +file format first appeared in +.Bx 4.3 Reno . +For +.Ox 5.8 , +it was redesigned from scratch, aiming for simplicity. +.Sh AUTHORS +.An Ingo Schwarze Aq Mt schwarze@openbsd.org diff --git a/contrib/mdocml/man.h b/contrib/mdocml/man.h new file mode 100644 index 0000000..8f63f3b --- /dev/null +++ b/contrib/mdocml/man.h @@ -0,0 +1,66 @@ +/* $Id: man.h,v 1.77 2015/11/07 14:01:16 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#define MAN_br 0 +#define MAN_TH 1 +#define MAN_SH 2 +#define MAN_SS 3 +#define MAN_TP 4 +#define MAN_LP 5 +#define MAN_PP 6 +#define MAN_P 7 +#define MAN_IP 8 +#define MAN_HP 9 +#define MAN_SM 10 +#define MAN_SB 11 +#define MAN_BI 12 +#define MAN_IB 13 +#define MAN_BR 14 +#define MAN_RB 15 +#define MAN_R 16 +#define MAN_B 17 +#define MAN_I 18 +#define MAN_IR 19 +#define MAN_RI 20 +#define MAN_sp 21 +#define MAN_nf 22 +#define MAN_fi 23 +#define MAN_RE 24 +#define MAN_RS 25 +#define MAN_DT 26 +#define MAN_UC 27 +#define MAN_PD 28 +#define MAN_AT 29 +#define MAN_in 30 +#define MAN_ft 31 +#define MAN_OP 32 +#define MAN_EX 33 +#define MAN_EE 34 +#define MAN_UR 35 +#define MAN_UE 36 +#define MAN_ll 37 +#define MAN_MAX 38 + +/* Names of macros. */ +extern const char *const *man_macronames; + + +struct roff_man; + +const struct mparse *man_mparse(const struct roff_man *); +void man_validate(struct roff_man *); diff --git a/contrib/mdocml/man_hash.c b/contrib/mdocml/man_hash.c new file mode 100644 index 0000000..8573994 --- /dev/null +++ b/contrib/mdocml/man_hash.c @@ -0,0 +1,101 @@ +/* $Id: man_hash.c,v 1.34 2015/10/06 18:32:19 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <limits.h> +#include <string.h> + +#include "roff.h" +#include "man.h" +#include "libman.h" + +#define HASH_DEPTH 6 + +#define HASH_ROW(x) do { \ + if (isupper((unsigned char)(x))) \ + (x) -= 65; \ + else \ + (x) -= 97; \ + (x) *= HASH_DEPTH; \ + } while (/* CONSTCOND */ 0) + +/* + * Lookup table is indexed first by lower-case first letter (plus one + * for the period, which is stored in the last row), then by lower or + * uppercase second letter. Buckets correspond to the index of the + * macro (the integer value of the enum stored as a char to save a bit + * of space). + */ +static unsigned char table[26 * HASH_DEPTH]; + + +void +man_hash_init(void) +{ + int i, j, x; + + if (*table != '\0') + return; + + memset(table, UCHAR_MAX, sizeof(table)); + + for (i = 0; i < (int)MAN_MAX; i++) { + x = man_macronames[i][0]; + + assert(isalpha((unsigned char)x)); + + HASH_ROW(x); + + for (j = 0; j < HASH_DEPTH; j++) + if (UCHAR_MAX == table[x + j]) { + table[x + j] = (unsigned char)i; + break; + } + + assert(j < HASH_DEPTH); + } +} + +int +man_hash_find(const char *tmp) +{ + int x, y, i; + int tok; + + if ('\0' == (x = tmp[0])) + return TOKEN_NONE; + if ( ! 
(isalpha((unsigned char)x))) + return TOKEN_NONE; + + HASH_ROW(x); + + for (i = 0; i < HASH_DEPTH; i++) { + if (UCHAR_MAX == (y = table[x + i])) + return TOKEN_NONE; + + tok = y; + if (0 == strcmp(tmp, man_macronames[tok])) + return tok; + } + + return TOKEN_NONE; +} diff --git a/contrib/mdocml/man_html.c b/contrib/mdocml/man_html.c new file mode 100644 index 0000000..d71eb38 --- /dev/null +++ b/contrib/mdocml/man_html.c @@ -0,0 +1,671 @@ +/* $Id: man_html.c,v 1.120 2016/01/08 17:48:09 schwarze Exp $ */ +/* + * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "roff.h" +#include "man.h" +#include "out.h" +#include "html.h" +#include "main.h" + +/* TODO: preserve ident widths. */ +/* FIXME: have PD set the default vspace width. 
*/ + +#define INDENT 5 + +#define MAN_ARGS const struct roff_meta *man, \ + const struct roff_node *n, \ + struct mhtml *mh, \ + struct html *h + +struct mhtml { + int fl; +#define MANH_LITERAL (1 << 0) /* literal context */ +}; + +struct htmlman { + int (*pre)(MAN_ARGS); + int (*post)(MAN_ARGS); +}; + +static void print_bvspace(struct html *, + const struct roff_node *); +static void print_man_head(MAN_ARGS); +static void print_man_nodelist(MAN_ARGS); +static void print_man_node(MAN_ARGS); +static int a2width(const struct roff_node *, + struct roffsu *); +static int man_B_pre(MAN_ARGS); +static int man_HP_pre(MAN_ARGS); +static int man_IP_pre(MAN_ARGS); +static int man_I_pre(MAN_ARGS); +static int man_OP_pre(MAN_ARGS); +static int man_PP_pre(MAN_ARGS); +static int man_RS_pre(MAN_ARGS); +static int man_SH_pre(MAN_ARGS); +static int man_SM_pre(MAN_ARGS); +static int man_SS_pre(MAN_ARGS); +static int man_UR_pre(MAN_ARGS); +static int man_alt_pre(MAN_ARGS); +static int man_br_pre(MAN_ARGS); +static int man_ign_pre(MAN_ARGS); +static int man_in_pre(MAN_ARGS); +static int man_literal_pre(MAN_ARGS); +static void man_root_post(MAN_ARGS); +static void man_root_pre(MAN_ARGS); + +static const struct htmlman mans[MAN_MAX] = { + { man_br_pre, NULL }, /* br */ + { NULL, NULL }, /* TH */ + { man_SH_pre, NULL }, /* SH */ + { man_SS_pre, NULL }, /* SS */ + { man_IP_pre, NULL }, /* TP */ + { man_PP_pre, NULL }, /* LP */ + { man_PP_pre, NULL }, /* PP */ + { man_PP_pre, NULL }, /* P */ + { man_IP_pre, NULL }, /* IP */ + { man_HP_pre, NULL }, /* HP */ + { man_SM_pre, NULL }, /* SM */ + { man_SM_pre, NULL }, /* SB */ + { man_alt_pre, NULL }, /* BI */ + { man_alt_pre, NULL }, /* IB */ + { man_alt_pre, NULL }, /* BR */ + { man_alt_pre, NULL }, /* RB */ + { NULL, NULL }, /* R */ + { man_B_pre, NULL }, /* B */ + { man_I_pre, NULL }, /* I */ + { man_alt_pre, NULL }, /* IR */ + { man_alt_pre, NULL }, /* RI */ + { man_br_pre, NULL }, /* sp */ + { man_literal_pre, NULL }, /* nf */ + { 
man_literal_pre, NULL }, /* fi */ + { NULL, NULL }, /* RE */ + { man_RS_pre, NULL }, /* RS */ + { man_ign_pre, NULL }, /* DT */ + { man_ign_pre, NULL }, /* UC */ + { man_ign_pre, NULL }, /* PD */ + { man_ign_pre, NULL }, /* AT */ + { man_in_pre, NULL }, /* in */ + { man_ign_pre, NULL }, /* ft */ + { man_OP_pre, NULL }, /* OP */ + { man_literal_pre, NULL }, /* EX */ + { man_literal_pre, NULL }, /* EE */ + { man_UR_pre, NULL }, /* UR */ + { NULL, NULL }, /* UE */ + { man_ign_pre, NULL }, /* ll */ +}; + + +/* + * Printing leading vertical space before a block. + * This is used for the paragraph macros. + * The rules are pretty simple, since there's very little nesting going + * on here. Basically, if we're the first within another block (SS/SH), + * then don't emit vertical space. If we are (RS), then do. If not the + * first, print it. + */ +static void +print_bvspace(struct html *h, const struct roff_node *n) +{ + + if (n->body && n->body->child) + if (n->body->child->type == ROFFT_TBL) + return; + + if (n->parent->type == ROFFT_ROOT || n->parent->tok != MAN_RS) + if (NULL == n->prev) + return; + + print_paragraph(h); +} + +void +html_man(void *arg, const struct roff_man *man) +{ + struct mhtml mh; + struct htmlpair tag; + struct html *h; + struct tag *t, *tt; + + memset(&mh, 0, sizeof(mh)); + PAIR_CLASS_INIT(&tag, "mandoc"); + h = (struct html *)arg; + + if ( ! 
(HTML_FRAGMENT & h->oflags)) { + print_gen_decls(h); + t = print_otag(h, TAG_HTML, 0, NULL); + tt = print_otag(h, TAG_HEAD, 0, NULL); + print_man_head(&man->meta, man->first, &mh, h); + print_tagq(h, tt); + print_otag(h, TAG_BODY, 0, NULL); + print_otag(h, TAG_DIV, 1, &tag); + } else + t = print_otag(h, TAG_DIV, 1, &tag); + + print_man_nodelist(&man->meta, man->first, &mh, h); + print_tagq(h, t); + putchar('\n'); +} + +static void +print_man_head(MAN_ARGS) +{ + + print_gen_head(h); + assert(man->title); + assert(man->msec); + bufcat_fmt(h, "%s(%s)", man->title, man->msec); + print_otag(h, TAG_TITLE, 0, NULL); + print_text(h, h->buf); +} + +static void +print_man_nodelist(MAN_ARGS) +{ + + while (n != NULL) { + print_man_node(man, n, mh, h); + n = n->next; + } +} + +static void +print_man_node(MAN_ARGS) +{ + int child; + struct tag *t; + + child = 1; + t = h->tags.head; + + switch (n->type) { + case ROFFT_ROOT: + man_root_pre(man, n, mh, h); + break; + case ROFFT_TEXT: + if ('\0' == *n->string) { + print_paragraph(h); + return; + } + if (n->flags & MAN_LINE && (*n->string == ' ' || + (n->prev != NULL && mh->fl & MANH_LITERAL && + ! (h->flags & HTML_NONEWLINE)))) + print_otag(h, TAG_BR, 0, NULL); + print_text(h, n->string); + return; + case ROFFT_EQN: + if (n->flags & MAN_LINE) + putchar('\n'); + print_eqn(h, n->eqn); + break; + case ROFFT_TBL: + /* + * This will take care of initialising all of the table + * state data for the first table, then tearing it down + * for the last one. + */ + print_tbl(h, n->span); + return; + default: + /* + * Close out scope of font prior to opening a macro + * scope. + */ + if (HTMLFONT_NONE != h->metac) { + h->metal = h->metac; + h->metac = HTMLFONT_NONE; + } + + /* + * Close out the current table, if it's open, and unset + * the "meta" table state. This will be reopened on the + * next table element. 
+ */ + if (h->tblt) { + print_tblclose(h); + t = h->tags.head; + } + if (mans[n->tok].pre) + child = (*mans[n->tok].pre)(man, n, mh, h); + break; + } + + if (child && n->child) + print_man_nodelist(man, n->child, mh, h); + + /* This will automatically close out any font scope. */ + print_stagq(h, t); + + switch (n->type) { + case ROFFT_ROOT: + man_root_post(man, n, mh, h); + break; + case ROFFT_EQN: + break; + default: + if (mans[n->tok].post) + (*mans[n->tok].post)(man, n, mh, h); + break; + } +} + +static int +a2width(const struct roff_node *n, struct roffsu *su) +{ + + if (n->type != ROFFT_TEXT) + return 0; + if (a2roffsu(n->string, su, SCALE_EN)) + return 1; + + return 0; +} + +static void +man_root_pre(MAN_ARGS) +{ + struct htmlpair tag; + struct tag *t, *tt; + char *title; + + assert(man->title); + assert(man->msec); + mandoc_asprintf(&title, "%s(%s)", man->title, man->msec); + + PAIR_CLASS_INIT(&tag, "head"); + t = print_otag(h, TAG_TABLE, 1, &tag); + + print_otag(h, TAG_TBODY, 0, NULL); + + tt = print_otag(h, TAG_TR, 0, NULL); + + PAIR_CLASS_INIT(&tag, "head-ltitle"); + print_otag(h, TAG_TD, 1, &tag); + print_text(h, title); + print_stagq(h, tt); + + PAIR_CLASS_INIT(&tag, "head-vol"); + print_otag(h, TAG_TD, 1, &tag); + if (NULL != man->vol) + print_text(h, man->vol); + print_stagq(h, tt); + + PAIR_CLASS_INIT(&tag, "head-rtitle"); + print_otag(h, TAG_TD, 1, &tag); + print_text(h, title); + print_tagq(h, t); + free(title); +} + +static void +man_root_post(MAN_ARGS) +{ + struct htmlpair tag; + struct tag *t, *tt; + + PAIR_CLASS_INIT(&tag, "foot"); + t = print_otag(h, TAG_TABLE, 1, &tag); + + tt = print_otag(h, TAG_TR, 0, NULL); + + PAIR_CLASS_INIT(&tag, "foot-date"); + print_otag(h, TAG_TD, 1, &tag); + + assert(man->date); + print_text(h, man->date); + print_stagq(h, tt); + + PAIR_CLASS_INIT(&tag, "foot-os"); + print_otag(h, TAG_TD, 1, &tag); + + if (man->os) + print_text(h, man->os); + print_tagq(h, t); +} + + +static int +man_br_pre(MAN_ARGS) +{ + struct 
roffsu su; + struct htmlpair tag; + + SCALE_VS_INIT(&su, 1); + + if (MAN_sp == n->tok) { + if (NULL != (n = n->child)) + if ( ! a2roffsu(n->string, &su, SCALE_VS)) + su.scale = 1.0; + } else + su.scale = 0.0; + + bufinit(h); + bufcat_su(h, "height", &su); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, 1, &tag); + + /* So the div isn't empty: */ + print_text(h, "\\~"); + + return 0; +} + +static int +man_SH_pre(MAN_ARGS) +{ + struct htmlpair tag; + + if (n->type == ROFFT_BLOCK) { + mh->fl &= ~MANH_LITERAL; + PAIR_CLASS_INIT(&tag, "section"); + print_otag(h, TAG_DIV, 1, &tag); + return 1; + } else if (n->type == ROFFT_BODY) + return 1; + + print_otag(h, TAG_H1, 0, NULL); + return 1; +} + +static int +man_alt_pre(MAN_ARGS) +{ + const struct roff_node *nn; + int i, savelit; + enum htmltag fp; + struct tag *t; + + if ((savelit = mh->fl & MANH_LITERAL)) + print_otag(h, TAG_BR, 0, NULL); + + mh->fl &= ~MANH_LITERAL; + + for (i = 0, nn = n->child; nn; nn = nn->next, i++) { + t = NULL; + switch (n->tok) { + case MAN_BI: + fp = i % 2 ? TAG_I : TAG_B; + break; + case MAN_IB: + fp = i % 2 ? TAG_B : TAG_I; + break; + case MAN_RI: + fp = i % 2 ? TAG_I : TAG_MAX; + break; + case MAN_IR: + fp = i % 2 ? TAG_MAX : TAG_I; + break; + case MAN_BR: + fp = i % 2 ? TAG_MAX : TAG_B; + break; + case MAN_RB: + fp = i % 2 ? 
TAG_B : TAG_MAX; + break; + default: + abort(); + } + + if (i) + h->flags |= HTML_NOSPACE; + + if (TAG_MAX != fp) + t = print_otag(h, fp, 0, NULL); + + print_man_node(man, nn, mh, h); + + if (t) + print_tagq(h, t); + } + + if (savelit) + mh->fl |= MANH_LITERAL; + + return 0; +} + +static int +man_SM_pre(MAN_ARGS) +{ + + print_otag(h, TAG_SMALL, 0, NULL); + if (MAN_SB == n->tok) + print_otag(h, TAG_B, 0, NULL); + return 1; +} + +static int +man_SS_pre(MAN_ARGS) +{ + struct htmlpair tag; + + if (n->type == ROFFT_BLOCK) { + mh->fl &= ~MANH_LITERAL; + PAIR_CLASS_INIT(&tag, "subsection"); + print_otag(h, TAG_DIV, 1, &tag); + return 1; + } else if (n->type == ROFFT_BODY) + return 1; + + print_otag(h, TAG_H2, 0, NULL); + return 1; +} + +static int +man_PP_pre(MAN_ARGS) +{ + + if (n->type == ROFFT_HEAD) + return 0; + else if (n->type == ROFFT_BLOCK) + print_bvspace(h, n); + + return 1; +} + +static int +man_IP_pre(MAN_ARGS) +{ + const struct roff_node *nn; + + if (n->type == ROFFT_BODY) { + print_otag(h, TAG_DD, 0, NULL); + return 1; + } else if (n->type != ROFFT_HEAD) { + print_otag(h, TAG_DL, 0, NULL); + return 1; + } + + /* FIXME: width specification. */ + + print_otag(h, TAG_DT, 0, NULL); + + /* For IP, only print the first header element. */ + + if (MAN_IP == n->tok && n->child) + print_man_node(man, n->child, mh, h); + + /* For TP, only print next-line header elements. */ + + if (MAN_TP == n->tok) { + nn = n->child; + while (NULL != nn && 0 == (MAN_LINE & nn->flags)) + nn = nn->next; + while (NULL != nn) { + print_man_node(man, nn, mh, h); + nn = nn->next; + } + } + + return 0; +} + +static int +man_HP_pre(MAN_ARGS) +{ + struct htmlpair tag[2]; + struct roffsu su; + const struct roff_node *np; + + if (n->type == ROFFT_HEAD) + return 0; + else if (n->type != ROFFT_BLOCK) + return 1; + + np = n->head->child; + + if (NULL == np || ! 
a2width(np, &su)) + SCALE_HS_INIT(&su, INDENT); + + bufinit(h); + + print_bvspace(h, n); + bufcat_su(h, "margin-left", &su); + su.scale = -su.scale; + bufcat_su(h, "text-indent", &su); + PAIR_STYLE_INIT(&tag[0], h); + PAIR_CLASS_INIT(&tag[1], "spacer"); + print_otag(h, TAG_DIV, 2, tag); + return 1; +} + +static int +man_OP_pre(MAN_ARGS) +{ + struct tag *tt; + struct htmlpair tag; + + print_text(h, "["); + h->flags |= HTML_NOSPACE; + PAIR_CLASS_INIT(&tag, "opt"); + tt = print_otag(h, TAG_SPAN, 1, &tag); + + if (NULL != (n = n->child)) { + print_otag(h, TAG_B, 0, NULL); + print_text(h, n->string); + } + + print_stagq(h, tt); + + if (NULL != n && NULL != n->next) { + print_otag(h, TAG_I, 0, NULL); + print_text(h, n->next->string); + } + + print_stagq(h, tt); + h->flags |= HTML_NOSPACE; + print_text(h, "]"); + return 0; +} + +static int +man_B_pre(MAN_ARGS) +{ + + print_otag(h, TAG_B, 0, NULL); + return 1; +} + +static int +man_I_pre(MAN_ARGS) +{ + + print_otag(h, TAG_I, 0, NULL); + return 1; +} + +static int +man_literal_pre(MAN_ARGS) +{ + + if (MAN_fi == n->tok || MAN_EE == n->tok) { + print_otag(h, TAG_BR, 0, NULL); + mh->fl &= ~MANH_LITERAL; + } else + mh->fl |= MANH_LITERAL; + + return 0; +} + +static int +man_in_pre(MAN_ARGS) +{ + + print_otag(h, TAG_BR, 0, NULL); + return 0; +} + +static int +man_ign_pre(MAN_ARGS) +{ + + return 0; +} + +static int +man_RS_pre(MAN_ARGS) +{ + struct htmlpair tag; + struct roffsu su; + + if (n->type == ROFFT_HEAD) + return 0; + else if (n->type == ROFFT_BODY) + return 1; + + SCALE_HS_INIT(&su, INDENT); + if (n->head->child) + a2width(n->head->child, &su); + + bufinit(h); + bufcat_su(h, "margin-left", &su); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, 1, &tag); + return 1; +} + +static int +man_UR_pre(MAN_ARGS) +{ + struct htmlpair tag[2]; + + n = n->child; + assert(n->type == ROFFT_HEAD); + if (n->child != NULL) { + assert(n->child->type == ROFFT_TEXT); + PAIR_CLASS_INIT(&tag[0], "link-ext"); + PAIR_HREF_INIT(&tag[1], 
n->child->string); + print_otag(h, TAG_A, 2, tag); + } + + assert(n->next->type == ROFFT_BODY); + if (n->next->child != NULL) + n = n->next; + + print_man_nodelist(man, n->child, mh, h); + + return 0; +} diff --git a/contrib/mdocml/man_macro.c b/contrib/mdocml/man_macro.c new file mode 100644 index 0000000..d153357 --- /dev/null +++ b/contrib/mdocml/man_macro.c @@ -0,0 +1,413 @@ +/* $Id: man_macro.c,v 1.114 2016/01/08 17:48:09 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2012, 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2013 Franco Fichtner <franco@lastsummer.de> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "roff.h" +#include "man.h" +#include "libmandoc.h" +#include "roff_int.h" +#include "libman.h" + +static void blk_close(MACRO_PROT_ARGS); +static void blk_exp(MACRO_PROT_ARGS); +static void blk_imp(MACRO_PROT_ARGS); +static void in_line_eoln(MACRO_PROT_ARGS); +static int man_args(struct roff_man *, int, + int *, char *, char **); +static void rew_scope(struct roff_man *, int); + +const struct man_macro __man_macros[MAN_MAX] = { + { in_line_eoln, MAN_NSCOPED }, /* br */ + { in_line_eoln, MAN_BSCOPE }, /* TH */ + { blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* SH */ + { blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* SS */ + { blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* TP */ + { blk_imp, MAN_BSCOPE }, /* LP */ + { blk_imp, MAN_BSCOPE }, /* PP */ + { blk_imp, MAN_BSCOPE }, /* P */ + { blk_imp, MAN_BSCOPE }, /* IP */ + { blk_imp, MAN_BSCOPE }, /* HP */ + { in_line_eoln, MAN_SCOPED | MAN_JOIN }, /* SM */ + { in_line_eoln, MAN_SCOPED | MAN_JOIN }, /* SB */ + { in_line_eoln, 0 }, /* BI */ + { in_line_eoln, 0 }, /* IB */ + { in_line_eoln, 0 }, /* BR */ + { in_line_eoln, 0 }, /* RB */ + { in_line_eoln, MAN_SCOPED | MAN_JOIN }, /* R */ + { in_line_eoln, MAN_SCOPED | MAN_JOIN }, /* B */ + { in_line_eoln, MAN_SCOPED | MAN_JOIN }, /* I */ + { in_line_eoln, 0 }, /* IR */ + { in_line_eoln, 0 }, /* RI */ + { in_line_eoln, MAN_NSCOPED }, /* sp */ + { in_line_eoln, MAN_NSCOPED }, /* nf */ + { in_line_eoln, MAN_NSCOPED }, /* fi */ + { blk_close, MAN_BSCOPE }, /* RE */ + { blk_exp, MAN_BSCOPE }, /* RS */ + { in_line_eoln, 0 }, /* DT */ + { in_line_eoln, 0 }, /* UC */ + { in_line_eoln, MAN_NSCOPED }, /* PD */ + { in_line_eoln, 0 }, /* AT */ + { in_line_eoln, 0 }, /* in */ + { in_line_eoln, 0 }, /* ft */ + { in_line_eoln, 0 }, /* OP */ + { in_line_eoln, MAN_BSCOPE }, /* EX */ + { in_line_eoln, MAN_BSCOPE }, /* EE */ + { blk_exp, MAN_BSCOPE 
}, /* UR */ + { blk_close, MAN_BSCOPE }, /* UE */ + { in_line_eoln, 0 }, /* ll */ +}; + +const struct man_macro * const man_macros = __man_macros; + + +void +man_unscope(struct roff_man *man, const struct roff_node *to) +{ + struct roff_node *n; + + to = to->parent; + n = man->last; + while (n != to) { + + /* Reached the end of the document? */ + + if (to == NULL && ! (n->flags & MAN_VALID)) { + if (man->flags & (MAN_BLINE | MAN_ELINE) && + man_macros[n->tok].flags & MAN_SCOPED) { + mandoc_vmsg(MANDOCERR_BLK_LINE, + man->parse, n->line, n->pos, + "EOF breaks %s", + man_macronames[n->tok]); + if (man->flags & MAN_ELINE) + man->flags &= ~MAN_ELINE; + else { + assert(n->type == ROFFT_HEAD); + n = n->parent; + man->flags &= ~MAN_BLINE; + } + man->last = n; + n = n->parent; + roff_node_delete(man, man->last); + continue; + } + if (n->type == ROFFT_BLOCK && + man_macros[n->tok].fp == blk_exp) + mandoc_msg(MANDOCERR_BLK_NOEND, + man->parse, n->line, n->pos, + man_macronames[n->tok]); + } + + /* + * We might delete the man->last node + * in the post-validation phase. + * Save a pointer to the parent such that + * we know where to continue the iteration. + */ + + man->last = n; + n = n->parent; + man->last->flags |= MAN_VALID; + } + + /* + * If we ended up at the parent of the node we were + * supposed to rewind to, that means the target node + * got deleted, so add the next node we parse as a child + * of the parent instead of as a sibling of the target. + */ + + man->next = (man->last == to) ? + ROFF_NEXT_CHILD : ROFF_NEXT_SIBLING; +} + +/* + * Rewinding entails ascending the parse tree until a coherent point, + * for example, the `SH' macro will close out any intervening `SS' + * scopes. When a scope is closed, it must be validated and actioned. + */ +static void +rew_scope(struct roff_man *man, int tok) +{ + struct roff_node *n; + + /* Preserve empty paragraphs before RS. 
*/ + + n = man->last; + if (tok == MAN_RS && n->child == NULL && + (n->tok == MAN_P || n->tok == MAN_PP || n->tok == MAN_LP)) + return; + + for (;;) { + if (n->type == ROFFT_ROOT) + return; + if (n->flags & MAN_VALID) { + n = n->parent; + continue; + } + if (n->type != ROFFT_BLOCK) { + if (n->parent->type == ROFFT_ROOT) { + man_unscope(man, n); + return; + } else { + n = n->parent; + continue; + } + } + if (tok != MAN_SH && (n->tok == MAN_SH || + (tok != MAN_SS && (n->tok == MAN_SS || + man_macros[n->tok].fp == blk_exp)))) + return; + man_unscope(man, n); + n = man->last; + } +} + + +/* + * Close out a generic explicit macro. + */ +void +blk_close(MACRO_PROT_ARGS) +{ + int ntok; + const struct roff_node *nn; + char *p; + int nrew, target; + + nrew = 1; + switch (tok) { + case MAN_RE: + ntok = MAN_RS; + if ( ! man_args(man, line, pos, buf, &p)) + break; + for (nn = man->last->parent; nn; nn = nn->parent) + if (nn->tok == ntok && nn->type == ROFFT_BLOCK) + nrew++; + target = strtol(p, &p, 10); + if (*p != '\0') + mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse, + line, p - buf, "RE ... %s", p); + if (target == 0) + target = 1; + nrew -= target; + if (nrew < 1) { + mandoc_vmsg(MANDOCERR_RE_NOTOPEN, man->parse, + line, ppos, "RE %d", target); + return; + } + break; + case MAN_UE: + ntok = MAN_UR; + break; + default: + abort(); + } + + for (nn = man->last->parent; nn; nn = nn->parent) + if (nn->tok == ntok && nn->type == ROFFT_BLOCK && ! --nrew) + break; + + if (nn == NULL) { + mandoc_msg(MANDOCERR_BLK_NOTOPEN, man->parse, + line, ppos, man_macronames[tok]); + rew_scope(man, MAN_PP); + } else { + line = man->last->line; + ppos = man->last->pos; + ntok = man->last->tok; + man_unscope(man, nn); + + /* Move a trailing paragraph behind the block. 
*/ + + if (ntok == MAN_LP || ntok == MAN_PP || ntok == MAN_P) { + *pos = strlen(buf); + blk_imp(man, ntok, line, ppos, pos, buf); + } + } +} + +void +blk_exp(MACRO_PROT_ARGS) +{ + struct roff_node *head; + char *p; + int la; + + rew_scope(man, tok); + roff_block_alloc(man, line, ppos, tok); + head = roff_head_alloc(man, line, ppos, tok); + + la = *pos; + if (man_args(man, line, pos, buf, &p)) + roff_word_alloc(man, line, la, p); + + if (buf[*pos] != '\0') + mandoc_vmsg(MANDOCERR_ARG_EXCESS, + man->parse, line, *pos, "%s ... %s", + man_macronames[tok], buf + *pos); + + man_unscope(man, head); + roff_body_alloc(man, line, ppos, tok); +} + +/* + * Parse an implicit-block macro. These contain a ROFFT_HEAD and a + * ROFFT_BODY contained within a ROFFT_BLOCK. Rules for closing out other + * scopes, such as `SH' closing out an `SS', are defined in the rew + * routines. + */ +void +blk_imp(MACRO_PROT_ARGS) +{ + int la; + char *p; + struct roff_node *n; + + rew_scope(man, tok); + n = roff_block_alloc(man, line, ppos, tok); + if (n->tok == MAN_SH || n->tok == MAN_SS) + man->flags &= ~MAN_LITERAL; + n = roff_head_alloc(man, line, ppos, tok); + + /* Add line arguments. */ + + for (;;) { + la = *pos; + if ( ! man_args(man, line, pos, buf, &p)) + break; + roff_word_alloc(man, line, la, p); + } + + /* + * For macros having optional next-line scope, + * keep the head open if there were no arguments. + * For `TP', always keep the head open. + */ + + if (man_macros[tok].flags & MAN_SCOPED && + (tok == MAN_TP || n == man->last)) { + man->flags |= MAN_BLINE; + return; + } + + /* Close out the head and open the body. 
*/ + + man_unscope(man, n); + roff_body_alloc(man, line, ppos, tok); +} + +void +in_line_eoln(MACRO_PROT_ARGS) +{ + int la; + char *p; + struct roff_node *n; + + roff_elem_alloc(man, line, ppos, tok); + n = man->last; + + for (;;) { + if (buf[*pos] != '\0' && (tok == MAN_br || + tok == MAN_fi || tok == MAN_nf)) { + mandoc_vmsg(MANDOCERR_ARG_SKIP, + man->parse, line, *pos, "%s %s", + man_macronames[tok], buf + *pos); + break; + } + if (buf[*pos] != '\0' && man->last != n && + (tok == MAN_PD || tok == MAN_ft || tok == MAN_sp)) { + mandoc_vmsg(MANDOCERR_ARG_EXCESS, + man->parse, line, *pos, "%s ... %s", + man_macronames[tok], buf + *pos); + break; + } + la = *pos; + if ( ! man_args(man, line, pos, buf, &p)) + break; + if (man_macros[tok].flags & MAN_JOIN && + man->last->type == ROFFT_TEXT) + roff_word_append(man, p); + else + roff_word_alloc(man, line, la, p); + } + + /* + * Append MAN_EOS in case the last snipped argument + * ends with a dot, e.g. `.IR syslog (3).' + */ + + if (n != man->last && + mandoc_eos(man->last->string, strlen(man->last->string))) + man->last->flags |= MAN_EOS; + + /* + * If no arguments are specified and this is MAN_SCOPED (i.e., + * next-line scoped), then set our mode to indicate that we're + * waiting for terms to load into our context. + */ + + if (n == man->last && man_macros[tok].flags & MAN_SCOPED) { + assert( ! (man_macros[tok].flags & MAN_NSCOPED)); + man->flags |= MAN_ELINE; + return; + } + + assert(man->last->type != ROFFT_ROOT); + man->next = ROFF_NEXT_SIBLING; + + /* Rewind our element scope. 
*/ + + for ( ; man->last; man->last = man->last->parent) { + man_state(man, man->last); + if (man->last == n) + break; + } +} + +void +man_endparse(struct roff_man *man) +{ + + man_unscope(man, man->first); + man->flags &= ~MAN_LITERAL; +} + +static int +man_args(struct roff_man *man, int line, int *pos, char *buf, char **v) +{ + char *start; + + assert(*pos); + *v = start = buf + *pos; + assert(' ' != *start); + + if ('\0' == *start) + return 0; + + *v = mandoc_getarg(man->parse, v, line, pos); + return 1; +} diff --git a/contrib/mdocml/man_term.c b/contrib/mdocml/man_term.c new file mode 100644 index 0000000..f45e24a --- /dev/null +++ b/contrib/mdocml/man_term.c @@ -0,0 +1,1177 @@ +/* $Id: man_term.c,v 1.187 2016/01/08 17:48:09 schwarze Exp $ */ +/* + * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "man.h" +#include "out.h" +#include "term.h" +#include "main.h" + +#define MAXMARGINS 64 /* maximum number of indented scopes */ + +struct mtermp { + int fl; +#define MANT_LITERAL (1 << 0) + int lmargin[MAXMARGINS]; /* margins (incl. vis. page) */ + int lmargincur; /* index of current margin */ + int lmarginsz; /* actual number of nested margins */ + size_t offset; /* default offset to visible page */ + int pardist; /* vert. space before par., unit: [v] */ +}; + +#define DECL_ARGS struct termp *p, \ + struct mtermp *mt, \ + struct roff_node *n, \ + const struct roff_meta *meta + +struct termact { + int (*pre)(DECL_ARGS); + void (*post)(DECL_ARGS); + int flags; +#define MAN_NOTEXT (1 << 0) /* Never has text children. */ +}; + +static void print_man_nodelist(DECL_ARGS); +static void print_man_node(DECL_ARGS); +static void print_man_head(struct termp *, + const struct roff_meta *); +static void print_man_foot(struct termp *, + const struct roff_meta *); +static void print_bvspace(struct termp *, + const struct roff_node *, int); + +static int pre_B(DECL_ARGS); +static int pre_HP(DECL_ARGS); +static int pre_I(DECL_ARGS); +static int pre_IP(DECL_ARGS); +static int pre_OP(DECL_ARGS); +static int pre_PD(DECL_ARGS); +static int pre_PP(DECL_ARGS); +static int pre_RS(DECL_ARGS); +static int pre_SH(DECL_ARGS); +static int pre_SS(DECL_ARGS); +static int pre_TP(DECL_ARGS); +static int pre_UR(DECL_ARGS); +static int pre_alternate(DECL_ARGS); +static int pre_ft(DECL_ARGS); +static int pre_ign(DECL_ARGS); +static int pre_in(DECL_ARGS); +static int pre_literal(DECL_ARGS); +static int pre_ll(DECL_ARGS); +static int pre_sp(DECL_ARGS); + +static void post_IP(DECL_ARGS); +static void post_HP(DECL_ARGS); +static void post_RS(DECL_ARGS); 
+static void post_SH(DECL_ARGS); +static void post_SS(DECL_ARGS); +static void post_TP(DECL_ARGS); +static void post_UR(DECL_ARGS); + +static const struct termact termacts[MAN_MAX] = { + { pre_sp, NULL, MAN_NOTEXT }, /* br */ + { NULL, NULL, 0 }, /* TH */ + { pre_SH, post_SH, 0 }, /* SH */ + { pre_SS, post_SS, 0 }, /* SS */ + { pre_TP, post_TP, 0 }, /* TP */ + { pre_PP, NULL, 0 }, /* LP */ + { pre_PP, NULL, 0 }, /* PP */ + { pre_PP, NULL, 0 }, /* P */ + { pre_IP, post_IP, 0 }, /* IP */ + { pre_HP, post_HP, 0 }, /* HP */ + { NULL, NULL, 0 }, /* SM */ + { pre_B, NULL, 0 }, /* SB */ + { pre_alternate, NULL, 0 }, /* BI */ + { pre_alternate, NULL, 0 }, /* IB */ + { pre_alternate, NULL, 0 }, /* BR */ + { pre_alternate, NULL, 0 }, /* RB */ + { NULL, NULL, 0 }, /* R */ + { pre_B, NULL, 0 }, /* B */ + { pre_I, NULL, 0 }, /* I */ + { pre_alternate, NULL, 0 }, /* IR */ + { pre_alternate, NULL, 0 }, /* RI */ + { pre_sp, NULL, MAN_NOTEXT }, /* sp */ + { pre_literal, NULL, 0 }, /* nf */ + { pre_literal, NULL, 0 }, /* fi */ + { NULL, NULL, 0 }, /* RE */ + { pre_RS, post_RS, 0 }, /* RS */ + { pre_ign, NULL, 0 }, /* DT */ + { pre_ign, NULL, MAN_NOTEXT }, /* UC */ + { pre_PD, NULL, MAN_NOTEXT }, /* PD */ + { pre_ign, NULL, 0 }, /* AT */ + { pre_in, NULL, MAN_NOTEXT }, /* in */ + { pre_ft, NULL, MAN_NOTEXT }, /* ft */ + { pre_OP, NULL, 0 }, /* OP */ + { pre_literal, NULL, 0 }, /* EX */ + { pre_literal, NULL, 0 }, /* EE */ + { pre_UR, post_UR, 0 }, /* UR */ + { NULL, NULL, 0 }, /* UE */ + { pre_ll, NULL, MAN_NOTEXT }, /* ll */ +}; + + +void +terminal_man(void *arg, const struct roff_man *man) +{ + struct termp *p; + struct roff_node *n; + struct mtermp mt; + + p = (struct termp *)arg; + p->overstep = 0; + p->rmargin = p->maxrmargin = p->defrmargin; + p->tabwidth = term_len(p, 5); + + memset(&mt, 0, sizeof(struct mtermp)); + mt.lmargin[mt.lmargincur] = term_len(p, p->defindent); + mt.offset = term_len(p, p->defindent); + mt.pardist = 1; + + n = man->first->child; + if (p->synopsisonly) 
{ + while (n != NULL) { + if (n->tok == MAN_SH && + n->child->child->type == ROFFT_TEXT && + !strcmp(n->child->child->string, "SYNOPSIS")) { + if (n->child->next->child != NULL) + print_man_nodelist(p, &mt, + n->child->next->child, + &man->meta); + term_newln(p); + break; + } + n = n->next; + } + } else { + if (p->defindent == 0) + p->defindent = 7; + term_begin(p, print_man_head, print_man_foot, &man->meta); + p->flags |= TERMP_NOSPACE; + if (n != NULL) + print_man_nodelist(p, &mt, n, &man->meta); + term_end(p); + } +} + +/* + * Printing leading vertical space before a block. + * This is used for the paragraph macros. + * The rules are pretty simple, since there's very little nesting going + * on here. Basically, if we're the first within another block (SS/SH), + * then don't emit vertical space. If we are (RS), then do. If not the + * first, print it. + */ +static void +print_bvspace(struct termp *p, const struct roff_node *n, int pardist) +{ + int i; + + term_newln(p); + + if (n->body && n->body->child) + if (n->body->child->type == ROFFT_TBL) + return; + + if (n->parent->type == ROFFT_ROOT || n->parent->tok != MAN_RS) + if (NULL == n->prev) + return; + + for (i = 0; i < pardist; i++) + term_vspace(p); +} + + +static int +pre_ign(DECL_ARGS) +{ + + return 0; +} + +static int +pre_ll(DECL_ARGS) +{ + + term_setwidth(p, n->child != NULL ? n->child->string : NULL); + return 0; +} + +static int +pre_I(DECL_ARGS) +{ + + term_fontrepl(p, TERMFONT_UNDER); + return 1; +} + +static int +pre_literal(DECL_ARGS) +{ + + term_newln(p); + + if (MAN_nf == n->tok || MAN_EX == n->tok) + mt->fl |= MANT_LITERAL; + else + mt->fl &= ~MANT_LITERAL; + + /* + * Unlike .IP and .TP, .HP does not have a HEAD. + * So in case a second call to term_flushln() is needed, + * indentation has to be set up explicitly. 
+ */ + if (MAN_HP == n->parent->tok && p->rmargin < p->maxrmargin) { + p->offset = p->rmargin; + p->rmargin = p->maxrmargin; + p->trailspace = 0; + p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND); + p->flags |= TERMP_NOSPACE; + } + + return 0; +} + +static int +pre_PD(DECL_ARGS) +{ + struct roffsu su; + + n = n->child; + if (n == NULL) { + mt->pardist = 1; + return 0; + } + assert(n->type == ROFFT_TEXT); + if (a2roffsu(n->string, &su, SCALE_VS)) + mt->pardist = term_vspan(p, &su); + return 0; +} + +static int +pre_alternate(DECL_ARGS) +{ + enum termfont font[2]; + struct roff_node *nn; + int savelit, i; + + switch (n->tok) { + case MAN_RB: + font[0] = TERMFONT_NONE; + font[1] = TERMFONT_BOLD; + break; + case MAN_RI: + font[0] = TERMFONT_NONE; + font[1] = TERMFONT_UNDER; + break; + case MAN_BR: + font[0] = TERMFONT_BOLD; + font[1] = TERMFONT_NONE; + break; + case MAN_BI: + font[0] = TERMFONT_BOLD; + font[1] = TERMFONT_UNDER; + break; + case MAN_IR: + font[0] = TERMFONT_UNDER; + font[1] = TERMFONT_NONE; + break; + case MAN_IB: + font[0] = TERMFONT_UNDER; + font[1] = TERMFONT_BOLD; + break; + default: + abort(); + } + + savelit = MANT_LITERAL & mt->fl; + mt->fl &= ~MANT_LITERAL; + + for (i = 0, nn = n->child; nn; nn = nn->next, i = 1 - i) { + term_fontrepl(p, font[i]); + if (savelit && NULL == nn->next) + mt->fl |= MANT_LITERAL; + assert(nn->type == ROFFT_TEXT); + term_word(p, nn->string); + if (nn->flags & MAN_EOS) + p->flags |= TERMP_SENTENCE; + if (nn->next) + p->flags |= TERMP_NOSPACE; + } + + return 0; +} + +static int +pre_B(DECL_ARGS) +{ + + term_fontrepl(p, TERMFONT_BOLD); + return 1; +} + +static int +pre_OP(DECL_ARGS) +{ + + term_word(p, "["); + p->flags |= TERMP_NOSPACE; + + if (NULL != (n = n->child)) { + term_fontrepl(p, TERMFONT_BOLD); + term_word(p, n->string); + } + if (NULL != n && NULL != n->next) { + term_fontrepl(p, TERMFONT_UNDER); + term_word(p, n->next->string); + } + + term_fontrepl(p, TERMFONT_NONE); + p->flags |= TERMP_NOSPACE; + term_word(p, 
"]"); + return 0; +} + +static int +pre_ft(DECL_ARGS) +{ + const char *cp; + + if (NULL == n->child) { + term_fontlast(p); + return 0; + } + + cp = n->child->string; + switch (*cp) { + case '4': + case '3': + case 'B': + term_fontrepl(p, TERMFONT_BOLD); + break; + case '2': + case 'I': + term_fontrepl(p, TERMFONT_UNDER); + break; + case 'P': + term_fontlast(p); + break; + case '1': + case 'C': + case 'R': + term_fontrepl(p, TERMFONT_NONE); + break; + default: + break; + } + return 0; +} + +static int +pre_in(DECL_ARGS) +{ + struct roffsu su; + const char *cp; + size_t v; + int less; + + term_newln(p); + + if (NULL == n->child) { + p->offset = mt->offset; + return 0; + } + + cp = n->child->string; + less = 0; + + if ('-' == *cp) + less = -1; + else if ('+' == *cp) + less = 1; + else + cp--; + + if ( ! a2roffsu(++cp, &su, SCALE_EN)) + return 0; + + v = (term_hspan(p, &su) + 11) / 24; + + if (less < 0) + p->offset -= p->offset > v ? v : p->offset; + else if (less > 0) + p->offset += v; + else + p->offset = v; + if (p->offset > SHRT_MAX) + p->offset = term_len(p, p->defindent); + + return 0; +} + +static int +pre_sp(DECL_ARGS) +{ + struct roffsu su; + int i, len; + + if ((NULL == n->prev && n->parent)) { + switch (n->parent->tok) { + case MAN_SH: + case MAN_SS: + case MAN_PP: + case MAN_LP: + case MAN_P: + return 0; + default: + break; + } + } + + if (n->tok == MAN_br) + len = 0; + else if (n->child == NULL) + len = 1; + else { + if ( ! a2roffsu(n->child->string, &su, SCALE_VS)) + su.scale = 1.0; + len = term_vspan(p, &su); + } + + if (len == 0) + term_newln(p); + else if (len < 0) + p->skipvsp -= len; + else + for (i = 0; i < len; i++) + term_vspace(p); + + /* + * Handle an explicit break request in the same way + * as an overflowing line. 
+ */ + + if (p->flags & TERMP_BRIND) { + p->offset = p->rmargin; + p->rmargin = p->maxrmargin; + p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND); + } + + return 0; +} + +static int +pre_HP(DECL_ARGS) +{ + struct roffsu su; + const struct roff_node *nn; + int len; + + switch (n->type) { + case ROFFT_BLOCK: + print_bvspace(p, n, mt->pardist); + return 1; + case ROFFT_BODY: + break; + default: + return 0; + } + + if ( ! (MANT_LITERAL & mt->fl)) { + p->flags |= TERMP_NOBREAK | TERMP_BRIND; + p->trailspace = 2; + } + + /* Calculate offset. */ + + if ((nn = n->parent->head->child) != NULL && + a2roffsu(nn->string, &su, SCALE_EN)) { + len = term_hspan(p, &su) / 24; + if (len < 0 && (size_t)(-len) > mt->offset) + len = -mt->offset; + else if (len > SHRT_MAX) + len = term_len(p, p->defindent); + mt->lmargin[mt->lmargincur] = len; + } else + len = mt->lmargin[mt->lmargincur]; + + p->offset = mt->offset; + p->rmargin = mt->offset + len; + return 1; +} + +static void +post_HP(DECL_ARGS) +{ + + switch (n->type) { + case ROFFT_BODY: + term_newln(p); + + /* + * Compatibility with a groff bug. + * The .HP macro uses the undocumented .tag request + * which causes a line break and cancels no-space + * mode even if there isn't any output. 
+ */ + + if (n->child == NULL) + term_vspace(p); + + p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND); + p->trailspace = 0; + p->offset = mt->offset; + p->rmargin = p->maxrmargin; + break; + default: + break; + } +} + +static int +pre_PP(DECL_ARGS) +{ + + switch (n->type) { + case ROFFT_BLOCK: + mt->lmargin[mt->lmargincur] = term_len(p, p->defindent); + print_bvspace(p, n, mt->pardist); + break; + default: + p->offset = mt->offset; + break; + } + + return n->type != ROFFT_HEAD; +} + +static int +pre_IP(DECL_ARGS) +{ + struct roffsu su; + const struct roff_node *nn; + int len, savelit; + + switch (n->type) { + case ROFFT_BODY: + p->flags |= TERMP_NOSPACE; + break; + case ROFFT_HEAD: + p->flags |= TERMP_NOBREAK; + p->trailspace = 1; + break; + case ROFFT_BLOCK: + print_bvspace(p, n, mt->pardist); + /* FALLTHROUGH */ + default: + return 1; + } + + /* Calculate the offset from the optional second argument. */ + if ((nn = n->parent->head->child) != NULL && + (nn = nn->next) != NULL && + a2roffsu(nn->string, &su, SCALE_EN)) { + len = term_hspan(p, &su) / 24; + if (len < 0 && (size_t)(-len) > mt->offset) + len = -mt->offset; + else if (len > SHRT_MAX) + len = term_len(p, p->defindent); + mt->lmargin[mt->lmargincur] = len; + } else + len = mt->lmargin[mt->lmargincur]; + + switch (n->type) { + case ROFFT_HEAD: + p->offset = mt->offset; + p->rmargin = mt->offset + len; + + savelit = MANT_LITERAL & mt->fl; + mt->fl &= ~MANT_LITERAL; + + if (n->child) + print_man_node(p, mt, n->child, meta); + + if (savelit) + mt->fl |= MANT_LITERAL; + + return 0; + case ROFFT_BODY: + p->offset = mt->offset + len; + p->rmargin = p->maxrmargin; + break; + default: + break; + } + + return 1; +} + +static void +post_IP(DECL_ARGS) +{ + + switch (n->type) { + case ROFFT_HEAD: + term_flushln(p); + p->flags &= ~TERMP_NOBREAK; + p->trailspace = 0; + p->rmargin = p->maxrmargin; + break; + case ROFFT_BODY: + term_newln(p); + p->offset = mt->offset; + break; + default: + break; + } +} + +static int 
+pre_TP(DECL_ARGS) +{ + struct roffsu su; + struct roff_node *nn; + int len, savelit; + + switch (n->type) { + case ROFFT_HEAD: + p->flags |= TERMP_NOBREAK | TERMP_BRTRSP; + p->trailspace = 1; + break; + case ROFFT_BODY: + p->flags |= TERMP_NOSPACE; + break; + case ROFFT_BLOCK: + print_bvspace(p, n, mt->pardist); + /* FALLTHROUGH */ + default: + return 1; + } + + /* Calculate offset. */ + + if ((nn = n->parent->head->child) != NULL && + nn->string != NULL && ! (MAN_LINE & nn->flags) && + a2roffsu(nn->string, &su, SCALE_EN)) { + len = term_hspan(p, &su) / 24; + if (len < 0 && (size_t)(-len) > mt->offset) + len = -mt->offset; + else if (len > SHRT_MAX) + len = term_len(p, p->defindent); + mt->lmargin[mt->lmargincur] = len; + } else + len = mt->lmargin[mt->lmargincur]; + + switch (n->type) { + case ROFFT_HEAD: + p->offset = mt->offset; + p->rmargin = mt->offset + len; + + savelit = MANT_LITERAL & mt->fl; + mt->fl &= ~MANT_LITERAL; + + /* Don't print same-line elements. */ + nn = n->child; + while (NULL != nn && 0 == (MAN_LINE & nn->flags)) + nn = nn->next; + + while (NULL != nn) { + print_man_node(p, mt, nn, meta); + nn = nn->next; + } + + if (savelit) + mt->fl |= MANT_LITERAL; + return 0; + case ROFFT_BODY: + p->offset = mt->offset + len; + p->rmargin = p->maxrmargin; + p->trailspace = 0; + p->flags &= ~(TERMP_NOBREAK | TERMP_BRTRSP); + break; + default: + break; + } + + return 1; +} + +static void +post_TP(DECL_ARGS) +{ + + switch (n->type) { + case ROFFT_HEAD: + term_flushln(p); + break; + case ROFFT_BODY: + term_newln(p); + p->offset = mt->offset; + break; + default: + break; + } +} + +static int +pre_SS(DECL_ARGS) +{ + int i; + + switch (n->type) { + case ROFFT_BLOCK: + mt->fl &= ~MANT_LITERAL; + mt->lmargin[mt->lmargincur] = term_len(p, p->defindent); + mt->offset = term_len(p, p->defindent); + + /* + * No vertical space before the first subsection + * and after an empty subsection. 
+ */ + + do { + n = n->prev; + } while (n != NULL && n->tok != TOKEN_NONE && + termacts[n->tok].flags & MAN_NOTEXT); + if (n == NULL || (n->tok == MAN_SS && n->body->child == NULL)) + break; + + for (i = 0; i < mt->pardist; i++) + term_vspace(p); + break; + case ROFFT_HEAD: + term_fontrepl(p, TERMFONT_BOLD); + p->offset = term_len(p, 3); + p->rmargin = mt->offset; + p->trailspace = mt->offset; + p->flags |= TERMP_NOBREAK | TERMP_BRIND; + break; + case ROFFT_BODY: + p->offset = mt->offset; + p->rmargin = p->maxrmargin; + p->trailspace = 0; + p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND); + break; + default: + break; + } + + return 1; +} + +static void +post_SS(DECL_ARGS) +{ + + switch (n->type) { + case ROFFT_HEAD: + term_newln(p); + break; + case ROFFT_BODY: + term_newln(p); + break; + default: + break; + } +} + +static int +pre_SH(DECL_ARGS) +{ + int i; + + switch (n->type) { + case ROFFT_BLOCK: + mt->fl &= ~MANT_LITERAL; + mt->lmargin[mt->lmargincur] = term_len(p, p->defindent); + mt->offset = term_len(p, p->defindent); + + /* + * No vertical space before the first section + * and after an empty section. 
+ */ + + do { + n = n->prev; + } while (n != NULL && termacts[n->tok].flags & MAN_NOTEXT); + if (n == NULL || (n->tok == MAN_SH && n->body->child == NULL)) + break; + + for (i = 0; i < mt->pardist; i++) + term_vspace(p); + break; + case ROFFT_HEAD: + term_fontrepl(p, TERMFONT_BOLD); + p->offset = 0; + p->rmargin = mt->offset; + p->trailspace = mt->offset; + p->flags |= TERMP_NOBREAK | TERMP_BRIND; + break; + case ROFFT_BODY: + p->offset = mt->offset; + p->rmargin = p->maxrmargin; + p->trailspace = 0; + p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND); + break; + default: + break; + } + + return 1; +} + +static void +post_SH(DECL_ARGS) +{ + + switch (n->type) { + case ROFFT_HEAD: + term_newln(p); + break; + case ROFFT_BODY: + term_newln(p); + break; + default: + break; + } +} + +static int +pre_RS(DECL_ARGS) +{ + struct roffsu su; + + switch (n->type) { + case ROFFT_BLOCK: + term_newln(p); + return 1; + case ROFFT_HEAD: + return 0; + default: + break; + } + + n = n->parent->head; + n->aux = SHRT_MAX + 1; + if (n->child == NULL) + n->aux = mt->lmargin[mt->lmargincur]; + else if (a2roffsu(n->child->string, &su, SCALE_EN)) + n->aux = term_hspan(p, &su) / 24; + if (n->aux < 0 && (size_t)(-n->aux) > mt->offset) + n->aux = -mt->offset; + else if (n->aux > SHRT_MAX) + n->aux = term_len(p, p->defindent); + + mt->offset += n->aux; + p->offset = mt->offset; + p->rmargin = p->maxrmargin; + + if (++mt->lmarginsz < MAXMARGINS) + mt->lmargincur = mt->lmarginsz; + + mt->lmargin[mt->lmargincur] = term_len(p, p->defindent); + return 1; +} + +static void +post_RS(DECL_ARGS) +{ + + switch (n->type) { + case ROFFT_BLOCK: + return; + case ROFFT_HEAD: + return; + default: + term_newln(p); + break; + } + + mt->offset -= n->parent->head->aux; + p->offset = mt->offset; + + if (--mt->lmarginsz < MAXMARGINS) + mt->lmargincur = mt->lmarginsz; +} + +static int +pre_UR(DECL_ARGS) +{ + + return n->type != ROFFT_HEAD; +} + +static void +post_UR(DECL_ARGS) +{ + + if (n->type != ROFFT_BLOCK) + return; + + 
term_word(p, "<"); + p->flags |= TERMP_NOSPACE; + + if (NULL != n->child->child) + print_man_node(p, mt, n->child->child, meta); + + p->flags |= TERMP_NOSPACE; + term_word(p, ">"); +} + +static void +print_man_node(DECL_ARGS) +{ + size_t rm, rmax; + int c; + + switch (n->type) { + case ROFFT_TEXT: + /* + * If we have a blank line, output a vertical space. + * If we have a space as the first character, break + * before printing the line's data. + */ + if ('\0' == *n->string) { + term_vspace(p); + return; + } else if (' ' == *n->string && MAN_LINE & n->flags) + term_newln(p); + + term_word(p, n->string); + goto out; + + case ROFFT_EQN: + if ( ! (n->flags & MAN_LINE)) + p->flags |= TERMP_NOSPACE; + term_eqn(p, n->eqn); + if (n->next != NULL && ! (n->next->flags & MAN_LINE)) + p->flags |= TERMP_NOSPACE; + return; + case ROFFT_TBL: + if (p->tbl.cols == NULL) + term_vspace(p); + term_tbl(p, n->span); + return; + default: + break; + } + + if ( ! (MAN_NOTEXT & termacts[n->tok].flags)) + term_fontrepl(p, TERMFONT_NONE); + + c = 1; + if (termacts[n->tok].pre) + c = (*termacts[n->tok].pre)(p, mt, n, meta); + + if (c && n->child) + print_man_nodelist(p, mt, n->child, meta); + + if (termacts[n->tok].post) + (*termacts[n->tok].post)(p, mt, n, meta); + if ( ! (MAN_NOTEXT & termacts[n->tok].flags)) + term_fontrepl(p, TERMFONT_NONE); + +out: + /* + * If we're in a literal context, make sure that words + * together on the same line stay together. This is a + * POST-printing call, so we check the NEXT word. Since + * -man doesn't have nested macros, we don't need to be + * more specific than this. + */ + if (mt->fl & MANT_LITERAL && + ! 
(p->flags & (TERMP_NOBREAK | TERMP_NONEWLINE)) && + (n->next == NULL || n->next->flags & MAN_LINE)) { + rm = p->rmargin; + rmax = p->maxrmargin; + p->rmargin = p->maxrmargin = TERM_MAXMARGIN; + p->flags |= TERMP_NOSPACE; + if (n->string != NULL && *n->string != '\0') + term_flushln(p); + else + term_newln(p); + if (rm < rmax && n->parent->tok == MAN_HP) { + p->offset = rm; + p->rmargin = rmax; + } else + p->rmargin = rm; + p->maxrmargin = rmax; + } + if (MAN_EOS & n->flags) + p->flags |= TERMP_SENTENCE; +} + + +static void +print_man_nodelist(DECL_ARGS) +{ + + while (n != NULL) { + print_man_node(p, mt, n, meta); + n = n->next; + } +} + +static void +print_man_foot(struct termp *p, const struct roff_meta *meta) +{ + char *title; + size_t datelen, titlen; + + assert(meta->title); + assert(meta->msec); + assert(meta->date); + + term_fontrepl(p, TERMFONT_NONE); + + if (meta->hasbody) + term_vspace(p); + + /* + * Temporary, undocumented option to imitate mdoc(7) output. + * In the bottom right corner, use the operating system + * instead of the title. + */ + + if ( ! p->mdocstyle) { + if (meta->hasbody) { + term_vspace(p); + term_vspace(p); + } + mandoc_asprintf(&title, "%s(%s)", + meta->title, meta->msec); + } else if (meta->os) { + title = mandoc_strdup(meta->os); + } else { + title = mandoc_strdup(""); + } + datelen = term_strlen(p, meta->date); + + /* Bottom left corner: operating system. */ + + p->flags |= TERMP_NOSPACE | TERMP_NOBREAK; + p->trailspace = 1; + p->offset = 0; + p->rmargin = p->maxrmargin > datelen ? + (p->maxrmargin + term_len(p, 1) - datelen) / 2 : 0; + + if (meta->os) + term_word(p, meta->os); + term_flushln(p); + + /* At the bottom in the middle: manual date. */ + + p->offset = p->rmargin; + titlen = term_strlen(p, title); + p->rmargin = p->maxrmargin > titlen ? p->maxrmargin - titlen : 0; + p->flags |= TERMP_NOSPACE; + + term_word(p, meta->date); + term_flushln(p); + + /* Bottom right corner: manual title and section. 
*/ + + p->flags &= ~TERMP_NOBREAK; + p->flags |= TERMP_NOSPACE; + p->trailspace = 0; + p->offset = p->rmargin; + p->rmargin = p->maxrmargin; + + term_word(p, title); + term_flushln(p); + free(title); +} + +static void +print_man_head(struct termp *p, const struct roff_meta *meta) +{ + const char *volume; + char *title; + size_t vollen, titlen; + + assert(meta->title); + assert(meta->msec); + + volume = NULL == meta->vol ? "" : meta->vol; + vollen = term_strlen(p, volume); + + /* Top left corner: manual title and section. */ + + mandoc_asprintf(&title, "%s(%s)", meta->title, meta->msec); + titlen = term_strlen(p, title); + + p->flags |= TERMP_NOBREAK | TERMP_NOSPACE; + p->trailspace = 1; + p->offset = 0; + p->rmargin = 2 * (titlen+1) + vollen < p->maxrmargin ? + (p->maxrmargin - vollen + term_len(p, 1)) / 2 : + vollen < p->maxrmargin ? p->maxrmargin - vollen : 0; + + term_word(p, title); + term_flushln(p); + + /* At the top in the middle: manual volume. */ + + p->flags |= TERMP_NOSPACE; + p->offset = p->rmargin; + p->rmargin = p->offset + vollen + titlen < p->maxrmargin ? + p->maxrmargin - titlen : p->maxrmargin; + + term_word(p, volume); + term_flushln(p); + + /* Top right corner: title and section, again. */ + + p->flags &= ~TERMP_NOBREAK; + p->trailspace = 0; + if (p->rmargin + titlen <= p->maxrmargin) { + p->flags |= TERMP_NOSPACE; + p->offset = p->rmargin; + p->rmargin = p->maxrmargin; + term_word(p, title); + term_flushln(p); + } + + p->flags &= ~TERMP_NOSPACE; + p->offset = 0; + p->rmargin = p->maxrmargin; + + /* + * Groff prints three blank lines before the content. + * Do the same, except in the temporary, undocumented + * mode imitating mdoc(7) output. + */ + + term_vspace(p); + if ( ! 
p->mdocstyle) { + term_vspace(p); + term_vspace(p); + } + free(title); +} diff --git a/contrib/mdocml/man_validate.c b/contrib/mdocml/man_validate.c new file mode 100644 index 0000000..16d9963 --- /dev/null +++ b/contrib/mdocml/man_validate.c @@ -0,0 +1,495 @@ +/* $OpenBSD$ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010, 2012-2016 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "man.h" +#include "libmandoc.h" +#include "roff_int.h" +#include "libman.h" + +#define CHKARGS struct roff_man *man, struct roff_node *n + +typedef void (*v_check)(CHKARGS); + +static void check_par(CHKARGS); +static void check_part(CHKARGS); +static void check_root(CHKARGS); +static void check_text(CHKARGS); + +static void post_AT(CHKARGS); +static void post_IP(CHKARGS); +static void post_vs(CHKARGS); +static void post_ft(CHKARGS); +static void post_OP(CHKARGS); +static void post_TH(CHKARGS); +static void post_UC(CHKARGS); +static void post_UR(CHKARGS); + +static v_check man_valids[MAN_MAX] = { + post_vs, /* br */ + post_TH, /* TH */ + NULL, /* SH */ + NULL, /* SS */ + NULL, /* TP */ + check_par, /* LP */ + check_par, /* PP */ + check_par, /* P */ + post_IP, /* IP */ + NULL, /* HP */ + NULL, /* SM */ + NULL, /* SB */ + NULL, /* BI */ + NULL, /* IB */ + NULL, /* BR */ + NULL, /* RB */ + NULL, /* R */ + NULL, /* B */ + NULL, /* I */ + NULL, /* IR */ + NULL, /* RI */ + post_vs, /* sp */ + NULL, /* nf */ + NULL, /* fi */ + NULL, /* RE */ + check_part, /* RS */ + NULL, /* DT */ + post_UC, /* UC */ + NULL, /* PD */ + post_AT, /* AT */ + NULL, /* in */ + post_ft, /* ft */ + post_OP, /* OP */ + NULL, /* EX */ + NULL, /* EE */ + post_UR, /* UR */ + NULL, /* UE */ + NULL, /* ll */ +}; + + +void +man_node_validate(struct roff_man *man) +{ + struct roff_node *n; + v_check *cp; + + n = man->last; + man->last = man->last->child; + while (man->last != NULL) { + man_node_validate(man); + if (man->last == n) + man->last = man->last->child; + else + man->last = man->last->next; + } + + man->last = n; + man->next = ROFF_NEXT_SIBLING; + switch (n->type) { + case ROFFT_TEXT: + check_text(man, 
n); + break; + case ROFFT_ROOT: + check_root(man, n); + break; + case ROFFT_EQN: + case ROFFT_TBL: + break; + default: + cp = man_valids + n->tok; + if (*cp) + (*cp)(man, n); + if (man->last == n) + man_state(man, n); + break; + } +} + +static void +check_root(CHKARGS) +{ + + assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0); + + if (NULL == man->first->child) + mandoc_msg(MANDOCERR_DOC_EMPTY, man->parse, + n->line, n->pos, NULL); + else + man->meta.hasbody = 1; + + if (NULL == man->meta.title) { + mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse, + n->line, n->pos, NULL); + + /* + * If a title hasn't been set, do so now (by + * implication, date and section also aren't set). + */ + + man->meta.title = mandoc_strdup(""); + man->meta.msec = mandoc_strdup(""); + man->meta.date = man->quick ? mandoc_strdup("") : + mandoc_normdate(man->parse, NULL, n->line, n->pos); + } +} + +static void +check_text(CHKARGS) +{ + char *cp, *p; + + if (MAN_LITERAL & man->flags) + return; + + cp = n->string; + for (p = cp; NULL != (p = strchr(p, '\t')); p++) + mandoc_msg(MANDOCERR_FI_TAB, man->parse, + n->line, n->pos + (p - cp), NULL); +} + +static void +post_OP(CHKARGS) +{ + + if (n->child == NULL) + mandoc_msg(MANDOCERR_OP_EMPTY, man->parse, + n->line, n->pos, "OP"); + else if (n->child->next != NULL && n->child->next->next != NULL) { + n = n->child->next->next; + mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse, + n->line, n->pos, "OP ... 
%s", n->string); + } +} + +static void +post_UR(CHKARGS) +{ + + if (n->type == ROFFT_HEAD && n->child == NULL) + mandoc_vmsg(MANDOCERR_UR_NOHEAD, man->parse, + n->line, n->pos, "UR"); + check_part(man, n); +} + +static void +post_ft(CHKARGS) +{ + char *cp; + int ok; + + if (n->child == NULL) + return; + + ok = 0; + cp = n->child->string; + switch (*cp) { + case '1': + case '2': + case '3': + case '4': + case 'I': + case 'P': + case 'R': + if ('\0' == cp[1]) + ok = 1; + break; + case 'B': + if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2])) + ok = 1; + break; + case 'C': + if ('W' == cp[1] && '\0' == cp[2]) + ok = 1; + break; + default: + break; + } + + if (0 == ok) { + mandoc_vmsg(MANDOCERR_FT_BAD, man->parse, + n->line, n->pos, "ft %s", cp); + *cp = '\0'; + } +} + +static void +check_part(CHKARGS) +{ + + if (n->type == ROFFT_BODY && n->child == NULL) + mandoc_msg(MANDOCERR_BLK_EMPTY, man->parse, + n->line, n->pos, man_macronames[n->tok]); +} + +static void +check_par(CHKARGS) +{ + + switch (n->type) { + case ROFFT_BLOCK: + if (n->body->child == NULL) + roff_node_delete(man, n); + break; + case ROFFT_BODY: + if (n->child == NULL) + mandoc_vmsg(MANDOCERR_PAR_SKIP, + man->parse, n->line, n->pos, + "%s empty", man_macronames[n->tok]); + break; + case ROFFT_HEAD: + if (n->child != NULL) + mandoc_vmsg(MANDOCERR_ARG_SKIP, + man->parse, n->line, n->pos, + "%s %s%s", man_macronames[n->tok], + n->child->string, + n->child->next != NULL ? " ..." 
: ""); + break; + default: + break; + } +} + +static void +post_IP(CHKARGS) +{ + + switch (n->type) { + case ROFFT_BLOCK: + if (n->head->child == NULL && n->body->child == NULL) + roff_node_delete(man, n); + break; + case ROFFT_BODY: + if (n->parent->head->child == NULL && n->child == NULL) + mandoc_vmsg(MANDOCERR_PAR_SKIP, + man->parse, n->line, n->pos, + "%s empty", man_macronames[n->tok]); + break; + default: + break; + } +} + +static void +post_TH(CHKARGS) +{ + struct roff_node *nb; + const char *p; + + free(man->meta.title); + free(man->meta.vol); + free(man->meta.os); + free(man->meta.msec); + free(man->meta.date); + + man->meta.title = man->meta.vol = man->meta.date = + man->meta.msec = man->meta.os = NULL; + + nb = n; + + /* ->TITLE<- MSEC DATE OS VOL */ + + n = n->child; + if (n && n->string) { + for (p = n->string; '\0' != *p; p++) { + /* Only warn about this once... */ + if (isalpha((unsigned char)*p) && + ! isupper((unsigned char)*p)) { + mandoc_vmsg(MANDOCERR_TITLE_CASE, + man->parse, n->line, + n->pos + (p - n->string), + "TH %s", n->string); + break; + } + } + man->meta.title = mandoc_strdup(n->string); + } else { + man->meta.title = mandoc_strdup(""); + mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse, + nb->line, nb->pos, "TH"); + } + + /* TITLE ->MSEC<- DATE OS VOL */ + + if (n) + n = n->next; + if (n && n->string) + man->meta.msec = mandoc_strdup(n->string); + else { + man->meta.msec = mandoc_strdup(""); + mandoc_vmsg(MANDOCERR_MSEC_MISSING, man->parse, + nb->line, nb->pos, "TH %s", man->meta.title); + } + + /* TITLE MSEC ->DATE<- OS VOL */ + + if (n) + n = n->next; + if (n && n->string && '\0' != n->string[0]) { + man->meta.date = man->quick ? + mandoc_strdup(n->string) : + mandoc_normdate(man->parse, n->string, + n->line, n->pos); + } else { + man->meta.date = mandoc_strdup(""); + mandoc_msg(MANDOCERR_DATE_MISSING, man->parse, + n ? n->line : nb->line, + n ? 
n->pos : nb->pos, "TH"); + } + + /* TITLE MSEC DATE ->OS<- VOL */ + + if (n && (n = n->next)) + man->meta.os = mandoc_strdup(n->string); + else if (man->defos != NULL) + man->meta.os = mandoc_strdup(man->defos); + + /* TITLE MSEC DATE OS ->VOL<- */ + /* If missing, use the default VOL name for MSEC. */ + + if (n && (n = n->next)) + man->meta.vol = mandoc_strdup(n->string); + else if ('\0' != man->meta.msec[0] && + (NULL != (p = mandoc_a2msec(man->meta.msec)))) + man->meta.vol = mandoc_strdup(p); + + if (n != NULL && (n = n->next) != NULL) + mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse, + n->line, n->pos, "TH ... %s", n->string); + + /* + * Remove the `TH' node after we've processed it for our + * meta-data. + */ + roff_node_delete(man, man->last); +} + +static void +post_UC(CHKARGS) +{ + static const char * const bsd_versions[] = { + "3rd Berkeley Distribution", + "4th Berkeley Distribution", + "4.2 Berkeley Distribution", + "4.3 Berkeley Distribution", + "4.4 Berkeley Distribution", + }; + + const char *p, *s; + + n = n->child; + + if (n == NULL || n->type != ROFFT_TEXT) + p = bsd_versions[0]; + else { + s = n->string; + if (0 == strcmp(s, "3")) + p = bsd_versions[0]; + else if (0 == strcmp(s, "4")) + p = bsd_versions[1]; + else if (0 == strcmp(s, "5")) + p = bsd_versions[2]; + else if (0 == strcmp(s, "6")) + p = bsd_versions[3]; + else if (0 == strcmp(s, "7")) + p = bsd_versions[4]; + else + p = bsd_versions[0]; + } + + free(man->meta.os); + man->meta.os = mandoc_strdup(p); +} + +static void +post_AT(CHKARGS) +{ + static const char * const unix_versions[] = { + "7th Edition", + "System III", + "System V", + "System V Release 2", + }; + + struct roff_node *nn; + const char *p, *s; + + n = n->child; + + if (n == NULL || n->type != ROFFT_TEXT) + p = unix_versions[0]; + else { + s = n->string; + if (0 == strcmp(s, "3")) + p = unix_versions[0]; + else if (0 == strcmp(s, "4")) + p = unix_versions[1]; + else if (0 == strcmp(s, "5")) { + nn = n->next; + if (nn != NULL && 
+ nn->type == ROFFT_TEXT && + nn->string[0] != '\0') + p = unix_versions[3]; + else + p = unix_versions[2]; + } else + p = unix_versions[0]; + } + + free(man->meta.os); + man->meta.os = mandoc_strdup(p); +} + +static void +post_vs(CHKARGS) +{ + + if (NULL != n->prev) + return; + + switch (n->parent->tok) { + case MAN_SH: + case MAN_SS: + mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, n->line, n->pos, + "%s after %s", man_macronames[n->tok], + man_macronames[n->parent->tok]); + /* FALLTHROUGH */ + case TOKEN_NONE: + /* + * Don't warn about this because it occurs in pod2man + * and would cause considerable (unfixable) warnage. + */ + roff_node_delete(man, n); + break; + default: + break; + } +} diff --git a/contrib/mdocml/manconf.h b/contrib/mdocml/manconf.h new file mode 100644 index 0000000..782269e --- /dev/null +++ b/contrib/mdocml/manconf.h @@ -0,0 +1,48 @@ +/* $OpenBSD$ */ +/* + * Copyright (c) 2011, 2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* List of unique, absolute paths to manual trees. */ + +struct manpaths { + char **paths; + size_t sz; +}; + +/* Data from -O options and man.conf(5) output directives. 
*/ + +struct manoutput { + char *includes; + char *man; + char *paper; + char *style; + size_t indent; + size_t width; + int fragment; + int mdoc; + int synopsisonly; +}; + +struct manconf { + struct manoutput output; + struct manpaths manpath; +}; + + +void manconf_parse(struct manconf *, const char *, char *, char *); +void manconf_output(struct manoutput *, const char *); +void manconf_free(struct manconf *); diff --git a/contrib/mdocml/mandoc.1 b/contrib/mdocml/mandoc.1 new file mode 100644 index 0000000..f4707aa --- /dev/null +++ b/contrib/mdocml/mandoc.1 @@ -0,0 +1,1823 @@ +.\" $Id: mandoc.1,v 1.164 2015/11/05 17:47:51 schwarze Exp $ +.\" +.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> +.\" Copyright (c) 2012, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: November 5 2015 $ +.Dt MANDOC 1 +.Os +.Sh NAME +.Nm mandoc +.Nd format and display UNIX manuals +.Sh SYNOPSIS +.Nm mandoc +.Op Fl acfhkl +.Op Fl I Cm os Ns = Ns Ar name +.Op Fl K Ar encoding +.Op Fl m Ns Ar format +.Op Fl O Ar option +.Op Fl T Ar output +.Op Fl W Ar level +.Op Ar +.Sh DESCRIPTION +The +.Nm +utility formats +.Ux +manual pages for display. 
+.Pp +By default, +.Nm +reads +.Xr mdoc 7 +or +.Xr man 7 +text from stdin, implying +.Fl m Ns Cm andoc , +and produces +.Fl T Cm locale +output. +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl a +If the standard output is a terminal device and +.Fl c +is not specified, use +.Xr more 1 +to paginate the output, just like +.Xr man 1 +would. +.It Fl c +Copy the formatted manual pages to the standard output without using +.Xr more 1 +to paginate them. +This is the default. +It can be specified to override +.Fl a . +.It Fl f +A synonym for +.Xr whatis 1 . +This overrides any earlier +.Fl k +and +.Fl l +options. +.It Fl I Cm os Ns = Ns Ar name +Override the default operating system +.Ar name +for the +.Xr mdoc 7 +.Sq \&Os +and for the +.Xr man 7 +.Sq \&TH +macro. +.It Fl h +Display only the SYNOPSIS lines. +Implies +.Fl c . +.It Fl K Ar encoding +Specify the input encoding. +The supported +.Ar encoding +arguments are +.Cm us-ascii , +.Cm iso-8859-1 , +and +.Cm utf-8 . +If not specified, autodetection uses the first match: +.Bl -tag -width iso-8859-1 +.It Cm utf-8 +if the first three bytes of the input file +are the UTF-8 byte order mark (BOM, 0xefbbbf) +.It Ar encoding +if the first or second line of the input file matches the +.Sy emacs +mode line format +.Pp +.D1 .\e" -*- Oo ...; Oc coding: Ar encoding ; No -*- +.It Cm utf-8 +if the first non-ASCII byte in the file introduces a valid UTF-8 sequence +.It Cm iso-8859-1 +otherwise +.El +.It Fl k +A synonym for +.Xr apropos 1 . +This overrides any earlier +.Fl f +and +.Fl l +options. +.It Fl l +A synonym for +.Fl a . +Also reverts any earlier +.Fl f +and +.Fl k +options. +.It Fl m Ns Ar format +Input format. +See +.Sx Input Formats +for available formats. +Defaults to +.Fl m Ns Cm andoc . +.It Fl O Ar option +Comma-separated output options. +.It Fl T Ar output +Output format. +See +.Sx Output Formats +for available formats. +Defaults to +.Fl T Cm locale . 
+.It Fl W Ar level +Specify the minimum message +.Ar level +to be reported on the standard error output and to affect the exit status. +The +.Ar level +can be +.Cm warning , +.Cm error , +or +.Cm unsupp ; +.Cm all +is an alias for +.Cm warning . +By default, +.Nm +is silent. +See +.Sx EXIT STATUS +and +.Sx DIAGNOSTICS +for details. +.Pp +The special option +.Fl W Cm stop +tells +.Nm +to exit after parsing a file that causes warnings or errors of at least +the requested level. +No formatted output will be produced from that file. +If both a +.Ar level +and +.Cm stop +are requested, they can be joined with a comma, for example +.Fl W Cm error , Ns Cm stop . +.It Ar file +Read input from zero or more files. +If unspecified, reads from stdin. +If multiple files are specified, +.Nm +will halt with the first failed parse. +.El +.Pp +In +.Fl f +and +.Fl k +mode, +.Nm +also supports the options +.Fl CMmOSsw +described in the +.Xr apropos 1 +manual. +.Ss Input Formats +The +.Nm +utility accepts +.Xr mdoc 7 +and +.Xr man 7 +input with +.Fl m Ns Cm doc +and +.Fl m Ns Cm an , +respectively. +The +.Xr mdoc 7 +format is +.Em strongly +recommended; +.Xr man 7 +should only be used for legacy manuals. +.Pp +A third option, +.Fl m Ns Cm andoc , +which is also the default, determines encoding on-the-fly: if the first +non-comment macro is +.Sq \&Dd +or +.Sq \&Dt , +the +.Xr mdoc 7 +parser is used; otherwise, the +.Xr man 7 +parser is used. +.Pp +If multiple +files are specified with +.Fl m Ns Cm andoc , +each has its file-type determined this way. +If multiple files are +specified and +.Fl m Ns Cm doc +or +.Fl m Ns Cm an +is specified, then this format is used exclusively. +.Ss Output Formats +The +.Nm +utility accepts the following +.Fl T +arguments, which correspond to output modes: +.Bl -tag -width "-T locale" +.It Fl T Cm ascii +Produce 7-bit ASCII output. +See +.Sx ASCII Output . +.It Fl T Cm html +Produce HTML5, CSS1, and MathML output. +See +.Sx HTML Output . 
+.It Fl T Cm lint +Parse only: produce no output. +Implies +.Fl W Cm warning . +.It Fl T Cm locale +Encode output using the current locale. +This is the default. +See +.Sx Locale Output . +.It Fl T Cm man +Produce +.Xr man 7 +format output. +See +.Sx Man Output . +.It Fl T Cm pdf +Produce PDF output. +See +.Sx PDF Output . +.It Fl T Cm ps +Produce PostScript output. +See +.Sx PostScript Output . +.It Fl T Cm tree +Produce an indented parse tree. +See +.Sx Syntax tree output . +.It Fl T Cm utf8 +Encode output in the UTF\-8 multi-byte format. +See +.Sx UTF\-8 Output . +.It Fl T Cm xhtml +This is a synonym for +.Fl T Cm html . +.El +.Pp +If multiple input files are specified, these will be processed by the +corresponding filter in-order. +.Ss ASCII Output +Output produced by +.Fl T Cm ascii +is rendered in standard 7-bit ASCII documented in +.Xr ascii 7 . +.Pp +Font styles are applied by using back-spaced encoding such that an +underlined character +.Sq c +is rendered as +.Sq _ Ns \e[bs] Ns c , +where +.Sq \e[bs] +is the back-space character number 8. +Emboldened characters are rendered as +.Sq c Ns \e[bs] Ns c . +.Pp +The special characters documented in +.Xr mandoc_char 7 +are rendered best-effort in an ASCII equivalent. +.Pp +Output width is limited to 78 visible columns unless literal input lines +exceed this limit. +.Pp +The following +.Fl O +arguments are accepted: +.Bl -tag -width Ds +.It Cm indent Ns = Ns Ar indent +The left margin for normal text is set to +.Ar indent +blank characters instead of the default of five for +.Xr mdoc 7 +and seven for +.Xr man 7 . +Increasing this is not recommended; it may result in degraded formatting, +for example overfull lines or ugly line breaks. +.It Cm width Ns = Ns Ar width +The output width is set to +.Ar width , +which will normalise to \(>=58. +.El +.Ss HTML Output +Output produced by +.Fl T Cm html +conforms to HTML5 using optional self-closing tags. +Default styles use only CSS1. 
+Equations rendered from +.Xr eqn 7 +blocks use MathML. +.Pp +The +.Pa mandoc.css +file documents style-sheet classes available for customising output. +If a style-sheet is not specified with +.Fl O Cm style , +.Fl T Cm html +defaults to simple output (via an embedded style-sheet) +readable in any graphical or text-based web +browser. +.Pp +Special characters are rendered in decimal-encoded UTF\-8. +.Pp +The following +.Fl O +arguments are accepted: +.Bl -tag -width Ds +.It Cm fragment +Omit the <!DOCTYPE> declaration and the <html>, <head>, and <body> +elements and only emit the subtree below the <body> element. +The +.Cm style +argument will be ignored. +This is useful when embedding manual content within existing documents. +.It Cm includes Ns = Ns Ar fmt +The string +.Ar fmt , +for example, +.Ar ../src/%I.html , +is used as a template for linked header files (usually via the +.Sq \&In +macro). +Instances of +.Sq \&%I +are replaced with the include filename. +The default is not to present a +hyperlink. +.It Cm man Ns = Ns Ar fmt +The string +.Ar fmt , +for example, +.Ar ../html%S/%N.%S.html , +is used as a template for linked manuals (usually via the +.Sq \&Xr +macro). +Instances of +.Sq \&%N +and +.Sq %S +are replaced with the linked manual's name and section, respectively. +If no section is included, section 1 is assumed. +The default is not to +present a hyperlink. +.It Cm style Ns = Ns Ar style.css +The file +.Ar style.css +is used for an external style-sheet. +This must be a valid absolute or +relative URI. +.El +.Ss Locale Output +Locale-depending output encoding is triggered with +.Fl T Cm locale . +This is the default. +.Pp +This option is not available on all systems: systems without locale +support, or those whose internal representation is not natively UCS-4, +will fall back to +.Fl T Cm ascii . +See +.Sx ASCII Output +for font style specification and available command-line arguments. 
+.Ss Man Output +Translate input format into +.Xr man 7 +output format. +This is useful for distributing manual sources to legacy systems +lacking +.Xr mdoc 7 +formatters. +.Pp +If +.Xr mdoc 7 +is passed as input, it is translated into +.Xr man 7 . +If the input format is +.Xr man 7 , +the input is copied to the output, expanding any +.Xr roff 7 +.Sq so +requests. +The parser is also run, and as usual, the +.Fl W +level controls which +.Sx DIAGNOSTICS +are displayed before copying the input to the output. +.Ss PDF Output +PDF-1.1 output may be generated by +.Fl T Cm pdf . +See +.Sx PostScript Output +for +.Fl O +arguments and defaults. +.Ss PostScript Output +PostScript +.Qq Adobe-3.0 +Level-2 pages may be generated by +.Fl T Cm ps . +Output pages default to letter sized and are rendered in the Times font +family, 11-point. +Margins are calculated as 1/9 the page length and width. +Line-height is 1.4m. +.Pp +Special characters are rendered as in +.Sx ASCII Output . +.Pp +The following +.Fl O +arguments are accepted: +.Bl -tag -width Ds +.It Cm paper Ns = Ns Ar name +The paper size +.Ar name +may be one of +.Ar a3 , +.Ar a4 , +.Ar a5 , +.Ar legal , +or +.Ar letter . +You may also manually specify dimensions as +.Ar NNxNN , +width by height in millimetres. +If an unknown value is encountered, +.Ar letter +is used. +.El +.Ss UTF\-8 Output +Use +.Fl T Cm utf8 +to force a UTF\-8 locale. +See +.Sx Locale Output +for details and options. +.Ss Syntax tree output +Use +.Fl T Cm tree +to show a human readable representation of the syntax tree. +It is useful for debugging the source code of manual pages. +The exact format is subject to change, so don't write parsers for it. +Each output line shows one syntax tree node. +Child nodes are indented with respect to their parent node. +The columns are: +.Pp +.Bl -enum -compact +.It +For macro nodes, the macro name; for text and +.Xr tbl 7 +nodes, the content. +There is a special format for +.Xr eqn 7 +nodes. 
+.It +Node type (text, elem, block, head, body, body-end, tail, tbl, eqn). +.It +Flags: +.Bl -dash -compact +.It +An opening parenthesis if the node is an opening delimiter. +.It +An asterisk if the node starts a new input line. +.It +The input line number (starting at one). +.It +A colon. +.It +The input column number (starting at one). +.It +A closing parenthesis if the node is a closing delimiter. +.It +A full stop if the node ends a sentence. +.El +.El +.Sh ENVIRONMENT +.Bl -tag -width MANPAGER +.It Ev MANPAGER +Any non-empty value of the environment variable +.Ev MANPAGER +will be used instead of the standard pagination program, +.Xr more 1 . +.It Ev PAGER +Specifies the pagination program to use when +.Ev MANPAGER +is not defined. +If neither PAGER nor MANPAGER is defined, +.Xr more 1 +.Fl s +will be used. +.El +.Sh EXIT STATUS +The +.Nm +utility exits with one of the following values, controlled by the message +.Ar level +associated with the +.Fl W +option: +.Pp +.Bl -tag -width Ds -compact +.It 0 +No warnings or errors occurred, or those that did were ignored because +they were lower than the requested +.Ar level . +.It 2 +At least one warning occurred, but no error, and +.Fl W Cm warning +was specified. +.It 3 +At least one parsing error occurred, +but no unsupported feature was encountered, and +.Fl W Cm error +or +.Fl W Cm warning +was specified. +.It 4 +At least one unsupported feature was encountered, and +.Fl W Cm unsupp , +.Fl W Cm error +or +.Fl W Cm warning +was specified. +.It 5 +Invalid command line arguments were specified. +No input files have been read. +.It 6 +An operating system error occurred, for example exhaustion +of memory, file descriptors, or process table entries. +Such errors cause +.Nm +to exit at once, possibly in the middle of parsing or formatting a file. +.El +.Pp +Note that selecting +.Fl T Cm lint +output mode implies +.Fl W Cm warning . 
+.Sh EXAMPLES +To page manuals to the terminal: +.Pp +.Dl $ mandoc \-W all,stop mandoc.1 2\*(Gt&1 | less +.Dl $ mandoc mandoc.1 mdoc.3 mdoc.7 | less +.Pp +To produce HTML manuals with +.Pa mandoc.css +as the style-sheet: +.Pp +.Dl $ mandoc \-T html -O style=mandoc.css mdoc.7 \*(Gt mdoc.7.html +.Pp +To check over a large set of manuals: +.Pp +.Dl $ mandoc \-T lint \(gafind /usr/src -name \e*\e.[1-9]\(ga +.Pp +To produce a series of PostScript manuals for A4 paper: +.Pp +.Dl $ mandoc \-T ps \-O paper=a4 mdoc.7 man.7 \*(Gt manuals.ps +.Pp +Convert a modern +.Xr mdoc 7 +manual to the older +.Xr man 7 +format, for use on systems lacking an +.Xr mdoc 7 +parser: +.Pp +.Dl $ mandoc \-T man foo.mdoc \*(Gt foo.man +.Sh DIAGNOSTICS +Messages displayed by +.Nm +follow this format: +.Pp +.D1 Nm Ns : Ar file : Ns Ar line : Ns Ar column : level : message : macro args +.Pp +Line and column numbers start at 1. +Both are omitted for messages referring to an input file as a whole. +Macro names and arguments are omitted where meaningless. +Fatal messages about invalid command line arguments +or operating system errors, for example when memory is exhausted, +may also omit the +.Ar file +and +.Ar level +fields. +.Pp +Message levels have the following meanings: +.Bl -tag -width "warning" +.It Cm unsupp +An input file uses unsupported low-level +.Xr roff 7 +features. +The output may be incomplete and/or misformatted, +so using GNU troff instead of +.Nm +to process the file may be preferable. +.It Cm error +An input file contains invalid syntax that cannot be safely interpreted. +By discarding part of the input or inserting missing tokens, +the parser is able to continue, and the error does not prevent +generation of formatted output, but typically, preparing that +output involves information loss, broken document structure +or unintended formatting, no matter whether +.Nm +or GNU troff is used. 
+In many cases, the output of +.Nm +and GNU troff is identical, but in some, +.Nm +is more resilient than GNU troff with respect to malformed input. +.Pp +Non-existent or unreadable input files are also reported on the +.Cm error +level. +In that case, the parser cannot even be started and no output +is produced from those input files. +.It Cm warning +An input file uses obsolete, discouraged or non-portable syntax. +All the same, the meaning of the input is unambiguous and a correct +rendering can be produced. +Documents causing warnings may render poorly when using other +formatting tools instead of +.Nm . +.El +.Pp +Messages of the +.Cm warning , +.Cm error , +and +.Cm unsupp +levels except those about non-existent or unreadable input files +are hidden unless their level, or a lower level, is requested using a +.Fl W +option or +.Fl T Cm lint +output mode. +.Ss Warnings related to the document prologue +.Bl -ohang +.It Sy "missing manual title, using UNTITLED" +.Pq mdoc +A +.Ic \&Dt +macro has no arguments, or there is no +.Ic \&Dt +macro before the first non-prologue macro. +.It Sy "missing manual title, using \(dq\(dq" +.Pq man +There is no +.Ic \&TH +macro, or it has no arguments. +.It Sy "lower case character in document title" +.Pq mdoc , man +The title is still used as given in the +.Ic \&Dt +or +.Ic \&TH +macro. +.It Sy "missing manual section, using \(dq\(dq" +.Pq mdoc , man +A +.Ic \&Dt +or +.Ic \&TH +macro lacks the mandatory section argument. +.It Sy "unknown manual section" +.Pq mdoc +The section number in a +.Ic \&Dt +line is invalid, but still used. +.It Sy "missing date, using today's date" +.Pq mdoc , man +The document was parsed as +.Xr mdoc 7 +and it has no +.Ic \&Dd +macro, or the +.Ic \&Dd +macro has no arguments or only empty arguments; +or the document was parsed as +.Xr man 7 +and it has no +.Ic \&TH +macro, or the +.Ic \&TH +macro has fewer than three arguments or its third argument is empty. 
+.It Sy "cannot parse date, using it verbatim" +.Pq mdoc , man +The date given in a +.Ic \&Dd +or +.Ic \&TH +macro does not follow the conventional format. +.It Sy "missing Os macro, using \(dq\(dq" +.Pq mdoc +The default or current system is not shown in this case. +.It Sy "duplicate prologue macro" +.Pq mdoc +One of the prologue macros occurs more than once. +The last instance overrides all previous ones. +.It Sy "late prologue macro" +.Pq mdoc +A +.Ic \&Dd +or +.Ic \&Os +macro occurs after some non-prologue macro, but still takes effect. +.It Sy "skipping late title macro" +.Pq mdoc +The +.Ic \&Dt +macro appears after the first non-prologue macro. +Traditional formatters cannot handle this because +they write the page header before parsing the document body. +Even though this technical restriction does not apply to +.Nm , +traditional semantics is preserved. +The late macro is discarded including its arguments. +.It Sy "prologue macros out of order" +.Pq mdoc +The prologue macros are not given in the conventional order +.Ic \&Dd , +.Ic \&Dt , +.Ic \&Os . +All three macros are used even when given in another order. +.El +.Ss Warnings regarding document structure +.Bl -ohang +.It Sy ".so is fragile, better use ln(1)" +.Pq roff +Including files only works when the parser program runs with the correct +current working directory. +.It Sy "no document body" +.Pq mdoc , man +The document body contains neither text nor macros. +An empty document is shown, consisting only of a header and a footer line. +.It Sy "content before first section header" +.Pq mdoc , man +Some macros or text precede the first +.Ic \&Sh +or +.Ic \&SH +section header. +The offending macros and text are parsed and added to the top level +of the syntax tree, outside any section block. +.It Sy "first section is not NAME" +.Pq mdoc +The argument of the first +.Ic \&Sh +macro is not +.Sq NAME . +This may confuse +.Xr makewhatis 8 +and +.Xr apropos 1 . 
+.It Sy "NAME section without name" +.Pq mdoc +The NAME section does not contain any +.Ic \&Nm +child macro. +.It Sy "NAME section without description" +.Pq mdoc +The NAME section lacks the mandatory +.Ic \&Nd +child macro. +.It Sy "description not at the end of NAME" +.Pq mdoc +The NAME section does contain an +.Ic \&Nd +child macro, but other content follows it. +.It Sy "bad NAME section content" +.Pq mdoc +The NAME section contains plain text or macros other than +.Ic \&Nm +and +.Ic \&Nd . +.It Sy "missing description line, using \(dq\(dq" +.Pq mdoc +The +.Ic \&Nd +macro lacks the required argument. +The title line of the manual will end after the dash. +.It Sy "sections out of conventional order" +.Pq mdoc +A standard section occurs after another section it usually precedes. +All section titles are used as given, +and the order of sections is not changed. +.It Sy "duplicate section title" +.Pq mdoc +The same standard section title occurs more than once. +.It Sy "unexpected section" +.Pq mdoc +A standard section header occurs in a section of the manual +where it normally isn't useful. +.It Sy "unusual Xr order" +.Pq mdoc +In the SEE ALSO section, an +.Ic \&Xr +macro with a lower section number follows one with a higher number, +or two +.Ic \&Xr +macros referring to the same section are out of alphabetical order. +.It Sy "unusual Xr punctuation" +.Pq mdoc +In the SEE ALSO section, punctuation between two +.Ic \&Xr +macros differs from a single comma, or there is trailing punctuation +after the last +.Ic \&Xr +macro. +.It Sy "AUTHORS section without An macro" +.Pq mdoc +An AUTHORS section contains no +.Ic \&An +macros, or only empty ones. +Probably, there are author names lacking markup. +.El +.Ss "Warnings related to macros and nesting" +.Bl -ohang +.It Sy "obsolete macro" +.Pq mdoc +See the +.Xr mdoc 7 +manual for replacements. +.It Sy "macro neither callable nor escaped" +.Pq mdoc +The name of a macro that is not callable appears on a macro line. 
+It is printed verbatim. +If the intention is to call it, move it to its own input line; +otherwise, escape it by prepending +.Sq \e& . +.It Sy "skipping paragraph macro" +In +.Xr mdoc 7 +documents, this happens +.Bl -dash -compact +.It +at the beginning and end of sections and subsections +.It +right before non-compact lists and displays +.It +at the end of items in non-column, non-compact lists +.It +and for multiple consecutive paragraph macros. +.El +In +.Xr man 7 +documents, it happens +.Bl -dash -compact +.It +for empty +.Ic \&P , +.Ic \&PP , +and +.Ic \&LP +macros +.It +for +.Ic \&IP +macros having neither head nor body arguments +.It +for +.Ic \&br +or +.Ic \&sp +right after +.Ic \&SH +or +.Ic \&SS +.El +.It Sy "moving paragraph macro out of list" +.Pq mdoc +A list item in a +.Ic \&Bl +list contains a trailing paragraph macro. +The paragraph macro is moved after the end of the list. +.It Sy "skipping no-space macro" +.Pq mdoc +An input line begins with an +.Ic \&Ns +macro. +The macro is ignored. +.It Sy "blocks badly nested" +.Pq mdoc +If two blocks intersect, one should completely contain the other. +Otherwise, rendered output is likely to look strange in any output +format, and rendering in SGML-based output formats is likely to be +outright wrong because such languages do not support badly nested +blocks at all. +Typical examples of badly nested blocks are +.Qq Ic \&Ao \&Bo \&Ac \&Bc +and +.Qq Ic \&Ao \&Bq \&Ac . +In these examples, +.Ic \&Ac +breaks +.Ic \&Bo +and +.Ic \&Bq , +respectively. +.It Sy "nested displays are not portable" +.Pq mdoc +A +.Ic \&Bd , +.Ic \&D1 , +or +.Ic \&Dl +display occurs nested inside another +.Ic \&Bd +display. +This works with +.Nm , +but fails with most other implementations. +.It Sy "moving content out of list" +.Pq mdoc +A +.Ic \&Bl +list block contains text or macros before the first +.Ic \&It +macro. +The offending children are moved before the beginning of the list. 
+.It Sy "fill mode already enabled, skipping" +.Pq man +A +.Ic \&fi +request occurs even though the document is still in fill mode, +or already switched back to fill mode. +It has no effect. +.It Sy "fill mode already disabled, skipping" +.Pq man +An +.Ic \&nf +request occurs even though the document already switched to no-fill mode +and did not switch back to fill mode yet. +It has no effect. +.It Sy "line scope broken" +.Pq man +While parsing the next-line scope of the previous macro, +another macro is found that prematurely terminates the previous one. +The previous, interrupted macro is deleted from the parse tree. +.El +.Ss "Warnings related to missing arguments" +.Bl -ohang +.It Sy "skipping empty request" +.Pq roff , eqn +The macro name is missing from a macro definition request, +or an +.Xr eqn 7 +control statement or operation keyword lacks its required argument. +.It Sy "conditional request controls empty scope" +.Pq roff +A conditional request is only useful if any of the following +follows it on the same logical input line: +.Bl -dash -compact +.It +The +.Sq \e{ +keyword to open a multi-line scope. +.It +A request or macro or some text, resulting in a single-line scope. +.It +The immediate end of the logical line without any intervening whitespace, +resulting in next-line scope. +.El +Here, a conditional request is followed by trailing whitespace only, +and there is no other content on its logical input line. +Note that it doesn't matter whether the logical input line is split +across multiple physical input lines using +.Sq \e +line continuation characters. +This is one of the rare cases +where trailing whitespace is syntactically significant. +The conditional request controls a scope containing whitespace only, +so it is unlikely to have a significant effect, +except that it may control a following +.Ic \&el +clause. +.It Sy "skipping empty macro" +.Pq mdoc +The indicated macro has no arguments and hence no effect. 
+.It Sy "empty block" +.Pq mdoc , man +A +.Ic \&Bd , +.Ic \&Bk , +.Ic \&Bl , +.Ic \&D1 , +.Ic \&Dl , +.Ic \&RS , +or +.Ic \&UR +block contains nothing in its body and will produce no output. +.It Sy "empty argument, using 0n" +.Pq mdoc +The required width is missing after +.Ic \&Bd +or +.Ic \&Bl +.Fl offset +or +.Fl width . +.It Sy "missing display type, using -ragged" +.Pq mdoc +The +.Ic \&Bd +macro is invoked without the required display type. +.It Sy "list type is not the first argument" +.Pq mdoc +In a +.Ic \&Bl +macro, at least one other argument precedes the type argument. +The +.Nm +utility copes with any argument order, but some other +.Xr mdoc 7 +implementations do not. +.It Sy "missing -width in -tag list, using 8n" +.Pq mdoc +Every +.Ic \&Bl +macro having the +.Fl tag +argument requires +.Fl width , +too. +.It Sy "missing utility name, using \(dq\(dq" +.Pq mdoc +The +.Ic \&Ex Fl std +macro is called without an argument before +.Ic \&Nm +has first been called with an argument. +.It Sy "missing function name, using \(dq\(dq" +.Pq mdoc +The +.Ic \&Fo +macro is called without an argument. +No function name is printed. +.It Sy "empty head in list item" +.Pq mdoc +In a +.Ic \&Bl +.Fl diag , +.Fl hang , +.Fl inset , +.Fl ohang , +or +.Fl tag +list, an +.Ic \&It +macro lacks the required argument. +The item head is left empty. +.It Sy "empty list item" +.Pq mdoc +In a +.Ic \&Bl +.Fl bullet , +.Fl dash , +.Fl enum , +or +.Fl hyphen +list, an +.Ic \&It +block is empty. +An empty list item is shown. +.It Sy "missing font type, using \efR" +.Pq mdoc +A +.Ic \&Bf +macro has no argument. +It switches to the default font. +.It Sy "unknown font type, using \efR" +.Pq mdoc +The +.Ic \&Bf +argument is invalid. +The default font is used instead. +.It Sy "nothing follows prefix" +.Pq mdoc +A +.Ic \&Pf +macro has no argument, or only one argument and no macro follows +on the same input line. 
+This defeats its purpose; in particular, spacing is not suppressed +before the text or macros following on the next input line. +.It Sy "empty reference block" +.Pq mdoc +An +.Ic \&Rs +macro is immediately followed by an +.Ic \&Re +macro on the next input line. +Such an empty block does not produce any output. +.It Sy "missing -std argument, adding it" +.Pq mdoc +An +.Ic \&Ex +or +.Ic \&Rv +macro lacks the required +.Fl std +argument. +The +.Nm +utility assumes +.Fl std +even when it is not specified, but other implementations may not. +.It Sy "missing option string, using \(dq\(dq" +.Pq man +The +.Ic \&OP +macro is invoked without any argument. +An empty pair of square brackets is shown. +.It Sy "missing resource identifier, using \(dq\(dq" +.Pq man +The +.Ic \&UR +macro is invoked without any argument. +An empty pair of angle brackets is shown. +.It Sy "missing eqn box, using \(dq\(dq" +.Pq eqn +A diacritic mark or a binary operator is found, +but there is nothing to the left of it. +An empty box is inserted. +.El +.Ss "Warnings related to bad macro arguments" +.Bl -ohang +.It Sy "unterminated quoted argument" +.Pq roff +Macro arguments can be enclosed in double quote characters +such that space characters and macro names contained in the quoted +argument need not be escaped. +The closing quote of the last argument of a macro can be omitted. +However, omitting it is not recommended because it makes the code +harder to read. +.It Sy "duplicate argument" +.Pq mdoc +A +.Ic \&Bd +or +.Ic \&Bl +macro has more than one +.Fl compact , +more than one +.Fl offset , +or more than one +.Fl width +argument. +All but the last instances of these arguments are ignored. +.It Sy "skipping duplicate argument" +.Pq mdoc +An +.Ic \&An +macro has more than one +.Fl split +or +.Fl nosplit +argument. +All but the first of these arguments are ignored. +.It Sy "skipping duplicate display type" +.Pq mdoc +A +.Ic \&Bd +macro has more than one type argument; the first one is used. 
+.It Sy "skipping duplicate list type" +.Pq mdoc +A +.Ic \&Bl +macro has more than one type argument; the first one is used. +.It Sy "skipping -width argument" +.Pq mdoc +A +.Ic \&Bl +.Fl column , +.Fl diag , +.Fl ohang , +.Fl inset , +or +.Fl item +list has a +.Fl width +argument. +That has no effect. +.It Sy "wrong number of cells" +In a line of a +.Ic \&Bl Fl column +list, the number of tabs or +.Ic \&Ta +macros is less than the number expected from the list header line +or exceeds the expected number by more than one. +Missing cells remain empty, and all cells exceeding the number of +columns are joined into one single cell. +.It Sy "unknown AT&T UNIX version" +.Pq mdoc +An +.Ic \&At +macro has an invalid argument. +It is used verbatim, with +.Qq "AT&T UNIX " +prefixed to it. +.It Sy "comma in function argument" +.Pq mdoc +An argument of an +.Ic \&Fa +or +.Ic \&Fn +macro contains a comma; it should probably be split into two arguments. +.It Sy "parenthesis in function name" +.Pq mdoc +The first argument of an +.Ic \&Fc +or +.Ic \&Fn +macro contains an opening or closing parenthesis; that's probably wrong, +parentheses are added automatically. +.It Sy "invalid content in Rs block" +.Pq mdoc +An +.Ic \&Rs +block contains plain text or non-% macros. +The bogus content is left in the syntax tree. +Formatting may be poor. +.It Sy "invalid Boolean argument" +.Pq mdoc +An +.Ic \&Sm +macro has an argument other than +.Cm on +or +.Cm off . +The invalid argument is moved out of the macro, which leaves the macro +empty, causing it to toggle the spacing mode. +.It Sy "unknown font, skipping request" +.Pq man , tbl +A +.Xr roff 7 +.Ic \&ft +request or a +.Xr tbl 7 +.Ic \&f +layout modifier has an unknown +.Ar font +argument. +.It Sy "odd number of characters in request" +.Pq roff +A +.Ic \&tr +request contains an odd number of characters. +The last character is mapped to the blank character. 
+.El +.Ss "Warnings related to plain text" +.Bl -ohang +.It Sy "blank line in fill mode, using .sp" +.Pq mdoc +The meaning of blank input lines is only well-defined in non-fill mode: +In fill mode, line breaks of text input lines are not supposed to be +significant. +However, for compatibility with groff, blank lines in fill mode +are replaced with +.Ic \&sp +requests. +.It Sy "tab in filled text" +.Pq mdoc , man +The meaning of tab characters is only well-defined in non-fill mode: +In fill mode, whitespace is not supposed to be significant +on text input lines. +As an implementation dependent choice, tab characters on text lines +are passed through to the formatters in any case. +Given that the text before the tab character will be filled, +it is hard to predict which tab stop position the tab will advance to. +.It Sy "whitespace at end of input line" +.Pq mdoc , man , roff +Whitespace at the end of input lines is almost never semantically +significant \(em but in the odd case where it might be, it is +extremely confusing when reviewing and maintaining documents. +.It Sy "bad comment style" +.Pq roff +Comment lines start with a dot, a backslash, and a double-quote character. +The +.Nm +utility treats the line as a comment line even without the backslash, +but leaving out the backslash might not be portable. +.It Sy "invalid escape sequence" +.Pq roff +An escape sequence has an invalid opening argument delimiter, lacks the +closing argument delimiter, or the argument has too few characters. +If the argument is incomplete, +.Ic \e* +and +.Ic \en +expand to an empty string, +.Ic \eB +to the digit +.Sq 0 , +and +.Ic \ew +to the length of the incomplete argument. +All other invalid escape sequences are ignored. +.It Sy "undefined string, using \(dq\(dq" +.Pq roff +If a string is used without being defined before, +its value is implicitly set to the empty string. +However, defining strings explicitly before use +keeps the code more readable. 
+.El +.Ss "Warnings related to tables" +.Bl -ohang +.It Sy "tbl line starts with span" +.Pq tbl +The first cell in a table layout line is a horizontal span +.Pq Sq Cm s . +Data provided for this cell is ignored, and nothing is printed in the cell. +.It Sy "tbl column starts with span" +.Pq tbl +The first line of a table layout specification +requests a vertical span +.Pq Sq Cm ^ . +Data provided for this cell is ignored, and nothing is printed in the cell. +.It Sy "skipping vertical bar in tbl layout" +.Pq tbl +A table layout specification contains more than two consecutive vertical bars. +A double bar is printed, all additional bars are discarded. +.El +.Ss "Errors related to tables" +.Bl -ohang +.It Sy "non-alphabetic character in tbl options" +.Pq tbl +The table options line contains a character other than a letter, +blank, or comma where the beginning of an option name is expected. +The character is ignored. +.It Sy "skipping unknown tbl option" +.Pq tbl +The table options line contains a string of letters that does not +match any known option name. +The word is ignored. +.It Sy "missing tbl option argument" +.Pq tbl +A table option that requires an argument is not followed by an +opening parenthesis, or the opening parenthesis is immediately +followed by a closing parenthesis. +The option is ignored. +.It Sy "wrong tbl option argument size" +.Pq tbl +A table option argument contains an invalid number of characters. +Both the option and the argument are ignored. +.It Sy "empty tbl layout" +.Pq tbl +A table layout specification is completely empty, +specifying zero lines and zero columns. +As a fallback, a single left-justified column is used. +.It Sy "invalid character in tbl layout" +.Pq tbl +A table layout specification contains a character that can neither +be interpreted as a layout key character nor as a layout modifier, +or a modifier precedes the first key. +The invalid character is discarded. 
+.It Sy "unmatched parenthesis in tbl layout" +.Pq tbl +A table layout specification contains an opening parenthesis, +but no matching closing parenthesis. +The rest of the input line, starting from the parenthesis, has no effect. +.It Sy "tbl without any data cells" +.Pq tbl +A table does not contain any data cells. +It will probably produce no output. +.It Sy "ignoring data in spanned tbl cell" +.Pq tbl +A table cell is marked as a horizontal span +.Pq Sq Cm s +or vertical span +.Pq Sq Cm ^ +in the table layout, but it contains data. +The data is ignored. +.It Sy "ignoring extra tbl data cells" +.Pq tbl +A data line contains more cells than the corresponding layout line. +The data in the extra cells is ignored. +.It Sy "data block open at end of tbl" +.Pq tbl +A data block is opened with +.Cm T{ , +but never closed with a matching +.Cm T} . +The remaining data lines of the table are all put into one cell, +and any remaining cells stay empty. +.El +.Ss "Errors related to roff, mdoc, and man code" +.Bl -ohang +.It Sy "input stack limit exceeded, infinite loop?" +.Pq roff +Explicit recursion limits are implemented for the following features, +in order to prevent infinite loops: +.Bl -dash -compact +.It +expansion of nested escape sequences +including expansion of strings and number registers, +.It +expansion of nested user-defined macros, +.It +and +.Ic \&so +file inclusion. +.El +When a limit is hit, the output is incorrect, typically losing +some content, but the parser can continue. +.It Sy "skipping bad character" +.Pq mdoc , man , roff +The input file contains a byte that is not a printable +.Xr ascii 7 +character. +The message mentions the character number. +The offending byte is replaced with a question mark +.Pq Sq \&? . +Consider editing the input file to replace the byte with an ASCII +transliteration of the intended character. 
+.It Sy "skipping unknown macro" +.Pq mdoc , man , roff +The first identifier on a request or macro line is neither recognized as a +.Xr roff 7 +request, nor as a user-defined macro, nor, respectively, as an +.Xr mdoc 7 +or +.Xr man 7 +macro. +It may be mistyped or unsupported. +The request or macro is discarded including its arguments. +.It Sy "skipping insecure request" +.Pq roff +An input file attempted to run a shell command +or to read or write an external file. +Such attempts are denied for security reasons. +.It Sy "skipping item outside list" +.Pq mdoc , eqn +An +.Ic \&It +macro occurs outside any +.Ic \&Bl +list, or an +.Xr eqn 7 +.Ic above +delimiter occurs outside any pile. +It is discarded including its arguments. +.It Sy "skipping column outside column list" +.Pq mdoc +A +.Ic \&Ta +macro occurs outside any +.Ic \&Bl Fl column +block. +It is discarded including its arguments. +.It Sy "skipping end of block that is not open" +.Pq mdoc , man , eqn , tbl , roff +Various syntax elements can only be used to explicitly close blocks +that have previously been opened. +An +.Xr mdoc 7 +block closing macro, a +.Xr man 7 +.Ic \&RE +or +.Ic \&UE +macro, an +.Xr eqn 7 +right delimiter or closing brace, or the end of an equation, table, or +.Xr roff 7 +conditional request is encountered but no matching block is open. +The offending request or macro is discarded. +.It Sy "fewer RS blocks open, skipping" +.Pq man +The +.Ic \&RE +macro is invoked with an argument, but less than the specified number of +.Ic \&RS +blocks is open. +The +.Ic \&RE +macro is discarded. +.It Sy "inserting missing end of block" +.Pq mdoc , tbl +Various +.Xr mdoc 7 +macros as well as tables require explicit closing by dedicated macros. +A block that doesn't support bad nesting +ends before all of its children are properly closed. +The open child nodes are closed implicitly. 
+.It Sy "appending missing end of block" +.Pq mdoc , man , eqn , tbl , roff +At the end of the document, an explicit +.Xr mdoc 7 +block, a +.Xr man 7 +next-line scope or +.Ic \&RS +or +.Ic \&UR +block, an equation, table, or +.Xr roff 7 +conditional or ignore block is still open. +The open block is closed implicitly. +.It Sy "escaped character not allowed in a name" +.Pq roff +Macro, string and register identifiers consist of printable, +non-whitespace ASCII characters. +Escape sequences and characters and strings expressed in terms of them +cannot form part of a name. +The first argument of an +.Ic \&am , +.Ic \&as , +.Ic \&de , +.Ic \&ds , +.Ic \&nr , +or +.Ic \&rr +request, or any argument of an +.Ic \&rm +request, or the name of a request or user defined macro being called, +is terminated by an escape sequence. +In the cases of +.Ic \&as , +.Ic \&ds , +and +.Ic \&nr , +the request has no effect at all. +In the cases of +.Ic \&am , +.Ic \&de , +.Ic \&rr , +and +.Ic \&rm , +what was parsed up to this point is used as the arguments to the request, +and the rest of the input line is discarded including the escape sequence. +When parsing for a request or a user-defined macro name to be called, +only the escape sequence is discarded. +The characters preceding it are used as the request or macro name, +the characters following it are used as the arguments to the request or macro. +.It Sy "NOT IMPLEMENTED: Bd -file" +.Pq mdoc +For security reasons, the +.Ic \&Bd +macro does not support the +.Fl file +argument. +By requesting the inclusion of a sensitive file, a malicious document +might otherwise trick a privileged user into inadvertently displaying +the file on the screen, revealing the file content to bystanders. +The argument is ignored including the file name following it. +.It Sy "skipping display without arguments" +.Pq mdoc +A +.Ic \&Bd +block macro does not have any arguments. 
+The block is discarded, and the block content is displayed in +whatever mode was active before the block. +.It Sy "missing list type, using -item" +.Pq mdoc +A +.Ic \&Bl +macro fails to specify the list type. +.It Sy "missing manual name, using \(dq\(dq" +.Pq mdoc +The first call to +.Ic \&Nm +lacks the required argument. +.It Sy "uname(3) system call failed, using UNKNOWN" +.Pq mdoc +The +.Ic \&Os +macro is called without arguments, and the +.Xr uname 3 +system call failed. +As a workaround, +.Nm +can be compiled with +.Sm off +.Fl D Cm OSNAME=\(dq\e\(dq Ar string Cm \e\(dq\(dq . +.Sm on +.It Sy "unknown standard specifier" +.Pq mdoc +An +.Ic \&St +macro has an unknown argument and is discarded. +.It Sy "skipping request without numeric argument" +.Pq roff , eqn +An +.Ic \&it +request or an +.Xr eqn 7 +.Ic \&size +or +.Ic \&gsize +statement has a non-numeric or negative argument or no argument at all. +The invalid request or statement is ignored. +.It Sy "NOT IMPLEMENTED: .so with absolute path or \(dq..\(dq" +.Pq roff +For security reasons, +.Nm +allows +.Ic \&so +file inclusion requests only with relative paths +and only without ascending to any parent directory. +By requesting the inclusion of a sensitive file, a malicious document +might otherwise trick a privileged user into inadvertently displaying +the file on the screen, revealing the file content to bystanders. +.Nm +only shows the path as it appears behind +.Ic \&so . +.It Sy ".so request failed" +.Pq roff +Servicing a +.Ic \&so +request requires reading an external file, but the file could not be +opened. +.Nm +only shows the path as it appears behind +.Ic \&so . 
+.It Sy "skipping all arguments" +.Pq mdoc , man , eqn , roff +An +.Xr mdoc 7 +.Ic \&Bt , +.Ic \&Ed , +.Ic \&Ef , +.Ic \&Ek , +.Ic \&El , +.Ic \&Lp , +.Ic \&Pp , +.Ic \&Re , +.Ic \&Rs , +or +.Ic \&Ud +macro, an +.Ic \&It +macro in a list that doesn't support item heads, a +.Xr man 7 +.Ic \&LP , +.Ic \&P , +or +.Ic \&PP +macro, an +.Xr eqn 7 +.Ic \&EQ +or +.Ic \&EN +macro, or a +.Xr roff 7 +.Ic \&br , +.Ic \&fi , +or +.Ic \&nf +request or +.Sq \&.. +block closing request is invoked with at least one argument. +All arguments are ignored. +.It Sy "skipping excess arguments" +.Pq mdoc , man , roff +A macro or request is invoked with too many arguments: +.Bl -dash -offset 2n -width 2n -compact +.It +.Ic \&Fo , +.Ic \&PD , +.Ic \&RS , +.Ic \&UR , +.Ic \&ft , +or +.Ic \&sp +with more than one argument +.It +.Ic \&An +with another argument after +.Fl split +or +.Fl nosplit +.It +.Ic \&RE +with more than one argument or with a non-integer argument +.It +.Ic \&OP +or a request of the +.Ic \&de +family with more than two arguments +.It +.Ic \&Dt +with more than three arguments +.It +.Ic \&TH +with more than five arguments +.It +.Ic \&Bd , +.Ic \&Bk , +or +.Ic \&Bl +with invalid arguments +.El +The excess arguments are ignored. +.El +.Ss Unsupported features +.Bl -ohang +.It Sy "input too large" +.Pq mdoc , man +Currently, +.Nm +cannot handle input files larger than its arbitrary size limit +of 2^31 bytes (2 Gigabytes). +Since useful manuals are always small, this is not a problem in practice. +Parsing is aborted as soon as the condition is detected. +.It Sy "unsupported control character" +.Pq roff +An ASCII control character supported by other +.Xr roff 7 +implementations but not by +.Nm +was found in an input file. +It is replaced by a question mark. 
+.It Sy "unsupported roff request" +.Pq roff +An input file contains a +.Xr roff 7 +request supported by GNU troff or Heirloom troff but not by +.Nm , +and it is likely that this will cause information loss +or considerable misformatting. +.It Sy "eqn delim option in tbl" +.Pq eqn , tbl +The options line of a table defines equation delimiters. +Any equation source code contained in the table will be printed unformatted. +.It Sy "unsupported table layout modifier" +.Pq tbl +A table layout specification contains an +.Sq Cm m +modifier. +The modifier is discarded. +.It Sy "ignoring macro in table" +.Pq tbl , mdoc , man +A table contains an invocation of an +.Xr mdoc 7 +or +.Xr man 7 +macro or of an undefined macro. +The macro is ignored, and its arguments are handled +as if they were a text line. +.El +.Sh SEE ALSO +.Xr apropos 1 , +.Xr man 1 , +.Xr eqn 7 , +.Xr man 7 , +.Xr mandoc_char 7 , +.Xr mdoc 7 , +.Xr roff 7 , +.Xr tbl 7 +.Sh AUTHORS +.An -nosplit +The +.Nm +utility was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +and is maintained by +.An Ingo Schwarze Aq Mt schwarze@openbsd.org . +.Sh BUGS +In +.Fl T Cm html , +the maximum size of an element attribute is determined by +.Dv BUFSIZ , +which is usually 1024 bytes. +Be aware of this when setting long link +formats such as +.Fl O Cm style Ns = Ns Ar really/long/link . diff --git a/contrib/mdocml/mandoc.3 b/contrib/mdocml/mandoc.3 new file mode 100644 index 0000000..61012ed --- /dev/null +++ b/contrib/mdocml/mandoc.3 @@ -0,0 +1,686 @@ +.\" $Id: mandoc.3,v 1.36 2016/01/08 17:48:09 schwarze Exp $ +.\" +.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> +.\" Copyright (c) 2010-2016 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. 
+.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: January 8 2016 $ +.Dt MANDOC 3 +.Os +.Sh NAME +.Nm mandoc , +.Nm man_deroff , +.Nm man_meta , +.Nm man_mparse , +.Nm man_node , +.Nm mdoc_deroff , +.Nm mdoc_meta , +.Nm mdoc_node , +.Nm mparse_alloc , +.Nm mparse_free , +.Nm mparse_getkeep , +.Nm mparse_keep , +.Nm mparse_open , +.Nm mparse_readfd , +.Nm mparse_reset , +.Nm mparse_result , +.Nm mparse_strerror , +.Nm mparse_strlevel +.Nd mandoc macro compiler library +.Sh SYNOPSIS +.In sys/types.h +.In mandoc.h +.Pp +.Fd "#define ASCII_NBRSP" +.Fd "#define ASCII_HYPH" +.Fd "#define ASCII_BREAK" +.Ft struct mparse * +.Fo mparse_alloc +.Fa "int options" +.Fa "enum mandoclevel wlevel" +.Fa "mandocmsg mmsg" +.Fa "char *defos" +.Fc +.Ft void +.Fo (*mandocmsg) +.Fa "enum mandocerr errtype" +.Fa "enum mandoclevel level" +.Fa "const char *file" +.Fa "int line" +.Fa "int col" +.Fa "const char *msg" +.Fc +.Ft void +.Fo mparse_free +.Fa "struct mparse *parse" +.Fc +.Ft const char * +.Fo mparse_getkeep +.Fa "const struct mparse *parse" +.Fc +.Ft void +.Fo mparse_keep +.Fa "struct mparse *parse" +.Fc +.Ft int +.Fo mparse_open +.Fa "struct mparse *parse" +.Fa "const char *fname" +.Fc +.Ft "enum mandoclevel" +.Fo mparse_readfd +.Fa "struct mparse *parse" +.Fa "int fd" +.Fa "const char *fname" +.Fc +.Ft void +.Fo mparse_reset +.Fa "struct mparse *parse" +.Fc +.Ft void +.Fo mparse_result +.Fa "struct mparse *parse" +.Fa "struct mdoc **mdoc" +.Fa "struct man **man" +.Fa "char **sodest" +.Fc +.Ft 
"const char *" +.Fo mparse_strerror +.Fa "enum mandocerr" +.Fc +.Ft "const char *" +.Fo mparse_strlevel +.Fa "enum mandoclevel" +.Fc +.In sys/types.h +.In mandoc.h +.In mdoc.h +.Ft void +.Fo mdoc_deroff +.Fa "char **dest" +.Fa "const struct mdoc_node *node" +.Fc +.Ft "const struct mdoc_meta *" +.Fo mdoc_meta +.Fa "const struct mdoc *mdoc" +.Fc +.Ft "const struct mdoc_node *" +.Fo mdoc_node +.Fa "const struct mdoc *mdoc" +.Fc +.Vt extern const char * const * mdoc_argnames; +.Vt extern const char * const * mdoc_macronames; +.In sys/types.h +.In mandoc.h +.In man.h +.Ft void +.Fo man_deroff +.Fa "char **dest" +.Fa "const struct man_node *node" +.Fc +.Ft "const struct man_meta *" +.Fo man_meta +.Fa "const struct man *man" +.Fc +.Ft "const struct mparse *" +.Fo man_mparse +.Fa "const struct man *man" +.Fc +.Ft "const struct man_node *" +.Fo man_node +.Fa "const struct man *man" +.Fc +.Vt extern const char * const * man_macronames; +.Sh DESCRIPTION +The +.Nm mandoc +library parses a +.Ux +manual into an abstract syntax tree (AST). +.Ux +manuals are composed of +.Xr mdoc 7 +or +.Xr man 7 , +and may be mixed with +.Xr roff 7 , +.Xr tbl 7 , +and +.Xr eqn 7 +invocations. +.Pp +The following describes a general parse sequence: +.Bl -enum +.It +initiate a parsing sequence with +.Xr mchars_alloc 3 +and +.Fn mparse_alloc ; +.It +open a file with +.Xr open 2 +or +.Fn mparse_open ; +.It +parse it with +.Fn mparse_readfd ; +.It +close it with +.Xr close 2 ; +.It +retrieve the syntax tree with +.Fn mparse_result ; +.It +iterate over parse nodes with +.Fn mdoc_node +or +.Fn man_node ; +.It +free all allocated memory with +.Fn mparse_free +and +.Xr mchars_free 3 , +or invoke +.Fn mparse_reset +and parse new files. +.El +.Sh REFERENCE +This section documents the functions, types, and variables available +via +.In mandoc.h , +with the exception of those documented in +.Xr mandoc_escape 3 +and +.Xr mchars_alloc 3 . 
+.Ss Types +.Bl -ohang +.It Vt "enum mandocerr" +An error or warning message during parsing. +.It Vt "enum mandoclevel" +A classification of an +.Vt "enum mandocerr" +as regards system operation. +.It Vt "struct mparse" +An opaque pointer to a running parse sequence. +Created with +.Fn mparse_alloc +and freed with +.Fn mparse_free . +This may be used across parsed input if +.Fn mparse_reset +is called between parses. +.It Vt "mandocmsg" +A prototype for a function to handle error and warning +messages emitted by the parser. +.El +.Ss Functions +.Bl -ohang +.It Fn man_deroff +Obtain a text-only representation of a +.Vt struct man_node , +including text contained in its child nodes. +To be used on children of the pointer returned from +.Fn man_node . +When it is no longer needed, the pointer returned from +.Fn man_deroff +can be passed to +.Xr free 3 . +.It Fn man_meta +Obtain the meta-data of a successful +.Xr man 7 +parse. +This may only be used on a pointer returned by +.Fn mparse_result . +Declared in +.In man.h , +implemented in +.Pa man.c . +.It Fn man_mparse +Get the parser used for the current output. +Declared in +.In man.h , +implemented in +.Pa man.c . +.It Fn man_node +Obtain the root node of a successful +.Xr man 7 +parse. +This may only be used on a pointer returned by +.Fn mparse_result . +Declared in +.In man.h , +implemented in +.Pa man.c . +.It Fn mdoc_deroff +Obtain a text-only representation of a +.Vt struct mdoc_node , +including text contained in its child nodes. +To be used on children of the pointer returned from +.Fn mdoc_node . +When it is no longer needed, the pointer returned from +.Fn mdoc_deroff +can be passed to +.Xr free 3 . +.It Fn mdoc_meta +Obtain the meta-data of a successful +.Xr mdoc 7 +parse. +This may only be used on a pointer returned by +.Fn mparse_result . +Declared in +.In mdoc.h , +implemented in +.Pa mdoc.c . +.It Fn mdoc_node +Obtain the root node of a successful +.Xr mdoc 7 +parse. 
+This may only be used on a pointer returned by +.Fn mparse_result . +Declared in +.In mdoc.h , +implemented in +.Pa mdoc.c . +.It Fn mparse_alloc +Allocate a parser. +The arguments have the following effect: +.Bl -tag -offset 5n -width inttype +.It Ar options +When the +.Dv MPARSE_MDOC +or +.Dv MPARSE_MAN +bit is set, only that parser is used. +Otherwise, the document type is automatically detected. +.Pp +When the +.Dv MPARSE_SO +bit is set, +.Xr roff 7 +.Ic \&so +file inclusion requests are always honoured. +Otherwise, if the request is the only content in an input file, +only the file name is remembered, to be returned in the +.Fa sodest +argument of +.Fn mparse_result . +.Pp +When the +.Dv MPARSE_QUICK +bit is set, parsing is aborted after the NAME section. +This is for example useful in +.Xr makewhatis 8 +.Fl Q +to quickly build minimal databases. +.It Ar wlevel +Can be set to +.Dv MANDOCLEVEL_BADARG , +.Dv MANDOCLEVEL_ERROR , +or +.Dv MANDOCLEVEL_WARNING . +Messages below the selected level will be suppressed. +.It Ar mmsg +A callback function to handle errors and warnings. +See +.Pa main.c +for an example. +.It Ar defos +A default string for the +.Xr mdoc 7 +.Sq \&Os +macro, overriding the +.Dv OSNAME +preprocessor definition and the results of +.Xr uname 3 . +.El +.Pp +The same parser may be used for multiple files so long as +.Fn mparse_reset +is called between parses. +.Fn mparse_free +must be called to free the memory allocated by this function. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_free +Free all memory allocated by +.Fn mparse_alloc . +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_getkeep +Acquire the keep buffer. +Must follow a call of +.Fn mparse_keep . +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_keep +Instruct the parser to retain a copy of its parsed input. +This can be acquired with subsequent +.Fn mparse_getkeep +calls. 
+Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_open +Open the file for reading. +If that fails and +.Fa fname +does not already end in +.Ql .gz , +try again after appending +.Ql .gz . +Save the information whether the file is zipped or not. +Return a file descriptor open for reading or -1 on failure. +It can be passed to +.Fn mparse_readfd +or used directly. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_readfd +Parse a file descriptor opened with +.Xr open 2 +or +.Fn mparse_open . +Pass the associated filename in +.Va fname . +This function may be called multiple times with different parameters; however, +.Xr close 2 +and +.Fn mparse_reset +should be invoked between parses. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_reset +Reset a parser so that +.Fn mparse_readfd +may be used again. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_result +Obtain the result of a parse. +One of the three pointers will be filled in. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_strerror +Return a statically-allocated string representation of an error code. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.It Fn mparse_strlevel +Return a statically-allocated string representation of a level code. +Declared in +.In mandoc.h , +implemented in +.Pa read.c . +.El +.Ss Variables +.Bl -ohang +.It Va man_macronames +The string representation of a man macro as indexed by +.Vt "enum mant" . +.It Va mdoc_argnames +The string representation of a mdoc macro argument as indexed by +.Vt "enum mdocargt" . +.It Va mdoc_macronames +The string representation of a mdoc macro as indexed by +.Vt "enum mdoct" . +.El +.Sh IMPLEMENTATION NOTES +This section consists of structural documentation for +.Xr mdoc 7 +and +.Xr man 7 +syntax trees and strings. 
+.Ss Man and Mdoc Strings +Strings may be extracted from mdoc and man meta-data, or from text +nodes (MDOC_TEXT and MAN_TEXT, respectively). +These strings have special non-printing formatting cues embedded in the +text itself, as well as +.Xr roff 7 +escapes preserved from input. +Implementing systems will need to handle both situations to produce +human-readable text. +In general, strings may be assumed to consist of 7-bit ASCII characters. +.Pp +The following non-printing characters may be embedded in text strings: +.Bl -tag -width Ds +.It Dv ASCII_NBRSP +A non-breaking space character. +.It Dv ASCII_HYPH +A soft hyphen. +.It Dv ASCII_BREAK +A breakable zero-width space. +.El +.Pp +Escape characters are also passed verbatim into text strings. +An escape character is a sequence of characters beginning with the +backslash +.Pq Sq \e . +To construct human-readable text, these should be intercepted with +.Xr mandoc_escape 3 +and converted with one of the functions described in +.Xr mchars_alloc 3 . +.Ss Man Abstract Syntax Tree +This AST is governed by the ontological rules dictated in +.Xr man 7 +and derives its terminology accordingly. +.Pp +The AST is composed of +.Vt struct man_node +nodes with element, root and text types as declared by the +.Va type +field. +Each node also provides its parse point (the +.Va line , +.Va sec , +and +.Va pos +fields), its position in the tree (the +.Va parent , +.Va child , +.Va next +and +.Va prev +fields) and some type-specific data. +.Pp +The tree itself is arranged according to the following normal form, +where capitalised non-terminals represent nodes. +.Pp +.Bl -tag -width "ELEMENTXX" -compact +.It ROOT +\(<- mnode+ +.It mnode +\(<- ELEMENT | TEXT | BLOCK +.It BLOCK +\(<- HEAD BODY +.It HEAD +\(<- mnode* +.It BODY +\(<- mnode* +.It ELEMENT +\(<- ELEMENT | TEXT* +.It TEXT +\(<- [[:ascii:]]* +.El +.Pp +The only elements capable of nesting other elements are those with +next-line scope as documented in +.Xr man 7 . 
+.Ss Mdoc Abstract Syntax Tree +This AST is governed by the ontological +rules dictated in +.Xr mdoc 7 +and derives its terminology accordingly. +.Qq In-line +elements described in +.Xr mdoc 7 +are described simply as +.Qq elements . +.Pp +The AST is composed of +.Vt struct mdoc_node +nodes with block, head, body, element, root and text types as declared +by the +.Va type +field. +Each node also provides its parse point (the +.Va line , +.Va sec , +and +.Va pos +fields), its position in the tree (the +.Va parent , +.Va child , +.Va last , +.Va next +and +.Va prev +fields) and some type-specific data, in particular, for nodes generated +from macros, the generating macro in the +.Va tok +field. +.Pp +The tree itself is arranged according to the following normal form, +where capitalised non-terminals represent nodes. +.Pp +.Bl -tag -width "ELEMENTXX" -compact +.It ROOT +\(<- mnode+ +.It mnode +\(<- BLOCK | ELEMENT | TEXT +.It BLOCK +\(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]] +.It ELEMENT +\(<- TEXT* +.It HEAD +\(<- mnode* +.It BODY +\(<- mnode* [ENDBODY mnode*] +.It TAIL +\(<- mnode* +.It TEXT +\(<- [[:ascii:]]* +.El +.Pp +Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of +the BLOCK production: these refer to punctuation marks. +Furthermore, although a TEXT node will generally have a non-zero-length +string, in the specific case of +.Sq \&.Bd \-literal , +an empty line will produce a zero-length string. +Multiple body parts are only found in invocations of +.Sq \&Bl \-column , +where a new body introduces a new phrase. +.Pp +The +.Xr mdoc 7 +syntax tree accommodates for broken block structures as well. +The ENDBODY node is available to end the formatting associated +with a given block before the physical end of that block. +It has a non-null +.Va end +field, is of the BODY +.Va type , +has the same +.Va tok +as the BLOCK it is ending, and has a +.Va pending +field pointing to that BLOCK's BODY node. 
+It is an indirect child of that BODY node +and has no children of its own. +.Pp +An ENDBODY node is generated when a block ends while one of its child +blocks is still open, like in the following example: +.Bd -literal -offset indent +\&.Ao ao +\&.Bo bo ac +\&.Ac bc +\&.Bc end +.Ed +.Pp +This example results in the following block structure: +.Bd -literal -offset indent +BLOCK Ao + HEAD Ao + BODY Ao + TEXT ao + BLOCK Bo, pending -> Ao + HEAD Bo + BODY Bo + TEXT bo + TEXT ac + ENDBODY Ao, pending -> Ao + TEXT bc +TEXT end +.Ed +.Pp +Here, the formatting of the +.Sq \&Ao +block extends from TEXT ao to TEXT ac, +while the formatting of the +.Sq \&Bo +block extends from TEXT bo to TEXT bc. +It renders as follows in +.Fl T Ns Cm ascii +mode: +.Pp +.Dl <ao [bo ac> bc] end +.Pp +Support for badly-nested blocks is only provided for backward +compatibility with some older +.Xr mdoc 7 +implementations. +Using badly-nested blocks is +.Em strongly discouraged ; +for example, the +.Fl T Ns Cm html +and +.Fl T Ns Cm xhtml +front-ends to +.Xr mandoc 1 +are unable to render them in any meaningful way. +Furthermore, behaviour when encountering badly-nested blocks is not +consistent across troff implementations, especially when using multiple +levels of badly-nested blocks. +.Sh SEE ALSO +.Xr mandoc 1 , +.Xr mandoc_escape 3 , +.Xr mandoc_malloc 3 , +.Xr mchars_alloc 3 , +.Xr eqn 7 , +.Xr man 7 , +.Xr mandoc_char 7 , +.Xr mdoc 7 , +.Xr roff 7 , +.Xr tbl 7 +.Sh AUTHORS +The +.Nm +library was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv . 
diff --git a/contrib/mdocml/mandoc.c b/contrib/mdocml/mandoc.c new file mode 100644 index 0000000..d265463 --- /dev/null +++ b/contrib/mdocml/mandoc.c @@ -0,0 +1,606 @@ +/* $Id: mandoc.c,v 1.98 2015/11/12 22:44:27 schwarze Exp $ */ +/* + * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011-2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <time.h> + +#include "mandoc.h" +#include "mandoc_aux.h" +#include "libmandoc.h" + +static int a2time(time_t *, const char *, const char *); +static char *time2a(time_t); + + +enum mandoc_esc +mandoc_escape(const char **end, const char **start, int *sz) +{ + const char *local_start; + int local_sz; + char term; + enum mandoc_esc gly; + + /* + * When the caller doesn't provide return storage, + * use local storage. + */ + + if (NULL == start) + start = &local_start; + if (NULL == sz) + sz = &local_sz; + + /* + * Beyond the backslash, at least one input character + * is part of the escape sequence. With one exception + * (see below), that character won't be returned. 
+ */ + + gly = ESCAPE_ERROR; + *start = ++*end; + *sz = 0; + term = '\0'; + + switch ((*start)[-1]) { + /* + * First the glyphs. There are several different forms of + * these, but each eventually returns a substring of the glyph + * name. + */ + case '(': + gly = ESCAPE_SPECIAL; + *sz = 2; + break; + case '[': + gly = ESCAPE_SPECIAL; + term = ']'; + break; + case 'C': + if ('\'' != **start) + return ESCAPE_ERROR; + *start = ++*end; + gly = ESCAPE_SPECIAL; + term = '\''; + break; + + /* + * Escapes taking no arguments at all. + */ + case 'd': + case 'u': + case ',': + case '/': + return ESCAPE_IGNORE; + + /* + * The \z escape is supposed to output the following + * character without advancing the cursor position. + * Since we are mostly dealing with terminal mode, + * let us just skip the next character. + */ + case 'z': + return ESCAPE_SKIPCHAR; + + /* + * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where + * 'X' is the trigger. These have opaque sub-strings. + */ + case 'F': + case 'g': + case 'k': + case 'M': + case 'm': + case 'n': + case 'V': + case 'Y': + gly = ESCAPE_IGNORE; + /* FALLTHROUGH */ + case 'f': + if (ESCAPE_ERROR == gly) + gly = ESCAPE_FONT; + switch (**start) { + case '(': + *start = ++*end; + *sz = 2; + break; + case '[': + *start = ++*end; + term = ']'; + break; + default: + *sz = 1; + break; + } + break; + + /* + * These escapes are of the form \X'Y', where 'X' is the trigger + * and 'Y' is any string. These have opaque sub-strings. + * The \B and \w escapes are handled in roff.c, roff_res(). + */ + case 'A': + case 'b': + case 'D': + case 'R': + case 'X': + case 'Z': + gly = ESCAPE_IGNORE; + /* FALLTHROUGH */ + case 'o': + if (**start == '\0') + return ESCAPE_ERROR; + if (gly == ESCAPE_ERROR) + gly = ESCAPE_OVERSTRIKE; + term = **start; + *start = ++*end; + break; + + /* + * These escapes are of the form \X'N', where 'X' is the trigger + * and 'N' resolves to a numerical expression. 
+ */ + case 'h': + case 'H': + case 'L': + case 'l': + case 'S': + case 'v': + case 'x': + if (strchr(" %&()*+-./0123456789:<=>", **start)) { + if ('\0' != **start) + ++*end; + return ESCAPE_ERROR; + } + gly = ESCAPE_IGNORE; + term = **start; + *start = ++*end; + break; + + /* + * Special handling for the numbered character escape. + * XXX Do any other escapes need similar handling? + */ + case 'N': + if ('\0' == **start) + return ESCAPE_ERROR; + (*end)++; + if (isdigit((unsigned char)**start)) { + *sz = 1; + return ESCAPE_IGNORE; + } + (*start)++; + while (isdigit((unsigned char)**end)) + (*end)++; + *sz = *end - *start; + if ('\0' != **end) + (*end)++; + return ESCAPE_NUMBERED; + + /* + * Sizes get a special category of their own. + */ + case 's': + gly = ESCAPE_IGNORE; + + /* See +/- counts as a sign. */ + if ('+' == **end || '-' == **end || ASCII_HYPH == **end) + *start = ++*end; + + switch (**end) { + case '(': + *start = ++*end; + *sz = 2; + break; + case '[': + *start = ++*end; + term = ']'; + break; + case '\'': + *start = ++*end; + term = '\''; + break; + case '3': + case '2': + case '1': + *sz = (*end)[-1] == 's' && + isdigit((unsigned char)(*end)[1]) ? 2 : 1; + break; + default: + *sz = 1; + break; + } + + break; + + /* + * Anything else is assumed to be a glyph. + * In this case, pass back the character after the backslash. + */ + default: + gly = ESCAPE_SPECIAL; + *start = --*end; + *sz = 1; + break; + } + + assert(ESCAPE_ERROR != gly); + + /* + * Read up to the terminating character, + * paying attention to nested escapes. + */ + + if ('\0' != term) { + while (**end != term) { + switch (**end) { + case '\0': + return ESCAPE_ERROR; + case '\\': + (*end)++; + if (ESCAPE_ERROR == + mandoc_escape(end, NULL, NULL)) + return ESCAPE_ERROR; + break; + default: + (*end)++; + break; + } + } + *sz = (*end)++ - *start; + } else { + assert(*sz > 0); + if ((size_t)*sz > strlen(*start)) + return ESCAPE_ERROR; + *end += *sz; + } + + /* Run post-processors. 
*/ + + switch (gly) { + case ESCAPE_FONT: + if (2 == *sz) { + if ('C' == **start) { + /* + * Treat constant-width font modes + * just like regular font modes. + */ + (*start)++; + (*sz)--; + } else { + if ('B' == (*start)[0] && 'I' == (*start)[1]) + gly = ESCAPE_FONTBI; + break; + } + } else if (1 != *sz) + break; + + switch (**start) { + case '3': + case 'B': + gly = ESCAPE_FONTBOLD; + break; + case '2': + case 'I': + gly = ESCAPE_FONTITALIC; + break; + case 'P': + gly = ESCAPE_FONTPREV; + break; + case '1': + case 'R': + gly = ESCAPE_FONTROMAN; + break; + } + break; + case ESCAPE_SPECIAL: + if (1 == *sz && 'c' == **start) + gly = ESCAPE_NOSPACE; + /* + * Unicode escapes are defined in groff as \[u0000] + * to \[u10FFFF], where the contained value must be + * a valid Unicode codepoint. Here, however, only + * check the length and range. + */ + if (**start != 'u' || *sz < 5 || *sz > 7) + break; + if (*sz == 7 && ((*start)[1] != '1' || (*start)[2] != '0')) + break; + if (*sz == 6 && (*start)[1] == '0') + break; + if (*sz == 5 && (*start)[1] == 'D' && + strchr("89ABCDEF", (*start)[2]) != NULL) + break; + if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef") + + 1 == *sz) + gly = ESCAPE_UNICODE; + break; + default: + break; + } + + return gly; +} + +/* + * Parse a quoted or unquoted roff-style request or macro argument. + * Return a pointer to the parsed argument, which is either the original + * pointer or advanced by one byte in case the argument is quoted. + * NUL-terminate the argument in place. + * Collapse pairs of quotes inside quoted arguments. + * Advance the argument pointer to the next argument, + * or to the NUL byte terminating the argument line. + */ +char * +mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos) +{ + char *start, *cp; + int quoted, pairs, white; + + /* Quoting can only start with a new word. 
*/ + start = *cpp; + quoted = 0; + if ('"' == *start) { + quoted = 1; + start++; + } + + pairs = 0; + white = 0; + for (cp = start; '\0' != *cp; cp++) { + + /* + * Move the following text left + * after quoted quotes and after "\\" and "\t". + */ + if (pairs) + cp[-pairs] = cp[0]; + + if ('\\' == cp[0]) { + /* + * In copy mode, translate double to single + * backslashes and backslash-t to literal tabs. + */ + switch (cp[1]) { + case 't': + cp[0] = '\t'; + /* FALLTHROUGH */ + case '\\': + pairs++; + cp++; + break; + case ' ': + /* Skip escaped blanks. */ + if (0 == quoted) + cp++; + break; + default: + break; + } + } else if (0 == quoted) { + if (' ' == cp[0]) { + /* Unescaped blanks end unquoted args. */ + white = 1; + break; + } + } else if ('"' == cp[0]) { + if ('"' == cp[1]) { + /* Quoted quotes collapse. */ + pairs++; + cp++; + } else { + /* Unquoted quotes end quoted args. */ + quoted = 2; + break; + } + } + } + + /* Quoted argument without a closing quote. */ + if (1 == quoted) + mandoc_msg(MANDOCERR_ARG_QUOTE, parse, ln, *pos, NULL); + + /* NUL-terminate this argument and move to the next one. */ + if (pairs) + cp[-pairs] = '\0'; + if ('\0' != *cp) { + *cp++ = '\0'; + while (' ' == *cp) + cp++; + } + *pos += (int)(cp - start) + (quoted ? 
1 : 0); + *cpp = cp; + + if ('\0' == *cp && (white || ' ' == cp[-1])) + mandoc_msg(MANDOCERR_SPACE_EOL, parse, ln, *pos, NULL); + + return start; +} + +static int +a2time(time_t *t, const char *fmt, const char *p) +{ + struct tm tm; + char *pp; + + memset(&tm, 0, sizeof(struct tm)); + + pp = NULL; +#if HAVE_STRPTIME + pp = strptime(p, fmt, &tm); +#endif + if (NULL != pp && '\0' == *pp) { + *t = mktime(&tm); + return 1; + } + + return 0; +} + +static char * +time2a(time_t t) +{ + struct tm *tm; + char *buf, *p; + size_t ssz; + int isz; + + tm = localtime(&t); + if (tm == NULL) + return NULL; + + /* + * Reserve space: + * up to 9 characters for the month (September) + blank + * up to 2 characters for the day + comma + blank + * 4 characters for the year and a terminating '\0' + */ + + p = buf = mandoc_malloc(10 + 4 + 4 + 1); + + if ((ssz = strftime(p, 10 + 1, "%B ", tm)) == 0) + goto fail; + p += (int)ssz; + + /* + * The output format is just "%d" here, not "%2d" or "%02d". + * That's also the reason why we can't just format the + * date as a whole with "%B %e, %Y" or "%B %d, %Y". + * Besides, the present approach is less prone to buffer + * overflows, in case anybody should ever introduce the bug + * of looking at LC_TIME. + */ + + if ((isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday)) == -1) + goto fail; + p += isz; + + if (strftime(p, 4 + 1, "%Y", tm) == 0) + goto fail; + return buf; + +fail: + free(buf); + return NULL; +} + +char * +mandoc_normdate(struct mparse *parse, char *in, int ln, int pos) +{ + time_t t; + + /* No date specified: use today's date. */ + + if (in == NULL || *in == '\0' || strcmp(in, "$" "Mdocdate$") == 0) { + mandoc_msg(MANDOCERR_DATE_MISSING, parse, ln, pos, NULL); + return time2a(time(NULL)); + } + + /* Valid mdoc(7) date format. */ + + if (a2time(&t, "$" "Mdocdate: %b %d %Y $", in) || + a2time(&t, "%b %d, %Y", in)) + return time2a(t); + + /* Do not warn about the legacy man(7) format. */ + + if ( ! 
a2time(&t, "%Y-%m-%d", in)) + mandoc_msg(MANDOCERR_DATE_BAD, parse, ln, pos, in); + + /* Use any non-mdoc(7) date verbatim. */ + + return mandoc_strdup(in); +} + +int +mandoc_eos(const char *p, size_t sz) +{ + const char *q; + int enclosed, found; + + if (0 == sz) + return 0; + + /* + * End-of-sentence recognition must include situations where + * some symbols, such as `)', allow prior EOS punctuation to + * propagate outward. + */ + + enclosed = found = 0; + for (q = p + (int)sz - 1; q >= p; q--) { + switch (*q) { + case '\"': + case '\'': + case ']': + case ')': + if (0 == found) + enclosed = 1; + break; + case '.': + case '!': + case '?': + found = 1; + break; + default: + return found && + (!enclosed || isalnum((unsigned char)*q)); + } + } + + return found && !enclosed; +} + +/* + * Convert a string to a long that may not be <0. + * If the string is invalid, or is less than 0, return -1. + */ +int +mandoc_strntoi(const char *p, size_t sz, int base) +{ + char buf[32]; + char *ep; + long v; + + if (sz > 31) + return -1; + + memcpy(buf, p, sz); + buf[(int)sz] = '\0'; + + errno = 0; + v = strtol(buf, &ep, base); + + if (buf[0] == '\0' || *ep != '\0') + return -1; + + if (v > INT_MAX) + v = INT_MAX; + if (v < INT_MIN) + v = INT_MIN; + + return (int)v; +} diff --git a/contrib/mdocml/mandoc.css b/contrib/mdocml/mandoc.css new file mode 100644 index 0000000..38c5c58 --- /dev/null +++ b/contrib/mdocml/mandoc.css @@ -0,0 +1,158 @@ +/* $Id: mandoc.css,v 1.1 2015/11/05 17:47:51 schwarze Exp $ */ + +/* + * This is an example style-sheet provided for mandoc(1) and the -Thtml + * or -Txhtml output mode. + * + * It mimics the appearance of the traditional cvsweb output. + * + * See mdoc(7) and man(7) for macro explanations. 
+ */ + +html { max-width: 880px; margin-left: 1em; } +body { font-size: smaller; font-family: Helvetica,Arial,sans-serif; } +body > div { padding-left: 2em; + padding-top: 1em; } +body > div.mandoc, +body > div#mancgi { padding-left: 0em; + padding-top: 0em; } +body > div.results { font-size: smaller; } +#mancgi fieldset { text-align: center; + border: thin solid silver; + border-radius: 1em; + font-size: small; } +#mancgi input[name=expr] { width: 25%; } +.results td.title { vertical-align: top; + padding-right: 1em; } +h1 { margin-bottom: 1ex; font-size: 110%; margin-left: -4ex; } /* Section header (Sh, SH). */ +h2 { margin-bottom: 1ex; font-size: 105%; margin-left: -2ex; } /* Sub-section header (Ss, SS). */ +table { width: 100%; margin-top: 0ex; margin-bottom: 0ex; } /* All tables. */ +td { vertical-align: top; } /* All table cells. */ +p { } /* Paragraph: Pp, Lp. */ +blockquote { margin-left: 5ex; margin-top: 0ex; margin-bottom: 0ex; } /* D1. */ +div.section { margin-bottom: 2ex; margin-left: 5ex; } /* Sections (Sh, SH). */ +div.subsection { } /* Sub-sections (Ss, SS). */ +table.synopsis { } /* SYNOPSIS section table. */ +div.spacer { margin: 1em 0; } + +/* Preamble structure. */ + +table.foot { font-size: smaller; margin-top: 1em; border-top: 1px dotted #dddddd; } /* Document footer. */ +td.foot-date { width: 50%; } /* Document footer: date. */ +td.foot-os { width: 50%; } /* Document footer: OS/source. */ +table.head { font-size: smaller; margin-bottom: 1em; border-bottom: 1px dotted #dddddd; } /* Document header. */ +td.head-ltitle { width: 10%; } /* Document header: left-title. */ +td.head-vol { width: 80%; } /* Document header: volume. */ +td.head-rtitle { width: 10%; } /* Document header: right-title. */ + +/* General font modes. */ + +i { } /* Italic: BI, IB, I, (implicit). */ +.emph { font-style: italic; font-weight: normal; } /* Emphasis: Em, Bl -emphasis. */ +b { } /* Bold: SB, BI, IB, BR, RB, B, (implicit). 
*/ +.symb { font-style: normal; font-weight: bold; } /* Symbolic: Sy, Ms, Bf -symbolic. */ +small { } /* Small: SB, SM. */ +.lit { font-style: normal; font-weight: normal; font-family: monospace; } /* Literal: Dl, Li, Ql, Bf -literal, Bl -literal, Bl -unfilled. */ + +/* Block modes. */ + +.display { } /* Top of all Bd, D1, Dl. */ +.list { } /* Top of all Bl. */ + +/* Context-specific modes. */ + +i.addr { font-weight: normal; } /* Address (Ad). */ +i.arg { font-weight: normal; } /* Command argument (Ar). */ +span.author { } /* Author name (An). */ +b.cmd { font-style: normal; } /* Command (Cm). */ +b.config { font-style: normal; } /* Config statement (Cd). */ +span.define { } /* Defines (Dv). */ +span.desc { } /* Nd. After em-dash. */ +b.diag { font-style: normal; } /* Diagnostic (Bl -diag). */ +span.env { } /* Environment variables (Ev). */ +span.errno { } /* Error string (Er). */ +i.farg { font-weight: normal; } /* Function argument (Fa, Fn). */ +i.file { font-weight: normal; } /* File (Pa). */ +b.flag { font-style: normal; } /* Flag (Fl, Cm). */ +b.fname { font-style: normal; } /* Function name (Fa, Fn, Rv). */ +i.ftype { font-weight: normal; } /* Function types (Ft, Fn). */ +b.includes { font-style: normal; } /* Header includes (In). */ +span.lib { } /* Library (Lb). */ +i.link-sec { font-weight: normal; } /* Section links (Sx). */ +b.macro { font-style: normal; } /* Macro-ish thing (Fd). */ +b.name { font-style: normal; } /* Name of utility (Nm). */ +span.opt { } /* Options (Op, Oo/Oc). */ +span.ref { } /* Citations (Rs). */ +span.ref-auth { } /* Reference author (%A). */ +i.ref-book { font-weight: normal; } /* Reference book (%B). */ +span.ref-city { } /* Reference city (%C). */ +span.ref-date { } /* Reference date (%D). */ +i.ref-issue { font-weight: normal; } /* Reference issuer/publisher (%I). */ +i.ref-jrnl { font-weight: normal; } /* Reference journal (%J). */ +span.ref-num { } /* Reference number (%N). */ +span.ref-opt { } /* Reference optionals (%O). 
*/ +span.ref-page { } /* Reference page (%P). */ +span.ref-corp { } /* Reference corporate/foreign author (%Q). */ +span.ref-rep { } /* Reference report (%R). */ +span.ref-title { text-decoration: underline; } /* Reference title (%T). */ +span.ref-vol { } /* Reference volume (%V). */ +span.type { font-style: italic; font-weight: normal; } /* Variable types (Vt). */ +span.unix { } /* Unices (Ux, Ox, Nx, Fx, Bx, Bsx, Dx). */ +b.utility { font-style: normal; } /* Name of utility (Ex). */ +b.var { font-style: normal; } /* Variables (Rv). */ + +a.link-ext { } /* Off-site link (Lk). */ +a.link-includes { } /* Include-file link (In). */ +a.link-mail { } /* Mailto links (Mt). */ +a.link-man { } /* Manual links (Xr). */ +a.link-ref { } /* Reference section links (%Q). */ +a.link-sec { } /* Section links (Sx). */ + +/* Formatting for lists. See mdoc(7). */ + +dl.list-diag { } +dt.list-diag { } +dd.list-diag { } + +dl.list-hang { } +dt.list-hang { } +dd.list-hang { } + +dl.list-inset { } +dt.list-inset { } +dd.list-inset { } + +dl.list-ohang { } +dt.list-ohang { } +dd.list-ohang { margin-left: 0ex; } + +dl.list-tag { } +dt.list-tag { } +dd.list-tag { } + +table.list-col { } +tr.list-col { } +td.list-col { } + +ul.list-bul { list-style-type: disc; padding-left: 1em; } +li.list-bul { } + +ul.list-dash { list-style-type: none; padding-left: 0em; } +li.list-dash:before { content: "\2014 "; } + +ul.list-hyph { list-style-type: none; padding-left: 0em; } +li.list-hyph:before { content: "\2013 "; } + +ul.list-item { list-style-type: none; padding-left: 0em; } +li.list-item { } + +ol.list-enum { padding-left: 2em; } +li.list-enum { } + +/* Equation modes. See eqn(7). */ + +span.eqn { } + +/* Table modes. See tbl(7). 
*/ + +table.tbl { } diff --git a/contrib/mdocml/mandoc.db.5 b/contrib/mdocml/mandoc.db.5 new file mode 100644 index 0000000..60908bc --- /dev/null +++ b/contrib/mdocml/mandoc.db.5 @@ -0,0 +1,157 @@ +.\" $Id: mandoc.db.5,v 1.3 2014/12/30 21:34:57 schwarze Exp $ +.\" +.\" Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: December 30 2014 $ +.Dt MANDOC.DB 5 +.Os +.Sh NAME +.Nm mandoc.db +.Nd manual page database +.Sh DESCRIPTION +The +.Nm +SQLite3 file format is used to store information about installed manual +pages to facilitate semantic searching for manuals. +Each manual page tree contains its own +.Nm +file; see +.Sx FILES +for examples. +.Pp +Such database files are generated by +.Xr makewhatis 8 +and used by +.Xr apropos 1 +and +.Xr whatis 1 . +.Pp +One line in the following tables describes: +.Bl -tag -width Ds +.It Sy mpages +One physical manual page file, no matter how many times and under which +names it may appear in the file system. +.It Sy mlinks +One entry in the file system, no matter which content it points to. +.It Sy names +One manual page name, no matter whether it appears in a page header, +in a NAME or SYNOPSIS section, or as a file name. 
+.It Sy keys +One chunk of text from some macro invocation. +.El +.Pp +Each record in the latter three tables uses its +.Va pageid +column to point to a record in the +.Sy mpages +table. +.Pp +The other columns are as follows; unless stated otherwise, they are +of type +.Vt TEXT . +.Bl -tag -width mpages.desc +.It Sy mpages.desc +The description line +.Pq Sq \&Nd +of the page. +.It Sy mpages.form +An +.Vt INTEGER +bit field. +If bit +.Dv FORM_GZ +is set, the page is compressed and requires +.Xr gunzip 1 +for display. +If bit +.Dv FORM_SRC +is set, the page is unformatted, that is in +.Xr mdoc 7 +or +.Xr man 7 +format, and requires +.Xr mandoc 1 +for display. +If bit +.Dv FORM_SRC +is not set, the page is formatted, i.e. a +.Sq cat +page. +.It Sy mlinks.sec +The manual section as found in the subdirectory name. +.It Sy mlinks.arch +The manual architecture as found in the subdirectory name, or +.Qq any . +.It Sy mlinks.name +The manual name as found in the file name. +.It Sy names.bits +An +.Vt INTEGER +bit mask telling whether the name came from a header line, from the +NAME or SYNOPSIS section, or from a file name. +Bits are defined in +.In mansearch.h . +.It Sy names.name +The name itself. +.It Sy keys.bits +An +.Vt INTEGER +bit mask telling which semantic contexts the key was found in; +defined in +.In mansearch.h , +documented in +.Xr apropos 1 . +.It Sy keys.key +The string found in those contexts. +.El +.Sh FILES +.Bl -tag -width /usr/share/man/mandoc.db -compact +.It Pa /usr/share/man/mandoc.db +The manual page database for the base system. +.It Pa /usr/X11R6/man/mandoc.db +The same for the +.Xr X 7 +Window System. +.It Pa /usr/local/man/mandoc.db +The same for +.Xr packages 7 . +.El +.Sh SEE ALSO +.Xr apropos 1 , +.Xr man 1 , +.Xr sqlite3 1 , +.Xr whatis 1 , +.Xr mansearch 3 , +.Xr makewhatis 8 +.Sh HISTORY +A manual page database +.Pa /usr/lib/whatis +first appeared in +.Bx 2 . +The present format first appeared in +.Ox 5.6 . 
+.Sh AUTHORS +.An -nosplit +The original version of +.Xr makewhatis 8 +was written by +.An Bill Joy +in 1979. +An SQLite3 version was first implemented by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +in 2012. +The present database format was designed by +.An Ingo Schwarze Aq Mt schwarze@openbsd.org +in 2014. diff --git a/contrib/mdocml/mandoc.h b/contrib/mdocml/mandoc.h new file mode 100644 index 0000000..d63814c --- /dev/null +++ b/contrib/mdocml/mandoc.h @@ -0,0 +1,435 @@ +/* $Id: mandoc.h,v 1.209 2016/01/08 02:53:13 schwarze Exp $ */ +/* + * Copyright (c) 2010, 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010-2016 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#define ASCII_NBRSP 31 /* non-breaking space */ +#define ASCII_HYPH 30 /* breakable hyphen */ +#define ASCII_BREAK 29 /* breakable zero-width space */ + +/* + * Status level. This refers to both internal status (i.e., whilst + * running, when warnings/errors are reported) and an indicator of a + * threshold of when to halt (when said internal state exceeds the + * threshold). + */ +enum mandoclevel { + MANDOCLEVEL_OK = 0, + MANDOCLEVEL_RESERVED, + MANDOCLEVEL_WARNING, /* warnings: syntax, whitespace, etc. 
*/ + MANDOCLEVEL_ERROR, /* input has been thrown away */ + MANDOCLEVEL_UNSUPP, /* input needs unimplemented features */ + MANDOCLEVEL_BADARG, /* bad argument in invocation */ + MANDOCLEVEL_SYSERR, /* system error */ + MANDOCLEVEL_MAX +}; + +/* + * All possible things that can go wrong within a parse, be it libroff, + * libmdoc, or libman. + */ +enum mandocerr { + MANDOCERR_OK, + + MANDOCERR_WARNING, /* ===== start of warnings ===== */ + + /* related to the prologue */ + MANDOCERR_DT_NOTITLE, /* missing manual title, using UNTITLED: line */ + MANDOCERR_TH_NOTITLE, /* missing manual title, using "": [macro] */ + MANDOCERR_TITLE_CASE, /* lower case character in document title */ + MANDOCERR_MSEC_MISSING, /* missing manual section, using "": macro */ + MANDOCERR_MSEC_BAD, /* unknown manual section: Dt ... section */ + MANDOCERR_DATE_MISSING, /* missing date, using today's date */ + MANDOCERR_DATE_BAD, /* cannot parse date, using it verbatim: date */ + MANDOCERR_OS_MISSING, /* missing Os macro, using "" */ + MANDOCERR_PROLOG_REP, /* duplicate prologue macro: macro */ + MANDOCERR_PROLOG_LATE, /* late prologue macro: macro */ + MANDOCERR_DT_LATE, /* skipping late title macro: Dt args */ + MANDOCERR_PROLOG_ORDER, /* prologue macros out of order: macros */ + + /* related to document structure */ + MANDOCERR_SO, /* .so is fragile, better use ln(1): so path */ + MANDOCERR_DOC_EMPTY, /* no document body */ + MANDOCERR_SEC_BEFORE, /* content before first section header: macro */ + MANDOCERR_NAMESEC_FIRST, /* first section is not NAME: Sh title */ + MANDOCERR_NAMESEC_NONM, /* NAME section without name */ + MANDOCERR_NAMESEC_NOND, /* NAME section without description */ + MANDOCERR_NAMESEC_ND, /* description not at the end of NAME */ + MANDOCERR_NAMESEC_BAD, /* bad NAME section content: macro */ + MANDOCERR_ND_EMPTY, /* missing description line, using "" */ + MANDOCERR_SEC_ORDER, /* sections out of conventional order: Sh title */ + MANDOCERR_SEC_REP, /* duplicate section title: Sh 
title */ + MANDOCERR_SEC_MSEC, /* unexpected section: Sh title for ... only */ + MANDOCERR_XR_ORDER, /* unusual Xr order: ... after ... */ + MANDOCERR_XR_PUNCT, /* unusual Xr punctuation: ... after ... */ + MANDOCERR_AN_MISSING, /* AUTHORS section without An macro */ + + /* related to macros and nesting */ + MANDOCERR_MACRO_OBS, /* obsolete macro: macro */ + MANDOCERR_MACRO_CALL, /* macro neither callable nor escaped: macro */ + MANDOCERR_PAR_SKIP, /* skipping paragraph macro: macro ... */ + MANDOCERR_PAR_MOVE, /* moving paragraph macro out of list: macro */ + MANDOCERR_NS_SKIP, /* skipping no-space macro */ + MANDOCERR_BLK_NEST, /* blocks badly nested: macro ... */ + MANDOCERR_BD_NEST, /* nested displays are not portable: macro ... */ + MANDOCERR_BL_MOVE, /* moving content out of list: macro */ + MANDOCERR_FI_SKIP, /* fill mode already enabled, skipping: fi */ + MANDOCERR_NF_SKIP, /* fill mode already disabled, skipping: nf */ + MANDOCERR_BLK_LINE, /* line scope broken: macro breaks macro */ + + /* related to missing arguments */ + MANDOCERR_REQ_EMPTY, /* skipping empty request: request */ + MANDOCERR_COND_EMPTY, /* conditional request controls empty scope */ + MANDOCERR_MACRO_EMPTY, /* skipping empty macro: macro */ + MANDOCERR_BLK_EMPTY, /* empty block: macro */ + MANDOCERR_ARG_EMPTY, /* empty argument, using 0n: macro arg */ + MANDOCERR_BD_NOTYPE, /* missing display type, using -ragged: Bd */ + MANDOCERR_BL_LATETYPE, /* list type is not the first argument: Bl arg */ + MANDOCERR_BL_NOWIDTH, /* missing -width in -tag list, using 8n */ + MANDOCERR_EX_NONAME, /* missing utility name, using "": Ex */ + MANDOCERR_FO_NOHEAD, /* missing function name, using "": Fo */ + MANDOCERR_IT_NOHEAD, /* empty head in list item: Bl -type It */ + MANDOCERR_IT_NOBODY, /* empty list item: Bl -type It */ + MANDOCERR_BF_NOFONT, /* missing font type, using \fR: Bf */ + MANDOCERR_BF_BADFONT, /* unknown font type, using \fR: Bf font */ + MANDOCERR_PF_SKIP, /* nothing follows prefix: Pf 
arg */ + MANDOCERR_RS_EMPTY, /* empty reference block: Rs */ + MANDOCERR_ARG_STD, /* missing -std argument, adding it: macro */ + MANDOCERR_OP_EMPTY, /* missing option string, using "": OP */ + MANDOCERR_UR_NOHEAD, /* missing resource identifier, using "": UR */ + MANDOCERR_EQN_NOBOX, /* missing eqn box, using "": op */ + + /* related to bad arguments */ + MANDOCERR_ARG_QUOTE, /* unterminated quoted argument */ + MANDOCERR_ARG_REP, /* duplicate argument: macro arg */ + MANDOCERR_AN_REP, /* skipping duplicate argument: An -arg */ + MANDOCERR_BD_REP, /* skipping duplicate display type: Bd -type */ + MANDOCERR_BL_REP, /* skipping duplicate list type: Bl -type */ + MANDOCERR_BL_SKIPW, /* skipping -width argument: Bl -type */ + MANDOCERR_BL_COL, /* wrong number of cells */ + MANDOCERR_AT_BAD, /* unknown AT&T UNIX version: At version */ + MANDOCERR_FA_COMMA, /* comma in function argument: arg */ + MANDOCERR_FN_PAREN, /* parenthesis in function name: arg */ + MANDOCERR_RS_BAD, /* invalid content in Rs block: macro */ + MANDOCERR_SM_BAD, /* invalid Boolean argument: macro arg */ + MANDOCERR_FT_BAD, /* unknown font, skipping request: ft font */ + MANDOCERR_TR_ODD, /* odd number of characters in request: tr char */ + + /* related to plain text */ + MANDOCERR_FI_BLANK, /* blank line in fill mode, using .sp */ + MANDOCERR_FI_TAB, /* tab in filled text */ + MANDOCERR_SPACE_EOL, /* whitespace at end of input line */ + MANDOCERR_COMMENT_BAD, /* bad comment style */ + MANDOCERR_ESC_BAD, /* invalid escape sequence: esc */ + MANDOCERR_STR_UNDEF, /* undefined string, using "": name */ + + /* related to tables */ + MANDOCERR_TBLLAYOUT_SPAN, /* tbl line starts with span */ + MANDOCERR_TBLLAYOUT_DOWN, /* tbl column starts with span */ + MANDOCERR_TBLLAYOUT_VERT, /* skipping vertical bar in tbl layout */ + + MANDOCERR_ERROR, /* ===== start of errors ===== */ + + /* related to tables */ + MANDOCERR_TBLOPT_ALPHA, /* non-alphabetic character in tbl options */ + MANDOCERR_TBLOPT_BAD, /* 
skipping unknown tbl option: option */ + MANDOCERR_TBLOPT_NOARG, /* missing tbl option argument: option */ + MANDOCERR_TBLOPT_ARGSZ, /* wrong tbl option argument size: option */ + MANDOCERR_TBLLAYOUT_NONE, /* empty tbl layout */ + MANDOCERR_TBLLAYOUT_CHAR, /* invalid character in tbl layout: char */ + MANDOCERR_TBLLAYOUT_PAR, /* unmatched parenthesis in tbl layout */ + MANDOCERR_TBLDATA_NONE, /* tbl without any data cells */ + MANDOCERR_TBLDATA_SPAN, /* ignoring data in spanned tbl cell: data */ + MANDOCERR_TBLDATA_EXTRA, /* ignoring extra tbl data cells: data */ + MANDOCERR_TBLDATA_BLK, /* data block open at end of tbl: macro */ + + /* related to document structure and macros */ + MANDOCERR_FILE, /* cannot open file */ + MANDOCERR_ROFFLOOP, /* input stack limit exceeded, infinite loop? */ + MANDOCERR_CHAR_BAD, /* skipping bad character: number */ + MANDOCERR_MACRO, /* skipping unknown macro: macro */ + MANDOCERR_REQ_INSEC, /* skipping insecure request: request */ + MANDOCERR_IT_STRAY, /* skipping item outside list: It ... */ + MANDOCERR_TA_STRAY, /* skipping column outside column list: Ta */ + MANDOCERR_BLK_NOTOPEN, /* skipping end of block that is not open */ + MANDOCERR_RE_NOTOPEN, /* fewer RS blocks open, skipping: RE arg */ + MANDOCERR_BLK_BROKEN, /* inserting missing end of block: macro ... 
*/ + MANDOCERR_BLK_NOEND, /* appending missing end of block: macro */ + + /* related to request and macro arguments */ + MANDOCERR_NAMESC, /* escaped character not allowed in a name: name */ + MANDOCERR_BD_FILE, /* NOT IMPLEMENTED: Bd -file */ + MANDOCERR_BD_NOARG, /* skipping display without arguments: Bd */ + MANDOCERR_BL_NOTYPE, /* missing list type, using -item: Bl */ + MANDOCERR_NM_NONAME, /* missing manual name, using "": Nm */ + MANDOCERR_OS_UNAME, /* uname(3) system call failed, using UNKNOWN */ + MANDOCERR_ST_BAD, /* unknown standard specifier: St standard */ + MANDOCERR_IT_NONUM, /* skipping request without numeric argument */ + MANDOCERR_SO_PATH, /* NOT IMPLEMENTED: .so with absolute path or ".." */ + MANDOCERR_SO_FAIL, /* .so request failed */ + MANDOCERR_ARG_SKIP, /* skipping all arguments: macro args */ + MANDOCERR_ARG_EXCESS, /* skipping excess arguments: macro ... args */ + MANDOCERR_DIVZERO, /* divide by zero */ + + MANDOCERR_UNSUPP, /* ===== start of unsupported features ===== */ + + MANDOCERR_TOOLARGE, /* input too large */ + MANDOCERR_CHAR_UNSUPP, /* unsupported control character: number */ + MANDOCERR_REQ_UNSUPP, /* unsupported roff request: request */ + MANDOCERR_TBLOPT_EQN, /* eqn delim option in tbl: arg */ + MANDOCERR_TBLLAYOUT_MOD, /* unsupported tbl layout modifier: m */ + MANDOCERR_TBLMACRO, /* ignoring macro in table: macro */ + + MANDOCERR_MAX +}; + +struct tbl_opts { + char tab; /* cell-separator */ + char decimal; /* decimal point */ + int opts; +#define TBL_OPT_CENTRE (1 << 0) +#define TBL_OPT_EXPAND (1 << 1) +#define TBL_OPT_BOX (1 << 2) +#define TBL_OPT_DBOX (1 << 3) +#define TBL_OPT_ALLBOX (1 << 4) +#define TBL_OPT_NOKEEP (1 << 5) +#define TBL_OPT_NOSPACE (1 << 6) +#define TBL_OPT_NOWARN (1 << 7) + int cols; /* number of columns */ + int lvert; /* width of left vertical line */ + int rvert; /* width of right vertical line */ +}; + +enum tbl_cellt { + TBL_CELL_CENTRE, /* c, C */ + TBL_CELL_RIGHT, /* r, R */ + TBL_CELL_LEFT, /* l, 
L */ + TBL_CELL_NUMBER, /* n, N */ + TBL_CELL_SPAN, /* s, S */ + TBL_CELL_LONG, /* a, A */ + TBL_CELL_DOWN, /* ^ */ + TBL_CELL_HORIZ, /* _, - */ + TBL_CELL_DHORIZ, /* = */ + TBL_CELL_MAX +}; + +/* + * A cell in a layout row. + */ +struct tbl_cell { + struct tbl_cell *next; + int vert; /* width of subsequent vertical line */ + enum tbl_cellt pos; + size_t spacing; + int col; /* column number, starting from 0 */ + int flags; +#define TBL_CELL_TALIGN (1 << 0) /* t, T */ +#define TBL_CELL_BALIGN (1 << 1) /* d, D */ +#define TBL_CELL_BOLD (1 << 2) /* fB, B, b */ +#define TBL_CELL_ITALIC (1 << 3) /* fI, I, i */ +#define TBL_CELL_EQUAL (1 << 4) /* e, E */ +#define TBL_CELL_UP (1 << 5) /* u, U */ +#define TBL_CELL_WIGN (1 << 6) /* z, Z */ +#define TBL_CELL_WMAX (1 << 7) /* x, X */ +}; + +/* + * A layout row. + */ +struct tbl_row { + struct tbl_row *next; + struct tbl_cell *first; + struct tbl_cell *last; + int vert; /* width of left vertical line */ +}; + +enum tbl_datt { + TBL_DATA_NONE, /* has no data */ + TBL_DATA_DATA, /* consists of data/string */ + TBL_DATA_HORIZ, /* horizontal line */ + TBL_DATA_DHORIZ, /* double-horizontal line */ + TBL_DATA_NHORIZ, /* squeezed horizontal line */ + TBL_DATA_NDHORIZ /* squeezed double-horizontal line */ +}; + +/* + * A cell within a row of data. The "string" field contains the actual + * string value that's in the cell. The rest is layout. + */ +struct tbl_dat { + struct tbl_cell *layout; /* layout cell */ + int spans; /* how many spans follow */ + struct tbl_dat *next; + char *string; /* data (NULL if not TBL_DATA_DATA) */ + enum tbl_datt pos; +}; + +enum tbl_spant { + TBL_SPAN_DATA, /* span consists of data */ + TBL_SPAN_HORIZ, /* span is horizontal line */ + TBL_SPAN_DHORIZ /* span is double horizontal line */ +}; + +/* + * A row of data in a table. 
+ */ +struct tbl_span { + struct tbl_opts *opts; + struct tbl_row *layout; /* layout row */ + struct tbl_dat *first; + struct tbl_dat *last; + struct tbl_span *prev; + struct tbl_span *next; + int line; /* parse line */ + enum tbl_spant pos; +}; + +enum eqn_boxt { + EQN_ROOT, /* root of parse tree */ + EQN_TEXT, /* text (number, variable, whatever) */ + EQN_SUBEXPR, /* nested `eqn' subexpression */ + EQN_LIST, /* list (braces, etc.) */ + EQN_LISTONE, /* singleton list */ + EQN_PILE, /* vertical pile */ + EQN_MATRIX /* pile of piles */ +}; + +enum eqn_fontt { + EQNFONT_NONE = 0, + EQNFONT_ROMAN, + EQNFONT_BOLD, + EQNFONT_FAT, + EQNFONT_ITALIC, + EQNFONT__MAX +}; + +enum eqn_post { + EQNPOS_NONE = 0, + EQNPOS_SUP, + EQNPOS_SUBSUP, + EQNPOS_SUB, + EQNPOS_TO, + EQNPOS_FROM, + EQNPOS_FROMTO, + EQNPOS_OVER, + EQNPOS_SQRT, + EQNPOS__MAX +}; + +enum eqn_pilet { + EQNPILE_NONE = 0, + EQNPILE_PILE, + EQNPILE_CPILE, + EQNPILE_RPILE, + EQNPILE_LPILE, + EQNPILE_COL, + EQNPILE_CCOL, + EQNPILE_RCOL, + EQNPILE_LCOL, + EQNPILE__MAX +}; + + /* + * A "box" is a parsed mathematical expression as defined by the eqn.7 + * grammar. 
+ */ +struct eqn_box { + int size; /* font size of expression */ +#define EQN_DEFSIZE INT_MIN + enum eqn_boxt type; /* type of node */ + struct eqn_box *first; /* first child node */ + struct eqn_box *last; /* last child node */ + struct eqn_box *next; /* node sibling */ + struct eqn_box *prev; /* node sibling */ + struct eqn_box *parent; /* node sibling */ + char *text; /* text (or NULL) */ + char *left; /* fence left-hand */ + char *right; /* fence right-hand */ + char *top; /* expression over-symbol */ + char *bottom; /* expression under-symbol */ + size_t args; /* arguments in parent */ + size_t expectargs; /* max arguments in parent */ + enum eqn_post pos; /* position of next box */ + enum eqn_fontt font; /* font of box */ + enum eqn_pilet pile; /* equation piling */ +}; + +/* + * An equation consists of a tree of expressions starting at a given + * line and position. + */ +struct eqn { + char *name; /* identifier (or NULL) */ + struct eqn_box *root; /* root mathematical expression */ + int ln; /* invocation line */ + int pos; /* invocation position */ +}; + +/* + * Parse options. + */ +#define MPARSE_MDOC 1 /* assume -mdoc */ +#define MPARSE_MAN 2 /* assume -man */ +#define MPARSE_SO 4 /* honour .so requests */ +#define MPARSE_QUICK 8 /* abort the parse early */ +#define MPARSE_UTF8 16 /* accept UTF-8 input */ +#define MPARSE_LATIN1 32 /* accept ISO-LATIN-1 input */ + +enum mandoc_esc { + ESCAPE_ERROR = 0, /* bail! 
unparsable escape */ + ESCAPE_IGNORE, /* escape to be ignored */ + ESCAPE_SPECIAL, /* a regular special character */ + ESCAPE_FONT, /* a generic font mode */ + ESCAPE_FONTBOLD, /* bold font mode */ + ESCAPE_FONTITALIC, /* italic font mode */ + ESCAPE_FONTBI, /* bold italic font mode */ + ESCAPE_FONTROMAN, /* roman font mode */ + ESCAPE_FONTPREV, /* previous font mode */ + ESCAPE_NUMBERED, /* a numbered glyph */ + ESCAPE_UNICODE, /* a unicode codepoint */ + ESCAPE_NOSPACE, /* suppress space if the last on a line */ + ESCAPE_SKIPCHAR, /* skip the next character */ + ESCAPE_OVERSTRIKE /* overstrike all chars in the argument */ +}; + +typedef void (*mandocmsg)(enum mandocerr, enum mandoclevel, + const char *, int, int, const char *); + + +struct mparse; +struct roff_man; + +enum mandoc_esc mandoc_escape(const char **, const char **, int *); +void mchars_alloc(void); +void mchars_free(void); +int mchars_num2char(const char *, size_t); +const char *mchars_uc2str(int); +int mchars_num2uc(const char *, size_t); +int mchars_spec2cp(const char *, size_t); +const char *mchars_spec2str(const char *, size_t, size_t *); +struct mparse *mparse_alloc(int, enum mandoclevel, mandocmsg, const char *); +void mparse_free(struct mparse *); +void mparse_keep(struct mparse *); +int mparse_open(struct mparse *, const char *); +enum mandoclevel mparse_readfd(struct mparse *, int, const char *); +enum mandoclevel mparse_readmem(struct mparse *, void *, size_t, + const char *); +void mparse_reset(struct mparse *); +void mparse_result(struct mparse *, + struct roff_man **, char **); +const char *mparse_getkeep(const struct mparse *); +const char *mparse_strerror(enum mandocerr); +const char *mparse_strlevel(enum mandoclevel); diff --git a/contrib/mdocml/mandoc_aux.c b/contrib/mdocml/mandoc_aux.c new file mode 100644 index 0000000..cc74b7e --- /dev/null +++ b/contrib/mdocml/mandoc_aux.c @@ -0,0 +1,111 @@ +/* $Id: mandoc_aux.c,v 1.9 2015/11/07 14:22:29 schwarze Exp $ */ +/* + * Copyright (c) 
2009, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> + +#if HAVE_ERR +#include <err.h> +#endif +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "mandoc.h" +#include "mandoc_aux.h" + + +int +mandoc_asprintf(char **dest, const char *fmt, ...) 
+{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = vasprintf(dest, fmt, ap); + va_end(ap); + + if (ret == -1) + err((int)MANDOCLEVEL_SYSERR, NULL); + return ret; +} + +void * +mandoc_calloc(size_t num, size_t size) +{ + void *ptr; + + ptr = calloc(num, size); + if (ptr == NULL) + err((int)MANDOCLEVEL_SYSERR, NULL); + return ptr; +} + +void * +mandoc_malloc(size_t size) +{ + void *ptr; + + ptr = malloc(size); + if (ptr == NULL) + err((int)MANDOCLEVEL_SYSERR, NULL); + return ptr; +} + +void * +mandoc_realloc(void *ptr, size_t size) +{ + + ptr = realloc(ptr, size); + if (ptr == NULL) + err((int)MANDOCLEVEL_SYSERR, NULL); + return ptr; +} + +void * +mandoc_reallocarray(void *ptr, size_t num, size_t size) +{ + + ptr = reallocarray(ptr, num, size); + if (ptr == NULL) + err((int)MANDOCLEVEL_SYSERR, NULL); + return ptr; +} + +char * +mandoc_strdup(const char *ptr) +{ + char *p; + + p = strdup(ptr); + if (p == NULL) + err((int)MANDOCLEVEL_SYSERR, NULL); + return p; +} + +char * +mandoc_strndup(const char *ptr, size_t sz) +{ + char *p; + + p = mandoc_malloc(sz + 1); + memcpy(p, ptr, sz); + p[sz] = '\0'; /* index with size_t; an int cast would truncate large sz */ + return p; +} diff --git a/contrib/mdocml/mandoc_aux.h b/contrib/mdocml/mandoc_aux.h new file mode 100644 index 0000000..2ae3a0c --- /dev/null +++ b/contrib/mdocml/mandoc_aux.h @@ -0,0 +1,25 @@ +/* $Id: mandoc_aux.h,v 1.4 2015/11/07 14:01:16 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +int mandoc_asprintf(char **, const char *, ...); +void *mandoc_calloc(size_t, size_t); +void *mandoc_malloc(size_t); +void *mandoc_realloc(void *, size_t); +void *mandoc_reallocarray(void *, size_t, size_t); +char *mandoc_strdup(const char *); +char *mandoc_strndup(const char *, size_t); diff --git a/contrib/mdocml/mandoc_char.7 b/contrib/mdocml/mandoc_char.7 new file mode 100644 index 0000000..d272080 --- /dev/null +++ b/contrib/mdocml/mandoc_char.7 @@ -0,0 +1,765 @@ +.\" $Id: mandoc_char.7,v 1.63 2015/09/02 15:38:35 schwarze Exp $ +.\" +.\" Copyright (c) 2003 Jason McIntyre <jmc@openbsd.org> +.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> +.\" Copyright (c) 2011, 2013, 2015 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate: September 2 2015 $ +.Dt MANDOC_CHAR 7 +.Os +.Sh NAME +.Nm mandoc_char +.Nd mandoc special characters +.Sh DESCRIPTION +This page documents the +.Xr roff 7 +escape sequences accepted by +.Xr mandoc 1 +to represent special characters in +.Xr mdoc 7 +and +.Xr man 7 +documents. +.Pp +The rendering depends on the +.Xr mandoc 1 +output mode; in ASCII output, most characters are completely +unintelligible. +For that reason, using any of the special characters documented here, +except those discussed in the +.Sx DESCRIPTION , +is strongly discouraged; they are supported merely for backwards +compatibility with existing documents. +.Pp +In particular, in English manual pages, do not use special-character +escape sequences to represent national language characters in author +names; instead, provide ASCII transcriptions of the names. +.Ss Dashes and Hyphens +In typography there are different types of dashes of various width: +the hyphen (-), +the minus sign (\(mi), +the en-dash (\(en), +and the em-dash (\(em). +.Pp +Hyphens are used for adjectives; +to separate the two parts of a compound word; +or to separate a word across two successive lines of text. +The hyphen does not need to be escaped: +.Bd -unfilled -offset indent +blue-eyed +lorry-driver +.Ed +.Pp +The mathematical minus sign is used for negative numbers or subtraction. +It should be written as +.Sq \e(mi : +.Bd -unfilled -offset indent +a = 3 \e(mi 1; +b = \e(mi2; +.Ed +.Pp +The en-dash is used to separate the two elements of a range, +or can be used the same way as an em-dash. +It should be written as +.Sq \e(en : +.Bd -unfilled -offset indent +pp. 95\e(en97. +Go away \e(en or else! +.Ed +.Pp +The em-dash can be used to show an interruption +or can be used the same way as colons, semi-colons, or parentheses. +It should be written as +.Sq \e(em : +.Bd -unfilled -offset indent +Three things \e(em apples, oranges, and bananas. +This is not that \e(em rather, this is that. 
+.Ed +.Pp +Note: +hyphens, minus signs, and en-dashes look identical under normal ASCII output. +Other formats, such as PostScript, render them correctly, +with differing widths. +.Ss Spaces +To separate words in normal text, for indenting and alignment +in literal context, and when none of the following special cases apply, +just use the normal space character +.Pq Sq \ . +.Pp +When filling text, output lines may be broken between words, i.e. at space +characters. +To prevent a line break between two particular words, +use the unpaddable non-breaking space escape sequence +.Pq Sq \e\ \& +instead of the normal space character. +For example, the input string +.Dq number\e\ 1 +will be kept together as +.Dq number\ 1 +on the same output line. +.Pp +On request and macro lines, the normal space character serves as an +argument delimiter. +To include whitespace into arguments, quoting is usually the best choice; +see the MACRO SYNTAX section in +.Xr roff 7 . +In some cases, using the non-breaking space escape sequence +.Pq Sq \e\ \& +may be preferable. +.Pp +To escape macro names and to protect whitespace at the end +of input lines, the zero-width space +.Pq Sq \e& +is often useful. +For example, in +.Xr mdoc 7 , +a normal space character can be displayed in single quotes in either +of the following ways: +.Pp +.Dl .Sq \(dq \(dq +.Dl .Sq \e \e& +.Ss Quotes +On request and macro lines, the double-quote character +.Pq Sq \(dq +is handled specially to allow quoting. +One way to prevent this special handling is by using the +.Sq \e(dq +escape sequence. +.Pp +Note that on text lines, literal double-quote characters can be used +verbatim. +All other quote-like characters can be used verbatim as well, +even on request and macro lines. 
+.Ss Accents +In output modes supporting such special output characters, for example +.Fl T Cm pdf , +some +.Xr roff 7 +formatters convert the following ASCII input characters to the +following Unicode special output characters: +.Bl -column x(ga U+2018 -offset indent +.It \(ga Ta U+2018 Ta left single quotation mark +.It \(aq Ta U+2019 Ta right single quotation mark +.It \(ti Ta U+02DC Ta small tilde +.El +.Pp +In prose, this automatic substitution is often desirable; +but when these characters have to be displayed as plain ASCII +characters, for example in source code samples, they require +escaping to render as follows: +.Bl -column x(ga U+2018 -offset indent +.It \e(ga Ta U+0060 Ta grave accent +.It \e(aq Ta U+0027 Ta apostrophe +.It \e(ti Ta U+007E Ta tilde +.El +.Ss Periods +The period +.Pq Sq \&. +is handled specially at the beginning of an input line, +where it introduces a +.Xr roff 7 +request or a macro, and when appearing alone as a macro argument in +.Xr mdoc 7 . +In such situations, prepend a zero-width space +.Pq Sq \e&. +to make it behave like normal text. +.Pp +Do not use the +.Sq \e. +escape sequence. +It does not prevent special handling of the period. +.Ss Backslashes +To include a literal backslash +.Pq Sq \e +into the output, use the +.Pq Sq \ee +escape sequence. +.Pp +Note that doubling it +.Pq Sq \e\e +is not the right way to output a backslash. +Because +.Xr mandoc 1 +does not implement full +.Xr roff 7 +functionality, it may work with +.Xr mandoc 1 , +but it may have weird effects on complete +.Xr roff 7 +implementations. +.Sh SPECIAL CHARACTERS +Special characters are encoded as +.Sq \eX +.Pq for a one-character escape , +.Sq \e(XX +.Pq two-character , +and +.Sq \e[N] +.Pq N-character . +For details, see the +.Em Special Characters +subsection of the +.Xr roff 7 +manual. 
+.Pp +Spacing: +.Bl -column "Input" "Description" -offset indent -compact +.It Em Input Ta Em Description +.It Sq \e\ \& Ta unpaddable non-breaking space +.It \e\(ti Ta paddable non-breaking space +.It \e0 Ta unpaddable, breaking digit-width space +.It \e| Ta one-sixth \e(em narrow space, zero width in nroff mode +.It \e^ Ta one-twelfth \e(em half-narrow space, zero width in nroff +.It \e& Ta zero-width space +.It \e% Ta zero-width space allowing hyphenation +.El +.Pp +Lines: +.Bl -column "Input" "Rendered" "Description" -offset indent -compact +.It Em Input Ta Em Rendered Ta Em Description +.It \e(ba Ta \(ba Ta bar +.It \e(br Ta \(br Ta box rule +.It \e(ul Ta \(ul Ta underscore +.It \e(rn Ta \(rn Ta overline +.It \e(bb Ta \(bb Ta broken bar +.It \e(sl Ta \(sl Ta forward slash +.It \e(rs Ta \(rs Ta backward slash +.El +.Pp +Text markers: +.Bl -column "Input" "Rendered" "Description" -offset indent -compact +.It Em Input Ta Em Rendered Ta Em Description +.It \e(ci Ta \(ci Ta circle +.It \e(bu Ta \(bu Ta bullet +.It \e(dd Ta \(dd Ta double dagger +.It \e(dg Ta \(dg Ta dagger +.It \e(lz Ta \(lz Ta lozenge +.It \e(sq Ta \(sq Ta white square +.It \e(ps Ta \(ps Ta paragraph +.It \e(sc Ta \(sc Ta section +.It \e(lh Ta \(lh Ta left hand +.It \e(rh Ta \(rh Ta right hand +.It \e(at Ta \(at Ta at +.It \e(sh Ta \(sh Ta hash (pound) +.It \e(CR Ta \(CR Ta carriage return +.It \e(OK Ta \(OK Ta check mark +.El +.Pp +Legal symbols: +.Bl -column "Input" "Rendered" "Description" -offset indent -compact +.It Em Input Ta Em Rendered Ta Em Description +.It \e(co Ta \(co Ta copyright +.It \e(rg Ta \(rg Ta registered +.It \e(tm Ta \(tm Ta trademarked +.El +.Pp +Punctuation: +.Bl -column "Input" "Rendered" "Description" -offset indent -compact +.It Em Input Ta Em Rendered Ta Em Description +.It \e(em Ta \(em Ta em-dash +.It \e(en Ta \(en Ta en-dash +.It \e(hy Ta \(hy Ta hyphen +.It \ee Ta \e Ta back-slash +.It \e. Ta \. Ta period +.It \e(r! Ta \(r! Ta upside-down exclamation +.It \e(r? 
Ta \(r? Ta upside-down question +.El +.Pp +Quotes: +.Bl -column "Input" "Rendered" "Description" -offset indent -compact +.It Em Input Ta Em Rendered Ta Em Description +.It \e(Bq Ta \(Bq Ta right low double-quote +.It \e(bq Ta \(bq Ta right low single-quote +.It \e(lq Ta \(lq Ta left double-quote +.It \e(rq Ta \(rq Ta right double-quote +.It \e(oq Ta \(oq Ta left single-quote +.It \e(cq Ta \(cq Ta right single-quote +.It \e(aq Ta \(aq Ta apostrophe quote (text) +.It \e(dq Ta \(dq Ta double quote (text) +.It \e(Fo Ta \(Fo Ta left guillemet +.It \e(Fc Ta \(Fc Ta right guillemet +.It \e(fo Ta \(fo Ta left single guillemet +.It \e(fc Ta \(fc Ta right single guillemet +.El +.Pp +Brackets: +.Bl -column "xxbracketrightbtx" Rendered Description -offset indent -compact +.It Em Input Ta Em Rendered Ta Em Description +.It \e(lB Ta \(lB Ta left bracket +.It \e(rB Ta \(rB Ta right bracket +.It \e(lC Ta \(lC Ta left brace +.It \e(rC Ta \(rC Ta right brace +.It \e(la Ta \(la Ta left angle +.It \e(ra Ta \(ra Ta right angle +.It \e(bv Ta \(bv Ta brace extension +.It \e[braceex] Ta \[braceex] Ta brace extension +.It \e[bracketlefttp] Ta \[bracketlefttp] Ta top-left hooked bracket +.It \e[bracketleftbt] Ta \[bracketleftbt] Ta bottom-left hooked bracket +.It \e[bracketleftex] Ta \[bracketleftex] Ta left hooked bracket extension +.It \e[bracketrighttp] Ta \[bracketrighttp] Ta top-right hooked bracket +.It \e[bracketrightbt] Ta \[bracketrightbt] Ta bottom-right hooked bracket +.It \e[bracketrightex] Ta \[bracketrightex] Ta right hooked bracket extension +.It \e(lt Ta \(lt Ta top-left hooked brace +.It \e[bracelefttp] Ta \[bracelefttp] Ta top-left hooked brace +.It \e(lk Ta \(lk Ta mid-left hooked brace +.It \e[braceleftmid] Ta \[braceleftmid] Ta mid-left hooked brace +.It \e(lb Ta \(lb Ta bottom-left hooked brace +.It \e[braceleftbt] Ta \[braceleftbt] Ta bottom-left hooked brace +.It \e[braceleftex] Ta \[braceleftex] Ta left hooked brace extension +.It \e(rt Ta \(rt Ta top-right hooked 
brace +.It \e[bracerighttp] Ta \[bracerighttp] Ta top-right hooked brace +.It \e(rk Ta \(rk Ta mid-right hooked brace +.It \e[bracerightmid] Ta \[bracerightmid] Ta mid-right hooked brace +.It \e(rb Ta \(rb Ta bottom-right hooked brace +.It \e[bracerightbt] Ta \[bracerightbt] Ta bottom-right hooked brace +.It \e[bracerightex] Ta \[bracerightex] Ta right hooked brace extension +.It \e[parenlefttp] Ta \[parenlefttp] Ta top-left hooked parenthesis +.It \e[parenleftbt] Ta \[parenleftbt] Ta bottom-left hooked parenthesis +.It \e[parenleftex] Ta \[parenleftex] Ta left hooked parenthesis extension +.It \e[parenrighttp] Ta \[parenrighttp] Ta top-right hooked parenthesis +.It \e[parenrightbt] Ta \[parenrightbt] Ta bottom-right hooked parenthesis +.It \e[parenrightex] Ta \[parenrightex] Ta right hooked parenthesis extension +.El +.Pp +Arrows: +.Bl -column "Input" "Rendered" "Description" -offset indent -compact +.It Em Input Ta Em Rendered Ta Em Description +.It \e(<- Ta \(<- Ta left arrow +.It \e(-> Ta \(-> Ta right arrow +.It \e(<> Ta \(<> Ta left-right arrow +.It \e(da Ta \(da Ta down arrow +.It \e(ua Ta \(ua Ta up arrow +.It \e(va Ta \(va Ta up-down arrow +.It \e(lA Ta \(lA Ta left double-arrow +.It \e(rA Ta \(rA Ta right double-arrow +.It \e(hA Ta \(hA Ta left-right double-arrow +.It \e(uA Ta \(uA Ta up double-arrow +.It \e(dA Ta \(dA Ta down double-arrow +.It \e(vA Ta \(vA Ta up-down double-arrow +.El +.Pp +Logical: +.Bl -column "Input" "Rendered" "Description" -offset indent -compact +.It Em Input Ta Em Rendered Ta Em Description +.It \e(AN Ta \(AN Ta logical and +.It \e(OR Ta \(OR Ta logical or +.It \e(no Ta \(no Ta logical not +.It \e[tno] Ta \[tno] Ta logical not (text) +.It \e(te Ta \(te Ta existential quantifier +.It \e(fa Ta \(fa Ta universal quantifier +.It \e(st Ta \(st Ta such that +.It \e(tf Ta \(tf Ta therefore +.It \e(3d Ta \(3d Ta therefore +.It \e(or Ta \(or Ta bitwise or +.El +.Pp +Mathematical: +.Bl -column "xxcoproductxx" "Rendered" "Description" 
-offset indent -compact +.It Em Input Ta Em Rendered Ta Em Description +.It \e(pl Ta \(pl Ta plus +.It \e(mi Ta \(mi Ta minus +.It \e- Ta \- Ta minus (text) +.It \e(-+ Ta \(-+ Ta minus-plus +.It \e(+- Ta \(+- Ta plus-minus +.It \e[t+-] Ta \[t+-] Ta plus-minus (text) +.It \e(pc Ta \(pc Ta center-dot +.It \e(mu Ta \(mu Ta multiply +.It \e[tmu] Ta \[tmu] Ta multiply (text) +.It \e(c* Ta \(c* Ta circle-multiply +.It \e(c+ Ta \(c+ Ta circle-plus +.It \e(di Ta \(di Ta divide +.It \e[tdi] Ta \[tdi] Ta divide (text) +.It \e(f/ Ta \(f/ Ta fraction +.It \e(** Ta \(** Ta asterisk +.It \e(<= Ta \(<= Ta less-than-equal +.It \e(>= Ta \(>= Ta greater-than-equal +.It \e(<< Ta \(<< Ta much less +.It \e(>> Ta \(>> Ta much greater +.It \e(eq Ta \(eq Ta equal +.It \e(!= Ta \(!= Ta not equal +.It \e(== Ta \(== Ta equivalent +.It \e(ne Ta \(ne Ta not equivalent +.It \e(ap Ta \(ap Ta tilde operator +.It \e(|= Ta \(|= Ta asymptotically equal +.It \e(=\(ti Ta \(=~ Ta approximately equal +.It \e(\(ti\(ti Ta \(~~ Ta almost equal +.It \e(\(ti= Ta \(~= Ta almost equal +.It \e(pt Ta \(pt Ta proportionate +.It \e(es Ta \(es Ta empty set +.It \e(mo Ta \(mo Ta element +.It \e(nm Ta \(nm Ta not element +.It \e(sb Ta \(sb Ta proper subset +.It \e(nb Ta \(nb Ta not subset +.It \e(sp Ta \(sp Ta proper superset +.It \e(nc Ta \(nc Ta not superset +.It \e(ib Ta \(ib Ta reflexive subset +.It \e(ip Ta \(ip Ta reflexive superset +.It \e(ca Ta \(ca Ta intersection +.It \e(cu Ta \(cu Ta union +.It \e(/_ Ta \(/_ Ta angle +.It \e(pp Ta \(pp Ta perpendicular +.It \e(is Ta \(is Ta integral +.It \e[integral] Ta \[integral] Ta integral +.It \e[sum] Ta \[sum] Ta summation +.It \e[product] Ta \[product] Ta product +.It \e[coproduct] Ta \[coproduct] Ta coproduct +.It \e(gr Ta \(gr Ta gradient +.It \e(sr Ta \(sr Ta square root +.It \e[sqrt] Ta \[sqrt] Ta square root +.It \e(lc Ta \(lc Ta left-ceiling +.It \e(rc Ta \(rc Ta right-ceiling +.It \e(lf Ta \(lf Ta left-floor +.It \e(rf Ta \(rf Ta right-floor +.It \e(if Ta 
\(if Ta infinity +.It \e(Ah Ta \(Ah Ta aleph +.It \e(Im Ta \(Im Ta imaginary +.It \e(Re Ta \(Re Ta real +.It \e(pd Ta \(pd Ta partial differential +.It \e(-h Ta \(-h Ta Planck constant over 2\(*p +.It \e[12] Ta \[12] Ta one-half +.It \e[14] Ta \[14] Ta one-fourth +.It \e[34] Ta \[34] Ta three-fourths +.El +.Pp +Ligatures: +.Bl -column "Input" "Rendered" "Description" -offset indent -compact +.It Em Input Ta Em Rendered Ta Em Description +.It \e(ff Ta \(ff Ta ff ligature +.It \e(fi Ta \(fi Ta fi ligature +.It \e(fl Ta \(fl Ta fl ligature +.It \e(Fi Ta \(Fi Ta ffi ligature +.It \e(Fl Ta \(Fl Ta ffl ligature +.It \e(AE Ta \(AE Ta AE +.It \e(ae Ta \(ae Ta ae +.It \e(OE Ta \(OE Ta OE +.It \e(oe Ta \(oe Ta oe +.It \e(ss Ta \(ss Ta German eszett +.It \e(IJ Ta \(IJ Ta IJ ligature +.It \e(ij Ta \(ij Ta ij ligature +.El +.Pp +Accents: +.Bl -column "Input" "Rendered" "Description" -offset indent -compact +.It Em Input Ta Em Rendered Ta Em Description +.It \e(a" Ta \(a" Ta Hungarian umlaut +.It \e(a- Ta \(a- Ta macron +.It \e(a. Ta \(a. 
Ta dotted +.It \e(a^ Ta \(a^ Ta circumflex +.It \e(aa Ta \(aa Ta acute +.It \e\(aq Ta \' Ta acute +.It \e(ga Ta \(ga Ta grave +.It \e\(ga Ta \` Ta grave +.It \e(ab Ta \(ab Ta breve +.It \e(ac Ta \(ac Ta cedilla +.It \e(ad Ta \(ad Ta dieresis +.It \e(ah Ta \(ah Ta caron +.It \e(ao Ta \(ao Ta ring +.It \e(a\(ti Ta \(a~ Ta tilde +.It \e(ho Ta \(ho Ta ogonek +.It \e(ha Ta \(ha Ta hat (text) +.It \e(ti Ta \(ti Ta tilde (text) +.El +.Pp +Accented letters: +.Bl -column "Input" "Rendered" "Description" -offset indent -compact +.It Em Input Ta Em Rendered Ta Em Description +.It \e(\(aqA Ta \('A Ta acute A +.It \e(\(aqE Ta \('E Ta acute E +.It \e(\(aqI Ta \('I Ta acute I +.It \e(\(aqO Ta \('O Ta acute O +.It \e(\(aqU Ta \('U Ta acute U +.It \e(\(aqa Ta \('a Ta acute a +.It \e(\(aqe Ta \('e Ta acute e +.It \e(\(aqi Ta \('i Ta acute i +.It \e(\(aqo Ta \('o Ta acute o +.It \e(\(aqu Ta \('u Ta acute u +.It \e(\(gaA Ta \(`A Ta grave A +.It \e(\(gaE Ta \(`E Ta grave E +.It \e(\(gaI Ta \(`I Ta grave I +.It \e(\(gaO Ta \(`O Ta grave O +.It \e(\(gaU Ta \(`U Ta grave U +.It \e(\(gaa Ta \(`a Ta grave a +.It \e(\(gae Ta \(`e Ta grave e +.It \e(\(gai Ta \(`i Ta grave i +.It \e(\(gao Ta \(`o Ta grave o +.It \e(\(gau Ta \(`u Ta grave u +.It \e(\(tiA Ta \(~A Ta tilde A +.It \e(\(tiN Ta \(~N Ta tilde N +.It \e(\(tiO Ta \(~O Ta tilde O +.It \e(\(tia Ta \(~a Ta tilde a +.It \e(\(tin Ta \(~n Ta tilde n +.It \e(\(tio Ta \(~o Ta tilde o +.It \e(:A Ta \(:A Ta dieresis A +.It \e(:E Ta \(:E Ta dieresis E +.It \e(:I Ta \(:I Ta dieresis I +.It \e(:O Ta \(:O Ta dieresis O +.It \e(:U Ta \(:U Ta dieresis U +.It \e(:a Ta \(:a Ta dieresis a +.It \e(:e Ta \(:e Ta dieresis e +.It \e(:i Ta \(:i Ta dieresis i +.It \e(:o Ta \(:o Ta dieresis o +.It \e(:u Ta \(:u Ta dieresis u +.It \e(:y Ta \(:y Ta dieresis y +.It \e(^A Ta \(^A Ta circumflex A +.It \e(^E Ta \(^E Ta circumflex E +.It \e(^I Ta \(^I Ta circumflex I +.It \e(^O Ta \(^O Ta circumflex O +.It \e(^U Ta \(^U Ta circumflex U +.It \e(^a Ta \(^a Ta circumflex 
a +.It \e(^e Ta \(^e Ta circumflex e +.It \e(^i Ta \(^i Ta circumflex i +.It \e(^o Ta \(^o Ta circumflex o +.It \e(^u Ta \(^u Ta circumflex u +.It \e(,C Ta \(,C Ta cedilla C +.It \e(,c Ta \(,c Ta cedilla c +.It \e(/L Ta \(/L Ta stroke L +.It \e(/l Ta \(/l Ta stroke l +.It \e(/O Ta \(/O Ta stroke O +.It \e(/o Ta \(/o Ta stroke o +.It \e(oA Ta \(oA Ta ring A +.It \e(oa Ta \(oa Ta ring a +.El +.Pp +Special letters: +.Bl -column "Input" "Rendered" "Description" -offset indent -compact +.It Em Input Ta Em Rendered Ta Em Description +.It \e(-D Ta \(-D Ta Eth +.It \e(Sd Ta \(Sd Ta eth +.It \e(TP Ta \(TP Ta Thorn +.It \e(Tp Ta \(Tp Ta thorn +.It \e(.i Ta \(.i Ta dotless i +.It \e(.j Ta \(.j Ta dotless j +.El +.Pp +Currency: +.Bl -column "Input" "Rendered" "Description" -offset indent -compact +.It Em Input Ta Em Rendered Ta Em Description +.It \e(Do Ta \(Do Ta dollar +.It \e(ct Ta \(ct Ta cent +.It \e(Eu Ta \(Eu Ta Euro symbol +.It \e(eu Ta \(eu Ta Euro symbol +.It \e(Ye Ta \(Ye Ta yen +.It \e(Po Ta \(Po Ta pound +.It \e(Cs Ta \(Cs Ta Scandinavian +.It \e(Fn Ta \(Fn Ta florin +.El +.Pp +Units: +.Bl -column "Input" "Rendered" "Description" -offset indent -compact +.It Em Input Ta Em Rendered Ta Em Description +.It \e(de Ta \(de Ta degree +.It \e(%0 Ta \(%0 Ta per-thousand +.It \e(fm Ta \(fm Ta minute +.It \e(sd Ta \(sd Ta second +.It \e(mc Ta \(mc Ta micro +.El +.Pp +Greek letters: +.Bl -column "Input" "Rendered" "Description" -offset indent -compact +.It Em Input Ta Em Rendered Ta Em Description +.It \e(*A Ta \(*A Ta Alpha +.It \e(*B Ta \(*B Ta Beta +.It \e(*G Ta \(*G Ta Gamma +.It \e(*D Ta \(*D Ta Delta +.It \e(*E Ta \(*E Ta Epsilon +.It \e(*Z Ta \(*Z Ta Zeta +.It \e(*Y Ta \(*Y Ta Eta +.It \e(*H Ta \(*H Ta Theta +.It \e(*I Ta \(*I Ta Iota +.It \e(*K Ta \(*K Ta Kappa +.It \e(*L Ta \(*L Ta Lambda +.It \e(*M Ta \(*M Ta Mu +.It \e(*N Ta \(*N Ta Nu +.It \e(*C Ta \(*C Ta Xi +.It \e(*O Ta \(*O Ta Omicron +.It \e(*P Ta \(*P Ta Pi +.It \e(*R Ta \(*R Ta Rho +.It \e(*S Ta \(*S Ta 
Sigma +.It \e(*T Ta \(*T Ta Tau +.It \e(*U Ta \(*U Ta Upsilon +.It \e(*F Ta \(*F Ta Phi +.It \e(*X Ta \(*X Ta Chi +.It \e(*Q Ta \(*Q Ta Psi +.It \e(*W Ta \(*W Ta Omega +.It \e(*a Ta \(*a Ta alpha +.It \e(*b Ta \(*b Ta beta +.It \e(*g Ta \(*g Ta gamma +.It \e(*d Ta \(*d Ta delta +.It \e(*e Ta \(*e Ta epsilon +.It \e(*z Ta \(*z Ta zeta +.It \e(*y Ta \(*y Ta eta +.It \e(*h Ta \(*h Ta theta +.It \e(*i Ta \(*i Ta iota +.It \e(*k Ta \(*k Ta kappa +.It \e(*l Ta \(*l Ta lambda +.It \e(*m Ta \(*m Ta mu +.It \e(*n Ta \(*n Ta nu +.It \e(*c Ta \(*c Ta xi +.It \e(*o Ta \(*o Ta omicron +.It \e(*p Ta \(*p Ta pi +.It \e(*r Ta \(*r Ta rho +.It \e(*s Ta \(*s Ta sigma +.It \e(*t Ta \(*t Ta tau +.It \e(*u Ta \(*u Ta upsilon +.It \e(*f Ta \(*f Ta phi +.It \e(*x Ta \(*x Ta chi +.It \e(*q Ta \(*q Ta psi +.It \e(*w Ta \(*w Ta omega +.It \e(+h Ta \(+h Ta theta variant +.It \e(+f Ta \(+f Ta phi variant +.It \e(+p Ta \(+p Ta pi variant +.It \e(+e Ta \(+e Ta epsilon variant +.It \e(ts Ta \(ts Ta sigma terminal +.El +.Sh PREDEFINED STRINGS +Predefined strings are inherited from the macro packages of historical +troff implementations. +They are +.Em not recommended +for use, as they differ across implementations. +Manuals using these predefined strings are almost certainly not +portable. +.Pp +Their syntax is similar to special characters, using +.Sq \e*X +.Pq for a one-character escape , +.Sq \e*(XX +.Pq two-character , +and +.Sq \e*[N] +.Pq N-character . +For details, see the +.Em Predefined Strings +subsection of the +.Xr roff 7 +manual. 
+.Bl -column "Input" "Rendered" "Description" -offset indent +.It Em Input Ta Em Rendered Ta Em Description +.It \e*(Ba Ta \*(Ba Ta vertical bar +.It \e*(Ne Ta \*(Ne Ta not equal +.It \e*(Ge Ta \*(Ge Ta greater-than-equal +.It \e*(Le Ta \*(Le Ta less-than-equal +.It \e*(Gt Ta \*(Gt Ta greater-than +.It \e*(Lt Ta \*(Lt Ta less-than +.It \e*(Pm Ta \*(Pm Ta plus-minus +.It \e*(If Ta \*(If Ta infinity +.It \e*(Pi Ta \*(Pi Ta pi +.It \e*(Na Ta \*(Na Ta NaN +.It \e*(Am Ta \*(Am Ta ampersand +.It \e*R Ta \*R Ta restricted mark +.It \e*(Tm Ta \*(Tm Ta trade mark +.It \e*q Ta \*q Ta double-quote +.It \e*(Rq Ta \*(Rq Ta right-double-quote +.It \e*(Lq Ta \*(Lq Ta left-double-quote +.It \e*(lp Ta \*(lp Ta left-parenthesis +.It \e*(rp Ta \*(rp Ta right-parenthesis +.It \e*(lq Ta \*(lq Ta left double-quote +.It \e*(rq Ta \*(rq Ta right double-quote +.It \e*(ua Ta \*(ua Ta up arrow +.It \e*(va Ta \*(va Ta up-down arrow +.It \e*(<= Ta \*(<= Ta less-than-equal +.It \e*(>= Ta \*(>= Ta greater-than-equal +.It \e*(aa Ta \*(aa Ta acute +.It \e*(ga Ta \*(ga Ta grave +.It \e*(Px Ta \*(Px Ta POSIX standard name +.It \e*(Ai Ta \*(Ai Ta ANSI standard name +.El +.Sh UNICODE CHARACTERS +The escape sequences +.Pp +.Dl \e[uXXXX] and \eC\(aquXXXX\(aq +.Pp +are interpreted as Unicode codepoints. +The codepoint must be in the range above U+0080 and less than U+10FFFF. +For compatibility, the hexadecimal digits +.Sq A +to +.Sq F +must be given as uppercase characters, +and points must be zero-padded to four characters; if +greater than four characters, no zero padding is allowed. +Unicode surrogates are not allowed. +.Sh NUMBERED CHARACTERS +For backward compatibility with existing manuals, +.Xr mandoc 1 +also supports the +.Pp +.Dl \eN\(aq Ns Ar number Ns \(aq +.Pp +escape sequence, inserting the character +.Ar number +from the current character set into the output. +Of course, this is inherently non-portable and is already marked +as deprecated in the Heirloom roff manual. 
+For example, do not use \eN\(aq34\(aq, use \e(dq, or even the plain +.Sq \(dq +character where possible. +.Sh COMPATIBILITY +This section documents compatibility between mandoc and other +troff implementations, at this time limited to GNU troff +.Pq Qq groff . +.Pp +.Bl -dash -compact +.It +The \eN\(aq\(aq escape sequence is limited to printable characters; in +groff, it accepts arbitrary character numbers. +.It +In +.Fl T Ns Cm ascii , +the +\e(ss, \e(nm, \e(nb, \e(nc, \e(ib, \e(ip, \e(pp, \e[sum], \e[product], +\e[coproduct], \e(gr, \e(-h, and \e(a. special characters render +differently between mandoc and groff. +.It +In +.Fl T Ns Cm html +and +.Fl T Ns Cm xhtml , +the \e(\(ti=, \e(nb, and \e(nc special characters render differently +between mandoc and groff. +.It +The +.Fl T Ns Cm ps +and +.Fl T Ns Cm pdf +modes format like +.Fl T Ns Cm ascii +instead of rendering glyphs as in groff. +.It +The \e[radicalex], \e[sqrtex], and \e(ru special characters have been omitted +from mandoc either because they are poorly documented or they have no +known representation. +.El +.Sh SEE ALSO +.Xr mandoc 1 , +.Xr man 7 , +.Xr mdoc 7 , +.Xr roff 7 +.Sh AUTHORS +The +.Nm +manual page was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv . +.Sh CAVEATS +The predefined string +.Sq \e*(Ba +mimics the behaviour of the +.Sq \&| +character in +.Xr mdoc 7 ; +thus, if you wish to render a vertical bar with no side effects, use +the +.Sq \e(ba +escape. 
diff --git a/contrib/mdocml/mandoc_escape.3 b/contrib/mdocml/mandoc_escape.3 new file mode 100644 index 0000000..fec298b87 --- /dev/null +++ b/contrib/mdocml/mandoc_escape.3 @@ -0,0 +1,367 @@ +.\" $Id: mandoc_escape.3,v 1.3 2015/01/21 20:33:25 schwarze Exp $ +.\" +.\" Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: January 21 2015 $ +.Dt MANDOC_ESCAPE 3 +.Os +.Sh NAME +.Nm mandoc_escape +.Nd parse roff escape sequences +.Sh SYNOPSIS +.In sys/types.h +.In mandoc.h +.Ft "enum mandoc_esc" +.Fo mandoc_escape +.Fa "const char **end" +.Fa "const char **start" +.Fa "int *sz" +.Fc +.Sh DESCRIPTION +This function scans a +.Xr roff 7 +escape sequence. +.Pp +An escape sequence consists of +.Bl -dash -compact -width 2n +.It +an initial backslash character +.Pq Sq \e , +.It +a single ASCII character called the escape sequence identifier, +.It +and, with only a few exceptions, an argument. +.El +.Pp +Arguments can be given in the following forms; some escape sequence +identifiers only accept some of these forms as specified below. +The first three forms are called the standard forms. 
+.Bl -tag -width 2n +.It \&In brackets: Ic \&[ Ns Ar argument Ns Ic \&] +The argument starts after the initial +.Sq \&[ , +ends before the final +.Sq \&] , +and the escape sequence ends with the final +.Sq \&] . +.It Two-character argument short form: Ic \&( Ns Ar ar +This form can only be used for arguments +consisting of exactly two characters. +It has the same effect as +.Ic \&[ Ns Ar ar Ns Ic \&] . +.It One-character argument short form: Ar a +This form can only be used for arguments +consisting of exactly one character. +It has the same effect as +.Ic \&[ Ns Ar a Ns Ic \&] . +.It Delimited form: Ar C Ns Ar argument Ns Ar C +The argument starts after the initial delimiter character +.Ar C , +ends before the next occurrence of the delimiter character +.Ar C , +and the escape sequence ends with that second +.Ar C . +Some escape sequences allow arbitrary characters +.Ar C +as quoting characters, some restrict the range of characters +that can be used as quoting characters. +.El +.Pp +Upon function entry, +.Fa end +is expected to point to the escape sequence identifier. +The values passed in as +.Fa start +and +.Fa sz +are ignored and overwritten. +.Pp +By design, this function cannot handle those +.Xr roff 7 +escape sequences that require in-place expansion, in particular +user-defined strings +.Ic \e* , +number registers +.Ic \en , +width measurements +.Ic \ew , +and numerical expression control +.Ic \eB . +These are handled by +.Fn roff_res , +a private preprocessor function called from +.Fn roff_parseln , +see the file +.Pa roff.c . 
+.Pp +The function +.Fn mandoc_escape +is used +.Bl -dash -compact -width 2n +.It +recursively by itself, because some escape sequence arguments can +in turn contain other escape sequences, +.It +for error detection internally by the +.Xr roff 7 +parser part of the +.Xr mandoc 3 +library, see the file +.Pa roff.c , +.It +above all externally by the +.Xr mandoc 1 +formatting modules, in particular +.Fl Tascii +and +.Fl Thtml , +for formatting purposes, see the files +.Pa term.c +and +.Pa html.c , +.It +and rarely externally by high-level utilities using the mandoc library, +for example +.Xr makewhatis 8 , +to purge escape sequences from text. +.El +.Sh RETURN VALUES +Upon function return, the pointer +.Fa end +is set to the character after the end of the escape sequence, +such that the calling higher-level parser can easily continue. +.Pp +For escape sequences taking an argument, the pointer +.Fa start +is set to the beginning of the argument and +.Fa sz +is set to the length of the argument. +For escape sequences not taking an argument, +.Fa start +is set to the character after the end of the sequence and +.Fa sz +is set to 0. +Both +.Fa start +and +.Fa sz +may be +.Dv NULL ; +in that case, the argument and the length are not returned. +.Pp +For sequences taking an argument, the function +.Fn mandoc_escape +returns one of the following values: +.Bl -tag -width 2n +.It Dv ESCAPE_FONT +The escape sequence +.Ic \ef +taking an argument in standard form: +.Ic \ef[ , \ef( , \ef Ns Ar a . +Two-character arguments starting with the character +.Sq C +are reduced to one-character arguments by skipping the +.Sq C .
+More specific values are returned for the most commonly used arguments: +.Bl -column "argument" "ESCAPE_FONTITALIC" +.It argument Ta return value +.It Cm R No or Cm 1 Ta Dv ESCAPE_FONTROMAN +.It Cm I No or Cm 2 Ta Dv ESCAPE_FONTITALIC +.It Cm B No or Cm 3 Ta Dv ESCAPE_FONTBOLD +.It Cm P Ta Dv ESCAPE_FONTPREV +.It Cm BI Ta Dv ESCAPE_FONTBI +.El +.It Dv ESCAPE_SPECIAL +The escape sequence +.Ic \eC +taking an argument delimited with the single quote character +and, as a special exception, the escape sequences +.Em not +having an identifier, that is, those where the argument, in standard +form, directly follows the initial backslash: +.Ic \eC' , \e[ , \e( , \e Ns Ar a . +Note that the one-character argument short form can only be used for +argument characters that do not clash with escape sequence identifiers. +.Pp +If the argument matches one of the forms described below under +.Dv ESCAPE_UNICODE , +that value is returned instead. +.Pp +The +.Dv ESCAPE_SPECIAL +special character escape sequences can be rendered using the functions +.Fn mchars_spec2cp +and +.Fn mchars_spec2str +described in the +.Xr mchars_alloc 3 +manual. +.It Dv ESCAPE_UNICODE +Escape sequences of the same format as described above under +.Dv ESCAPE_SPECIAL , +but with an argument of the forms +.Ic u Ns Ar XXXX , +.Ic u Ns Ar YXXXX , +or +.Ic u10 Ns Ar XXXX +where +.Ar X +and +.Ar Y +are hexadecimal digits and +.Ar Y +is not zero: +.Ic \eC'u , \e[u . +As a special exception, +.Fa start +is set to the character after the +.Ic u , +and the +.Fa sz +return value does not include the +.Ic u +either. +.Pp +Such Unicode character escape sequences can be rendered using the function +.Fn mchars_num2uc +described in the +.Xr mchars_alloc 3 +manual. +.It Dv ESCAPE_NUMBERED +The escape sequence +.Ic \eN +followed by a delimited argument. +The delimiter character is arbitrary except that digits cannot be used. 
+If a digit is encountered instead of the opening delimiter, that +digit is considered to be the argument and the end of the sequence, and +.Dv ESCAPE_IGNORE +is returned. +.Pp +Such ASCII character escape sequences can be rendered using the function +.Fn mchars_num2char +described in the +.Xr mchars_alloc 3 +manual. +.It Dv ESCAPE_OVERSTRIKE +The escape sequence +.Ic \eo +followed by an argument delimited by an arbitrary character. +.It Dv ESCAPE_IGNORE +.Bl -bullet -width 2n +.It +The escape sequence +.Ic \es +followed by an argument in standard form or by an argument delimited +by the single quote character: +.Ic \es' , \es[ , \es( , \es Ns Ar a . +As a special exception, an optional +.Sq + +or +.Sq \- +character is allowed after the +.Sq s +for all forms. +.It +The escape sequences +.Ic \eF , +.Ic \eg , +.Ic \ek , +.Ic \eM , +.Ic \em , +.Ic \en , +.Ic \eV , +and +.Ic \eY +followed by an argument in standard form. +.It +The escape sequences +.Ic \eA , +.Ic \eb , +.Ic \eD , +.Ic \eR , +.Ic \eX , +and +.Ic \eZ +followed by an argument delimited by an arbitrary character. +.It +The escape sequences +.Ic \eH , +.Ic \eh , +.Ic \eL , +.Ic \el , +.Ic \eS , +.Ic \ev , +and +.Ic \ex +followed by an argument delimited by a character that cannot occur +in numerical expressions. +However, if any character that can occur in numerical expressions +is found instead of a delimiter, the sequence is considered to end +with that character, and +.Dv ESCAPE_ERROR +is returned. +.El +.It Dv ESCAPE_ERROR +Escape sequences taking an argument but not matching any of the above patterns. +In particular, that happens if the end of the logical input line +is reached before the end of the argument. +.El +.Pp +For sequences that do not take an argument, the function +.Fn mandoc_escape +returns one of the following values: +.Bl -tag -width 2n +.It Dv ESCAPE_SKIPCHAR +The escape sequence +.Qq \ez . +.It Dv ESCAPE_NOSPACE +The escape sequence +.Qq \ec . 
+.It Dv ESCAPE_IGNORE +The escape sequences +.Qq \ed +and +.Qq \eu . +.El +.Sh FILES +This function is implemented in +.Pa mandoc.c . +.Sh SEE ALSO +.Xr mchars_alloc 3 , +.Xr mandoc_char 7 , +.Xr roff 7 +.Sh HISTORY +This function has been available since mandoc 1.11.2. +.Sh AUTHORS +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +.An Ingo Schwarze Aq Mt schwarze@openbsd.org +.Sh BUGS +The function doesn't cleanly distinguish between sequences that are +valid and supported, valid and ignored, valid and unsupported, +syntactically invalid, or undefined. +For sequences that are ignored or unsupported, it doesn't tell +whether that deficiency is likely to cause major formatting problems +and/or loss of document content. +The function is already rather complicated and still parses some +sequences incorrectly. +. +.ig +For these sequences, the list given below specifies a starting string +and either the length of the argument or an ending character. +The argument starts after the starting string. +In the former case, the sequence ends with the end of the argument. +In the latter case, the argument ends before the ending character, +and the sequence ends with the ending character. +.. diff --git a/contrib/mdocml/mandoc_headers.3 b/contrib/mdocml/mandoc_headers.3 new file mode 100644 index 0000000..6c30e16 --- /dev/null +++ b/contrib/mdocml/mandoc_headers.3 @@ -0,0 +1,517 @@ +.Dd December 1, 2014 +.Dt MANDOC_HEADERS 3 +.Os +.Sh NAME +.Nm mandoc_headers +.Nd ordering of mandoc include files +.Sh DESCRIPTION +To support a cleaner coding style, the mandoc header files do not +contain any include directives and do not guard against multiple +inclusion. +The application developer has to make sure that the headers are +included in a proper order, and that no header is included more +than once. +.Pp +The headers and functions form three major groups: +.Sx Parser interface , +.Sx Parser internals , +and +.Sx Formatter interface . 
+.Pp +Various rules are given below prohibiting the inclusion of certain +combinations of headers into the same file. +The intention is to keep the following functional components +separate from each other: +.Pp +.Bl -dash -offset indent -compact +.It +.Xr mdoc 7 +parser +.It +.Xr man 7 +parser +.It +.Xr roff 7 +parser +.It +.Xr tbl 7 +parser +.It +.Xr eqn 7 +parser +.It +terminal formatters +.It +HTML formatters +.It +search tools +.El +.Pp +Note that mere usage of an opaque struct type does +.Em not +require inclusion of the header where that type is defined. +.Ss Parser interface +Each of the following headers can be included without including +any other mandoc header. +These headers should be included before any other mandoc headers. +.Bl -tag -width Ds +.It Qq Pa mandoc_aux.h +Requires +.In sys/types.h +for +.Vt size_t . +Provides the utility functions documented in +.Xr mandoc_malloc 3 . +.It Qq Pa mandoc.h +Requires +.In sys/types.h +for +.Vt size_t . +.Pp +Provides +.Vt enum mandoc_esc , +.Vt enum mandocerr , +.Vt enum mandoclevel , +.Vt enum tbl_cellt , +.Vt enum tbl_datt , +.Vt enum tbl_spant , +.Vt enum eqn_boxt , +.Vt enum eqn_fontt , +.Vt enum eqn_pilet , +.Vt enum eqn_post , +.Vt struct tbl_opts , +.Vt struct tbl_head , +.Vt struct tbl_cell , +.Vt struct tbl_row , +.Vt struct tbl_dat , +.Vt struct tbl_span , +.Vt struct eqn_box , +.Vt struct eqn , +the function prototype typedef +.Fn mandocmsg , +the function +.Xr mandoc_escape 3 , +the functions described in +.Xr mchars_alloc 3 , +and the functions +.Fn mparse_* +described in +.Xr mandoc 3 . +.Pp +Uses the opaque type +.Vt struct mparse +from +.Pa read.c +for function prototypes. +Uses the types +.Vt struct mdoc +from +.Pa libmdoc.h +and +.Vt struct man +from +.Pa libman.h +as opaque types for function prototypes. +.It Qq Pa roff.h +Provides +.Vt enum mdoc_endbody , +.Vt enum roff_sec , +.Vt enum roff_type , +.Vt struct roff_meta , +and +.Vt struct roff_node . 
+.Pp +Uses pointers to the types +.Vt struct mdoc_arg +and +.Vt union mdoc_data +from +.Qq Pa mdoc.h +as opaque struct members. +.El +.Pp +The following two require +.Qq Pa roff.h +but no other mandoc headers. +Afterwards, any other mandoc headers can be included as needed. +.Bl -tag -width Ds +.It Qq Pa mdoc.h +Requires +.In sys/types.h +for +.Vt size_t +and +.Qq Pa roff.h +for +.Vt enum roff_type . +.Pp +Provides +.Vt enum mdocargt , +.Vt enum mdoc_disp , +.Vt enum mdoc_list , +.Vt enum mdoc_auth , +.Vt enum mdoc_font , +.Vt struct mdoc_argv , +.Vt struct mdoc_arg , +.Vt struct mdoc_bd , +.Vt struct mdoc_bl , +.Vt struct mdoc_an , +.Vt struct mdoc_bf , +.Vt struct mdoc_rs , +and the functions +.Fn mdoc_* +described in +.Xr mandoc 3 . +.Pp +Uses the type +.Vt struct mdoc +from +.Pa libmdoc.h +as an opaque type for function prototypes. +Uses pointers to the types +.Vt struct tbl_span +and +.Vt struct eqn +as opaque struct members. +.Pp +When this header is included, the same file should not include +.Pa libman.h +or +.Pa libroff.h . +.It Qq Pa man.h +Requires +.Qq Pa roff.h +for +.Vt enum roff_type . +.Pp +Provides the functions +.Fn man_* +described in +.Xr mandoc 3 . +.Pp +Uses the opaque type +.Vt struct mparse +from +.Pa read.c +for function prototypes. +Uses the type +.Vt struct man +from +.Pa libman.h +as an opaque type for function prototypes. +Uses pointers to the types +.Vt struct tbl_span +and +.Vt struct eqn +as opaque struct members. +.Pp +When this header is included, the same file should not include +.Pa libmdoc.h +or +.Pa libroff.h . +.El +.Ss Parser internals +The following headers require inclusion of a parser interface header +before they can be included. All parser interface headers should +precede all parser internal headers. When any parser internal headers +are included, the same file should not include any formatter headers. 
+.Bl -tag -width Ds +.It Qq Pa libmandoc.h +Requires +.In sys/types.h +for +.Vt size_t , +.Qq Pa mandoc.h +for +.Vt enum mandocerr , +and +.Qq Pa roff.h +for +.Vt struct roff_meta +and +.Vt struct roff_node . +.Pp +Provides +.Vt enum rofferr , +.Vt struct buf , +utility functions needed by multiple parsers, +and the top-level functions to call the parsers. +.Pp +Uses the opaque types +.Vt struct mparse +from +.Pa read.c +and +.Vt struct roff +from +.Pa roff.c +for function prototypes. +Uses the types +.Vt struct tbl_span +and +.Vt struct eqn +from +.Pa mandoc.h , +.Vt struct mdoc +from +.Pa libmdoc.h , +and +.Vt struct man +from +.Pa libman.h +as opaque types for function prototypes. +.It Qq Pa libmdoc.h +Requires +.Qq Pa mdoc.h +for +.Vt enum mdoc_* +and +.Vt struct mdoc_* . +.Pp +Provides +.Vt enum mdoc_next , +.Vt enum margserr , +.Vt enum mdelim , +.Vt struct mdoc , +.Vt struct mdoc_macro , +and many functions internal to the +.Xr mdoc 7 +parser. +.Pp +Uses the opaque types +.Vt struct mparse +from +.Pa read.c +and +.Vt struct roff +from +.Pa roff.c . +.Pp +When this header is included, the same file should not include +.Pa man.h , +.Pa libman.h , +or +.Pa libroff.h . +.It Qq Pa libman.h +Requires +.Qq Pa roff.h +for +.Vt struct roff_meta +and +.Vt struct roff_node . +.Pp +Provides +.Vt enum man_next , +.Vt struct man , +.Vt struct man_macro , +and many functions internal to the +.Xr man 7 +parser. +.Pp +Uses the opaque types +.Vt struct mparse +from +.Pa read.c +and +.Vt struct roff +from +.Pa roff.c . +.Pp +When this header is included, the same file should not include +.Pa mdoc.h , +.Pa libmdoc.h , +or +.Pa libroff.h . +.It Qq Pa libroff.h +Requires +.In sys/types.h +for +.Vt size_t , +.Qq Pa mandoc.h +for +.Vt struct tbl_* +and +.Vt struct eqn , +and +.Qq Pa libmandoc.h +for +.Vt enum rofferr . 
+.Pp +Provides +.Vt enum tbl_part , +.Vt struct tbl_node , +.Vt struct eqn_def , +.Vt struct eqn_node , +and many functions internal to the +.Xr tbl 7 +and +.Xr eqn 7 +parsers. +.Pp +Uses the opaque type +.Vt struct mparse +from +.Pa read.c . +.Pp +When this header is included, the same file should not include +.Pa man.h , +.Pa mdoc.h , +.Pa libman.h , +or +.Pa libmdoc.h . +.El +.Ss Formatter interface +These headers should be included after any parser interface headers. +No parser internal headers should be included by the same file. +.Bl -tag -width Ds +.It Qq Pa out.h +Requires +.In sys/types.h +for +.Vt size_t . +.Pp +Provides +.Vt enum roffscale , +.Vt struct roffcol , +.Vt struct roffsu , +.Vt struct rofftbl , +.Fn a2roffsu , +and +.Fn tblcalc . +.Pp +Uses +.Vt struct tbl_span +from +.Pa mandoc.h +as an opaque type for function prototypes. +.Pp +When this header is included, the same file should not include +.Pa mansearch.h . +.It Qq Pa term.h +Requires +.In sys/types.h +for +.Vt size_t +and +.Qq Pa out.h +for +.Vt struct roffsu +and +.Vt struct rofftbl . +.Pp +Provides +.Vt enum termenc , +.Vt enum termfont , +.Vt enum termtype , +.Vt struct termp_tbl , +.Vt struct termp , +and many terminal formatting functions. +.Pp +Uses the opaque type +.Vt struct termp_ps +from +.Pa term_ps.c . +Uses +.Vt struct tbl_span +and +.Vt struct eqn +from +.Pa mandoc.h +and +.Vt struct roff_meta +from +.Qq Pa roff.h +as opaque types for function prototypes. +.Pp +When this header is included, the same file should not include +.Pa html.h +or +.Pa mansearch.h . +.It Qq Pa html.h +Requires +.In sys/types.h +for +.Vt size_t , +.In stdio.h +for +.Dv BUFSIZ , +and +.Qq Pa out.h +for +.Vt struct roffsu +and +.Vt struct rofftbl . +.Pp +Provides +.Vt enum htmltag , +.Vt enum htmlattr , +.Vt enum htmlfont , +.Vt struct tag , +.Vt struct tagq , +.Vt struct htmlpair , +.Vt struct html , +and many HTML formatting functions. 
+.Pp +When this header is included, the same file should not include +.Pa term.h +or +.Pa mansearch.h . +.It Qq Pa main.h +Provides the top level steering functions for all formatters. +.Pp +Uses the types +.Vt struct mdoc +from +.Pa libmdoc.h +and +.Vt struct man +from +.Pa libman.h +as opaque types for function prototypes. +.It Qq Pa manconf.h +Requires +.In sys/types.h +for +.Vt size_t . +.Pp +Provides +.Vt struct manconf , +.Vt struct manpaths , +.Vt struct manoutput , +and the functions +.Fn manconf_parse , +.Fn manconf_output , +and +.Fn manconf_free . +.It Qq Pa mansearch.h +Requires +.In sys/types.h +for +.Vt size_t +and +.In stdint.h +for +.Vt uint64_t . +.Pp +Provides +.Vt enum argmode , +.Vt struct manpage , +.Vt struct mansearch , +and the functions +.Fn mansearch_setup , +.Fn mansearch , +and +.Fn mansearch_free . +.Pp +Uses +.Vt struct manpaths +from +.Pa manconf.h +as an opaque type for function prototypes. +.Pp +When this header is included, the same file should not include +.Pa out.h , +.Pa term.h , +or +.Pa html.h . +.El diff --git a/contrib/mdocml/mandoc_html.3 b/contrib/mdocml/mandoc_html.3 new file mode 100644 index 0000000..994eb3a --- /dev/null +++ b/contrib/mdocml/mandoc_html.3 @@ -0,0 +1,249 @@ +.\" $Id: mandoc_html.3,v 1.1 2014/07/23 18:13:09 schwarze Exp $ +.\" +.\" Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: July 23 2014 $ +.Dt MANDOC_HTML 3 +.Os +.Sh NAME +.Nm mandoc_html +.Nd internals of the mandoc HTML formatter +.Sh SYNOPSIS +.In "html.h" +.Ft void +.Fn print_gen_decls "struct html *h" +.Ft void +.Fn print_gen_head "struct html *h" +.Ft struct tag * +.Fo print_otag +.Fa "struct html *h" +.Fa "enum htmltag tag" +.Fa "int sz" +.Fa "const struct htmlpair *p" +.Fc +.Ft void +.Fo print_tagq +.Fa "struct html *h" +.Fa "const struct tag *until" +.Fc +.Ft void +.Fo print_stagq +.Fa "struct html *h" +.Fa "const struct tag *suntil" +.Fc +.Ft void +.Fo print_text +.Fa "struct html *h" +.Fa "const char *word" +.Fc +.Sh DESCRIPTION +The mandoc HTML formatter is not a formal library. +However, as it is compiled into more than one program, in particular +.Xr mandoc 1 +and +.Xr man.cgi 8 , +and because it may be security-critical in some contexts, +some documentation is useful to help to use it correctly and +to prevent XSS vulnerabilities. +.Pp +The formatter produces HTML output on the standard output. +Since proper escaping is usually required and best taken care of +at one central place, the language-specific formatters +.Po +.Pa *_html.c , +see +.Sx FILES +.Pc +are not supposed to print directly to +.Dv stdout +using functions like +.Xr printf 3 , +.Xr putc 3 , +.Xr puts 3 , +or +.Xr write 2 . +Instead, they are expected to use the output functions declared in +.Pa html.h +and implemented as part of the main HTML formatting engine in +.Pa html.c . +.Ss Data structures +These structures are declared in +.Pa html.h . +.Bl -tag -width Ds +.It Vt struct html +Internal state of the HTML formatter. 
+.It Vt struct htmlpair +Holds one HTML attribute. +Members are +.Fa "enum htmlattr key" +and +.Fa "const char *val" . +Helper macros +.Fn PAIR_* +are provided to support initialization of such structures. +.It Vt struct tag +One entry for the LIFO stack of HTML elements. +Members are +.Fa "enum htmltag tag" +and +.Fa "struct tag *next" . +.El +.Ss Private interface functions +The function +.Fn print_gen_decls +prints the opening +.Ao Pf \&? Ic xml ? Ac +and +.Aq Pf \&! Ic DOCTYPE +declarations required for the current document type. +.Pp +The function +.Fn print_gen_head +prints the opening +.Aq Ic META +and +.Aq Ic LINK +elements for the document +.Aq Ic HEAD , +using the +.Fa style +member of +.Fa h +unless that is +.Dv NULL . +It uses +.Fn print_otag +which takes care of properly encoding attributes, +which is relevant for the +.Fa style +link in particular. +.Pp +The function +.Fn print_otag +prints the start tag of an HTML element with the name +.Fa tag , +including the +.Fa sz +attributes that can optionally be provided in the +.Fa p +array. +It uses the private function +.Fn print_attr +which in turn uses the private function +.Fn print_encode +to take care of HTML encoding. +If required by the element type, it remembers in +.Fa h +that the element is open. +The function +.Fn print_tagq +is used to close out all open elements up to and including +.Fa until ; +.Fn print_stagq +is a variant to close out all open elements up to but excluding +.Fa suntil . +.Pp +The function +.Fn print_text +prints HTML element content. +It uses the private function +.Fn print_encode +to take care of HTML encoding. +If the document has requested a non-standard font, for example using a +.Xr roff 7 +.Ic \ef +font escape sequence, +.Fn print_text +wraps +.Fa word +in an HTML font selection element using the +.Fn print_otag +and +.Fn print_tagq +functions. 
+.Pp +The functions +.Fn bufinit , +.Fn bufcat* , +and +.Fn buffmt* +do not directly produce output but buffer text in the +.Fa buf +member of +.Fa h . +They are not used internally by +.Pa html.c +but intended for use by the language-specific formatters +to ease preparation of strings for the +.Fa p +argument of +.Fn print_otag +and for the +.Fa word +argument of +.Fn print_text . +Consequently, these functions do not do any HTML encoding. +.Pp +The functions +.Fn html_strlen , +.Fn print_eqn , +.Fn print_tbl , +and +.Fn print_tblclose +are not yet documented. +.Sh FILES +.Bl -tag -width mandoc_aux.c -compact +.It Pa main.h +declarations of public functions for use by the main program, +not yet documented +.It Pa html.h +declarations of data types and private functions +for use by language-specific HTML formatters +.It Pa html.c +main HTML formatting engine and utility functions +.It Pa mdoc_html.c +.Xr mdoc 7 +HTML formatter +.It Pa man_html.c +.Xr man 7 +HTML formatter +.It Pa tbl_html.c +.Xr tbl 7 +HTML formatter +.It Pa eqn_html.c +.Xr eqn 7 +HTML formatter +.It Pa out.h +declarations of data types and private functions +for shared use by all mandoc formatters, +not yet documented +.It Pa out.c +private functions for shared use by all mandoc formatters +.It Pa mandoc_aux.h +declarations of common mandoc utility functions, see +.Xr mandoc 3 +.It Pa mandoc_aux.c +implementation of common mandoc utility functions +.El +.Sh SEE ALSO +.Xr mandoc 1 , +.Xr mandoc 3 , +.Xr man.cgi 8 +.Sh AUTHORS +.An -nosplit +The mandoc HTML formatter was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv . +This manual was written by +.An Ingo Schwarze Aq Mt schwarze@openbsd.org . 
diff --git a/contrib/mdocml/mandoc_malloc.3 b/contrib/mdocml/mandoc_malloc.3 new file mode 100644 index 0000000..c167984 --- /dev/null +++ b/contrib/mdocml/mandoc_malloc.3 @@ -0,0 +1,197 @@ +.\" $Id: mandoc_malloc.3,v 1.1 2014/08/05 05:48:56 schwarze Exp $ +.\" +.\" Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: August 5 2014 $ +.Dt MANDOC_MALLOC 3 +.Os +.Sh NAME +.Nm mandoc_malloc , +.Nm mandoc_realloc , +.Nm mandoc_reallocarray , +.Nm mandoc_calloc , +.Nm mandoc_strdup , +.Nm mandoc_strndup , +.Nm mandoc_asprintf +.Nd memory allocation function wrappers used in the mandoc library +.Sh LIBRARY +.Lb libmandoc +.Sh SYNOPSIS +.In sys/types.h +.In mandoc_aux.h +.Ft "void *" +.Fo mandoc_malloc +.Fa "size_t size" +.Fc +.Ft "void *" +.Fo mandoc_realloc +.Fa "void *ptr" +.Fa "size_t size" +.Fc +.Ft "void *" +.Fo mandoc_reallocarray +.Fa "void *ptr" +.Fa "size_t nmemb" +.Fa "size_t size" +.Fc +.Ft "void *" +.Fo mandoc_calloc +.Fa "size_t nmemb" +.Fa "size_t size" +.Fc +.Ft "char *" +.Fo mandoc_strdup +.Fa "const char *s" +.Fc +.Ft "char *" +.Fo mandoc_strndup +.Fa "const char *s" +.Fa "size_t maxlen" +.Fc +.Ft int +.Fo mandoc_asprintf +.Fa "char **ret" +.Fa "const char *format" +.Fa "..." 
+.Fc +.Sh DESCRIPTION +These functions call the +.Lb libc +functions of the same names, passing through their return values when +successful. +In case of failure, they do not return, but instead call +.Xr perror 3 +and +.Xr exit 3 . +They can be used both internally by any code in the +.Lb libmandoc +and externally by programs using that library, for example +.Xr mandoc 1 , +.Xr apropos 1 , +and +.Xr makewhatis 8 . +.Pp +The function +.Fn mandoc_malloc +allocates one new object, leaving the memory uninitialized. +The functions +.Fn mandoc_realloc +and +.Fn mandoc_reallocarray +change the size of an existing object or array, possibly moving it. +When shrinking the size, existing data is truncated; when growing, +the additional memory is not initialized. +The function +.Fn mandoc_calloc +allocates a new array, initializing it to zero. +.Pp +The argument +.Fa size +is the size of each object. +The argument +.Fa nmemb +is the new number of objects in the array. +The argument +.Fa ptr +is a pointer to the existing object or array to be resized; if it is +.Dv NULL , +a new object or array is allocated. +.Pp +The functions +.Fn mandoc_strdup +and +.Fn mandoc_strndup +copy a string into newly allocated memory. +For +.Fn mandoc_strdup , +the string pointed to by +.Fa s +needs to be NUL-terminated. +For +.Fn mandoc_strndup , +at most +.Fa maxlen +bytes are copied. +The function +.Fn mandoc_asprintf +writes output formatted according to +.Fa format +into newly allocated memory and returns a pointer to the result in +.Fa ret . +For all three string functions, the result is always NUL-terminated. +.Pp +When the objects and strings are no longer needed, +the pointers returned by these functions can be passed to +.Xr free 3 . +.Sh RETURN VALUES +The function +.Fn mandoc_asprintf +always returns the number of characters written, excluding the +final NUL byte. +It never returns -1. +.Pp +The other functions always return a valid pointer; they never return +.Dv NULL . 
+.Sh FILES +These functions are implemented in +.Pa mandoc_aux.c . +.Sh SEE ALSO +.Xr asprintf 3 , +.Xr exit 3 , +.Xr malloc 3 , +.Xr perror 3 , +.Xr strdup 3 +.Sh STANDARDS +The functions +.Fn malloc , +.Fn realloc , +and +.Fn calloc +are required by +.St -ansiC . +The functions +.Fn strdup +and +.Fn strndup +are required by +.St -p1003.1-2008 . +The function +.Fn asprintf +is a widespread extension that first appeared in the GNU C library. +.Pp +The function +.Fn reallocarray +is an extension that first appeared in +.Ox 5.6 . +If it is not provided by the operating system, the mandoc build system +uses a bundled portable implementation. +.Sh HISTORY +The functions +.Fn mandoc_malloc , +.Fn mandoc_realloc , +.Fn mandoc_calloc , +and +.Fn mandoc_strdup +have been available since mandoc 1.9.12, +.Fn mandoc_strndup +since 1.11.5, +and +.Fn mandoc_asprintf +and +.Fn mandoc_reallocarray +since 1.12.4 and 1.13.0. +.Sh AUTHORS +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +.An Ingo Schwarze Aq Mt schwarze@openbsd.org diff --git a/contrib/mdocml/mandoc_ohash.c b/contrib/mdocml/mandoc_ohash.c new file mode 100644 index 0000000..0627b46 --- /dev/null +++ b/contrib/mdocml/mandoc_ohash.c @@ -0,0 +1,63 @@ +/* $Id: mandoc_ohash.c,v 1.2 2015/10/19 18:58:47 schwarze Exp $ */ +/* + * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> + +#include "mandoc_aux.h" +#include "mandoc_ohash.h" + +static void *hash_alloc(size_t, void *); +static void *hash_calloc(size_t, size_t, void *); +static void hash_free(void *, void *); + + +/* + * Initialize the hash table h by calling ohash_init() with an ohash_info + * that installs hash_alloc(), hash_calloc() and hash_free() as the memory + * callbacks, a NULL callback data pointer, and ko as the key offset. + * The size argument sz is handed to ohash_init() unchanged. + */ +void +mandoc_ohash_init(struct ohash *h, unsigned int sz, ptrdiff_t ko) +{ + struct ohash_info info; + + info.alloc = hash_alloc; + info.calloc = hash_calloc; + info.free = hash_free; + info.data = NULL; + info.key_offset = ko; + + ohash_init(h, sz, &info); +} + +/* + * Allocation callback installed by mandoc_ohash_init(): + * returns mandoc_malloc(sz); the callback data pointer arg is unused. + */ +static void * +hash_alloc(size_t sz, void *arg) +{ + + return mandoc_malloc(sz); +} + +/* + * Zeroing allocation callback installed by mandoc_ohash_init(): + * returns mandoc_calloc(n, sz); the callback data pointer arg is unused. + */ +static void * +hash_calloc(size_t n, size_t sz, void *arg) +{ + + return mandoc_calloc(n, sz); +} + +/* + * Release callback installed by mandoc_ohash_init(): + * passes p to free(3); the callback data pointer arg is unused. + */ +static void +hash_free(void *p, void *arg) +{ + + free(p); +} diff --git a/contrib/mdocml/mandoc_ohash.h b/contrib/mdocml/mandoc_ohash.h new file mode 100644 index 0000000..571c4cd --- /dev/null +++ b/contrib/mdocml/mandoc_ohash.h @@ -0,0 +1,23 @@ +/* $Id: mandoc_ohash.h,v 1.2 2015/11/07 14:01:16 schwarze Exp $ */ +/* + * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#if HAVE_OHASH +#include <ohash.h> +#else +#include "compat_ohash.h" +#endif + +void mandoc_ohash_init(struct ohash *, unsigned int, ptrdiff_t); diff --git a/contrib/mdocml/mandocdb.c b/contrib/mdocml/mandocdb.c new file mode 100644 index 0000000..08f89c1 --- /dev/null +++ b/contrib/mdocml/mandocdb.c @@ -0,0 +1,2531 @@ +/* $Id: mandocdb.c,v 1.215 2016/01/08 17:48:09 schwarze Exp $ */ +/* + * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011-2016 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> + +#include <assert.h> +#include <ctype.h> +#if HAVE_ERR +#include <err.h> +#endif +#include <errno.h> +#include <fcntl.h> +#if HAVE_FTS +#include <fts.h> +#else +#include "compat_fts.h" +#endif +#include <getopt.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <sqlite3.h> + +#include "mandoc_aux.h" +#include "mandoc_ohash.h" +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "man.h" +#include "manconf.h" +#include "mansearch.h" + +extern int mansearch_keymax; +extern const char *const mansearch_keynames[]; + +/* + * Wrappers around sqlite3 calls that report a failing result through say(). + * They use db (and, except for SQL_EXEC, mlink) from the expansion site + * instead of taking them as arguments, and the SQL_BIND_* macros + * post-increment the index argument _i on every use. + * NOTE(review): each macro expands to a bare if statement, so using one + * as the unbraced body of an if that has an else would capture that else; + * wrapping the bodies in do { ... } while (0) would remove the hazard. + */ +#define SQL_EXEC(_v) \ + if (SQLITE_OK != sqlite3_exec(db, (_v), NULL, NULL, NULL)) \ + say("", "%s: %s", (_v), sqlite3_errmsg(db)) +#define SQL_BIND_TEXT(_s, _i, _v) \ + if (SQLITE_OK != sqlite3_bind_text \ + ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \ + say(mlink->file, "%s", sqlite3_errmsg(db)) +#define SQL_BIND_INT(_s, _i, _v) \ + if (SQLITE_OK != sqlite3_bind_int \ + ((_s), (_i)++, (_v))) \ + say(mlink->file, "%s", sqlite3_errmsg(db)) +#define SQL_BIND_INT64(_s, _i, _v) \ + if (SQLITE_OK != sqlite3_bind_int64 \ + ((_s), (_i)++, (_v))) \ + say(mlink->file, "%s", sqlite3_errmsg(db)) +#define SQL_STEP(_s) \ + if (SQLITE_DONE != sqlite3_step((_s))) \ + say(mlink->file, "%s", sqlite3_errmsg(db)) + +/* Operating mode of the program; each value is described inline. */ +enum op { + OP_DEFAULT = 0, /* new dbs from dir list or default config */ + OP_CONFFILE, /* new databases from custom config file */ + OP_UPDATE, /* delete/add entries in existing database */ + OP_DELETE, /* delete entries from existing database */ + OP_TEST /* change no databases, report potential problems */ +}; + +/* One rendered key string; the text is stored in the flexible array member key. */ +struct str { + const struct mpage *mpage; /* if set, the owning parse */ + uint64_t mask; /* bitmask in sequence */ + char key[]; /* rendered text */ +}; + +/* Inode and device number pair; struct mpage embeds one for its hashing routine. */ +struct inodev { + ino_t st_ino; + dev_t st_dev; +}; + +struct mpage { +
struct inodev inodev; /* used for hashing routine */ + int64_t pageid; /* pageid in mpages SQL table */ + char *sec; /* section from file content */ + char *arch; /* architecture from file content */ + char *title; /* title from file content */ + char *desc; /* description from file content */ + struct mlink *mlinks; /* singly linked list */ + int form; /* format from file content */ + int name_head_done; +}; + +struct mlink { + char file[PATH_MAX]; /* filename rel. to manpath */ + char *dsec; /* section from directory */ + char *arch; /* architecture from directory */ + char *name; /* name from file name (not empty) */ + char *fsec; /* section from file name suffix */ + struct mlink *next; /* singly linked list */ + struct mpage *mpage; /* parent */ + int dform; /* format from directory */ + int fform; /* format from file name suffix */ + int gzip; /* filename has a .gz suffix */ +}; + +enum stmt { + STMT_DELETE_PAGE = 0, /* delete mpage */ + STMT_INSERT_PAGE, /* insert mpage */ + STMT_INSERT_LINK, /* insert mlink */ + STMT_INSERT_NAME, /* insert name */ + STMT_SELECT_NAME, /* retrieve existing name flags */ + STMT_INSERT_KEY, /* insert parsed key */ + STMT__MAX +}; + +typedef int (*mdoc_fp)(struct mpage *, const struct roff_meta *, + const struct roff_node *); + +struct mdoc_handler { + mdoc_fp fp; /* optional handler */ + uint64_t mask; /* set unless handler returns 0 */ +}; + +static void dbclose(int); +static void dbadd(struct mpage *); +static void dbadd_mlink(const struct mlink *mlink); +static void dbadd_mlink_name(const struct mlink *mlink); +static int dbopen(int); +static void dbprune(void); +static void filescan(const char *); +static void mlink_add(struct mlink *, const struct stat *); +static void mlink_check(struct mpage *, struct mlink *); +static void mlink_free(struct mlink *); +static void mlinks_undupe(struct mpage *); +static void mpages_free(void); +static void mpages_merge(struct mparse *); +static void names_check(void); +static void 
parse_cat(struct mpage *, int); +static void parse_man(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static void parse_mdoc(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static int parse_mdoc_head(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static int parse_mdoc_Fd(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static void parse_mdoc_fname(struct mpage *, const struct roff_node *); +static int parse_mdoc_Fn(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static int parse_mdoc_Fo(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static int parse_mdoc_Nd(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static int parse_mdoc_Nm(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static int parse_mdoc_Sh(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static int parse_mdoc_Va(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static int parse_mdoc_Xr(struct mpage *, const struct roff_meta *, + const struct roff_node *); +static void putkey(const struct mpage *, char *, uint64_t); +static void putkeys(const struct mpage *, char *, size_t, uint64_t); +static void putmdockey(const struct mpage *, + const struct roff_node *, uint64_t); +static int render_string(char **, size_t *); +static void say(const char *, const char *, ...); +static int set_basedir(const char *, int); +static int treescan(void); +static size_t utf8(unsigned int, char [7]); + +static char tempfilename[32]; +static int nodb; /* no database changes */ +static int mparse_options; /* abort the parse early */ +static int use_all; /* use all found files */ +static int debug; /* print what we're doing */ +static int warnings; /* warn about crap */ +static int write_utf8; /* write UTF-8 output; else ASCII */ +static int exitcode; /* to be returned by main */ +static enum op 
op; /* operational mode */ +static char basedir[PATH_MAX]; /* current base directory */ +static struct ohash mpages; /* table of distinct manual pages */ +static struct ohash mlinks; /* table of directory entries */ +static struct ohash names; /* table of all names */ +static struct ohash strings; /* table of all strings */ +static sqlite3 *db = NULL; /* current database */ +static sqlite3_stmt *stmts[STMT__MAX]; /* current statements */ +static uint64_t name_mask; + +static const struct mdoc_handler mdocs[MDOC_MAX] = { + { NULL, 0 }, /* Ap */ + { NULL, 0 }, /* Dd */ + { NULL, 0 }, /* Dt */ + { NULL, 0 }, /* Os */ + { parse_mdoc_Sh, TYPE_Sh }, /* Sh */ + { parse_mdoc_head, TYPE_Ss }, /* Ss */ + { NULL, 0 }, /* Pp */ + { NULL, 0 }, /* D1 */ + { NULL, 0 }, /* Dl */ + { NULL, 0 }, /* Bd */ + { NULL, 0 }, /* Ed */ + { NULL, 0 }, /* Bl */ + { NULL, 0 }, /* El */ + { NULL, 0 }, /* It */ + { NULL, 0 }, /* Ad */ + { NULL, TYPE_An }, /* An */ + { NULL, TYPE_Ar }, /* Ar */ + { NULL, TYPE_Cd }, /* Cd */ + { NULL, TYPE_Cm }, /* Cm */ + { NULL, TYPE_Dv }, /* Dv */ + { NULL, TYPE_Er }, /* Er */ + { NULL, TYPE_Ev }, /* Ev */ + { NULL, 0 }, /* Ex */ + { NULL, TYPE_Fa }, /* Fa */ + { parse_mdoc_Fd, 0 }, /* Fd */ + { NULL, TYPE_Fl }, /* Fl */ + { parse_mdoc_Fn, 0 }, /* Fn */ + { NULL, TYPE_Ft }, /* Ft */ + { NULL, TYPE_Ic }, /* Ic */ + { NULL, TYPE_In }, /* In */ + { NULL, TYPE_Li }, /* Li */ + { parse_mdoc_Nd, 0 }, /* Nd */ + { parse_mdoc_Nm, 0 }, /* Nm */ + { NULL, 0 }, /* Op */ + { NULL, 0 }, /* Ot */ + { NULL, TYPE_Pa }, /* Pa */ + { NULL, 0 }, /* Rv */ + { NULL, TYPE_St }, /* St */ + { parse_mdoc_Va, TYPE_Va }, /* Va */ + { parse_mdoc_Va, TYPE_Vt }, /* Vt */ + { parse_mdoc_Xr, 0 }, /* Xr */ + { NULL, 0 }, /* %A */ + { NULL, 0 }, /* %B */ + { NULL, 0 }, /* %D */ + { NULL, 0 }, /* %I */ + { NULL, 0 }, /* %J */ + { NULL, 0 }, /* %N */ + { NULL, 0 }, /* %O */ + { NULL, 0 }, /* %P */ + { NULL, 0 }, /* %R */ + { NULL, 0 }, /* %T */ + { NULL, 0 }, /* %V */ + { NULL, 0 }, /* Ac */ + { 
NULL, 0 }, /* Ao */ + { NULL, 0 }, /* Aq */ + { NULL, TYPE_At }, /* At */ + { NULL, 0 }, /* Bc */ + { NULL, 0 }, /* Bf */ + { NULL, 0 }, /* Bo */ + { NULL, 0 }, /* Bq */ + { NULL, TYPE_Bsx }, /* Bsx */ + { NULL, TYPE_Bx }, /* Bx */ + { NULL, 0 }, /* Db */ + { NULL, 0 }, /* Dc */ + { NULL, 0 }, /* Do */ + { NULL, 0 }, /* Dq */ + { NULL, 0 }, /* Ec */ + { NULL, 0 }, /* Ef */ + { NULL, TYPE_Em }, /* Em */ + { NULL, 0 }, /* Eo */ + { NULL, TYPE_Fx }, /* Fx */ + { NULL, TYPE_Ms }, /* Ms */ + { NULL, 0 }, /* No */ + { NULL, 0 }, /* Ns */ + { NULL, TYPE_Nx }, /* Nx */ + { NULL, TYPE_Ox }, /* Ox */ + { NULL, 0 }, /* Pc */ + { NULL, 0 }, /* Pf */ + { NULL, 0 }, /* Po */ + { NULL, 0 }, /* Pq */ + { NULL, 0 }, /* Qc */ + { NULL, 0 }, /* Ql */ + { NULL, 0 }, /* Qo */ + { NULL, 0 }, /* Qq */ + { NULL, 0 }, /* Re */ + { NULL, 0 }, /* Rs */ + { NULL, 0 }, /* Sc */ + { NULL, 0 }, /* So */ + { NULL, 0 }, /* Sq */ + { NULL, 0 }, /* Sm */ + { NULL, 0 }, /* Sx */ + { NULL, TYPE_Sy }, /* Sy */ + { NULL, TYPE_Tn }, /* Tn */ + { NULL, 0 }, /* Ux */ + { NULL, 0 }, /* Xc */ + { NULL, 0 }, /* Xo */ + { parse_mdoc_Fo, 0 }, /* Fo */ + { NULL, 0 }, /* Fc */ + { NULL, 0 }, /* Oo */ + { NULL, 0 }, /* Oc */ + { NULL, 0 }, /* Bk */ + { NULL, 0 }, /* Ek */ + { NULL, 0 }, /* Bt */ + { NULL, 0 }, /* Hf */ + { NULL, 0 }, /* Fr */ + { NULL, 0 }, /* Ud */ + { NULL, TYPE_Lb }, /* Lb */ + { NULL, 0 }, /* Lp */ + { NULL, TYPE_Lk }, /* Lk */ + { NULL, TYPE_Mt }, /* Mt */ + { NULL, 0 }, /* Brq */ + { NULL, 0 }, /* Bro */ + { NULL, 0 }, /* Brc */ + { NULL, 0 }, /* %C */ + { NULL, 0 }, /* Es */ + { NULL, 0 }, /* En */ + { NULL, TYPE_Dx }, /* Dx */ + { NULL, 0 }, /* %Q */ + { NULL, 0 }, /* br */ + { NULL, 0 }, /* sp */ + { NULL, 0 }, /* %U */ + { NULL, 0 }, /* Ta */ + { NULL, 0 }, /* ll */ +}; + + +int +mandocdb(int argc, char *argv[]) +{ + struct manconf conf; + struct mparse *mp; + const char *path_arg, *progname; + size_t j, sz; + int ch, i; + +#if HAVE_PLEDGE + if (pledge("stdio rpath wpath cpath fattr 
flock proc exec", NULL) == -1) { + warn("pledge"); + return (int)MANDOCLEVEL_SYSERR; + } +#endif + + memset(&conf, 0, sizeof(conf)); + memset(stmts, 0, STMT__MAX * sizeof(sqlite3_stmt *)); + + /* + * We accept a few different invocations. + * The CHECKOP macro makes sure that invocation styles don't + * clobber each other. + */ +#define CHECKOP(_op, _ch) do \ + if (OP_DEFAULT != (_op)) { \ + warnx("-%c: Conflicting option", (_ch)); \ + goto usage; \ + } while (/*CONSTCOND*/0) + + path_arg = NULL; + op = OP_DEFAULT; + + while (-1 != (ch = getopt(argc, argv, "aC:Dd:npQT:tu:v"))) + switch (ch) { + case 'a': + use_all = 1; + break; + case 'C': + CHECKOP(op, ch); + path_arg = optarg; + op = OP_CONFFILE; + break; + case 'D': + debug++; + break; + case 'd': + CHECKOP(op, ch); + path_arg = optarg; + op = OP_UPDATE; + break; + case 'n': + nodb = 1; + break; + case 'p': + warnings = 1; + break; + case 'Q': + mparse_options |= MPARSE_QUICK; + break; + case 'T': + if (strcmp(optarg, "utf8")) { + warnx("-T%s: Unsupported output format", + optarg); + goto usage; + } + write_utf8 = 1; + break; + case 't': + CHECKOP(op, ch); + dup2(STDOUT_FILENO, STDERR_FILENO); + op = OP_TEST; + nodb = warnings = 1; + break; + case 'u': + CHECKOP(op, ch); + path_arg = optarg; + op = OP_DELETE; + break; + case 'v': + /* Compatibility with espie@'s makewhatis. 
*/ + break; + default: + goto usage; + } + + argc -= optind; + argv += optind; + +#if HAVE_PLEDGE + if (nodb) { + if (pledge("stdio rpath", NULL) == -1) { + warn("pledge"); + return (int)MANDOCLEVEL_SYSERR; + } + } +#endif + + if (OP_CONFFILE == op && argc > 0) { + warnx("-C: Too many arguments"); + goto usage; + } + + exitcode = (int)MANDOCLEVEL_OK; + mchars_alloc(); + mp = mparse_alloc(mparse_options, MANDOCLEVEL_BADARG, NULL, NULL); + mandoc_ohash_init(&mpages, 6, offsetof(struct mpage, inodev)); + mandoc_ohash_init(&mlinks, 6, offsetof(struct mlink, file)); + + if (OP_UPDATE == op || OP_DELETE == op || OP_TEST == op) { + + /* + * Most of these deal with a specific directory. + * Jump into that directory first. + */ + if (OP_TEST != op && 0 == set_basedir(path_arg, 1)) + goto out; + + if (dbopen(1)) { + /* + * The existing database is usable. Process + * all files specified on the command-line. + */ +#if HAVE_PLEDGE + if (!nodb) { + if (pledge("stdio rpath wpath cpath fattr flock", NULL) == -1) { + warn("pledge"); + exitcode = (int)MANDOCLEVEL_SYSERR; + goto out; + } + } +#endif + use_all = 1; + for (i = 0; i < argc; i++) + filescan(argv[i]); + if (OP_TEST != op) + dbprune(); + } else { + /* + * Database missing or corrupt. + * Recreate from scratch. + */ + exitcode = (int)MANDOCLEVEL_OK; + op = OP_DEFAULT; + if (0 == treescan()) + goto out; + if (0 == dbopen(0)) + goto out; + } + if (OP_DELETE != op) + mpages_merge(mp); + dbclose(OP_DEFAULT == op ? 0 : 1); + } else { + /* + * If we have arguments, use them as our manpaths. + * If we don't, grok from manpath(1) or however else + * manconf_parse() wants to do it. 
+ */ + if (argc > 0) { + conf.manpath.paths = mandoc_reallocarray(NULL, + argc, sizeof(char *)); + conf.manpath.sz = (size_t)argc; + for (i = 0; i < argc; i++) + conf.manpath.paths[i] = mandoc_strdup(argv[i]); + } else + manconf_parse(&conf, path_arg, NULL, NULL); + + if (conf.manpath.sz == 0) { + exitcode = (int)MANDOCLEVEL_BADARG; + say("", "Empty manpath"); + } + + /* + * First scan the tree rooted at a base directory, then + * build a new database and finally move it into place. + * Ignore zero-length directories and strip trailing + * slashes. + */ + for (j = 0; j < conf.manpath.sz; j++) { + sz = strlen(conf.manpath.paths[j]); + if (sz && conf.manpath.paths[j][sz - 1] == '/') + conf.manpath.paths[j][--sz] = '\0'; + if (0 == sz) + continue; + + if (j) { + mandoc_ohash_init(&mpages, 6, + offsetof(struct mpage, inodev)); + mandoc_ohash_init(&mlinks, 6, + offsetof(struct mlink, file)); + } + + if ( ! set_basedir(conf.manpath.paths[j], argc > 0)) + continue; + if (0 == treescan()) + continue; + if (0 == dbopen(0)) + continue; + + mpages_merge(mp); + if (warnings && !nodb && + ! (MPARSE_QUICK & mparse_options)) + names_check(); + dbclose(0); + + if (j + 1 < conf.manpath.sz) { + mpages_free(); + ohash_delete(&mpages); + ohash_delete(&mlinks); + } + } + } +out: + manconf_free(&conf); + mparse_free(mp); + mchars_free(); + mpages_free(); + ohash_delete(&mpages); + ohash_delete(&mlinks); + return exitcode; +usage: + progname = getprogname(); + fprintf(stderr, "usage: %s [-aDnpQ] [-C file] [-Tutf8]\n" + " %s [-aDnpQ] [-Tutf8] dir ...\n" + " %s [-DnpQ] [-Tutf8] -d dir [file ...]\n" + " %s [-Dnp] -u dir [file ...]\n" + " %s [-Q] -t file ...\n", + progname, progname, progname, progname, progname); + + return (int)MANDOCLEVEL_BADARG; +} + +/* + * Scan a directory tree rooted at "basedir" for manpages. + * We use fts(), scanning directory parts along the way for clues to our + * section and architecture. + * + * If use_all has been specified, grok all files. 
+ * If not, sanitise paths to the following: + * + * [./]man*[/<arch>]/<name>.<section> + * or + * [./]cat<section>[/<arch>]/<name>.0 + * + * TODO: accommodate for multi-language directories. + */ +static int +treescan(void) +{ + char buf[PATH_MAX]; + FTS *f; + FTSENT *ff; + struct mlink *mlink; + int dform, gzip; + char *dsec, *arch, *fsec, *cp; + const char *path; + const char *argv[2]; + + argv[0] = "."; + argv[1] = (char *)NULL; + + f = fts_open((char * const *)argv, + FTS_PHYSICAL | FTS_NOCHDIR, NULL); + if (f == NULL) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say("", "&fts_open"); + return 0; + } + + dsec = arch = NULL; + dform = FORM_NONE; + + while ((ff = fts_read(f)) != NULL) { + path = ff->fts_path + 2; + switch (ff->fts_info) { + + /* + * Symbolic links require various sanity checks, + * then get handled just like regular files. + */ + case FTS_SL: + if (realpath(path, buf) == NULL) { + if (warnings) + say(path, "&realpath"); + continue; + } + if (strstr(buf, basedir) != buf +#ifdef HOMEBREWDIR + && strstr(buf, HOMEBREWDIR) != buf +#endif + ) { + if (warnings) say("", + "%s: outside base directory", buf); + continue; + } + /* Use logical inode to avoid mpages dupe. */ + if (stat(path, ff->fts_statp) == -1) { + if (warnings) + say(path, "&stat"); + continue; + } + /* FALLTHROUGH */ + + /* + * If we're a regular file, add an mlink by using the + * stored directory data and handling the filename. + */ + case FTS_F: + if ( ! strcmp(path, MANDOC_DB)) + continue; + if ( ! use_all && ff->fts_level < 2) { + if (warnings) + say(path, "Extraneous file"); + continue; + } + gzip = 0; + fsec = NULL; + while (fsec == NULL) { + fsec = strrchr(ff->fts_name, '.'); + if (fsec == NULL || strcmp(fsec+1, "gz")) + break; + gzip = 1; + *fsec = '\0'; + fsec = NULL; + } + if (fsec == NULL) { + if ( ! use_all) { + if (warnings) + say(path, + "No filename suffix"); + continue; + } + } else if ( ! 
strcmp(++fsec, "html")) { + if (warnings) + say(path, "Skip html"); + continue; + } else if ( ! strcmp(fsec, "ps")) { + if (warnings) + say(path, "Skip ps"); + continue; + } else if ( ! strcmp(fsec, "pdf")) { + if (warnings) + say(path, "Skip pdf"); + continue; + } else if ( ! use_all && + ((dform == FORM_SRC && + strncmp(fsec, dsec, strlen(dsec))) || + (dform == FORM_CAT && strcmp(fsec, "0")))) { + if (warnings) + say(path, "Wrong filename suffix"); + continue; + } else + fsec[-1] = '\0'; + + mlink = mandoc_calloc(1, sizeof(struct mlink)); + if (strlcpy(mlink->file, path, + sizeof(mlink->file)) >= + sizeof(mlink->file)) { + say(path, "Filename too long"); + free(mlink); + continue; + } + mlink->dform = dform; + mlink->dsec = dsec; + mlink->arch = arch; + mlink->name = ff->fts_name; + mlink->fsec = fsec; + mlink->gzip = gzip; + mlink_add(mlink, ff->fts_statp); + continue; + + case FTS_D: + case FTS_DP: + break; + + default: + if (warnings) + say(path, "Not a regular file"); + continue; + } + + switch (ff->fts_level) { + case 0: + /* Ignore the root directory. */ + break; + case 1: + /* + * This might contain manX/ or catX/. + * Try to infer this from the name. + * If we're not in use_all, enforce it. + */ + cp = ff->fts_name; + if (ff->fts_info == FTS_DP) { + dform = FORM_NONE; + dsec = NULL; + break; + } + + if ( ! strncmp(cp, "man", 3)) { + dform = FORM_SRC; + dsec = cp + 3; + } else if ( ! strncmp(cp, "cat", 3)) { + dform = FORM_CAT; + dsec = cp + 3; + } else { + dform = FORM_NONE; + dsec = NULL; + } + + if (dsec != NULL || use_all) + break; + + if (warnings) + say(path, "Unknown directory part"); + fts_set(f, ff, FTS_SKIP); + break; + case 2: + /* + * Possibly our architecture. + * If we're descending, keep tabs on it. 
+ */ + if (ff->fts_info != FTS_DP && dsec != NULL) + arch = ff->fts_name; + else + arch = NULL; + break; + default: + if (ff->fts_info == FTS_DP || use_all) + break; + if (warnings) + say(path, "Extraneous directory part"); + fts_set(f, ff, FTS_SKIP); + break; + } + } + + fts_close(f); + return 1; +} + +/* + * Add a file to the mlinks table. + * Do not verify that it's a "valid" looking manpage (we'll do that + * later). + * + * Try to infer the manual section, architecture, and page name from the + * path, assuming it looks like + * + * [./]man*[/<arch>]/<name>.<section> + * or + * [./]cat<section>[/<arch>]/<name>.0 + * + * See treescan() for the fts(3) version of this. + */ +static void +filescan(const char *file) +{ + char buf[PATH_MAX]; + struct stat st; + struct mlink *mlink; + char *p, *start; + + assert(use_all); + + if (0 == strncmp(file, "./", 2)) + file += 2; + + /* + * We have to do lstat(2) before realpath(3) loses + * the information whether this is a symbolic link. + * We need to know that because for symbolic links, + * we want to use the orginal file name, while for + * regular files, we want to use the real path. + */ + if (-1 == lstat(file, &st)) { + exitcode = (int)MANDOCLEVEL_BADARG; + say(file, "&lstat"); + return; + } else if (0 == ((S_IFREG | S_IFLNK) & st.st_mode)) { + exitcode = (int)MANDOCLEVEL_BADARG; + say(file, "Not a regular file"); + return; + } + + /* + * We have to resolve the file name to the real path + * in any case for the base directory check. + */ + if (NULL == realpath(file, buf)) { + exitcode = (int)MANDOCLEVEL_BADARG; + say(file, "&realpath"); + return; + } + + if (OP_TEST == op) + start = buf; + else if (strstr(buf, basedir) == buf) + start = buf + strlen(basedir); +#ifdef HOMEBREWDIR + else if (strstr(buf, HOMEBREWDIR) == buf) + start = buf; +#endif + else { + exitcode = (int)MANDOCLEVEL_BADARG; + say("", "%s: outside base directory", buf); + return; + } + + /* + * Now we are sure the file is inside our tree. 
+ * If it is a symbolic link, ignore the real path + * and use the original name. + * This implies passing stuff like "cat1/../man1/foo.1" + * on the command line won't work. So don't do that. + * Note the stat(2) can still fail if the link target + * doesn't exist. + */ + if (S_IFLNK & st.st_mode) { + if (-1 == stat(buf, &st)) { + exitcode = (int)MANDOCLEVEL_BADARG; + say(file, "&stat"); + return; + } + if (strlcpy(buf, file, sizeof(buf)) >= sizeof(buf)) { + say(file, "Filename too long"); + return; + } + start = buf; + if (OP_TEST != op && strstr(buf, basedir) == buf) + start += strlen(basedir); + } + + mlink = mandoc_calloc(1, sizeof(struct mlink)); + mlink->dform = FORM_NONE; + if (strlcpy(mlink->file, start, sizeof(mlink->file)) >= + sizeof(mlink->file)) { + say(start, "Filename too long"); + free(mlink); + return; + } + + /* + * First try to guess our directory structure. + * If we find a separator, try to look for man* or cat*. + * If we find one of these and what's underneath is a directory, + * assume it's an architecture. + */ + if (NULL != (p = strchr(start, '/'))) { + *p++ = '\0'; + if (0 == strncmp(start, "man", 3)) { + mlink->dform = FORM_SRC; + mlink->dsec = start + 3; + } else if (0 == strncmp(start, "cat", 3)) { + mlink->dform = FORM_CAT; + mlink->dsec = start + 3; + } + + start = p; + if (NULL != mlink->dsec && NULL != (p = strchr(start, '/'))) { + *p++ = '\0'; + mlink->arch = start; + start = p; + } + } + + /* + * Now check the file suffix. + * Suffix of `.0' indicates a catpage, `.1-9' is a manpage. + */ + p = strrchr(start, '\0'); + while (p-- > start && '/' != *p && '.' != *p) + /* Loop. */ ; + + if ('.' == *p) { + *p++ = '\0'; + mlink->fsec = p; + } + + /* + * Now try to parse the name. + * Use the filename portion of the path. 
+ */ + mlink->name = start; + if (NULL != (p = strrchr(start, '/'))) { + mlink->name = p + 1; + *p = '\0'; + } + mlink_add(mlink, &st); +} + +static void +mlink_add(struct mlink *mlink, const struct stat *st) +{ + struct inodev inodev; + struct mpage *mpage; + unsigned int slot; + + assert(NULL != mlink->file); + + mlink->dsec = mandoc_strdup(mlink->dsec ? mlink->dsec : ""); + mlink->arch = mandoc_strdup(mlink->arch ? mlink->arch : ""); + mlink->name = mandoc_strdup(mlink->name ? mlink->name : ""); + mlink->fsec = mandoc_strdup(mlink->fsec ? mlink->fsec : ""); + + if ('0' == *mlink->fsec) { + free(mlink->fsec); + mlink->fsec = mandoc_strdup(mlink->dsec); + mlink->fform = FORM_CAT; + } else if ('1' <= *mlink->fsec && '9' >= *mlink->fsec) + mlink->fform = FORM_SRC; + else + mlink->fform = FORM_NONE; + + slot = ohash_qlookup(&mlinks, mlink->file); + assert(NULL == ohash_find(&mlinks, slot)); + ohash_insert(&mlinks, slot, mlink); + + memset(&inodev, 0, sizeof(inodev)); /* Clear padding. */ + inodev.st_ino = st->st_ino; + inodev.st_dev = st->st_dev; + slot = ohash_lookup_memory(&mpages, (char *)&inodev, + sizeof(struct inodev), inodev.st_ino); + mpage = ohash_find(&mpages, slot); + if (NULL == mpage) { + mpage = mandoc_calloc(1, sizeof(struct mpage)); + mpage->inodev.st_ino = inodev.st_ino; + mpage->inodev.st_dev = inodev.st_dev; + ohash_insert(&mpages, slot, mpage); + } else + mlink->next = mpage->mlinks; + mpage->mlinks = mlink; + mlink->mpage = mpage; +} + +static void +mlink_free(struct mlink *mlink) +{ + + free(mlink->dsec); + free(mlink->arch); + free(mlink->name); + free(mlink->fsec); + free(mlink); +} + +static void +mpages_free(void) +{ + struct mpage *mpage; + struct mlink *mlink; + unsigned int slot; + + mpage = ohash_first(&mpages, &slot); + while (NULL != mpage) { + while (NULL != (mlink = mpage->mlinks)) { + mpage->mlinks = mlink->next; + mlink_free(mlink); + } + free(mpage->sec); + free(mpage->arch); + free(mpage->title); + free(mpage->desc); + 
free(mpage); + mpage = ohash_next(&mpages, &slot); + } +} + +/* + * For each mlink to the mpage, check whether the path looks like + * it is formatted, and if it does, check whether a source manual + * exists by the same name, ignoring the suffix. + * If both conditions hold, drop the mlink. + */ +static void +mlinks_undupe(struct mpage *mpage) +{ + char buf[PATH_MAX]; + struct mlink **prev; + struct mlink *mlink; + char *bufp; + + mpage->form = FORM_CAT; + prev = &mpage->mlinks; + while (NULL != (mlink = *prev)) { + if (FORM_CAT != mlink->dform) { + mpage->form = FORM_NONE; + goto nextlink; + } + (void)strlcpy(buf, mlink->file, sizeof(buf)); + bufp = strstr(buf, "cat"); + assert(NULL != bufp); + memcpy(bufp, "man", 3); + if (NULL != (bufp = strrchr(buf, '.'))) + *++bufp = '\0'; + (void)strlcat(buf, mlink->dsec, sizeof(buf)); + if (NULL == ohash_find(&mlinks, + ohash_qlookup(&mlinks, buf))) + goto nextlink; + if (warnings) + say(mlink->file, "Man source exists: %s", buf); + if (use_all) + goto nextlink; + *prev = mlink->next; + mlink_free(mlink); + continue; +nextlink: + prev = &(*prev)->next; + } +} + +static void +mlink_check(struct mpage *mpage, struct mlink *mlink) +{ + struct str *str; + unsigned int slot; + + /* + * Check whether the manual section given in a file + * agrees with the directory where the file is located. + * Some manuals have suffixes like (3p) on their + * section number either inside the file or in the + * directory name, some are linked into more than one + * section, like encrypt(1) = makekey(8). + */ + + if (FORM_SRC == mpage->form && + strcasecmp(mpage->sec, mlink->dsec)) + say(mlink->file, "Section \"%s\" manual in %s directory", + mpage->sec, mlink->dsec); + + /* + * Manual page directories exist for each kernel + * architecture as returned by machine(1). + * However, many manuals only depend on the + * application architecture as returned by arch(1). 
+ * For example, some (2/ARM) manuals are shared + * across the "armish" and "zaurus" kernel + * architectures. + * A few manuals are even shared across completely + * different architectures, for example fdformat(1) + * on amd64, i386, sparc, and sparc64. + */ + + if (strcasecmp(mpage->arch, mlink->arch)) + say(mlink->file, "Architecture \"%s\" manual in " + "\"%s\" directory", mpage->arch, mlink->arch); + + /* + * XXX + * parse_cat() doesn't set NAME_TITLE yet. + */ + + if (FORM_CAT == mpage->form) + return; + + /* + * Check whether this mlink + * appears as a name in the NAME section. + */ + + slot = ohash_qlookup(&names, mlink->name); + str = ohash_find(&names, slot); + assert(NULL != str); + if ( ! (NAME_TITLE & str->mask)) + say(mlink->file, "Name missing in NAME section"); +} + +/* + * Run through the files in the global vector "mpages" + * and add them to the database specified in "basedir". + * + * This handles the parsing scheme itself, using the cues of directory + * and filename to determine whether the file is parsable or not. + */ +static void +mpages_merge(struct mparse *mp) +{ + char any[] = "any"; + struct mpage *mpage, *mpage_dest; + struct mlink *mlink, *mlink_dest; + struct roff_man *man; + char *sodest; + char *cp; + int fd; + unsigned int pslot; + + if ( ! nodb) + SQL_EXEC("BEGIN TRANSACTION"); + + mpage = ohash_first(&mpages, &pslot); + while (mpage != NULL) { + mlinks_undupe(mpage); + if ((mlink = mpage->mlinks) == NULL) { + mpage = ohash_next(&mpages, &pslot); + continue; + } + + name_mask = NAME_MASK; + mandoc_ohash_init(&names, 4, offsetof(struct str, key)); + mandoc_ohash_init(&strings, 6, offsetof(struct str, key)); + mparse_reset(mp); + man = NULL; + sodest = NULL; + + if ((fd = mparse_open(mp, mlink->file)) == -1) { + say(mlink->file, "&open"); + goto nextpage; + } + + /* + * Interpret the file as mdoc(7) or man(7) source + * code, unless it is known to be formatted. 
+ */ + if (mlink->dform != FORM_CAT || mlink->fform != FORM_CAT) { + mparse_readfd(mp, fd, mlink->file); + close(fd); + mparse_result(mp, &man, &sodest); + } + + if (sodest != NULL) { + mlink_dest = ohash_find(&mlinks, + ohash_qlookup(&mlinks, sodest)); + if (mlink_dest == NULL) { + mandoc_asprintf(&cp, "%s.gz", sodest); + mlink_dest = ohash_find(&mlinks, + ohash_qlookup(&mlinks, cp)); + free(cp); + } + if (mlink_dest != NULL) { + + /* The .so target exists. */ + + mpage_dest = mlink_dest->mpage; + while (1) { + mlink->mpage = mpage_dest; + + /* + * If the target was already + * processed, add the links + * to the database now. + * Otherwise, this will + * happen when we come + * to the target. + */ + + if (mpage_dest->pageid) + dbadd_mlink_name(mlink); + + if (mlink->next == NULL) + break; + mlink = mlink->next; + } + + /* Move all links to the target. */ + + mlink->next = mlink_dest->next; + mlink_dest->next = mpage->mlinks; + mpage->mlinks = NULL; + } + goto nextpage; + } else if (man != NULL && man->macroset == MACROSET_MDOC) { + mdoc_validate(man); + mpage->form = FORM_SRC; + mpage->sec = man->meta.msec; + mpage->sec = mandoc_strdup( + mpage->sec == NULL ? "" : mpage->sec); + mpage->arch = man->meta.arch; + mpage->arch = mandoc_strdup( + mpage->arch == NULL ? 
"" : mpage->arch); + mpage->title = mandoc_strdup(man->meta.title); + } else if (man != NULL && man->macroset == MACROSET_MAN) { + man_validate(man); + mpage->form = FORM_SRC; + mpage->sec = mandoc_strdup(man->meta.msec); + mpage->arch = mandoc_strdup(mlink->arch); + mpage->title = mandoc_strdup(man->meta.title); + } else { + mpage->form = FORM_CAT; + mpage->sec = mandoc_strdup(mlink->dsec); + mpage->arch = mandoc_strdup(mlink->arch); + mpage->title = mandoc_strdup(mlink->name); + } + putkey(mpage, mpage->sec, TYPE_sec); + if (*mpage->arch != '\0') + putkey(mpage, mpage->arch, TYPE_arch); + + for ( ; mlink != NULL; mlink = mlink->next) { + if ('\0' != *mlink->dsec) + putkey(mpage, mlink->dsec, TYPE_sec); + if ('\0' != *mlink->fsec) + putkey(mpage, mlink->fsec, TYPE_sec); + putkey(mpage, '\0' == *mlink->arch ? + any : mlink->arch, TYPE_arch); + putkey(mpage, mlink->name, NAME_FILE); + } + + assert(mpage->desc == NULL); + if (man != NULL && man->macroset == MACROSET_MDOC) + parse_mdoc(mpage, &man->meta, man->first); + else if (man != NULL) + parse_man(mpage, &man->meta, man->first); + else + parse_cat(mpage, fd); + if (mpage->desc == NULL) + mpage->desc = mandoc_strdup(mpage->mlinks->name); + + if (warnings && !use_all) + for (mlink = mpage->mlinks; mlink; + mlink = mlink->next) + mlink_check(mpage, mlink); + + dbadd(mpage); + mlink = mpage->mlinks; + +nextpage: + ohash_delete(&strings); + ohash_delete(&names); + mpage = ohash_next(&mpages, &pslot); + } + + if (0 == nodb) + SQL_EXEC("END TRANSACTION"); +} + +static void +names_check(void) +{ + sqlite3_stmt *stmt; + const char *name, *sec, *arch, *key; + + sqlite3_prepare_v2(db, + "SELECT name, sec, arch, key FROM (" + "SELECT name AS key, pageid FROM names " + "WHERE bits & ? 
AND NOT EXISTS (" + "SELECT pageid FROM mlinks " + "WHERE mlinks.pageid == names.pageid " + "AND mlinks.name == names.name" + ")" + ") JOIN (" + "SELECT sec, arch, name, pageid FROM mlinks " + "GROUP BY pageid" + ") USING (pageid);", + -1, &stmt, NULL); + + if (sqlite3_bind_int64(stmt, 1, NAME_TITLE) != SQLITE_OK) + say("", "%s", sqlite3_errmsg(db)); + + while (sqlite3_step(stmt) == SQLITE_ROW) { + name = (const char *)sqlite3_column_text(stmt, 0); + sec = (const char *)sqlite3_column_text(stmt, 1); + arch = (const char *)sqlite3_column_text(stmt, 2); + key = (const char *)sqlite3_column_text(stmt, 3); + say("", "%s(%s%s%s) lacks mlink \"%s\"", name, sec, + '\0' == *arch ? "" : "/", + '\0' == *arch ? "" : arch, key); + } + sqlite3_finalize(stmt); +} + +static void +parse_cat(struct mpage *mpage, int fd) +{ + FILE *stream; + char *line, *p, *title; + size_t linesz, plen, titlesz; + ssize_t len; + int offs; + + stream = (-1 == fd) ? + fopen(mpage->mlinks->file, "r") : + fdopen(fd, "r"); + if (NULL == stream) { + if (-1 != fd) + close(fd); + if (warnings) + say(mpage->mlinks->file, "&fopen"); + return; + } + + line = NULL; + linesz = 0; + + /* Skip to first blank line. */ + + while (getline(&line, &linesz, stream) != -1) + if (*line == '\n') + break; + + /* + * Assume the first line that is not indented + * is the first section header. Skip to it. + */ + + while (getline(&line, &linesz, stream) != -1) + if (*line != '\n' && *line != ' ') + break; + + /* + * Read up until the next section into a buffer. + * Strip the leading and trailing newline from each read line, + * appending a trailing space. + * Ignore empty (whitespace-only) lines. 
+ */ + + titlesz = 0; + title = NULL; + + while ((len = getline(&line, &linesz, stream)) != -1) { + if (*line != ' ') + break; + offs = 0; + while (isspace((unsigned char)line[offs])) + offs++; + if (line[offs] == '\0') + continue; + title = mandoc_realloc(title, titlesz + len - offs); + memcpy(title + titlesz, line + offs, len - offs); + titlesz += len - offs; + title[titlesz - 1] = ' '; + } + free(line); + + /* + * If no page content can be found, or the input line + * is already the next section header, or there is no + * trailing newline, reuse the page title as the page + * description. + */ + + if (NULL == title || '\0' == *title) { + if (warnings) + say(mpage->mlinks->file, + "Cannot find NAME section"); + fclose(stream); + free(title); + return; + } + + title[titlesz - 1] = '\0'; + + /* + * Skip to the first dash. + * Use the remaining line as the description (no more than 70 + * bytes). + */ + + if (NULL != (p = strstr(title, "- "))) { + for (p += 2; ' ' == *p || '\b' == *p; p++) + /* Skip to next word. */ ; + } else { + if (warnings) + say(mpage->mlinks->file, + "No dash in title line"); + p = title; + } + + plen = strlen(p); + + /* Strip backspace-encoding from line. */ + + while (NULL != (line = memchr(p, '\b', plen))) { + len = line - p; + if (0 == len) { + memmove(line, line + 1, plen--); + continue; + } + memmove(line - 1, line + 1, plen - len); + plen -= 2; + } + + mpage->desc = mandoc_strdup(p); + fclose(stream); + free(title); +} + +/* + * Put a type/word pair into the word database for this particular file. + */ +static void +putkey(const struct mpage *mpage, char *value, uint64_t type) +{ + char *cp; + + assert(NULL != value); + if (TYPE_arch == type) + for (cp = value; *cp; cp++) + if (isupper((unsigned char)*cp)) + *cp = _tolower((unsigned char)*cp); + putkeys(mpage, value, strlen(value), type); +} + +/* + * Grok all nodes at or below a certain mdoc node into putkey(). 
+ */ +static void +putmdockey(const struct mpage *mpage, + const struct roff_node *n, uint64_t m) +{ + + for ( ; NULL != n; n = n->next) { + if (NULL != n->child) + putmdockey(mpage, n->child, m); + if (n->type == ROFFT_TEXT) + putkey(mpage, n->string, m); + } +} + +static void +parse_man(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + const struct roff_node *head, *body; + char *start, *title; + char byte; + size_t sz; + + if (n == NULL) + return; + + /* + * We're only searching for one thing: the first text child in + * the BODY of a NAME section. Since we don't keep track of + * sections in -man, run some hoops to find out whether we're in + * the correct section or not. + */ + + if (n->type == ROFFT_BODY && n->tok == MAN_SH) { + body = n; + if ((head = body->parent->head) != NULL && + (head = head->child) != NULL && + head->next == NULL && + head->type == ROFFT_TEXT && + strcmp(head->string, "NAME") == 0 && + body->child != NULL) { + + /* + * Suck the entire NAME section into memory. + * Yes, we might run away. + * But too many manuals have big, spread-out + * NAME sections over many lines. + */ + + title = NULL; + deroff(&title, body); + if (NULL == title) + return; + + /* + * Go through a special heuristic dance here. + * Conventionally, one or more manual names are + * comma-specified prior to a whitespace, then a + * dash, then a description. Try to puzzle out + * the name parts here. + */ + + start = title; + for ( ;; ) { + sz = strcspn(start, " ,"); + if ('\0' == start[sz]) + break; + + byte = start[sz]; + start[sz] = '\0'; + + /* + * Assume a stray trailing comma in the + * name list if a name begins with a dash. + */ + + if ('-' == start[0] || + ('\\' == start[0] && '-' == start[1])) + break; + + putkey(mpage, start, NAME_TITLE); + if ( ! 
(mpage->name_head_done || + strcasecmp(start, meta->title))) { + putkey(mpage, start, NAME_HEAD); + mpage->name_head_done = 1; + } + + if (' ' == byte) { + start += sz + 1; + break; + } + + assert(',' == byte); + start += sz + 1; + while (' ' == *start) + start++; + } + + if (start == title) { + putkey(mpage, start, NAME_TITLE); + if ( ! (mpage->name_head_done || + strcasecmp(start, meta->title))) { + putkey(mpage, start, NAME_HEAD); + mpage->name_head_done = 1; + } + free(title); + return; + } + + while (isspace((unsigned char)*start)) + start++; + + if (0 == strncmp(start, "-", 1)) + start += 1; + else if (0 == strncmp(start, "\\-\\-", 4)) + start += 4; + else if (0 == strncmp(start, "\\-", 2)) + start += 2; + else if (0 == strncmp(start, "\\(en", 4)) + start += 4; + else if (0 == strncmp(start, "\\(em", 4)) + start += 4; + + while (' ' == *start) + start++; + + mpage->desc = mandoc_strdup(start); + free(title); + return; + } + } + + for (n = n->child; n; n = n->next) { + if (NULL != mpage->desc) + break; + parse_man(mpage, meta, n); + } +} + +static void +parse_mdoc(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + + assert(NULL != n); + for (n = n->child; NULL != n; n = n->next) { + switch (n->type) { + case ROFFT_ELEM: + case ROFFT_BLOCK: + case ROFFT_HEAD: + case ROFFT_BODY: + case ROFFT_TAIL: + if (NULL != mdocs[n->tok].fp) + if (0 == (*mdocs[n->tok].fp)(mpage, meta, n)) + break; + if (mdocs[n->tok].mask) + putmdockey(mpage, n->child, + mdocs[n->tok].mask); + break; + default: + assert(n->type != ROFFT_ROOT); + continue; + } + if (NULL != n->child) + parse_mdoc(mpage, meta, n); + } +} + +static int +parse_mdoc_Fd(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + char *start, *end; + size_t sz; + + if (SEC_SYNOPSIS != n->sec || + NULL == (n = n->child) || + n->type != ROFFT_TEXT) + return 0; + + /* + * Only consider those `Fd' macro fields that begin with an + * "inclusion" token (versus, 
e.g., #define). + */ + + if (strcmp("#include", n->string)) + return 0; + + if ((n = n->next) == NULL || n->type != ROFFT_TEXT) + return 0; + + /* + * Strip away the enclosing angle brackets and make sure we're + * not zero-length. + */ + + start = n->string; + if ('<' == *start || '"' == *start) + start++; + + if (0 == (sz = strlen(start))) + return 0; + + end = &start[(int)sz - 1]; + if ('>' == *end || '"' == *end) + end--; + + if (end > start) + putkeys(mpage, start, end - start + 1, TYPE_In); + return 0; +} + +static void +parse_mdoc_fname(struct mpage *mpage, const struct roff_node *n) +{ + char *cp; + size_t sz; + + if (n->type != ROFFT_TEXT) + return; + + /* Skip function pointer punctuation. */ + + cp = n->string; + while (*cp == '(' || *cp == '*') + cp++; + sz = strcspn(cp, "()"); + + putkeys(mpage, cp, sz, TYPE_Fn); + if (n->sec == SEC_SYNOPSIS) + putkeys(mpage, cp, sz, NAME_SYN); +} + +static int +parse_mdoc_Fn(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + + if (n->child == NULL) + return 0; + + parse_mdoc_fname(mpage, n->child); + + for (n = n->child->next; n != NULL; n = n->next) + if (n->type == ROFFT_TEXT) + putkey(mpage, n->string, TYPE_Fa); + + return 0; +} + +static int +parse_mdoc_Fo(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + + if (n->type != ROFFT_HEAD) + return 1; + + if (n->child != NULL) + parse_mdoc_fname(mpage, n->child); + + return 0; +} + +static int +parse_mdoc_Va(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + char *cp; + + if (n->type != ROFFT_ELEM && n->type != ROFFT_BODY) + return 0; + + if (n->child != NULL && + n->child->next == NULL && + n->child->type == ROFFT_TEXT) + return 1; + + cp = NULL; + deroff(&cp, n); + if (cp != NULL) { + putkey(mpage, cp, TYPE_Vt | (n->tok == MDOC_Va || + n->type == ROFFT_BODY ? 
TYPE_Va : 0)); + free(cp); + } + + return 0; +} + +static int +parse_mdoc_Xr(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + char *cp; + + if (NULL == (n = n->child)) + return 0; + + if (NULL == n->next) { + putkey(mpage, n->string, TYPE_Xr); + return 0; + } + + mandoc_asprintf(&cp, "%s(%s)", n->string, n->next->string); + putkey(mpage, cp, TYPE_Xr); + free(cp); + return 0; +} + +static int +parse_mdoc_Nd(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + + if (n->type == ROFFT_BODY) + deroff(&mpage->desc, n); + return 0; +} + +static int +parse_mdoc_Nm(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + + if (SEC_NAME == n->sec) + putmdockey(mpage, n->child, NAME_TITLE); + else if (n->sec == SEC_SYNOPSIS && n->type == ROFFT_HEAD) { + if (n->child == NULL) + putkey(mpage, meta->name, NAME_SYN); + else + putmdockey(mpage, n->child, NAME_SYN); + } + if ( ! (mpage->name_head_done || + n->child == NULL || n->child->string == NULL || + strcasecmp(n->child->string, meta->title))) { + putkey(mpage, n->child->string, ROFFT_HEAD); + mpage->name_head_done = 1; + } + return 0; +} + +static int +parse_mdoc_Sh(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + + return n->sec == SEC_CUSTOM && n->type == ROFFT_HEAD; +} + +static int +parse_mdoc_head(struct mpage *mpage, const struct roff_meta *meta, + const struct roff_node *n) +{ + + return n->type == ROFFT_HEAD; +} + +/* + * Add a string to the hash table for the current manual. + * Each string has a bitmask telling which macros it belongs to. + * When we finish the manual, we'll dump the table. 
+ */ +static void +putkeys(const struct mpage *mpage, char *cp, size_t sz, uint64_t v) +{ + struct ohash *htab; + struct str *s; + const char *end; + unsigned int slot; + int i, mustfree; + + if (0 == sz) + return; + + mustfree = render_string(&cp, &sz); + + if (TYPE_Nm & v) { + htab = &names; + v &= name_mask; + if (v & NAME_FIRST) + name_mask &= ~NAME_FIRST; + if (debug > 1) + say(mpage->mlinks->file, + "Adding name %*s, bits=%d", sz, cp, v); + } else { + htab = &strings; + if (debug > 1) + for (i = 0; i < mansearch_keymax; i++) + if ((uint64_t)1 << i & v) + say(mpage->mlinks->file, + "Adding key %s=%*s", + mansearch_keynames[i], sz, cp); + } + + end = cp + sz; + slot = ohash_qlookupi(htab, cp, &end); + s = ohash_find(htab, slot); + + if (NULL != s && mpage == s->mpage) { + s->mask |= v; + return; + } else if (NULL == s) { + s = mandoc_calloc(1, sizeof(struct str) + sz + 1); + memcpy(s->key, cp, sz); + ohash_insert(htab, slot, s); + } + s->mpage = mpage; + s->mask = v; + + if (mustfree) + free(cp); +} + +/* + * Take a Unicode codepoint and produce its UTF-8 encoding. + * This isn't the best way to do this, but it works. + * The magic numbers are from the UTF-8 packaging. + * They're not as scary as they seem: read the UTF-8 spec for details. 
+ */ +static size_t +utf8(unsigned int cp, char out[7]) +{ + size_t rc; + + rc = 0; + if (cp <= 0x0000007F) { + rc = 1; + out[0] = (char)cp; + } else if (cp <= 0x000007FF) { + rc = 2; + out[0] = (cp >> 6 & 31) | 192; + out[1] = (cp & 63) | 128; + } else if (cp <= 0x0000FFFF) { + rc = 3; + out[0] = (cp >> 12 & 15) | 224; + out[1] = (cp >> 6 & 63) | 128; + out[2] = (cp & 63) | 128; + } else if (cp <= 0x001FFFFF) { + rc = 4; + out[0] = (cp >> 18 & 7) | 240; + out[1] = (cp >> 12 & 63) | 128; + out[2] = (cp >> 6 & 63) | 128; + out[3] = (cp & 63) | 128; + } else if (cp <= 0x03FFFFFF) { + rc = 5; + out[0] = (cp >> 24 & 3) | 248; + out[1] = (cp >> 18 & 63) | 128; + out[2] = (cp >> 12 & 63) | 128; + out[3] = (cp >> 6 & 63) | 128; + out[4] = (cp & 63) | 128; + } else if (cp <= 0x7FFFFFFF) { + rc = 6; + out[0] = (cp >> 30 & 1) | 252; + out[1] = (cp >> 24 & 63) | 128; + out[2] = (cp >> 18 & 63) | 128; + out[3] = (cp >> 12 & 63) | 128; + out[4] = (cp >> 6 & 63) | 128; + out[5] = (cp & 63) | 128; + } else + return 0; + + out[rc] = '\0'; + return rc; +} + +/* + * If the string contains escape sequences, + * replace it with an allocated rendering and return 1, + * such that the caller can free it after use. + * Otherwise, do nothing and return 0. + */ +static int +render_string(char **public, size_t *psz) +{ + const char *src, *scp, *addcp, *seq; + char *dst; + size_t ssz, dsz, addsz; + char utfbuf[7], res[6]; + int seqlen, unicode; + + res[0] = '\\'; + res[1] = '\t'; + res[2] = ASCII_NBRSP; + res[3] = ASCII_HYPH; + res[4] = ASCII_BREAK; + res[5] = '\0'; + + src = scp = *public; + ssz = *psz; + dst = NULL; + dsz = 0; + + while (scp < src + *psz) { + + /* Leave normal characters unchanged. */ + + if (strchr(res, *scp) == NULL) { + if (dst != NULL) + dst[dsz++] = *scp; + scp++; + continue; + } + + /* + * Found something that requires replacing, + * make sure we have a destination buffer. 
+ */ + + if (dst == NULL) { + dst = mandoc_malloc(ssz + 1); + dsz = scp - src; + memcpy(dst, src, dsz); + } + + /* Handle single-char special characters. */ + + switch (*scp) { + case '\\': + break; + case '\t': + case ASCII_NBRSP: + dst[dsz++] = ' '; + scp++; + continue; + case ASCII_HYPH: + dst[dsz++] = '-'; + /* FALLTHROUGH */ + case ASCII_BREAK: + scp++; + continue; + default: + abort(); + } + + /* + * Found an escape sequence. + * Read past the slash, then parse it. + * Ignore everything except characters. + */ + + scp++; + if (mandoc_escape(&scp, &seq, &seqlen) != ESCAPE_SPECIAL) + continue; + + /* + * Render the special character + * as either UTF-8 or ASCII. + */ + + if (write_utf8) { + unicode = mchars_spec2cp(seq, seqlen); + if (unicode <= 0) + continue; + addsz = utf8(unicode, utfbuf); + if (addsz == 0) + continue; + addcp = utfbuf; + } else { + addcp = mchars_spec2str(seq, seqlen, &addsz); + if (addcp == NULL) + continue; + if (*addcp == ASCII_NBRSP) { + addcp = " "; + addsz = 1; + } + } + + /* Copy the rendered glyph into the stream. */ + + ssz += addsz; + dst = mandoc_realloc(dst, ssz + 1); + memcpy(dst + dsz, addcp, addsz); + dsz += addsz; + } + if (dst != NULL) { + *public = dst; + *psz = dsz; + } + + /* Trim trailing whitespace and NUL-terminate. 
*/ + + while (*psz > 0 && (*public)[*psz - 1] == ' ') + --*psz; + if (dst != NULL) { + (*public)[*psz] = '\0'; + return 1; + } else + return 0; +} + +static void +dbadd_mlink(const struct mlink *mlink) +{ + size_t i; + + i = 1; + SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->dsec); + SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->arch); + SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->name); + SQL_BIND_INT64(stmts[STMT_INSERT_LINK], i, mlink->mpage->pageid); + SQL_STEP(stmts[STMT_INSERT_LINK]); + sqlite3_reset(stmts[STMT_INSERT_LINK]); +} + +static void +dbadd_mlink_name(const struct mlink *mlink) +{ + uint64_t bits; + size_t i; + + dbadd_mlink(mlink); + + i = 1; + SQL_BIND_INT64(stmts[STMT_SELECT_NAME], i, mlink->mpage->pageid); + bits = NAME_FILE & NAME_MASK; + if (sqlite3_step(stmts[STMT_SELECT_NAME]) == SQLITE_ROW) { + bits |= sqlite3_column_int64(stmts[STMT_SELECT_NAME], 0); + sqlite3_reset(stmts[STMT_SELECT_NAME]); + } + + i = 1; + SQL_BIND_INT64(stmts[STMT_INSERT_NAME], i, bits); + SQL_BIND_TEXT(stmts[STMT_INSERT_NAME], i, mlink->name); + SQL_BIND_INT64(stmts[STMT_INSERT_NAME], i, mlink->mpage->pageid); + SQL_STEP(stmts[STMT_INSERT_NAME]); + sqlite3_reset(stmts[STMT_INSERT_NAME]); +} + +/* + * Flush the current page's terms (and their bits) into the database. + * Wrap the entire set of additions in a transaction to make sqlite be a + * little faster. + * Also, handle escape sequences at the last possible moment. 
+ */ +static void +dbadd(struct mpage *mpage) +{ + struct mlink *mlink; + struct str *key; + char *cp; + size_t i; + unsigned int slot; + int mustfree; + + mlink = mpage->mlinks; + + if (nodb) { + for (key = ohash_first(&names, &slot); NULL != key; + key = ohash_next(&names, &slot)) + free(key); + for (key = ohash_first(&strings, &slot); NULL != key; + key = ohash_next(&strings, &slot)) + free(key); + if (0 == debug) + return; + while (NULL != mlink) { + fputs(mlink->name, stdout); + if (NULL == mlink->next || + strcmp(mlink->dsec, mlink->next->dsec) || + strcmp(mlink->fsec, mlink->next->fsec) || + strcmp(mlink->arch, mlink->next->arch)) { + putchar('('); + if ('\0' == *mlink->dsec) + fputs(mlink->fsec, stdout); + else + fputs(mlink->dsec, stdout); + if ('\0' != *mlink->arch) + printf("/%s", mlink->arch); + putchar(')'); + } + mlink = mlink->next; + if (NULL != mlink) + fputs(", ", stdout); + } + printf(" - %s\n", mpage->desc); + return; + } + + if (debug) + say(mlink->file, "Adding to database"); + + cp = mpage->desc; + i = strlen(cp); + mustfree = render_string(&cp, &i); + i = 1; + SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, cp); + SQL_BIND_INT(stmts[STMT_INSERT_PAGE], i, mpage->form); + SQL_STEP(stmts[STMT_INSERT_PAGE]); + mpage->pageid = sqlite3_last_insert_rowid(db); + sqlite3_reset(stmts[STMT_INSERT_PAGE]); + if (mustfree) + free(cp); + + while (NULL != mlink) { + dbadd_mlink(mlink); + mlink = mlink->next; + } + mlink = mpage->mlinks; + + for (key = ohash_first(&names, &slot); NULL != key; + key = ohash_next(&names, &slot)) { + assert(key->mpage == mpage); + i = 1; + SQL_BIND_INT64(stmts[STMT_INSERT_NAME], i, key->mask); + SQL_BIND_TEXT(stmts[STMT_INSERT_NAME], i, key->key); + SQL_BIND_INT64(stmts[STMT_INSERT_NAME], i, mpage->pageid); + SQL_STEP(stmts[STMT_INSERT_NAME]); + sqlite3_reset(stmts[STMT_INSERT_NAME]); + free(key); + } + for (key = ohash_first(&strings, &slot); NULL != key; + key = ohash_next(&strings, &slot)) { + assert(key->mpage == mpage); + i = 1; 
+ SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, key->mask); + SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->key); + SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, mpage->pageid); + SQL_STEP(stmts[STMT_INSERT_KEY]); + sqlite3_reset(stmts[STMT_INSERT_KEY]); + free(key); + } +} + +static void +dbprune(void) +{ + struct mpage *mpage; + struct mlink *mlink; + size_t i; + unsigned int slot; + + if (0 == nodb) + SQL_EXEC("BEGIN TRANSACTION"); + + for (mpage = ohash_first(&mpages, &slot); NULL != mpage; + mpage = ohash_next(&mpages, &slot)) { + mlink = mpage->mlinks; + if (debug) + say(mlink->file, "Deleting from database"); + if (nodb) + continue; + for ( ; NULL != mlink; mlink = mlink->next) { + i = 1; + SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], + i, mlink->dsec); + SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], + i, mlink->arch); + SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], + i, mlink->name); + SQL_STEP(stmts[STMT_DELETE_PAGE]); + sqlite3_reset(stmts[STMT_DELETE_PAGE]); + } + } + + if (0 == nodb) + SQL_EXEC("END TRANSACTION"); +} + +/* + * Close an existing database and its prepared statements. + * If "real" is not set, rename the temporary file into the real one. 
+ */ +static void +dbclose(int real) +{ + size_t i; + int status; + pid_t child; + + if (nodb) + return; + + for (i = 0; i < STMT__MAX; i++) { + sqlite3_finalize(stmts[i]); + stmts[i] = NULL; + } + + sqlite3_close(db); + db = NULL; + + if (real) + return; + + if ('\0' == *tempfilename) { + if (-1 == rename(MANDOC_DB "~", MANDOC_DB)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say(MANDOC_DB, "&rename"); + } + return; + } + + switch (child = fork()) { + case -1: + exitcode = (int)MANDOCLEVEL_SYSERR; + say("", "&fork cmp"); + return; + case 0: + execlp("cmp", "cmp", "-s", + tempfilename, MANDOC_DB, (char *)NULL); + say("", "&exec cmp"); + exit(0); + default: + break; + } + if (-1 == waitpid(child, &status, 0)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say("", "&wait cmp"); + } else if (WIFSIGNALED(status)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say("", "cmp died from signal %d", WTERMSIG(status)); + } else if (WEXITSTATUS(status)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say(MANDOC_DB, + "Data changed, but cannot replace database"); + } + + *strrchr(tempfilename, '/') = '\0'; + switch (child = fork()) { + case -1: + exitcode = (int)MANDOCLEVEL_SYSERR; + say("", "&fork rm"); + return; + case 0: + execlp("rm", "rm", "-rf", tempfilename, (char *)NULL); + say("", "&exec rm"); + exit((int)MANDOCLEVEL_SYSERR); + default: + break; + } + if (-1 == waitpid(child, &status, 0)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say("", "&wait rm"); + } else if (WIFSIGNALED(status) || WEXITSTATUS(status)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say("", "%s: Cannot remove temporary directory", + tempfilename); + } +} + +/* + * This is straightforward stuff. + * Open a database connection to a "temporary" database, then open a set + * of prepared statements we'll use over and over again. + * If "real" is set, we use the existing database; if not, we truncate a + * temporary one. + * Must be matched by dbclose(). 
+ */ +static int +dbopen(int real) +{ + const char *sql; + int rc, ofl; + + if (nodb) + return 1; + + *tempfilename = '\0'; + ofl = SQLITE_OPEN_READWRITE; + + if (real) { + rc = sqlite3_open_v2(MANDOC_DB, &db, ofl, NULL); + if (SQLITE_OK != rc) { + exitcode = (int)MANDOCLEVEL_SYSERR; + if (SQLITE_CANTOPEN != rc) + say(MANDOC_DB, "%s", sqlite3_errstr(rc)); + return 0; + } + goto prepare_statements; + } + + ofl |= SQLITE_OPEN_CREATE | SQLITE_OPEN_EXCLUSIVE; + + remove(MANDOC_DB "~"); + rc = sqlite3_open_v2(MANDOC_DB "~", &db, ofl, NULL); + if (SQLITE_OK == rc) + goto create_tables; + if (MPARSE_QUICK & mparse_options) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say(MANDOC_DB "~", "%s", sqlite3_errstr(rc)); + return 0; + } + + (void)strlcpy(tempfilename, "/tmp/mandocdb.XXXXXX", + sizeof(tempfilename)); + if (NULL == mkdtemp(tempfilename)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say("", "&%s", tempfilename); + return 0; + } + (void)strlcat(tempfilename, "/" MANDOC_DB, + sizeof(tempfilename)); + rc = sqlite3_open_v2(tempfilename, &db, ofl, NULL); + if (SQLITE_OK != rc) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say("", "%s: %s", tempfilename, sqlite3_errstr(rc)); + return 0; + } + +create_tables: + sql = "CREATE TABLE \"mpages\" (\n" + " \"desc\" TEXT NOT NULL,\n" + " \"form\" INTEGER NOT NULL,\n" + " \"pageid\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n" + ");\n" + "\n" + "CREATE TABLE \"mlinks\" (\n" + " \"sec\" TEXT NOT NULL,\n" + " \"arch\" TEXT NOT NULL,\n" + " \"name\" TEXT NOT NULL,\n" + " \"pageid\" INTEGER NOT NULL REFERENCES mpages(pageid) " + "ON DELETE CASCADE\n" + ");\n" + "CREATE INDEX mlinks_pageid_idx ON mlinks (pageid);\n" + "\n" + "CREATE TABLE \"names\" (\n" + " \"bits\" INTEGER NOT NULL,\n" + " \"name\" TEXT NOT NULL,\n" + " \"pageid\" INTEGER NOT NULL REFERENCES mpages(pageid) " + "ON DELETE CASCADE,\n" + " UNIQUE (\"name\", \"pageid\") ON CONFLICT REPLACE\n" + ");\n" + "\n" + "CREATE TABLE \"keys\" (\n" + " \"bits\" INTEGER NOT NULL,\n" + " 
\"key\" TEXT NOT NULL,\n" + " \"pageid\" INTEGER NOT NULL REFERENCES mpages(pageid) " + "ON DELETE CASCADE\n" + ");\n" + "CREATE INDEX keys_pageid_idx ON keys (pageid);\n"; + + if (SQLITE_OK != sqlite3_exec(db, sql, NULL, NULL, NULL)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say(MANDOC_DB, "%s", sqlite3_errmsg(db)); + sqlite3_close(db); + return 0; + } + +prepare_statements: + if (SQLITE_OK != sqlite3_exec(db, + "PRAGMA foreign_keys = ON", NULL, NULL, NULL)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say(MANDOC_DB, "PRAGMA foreign_keys: %s", + sqlite3_errmsg(db)); + sqlite3_close(db); + return 0; + } + + sql = "DELETE FROM mpages WHERE pageid IN " + "(SELECT pageid FROM mlinks WHERE " + "sec=? AND arch=? AND name=?)"; + sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_DELETE_PAGE], NULL); + sql = "INSERT INTO mpages " + "(desc,form) VALUES (?,?)"; + sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_PAGE], NULL); + sql = "INSERT INTO mlinks " + "(sec,arch,name,pageid) VALUES (?,?,?,?)"; + sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_LINK], NULL); + sql = "SELECT bits FROM names where pageid = ?"; + sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_SELECT_NAME], NULL); + sql = "INSERT INTO names " + "(bits,name,pageid) VALUES (?,?,?)"; + sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_NAME], NULL); + sql = "INSERT INTO keys " + "(bits,key,pageid) VALUES (?,?,?)"; + sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_KEY], NULL); + +#ifndef __APPLE__ + /* + * When opening a new database, we can turn off + * synchronous mode for much better performance. 
+ */ + + if (real && SQLITE_OK != sqlite3_exec(db, + "PRAGMA synchronous = OFF", NULL, NULL, NULL)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say(MANDOC_DB, "PRAGMA synchronous: %s", + sqlite3_errmsg(db)); + sqlite3_close(db); + return 0; + } +#endif + + return 1; +} + +static int +set_basedir(const char *targetdir, int report_baddir) +{ + static char startdir[PATH_MAX]; + static int getcwd_status; /* 1 = ok, 2 = failure */ + static int chdir_status; /* 1 = changed directory */ + char *cp; + + /* + * Remember the original working directory, if possible. + * This will be needed if the second or a later directory + * on the command line is given as a relative path. + * Do not error out if the current directory is not + * searchable: Maybe it won't be needed after all. + */ + if (0 == getcwd_status) { + if (NULL == getcwd(startdir, sizeof(startdir))) { + getcwd_status = 2; + (void)strlcpy(startdir, strerror(errno), + sizeof(startdir)); + } else + getcwd_status = 1; + } + + /* + * We are leaving the old base directory. + * Do not use it any longer, not even for messages. + */ + *basedir = '\0'; + + /* + * If and only if the directory was changed earlier and + * the next directory to process is given as a relative path, + * first go back, or bail out if that is impossible. + */ + if (chdir_status && '/' != *targetdir) { + if (2 == getcwd_status) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say("", "getcwd: %s", startdir); + return 0; + } + if (-1 == chdir(startdir)) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say("", "&chdir %s", startdir); + return 0; + } + } + + /* + * Always resolve basedir to the canonicalized absolute + * pathname and append a trailing slash, such that + * we can reliably check whether files are inside. 
+ */ + if (NULL == realpath(targetdir, basedir)) { + if (report_baddir || errno != ENOENT) { + exitcode = (int)MANDOCLEVEL_BADARG; + say("", "&%s: realpath", targetdir); + } + return 0; + } else if (-1 == chdir(basedir)) { + if (report_baddir || errno != ENOENT) { + exitcode = (int)MANDOCLEVEL_BADARG; + say("", "&chdir"); + } + return 0; + } + chdir_status = 1; + cp = strchr(basedir, '\0'); + if ('/' != cp[-1]) { + if (cp - basedir >= PATH_MAX - 1) { + exitcode = (int)MANDOCLEVEL_SYSERR; + say("", "Filename too long"); + return 0; + } + *cp++ = '/'; + *cp = '\0'; + } + return 1; +} + +static void +say(const char *file, const char *format, ...) +{ + va_list ap; + int use_errno; + + if ('\0' != *basedir) + fprintf(stderr, "%s", basedir); + if ('\0' != *basedir && '\0' != *file) + fputc('/', stderr); + if ('\0' != *file) + fprintf(stderr, "%s", file); + + use_errno = 1; + if (NULL != format) { + switch (*format) { + case '&': + format++; + break; + case '\0': + format = NULL; + break; + default: + use_errno = 0; + break; + } + } + if (NULL != format) { + if ('\0' != *basedir || '\0' != *file) + fputs(": ", stderr); + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); + } + if (use_errno) { + if ('\0' != *basedir || '\0' != *file || NULL != format) + fputs(": ", stderr); + perror(NULL); + } else + fputc('\n', stderr); +} diff --git a/contrib/mdocml/manpage.c b/contrib/mdocml/manpage.c new file mode 100644 index 0000000..45b6e76 --- /dev/null +++ b/contrib/mdocml/manpage.c @@ -0,0 +1,196 @@ +/* $Id: manpage.c,v 1.13 2015/11/07 17:58:55 schwarze Exp $ */ +/* + * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2013 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <getopt.h> +#include <limits.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "manconf.h" +#include "mansearch.h" + +static void show(const char *, const char *); + +int +main(int argc, char *argv[]) +{ + int ch, term; + size_t i, sz, linesz; + ssize_t len; + struct mansearch search; + struct manpage *res; + char *conf_file, *defpaths, *auxpaths, *line; + char buf[PATH_MAX]; + const char *cmd; + struct manconf conf; + char *progname; + extern char *optarg; + extern int optind; + + term = isatty(STDIN_FILENO) && isatty(STDOUT_FILENO); + + progname = strrchr(argv[0], '/'); + if (progname == NULL) + progname = argv[0]; + else + ++progname; + + auxpaths = defpaths = conf_file = NULL; + memset(&conf, 0, sizeof(conf)); + memset(&search, 0, sizeof(struct mansearch)); + + while (-1 != (ch = getopt(argc, argv, "C:M:m:S:s:"))) + switch (ch) { + case ('C'): + conf_file = optarg; + break; + case ('M'): + defpaths = optarg; + break; + case ('m'): + auxpaths = optarg; + break; + case ('S'): + search.arch = optarg; + break; + case ('s'): + search.sec = optarg; + break; + default: + goto usage; + } + + argc -= optind; + argv += optind; + + if (0 == argc) + goto usage; + + search.outkey = "Nd"; + search.argmode = ARG_EXPR; + + manconf_parse(&conf, conf_file, defpaths, auxpaths); + ch = mansearch(&search, &conf.manpath, 
argc, argv, &res, &sz); + manconf_free(&conf); + + if (0 == ch) + goto usage; + + if (0 == sz) { + free(res); + return EXIT_FAILURE; + } else if (1 == sz && term) { + i = 1; + goto show; + } else if (NULL == res) + return EXIT_FAILURE; + + for (i = 0; i < sz; i++) { + printf("%6zu %s: %s\n", + i + 1, res[i].names, res[i].output); + free(res[i].names); + free(res[i].output); + } + + if (0 == term) { + for (i = 0; i < sz; i++) + free(res[i].file); + free(res); + return EXIT_SUCCESS; + } + + i = 1; + printf("Enter a choice [1]: "); + fflush(stdout); + + line = NULL; + linesz = 0; + if ((len = getline(&line, &linesz, stdin)) != -1) { + if ('\n' == line[--len] && len > 0) { + line[len] = '\0'; + if ((i = atoi(line)) < 1 || i > sz) + i = 0; + } + } + free(line); + + if (0 == i) { + for (i = 0; i < sz; i++) + free(res[i].file); + free(res); + return EXIT_SUCCESS; + } +show: + cmd = res[i - 1].form ? "mandoc" : "cat"; + strlcpy(buf, res[i - 1].file, PATH_MAX); + for (i = 0; i < sz; i++) + free(res[i].file); + free(res); + + show(cmd, buf); + /* NOTREACHED */ +usage: + fprintf(stderr, "usage: %s [-C conf] " + "[-M paths] " + "[-m paths] " + "[-S arch] " + "[-s section] " + "expr ...\n", + progname); + return EXIT_FAILURE; +} + +static void +show(const char *cmd, const char *file) +{ + int fds[2]; + pid_t pid; + + if (-1 == pipe(fds)) { + perror(NULL); + exit(EXIT_FAILURE); + } + + if (-1 == (pid = fork())) { + perror(NULL); + exit(EXIT_FAILURE); + } else if (pid > 0) { + dup2(fds[0], STDIN_FILENO); + close(fds[1]); + cmd = NULL != getenv("MANPAGER") ? + getenv("MANPAGER") : + (NULL != getenv("PAGER") ? 
+ getenv("PAGER") : "more"); + execlp(cmd, cmd, (char *)NULL); + perror(cmd); + exit(EXIT_FAILURE); + } + + dup2(fds[1], STDOUT_FILENO); + close(fds[0]); + execlp(cmd, cmd, file, (char *)NULL); + perror(cmd); + exit(EXIT_FAILURE); +} diff --git a/contrib/mdocml/manpath.c b/contrib/mdocml/manpath.c new file mode 100644 index 0000000..0627f13 --- /dev/null +++ b/contrib/mdocml/manpath.c @@ -0,0 +1,336 @@ +/* $Id: manpath.c,v 1.29 2015/11/07 17:58:55 schwarze Exp $ */ +/* + * Copyright (c) 2011, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> +#include <sys/stat.h> + +#include <ctype.h> +#if HAVE_ERR +#include <err.h> +#endif +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "manconf.h" + +#if !HAVE_MANPATH +static void manconf_file(struct manconf *, const char *); +#endif +static void manpath_add(struct manpaths *, const char *, int); +static void manpath_parseline(struct manpaths *, char *, int); + + +void +manconf_parse(struct manconf *conf, const char *file, + char *defp, char *auxp) +{ +#if HAVE_MANPATH + char cmd[(PATH_MAX * 3) + 20]; + FILE *stream; + char *buf; + size_t sz, bsz; + + strlcpy(cmd, "manpath", sizeof(cmd)); + if (file) { + strlcat(cmd, " -C ", sizeof(cmd)); + strlcat(cmd, file, sizeof(cmd)); + } + if (auxp) { + strlcat(cmd, " -m ", sizeof(cmd)); + strlcat(cmd, auxp, sizeof(cmd)); + } + if (defp) { + strlcat(cmd, " -M ", sizeof(cmd)); + strlcat(cmd, defp, sizeof(cmd)); + } + + /* Open manpath(1). Ignore errors. */ + + stream = popen(cmd, "r"); + if (NULL == stream) + return; + + buf = NULL; + bsz = 0; + + /* Read in as much output as we can. */ + + do { + buf = mandoc_realloc(buf, bsz + 1024); + sz = fread(buf + bsz, 1, 1024, stream); + bsz += sz; + } while (sz > 0); + + if ( ! ferror(stream) && feof(stream) && + bsz && '\n' == buf[bsz - 1]) { + buf[bsz - 1] = '\0'; + manpath_parseline(&conf->manpath, buf, 1); + } + + free(buf); + pclose(stream); +#else + char *insert; + + /* Always prepend -m. */ + manpath_parseline(&conf->manpath, auxp, 1); + + /* If -M is given, it overrides everything else. */ + if (NULL != defp) { + manpath_parseline(&conf->manpath, defp, 1); + return; + } + + /* MANPATH and man.conf(5) cooperate. */ + defp = getenv("MANPATH"); + if (NULL == file) + file = MAN_CONF_FILE; + + /* No MANPATH; use man.conf(5) only. */ + if (NULL == defp || '\0' == defp[0]) { + manconf_file(conf, file); + return; + } + + /* Prepend man.conf(5) to MANPATH. 
*/ + if (':' == defp[0]) { + manconf_file(conf, file); + manpath_parseline(&conf->manpath, defp, 0); + return; + } + + /* Append man.conf(5) to MANPATH. */ + if (':' == defp[strlen(defp) - 1]) { + manpath_parseline(&conf->manpath, defp, 0); + manconf_file(conf, file); + return; + } + + /* Insert man.conf(5) into MANPATH. */ + insert = strstr(defp, "::"); + if (NULL != insert) { + *insert++ = '\0'; + manpath_parseline(&conf->manpath, defp, 0); + manconf_file(conf, file); + manpath_parseline(&conf->manpath, insert + 1, 0); + return; + } + + /* MANPATH overrides man.conf(5) completely. */ + manpath_parseline(&conf->manpath, defp, 0); +#endif +} + +/* + * Parse a FULL pathname from a colon-separated list of arrays. + */ +static void +manpath_parseline(struct manpaths *dirs, char *path, int complain) +{ + char *dir; + + if (NULL == path) + return; + + for (dir = strtok(path, ":"); dir; dir = strtok(NULL, ":")) + manpath_add(dirs, dir, complain); +} + +/* + * Add a directory to the array, ignoring bad directories. + * Grow the array one-by-one for simplicity's sake. 
+ */ +static void +manpath_add(struct manpaths *dirs, const char *dir, int complain) +{ + char buf[PATH_MAX]; + struct stat sb; + char *cp; + size_t i; + + if (NULL == (cp = realpath(dir, buf))) { + if (complain) + warn("manpath: %s", dir); + return; + } + + for (i = 0; i < dirs->sz; i++) + if (0 == strcmp(dirs->paths[i], dir)) + return; + + if (stat(cp, &sb) == -1) { + if (complain) + warn("manpath: %s", dir); + return; + } + + dirs->paths = mandoc_reallocarray(dirs->paths, + dirs->sz + 1, sizeof(char *)); + + dirs->paths[dirs->sz++] = mandoc_strdup(cp); +} + +void +manconf_free(struct manconf *conf) +{ + size_t i; + + for (i = 0; i < conf->manpath.sz; i++) + free(conf->manpath.paths[i]); + + free(conf->manpath.paths); + free(conf->output.includes); + free(conf->output.man); + free(conf->output.paper); + free(conf->output.style); +} + +#if !HAVE_MANPATH +static void +manconf_file(struct manconf *conf, const char *file) +{ + const char *const toks[] = { "manpath", "output", "_whatdb" }; + char manpath_default[] = MANPATH_DEFAULT; + + FILE *stream; + char *line, *cp, *ep; + size_t linesz, tok, toklen; + ssize_t linelen; + + if ((stream = fopen(file, "r")) == NULL) + goto out; + + line = NULL; + linesz = 0; + + while ((linelen = getline(&line, &linesz, stream)) != -1) { + cp = line; + ep = cp + linelen; + if (ep[-1] != '\n') + break; + *--ep = '\0'; + while (isspace((unsigned char)*cp)) + cp++; + if (*cp == '#') + continue; + + for (tok = 0; tok < sizeof(toks)/sizeof(toks[0]); tok++) { + toklen = strlen(toks[tok]); + if (cp + toklen < ep && + isspace((unsigned char)cp[toklen]) && + strncmp(cp, toks[tok], toklen) == 0) { + cp += toklen; + while (isspace((unsigned char)*cp)) + cp++; + break; + } + } + + switch (tok) { + case 2: /* _whatdb */ + while (ep > cp && ep[-1] != '/') + ep--; + if (ep == cp) + continue; + *ep = '\0'; + /* FALLTHROUGH */ + case 0: /* manpath */ + manpath_add(&conf->manpath, cp, 0); + *manpath_default = '\0'; + break; + case 1: /* output */ + 
manconf_output(&conf->output, cp); + break; + default: + break; + } + } + free(line); + fclose(stream); + +out: + if (*manpath_default != '\0') + manpath_parseline(&conf->manpath, manpath_default, 0); +} +#endif + +void +manconf_output(struct manoutput *conf, const char *cp) +{ + const char *const toks[] = { + "includes", "man", "paper", "style", + "indent", "width", "fragment", "mdoc" + }; + + size_t len, tok; + + for (tok = 0; tok < sizeof(toks)/sizeof(toks[0]); tok++) { + len = strlen(toks[tok]); + if ( ! strncmp(cp, toks[tok], len) && + strchr(" = ", cp[len]) != NULL) { + cp += len; + if (*cp == '=') + cp++; + while (isspace((unsigned char)*cp)) + cp++; + break; + } + } + + if (tok < 6 && *cp == '\0') + return; + + switch (tok) { + case 0: + if (conf->includes == NULL) + conf->includes = mandoc_strdup(cp); + break; + case 1: + if (conf->man == NULL) + conf->man = mandoc_strdup(cp); + break; + case 2: + if (conf->paper == NULL) + conf->paper = mandoc_strdup(cp); + break; + case 3: + if (conf->style == NULL) + conf->style = mandoc_strdup(cp); + break; + case 4: + if (conf->indent == 0) + conf->indent = strtonum(cp, 0, 1000, NULL); + break; + case 5: + if (conf->width == 0) + conf->width = strtonum(cp, 58, 1000, NULL); + break; + case 6: + conf->fragment = 1; + break; + case 7: + conf->mdoc = 1; + break; + default: + break; + } +} diff --git a/contrib/mdocml/manpath.h b/contrib/mdocml/manpath.h new file mode 100644 index 0000000..728373b --- /dev/null +++ b/contrib/mdocml/manpath.h @@ -0,0 +1,34 @@ +/* $Id: manpath.h,v 1.7 2014/12/01 04:05:32 schwarze Exp $ */ +/* + * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Unsorted list of unique, absolute paths to be searched for manual + * databases. + */ +struct manpaths { + size_t sz; + char **paths; +}; + +__BEGIN_DECLS + +void manpath_manconf(struct manpaths *, const char *); +void manpath_parse(struct manpaths *, const char *, char *, char *); +void manpath_free(struct manpaths *); + +__END_DECLS diff --git a/contrib/mdocml/mansearch.3 b/contrib/mdocml/mansearch.3 new file mode 100644 index 0000000..051f9aa --- /dev/null +++ b/contrib/mdocml/mansearch.3 @@ -0,0 +1,228 @@ +.\" $Id: mansearch.3,v 1.4 2015/03/27 17:37:25 schwarze Exp $ +.\" +.\" Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate: March 27 2015 $ +.Dt MANSEARCH 3 +.Os +.Sh NAME +.Nm mansearch , +.Nm mansearch_setup +.Nd search manual page databases +.Sh SYNOPSIS +.In stdint.h +.In manconf.h +.In mansearch.h +.Ft int +.Fo mansearch_setup +.Fa "int start" +.Fc +.Ft int +.Fo mansearch +.Fa "const struct mansearch *search" +.Fa "const struct manpaths *paths" +.Fa "int argc" +.Fa "char *argv[]" +.Fa "struct manpage **res" +.Fa "size_t *sz" +.Fc +.Sh DESCRIPTION +The +.Fn mansearch +function returns information about manuals matching a search query from a +.Xr mandoc.db 5 +SQLite3 database. +.Pp +The query arguments are as follows: +.Bl -tag -width Ds +.It Fa "const struct mansearch *search" +Search options, defined in +.In mansearch.h . +.It Fa "const struct manpaths *paths" +Directories to be searched, defined in +.In manconf.h . +.It Fa "int argc" , "char *argv[]" +Search criteria, usually taken from the command line. +.El +.Pp +The +.Va outkey +member of +.Fa search +selects which data to return in the +.Va output +field of the +.Fa res +structures. +It takes any of the macro keys defined in +.Pa mansearch_const.c +and described in +.Xr apropos 1 . +.Pp +The output arguments are as follows: +.Bl -tag -width Ds +.It Fa "struct manpage **res" +Returns a pointer to an array of result structures defined in +.In mansearch.h . +The user is expected to call +.Xr free 3 +on the +.Va file , +.Va names , +and +.Va output +fields of all structures, as well as the +.Fa res +array itself. +.It Fa "size_t *sz" +Returns the number of result structures contained in +.Fa res . +.El +.Pp +To speed up searches, the +.Fn mansearch_setup +function can optionally be called with a +.Fa start +argument of 1 before +.Fn mansearch +to set up an SQLite3 pagecache. +If it was called, it has to be called again with a +.Fa start +argument of 0 after the last call to +.Fn mansearch +to release the memory used for the pagecache. 
+.Sh IMPLEMENTATION NOTES +For each manual page tree, the search is done in two steps. +In the first step, a list of pages matching the search criteria is built. +In the second step, the requested information about these pages is +retrieved from the database and assembled into the +.Fa res +array. +.Pp +All functions mentioned here are defined in the file +.Pa mansearch.c . +No functions except +.Fn mansearch +and +.Fn sql_statement +build any SQL code, and no functions except +.Fn mansearch , +.Fn buildnames , +and +.Fn buildoutput +execute it. +.Ss Finding matches +The query is built using the following grammar: +.Bd -literal -offset indent +<query> ::= "SELECT * FROM mpages WHERE" <condition> +<condition> ::= "(" <condition> ")" | + <condition> "OR" <condition> | + <condition> "AND" <condition> | + "desc" <operator> "?" | + "id IN (SELECT pageid FROM" <subquery> ")" +<subquery> ::= "names WHERE name" <operator> "?" | + "keys WHERE key" <operator> "? AND bits & ?" +<operator> ::= "MATCH" | "REGEXP" +.Ed +.Pp +The MATCH and REGEXP operators are implemented by the functions +.Fn sql_match +and +.Fn sql_regexp , +respectively. +This is required because SQLite3 natively supports neither +case-insensitive substring matching nor regular expression matching, +but only string identity, shell globbing, and the weird home-brewed +LIKE operator. +.Pp +Command line parsing is done by the function +.Fn exprcomp +building a singly linked list of +.Vt expr +structures, using the helper function +.Fn exprterm . +The resulting SQL statement is assembled by the function +.Fn sql_statement +and evaluated in the main loop of the +.Fn mansearch +function. +.Ss Assembling the results +The names, sections, and architectures of the manuals found +are assembled into the +.Va names +field of the result structure by the function +.Fn buildnames , +using the following query: +.Pp +.Dl "SELECT * FROM mlinks WHERE pageid=? 
ORDER BY sec, arch, name" +.Pp +If the +.Fa outkey +differs from +.Qq Ic \&Nd , +the requested output data is assembled into the +.Va output +field of the result structure by the function +.Fn buildoutput , +using the following query: +.Pp +.Dl "SELECT * FROM keys WHERE pageid=? AND bits & ?" +.Sh FILES +.Bl -tag -width mandoc.db -compact +.It Pa mandoc.db +The manual page database. +.El +.Sh EXAMPLES +The simplest invocation +.Pp +.Dl apropos keyword +.Pp +results in the following SQL query: +.Bd -literal +SELECT * FROM mpages WHERE ( + id IN (SELECT pageid FROM names WHERE name MATCH 'keyword') OR + desc MATCH 'keyword' +); +.Ed +.Pp +A more complicated request like +.Pp +.Dl apropos -s 2 Nm,Xr=getuid +.Pp +results in: +.Bd -literal +SELECT * FROM mpages WHERE ( + id IN (SELECT pageid FROM names WHERE name MATCH 'getuid') OR + id IN (SELECT pageid FROM keys WHERE key MATCH 'getuid' AND bits & 4) +) AND id IN (SELECT pageid FROM keys WHERE key REGEXP '^2$' AND bits & 2); +.Ed +.Sh SEE ALSO +.Xr apropos 1 , +.Xr mandoc.db 5 , +.Xr makewhatis 8 +.Sh HISTORY +The +.Fn mansearch +subsystem first appeared in +.Ox 5.6 . +.Sh AUTHORS +.An -nosplit +A module to search manual page databases was first written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +in 2011, at first using the Berkeley DB; +he rewrote it for SQLite3 in 2012. +The current version received major changes from +.An Ingo Schwarze Aq Mt schwarze@openbsd.org . 
diff --git a/contrib/mdocml/mansearch.c b/contrib/mdocml/mansearch.c new file mode 100644 index 0000000..843326b --- /dev/null +++ b/contrib/mdocml/mansearch.c @@ -0,0 +1,853 @@ +/* $Id: mansearch.c,v 1.64 2016/01/08 15:02:54 schwarze Exp $ */ +/* + * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/mman.h> +#include <sys/types.h> + +#include <assert.h> +#if HAVE_ERR +#include <err.h> +#endif +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <glob.h> +#include <limits.h> +#include <regex.h> +#include <stdio.h> +#include <stdint.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <sqlite3.h> +#ifndef SQLITE_DETERMINISTIC +#define SQLITE_DETERMINISTIC 0 +#endif + +#include "mandoc.h" +#include "mandoc_aux.h" +#include "mandoc_ohash.h" +#include "manconf.h" +#include "mansearch.h" + +extern int mansearch_keymax; +extern const char *const mansearch_keynames[]; + +#define SQL_BIND_TEXT(_db, _s, _i, _v) \ + do { if (SQLITE_OK != sqlite3_bind_text \ + ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \ + errx((int)MANDOCLEVEL_SYSERR, "%s", sqlite3_errmsg((_db))); \ + } while (0) +#define SQL_BIND_INT64(_db, _s, _i, _v) \ + do { if (SQLITE_OK != sqlite3_bind_int64 \ + ((_s), (_i)++, (_v))) \ + errx((int)MANDOCLEVEL_SYSERR, "%s", sqlite3_errmsg((_db))); \ + } while (0) +#define SQL_BIND_BLOB(_db, _s, _i, _v) \ + do { if (SQLITE_OK != sqlite3_bind_blob \ + ((_s), (_i)++, (&_v), sizeof(_v), SQLITE_STATIC)) \ + errx((int)MANDOCLEVEL_SYSERR, "%s", sqlite3_errmsg((_db))); \ + } while (0) + +struct expr { + regex_t regexp; /* compiled regexp, if applicable */ + const char *substr; /* to search for, if applicable */ + struct expr *next; /* next in sequence */ + uint64_t bits; /* type-mask */ + int equal; /* equality, not subsring match */ + int open; /* opening parentheses before */ + int and; /* logical AND before */ + int close; /* closing parentheses after */ +}; + +struct match { + uint64_t pageid; /* identifier in database */ + uint64_t bits; /* name type mask */ + char *desc; /* manual page description */ + int form; /* bit field: formatted, zipped? 
*/ +}; + +static void buildnames(const struct mansearch *, + struct manpage *, sqlite3 *, + sqlite3_stmt *, uint64_t, + const char *, int form); +static char *buildoutput(sqlite3 *, sqlite3_stmt *, + uint64_t, uint64_t); +static struct expr *exprcomp(const struct mansearch *, + int, char *[]); +static void exprfree(struct expr *); +static struct expr *exprterm(const struct mansearch *, char *, int); +static int manpage_compare(const void *, const void *); +static void sql_append(char **sql, size_t *sz, + const char *newstr, int count); +static void sql_match(sqlite3_context *context, + int argc, sqlite3_value **argv); +static void sql_regexp(sqlite3_context *context, + int argc, sqlite3_value **argv); +static char *sql_statement(const struct expr *); + + +int +mansearch_setup(int start) +{ + static void *pagecache; + int c; + +#define PC_PAGESIZE 1280 +#define PC_NUMPAGES 256 + + if (start) { + if (NULL != pagecache) { + warnx("pagecache already enabled"); + return (int)MANDOCLEVEL_BADARG; + } + + pagecache = mmap(NULL, PC_PAGESIZE * PC_NUMPAGES, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANON, -1, 0); + + if (MAP_FAILED == pagecache) { + warn("mmap"); + pagecache = NULL; + return (int)MANDOCLEVEL_SYSERR; + } + + c = sqlite3_config(SQLITE_CONFIG_PAGECACHE, + pagecache, PC_PAGESIZE, PC_NUMPAGES); + + if (SQLITE_OK == c) + return (int)MANDOCLEVEL_OK; + + warnx("pagecache: %s", sqlite3_errstr(c)); + + } else if (NULL == pagecache) { + warnx("pagecache missing"); + return (int)MANDOCLEVEL_BADARG; + } + + if (-1 == munmap(pagecache, PC_PAGESIZE * PC_NUMPAGES)) { + warn("munmap"); + pagecache = NULL; + return (int)MANDOCLEVEL_SYSERR; + } + + pagecache = NULL; + return (int)MANDOCLEVEL_OK; +} + +int +mansearch(const struct mansearch *search, + const struct manpaths *paths, + int argc, char *argv[], + struct manpage **res, size_t *sz) +{ + int64_t pageid; + uint64_t outbit, iterbit; + char buf[PATH_MAX]; + char *sql; + struct manpage *mpage; + struct expr *e, *ep; + 
sqlite3 *db; + sqlite3_stmt *s, *s2; + struct match *mp; + struct ohash htab; + unsigned int idx; + size_t i, j, cur, maxres; + int c, chdir_status, getcwd_status, indexbit; + + if (argc == 0 || (e = exprcomp(search, argc, argv)) == NULL) { + *sz = 0; + return 0; + } + + cur = maxres = 0; + *res = NULL; + + if (NULL != search->outkey) { + outbit = TYPE_Nd; + for (indexbit = 0, iterbit = 1; + indexbit < mansearch_keymax; + indexbit++, iterbit <<= 1) { + if (0 == strcasecmp(search->outkey, + mansearch_keynames[indexbit])) { + outbit = iterbit; + break; + } + } + } else + outbit = 0; + + /* + * Remember the original working directory, if possible. + * This will be needed if the second or a later directory + * is given as a relative path. + * Do not error out if the current directory is not + * searchable: Maybe it won't be needed after all. + */ + + if (getcwd(buf, PATH_MAX) == NULL) { + getcwd_status = 0; + (void)strlcpy(buf, strerror(errno), sizeof(buf)); + } else + getcwd_status = 1; + + sql = sql_statement(e); + + /* + * Loop over the directories (containing databases) for us to + * search. + * Don't let missing/bad databases/directories phase us. + * In each, try to open the resident database and, if it opens, + * scan it for our match expression. + */ + + chdir_status = 0; + for (i = 0; i < paths->sz; i++) { + if (chdir_status && paths->paths[i][0] != '/') { + if ( ! getcwd_status) { + warnx("%s: getcwd: %s", paths->paths[i], buf); + continue; + } else if (chdir(buf) == -1) { + warn("%s", buf); + continue; + } + } + if (chdir(paths->paths[i]) == -1) { + warn("%s", paths->paths[i]); + continue; + } + chdir_status = 1; + + c = sqlite3_open_v2(MANDOC_DB, &db, + SQLITE_OPEN_READONLY, NULL); + + if (SQLITE_OK != c) { + warn("%s/%s", paths->paths[i], MANDOC_DB); + sqlite3_close(db); + continue; + } + + /* + * Define the SQL functions for substring + * and regular expression matching. 
+ */ + + c = sqlite3_create_function(db, "match", 2, + SQLITE_UTF8 | SQLITE_DETERMINISTIC, + NULL, sql_match, NULL, NULL); + assert(SQLITE_OK == c); + c = sqlite3_create_function(db, "regexp", 2, + SQLITE_UTF8 | SQLITE_DETERMINISTIC, + NULL, sql_regexp, NULL, NULL); + assert(SQLITE_OK == c); + + j = 1; + c = sqlite3_prepare_v2(db, sql, -1, &s, NULL); + if (SQLITE_OK != c) + errx((int)MANDOCLEVEL_SYSERR, + "%s", sqlite3_errmsg(db)); + + for (ep = e; NULL != ep; ep = ep->next) { + if (NULL == ep->substr) { + SQL_BIND_BLOB(db, s, j, ep->regexp); + } else + SQL_BIND_TEXT(db, s, j, ep->substr); + if (0 == ((TYPE_Nd | TYPE_Nm) & ep->bits)) + SQL_BIND_INT64(db, s, j, ep->bits); + } + + mandoc_ohash_init(&htab, 4, offsetof(struct match, pageid)); + + /* + * Hash each entry on its [unique] document identifier. + * This is a uint64_t. + * Instead of using a hash function, simply convert the + * uint64_t to a uint32_t, the hash value's type. + * This gives good performance and preserves the + * distribution of buckets in the table. + */ + while (SQLITE_ROW == (c = sqlite3_step(s))) { + pageid = sqlite3_column_int64(s, 2); + idx = ohash_lookup_memory(&htab, + (char *)&pageid, sizeof(uint64_t), + (uint32_t)pageid); + + if (NULL != ohash_find(&htab, idx)) + continue; + + mp = mandoc_calloc(1, sizeof(struct match)); + mp->pageid = pageid; + mp->form = sqlite3_column_int(s, 1); + mp->bits = sqlite3_column_int64(s, 3); + if (TYPE_Nd == outbit) + mp->desc = mandoc_strdup((const char *) + sqlite3_column_text(s, 0)); + ohash_insert(&htab, idx, mp); + } + + if (SQLITE_DONE != c) + warnx("%s", sqlite3_errmsg(db)); + + sqlite3_finalize(s); + + c = sqlite3_prepare_v2(db, + "SELECT sec, arch, name, pageid FROM mlinks " + "WHERE pageid=? ORDER BY sec, arch, name", + -1, &s, NULL); + if (SQLITE_OK != c) + errx((int)MANDOCLEVEL_SYSERR, + "%s", sqlite3_errmsg(db)); + + c = sqlite3_prepare_v2(db, + "SELECT bits, key, pageid FROM keys " + "WHERE pageid=? 
AND bits & ?", + -1, &s2, NULL); + if (SQLITE_OK != c) + errx((int)MANDOCLEVEL_SYSERR, + "%s", sqlite3_errmsg(db)); + + for (mp = ohash_first(&htab, &idx); + NULL != mp; + mp = ohash_next(&htab, &idx)) { + if (cur + 1 > maxres) { + maxres += 1024; + *res = mandoc_reallocarray(*res, + maxres, sizeof(struct manpage)); + } + mpage = *res + cur; + mpage->ipath = i; + mpage->bits = mp->bits; + mpage->sec = 10; + mpage->form = mp->form; + buildnames(search, mpage, db, s, mp->pageid, + paths->paths[i], mp->form); + if (mpage->names != NULL) { + mpage->output = TYPE_Nd & outbit ? + mp->desc : outbit ? + buildoutput(db, s2, mp->pageid, outbit) : + NULL; + cur++; + } + free(mp); + } + + sqlite3_finalize(s); + sqlite3_finalize(s2); + sqlite3_close(db); + ohash_delete(&htab); + + /* + * In man(1) mode, prefer matches in earlier trees + * over matches in later trees. + */ + + if (cur && search->firstmatch) + break; + } + qsort(*res, cur, sizeof(struct manpage), manpage_compare); + if (chdir_status && getcwd_status && chdir(buf) == -1) + warn("%s", buf); + exprfree(e); + free(sql); + *sz = cur; + return 1; +} + +void +mansearch_free(struct manpage *res, size_t sz) +{ + size_t i; + + for (i = 0; i < sz; i++) { + free(res[i].file); + free(res[i].names); + free(res[i].output); + } + free(res); +} + +static int +manpage_compare(const void *vp1, const void *vp2) +{ + const struct manpage *mp1, *mp2; + int diff; + + mp1 = vp1; + mp2 = vp2; + return (diff = mp2->bits - mp1->bits) ? diff : + (diff = mp1->sec - mp2->sec) ? 
diff : + strcasecmp(mp1->names, mp2->names); +} + +static void +buildnames(const struct mansearch *search, struct manpage *mpage, + sqlite3 *db, sqlite3_stmt *s, + uint64_t pageid, const char *path, int form) +{ + glob_t globinfo; + char *firstname, *newnames, *prevsec, *prevarch; + const char *oldnames, *sep1, *name, *sec, *sep2, *arch, *fsec; + size_t i; + int c, globres; + + mpage->file = NULL; + mpage->names = NULL; + firstname = prevsec = prevarch = NULL; + i = 1; + SQL_BIND_INT64(db, s, i, pageid); + while (SQLITE_ROW == (c = sqlite3_step(s))) { + + /* Decide whether we already have some names. */ + + if (NULL == mpage->names) { + oldnames = ""; + sep1 = ""; + } else { + oldnames = mpage->names; + sep1 = ", "; + } + + /* Fetch the next name, rejecting sec/arch mismatches. */ + + sec = (const char *)sqlite3_column_text(s, 0); + if (search->sec != NULL && strcasecmp(sec, search->sec)) + continue; + arch = (const char *)sqlite3_column_text(s, 1); + if (search->arch != NULL && *arch != '\0' && + strcasecmp(arch, search->arch)) + continue; + name = (const char *)sqlite3_column_text(s, 2); + + /* Remember the first section found. */ + + if (9 < mpage->sec && '1' <= *sec && '9' >= *sec) + mpage->sec = (*sec - '1') + 1; + + /* If the section changed, append the old one. */ + + if (NULL != prevsec && + (strcmp(sec, prevsec) || + strcmp(arch, prevarch))) { + sep2 = '\0' == *prevarch ? "" : "/"; + mandoc_asprintf(&newnames, "%s(%s%s%s)", + oldnames, prevsec, sep2, prevarch); + free(mpage->names); + oldnames = mpage->names = newnames; + free(prevsec); + free(prevarch); + prevsec = prevarch = NULL; + } + + /* Save the new section, to append it later. */ + + if (NULL == prevsec) { + prevsec = mandoc_strdup(sec); + prevarch = mandoc_strdup(arch); + } + + /* Append the new name. */ + + mandoc_asprintf(&newnames, "%s%s%s", + oldnames, sep1, name); + free(mpage->names); + mpage->names = newnames; + + /* Also save the first file name encountered. 
*/ + + if (mpage->file != NULL) + continue; + + if (form & FORM_SRC) { + sep1 = "man"; + fsec = sec; + } else { + sep1 = "cat"; + fsec = "0"; + } + sep2 = *arch == '\0' ? "" : "/"; + mandoc_asprintf(&mpage->file, "%s/%s%s%s%s/%s.%s", + path, sep1, sec, sep2, arch, name, fsec); + if (access(mpage->file, R_OK) != -1) + continue; + + /* Handle unusual file name extensions. */ + + if (firstname == NULL) + firstname = mpage->file; + else + free(mpage->file); + mandoc_asprintf(&mpage->file, "%s/%s%s%s%s/%s.*", + path, sep1, sec, sep2, arch, name); + globres = glob(mpage->file, 0, NULL, &globinfo); + free(mpage->file); + mpage->file = globres ? NULL : + mandoc_strdup(*globinfo.gl_pathv); + globfree(&globinfo); + } + if (c != SQLITE_DONE) + warnx("%s", sqlite3_errmsg(db)); + sqlite3_reset(s); + + /* If none of the files is usable, use the first name. */ + + if (mpage->file == NULL) + mpage->file = firstname; + else if (mpage->file != firstname) + free(firstname); + + /* Append one final section to the names. */ + + if (prevsec != NULL) { + sep2 = *prevarch == '\0' ? 
"" : "/"; + mandoc_asprintf(&newnames, "%s(%s%s%s)", + mpage->names, prevsec, sep2, prevarch); + free(mpage->names); + mpage->names = newnames; + free(prevsec); + free(prevarch); + } +} + +static char * +buildoutput(sqlite3 *db, sqlite3_stmt *s, uint64_t pageid, uint64_t outbit) +{ + char *output, *newoutput; + const char *oldoutput, *sep1, *data; + size_t i; + int c; + + output = NULL; + i = 1; + SQL_BIND_INT64(db, s, i, pageid); + SQL_BIND_INT64(db, s, i, outbit); + while (SQLITE_ROW == (c = sqlite3_step(s))) { + if (NULL == output) { + oldoutput = ""; + sep1 = ""; + } else { + oldoutput = output; + sep1 = " # "; + } + data = (const char *)sqlite3_column_text(s, 1); + mandoc_asprintf(&newoutput, "%s%s%s", + oldoutput, sep1, data); + free(output); + output = newoutput; + } + if (SQLITE_DONE != c) + warnx("%s", sqlite3_errmsg(db)); + sqlite3_reset(s); + return output; +} + +/* + * Implement substring match as an application-defined SQL function. + * Using the SQL LIKE or GLOB operators instead would be a bad idea + * because that would require escaping metacharacters in the string + * being searched for. + */ +static void +sql_match(sqlite3_context *context, int argc, sqlite3_value **argv) +{ + + assert(2 == argc); + sqlite3_result_int(context, NULL != strcasestr( + (const char *)sqlite3_value_text(argv[1]), + (const char *)sqlite3_value_text(argv[0]))); +} + +/* + * Implement regular expression match + * as an application-defined SQL function. + */ +static void +sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv) +{ + + assert(2 == argc); + sqlite3_result_int(context, !regexec( + (regex_t *)sqlite3_value_blob(argv[0]), + (const char *)sqlite3_value_text(argv[1]), + 0, NULL, 0)); +} + +static void +sql_append(char **sql, size_t *sz, const char *newstr, int count) +{ + size_t newsz; + + newsz = 1 < count ? 
(size_t)count : strlen(newstr); + *sql = mandoc_realloc(*sql, *sz + newsz + 1); + if (1 < count) + memset(*sql + *sz, *newstr, (size_t)count); + else + memcpy(*sql + *sz, newstr, newsz); + *sz += newsz; + (*sql)[*sz] = '\0'; +} + +/* + * Prepare the search SQL statement. + */ +static char * +sql_statement(const struct expr *e) +{ + char *sql; + size_t sz; + int needop; + + sql = mandoc_strdup(e->equal ? + "SELECT desc, form, pageid, bits " + "FROM mpages NATURAL JOIN names WHERE " : + "SELECT desc, form, pageid, 0 FROM mpages WHERE "); + sz = strlen(sql); + + for (needop = 0; NULL != e; e = e->next) { + if (e->and) + sql_append(&sql, &sz, " AND ", 1); + else if (needop) + sql_append(&sql, &sz, " OR ", 1); + if (e->open) + sql_append(&sql, &sz, "(", e->open); + sql_append(&sql, &sz, + TYPE_Nd & e->bits + ? (NULL == e->substr + ? "desc REGEXP ?" + : "desc MATCH ?") + : TYPE_Nm == e->bits + ? (NULL == e->substr + ? "pageid IN (SELECT pageid FROM names " + "WHERE name REGEXP ?)" + : e->equal + ? "name = ? " + : "pageid IN (SELECT pageid FROM names " + "WHERE name MATCH ?)") + : (NULL == e->substr + ? "pageid IN (SELECT pageid FROM keys " + "WHERE key REGEXP ? AND bits & ?)" + : "pageid IN (SELECT pageid FROM keys " + "WHERE key MATCH ? AND bits & ?)"), 1); + if (e->close) + sql_append(&sql, &sz, ")", e->close); + needop = 1; + } + + return sql; +} + +/* + * Compile a set of string tokens into an expression. + * Tokens in "argv" are assumed to be individual expression atoms (e.g., + * "(", "foo=bar", etc.). 
+ */ +static struct expr * +exprcomp(const struct mansearch *search, int argc, char *argv[]) +{ /* Turn the argv[] tokens into a linked list of struct expr nodes. Parentheses, -a, -o and -i are consumed as operators; every other token is handed to exprterm(). Returns the list head, or NULL after freeing any partial list when the expression is malformed or a term is rejected by exprterm(). */ + uint64_t mask; + int i, toopen, logic, igncase, toclose; + struct expr *first, *prev, *cur, *next; + + first = cur = NULL; + logic = igncase = toopen = toclose = 0; /* toopen: opening parentheses seen since the last term; toclose: parentheses still unclosed; logic: 1 after -a, 2 after -o; igncase: -i seen for the next term. */ + + for (i = 0; i < argc; i++) { + if (0 == strcmp("(", argv[i])) { + if (igncase) + goto fail; + toopen++; + toclose++; + continue; + } else if (0 == strcmp(")", argv[i])) { + if (toopen || logic || igncase || NULL == cur) + goto fail; + cur->close++; + if (0 > --toclose) + goto fail; + continue; + } else if (0 == strcmp("-a", argv[i])) { + if (toopen || logic || igncase || NULL == cur) + goto fail; + logic = 1; + continue; + } else if (0 == strcmp("-o", argv[i])) { + if (toopen || logic || igncase || NULL == cur) + goto fail; + logic = 2; + continue; + } else if (0 == strcmp("-i", argv[i])) { + if (igncase) + goto fail; + igncase = 1; + continue; + } + next = exprterm(search, argv[i], !igncase); + if (NULL == next) + goto fail; + if (NULL == first) + first = next; + else + cur->next = next; + prev = cur = next; + + /* + * Searching for descriptions must be split out + * because they are stored in the mpages table, + * not in the keys table. + */ + + for (mask = TYPE_Nm; mask <= TYPE_Nd; mask <<= 1) { + if (mask & cur->bits && ~mask & cur->bits) { + next = mandoc_calloc(1, + sizeof(struct expr)); + memcpy(next, cur, sizeof(struct expr)); + prev->open = 1; + cur->bits = mask; + cur->next = next; + cur = next; + cur->bits &= ~mask; + } + } /* prev is the first node of this term, cur the last one after any split. */ + prev->and = (1 == logic); + prev->open += toopen; + if (cur != prev) + cur->close = 1; + + toopen = logic = igncase = 0; + } + if ( ! 
(toopen || logic || igncase || toclose)) + return first; /* reached only when no parenthesis, operator or -i is left dangling */ + +fail: + if (NULL != first) + exprfree(first); + return NULL; +} + +static struct expr * +exprterm(const struct mansearch *search, char *buf, int cs) +{ /* Build one struct expr from the search term in buf, which may be modified in place. A nonzero cs makes a compiled regular expression case sensitive. Returns NULL for an empty term, a regular expression that does not compile, or an unknown key name. */ + char errbuf[BUFSIZ]; + struct expr *e; + char *key, *val; + uint64_t iterbit; + int i, irc; + + if ('\0' == *buf) + return NULL; + + e = mandoc_calloc(1, sizeof(struct expr)); + + if (search->argmode == ARG_NAME) { + e->bits = TYPE_Nm; + e->substr = buf; + e->equal = 1; + return e; + } + + /* + * Separate macro keys from search string. + * If needed, request regular expression handling + * by setting e->substr to NULL. + */ + + if (search->argmode == ARG_WORD) { + e->bits = TYPE_Nm; + e->substr = NULL; +#if HAVE_REWB_BSD + mandoc_asprintf(&val, "[[:<:]]%s[[:>:]]", buf); +#elif HAVE_REWB_SYSV + mandoc_asprintf(&val, "\\<%s\\>", buf); +#else + mandoc_asprintf(&val, + "(^|[^a-zA-Z01-9_])%s([^a-zA-Z01-9_]|$)", buf); +#endif + cs = 0; /* word searches are always case insensitive */ + } else if ((val = strpbrk(buf, "=~")) == NULL) { + e->bits = TYPE_Nm | TYPE_Nd; + e->substr = buf; + } else { /* An operator at the very start means no key list was given. An equals sign selects substring matching on the text after it; a tilde leaves substr NULL so that a regular expression is compiled below. */ + if (val == buf) + e->bits = TYPE_Nm | TYPE_Nd; + if ('=' == *val) + e->substr = val + 1; + *val++ = '\0'; + if (NULL != strstr(buf, "arch")) + cs = 0; + } + + /* Compile regular expressions. */ + + if (NULL == e->substr) { + irc = regcomp(&e->regexp, val, + REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE)); + if (search->argmode == ARG_WORD) + free(val); /* only word mode allocated val */ + if (irc) { + regerror(irc, &e->regexp, errbuf, sizeof(errbuf)); + warnx("regcomp: %s", errbuf); + free(e); + return NULL; + } + } + + if (e->bits) + return e; + + /* + * Parse out all possible fields. + * If the field doesn't resolve, bail. 
+ */ + + while (NULL != (key = strsep(&buf, ","))) { + if ('\0' == *key) + continue; /* Bit i of e->bits stands for mansearch_keynames[i]. */ + for (i = 0, iterbit = 1; + i < mansearch_keymax; + i++, iterbit <<= 1) { + if (0 == strcasecmp(key, + mansearch_keynames[i])) { + e->bits |= iterbit; + break; + } + } + if (i == mansearch_keymax) { + if (strcasecmp(key, "any")) { /* NOTE(review): if a regular expression was compiled above, it is not released with regfree() on this path. */ + free(e); + return NULL; + } + e->bits |= ~0ULL; /* the pseudo-key any selects all keys */ + } + } + + return e; +} + +static void +exprfree(struct expr *p) +{ /* Free each node of the list starting at p; nothing the nodes point to is released. */ + struct expr *pp; + + while (NULL != p) { + pp = p->next; + free(p); + p = pp; + } +} diff --git a/contrib/mdocml/mansearch.h b/contrib/mdocml/mansearch.h new file mode 100644 index 0000000..7f68ff6 --- /dev/null +++ b/contrib/mdocml/mansearch.h @@ -0,0 +1,108 @@ +/* $Id: mansearch.h,v 1.24 2015/11/07 14:01:16 schwarze Exp $ */ +/* + * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#define MANDOC_DB "mandoc.db" + /* One bit per search key; bit i corresponds to mansearch_keynames[i]. */ +#define TYPE_arch 0x0000000000000001ULL +#define TYPE_sec 0x0000000000000002ULL +#define TYPE_Xr 0x0000000000000004ULL +#define TYPE_Ar 0x0000000000000008ULL +#define TYPE_Fa 0x0000000000000010ULL +#define TYPE_Fl 0x0000000000000020ULL +#define TYPE_Dv 0x0000000000000040ULL +#define TYPE_Fn 0x0000000000000080ULL +#define TYPE_Ic 0x0000000000000100ULL +#define TYPE_Pa 0x0000000000000200ULL +#define TYPE_Cm 0x0000000000000400ULL +#define TYPE_Li 0x0000000000000800ULL +#define TYPE_Em 0x0000000000001000ULL +#define TYPE_Cd 0x0000000000002000ULL +#define TYPE_Va 0x0000000000004000ULL +#define TYPE_Ft 0x0000000000008000ULL +#define TYPE_Tn 0x0000000000010000ULL +#define TYPE_Er 0x0000000000020000ULL +#define TYPE_Ev 0x0000000000040000ULL +#define TYPE_Sy 0x0000000000080000ULL +#define TYPE_Sh 0x0000000000100000ULL +#define TYPE_In 0x0000000000200000ULL +#define TYPE_Ss 0x0000000000400000ULL +#define TYPE_Ox 0x0000000000800000ULL +#define TYPE_An 0x0000000001000000ULL +#define TYPE_Mt 0x0000000002000000ULL +#define TYPE_St 0x0000000004000000ULL +#define TYPE_Bx 0x0000000008000000ULL +#define TYPE_At 0x0000000010000000ULL +#define TYPE_Nx 0x0000000020000000ULL +#define TYPE_Fx 0x0000000040000000ULL +#define TYPE_Lk 0x0000000080000000ULL +#define TYPE_Ms 0x0000000100000000ULL +#define TYPE_Bsx 0x0000000200000000ULL +#define TYPE_Dx 0x0000000400000000ULL +#define TYPE_Rs 0x0000000800000000ULL +#define TYPE_Vt 0x0000001000000000ULL +#define TYPE_Lb 0x0000002000000000ULL +#define TYPE_Nm 0x0000004000000000ULL +#define TYPE_Nd 0x0000008000000000ULL + /* Name type flags: every value except NAME_MASK also carries the TYPE_Nm bit, and NAME_MASK covers the five low bits. */ +#define NAME_SYN 0x0000004000000001ULL +#define NAME_FIRST 0x0000004000000004ULL +#define NAME_TITLE 0x0000004000000006ULL +#define NAME_HEAD 0x0000004000000008ULL +#define NAME_FILE 0x0000004000000010ULL +#define NAME_MASK 0x000000000000001fULL + +#define FORM_CAT 0 /* manual page is preformatted */ +#define FORM_SRC 1 /* format is mdoc(7) or man(7) */ +#define FORM_NONE 4 /* format is 
unknown */ + /* How the search arguments are interpreted, see the argmode member of struct mansearch. */ +enum argmode { + ARG_FILE = 0, + ARG_NAME, + ARG_WORD, + ARG_EXPR +}; + /* One search result, as returned through the res argument of mansearch(). */ +struct manpage { + char *file; /* to be prefixed by manpath */ + char *names; /* a list of names with sections */ + char *output; /* user-defined additional output */ + size_t ipath; /* number of the manpath */ + uint64_t bits; /* name type mask */ + int sec; /* section number, 10 means invalid */ + int form; /* 0 == catpage */ +}; + /* Search options, passed as the cfg argument of mansearch(). */ +struct mansearch { + const char *arch; /* architecture/NULL */ + const char *sec; /* mansection/NULL */ + const char *outkey; /* show content of this macro */ + enum argmode argmode; /* interpretation of arguments */ + int firstmatch; /* first matching database only */ +}; + + +struct manpaths; + +int mansearch_setup(int); +int mansearch(const struct mansearch *cfg, /* options */ + const struct manpaths *paths, /* manpaths */ + int argc, /* size of argv */ + char *argv[], /* search terms */ + struct manpage **res, /* results */ + size_t *ressz); /* results returned */ +void mansearch_free(struct manpage *, size_t); diff --git a/contrib/mdocml/mansearch_const.c b/contrib/mdocml/mansearch_const.c new file mode 100644 index 0000000..61351c3 --- /dev/null +++ b/contrib/mdocml/mansearch_const.c @@ -0,0 +1,33 @@ +/* $Id: mansearch_const.c,v 1.7 2014/12/01 08:05:52 schwarze Exp $ */ +/* + * Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> + +#include <stdint.h> + +#include "mansearch.h" + /* Number of entries in mansearch_keynames[]. */ +const int mansearch_keymax = 40; + /* Key names, listed in the same order as the TYPE_* bits in mansearch.h. */ +const char *const mansearch_keynames[40] = { + "arch", "sec", "Xr", "Ar", "Fa", "Fl", "Dv", "Fn", + "Ic", "Pa", "Cm", "Li", "Em", "Cd", "Va", "Ft", + "Tn", "Er", "Ev", "Sy", "Sh", "In", "Ss", "Ox", + "An", "Mt", "St", "Bx", "At", "Nx", "Fx", "Lk", + "Ms", "Bsx", "Dx", "Rs", "Vt", "Lb", "Nm", "Nd" +}; diff --git a/contrib/mdocml/mchars_alloc.3 b/contrib/mdocml/mchars_alloc.3 new file mode 100644 index 0000000..2d42a43 --- /dev/null +++ b/contrib/mdocml/mchars_alloc.3 @@ -0,0 +1,226 @@ +.\" $Id: mchars_alloc.3,v 1.3 2015/10/13 22:59:54 schwarze Exp $ +.\" +.\" Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate: October 13 2015 $ +.Dt MCHARS_ALLOC 3 +.Os +.Sh NAME +.Nm mchars_alloc , +.Nm mchars_free , +.Nm mchars_num2char , +.Nm mchars_num2uc , +.Nm mchars_spec2cp , +.Nm mchars_spec2str , +.Nm mchars_uc2str +.Nd character table for mandoc +.Sh SYNOPSIS +.In sys/types.h +.In mandoc.h +.Ft void +.Fn mchars_alloc void +.Ft void +.Fn mchars_free void +.Ft char +.Fo mchars_num2char +.Fa "const char *decimal" +.Fa "size_t sz" +.Fc +.Ft int +.Fo mchars_num2uc +.Fa "const char *hexadecimal" +.Fa "size_t sz" +.Fc +.Ft int +.Fo mchars_spec2cp +.Fa "const char *name" +.Fa "size_t sz" +.Fc +.Ft "const char *" +.Fo mchars_spec2str +.Fa "const char *name" +.Fa "size_t sz" +.Fa "size_t *rsz" +.Fc +.Ft "const char *" +.Fn mchars_uc2str "int codepoint" +.Sh DESCRIPTION +These functions translate Unicode character numbers and +.Xr roff 7 +character names into glyphs. +See +.Xr mandoc_char 7 +for a list of +.Xr roff 7 +special characters. +These functions are intended for external use by programs formatting +.Xr mdoc 7 +and +.Xr man 7 +pages for output, for example the +.Xr mandoc 1 +output formatter modules and +.Xr makewhatis 8 . +The +.Fa decimal , +.Fa hexadecimal , +.Fa name , +and +.Fa sz +input arguments are usually obtained from the +.Xr mandoc_escape 3 +parser function. +.Pp +The function +.Fn mchars_num2char +converts a +.Fa decimal +string representation of a character number consisting of +.Fa sz +digits into a printable ASCII character. +If the input string is non-numeric or does not represent a printable +ASCII character, the NUL character +.Pq Sq \e0 +is returned. +For example, the +.Xr mandoc 1 +.Fl Tascii , +.Fl Tutf8 , +and +.Fl Thtml +output modules use this function to render +.Xr roff 7 +.Ic \eN +escape sequences. +.Pp +The function +.Fn mchars_num2uc +converts a +.Fa hexadecimal +string representation of a Unicode codepoint consisting of +.Fa sz +digits into an integer representation. 
+If the input string is non-numeric or represents an ASCII character, +the NUL character +.Pq Sq \e0 +is returned. +For example, the +.Xr mandoc 1 +.Fl Tutf8 +and +.Fl Thtml +output modules use this function to render +.Xr roff 7 +.Ic \e[u Ns Ar XXXX Ns Ic \&] +and +.Ic \eC\(aqu Ns Ar XXXX Ns Ic \(aq +escape sequences. +.Pp +The function +.Fn mchars_alloc +initializes a static +.Vt "struct ohash" +object for subsequent use by the following two lookup functions. +When no longer needed, this object can be destroyed with +.Fn mchars_free . +.Pp +The function +.Fn mchars_spec2cp +looks up a +.Xr roff 7 +special character +.Fa name +consisting of +.Fa sz +characters and returns the corresponding Unicode codepoint. +If the +.Fa name +is not recognized, \-1 is returned. +For example, the +.Xr mandoc 1 +.Fl Tutf8 +and +.Fl Thtml +output modules use this function to render +.Xr roff 7 +.Ic \e[ Ns Ar name Ns Ic \&] +and +.Ic \eC\(aq Ns Ar name Ns Ic \(aq +escape sequences. +.Pp +The function +.Fn mchars_spec2str +looks up a +.Xr roff 7 +special character +.Fa name +consisting of +.Fa sz +characters and returns an ASCII string representation. +The length of the representation is returned in +.Fa rsz . +In many cases, the meaning of such ASCII representations +is not quite obvious, so using +.Xr roff 7 +special characters in documents intended for ASCII rendering +is usually a bad idea. +If the +.Fa name +is not recognized, +.Dv NULL +is returned. +For example, +.Xr makewhatis 8 +and the +.Xr mandoc 1 +.Fl Tascii +output module use this function to render +.Xr roff 7 +.Ic \e[ Ns Ar name Ns Ic \&] +and +.Ic \eC\(aq Ns Ar name Ns Ic \(aq +escape sequences. +.Pp +The function +.Fn mchars_uc2str +performs a reverse lookup of the Unicode +.Fa codepoint +and returns an ASCII string representation, or the string +.Qq <?> +if none is available. +.Sh FILES +These functions are implemented in the file +.Pa chars.c . 
+.Sh SEE ALSO +.Xr mandoc 1 , +.Xr mandoc_escape 3 , +.Xr ohash_init 3 , +.Xr mandoc_char 7 , +.Xr roff 7 +.Sh HISTORY +These functions and their predecessors have been available since the +following mandoc versions: +.Bl -column "mchars_num2char()" "1.11.3" "chars_num2char()" "1.10.10" +.It Sy function Ta since Ta Sy predecessor Ta since +.It Fn mchars_alloc Ta 1.11.3 Ta Fn ascii2htab Ta 1.5.3 +.It Fn mchars_free Ta 1.11.2 Ta Fn asciifree Ta 1.6.0 +.It Fn mchars_num2char Ta 1.11.2 Ta Fn chars_num2char Ta 1.10.10 +.It Fn mchars_num2uc Ta 1.11.3 Ta \(em Ta \(em +.It Fn mchars_spec2cp Ta 1.11.2 Ta Fn chars_spec2cp Ta 1.10.5 +.It Fn mchars_spec2str Ta 1.11.2 Ta Fn a2ascii Ta 1.5.3 +.It Fn mchars_uc2str Ta 1.13.2 Ta \(em Ta \(em +.El +.Sh AUTHORS +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +.An Ingo Schwarze Aq Mt schwarze@openbsd.org diff --git a/contrib/mdocml/mdoc.7 b/contrib/mdocml/mdoc.7 new file mode 100644 index 0000000..198a46a --- /dev/null +++ b/contrib/mdocml/mdoc.7 @@ -0,0 +1,3235 @@ +.\" $Id: mdoc.7,v 1.257 2015/11/05 12:06:45 schwarze Exp $ +.\" +.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> +.\" Copyright (c) 2010, 2011, 2013 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate: November 5 2015 $ +.Dt MDOC 7 +.Os +.Sh NAME +.Nm mdoc +.Nd semantic markup language for formatting manual pages +.Sh DESCRIPTION +The +.Nm mdoc +language supports authoring of manual pages for the +.Xr man 1 +utility by allowing semantic annotations of words, phrases, +page sections and complete manual pages. +Such annotations are used by formatting tools to achieve a uniform +presentation across all manuals written in +.Nm , +and to support hyperlinking if supported by the output medium. +.Pp +This reference document describes the structure of manual pages +and the syntax and usage of the +.Nm +language. +The reference implementation of a parsing and formatting tool is +.Xr mandoc 1 ; +the +.Sx COMPATIBILITY +section describes compatibility with other implementations. +.Pp +In an +.Nm +document, lines beginning with the control character +.Sq \&. +are called +.Dq macro lines . +The first word is the macro name. +It consists of two or three letters. +Most macro names begin with a capital letter. +For a list of available macros, see +.Sx MACRO OVERVIEW . +The words following the macro name are arguments to the macro, optionally +including the names of other, callable macros; see +.Sx MACRO SYNTAX +for details. +.Pp +Lines not beginning with the control character are called +.Dq text lines . +They provide free-form text to be printed; the formatting of the text +depends on the respective processing context: +.Bd -literal -offset indent +\&.Sh Macro lines change control state. +Text lines are interpreted within the current state. +.Ed +.Pp +Many aspects of the basic syntax of the +.Nm +language are based on the +.Xr roff 7 +language; see the +.Em LANGUAGE SYNTAX +and +.Em MACRO SYNTAX +sections in the +.Xr roff 7 +manual for details, in particular regarding +comments, escape sequences, whitespace, and quoting. 
+However, using +.Xr roff 7 +requests in +.Nm +documents is discouraged; +.Xr mandoc 1 +supports some of them merely for backward compatibility. +.Sh MANUAL STRUCTURE +A well-formed +.Nm +document consists of a document prologue followed by one or more +sections. +.Pp +The prologue, which consists of the +.Sx \&Dd , +.Sx \&Dt , +and +.Sx \&Os +macros in that order, is required for every document. +.Pp +The first section (sections are denoted by +.Sx \&Sh ) +must be the NAME section, consisting of at least one +.Sx \&Nm +followed by +.Sx \&Nd . +.Pp +Following that, convention dictates specifying at least the +.Em SYNOPSIS +and +.Em DESCRIPTION +sections, although this varies between manual sections. +.Pp +The following is a well-formed skeleton +.Nm +file for a utility +.Qq progname : +.Bd -literal -offset indent +\&.Dd $\&Mdocdate$ +\&.Dt PROGNAME section +\&.Os +\&.Sh NAME +\&.Nm progname +\&.Nd one line about what it does +\&.\e\(dq .Sh LIBRARY +\&.\e\(dq For sections 2, 3, and 9 only. +\&.\e\(dq Not used in OpenBSD. +\&.Sh SYNOPSIS +\&.Nm progname +\&.Op Fl options +\&.Ar +\&.Sh DESCRIPTION +The +\&.Nm +utility processes files ... +\&.\e\(dq .Sh CONTEXT +\&.\e\(dq For section 9 functions only. +\&.\e\(dq .Sh IMPLEMENTATION NOTES +\&.\e\(dq Not used in OpenBSD. +\&.\e\(dq .Sh RETURN VALUES +\&.\e\(dq For sections 2, 3, and 9 function return values only. +\&.\e\(dq .Sh ENVIRONMENT +\&.\e\(dq For sections 1, 6, 7, and 8 only. +\&.\e\(dq .Sh FILES +\&.\e\(dq .Sh EXIT STATUS +\&.\e\(dq For sections 1, 6, and 8 only. +\&.\e\(dq .Sh EXAMPLES +\&.\e\(dq .Sh DIAGNOSTICS +\&.\e\(dq For sections 1, 4, 6, 7, 8, and 9 printf/stderr messages only. +\&.\e\(dq .Sh ERRORS +\&.\e\(dq For sections 2, 3, 4, and 9 errno settings only. +\&.\e\(dq .Sh SEE ALSO +\&.\e\(dq .Xr foobar 1 +\&.\e\(dq .Sh STANDARDS +\&.\e\(dq .Sh HISTORY +\&.\e\(dq .Sh AUTHORS +\&.\e\(dq .Sh CAVEATS +\&.\e\(dq .Sh BUGS +\&.\e\(dq .Sh SECURITY CONSIDERATIONS +\&.\e\(dq Not used in OpenBSD. 
+.Ed +.Pp +The sections in an +.Nm +document are conventionally ordered as they appear above. +Sections should be composed as follows: +.Bl -ohang -offset Ds +.It Em NAME +The name(s) and a one line description of the documented material. +The syntax for this is as follows: +.Bd -literal -offset indent +\&.Nm name0 , +\&.Nm name1 , +\&.Nm name2 +\&.Nd a one line description +.Ed +.Pp +Multiple +.Sq \&Nm +names should be separated by commas. +.Pp +The +.Sx \&Nm +macro(s) must precede the +.Sx \&Nd +macro. +.Pp +See +.Sx \&Nm +and +.Sx \&Nd . +.It Em LIBRARY +The name of the library containing the documented material, which is +assumed to be a function in a section 2, 3, or 9 manual. +The syntax for this is as follows: +.Bd -literal -offset indent +\&.Lb libarm +.Ed +.Pp +See +.Sx \&Lb . +.It Em SYNOPSIS +Documents the utility invocation syntax, function call syntax, or device +configuration. +.Pp +For the first, utilities (sections 1, 6, and 8), this is +generally structured as follows: +.Bd -literal -offset indent +\&.Nm bar +\&.Op Fl v +\&.Op Fl o Ar file +\&.Op Ar +\&.Nm foo +\&.Op Fl v +\&.Op Fl o Ar file +\&.Op Ar +.Ed +.Pp +Commands should be ordered alphabetically. +.Pp +For the second, function calls (sections 2, 3, 9): +.Bd -literal -offset indent +\&.In header.h +\&.Vt extern const char *global; +\&.Ft "char *" +\&.Fn foo "const char *src" +\&.Ft "char *" +\&.Fn bar "const char *src" +.Ed +.Pp +Ordering of +.Sx \&In , +.Sx \&Vt , +.Sx \&Fn , +and +.Sx \&Fo +macros should follow C header-file conventions. +.Pp +And for the third, configurations (section 4): +.Bd -literal -offset indent +\&.Cd \(dqit* at isa? port 0x2e\(dq +\&.Cd \(dqit* at isa? port 0x4e\(dq +.Ed +.Pp +Manuals not in these sections generally don't need a +.Em SYNOPSIS . +.Pp +Some macros are displayed differently in the +.Em SYNOPSIS +section, particularly +.Sx \&Nm , +.Sx \&Cd , +.Sx \&Fd , +.Sx \&Fn , +.Sx \&Fo , +.Sx \&In , +.Sx \&Vt , +and +.Sx \&Ft . 
+All of these macros are output on their own line. +If two such dissimilar macros are pairwise invoked (except for +.Sx \&Ft +before +.Sx \&Fo +or +.Sx \&Fn ) , +they are separated by a vertical space, unless in the case of +.Sx \&Fo , +.Sx \&Fn , +and +.Sx \&Ft , +which are always separated by vertical space. +.Pp +When text and macros following an +.Sx \&Nm +macro starting an input line span multiple output lines, +all output lines but the first will be indented to align +with the text immediately following the +.Sx \&Nm +macro, up to the next +.Sx \&Nm , +.Sx \&Sh , +or +.Sx \&Ss +macro or the end of an enclosing block, whichever comes first. +.It Em DESCRIPTION +This begins with an expansion of the brief, one line description in +.Em NAME : +.Bd -literal -offset indent +The +\&.Nm +utility does this, that, and the other. +.Ed +.Pp +It usually follows with a breakdown of the options (if documenting a +command), such as: +.Bd -literal -offset indent +The arguments are as follows: +\&.Bl \-tag \-width Ds +\&.It Fl v +Print verbose information. +\&.El +.Ed +.Pp +List the options in alphabetical order, +uppercase before lowercase for each letter and +with no regard to whether an option takes an argument. +Put digits in ascending order before all letter options. +.Pp +Manuals not documenting a command won't include the above fragment. +.Pp +Since the +.Em DESCRIPTION +section usually contains most of the text of a manual, longer manuals +often use the +.Sx \&Ss +macro to form subsections. +In very long manuals, the +.Em DESCRIPTION +may be split into multiple sections, each started by an +.Sx \&Sh +macro followed by a non-standard section name, and each having +several subsections, like in the present +.Nm +manual. +.It Em CONTEXT +This section lists the contexts in which functions can be called in section 9. +The contexts are autoconf, process, or interrupt. +.It Em IMPLEMENTATION NOTES +Implementation-specific notes should be kept here. 
+This is useful when implementing standard functions that may have side +effects or notable algorithmic implications. +.It Em RETURN VALUES +This section documents the +return values of functions in sections 2, 3, and 9. +.Pp +See +.Sx \&Rv . +.It Em ENVIRONMENT +Lists the environment variables used by the utility, +and explains the syntax and semantics of their values. +The +.Xr environ 7 +manual provides examples of typical content and formatting. +.Pp +See +.Sx \&Ev . +.It Em FILES +Documents files used. +It's helpful to document both the file name and a short description of how +the file is used (created, modified, etc.). +.Pp +See +.Sx \&Pa . +.It Em EXIT STATUS +This section documents the +command exit status for section 1, 6, and 8 utilities. +Historically, this information was described in +.Em DIAGNOSTICS , +a practise that is now discouraged. +.Pp +See +.Sx \&Ex . +.It Em EXAMPLES +Example usages. +This often contains snippets of well-formed, well-tested invocations. +Make sure that examples work properly! +.It Em DIAGNOSTICS +Documents error messages. +In section 4 and 9 manuals, these are usually messages printed by the +kernel to the console and to the kernel log. +In section 1, 6, 7, and 8, these are usually messages printed by +userland programs to the standard error output. +.Pp +Historically, this section was used in place of +.Em EXIT STATUS +for manuals in sections 1, 6, and 8; however, this practise is +discouraged. +.Pp +See +.Sx \&Bl +.Fl diag . +.It Em ERRORS +Documents +.Xr errno 2 +settings in sections 2, 3, 4, and 9. +.Pp +See +.Sx \&Er . +.It Em SEE ALSO +References other manuals with related topics. +This section should exist for most manuals. +Cross-references should conventionally be ordered first by section, then +alphabetically (ignoring case). +.Pp +References to other documentation concerning the topic of the manual page, +for example authoritative books or journal articles, may also be +provided in this section. 
+.Pp +See +.Sx \&Rs +and +.Sx \&Xr . +.It Em STANDARDS +References any standards implemented or used. +If not adhering to any standards, the +.Em HISTORY +section should be used instead. +.Pp +See +.Sx \&St . +.It Em HISTORY +A brief history of the subject, including where it was first implemented, +and when it was ported to or reimplemented for the operating system at hand. +.It Em AUTHORS +Credits to the person or persons who wrote the code and/or documentation. +Authors should generally be noted by both name and email address. +.Pp +See +.Sx \&An . +.It Em CAVEATS +Common misuses and misunderstandings should be explained +in this section. +.It Em BUGS +Known bugs, limitations, and work-arounds should be described +in this section. +.It Em SECURITY CONSIDERATIONS +Documents any security precautions that operators should consider. +.El +.Sh MACRO OVERVIEW +This overview is sorted such that macros of similar purpose are listed +together, to help find the best macro for any given purpose. +Deprecated macros are not included in the overview, but can be found below +in the alphabetical +.Sx MACRO REFERENCE . 
+.Ss Document preamble and NAME section macros +.Bl -column "Brq, Bro, Brc" description +.It Sx \&Dd Ta document date: Cm $\&Mdocdate$ | Ar month day , year +.It Sx \&Dt Ta document title: Ar TITLE section Op Ar arch +.It Sx \&Os Ta operating system version: Op Ar system Op Ar version +.It Sx \&Nm Ta document name (one argument) +.It Sx \&Nd Ta document description (one line) +.El +.Ss Sections and cross references +.Bl -column "Brq, Bro, Brc" description +.It Sx \&Sh Ta section header (one line) +.It Sx \&Ss Ta subsection header (one line) +.It Sx \&Sx Ta internal cross reference to a section or subsection +.It Sx \&Xr Ta cross reference to another manual page: Ar name section +.It Sx \&Pp , \&Lp Ta start a text paragraph (no arguments) +.El +.Ss Displays and lists +.Bl -column "Brq, Bro, Brc" description +.It Sx \&Bd , \&Ed Ta display block: +.Fl Ar type +.Op Fl offset Ar width +.Op Fl compact +.It Sx \&D1 Ta indented display (one line) +.It Sx \&Dl Ta indented literal display (one line) +.It Sx \&Ql Ta in-line literal display: Ql text +.It Sx \&Bl , \&El Ta list block: +.Fl Ar type +.Op Fl width Ar val +.Op Fl offset Ar val +.Op Fl compact +.It Sx \&It Ta list item (syntax depends on Fl Ar type ) +.It Sx \&Ta Ta table cell separator in Sx \&Bl Fl column No lists +.It Sx \&Rs , \&%* , \&Re Ta bibliographic block (references) +.El +.Ss Spacing control +.Bl -column "Brq, Bro, Brc" description +.It Sx \&Pf Ta prefix, no following horizontal space (one argument) +.It Sx \&Ns Ta roman font, no preceding horizontal space (no arguments) +.It Sx \&Ap Ta apostrophe without surrounding whitespace (no arguments) +.It Sx \&Sm Ta switch horizontal spacing mode: Op Cm on | off +.It Sx \&Bk , \&Ek Ta keep block: Fl words +.It Sx \&br Ta force output line break in text mode (no arguments) +.It Sx \&sp Ta force vertical space: Op Ar height +.El +.Ss Semantic markup for command line utilities: +.Bl -column "Brq, Bro, Brc" description +.It Sx \&Nm Ta start a SYNOPSIS block with the 
name of a utility +.It Sx \&Fl Ta command line options (flags) (>=0 arguments) +.It Sx \&Cm Ta command modifier (>0 arguments) +.It Sx \&Ar Ta command arguments (>=0 arguments) +.It Sx \&Op , \&Oo , \&Oc Ta optional syntax elements (enclosure) +.It Sx \&Ic Ta internal or interactive command (>0 arguments) +.It Sx \&Ev Ta environmental variable (>0 arguments) +.It Sx \&Pa Ta file system path (>=0 arguments) +.El +.Ss Semantic markup for function libraries: +.Bl -column "Brq, Bro, Brc" description +.It Sx \&Lb Ta function library (one argument) +.It Sx \&In Ta include file (one argument) +.It Sx \&Fd Ta other preprocessor directive (>0 arguments) +.It Sx \&Ft Ta function type (>0 arguments) +.It Sx \&Fo , \&Fc Ta function block: Ar funcname +.It Sx \&Fn Ta function name: +.Op Ar functype +.Ar funcname +.Oo +.Op Ar argtype +.Ar argname +.Oc +.It Sx \&Fa Ta function argument (>0 arguments) +.It Sx \&Vt Ta variable type (>0 arguments) +.It Sx \&Va Ta variable name (>0 arguments) +.It Sx \&Dv Ta defined variable or preprocessor constant (>0 arguments) +.It Sx \&Er Ta error constant (>0 arguments) +.It Sx \&Ev Ta environmental variable (>0 arguments) +.El +.Ss Various semantic markup: +.Bl -column "Brq, Bro, Brc" description +.It Sx \&An Ta author name (>0 arguments) +.It Sx \&Lk Ta hyperlink: Ar uri Op Ar name +.It Sx \&Mt Ta Do mailto Dc hyperlink: Ar address +.It Sx \&Cd Ta kernel configuration declaration (>0 arguments) +.It Sx \&Ad Ta memory address (>0 arguments) +.It Sx \&Ms Ta mathematical symbol (>0 arguments) +.El +.Ss Physical markup +.Bl -column "Brq, Bro, Brc" description +.It Sx \&Em Ta italic font or underline (emphasis) (>0 arguments) +.It Sx \&Sy Ta boldface font (symbolic) (>0 arguments) +.It Sx \&Li Ta typewriter font (literal) (>0 arguments) +.It Sx \&No Ta return to roman font (normal) (no arguments) +.It Sx \&Bf , \&Ef Ta font block: +.Op Fl Ar type | Cm \&Em | \&Li | \&Sy +.El +.Ss Physical enclosures +.Bl -column "Brq, Bro, Brc" description +.It Sx 
\&Dq , \&Do , \&Dc Ta enclose in typographic double quotes: Dq text +.It Sx \&Qq , \&Qo , \&Qc Ta enclose in typewriter double quotes: Qq text +.It Sx \&Sq , \&So , \&Sc Ta enclose in single quotes: Sq text +.It Sx \&Pq , \&Po , \&Pc Ta enclose in parentheses: Pq text +.It Sx \&Bq , \&Bo , \&Bc Ta enclose in square brackets: Bq text +.It Sx \&Brq , \&Bro , \&Brc Ta enclose in curly braces: Brq text +.It Sx \&Aq , \&Ao , \&Ac Ta enclose in angle brackets: Aq text +.It Sx \&Eo , \&Ec Ta generic enclosure +.El +.Ss Text production +.Bl -column "Brq, Bro, Brc" description +.It Sx \&Ex Fl std Ta standard command exit values: Op Ar utility ... +.It Sx \&Rv Fl std Ta standard function return values: Op Ar function ... +.It Sx \&St Ta reference to a standards document (one argument) +.It Sx \&At Ta At +.It Sx \&Bx Ta Bx +.It Sx \&Bsx Ta Bsx +.It Sx \&Nx Ta Nx +.It Sx \&Fx Ta Fx +.It Sx \&Ox Ta Ox +.It Sx \&Dx Ta Dx +.El +.Sh MACRO REFERENCE +This section is a canonical reference of all macros, arranged +alphabetically. +For the scoping of individual macros, see +.Sx MACRO SYNTAX . +.Ss \&%A +Author name of an +.Sx \&Rs +block. +Multiple authors should each be accorded their own +.Sx \%%A +line. +Author names should be ordered with full or abbreviated forename(s) +first, then full surname. +.Ss \&%B +Book title of an +.Sx \&Rs +block. +This macro may also be used in a non-bibliographic context when +referring to book titles. +.Ss \&%C +Publication city or location of an +.Sx \&Rs +block. +.Ss \&%D +Publication date of an +.Sx \&Rs +block. +Recommended formats of arguments are +.Ar month day , year +or just +.Ar year . +.Ss \&%I +Publisher or issuer name of an +.Sx \&Rs +block. +.Ss \&%J +Journal name of an +.Sx \&Rs +block. +.Ss \&%N +Issue number (usually for journals) of an +.Sx \&Rs +block. +.Ss \&%O +Optional information of an +.Sx \&Rs +block. +.Ss \&%P +Book or journal page number of an +.Sx \&Rs +block. +.Ss \&%Q +Institutional author (school, government, etc.) 
of an +.Sx \&Rs +block. +Multiple institutional authors should each be accorded their own +.Sx \&%Q +line. +.Ss \&%R +Technical report name of an +.Sx \&Rs +block. +.Ss \&%T +Article title of an +.Sx \&Rs +block. +This macro may also be used in a non-bibliographical context when +referring to article titles. +.Ss \&%U +URI of reference document. +.Ss \&%V +Volume number of an +.Sx \&Rs +block. +.Ss \&Ac +Close an +.Sx \&Ao +block. +Does not have any tail arguments. +.Ss \&Ad +Memory address. +Do not use this for postal addresses. +.Pp +Examples: +.Dl \&.Ad [0,$] +.Dl \&.Ad 0x00000000 +.Ss \&An +Author name. +Can be used both for the authors of the program, function, or driver +documented in the manual, or for the authors of the manual itself. +Requires either the name of an author or one of the following arguments: +.Pp +.Bl -tag -width "-nosplitX" -offset indent -compact +.It Fl split +Start a new output line before each subsequent invocation of +.Sx \&An . +.It Fl nosplit +The opposite of +.Fl split . +.El +.Pp +The default is +.Fl nosplit . +The effect of selecting either of the +.Fl split +modes ends at the beginning of the +.Em AUTHORS +section. +In the +.Em AUTHORS +section, the default is +.Fl nosplit +for the first author listing and +.Fl split +for all other author listings. +.Pp +Examples: +.Dl \&.An -nosplit +.Dl \&.An Kristaps Dzonsons \&Aq \&Mt kristaps@bsd.lv +.Ss \&Ao +Begin a block enclosed by angle brackets. +Does not have any head arguments. +.Pp +Examples: +.Dl \&.Fl -key= \&Ns \&Ao \&Ar val \&Ac +.Pp +See also +.Sx \&Aq . +.Ss \&Ap +Inserts an apostrophe without any surrounding whitespace. +This is generally used as a grammatical device when referring to the verb +form of a function. +.Pp +Examples: +.Dl \&.Fn execve \&Ap d +.Ss \&Aq +Encloses its arguments in angle brackets. 
+.Pp +Examples: +.Dl \&.Fl -key= \&Ns \&Aq \&Ar val +.Pp +.Em Remarks : +this macro is often abused for rendering URIs, which should instead use +.Sx \&Lk +or +.Sx \&Mt , +or to note pre-processor +.Dq Li #include +statements, which should use +.Sx \&In . +.Pp +See also +.Sx \&Ao . +.Ss \&Ar +Command arguments. +If an argument is not provided, the string +.Dq file ...\& +is used as a default. +.Pp +Examples: +.Dl ".Fl o Ar file" +.Dl ".Ar" +.Dl ".Ar arg1 , arg2 ." +.Pp +The arguments to the +.Sx \&Ar +macro are names and placeholders for command arguments; +for fixed strings to be passed verbatim as arguments, use +.Sx \&Fl +or +.Sx \&Cm . +.Ss \&At +Formats an +.At +version. +Accepts one optional argument: +.Pp +.Bl -tag -width "v[1-7] | 32vX" -offset indent -compact +.It Cm v[1-7] | 32v +A version of +.At . +.It Cm III +.At III . +.It Cm V[.[1-4]]? +A version of +.At V . +.El +.Pp +Note that these arguments do not begin with a hyphen. +.Pp +Examples: +.Dl \&.At +.Dl \&.At III +.Dl \&.At V.1 +.Pp +See also +.Sx \&Bsx , +.Sx \&Bx , +.Sx \&Dx , +.Sx \&Fx , +.Sx \&Nx , +and +.Sx \&Ox . +.Ss \&Bc +Close a +.Sx \&Bo +block. +Does not have any tail arguments. +.Ss \&Bd +Begin a display block. +Its syntax is as follows: +.Bd -ragged -offset indent +.Pf \. Sx \&Bd +.Fl Ns Ar type +.Op Fl offset Ar width +.Op Fl compact +.Ed +.Pp +Display blocks are used to select a different indentation and +justification than the one used by the surrounding text. +They may contain both macro lines and text lines. +By default, a display block is preceded by a vertical space. +.Pp +The +.Ar type +must be one of the following: +.Bl -tag -width 13n -offset indent +.It Fl centered +Produce one output line from each input line, and center-justify each line. +Using this display type is not recommended; many +.Nm +implementations render it poorly. +.It Fl filled +Change the positions of line breaks to fill each line, and left- and +right-justify the resulting block. 
+.It Fl literal +Produce one output line from each input line, +and do not justify the block at all. +Preserve white space as it appears in the input. +Always use a constant-width font. +Use this for displaying source code. +.It Fl ragged +Change the positions of line breaks to fill each line, and left-justify +the resulting block. +.It Fl unfilled +The same as +.Fl literal , +but using the same font as for normal text, which is a variable width font +if supported by the output device. +.El +.Pp +The +.Ar type +must be provided first. +Additional arguments may follow: +.Bl -tag -width 13n -offset indent +.It Fl offset Ar width +Indent the display by the +.Ar width , +which may be one of the following: +.Bl -item +.It +One of the pre-defined strings +.Cm indent , +the width of a standard indentation (six constant width characters); +.Cm indent-two , +twice +.Cm indent ; +.Cm left , +which has no effect; +.Cm right , +which justifies to the right margin; or +.Cm center , +which aligns around an imagined center axis. +.It +A macro invocation, which selects a predefined width +associated with that macro. +The most popular is the imaginary macro +.Ar \&Ds , +which resolves to +.Sy 6n . +.It +A scaling width as described in +.Xr roff 7 . +.It +An arbitrary string, which indents by the length of this string. +.El +.Pp +When the argument is missing, +.Fl offset +is ignored. +.It Fl compact +Do not assert vertical space before the display. +.El +.Pp +Examples: +.Bd -literal -offset indent +\&.Bd \-literal \-offset indent \-compact + Hello world. +\&.Ed +.Ed +.Pp +See also +.Sx \&D1 +and +.Sx \&Dl . +.Ss \&Bf +Change the font mode for a scoped block of text. +Its syntax is as follows: +.Bd -ragged -offset indent +.Pf \. Sx \&Bf +.Oo +.Fl emphasis | literal | symbolic | +.Cm \&Em | \&Li | \&Sy +.Oc +.Ed +.Pp +The +.Fl emphasis +and +.Cm \&Em +arguments are equivalent, as are +.Fl symbolic +and +.Cm \&Sy , +and +.Fl literal +and +.Cm \&Li . 
+Without an argument, this macro does nothing. +The font mode continues until broken by a new font mode in a nested +scope or +.Sx \&Ef +is encountered. +.Pp +See also +.Sx \&Li , +.Sx \&Ef , +.Sx \&Em , +and +.Sx \&Sy . +.Ss \&Bk +For each macro, keep its output together on the same output line, +until the end of the macro or the end of the input line is reached, +whichever comes first. +Line breaks in text lines are unaffected. +The syntax is as follows: +.Pp +.D1 Pf \. Sx \&Bk Fl words +.Pp +The +.Fl words +argument is required; additional arguments are ignored. +.Pp +The following example will not break within each +.Sx \&Op +macro line: +.Bd -literal -offset indent +\&.Bk \-words +\&.Op Fl f Ar flags +\&.Op Fl o Ar output +\&.Ek +.Ed +.Pp +Be careful in using over-long lines within a keep block! +Doing so will clobber the right margin. +.Ss \&Bl +Begin a list. +Lists consist of items specified using the +.Sx \&It +macro, containing a head or a body or both. +The list syntax is as follows: +.Bd -ragged -offset indent +.Pf \. Sx \&Bl +.Fl Ns Ar type +.Op Fl width Ar val +.Op Fl offset Ar val +.Op Fl compact +.Op HEAD ... +.Ed +.Pp +The list +.Ar type +is mandatory and must be specified first. +The +.Fl width +and +.Fl offset +arguments accept macro names as described for +.Sx \&Bd +.Fl offset , +scaling widths as described in +.Xr roff 7 , +or use the length of the given string. +The +.Fl offset +is a global indentation for the whole list, affecting both item heads +and bodies. +For those list types supporting it, the +.Fl width +argument requests an additional indentation of item bodies, +to be added to the +.Fl offset . +Unless the +.Fl compact +argument is specified, list entries are separated by vertical space. +.Pp +A list must specify one of the following list types: +.Bl -tag -width 12n -offset indent +.It Fl bullet +No item heads can be specified, but a bullet will be printed at the head +of each item. 
+Item bodies start on the same output line as the bullet +and are indented according to the +.Fl width +argument. +.It Fl column +A columnated list. +The +.Fl width +argument has no effect; instead, each argument specifies the width +of one column, using either the scaling width syntax described in +.Xr roff 7 +or the string length of the argument. +If the first line of the body of a +.Fl column +list is not an +.Sx \&It +macro line, +.Sx \&It +contexts spanning one input line each are implied until an +.Sx \&It +macro line is encountered, at which point items start being interpreted as +described in the +.Sx \&It +documentation. +.It Fl dash +Like +.Fl bullet , +except that dashes are used in place of bullets. +.It Fl diag +Like +.Fl inset , +except that item heads are not parsed for macro invocations. +Most often used in the +.Em DIAGNOSTICS +section with error constants in the item heads. +.It Fl enum +A numbered list. +No item heads can be specified. +Formatted like +.Fl bullet , +except that cardinal numbers are used in place of bullets, +starting at 1. +.It Fl hang +Like +.Fl tag , +except that the first lines of item bodies are not indented, but follow +the item heads like in +.Fl inset +lists. +.It Fl hyphen +Synonym for +.Fl dash . +.It Fl inset +Item bodies follow item heads on the same line, using normal inter-word +spacing. +Bodies are not indented, and the +.Fl width +argument is ignored. +.It Fl item +No item heads can be specified, and none are printed. +Bodies are not indented, and the +.Fl width +argument is ignored. +.It Fl ohang +Item bodies start on the line following item heads and are not indented. +The +.Fl width +argument is ignored. +.It Fl tag +Item bodies are indented according to the +.Fl width +argument. +When an item head fits inside the indentation, the item body follows +this head on the same output line. +Otherwise, the body starts on the output line following the head. +.El +.Pp +Lists may be nested within lists and displays. 
+Nesting of +.Fl column +and +.Fl enum +lists may not be portable. +.Pp +See also +.Sx \&El +and +.Sx \&It . +.Ss \&Bo +Begin a block enclosed by square brackets. +Does not have any head arguments. +.Pp +Examples: +.Bd -literal -offset indent -compact +\&.Bo 1 , +\&.Dv BUFSIZ \&Bc +.Ed +.Pp +See also +.Sx \&Bq . +.Ss \&Bq +Encloses its arguments in square brackets. +.Pp +Examples: +.Dl \&.Bq 1 , \&Dv BUFSIZ +.Pp +.Em Remarks : +this macro is sometimes abused to emulate optional arguments for +commands; the correct macros to use for this purpose are +.Sx \&Op , +.Sx \&Oo , +and +.Sx \&Oc . +.Pp +See also +.Sx \&Bo . +.Ss \&Brc +Close a +.Sx \&Bro +block. +Does not have any tail arguments. +.Ss \&Bro +Begin a block enclosed by curly braces. +Does not have any head arguments. +.Pp +Examples: +.Bd -literal -offset indent -compact +\&.Bro 1 , ... , +\&.Va n \&Brc +.Ed +.Pp +See also +.Sx \&Brq . +.Ss \&Brq +Encloses its arguments in curly braces. +.Pp +Examples: +.Dl \&.Brq 1 , ... , \&Va n +.Pp +See also +.Sx \&Bro . +.Ss \&Bsx +Format the +.Bsx +version provided as an argument, or a default value if +no argument is provided. +.Pp +Examples: +.Dl \&.Bsx 1.0 +.Dl \&.Bsx +.Pp +See also +.Sx \&At , +.Sx \&Bx , +.Sx \&Dx , +.Sx \&Fx , +.Sx \&Nx , +and +.Sx \&Ox . +.Ss \&Bt +Supported only for compatibility, do not use this in new manuals. +Prints +.Dq is currently in beta test. +.Ss \&Bx +Format the +.Bx +version provided as an argument, or a default value if no +argument is provided. +.Pp +Examples: +.Dl \&.Bx 4.3 Tahoe +.Dl \&.Bx 4.4 +.Dl \&.Bx +.Pp +See also +.Sx \&At , +.Sx \&Bsx , +.Sx \&Dx , +.Sx \&Fx , +.Sx \&Nx , +and +.Sx \&Ox . +.Ss \&Cd +Kernel configuration declaration. +This denotes strings accepted by +.Xr config 8 . +It is most often used in section 4 manual pages. +.Pp +Examples: +.Dl \&.Cd device le0 at scode? +.Pp +.Em Remarks : +this macro is commonly abused by using quoted literals to retain +whitespace and align consecutive +.Sx \&Cd +declarations. 
+This practice is discouraged. +.Ss \&Cm +Command modifiers. +Typically used for fixed strings passed as arguments, unless +.Sx \&Fl +is more appropriate. +Also useful when specifying configuration options or keys. +.Pp +Examples: +.Dl ".Nm mt Fl f Ar device Cm rewind" +.Dl ".Nm ps Fl o Cm pid , Ns Cm command" +.Dl ".Nm dd Cm if= Ns Ar file1 Cm of= Ns Ar file2" +.Dl ".Cm IdentityFile Pa ~/.ssh/id_rsa" +.Dl ".Cm LogLevel Dv DEBUG" +.Ss \&D1 +One-line indented display. +This is formatted by the default rules and is useful for simple indented +statements. +It is followed by a newline. +.Pp +Examples: +.Dl \&.D1 \&Fl abcdefgh +.Pp +See also +.Sx \&Bd +and +.Sx \&Dl . +.Ss \&Db +This macro is obsolete. +No replacement is needed. +It is ignored by +.Xr mandoc 1 +and groff, including its arguments. +It was formerly used to toggle a debugging mode. +.Ss \&Dc +Close a +.Sx \&Do +block. +Does not have any tail arguments. +.Ss \&Dd +Document date for display in the page footer. +This is the mandatory first macro of any +.Nm +manual. +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Dd Ar month day , year +.Pp +The +.Ar month +is the full English month name, the +.Ar day +is an optionally zero-padded numeral, and the +.Ar year +is the full four-digit year. +.Pp +Other arguments are not portable; the +.Xr mandoc 1 +utility handles them as follows: +.Bl -dash -offset 3n -compact +.It +To have the date automatically filled in by the +.Ox +version of +.Xr cvs 1 , +the special string +.Dq $\&Mdocdate$ +can be given as an argument. +.It +The traditional, purely numeric +.Xr man 7 +format +.Ar year Ns \(en Ns Ar month Ns \(en Ns Ar day +is accepted, too. +.It +If a date string cannot be parsed, it is used verbatim. +.It +If no date string is given, the current date is used. +.El +.Pp +Examples: +.Dl \&.Dd $\&Mdocdate$ +.Dl \&.Dd $\&Mdocdate: July 21 2007$ +.Dl \&.Dd July 21, 2007 +.Pp +See also +.Sx \&Dt +and +.Sx \&Os . +.Ss \&Dl +One-line indented display. 
+This is formatted as literal text and is useful for commands and +invocations. +It is followed by a newline. +.Pp +Examples: +.Dl \&.Dl % mandoc mdoc.7 \e(ba less +.Pp +See also +.Sx \&Ql , +.Sx \&Bd +.Fl literal , +and +.Sx \&D1 . +.Ss \&Do +Begin a block enclosed by double quotes. +Does not have any head arguments. +.Pp +Examples: +.Bd -literal -offset indent -compact +\&.Do +April is the cruellest month +\&.Dc +\e(em T.S. Eliot +.Ed +.Pp +See also +.Sx \&Dq . +.Ss \&Dq +Encloses its arguments in +.Dq typographic +double-quotes. +.Pp +Examples: +.Bd -literal -offset indent -compact +\&.Dq April is the cruellest month +\e(em T.S. Eliot +.Ed +.Pp +See also +.Sx \&Qq , +.Sx \&Sq , +and +.Sx \&Do . +.Ss \&Dt +Document title for display in the page header. +This is the mandatory second macro of any +.Nm +file. +Its syntax is as follows: +.Bd -ragged -offset indent +.Pf \. Sx \&Dt +.Ar TITLE +.Ar section +.Op Ar arch +.Ed +.Pp +Its arguments are as follows: +.Bl -tag -width section -offset 2n +.It Ar TITLE +The document's title (name), defaulting to +.Dq UNTITLED +if unspecified. +To achieve a uniform appearance of page header lines, +it should by convention be all caps. +.It Ar section +The manual section. +This may be one of +.Cm 1 +.Pq General Commands , +.Cm 2 +.Pq System Calls , +.Cm 3 +.Pq Library Functions , +.Cm 3p +.Pq Perl Library , +.Cm 4 +.Pq Device Drivers , +.Cm 5 +.Pq File Formats , +.Cm 6 +.Pq Games , +.Cm 7 +.Pq Miscellaneous Information , +.Cm 8 +.Pq System Manager's Manual , +or +.Cm 9 +.Pq Kernel Developer's Manual . +It should correspond to the manual's filename suffix and defaults to +the empty string if unspecified. +.It Ar arch +This specifies the machine architecture a manual page applies to, +where relevant, for example +.Cm alpha , +.Cm amd64 , +.Cm i386 , +or +.Cm sparc64 . +The list of valid architectures varies by operating system. +.El +.Pp +Examples: +.Dl \&.Dt FOO 1 +.Dl \&.Dt FOO 9 i386 +.Pp +See also +.Sx \&Dd +and +.Sx \&Os . 
+.Ss \&Dv +Defined variables such as preprocessor constants, constant symbols, +enumeration values, and so on. +.Pp +Examples: +.Dl \&.Dv NULL +.Dl \&.Dv BUFSIZ +.Dl \&.Dv STDOUT_FILENO +.Pp +See also +.Sx \&Er +and +.Sx \&Ev +for special-purpose constants, +.Sx \&Va +for variable symbols, and +.Sx \&Fd +for listing preprocessor variable definitions in the +.Em SYNOPSIS . +.Ss \&Dx +Format the +.Dx +version provided as an argument, or a default +value if no argument is provided. +.Pp +Examples: +.Dl \&.Dx 2.4.1 +.Dl \&.Dx +.Pp +See also +.Sx \&At , +.Sx \&Bsx , +.Sx \&Bx , +.Sx \&Fx , +.Sx \&Nx , +and +.Sx \&Ox . +.Ss \&Ec +Close a scope started by +.Sx \&Eo . +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Ec Op Ar TERM +.Pp +The +.Ar TERM +argument is used as the enclosure tail, for example, specifying \e(rq +will emulate +.Sx \&Dc . +.Ss \&Ed +End a display context started by +.Sx \&Bd . +.Ss \&Ef +End a font mode context started by +.Sx \&Bf . +.Ss \&Ek +End a keep context started by +.Sx \&Bk . +.Ss \&El +End a list context started by +.Sx \&Bl . +.Pp +See also +.Sx \&Bl +and +.Sx \&It . +.Ss \&Em +Request an italic font. +If the output device does not provide that, underline. +.Pp +This is most often used for stress emphasis (not to be confused with +importance, see +.Sx \&Sy ) . +In the rare cases where none of the semantic markup macros fit, +it can also be used for technical terms and placeholders, except +that for syntax elements, +.Sx \&Sy +and +.Sx \&Ar +are preferred, respectively. +.Pp +Examples: +.Bd -literal -compact -offset indent +Selected lines are those +\&.Em not +matching any of the specified patterns. +Some of the functions use a +\&.Em hold space +to save the pattern space for subsequent retrieval. +.Ed +.Pp +See also +.Sx \&Bf , +.Sx \&Li , +.Sx \&No , +and +.Sx \&Sy . +.Ss \&En +This macro is obsolete. +Use +.Sx \&Eo +or any of the other enclosure macros. 
+.Pp +It encloses its argument in the delimiters specified by the last +.Sx \&Es +macro. +.Ss \&Eo +An arbitrary enclosure. +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Eo Op Ar TERM +.Pp +The +.Ar TERM +argument is used as the enclosure head, for example, specifying \e(lq +will emulate +.Sx \&Do . +.Ss \&Er +Error constants for definitions of the +.Va errno +libc global variable. +This is most often used in section 2 and 3 manual pages. +.Pp +Examples: +.Dl \&.Er EPERM +.Dl \&.Er ENOENT +.Pp +See also +.Sx \&Dv +for general constants. +.Ss \&Es +This macro is obsolete. +Use +.Sx \&Eo +or any of the other enclosure macros. +.Pp +It takes two arguments, defining the delimiters to be used by subsequent +.Sx \&En +macros. +.Ss \&Ev +Environmental variables such as those specified in +.Xr environ 7 . +.Pp +Examples: +.Dl \&.Ev DISPLAY +.Dl \&.Ev PATH +.Pp +See also +.Sx \&Dv +for general constants. +.Ss \&Ex +Insert a standard sentence regarding command exit values of 0 on success +and >0 on failure. +This is most often used in section 1, 6, and 8 manual pages. +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Ex Fl std Op Ar utility ... +.Pp +If +.Ar utility +is not specified, the document's name set by +.Sx \&Nm +is used. +Multiple +.Ar utility +arguments are treated as separate utilities. +.Pp +See also +.Sx \&Rv . +.Ss \&Fa +Function argument or parameter. +Its syntax is as follows: +.Bd -ragged -offset indent +.Pf \. Sx \&Fa +.Qo +.Op Ar argtype +.Op Ar argname +.Qc Ar \&... +.Ed +.Pp +Each argument may be a name and a type (recommended for the +.Em SYNOPSIS +section), a name alone (for function invocations), +or a type alone (for function prototypes). +If both a type and a name are given or if the type consists of multiple +words, all words belonging to the same function argument have to be +given in a single argument to the +.Sx \&Fa +macro. +.Pp +This macro is also used to specify the field name of a structure. 
+.Pp +Most often, the +.Sx \&Fa +macro is used in the +.Em SYNOPSIS +within +.Sx \&Fo +blocks when documenting multi-line function prototypes. +If invoked with multiple arguments, the arguments are separated by a +comma. +Furthermore, if the following macro is another +.Sx \&Fa , +the last argument will also have a trailing comma. +.Pp +Examples: +.Dl \&.Fa \(dqconst char *p\(dq +.Dl \&.Fa \(dqint a\(dq \(dqint b\(dq \(dqint c\(dq +.Dl \&.Fa \(dqchar *\(dq size_t +.Pp +See also +.Sx \&Fo . +.Ss \&Fc +End a function context started by +.Sx \&Fo . +.Ss \&Fd +Preprocessor directive, in particular for listing it in the +.Em SYNOPSIS . +Historically, it was also used to document include files. +The latter usage has been deprecated in favour of +.Sx \&In . +.Pp +Its syntax is as follows: +.Bd -ragged -offset indent +.Pf \. Sx \&Fd +.Li # Ns Ar directive +.Op Ar argument ... +.Ed +.Pp +Examples: +.Dl \&.Fd #define sa_handler __sigaction_u.__sa_handler +.Dl \&.Fd #define SIO_MAXNFDS +.Dl \&.Fd #ifdef FS_DEBUG +.Dl \&.Ft void +.Dl \&.Fn dbg_open \(dqconst char *\(dq +.Dl \&.Fd #endif +.Pp +See also +.Sx MANUAL STRUCTURE , +.Sx \&In , +and +.Sx \&Dv . +.Ss \&Fl +Command-line flag or option. +Used when listing arguments to command-line utilities. +Prints a fixed-width hyphen +.Sq \- +directly followed by each argument. +If no arguments are provided, a hyphen is printed followed by a space. +If the argument is a macro, a hyphen is prefixed to the subsequent macro +output. +.Pp +Examples: +.Dl ".Fl R Op Fl H | L | P" +.Dl ".Op Fl 1AaCcdFfgHhikLlmnopqRrSsTtux" +.Dl ".Fl type Cm d Fl name Pa CVS" +.Dl ".Fl Ar signal_number" +.Dl ".Fl o Fl" +.Pp +See also +.Sx \&Cm . +.Ss \&Fn +A function name. +Its syntax is as follows: +.Bd -ragged -offset indent +.Pf . Sx \&Fn +.Op Ar functype +.Ar funcname +.Op Oo Ar argtype Oc Ar argname +.Ed +.Pp +Function arguments are surrounded by parentheses and +are delimited by commas. +If no arguments are specified, blank parentheses are output. 
+In the +.Em SYNOPSIS +section, this macro starts a new output line, +and a blank line is automatically inserted between function definitions. +.Pp +Examples: +.Dl \&.Fn \(dqint funcname\(dq \(dqint arg0\(dq \(dqint arg1\(dq +.Dl \&.Fn funcname \(dqint arg0\(dq +.Dl \&.Fn funcname arg0 +.Pp +.Bd -literal -offset indent -compact +\&.Ft functype +\&.Fn funcname +.Ed +.Pp +When referring to a function documented in another manual page, use +.Sx \&Xr +instead. +See also +.Sx MANUAL STRUCTURE , +.Sx \&Fo , +and +.Sx \&Ft . +.Ss \&Fo +Begin a function block. +This is a multi-line version of +.Sx \&Fn . +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Fo Ar funcname +.Pp +Invocations usually occur in the following context: +.Bd -ragged -offset indent +.Pf \. Sx \&Ft Ar functype +.br +.Pf \. Sx \&Fo Ar funcname +.br +.Pf \. Sx \&Fa Qq Ar argtype Ar argname +.br +\&.\.\. +.br +.Pf \. Sx \&Fc +.Ed +.Pp +A +.Sx \&Fo +scope is closed by +.Sx \&Fc . +.Pp +See also +.Sx MANUAL STRUCTURE , +.Sx \&Fa , +.Sx \&Fc , +and +.Sx \&Ft . +.Ss \&Fr +This macro is obsolete. +No replacement markup is needed. +.Pp +It was used to show numerical function return values in an italic font. +.Ss \&Ft +A function type. +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Ft Ar functype +.Pp +In the +.Em SYNOPSIS +section, a new output line is started after this macro. +.Pp +Examples: +.Dl \&.Ft int +.Bd -literal -offset indent -compact +\&.Ft functype +\&.Fn funcname +.Ed +.Pp +See also +.Sx MANUAL STRUCTURE , +.Sx \&Fn , +and +.Sx \&Fo . +.Ss \&Fx +Format the +.Fx +version provided as an argument, or a default value +if no argument is provided. +.Pp +Examples: +.Dl \&.Fx 7.1 +.Dl \&.Fx +.Pp +See also +.Sx \&At , +.Sx \&Bsx , +.Sx \&Bx , +.Sx \&Dx , +.Sx \&Nx , +and +.Sx \&Ox . +.Ss \&Hf +This macro is not implemented in +.Xr mandoc 1 . +.Pp +It was used to include the contents of a (header) file literally. +The syntax was: +.Pp +.Dl Pf . 
Sx \&Hf Ar filename +.Ss \&Ic +Designate an internal or interactive command. +This is similar to +.Sx \&Cm +but used for instructions rather than values. +.Pp +Examples: +.Dl \&.Ic :wq +.Dl \&.Ic hash +.Dl \&.Ic alias +.Pp +Note that using +.Sx \&Bd Fl literal +or +.Sx \&D1 +is preferred for displaying code; the +.Sx \&Ic +macro is used when referring to specific instructions. +.Ss \&In +The name of an include file. +This macro is most often used in section 2, 3, and 9 manual pages. +.Pp +When invoked as the first macro on an input line in the +.Em SYNOPSIS +section, the argument is displayed in angle brackets +and preceded by +.Qq #include , +and a blank line is inserted in front if there is a preceding +function declaration. +In other sections, it only encloses its argument in angle brackets +and causes no line break. +.Pp +Examples: +.Dl \&.In sys/types.h +.Pp +See also +.Sx MANUAL STRUCTURE . +.Ss \&It +A list item. +The syntax of this macro depends on the list type. +.Pp +Lists +of type +.Fl hang , +.Fl ohang , +.Fl inset , +and +.Fl diag +have the following syntax: +.Pp +.D1 Pf \. Sx \&It Ar args +.Pp +Lists of type +.Fl bullet , +.Fl dash , +.Fl enum , +.Fl hyphen +and +.Fl item +have the following syntax: +.Pp +.D1 Pf \. Sx \&It +.Pp +with subsequent lines interpreted within the scope of the +.Sx \&It +until either a closing +.Sx \&El +or another +.Sx \&It . +.Pp +The +.Fl tag +list has the following syntax: +.Pp +.D1 Pf \. Sx \&It Op Cm args +.Pp +Subsequent lines are interpreted as with +.Fl bullet +and family. +The line arguments correspond to the list's left-hand side; body +arguments correspond to the list's contents. +.Pp +The +.Fl column +list is the most complicated. +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&It Ar cell Op <TAB> Ar cell ... +.D1 Pf \. Sx \&It Ar cell Op Sx \&Ta Ar cell ... +.Pp +The arguments consist of one or more lines of text and macros +representing a complete table line. 
+Cells within the line are delimited by tabs or by the special +.Sx \&Ta +block macro. +The tab cell delimiter may only be used within the +.Sx \&It +line itself; on following lines, only the +.Sx \&Ta +macro can be used to delimit cells, and +.Sx \&Ta +is only recognised as a macro when called by other macros, +not as the first macro on a line. +.Pp +Note that quoted strings may span tab-delimited cells on an +.Sx \&It +line. +For example, +.Pp +.Dl .It \(dqcol1 ; <TAB> col2 ;\(dq \&; +.Pp +will preserve the semicolon whitespace except for the last. +.Pp +See also +.Sx \&Bl . +.Ss \&Lb +Specify a library. +The syntax is as follows: +.Pp +.D1 Pf \. Sx \&Lb Ar library +.Pp +The +.Ar library +parameter may be a system library, such as +.Cm libz +or +.Cm libpam , +in which case a small library description is printed next to the linker +invocation; or a custom library, in which case the library name is +printed in quotes. +This is most commonly used in the +.Em SYNOPSIS +section as described in +.Sx MANUAL STRUCTURE . +.Pp +Examples: +.Dl \&.Lb libz +.Dl \&.Lb libmandoc +.Ss \&Li +Denotes text that should be in a +.Li literal +font mode. +Note that this is a presentation term and should not be used for +stylistically decorating technical terms. +.Pp +On terminal output devices, this is often indistinguishable from +normal text. +.Pp +See also +.Sx \&Bf , +.Sx \&Em , +.Sx \&No , +and +.Sx \&Sy . +.Ss \&Lk +Format a hyperlink. +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Lk Ar uri Op Ar name +.Pp +Examples: +.Dl \&.Lk http://bsd.lv \(dqThe BSD.lv Project\(dq +.Dl \&.Lk http://bsd.lv +.Pp +See also +.Sx \&Mt . +.Ss \&Lp +Synonym for +.Sx \&Pp . +.Ss \&Ms +Display a mathematical symbol. +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Ms Ar symbol +.Pp +Examples: +.Dl \&.Ms sigma +.Dl \&.Ms aleph +.Ss \&Mt +Format a +.Dq mailto: +hyperlink. +Its syntax is as follows: +.Pp +.D1 Pf \. 
Sx \&Mt Ar address +.Pp +Examples: +.Dl \&.Mt discuss@manpages.bsd.lv +.Dl \&.An Kristaps Dzonsons \&Aq \&Mt kristaps@bsd.lv +.Ss \&Nd +A one line description of the manual's content. +This is the mandatory last macro of the +.Em NAME +section and not appropriate for other sections. +.Pp +Examples: +.Dl Pf . Sx \&Nd mdoc language reference +.Dl Pf . Sx \&Nd format and display UNIX manuals +.Pp +The +.Sx \&Nd +macro technically accepts child macros and terminates with a subsequent +.Sx \&Sh +invocation. +Do not assume this behaviour: some +.Xr whatis 1 +database generators are not smart enough to parse more than the line +arguments and will display macros verbatim. +.Pp +See also +.Sx \&Nm . +.Ss \&Nm +The name of the manual page, or \(em in particular in section 1, 6, +and 8 pages \(em of an additional command or feature documented in +the manual page. +When first invoked, the +.Sx \&Nm +macro expects a single argument, the name of the manual page. +Usually, the first invocation happens in the +.Em NAME +section of the page. +The specified name will be remembered and used whenever the macro is +called again without arguments later in the page. +The +.Sx \&Nm +macro uses +.Sx Block full-implicit +semantics when invoked as the first macro on an input line in the +.Em SYNOPSIS +section; otherwise, it uses ordinary +.Sx In-line +semantics. +.Pp +Examples: +.Bd -literal -offset indent +\&.Sh SYNOPSIS +\&.Nm cat +\&.Op Fl benstuv +\&.Op Ar +.Ed +.Pp +In the +.Em SYNOPSIS +of section 2, 3 and 9 manual pages, use the +.Sx \&Fn +macro rather than +.Sx \&Nm +to mark up the name of the manual page. +.Ss \&No +Normal text. +Closes the scope of any preceding in-line macro. +When used after physical formatting macros like +.Sx \&Em +or +.Sx \&Sy , +switches back to the standard font face and weight. +Can also be used to embed plain text strings in macro lines +using semantic annotation macros. 
+.Pp +Examples: +.Dl ".Em italic , Sy bold , No and roman" +.Pp +.Bd -literal -offset indent -compact +\&.Sm off +\&.Cm :C No / Ar pattern No / Ar replacement No / +\&.Sm on +.Ed +.Pp +See also +.Sx \&Em , +.Sx \&Li , +and +.Sx \&Sy . +.Ss \&Ns +Suppress a space between the output of the preceding macro +and the following text or macro. +Following invocation, input is interpreted as normal text +just like after an +.Sx \&No +macro. +.Pp +This has no effect when invoked at the start of a macro line. +.Pp +Examples: +.Dl ".Ar name Ns = Ns Ar value" +.Dl ".Cm :M Ns Ar pattern" +.Dl ".Fl o Ns Ar output" +.Pp +See also +.Sx \&No +and +.Sx \&Sm . +.Ss \&Nx +Format the +.Nx +version provided as an argument, or a default value if +no argument is provided. +.Pp +Examples: +.Dl \&.Nx 5.01 +.Dl \&.Nx +.Pp +See also +.Sx \&At , +.Sx \&Bsx , +.Sx \&Bx , +.Sx \&Dx , +.Sx \&Fx , +and +.Sx \&Ox . +.Ss \&Oc +Close multi-line +.Sx \&Oo +context. +.Ss \&Oo +Multi-line version of +.Sx \&Op . +.Pp +Examples: +.Bd -literal -offset indent -compact +\&.Oo +\&.Op Fl flag Ns Ar value +\&.Oc +.Ed +.Ss \&Op +Optional part of a command line. +Prints the argument(s) in brackets. +This is most often used in the +.Em SYNOPSIS +section of section 1 and 8 manual pages. +.Pp +Examples: +.Dl \&.Op \&Fl a \&Ar b +.Dl \&.Op \&Ar a | b +.Pp +See also +.Sx \&Oo . +.Ss \&Os +Operating system version for display in the page footer. +This is the mandatory third macro of +any +.Nm +file. +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Os Op Ar system Op Ar version +.Pp +The optional +.Ar system +parameter specifies the relevant operating system or environment. +It is suggested to leave it unspecified, in which case +.Xr mandoc 1 +uses its +.Fl Ios +argument or, if that isn't specified either, +.Fa sysname +and +.Fa release +as returned by +.Xr uname 3 . +.Pp +Examples: +.Dl \&.Os +.Dl \&.Os KTH/CSC/TCS +.Dl \&.Os BSD 4.3 +.Pp +See also +.Sx \&Dd +and +.Sx \&Dt . +.Ss \&Ot +This macro is obsolete. 
+Use +.Sx \&Ft +instead; with +.Xr mandoc 1 , +both have the same effect. +.Pp +Historical +.Nm +packages described it as +.Dq "old function type (FORTRAN)" . +.Ss \&Ox +Format the +.Ox +version provided as an argument, or a default value +if no argument is provided. +.Pp +Examples: +.Dl \&.Ox 4.5 +.Dl \&.Ox +.Pp +See also +.Sx \&At , +.Sx \&Bsx , +.Sx \&Bx , +.Sx \&Dx , +.Sx \&Fx , +and +.Sx \&Nx . +.Ss \&Pa +An absolute or relative file system path, or a file or directory name. +If an argument is not provided, the character +.Sq \(ti +is used as a default. +.Pp +Examples: +.Dl \&.Pa /usr/bin/mandoc +.Dl \&.Pa /usr/share/man/man7/mdoc.7 +.Pp +See also +.Sx \&Lk . +.Ss \&Pc +Close parenthesised context opened by +.Sx \&Po . +.Ss \&Pf +Removes the space between its argument and the following macro. +Its syntax is as follows: +.Pp +.D1 .Pf Ar prefix macro arguments ... +.Pp +This is equivalent to: +.Pp +.D1 .No \e& Ns Ar prefix No \&Ns Ar macro arguments ... +.Pp +The +.Ar prefix +argument is not parsed for macro names or delimiters, +but used verbatim as if it were escaped. +.Pp +Examples: +.Dl ".Pf $ Ar variable_name" +.Dl ".Pf . Ar macro_name" +.Dl ".Pf 0x Ar hex_digits" +.Pp +See also +.Sx \&Ns +and +.Sx \&Sm . +.Ss \&Po +Multi-line version of +.Sx \&Pq . +.Ss \&Pp +Break a paragraph. +This will assert vertical space between prior and subsequent macros +and/or text. +.Pp +Paragraph breaks are not needed before or after +.Sx \&Sh +or +.Sx \&Ss +macros or before displays +.Pq Sx \&Bd +or lists +.Pq Sx \&Bl +unless the +.Fl compact +flag is given. +.Ss \&Pq +Parenthesised enclosure. +.Pp +See also +.Sx \&Po . +.Ss \&Qc +Close quoted context opened by +.Sx \&Qo . +.Ss \&Ql +In-line literal display. +This can for example be used for complete command invocations and +for multi-word code fragments when more specific markup is not +appropriate and an indented display is not desired. 
+While +.Xr mandoc 1 +always encloses the arguments in single quotes, other formatters +usually omit the quotes on non-terminal output devices when the +arguments have three or more characters. +.Pp +See also +.Sx \&Dl +and +.Sx \&Bd +.Fl literal . +.Ss \&Qo +Multi-line version of +.Sx \&Qq . +.Ss \&Qq +Encloses its arguments in +.Qq typewriter +double-quotes. +Consider using +.Sx \&Dq . +.Pp +See also +.Sx \&Dq , +.Sx \&Sq , +and +.Sx \&Qo . +.Ss \&Re +Close an +.Sx \&Rs +block. +Does not have any tail arguments. +.Ss \&Rs +Begin a bibliographic +.Pq Dq reference +block. +Does not have any head arguments. +The block macro may only contain +.Sx \&%A , +.Sx \&%B , +.Sx \&%C , +.Sx \&%D , +.Sx \&%I , +.Sx \&%J , +.Sx \&%N , +.Sx \&%O , +.Sx \&%P , +.Sx \&%Q , +.Sx \&%R , +.Sx \&%T , +.Sx \&%U , +and +.Sx \&%V +child macros (at least one must be specified). +.Pp +Examples: +.Bd -literal -offset indent -compact +\&.Rs +\&.%A J. E. Hopcroft +\&.%A J. D. Ullman +\&.%B Introduction to Automata Theory, Languages, and Computation +\&.%I Addison-Wesley +\&.%C Reading, Massachusetts +\&.%D 1979 +\&.Re +.Ed +.Pp +If an +.Sx \&Rs +block is used within a SEE ALSO section, a vertical space is asserted +before the rendered output, else the block continues on the current +line. +.Ss \&Rv +Insert a standard sentence regarding a function call's return value of 0 +on success and \-1 on error, with the +.Va errno +libc global variable set on error. +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Rv Fl std Op Ar function ... +.Pp +If +.Ar function +is not specified, the document's name set by +.Sx \&Nm +is used. +Multiple +.Ar function +arguments are treated as separate functions. +.Pp +See also +.Sx \&Ex . +.Ss \&Sc +Close single-quoted context opened by +.Sx \&So . +.Ss \&Sh +Begin a new section. +For a list of conventional manual sections, see +.Sx MANUAL STRUCTURE . +These sections should be used unless it's absolutely necessary that +custom sections be used. 
+.Pp +Section names should be unique so that they may be keyed by +.Sx \&Sx . +Although this macro is parsed, it should not contain child nodes or it +may not be linked with +.Sx \&Sx . +.Pp +See also +.Sx \&Pp , +.Sx \&Ss , +and +.Sx \&Sx . +.Ss \&Sm +Switches the spacing mode for output generated from macros. +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Sm Op Cm on | off +.Pp +By default, spacing is +.Cm on . +When switched +.Cm off , +no white space is inserted between macro arguments and between the +output generated from adjacent macros, but text lines +still get normal spacing between words and sentences. +.Pp +When called without an argument, the +.Sx \&Sm +macro toggles the spacing mode. +Using this is not recommended because it makes the code harder to read. +.Ss \&So +Multi-line version of +.Sx \&Sq . +.Ss \&Sq +Encloses its arguments in +.Sq typewriter +single-quotes. +.Pp +See also +.Sx \&Dq , +.Sx \&Qq , +and +.Sx \&So . +.Ss \&Ss +Begin a new subsection. +Unlike with +.Sx \&Sh , +there is no convention for the naming of subsections. +Except +.Em DESCRIPTION , +the conventional sections described in +.Sx MANUAL STRUCTURE +rarely have subsections. +.Pp +Subsection names should be unique so that they may be keyed by +.Sx \&Sx . +Although this macro is parsed, it should not contain child nodes or it +may not be linked with +.Sx \&Sx . +.Pp +See also +.Sx \&Pp , +.Sx \&Sh , +and +.Sx \&Sx . +.Ss \&St +Replace an abbreviation for a standard with the full form. +The following standards are recognised. +Where multiple lines are given without a blank line in between, +they all refer to the same standard, and using the first form +is recommended. +.Bl -tag -width 1n +.It C language standards +.Pp +.Bl -tag -width "-p1003.1g-2000" -compact +.It \-ansiC +.St -ansiC +.It \-ansiC-89 +.St -ansiC-89 +.It \-isoC +.St -isoC +.It \-isoC-90 +.St -isoC-90 +.br +The original C standard. 
+.Pp +.It \-isoC-amd1 +.St -isoC-amd1 +.Pp +.It \-isoC-tcor1 +.St -isoC-tcor1 +.Pp +.It \-isoC-tcor2 +.St -isoC-tcor2 +.Pp +.It \-isoC-99 +.St -isoC-99 +.br +The second major version of the C language standard. +.Pp +.It \-isoC-2011 +.St -isoC-2011 +.br +The third major version of the C language standard. +.El +.It POSIX.1 before the Single UNIX Specification +.Pp +.Bl -tag -width "-p1003.1g-2000" -compact +.It \-p1003.1-88 +.St -p1003.1-88 +.It \-p1003.1 +.St -p1003.1 +.br +The original POSIX standard, based on ANSI C. +.Pp +.It \-p1003.1-90 +.St -p1003.1-90 +.It \-iso9945-1-90 +.St -iso9945-1-90 +.br +The first update of POSIX.1. +.Pp +.It \-p1003.1b-93 +.St -p1003.1b-93 +.It \-p1003.1b +.St -p1003.1b +.br +Real-time extensions. +.Pp +.It \-p1003.1c-95 +.St -p1003.1c-95 +.br +POSIX thread interfaces. +.Pp +.It \-p1003.1i-95 +.St -p1003.1i-95 +.br +Technical Corrigendum. +.Pp +.It \-p1003.1-96 +.St -p1003.1-96 +.It \-iso9945-1-96 +.St -iso9945-1-96 +.br +Includes POSIX.1-1990, 1b, 1c, and 1i. +.El +.It X/Open Portability Guide version 4 and related standards +.Pp +.Bl -tag -width "-p1003.1g-2000" -compact +.It \-xpg3 +.St -xpg3 +.br +An XPG4 precursor, published in 1989. +.Pp +.It \-p1003.2 +.St -p1003.2 +.It \-p1003.2-92 +.St -p1003.2-92 +.It \-iso9945-2-93 +.St -iso9945-2-93 +.br +An XCU4 precursor. +.Pp +.It \-p1003.2a-92 +.St -p1003.2a-92 +.br +Updates to POSIX.2. +.Pp +.It \-xpg4 +.St -xpg4 +.br +Based on POSIX.1 and POSIX.2, published in 1992. +.El +.It Single UNIX Specification version 1 and related standards +.Pp +.Bl -tag -width "-p1003.1g-2000" -compact +.It \-susv1 +.St -susv1 +.It \-xpg4.2 +.St -xpg4.2 +.br +This standard was published in 1994. +It was used as the basis for UNIX 95 certification. +The following three refer to parts of it. +.Pp +.It \-xsh4.2 +.St -xsh4.2 +.Pp +.It \-xcurses4.2 +.St -xcurses4.2 +.Pp +.It \-p1003.1g-2000 +.St -p1003.1g-2000 +.br +Networking APIs, including sockets. +.Pp +.It \-svid4 +.St -svid4 , +.br +Published in 1995. 
+.El +.It Single UNIX Specification version 2 and related standards +.Pp +.Bl -tag -width "-p1003.1g-2000" -compact +.It \-susv2 +.St -susv2 +This Standard was published in 1997 +and is also called X/Open Portability Guide version 5. +It was used as the basis for UNIX 98 certification. +The following refer to parts of it. +.Pp +.It \-xbd5 +.St -xbd5 +.Pp +.It \-xsh5 +.St -xsh5 +.Pp +.It \-xcu5 +.St -xcu5 +.Pp +.It \-xns5 +.St -xns5 +.It \-xns5.2 +.St -xns5.2 +.El +.It Single UNIX Specification version 3 +.Pp +.Bl -tag -width "-p1003.1-2001" -compact +.It \-p1003.1-2001 +.St -p1003.1-2001 +.It \-susv3 +.St -susv3 +.br +This standard is based on C99, SUSv2, POSIX.1-1996, 1d, and 1j. +It is also called X/Open Portability Guide version 6. +It is used as the basis for UNIX 03 certification. +.Pp +.It \-p1003.1-2004 +.St -p1003.1-2004 +.br +The second and last Technical Corrigendum. +.El +.It Single UNIX Specification version 4 +.Pp +.Bl -tag -width "-p1003.1g-2000" -compact +.It \-p1003.1-2008 +.St -p1003.1-2008 +.It \-susv4 +.St -susv4 +.br +This standard is also called +X/Open Portability Guide version 7. +.Pp +.It \-p1003.1-2013 +.St -p1003.1-2013 +.br +This is the first Technical Corrigendum. +.El +.It Other standards +.Pp +.Bl -tag -width "-p1003.1g-2000" -compact +.It \-ieee754 +.St -ieee754 +.br +Floating-point arithmetic. +.Pp +.It \-iso8601 +.St -iso8601 +.br +Representation of dates and times, published in 1988. +.Pp +.It \-iso8802-3 +.St -iso8802-3 +.br +Ethernet local area networks. +.Pp +.It \-ieee1275-94 +.St -ieee1275-94 +.El +.El +.Ss \&Sx +Reference a section or subsection in the same manual page. +The referenced section or subsection name must be identical to the +enclosed argument, including whitespace. +.Pp +Examples: +.Dl \&.Sx MANUAL STRUCTURE +.Pp +See also +.Sx \&Sh +and +.Sx \&Ss . +.Ss \&Sy +Request a boldface font. +.Pp +This is most often used to indicate importance or seriousness (not to be +confused with stress emphasis, see +.Sx \&Em ) . 
+When none of the semantic macros fit, it is also adequate for syntax +elements that have to be given or that appear verbatim. +.Pp +Examples: +.Bd -literal -compact -offset indent +\&.Sy Warning : +If +\&.Sy s +appears in the owner permissions, set-user-ID mode is set. +This utility replaces the former +\&.Sy dumpdir +program. +.Ed +.Pp +See also +.Sx \&Bf , +.Sx \&Em , +.Sx \&Li , +and +.Sx \&No . +.Ss \&Ta +Table cell separator in +.Sx \&Bl Fl column +lists; can only be used below +.Sx \&It . +.Ss \&Tn +Supported only for compatibility, do not use this in new manuals. +Even though the macro name +.Pq Dq tradename +suggests a semantic function, historic usage is inconsistent, mostly +using it as a presentation-level macro to request a small caps font. +.Ss \&Ud +Supported only for compatibility, do not use this in new manuals. +Prints out +.Dq currently under development. +.Ss \&Ux +Supported only for compatibility, do not use this in new manuals. +Prints out +.Dq Ux . +.Ss \&Va +A variable name. +.Pp +Examples: +.Dl \&.Va foo +.Dl \&.Va const char *bar ; +.Pp +For function arguments and parameters, use +.Sx \&Fa +instead. +For declarations of global variables in the +.Em SYNOPSIS +section, use +.Sx \&Vt . +.Ss \&Vt +A variable type. +.Pp +This is also used for indicating global variables in the +.Em SYNOPSIS +section, in which case a variable name is also specified. +Note that it accepts +.Sx Block partial-implicit +syntax when invoked as the first macro on an input line in the +.Em SYNOPSIS +section, else it accepts ordinary +.Sx In-line +syntax. +In the former case, this macro starts a new output line, +and a blank line is inserted in front if there is a preceding +function definition or include directive. 
+.Pp +Examples: +.Dl \&.Vt unsigned char +.Dl \&.Vt extern const char * const sys_signame[] \&; +.Pp +For parameters in function prototypes, use +.Sx \&Fa +instead, for function return types +.Sx \&Ft , +and for variable names outside the +.Em SYNOPSIS +section +.Sx \&Va , +even when including a type with the name. +See also +.Sx MANUAL STRUCTURE . +.Ss \&Xc +Close a scope opened by +.Sx \&Xo . +.Ss \&Xo +Extend the header of an +.Sx \&It +macro or the body of a partial-implicit block macro +beyond the end of the input line. +This macro originally existed to work around the 9-argument limit +of historic +.Xr roff 7 . +.Ss \&Xr +Link to another manual +.Pq Qq cross-reference . +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&Xr Ar name Op section +.Pp +Cross reference the +.Ar name +and +.Ar section +number of another man page; +omitting the section number is rarely useful. +.Pp +Examples: +.Dl \&.Xr mandoc 1 +.Dl \&.Xr mandoc 1 \&; +.Dl \&.Xr mandoc 1 \&Ns s behaviour +.Ss \&br +Emits a line-break. +This macro should not be used; it is implemented for compatibility with +historical manuals. +.Pp +Consider using +.Sx \&Pp +in the event of natural paragraph breaks. +.Ss \&sp +Emits vertical space. +This macro should not be used; it is implemented for compatibility with +historical manuals. +Its syntax is as follows: +.Pp +.D1 Pf \. Sx \&sp Op Ar height +.Pp +The +.Ar height +argument is a scaling width as described in +.Xr roff 7 . +If unspecified, +.Sx \&sp +asserts a single vertical space. +.Sh MACRO SYNTAX +The syntax of a macro depends on its classification. +In this section, +.Sq \-arg +refers to macro arguments, which may be followed by zero or more +.Sq parm +parameters; +.Sq \&Yo +opens the scope of a macro; and if specified, +.Sq \&Yc +closes it out. +.Pp +The +.Em Callable +column indicates that the macro may also be called by passing its name +as an argument to another macro. +For example, +.Sq \&.Op \&Fl O \&Ar file +produces +.Sq Op Fl O Ar file . 
+To prevent a macro call and render the macro name literally, +escape it by prepending a zero-width space, +.Sq \e& . +For example, +.Sq \&Op \e&Fl O +produces +.Sq Op \&Fl O . +If a macro is not callable but its name appears as an argument +to another macro, it is interpreted as opaque text. +For example, +.Sq \&.Fl \&Sh +produces +.Sq Fl \&Sh . +.Pp +The +.Em Parsed +column indicates whether the macro may call other macros by receiving +their names as arguments. +If a macro is not parsed but the name of another macro appears +as an argument, it is interpreted as opaque text. +.Pp +The +.Em Scope +column, if applicable, describes closure rules. +.Ss Block full-explicit +Multi-line scope closed by an explicit closing macro. +All macros contain bodies; only +.Sx \&Bf +and +.Pq optionally +.Sx \&Bl +contain a head. +.Bd -literal -offset indent +\&.Yo \(lB\-arg \(lBparm...\(rB\(rB \(lBhead...\(rB +\(lBbody...\(rB +\&.Yc +.Ed +.Bl -column "MacroX" "CallableX" "ParsedX" "closed by XXX" -offset indent +.It Em Macro Ta Em Callable Ta Em Parsed Ta Em Scope +.It Sx \&Bd Ta \&No Ta \&No Ta closed by Sx \&Ed +.It Sx \&Bf Ta \&No Ta \&No Ta closed by Sx \&Ef +.It Sx \&Bk Ta \&No Ta \&No Ta closed by Sx \&Ek +.It Sx \&Bl Ta \&No Ta \&No Ta closed by Sx \&El +.It Sx \&Ed Ta \&No Ta \&No Ta opened by Sx \&Bd +.It Sx \&Ef Ta \&No Ta \&No Ta opened by Sx \&Bf +.It Sx \&Ek Ta \&No Ta \&No Ta opened by Sx \&Bk +.It Sx \&El Ta \&No Ta \&No Ta opened by Sx \&Bl +.El +.Ss Block full-implicit +Multi-line scope closed by end-of-file or implicitly by another macro. +All macros have bodies; some +.Po +.Sx \&It Fl bullet , +.Fl hyphen , +.Fl dash , +.Fl enum , +.Fl item +.Pc +don't have heads; only one +.Po +.Sx \&It +in +.Sx \&Bl Fl column +.Pc +has multiple heads. +.Bd -literal -offset indent +\&.Yo \(lB\-arg \(lBparm...\(rB\(rB \(lBhead... 
\(lBTa head...\(rB\(rB +\(lBbody...\(rB +.Ed +.Bl -column "MacroX" "CallableX" "ParsedX" "closed by XXXXXXXXXXX" -offset indent +.It Em Macro Ta Em Callable Ta Em Parsed Ta Em Scope +.It Sx \&It Ta \&No Ta Yes Ta closed by Sx \&It , Sx \&El +.It Sx \&Nd Ta \&No Ta \&No Ta closed by Sx \&Sh +.It Sx \&Nm Ta \&No Ta Yes Ta closed by Sx \&Nm , Sx \&Sh , Sx \&Ss +.It Sx \&Sh Ta \&No Ta Yes Ta closed by Sx \&Sh +.It Sx \&Ss Ta \&No Ta Yes Ta closed by Sx \&Sh , Sx \&Ss +.El +.Pp +Note that the +.Sx \&Nm +macro is a +.Sx Block full-implicit +macro only when invoked as the first macro +in a +.Em SYNOPSIS +section line, else it is +.Sx In-line . +.Ss Block partial-explicit +Like block full-explicit, but also with single-line scope. +Each has at least a body and, in limited circumstances, a head +.Po +.Sx \&Fo , +.Sx \&Eo +.Pc +and/or tail +.Pq Sx \&Ec . +.Bd -literal -offset indent +\&.Yo \(lB\-arg \(lBparm...\(rB\(rB \(lBhead...\(rB +\(lBbody...\(rB +\&.Yc \(lBtail...\(rB + +\&.Yo \(lB\-arg \(lBparm...\(rB\(rB \(lBhead...\(rB \ +\(lBbody...\(rB \&Yc \(lBtail...\(rB +.Ed +.Bl -column "MacroX" "CallableX" "ParsedX" "closed by XXXX" -offset indent +.It Em Macro Ta Em Callable Ta Em Parsed Ta Em Scope +.It Sx \&Ac Ta Yes Ta Yes Ta opened by Sx \&Ao +.It Sx \&Ao Ta Yes Ta Yes Ta closed by Sx \&Ac +.It Sx \&Bc Ta Yes Ta Yes Ta opened by Sx \&Bo +.It Sx \&Bo Ta Yes Ta Yes Ta closed by Sx \&Bc +.It Sx \&Brc Ta Yes Ta Yes Ta opened by Sx \&Bro +.It Sx \&Bro Ta Yes Ta Yes Ta closed by Sx \&Brc +.It Sx \&Dc Ta Yes Ta Yes Ta opened by Sx \&Do +.It Sx \&Do Ta Yes Ta Yes Ta closed by Sx \&Dc +.It Sx \&Ec Ta Yes Ta Yes Ta opened by Sx \&Eo +.It Sx \&Eo Ta Yes Ta Yes Ta closed by Sx \&Ec +.It Sx \&Fc Ta Yes Ta Yes Ta opened by Sx \&Fo +.It Sx \&Fo Ta \&No Ta \&No Ta closed by Sx \&Fc +.It Sx \&Oc Ta Yes Ta Yes Ta opened by Sx \&Oo +.It Sx \&Oo Ta Yes Ta Yes Ta closed by Sx \&Oc +.It Sx \&Pc Ta Yes Ta Yes Ta opened by Sx \&Po +.It Sx \&Po Ta Yes Ta Yes Ta closed by Sx \&Pc +.It Sx \&Qc Ta 
Yes Ta Yes Ta opened by Sx \&Qo +.It Sx \&Qo Ta Yes Ta Yes Ta closed by Sx \&Qc +.It Sx \&Re Ta \&No Ta \&No Ta opened by Sx \&Rs +.It Sx \&Rs Ta \&No Ta \&No Ta closed by Sx \&Re +.It Sx \&Sc Ta Yes Ta Yes Ta opened by Sx \&So +.It Sx \&So Ta Yes Ta Yes Ta closed by Sx \&Sc +.It Sx \&Xc Ta Yes Ta Yes Ta opened by Sx \&Xo +.It Sx \&Xo Ta Yes Ta Yes Ta closed by Sx \&Xc +.El +.Ss Block partial-implicit +Like block full-implicit, but with single-line scope closed by the +end of the line. +.Bd -literal -offset indent +\&.Yo \(lB\-arg \(lBval...\(rB\(rB \(lBbody...\(rB \(lBres...\(rB +.Ed +.Bl -column "MacroX" "CallableX" "ParsedX" -offset indent +.It Em Macro Ta Em Callable Ta Em Parsed +.It Sx \&Aq Ta Yes Ta Yes +.It Sx \&Bq Ta Yes Ta Yes +.It Sx \&Brq Ta Yes Ta Yes +.It Sx \&D1 Ta \&No Ta \&Yes +.It Sx \&Dl Ta \&No Ta Yes +.It Sx \&Dq Ta Yes Ta Yes +.It Sx \&En Ta Yes Ta Yes +.It Sx \&Op Ta Yes Ta Yes +.It Sx \&Pq Ta Yes Ta Yes +.It Sx \&Ql Ta Yes Ta Yes +.It Sx \&Qq Ta Yes Ta Yes +.It Sx \&Sq Ta Yes Ta Yes +.It Sx \&Vt Ta Yes Ta Yes +.El +.Pp +Note that the +.Sx \&Vt +macro is a +.Sx Block partial-implicit +only when invoked as the first macro +in a +.Em SYNOPSIS +section line, else it is +.Sx In-line . +.Ss Special block macro +The +.Sx \&Ta +macro can only be used below +.Sx \&It +in +.Sx \&Bl Fl column +lists. +It delimits blocks representing table cells; +these blocks have bodies, but no heads. +.Bl -column "MacroX" "CallableX" "ParsedX" "closed by XXXX" -offset indent +.It Em Macro Ta Em Callable Ta Em Parsed Ta Em Scope +.It Sx \&Ta Ta Yes Ta Yes Ta closed by Sx \&Ta , Sx \&It +.El +.Ss In-line +Closed by the end of the line, fixed argument lengths, +and/or subsequent macros. +In-line macros have only text children. +If a number (or inequality) of arguments is +.Pq n , +then the macro accepts an arbitrary number of arguments. 
+.Bd -literal -offset indent +\&.Yo \(lB\-arg \(lBval...\(rB\(rB \(lBargs...\(rB \(lBres...\(rB + +\&.Yo \(lB\-arg \(lBval...\(rB\(rB \(lBargs...\(rB Yc... + +\&.Yo \(lB\-arg \(lBval...\(rB\(rB arg0 arg1 argN +.Ed +.Bl -column "MacroX" "CallableX" "ParsedX" "Arguments" -offset indent +.It Em Macro Ta Em Callable Ta Em Parsed Ta Em Arguments +.It Sx \&%A Ta \&No Ta \&No Ta >0 +.It Sx \&%B Ta \&No Ta \&No Ta >0 +.It Sx \&%C Ta \&No Ta \&No Ta >0 +.It Sx \&%D Ta \&No Ta \&No Ta >0 +.It Sx \&%I Ta \&No Ta \&No Ta >0 +.It Sx \&%J Ta \&No Ta \&No Ta >0 +.It Sx \&%N Ta \&No Ta \&No Ta >0 +.It Sx \&%O Ta \&No Ta \&No Ta >0 +.It Sx \&%P Ta \&No Ta \&No Ta >0 +.It Sx \&%Q Ta \&No Ta \&No Ta >0 +.It Sx \&%R Ta \&No Ta \&No Ta >0 +.It Sx \&%T Ta \&No Ta \&No Ta >0 +.It Sx \&%U Ta \&No Ta \&No Ta >0 +.It Sx \&%V Ta \&No Ta \&No Ta >0 +.It Sx \&Ad Ta Yes Ta Yes Ta >0 +.It Sx \&An Ta Yes Ta Yes Ta >0 +.It Sx \&Ap Ta Yes Ta Yes Ta 0 +.It Sx \&Ar Ta Yes Ta Yes Ta n +.It Sx \&At Ta Yes Ta Yes Ta 1 +.It Sx \&Bsx Ta Yes Ta Yes Ta n +.It Sx \&Bt Ta \&No Ta \&No Ta 0 +.It Sx \&Bx Ta Yes Ta Yes Ta n +.It Sx \&Cd Ta Yes Ta Yes Ta >0 +.It Sx \&Cm Ta Yes Ta Yes Ta >0 +.It Sx \&Db Ta \&No Ta \&No Ta 1 +.It Sx \&Dd Ta \&No Ta \&No Ta n +.It Sx \&Dt Ta \&No Ta \&No Ta n +.It Sx \&Dv Ta Yes Ta Yes Ta >0 +.It Sx \&Dx Ta Yes Ta Yes Ta n +.It Sx \&Em Ta Yes Ta Yes Ta >0 +.It Sx \&Er Ta Yes Ta Yes Ta >0 +.It Sx \&Es Ta Yes Ta Yes Ta 2 +.It Sx \&Ev Ta Yes Ta Yes Ta >0 +.It Sx \&Ex Ta \&No Ta \&No Ta n +.It Sx \&Fa Ta Yes Ta Yes Ta >0 +.It Sx \&Fd Ta \&No Ta \&No Ta >0 +.It Sx \&Fl Ta Yes Ta Yes Ta n +.It Sx \&Fn Ta Yes Ta Yes Ta >0 +.It Sx \&Fr Ta Yes Ta Yes Ta >0 +.It Sx \&Ft Ta Yes Ta Yes Ta >0 +.It Sx \&Fx Ta Yes Ta Yes Ta n +.It Sx \&Hf Ta \&No Ta \&No Ta n +.It Sx \&Ic Ta Yes Ta Yes Ta >0 +.It Sx \&In Ta \&No Ta \&No Ta 1 +.It Sx \&Lb Ta \&No Ta \&No Ta 1 +.It Sx \&Li Ta Yes Ta Yes Ta >0 +.It Sx \&Lk Ta Yes Ta Yes Ta >0 +.It Sx \&Lp Ta \&No Ta \&No Ta 0 +.It Sx \&Ms Ta Yes Ta Yes Ta >0 +.It Sx 
\&Mt Ta Yes Ta Yes Ta >0 +.It Sx \&Nm Ta Yes Ta Yes Ta n +.It Sx \&No Ta Yes Ta Yes Ta 0 +.It Sx \&Ns Ta Yes Ta Yes Ta 0 +.It Sx \&Nx Ta Yes Ta Yes Ta n +.It Sx \&Os Ta \&No Ta \&No Ta n +.It Sx \&Ot Ta Yes Ta Yes Ta >0 +.It Sx \&Ox Ta Yes Ta Yes Ta n +.It Sx \&Pa Ta Yes Ta Yes Ta n +.It Sx \&Pf Ta Yes Ta Yes Ta 1 +.It Sx \&Pp Ta \&No Ta \&No Ta 0 +.It Sx \&Rv Ta \&No Ta \&No Ta n +.It Sx \&Sm Ta \&No Ta \&No Ta <2 +.It Sx \&St Ta \&No Ta Yes Ta 1 +.It Sx \&Sx Ta Yes Ta Yes Ta >0 +.It Sx \&Sy Ta Yes Ta Yes Ta >0 +.It Sx \&Tn Ta Yes Ta Yes Ta >0 +.It Sx \&Ud Ta \&No Ta \&No Ta 0 +.It Sx \&Ux Ta Yes Ta Yes Ta n +.It Sx \&Va Ta Yes Ta Yes Ta n +.It Sx \&Vt Ta Yes Ta Yes Ta >0 +.It Sx \&Xr Ta Yes Ta Yes Ta >0 +.It Sx \&br Ta \&No Ta \&No Ta 0 +.It Sx \&sp Ta \&No Ta \&No Ta 1 +.El +.Ss Delimiters +When a macro argument consists of one single input character +considered as a delimiter, the argument gets special handling. +This does not apply when delimiters appear in arguments containing +more than one character. +Consequently, to prevent special handling and just handle it +like any other argument, a delimiter can be escaped by prepending +a zero-width space +.Pq Sq \e& . +In text lines, delimiters never need escaping, but may be used +as normal punctuation. +.Pp +For many macros, when the leading arguments are opening delimiters, +these delimiters are put before the macro scope, +and when the trailing arguments are closing delimiters, +these delimiters are put after the macro scope. +For example, +.Pp +.D1 Pf \. \&Aq "( [ word ] ) ." +.Pp +renders as: +.Pp +.D1 Aq ( [ word ] ) . +.Pp +Opening delimiters are: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It \&( +left parenthesis +.It \&[ +left bracket +.El +.Pp +Closing delimiters are: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It \&. +period +.It \&, +comma +.It \&: +colon +.It \&; +semicolon +.It \&) +right parenthesis +.It \&] +right bracket +.It \&? +question mark +.It \&! 
+exclamation mark +.El +.Pp +Note that even a period preceded by a backslash +.Pq Sq \e.\& +gets this special handling; use +.Sq \e&. +to prevent that. +.Pp +Many in-line macros interrupt their scope when they encounter +delimiters, and resume their scope when more arguments follow that +are not delimiters. +For example, +.Pp +.D1 Pf \. \&Fl "a ( b | c \e*(Ba d ) e" +.Pp +renders as: +.Pp +.D1 Fl a ( b | c \*(Ba d ) e +.Pp +This applies to both opening and closing delimiters, +and also to the middle delimiter: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It \&| +vertical bar +.El +.Pp +As a special case, the predefined string \e*(Ba is handled and rendered +in the same way as a plain +.Sq \&| +character. +Using this predefined string is not recommended in new manuals. +.Ss Font handling +In +.Nm +documents, usage of semantic markup is recommended in order to have +proper fonts automatically selected; only when no fitting semantic markup +is available, consider falling back to +.Sx Physical markup +macros. +Whenever any +.Nm +macro switches the +.Xr roff 7 +font mode, it will automatically restore the previous font when exiting +its scope. +Manually switching the font using the +.Xr roff 7 +.Ql \ef +font escape sequences is never required. +.Sh COMPATIBILITY +This section provides an incomplete list of compatibility issues +between mandoc and GNU troff +.Pq Qq groff . +.Pp +The following problematic behaviour is found in groff: +.Pp +.Bl -dash -compact +.It +.Sx \&Dd +with non-standard arguments behaves very strangely. +When there are three arguments, they are printed verbatim. +Any other number of arguments is replaced by the current date, +but without any arguments the string +.Dq Epoch +is printed. +.It +.Sx \&Lk +only accepts a single link-name argument; the remainder is misformatted. +.It +.Sx \&Pa +does not format its arguments when used in the FILES section under +certain list types. 
+.It +.Sx \&Ta +can only be called by other macros, but not at the beginning of a line. +.It +.Sx \&%C +is not implemented (up to and including groff-1.22.2). +.It +.Sq \ef +.Pq font face +and +.Sq \eF +.Pq font family face +.Sx Text Decoration +escapes behave irregularly when specified within line-macro scopes. +.It +Negative scaling units return to prior lines. +Instead, mandoc truncates them to zero. +.El +.Pp +The following features are unimplemented in mandoc: +.Pp +.Bl -dash -compact +.It +.Sx \&Bd +.Fl file Ar file +is unsupported for security reasons. +.It +.Sx \&Bd +.Fl filled +does not adjust the right margin, but is an alias for +.Sx \&Bd +.Fl ragged . +.It +.Sx \&Bd +.Fl literal +does not use a literal font, but is an alias for +.Sx \&Bd +.Fl unfilled . +.It +.Sx \&Bd +.Fl offset Cm center +and +.Fl offset Cm right +don't work. +Groff does not implement centered and flush-right rendering either, +but produces large indentations. +.El +.Sh SEE ALSO +.Xr man 1 , +.Xr mandoc 1 , +.Xr eqn 7 , +.Xr man 7 , +.Xr mandoc_char 7 , +.Xr roff 7 , +.Xr tbl 7 +.Sh HISTORY +The +.Nm +language first appeared as a troff macro package in +.Bx 4.4 . +It was later significantly updated by Werner Lemberg and Ruslan Ermilov +in groff-1.17. +The standalone implementation that is part of the +.Xr mandoc 1 +utility written by Kristaps Dzonsons appeared in +.Ox 4.6 . +.Sh AUTHORS +The +.Nm +reference was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv . 
diff --git a/contrib/mdocml/mdoc.c b/contrib/mdocml/mdoc.c new file mode 100644 index 0000000..724d45c --- /dev/null +++ b/contrib/mdocml/mdoc.c @@ -0,0 +1,489 @@ +/* $Id: mdoc.c,v 1.256 2015/10/30 19:04:16 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010, 2012-2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "libmandoc.h" +#include "roff_int.h" +#include "libmdoc.h" + /* Macro names in MDOC_* token order, preceded by a "text" entry that has no MDOC_* token of its own. */ +const char *const __mdoc_macronames[MDOC_MAX + 1] = { + "text", + "Ap", "Dd", "Dt", "Os", + "Sh", "Ss", "Pp", "D1", + "Dl", "Bd", "Ed", "Bl", + "El", "It", "Ad", "An", + "Ar", "Cd", "Cm", "Dv", + "Er", "Ev", "Ex", "Fa", + "Fd", "Fl", "Fn", "Ft", + "Ic", "In", "Li", "Nd", + "Nm", "Op", "Ot", "Pa", + "Rv", "St", "Va", "Vt", + "Xr", "%A", "%B", "%D", + "%I", "%J", "%N", "%O", + "%P", "%R", "%T", "%V", + "Ac", "Ao", "Aq", "At", + "Bc", "Bf", "Bo", "Bq", + "Bsx", "Bx", "Db", "Dc", + "Do", "Dq", "Ec", "Ef", + "Em", "Eo", "Fx", "Ms", + "No", "Ns", "Nx", "Ox", + "Pc", "Pf", "Po", "Pq", + "Qc", "Ql", "Qo", "Qq", + "Re", "Rs", "Sc", "So", + "Sq", "Sm", "Sx", "Sy", + "Tn", "Ux", "Xc", "Xo", + "Fo", "Fc", "Oo", "Oc", + "Bk", "Ek", "Bt", "Hf", + "Fr", "Ud", "Lb", "Lp", + "Lk", "Mt", "Brq", "Bro", + "Brc", "%C", "Es", "En", + "Dx", "%Q", "br", "sp", + "%U", "Ta", "ll", +}; + /* Macro argument names without the leading dash, in enum mdocargt order (see mdoc.h). */ +const char *const __mdoc_argnames[MDOC_ARG_MAX] = { + "split", "nosplit", "ragged", + "unfilled", "literal", "file", + "offset", "bullet", "dash", + "hyphen", "item", "enum", + "tag", "diag", "hang", + "ohang", "inset", "column", + "width", "compact", "std", + "filled", "words", "emphasis", + "symbolic", "nested", "centered" + }; + /* Exported views of the two tables above; mdoc_macronames starts one entry in, so that mdoc_macronames[MDOC_Ap] is "Ap". */ +const char * const *mdoc_macronames = __mdoc_macronames + 1; +const char * const *mdoc_argnames = __mdoc_argnames; + +static int mdoc_ptext(struct roff_man *, int, char *, int); +static int mdoc_pmacro(struct roff_man *, int, char *, int); + + +/* + * Main parse routine. Parses a single line -- really just hands off to + * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 
+ */ +int +mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs) +{ + /* Set MDOC_NEWLINE unless the last node is a ROFFT_EQN node whose line is not before ln. */ + if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line) + mdoc->flags |= MDOC_NEWLINE; + + /* + * Let the roff nS register switch SYNOPSIS mode early, + * such that the parser knows at all times + * whether this mode is on or off. + * Note that this mode is also switched by the Sh macro. + */ + if (roff_getreg(mdoc->roff, "nS")) + mdoc->flags |= MDOC_SYNOPSIS; + else + mdoc->flags &= ~MDOC_SYNOPSIS; + /* Hand the line to mdoc_pmacro() when roff_getcontrol() reports a control character, else to mdoc_ptext(); their return value is passed through. */ + return roff_getcontrol(mdoc->roff, buf, &offs) ? + mdoc_pmacro(mdoc, ln, buf, offs) : + mdoc_ptext(mdoc, ln, buf, offs); +} + /* Dispatch macro tok to the handler stored for it in mdoc_macros[]; tok must be a real macro token (asserted). */ +void +mdoc_macro(MACRO_PROT_ARGS) +{ + assert(tok > TOKEN_NONE && tok < MDOC_MAX); + + (*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf); +} + /* Allocate a ROFFT_TAIL node for tok, append it to the tree and set mdoc->next to ROFF_NEXT_CHILD. */ +void +mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, int tok) +{ + struct roff_node *p; + + p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok); + roff_node_append(mdoc, p); + mdoc->next = ROFF_NEXT_CHILD; +} + /* Mark body and its parent as MDOC_ENDED, then append a ROFFT_BODY node that points back to body, shares its norm data and records the end type. Sets mdoc->next to ROFF_NEXT_SIBLING and returns the new node. */ +struct roff_node * +mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos, int tok, + struct roff_node *body, enum mdoc_endbody end) +{ + struct roff_node *p; + + body->flags |= MDOC_ENDED; + body->parent->flags |= MDOC_ENDED; + p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok); + p->body = body; + p->norm = body->norm; + p->end = end; + roff_node_append(mdoc, p); + mdoc->next = ROFF_NEXT_SIBLING; + return p; +} + /* Allocate and append a ROFFT_BLOCK node for tok, taking a reference on args when it is non-NULL. Bd, Bf, Bl, En and Rs blocks also get a union mdoc_data from mandoc_calloc(). Sets mdoc->next to ROFF_NEXT_CHILD and returns the new node. */ +struct roff_node * +mdoc_block_alloc(struct roff_man *mdoc, int line, int pos, + int tok, struct mdoc_arg *args) +{ + struct roff_node *p; + + p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok); + p->args = args; + if (p->args) + (args->refcnt)++; + + switch (tok) { + case MDOC_Bd: + case MDOC_Bf: + case MDOC_Bl: + case MDOC_En: + case MDOC_Rs: + p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); + break; + default: + break; + } + roff_node_append(mdoc, p); + mdoc->next = ROFF_NEXT_CHILD; + return p; +} + /* Allocate and append a ROFFT_ELEM node for tok, taking a reference on args when it is non-NULL. Only An elements get a union mdoc_data from mandoc_calloc(). Sets mdoc->next to ROFF_NEXT_CHILD. */ +void +mdoc_elem_alloc(struct roff_man *mdoc, int line, 
int pos, + int tok, struct mdoc_arg *args) +{ + struct roff_node *p; + + p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok); + p->args = args; + if (p->args) + (args->refcnt)++; + + switch (tok) { + case MDOC_An: + p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); + break; + default: + break; + } + roff_node_append(mdoc, p); + mdoc->next = ROFF_NEXT_CHILD; +} + /* Unlink p from the tree, clear its sibling pointers and append it again with roff_node_append(). */ +void +mdoc_node_relink(struct roff_man *mdoc, struct roff_node *p) +{ + + roff_node_unlink(mdoc, p); + p->prev = p->next = NULL; + roff_node_append(mdoc, p); +} + +/* + * Parse free-form text, that is, a line that does not begin with the + * control character. + * Always returns 1. + */ +static int +mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs) +{ + struct roff_node *n; + char *c, *ws, *end; + + assert(mdoc->last); + n = mdoc->last; + + /* + * Divert directly to list processing if we're encountering a + * columnar ROFFT_BLOCK with or without a prior ROFFT_BLOCK entry + * (a ROFFT_BODY means it's already open, in which case we should + * process within its context in the normal way). + */ + + if (n->tok == MDOC_Bl && n->type == ROFFT_BODY && + n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) { + /* `Bl' is open without any children. */ + mdoc->flags |= MDOC_FREECOL; + mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf); + return 1; + } + + if (n->tok == MDOC_It && n->type == ROFFT_BLOCK && + NULL != n->parent && + MDOC_Bl == n->parent->tok && + LIST_column == n->parent->norm->Bl.type) { + /* `Bl' has block-level `It' children. */ + mdoc->flags |= MDOC_FREECOL; + mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf); + return 1; + } + + /* + * Search for the beginning of unescaped trailing whitespace (ws) + * and for the first character not to be output (end). + */ + + /* FIXME: replace with strcspn(). 
*/ + ws = NULL; + for (c = end = buf + offs; *c; c++) { + switch (*c) { + case ' ': + if (NULL == ws) + ws = c; + continue; + case '\t': + /* + * Always warn about trailing tabs, + * even outside literal context, + * where they should be put on the next line. + */ + if (NULL == ws) + ws = c; + /* + * Strip trailing tabs in literal context only; + * outside, they affect the next line. + */ + if (MDOC_LITERAL & mdoc->flags) + continue; + break; + case '\\': + /* Skip the escaped character, too, if any. */ + if (c[1]) + c++; + /* FALLTHROUGH */ + default: + ws = NULL; + break; + } + end = c + 1; + } + *end = '\0'; + + if (ws) + mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, + line, (int)(ws-buf), NULL); + + if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) { + mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse, + line, (int)(c - buf), NULL); + + /* + * Insert a `sp' in the case of a blank line. Technically, + * blank lines aren't allowed, but enough manuals assume this + * behaviour that we want to work around it. + */ + roff_elem_alloc(mdoc, line, offs, MDOC_sp); + mdoc->last->flags |= MDOC_VALID | MDOC_ENDED; + mdoc->next = ROFF_NEXT_SIBLING; + return 1; + } + + roff_word_alloc(mdoc, line, offs, buf+offs); + + if (mdoc->flags & MDOC_LITERAL) + return 1; + + /* + * End-of-sentence check. If the last character is an unescaped + * EOS character, then flag the node as being the end of a + * sentence. The front-end will know how to interpret this. + */ + + assert(buf < end); + + if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) + mdoc->last->flags |= MDOC_EOS; + return 1; +} + +/* + * Parse a macro line, that is, a line beginning with the control + * character. + * Returns 2 when quick mode ends the parse after the NAME section, + * else 1. + */ +static int +mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs) +{ + struct roff_node *n; + const char *cp; + int tok; + int i, sv; + char mac[5]; + + sv = offs; + + /* + * Copy the first word into a nil-terminated buffer. + * Stop when a space, tab, escape, or eoln is encountered. 
+ */ + + i = 0; + while (i < 4 && strchr(" \t\\", buf[offs]) == NULL) + mac[i++] = buf[offs++]; + + mac[i] = '\0'; + /* Only words of two or three characters are looked up; anything else is reported as an unknown macro below. */ + tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : TOKEN_NONE; + + if (tok == TOKEN_NONE) { + mandoc_msg(MANDOCERR_MACRO, mdoc->parse, + ln, sv, buf + sv - 1); + return 1; + } + + /* Skip a leading escape sequence or tab. */ + + switch (buf[offs]) { + case '\\': + cp = buf + offs + 1; + mandoc_escape(&cp, NULL, NULL); + offs = cp - buf; + break; + case '\t': + offs++; + break; + default: + break; + } + + /* Jump to the next non-whitespace word. */ + + while (buf[offs] && ' ' == buf[offs]) + offs++; + + /* + * Trailing whitespace. Note that tabs are allowed to be passed + * into the parser as "text", so we only warn about spaces here. + */ + + if ('\0' == buf[offs] && ' ' == buf[offs - 1]) + mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, + ln, offs - 1, NULL); + + /* + * If an initial macro or a list invocation, divert directly + * into macro processing. + */ + + if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) { + mdoc_macro(mdoc, tok, ln, sv, &offs, buf); + return 1; + } + + n = mdoc->last; + assert(mdoc->last); + + /* + * If the first macro of a `Bl -column', open an `It' block + * context around the parsed macro. + */ + + if (n->tok == MDOC_Bl && n->type == ROFFT_BODY && + n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) { + mdoc->flags |= MDOC_FREECOL; + mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf); + return 1; + } + + /* + * If we're following a block-level `It' within a `Bl -column' + * context (perhaps opened in the above block or in ptext()), + * then open an `It' block context around the parsed macro. + */ + + if (n->tok == MDOC_It && n->type == ROFFT_BLOCK && + NULL != n->parent && + MDOC_Bl == n->parent->tok && + LIST_column == n->parent->norm->Bl.type) { + mdoc->flags |= MDOC_FREECOL; + mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf); + return 1; + } + + /* Normal processing of a macro. 
+ */ + + mdoc_macro(mdoc, tok, ln, sv, &offs, buf); + + /* In quick mode (for mandocdb), abort after the NAME section. */ + + if (mdoc->quick && MDOC_Sh == tok && + SEC_NAME != mdoc->last->sec) + return 2; + + return 1; +} + /* Classify the string p as a delimiter. A single "(" or "[" is DELIM_OPEN, a single "|" is DELIM_MIDDLE, and a single one of . , ; : ? ! ) ] is DELIM_CLOSE. Of longer strings, only a backslash followed by "." (DELIM_CLOSE) and a backslash followed by "fR|\fP" (DELIM_MIDDLE) are delimiters. Everything else, including the empty string, is DELIM_NONE. */ +enum mdelim +mdoc_isdelim(const char *p) +{ + + if ('\0' == p[0]) + return DELIM_NONE; + + if ('\0' == p[1]) + switch (p[0]) { + case '(': + case '[': + return DELIM_OPEN; + case '|': + return DELIM_MIDDLE; + case '.': + case ',': + case ';': + case ':': + case '?': + case '!': + case ')': + case ']': + return DELIM_CLOSE; + default: + return DELIM_NONE; + } + + if ('\\' != p[0]) + return DELIM_NONE; + + if (0 == strcmp(p + 1, ".")) + return DELIM_CLOSE; + if (0 == strcmp(p + 1, "fR|\\fP")) + return DELIM_MIDDLE; + + return DELIM_NONE; +} + /* Point mdoc->last back at the first node, run mdoc_node_validate() and then mdoc_state_reset(). */ +void +mdoc_validate(struct roff_man *mdoc) +{ + + mdoc->last = mdoc->first; + mdoc_node_validate(mdoc); + mdoc_state_reset(mdoc); +} diff --git a/contrib/mdocml/mdoc.h b/contrib/mdocml/mdoc.h new file mode 100644 index 0000000..ebe4391 --- /dev/null +++ b/contrib/mdocml/mdoc.h @@ -0,0 +1,284 @@ +/* $Id: mdoc.h,v 1.144 2015/11/07 14:01:16 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#define MDOC_Ap 0 +#define MDOC_Dd 1 +#define MDOC_Dt 2 +#define MDOC_Os 3 +#define MDOC_Sh 4 +#define MDOC_Ss 5 +#define MDOC_Pp 6 +#define MDOC_D1 7 +#define MDOC_Dl 8 +#define MDOC_Bd 9 +#define MDOC_Ed 10 +#define MDOC_Bl 11 +#define MDOC_El 12 +#define MDOC_It 13 +#define MDOC_Ad 14 +#define MDOC_An 15 +#define MDOC_Ar 16 +#define MDOC_Cd 17 +#define MDOC_Cm 18 +#define MDOC_Dv 19 +#define MDOC_Er 20 +#define MDOC_Ev 21 +#define MDOC_Ex 22 +#define MDOC_Fa 23 +#define MDOC_Fd 24 +#define MDOC_Fl 25 +#define MDOC_Fn 26 +#define MDOC_Ft 27 +#define MDOC_Ic 28 +#define MDOC_In 29 +#define MDOC_Li 30 +#define MDOC_Nd 31 +#define MDOC_Nm 32 +#define MDOC_Op 33 +#define MDOC_Ot 34 +#define MDOC_Pa 35 +#define MDOC_Rv 36 +#define MDOC_St 37 +#define MDOC_Va 38 +#define MDOC_Vt 39 +#define MDOC_Xr 40 +#define MDOC__A 41 +#define MDOC__B 42 +#define MDOC__D 43 +#define MDOC__I 44 +#define MDOC__J 45 +#define MDOC__N 46 +#define MDOC__O 47 +#define MDOC__P 48 +#define MDOC__R 49 +#define MDOC__T 50 +#define MDOC__V 51 +#define MDOC_Ac 52 +#define MDOC_Ao 53 +#define MDOC_Aq 54 +#define MDOC_At 55 +#define MDOC_Bc 56 +#define MDOC_Bf 57 +#define MDOC_Bo 58 +#define MDOC_Bq 59 +#define MDOC_Bsx 60 +#define MDOC_Bx 61 +#define MDOC_Db 62 +#define MDOC_Dc 63 +#define MDOC_Do 64 +#define MDOC_Dq 65 +#define MDOC_Ec 66 +#define MDOC_Ef 67 +#define MDOC_Em 68 +#define MDOC_Eo 69 +#define MDOC_Fx 70 +#define MDOC_Ms 71 +#define MDOC_No 72 +#define MDOC_Ns 73 +#define MDOC_Nx 74 +#define MDOC_Ox 75 +#define MDOC_Pc 76 +#define MDOC_Pf 77 +#define MDOC_Po 78 +#define MDOC_Pq 79 +#define MDOC_Qc 80 +#define MDOC_Ql 81 +#define MDOC_Qo 82 
+#define MDOC_Qq 83 +#define MDOC_Re 84 +#define MDOC_Rs 85 +#define MDOC_Sc 86 +#define MDOC_So 87 +#define MDOC_Sq 88 +#define MDOC_Sm 89 +#define MDOC_Sx 90 +#define MDOC_Sy 91 +#define MDOC_Tn 92 +#define MDOC_Ux 93 +#define MDOC_Xc 94 +#define MDOC_Xo 95 +#define MDOC_Fo 96 +#define MDOC_Fc 97 +#define MDOC_Oo 98 +#define MDOC_Oc 99 +#define MDOC_Bk 100 +#define MDOC_Ek 101 +#define MDOC_Bt 102 +#define MDOC_Hf 103 +#define MDOC_Fr 104 +#define MDOC_Ud 105 +#define MDOC_Lb 106 +#define MDOC_Lp 107 +#define MDOC_Lk 108 +#define MDOC_Mt 109 +#define MDOC_Brq 110 +#define MDOC_Bro 111 +#define MDOC_Brc 112 +#define MDOC__C 113 +#define MDOC_Es 114 +#define MDOC_En 115 +#define MDOC_Dx 116 +#define MDOC__Q 117 +#define MDOC_br 118 +#define MDOC_sp 119 +#define MDOC__U 120 +#define MDOC_Ta 121 +#define MDOC_ll 122 +#define MDOC_MAX 123 + +enum mdocargt { + MDOC_Split, /* -split */ + MDOC_Nosplit, /* -nosplit */ + MDOC_Ragged, /* -ragged */ + MDOC_Unfilled, /* -unfilled */ + MDOC_Literal, /* -literal */ + MDOC_File, /* -file */ + MDOC_Offset, /* -offset */ + MDOC_Bullet, /* -bullet */ + MDOC_Dash, /* -dash */ + MDOC_Hyphen, /* -hyphen */ + MDOC_Item, /* -item */ + MDOC_Enum, /* -enum */ + MDOC_Tag, /* -tag */ + MDOC_Diag, /* -diag */ + MDOC_Hang, /* -hang */ + MDOC_Ohang, /* -ohang */ + MDOC_Inset, /* -inset */ + MDOC_Column, /* -column */ + MDOC_Width, /* -width */ + MDOC_Compact, /* -compact */ + MDOC_Std, /* -std */ + MDOC_Filled, /* -filled */ + MDOC_Words, /* -words */ + MDOC_Emphasis, /* -emphasis */ + MDOC_Symbolic, /* -symbolic */ + MDOC_Nested, /* -nested */ + MDOC_Centred, /* -centered */ + MDOC_ARG_MAX /* number of flags above; not a flag itself */ +}; + +/* + * An argument to a macro (multiple values = `-column xxx yyy'). + */ +struct mdoc_argv { + enum mdocargt arg; /* type of argument */ + int line; /* line value handed to mdoc_argv() */ + int pos; /* buffer offset at which this flag started */ + size_t sz; /* elements in "value" */ + char **value; /* argument strings */ +}; + +/* + * Reference-counted macro arguments.
These are refcounted because + * blocks have multiple instances of the same arguments spread across + * the HEAD, BODY, TAIL, and BLOCK node types. + */ +struct mdoc_arg { + size_t argc; + struct mdoc_argv *argv; + unsigned int refcnt; +}; + +enum mdoc_list { + LIST__NONE = 0, + LIST_bullet, /* -bullet */ + LIST_column, /* -column */ + LIST_dash, /* -dash */ + LIST_diag, /* -diag */ + LIST_enum, /* -enum */ + LIST_hang, /* -hang */ + LIST_hyphen, /* -hyphen */ + LIST_inset, /* -inset */ + LIST_item, /* -item */ + LIST_ohang, /* -ohang */ + LIST_tag, /* -tag */ + LIST_MAX +}; + +enum mdoc_disp { + DISP__NONE = 0, + DISP_centered, /* -centered */ + DISP_ragged, /* -ragged */ + DISP_unfilled, /* -unfilled */ + DISP_filled, /* -filled */ + DISP_literal /* -literal */ +}; + +enum mdoc_auth { + AUTH__NONE = 0, + AUTH_split, /* -split */ + AUTH_nosplit /* -nosplit */ +}; + +enum mdoc_font { + FONT__NONE = 0, + FONT_Em, /* Em, -emphasis */ + FONT_Li, /* Li, -literal */ + FONT_Sy /* Sy, -symbolic */ +}; + +struct mdoc_bd { + const char *offs; /* -offset */ + enum mdoc_disp type; /* -ragged, etc. */ + int comp; /* -compact */ +}; + +struct mdoc_bl { + const char *width; /* -width */ + const char *offs; /* -offset */ + enum mdoc_list type; /* -tag, -enum, etc. */ + int comp; /* -compact */ + size_t ncols; /* -column arg count */ + const char **cols; /* -column val ptr */ + int count; /* -enum counter */ +}; + +struct mdoc_bf { + enum mdoc_font font; /* font */ +}; + +struct mdoc_an { + enum mdoc_auth auth; /* -split, etc. */ +}; + +struct mdoc_rs { + int quote_T; /* whether to quote %T */ +}; + +/* + * Consists of normalised node arguments. These should be used instead + * of iterating through the mdoc_arg pointers of a node: defaults are + * provided, etc. + */ +union mdoc_data { + struct mdoc_an An; + struct mdoc_bd Bd; + struct mdoc_bf Bf; + struct mdoc_bl Bl; + struct roff_node *Es; + struct mdoc_rs Rs; +}; + +/* Names of macros. 
*/ +extern const char *const *mdoc_macronames; + +/* Names of macro args. Index is enum mdocargt. */ +extern const char *const *mdoc_argnames; + + +void mdoc_validate(struct roff_man *); diff --git a/contrib/mdocml/mdoc_argv.c b/contrib/mdocml/mdoc_argv.c new file mode 100644 index 0000000..8675bdb --- /dev/null +++ b/contrib/mdocml/mdoc_argv.c @@ -0,0 +1,677 @@ +/* $Id: mdoc_argv.c,v 1.107 2015/10/17 00:21:07 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2012, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "libmandoc.h" +#include "libmdoc.h" + +#define MULTI_STEP 5 /* pre-allocate argument values */ +#define DELIMSZ 6 /* max possible size of a delimiter */ + +enum argsflag { + ARGSFL_NONE = 0, + ARGSFL_DELIM, /* handle delimiters of [[::delim::][ ]+]+ */ + ARGSFL_TABSEP /* handle tab/`Ta' separated phrases */ +}; + +enum argvflag { + ARGV_NONE, /* no args to flag (e.g., -split) */ + ARGV_SINGLE, /* one arg to flag (e.g., -file xxx) */ + ARGV_MULTI /* multiple args (e.g., -column xxx yyy) */ +}; + +struct mdocarg { + enum argsflag flags; + const enum mdocargt *argvs; +}; + +static void argn_free(struct mdoc_arg *, int); +static enum margserr args(struct roff_man *, int, int *, + char *, enum argsflag, char **); +static int args_checkpunct(const char *, int); +static void argv_multi(struct roff_man *, int, + struct mdoc_argv *, int *, char *); +static void argv_single(struct roff_man *, int, + struct mdoc_argv *, int *, char *); + +static const enum argvflag argvflags[MDOC_ARG_MAX] = { + ARGV_NONE, /* MDOC_Split */ + ARGV_NONE, /* MDOC_Nosplit */ + ARGV_NONE, /* MDOC_Ragged */ + ARGV_NONE, /* MDOC_Unfilled */ + ARGV_NONE, /* MDOC_Literal */ + ARGV_SINGLE, /* MDOC_File */ + ARGV_SINGLE, /* MDOC_Offset */ + ARGV_NONE, /* MDOC_Bullet */ + ARGV_NONE, /* MDOC_Dash */ + ARGV_NONE, /* MDOC_Hyphen */ + ARGV_NONE, /* MDOC_Item */ + ARGV_NONE, /* MDOC_Enum */ + ARGV_NONE, /* MDOC_Tag */ + ARGV_NONE, /* MDOC_Diag */ + ARGV_NONE, /* MDOC_Hang */ + ARGV_NONE, /* MDOC_Ohang */ + ARGV_NONE, /* MDOC_Inset */ + ARGV_MULTI, /* MDOC_Column */ + ARGV_SINGLE, /* MDOC_Width */ + ARGV_NONE, /* MDOC_Compact */ + ARGV_NONE, /* MDOC_Std */ + ARGV_NONE, /* MDOC_Filled */ + ARGV_NONE, /* MDOC_Words */ + ARGV_NONE, /* MDOC_Emphasis */ + ARGV_NONE, /* MDOC_Symbolic */ + 
ARGV_NONE /* MDOC_Nested; MDOC_Centred has no initializer here and is implicitly zero, i.e. ARGV_NONE */ +}; + +static const enum mdocargt args_Ex[] = { + MDOC_Std, + MDOC_ARG_MAX /* end-of-table marker tested by the lookup loop in mdoc_argv() */ +}; + +static const enum mdocargt args_An[] = { + MDOC_Split, + MDOC_Nosplit, + MDOC_ARG_MAX +}; + +static const enum mdocargt args_Bd[] = { + MDOC_Ragged, + MDOC_Unfilled, + MDOC_Filled, + MDOC_Literal, + MDOC_File, + MDOC_Offset, + MDOC_Compact, + MDOC_Centred, + MDOC_ARG_MAX +}; + +static const enum mdocargt args_Bf[] = { + MDOC_Emphasis, + MDOC_Literal, + MDOC_Symbolic, + MDOC_ARG_MAX +}; + +static const enum mdocargt args_Bk[] = { + MDOC_Words, + MDOC_ARG_MAX +}; + +static const enum mdocargt args_Bl[] = { + MDOC_Bullet, + MDOC_Dash, + MDOC_Hyphen, + MDOC_Item, + MDOC_Enum, + MDOC_Tag, + MDOC_Diag, + MDOC_Hang, + MDOC_Ohang, + MDOC_Inset, + MDOC_Column, + MDOC_Width, + MDOC_Offset, + MDOC_Compact, + MDOC_Nested, + MDOC_ARG_MAX +}; + +static const struct mdocarg mdocargs[MDOC_MAX] = { + { ARGSFL_DELIM, NULL }, /* Ap */ + { ARGSFL_NONE, NULL }, /* Dd */ + { ARGSFL_NONE, NULL }, /* Dt */ + { ARGSFL_NONE, NULL }, /* Os */ + { ARGSFL_NONE, NULL }, /* Sh */ + { ARGSFL_NONE, NULL }, /* Ss */ + { ARGSFL_NONE, NULL }, /* Pp */ + { ARGSFL_DELIM, NULL }, /* D1 */ + { ARGSFL_DELIM, NULL }, /* Dl */ + { ARGSFL_NONE, args_Bd }, /* Bd */ + { ARGSFL_NONE, NULL }, /* Ed */ + { ARGSFL_NONE, args_Bl }, /* Bl */ + { ARGSFL_NONE, NULL }, /* El */ + { ARGSFL_NONE, NULL }, /* It */ + { ARGSFL_DELIM, NULL }, /* Ad */ + { ARGSFL_DELIM, args_An }, /* An */ + { ARGSFL_DELIM, NULL }, /* Ar */ + { ARGSFL_DELIM, NULL }, /* Cd */ + { ARGSFL_DELIM, NULL }, /* Cm */ + { ARGSFL_DELIM, NULL }, /* Dv */ + { ARGSFL_DELIM, NULL }, /* Er */ + { ARGSFL_DELIM, NULL }, /* Ev */ + { ARGSFL_NONE, args_Ex }, /* Ex */ + { ARGSFL_DELIM, NULL }, /* Fa */ + { ARGSFL_NONE, NULL }, /* Fd */ + { ARGSFL_DELIM, NULL }, /* Fl */ + { ARGSFL_DELIM, NULL }, /* Fn */ + { ARGSFL_DELIM, NULL }, /* Ft */ + { ARGSFL_DELIM, NULL }, /* Ic */ + { ARGSFL_DELIM, NULL }, /* In */ + { ARGSFL_DELIM, NULL }, /* Li */ + {
ARGSFL_NONE, NULL }, /* Nd */ + { ARGSFL_DELIM, NULL }, /* Nm */ + { ARGSFL_DELIM, NULL }, /* Op */ + { ARGSFL_DELIM, NULL }, /* Ot */ + { ARGSFL_DELIM, NULL }, /* Pa */ + { ARGSFL_NONE, args_Ex }, /* Rv */ + { ARGSFL_DELIM, NULL }, /* St */ + { ARGSFL_DELIM, NULL }, /* Va */ + { ARGSFL_DELIM, NULL }, /* Vt */ + { ARGSFL_DELIM, NULL }, /* Xr */ + { ARGSFL_NONE, NULL }, /* %A */ + { ARGSFL_NONE, NULL }, /* %B */ + { ARGSFL_NONE, NULL }, /* %D */ + { ARGSFL_NONE, NULL }, /* %I */ + { ARGSFL_NONE, NULL }, /* %J */ + { ARGSFL_NONE, NULL }, /* %N */ + { ARGSFL_NONE, NULL }, /* %O */ + { ARGSFL_NONE, NULL }, /* %P */ + { ARGSFL_NONE, NULL }, /* %R */ + { ARGSFL_NONE, NULL }, /* %T */ + { ARGSFL_NONE, NULL }, /* %V */ + { ARGSFL_DELIM, NULL }, /* Ac */ + { ARGSFL_NONE, NULL }, /* Ao */ + { ARGSFL_DELIM, NULL }, /* Aq */ + { ARGSFL_DELIM, NULL }, /* At */ + { ARGSFL_DELIM, NULL }, /* Bc */ + { ARGSFL_NONE, args_Bf }, /* Bf */ + { ARGSFL_NONE, NULL }, /* Bo */ + { ARGSFL_DELIM, NULL }, /* Bq */ + { ARGSFL_DELIM, NULL }, /* Bsx */ + { ARGSFL_DELIM, NULL }, /* Bx */ + { ARGSFL_NONE, NULL }, /* Db */ + { ARGSFL_DELIM, NULL }, /* Dc */ + { ARGSFL_NONE, NULL }, /* Do */ + { ARGSFL_DELIM, NULL }, /* Dq */ + { ARGSFL_DELIM, NULL }, /* Ec */ + { ARGSFL_NONE, NULL }, /* Ef */ + { ARGSFL_DELIM, NULL }, /* Em */ + { ARGSFL_NONE, NULL }, /* Eo */ + { ARGSFL_DELIM, NULL }, /* Fx */ + { ARGSFL_DELIM, NULL }, /* Ms */ + { ARGSFL_DELIM, NULL }, /* No */ + { ARGSFL_DELIM, NULL }, /* Ns */ + { ARGSFL_DELIM, NULL }, /* Nx */ + { ARGSFL_DELIM, NULL }, /* Ox */ + { ARGSFL_DELIM, NULL }, /* Pc */ + { ARGSFL_DELIM, NULL }, /* Pf */ + { ARGSFL_NONE, NULL }, /* Po */ + { ARGSFL_DELIM, NULL }, /* Pq */ + { ARGSFL_DELIM, NULL }, /* Qc */ + { ARGSFL_DELIM, NULL }, /* Ql */ + { ARGSFL_NONE, NULL }, /* Qo */ + { ARGSFL_DELIM, NULL }, /* Qq */ + { ARGSFL_NONE, NULL }, /* Re */ + { ARGSFL_NONE, NULL }, /* Rs */ + { ARGSFL_DELIM, NULL }, /* Sc */ + { ARGSFL_NONE, NULL }, /* So */ + { ARGSFL_DELIM, NULL }, 
/* Sq */ + { ARGSFL_NONE, NULL }, /* Sm */ + { ARGSFL_DELIM, NULL }, /* Sx */ + { ARGSFL_DELIM, NULL }, /* Sy */ + { ARGSFL_DELIM, NULL }, /* Tn */ + { ARGSFL_DELIM, NULL }, /* Ux */ + { ARGSFL_DELIM, NULL }, /* Xc */ + { ARGSFL_NONE, NULL }, /* Xo */ + { ARGSFL_NONE, NULL }, /* Fo */ + { ARGSFL_DELIM, NULL }, /* Fc */ + { ARGSFL_NONE, NULL }, /* Oo */ + { ARGSFL_DELIM, NULL }, /* Oc */ + { ARGSFL_NONE, args_Bk }, /* Bk */ + { ARGSFL_NONE, NULL }, /* Ek */ + { ARGSFL_NONE, NULL }, /* Bt */ + { ARGSFL_NONE, NULL }, /* Hf */ + { ARGSFL_DELIM, NULL }, /* Fr */ + { ARGSFL_NONE, NULL }, /* Ud */ + { ARGSFL_DELIM, NULL }, /* Lb */ + { ARGSFL_NONE, NULL }, /* Lp */ + { ARGSFL_DELIM, NULL }, /* Lk */ + { ARGSFL_DELIM, NULL }, /* Mt */ + { ARGSFL_DELIM, NULL }, /* Brq */ + { ARGSFL_NONE, NULL }, /* Bro */ + { ARGSFL_DELIM, NULL }, /* Brc */ + { ARGSFL_NONE, NULL }, /* %C */ + { ARGSFL_NONE, NULL }, /* Es */ + { ARGSFL_DELIM, NULL }, /* En */ + { ARGSFL_DELIM, NULL }, /* Dx */ + { ARGSFL_NONE, NULL }, /* %Q */ + { ARGSFL_NONE, NULL }, /* br */ + { ARGSFL_NONE, NULL }, /* sp */ + { ARGSFL_NONE, NULL }, /* %U */ + { ARGSFL_NONE, NULL }, /* Ta */ + { ARGSFL_NONE, NULL }, /* ll */ +}; + + +/* + * Parse flags and their arguments from the input line. + * These come in the form -flag [argument ...]. + * Some flags take no argument, some one, some multiple. + */ +void +mdoc_argv(struct roff_man *mdoc, int line, int tok, + struct mdoc_arg **reta, int *pos, char *buf) +{ + struct mdoc_argv tmpv; + struct mdoc_argv **retv; + const enum mdocargt *argtable; + char *argname; + int ipos, retc; + char savechar; + + *reta = NULL; + + /* Which flags does this macro support? */ + + argtable = mdocargs[tok].argvs; + if (argtable == NULL) + return; + + /* Loop over the flags on the input line. */ + + ipos = *pos; + while (buf[ipos] == '-') { + + /* Seek to the first unescaped space. 
*/ + + for (argname = buf + ++ipos; buf[ipos] != '\0'; ipos++) + if (buf[ipos] == ' ' && buf[ipos - 1] != '\\') + break; + + /* + * We want to nil-terminate the word to look it up. + * But we may not have a flag, in which case we need + * to restore the line as-is. So keep around the + * stray byte, which we'll reset upon exiting. + */ + + if ((savechar = buf[ipos]) != '\0') + buf[ipos++] = '\0'; + + /* + * Now look up the word as a flag. Use temporary + * storage that we'll copy into the node's flags. + */ + + while ((tmpv.arg = *argtable++) != MDOC_ARG_MAX) + if ( ! strcmp(argname, mdoc_argnames[tmpv.arg])) + break; + + /* If it isn't a flag, restore the saved byte. */ + + if (tmpv.arg == MDOC_ARG_MAX) { + if (savechar != '\0') + buf[ipos - 1] = savechar; + break; + } + + /* Read to the next word (the first argument). */ + + while (buf[ipos] == ' ') + ipos++; + + /* Parse the arguments of the flag. */ + + tmpv.line = line; + tmpv.pos = *pos; + tmpv.sz = 0; + tmpv.value = NULL; + + switch (argvflags[tmpv.arg]) { + case ARGV_SINGLE: + argv_single(mdoc, line, &tmpv, &ipos, buf); + break; + case ARGV_MULTI: + argv_multi(mdoc, line, &tmpv, &ipos, buf); + break; + case ARGV_NONE: + break; + } + + /* Append to the return values. */ + + if (*reta == NULL) + *reta = mandoc_calloc(1, sizeof(**reta)); + + retc = ++(*reta)->argc; + retv = &(*reta)->argv; + *retv = mandoc_reallocarray(*retv, retc, sizeof(**retv)); + memcpy(*retv + retc - 1, &tmpv, sizeof(**retv)); + + /* Prepare for parsing the next flag. 
*/ + + *pos = ipos; + argtable = mdocargs[tok].argvs; + } +} + +void +mdoc_argv_free(struct mdoc_arg *p) +{ + int i; + + if (NULL == p) + return; + + if (p->refcnt) { + --(p->refcnt); + if (p->refcnt) + return; + } + assert(p->argc); + + for (i = (int)p->argc - 1; i >= 0; i--) + argn_free(p, i); + + free(p->argv); + free(p); +} + +static void +argn_free(struct mdoc_arg *p, int iarg) +{ + struct mdoc_argv *arg; + int j; + + arg = &p->argv[iarg]; + + if (arg->sz && arg->value) { + for (j = (int)arg->sz - 1; j >= 0; j--) + free(arg->value[j]); + free(arg->value); + } + + for (--p->argc; iarg < (int)p->argc; iarg++) + p->argv[iarg] = p->argv[iarg+1]; +} + +enum margserr +mdoc_args(struct roff_man *mdoc, int line, int *pos, + char *buf, int tok, char **v) +{ + struct roff_node *n; + char *v_local; + enum argsflag fl; + + if (v == NULL) + v = &v_local; + fl = tok == TOKEN_NONE ? ARGSFL_NONE : mdocargs[tok].flags; + if (tok != MDOC_It) + return args(mdoc, line, pos, buf, fl, v); + + /* + * We know that we're in an `It', so it's reasonable to expect + * us to be sitting in a `Bl'. Someday this may not be the case + * (if we allow random `It's sitting out there), so provide a + * safe fall-back into the default behaviour. + */ + + for (n = mdoc->last; n; n = n->parent) + if (MDOC_Bl == n->tok) + if (LIST_column == n->norm->Bl.type) { + fl = ARGSFL_TABSEP; + break; + } + + return args(mdoc, line, pos, buf, fl, v); +} + +static enum margserr +args(struct roff_man *mdoc, int line, int *pos, + char *buf, enum argsflag fl, char **v) +{ + char *p; + int pairs; + + if (buf[*pos] == '\0') { + if (mdoc->flags & MDOC_PHRASELIT && + ! (mdoc->flags & MDOC_PHRASE)) { + mandoc_msg(MANDOCERR_ARG_QUOTE, + mdoc->parse, line, *pos, NULL); + mdoc->flags &= ~MDOC_PHRASELIT; + } + return ARGS_EOLN; + } + + *v = buf + *pos; + + if (fl == ARGSFL_DELIM && args_checkpunct(buf, *pos)) + return ARGS_PUNCT; + + /* + * Tabs in `It' lines in `Bl -column' can't be escaped. 
+ * Phrases are reparsed for `Ta' and other macros later. + */ + + if (fl == ARGSFL_TABSEP) { + if ((p = strchr(*v, '\t')) != NULL) { + + /* + * Words right before and right after + * tab characters are not parsed, + * unless there is a blank in between. + */ + + if (p[-1] != ' ') + mdoc->flags |= MDOC_PHRASEQL; + if (p[1] != ' ') + mdoc->flags |= MDOC_PHRASEQN; + + /* + * One or more blanks after a tab cause + * one leading blank in the next column. + * So skip all but one of them. + */ + + *pos += (int)(p - *v) + 1; + while (buf[*pos] == ' ' && buf[*pos + 1] == ' ') + (*pos)++; + + /* + * A tab at the end of an input line + * switches to the next column. + */ + + if (buf[*pos] == '\0' || buf[*pos + 1] == '\0') + mdoc->flags |= MDOC_PHRASEQN; + } else { + p = strchr(*v, '\0'); + if (p[-1] == ' ') + mandoc_msg(MANDOCERR_SPACE_EOL, + mdoc->parse, line, *pos, NULL); + *pos += (int)(p - *v); + } + + /* Skip any trailing blank characters. */ + while (p > *v && p[-1] == ' ' && + (p - 1 == *v || p[-2] != '\\')) + p--; + *p = '\0'; + + return ARGS_PHRASE; + } + + /* + * Process a quoted literal. A quote begins with a double-quote + * and ends with a double-quote NOT preceded by a double-quote. + * NUL-terminate the literal in place. + * Collapse pairs of quotes inside quoted literals. + * Whitespace is NOT involved in literal termination. + */ + + if (mdoc->flags & MDOC_PHRASELIT || buf[*pos] == '\"') { + if ( ! (mdoc->flags & MDOC_PHRASELIT)) + *v = &buf[++(*pos)]; + + if (mdoc->flags & MDOC_PHRASE) + mdoc->flags |= MDOC_PHRASELIT; + + pairs = 0; + for ( ; buf[*pos]; (*pos)++) { + /* Move following text left after quoted quotes. */ + if (pairs) + buf[*pos - pairs] = buf[*pos]; + if ('\"' != buf[*pos]) + continue; + /* Unquoted quotes end quoted args. */ + if ('\"' != buf[*pos + 1]) + break; + /* Quoted quotes collapse. */ + pairs++; + (*pos)++; + } + if (pairs) + buf[*pos - pairs] = '\0'; + + if (buf[*pos] == '\0') { + if ( ! 
(mdoc->flags & MDOC_PHRASE)) + mandoc_msg(MANDOCERR_ARG_QUOTE, + mdoc->parse, line, *pos, NULL); + return ARGS_QWORD; + } + + mdoc->flags &= ~MDOC_PHRASELIT; + buf[(*pos)++] = '\0'; + + if ('\0' == buf[*pos]) + return ARGS_QWORD; + + while (' ' == buf[*pos]) + (*pos)++; + + if ('\0' == buf[*pos]) + mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, + line, *pos, NULL); + + return ARGS_QWORD; + } + + p = &buf[*pos]; + *v = mandoc_getarg(mdoc->parse, &p, line, pos); + + /* + * After parsing the last word in this phrase, + * tell lookup() whether or not to interpret it. + */ + + if (*p == '\0' && mdoc->flags & MDOC_PHRASEQL) { + mdoc->flags &= ~MDOC_PHRASEQL; + mdoc->flags |= MDOC_PHRASEQF; + } + return ARGS_WORD; +} + +/* + * Check if the string consists only of space-separated closing + * delimiters. This is a bit of a dance: the first must be a close + * delimiter, but it may be followed by middle delimiters. Arbitrary + * whitespace may separate these tokens. + */ +static int +args_checkpunct(const char *buf, int i) +{ + int j; + char dbuf[DELIMSZ]; + enum mdelim d; + + /* First token must be a close-delimiter. */ + + for (j = 0; buf[i] && ' ' != buf[i] && j < DELIMSZ; j++, i++) + dbuf[j] = buf[i]; + + if (DELIMSZ == j) + return 0; + + dbuf[j] = '\0'; + if (DELIM_CLOSE != mdoc_isdelim(dbuf)) + return 0; + + while (' ' == buf[i]) + i++; + + /* Remaining must NOT be open/none. 
*/ + + while (buf[i]) { + j = 0; + while (buf[i] && ' ' != buf[i] && j < DELIMSZ) + dbuf[j++] = buf[i++]; + + if (DELIMSZ == j) + return 0; + + dbuf[j] = '\0'; + d = mdoc_isdelim(dbuf); + if (DELIM_NONE == d || DELIM_OPEN == d) + return 0; + + while (' ' == buf[i]) + i++; + } + + return '\0' == buf[i]; +} + +static void +argv_multi(struct roff_man *mdoc, int line, + struct mdoc_argv *v, int *pos, char *buf) +{ + enum margserr ac; + char *p; + + for (v->sz = 0; ; v->sz++) { + if (buf[*pos] == '-') + break; + ac = args(mdoc, line, pos, buf, ARGSFL_NONE, &p); + if (ac == ARGS_EOLN) + break; + + if (v->sz % MULTI_STEP == 0) + v->value = mandoc_reallocarray(v->value, + v->sz + MULTI_STEP, sizeof(char *)); + + v->value[(int)v->sz] = mandoc_strdup(p); + } +} + +static void +argv_single(struct roff_man *mdoc, int line, + struct mdoc_argv *v, int *pos, char *buf) +{ + enum margserr ac; + char *p; + + ac = args(mdoc, line, pos, buf, ARGSFL_NONE, &p); + if (ac == ARGS_EOLN) + return; + + v->sz = 1; + v->value = mandoc_malloc(sizeof(char *)); + v->value[0] = mandoc_strdup(p); +} diff --git a/contrib/mdocml/mdoc_hash.c b/contrib/mdocml/mdoc_hash.c new file mode 100644 index 0000000..476116d --- /dev/null +++ b/contrib/mdocml/mdoc_hash.c @@ -0,0 +1,93 @@ +/* $Id: mdoc_hash.c,v 1.26 2015/10/06 18:32:19 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <limits.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "roff.h" +#include "mdoc.h" +#include "libmdoc.h" + +static unsigned char table[27 * 12]; /* 27 buckets of 12 slots each: one bucket per letter, chosen by the second character of the macro name, plus a last bucket for names whose second character is not a letter; a slot holds a macro index, or UCHAR_MAX while unused */ + + +void +mdoc_hash_init(void) +{ + int i, j, major; + const char *p; + + if (*table != '\0') /* first byte already non-zero: treated as table built earlier, nothing to do */ + return; + + memset(table, UCHAR_MAX, sizeof(table)); /* mark every slot unused */ + + for (i = 0; i < (int)MDOC_MAX; i++) { + p = mdoc_macronames[i]; + + if (isalpha((unsigned char)p[1])) + major = 12 * (tolower((unsigned char)p[1]) - 97); /* 97 is ASCII lower-case a */ + else + major = 12 * 26; + + for (j = 0; j < 12; j++) /* take the first free slot of the bucket */ + if (UCHAR_MAX == table[major + j]) { + table[major + j] = (unsigned char)i; + break; + } + + assert(j < 12); /* fails if more than 12 macro names land in one bucket */ + } +} + +int +mdoc_hash_find(const char *p) +{ + int major, i, j; + + if (0 == p[0]) + return TOKEN_NONE; + if ( !
isalpha((unsigned char)p[0]) && '%' != p[0]) + return TOKEN_NONE; + + if (isalpha((unsigned char)p[1])) + major = 12 * (tolower((unsigned char)p[1]) - 97); + else if ('1' == p[1]) + major = 12 * 26; + else + return TOKEN_NONE; + + if (p[2] && p[3]) + return TOKEN_NONE; + + for (j = 0; j < 12; j++) { + if (UCHAR_MAX == (i = table[major + j])) + break; + if (0 == strcmp(p, mdoc_macronames[i])) + return i; + } + + return TOKEN_NONE; +} diff --git a/contrib/mdocml/mdoc_html.c b/contrib/mdocml/mdoc_html.c new file mode 100644 index 0000000..8e21bc7 --- /dev/null +++ b/contrib/mdocml/mdoc_html.c @@ -0,0 +1,2178 @@ +/* $Id: mdoc_html.c,v 1.240 2016/01/08 17:48:09 schwarze Exp $ */ +/* + * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */
+#include "config.h"
+
+#include <sys/types.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "mandoc_aux.h"
+#include "roff.h"
+#include "mdoc.h"
+#include "out.h"
+#include "html.h"
+#include "main.h"
+/*
+ * Horizontal indent handed to SCALE_HS_INIT() by mdoc_bd_pre() when a
+ * display is given "-offset indent"; "-offset indent-two" uses twice
+ * this value.
+ */
+#define INDENT 5
+/*
+ * Parameter list shared by every node handler in this file: the
+ * document metadata (read-only), the node being rendered, and the
+ * HTML output state.
+ */
+#define MDOC_ARGS const struct roff_meta *meta, \
+ struct roff_node *n, \
+ struct html *h
+/* MIN() expands each argument more than once: pass no side effects. */
+#ifndef MIN
+#define MIN(a,b) ((/*CONSTCOND*/(a)<(b))?(a):(b))
+#endif
+/*
+ * One dispatch entry per macro.
+ * print_mdoc_node() calls pre, if non-NULL and if n->end is
+ * ENDBODY_NOT or the node has children, before looking at the
+ * children; a zero return stops print_mdoc_node() from printing the
+ * children itself, a non-zero return lets it print them.
+ * post, if non-NULL, is called after the children and after
+ * print_stagq() (presumably once the tags opened since entry have
+ * been closed -- confirm in html.c), unless the node carries
+ * MDOC_ENDED.
+ */
+struct htmlmdoc {
+ int (*pre)(MDOC_ARGS);
+ void (*post)(MDOC_ARGS);
+};
+
+static void print_mdoc_head(MDOC_ARGS);
+static void print_mdoc_node(MDOC_ARGS);
+static void print_mdoc_nodelist(MDOC_ARGS);
+static void synopsis_pre(struct html *,
+ const struct roff_node *);
+
+static void a2width(const char *, struct roffsu *);
+
+static void mdoc_root_post(MDOC_ARGS);
+static int mdoc_root_pre(MDOC_ARGS);
+/* Per-macro pre/post handlers referenced from mdocs[] below. */
+static void mdoc__x_post(MDOC_ARGS);
+static int mdoc__x_pre(MDOC_ARGS);
+static int mdoc_ad_pre(MDOC_ARGS);
+static int mdoc_an_pre(MDOC_ARGS);
+static int mdoc_ap_pre(MDOC_ARGS);
+static int mdoc_ar_pre(MDOC_ARGS);
+static int mdoc_bd_pre(MDOC_ARGS);
+static int mdoc_bf_pre(MDOC_ARGS);
+static void mdoc_bk_post(MDOC_ARGS);
+static int mdoc_bk_pre(MDOC_ARGS);
+static int mdoc_bl_pre(MDOC_ARGS);
+static int mdoc_bt_pre(MDOC_ARGS);
+static int mdoc_bx_pre(MDOC_ARGS);
+static int mdoc_cd_pre(MDOC_ARGS);
+static int mdoc_d1_pre(MDOC_ARGS);
+static int mdoc_dv_pre(MDOC_ARGS);
+static int mdoc_fa_pre(MDOC_ARGS);
+static int mdoc_fd_pre(MDOC_ARGS);
+static int mdoc_fl_pre(MDOC_ARGS);
+static int mdoc_fn_pre(MDOC_ARGS);
+static int mdoc_ft_pre(MDOC_ARGS);
+static int mdoc_em_pre(MDOC_ARGS);
+static void mdoc_eo_post(MDOC_ARGS);
+static int mdoc_eo_pre(MDOC_ARGS);
+static int mdoc_er_pre(MDOC_ARGS);
+static int mdoc_ev_pre(MDOC_ARGS);
+static int mdoc_ex_pre(MDOC_ARGS);
+static void mdoc_fo_post(MDOC_ARGS);
+static int mdoc_fo_pre(MDOC_ARGS);
+static int mdoc_ic_pre(MDOC_ARGS);
+static int mdoc_igndelim_pre(MDOC_ARGS);
+static int mdoc_in_pre(MDOC_ARGS);
+static int mdoc_it_pre(MDOC_ARGS);
+static int mdoc_lb_pre(MDOC_ARGS);
+static int mdoc_li_pre(MDOC_ARGS);
+static int mdoc_lk_pre(MDOC_ARGS);
+static int mdoc_mt_pre(MDOC_ARGS);
+static int mdoc_ms_pre(MDOC_ARGS);
+static int mdoc_nd_pre(MDOC_ARGS);
+static int mdoc_nm_pre(MDOC_ARGS);
+static int mdoc_no_pre(MDOC_ARGS);
+static int mdoc_ns_pre(MDOC_ARGS);
+static int mdoc_pa_pre(MDOC_ARGS);
+static void mdoc_pf_post(MDOC_ARGS);
+static int mdoc_pp_pre(MDOC_ARGS);
+static void mdoc_quote_post(MDOC_ARGS);
+static int mdoc_quote_pre(MDOC_ARGS);
+static int mdoc_rs_pre(MDOC_ARGS);
+static int mdoc_rv_pre(MDOC_ARGS);
+static int mdoc_sh_pre(MDOC_ARGS);
+static int mdoc_skip_pre(MDOC_ARGS);
+static int mdoc_sm_pre(MDOC_ARGS);
+static int mdoc_sp_pre(MDOC_ARGS);
+static int mdoc_ss_pre(MDOC_ARGS);
+static int mdoc_sx_pre(MDOC_ARGS);
+static int mdoc_sy_pre(MDOC_ARGS);
+static int mdoc_ud_pre(MDOC_ARGS);
+static int mdoc_va_pre(MDOC_ARGS);
+static int mdoc_vt_pre(MDOC_ARGS);
+static int mdoc_xr_pre(MDOC_ARGS);
+static int mdoc_xx_pre(MDOC_ARGS);
+/*
+ * Handler table, indexed by the node's macro token (print_mdoc_node()
+ * looks up mdocs[n->tok]).  Entries are positional; the trailing
+ * comment names the macro each slot is meant for, so the order
+ * presumably has to follow the MDOC_* token numbering declared
+ * outside this file -- verify before inserting or reordering.
+ * A NULL pre leaves the children to print_mdoc_node(); a NULL post
+ * means nothing is emitted after them.
+ */
+static const struct htmlmdoc mdocs[MDOC_MAX] = {
+ {mdoc_ap_pre, NULL}, /* Ap */
+ {NULL, NULL}, /* Dd */
+ {NULL, NULL}, /* Dt */
+ {NULL, NULL}, /* Os */
+ {mdoc_sh_pre, NULL }, /* Sh */
+ {mdoc_ss_pre, NULL }, /* Ss */
+ {mdoc_pp_pre, NULL}, /* Pp */
+ {mdoc_d1_pre, NULL}, /* D1 */
+ {mdoc_d1_pre, NULL}, /* Dl */
+ {mdoc_bd_pre, NULL}, /* Bd */
+ {NULL, NULL}, /* Ed */
+ {mdoc_bl_pre, NULL}, /* Bl */
+ {NULL, NULL}, /* El */
+ {mdoc_it_pre, NULL}, /* It */
+ {mdoc_ad_pre, NULL}, /* Ad */
+ {mdoc_an_pre, NULL}, /* An */
+ {mdoc_ar_pre, NULL}, /* Ar */
+ {mdoc_cd_pre, NULL}, /* Cd */
+ {mdoc_fl_pre, NULL}, /* Cm */
+ {mdoc_dv_pre, NULL}, /* Dv */
+ {mdoc_er_pre, NULL}, /* Er */
+ {mdoc_ev_pre, NULL}, /* Ev */
+ {mdoc_ex_pre, NULL}, /* Ex */
+ {mdoc_fa_pre, NULL}, /* Fa */
+ {mdoc_fd_pre, NULL}, /* Fd */
+ {mdoc_fl_pre, NULL}, /* Fl */
+ {mdoc_fn_pre, NULL}, /* Fn */
+ {mdoc_ft_pre, NULL}, /* Ft */
+ {mdoc_ic_pre, NULL}, /* Ic */
+ {mdoc_in_pre, NULL}, /* In */
+ {mdoc_li_pre, NULL}, /* Li */
+ {mdoc_nd_pre, NULL}, /* Nd */
+ {mdoc_nm_pre, NULL}, /* Nm */
+ {mdoc_quote_pre, mdoc_quote_post}, /* Op */
+ {mdoc_ft_pre, NULL}, /* Ot */
+ {mdoc_pa_pre, NULL}, /* Pa */
+ {mdoc_rv_pre, NULL}, /* Rv */
+ {NULL, NULL}, /* St */
+ {mdoc_va_pre, NULL}, /* Va */
+ {mdoc_vt_pre, NULL}, /* Vt */
+ {mdoc_xr_pre, NULL}, /* Xr */
+ {mdoc__x_pre, mdoc__x_post}, /* %A */
+ {mdoc__x_pre, mdoc__x_post}, /* %B */
+ {mdoc__x_pre, mdoc__x_post}, /* %D */
+ {mdoc__x_pre, mdoc__x_post}, /* %I */
+ {mdoc__x_pre, mdoc__x_post}, /* %J */
+ {mdoc__x_pre, mdoc__x_post}, /* %N */
+ {mdoc__x_pre, mdoc__x_post}, /* %O */
+ {mdoc__x_pre, mdoc__x_post}, /* %P */
+ {mdoc__x_pre, mdoc__x_post}, /* %R */
+ {mdoc__x_pre, mdoc__x_post}, /* %T */
+ {mdoc__x_pre, mdoc__x_post}, /* %V */
+ {NULL, NULL}, /* Ac */
+ {mdoc_quote_pre, mdoc_quote_post}, /* Ao */
+ {mdoc_quote_pre, mdoc_quote_post}, /* Aq */
+ {NULL, NULL}, /* At */
+ {NULL, NULL}, /* Bc */
+ {mdoc_bf_pre, NULL}, /* Bf */
+ {mdoc_quote_pre, mdoc_quote_post}, /* Bo */
+ {mdoc_quote_pre, mdoc_quote_post}, /* Bq */
+ {mdoc_xx_pre, NULL}, /* Bsx */
+ {mdoc_bx_pre, NULL}, /* Bx */
+ {mdoc_skip_pre, NULL}, /* Db */
+ {NULL, NULL}, /* Dc */
+ {mdoc_quote_pre, mdoc_quote_post}, /* Do */
+ {mdoc_quote_pre, mdoc_quote_post}, /* Dq */
+ {NULL, NULL}, /* Ec */ /* FIXME: no space */
+ {NULL, NULL}, /* Ef */
+ {mdoc_em_pre, NULL}, /* Em */
+ {mdoc_eo_pre, mdoc_eo_post}, /* Eo */
+ {mdoc_xx_pre, NULL}, /* Fx */
+ {mdoc_ms_pre, NULL}, /* Ms */
+ {mdoc_no_pre, NULL}, /* No */
+ {mdoc_ns_pre, NULL}, /* Ns */
+ {mdoc_xx_pre, NULL}, /* Nx */
+ {mdoc_xx_pre, NULL}, /* Ox */
+ {NULL, NULL}, /* Pc */
+ {mdoc_igndelim_pre, mdoc_pf_post}, /* Pf */
+ {mdoc_quote_pre, mdoc_quote_post}, /* Po */
+ {mdoc_quote_pre, mdoc_quote_post}, /* Pq */
+ {NULL, NULL}, /* Qc */
+ {mdoc_quote_pre, mdoc_quote_post}, /* Ql */
+ {mdoc_quote_pre, mdoc_quote_post}, /* Qo */
+ {mdoc_quote_pre, mdoc_quote_post}, /* Qq */
+ {NULL, NULL}, /* Re */
+ {mdoc_rs_pre, NULL}, /* Rs */
+ {NULL, NULL}, /* Sc */
+ {mdoc_quote_pre, mdoc_quote_post}, /* So */
+ {mdoc_quote_pre, mdoc_quote_post}, /* Sq */
+ {mdoc_sm_pre, NULL}, /* Sm */
+ {mdoc_sx_pre, NULL}, /* Sx */
+ {mdoc_sy_pre, NULL}, /* Sy */
+ {NULL, NULL}, /* Tn */
+ {mdoc_xx_pre, NULL}, /* Ux */
+ {NULL, NULL}, /* Xc */
+ {NULL, NULL}, /* Xo */
+ {mdoc_fo_pre, mdoc_fo_post}, /* Fo */
+ {NULL, NULL}, /* Fc */
+ {mdoc_quote_pre, mdoc_quote_post}, /* Oo */
+ {NULL, NULL}, /* Oc */
+ {mdoc_bk_pre, mdoc_bk_post}, /* Bk */
+ {NULL, NULL}, /* Ek */
+ {mdoc_bt_pre, NULL}, /* Bt */
+ {NULL, NULL}, /* Hf */
+ {mdoc_em_pre, NULL}, /* Fr */
+ {mdoc_ud_pre, NULL}, /* Ud */
+ {mdoc_lb_pre, NULL}, /* Lb */
+ {mdoc_pp_pre, NULL}, /* Lp */
+ {mdoc_lk_pre, NULL}, /* Lk */
+ {mdoc_mt_pre, NULL}, /* Mt */
+ {mdoc_quote_pre, mdoc_quote_post}, /* Brq */
+ {mdoc_quote_pre, mdoc_quote_post}, /* Bro */
+ {NULL, NULL}, /* Brc */
+ {mdoc__x_pre, mdoc__x_post}, /* %C */
+ {mdoc_skip_pre, NULL}, /* Es */
+ {mdoc_quote_pre, mdoc_quote_post}, /* En */
+ {mdoc_xx_pre, NULL}, /* Dx */
+ {mdoc__x_pre, mdoc__x_post}, /* %Q */
+ {mdoc_sp_pre, NULL}, /* br */
+ {mdoc_sp_pre, NULL}, /* sp */
+ {mdoc__x_pre, mdoc__x_post}, /* %U */
+ {NULL, NULL}, /* Ta */
+ {mdoc_skip_pre, NULL}, /* ll */
+};
+/*
+ * CSS class name for each list type, indexed by the enum mdoc_list
+ * value taken from Bl.type.  mdoc_it_pre() uses the entry as the class
+ * of the item tag and mdoc_bl_pre() prefixes it with "list " for the
+ * list container; both assert that the entry is non-NULL.  The first
+ * slot is NULL -- presumably the "no list type" enum value; confirm
+ * against enum mdoc_list.
+ */
+static const char * const lists[LIST_MAX] = {
+ NULL,
+ "list-bul",
+ "list-col",
+ "list-dash",
+ "list-diag",
+ "list-enum",
+ "list-hang",
+ "list-hyph",
+ "list-inset",
+ "list-item",
+ "list-ohang",
+ "list-tag"
+};
+
+
+/*
+ * Calculate the scaling unit passed in a `-width' argument. This uses
+ * either a native scaling unit (e.g., 1i, 2m) or the string length of
+ * the value.
+ */ +static void +a2width(const char *p, struct roffsu *su) +{ + + if (a2roffsu(p, su, SCALE_MAX) < 2) { + su->unit = SCALE_EN; + su->scale = html_strlen(p); + } else if (su->scale < 0.0) + su->scale = 0.0; +} + +/* + * See the same function in mdoc_term.c for documentation. + */ +static void +synopsis_pre(struct html *h, const struct roff_node *n) +{ + + if (NULL == n->prev || ! (MDOC_SYNPRETTY & n->flags)) + return; + + if (n->prev->tok == n->tok && + MDOC_Fo != n->tok && + MDOC_Ft != n->tok && + MDOC_Fn != n->tok) { + print_otag(h, TAG_BR, 0, NULL); + return; + } + + switch (n->prev->tok) { + case MDOC_Fd: + case MDOC_Fn: + case MDOC_Fo: + case MDOC_In: + case MDOC_Vt: + print_paragraph(h); + break; + case MDOC_Ft: + if (MDOC_Fn != n->tok && MDOC_Fo != n->tok) { + print_paragraph(h); + break; + } + /* FALLTHROUGH */ + default: + print_otag(h, TAG_BR, 0, NULL); + break; + } +} + +void +html_mdoc(void *arg, const struct roff_man *mdoc) +{ + struct htmlpair tag; + struct html *h; + struct tag *t, *tt; + + PAIR_CLASS_INIT(&tag, "mandoc"); + h = (struct html *)arg; + + if ( ! 
(HTML_FRAGMENT & h->oflags)) { + print_gen_decls(h); + t = print_otag(h, TAG_HTML, 0, NULL); + tt = print_otag(h, TAG_HEAD, 0, NULL); + print_mdoc_head(&mdoc->meta, mdoc->first->child, h); + print_tagq(h, tt); + print_otag(h, TAG_BODY, 0, NULL); + print_otag(h, TAG_DIV, 1, &tag); + } else + t = print_otag(h, TAG_DIV, 1, &tag); + + mdoc_root_pre(&mdoc->meta, mdoc->first->child, h); + print_mdoc_nodelist(&mdoc->meta, mdoc->first->child, h); + mdoc_root_post(&mdoc->meta, mdoc->first->child, h); + print_tagq(h, t); + putchar('\n'); +} + +static void +print_mdoc_head(MDOC_ARGS) +{ + + print_gen_head(h); + bufinit(h); + bufcat(h, meta->title); + if (meta->msec) + bufcat_fmt(h, "(%s)", meta->msec); + if (meta->arch) + bufcat_fmt(h, " (%s)", meta->arch); + + print_otag(h, TAG_TITLE, 0, NULL); + print_text(h, h->buf); +} + +static void +print_mdoc_nodelist(MDOC_ARGS) +{ + + while (n != NULL) { + print_mdoc_node(meta, n, h); + n = n->next; + } +} + +static void +print_mdoc_node(MDOC_ARGS) +{ + int child; + struct tag *t; + + child = 1; + t = h->tags.head; + n->flags &= ~MDOC_ENDED; + + switch (n->type) { + case ROFFT_TEXT: + /* No tables in this mode... */ + assert(NULL == h->tblt); + + /* + * Make sure that if we're in a literal mode already + * (i.e., within a <PRE>) don't print the newline. + */ + if (' ' == *n->string && MDOC_LINE & n->flags) + if ( ! (HTML_LITERAL & h->flags)) + print_otag(h, TAG_BR, 0, NULL); + if (MDOC_DELIMC & n->flags) + h->flags |= HTML_NOSPACE; + print_text(h, n->string); + if (MDOC_DELIMO & n->flags) + h->flags |= HTML_NOSPACE; + return; + case ROFFT_EQN: + if (n->flags & MDOC_LINE) + putchar('\n'); + print_eqn(h, n->eqn); + break; + case ROFFT_TBL: + /* + * This will take care of initialising all of the table + * state data for the first table, then tearing it down + * for the last one. + */ + print_tbl(h, n->span); + return; + default: + /* + * Close out the current table, if it's open, and unset + * the "meta" table state. 
This will be reopened on the + * next table element. + */ + if (h->tblt != NULL) { + print_tblclose(h); + t = h->tags.head; + } + assert(h->tblt == NULL); + if (mdocs[n->tok].pre && (n->end == ENDBODY_NOT || n->child)) + child = (*mdocs[n->tok].pre)(meta, n, h); + break; + } + + if (h->flags & HTML_KEEP && n->flags & MDOC_LINE) { + h->flags &= ~HTML_KEEP; + h->flags |= HTML_PREKEEP; + } + + if (child && n->child) + print_mdoc_nodelist(meta, n->child, h); + + print_stagq(h, t); + + switch (n->type) { + case ROFFT_EQN: + break; + default: + if ( ! mdocs[n->tok].post || n->flags & MDOC_ENDED) + break; + (*mdocs[n->tok].post)(meta, n, h); + if (n->end != ENDBODY_NOT) + n->body->flags |= MDOC_ENDED; + if (n->end == ENDBODY_NOSPACE) + h->flags |= HTML_NOSPACE; + break; + } +} + +static void +mdoc_root_post(MDOC_ARGS) +{ + struct htmlpair tag; + struct tag *t, *tt; + + PAIR_CLASS_INIT(&tag, "foot"); + t = print_otag(h, TAG_TABLE, 1, &tag); + + print_otag(h, TAG_TBODY, 0, NULL); + + tt = print_otag(h, TAG_TR, 0, NULL); + + PAIR_CLASS_INIT(&tag, "foot-date"); + print_otag(h, TAG_TD, 1, &tag); + print_text(h, meta->date); + print_stagq(h, tt); + + PAIR_CLASS_INIT(&tag, "foot-os"); + print_otag(h, TAG_TD, 1, &tag); + print_text(h, meta->os); + print_tagq(h, t); +} + +static int +mdoc_root_pre(MDOC_ARGS) +{ + struct htmlpair tag; + struct tag *t, *tt; + char *volume, *title; + + if (NULL == meta->arch) + volume = mandoc_strdup(meta->vol); + else + mandoc_asprintf(&volume, "%s (%s)", + meta->vol, meta->arch); + + if (NULL == meta->msec) + title = mandoc_strdup(meta->title); + else + mandoc_asprintf(&title, "%s(%s)", + meta->title, meta->msec); + + PAIR_CLASS_INIT(&tag, "head"); + t = print_otag(h, TAG_TABLE, 1, &tag); + + print_otag(h, TAG_TBODY, 0, NULL); + + tt = print_otag(h, TAG_TR, 0, NULL); + + PAIR_CLASS_INIT(&tag, "head-ltitle"); + print_otag(h, TAG_TD, 1, &tag); + print_text(h, title); + print_stagq(h, tt); + + PAIR_CLASS_INIT(&tag, "head-vol"); + print_otag(h, TAG_TD, 
1, &tag); + print_text(h, volume); + print_stagq(h, tt); + + PAIR_CLASS_INIT(&tag, "head-rtitle"); + print_otag(h, TAG_TD, 1, &tag); + print_text(h, title); + print_tagq(h, t); + + free(title); + free(volume); + return 1; +} + +static int +mdoc_sh_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + switch (n->type) { + case ROFFT_BLOCK: + PAIR_CLASS_INIT(&tag, "section"); + print_otag(h, TAG_DIV, 1, &tag); + return 1; + case ROFFT_BODY: + if (n->sec == SEC_AUTHORS) + h->flags &= ~(HTML_SPLIT|HTML_NOSPLIT); + return 1; + default: + break; + } + + bufinit(h); + + for (n = n->child; n != NULL && n->type == ROFFT_TEXT; ) { + bufcat_id(h, n->string); + if (NULL != (n = n->next)) + bufcat_id(h, " "); + } + + if (NULL == n) { + PAIR_ID_INIT(&tag, h->buf); + print_otag(h, TAG_H1, 1, &tag); + } else + print_otag(h, TAG_H1, 0, NULL); + + return 1; +} + +static int +mdoc_ss_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + if (n->type == ROFFT_BLOCK) { + PAIR_CLASS_INIT(&tag, "subsection"); + print_otag(h, TAG_DIV, 1, &tag); + return 1; + } else if (n->type == ROFFT_BODY) + return 1; + + bufinit(h); + + for (n = n->child; n != NULL && n->type == ROFFT_TEXT; ) { + bufcat_id(h, n->string); + if (NULL != (n = n->next)) + bufcat_id(h, " "); + } + + if (NULL == n) { + PAIR_ID_INIT(&tag, h->buf); + print_otag(h, TAG_H2, 1, &tag); + } else + print_otag(h, TAG_H2, 0, NULL); + + return 1; +} + +static int +mdoc_fl_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "flag"); + print_otag(h, TAG_B, 1, &tag); + + /* `Cm' has no leading hyphen. */ + + if (MDOC_Cm == n->tok) + return 1; + + print_text(h, "\\-"); + + if (!(n->child == NULL && + (n->next == NULL || + n->next->type == ROFFT_TEXT || + n->next->flags & MDOC_LINE))) + h->flags |= HTML_NOSPACE; + + return 1; +} + +static int +mdoc_nd_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + if (n->type != ROFFT_BODY) + return 1; + + /* XXX: this tag in theory can contain block elements. 
*/ + + print_text(h, "\\(em"); + PAIR_CLASS_INIT(&tag, "desc"); + print_otag(h, TAG_SPAN, 1, &tag); + return 1; +} + +static int +mdoc_nm_pre(MDOC_ARGS) +{ + struct htmlpair tag; + struct roffsu su; + int len; + + switch (n->type) { + case ROFFT_HEAD: + print_otag(h, TAG_TD, 0, NULL); + /* FALLTHROUGH */ + case ROFFT_ELEM: + PAIR_CLASS_INIT(&tag, "name"); + print_otag(h, TAG_B, 1, &tag); + if (n->child == NULL && meta->name != NULL) + print_text(h, meta->name); + return 1; + case ROFFT_BODY: + print_otag(h, TAG_TD, 0, NULL); + return 1; + default: + break; + } + + synopsis_pre(h, n); + PAIR_CLASS_INIT(&tag, "synopsis"); + print_otag(h, TAG_TABLE, 1, &tag); + + for (len = 0, n = n->head->child; n; n = n->next) + if (n->type == ROFFT_TEXT) + len += html_strlen(n->string); + + if (len == 0 && meta->name != NULL) + len = html_strlen(meta->name); + + SCALE_HS_INIT(&su, len); + bufinit(h); + bufcat_su(h, "width", &su); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_COL, 1, &tag); + print_otag(h, TAG_COL, 0, NULL); + print_otag(h, TAG_TBODY, 0, NULL); + print_otag(h, TAG_TR, 0, NULL); + return 1; +} + +static int +mdoc_xr_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + + if (NULL == n->child) + return 0; + + PAIR_CLASS_INIT(&tag[0], "link-man"); + + if (h->base_man) { + buffmt_man(h, n->child->string, + n->child->next ? + n->child->next->string : NULL); + PAIR_HREF_INIT(&tag[1], h->buf); + print_otag(h, TAG_A, 2, tag); + } else + print_otag(h, TAG_A, 1, tag); + + n = n->child; + print_text(h, n->string); + + if (NULL == (n = n->next)) + return 0; + + h->flags |= HTML_NOSPACE; + print_text(h, "("); + h->flags |= HTML_NOSPACE; + print_text(h, n->string); + h->flags |= HTML_NOSPACE; + print_text(h, ")"); + return 0; +} + +static int +mdoc_ns_pre(MDOC_ARGS) +{ + + if ( ! 
(MDOC_LINE & n->flags)) + h->flags |= HTML_NOSPACE; + return 1; +} + +static int +mdoc_ar_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "arg"); + print_otag(h, TAG_I, 1, &tag); + return 1; +} + +static int +mdoc_xx_pre(MDOC_ARGS) +{ + const char *pp; + struct htmlpair tag; + int flags; + + switch (n->tok) { + case MDOC_Bsx: + pp = "BSD/OS"; + break; + case MDOC_Dx: + pp = "DragonFly"; + break; + case MDOC_Fx: + pp = "FreeBSD"; + break; + case MDOC_Nx: + pp = "NetBSD"; + break; + case MDOC_Ox: + pp = "OpenBSD"; + break; + case MDOC_Ux: + pp = "UNIX"; + break; + default: + return 1; + } + + PAIR_CLASS_INIT(&tag, "unix"); + print_otag(h, TAG_SPAN, 1, &tag); + + print_text(h, pp); + if (n->child) { + flags = h->flags; + h->flags |= HTML_KEEP; + print_text(h, n->child->string); + h->flags = flags; + } + return 0; +} + +static int +mdoc_bx_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "unix"); + print_otag(h, TAG_SPAN, 1, &tag); + + if (NULL != (n = n->child)) { + print_text(h, n->string); + h->flags |= HTML_NOSPACE; + print_text(h, "BSD"); + } else { + print_text(h, "BSD"); + return 0; + } + + if (NULL != (n = n->next)) { + h->flags |= HTML_NOSPACE; + print_text(h, "-"); + h->flags |= HTML_NOSPACE; + print_text(h, n->string); + } + + return 0; +} + +static int +mdoc_it_pre(MDOC_ARGS) +{ + struct roffsu su; + enum mdoc_list type; + struct htmlpair tag[2]; + const struct roff_node *bl; + + bl = n->parent; + while (bl && MDOC_Bl != bl->tok) + bl = bl->parent; + + assert(bl); + + type = bl->norm->Bl.type; + + assert(lists[type]); + PAIR_CLASS_INIT(&tag[0], lists[type]); + + bufinit(h); + + if (n->type == ROFFT_HEAD) { + switch (type) { + case LIST_bullet: + case LIST_dash: + case LIST_item: + case LIST_hyphen: + case LIST_enum: + return 0; + case LIST_diag: + case LIST_hang: + case LIST_inset: + case LIST_ohang: + case LIST_tag: + SCALE_VS_INIT(&su, ! 
bl->norm->Bl.comp); + bufcat_su(h, "margin-top", &su); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_DT, 2, tag); + if (LIST_diag != type) + break; + PAIR_CLASS_INIT(&tag[0], "diag"); + print_otag(h, TAG_B, 1, tag); + break; + case LIST_column: + break; + default: + break; + } + } else if (n->type == ROFFT_BODY) { + switch (type) { + case LIST_bullet: + case LIST_hyphen: + case LIST_dash: + case LIST_enum: + case LIST_item: + SCALE_VS_INIT(&su, ! bl->norm->Bl.comp); + bufcat_su(h, "margin-top", &su); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_LI, 2, tag); + break; + case LIST_diag: + case LIST_hang: + case LIST_inset: + case LIST_ohang: + case LIST_tag: + if (NULL == bl->norm->Bl.width) { + print_otag(h, TAG_DD, 1, tag); + break; + } + a2width(bl->norm->Bl.width, &su); + bufcat_su(h, "margin-left", &su); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_DD, 2, tag); + break; + case LIST_column: + SCALE_VS_INIT(&su, ! bl->norm->Bl.comp); + bufcat_su(h, "margin-top", &su); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_TD, 2, tag); + break; + default: + break; + } + } else { + switch (type) { + case LIST_column: + print_otag(h, TAG_TR, 1, tag); + break; + default: + break; + } + } + + return 1; +} + +static int +mdoc_bl_pre(MDOC_ARGS) +{ + int i; + struct htmlpair tag[3]; + struct roffsu su; + char buf[BUFSIZ]; + + if (n->type == ROFFT_BODY) { + if (LIST_column == n->norm->Bl.type) + print_otag(h, TAG_TBODY, 0, NULL); + return 1; + } + + if (n->type == ROFFT_HEAD) { + if (LIST_column != n->norm->Bl.type) + return 0; + + /* + * For each column, print out the <COL> tag with our + * suggested width. The last column gets min-width, as + * in terminal mode it auto-sizes to the width of the + * screen and we want to preserve that behaviour. 
+ */ + + for (i = 0; i < (int)n->norm->Bl.ncols; i++) { + bufinit(h); + a2width(n->norm->Bl.cols[i], &su); + if (i < (int)n->norm->Bl.ncols - 1) + bufcat_su(h, "width", &su); + else + bufcat_su(h, "min-width", &su); + PAIR_STYLE_INIT(&tag[0], h); + print_otag(h, TAG_COL, 1, tag); + } + + return 0; + } + + SCALE_VS_INIT(&su, 0); + bufinit(h); + bufcat_su(h, "margin-top", &su); + bufcat_su(h, "margin-bottom", &su); + PAIR_STYLE_INIT(&tag[0], h); + + assert(lists[n->norm->Bl.type]); + (void)strlcpy(buf, "list ", BUFSIZ); + (void)strlcat(buf, lists[n->norm->Bl.type], BUFSIZ); + PAIR_INIT(&tag[1], ATTR_CLASS, buf); + + /* Set the block's left-hand margin. */ + + if (n->norm->Bl.offs) { + a2width(n->norm->Bl.offs, &su); + bufcat_su(h, "margin-left", &su); + } + + switch (n->norm->Bl.type) { + case LIST_bullet: + case LIST_dash: + case LIST_hyphen: + case LIST_item: + print_otag(h, TAG_UL, 2, tag); + break; + case LIST_enum: + print_otag(h, TAG_OL, 2, tag); + break; + case LIST_diag: + case LIST_hang: + case LIST_inset: + case LIST_ohang: + case LIST_tag: + print_otag(h, TAG_DL, 2, tag); + break; + case LIST_column: + print_otag(h, TAG_TABLE, 2, tag); + break; + default: + abort(); + } + + return 1; +} + +static int +mdoc_ex_pre(MDOC_ARGS) +{ + struct htmlpair tag; + struct tag *t; + struct roff_node *nch; + + if (n->prev) + print_otag(h, TAG_BR, 0, NULL); + + PAIR_CLASS_INIT(&tag, "utility"); + + print_text(h, "The"); + + for (nch = n->child; nch != NULL; nch = nch->next) { + assert(nch->type == ROFFT_TEXT); + + t = print_otag(h, TAG_B, 1, &tag); + print_text(h, nch->string); + print_tagq(h, t); + + if (nch->next == NULL) + continue; + + if (nch->prev != NULL || nch->next->next != NULL) { + h->flags |= HTML_NOSPACE; + print_text(h, ","); + } + + if (nch->next->next == NULL) + print_text(h, "and"); + } + + if (n->child != NULL && n->child->next != NULL) + print_text(h, "utilities exit\\~0"); + else + print_text(h, "utility exits\\~0"); + + print_text(h, "on success, 
and\\~>0 if an error occurs."); + return 0; +} + +static int +mdoc_em_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "emph"); + print_otag(h, TAG_SPAN, 1, &tag); + return 1; +} + +static int +mdoc_d1_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + struct roffsu su; + + if (n->type != ROFFT_BLOCK) + return 1; + + SCALE_VS_INIT(&su, 0); + bufinit(h); + bufcat_su(h, "margin-top", &su); + bufcat_su(h, "margin-bottom", &su); + PAIR_STYLE_INIT(&tag[0], h); + print_otag(h, TAG_BLOCKQUOTE, 1, tag); + + /* BLOCKQUOTE needs a block body. */ + + PAIR_CLASS_INIT(&tag[0], "display"); + print_otag(h, TAG_DIV, 1, tag); + + if (MDOC_Dl == n->tok) { + PAIR_CLASS_INIT(&tag[0], "lit"); + print_otag(h, TAG_CODE, 1, tag); + } + + return 1; +} + +static int +mdoc_sx_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + + bufinit(h); + bufcat(h, "#"); + + for (n = n->child; n; ) { + bufcat_id(h, n->string); + if (NULL != (n = n->next)) + bufcat_id(h, " "); + } + + PAIR_CLASS_INIT(&tag[0], "link-sec"); + PAIR_HREF_INIT(&tag[1], h->buf); + + print_otag(h, TAG_I, 1, tag); + print_otag(h, TAG_A, 2, tag); + return 1; +} + +static int +mdoc_bd_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + int comp, sv; + struct roff_node *nn; + struct roffsu su; + + if (n->type == ROFFT_HEAD) + return 0; + + if (n->type == ROFFT_BLOCK) { + comp = n->norm->Bd.comp; + for (nn = n; nn && ! comp; nn = nn->parent) { + if (nn->type != ROFFT_BLOCK) + continue; + if (MDOC_Ss == nn->tok || MDOC_Sh == nn->tok) + comp = 1; + if (nn->prev) + break; + } + if ( ! comp) + print_paragraph(h); + return 1; + } + + /* Handle the -offset argument. */ + + if (n->norm->Bd.offs == NULL || + ! strcmp(n->norm->Bd.offs, "left")) + SCALE_HS_INIT(&su, 0); + else if ( ! strcmp(n->norm->Bd.offs, "indent")) + SCALE_HS_INIT(&su, INDENT); + else if ( ! 
strcmp(n->norm->Bd.offs, "indent-two")) + SCALE_HS_INIT(&su, INDENT * 2); + else + a2width(n->norm->Bd.offs, &su); + + bufinit(h); + bufcat_su(h, "margin-left", &su); + PAIR_STYLE_INIT(&tag[0], h); + + if (DISP_unfilled != n->norm->Bd.type && + DISP_literal != n->norm->Bd.type) { + PAIR_CLASS_INIT(&tag[1], "display"); + print_otag(h, TAG_DIV, 2, tag); + return 1; + } + + PAIR_CLASS_INIT(&tag[1], "lit display"); + print_otag(h, TAG_PRE, 2, tag); + + /* This can be recursive: save & set our literal state. */ + + sv = h->flags & HTML_LITERAL; + h->flags |= HTML_LITERAL; + + for (nn = n->child; nn; nn = nn->next) { + print_mdoc_node(meta, nn, h); + /* + * If the printed node flushes its own line, then we + * needn't do it here as well. This is hacky, but the + * notion of selective eoln whitespace is pretty dumb + * anyway, so don't sweat it. + */ + switch (nn->tok) { + case MDOC_Sm: + case MDOC_br: + case MDOC_sp: + case MDOC_Bl: + case MDOC_D1: + case MDOC_Dl: + case MDOC_Lp: + case MDOC_Pp: + continue; + default: + break; + } + if (h->flags & HTML_NONEWLINE || + (nn->next && ! (nn->next->flags & MDOC_LINE))) + continue; + else if (nn->next) + print_text(h, "\n"); + + h->flags |= HTML_NOSPACE; + } + + if (0 == sv) + h->flags &= ~HTML_LITERAL; + + return 0; +} + +static int +mdoc_pa_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "file"); + print_otag(h, TAG_I, 1, &tag); + return 1; +} + +static int +mdoc_ad_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "addr"); + print_otag(h, TAG_I, 1, &tag); + return 1; +} + +static int +mdoc_an_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + if (n->norm->An.auth == AUTH_split) { + h->flags &= ~HTML_NOSPLIT; + h->flags |= HTML_SPLIT; + return 0; + } + if (n->norm->An.auth == AUTH_nosplit) { + h->flags &= ~HTML_SPLIT; + h->flags |= HTML_NOSPLIT; + return 0; + } + + if (h->flags & HTML_SPLIT) + print_otag(h, TAG_BR, 0, NULL); + + if (n->sec == SEC_AUTHORS && ! 
(h->flags & HTML_NOSPLIT)) + h->flags |= HTML_SPLIT; + + PAIR_CLASS_INIT(&tag, "author"); + print_otag(h, TAG_SPAN, 1, &tag); + return 1; +} + +static int +mdoc_cd_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + synopsis_pre(h, n); + PAIR_CLASS_INIT(&tag, "config"); + print_otag(h, TAG_B, 1, &tag); + return 1; +} + +static int +mdoc_dv_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "define"); + print_otag(h, TAG_SPAN, 1, &tag); + return 1; +} + +static int +mdoc_ev_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "env"); + print_otag(h, TAG_SPAN, 1, &tag); + return 1; +} + +static int +mdoc_er_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "errno"); + print_otag(h, TAG_SPAN, 1, &tag); + return 1; +} + +static int +mdoc_fa_pre(MDOC_ARGS) +{ + const struct roff_node *nn; + struct htmlpair tag; + struct tag *t; + + PAIR_CLASS_INIT(&tag, "farg"); + if (n->parent->tok != MDOC_Fo) { + print_otag(h, TAG_I, 1, &tag); + return 1; + } + + for (nn = n->child; nn; nn = nn->next) { + t = print_otag(h, TAG_I, 1, &tag); + print_text(h, nn->string); + print_tagq(h, t); + if (nn->next) { + h->flags |= HTML_NOSPACE; + print_text(h, ","); + } + } + + if (n->child && n->next && n->next->tok == MDOC_Fa) { + h->flags |= HTML_NOSPACE; + print_text(h, ","); + } + + return 0; +} + +static int +mdoc_fd_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + char buf[BUFSIZ]; + size_t sz; + int i; + struct tag *t; + + synopsis_pre(h, n); + + if (NULL == (n = n->child)) + return 0; + + assert(n->type == ROFFT_TEXT); + + if (strcmp(n->string, "#include")) { + PAIR_CLASS_INIT(&tag[0], "macro"); + print_otag(h, TAG_B, 1, tag); + return 1; + } + + PAIR_CLASS_INIT(&tag[0], "includes"); + print_otag(h, TAG_B, 1, tag); + print_text(h, n->string); + + if (NULL != (n = n->next)) { + assert(n->type == ROFFT_TEXT); + + /* + * XXX This is broken and not easy to fix. + * When using -Oincludes, truncation may occur. 
+ * Dynamic allocation wouldn't help because + * passing long strings to buffmt_includes() + * does not work either. + */ + + strlcpy(buf, '<' == *n->string || '"' == *n->string ? + n->string + 1 : n->string, BUFSIZ); + + sz = strlen(buf); + if (sz && ('>' == buf[sz - 1] || '"' == buf[sz - 1])) + buf[sz - 1] = '\0'; + + PAIR_CLASS_INIT(&tag[0], "link-includes"); + + i = 1; + if (h->base_includes) { + buffmt_includes(h, buf); + PAIR_HREF_INIT(&tag[i], h->buf); + i++; + } + + t = print_otag(h, TAG_A, i, tag); + print_text(h, n->string); + print_tagq(h, t); + + n = n->next; + } + + for ( ; n; n = n->next) { + assert(n->type == ROFFT_TEXT); + print_text(h, n->string); + } + + return 0; +} + +static int +mdoc_vt_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + if (n->type == ROFFT_BLOCK) { + synopsis_pre(h, n); + return 1; + } else if (n->type == ROFFT_ELEM) { + synopsis_pre(h, n); + } else if (n->type == ROFFT_HEAD) + return 0; + + PAIR_CLASS_INIT(&tag, "type"); + print_otag(h, TAG_SPAN, 1, &tag); + return 1; +} + +static int +mdoc_ft_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + synopsis_pre(h, n); + PAIR_CLASS_INIT(&tag, "ftype"); + print_otag(h, TAG_I, 1, &tag); + return 1; +} + +static int +mdoc_fn_pre(MDOC_ARGS) +{ + struct tag *t; + struct htmlpair tag[2]; + char nbuf[BUFSIZ]; + const char *sp, *ep; + int sz, i, pretty; + + pretty = MDOC_SYNPRETTY & n->flags; + synopsis_pre(h, n); + + /* Split apart into type and name. */ + assert(n->child->string); + sp = n->child->string; + + ep = strchr(sp, ' '); + if (NULL != ep) { + PAIR_CLASS_INIT(&tag[0], "ftype"); + t = print_otag(h, TAG_I, 1, tag); + + while (ep) { + sz = MIN((int)(ep - sp), BUFSIZ - 1); + (void)memcpy(nbuf, sp, (size_t)sz); + nbuf[sz] = '\0'; + print_text(h, nbuf); + sp = ++ep; + ep = strchr(sp, ' '); + } + print_tagq(h, t); + } + + PAIR_CLASS_INIT(&tag[0], "fname"); + + /* + * FIXME: only refer to IDs that we know exist. 
+ */ + +#if 0 + if (MDOC_SYNPRETTY & n->flags) { + nbuf[0] = '\0'; + html_idcat(nbuf, sp, BUFSIZ); + PAIR_ID_INIT(&tag[1], nbuf); + } else { + strlcpy(nbuf, "#", BUFSIZ); + html_idcat(nbuf, sp, BUFSIZ); + PAIR_HREF_INIT(&tag[1], nbuf); + } +#endif + + t = print_otag(h, TAG_B, 1, tag); + + if (sp) + print_text(h, sp); + + print_tagq(h, t); + + h->flags |= HTML_NOSPACE; + print_text(h, "("); + h->flags |= HTML_NOSPACE; + + PAIR_CLASS_INIT(&tag[0], "farg"); + bufinit(h); + bufcat_style(h, "white-space", "nowrap"); + PAIR_STYLE_INIT(&tag[1], h); + + for (n = n->child->next; n; n = n->next) { + i = 1; + if (MDOC_SYNPRETTY & n->flags) + i = 2; + t = print_otag(h, TAG_I, i, tag); + print_text(h, n->string); + print_tagq(h, t); + if (n->next) { + h->flags |= HTML_NOSPACE; + print_text(h, ","); + } + } + + h->flags |= HTML_NOSPACE; + print_text(h, ")"); + + if (pretty) { + h->flags |= HTML_NOSPACE; + print_text(h, ";"); + } + + return 0; +} + +static int +mdoc_sm_pre(MDOC_ARGS) +{ + + if (NULL == n->child) + h->flags ^= HTML_NONOSPACE; + else if (0 == strcmp("on", n->child->string)) + h->flags &= ~HTML_NONOSPACE; + else + h->flags |= HTML_NONOSPACE; + + if ( ! (HTML_NONOSPACE & h->flags)) + h->flags &= ~HTML_NOSPACE; + + return 0; +} + +static int +mdoc_skip_pre(MDOC_ARGS) +{ + + return 0; +} + +static int +mdoc_pp_pre(MDOC_ARGS) +{ + + print_paragraph(h); + return 0; +} + +static int +mdoc_sp_pre(MDOC_ARGS) +{ + struct roffsu su; + struct htmlpair tag; + + SCALE_VS_INIT(&su, 1); + + if (MDOC_sp == n->tok) { + if (NULL != (n = n->child)) { + if ( ! 
a2roffsu(n->string, &su, SCALE_VS)) + su.scale = 1.0; + else if (su.scale < 0.0) + su.scale = 0.0; + } + } else + su.scale = 0.0; + + bufinit(h); + bufcat_su(h, "height", &su); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, 1, &tag); + + /* So the div isn't empty: */ + print_text(h, "\\~"); + + return 0; + +} + +static int +mdoc_lk_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + + if (NULL == (n = n->child)) + return 0; + + assert(n->type == ROFFT_TEXT); + + PAIR_CLASS_INIT(&tag[0], "link-ext"); + PAIR_HREF_INIT(&tag[1], n->string); + + print_otag(h, TAG_A, 2, tag); + + if (NULL == n->next) + print_text(h, n->string); + + for (n = n->next; n; n = n->next) + print_text(h, n->string); + + return 0; +} + +static int +mdoc_mt_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + struct tag *t; + + PAIR_CLASS_INIT(&tag[0], "link-mail"); + + for (n = n->child; n; n = n->next) { + assert(n->type == ROFFT_TEXT); + + bufinit(h); + bufcat(h, "mailto:"); + bufcat(h, n->string); + + PAIR_HREF_INIT(&tag[1], h->buf); + t = print_otag(h, TAG_A, 2, tag); + print_text(h, n->string); + print_tagq(h, t); + } + + return 0; +} + +static int +mdoc_fo_pre(MDOC_ARGS) +{ + struct htmlpair tag; + struct tag *t; + + if (n->type == ROFFT_BODY) { + h->flags |= HTML_NOSPACE; + print_text(h, "("); + h->flags |= HTML_NOSPACE; + return 1; + } else if (n->type == ROFFT_BLOCK) { + synopsis_pre(h, n); + return 1; + } + + if (n->child == NULL) + return 0; + + assert(n->child->string); + PAIR_CLASS_INIT(&tag, "fname"); + t = print_otag(h, TAG_B, 1, &tag); + print_text(h, n->child->string); + print_tagq(h, t); + return 0; +} + +static void +mdoc_fo_post(MDOC_ARGS) +{ + + if (n->type != ROFFT_BODY) + return; + h->flags |= HTML_NOSPACE; + print_text(h, ")"); + h->flags |= HTML_NOSPACE; + print_text(h, ";"); +} + +static int +mdoc_in_pre(MDOC_ARGS) +{ + struct tag *t; + struct htmlpair tag[2]; + int i; + + synopsis_pre(h, n); + + PAIR_CLASS_INIT(&tag[0], "includes"); + print_otag(h, TAG_B, 1, tag); + + /* + * 
The first argument of the `In' gets special treatment as + * being a linked value. Subsequent values are printed + * afterward. groff does similarly. This also handles the case + * of no children. + */ + + if (MDOC_SYNPRETTY & n->flags && MDOC_LINE & n->flags) + print_text(h, "#include"); + + print_text(h, "<"); + h->flags |= HTML_NOSPACE; + + if (NULL != (n = n->child)) { + assert(n->type == ROFFT_TEXT); + + PAIR_CLASS_INIT(&tag[0], "link-includes"); + + i = 1; + if (h->base_includes) { + buffmt_includes(h, n->string); + PAIR_HREF_INIT(&tag[i], h->buf); + i++; + } + + t = print_otag(h, TAG_A, i, tag); + print_text(h, n->string); + print_tagq(h, t); + + n = n->next; + } + + h->flags |= HTML_NOSPACE; + print_text(h, ">"); + + for ( ; n; n = n->next) { + assert(n->type == ROFFT_TEXT); + print_text(h, n->string); + } + + return 0; +} + +static int +mdoc_ic_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "cmd"); + print_otag(h, TAG_B, 1, &tag); + return 1; +} + +static int +mdoc_rv_pre(MDOC_ARGS) +{ + struct htmlpair tag; + struct tag *t; + struct roff_node *nch; + + if (n->prev) + print_otag(h, TAG_BR, 0, NULL); + + PAIR_CLASS_INIT(&tag, "fname"); + + if (n->child != NULL) { + print_text(h, "The"); + + for (nch = n->child; nch != NULL; nch = nch->next) { + t = print_otag(h, TAG_B, 1, &tag); + print_text(h, nch->string); + print_tagq(h, t); + + h->flags |= HTML_NOSPACE; + print_text(h, "()"); + + if (nch->next == NULL) + continue; + + if (nch->prev != NULL || nch->next->next != NULL) { + h->flags |= HTML_NOSPACE; + print_text(h, ","); + } + if (nch->next->next == NULL) + print_text(h, "and"); + } + + if (n->child != NULL && n->child->next != NULL) + print_text(h, "functions return"); + else + print_text(h, "function returns"); + + print_text(h, "the value\\~0 if successful;"); + } else + print_text(h, "Upon successful completion," + " the value\\~0 is returned;"); + + print_text(h, "otherwise the value\\~\\-1 is returned" + " and the global 
variable"); + + PAIR_CLASS_INIT(&tag, "var"); + t = print_otag(h, TAG_B, 1, &tag); + print_text(h, "errno"); + print_tagq(h, t); + print_text(h, "is set to indicate the error."); + return 0; +} + +static int +mdoc_va_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "var"); + print_otag(h, TAG_B, 1, &tag); + return 1; +} + +static int +mdoc_ap_pre(MDOC_ARGS) +{ + + h->flags |= HTML_NOSPACE; + print_text(h, "\\(aq"); + h->flags |= HTML_NOSPACE; + return 1; +} + +static int +mdoc_bf_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + struct roffsu su; + + if (n->type == ROFFT_HEAD) + return 0; + else if (n->type != ROFFT_BODY) + return 1; + + if (FONT_Em == n->norm->Bf.font) + PAIR_CLASS_INIT(&tag[0], "emph"); + else if (FONT_Sy == n->norm->Bf.font) + PAIR_CLASS_INIT(&tag[0], "symb"); + else if (FONT_Li == n->norm->Bf.font) + PAIR_CLASS_INIT(&tag[0], "lit"); + else + PAIR_CLASS_INIT(&tag[0], "none"); + + /* + * We want this to be inline-formatted, but needs to be div to + * accept block children. + */ + bufinit(h); + bufcat_style(h, "display", "inline"); + SCALE_HS_INIT(&su, 1); + /* Needs a left-margin for spacing. */ + bufcat_su(h, "margin-left", &su); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_DIV, 2, tag); + return 1; +} + +static int +mdoc_ms_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "symb"); + print_otag(h, TAG_SPAN, 1, &tag); + return 1; +} + +static int +mdoc_igndelim_pre(MDOC_ARGS) +{ + + h->flags |= HTML_IGNDELIM; + return 1; +} + +static void +mdoc_pf_post(MDOC_ARGS) +{ + + if ( ! 
(n->next == NULL || n->next->flags & MDOC_LINE)) + h->flags |= HTML_NOSPACE; +} + +static int +mdoc_rs_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + if (n->type != ROFFT_BLOCK) + return 1; + + if (n->prev && SEC_SEE_ALSO == n->sec) + print_paragraph(h); + + PAIR_CLASS_INIT(&tag, "ref"); + print_otag(h, TAG_SPAN, 1, &tag); + return 1; +} + +static int +mdoc_no_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "none"); + print_otag(h, TAG_CODE, 1, &tag); + return 1; +} + +static int +mdoc_li_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "lit"); + print_otag(h, TAG_CODE, 1, &tag); + return 1; +} + +static int +mdoc_sy_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "symb"); + print_otag(h, TAG_SPAN, 1, &tag); + return 1; +} + +static int +mdoc_bt_pre(MDOC_ARGS) +{ + + print_text(h, "is currently in beta test."); + return 0; +} + +static int +mdoc_ud_pre(MDOC_ARGS) +{ + + print_text(h, "currently under development."); + return 0; +} + +static int +mdoc_lb_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + if (SEC_LIBRARY == n->sec && MDOC_LINE & n->flags && n->prev) + print_otag(h, TAG_BR, 0, NULL); + + PAIR_CLASS_INIT(&tag, "lib"); + print_otag(h, TAG_SPAN, 1, &tag); + return 1; +} + +static int +mdoc__x_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + enum htmltag t; + + t = TAG_SPAN; + + switch (n->tok) { + case MDOC__A: + PAIR_CLASS_INIT(&tag[0], "ref-auth"); + if (n->prev && MDOC__A == n->prev->tok) + if (NULL == n->next || MDOC__A != n->next->tok) + print_text(h, "and"); + break; + case MDOC__B: + PAIR_CLASS_INIT(&tag[0], "ref-book"); + t = TAG_I; + break; + case MDOC__C: + PAIR_CLASS_INIT(&tag[0], "ref-city"); + break; + case MDOC__D: + PAIR_CLASS_INIT(&tag[0], "ref-date"); + break; + case MDOC__I: + PAIR_CLASS_INIT(&tag[0], "ref-issue"); + t = TAG_I; + break; + case MDOC__J: + PAIR_CLASS_INIT(&tag[0], "ref-jrnl"); + t = TAG_I; + break; + case MDOC__N: + PAIR_CLASS_INIT(&tag[0], "ref-num"); + break; + case MDOC__O: 
+ PAIR_CLASS_INIT(&tag[0], "ref-opt"); + break; + case MDOC__P: + PAIR_CLASS_INIT(&tag[0], "ref-page"); + break; + case MDOC__Q: + PAIR_CLASS_INIT(&tag[0], "ref-corp"); + break; + case MDOC__R: + PAIR_CLASS_INIT(&tag[0], "ref-rep"); + break; + case MDOC__T: + PAIR_CLASS_INIT(&tag[0], "ref-title"); + break; + case MDOC__U: + PAIR_CLASS_INIT(&tag[0], "link-ref"); + break; + case MDOC__V: + PAIR_CLASS_INIT(&tag[0], "ref-vol"); + break; + default: + abort(); + } + + if (MDOC__U != n->tok) { + print_otag(h, t, 1, tag); + return 1; + } + + PAIR_HREF_INIT(&tag[1], n->child->string); + print_otag(h, TAG_A, 2, tag); + + return 1; +} + +static void +mdoc__x_post(MDOC_ARGS) +{ + + if (MDOC__A == n->tok && n->next && MDOC__A == n->next->tok) + if (NULL == n->next->next || MDOC__A != n->next->next->tok) + if (NULL == n->prev || MDOC__A != n->prev->tok) + return; + + /* TODO: %U */ + + if (NULL == n->parent || MDOC_Rs != n->parent->tok) + return; + + h->flags |= HTML_NOSPACE; + print_text(h, n->next ? "," : "."); +} + +static int +mdoc_bk_pre(MDOC_ARGS) +{ + + switch (n->type) { + case ROFFT_BLOCK: + break; + case ROFFT_HEAD: + return 0; + case ROFFT_BODY: + if (n->parent->args != NULL || n->prev->child == NULL) + h->flags |= HTML_PREKEEP; + break; + default: + abort(); + } + + return 1; +} + +static void +mdoc_bk_post(MDOC_ARGS) +{ + + if (n->type == ROFFT_BODY) + h->flags &= ~(HTML_KEEP | HTML_PREKEEP); +} + +static int +mdoc_quote_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + if (n->type != ROFFT_BODY) + return 1; + + switch (n->tok) { + case MDOC_Ao: + case MDOC_Aq: + print_text(h, n->child != NULL && n->child->next == NULL && + n->child->tok == MDOC_Mt ? 
"<" : "\\(la"); + break; + case MDOC_Bro: + case MDOC_Brq: + print_text(h, "\\(lC"); + break; + case MDOC_Bo: + case MDOC_Bq: + print_text(h, "\\(lB"); + break; + case MDOC_Oo: + case MDOC_Op: + print_text(h, "\\(lB"); + h->flags |= HTML_NOSPACE; + PAIR_CLASS_INIT(&tag, "opt"); + print_otag(h, TAG_SPAN, 1, &tag); + break; + case MDOC_En: + if (NULL == n->norm->Es || + NULL == n->norm->Es->child) + return 1; + print_text(h, n->norm->Es->child->string); + break; + case MDOC_Do: + case MDOC_Dq: + case MDOC_Qo: + case MDOC_Qq: + print_text(h, "\\(lq"); + break; + case MDOC_Po: + case MDOC_Pq: + print_text(h, "("); + break; + case MDOC_Ql: + print_text(h, "\\(oq"); + h->flags |= HTML_NOSPACE; + PAIR_CLASS_INIT(&tag, "lit"); + print_otag(h, TAG_CODE, 1, &tag); + break; + case MDOC_So: + case MDOC_Sq: + print_text(h, "\\(oq"); + break; + default: + abort(); + } + + h->flags |= HTML_NOSPACE; + return 1; +} + +static void +mdoc_quote_post(MDOC_ARGS) +{ + + if (n->type != ROFFT_BODY && n->type != ROFFT_ELEM) + return; + + h->flags |= HTML_NOSPACE; + + switch (n->tok) { + case MDOC_Ao: + case MDOC_Aq: + print_text(h, n->child != NULL && n->child->next == NULL && + n->child->tok == MDOC_Mt ? 
">" : "\\(ra"); + break; + case MDOC_Bro: + case MDOC_Brq: + print_text(h, "\\(rC"); + break; + case MDOC_Oo: + case MDOC_Op: + case MDOC_Bo: + case MDOC_Bq: + print_text(h, "\\(rB"); + break; + case MDOC_En: + if (n->norm->Es == NULL || + n->norm->Es->child == NULL || + n->norm->Es->child->next == NULL) + h->flags &= ~HTML_NOSPACE; + else + print_text(h, n->norm->Es->child->next->string); + break; + case MDOC_Qo: + case MDOC_Qq: + case MDOC_Do: + case MDOC_Dq: + print_text(h, "\\(rq"); + break; + case MDOC_Po: + case MDOC_Pq: + print_text(h, ")"); + break; + case MDOC_Ql: + case MDOC_So: + case MDOC_Sq: + print_text(h, "\\(cq"); + break; + default: + abort(); + } +} + +static int +mdoc_eo_pre(MDOC_ARGS) +{ + + if (n->type != ROFFT_BODY) + return 1; + + if (n->end == ENDBODY_NOT && + n->parent->head->child == NULL && + n->child != NULL && + n->child->end != ENDBODY_NOT) + print_text(h, "\\&"); + else if (n->end != ENDBODY_NOT ? n->child != NULL : + n->parent->head->child != NULL && (n->child != NULL || + (n->parent->tail != NULL && n->parent->tail->child != NULL))) + h->flags |= HTML_NOSPACE; + return 1; +} + +static void +mdoc_eo_post(MDOC_ARGS) +{ + int body, tail; + + if (n->type != ROFFT_BODY) + return; + + if (n->end != ENDBODY_NOT) { + h->flags &= ~HTML_NOSPACE; + return; + } + + body = n->child != NULL || n->parent->head->child != NULL; + tail = n->parent->tail != NULL && n->parent->tail->child != NULL; + + if (body && tail) + h->flags |= HTML_NOSPACE; + else if ( ! 
tail) + h->flags &= ~HTML_NOSPACE; +} diff --git a/contrib/mdocml/mdoc_macro.c b/contrib/mdocml/mdoc_macro.c new file mode 100644 index 0000000..ca95958 --- /dev/null +++ b/contrib/mdocml/mdoc_macro.c @@ -0,0 +1,1477 @@ +/* $Id: mdoc_macro.c,v 1.206 2015/10/20 02:01:32 schwarze Exp $ */ +/* + * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010, 2012-2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <time.h> + +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "libmandoc.h" +#include "roff_int.h" +#include "libmdoc.h" + +static void blk_full(MACRO_PROT_ARGS); +static void blk_exp_close(MACRO_PROT_ARGS); +static void blk_part_exp(MACRO_PROT_ARGS); +static void blk_part_imp(MACRO_PROT_ARGS); +static void ctx_synopsis(MACRO_PROT_ARGS); +static void in_line_eoln(MACRO_PROT_ARGS); +static void in_line_argn(MACRO_PROT_ARGS); +static void in_line(MACRO_PROT_ARGS); +static void phrase_ta(MACRO_PROT_ARGS); + +static void append_delims(struct roff_man *, int, int *, char *); +static void dword(struct roff_man *, int, int, const char *, + enum mdelim, int); +static int find_pending(struct roff_man *, int, int, int, + struct roff_node *); +static int lookup(struct roff_man *, int, int, int, const char *); +static int macro_or_word(MACRO_PROT_ARGS, int); +static int parse_rest(struct roff_man *, int, int, int *, char *); +static int rew_alt(int); +static void rew_elem(struct roff_man *, int); +static void rew_last(struct roff_man *, const struct roff_node *); +static void rew_pending(struct roff_man *, + const struct roff_node *); + +const struct mdoc_macro __mdoc_macros[MDOC_MAX] = { + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Ap */ + { in_line_eoln, MDOC_PROLOGUE }, /* Dd */ + { in_line_eoln, MDOC_PROLOGUE }, /* Dt */ + { in_line_eoln, MDOC_PROLOGUE }, /* Os */ + { blk_full, MDOC_PARSED | MDOC_JOIN }, /* Sh */ + { blk_full, MDOC_PARSED | MDOC_JOIN }, /* Ss */ + { in_line_eoln, 0 }, /* Pp */ + { blk_part_imp, MDOC_PARSED | MDOC_JOIN }, /* D1 */ + { blk_part_imp, MDOC_PARSED | MDOC_JOIN }, /* Dl */ + { blk_full, MDOC_EXPLICIT }, /* Bd */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_JOIN }, /* Ed */ + { blk_full, MDOC_EXPLICIT }, /* Bl */ + { blk_exp_close, MDOC_EXPLICIT | 
MDOC_JOIN }, /* El */ + { blk_full, MDOC_PARSED | MDOC_JOIN }, /* It */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ad */ + { in_line, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* An */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ar */ + { in_line, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Cd */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Cm */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Dv */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Er */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ev */ + { in_line_eoln, 0 }, /* Ex */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Fa */ + { in_line_eoln, 0 }, /* Fd */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Fl */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Fn */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ft */ + { in_line, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Ic */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* In */ + { in_line, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Li */ + { blk_full, MDOC_JOIN }, /* Nd */ + { ctx_synopsis, MDOC_CALLABLE | MDOC_PARSED }, /* Nm */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Op */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ot */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Pa */ + { in_line_eoln, 0 }, /* Rv */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* St */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Va */ + { ctx_synopsis, MDOC_CALLABLE | MDOC_PARSED }, /* Vt */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Xr */ + { in_line_eoln, MDOC_JOIN }, /* %A */ + { in_line_eoln, MDOC_JOIN }, /* %B */ + { in_line_eoln, MDOC_JOIN }, /* %D */ + { in_line_eoln, MDOC_JOIN }, /* %I */ + { in_line_eoln, MDOC_JOIN }, /* %J */ + { in_line_eoln, 0 }, /* %N */ + { in_line_eoln, MDOC_JOIN }, /* %O */ + { in_line_eoln, 0 }, /* %P */ + { in_line_eoln, MDOC_JOIN }, /* %R */ + { in_line_eoln, MDOC_JOIN }, /* %T */ + { in_line_eoln, 0 }, /* %V */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* 
Ac */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Ao */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Aq */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* At */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Bc */ + { blk_full, MDOC_EXPLICIT }, /* Bf */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Bo */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Bq */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Bsx */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Bx */ + { in_line_eoln, 0 }, /* Db */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Dc */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Do */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Dq */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Ec */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_JOIN }, /* Ef */ + { in_line, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Em */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Eo */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Fx */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ms */ + { in_line, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* No */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED | + MDOC_IGNDELIM | MDOC_JOIN }, /* Ns */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Nx */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Ox */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Pc */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED | MDOC_IGNDELIM }, /* Pf */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Po */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Pq */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Qc */ + { blk_part_imp, MDOC_CALLABLE | 
MDOC_PARSED | MDOC_JOIN }, /* Ql */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Qo */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Qq */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_JOIN }, /* Re */ + { blk_full, MDOC_EXPLICIT }, /* Rs */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Sc */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* So */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Sq */ + { in_line_argn, 0 }, /* Sm */ + { in_line, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Sx */ + { in_line, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Sy */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Tn */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Ux */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Xc */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Xo */ + { blk_full, MDOC_EXPLICIT | MDOC_CALLABLE }, /* Fo */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Fc */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Oo */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Oc */ + { blk_full, MDOC_EXPLICIT }, /* Bk */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_JOIN }, /* Ek */ + { in_line_eoln, 0 }, /* Bt */ + { in_line_eoln, 0 }, /* Hf */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Fr */ + { in_line_eoln, 0 }, /* Ud */ + { in_line, 0 }, /* Lb */ + { in_line_eoln, 0 }, /* Lp */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Lk */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Mt */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Brq */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Bro */ + { blk_exp_close, MDOC_CALLABLE | MDOC_PARSED | + MDOC_EXPLICIT | MDOC_JOIN }, /* Brc */ + { in_line_eoln, MDOC_JOIN }, /* %C */ + { 
in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Es */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* En */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Dx */ + { in_line_eoln, MDOC_JOIN }, /* %Q */ + { in_line_eoln, 0 }, /* br */ + { in_line_eoln, 0 }, /* sp */ + { in_line_eoln, 0 }, /* %U */ + { phrase_ta, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Ta */ + { in_line_eoln, MDOC_PROLOGUE }, /* ll */ +}; + +const struct mdoc_macro * const mdoc_macros = __mdoc_macros; + + +/* + * This is called at the end of parsing. It must traverse up the tree, + * closing out open [implicit] scopes. Obviously, open explicit scopes + * are errors. + */ +void +mdoc_endparse(struct roff_man *mdoc) +{ + struct roff_node *n; + + /* Scan for open explicit scopes. */ + + n = mdoc->last->flags & MDOC_VALID ? + mdoc->last->parent : mdoc->last; + + for ( ; n; n = n->parent) + if (n->type == ROFFT_BLOCK && + mdoc_macros[n->tok].flags & MDOC_EXPLICIT) + mandoc_msg(MANDOCERR_BLK_NOEND, mdoc->parse, + n->line, n->pos, mdoc_macronames[n->tok]); + + /* Rewind to the first. */ + + rew_last(mdoc, mdoc->first); + mdoc_state_reset(mdoc); +} + +/* + * Look up the macro at *p called by "from", + * or as a line macro if from == TOKEN_NONE. + */ +static int +lookup(struct roff_man *mdoc, int from, int line, int ppos, const char *p) +{ + int res; + + if (mdoc->flags & MDOC_PHRASEQF) { + mdoc->flags &= ~MDOC_PHRASEQF; + return TOKEN_NONE; + } + if (from == TOKEN_NONE || mdoc_macros[from].flags & MDOC_PARSED) { + res = mdoc_hash_find(p); + if (res != TOKEN_NONE) { + if (mdoc_macros[res].flags & MDOC_CALLABLE) + return res; + if (res != MDOC_br && res != MDOC_sp && res != MDOC_ll) + mandoc_msg(MANDOCERR_MACRO_CALL, + mdoc->parse, line, ppos, p); + } + } + return TOKEN_NONE; +} + +/* + * Rewind up to and including a specific node. 
+ */ +static void +rew_last(struct roff_man *mdoc, const struct roff_node *to) +{ + + if (to->flags & MDOC_VALID) + return; + + while (mdoc->last != to) { + mdoc_state(mdoc, mdoc->last); + mdoc->last->flags |= MDOC_VALID | MDOC_ENDED; + mdoc->last = mdoc->last->parent; + } + mdoc_state(mdoc, mdoc->last); + mdoc->last->flags |= MDOC_VALID | MDOC_ENDED; + mdoc->next = ROFF_NEXT_SIBLING; +} + +/* + * Rewind up to a specific block, including all blocks that broke it. + */ +static void +rew_pending(struct roff_man *mdoc, const struct roff_node *n) +{ + + for (;;) { + rew_last(mdoc, n); + + if (mdoc->last == n) { + switch (n->type) { + case ROFFT_HEAD: + roff_body_alloc(mdoc, n->line, n->pos, + n->tok); + return; + case ROFFT_BLOCK: + break; + default: + return; + } + if ( ! (n->flags & MDOC_BROKEN)) + return; + } else + n = mdoc->last; + + for (;;) { + if ((n = n->parent) == NULL) + return; + + if (n->type == ROFFT_BLOCK || + n->type == ROFFT_HEAD) { + if (n->flags & MDOC_ENDED) + break; + else + return; + } + } + } +} + +/* + * For a block closing macro, return the corresponding opening one. + * Otherwise, return the macro itself. 
+ */ +static int +rew_alt(int tok) +{ + switch (tok) { + case MDOC_Ac: + return MDOC_Ao; + case MDOC_Bc: + return MDOC_Bo; + case MDOC_Brc: + return MDOC_Bro; + case MDOC_Dc: + return MDOC_Do; + case MDOC_Ec: + return MDOC_Eo; + case MDOC_Ed: + return MDOC_Bd; + case MDOC_Ef: + return MDOC_Bf; + case MDOC_Ek: + return MDOC_Bk; + case MDOC_El: + return MDOC_Bl; + case MDOC_Fc: + return MDOC_Fo; + case MDOC_Oc: + return MDOC_Oo; + case MDOC_Pc: + return MDOC_Po; + case MDOC_Qc: + return MDOC_Qo; + case MDOC_Re: + return MDOC_Rs; + case MDOC_Sc: + return MDOC_So; + case MDOC_Xc: + return MDOC_Xo; + default: + return tok; + } +} + +static void +rew_elem(struct roff_man *mdoc, int tok) +{ + struct roff_node *n; + + n = mdoc->last; + if (n->type != ROFFT_ELEM) + n = n->parent; + assert(n->type == ROFFT_ELEM); + assert(tok == n->tok); + rew_last(mdoc, n); +} + +/* + * If there is an open sub-block of the target requiring + * explicit close-out, postpone closing out the target until + * the rew_pending() call closing out the sub-block. + */ +static int +find_pending(struct roff_man *mdoc, int tok, int line, int ppos, + struct roff_node *target) +{ + struct roff_node *n; + int irc; + + irc = 0; + for (n = mdoc->last; n != NULL && n != target; n = n->parent) { + if (n->flags & MDOC_ENDED) { + if ( ! (n->flags & MDOC_VALID)) + n->flags |= MDOC_BROKEN; + continue; + } + if (n->type == ROFFT_BLOCK && + mdoc_macros[n->tok].flags & MDOC_EXPLICIT) { + irc = 1; + n->flags = MDOC_BROKEN; + if (target->type == ROFFT_HEAD) + target->flags = MDOC_ENDED; + else if ( ! (target->flags & MDOC_ENDED)) { + mandoc_vmsg(MANDOCERR_BLK_NEST, + mdoc->parse, line, ppos, + "%s breaks %s", mdoc_macronames[tok], + mdoc_macronames[n->tok]); + mdoc_endbody_alloc(mdoc, line, ppos, + tok, target, ENDBODY_NOSPACE); + } + } + } + return irc; +} + +/* + * Allocate a word and check whether it's punctuation or not. + * Punctuation consists of those tokens found in mdoc_isdelim(). 
+ */ +static void +dword(struct roff_man *mdoc, int line, int col, const char *p, + enum mdelim d, int may_append) +{ + + if (d == DELIM_MAX) + d = mdoc_isdelim(p); + + if (may_append && + ! (mdoc->flags & (MDOC_SYNOPSIS | MDOC_KEEP | MDOC_SMOFF)) && + d == DELIM_NONE && mdoc->last->type == ROFFT_TEXT && + mdoc_isdelim(mdoc->last->string) == DELIM_NONE) { + roff_word_append(mdoc, p); + return; + } + + roff_word_alloc(mdoc, line, col, p); + + /* + * If the word consists of a bare delimiter, + * flag the new node accordingly, + * unless doing so was vetoed by the invoking macro. + * Always clear the veto, it is only valid for one word. + */ + + if (d == DELIM_OPEN) + mdoc->last->flags |= MDOC_DELIMO; + else if (d == DELIM_CLOSE && + ! (mdoc->flags & MDOC_NODELIMC) && + mdoc->last->parent->tok != MDOC_Fd) + mdoc->last->flags |= MDOC_DELIMC; + mdoc->flags &= ~MDOC_NODELIMC; +} + +static void +append_delims(struct roff_man *mdoc, int line, int *pos, char *buf) +{ + char *p; + int la; + + if (buf[*pos] == '\0') + return; + + for (;;) { + la = *pos; + if (mdoc_args(mdoc, line, pos, buf, TOKEN_NONE, &p) == + ARGS_EOLN) + break; + dword(mdoc, line, la, p, DELIM_MAX, 1); + + /* + * If we encounter end-of-sentence symbols, then trigger + * the double-space. + * + * XXX: it's easy to allow this to propagate outward to + * the last symbol, such that `. )' will cause the + * correct double-spacing. However, (1) groff isn't + * smart enough to do this and (2) it would require + * knowing which symbols break this behaviour, for + * example, `. ;' shouldn't propagate the double-space. + */ + + if (mandoc_eos(p, strlen(p))) + mdoc->last->flags |= MDOC_EOS; + } +} + +/* + * Parse one word. + * If it is a macro, call it and return 1. + * Otherwise, allocate it and return 0. + */ +static int +macro_or_word(MACRO_PROT_ARGS, int parsed) +{ + char *p; + int ntok; + + p = buf + ppos; + ntok = TOKEN_NONE; + if (*p == '"') + p++; + else if (parsed && ! 
(mdoc->flags & MDOC_PHRASELIT)) + ntok = lookup(mdoc, tok, line, ppos, p); + + if (ntok == TOKEN_NONE) { + dword(mdoc, line, ppos, p, DELIM_MAX, tok == TOKEN_NONE || + mdoc_macros[tok].flags & MDOC_JOIN); + return 0; + } else { + if (mdoc_macros[tok].fp == in_line_eoln) + rew_elem(mdoc, tok); + mdoc_macro(mdoc, ntok, line, ppos, pos, buf); + if (tok == TOKEN_NONE) + append_delims(mdoc, line, pos, buf); + return 1; + } +} + +/* + * Close out block partial/full explicit. + */ +static void +blk_exp_close(MACRO_PROT_ARGS) +{ + struct roff_node *body; /* Our own body. */ + struct roff_node *endbody; /* Our own end marker. */ + struct roff_node *itblk; /* An It block starting later. */ + struct roff_node *later; /* A sub-block starting later. */ + struct roff_node *n; /* Search back to our block. */ + struct roff_node *target; /* For find_pending(). */ + + int j, lastarg, maxargs, nl, pending; + enum margserr ac; + int atok, ntok; + char *p; + + nl = MDOC_NEWLINE & mdoc->flags; + + switch (tok) { + case MDOC_Ec: + maxargs = 1; + break; + case MDOC_Ek: + mdoc->flags &= ~MDOC_KEEP; + /* FALLTHROUGH */ + default: + maxargs = 0; + break; + } + + /* + * Search backwards for beginnings of blocks, + * both of our own and of pending sub-blocks. + */ + + atok = rew_alt(tok); + body = endbody = itblk = later = NULL; + for (n = mdoc->last; n; n = n->parent) { + if (n->flags & MDOC_ENDED) { + if ( ! (n->flags & MDOC_VALID)) + n->flags |= MDOC_BROKEN; + continue; + } + + /* Remember the start of our own body. */ + + if (n->type == ROFFT_BODY && atok == n->tok) { + if (n->end == ENDBODY_NOT) + body = n; + continue; + } + + if (n->type != ROFFT_BLOCK || n->tok == MDOC_Nm) + continue; + + if (n->tok == MDOC_It) { + itblk = n; + continue; + } + + if (atok == n->tok) { + assert(body); + + /* + * Found the start of our own block. + * When there is no pending sub block, + * just proceed to closing out. 
+ */ + + if (later == NULL || + (tok == MDOC_El && itblk == NULL)) + break; + + /* + * When there is a pending sub block, postpone + * closing out the current block until the + * rew_pending() closing out the sub-block. + * Mark the place where the formatting - but not + * the scope - of the current block ends. + */ + + mandoc_vmsg(MANDOCERR_BLK_NEST, mdoc->parse, + line, ppos, "%s breaks %s", + mdoc_macronames[atok], + mdoc_macronames[later->tok]); + + endbody = mdoc_endbody_alloc(mdoc, line, ppos, + atok, body, ENDBODY_SPACE); + + if (tok == MDOC_El) + itblk->flags |= MDOC_ENDED | MDOC_BROKEN; + + /* + * If a block closing macro taking arguments + * breaks another block, put the arguments + * into the end marker. + */ + + if (maxargs) + mdoc->next = ROFF_NEXT_CHILD; + break; + } + + /* Explicit blocks close out description lines. */ + + if (n->tok == MDOC_Nd) { + rew_last(mdoc, n); + continue; + } + + /* Breaking an open sub block. */ + + n->flags |= MDOC_BROKEN; + if (later == NULL) + later = n; + } + + if (body == NULL) { + mandoc_msg(MANDOCERR_BLK_NOTOPEN, mdoc->parse, + line, ppos, mdoc_macronames[tok]); + if (later != NULL) + later->flags &= ~MDOC_BROKEN; + if (maxargs && endbody == NULL) { + /* + * Stray .Ec without previous .Eo: + * Break the output line, keep the arguments. + */ + roff_elem_alloc(mdoc, line, ppos, MDOC_br); + rew_elem(mdoc, MDOC_br); + } + } else if (endbody == NULL) { + rew_last(mdoc, body); + if (maxargs) + mdoc_tail_alloc(mdoc, line, ppos, atok); + } + + if ( ! 
(mdoc_macros[tok].flags & MDOC_PARSED)) { + if (buf[*pos] != '\0') + mandoc_vmsg(MANDOCERR_ARG_SKIP, + mdoc->parse, line, ppos, + "%s %s", mdoc_macronames[tok], + buf + *pos); + if (endbody == NULL && n != NULL) + rew_pending(mdoc, n); + return; + } + + if (endbody != NULL) + n = endbody; + + ntok = TOKEN_NONE; + for (j = 0; ; j++) { + lastarg = *pos; + + if (j == maxargs && n != NULL) + rew_last(mdoc, n); + + ac = mdoc_args(mdoc, line, pos, buf, tok, &p); + if (ac == ARGS_PUNCT || ac == ARGS_EOLN) + break; + + ntok = ac == ARGS_QWORD ? TOKEN_NONE : + lookup(mdoc, tok, line, lastarg, p); + + if (ntok == TOKEN_NONE) { + dword(mdoc, line, lastarg, p, DELIM_MAX, + MDOC_JOIN & mdoc_macros[tok].flags); + continue; + } + + if (n != NULL) + rew_last(mdoc, n); + mdoc->flags &= ~MDOC_NEWLINE; + mdoc_macro(mdoc, ntok, line, lastarg, pos, buf); + break; + } + + if (n != NULL) { + if (ntok != TOKEN_NONE && n->flags & MDOC_BROKEN) { + target = n; + do + target = target->parent; + while ( ! (target->flags & MDOC_ENDED)); + pending = find_pending(mdoc, ntok, line, ppos, + target); + } else + pending = 0; + if ( ! pending) + rew_pending(mdoc, n); + } + if (nl) + append_delims(mdoc, line, pos, buf); +} + +static void +in_line(MACRO_PROT_ARGS) +{ + int la, scope, cnt, firstarg, mayopen, nc, nl; + int ntok; + enum margserr ac; + enum mdelim d; + struct mdoc_arg *arg; + char *p; + + nl = MDOC_NEWLINE & mdoc->flags; + + /* + * Whether we allow ignored elements (those without content, + * usually because of reserved words) to squeak by. + */ + + switch (tok) { + case MDOC_An: + case MDOC_Ar: + case MDOC_Fl: + case MDOC_Mt: + case MDOC_Nm: + case MDOC_Pa: + nc = 1; + break; + default: + nc = 0; + break; + } + + mdoc_argv(mdoc, line, tok, &arg, pos, buf); + + d = DELIM_NONE; + firstarg = 1; + mayopen = 1; + for (cnt = scope = 0;; ) { + la = *pos; + ac = mdoc_args(mdoc, line, pos, buf, tok, &p); + + /* + * At the end of a macro line, + * opening delimiters do not suppress spacing. 
+ */ + + if (ac == ARGS_EOLN) { + if (d == DELIM_OPEN) + mdoc->last->flags &= ~MDOC_DELIMO; + break; + } + + /* + * The rest of the macro line is only punctuation, + * to be handled by append_delims(). + * If there were no other arguments, + * do not allow the first one to suppress spacing, + * even if it turns out to be a closing one. + */ + + if (ac == ARGS_PUNCT) { + if (cnt == 0 && (nc == 0 || tok == MDOC_An)) + mdoc->flags |= MDOC_NODELIMC; + break; + } + + ntok = (ac == ARGS_QWORD || (tok == MDOC_Fn && !cnt)) ? + TOKEN_NONE : lookup(mdoc, tok, line, la, p); + + /* + * In this case, we've located a submacro and must + * execute it. Close out scope, if open. If no + * elements have been generated, either create one (nc) + * or raise a warning. + */ + + if (ntok != TOKEN_NONE) { + if (scope) + rew_elem(mdoc, tok); + if (nc && ! cnt) { + mdoc_elem_alloc(mdoc, line, ppos, tok, arg); + rew_last(mdoc, mdoc->last); + } else if ( ! nc && ! cnt) { + mdoc_argv_free(arg); + mandoc_msg(MANDOCERR_MACRO_EMPTY, + mdoc->parse, line, ppos, + mdoc_macronames[tok]); + } + mdoc_macro(mdoc, ntok, line, la, pos, buf); + if (nl) + append_delims(mdoc, line, pos, buf); + return; + } + + /* + * Non-quote-enclosed punctuation. Set up our scope, if + * a word; rewind the scope, if a delimiter; then append + * the word. + */ + + d = ac == ARGS_QWORD ? DELIM_NONE : mdoc_isdelim(p); + + if (DELIM_NONE != d) { + /* + * If we encounter closing punctuation, no word + * has been emitted, no scope is open, and we're + * allowed to have an empty element, then start + * a new scope. + */ + if ((d == DELIM_CLOSE || + (d == DELIM_MIDDLE && tok == MDOC_Fl)) && + !cnt && !scope && nc && mayopen) { + mdoc_elem_alloc(mdoc, line, ppos, tok, arg); + scope = 1; + cnt++; + if (tok == MDOC_Nm) + mayopen = 0; + } + /* + * Close out our scope, if one is open, before + * any punctuation. 
+ */ + if (scope) + rew_elem(mdoc, tok); + scope = 0; + if (tok == MDOC_Fn) + mayopen = 0; + } else if (mayopen && !scope) { + mdoc_elem_alloc(mdoc, line, ppos, tok, arg); + scope = 1; + cnt++; + } + + dword(mdoc, line, la, p, d, + MDOC_JOIN & mdoc_macros[tok].flags); + + /* + * If the first argument is a closing delimiter, + * do not suppress spacing before it. + */ + + if (firstarg && d == DELIM_CLOSE && !nc) + mdoc->last->flags &= ~MDOC_DELIMC; + firstarg = 0; + + /* + * `Fl' macros have their scope re-opened with each new + * word so that the `-' can be added to each one without + * having to parse out spaces. + */ + if (scope && tok == MDOC_Fl) { + rew_elem(mdoc, tok); + scope = 0; + } + } + + if (scope) + rew_elem(mdoc, tok); + + /* + * If no elements have been collected and we're allowed to have + * empties (nc), open a scope and close it out. Otherwise, + * raise a warning. + */ + + if ( ! cnt) { + if (nc) { + mdoc_elem_alloc(mdoc, line, ppos, tok, arg); + rew_last(mdoc, mdoc->last); + } else { + mdoc_argv_free(arg); + mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse, + line, ppos, mdoc_macronames[tok]); + } + } + if (nl) + append_delims(mdoc, line, pos, buf); +} + +static void +blk_full(MACRO_PROT_ARGS) +{ + int la, nl, parsed; + struct mdoc_arg *arg; + struct roff_node *blk; /* Our own or a broken block. */ + struct roff_node *head; /* Our own head. */ + struct roff_node *body; /* Our own body. */ + struct roff_node *n; + enum margserr ac, lac; + char *p; + + nl = MDOC_NEWLINE & mdoc->flags; + + if (buf[*pos] == '\0' && (tok == MDOC_Sh || tok == MDOC_Ss)) { + mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse, + line, ppos, mdoc_macronames[tok]); + return; + } + + if ( ! (mdoc_macros[tok].flags & MDOC_EXPLICIT)) { + + /* Here, tok is one of Sh Ss Nm Nd It. */ + + blk = NULL; + for (n = mdoc->last; n != NULL; n = n->parent) { + if (n->flags & MDOC_ENDED) { + if ( ! 
(n->flags & MDOC_VALID)) + n->flags |= MDOC_BROKEN; + continue; + } + if (n->type != ROFFT_BLOCK) + continue; + + if (tok == MDOC_It && n->tok == MDOC_Bl) { + if (blk != NULL) { + mandoc_vmsg(MANDOCERR_BLK_BROKEN, + mdoc->parse, line, ppos, + "It breaks %s", + mdoc_macronames[blk->tok]); + rew_pending(mdoc, blk); + } + break; + } + + if (mdoc_macros[n->tok].flags & MDOC_EXPLICIT) { + switch (tok) { + case MDOC_Sh: + case MDOC_Ss: + mandoc_vmsg(MANDOCERR_BLK_BROKEN, + mdoc->parse, line, ppos, + "%s breaks %s", + mdoc_macronames[tok], + mdoc_macronames[n->tok]); + rew_pending(mdoc, n); + n = mdoc->last; + continue; + case MDOC_It: + /* Delay in case it's astray. */ + blk = n; + continue; + default: + break; + } + break; + } + + /* Here, n is one of Sh Ss Nm Nd It. */ + + if (tok != MDOC_Sh && (n->tok == MDOC_Sh || + (tok != MDOC_Ss && (n->tok == MDOC_Ss || + (tok != MDOC_It && n->tok == MDOC_It))))) + break; + + /* Item breaking an explicit block. */ + + if (blk != NULL) { + mandoc_vmsg(MANDOCERR_BLK_BROKEN, + mdoc->parse, line, ppos, + "It breaks %s", + mdoc_macronames[blk->tok]); + rew_pending(mdoc, blk); + blk = NULL; + } + + /* Close out prior implicit scopes. */ + + rew_last(mdoc, n); + } + + /* Skip items outside lists. */ + + if (tok == MDOC_It && (n == NULL || n->tok != MDOC_Bl)) { + mandoc_vmsg(MANDOCERR_IT_STRAY, mdoc->parse, + line, ppos, "It %s", buf + *pos); + roff_elem_alloc(mdoc, line, ppos, MDOC_br); + rew_elem(mdoc, MDOC_br); + return; + } + } + + /* + * This routine accommodates implicitly- and explicitly-scoped + * macro openings. Implicit ones first close out prior scope + * (seen above). Delay opening the head until necessary to + * allow leading punctuation to print. Special consideration + * for `It -column', which has phrase-part syntax instead of + * regular child nodes. 
+ */ + + mdoc_argv(mdoc, line, tok, &arg, pos, buf); + blk = mdoc_block_alloc(mdoc, line, ppos, tok, arg); + head = body = NULL; + + /* + * Exception: Heads of `It' macros in `-diag' lists are not + * parsed, even though `It' macros in general are parsed. + */ + + parsed = tok != MDOC_It || + mdoc->last->parent->tok != MDOC_Bl || + mdoc->last->parent->norm->Bl.type != LIST_diag; + + /* + * The `Nd' macro has all arguments in its body: it's a hybrid + * of block partial-explicit and full-implicit. Stupid. + */ + + if (tok == MDOC_Nd) { + head = roff_head_alloc(mdoc, line, ppos, tok); + rew_last(mdoc, head); + body = roff_body_alloc(mdoc, line, ppos, tok); + } + + if (tok == MDOC_Bk) + mdoc->flags |= MDOC_KEEP; + + ac = ARGS_EOLN; + for (;;) { + + /* + * If we are right after a tab character, + * do not parse the first word for macros. + */ + + if (mdoc->flags & MDOC_PHRASEQN) { + mdoc->flags &= ~MDOC_PHRASEQN; + mdoc->flags |= MDOC_PHRASEQF; + } + + la = *pos; + lac = ac; + ac = mdoc_args(mdoc, line, pos, buf, tok, &p); + if (ac == ARGS_EOLN) { + if (lac != ARGS_PHRASE || + ! (mdoc->flags & MDOC_PHRASEQF)) + break; + + /* + * This line ends in a tab; start the next + * column now, with a leading blank. + */ + + if (body != NULL) + rew_last(mdoc, body); + body = roff_body_alloc(mdoc, line, ppos, tok); + roff_word_alloc(mdoc, line, ppos, "\\&"); + break; + } + + if (tok == MDOC_Bd || tok == MDOC_Bk) { + mandoc_vmsg(MANDOCERR_ARG_EXCESS, + mdoc->parse, line, la, "%s ... %s", + mdoc_macronames[tok], buf + la); + break; + } + if (tok == MDOC_Rs) { + mandoc_vmsg(MANDOCERR_ARG_SKIP, mdoc->parse, + line, la, "Rs %s", buf + la); + break; + } + if (ac == ARGS_PUNCT) + break; + + /* + * Emit leading punctuation (i.e., punctuation before + * the ROFFT_HEAD) for non-phrase types. 
+ */ + + if (head == NULL && + ac != ARGS_PHRASE && + ac != ARGS_QWORD && + mdoc_isdelim(p) == DELIM_OPEN) { + dword(mdoc, line, la, p, DELIM_OPEN, 0); + continue; + } + + /* Open a head if one hasn't been opened. */ + + if (head == NULL) + head = roff_head_alloc(mdoc, line, ppos, tok); + + if (ac == ARGS_PHRASE) { + + /* + * If we haven't opened a body yet, rewind the + * head; if we have, rewind that instead. + */ + + rew_last(mdoc, body == NULL ? head : body); + body = roff_body_alloc(mdoc, line, ppos, tok); + + /* Process to the tab or to the end of the line. */ + + mdoc->flags |= MDOC_PHRASE; + parse_rest(mdoc, TOKEN_NONE, line, &la, buf); + mdoc->flags &= ~MDOC_PHRASE; + + /* There may have been `Ta' macros. */ + + while (body->next != NULL) + body = body->next; + continue; + } + + if (macro_or_word(mdoc, tok, line, la, pos, buf, parsed)) + break; + } + + if (blk->flags & MDOC_VALID) + return; + if (head == NULL) + head = roff_head_alloc(mdoc, line, ppos, tok); + if (nl && tok != MDOC_Bd && tok != MDOC_Bl && tok != MDOC_Rs) + append_delims(mdoc, line, pos, buf); + if (body != NULL) + goto out; + if (find_pending(mdoc, tok, line, ppos, head)) + return; + + /* Close out scopes to remain in a consistent state. */ + + rew_last(mdoc, head); + body = roff_body_alloc(mdoc, line, ppos, tok); +out: + if (mdoc->flags & MDOC_FREECOL) { + rew_last(mdoc, body); + rew_last(mdoc, blk); + mdoc->flags &= ~MDOC_FREECOL; + } +} + +static void +blk_part_imp(MACRO_PROT_ARGS) +{ + int la, nl; + enum margserr ac; + char *p; + struct roff_node *blk; /* saved block context */ + struct roff_node *body; /* saved body context */ + struct roff_node *n; + + nl = MDOC_NEWLINE & mdoc->flags; + + /* + * A macro that spans to the end of the line. This is generally + * (but not necessarily) called as the first macro. 
The block + * has a head as the immediate child, which is always empty, + * followed by zero or more opening punctuation nodes, then the + * body (which may be empty, depending on the macro), then zero + * or more closing punctuation nodes. + */ + + blk = mdoc_block_alloc(mdoc, line, ppos, tok, NULL); + rew_last(mdoc, roff_head_alloc(mdoc, line, ppos, tok)); + + /* + * Open the body scope "on-demand", that is, after we've + * processed all our the leading delimiters (open parenthesis, + * etc.). + */ + + for (body = NULL; ; ) { + la = *pos; + ac = mdoc_args(mdoc, line, pos, buf, tok, &p); + if (ac == ARGS_EOLN || ac == ARGS_PUNCT) + break; + + if (body == NULL && ac != ARGS_QWORD && + mdoc_isdelim(p) == DELIM_OPEN) { + dword(mdoc, line, la, p, DELIM_OPEN, 0); + continue; + } + + if (body == NULL) + body = roff_body_alloc(mdoc, line, ppos, tok); + + if (macro_or_word(mdoc, tok, line, la, pos, buf, 1)) + break; + } + if (body == NULL) + body = roff_body_alloc(mdoc, line, ppos, tok); + + if (find_pending(mdoc, tok, line, ppos, body)) + return; + + rew_last(mdoc, body); + if (nl) + append_delims(mdoc, line, pos, buf); + rew_pending(mdoc, blk); + + /* Move trailing .Ns out of scope. */ + + for (n = body->child; n && n->next; n = n->next) + /* Do nothing. */ ; + if (n && n->tok == MDOC_Ns) + mdoc_node_relink(mdoc, n); +} + +static void +blk_part_exp(MACRO_PROT_ARGS) +{ + int la, nl; + enum margserr ac; + struct roff_node *head; /* keep track of head */ + char *p; + + nl = MDOC_NEWLINE & mdoc->flags; + + /* + * The opening of an explicit macro having zero or more leading + * punctuation nodes; a head with optional single element (the + * case of `Eo'); and a body that may be empty. + */ + + roff_block_alloc(mdoc, line, ppos, tok); + head = NULL; + for (;;) { + la = *pos; + ac = mdoc_args(mdoc, line, pos, buf, tok, &p); + if (ac == ARGS_PUNCT || ac == ARGS_EOLN) + break; + + /* Flush out leading punctuation. 
*/ + + if (head == NULL && ac != ARGS_QWORD && + mdoc_isdelim(p) == DELIM_OPEN) { + dword(mdoc, line, la, p, DELIM_OPEN, 0); + continue; + } + + if (head == NULL) { + head = roff_head_alloc(mdoc, line, ppos, tok); + if (tok == MDOC_Eo) /* Not parsed. */ + dword(mdoc, line, la, p, DELIM_MAX, 0); + rew_last(mdoc, head); + roff_body_alloc(mdoc, line, ppos, tok); + if (tok == MDOC_Eo) + continue; + } + + if (macro_or_word(mdoc, tok, line, la, pos, buf, 1)) + break; + } + + /* Clean-up to leave in a consistent state. */ + + if (head == NULL) { + rew_last(mdoc, roff_head_alloc(mdoc, line, ppos, tok)); + roff_body_alloc(mdoc, line, ppos, tok); + } + if (nl) + append_delims(mdoc, line, pos, buf); +} + +static void +in_line_argn(MACRO_PROT_ARGS) +{ + struct mdoc_arg *arg; + char *p; + enum margserr ac; + int ntok; + int state; /* arg#; -1: not yet open; -2: closed */ + int la, maxargs, nl; + + nl = mdoc->flags & MDOC_NEWLINE; + + /* + * A line macro that has a fixed number of arguments (maxargs). + * Only open the scope once the first non-leading-punctuation is + * found (unless MDOC_IGNDELIM is noted, like in `Pf'), then + * keep it open until the maximum number of arguments are + * exhausted. + */ + + switch (tok) { + case MDOC_Ap: + case MDOC_Ns: + case MDOC_Ux: + maxargs = 0; + break; + case MDOC_Bx: + case MDOC_Es: + case MDOC_Xr: + maxargs = 2; + break; + default: + maxargs = 1; + break; + } + + mdoc_argv(mdoc, line, tok, &arg, pos, buf); + + state = -1; + p = NULL; + for (;;) { + la = *pos; + ac = mdoc_args(mdoc, line, pos, buf, tok, &p); + + if (ac == ARGS_WORD && state == -1 && + ! 
(mdoc_macros[tok].flags & MDOC_IGNDELIM) && + mdoc_isdelim(p) == DELIM_OPEN) { + dword(mdoc, line, la, p, DELIM_OPEN, 0); + continue; + } + + if (state == -1 && tok != MDOC_In && + tok != MDOC_St && tok != MDOC_Xr) { + mdoc_elem_alloc(mdoc, line, ppos, tok, arg); + state = 0; + } + + if (ac == ARGS_PUNCT || ac == ARGS_EOLN) { + if (abs(state) < 2 && tok == MDOC_Pf) + mandoc_vmsg(MANDOCERR_PF_SKIP, + mdoc->parse, line, ppos, "Pf %s", + p == NULL ? "at eol" : p); + break; + } + + if (state == maxargs) { + rew_elem(mdoc, tok); + state = -2; + } + + ntok = (ac == ARGS_QWORD || (tok == MDOC_Pf && state == 0)) ? + TOKEN_NONE : lookup(mdoc, tok, line, la, p); + + if (ntok != TOKEN_NONE) { + if (state >= 0) { + rew_elem(mdoc, tok); + state = -2; + } + mdoc_macro(mdoc, ntok, line, la, pos, buf); + break; + } + + if (ac == ARGS_QWORD || + mdoc_macros[tok].flags & MDOC_IGNDELIM || + mdoc_isdelim(p) == DELIM_NONE) { + if (state == -1) { + mdoc_elem_alloc(mdoc, line, ppos, tok, arg); + state = 1; + } else if (state >= 0) + state++; + } else if (state >= 0) { + rew_elem(mdoc, tok); + state = -2; + } + + dword(mdoc, line, la, p, DELIM_MAX, + MDOC_JOIN & mdoc_macros[tok].flags); + } + + if (state == -1) { + mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse, + line, ppos, mdoc_macronames[tok]); + return; + } + + if (state == 0 && tok == MDOC_Pf) + append_delims(mdoc, line, pos, buf); + if (state >= 0) + rew_elem(mdoc, tok); + if (nl) + append_delims(mdoc, line, pos, buf); +} + +static void +in_line_eoln(MACRO_PROT_ARGS) +{ + struct roff_node *n; + struct mdoc_arg *arg; + + if ((tok == MDOC_Pp || tok == MDOC_Lp) && + ! 
(mdoc->flags & MDOC_SYNOPSIS)) { + n = mdoc->last; + if (mdoc->next == ROFF_NEXT_SIBLING) + n = n->parent; + if (n->tok == MDOC_Nm) + rew_last(mdoc, n->parent); + } + + if (buf[*pos] == '\0' && + (tok == MDOC_Fd || mdoc_macronames[tok][0] == '%')) { + mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse, + line, ppos, mdoc_macronames[tok]); + return; + } + + mdoc_argv(mdoc, line, tok, &arg, pos, buf); + mdoc_elem_alloc(mdoc, line, ppos, tok, arg); + if (parse_rest(mdoc, tok, line, pos, buf)) + return; + rew_elem(mdoc, tok); +} + +/* + * The simplest argument parser available: Parse the remaining + * words until the end of the phrase or line and return 0 + * or until the next macro, call that macro, and return 1. + */ +static int +parse_rest(struct roff_man *mdoc, int tok, int line, int *pos, char *buf) +{ + int la; + + for (;;) { + la = *pos; + if (mdoc_args(mdoc, line, pos, buf, tok, NULL) == ARGS_EOLN) + return 0; + if (macro_or_word(mdoc, tok, line, la, pos, buf, 1)) + return 1; + } +} + +static void +ctx_synopsis(MACRO_PROT_ARGS) +{ + + if (~mdoc->flags & (MDOC_SYNOPSIS | MDOC_NEWLINE)) + in_line(mdoc, tok, line, ppos, pos, buf); + else if (tok == MDOC_Nm) + blk_full(mdoc, tok, line, ppos, pos, buf); + else { + assert(tok == MDOC_Vt); + blk_part_imp(mdoc, tok, line, ppos, pos, buf); + } +} + +/* + * Phrases occur within `Bl -column' entries, separated by `Ta' or tabs. + * They're unusual because they're basically free-form text until a + * macro is encountered. + */ +static void +phrase_ta(MACRO_PROT_ARGS) +{ + struct roff_node *body, *n; + + /* Make sure we are in a column list or ignore this macro. 
*/ + + body = NULL; + for (n = mdoc->last; n != NULL; n = n->parent) { + if (n->flags & MDOC_ENDED) + continue; + if (n->tok == MDOC_It && n->type == ROFFT_BODY) + body = n; + if (n->tok == MDOC_Bl) + break; + } + + if (n == NULL || n->norm->Bl.type != LIST_column) { + mandoc_msg(MANDOCERR_TA_STRAY, mdoc->parse, + line, ppos, "Ta"); + return; + } + + /* Advance to the next column. */ + + rew_last(mdoc, body); + roff_body_alloc(mdoc, line, ppos, MDOC_It); + parse_rest(mdoc, TOKEN_NONE, line, pos, buf); +} diff --git a/contrib/mdocml/mdoc_man.c b/contrib/mdocml/mdoc_man.c new file mode 100644 index 0000000..ab24531 --- /dev/null +++ b/contrib/mdocml/mdoc_man.c @@ -0,0 +1,1804 @@ +/* $Id: mdoc_man.c,v 1.96 2016/01/08 17:48:09 schwarze Exp $ */ +/* + * Copyright (c) 2011-2016 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <stdio.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "man.h" +#include "out.h" +#include "main.h" + +#define DECL_ARGS const struct roff_meta *meta, struct roff_node *n + +struct manact { + int (*cond)(DECL_ARGS); /* DON'T run actions */ + int (*pre)(DECL_ARGS); /* pre-node action */ + void (*post)(DECL_ARGS); /* post-node action */ + const char *prefix; /* pre-node string constant */ + const char *suffix; /* post-node string constant */ +}; + +static int cond_body(DECL_ARGS); +static int cond_head(DECL_ARGS); +static void font_push(char); +static void font_pop(void); +static void mid_it(void); +static void post__t(DECL_ARGS); +static void post_aq(DECL_ARGS); +static void post_bd(DECL_ARGS); +static void post_bf(DECL_ARGS); +static void post_bk(DECL_ARGS); +static void post_bl(DECL_ARGS); +static void post_dl(DECL_ARGS); +static void post_en(DECL_ARGS); +static void post_enc(DECL_ARGS); +static void post_eo(DECL_ARGS); +static void post_fa(DECL_ARGS); +static void post_fd(DECL_ARGS); +static void post_fl(DECL_ARGS); +static void post_fn(DECL_ARGS); +static void post_fo(DECL_ARGS); +static void post_font(DECL_ARGS); +static void post_in(DECL_ARGS); +static void post_it(DECL_ARGS); +static void post_lb(DECL_ARGS); +static void post_nm(DECL_ARGS); +static void post_percent(DECL_ARGS); +static void post_pf(DECL_ARGS); +static void post_sect(DECL_ARGS); +static void post_sp(DECL_ARGS); +static void post_vt(DECL_ARGS); +static int pre__t(DECL_ARGS); +static int pre_an(DECL_ARGS); +static int pre_ap(DECL_ARGS); +static int pre_aq(DECL_ARGS); +static int pre_bd(DECL_ARGS); +static int pre_bf(DECL_ARGS); +static int pre_bk(DECL_ARGS); +static int pre_bl(DECL_ARGS); +static int pre_br(DECL_ARGS); +static int pre_bx(DECL_ARGS); +static int pre_dl(DECL_ARGS); +static int pre_en(DECL_ARGS); +static int pre_enc(DECL_ARGS); 
+static int pre_em(DECL_ARGS); +static int pre_skip(DECL_ARGS); +static int pre_eo(DECL_ARGS); +static int pre_ex(DECL_ARGS); +static int pre_fa(DECL_ARGS); +static int pre_fd(DECL_ARGS); +static int pre_fl(DECL_ARGS); +static int pre_fn(DECL_ARGS); +static int pre_fo(DECL_ARGS); +static int pre_ft(DECL_ARGS); +static int pre_in(DECL_ARGS); +static int pre_it(DECL_ARGS); +static int pre_lk(DECL_ARGS); +static int pre_li(DECL_ARGS); +static int pre_ll(DECL_ARGS); +static int pre_nm(DECL_ARGS); +static int pre_no(DECL_ARGS); +static int pre_ns(DECL_ARGS); +static int pre_pp(DECL_ARGS); +static int pre_rs(DECL_ARGS); +static int pre_rv(DECL_ARGS); +static int pre_sm(DECL_ARGS); +static int pre_sp(DECL_ARGS); +static int pre_sect(DECL_ARGS); +static int pre_sy(DECL_ARGS); +static void pre_syn(const struct roff_node *); +static int pre_vt(DECL_ARGS); +static int pre_ux(DECL_ARGS); +static int pre_xr(DECL_ARGS); +static void print_word(const char *); +static void print_line(const char *, int); +static void print_block(const char *, int); +static void print_offs(const char *, int); +static void print_width(const struct mdoc_bl *, + const struct roff_node *); +static void print_count(int *); +static void print_node(DECL_ARGS); + +static const struct manact manacts[MDOC_MAX + 1] = { + { NULL, pre_ap, NULL, NULL, NULL }, /* Ap */ + { NULL, NULL, NULL, NULL, NULL }, /* Dd */ + { NULL, NULL, NULL, NULL, NULL }, /* Dt */ + { NULL, NULL, NULL, NULL, NULL }, /* Os */ + { NULL, pre_sect, post_sect, ".SH", NULL }, /* Sh */ + { NULL, pre_sect, post_sect, ".SS", NULL }, /* Ss */ + { NULL, pre_pp, NULL, NULL, NULL }, /* Pp */ + { cond_body, pre_dl, post_dl, NULL, NULL }, /* D1 */ + { cond_body, pre_dl, post_dl, NULL, NULL }, /* Dl */ + { cond_body, pre_bd, post_bd, NULL, NULL }, /* Bd */ + { NULL, NULL, NULL, NULL, NULL }, /* Ed */ + { cond_body, pre_bl, post_bl, NULL, NULL }, /* Bl */ + { NULL, NULL, NULL, NULL, NULL }, /* El */ + { NULL, pre_it, post_it, NULL, NULL }, /* It */ + { 
NULL, pre_em, post_font, NULL, NULL }, /* Ad */ + { NULL, pre_an, NULL, NULL, NULL }, /* An */ + { NULL, pre_em, post_font, NULL, NULL }, /* Ar */ + { NULL, pre_sy, post_font, NULL, NULL }, /* Cd */ + { NULL, pre_sy, post_font, NULL, NULL }, /* Cm */ + { NULL, pre_li, post_font, NULL, NULL }, /* Dv */ + { NULL, pre_li, post_font, NULL, NULL }, /* Er */ + { NULL, pre_li, post_font, NULL, NULL }, /* Ev */ + { NULL, pre_ex, NULL, NULL, NULL }, /* Ex */ + { NULL, pre_fa, post_fa, NULL, NULL }, /* Fa */ + { NULL, pre_fd, post_fd, NULL, NULL }, /* Fd */ + { NULL, pre_fl, post_fl, NULL, NULL }, /* Fl */ + { NULL, pre_fn, post_fn, NULL, NULL }, /* Fn */ + { NULL, pre_ft, post_font, NULL, NULL }, /* Ft */ + { NULL, pre_sy, post_font, NULL, NULL }, /* Ic */ + { NULL, pre_in, post_in, NULL, NULL }, /* In */ + { NULL, pre_li, post_font, NULL, NULL }, /* Li */ + { cond_head, pre_enc, NULL, "\\- ", NULL }, /* Nd */ + { NULL, pre_nm, post_nm, NULL, NULL }, /* Nm */ + { cond_body, pre_enc, post_enc, "[", "]" }, /* Op */ + { NULL, pre_ft, post_font, NULL, NULL }, /* Ot */ + { NULL, pre_em, post_font, NULL, NULL }, /* Pa */ + { NULL, pre_rv, NULL, NULL, NULL }, /* Rv */ + { NULL, NULL, NULL, NULL, NULL }, /* St */ + { NULL, pre_em, post_font, NULL, NULL }, /* Va */ + { NULL, pre_vt, post_vt, NULL, NULL }, /* Vt */ + { NULL, pre_xr, NULL, NULL, NULL }, /* Xr */ + { NULL, NULL, post_percent, NULL, NULL }, /* %A */ + { NULL, pre_em, post_percent, NULL, NULL }, /* %B */ + { NULL, NULL, post_percent, NULL, NULL }, /* %D */ + { NULL, pre_em, post_percent, NULL, NULL }, /* %I */ + { NULL, pre_em, post_percent, NULL, NULL }, /* %J */ + { NULL, NULL, post_percent, NULL, NULL }, /* %N */ + { NULL, NULL, post_percent, NULL, NULL }, /* %O */ + { NULL, NULL, post_percent, NULL, NULL }, /* %P */ + { NULL, NULL, post_percent, NULL, NULL }, /* %R */ + { NULL, pre__t, post__t, NULL, NULL }, /* %T */ + { NULL, NULL, post_percent, NULL, NULL }, /* %V */ + { NULL, NULL, NULL, NULL, NULL }, /* Ac */ + { 
cond_body, pre_aq, post_aq, NULL, NULL }, /* Ao */ + { cond_body, pre_aq, post_aq, NULL, NULL }, /* Aq */ + { NULL, NULL, NULL, NULL, NULL }, /* At */ + { NULL, NULL, NULL, NULL, NULL }, /* Bc */ + { NULL, pre_bf, post_bf, NULL, NULL }, /* Bf */ + { cond_body, pre_enc, post_enc, "[", "]" }, /* Bo */ + { cond_body, pre_enc, post_enc, "[", "]" }, /* Bq */ + { NULL, pre_ux, NULL, "BSD/OS", NULL }, /* Bsx */ + { NULL, pre_bx, NULL, NULL, NULL }, /* Bx */ + { NULL, pre_skip, NULL, NULL, NULL }, /* Db */ + { NULL, NULL, NULL, NULL, NULL }, /* Dc */ + { cond_body, pre_enc, post_enc, "\\(Lq", "\\(Rq" }, /* Do */ + { cond_body, pre_enc, post_enc, "\\(Lq", "\\(Rq" }, /* Dq */ + { NULL, NULL, NULL, NULL, NULL }, /* Ec */ + { NULL, NULL, NULL, NULL, NULL }, /* Ef */ + { NULL, pre_em, post_font, NULL, NULL }, /* Em */ + { cond_body, pre_eo, post_eo, NULL, NULL }, /* Eo */ + { NULL, pre_ux, NULL, "FreeBSD", NULL }, /* Fx */ + { NULL, pre_sy, post_font, NULL, NULL }, /* Ms */ + { NULL, pre_no, NULL, NULL, NULL }, /* No */ + { NULL, pre_ns, NULL, NULL, NULL }, /* Ns */ + { NULL, pre_ux, NULL, "NetBSD", NULL }, /* Nx */ + { NULL, pre_ux, NULL, "OpenBSD", NULL }, /* Ox */ + { NULL, NULL, NULL, NULL, NULL }, /* Pc */ + { NULL, NULL, post_pf, NULL, NULL }, /* Pf */ + { cond_body, pre_enc, post_enc, "(", ")" }, /* Po */ + { cond_body, pre_enc, post_enc, "(", ")" }, /* Pq */ + { NULL, NULL, NULL, NULL, NULL }, /* Qc */ + { cond_body, pre_enc, post_enc, "\\(oq", "\\(cq" }, /* Ql */ + { cond_body, pre_enc, post_enc, "\"", "\"" }, /* Qo */ + { cond_body, pre_enc, post_enc, "\"", "\"" }, /* Qq */ + { NULL, NULL, NULL, NULL, NULL }, /* Re */ + { cond_body, pre_rs, NULL, NULL, NULL }, /* Rs */ + { NULL, NULL, NULL, NULL, NULL }, /* Sc */ + { cond_body, pre_enc, post_enc, "\\(oq", "\\(cq" }, /* So */ + { cond_body, pre_enc, post_enc, "\\(oq", "\\(cq" }, /* Sq */ + { NULL, pre_sm, NULL, NULL, NULL }, /* Sm */ + { NULL, pre_em, post_font, NULL, NULL }, /* Sx */ + { NULL, pre_sy, post_font, NULL, 
NULL }, /* Sy */ + { NULL, pre_li, post_font, NULL, NULL }, /* Tn */ + { NULL, pre_ux, NULL, "UNIX", NULL }, /* Ux */ + { NULL, NULL, NULL, NULL, NULL }, /* Xc */ + { NULL, NULL, NULL, NULL, NULL }, /* Xo */ + { NULL, pre_fo, post_fo, NULL, NULL }, /* Fo */ + { NULL, NULL, NULL, NULL, NULL }, /* Fc */ + { cond_body, pre_enc, post_enc, "[", "]" }, /* Oo */ + { NULL, NULL, NULL, NULL, NULL }, /* Oc */ + { NULL, pre_bk, post_bk, NULL, NULL }, /* Bk */ + { NULL, NULL, NULL, NULL, NULL }, /* Ek */ + { NULL, pre_ux, NULL, "is currently in beta test.", NULL }, /* Bt */ + { NULL, NULL, NULL, NULL, NULL }, /* Hf */ + { NULL, pre_em, post_font, NULL, NULL }, /* Fr */ + { NULL, pre_ux, NULL, "currently under development.", NULL }, /* Ud */ + { NULL, NULL, post_lb, NULL, NULL }, /* Lb */ + { NULL, pre_pp, NULL, NULL, NULL }, /* Lp */ + { NULL, pre_lk, NULL, NULL, NULL }, /* Lk */ + { NULL, pre_em, post_font, NULL, NULL }, /* Mt */ + { cond_body, pre_enc, post_enc, "{", "}" }, /* Brq */ + { cond_body, pre_enc, post_enc, "{", "}" }, /* Bro */ + { NULL, NULL, NULL, NULL, NULL }, /* Brc */ + { NULL, NULL, post_percent, NULL, NULL }, /* %C */ + { NULL, pre_skip, NULL, NULL, NULL }, /* Es */ + { cond_body, pre_en, post_en, NULL, NULL }, /* En */ + { NULL, pre_ux, NULL, "DragonFly", NULL }, /* Dx */ + { NULL, NULL, post_percent, NULL, NULL }, /* %Q */ + { NULL, pre_br, NULL, NULL, NULL }, /* br */ + { NULL, pre_sp, post_sp, NULL, NULL }, /* sp */ + { NULL, NULL, post_percent, NULL, NULL }, /* %U */ + { NULL, NULL, NULL, NULL, NULL }, /* Ta */ + { NULL, pre_ll, post_sp, NULL, NULL }, /* ll */ + { NULL, NULL, NULL, NULL, NULL }, /* ROOT */ +}; + +static int outflags; +#define MMAN_spc (1 << 0) /* blank character before next word */ +#define MMAN_spc_force (1 << 1) /* even before trailing punctuation */ +#define MMAN_nl (1 << 2) /* break man(7) code line */ +#define MMAN_br (1 << 3) /* break output line */ +#define MMAN_sp (1 << 4) /* insert a blank output line */ +#define MMAN_PP (1 << 
5) /* reset indentation etc. */ +#define MMAN_Sm (1 << 6) /* horizontal spacing mode */ +#define MMAN_Bk (1 << 7) /* word keep mode */ +#define MMAN_Bk_susp (1 << 8) /* suspend this (after a macro) */ +#define MMAN_An_split (1 << 9) /* author mode is "split" */ +#define MMAN_An_nosplit (1 << 10) /* author mode is "nosplit" */ +#define MMAN_PD (1 << 11) /* inter-paragraph spacing disabled */ +#define MMAN_nbrword (1 << 12) /* do not break the next word */ + +#define BL_STACK_MAX 32 + +static int Bl_stack[BL_STACK_MAX]; /* offsets [chars] */ +static int Bl_stack_post[BL_STACK_MAX]; /* add final .RE */ +static int Bl_stack_len; /* number of nested Bl blocks */ +static int TPremain; /* characters before tag is full */ + +static struct { + char *head; + char *tail; + size_t size; +} fontqueue; + + +static void +font_push(char newfont) +{ + + if (fontqueue.head + fontqueue.size <= ++fontqueue.tail) { + fontqueue.size += 8; + fontqueue.head = mandoc_realloc(fontqueue.head, + fontqueue.size); + } + *fontqueue.tail = newfont; + print_word(""); + printf("\\f"); + putchar(newfont); + outflags &= ~MMAN_spc; +} + +static void +font_pop(void) +{ + + if (fontqueue.tail > fontqueue.head) + fontqueue.tail--; + outflags &= ~MMAN_spc; + print_word(""); + printf("\\f"); + putchar(*fontqueue.tail); +} + +static void +print_word(const char *s) +{ + + if ((MMAN_PP | MMAN_sp | MMAN_br | MMAN_nl) & outflags) { + /* + * If we need a newline, print it now and start afresh. + */ + if (MMAN_PP & outflags) { + if (MMAN_sp & outflags) { + if (MMAN_PD & outflags) { + printf("\n.PD"); + outflags &= ~MMAN_PD; + } + } else if ( ! 
(MMAN_PD & outflags)) { + printf("\n.PD 0"); + outflags |= MMAN_PD; + } + printf("\n.PP\n"); + } else if (MMAN_sp & outflags) + printf("\n.sp\n"); + else if (MMAN_br & outflags) + printf("\n.br\n"); + else if (MMAN_nl & outflags) + putchar('\n'); + outflags &= ~(MMAN_PP|MMAN_sp|MMAN_br|MMAN_nl|MMAN_spc); + if (1 == TPremain) + printf(".br\n"); + TPremain = 0; + } else if (MMAN_spc & outflags) { + /* + * If we need a space, only print it if + * (1) it is forced by `No' or + * (2) what follows is not terminating punctuation or + * (3) what follows is longer than one character. + */ + if (MMAN_spc_force & outflags || '\0' == s[0] || + NULL == strchr(".,:;)]?!", s[0]) || '\0' != s[1]) { + if (MMAN_Bk & outflags && + ! (MMAN_Bk_susp & outflags)) + putchar('\\'); + putchar(' '); + if (TPremain) + TPremain--; + } + } + + /* + * Reassign needing space if we're not following opening + * punctuation. + */ + if (MMAN_Sm & outflags && ('\0' == s[0] || + (('(' != s[0] && '[' != s[0]) || '\0' != s[1]))) + outflags |= MMAN_spc; + else + outflags &= ~MMAN_spc; + outflags &= ~(MMAN_spc_force | MMAN_Bk_susp); + + for ( ; *s; s++) { + switch (*s) { + case ASCII_NBRSP: + printf("\\ "); + break; + case ASCII_HYPH: + putchar('-'); + break; + case ASCII_BREAK: + printf("\\:"); + break; + case ' ': + if (MMAN_nbrword & outflags) { + printf("\\ "); + break; + } + /* FALLTHROUGH */ + default: + putchar((unsigned char)*s); + break; + } + if (TPremain) + TPremain--; + } + outflags &= ~MMAN_nbrword; +} + +static void +print_line(const char *s, int newflags) +{ + + outflags &= ~MMAN_br; + outflags |= MMAN_nl; + print_word(s); + outflags |= newflags; +} + +static void +print_block(const char *s, int newflags) +{ + + outflags &= ~MMAN_PP; + if (MMAN_sp & outflags) { + outflags &= ~(MMAN_sp | MMAN_br); + if (MMAN_PD & outflags) { + print_line(".PD", 0); + outflags &= ~MMAN_PD; + } + } else if (! 
(MMAN_PD & outflags)) + print_line(".PD 0", MMAN_PD); + outflags |= MMAN_nl; + print_word(s); + outflags |= MMAN_Bk_susp | newflags; +} + +static void +print_offs(const char *v, int keywords) +{ + char buf[24]; + struct roffsu su; + int sz; + + print_line(".RS", MMAN_Bk_susp); + + /* Convert v into a number (of characters). */ + if (NULL == v || '\0' == *v || (keywords && !strcmp(v, "left"))) + sz = 0; + else if (keywords && !strcmp(v, "indent")) + sz = 6; + else if (keywords && !strcmp(v, "indent-two")) + sz = 12; + else if (a2roffsu(v, &su, SCALE_EN) > 1) { + if (SCALE_EN == su.unit) + sz = su.scale; + else { + /* + * XXX + * If we are inside an enclosing list, + * there is no easy way to add the two + * indentations because they are provided + * in terms of different units. + */ + print_word(v); + outflags |= MMAN_nl; + return; + } + } else + sz = strlen(v); + + /* + * We are inside an enclosing list. + * Add the two indentations. + */ + if (Bl_stack_len) + sz += Bl_stack[Bl_stack_len - 1]; + + (void)snprintf(buf, sizeof(buf), "%dn", sz); + print_word(buf); + outflags |= MMAN_nl; +} + +/* + * Set up the indentation for a list item; used from pre_it(). + */ +static void +print_width(const struct mdoc_bl *bl, const struct roff_node *child) +{ + char buf[24]; + struct roffsu su; + int numeric, remain, sz, chsz; + + numeric = 1; + remain = 0; + + /* Convert the width into a number (of characters). */ + if (bl->width == NULL) + sz = (bl->type == LIST_hang) ? 6 : 0; + else if (a2roffsu(bl->width, &su, SCALE_MAX) > 1) { + if (SCALE_EN == su.unit) + sz = su.scale; + else { + sz = 0; + numeric = 0; + } + } else + sz = strlen(bl->width); + + /* XXX Rough estimation, might have multiple parts. */ + if (bl->type == LIST_enum) + chsz = (bl->count > 8) + 1; + else if (child != NULL && child->type == ROFFT_TEXT) + chsz = strlen(child->string); + else + chsz = 0; + + /* Maybe we are inside an enclosing list? 
*/ + mid_it(); + + /* + * Save our own indentation, + * such that child lists can use it. + */ + Bl_stack[Bl_stack_len++] = sz + 2; + + /* Set up the current list. */ + if (chsz > sz && bl->type != LIST_tag) + print_block(".HP", 0); + else { + print_block(".TP", 0); + remain = sz + 2; + } + if (numeric) { + (void)snprintf(buf, sizeof(buf), "%dn", sz + 2); + print_word(buf); + } else + print_word(bl->width); + TPremain = remain; +} + +static void +print_count(int *count) +{ + char buf[24]; + + (void)snprintf(buf, sizeof(buf), "%d.\\&", ++*count); + print_word(buf); +} + +void +man_man(void *arg, const struct roff_man *man) +{ + + /* + * Dump the keep buffer. + * We're guaranteed by now that this exists (is non-NULL). + * Flush stdout afterward, just in case. + */ + fputs(mparse_getkeep(man_mparse(man)), stdout); + fflush(stdout); +} + +void +man_mdoc(void *arg, const struct roff_man *mdoc) +{ + struct roff_node *n; + + printf(".TH \"%s\" \"%s\" \"%s\" \"%s\" \"%s\"\n", + mdoc->meta.title, + (mdoc->meta.msec == NULL ? "" : mdoc->meta.msec), + mdoc->meta.date, mdoc->meta.os, mdoc->meta.vol); + + /* Disable hyphenation and if nroff, disable justification. */ + printf(".nh\n.if n .ad l"); + + outflags = MMAN_nl | MMAN_Sm; + if (0 == fontqueue.size) { + fontqueue.size = 8; + fontqueue.head = fontqueue.tail = mandoc_malloc(8); + *fontqueue.tail = 'R'; + } + for (n = mdoc->first->child; n != NULL; n = n->next) + print_node(&mdoc->meta, n); + putchar('\n'); +} + +static void +print_node(DECL_ARGS) +{ + const struct manact *act; + struct roff_node *sub; + int cond, do_sub; + + /* + * Break the line if we were parsed subsequent the current node. + * This makes the page structure be more consistent. + */ + if (MMAN_spc & outflags && MDOC_LINE & n->flags) + outflags |= MMAN_nl; + + act = NULL; + cond = 0; + do_sub = 1; + n->flags &= ~MDOC_ENDED; + + if (n->type == ROFFT_TEXT) { + /* + * Make sure that we don't happen to start with a + * control character at the start of a line. 
+ */ + if (MMAN_nl & outflags && + ('.' == *n->string || '\'' == *n->string)) { + print_word(""); + printf("\\&"); + outflags &= ~MMAN_spc; + } + if (outflags & MMAN_Sm && ! (n->flags & MDOC_DELIMC)) + outflags |= MMAN_spc_force; + print_word(n->string); + if (outflags & MMAN_Sm && ! (n->flags & MDOC_DELIMO)) + outflags |= MMAN_spc; + } else { + /* + * Conditionally run the pre-node action handler for a + * node. + */ + act = manacts + n->tok; + cond = act->cond == NULL || (*act->cond)(meta, n); + if (cond && act->pre != NULL && + (n->end == ENDBODY_NOT || n->child != NULL)) + do_sub = (*act->pre)(meta, n); + } + + /* + * Conditionally run all child nodes. + * Note that this iterates over children instead of using + * recursion. This prevents unnecessary depth in the stack. + */ + if (do_sub) + for (sub = n->child; sub; sub = sub->next) + print_node(meta, sub); + + /* + * Lastly, conditionally run the post-node handler. + */ + if (MDOC_ENDED & n->flags) + return; + + if (cond && act->post) + (*act->post)(meta, n); + + if (ENDBODY_NOT != n->end) + n->body->flags |= MDOC_ENDED; + + if (ENDBODY_NOSPACE == n->end) + outflags &= ~(MMAN_spc | MMAN_nl); +} + +static int +cond_head(DECL_ARGS) +{ + + return n->type == ROFFT_HEAD; +} + +static int +cond_body(DECL_ARGS) +{ + + return n->type == ROFFT_BODY; +} + +static int +pre_enc(DECL_ARGS) +{ + const char *prefix; + + prefix = manacts[n->tok].prefix; + if (NULL == prefix) + return 1; + print_word(prefix); + outflags &= ~MMAN_spc; + return 1; +} + +static void +post_enc(DECL_ARGS) +{ + const char *suffix; + + suffix = manacts[n->tok].suffix; + if (NULL == suffix) + return; + outflags &= ~(MMAN_spc | MMAN_nl); + print_word(suffix); +} + +static int +pre_ex(DECL_ARGS) +{ + struct roff_node *nch; + + outflags |= MMAN_br | MMAN_nl; + + print_word("The"); + + for (nch = n->child; nch != NULL; nch = nch->next) { + font_push('B'); + print_word(nch->string); + font_pop(); + + if (nch->next == NULL) + continue; + + if (nch->prev != 
NULL || nch->next->next != NULL) { + outflags &= ~MMAN_spc; + print_word(","); + } + if (nch->next->next == NULL) + print_word("and"); + } + + if (n->child != NULL && n->child->next != NULL) + print_word("utilities exit\\~0"); + else + print_word("utility exits\\~0"); + + print_word("on success, and\\~>0 if an error occurs."); + outflags |= MMAN_nl; + return 0; +} + +static void +post_font(DECL_ARGS) +{ + + font_pop(); +} + +static void +post_percent(DECL_ARGS) +{ + + if (pre_em == manacts[n->tok].pre) + font_pop(); + if (n->next) { + print_word(","); + if (n->prev && n->prev->tok == n->tok && + n->next->tok == n->tok) + print_word("and"); + } else { + print_word("."); + outflags |= MMAN_nl; + } +} + +static int +pre__t(DECL_ARGS) +{ + + if (n->parent && MDOC_Rs == n->parent->tok && + n->parent->norm->Rs.quote_T) { + print_word(""); + putchar('\"'); + outflags &= ~MMAN_spc; + } else + font_push('I'); + return 1; +} + +static void +post__t(DECL_ARGS) +{ + + if (n->parent && MDOC_Rs == n->parent->tok && + n->parent->norm->Rs.quote_T) { + outflags &= ~MMAN_spc; + print_word(""); + putchar('\"'); + } else + font_pop(); + post_percent(meta, n); +} + +/* + * Print before a section header. + */ +static int +pre_sect(DECL_ARGS) +{ + + if (n->type == ROFFT_HEAD) { + outflags |= MMAN_sp; + print_block(manacts[n->tok].prefix, 0); + print_word(""); + putchar('\"'); + outflags &= ~MMAN_spc; + } + return 1; +} + +/* + * Print subsequent a section header. + */ +static void +post_sect(DECL_ARGS) +{ + + if (n->type != ROFFT_HEAD) + return; + outflags &= ~MMAN_spc; + print_word(""); + putchar('\"'); + outflags |= MMAN_nl; + if (MDOC_Sh == n->tok && SEC_AUTHORS == n->sec) + outflags &= ~(MMAN_An_split | MMAN_An_nosplit); +} + +/* See mdoc_term.c, synopsis_pre() for comments. */ +static void +pre_syn(const struct roff_node *n) +{ + + if (NULL == n->prev || ! 
(MDOC_SYNPRETTY & n->flags)) + return; + + if (n->prev->tok == n->tok && + MDOC_Ft != n->tok && + MDOC_Fo != n->tok && + MDOC_Fn != n->tok) { + outflags |= MMAN_br; + return; + } + + switch (n->prev->tok) { + case MDOC_Fd: + case MDOC_Fn: + case MDOC_Fo: + case MDOC_In: + case MDOC_Vt: + outflags |= MMAN_sp; + break; + case MDOC_Ft: + if (MDOC_Fn != n->tok && MDOC_Fo != n->tok) { + outflags |= MMAN_sp; + break; + } + /* FALLTHROUGH */ + default: + outflags |= MMAN_br; + break; + } +} + +static int +pre_an(DECL_ARGS) +{ + + switch (n->norm->An.auth) { + case AUTH_split: + outflags &= ~MMAN_An_nosplit; + outflags |= MMAN_An_split; + return 0; + case AUTH_nosplit: + outflags &= ~MMAN_An_split; + outflags |= MMAN_An_nosplit; + return 0; + default: + if (MMAN_An_split & outflags) + outflags |= MMAN_br; + else if (SEC_AUTHORS == n->sec && + ! (MMAN_An_nosplit & outflags)) + outflags |= MMAN_An_split; + return 1; + } +} + +static int +pre_ap(DECL_ARGS) +{ + + outflags &= ~MMAN_spc; + print_word("'"); + outflags &= ~MMAN_spc; + return 0; +} + +static int +pre_aq(DECL_ARGS) +{ + + print_word(n->child != NULL && n->child->next == NULL && + n->child->tok == MDOC_Mt ? "<" : "\\(la"); + outflags &= ~MMAN_spc; + return 1; +} + +static void +post_aq(DECL_ARGS) +{ + + outflags &= ~(MMAN_spc | MMAN_nl); + print_word(n->child != NULL && n->child->next == NULL && + n->child->tok == MDOC_Mt ? ">" : "\\(ra"); +} + +static int +pre_bd(DECL_ARGS) +{ + + outflags &= ~(MMAN_PP | MMAN_sp | MMAN_br); + + if (DISP_unfilled == n->norm->Bd.type || + DISP_literal == n->norm->Bd.type) + print_line(".nf", 0); + if (0 == n->norm->Bd.comp && NULL != n->parent->prev) + outflags |= MMAN_sp; + print_offs(n->norm->Bd.offs, 1); + return 1; +} + +static void +post_bd(DECL_ARGS) +{ + + /* Close out this display. */ + print_line(".RE", MMAN_nl); + if (DISP_unfilled == n->norm->Bd.type || + DISP_literal == n->norm->Bd.type) + print_line(".fi", MMAN_nl); + + /* Maybe we are inside an enclosing list? 
*/ + if (NULL != n->parent->next) + mid_it(); +} + +static int +pre_bf(DECL_ARGS) +{ + + switch (n->type) { + case ROFFT_BLOCK: + return 1; + case ROFFT_BODY: + break; + default: + return 0; + } + switch (n->norm->Bf.font) { + case FONT_Em: + font_push('I'); + break; + case FONT_Sy: + font_push('B'); + break; + default: + font_push('R'); + break; + } + return 1; +} + +static void +post_bf(DECL_ARGS) +{ + + if (n->type == ROFFT_BODY) + font_pop(); +} + +static int +pre_bk(DECL_ARGS) +{ + + switch (n->type) { + case ROFFT_BLOCK: + return 1; + case ROFFT_BODY: + outflags |= MMAN_Bk; + return 1; + default: + return 0; + } +} + +static void +post_bk(DECL_ARGS) +{ + + if (n->type == ROFFT_BODY) + outflags &= ~MMAN_Bk; +} + +static int +pre_bl(DECL_ARGS) +{ + size_t icol; + + /* + * print_offs() will increase the -offset to account for + * a possible enclosing .It, but any enclosed .It blocks + * just nest and do not add up their indentation. + */ + if (n->norm->Bl.offs) { + print_offs(n->norm->Bl.offs, 0); + Bl_stack[Bl_stack_len++] = 0; + } + + switch (n->norm->Bl.type) { + case LIST_enum: + n->norm->Bl.count = 0; + return 1; + case LIST_column: + break; + default: + return 1; + } + + if (n->child != NULL) { + print_line(".TS", MMAN_nl); + for (icol = 0; icol < n->norm->Bl.ncols; icol++) + print_word("l"); + print_word("."); + } + outflags |= MMAN_nl; + return 1; +} + +static void +post_bl(DECL_ARGS) +{ + + switch (n->norm->Bl.type) { + case LIST_column: + if (n->child != NULL) + print_line(".TE", 0); + break; + case LIST_enum: + n->norm->Bl.count = 0; + break; + default: + break; + } + + if (n->norm->Bl.offs) { + print_line(".RE", MMAN_nl); + assert(Bl_stack_len); + Bl_stack_len--; + assert(0 == Bl_stack[Bl_stack_len]); + } else { + outflags |= MMAN_PP | MMAN_nl; + outflags &= ~(MMAN_sp | MMAN_br); + } + + /* Maybe we are inside an enclosing list? 
*/ + if (NULL != n->parent->next) + mid_it(); + +} + +static int +pre_br(DECL_ARGS) +{ + + outflags |= MMAN_br; + return 0; +} + +static int +pre_bx(DECL_ARGS) +{ + + n = n->child; + if (n) { + print_word(n->string); + outflags &= ~MMAN_spc; + n = n->next; + } + print_word("BSD"); + if (NULL == n) + return 0; + outflags &= ~MMAN_spc; + print_word("-"); + outflags &= ~MMAN_spc; + print_word(n->string); + return 0; +} + +static int +pre_dl(DECL_ARGS) +{ + + print_offs("6n", 0); + return 1; +} + +static void +post_dl(DECL_ARGS) +{ + + print_line(".RE", MMAN_nl); + + /* Maybe we are inside an enclosing list? */ + if (NULL != n->parent->next) + mid_it(); +} + +static int +pre_em(DECL_ARGS) +{ + + font_push('I'); + return 1; +} + +static int +pre_en(DECL_ARGS) +{ + + if (NULL == n->norm->Es || + NULL == n->norm->Es->child) + return 1; + + print_word(n->norm->Es->child->string); + outflags &= ~MMAN_spc; + return 1; +} + +static void +post_en(DECL_ARGS) +{ + + if (NULL == n->norm->Es || + NULL == n->norm->Es->child || + NULL == n->norm->Es->child->next) + return; + + outflags &= ~MMAN_spc; + print_word(n->norm->Es->child->next->string); + return; +} + +static int +pre_eo(DECL_ARGS) +{ + + if (n->end == ENDBODY_NOT && + n->parent->head->child == NULL && + n->child != NULL && + n->child->end != ENDBODY_NOT) + print_word("\\&"); + else if (n->end != ENDBODY_NOT ? n->child != NULL : + n->parent->head->child != NULL && (n->child != NULL || + (n->parent->tail != NULL && n->parent->tail->child != NULL))) + outflags &= ~(MMAN_spc | MMAN_nl); + return 1; +} + +static void +post_eo(DECL_ARGS) +{ + int body, tail; + + if (n->end != ENDBODY_NOT) { + outflags |= MMAN_spc; + return; + } + + body = n->child != NULL || n->parent->head->child != NULL; + tail = n->parent->tail != NULL && n->parent->tail->child != NULL; + + if (body && tail) + outflags &= ~MMAN_spc; + else if ( ! (body || tail)) + print_word("\\&"); + else if ( ! 
tail) + outflags |= MMAN_spc; +} + +static int +pre_fa(DECL_ARGS) +{ + int am_Fa; + + am_Fa = MDOC_Fa == n->tok; + + if (am_Fa) + n = n->child; + + while (NULL != n) { + font_push('I'); + if (am_Fa || MDOC_SYNPRETTY & n->flags) + outflags |= MMAN_nbrword; + print_node(meta, n); + font_pop(); + if (NULL != (n = n->next)) + print_word(","); + } + return 0; +} + +static void +post_fa(DECL_ARGS) +{ + + if (NULL != n->next && MDOC_Fa == n->next->tok) + print_word(","); +} + +static int +pre_fd(DECL_ARGS) +{ + + pre_syn(n); + font_push('B'); + return 1; +} + +static void +post_fd(DECL_ARGS) +{ + + font_pop(); + outflags |= MMAN_br; +} + +static int +pre_fl(DECL_ARGS) +{ + + font_push('B'); + print_word("\\-"); + if (n->child != NULL) + outflags &= ~MMAN_spc; + return 1; +} + +static void +post_fl(DECL_ARGS) +{ + + font_pop(); + if (!(n->child != NULL || + n->next == NULL || + n->next->type == ROFFT_TEXT || + n->next->flags & MDOC_LINE)) + outflags &= ~MMAN_spc; +} + +static int +pre_fn(DECL_ARGS) +{ + + pre_syn(n); + + n = n->child; + if (NULL == n) + return 0; + + if (MDOC_SYNPRETTY & n->flags) + print_block(".HP 4n", MMAN_nl); + + font_push('B'); + print_node(meta, n); + font_pop(); + outflags &= ~MMAN_spc; + print_word("("); + outflags &= ~MMAN_spc; + + n = n->next; + if (NULL != n) + pre_fa(meta, n); + return 0; +} + +static void +post_fn(DECL_ARGS) +{ + + print_word(")"); + if (MDOC_SYNPRETTY & n->flags) { + print_word(";"); + outflags |= MMAN_PP; + } +} + +static int +pre_fo(DECL_ARGS) +{ + + switch (n->type) { + case ROFFT_BLOCK: + pre_syn(n); + break; + case ROFFT_HEAD: + if (n->child == NULL) + return 0; + if (MDOC_SYNPRETTY & n->flags) + print_block(".HP 4n", MMAN_nl); + font_push('B'); + break; + case ROFFT_BODY: + outflags &= ~(MMAN_spc | MMAN_nl); + print_word("("); + outflags &= ~MMAN_spc; + break; + default: + break; + } + return 1; +} + +static void +post_fo(DECL_ARGS) +{ + + switch (n->type) { + case ROFFT_HEAD: + if (n->child != NULL) + font_pop(); + 
break; + case ROFFT_BODY: + post_fn(meta, n); + break; + default: + break; + } +} + +static int +pre_ft(DECL_ARGS) +{ + + pre_syn(n); + font_push('I'); + return 1; +} + +static int +pre_in(DECL_ARGS) +{ + + if (MDOC_SYNPRETTY & n->flags) { + pre_syn(n); + font_push('B'); + print_word("#include <"); + outflags &= ~MMAN_spc; + } else { + print_word("<"); + outflags &= ~MMAN_spc; + font_push('I'); + } + return 1; +} + +static void +post_in(DECL_ARGS) +{ + + if (MDOC_SYNPRETTY & n->flags) { + outflags &= ~MMAN_spc; + print_word(">"); + font_pop(); + outflags |= MMAN_br; + } else { + font_pop(); + outflags &= ~MMAN_spc; + print_word(">"); + } +} + +static int +pre_it(DECL_ARGS) +{ + const struct roff_node *bln; + + switch (n->type) { + case ROFFT_HEAD: + outflags |= MMAN_PP | MMAN_nl; + bln = n->parent->parent; + if (0 == bln->norm->Bl.comp || + (NULL == n->parent->prev && + NULL == bln->parent->prev)) + outflags |= MMAN_sp; + outflags &= ~MMAN_br; + switch (bln->norm->Bl.type) { + case LIST_item: + return 0; + case LIST_inset: + case LIST_diag: + case LIST_ohang: + if (bln->norm->Bl.type == LIST_diag) + print_line(".B \"", 0); + else + print_line(".R \"", 0); + outflags &= ~MMAN_spc; + return 1; + case LIST_bullet: + case LIST_dash: + case LIST_hyphen: + print_width(&bln->norm->Bl, NULL); + TPremain = 0; + outflags |= MMAN_nl; + font_push('B'); + if (LIST_bullet == bln->norm->Bl.type) + print_word("\\(bu"); + else + print_word("-"); + font_pop(); + outflags |= MMAN_nl; + return 0; + case LIST_enum: + print_width(&bln->norm->Bl, NULL); + TPremain = 0; + outflags |= MMAN_nl; + print_count(&bln->norm->Bl.count); + outflags |= MMAN_nl; + return 0; + case LIST_hang: + print_width(&bln->norm->Bl, n->child); + TPremain = 0; + outflags |= MMAN_nl; + return 1; + case LIST_tag: + print_width(&bln->norm->Bl, n->child); + putchar('\n'); + outflags &= ~MMAN_spc; + return 1; + default: + return 1; + } + default: + break; + } + return 1; +} + +/* + * This function is called after 
closing out an indented block. + * If we are inside an enclosing list, restore its indentation. + */ +static void +mid_it(void) +{ + char buf[24]; + + /* Nothing to do outside a list. */ + if (0 == Bl_stack_len || 0 == Bl_stack[Bl_stack_len - 1]) + return; + + /* The indentation has already been set up. */ + if (Bl_stack_post[Bl_stack_len - 1]) + return; + + /* Restore the indentation of the enclosing list. */ + print_line(".RS", MMAN_Bk_susp); + (void)snprintf(buf, sizeof(buf), "%dn", + Bl_stack[Bl_stack_len - 1]); + print_word(buf); + + /* Remeber to close out this .RS block later. */ + Bl_stack_post[Bl_stack_len - 1] = 1; +} + +static void +post_it(DECL_ARGS) +{ + const struct roff_node *bln; + + bln = n->parent->parent; + + switch (n->type) { + case ROFFT_HEAD: + switch (bln->norm->Bl.type) { + case LIST_diag: + outflags &= ~MMAN_spc; + print_word("\\ "); + break; + case LIST_ohang: + outflags |= MMAN_br; + break; + default: + break; + } + break; + case ROFFT_BODY: + switch (bln->norm->Bl.type) { + case LIST_bullet: + case LIST_dash: + case LIST_hyphen: + case LIST_enum: + case LIST_hang: + case LIST_tag: + assert(Bl_stack_len); + Bl_stack[--Bl_stack_len] = 0; + + /* + * Our indentation had to be restored + * after a child display or child list. + * Close out that indentation block now. 
+ */ + if (Bl_stack_post[Bl_stack_len]) { + print_line(".RE", MMAN_nl); + Bl_stack_post[Bl_stack_len] = 0; + } + break; + case LIST_column: + if (NULL != n->next) { + putchar('\t'); + outflags &= ~MMAN_spc; + } + break; + default: + break; + } + break; + default: + break; + } +} + +static void +post_lb(DECL_ARGS) +{ + + if (SEC_LIBRARY == n->sec) + outflags |= MMAN_br; +} + +static int +pre_lk(DECL_ARGS) +{ + const struct roff_node *link, *descr; + + if (NULL == (link = n->child)) + return 0; + + if (NULL != (descr = link->next)) { + font_push('I'); + while (NULL != descr) { + print_word(descr->string); + descr = descr->next; + } + print_word(":"); + font_pop(); + } + + font_push('B'); + print_word(link->string); + font_pop(); + return 0; +} + +static int +pre_ll(DECL_ARGS) +{ + + print_line(".ll", 0); + return 1; +} + +static int +pre_li(DECL_ARGS) +{ + + font_push('R'); + return 1; +} + +static int +pre_nm(DECL_ARGS) +{ + char *name; + + if (n->type == ROFFT_BLOCK) { + outflags |= MMAN_Bk; + pre_syn(n); + } + if (n->type != ROFFT_ELEM && n->type != ROFFT_HEAD) + return 1; + name = n->child ? n->child->string : meta->name; + if (NULL == name) + return 0; + if (n->type == ROFFT_HEAD) { + if (NULL == n->parent->prev) + outflags |= MMAN_sp; + print_block(".HP", 0); + printf(" %zun", strlen(name) + 1); + outflags |= MMAN_nl; + } + font_push('B'); + if (NULL == n->child) + print_word(meta->name); + return 1; +} + +static void +post_nm(DECL_ARGS) +{ + + switch (n->type) { + case ROFFT_BLOCK: + outflags &= ~MMAN_Bk; + break; + case ROFFT_HEAD: + case ROFFT_ELEM: + if (n->child != NULL || meta->name != NULL) + font_pop(); + break; + default: + break; + } +} + +static int +pre_no(DECL_ARGS) +{ + + outflags |= MMAN_spc_force; + return 1; +} + +static int +pre_ns(DECL_ARGS) +{ + + outflags &= ~MMAN_spc; + return 0; +} + +static void +post_pf(DECL_ARGS) +{ + + if ( ! 
(n->next == NULL || n->next->flags & MDOC_LINE)) + outflags &= ~MMAN_spc; +} + +static int +pre_pp(DECL_ARGS) +{ + + if (MDOC_It != n->parent->tok) + outflags |= MMAN_PP; + outflags |= MMAN_sp | MMAN_nl; + outflags &= ~MMAN_br; + return 0; +} + +static int +pre_rs(DECL_ARGS) +{ + + if (SEC_SEE_ALSO == n->sec) { + outflags |= MMAN_PP | MMAN_sp | MMAN_nl; + outflags &= ~MMAN_br; + } + return 1; +} + +static int +pre_rv(DECL_ARGS) +{ + struct roff_node *nch; + + outflags |= MMAN_br | MMAN_nl; + + if (n->child != NULL) { + print_word("The"); + + for (nch = n->child; nch != NULL; nch = nch->next) { + font_push('B'); + print_word(nch->string); + font_pop(); + + outflags &= ~MMAN_spc; + print_word("()"); + + if (nch->next == NULL) + continue; + + if (nch->prev != NULL || nch->next->next != NULL) { + outflags &= ~MMAN_spc; + print_word(","); + } + if (nch->next->next == NULL) + print_word("and"); + } + + if (n->child != NULL && n->child->next != NULL) + print_word("functions return"); + else + print_word("function returns"); + + print_word("the value\\~0 if successful;"); + } else + print_word("Upon successful completion, " + "the value\\~0 is returned;"); + + print_word("otherwise the value\\~\\-1 is returned" + " and the global variable"); + + font_push('I'); + print_word("errno"); + font_pop(); + + print_word("is set to indicate the error."); + outflags |= MMAN_nl; + return 0; +} + +static int +pre_skip(DECL_ARGS) +{ + + return 0; +} + +static int +pre_sm(DECL_ARGS) +{ + + if (NULL == n->child) + outflags ^= MMAN_Sm; + else if (0 == strcmp("on", n->child->string)) + outflags |= MMAN_Sm; + else + outflags &= ~MMAN_Sm; + + if (MMAN_Sm & outflags) + outflags |= MMAN_spc; + + return 0; +} + +static int +pre_sp(DECL_ARGS) +{ + + if (MMAN_PP & outflags) { + outflags &= ~MMAN_PP; + print_line(".PP", 0); + } else + print_line(".sp", 0); + return 1; +} + +static void +post_sp(DECL_ARGS) +{ + + outflags |= MMAN_nl; +} + +static int +pre_sy(DECL_ARGS) +{ + + font_push('B'); + 
return 1; +} + +static int +pre_vt(DECL_ARGS) +{ + + if (MDOC_SYNPRETTY & n->flags) { + switch (n->type) { + case ROFFT_BLOCK: + pre_syn(n); + return 1; + case ROFFT_BODY: + break; + default: + return 0; + } + } + font_push('I'); + return 1; +} + +static void +post_vt(DECL_ARGS) +{ + + if (n->flags & MDOC_SYNPRETTY && n->type != ROFFT_BODY) + return; + font_pop(); +} + +static int +pre_xr(DECL_ARGS) +{ + + n = n->child; + if (NULL == n) + return 0; + print_node(meta, n); + n = n->next; + if (NULL == n) + return 0; + outflags &= ~MMAN_spc; + print_word("("); + print_node(meta, n); + print_word(")"); + return 0; +} + +static int +pre_ux(DECL_ARGS) +{ + + print_word(manacts[n->tok].prefix); + if (NULL == n->child) + return 0; + outflags &= ~MMAN_spc; + print_word("\\ "); + outflags &= ~MMAN_spc; + return 1; +} diff --git a/contrib/mdocml/mdoc_state.c b/contrib/mdocml/mdoc_state.c new file mode 100644 index 0000000..cbd7376 --- /dev/null +++ b/contrib/mdocml/mdoc_state.c @@ -0,0 +1,292 @@ +/* $Id: mdoc_state.c,v 1.3 2015/10/30 18:53:54 schwarze Exp $ */ +/* + * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include <sys/types.h> + +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "libmandoc.h" +#include "libmdoc.h" + +#define STATE_ARGS struct roff_man *mdoc, struct roff_node *n + +typedef void (*state_handler)(STATE_ARGS); + +static void state_bd(STATE_ARGS); +static void state_bl(STATE_ARGS); +static void state_dl(STATE_ARGS); +static void state_sh(STATE_ARGS); +static void state_sm(STATE_ARGS); + +static const state_handler state_handlers[MDOC_MAX] = { + NULL, /* Ap */ + NULL, /* Dd */ + NULL, /* Dt */ + NULL, /* Os */ + state_sh, /* Sh */ + NULL, /* Ss */ + NULL, /* Pp */ + NULL, /* D1 */ + state_dl, /* Dl */ + state_bd, /* Bd */ + NULL, /* Ed */ + state_bl, /* Bl */ + NULL, /* El */ + NULL, /* It */ + NULL, /* Ad */ + NULL, /* An */ + NULL, /* Ar */ + NULL, /* Cd */ + NULL, /* Cm */ + NULL, /* Dv */ + NULL, /* Er */ + NULL, /* Ev */ + NULL, /* Ex */ + NULL, /* Fa */ + NULL, /* Fd */ + NULL, /* Fl */ + NULL, /* Fn */ + NULL, /* Ft */ + NULL, /* Ic */ + NULL, /* In */ + NULL, /* Li */ + NULL, /* Nd */ + NULL, /* Nm */ + NULL, /* Op */ + NULL, /* Ot */ + NULL, /* Pa */ + NULL, /* Rv */ + NULL, /* St */ + NULL, /* Va */ + NULL, /* Vt */ + NULL, /* Xr */ + NULL, /* %A */ + NULL, /* %B */ + NULL, /* %D */ + NULL, /* %I */ + NULL, /* %J */ + NULL, /* %N */ + NULL, /* %O */ + NULL, /* %P */ + NULL, /* %R */ + NULL, /* %T */ + NULL, /* %V */ + NULL, /* Ac */ + NULL, /* Ao */ + NULL, /* Aq */ + NULL, /* At */ + NULL, /* Bc */ + NULL, /* Bf */ + NULL, /* Bo */ + NULL, /* Bq */ + NULL, /* Bsx */ + NULL, /* Bx */ + NULL, /* Db */ + NULL, /* Dc */ + NULL, /* Do */ + NULL, /* Dq */ + NULL, /* Ec */ + NULL, /* Ef */ + NULL, /* Em */ + NULL, /* Eo */ + NULL, /* Fx */ + NULL, /* Ms */ + NULL, /* No */ + NULL, /* Ns */ + NULL, /* Nx */ + NULL, /* Ox */ + NULL, /* Pc */ + NULL, /* Pf */ + NULL, /* Po */ + NULL, /* Pq */ + NULL, /* Qc */ + NULL, /* Ql */ + NULL, /* Qo */ + NULL, /* Qq */ + NULL, /* Re */ + NULL, /* 
Rs */ + NULL, /* Sc */ + NULL, /* So */ + NULL, /* Sq */ + state_sm, /* Sm */ + NULL, /* Sx */ + NULL, /* Sy */ + NULL, /* Tn */ + NULL, /* Ux */ + NULL, /* Xc */ + NULL, /* Xo */ + NULL, /* Fo */ + NULL, /* Fc */ + NULL, /* Oo */ + NULL, /* Oc */ + NULL, /* Bk */ + NULL, /* Ek */ + NULL, /* Bt */ + NULL, /* Hf */ + NULL, /* Fr */ + NULL, /* Ud */ + NULL, /* Lb */ + NULL, /* Lp */ + NULL, /* Lk */ + NULL, /* Mt */ + NULL, /* Brq */ + NULL, /* Bro */ + NULL, /* Brc */ + NULL, /* %C */ + NULL, /* Es */ + NULL, /* En */ + NULL, /* Dx */ + NULL, /* %Q */ + NULL, /* br */ + NULL, /* sp */ + NULL, /* %U */ + NULL, /* Ta */ + NULL, /* ll */ +}; + + +void +mdoc_state(struct roff_man *mdoc, struct roff_node *n) +{ + state_handler handler; + + if (n->tok == TOKEN_NONE) + return; + + if ( ! (mdoc_macros[n->tok].flags & MDOC_PROLOGUE)) + mdoc->flags |= MDOC_PBODY; + + handler = state_handlers[n->tok]; + if (*handler) + (*handler)(mdoc, n); +} + +void +mdoc_state_reset(struct roff_man *mdoc) +{ + + roff_setreg(mdoc->roff, "nS", 0, '='); + mdoc->flags = 0; +} + +static void +state_bd(STATE_ARGS) +{ + enum mdocargt arg; + + if (n->type != ROFFT_HEAD && + (n->type != ROFFT_BODY || n->end != ENDBODY_NOT)) + return; + + if (n->parent->args == NULL) + return; + + arg = n->parent->args->argv[0].arg; + if (arg != MDOC_Literal && arg != MDOC_Unfilled) + return; + + state_dl(mdoc, n); +} + +static void +state_bl(STATE_ARGS) +{ + + if (n->type != ROFFT_HEAD || n->parent->args == NULL) + return; + + switch(n->parent->args->argv[0].arg) { + case MDOC_Diag: + n->norm->Bl.type = LIST_diag; + break; + case MDOC_Column: + n->norm->Bl.type = LIST_column; + break; + default: + break; + } +} + +static void +state_dl(STATE_ARGS) +{ + + switch (n->type) { + case ROFFT_HEAD: + mdoc->flags |= MDOC_LITERAL; + break; + case ROFFT_BODY: + mdoc->flags &= ~MDOC_LITERAL; + break; + default: + break; + } +} + +static void +state_sh(STATE_ARGS) +{ + struct roff_node *nch; + char *secname; + + if (n->type != 
ROFFT_HEAD) + return; + + if ( ! (n->flags & MDOC_VALID)) { + secname = NULL; + deroff(&secname, n); + + /* + * Set the section attribute for the BLOCK, HEAD, + * and HEAD children; the latter can only be TEXT + * nodes, so no recursion is needed. For other + * nodes, including the .Sh BODY, this is done + * when allocating the node data structures, but + * for .Sh BLOCK and HEAD, the section is still + * unknown at that time. + */ + + n->sec = n->parent->sec = secname == NULL ? + SEC_CUSTOM : mdoc_a2sec(secname); + for (nch = n->child; nch != NULL; nch = nch->next) + nch->sec = n->sec; + free(secname); + } + + if ((mdoc->lastsec = n->sec) == SEC_SYNOPSIS) { + roff_setreg(mdoc->roff, "nS", 1, '='); + mdoc->flags |= MDOC_SYNOPSIS; + } else { + roff_setreg(mdoc->roff, "nS", 0, '='); + mdoc->flags &= ~MDOC_SYNOPSIS; + } +} + +static void +state_sm(STATE_ARGS) +{ + + if (n->child == NULL) + mdoc->flags ^= MDOC_SMOFF; + else if ( ! strcmp(n->child->string, "on")) + mdoc->flags &= ~MDOC_SMOFF; + else if ( ! strcmp(n->child->string, "off")) + mdoc->flags |= MDOC_SMOFF; +} diff --git a/contrib/mdocml/mdoc_term.c b/contrib/mdocml/mdoc_term.c new file mode 100644 index 0000000..e846436 --- /dev/null +++ b/contrib/mdocml/mdoc_term.c @@ -0,0 +1,2245 @@ +/* $Id: mdoc_term.c,v 1.331 2016/01/08 17:48:09 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010, 2012-2016 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2013 Franco Fichtner <franco@lastsummer.de> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <limits.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "out.h" +#include "term.h" +#include "tag.h" +#include "main.h" + +struct termpair { + struct termpair *ppair; + int count; +}; + +#define DECL_ARGS struct termp *p, \ + struct termpair *pair, \ + const struct roff_meta *meta, \ + struct roff_node *n + +struct termact { + int (*pre)(DECL_ARGS); + void (*post)(DECL_ARGS); +}; + +static int a2width(const struct termp *, const char *); + +static void print_bvspace(struct termp *, + const struct roff_node *, + const struct roff_node *); +static void print_mdoc_node(DECL_ARGS); +static void print_mdoc_nodelist(DECL_ARGS); +static void print_mdoc_head(struct termp *, const struct roff_meta *); +static void print_mdoc_foot(struct termp *, const struct roff_meta *); +static void synopsis_pre(struct termp *, + const struct roff_node *); + +static void termp____post(DECL_ARGS); +static void termp__t_post(DECL_ARGS); +static void termp_bd_post(DECL_ARGS); +static void termp_bk_post(DECL_ARGS); +static void termp_bl_post(DECL_ARGS); +static void termp_eo_post(DECL_ARGS); +static void termp_fd_post(DECL_ARGS); +static void termp_fo_post(DECL_ARGS); +static void termp_in_post(DECL_ARGS); +static void termp_it_post(DECL_ARGS); +static void termp_lb_post(DECL_ARGS); +static void termp_nm_post(DECL_ARGS); +static void termp_pf_post(DECL_ARGS); +static void termp_quote_post(DECL_ARGS); +static void 
termp_sh_post(DECL_ARGS); +static void termp_ss_post(DECL_ARGS); + +static int termp__a_pre(DECL_ARGS); +static int termp__t_pre(DECL_ARGS); +static int termp_an_pre(DECL_ARGS); +static int termp_ap_pre(DECL_ARGS); +static int termp_bd_pre(DECL_ARGS); +static int termp_bf_pre(DECL_ARGS); +static int termp_bk_pre(DECL_ARGS); +static int termp_bl_pre(DECL_ARGS); +static int termp_bold_pre(DECL_ARGS); +static int termp_bt_pre(DECL_ARGS); +static int termp_bx_pre(DECL_ARGS); +static int termp_cd_pre(DECL_ARGS); +static int termp_d1_pre(DECL_ARGS); +static int termp_eo_pre(DECL_ARGS); +static int termp_er_pre(DECL_ARGS); +static int termp_ex_pre(DECL_ARGS); +static int termp_fa_pre(DECL_ARGS); +static int termp_fd_pre(DECL_ARGS); +static int termp_fl_pre(DECL_ARGS); +static int termp_fn_pre(DECL_ARGS); +static int termp_fo_pre(DECL_ARGS); +static int termp_ft_pre(DECL_ARGS); +static int termp_in_pre(DECL_ARGS); +static int termp_it_pre(DECL_ARGS); +static int termp_li_pre(DECL_ARGS); +static int termp_ll_pre(DECL_ARGS); +static int termp_lk_pre(DECL_ARGS); +static int termp_nd_pre(DECL_ARGS); +static int termp_nm_pre(DECL_ARGS); +static int termp_ns_pre(DECL_ARGS); +static int termp_quote_pre(DECL_ARGS); +static int termp_rs_pre(DECL_ARGS); +static int termp_rv_pre(DECL_ARGS); +static int termp_sh_pre(DECL_ARGS); +static int termp_skip_pre(DECL_ARGS); +static int termp_sm_pre(DECL_ARGS); +static int termp_sp_pre(DECL_ARGS); +static int termp_ss_pre(DECL_ARGS); +static int termp_tag_pre(DECL_ARGS); +static int termp_under_pre(DECL_ARGS); +static int termp_ud_pre(DECL_ARGS); +static int termp_vt_pre(DECL_ARGS); +static int termp_xr_pre(DECL_ARGS); +static int termp_xx_pre(DECL_ARGS); + +static const struct termact termacts[MDOC_MAX] = { + { termp_ap_pre, NULL }, /* Ap */ + { NULL, NULL }, /* Dd */ + { NULL, NULL }, /* Dt */ + { NULL, NULL }, /* Os */ + { termp_sh_pre, termp_sh_post }, /* Sh */ + { termp_ss_pre, termp_ss_post }, /* Ss */ + { termp_sp_pre, NULL }, /* Pp */ 
+ { termp_d1_pre, termp_bl_post }, /* D1 */ + { termp_d1_pre, termp_bl_post }, /* Dl */ + { termp_bd_pre, termp_bd_post }, /* Bd */ + { NULL, NULL }, /* Ed */ + { termp_bl_pre, termp_bl_post }, /* Bl */ + { NULL, NULL }, /* El */ + { termp_it_pre, termp_it_post }, /* It */ + { termp_under_pre, NULL }, /* Ad */ + { termp_an_pre, NULL }, /* An */ + { termp_under_pre, NULL }, /* Ar */ + { termp_cd_pre, NULL }, /* Cd */ + { termp_bold_pre, NULL }, /* Cm */ + { termp_li_pre, NULL }, /* Dv */ + { termp_er_pre, NULL }, /* Er */ + { termp_tag_pre, NULL }, /* Ev */ + { termp_ex_pre, NULL }, /* Ex */ + { termp_fa_pre, NULL }, /* Fa */ + { termp_fd_pre, termp_fd_post }, /* Fd */ + { termp_fl_pre, NULL }, /* Fl */ + { termp_fn_pre, NULL }, /* Fn */ + { termp_ft_pre, NULL }, /* Ft */ + { termp_bold_pre, NULL }, /* Ic */ + { termp_in_pre, termp_in_post }, /* In */ + { termp_li_pre, NULL }, /* Li */ + { termp_nd_pre, NULL }, /* Nd */ + { termp_nm_pre, termp_nm_post }, /* Nm */ + { termp_quote_pre, termp_quote_post }, /* Op */ + { termp_ft_pre, NULL }, /* Ot */ + { termp_under_pre, NULL }, /* Pa */ + { termp_rv_pre, NULL }, /* Rv */ + { NULL, NULL }, /* St */ + { termp_under_pre, NULL }, /* Va */ + { termp_vt_pre, NULL }, /* Vt */ + { termp_xr_pre, NULL }, /* Xr */ + { termp__a_pre, termp____post }, /* %A */ + { termp_under_pre, termp____post }, /* %B */ + { NULL, termp____post }, /* %D */ + { termp_under_pre, termp____post }, /* %I */ + { termp_under_pre, termp____post }, /* %J */ + { NULL, termp____post }, /* %N */ + { NULL, termp____post }, /* %O */ + { NULL, termp____post }, /* %P */ + { NULL, termp____post }, /* %R */ + { termp__t_pre, termp__t_post }, /* %T */ + { NULL, termp____post }, /* %V */ + { NULL, NULL }, /* Ac */ + { termp_quote_pre, termp_quote_post }, /* Ao */ + { termp_quote_pre, termp_quote_post }, /* Aq */ + { NULL, NULL }, /* At */ + { NULL, NULL }, /* Bc */ + { termp_bf_pre, NULL }, /* Bf */ + { termp_quote_pre, termp_quote_post }, /* Bo */ + { 
termp_quote_pre, termp_quote_post }, /* Bq */ + { termp_xx_pre, NULL }, /* Bsx */ + { termp_bx_pre, NULL }, /* Bx */ + { termp_skip_pre, NULL }, /* Db */ + { NULL, NULL }, /* Dc */ + { termp_quote_pre, termp_quote_post }, /* Do */ + { termp_quote_pre, termp_quote_post }, /* Dq */ + { NULL, NULL }, /* Ec */ /* FIXME: no space */ + { NULL, NULL }, /* Ef */ + { termp_under_pre, NULL }, /* Em */ + { termp_eo_pre, termp_eo_post }, /* Eo */ + { termp_xx_pre, NULL }, /* Fx */ + { termp_bold_pre, NULL }, /* Ms */ + { termp_li_pre, NULL }, /* No */ + { termp_ns_pre, NULL }, /* Ns */ + { termp_xx_pre, NULL }, /* Nx */ + { termp_xx_pre, NULL }, /* Ox */ + { NULL, NULL }, /* Pc */ + { NULL, termp_pf_post }, /* Pf */ + { termp_quote_pre, termp_quote_post }, /* Po */ + { termp_quote_pre, termp_quote_post }, /* Pq */ + { NULL, NULL }, /* Qc */ + { termp_quote_pre, termp_quote_post }, /* Ql */ + { termp_quote_pre, termp_quote_post }, /* Qo */ + { termp_quote_pre, termp_quote_post }, /* Qq */ + { NULL, NULL }, /* Re */ + { termp_rs_pre, NULL }, /* Rs */ + { NULL, NULL }, /* Sc */ + { termp_quote_pre, termp_quote_post }, /* So */ + { termp_quote_pre, termp_quote_post }, /* Sq */ + { termp_sm_pre, NULL }, /* Sm */ + { termp_under_pre, NULL }, /* Sx */ + { termp_bold_pre, NULL }, /* Sy */ + { NULL, NULL }, /* Tn */ + { termp_xx_pre, NULL }, /* Ux */ + { NULL, NULL }, /* Xc */ + { NULL, NULL }, /* Xo */ + { termp_fo_pre, termp_fo_post }, /* Fo */ + { NULL, NULL }, /* Fc */ + { termp_quote_pre, termp_quote_post }, /* Oo */ + { NULL, NULL }, /* Oc */ + { termp_bk_pre, termp_bk_post }, /* Bk */ + { NULL, NULL }, /* Ek */ + { termp_bt_pre, NULL }, /* Bt */ + { NULL, NULL }, /* Hf */ + { termp_under_pre, NULL }, /* Fr */ + { termp_ud_pre, NULL }, /* Ud */ + { NULL, termp_lb_post }, /* Lb */ + { termp_sp_pre, NULL }, /* Lp */ + { termp_lk_pre, NULL }, /* Lk */ + { termp_under_pre, NULL }, /* Mt */ + { termp_quote_pre, termp_quote_post }, /* Brq */ + { termp_quote_pre, termp_quote_post }, /* 
Bro */ + { NULL, NULL }, /* Brc */ + { NULL, termp____post }, /* %C */ + { termp_skip_pre, NULL }, /* Es */ + { termp_quote_pre, termp_quote_post }, /* En */ + { termp_xx_pre, NULL }, /* Dx */ + { NULL, termp____post }, /* %Q */ + { termp_sp_pre, NULL }, /* br */ + { termp_sp_pre, NULL }, /* sp */ + { NULL, termp____post }, /* %U */ + { NULL, NULL }, /* Ta */ + { termp_ll_pre, NULL }, /* ll */ +}; + +/* Counter that termp_fn_pre() pre-increments and passes to tag_put(); reset to 0 in termp_sh_pre() and termp_sp_pre(). */ static int fn_prio; + +/* Render the tree held in mdoc using the terminal state passed as arg (cast to struct termp). With p->synopsisonly set, only the first Sh node whose section is SEC_SYNOPSIS is printed; otherwise term_begin() receives the head and foot printers, every top-level node is printed, and term_end() is called. */ void +terminal_mdoc(void *arg, const struct roff_man *mdoc) +{ + struct roff_node *n; + struct termp *p; + + p = (struct termp *)arg; + p->overstep = 0; + p->rmargin = p->maxrmargin = p->defrmargin; + p->tabwidth = term_len(p, 5); + + n = mdoc->first->child; + if (p->synopsisonly) { + while (n != NULL) { + if (n->tok == MDOC_Sh && n->sec == SEC_SYNOPSIS) { + /* n->child->next is presumably the section body (the node after the head) -- TODO confirm. */ if (n->child->next->child != NULL) + print_mdoc_nodelist(p, NULL, + &mdoc->meta, + n->child->next->child); + term_newln(p); + break; + } + n = n->next; + } + } else { + /* A default indentation of 5 is used when none was configured. */ if (p->defindent == 0) + p->defindent = 5; + term_begin(p, print_mdoc_head, print_mdoc_foot, + &mdoc->meta); + if (n != NULL) { + /* Content that does not start with Sh gets a leading vertical space. */ if (n->tok != MDOC_Sh) + term_vspace(p); + print_mdoc_nodelist(p, NULL, &mdoc->meta, n); + } + term_end(p); + } +} + +/* Print n and then each of its following siblings, in order. */ static void +print_mdoc_nodelist(DECL_ARGS) +{ + + while (n != NULL) { + print_mdoc_node(p, pair, meta, n); + n = n->next; + } +} + +/* Print one node: remember the margins, write text, eqn and tbl nodes directly or call the pre handler from termacts[] for macro nodes, print the children unless the pre handler returned 0, call the post handler, and finally restore the margins (except for ll). */ static void +print_mdoc_node(DECL_ARGS) +{ + int chld; + struct termpair npair; + size_t offset, rmargin; + + chld = 1; + offset = p->offset; + rmargin = p->rmargin; + n->flags &= ~MDOC_ENDED; + /* Remember the current font index so it can be unwound after the children. */ n->prev_font = p->fonti; + + /* Fresh pair for this node, linked to the pair of the parent. */ memset(&npair, 0, sizeof(struct termpair)); + npair.ppair = pair; + + /* + * Keeps only work until the end of a line. If a keep was + * invoked in a prior line, revert it to PREKEEP. + */ + + if (p->flags & TERMP_KEEP && n->flags & MDOC_LINE) { + p->flags &= ~TERMP_KEEP; + p->flags |= TERMP_PREKEEP; + } + + /* + * After the keep flags have been set up, we may now + * produce output. Note that some pre-handlers do so. 
+ */ + + switch (n->type) { + case ROFFT_TEXT: + /* Text starting with a blank at the start of an input line forces a new output line. */ if (' ' == *n->string && MDOC_LINE & n->flags) + term_newln(p); + /* Closing delimiters attach to the preceding word, opening ones to the following word. */ if (MDOC_DELIMC & n->flags) + p->flags |= TERMP_NOSPACE; + term_word(p, n->string); + if (MDOC_DELIMO & n->flags) + p->flags |= TERMP_NOSPACE; + break; + case ROFFT_EQN: + if ( ! (n->flags & MDOC_LINE)) + p->flags |= TERMP_NOSPACE; + term_eqn(p, n->eqn); + if (n->next != NULL && ! (n->next->flags & MDOC_LINE)) + p->flags |= TERMP_NOSPACE; + break; + case ROFFT_TBL: + if (p->tbl.cols == NULL) + term_newln(p); + term_tbl(p, n->span); + break; + default: + /* Macro node: call its pre handler, except for a childless node whose end field is not ENDBODY_NOT. */ if (termacts[n->tok].pre && + (n->end == ENDBODY_NOT || n->child != NULL)) + chld = (*termacts[n->tok].pre) + (p, &npair, meta, n); + break; + } + + /* Descend only if the pre handler did not return 0. */ if (chld && n->child) + print_mdoc_nodelist(p, &npair, meta, n->child); + + /* Unwind fonts using the index saved in prev_font on entry (that of the body for an end token); the exact semantics live in term_fontpopq(), not shown here. */ term_fontpopq(p, + (ENDBODY_NOT == n->end ? n : n->body)->prev_font); + + switch (n->type) { + case ROFFT_TEXT: + break; + case ROFFT_TBL: + break; + case ROFFT_EQN: + break; + default: + /* No post handler, or the block was already closed by an explicit end token. */ if ( ! termacts[n->tok].post || MDOC_ENDED & n->flags) + break; + (void)(*termacts[n->tok].post)(p, &npair, meta, n); + + /* + * Explicit end tokens not only call the post + * handler, but also tell the respective block + * that it must not call the post handler again. + */ + if (ENDBODY_NOT != n->end) + n->body->flags |= MDOC_ENDED; + + /* + * End of line terminating an implicit block + * while an explicit block is still open. + * Continue the explicit block without spacing. 
+ */ + if (ENDBODY_NOSPACE == n->end) + p->flags |= TERMP_NOSPACE; + break; + } + + if (MDOC_EOS & n->flags) + p->flags |= TERMP_SENTENCE; + + /* Restore the margins saved on entry; ll nodes are exempt, presumably so that the width set by termp_ll_pre() persists. */ if (MDOC_ll != n->tok) { + p->offset = offset; + p->rmargin = rmargin; + } +} + +/* Footer printer handed to term_begin() by terminal_mdoc(): writes meta->os, meta->date and meta->os again as three columns, then resets offset, rmargin and flags. */ static void +print_mdoc_foot(struct termp *p, const struct roff_meta *meta) +{ + size_t sz; + + term_fontrepl(p, TERMFONT_NONE); + + /* + * Output the footer in new-groff style, that is, three columns + * with the middle being the manual date and flanking columns + * being the operating system: + * + * SYSTEM DATE SYSTEM + */ + + term_vspace(p); + + /* First column: the OS name, ending where the centred date starts (or at 0 if the date is wider than the line). */ p->offset = 0; + sz = term_strlen(p, meta->date); + p->rmargin = p->maxrmargin > sz ? + (p->maxrmargin + term_len(p, 1) - sz) / 2 : 0; + p->trailspace = 1; + p->flags |= TERMP_NOSPACE | TERMP_NOBREAK; + + term_word(p, meta->os); + term_flushln(p); + + /* Second column: the date, ending where the right-aligned OS name starts. */ p->offset = p->rmargin; + sz = term_strlen(p, meta->os); + p->rmargin = p->maxrmargin > sz ? p->maxrmargin - sz : 0; + p->flags |= TERMP_NOSPACE; + + term_word(p, meta->date); + term_flushln(p); + + /* Third column: the OS name again, up to the maximum right margin, with line breaking re-enabled. */ p->offset = p->rmargin; + p->rmargin = p->maxrmargin; + p->trailspace = 0; + p->flags &= ~TERMP_NOBREAK; + p->flags |= TERMP_NOSPACE; + + term_word(p, meta->os); + term_flushln(p); + + p->offset = 0; + p->rmargin = p->maxrmargin; + p->flags = 0; +} + +/* Header printer handed to term_begin() by terminal_mdoc(): writes the title, the volume, and (if it fits) the title again. */ static void +print_mdoc_head(struct termp *p, const struct roff_meta *meta) +{ + char *volume, *title; + size_t vollen, titlen; + + /* + * The header is strange. It has three components, which are + * really two with the first duplicated. It goes like this: + * + * IDENTIFIER TITLE IDENTIFIER + * + * The IDENTIFIER is NAME(SECTION), which is the command-name + * (if given, or "unknown" if not) followed by the manual page + * section. These are given in `Dt'. The TITLE is a free-form + * string depending on the manual volume. If not specified, it + * switches on the manual section. 
+ */ + + /* Middle column text: the volume, followed by the architecture in parentheses when one is set. */ assert(meta->vol); + if (NULL == meta->arch) + volume = mandoc_strdup(meta->vol); + else + mandoc_asprintf(&volume, "%s (%s)", + meta->vol, meta->arch); + vollen = term_strlen(p, volume); + + /* Flanking column text: the title, followed by the section in parentheses when one is set. */ if (NULL == meta->msec) + title = mandoc_strdup(meta->title); + else + mandoc_asprintf(&title, "%s(%s)", + meta->title, meta->msec); + titlen = term_strlen(p, title); + + /* Left column: ends where the centred volume starts if both titles and the volume fit; otherwise the volume is right-aligned, or starts at 0 if it is wider than the line. */ p->flags |= TERMP_NOBREAK | TERMP_NOSPACE; + p->trailspace = 1; + p->offset = 0; + p->rmargin = 2 * (titlen+1) + vollen < p->maxrmargin ? + (p->maxrmargin - vollen + term_len(p, 1)) / 2 : + vollen < p->maxrmargin ? p->maxrmargin - vollen : 0; + + term_word(p, title); + term_flushln(p); + + /* Middle column: leave room for the trailing title only if it fits. */ p->flags |= TERMP_NOSPACE; + p->offset = p->rmargin; + p->rmargin = p->offset + vollen + titlen < p->maxrmargin ? + p->maxrmargin - titlen : p->maxrmargin; + + term_word(p, volume); + term_flushln(p); + + p->flags &= ~TERMP_NOBREAK; + p->trailspace = 0; + /* Right column: repeat the title when it fits before the maximum right margin. */ if (p->rmargin + titlen <= p->maxrmargin) { + p->flags |= TERMP_NOSPACE; + p->offset = p->rmargin; + p->rmargin = p->maxrmargin; + term_word(p, title); + term_flushln(p); + } + + p->flags &= ~TERMP_NOSPACE; + p->offset = 0; + p->rmargin = p->maxrmargin; + /* Both strings were allocated above and are owned by this function. */ free(title); + free(volume); +} + +/* Convert the width argument v to an int: when a2roffsu() returns less than 2, fall back to the displayed length of v itself, expressed in widths of the digit 0; the result is term_hspan() divided by 24. */ static int +a2width(const struct termp *p, const char *v) +{ + struct roffsu su; + + if (a2roffsu(v, &su, SCALE_MAX) < 2) { + SCALE_HS_INIT(&su, term_strlen(p, v)); + su.scale /= term_strlen(p, "0"); + } + return term_hspan(p, &su) / 24; +} + +/* + * Determine how much space to print out before block elements of `It' + * (and thus `Bl') and `Bd'. And then go ahead and print that space, + * too. + */ +static void +print_bvspace(struct termp *p, + const struct roff_node *bl, + const struct roff_node *n) +{ + const struct roff_node *nn; + + assert(n); + + term_newln(p); + + /* Compact displays and lists (the comp field) only get the newline. */ if (MDOC_Bd == bl->tok && bl->norm->Bd.comp) + return; + if (MDOC_Bl == bl->tok && bl->norm->Bl.comp) + return; + + /* Do not vspace directly after Ss/Sh. 
*/ + + nn = n; + /* While nn is a first child, climb to the enclosing block: give up without spacing at the root or when that block is Sh or Ss, and stop climbing at an It whose list type is not LIST_item. */ while (nn->prev == NULL) { + do { + nn = nn->parent; + if (nn->type == ROFFT_ROOT) + return; + } while (nn->type != ROFFT_BLOCK); + if (nn->tok == MDOC_Sh || nn->tok == MDOC_Ss) + return; + if (nn->tok == MDOC_It && + nn->parent->parent->norm->Bl.type != LIST_item) + break; + } + + /* A `-column' does not assert vspace within the list. */ + + if (MDOC_Bl == bl->tok && LIST_column == bl->norm->Bl.type) + if (n->prev && MDOC_It == n->prev->tok) + return; + + /* A `-diag' without body does not vspace. */ + + if (MDOC_Bl == bl->tok && LIST_diag == bl->norm->Bl.type) + if (n->prev && MDOC_It == n->prev->tok) { + assert(n->prev->body); + if (NULL == n->prev->body->child) + return; + } + + term_vspace(p); +} + + +/* Pre handler installed for ll in termacts[]: hand the string of the first child (or NULL when there is none) to term_setwidth() and do not print children. */ static int +termp_ll_pre(DECL_ARGS) +{ + + term_setwidth(p, n->child != NULL ? n->child->string : NULL); + return 0; +} + +/* Pre handler for list item nodes. For the block node it only emits the spacing chosen by print_bvspace(); for other node types it computes width and offset from the enclosing list, sets spacing and break flags, adjusts the margins, prints the head mark for bullet, dash, hyphen and enum lists, and returns 0 when the children must not be printed. */ static int +termp_it_pre(DECL_ARGS) +{ + char buf[24]; + const struct roff_node *bl, *nn; + size_t ncols, dcol; + int i, offset, width; + enum mdoc_list type; + + if (n->type == ROFFT_BLOCK) { + print_bvspace(p, n->parent->parent, n); + return 1; + } + + /* For non-block nodes the list block is three levels up. */ bl = n->parent->parent->parent; + type = bl->norm->Bl.type; + + /* + * Defaults for specific list types. + */ + + switch (type) { + case LIST_bullet: + case LIST_dash: + case LIST_hyphen: + case LIST_enum: + width = term_len(p, 2); + break; + case LIST_hang: + width = term_len(p, 8); + break; + case LIST_column: + case LIST_tag: + width = term_len(p, 10); + break; + default: + width = 0; + break; + } + offset = 0; + + /* + * First calculate width and offset. This is pretty easy unless + * we're a -column list, in which case all prior columns must + * be accounted for. 
+ */ + + if (bl->norm->Bl.offs != NULL) { + offset = a2width(p, bl->norm->Bl.offs); + /* A negative offset may at most cancel the current indentation; offsets above SHRT_MAX are discarded. */ if (offset < 0 && (size_t)(-offset) > p->offset) + offset = -p->offset; + else if (offset > SHRT_MAX) + offset = 0; + } + + switch (type) { + case LIST_column: + if (n->type == ROFFT_HEAD) + break; + + /* + * Imitate groff's column handling: + * - For each earlier column, add its width. + * - For less than 5 columns, add four more blanks per + * column. + * - For exactly 5 columns, add three more blanks per + * column. + * - For more than 5 columns, add only one more blank per + * column. + */ + ncols = bl->norm->Bl.ncols; + dcol = ncols < 5 ? term_len(p, 4) : + ncols == 5 ? term_len(p, 3) : term_len(p, 1); + + /* + * Calculate the offset by applying all prior ROFFT_BODY, + * so we stop at the ROFFT_HEAD (nn->prev == NULL). + */ + + for (i = 0, nn = n->prev; + nn->prev && i < (int)ncols; + nn = nn->prev, i++) + offset += dcol + a2width(p, + bl->norm->Bl.cols[i]); + + /* + * When exceeding the declared number of columns, leave + * the remaining widths at 0. This will later be + * adjusted to the default width of 10, or, for the last + * column, stretched to the right margin. + * NOTE(review): width is not 0 at this point; it still + * holds the term_len(p, 10) default assigned for + * LIST_column before this switch. + */ + if (i >= (int)ncols) + break; + + /* + * Use the declared column widths, extended as explained + * in the preceding paragraph. + */ + width = a2width(p, bl->norm->Bl.cols[i]) + dcol; + break; + default: + if (NULL == bl->norm->Bl.width) + break; + + /* + * Note: buffer the width by 2, which is groff's magic + * number for buffering single arguments. See the above + * handling for column for how this changes. + */ + width = a2width(p, bl->norm->Bl.width) + term_len(p, 2); + /* Same clamping as for the offset above. */ if (width < 0 && (size_t)(-width) > p->offset) + width = -p->offset; + else if (width > SHRT_MAX) + width = 0; + break; + } + + /* + * Whitespace control. Inset bodies need an initial space, + * while diagonal bodies need two. 
+ */ + + p->flags |= TERMP_NOSPACE; + + switch (type) { + case LIST_diag: + if (n->type == ROFFT_BODY) + term_word(p, "\\ \\ "); + break; + case LIST_inset: + /* The separating space is only printed when the head has content. */ if (n->type == ROFFT_BODY && n->parent->head->child != NULL) + term_word(p, "\\ "); + break; + default: + break; + } + + p->flags |= TERMP_NOSPACE; + + /* Heads of -diag lists are printed in bold. */ switch (type) { + case LIST_diag: + if (n->type == ROFFT_HEAD) + term_fontpush(p, TERMFONT_BOLD); + break; + default: + break; + } + + /* + * Pad and break control. This is the tricky part. These flags + * are documented in term_flushln() in term.c. Note that we're + * going to unset all of these flags in termp_it_post() when we + * exit. + */ + + switch (type) { + case LIST_enum: + case LIST_bullet: + case LIST_dash: + case LIST_hyphen: + /* + * Weird special case. + * Some very narrow lists actually hang. + */ + if (width <= (int)term_len(p, 2)) + p->flags |= TERMP_HANG; + /* The remaining flags apply to the head only. */ if (n->type != ROFFT_HEAD) + break; + p->flags |= TERMP_NOBREAK; + p->trailspace = 1; + break; + case LIST_hang: + if (n->type != ROFFT_HEAD) + break; + + /* + * This is ugly. If `-hang' is specified and the body + * is a `Bl' or `Bd', then we want basically to nullify + * the "overstep" effect in term_flushln() and treat + * this as a `-ohang' list instead. 
+ */ + if (NULL != n->next && + NULL != n->next->child && + (MDOC_Bl == n->next->child->tok || + MDOC_Bd == n->next->child->tok)) + break; + + p->flags |= TERMP_NOBREAK | TERMP_BRIND | TERMP_HANG; + p->trailspace = 1; + break; + case LIST_tag: + if (n->type != ROFFT_HEAD) + break; + + p->flags |= TERMP_NOBREAK | TERMP_BRTRSP | TERMP_BRIND; + p->trailspace = 2; + + /* A tag head followed by no node, or by one without children, is marked as dangling. */ if (NULL == n->next || NULL == n->next->child) + p->flags |= TERMP_DANGLE; + break; + case LIST_column: + if (n->type == ROFFT_HEAD) + break; + + /* Only the last cell of a row (no next sibling) may break; earlier cells are padded instead. */ if (NULL == n->next) { + p->flags &= ~TERMP_NOBREAK; + p->trailspace = 0; + } else { + p->flags |= TERMP_NOBREAK; + p->trailspace = 1; + } + + break; + case LIST_diag: + if (n->type != ROFFT_HEAD) + break; + p->flags |= TERMP_NOBREAK | TERMP_BRIND; + p->trailspace = 1; + break; + default: + break; + } + + /* + * Margin control. Set-head-width lists have their right + * margins shortened. The body for these lists has the offset + * necessarily lengthened. Everybody gets the offset. + */ + + p->offset += offset; + + switch (type) { + case LIST_hang: + /* + * Same stipulation as above, regarding `-hang'. We + * don't want to recalculate rmargin and offsets when + * using `Bd' or `Bl' within `-hang' overstep lists. + */ + if (n->type == ROFFT_HEAD && + NULL != n->next && + NULL != n->next->child && + (MDOC_Bl == n->next->child->tok || + MDOC_Bd == n->next->child->tok)) + break; + /* FALLTHROUGH */ + case LIST_bullet: + case LIST_dash: + case LIST_enum: + case LIST_hyphen: + case LIST_tag: + /* The head is limited to width columns after the offset; any other node starts width columns further right. */ if (n->type == ROFFT_HEAD) + p->rmargin = p->offset + width; + else + p->offset += width; + break; + case LIST_column: + assert(width); + p->rmargin = p->offset + width; + /* + * XXX - this behaviour is not documented: the + * right-most column is filled to the right margin. 
+ */ + if (n->type == ROFFT_HEAD) + break; + if (NULL == n->next && p->rmargin < p->maxrmargin) + p->rmargin = p->maxrmargin; + break; + default: + break; + } + + /* + * The dash, hyphen, bullet and enum lists all have a special + * HEAD character (temporarily bold, in some cases). + */ + + if (n->type == ROFFT_HEAD) + switch (type) { + case LIST_bullet: + term_fontpush(p, TERMFONT_BOLD); + term_word(p, "\\[bu]"); + term_fontpop(p); + break; + case LIST_dash: + case LIST_hyphen: + term_fontpush(p, TERMFONT_BOLD); + term_word(p, "-"); + term_fontpop(p); + break; + case LIST_enum: + /* The running item number lives in the pair two levels up (presumably that of the list block -- TODO confirm). */ (pair->ppair->ppair->count)++; + (void)snprintf(buf, sizeof(buf), "%d.", + pair->ppair->ppair->count); + term_word(p, buf); + break; + default: + break; + } + + /* + * If we're not going to process our children, indicate so here. + */ + + switch (type) { + case LIST_bullet: + case LIST_item: + case LIST_dash: + case LIST_hyphen: + case LIST_enum: + if (n->type == ROFFT_HEAD) + return 0; + break; + case LIST_column: + if (n->type == ROFFT_HEAD) + return 0; + break; + default: + break; + } + + return 1; +} + +/* Post handler paired with termp_it_pre(): does nothing for the block node; otherwise ends or flushes the line depending on the list type and clears the flags and trailspace that termp_it_pre() may have set. */ static void +termp_it_post(DECL_ARGS) +{ + enum mdoc_list type; + + if (n->type == ROFFT_BLOCK) + return; + + type = n->parent->parent->parent->norm->Bl.type; + + switch (type) { + case LIST_item: + case LIST_diag: + case LIST_inset: + if (n->type == ROFFT_BODY) + term_newln(p); + break; + case LIST_column: + if (n->type == ROFFT_BODY) + term_flushln(p); + break; + default: + term_newln(p); + break; + } + + /* + * Now that our output is flushed, we can reset our flags. Since + * only `It' sets these flags, we're free to assume that nobody + * has munged them in the meanwhile. 
+ */ + + p->flags &= ~(TERMP_NOBREAK | TERMP_BRTRSP | TERMP_BRIND | + TERMP_DANGLE | TERMP_HANG); + p->trailspace = 0; +} + +/* Pre handler that prints a name in bold. Block node: set TERMP_PREKEEP. Body node: skip empty bodies, otherwise indent by the width of the name plus one (the name is the first string of the previous sibling or meta->name; 6 when neither exists). Other nodes: print in bold, using meta->name when there are no children; a head followed by a non-empty sibling is set up as an unbreakable column as wide as the name. */ static int +termp_nm_pre(DECL_ARGS) +{ + const char *cp; + + if (n->type == ROFFT_BLOCK) { + p->flags |= TERMP_PREKEEP; + return 1; + } + + if (n->type == ROFFT_BODY) { + if (NULL == n->child) + return 0; + p->flags |= TERMP_NOSPACE; + cp = NULL; + /* n->prev is presumably the head of this block -- TODO confirm. */ if (n->prev->child != NULL) + cp = n->prev->child->string; + if (cp == NULL) + cp = meta->name; + if (cp == NULL) + p->offset += term_len(p, 6); + else + p->offset += term_len(p, 1) + term_strlen(p, cp); + return 1; + } + + /* Nothing to print: no argument and no name known from the metadata. */ if (NULL == n->child && NULL == meta->name) + return 0; + + if (n->type == ROFFT_HEAD) + synopsis_pre(p, n->parent); + + if (n->type == ROFFT_HEAD && + NULL != n->next && NULL != n->next->child) { + p->flags |= TERMP_NOSPACE | TERMP_NOBREAK | TERMP_BRIND; + p->trailspace = 1; + p->rmargin = p->offset + term_len(p, 1); + if (NULL == n->child) { + p->rmargin += term_strlen(p, meta->name); + } else if (n->child->type == ROFFT_TEXT) { + p->rmargin += term_strlen(p, n->child->string); + if (n->child->next) + p->flags |= TERMP_HANG; + } else { + p->rmargin += term_len(p, 5); + p->flags |= TERMP_HANG; + } + } + + term_fontpush(p, TERMFONT_BOLD); + if (NULL == n->child) + term_word(p, meta->name); + return 1; +} + +/* Post handler paired with termp_nm_pre(): drop the keep flags after the block, flush the line and clear the column flags after a head that has a non-empty following sibling, and flush the line after a non-empty body. */ static void +termp_nm_post(DECL_ARGS) +{ + + if (n->type == ROFFT_BLOCK) { + p->flags &= ~(TERMP_KEEP | TERMP_PREKEEP); + } else if (n->type == ROFFT_HEAD && + NULL != n->next && NULL != n->next->child) { + term_flushln(p); + p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND | TERMP_HANG); + p->trailspace = 0; + } else if (n->type == ROFFT_BODY && n->child != NULL) + term_flushln(p); +} + +/* Pre handler that prints a bold dash escape; the following space is kept only when the node has no children and is followed by nothing, by a text node, or by a node flagged MDOC_LINE. */ static int +termp_fl_pre(DECL_ARGS) +{ + + termp_tag_pre(p, pair, meta, n); + term_fontpush(p, TERMFONT_BOLD); + term_word(p, "\\-"); + + if (!(n->child == NULL && + (n->next == NULL || + n->next->type == ROFFT_TEXT || + n->next->flags & MDOC_LINE))) + p->flags |= TERMP_NOSPACE; + + return 1; +} + 
+/* Pre handler for %A nodes: print the word and before the last entry of a run of consecutive %A nodes. */ static int +termp__a_pre(DECL_ARGS) +{ + + if (n->prev && MDOC__A == n->prev->tok) + if (NULL == n->next || MDOC__A != n->next->tok) + term_word(p, "and"); + + return 1; +} + +/* Pre handler for author nodes: the AUTH_split and AUTH_nosplit forms only switch the TERMP_SPLIT and TERMP_NOSPLIT flags and print nothing; otherwise break the line first when split mode is on, and turn split mode on inside SEC_AUTHORS unless nosplit is set. */ static int +termp_an_pre(DECL_ARGS) +{ + + if (n->norm->An.auth == AUTH_split) { + p->flags &= ~TERMP_NOSPLIT; + p->flags |= TERMP_SPLIT; + return 0; + } + if (n->norm->An.auth == AUTH_nosplit) { + p->flags &= ~TERMP_SPLIT; + p->flags |= TERMP_NOSPLIT; + return 0; + } + + if (p->flags & TERMP_SPLIT) + term_newln(p); + + if (n->sec == SEC_AUTHORS && ! (p->flags & TERMP_NOSPLIT)) + p->flags |= TERMP_SPLIT; + + return 1; +} + +/* Pre handler installed for Ns in termacts[]: suppress the next space unless the node is flagged MDOC_LINE. */ static int +termp_ns_pre(DECL_ARGS) +{ + + if ( ! (MDOC_LINE & n->flags)) + p->flags |= TERMP_NOSPACE; + return 1; +} + +/* Pre handler installed for Rs in termacts[]: inside SEC_SEE_ALSO, print vertical space before every block node that has a previous sibling. */ static int +termp_rs_pre(DECL_ARGS) +{ + + if (SEC_SEE_ALSO != n->sec) + return 1; + if (n->type == ROFFT_BLOCK && n->prev != NULL) + term_vspace(p); + return 1; +} + +/* Pre handler that prints the standard return value sentence on a new line, naming the child strings (if any) in bold as functions; returns 0 because the children are consumed here. */ static int +termp_rv_pre(DECL_ARGS) +{ + struct roff_node *nch; + + term_newln(p); + + if (n->child != NULL) { + term_word(p, "The"); + + for (nch = n->child; nch != NULL; nch = nch->next) { + term_fontpush(p, TERMFONT_BOLD); + term_word(p, nch->string); + term_fontpop(p); + + p->flags |= TERMP_NOSPACE; + term_word(p, "()"); + + if (nch->next == NULL) + continue; + + /* A comma separates the names whenever there are more than two of them. */ if (nch->prev != NULL || nch->next->next != NULL) { + p->flags |= TERMP_NOSPACE; + term_word(p, ","); + } + /* The word and goes before the last name. */ if (nch->next->next == NULL) + term_word(p, "and"); + } + + if (n->child != NULL && n->child->next != NULL) + term_word(p, "functions return"); + else + term_word(p, "function returns"); + + term_word(p, "the value\\~0 if successful;"); + } else + term_word(p, "Upon successful completion," + " the value\\~0 is returned;"); + + term_word(p, "otherwise the value\\~\\-1 is returned" + " and the global variable"); + + term_fontpush(p, TERMFONT_UNDER); + term_word(p, "errno"); + term_fontpop(p); + + term_word(p, "is set to indicate the error."); + p->flags |= TERMP_SENTENCE; + + return 0; +} + +/* Pre handler that prints the standard exit status sentence on a new line, naming the child strings in bold as utilities; returns 0 because the children are consumed here. */ static int 
+termp_ex_pre(DECL_ARGS) +{ + struct roff_node *nch; + + term_newln(p); + term_word(p, "The"); + + for (nch = n->child; nch != NULL; nch = nch->next) { + term_fontpush(p, TERMFONT_BOLD); + term_word(p, nch->string); + term_fontpop(p); + + if (nch->next == NULL) + continue; + + /* A comma separates the names whenever there are more than two of them. */ if (nch->prev != NULL || nch->next->next != NULL) { + p->flags |= TERMP_NOSPACE; + term_word(p, ","); + } + + /* The word and goes before the last name. */ if (nch->next->next == NULL) + term_word(p, "and"); + } + + if (n->child != NULL && n->child->next != NULL) + term_word(p, "utilities exit\\~0"); + else + term_word(p, "utility exits\\~0"); + + term_word(p, "on success, and\\~>0 if an error occurs."); + + p->flags |= TERMP_SENTENCE; + return 0; +} + +/* Pre handler that prints the en-dash escape before a body node. */ static int +termp_nd_pre(DECL_ARGS) +{ + + if (n->type == ROFFT_BODY) + term_word(p, "\\(en"); + return 1; +} + +/* Pre handler that prints the children of every node type except the head. */ static int +termp_bl_pre(DECL_ARGS) +{ + + return n->type != ROFFT_HEAD; +} + +/* Post handler that ends the line once the block node is finished. */ static void +termp_bl_post(DECL_ARGS) +{ + + if (n->type == ROFFT_BLOCK) + term_newln(p); +} + +/* Pre handler that prints the first child string and, when a second child exists, that string in parentheses directly attached; returns 0 because the children are consumed here. */ static int +termp_xr_pre(DECL_ARGS) +{ + + if (NULL == (n = n->child)) + return 0; + + assert(n->type == ROFFT_TEXT); + term_word(p, n->string); + + if (NULL == (n = n->next)) + return 0; + + p->flags |= TERMP_NOSPACE; + term_word(p, "("); + p->flags |= TERMP_NOSPACE; + + assert(n->type == ROFFT_TEXT); + term_word(p, n->string); + + p->flags |= TERMP_NOSPACE; + term_word(p, ")"); + + return 0; +} + +/* + * This decides how to assert whitespace before any of the SYNOPSIS set + * of macros (which, as in the case of Ft/Fo and Ft/Fn, may contain + * macro combos). + */ +static void +synopsis_pre(struct termp *p, const struct roff_node *n) +{ + /* + * Obviously, if we're not in a SYNOPSIS or no prior macros + * exist, do nothing. + */ + if (NULL == n->prev || ! (MDOC_SYNPRETTY & n->flags)) + return; + + /* + * If we're the second in a pair of like elements, emit our + * newline and return. UNLESS we're `Ft', `Fo', `Fn', in which + * case we soldier on. 
+ */ + if (n->prev->tok == n->tok && + MDOC_Ft != n->tok && + MDOC_Fo != n->tok && + MDOC_Fn != n->tok) { + term_newln(p); + return; + } + + /* + * If we're one of the SYNOPSIS set and non-like pair-wise after + * another (or Fn/Fo, which we've let slip through) then assert + * vertical space, else only newline and move on. + */ + switch (n->prev->tok) { + case MDOC_Fd: + case MDOC_Fn: + case MDOC_Fo: + case MDOC_In: + case MDOC_Vt: + term_vspace(p); + break; + case MDOC_Ft: + /* Ft directly followed by Fn or Fo only gets a newline. */ if (MDOC_Fn != n->tok && MDOC_Fo != n->tok) { + term_vspace(p); + break; + } + /* FALLTHROUGH */ + default: + term_newln(p); + break; + } +} + +/* Pre handler that underlines its content: element and block nodes first get SYNOPSIS spacing, the head is not printed, and every other node type is passed to termp_under_pre(). */ static int +termp_vt_pre(DECL_ARGS) +{ + + if (n->type == ROFFT_ELEM) { + synopsis_pre(p, n); + return termp_under_pre(p, pair, meta, n); + } else if (n->type == ROFFT_BLOCK) { + synopsis_pre(p, n); + return 1; + } else if (n->type == ROFFT_HEAD) + return 0; + + return termp_under_pre(p, pair, meta, n); +} + +/* Pre handler that calls termp_tag_pre() and then switches to bold. */ static int +termp_bold_pre(DECL_ARGS) +{ + + termp_tag_pre(p, pair, meta, n); + term_fontpush(p, TERMFONT_BOLD); + return 1; +} + +/* Pre handler that applies SYNOPSIS spacing and then behaves like termp_bold_pre(). */ static int +termp_fd_pre(DECL_ARGS) +{ + + synopsis_pre(p, n); + return termp_bold_pre(p, pair, meta, n); +} + +/* Post handler that ends the current line. */ static void +termp_fd_post(DECL_ARGS) +{ + + term_newln(p); +} + +/* Pre handler for section nodes: vertical space before the block, bold font for the head, and for the body the default indentation plus a reset of fn_prio (in SEC_DESCRIPTION) or of the author split flags (in SEC_AUTHORS). */ static int +termp_sh_pre(DECL_ARGS) +{ + + switch (n->type) { + case ROFFT_BLOCK: + /* + * Vertical space before sections, except + * when the previous section was empty. 
+ */ + if (n->prev == NULL || + n->prev->tok != MDOC_Sh || + (n->prev->body != NULL && + n->prev->body->child != NULL)) + term_vspace(p); + break; + case ROFFT_HEAD: + term_fontpush(p, TERMFONT_BOLD); + break; + case ROFFT_BODY: + p->offset = term_len(p, p->defindent); + switch (n->sec) { + case SEC_DESCRIPTION: + fn_prio = 0; + break; + case SEC_AUTHORS: + p->flags &= ~(TERMP_SPLIT|TERMP_NOSPLIT); + break; + default: + break; + } + break; + default: + break; + } + return 1; +} + +/* Post handler paired with termp_sh_pre(): end the line after head and body, and reset the offset to 0 after the body. */ static void +termp_sh_post(DECL_ARGS) +{ + + switch (n->type) { + case ROFFT_HEAD: + term_newln(p); + break; + case ROFFT_BODY: + term_newln(p); + p->offset = 0; + break; + default: + break; + } +} + +/* Pre handler that prints a fixed beta test sentence and marks the end of a sentence; children are not printed. */ static int +termp_bt_pre(DECL_ARGS) +{ + + term_word(p, "is currently in beta test."); + p->flags |= TERMP_SENTENCE; + return 0; +} + +/* Post handler that ends the line when the node is in SEC_LIBRARY and flagged MDOC_LINE. */ static void +termp_lb_post(DECL_ARGS) +{ + + if (SEC_LIBRARY == n->sec && MDOC_LINE & n->flags) + term_newln(p); +} + +/* Pre handler that prints a fixed under development sentence and marks the end of a sentence; children are not printed. */ static int +termp_ud_pre(DECL_ARGS) +{ + + term_word(p, "currently under development."); + p->flags |= TERMP_SENTENCE; + return 0; +} + +/* Pre handler that, on the block node only, starts a new line and indents by defindent + 1. */ static int +termp_d1_pre(DECL_ARGS) +{ + + if (n->type != ROFFT_BLOCK) + return 1; + term_newln(p); + p->offset += term_len(p, p->defindent + 1); + return 1; +} + +/* Pre handler that applies SYNOPSIS spacing and switches to the underlined font. */ static int +termp_ft_pre(DECL_ARGS) +{ + + /* NB: MDOC_LINE does not affect this! 
*/ + synopsis_pre(p, n); + term_fontpush(p, TERMFONT_UNDER); + return 1; +} + +/* Pre handler that prints a function call form: the first child in bold, then the remaining children underlined inside parentheses and separated by commas. With MDOC_SYNPRETTY set, the name is flushed as a hanging column of width 4, arguments are flagged TERMP_NBRWORD, and a semicolon plus line flush ends the form. Returns 0 because the children are consumed here. */ static int +termp_fn_pre(DECL_ARGS) +{ + size_t rmargin = 0; + int pretty; + + pretty = MDOC_SYNPRETTY & n->flags; + + synopsis_pre(p, n); + + if (NULL == (n = n->child)) + return 0; + + if (pretty) { + rmargin = p->rmargin; + p->rmargin = p->offset + term_len(p, 4); + p->flags |= TERMP_NOBREAK | TERMP_BRIND | TERMP_HANG; + } + + assert(n->type == ROFFT_TEXT); + term_fontpush(p, TERMFONT_BOLD); + term_word(p, n->string); + term_fontpop(p); + + /* In SEC_DESCRIPTION, register the function name with tag_put() at the next priority. */ if (n->sec == SEC_DESCRIPTION) + tag_put(n->string, ++fn_prio, p->line); + + if (pretty) { + term_flushln(p); + p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND | TERMP_HANG); + p->offset = p->rmargin; + p->rmargin = rmargin; + } + + p->flags |= TERMP_NOSPACE; + term_word(p, "("); + p->flags |= TERMP_NOSPACE; + + for (n = n->next; n; n = n->next) { + assert(n->type == ROFFT_TEXT); + term_fontpush(p, TERMFONT_UNDER); + if (pretty) + p->flags |= TERMP_NBRWORD; + term_word(p, n->string); + term_fontpop(p); + + if (n->next) { + p->flags |= TERMP_NOSPACE; + term_word(p, ","); + } + } + + p->flags |= TERMP_NOSPACE; + term_word(p, ")"); + + if (pretty) { + p->flags |= TERMP_NOSPACE; + term_word(p, ";"); + term_flushln(p); + } + + return 0; +} + +/* Pre handler for function arguments: outside an Fo parent it just switches to the underlined font; inside Fo it prints each child underlined and flagged TERMP_NBRWORD, followed by a comma when another child or another Fa node follows. */ static int +termp_fa_pre(DECL_ARGS) +{ + const struct roff_node *nn; + + if (n->parent->tok != MDOC_Fo) { + term_fontpush(p, TERMFONT_UNDER); + return 1; + } + + for (nn = n->child; nn; nn = nn->next) { + term_fontpush(p, TERMFONT_UNDER); + p->flags |= TERMP_NBRWORD; + term_word(p, nn->string); + term_fontpop(p); + + if (nn->next || (n->next && n->next->tok == MDOC_Fa)) { + p->flags |= TERMP_NOSPACE; + term_word(p, ","); + } + } + + return 0; +} + +/* Pre handler for display blocks: spacing before the block node, nothing for the head, and for the body the -offset handling followed, for literal, unfilled and centered displays, by printing the children here line by line. */ static int +termp_bd_pre(DECL_ARGS) +{ + size_t tabwidth, lm, len, rm, rmax; + struct roff_node *nn; + int offset; + + if (n->type == ROFFT_BLOCK) { + print_bvspace(p, n, n); + return 1; + } else if (n->type == ROFFT_HEAD) + return 0; + + /* Handle the -offset argument. 
*/ + + if (n->norm->Bd.offs == NULL || + ! strcmp(n->norm->Bd.offs, "left")) + /* nothing */; + else if ( ! strcmp(n->norm->Bd.offs, "indent")) + p->offset += term_len(p, p->defindent + 1); + else if ( ! strcmp(n->norm->Bd.offs, "indent-two")) + p->offset += term_len(p, (p->defindent + 1) * 2); + else { + offset = a2width(p, n->norm->Bd.offs); + /* A negative value larger than the current indentation resets it to 0; values of SHRT_MAX or more are ignored. */ if (offset < 0 && (size_t)(-offset) > p->offset) + p->offset = 0; + else if (offset < SHRT_MAX) + p->offset += offset; + } + + /* + * If -ragged or -filled are specified, the block does nothing + * but change the indentation. If -unfilled or -literal are + * specified, text is printed exactly as entered in the display: + * for macro lines, a newline is appended to the line. Blank + * lines are allowed. + * Displays of type -centered take the same line-by-line path + * below, with the offset recomputed for each text line. + */ + + if (DISP_literal != n->norm->Bd.type && + DISP_unfilled != n->norm->Bd.type && + DISP_centered != n->norm->Bd.type) + return 1; + + /* Literal displays use a tab width of 8; the previous value is restored at the end. */ tabwidth = p->tabwidth; + if (DISP_literal == n->norm->Bd.type) + p->tabwidth = term_len(p, 8); + + /* Save the margins, then lift the right margins to TERM_MAXMARGIN while printing the children. */ lm = p->offset; + rm = p->rmargin; + rmax = p->maxrmargin; + p->rmargin = p->maxrmargin = TERM_MAXMARGIN; + + for (nn = n->child; nn; nn = nn->next) { + if (DISP_centered == n->norm->Bd.type) { + /* Centre a text line between lm and rm; a line too long for that is right-aligned at rm, or starts at column 0 when it is at least rm wide. */ if (nn->type == ROFFT_TEXT) { + len = term_strlen(p, nn->string); + p->offset = len >= rm ? 0 : + lm + len >= rm ? rm - len : + (lm + rm - len) / 2; + } else + p->offset = lm; + } + print_mdoc_node(p, pair, meta, nn); + /* + * If the printed node flushes its own line, then we + * needn't do it here as well. This is hacky, but the + * notion of selective eoln whitespace is pretty dumb + * anyway, so don't sweat it. + */ + switch (nn->tok) { + case MDOC_Sm: + case MDOC_br: + case MDOC_sp: + case MDOC_Bl: + case MDOC_D1: + case MDOC_Dl: + case MDOC_Lp: + case MDOC_Pp: + continue; + default: + break; + } + if (p->flags & TERMP_NONEWLINE || + (nn->next && ! 
(nn->next->flags & MDOC_LINE))) + continue; + term_flushln(p); + p->flags |= TERMP_NOSPACE; + } + + /* Restore the tab width and margins saved before the loop; the children were printed here, so return 0. */ p->tabwidth = tabwidth; + p->rmargin = rm; + p->maxrmargin = rmax; + return 0; +} + +/* Post handler for display bodies: end the line, with the right margins temporarily lifted to TERM_MAXMARGIN for literal and unfilled displays. */ static void +termp_bd_post(DECL_ARGS) +{ + size_t rm, rmax; + + if (n->type != ROFFT_BODY) + return; + + rm = p->rmargin; + rmax = p->maxrmargin; + + if (DISP_literal == n->norm->Bd.type || + DISP_unfilled == n->norm->Bd.type) + p->rmargin = p->maxrmargin = TERM_MAXMARGIN; + + p->flags |= TERMP_NOSPACE; + term_newln(p); + + p->rmargin = rm; + p->maxrmargin = rmax; +} + +/* Pre handler installed for Bx in termacts[]: print BSD, prefixed without a space by the first child string if there is one, and followed by a hyphen and the second child string if there is one; children are consumed here. */ static int +termp_bx_pre(DECL_ARGS) +{ + + if (NULL != (n = n->child)) { + term_word(p, n->string); + p->flags |= TERMP_NOSPACE; + term_word(p, "BSD"); + } else { + term_word(p, "BSD"); + return 0; + } + + if (NULL != (n = n->next)) { + p->flags |= TERMP_NOSPACE; + term_word(p, "-"); + p->flags |= TERMP_NOSPACE; + term_word(p, n->string); + } + + return 0; +} + +/* Pre handler for the operating system name macros: print the fixed name selected by n->tok, then the first child string with TERMP_KEEP temporarily set; aborts on any other token. */ static int +termp_xx_pre(DECL_ARGS) +{ + const char *pp; + int flags; + + pp = NULL; + switch (n->tok) { + case MDOC_Bsx: + pp = "BSD/OS"; + break; + case MDOC_Dx: + pp = "DragonFly"; + break; + case MDOC_Fx: + pp = "FreeBSD"; + break; + case MDOC_Nx: + pp = "NetBSD"; + break; + case MDOC_Ox: + pp = "OpenBSD"; + break; + case MDOC_Ux: + pp = "UNIX"; + break; + default: + abort(); + } + + term_word(p, pp); + if (n->child) { + /* Set TERMP_KEEP while printing the version, then restore the saved flags. */ flags = p->flags; + p->flags |= TERMP_KEEP; + term_word(p, n->child->string); + p->flags = flags; + } + return 0; +} + +/* Post handler installed for Pf in termacts[]: suppress the next space unless nothing follows or the next node is flagged MDOC_LINE. */ static void +termp_pf_post(DECL_ARGS) +{ + + if ( ! 
(n->next == NULL || n->next->flags & MDOC_LINE)) + p->flags |= TERMP_NOSPACE; +} + +/* Pre handler for subsection nodes: a new line (plus vertical space when a previous sibling exists) before the block, bold font and half of defindent + 1 as offset for the head, and the default indentation for the body. */ static int +termp_ss_pre(DECL_ARGS) +{ + + switch (n->type) { + case ROFFT_BLOCK: + term_newln(p); + if (n->prev) + term_vspace(p); + break; + case ROFFT_HEAD: + term_fontpush(p, TERMFONT_BOLD); + p->offset = term_len(p, (p->defindent+1)/2); + break; + case ROFFT_BODY: + p->offset = term_len(p, p->defindent); + break; + default: + break; + } + + return 1; +} + +/* Post handler paired with termp_ss_pre(): end the line after the head and after the body. */ static void +termp_ss_post(DECL_ARGS) +{ + + if (n->type == ROFFT_HEAD || n->type == ROFFT_BODY) + term_newln(p); +} + +/* Pre handler that applies SYNOPSIS spacing and switches to bold. */ static int +termp_cd_pre(DECL_ARGS) +{ + + synopsis_pre(p, n); + term_fontpush(p, TERMFONT_BOLD); + return 1; +} + +/* Pre handler for include file names: when both MDOC_SYNPRETTY and MDOC_LINE are set, print a bold #include and opening angle bracket; otherwise print the bracket in the current font and underline what follows. The name attaches to the bracket without a space. */ static int +termp_in_pre(DECL_ARGS) +{ + + synopsis_pre(p, n); + + if (MDOC_SYNPRETTY & n->flags && MDOC_LINE & n->flags) { + term_fontpush(p, TERMFONT_BOLD); + term_word(p, "#include"); + term_word(p, "<"); + } else { + term_word(p, "<"); + term_fontpush(p, TERMFONT_UNDER); + } + + p->flags |= TERMP_NOSPACE; + return 1; +} + +/* Post handler paired with termp_in_pre(): print the closing angle bracket without a preceding space, in bold when MDOC_SYNPRETTY is set. */ static void +termp_in_post(DECL_ARGS) +{ + + if (MDOC_SYNPRETTY & n->flags) + term_fontpush(p, TERMFONT_BOLD); + + p->flags |= TERMP_NOSPACE; + term_word(p, ">"); + + if (MDOC_SYNPRETTY & n->flags) + term_fontpop(p); +} + +/* Pre handler installed for Lp, br and sp in termacts[]: compute a line count (the argument of sp or 1 without one, 0 for br, 1 for any other token) and emit a newline, that many vertical spaces, or for a negative count add its magnitude to p->skipvsp. Children are not printed. */ static int +termp_sp_pre(DECL_ARGS) +{ + struct roffsu su; + int i, len; + + switch (n->tok) { + case MDOC_sp: + if (n->child) { + if ( ! 
a2roffsu(n->child->string, &su, SCALE_VS)) + su.scale = 1.0; + len = term_vspan(p, &su); + } else + len = 1; + break; + case MDOC_br: + len = 0; + break; + default: + /* Any other token counts as one line and also resets fn_prio. */ len = 1; + fn_prio = 0; + break; + } + + if (0 == len) + term_newln(p); + else if (len < 0) + p->skipvsp -= len; + else + for (i = 0; i < len; i++) + term_vspace(p); + + return 0; +} + +/* Pre handler that prints nothing and suppresses the children. */ static int +termp_skip_pre(DECL_ARGS) +{ + + return 0; +} + +/* Pre handler for the enclosure macros: on body and element nodes, print the opening delimiter selected by n->tok and attach the content without a space; aborts on an unknown token. */ static int +termp_quote_pre(DECL_ARGS) +{ + + if (n->type != ROFFT_BODY && n->type != ROFFT_ELEM) + return 1; + + switch (n->tok) { + case MDOC_Ao: + case MDOC_Aq: + /* A plain less-than sign is used when the only child is an Mt node. */ term_word(p, n->child != NULL && n->child->next == NULL && + n->child->tok == MDOC_Mt ? "<" : "\\(la"); + break; + case MDOC_Bro: + case MDOC_Brq: + term_word(p, "{"); + break; + case MDOC_Oo: + case MDOC_Op: + case MDOC_Bo: + case MDOC_Bq: + term_word(p, "["); + break; + case MDOC_Do: + case MDOC_Dq: + term_word(p, "\\(Lq"); + break; + case MDOC_En: + /* The opening delimiter is the first child string of the node stored in norm->Es; without one, nothing is printed and no space is suppressed. */ if (NULL == n->norm->Es || + NULL == n->norm->Es->child) + return 1; + term_word(p, n->norm->Es->child->string); + break; + case MDOC_Po: + case MDOC_Pq: + term_word(p, "("); + break; + case MDOC__T: + case MDOC_Qo: + case MDOC_Qq: + term_word(p, "\""); + break; + case MDOC_Ql: + case MDOC_So: + case MDOC_Sq: + term_word(p, "\\(oq"); + break; + default: + abort(); + } + + p->flags |= TERMP_NOSPACE; + return 1; +} + +/* Post handler paired with termp_quote_pre(): on body and element nodes, print the matching closing delimiter directly after the content; aborts on an unknown token. */ static void +termp_quote_post(DECL_ARGS) +{ + + if (n->type != ROFFT_BODY && n->type != ROFFT_ELEM) + return; + + p->flags |= TERMP_NOSPACE; + + switch (n->tok) { + case MDOC_Ao: + case MDOC_Aq: + /* Mirrors the opening case: a plain greater-than sign when the only child is an Mt node. */ term_word(p, n->child != NULL && n->child->next == NULL && + n->child->tok == MDOC_Mt ? 
">" : "\\(ra"); + break; + case MDOC_Bro: + case MDOC_Brq: + term_word(p, "}"); + break; + case MDOC_Oo: + case MDOC_Op: + case MDOC_Bo: + case MDOC_Bq: + term_word(p, "]"); + break; + case MDOC_Do: + case MDOC_Dq: + term_word(p, "\\(Rq"); + break; + case MDOC_En: + /* The closing delimiter is the second child string of the node stored in norm->Es; without one, the space suppression requested above is withdrawn. */ if (n->norm->Es == NULL || + n->norm->Es->child == NULL || + n->norm->Es->child->next == NULL) + p->flags &= ~TERMP_NOSPACE; + else + term_word(p, n->norm->Es->child->next->string); + break; + case MDOC_Po: + case MDOC_Pq: + term_word(p, ")"); + break; + case MDOC__T: + case MDOC_Qo: + case MDOC_Qq: + term_word(p, "\""); + break; + case MDOC_Ql: + case MDOC_So: + case MDOC_Sq: + term_word(p, "\\(cq"); + break; + default: + abort(); + } +} + +/* Pre handler installed for Eo in termacts[]: on the body node, either print a zero-width escape (empty head and a first child that is an end marker) or suppress the space between the opening delimiter and the content, depending on which of head, body and tail have children. */ static int +termp_eo_pre(DECL_ARGS) +{ + + if (n->type != ROFFT_BODY) + return 1; + + if (n->end == ENDBODY_NOT && + n->parent->head->child == NULL && + n->child != NULL && + n->child->end != ENDBODY_NOT) + term_word(p, "\\&"); + else if (n->end != ENDBODY_NOT ? n->child != NULL : + n->parent->head->child != NULL && (n->child != NULL || + (n->parent->tail != NULL && n->parent->tail->child != NULL))) + p->flags |= TERMP_NOSPACE; + + return 1; +} + +/* Post handler paired with termp_eo_pre(): on the body node, decide the spacing before the closing delimiter from whether the body (or head) and the tail have children. */ static void +termp_eo_post(DECL_ARGS) +{ + int body, tail; + + if (n->type != ROFFT_BODY) + return; + + if (n->end != ENDBODY_NOT) { + p->flags &= ~TERMP_NOSPACE; + return; + } + + body = n->child != NULL || n->parent->head->child != NULL; + tail = n->parent->tail != NULL && n->parent->tail->child != NULL; + + if (body && tail) + p->flags |= TERMP_NOSPACE; + else if ( ! (body || tail)) + term_word(p, "\\&"); + else if ( ! 
tail) + p->flags &= ~TERMP_NOSPACE; +} + +static int +termp_fo_pre(DECL_ARGS) +{ + size_t rmargin = 0; + int pretty; + + pretty = MDOC_SYNPRETTY & n->flags; + + if (n->type == ROFFT_BLOCK) { + synopsis_pre(p, n); + return 1; + } else if (n->type == ROFFT_BODY) { + if (pretty) { + rmargin = p->rmargin; + p->rmargin = p->offset + term_len(p, 4); + p->flags |= TERMP_NOBREAK | TERMP_BRIND | + TERMP_HANG; + } + p->flags |= TERMP_NOSPACE; + term_word(p, "("); + p->flags |= TERMP_NOSPACE; + if (pretty) { + term_flushln(p); + p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND | + TERMP_HANG); + p->offset = p->rmargin; + p->rmargin = rmargin; + } + return 1; + } + + if (NULL == n->child) + return 0; + + /* XXX: we drop non-initial arguments as per groff. */ + + assert(n->child->string); + term_fontpush(p, TERMFONT_BOLD); + term_word(p, n->child->string); + return 0; +} + +static void +termp_fo_post(DECL_ARGS) +{ + + if (n->type != ROFFT_BODY) + return; + + p->flags |= TERMP_NOSPACE; + term_word(p, ")"); + + if (MDOC_SYNPRETTY & n->flags) { + p->flags |= TERMP_NOSPACE; + term_word(p, ";"); + term_flushln(p); + } +} + +static int +termp_bf_pre(DECL_ARGS) +{ + + if (n->type == ROFFT_HEAD) + return 0; + else if (n->type != ROFFT_BODY) + return 1; + + if (FONT_Em == n->norm->Bf.font) + term_fontpush(p, TERMFONT_UNDER); + else if (FONT_Sy == n->norm->Bf.font) + term_fontpush(p, TERMFONT_BOLD); + else + term_fontpush(p, TERMFONT_NONE); + + return 1; +} + +static int +termp_sm_pre(DECL_ARGS) +{ + + if (NULL == n->child) + p->flags ^= TERMP_NONOSPACE; + else if (0 == strcmp("on", n->child->string)) + p->flags &= ~TERMP_NONOSPACE; + else + p->flags |= TERMP_NONOSPACE; + + if (p->col && ! (TERMP_NONOSPACE & p->flags)) + p->flags &= ~TERMP_NOSPACE; + + return 0; +} + +static int +termp_ap_pre(DECL_ARGS) +{ + + p->flags |= TERMP_NOSPACE; + term_word(p, "'"); + p->flags |= TERMP_NOSPACE; + return 1; +} + +static void +termp____post(DECL_ARGS) +{ + + /* + * Handle lists of authors. 
In general, print each followed by + * a comma. Don't print the comma if there are only two + * authors. + */ + if (MDOC__A == n->tok && n->next && MDOC__A == n->next->tok) + if (NULL == n->next->next || MDOC__A != n->next->next->tok) + if (NULL == n->prev || MDOC__A != n->prev->tok) + return; + + /* TODO: %U. */ + + if (NULL == n->parent || MDOC_Rs != n->parent->tok) + return; + + p->flags |= TERMP_NOSPACE; + if (NULL == n->next) { + term_word(p, "."); + p->flags |= TERMP_SENTENCE; + } else + term_word(p, ","); +} + +static int +termp_li_pre(DECL_ARGS) +{ + + term_fontpush(p, TERMFONT_NONE); + return 1; +} + +static int +termp_lk_pre(DECL_ARGS) +{ + const struct roff_node *link, *descr; + + if (NULL == (link = n->child)) + return 0; + + if (NULL != (descr = link->next)) { + term_fontpush(p, TERMFONT_UNDER); + while (NULL != descr) { + term_word(p, descr->string); + descr = descr->next; + } + p->flags |= TERMP_NOSPACE; + term_word(p, ":"); + term_fontpop(p); + } + + term_fontpush(p, TERMFONT_BOLD); + term_word(p, link->string); + term_fontpop(p); + + return 0; +} + +static int +termp_bk_pre(DECL_ARGS) +{ + + switch (n->type) { + case ROFFT_BLOCK: + break; + case ROFFT_HEAD: + return 0; + case ROFFT_BODY: + if (n->parent->args != NULL || n->prev->child == NULL) + p->flags |= TERMP_PREKEEP; + break; + default: + abort(); + } + + return 1; +} + +static void +termp_bk_post(DECL_ARGS) +{ + + if (n->type == ROFFT_BODY) + p->flags &= ~(TERMP_KEEP | TERMP_PREKEEP); +} + +static void +termp__t_post(DECL_ARGS) +{ + + /* + * If we're in an `Rs' and there's a journal present, then quote + * us instead of underlining us (for disambiguation). + */ + if (n->parent && MDOC_Rs == n->parent->tok && + n->parent->norm->Rs.quote_T) + termp_quote_post(p, pair, meta, n); + + termp____post(p, pair, meta, n); +} + +static int +termp__t_pre(DECL_ARGS) +{ + + /* + * If we're in an `Rs' and there's a journal present, then quote + * us instead of underlining us (for disambiguation). 
+ */ + if (n->parent && MDOC_Rs == n->parent->tok && + n->parent->norm->Rs.quote_T) + return termp_quote_pre(p, pair, meta, n); + + term_fontpush(p, TERMFONT_UNDER); + return 1; +} + +static int +termp_under_pre(DECL_ARGS) +{ + + term_fontpush(p, TERMFONT_UNDER); + return 1; +} + +static int +termp_er_pre(DECL_ARGS) +{ + + if (n->sec == SEC_ERRORS && + (n->parent->tok == MDOC_It || + (n->parent->tok == MDOC_Bq && + n->parent->parent->parent->tok == MDOC_It))) + tag_put(n->child->string, 1, p->line); + return 1; +} + +static int +termp_tag_pre(DECL_ARGS) +{ + + if (n->child != NULL && + n->child->type == ROFFT_TEXT && + n->prev == NULL && + (n->parent->tok == MDOC_It || + (n->parent->tok == MDOC_Xo && + n->parent->parent->prev == NULL && + n->parent->parent->parent->tok == MDOC_It))) + tag_put(n->child->string, 1, p->line); + return 1; +} diff --git a/contrib/mdocml/mdoc_validate.c b/contrib/mdocml/mdoc_validate.c new file mode 100644 index 0000000..e369349 --- /dev/null +++ b/contrib/mdocml/mdoc_validate.c @@ -0,0 +1,2294 @@ +/* $Id: mdoc_validate.c,v 1.301 2016/01/08 17:48:09 schwarze Exp $ */ +/* + * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010-2016 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2010 Joerg Sonnenberger <joerg@netbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> +#ifndef OSNAME +#include <sys/utsname.h> +#endif + +#include <assert.h> +#include <ctype.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "libmandoc.h" +#include "roff_int.h" +#include "libmdoc.h" + +/* FIXME: .Bl -diag can't have non-text children in HEAD. */ + +#define POST_ARGS struct roff_man *mdoc + +enum check_ineq { + CHECK_LT, + CHECK_GT, + CHECK_EQ +}; + +typedef void (*v_post)(POST_ARGS); + +static void check_text(struct roff_man *, int, int, char *); +static void check_argv(struct roff_man *, + struct roff_node *, struct mdoc_argv *); +static void check_args(struct roff_man *, struct roff_node *); +static int child_an(const struct roff_node *); +static size_t macro2len(int); +static void rewrite_macro2len(char **); + +static void post_an(POST_ARGS); +static void post_an_norm(POST_ARGS); +static void post_at(POST_ARGS); +static void post_bd(POST_ARGS); +static void post_bf(POST_ARGS); +static void post_bk(POST_ARGS); +static void post_bl(POST_ARGS); +static void post_bl_block(POST_ARGS); +static void post_bl_block_tag(POST_ARGS); +static void post_bl_head(POST_ARGS); +static void post_bl_norm(POST_ARGS); +static void post_bx(POST_ARGS); +static void post_defaults(POST_ARGS); +static void post_display(POST_ARGS); +static void post_dd(POST_ARGS); +static void post_dt(POST_ARGS); +static void post_en(POST_ARGS); +static void post_es(POST_ARGS); +static void post_eoln(POST_ARGS); +static void post_ex(POST_ARGS); +static void 
post_fa(POST_ARGS); +static void post_fn(POST_ARGS); +static void post_fname(POST_ARGS); +static void post_fo(POST_ARGS); +static void post_hyph(POST_ARGS); +static void post_ignpar(POST_ARGS); +static void post_it(POST_ARGS); +static void post_lb(POST_ARGS); +static void post_nd(POST_ARGS); +static void post_nm(POST_ARGS); +static void post_ns(POST_ARGS); +static void post_obsolete(POST_ARGS); +static void post_os(POST_ARGS); +static void post_par(POST_ARGS); +static void post_prevpar(POST_ARGS); +static void post_root(POST_ARGS); +static void post_rs(POST_ARGS); +static void post_sh(POST_ARGS); +static void post_sh_head(POST_ARGS); +static void post_sh_name(POST_ARGS); +static void post_sh_see_also(POST_ARGS); +static void post_sh_authors(POST_ARGS); +static void post_sm(POST_ARGS); +static void post_st(POST_ARGS); +static void post_std(POST_ARGS); + +static v_post mdoc_valids[MDOC_MAX] = { + NULL, /* Ap */ + post_dd, /* Dd */ + post_dt, /* Dt */ + post_os, /* Os */ + post_sh, /* Sh */ + post_ignpar, /* Ss */ + post_par, /* Pp */ + post_display, /* D1 */ + post_display, /* Dl */ + post_display, /* Bd */ + NULL, /* Ed */ + post_bl, /* Bl */ + NULL, /* El */ + post_it, /* It */ + NULL, /* Ad */ + post_an, /* An */ + post_defaults, /* Ar */ + NULL, /* Cd */ + NULL, /* Cm */ + NULL, /* Dv */ + NULL, /* Er */ + NULL, /* Ev */ + post_ex, /* Ex */ + post_fa, /* Fa */ + NULL, /* Fd */ + NULL, /* Fl */ + post_fn, /* Fn */ + NULL, /* Ft */ + NULL, /* Ic */ + NULL, /* In */ + post_defaults, /* Li */ + post_nd, /* Nd */ + post_nm, /* Nm */ + NULL, /* Op */ + post_obsolete, /* Ot */ + post_defaults, /* Pa */ + post_std, /* Rv */ + post_st, /* St */ + NULL, /* Va */ + NULL, /* Vt */ + NULL, /* Xr */ + NULL, /* %A */ + post_hyph, /* %B */ /* FIXME: can be used outside Rs/Re. */ + NULL, /* %D */ + NULL, /* %I */ + NULL, /* %J */ + post_hyph, /* %N */ + post_hyph, /* %O */ + NULL, /* %P */ + post_hyph, /* %R */ + post_hyph, /* %T */ /* FIXME: can be used outside Rs/Re. 
*/ + NULL, /* %V */ + NULL, /* Ac */ + NULL, /* Ao */ + NULL, /* Aq */ + post_at, /* At */ + NULL, /* Bc */ + post_bf, /* Bf */ + NULL, /* Bo */ + NULL, /* Bq */ + NULL, /* Bsx */ + post_bx, /* Bx */ + post_obsolete, /* Db */ + NULL, /* Dc */ + NULL, /* Do */ + NULL, /* Dq */ + NULL, /* Ec */ + NULL, /* Ef */ + NULL, /* Em */ + NULL, /* Eo */ + NULL, /* Fx */ + NULL, /* Ms */ + NULL, /* No */ + post_ns, /* Ns */ + NULL, /* Nx */ + NULL, /* Ox */ + NULL, /* Pc */ + NULL, /* Pf */ + NULL, /* Po */ + NULL, /* Pq */ + NULL, /* Qc */ + NULL, /* Ql */ + NULL, /* Qo */ + NULL, /* Qq */ + NULL, /* Re */ + post_rs, /* Rs */ + NULL, /* Sc */ + NULL, /* So */ + NULL, /* Sq */ + post_sm, /* Sm */ + post_hyph, /* Sx */ + NULL, /* Sy */ + NULL, /* Tn */ + NULL, /* Ux */ + NULL, /* Xc */ + NULL, /* Xo */ + post_fo, /* Fo */ + NULL, /* Fc */ + NULL, /* Oo */ + NULL, /* Oc */ + post_bk, /* Bk */ + NULL, /* Ek */ + post_eoln, /* Bt */ + NULL, /* Hf */ + post_obsolete, /* Fr */ + post_eoln, /* Ud */ + post_lb, /* Lb */ + post_par, /* Lp */ + NULL, /* Lk */ + post_defaults, /* Mt */ + NULL, /* Brq */ + NULL, /* Bro */ + NULL, /* Brc */ + NULL, /* %C */ + post_es, /* Es */ + post_en, /* En */ + NULL, /* Dx */ + NULL, /* %Q */ + post_par, /* br */ + post_par, /* sp */ + NULL, /* %U */ + NULL, /* Ta */ + NULL, /* ll */ +}; + +#define RSORD_MAX 14 /* Number of `Rs' blocks. 
*/ + +static const int rsord[RSORD_MAX] = { + MDOC__A, + MDOC__T, + MDOC__B, + MDOC__I, + MDOC__J, + MDOC__R, + MDOC__N, + MDOC__V, + MDOC__U, + MDOC__P, + MDOC__Q, + MDOC__C, + MDOC__D, + MDOC__O +}; + +static const char * const secnames[SEC__MAX] = { + NULL, + "NAME", + "LIBRARY", + "SYNOPSIS", + "DESCRIPTION", + "CONTEXT", + "IMPLEMENTATION NOTES", + "RETURN VALUES", + "ENVIRONMENT", + "FILES", + "EXIT STATUS", + "EXAMPLES", + "DIAGNOSTICS", + "COMPATIBILITY", + "ERRORS", + "SEE ALSO", + "STANDARDS", + "HISTORY", + "AUTHORS", + "CAVEATS", + "BUGS", + "SECURITY CONSIDERATIONS", + NULL +}; + + +void +mdoc_node_validate(struct roff_man *mdoc) +{ + struct roff_node *n; + v_post *p; + + n = mdoc->last; + mdoc->last = mdoc->last->child; + while (mdoc->last != NULL) { + mdoc_node_validate(mdoc); + if (mdoc->last == n) + mdoc->last = mdoc->last->child; + else + mdoc->last = mdoc->last->next; + } + + mdoc->last = n; + mdoc->next = ROFF_NEXT_SIBLING; + switch (n->type) { + case ROFFT_TEXT: + if (n->sec != SEC_SYNOPSIS || n->parent->tok != MDOC_Fd) + check_text(mdoc, n->line, n->pos, n->string); + break; + case ROFFT_EQN: + case ROFFT_TBL: + break; + case ROFFT_ROOT: + post_root(mdoc); + break; + default: + check_args(mdoc, mdoc->last); + + /* + * Closing delimiters are not special at the + * beginning of a block, opening delimiters + * are not special at the end. + */ + + if (n->child != NULL) + n->child->flags &= ~MDOC_DELIMC; + if (n->last != NULL) + n->last->flags &= ~MDOC_DELIMO; + + /* Call the macro's postprocessor. 
*/ + + p = mdoc_valids + n->tok; + if (*p) + (*p)(mdoc); + if (mdoc->last == n) + mdoc_state(mdoc, n); + break; + } +} + +static void +check_args(struct roff_man *mdoc, struct roff_node *n) +{ + int i; + + if (NULL == n->args) + return; + + assert(n->args->argc); + for (i = 0; i < (int)n->args->argc; i++) + check_argv(mdoc, n, &n->args->argv[i]); +} + +static void +check_argv(struct roff_man *mdoc, struct roff_node *n, struct mdoc_argv *v) +{ + int i; + + for (i = 0; i < (int)v->sz; i++) + check_text(mdoc, v->line, v->pos, v->value[i]); +} + +static void +check_text(struct roff_man *mdoc, int ln, int pos, char *p) +{ + char *cp; + + if (MDOC_LITERAL & mdoc->flags) + return; + + for (cp = p; NULL != (p = strchr(p, '\t')); p++) + mandoc_msg(MANDOCERR_FI_TAB, mdoc->parse, + ln, pos + (int)(p - cp), NULL); +} + +static void +post_bl_norm(POST_ARGS) +{ + struct roff_node *n; + struct mdoc_argv *argv, *wa; + int i; + enum mdocargt mdoclt; + enum mdoc_list lt; + + n = mdoc->last->parent; + n->norm->Bl.type = LIST__NONE; + + /* + * First figure out which kind of list to use: bind ourselves to + * the first mentioned list type and warn about any remaining + * ones. If we find no list type, we default to LIST_item. + */ + + wa = (n->args == NULL) ? NULL : n->args->argv; + mdoclt = MDOC_ARG_MAX; + for (i = 0; n->args && i < (int)n->args->argc; i++) { + argv = n->args->argv + i; + lt = LIST__NONE; + switch (argv->arg) { + /* Set list types. */ + case MDOC_Bullet: + lt = LIST_bullet; + break; + case MDOC_Dash: + lt = LIST_dash; + break; + case MDOC_Enum: + lt = LIST_enum; + break; + case MDOC_Hyphen: + lt = LIST_hyphen; + break; + case MDOC_Item: + lt = LIST_item; + break; + case MDOC_Tag: + lt = LIST_tag; + break; + case MDOC_Diag: + lt = LIST_diag; + break; + case MDOC_Hang: + lt = LIST_hang; + break; + case MDOC_Ohang: + lt = LIST_ohang; + break; + case MDOC_Inset: + lt = LIST_inset; + break; + case MDOC_Column: + lt = LIST_column; + break; + /* Set list arguments. 
*/ + case MDOC_Compact: + if (n->norm->Bl.comp) + mandoc_msg(MANDOCERR_ARG_REP, + mdoc->parse, argv->line, + argv->pos, "Bl -compact"); + n->norm->Bl.comp = 1; + break; + case MDOC_Width: + wa = argv; + if (0 == argv->sz) { + mandoc_msg(MANDOCERR_ARG_EMPTY, + mdoc->parse, argv->line, + argv->pos, "Bl -width"); + n->norm->Bl.width = "0n"; + break; + } + if (NULL != n->norm->Bl.width) + mandoc_vmsg(MANDOCERR_ARG_REP, + mdoc->parse, argv->line, + argv->pos, "Bl -width %s", + argv->value[0]); + rewrite_macro2len(argv->value); + n->norm->Bl.width = argv->value[0]; + break; + case MDOC_Offset: + if (0 == argv->sz) { + mandoc_msg(MANDOCERR_ARG_EMPTY, + mdoc->parse, argv->line, + argv->pos, "Bl -offset"); + break; + } + if (NULL != n->norm->Bl.offs) + mandoc_vmsg(MANDOCERR_ARG_REP, + mdoc->parse, argv->line, + argv->pos, "Bl -offset %s", + argv->value[0]); + rewrite_macro2len(argv->value); + n->norm->Bl.offs = argv->value[0]; + break; + default: + continue; + } + if (LIST__NONE == lt) + continue; + mdoclt = argv->arg; + + /* Check: multiple list types. */ + + if (LIST__NONE != n->norm->Bl.type) { + mandoc_vmsg(MANDOCERR_BL_REP, + mdoc->parse, n->line, n->pos, + "Bl -%s", mdoc_argnames[argv->arg]); + continue; + } + + /* The list type should come first. */ + + if (n->norm->Bl.width || + n->norm->Bl.offs || + n->norm->Bl.comp) + mandoc_vmsg(MANDOCERR_BL_LATETYPE, + mdoc->parse, n->line, n->pos, "Bl -%s", + mdoc_argnames[n->args->argv[0].arg]); + + n->norm->Bl.type = lt; + if (LIST_column == lt) { + n->norm->Bl.ncols = argv->sz; + n->norm->Bl.cols = (void *)argv->value; + } + } + + /* Allow lists to default to LIST_item. */ + + if (LIST__NONE == n->norm->Bl.type) { + mandoc_msg(MANDOCERR_BL_NOTYPE, mdoc->parse, + n->line, n->pos, "Bl"); + n->norm->Bl.type = LIST_item; + } + + /* + * Validate the width field. Some list types don't need width + * types and should be warned about them. Others should have it + * and must also be warned. 
Yet others have a default and need + * no warning. + */ + + switch (n->norm->Bl.type) { + case LIST_tag: + if (NULL == n->norm->Bl.width) + mandoc_msg(MANDOCERR_BL_NOWIDTH, mdoc->parse, + n->line, n->pos, "Bl -tag"); + break; + case LIST_column: + case LIST_diag: + case LIST_ohang: + case LIST_inset: + case LIST_item: + if (n->norm->Bl.width) + mandoc_vmsg(MANDOCERR_BL_SKIPW, mdoc->parse, + wa->line, wa->pos, "Bl -%s", + mdoc_argnames[mdoclt]); + break; + case LIST_bullet: + case LIST_dash: + case LIST_hyphen: + if (NULL == n->norm->Bl.width) + n->norm->Bl.width = "2n"; + break; + case LIST_enum: + if (NULL == n->norm->Bl.width) + n->norm->Bl.width = "3n"; + break; + default: + break; + } +} + +static void +post_bd(POST_ARGS) +{ + struct roff_node *n; + struct mdoc_argv *argv; + int i; + enum mdoc_disp dt; + + n = mdoc->last; + for (i = 0; n->args && i < (int)n->args->argc; i++) { + argv = n->args->argv + i; + dt = DISP__NONE; + + switch (argv->arg) { + case MDOC_Centred: + dt = DISP_centered; + break; + case MDOC_Ragged: + dt = DISP_ragged; + break; + case MDOC_Unfilled: + dt = DISP_unfilled; + break; + case MDOC_Filled: + dt = DISP_filled; + break; + case MDOC_Literal: + dt = DISP_literal; + break; + case MDOC_File: + mandoc_msg(MANDOCERR_BD_FILE, mdoc->parse, + n->line, n->pos, NULL); + break; + case MDOC_Offset: + if (0 == argv->sz) { + mandoc_msg(MANDOCERR_ARG_EMPTY, + mdoc->parse, argv->line, + argv->pos, "Bd -offset"); + break; + } + if (NULL != n->norm->Bd.offs) + mandoc_vmsg(MANDOCERR_ARG_REP, + mdoc->parse, argv->line, + argv->pos, "Bd -offset %s", + argv->value[0]); + rewrite_macro2len(argv->value); + n->norm->Bd.offs = argv->value[0]; + break; + case MDOC_Compact: + if (n->norm->Bd.comp) + mandoc_msg(MANDOCERR_ARG_REP, + mdoc->parse, argv->line, + argv->pos, "Bd -compact"); + n->norm->Bd.comp = 1; + break; + default: + abort(); + } + if (DISP__NONE == dt) + continue; + + if (DISP__NONE == n->norm->Bd.type) + n->norm->Bd.type = dt; + else + 
mandoc_vmsg(MANDOCERR_BD_REP, + mdoc->parse, n->line, n->pos, + "Bd -%s", mdoc_argnames[argv->arg]); + } + + if (DISP__NONE == n->norm->Bd.type) { + mandoc_msg(MANDOCERR_BD_NOTYPE, mdoc->parse, + n->line, n->pos, "Bd"); + n->norm->Bd.type = DISP_ragged; + } +} + +static void +post_an_norm(POST_ARGS) +{ + struct roff_node *n; + struct mdoc_argv *argv; + size_t i; + + n = mdoc->last; + if (n->args == NULL) + return; + + for (i = 1; i < n->args->argc; i++) { + argv = n->args->argv + i; + mandoc_vmsg(MANDOCERR_AN_REP, + mdoc->parse, argv->line, argv->pos, + "An -%s", mdoc_argnames[argv->arg]); + } + + argv = n->args->argv; + if (argv->arg == MDOC_Split) + n->norm->An.auth = AUTH_split; + else if (argv->arg == MDOC_Nosplit) + n->norm->An.auth = AUTH_nosplit; + else + abort(); +} + +static void +post_std(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + if (n->args && n->args->argc == 1) + if (n->args->argv[0].arg == MDOC_Std) + return; + + mandoc_msg(MANDOCERR_ARG_STD, mdoc->parse, + n->line, n->pos, mdoc_macronames[n->tok]); +} + +static void +post_obsolete(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + if (n->type == ROFFT_ELEM || n->type == ROFFT_BLOCK) + mandoc_msg(MANDOCERR_MACRO_OBS, mdoc->parse, + n->line, n->pos, mdoc_macronames[n->tok]); +} + +static void +post_bf(POST_ARGS) +{ + struct roff_node *np, *nch; + + /* + * Unlike other data pointers, these are "housed" by the HEAD + * element, which contains the goods. + */ + + np = mdoc->last; + if (np->type != ROFFT_HEAD) + return; + + assert(np->parent->type == ROFFT_BLOCK); + assert(np->parent->tok == MDOC_Bf); + + /* Check the number of arguments. */ + + nch = np->child; + if (np->parent->args == NULL) { + if (nch == NULL) { + mandoc_msg(MANDOCERR_BF_NOFONT, mdoc->parse, + np->line, np->pos, "Bf"); + return; + } + nch = nch->next; + } + if (nch != NULL) + mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, + nch->line, nch->pos, "Bf ... %s", nch->string); + + /* Extract argument into data. 
*/ + + if (np->parent->args != NULL) { + switch (np->parent->args->argv[0].arg) { + case MDOC_Emphasis: + np->norm->Bf.font = FONT_Em; + break; + case MDOC_Literal: + np->norm->Bf.font = FONT_Li; + break; + case MDOC_Symbolic: + np->norm->Bf.font = FONT_Sy; + break; + default: + abort(); + } + return; + } + + /* Extract parameter into data. */ + + if ( ! strcmp(np->child->string, "Em")) + np->norm->Bf.font = FONT_Em; + else if ( ! strcmp(np->child->string, "Li")) + np->norm->Bf.font = FONT_Li; + else if ( ! strcmp(np->child->string, "Sy")) + np->norm->Bf.font = FONT_Sy; + else + mandoc_vmsg(MANDOCERR_BF_BADFONT, mdoc->parse, + np->child->line, np->child->pos, + "Bf %s", np->child->string); +} + +static void +post_lb(POST_ARGS) +{ + struct roff_node *n; + const char *stdlibname; + char *libname; + + n = mdoc->last->child; + assert(n->type == ROFFT_TEXT); + + if (NULL == (stdlibname = mdoc_a2lib(n->string))) + mandoc_asprintf(&libname, + "library \\(Lq%s\\(Rq", n->string); + else + libname = mandoc_strdup(stdlibname); + + free(n->string); + n->string = libname; +} + +static void +post_eoln(POST_ARGS) +{ + const struct roff_node *n; + + n = mdoc->last; + if (n->child != NULL) + mandoc_vmsg(MANDOCERR_ARG_SKIP, + mdoc->parse, n->line, n->pos, + "%s %s", mdoc_macronames[n->tok], + n->child->string); +} + +static void +post_fname(POST_ARGS) +{ + const struct roff_node *n; + const char *cp; + size_t pos; + + n = mdoc->last->child; + pos = strcspn(n->string, "()"); + cp = n->string + pos; + if ( ! 
(cp[0] == '\0' || (cp[0] == '(' && cp[1] == '*'))) + mandoc_msg(MANDOCERR_FN_PAREN, mdoc->parse, + n->line, n->pos + pos, n->string); +} + +static void +post_fn(POST_ARGS) +{ + + post_fname(mdoc); + post_fa(mdoc); +} + +static void +post_fo(POST_ARGS) +{ + const struct roff_node *n; + + n = mdoc->last; + + if (n->type != ROFFT_HEAD) + return; + + if (n->child == NULL) { + mandoc_msg(MANDOCERR_FO_NOHEAD, mdoc->parse, + n->line, n->pos, "Fo"); + return; + } + if (n->child != n->last) { + mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, + n->child->next->line, n->child->next->pos, + "Fo ... %s", n->child->next->string); + while (n->child != n->last) + roff_node_delete(mdoc, n->last); + } + + post_fname(mdoc); +} + +static void +post_fa(POST_ARGS) +{ + const struct roff_node *n; + const char *cp; + + for (n = mdoc->last->child; n != NULL; n = n->next) { + for (cp = n->string; *cp != '\0'; cp++) { + /* Ignore callbacks and alterations. */ + if (*cp == '(' || *cp == '{') + break; + if (*cp != ',') + continue; + mandoc_msg(MANDOCERR_FA_COMMA, mdoc->parse, + n->line, n->pos + (cp - n->string), + n->string); + break; + } + } +} + +static void +post_nm(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + + if (n->last != NULL && + (n->last->tok == MDOC_Pp || + n->last->tok == MDOC_Lp)) + mdoc_node_relink(mdoc, n->last); + + if (mdoc->meta.name != NULL) + return; + + deroff(&mdoc->meta.name, n); + + if (mdoc->meta.name == NULL) + mandoc_msg(MANDOCERR_NM_NONAME, mdoc->parse, + n->line, n->pos, "Nm"); +} + +static void +post_nd(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + + if (n->type != ROFFT_BODY) + return; + + if (n->child == NULL) + mandoc_msg(MANDOCERR_ND_EMPTY, mdoc->parse, + n->line, n->pos, "Nd"); + + post_hyph(mdoc); +} + +static void +post_display(POST_ARGS) +{ + struct roff_node *n, *np; + + n = mdoc->last; + switch (n->type) { + case ROFFT_BODY: + if (n->end != ENDBODY_NOT) + break; + if (n->child == NULL) + mandoc_msg(MANDOCERR_BLK_EMPTY, 
mdoc->parse, + n->line, n->pos, mdoc_macronames[n->tok]); + else if (n->tok == MDOC_D1) + post_hyph(mdoc); + break; + case ROFFT_BLOCK: + if (n->tok == MDOC_Bd) { + if (n->args == NULL) { + mandoc_msg(MANDOCERR_BD_NOARG, + mdoc->parse, n->line, n->pos, "Bd"); + mdoc->next = ROFF_NEXT_SIBLING; + while (n->body->child != NULL) + mdoc_node_relink(mdoc, + n->body->child); + roff_node_delete(mdoc, n); + break; + } + post_bd(mdoc); + post_prevpar(mdoc); + } + for (np = n->parent; np != NULL; np = np->parent) { + if (np->type == ROFFT_BLOCK && np->tok == MDOC_Bd) { + mandoc_vmsg(MANDOCERR_BD_NEST, + mdoc->parse, n->line, n->pos, + "%s in Bd", mdoc_macronames[n->tok]); + break; + } + } + break; + default: + break; + } +} + +static void +post_defaults(POST_ARGS) +{ + struct roff_node *nn; + + /* + * The `Ar' defaults to "file ..." if no value is provided as an + * argument; the `Mt' and `Pa' macros use "~"; the `Li' just + * gets an empty string. + */ + + if (mdoc->last->child != NULL) + return; + + nn = mdoc->last; + + switch (nn->tok) { + case MDOC_Ar: + mdoc->next = ROFF_NEXT_CHILD; + roff_word_alloc(mdoc, nn->line, nn->pos, "file"); + roff_word_alloc(mdoc, nn->line, nn->pos, "..."); + break; + case MDOC_Pa: + case MDOC_Mt: + mdoc->next = ROFF_NEXT_CHILD; + roff_word_alloc(mdoc, nn->line, nn->pos, "~"); + break; + default: + abort(); + } + mdoc->last = nn; +} + +static void +post_at(POST_ARGS) +{ + struct roff_node *n; + const char *std_att; + char *att; + + n = mdoc->last; + if (n->child == NULL) { + mdoc->next = ROFF_NEXT_CHILD; + roff_word_alloc(mdoc, n->line, n->pos, "AT&T UNIX"); + mdoc->last = n; + return; + } + + /* + * If we have a child, look it up in the standard keys. If a + * key exist, use that instead of the child; if it doesn't, + * prefix "AT&T UNIX " to the existing data. 
+ */ + + n = n->child; + assert(n->type == ROFFT_TEXT); + if ((std_att = mdoc_a2att(n->string)) == NULL) { + mandoc_vmsg(MANDOCERR_AT_BAD, mdoc->parse, + n->line, n->pos, "At %s", n->string); + mandoc_asprintf(&att, "AT&T UNIX %s", n->string); + } else + att = mandoc_strdup(std_att); + + free(n->string); + n->string = att; +} + +static void +post_an(POST_ARGS) +{ + struct roff_node *np, *nch; + + post_an_norm(mdoc); + + np = mdoc->last; + nch = np->child; + if (np->norm->An.auth == AUTH__NONE) { + if (nch == NULL) + mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse, + np->line, np->pos, "An"); + } else if (nch != NULL) + mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, + nch->line, nch->pos, "An ... %s", nch->string); +} + +static void +post_en(POST_ARGS) +{ + + post_obsolete(mdoc); + if (mdoc->last->type == ROFFT_BLOCK) + mdoc->last->norm->Es = mdoc->last_es; +} + +static void +post_es(POST_ARGS) +{ + + post_obsolete(mdoc); + mdoc->last_es = mdoc->last; +} + +static void +post_it(POST_ARGS) +{ + struct roff_node *nbl, *nit, *nch; + int i, cols; + enum mdoc_list lt; + + post_prevpar(mdoc); + + nit = mdoc->last; + if (nit->type != ROFFT_BLOCK) + return; + + nbl = nit->parent->parent; + lt = nbl->norm->Bl.type; + + switch (lt) { + case LIST_tag: + case LIST_hang: + case LIST_ohang: + case LIST_inset: + case LIST_diag: + if (nit->head->child == NULL) + mandoc_vmsg(MANDOCERR_IT_NOHEAD, + mdoc->parse, nit->line, nit->pos, + "Bl -%s It", + mdoc_argnames[nbl->args->argv[0].arg]); + break; + case LIST_bullet: + case LIST_dash: + case LIST_enum: + case LIST_hyphen: + if (nit->body == NULL || nit->body->child == NULL) + mandoc_vmsg(MANDOCERR_IT_NOBODY, + mdoc->parse, nit->line, nit->pos, + "Bl -%s It", + mdoc_argnames[nbl->args->argv[0].arg]); + /* FALLTHROUGH */ + case LIST_item: + if (nit->head->child != NULL) + mandoc_vmsg(MANDOCERR_ARG_SKIP, + mdoc->parse, nit->line, nit->pos, + "It %s", nit->head->child->string); + break; + case LIST_column: + cols = (int)nbl->norm->Bl.ncols; 
+ + assert(nit->head->child == NULL); + + i = 0; + for (nch = nit->child; nch != NULL; nch = nch->next) + if (nch->type == ROFFT_BODY) + i++; + + if (i < cols || i > cols + 1) + mandoc_vmsg(MANDOCERR_BL_COL, + mdoc->parse, nit->line, nit->pos, + "%d columns, %d cells", cols, i); + break; + default: + abort(); + } +} + +static void +post_bl_block(POST_ARGS) +{ + struct roff_node *n, *ni, *nc; + + post_prevpar(mdoc); + + /* + * These are fairly complicated, so we've broken them into two + * functions. post_bl_block_tag() is called when a -tag is + * specified, but no -width (it must be guessed). The second + * when a -width is specified (macro indicators must be + * rewritten into real lengths). + */ + + n = mdoc->last; + + if (n->norm->Bl.type == LIST_tag && + n->norm->Bl.width == NULL) { + post_bl_block_tag(mdoc); + assert(n->norm->Bl.width != NULL); + } + + for (ni = n->body->child; ni != NULL; ni = ni->next) { + if (ni->body == NULL) + continue; + nc = ni->body->last; + while (nc != NULL) { + switch (nc->tok) { + case MDOC_Pp: + case MDOC_Lp: + case MDOC_br: + break; + default: + nc = NULL; + continue; + } + if (ni->next == NULL) { + mandoc_msg(MANDOCERR_PAR_MOVE, + mdoc->parse, nc->line, nc->pos, + mdoc_macronames[nc->tok]); + mdoc_node_relink(mdoc, nc); + } else if (n->norm->Bl.comp == 0 && + n->norm->Bl.type != LIST_column) { + mandoc_vmsg(MANDOCERR_PAR_SKIP, + mdoc->parse, nc->line, nc->pos, + "%s before It", + mdoc_macronames[nc->tok]); + roff_node_delete(mdoc, nc); + } else + break; + nc = ni->body->last; + } + } +} + +/* + * If the argument of -offset or -width is a macro, + * replace it with the associated default width. + */ +void +rewrite_macro2len(char **arg) +{ + size_t width; + int tok; + + if (*arg == NULL) + return; + else if ( ! 
strcmp(*arg, "Ds")) + width = 6; + else if ((tok = mdoc_hash_find(*arg)) == TOKEN_NONE) + return; + else + width = macro2len(tok); + + free(*arg); + mandoc_asprintf(arg, "%zun", width); +} + +static void +post_bl_block_tag(POST_ARGS) +{ + struct roff_node *n, *nn; + size_t sz, ssz; + int i; + char buf[24]; + + /* + * Calculate the -width for a `Bl -tag' list if it hasn't been + * provided. Uses the first head macro. NOTE AGAIN: this is + * ONLY if the -width argument has NOT been provided. See + * rewrite_macro2len() for converting the -width string. + */ + + sz = 10; + n = mdoc->last; + + for (nn = n->body->child; nn != NULL; nn = nn->next) { + if (nn->tok != MDOC_It) + continue; + + assert(nn->type == ROFFT_BLOCK); + nn = nn->head->child; + + if (nn == NULL) + break; + + if (nn->type == ROFFT_TEXT) { + sz = strlen(nn->string) + 1; + break; + } + + if (0 != (ssz = macro2len(nn->tok))) + sz = ssz; + + break; + } + + /* Defaults to ten ens. */ + + (void)snprintf(buf, sizeof(buf), "%un", (unsigned int)sz); + + /* + * We have to dynamically add this to the macro's argument list. + * We're guaranteed that a MDOC_Width doesn't already exist. + */ + + assert(n->args != NULL); + i = (int)(n->args->argc)++; + + n->args->argv = mandoc_reallocarray(n->args->argv, + n->args->argc, sizeof(struct mdoc_argv)); + + n->args->argv[i].arg = MDOC_Width; + n->args->argv[i].line = n->line; + n->args->argv[i].pos = n->pos; + n->args->argv[i].sz = 1; + n->args->argv[i].value = mandoc_malloc(sizeof(char *)); + n->args->argv[i].value[0] = mandoc_strdup(buf); + + /* Set our width! */ + n->norm->Bl.width = n->args->argv[i].value[0]; +} + +static void +post_bl_head(POST_ARGS) +{ + struct roff_node *nbl, *nh, *nch, *nnext; + struct mdoc_argv *argv; + int i, j; + + post_bl_norm(mdoc); + + nh = mdoc->last; + if (nh->norm->Bl.type != LIST_column) { + if ((nch = nh->child) == NULL) + return; + mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, + nch->line, nch->pos, "Bl ... 
/*
 * Validate a `Bl' node.
 * Block and head nodes are handed to post_bl_block() and post_bl_head().
 * For a body that was not ended by an explicit end marker, report an
 * empty list, and move every child that is not an `It' (or an `Sm'
 * directly followed by an `It') out of the list, placing it right
 * before the Bl block.
 */
static void
post_bl(POST_ARGS)
{
	struct roff_node	*nparent, *nprev; /* of the Bl block */
	struct roff_node	*nblock, *nbody;  /* of the Bl */
	struct roff_node	*nchild, *nnext;  /* of the Bl body */

	nbody = mdoc->last;
	switch (nbody->type) {
	case ROFFT_BLOCK:
		post_bl_block(mdoc);
		return;
	case ROFFT_HEAD:
		post_bl_head(mdoc);
		return;
	case ROFFT_BODY:
		break;
	default:
		return;
	}
	if (nbody->end != ENDBODY_NOT)
		return;

	nchild = nbody->child;
	if (nchild == NULL) {
		mandoc_msg(MANDOCERR_BLK_EMPTY, mdoc->parse,
		    nbody->line, nbody->pos, "Bl");
		return;
	}
	while (nchild != NULL) {
		/* Items, and Sm directly before an item, stay in place. */
		if (nchild->tok == MDOC_It ||
		    (nchild->tok == MDOC_Sm &&
		     nchild->next != NULL &&
		     nchild->next->tok == MDOC_It)) {
			nchild = nchild->next;
			continue;
		}

		mandoc_msg(MANDOCERR_BL_MOVE, mdoc->parse,
		    nchild->line, nchild->pos,
		    mdoc_macronames[nchild->tok]);

		/*
		 * Move the node out of the Bl block.
		 * First, collect all required node pointers.
		 */

		nblock  = nbody->parent;
		nprev   = nblock->prev;
		nparent = nblock->parent;
		nnext   = nchild->next;

		/*
		 * Unlink this child.
		 * NOTE(review): the assertion requires the stray node to
		 * be the first child of the body, i.e. that no kept node
		 * (It or Sm) precedes it; confirm the parser guarantees
		 * that ordering.
		 */

		assert(nchild->prev == NULL);
		nbody->child = nnext;
		if (nnext == NULL)
			nbody->last  = NULL;
		else
			nnext->prev = NULL;

		/*
		 * Relink this child as the sibling
		 * directly preceding the Bl block.
		 */

		nchild->parent = nparent;
		nchild->prev   = nprev;
		nchild->next   = nblock;

		nblock->prev = nchild;
		if (nprev == NULL)
			nparent->child = nchild;
		else
			nprev->next = nchild;

		nchild = nnext;
	}
}
/*
 * Validate the body of an `Rs' reference block:
 * report an empty body, report children that are not listed in
 * `rsord', count `%J' and `%B' children in Rs.quote_T, and sort
 * the children into `rsord' order.
 */
static void
post_rs(POST_ARGS)
{
	struct roff_node *np, *nch, *next, *prev;
	int		  i, j;

	np = mdoc->last;

	if (np->type != ROFFT_BODY)
		return;

	if (np->child == NULL) {
		mandoc_msg(MANDOCERR_RS_EMPTY, mdoc->parse,
		    np->line, np->pos, "Rs");
		return;
	}

	/*
	 * The full `Rs' block needs special handling to order the
	 * sub-elements according to `rsord'.  Pick through each element
	 * and correctly order it.  This is an insertion sort.
	 *
	 * NOTE(review): the loop starts at the second child, so the
	 * first child is neither checked against `rsord' nor counted
	 * in quote_T; confirm that this is intended.
	 * NOTE(review): np->last is not adjusted when the final child
	 * is moved forward; confirm that no consumer relies on it.
	 */

	next = NULL;
	for (nch = np->child->next; nch != NULL; nch = next) {
		/* Determine order number of this child. */
		for (i = 0; i < RSORD_MAX; i++)
			if (rsord[i] == nch->tok)
				break;

		/* Unknown children get order -1 and sort to the front. */
		if (i == RSORD_MAX) {
			mandoc_msg(MANDOCERR_RS_BAD,
			    mdoc->parse, nch->line, nch->pos,
			    mdoc_macronames[nch->tok]);
			i = -1;
		} else if (nch->tok == MDOC__J || nch->tok == MDOC__B)
			np->norm->Rs.quote_T++;

		/*
		 * Remove this child from the chain.  This somewhat
		 * repeats roff_node_unlink(), but since we're
		 * just re-ordering, there's no need for the
		 * full unlink process.
		 */

		if ((next = nch->next) != NULL)
			next->prev = nch->prev;

		if ((prev = nch->prev) != NULL)
			prev->next = nch->next;

		nch->prev = nch->next = NULL;

		/*
		 * Scan back until we reach a node that's
		 * to be ordered before this child.
		 * Using <= keeps children of equal order
		 * in their original relative sequence.
		 */

		for ( ; prev ; prev = prev->prev) {
			/* Determine order of `prev'. */
			for (j = 0; j < RSORD_MAX; j++)
				if (rsord[j] == prev->tok)
					break;
			if (j == RSORD_MAX)
				j = -1;

			if (j <= i)
				break;
		}

		/*
		 * Set this child back into its correct place:
		 * directly after the `prev' node, or at the
		 * head of the child list if there is none.
		 */

		nch->prev = prev;

		if (prev == NULL) {
			np->child->prev = nch;
			nch->next = np->child;
			np->child = nch;
		} else {
			if (prev->next)
				prev->next->prev = nch;
			nch->next = prev->next;
			prev->next = nch;
		}
	}
}
+ */ +static void +post_hyph(POST_ARGS) +{ + struct roff_node *nch; + char *cp; + + for (nch = mdoc->last->child; nch != NULL; nch = nch->next) { + if (nch->type != ROFFT_TEXT) + continue; + cp = nch->string; + if (*cp == '\0') + continue; + while (*(++cp) != '\0') + if (*cp == '-' && + isalpha((unsigned char)cp[-1]) && + isalpha((unsigned char)cp[1])) + *cp = ASCII_HYPH; + } +} + +static void +post_ns(POST_ARGS) +{ + + if (mdoc->last->flags & MDOC_LINE) + mandoc_msg(MANDOCERR_NS_SKIP, mdoc->parse, + mdoc->last->line, mdoc->last->pos, NULL); +} + +static void +post_sh(POST_ARGS) +{ + + post_ignpar(mdoc); + + switch (mdoc->last->type) { + case ROFFT_HEAD: + post_sh_head(mdoc); + break; + case ROFFT_BODY: + switch (mdoc->lastsec) { + case SEC_NAME: + post_sh_name(mdoc); + break; + case SEC_SEE_ALSO: + post_sh_see_also(mdoc); + break; + case SEC_AUTHORS: + post_sh_authors(mdoc); + break; + default: + break; + } + break; + default: + break; + } +} + +static void +post_sh_name(POST_ARGS) +{ + struct roff_node *n; + int hasnm, hasnd; + + hasnm = hasnd = 0; + + for (n = mdoc->last->child; n != NULL; n = n->next) { + switch (n->tok) { + case MDOC_Nm: + hasnm = 1; + break; + case MDOC_Nd: + hasnd = 1; + if (n->next != NULL) + mandoc_msg(MANDOCERR_NAMESEC_ND, + mdoc->parse, n->line, n->pos, NULL); + break; + case TOKEN_NONE: + if (hasnm) + break; + /* FALLTHROUGH */ + default: + mandoc_msg(MANDOCERR_NAMESEC_BAD, mdoc->parse, + n->line, n->pos, mdoc_macronames[n->tok]); + break; + } + } + + if ( ! hasnm) + mandoc_msg(MANDOCERR_NAMESEC_NONM, mdoc->parse, + mdoc->last->line, mdoc->last->pos, NULL); + if ( ! 
hasnd) + mandoc_msg(MANDOCERR_NAMESEC_NOND, mdoc->parse, + mdoc->last->line, mdoc->last->pos, NULL); +} + +static void +post_sh_see_also(POST_ARGS) +{ + const struct roff_node *n; + const char *name, *sec; + const char *lastname, *lastsec, *lastpunct; + int cmp; + + n = mdoc->last->child; + lastname = lastsec = lastpunct = NULL; + while (n != NULL) { + if (n->tok != MDOC_Xr || + n->child == NULL || + n->child->next == NULL) + break; + + /* Process one .Xr node. */ + + name = n->child->string; + sec = n->child->next->string; + if (lastsec != NULL) { + if (lastpunct[0] != ',' || lastpunct[1] != '\0') + mandoc_vmsg(MANDOCERR_XR_PUNCT, + mdoc->parse, n->line, n->pos, + "%s before %s(%s)", lastpunct, + name, sec); + cmp = strcmp(lastsec, sec); + if (cmp > 0) + mandoc_vmsg(MANDOCERR_XR_ORDER, + mdoc->parse, n->line, n->pos, + "%s(%s) after %s(%s)", name, + sec, lastname, lastsec); + else if (cmp == 0 && + strcasecmp(lastname, name) > 0) + mandoc_vmsg(MANDOCERR_XR_ORDER, + mdoc->parse, n->line, n->pos, + "%s after %s", name, lastname); + } + lastname = name; + lastsec = sec; + + /* Process the following node. */ + + n = n->next; + if (n == NULL) + break; + if (n->tok == MDOC_Xr) { + lastpunct = "none"; + continue; + } + if (n->type != ROFFT_TEXT) + break; + for (name = n->string; *name != '\0'; name++) + if (isalpha((const unsigned char)*name)) + return; + lastpunct = n->string; + if (n->next == NULL) + mandoc_vmsg(MANDOCERR_XR_PUNCT, mdoc->parse, + n->line, n->pos, "%s after %s(%s)", + lastpunct, lastname, lastsec); + n = n->next; + } +} + +static int +child_an(const struct roff_node *n) +{ + + for (n = n->child; n != NULL; n = n->next) + if ((n->tok == MDOC_An && n->child != NULL) || child_an(n)) + return 1; + return 0; +} + +static void +post_sh_authors(POST_ARGS) +{ + + if ( ! 
child_an(mdoc->last)) + mandoc_msg(MANDOCERR_AN_MISSING, mdoc->parse, + mdoc->last->line, mdoc->last->pos, NULL); +} + +static void +post_sh_head(POST_ARGS) +{ + const char *goodsec; + enum roff_sec sec; + + /* + * Process a new section. Sections are either "named" or + * "custom". Custom sections are user-defined, while named ones + * follow a conventional order and may only appear in certain + * manual sections. + */ + + sec = mdoc->last->sec; + + /* The NAME should be first. */ + + if (SEC_NAME != sec && SEC_NONE == mdoc->lastnamed) + mandoc_vmsg(MANDOCERR_NAMESEC_FIRST, mdoc->parse, + mdoc->last->line, mdoc->last->pos, + "Sh %s", secnames[sec]); + + /* The SYNOPSIS gets special attention in other areas. */ + + if (sec == SEC_SYNOPSIS) { + roff_setreg(mdoc->roff, "nS", 1, '='); + mdoc->flags |= MDOC_SYNOPSIS; + } else { + roff_setreg(mdoc->roff, "nS", 0, '='); + mdoc->flags &= ~MDOC_SYNOPSIS; + } + + /* Mark our last section. */ + + mdoc->lastsec = sec; + + /* We don't care about custom sections after this. */ + + if (sec == SEC_CUSTOM) + return; + + /* + * Check whether our non-custom section is being repeated or is + * out of order. + */ + + if (sec == mdoc->lastnamed) + mandoc_vmsg(MANDOCERR_SEC_REP, mdoc->parse, + mdoc->last->line, mdoc->last->pos, + "Sh %s", secnames[sec]); + + if (sec < mdoc->lastnamed) + mandoc_vmsg(MANDOCERR_SEC_ORDER, mdoc->parse, + mdoc->last->line, mdoc->last->pos, + "Sh %s", secnames[sec]); + + /* Mark the last named section. */ + + mdoc->lastnamed = sec; + + /* Check particular section/manual conventions. 
*/ + + if (mdoc->meta.msec == NULL) + return; + + goodsec = NULL; + switch (sec) { + case SEC_ERRORS: + if (*mdoc->meta.msec == '4') + break; + goodsec = "2, 3, 4, 9"; + /* FALLTHROUGH */ + case SEC_RETURN_VALUES: + case SEC_LIBRARY: + if (*mdoc->meta.msec == '2') + break; + if (*mdoc->meta.msec == '3') + break; + if (NULL == goodsec) + goodsec = "2, 3, 9"; + /* FALLTHROUGH */ + case SEC_CONTEXT: + if (*mdoc->meta.msec == '9') + break; + if (NULL == goodsec) + goodsec = "9"; + mandoc_vmsg(MANDOCERR_SEC_MSEC, mdoc->parse, + mdoc->last->line, mdoc->last->pos, + "Sh %s for %s only", secnames[sec], goodsec); + break; + default: + break; + } +} + +static void +post_ignpar(POST_ARGS) +{ + struct roff_node *np; + + switch (mdoc->last->type) { + case ROFFT_HEAD: + post_hyph(mdoc); + return; + case ROFFT_BODY: + break; + default: + return; + } + + if ((np = mdoc->last->child) != NULL) + if (np->tok == MDOC_Pp || np->tok == MDOC_Lp) { + mandoc_vmsg(MANDOCERR_PAR_SKIP, + mdoc->parse, np->line, np->pos, + "%s after %s", mdoc_macronames[np->tok], + mdoc_macronames[mdoc->last->tok]); + roff_node_delete(mdoc, np); + } + + if ((np = mdoc->last->last) != NULL) + if (np->tok == MDOC_Pp || np->tok == MDOC_Lp) { + mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, + np->line, np->pos, "%s at the end of %s", + mdoc_macronames[np->tok], + mdoc_macronames[mdoc->last->tok]); + roff_node_delete(mdoc, np); + } +} + +static void +post_prevpar(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + if (NULL == n->prev) + return; + if (n->type != ROFFT_ELEM && n->type != ROFFT_BLOCK) + return; + + /* + * Don't allow prior `Lp' or `Pp' prior to a paragraph-type + * block: `Lp', `Pp', or non-compact `Bd' or `Bl'. 
+ */ + + if (n->prev->tok != MDOC_Pp && + n->prev->tok != MDOC_Lp && + n->prev->tok != MDOC_br) + return; + if (n->tok == MDOC_Bl && n->norm->Bl.comp) + return; + if (n->tok == MDOC_Bd && n->norm->Bd.comp) + return; + if (n->tok == MDOC_It && n->parent->norm->Bl.comp) + return; + + mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, + n->prev->line, n->prev->pos, + "%s before %s", mdoc_macronames[n->prev->tok], + mdoc_macronames[n->tok]); + roff_node_delete(mdoc, n->prev); +} + +static void +post_par(POST_ARGS) +{ + struct roff_node *np; + + np = mdoc->last; + if (np->tok != MDOC_br && np->tok != MDOC_sp) + post_prevpar(mdoc); + + if (np->tok == MDOC_sp) { + if (np->child != NULL && np->child->next != NULL) + mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, + np->child->next->line, np->child->next->pos, + "sp ... %s", np->child->next->string); + } else if (np->child != NULL) + mandoc_vmsg(MANDOCERR_ARG_SKIP, + mdoc->parse, np->line, np->pos, "%s %s", + mdoc_macronames[np->tok], np->child->string); + + if ((np = mdoc->last->prev) == NULL) { + np = mdoc->last->parent; + if (np->tok != MDOC_Sh && np->tok != MDOC_Ss) + return; + } else if (np->tok != MDOC_Pp && np->tok != MDOC_Lp && + (mdoc->last->tok != MDOC_br || + (np->tok != MDOC_sp && np->tok != MDOC_br))) + return; + + mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, + mdoc->last->line, mdoc->last->pos, + "%s after %s", mdoc_macronames[mdoc->last->tok], + mdoc_macronames[np->tok]); + roff_node_delete(mdoc, mdoc->last); +} + +static void +post_dd(POST_ARGS) +{ + struct roff_node *n; + char *datestr; + + n = mdoc->last; + if (mdoc->meta.date != NULL) { + mandoc_msg(MANDOCERR_PROLOG_REP, mdoc->parse, + n->line, n->pos, "Dd"); + free(mdoc->meta.date); + } else if (mdoc->flags & MDOC_PBODY) + mandoc_msg(MANDOCERR_PROLOG_LATE, mdoc->parse, + n->line, n->pos, "Dd"); + else if (mdoc->meta.title != NULL) + mandoc_msg(MANDOCERR_PROLOG_ORDER, mdoc->parse, + n->line, n->pos, "Dd after Dt"); + else if (mdoc->meta.os != NULL) + 
mandoc_msg(MANDOCERR_PROLOG_ORDER, mdoc->parse, + n->line, n->pos, "Dd after Os"); + + if (n->child == NULL || n->child->string[0] == '\0') { + mdoc->meta.date = mdoc->quick ? mandoc_strdup("") : + mandoc_normdate(mdoc->parse, NULL, n->line, n->pos); + goto out; + } + + datestr = NULL; + deroff(&datestr, n); + if (mdoc->quick) + mdoc->meta.date = datestr; + else { + mdoc->meta.date = mandoc_normdate(mdoc->parse, + datestr, n->line, n->pos); + free(datestr); + } +out: + roff_node_delete(mdoc, n); +} + +static void +post_dt(POST_ARGS) +{ + struct roff_node *nn, *n; + const char *cp; + char *p; + + n = mdoc->last; + if (mdoc->flags & MDOC_PBODY) { + mandoc_msg(MANDOCERR_DT_LATE, mdoc->parse, + n->line, n->pos, "Dt"); + goto out; + } + + if (mdoc->meta.title != NULL) + mandoc_msg(MANDOCERR_PROLOG_REP, mdoc->parse, + n->line, n->pos, "Dt"); + else if (mdoc->meta.os != NULL) + mandoc_msg(MANDOCERR_PROLOG_ORDER, mdoc->parse, + n->line, n->pos, "Dt after Os"); + + free(mdoc->meta.title); + free(mdoc->meta.msec); + free(mdoc->meta.vol); + free(mdoc->meta.arch); + + mdoc->meta.title = NULL; + mdoc->meta.msec = NULL; + mdoc->meta.vol = NULL; + mdoc->meta.arch = NULL; + + /* Mandatory first argument: title. */ + + nn = n->child; + if (nn == NULL || *nn->string == '\0') { + mandoc_msg(MANDOCERR_DT_NOTITLE, + mdoc->parse, n->line, n->pos, "Dt"); + mdoc->meta.title = mandoc_strdup("UNTITLED"); + } else { + mdoc->meta.title = mandoc_strdup(nn->string); + + /* Check that all characters are uppercase. */ + + for (p = nn->string; *p != '\0'; p++) + if (islower((unsigned char)*p)) { + mandoc_vmsg(MANDOCERR_TITLE_CASE, + mdoc->parse, nn->line, + nn->pos + (p - nn->string), + "Dt %s", nn->string); + break; + } + } + + /* Mandatory second argument: section. 
*/ + + if (nn != NULL) + nn = nn->next; + + if (nn == NULL) { + mandoc_vmsg(MANDOCERR_MSEC_MISSING, + mdoc->parse, n->line, n->pos, + "Dt %s", mdoc->meta.title); + mdoc->meta.vol = mandoc_strdup("LOCAL"); + goto out; /* msec and arch remain NULL. */ + } + + mdoc->meta.msec = mandoc_strdup(nn->string); + + /* Infer volume title from section number. */ + + cp = mandoc_a2msec(nn->string); + if (cp == NULL) { + mandoc_vmsg(MANDOCERR_MSEC_BAD, mdoc->parse, + nn->line, nn->pos, "Dt ... %s", nn->string); + mdoc->meta.vol = mandoc_strdup(nn->string); + } else + mdoc->meta.vol = mandoc_strdup(cp); + + /* Optional third argument: architecture. */ + + if ((nn = nn->next) == NULL) + goto out; + + for (p = nn->string; *p != '\0'; p++) + *p = tolower((unsigned char)*p); + mdoc->meta.arch = mandoc_strdup(nn->string); + + /* Ignore fourth and later arguments. */ + + if ((nn = nn->next) != NULL) + mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, + nn->line, nn->pos, "Dt ... %s", nn->string); + +out: + roff_node_delete(mdoc, n); +} + +static void +post_bx(POST_ARGS) +{ + struct roff_node *n; + + /* + * Make `Bx's second argument always start with an uppercase + * letter. Groff checks if it's an "accepted" term, but we just + * uppercase blindly. + */ + + if ((n = mdoc->last->child) != NULL && (n = n->next) != NULL) + *n->string = (char)toupper((unsigned char)*n->string); +} + +static void +post_os(POST_ARGS) +{ +#ifndef OSNAME + struct utsname utsname; + static char *defbuf; +#endif + struct roff_node *n; + + n = mdoc->last; + if (mdoc->meta.os != NULL) + mandoc_msg(MANDOCERR_PROLOG_REP, mdoc->parse, + n->line, n->pos, "Os"); + else if (mdoc->flags & MDOC_PBODY) + mandoc_msg(MANDOCERR_PROLOG_LATE, mdoc->parse, + n->line, n->pos, "Os"); + + /* + * Set the operating system by way of the `Os' macro. + * The order of precedence is: + * 1. the argument of the `Os' macro, unless empty + * 2. the -Ios=foo command line argument, if provided + * 3. 
-DOSNAME="\"foo\"", if provided during compilation + * 4. "sysname release" from uname(3) + */ + + free(mdoc->meta.os); + mdoc->meta.os = NULL; + deroff(&mdoc->meta.os, n); + if (mdoc->meta.os) + goto out; + + if (mdoc->defos) { + mdoc->meta.os = mandoc_strdup(mdoc->defos); + goto out; + } + +#ifdef OSNAME + mdoc->meta.os = mandoc_strdup(OSNAME); +#else /*!OSNAME */ + if (defbuf == NULL) { + if (uname(&utsname) == -1) { + mandoc_msg(MANDOCERR_OS_UNAME, mdoc->parse, + n->line, n->pos, "Os"); + defbuf = mandoc_strdup("UNKNOWN"); + } else + mandoc_asprintf(&defbuf, "%s %s", + utsname.sysname, utsname.release); + } + mdoc->meta.os = mandoc_strdup(defbuf); +#endif /*!OSNAME*/ + +out: + roff_node_delete(mdoc, n); +} + +/* + * If no argument is provided, + * fill in the name of the current manual page. + */ +static void +post_ex(POST_ARGS) +{ + struct roff_node *n; + + post_std(mdoc); + + n = mdoc->last; + if (n->child != NULL) + return; + + if (mdoc->meta.name == NULL) { + mandoc_msg(MANDOCERR_EX_NONAME, mdoc->parse, + n->line, n->pos, "Ex"); + return; + } + + mdoc->next = ROFF_NEXT_CHILD; + roff_word_alloc(mdoc, n->line, n->pos, mdoc->meta.name); + mdoc->last = n; +} + +enum roff_sec +mdoc_a2sec(const char *p) +{ + int i; + + for (i = 0; i < (int)SEC__MAX; i++) + if (secnames[i] && 0 == strcmp(p, secnames[i])) + return (enum roff_sec)i; + + return SEC_CUSTOM; +} + +static size_t +macro2len(int macro) +{ + + switch (macro) { + case MDOC_Ad: + return 12; + case MDOC_Ao: + return 12; + case MDOC_An: + return 12; + case MDOC_Aq: + return 12; + case MDOC_Ar: + return 12; + case MDOC_Bo: + return 12; + case MDOC_Bq: + return 12; + case MDOC_Cd: + return 12; + case MDOC_Cm: + return 10; + case MDOC_Do: + return 10; + case MDOC_Dq: + return 12; + case MDOC_Dv: + return 12; + case MDOC_Eo: + return 12; + case MDOC_Em: + return 10; + case MDOC_Er: + return 17; + case MDOC_Ev: + return 15; + case MDOC_Fa: + return 12; + case MDOC_Fl: + return 10; + case MDOC_Fo: + return 16; + 
case MDOC_Fn: + return 16; + case MDOC_Ic: + return 10; + case MDOC_Li: + return 16; + case MDOC_Ms: + return 6; + case MDOC_Nm: + return 10; + case MDOC_No: + return 12; + case MDOC_Oo: + return 10; + case MDOC_Op: + return 14; + case MDOC_Pa: + return 32; + case MDOC_Pf: + return 12; + case MDOC_Po: + return 12; + case MDOC_Pq: + return 12; + case MDOC_Ql: + return 16; + case MDOC_Qo: + return 12; + case MDOC_So: + return 12; + case MDOC_Sq: + return 12; + case MDOC_Sy: + return 6; + case MDOC_Sx: + return 16; + case MDOC_Tn: + return 10; + case MDOC_Va: + return 12; + case MDOC_Vt: + return 12; + case MDOC_Xr: + return 10; + default: + break; + }; + return 0; +} diff --git a/contrib/mdocml/msec.c b/contrib/mdocml/msec.c new file mode 100644 index 0000000..9d41511 --- /dev/null +++ b/contrib/mdocml/msec.c @@ -0,0 +1,36 @@ +/* $Id: msec.c,v 1.15 2015/10/06 18:32:19 schwarze Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
/*
 * Each LINE(x, y) entry of msec.in expands to one comparison:
 * if the section string p equals x, return the volume title y.
 */
#define LINE(x, y) \
	if (0 == strcmp(p, x)) return(y);

/*
 * Map a manual section string to its volume title.
 * Returns the title from the first matching msec.in entry,
 * or NULL if no entry matches.  The comparison is an exact,
 * case-sensitive string match.
 */
const char *
mandoc_a2msec(const char *p)
{

/* The table is pulled in as a chain of if-statements. */
#include "msec.in"

	return NULL;
}
+ */ + +LINE("1", "FreeBSD General Commands Manual") +LINE("2", "FreeBSD System Calls Manual") +LINE("3", "FreeBSD Library Functions Manual") +LINE("3p", "Perl Library Functions Manual") +LINE("4", "FreeBSD Kernel Interfaces Manual") +LINE("5", "FreeBSD File Formats Manual") +LINE("6", "FreeBSD Games Manual") +LINE("7", "FreeBSD Miscellaneous Information Manual") +LINE("8", "FreeBSD System Manager\'s Manual") +LINE("9", "FreeBSD Kernel Developer\'s Manual") +LINE("X11", "X11 Developer\'s Manual") +LINE("X11R6", "X11 Developer\'s Manual") +LINE("unass", "Unassociated") +LINE("local", "Local") +LINE("draft", "Draft") +LINE("paper", "Paper") diff --git a/contrib/mdocml/out.c b/contrib/mdocml/out.c new file mode 100644 index 0000000..aff3558 --- /dev/null +++ b/contrib/mdocml/out.c @@ -0,0 +1,329 @@ +/* $Id: out.c,v 1.62 2015/10/12 00:08:16 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "out.h" + +static void tblcalc_data(struct rofftbl *, struct roffcol *, + const struct tbl_opts *, const struct tbl_dat *); +static void tblcalc_literal(struct rofftbl *, struct roffcol *, + const struct tbl_dat *); +static void tblcalc_number(struct rofftbl *, struct roffcol *, + const struct tbl_opts *, const struct tbl_dat *); + + +/* + * Parse the *src string and store a scaling unit into *dst. + * If the string doesn't specify the unit, use the default. + * If no default is specified, fail. + * Return 2 on complete success, 1 when a conversion was done, + * but there was trailing garbage, and 0 on total failure. + */ +int +a2roffsu(const char *src, struct roffsu *dst, enum roffscale def) +{ + char *endptr; + + dst->unit = def == SCALE_MAX ? SCALE_BU : def; + dst->scale = strtod(src, &endptr); + if (endptr == src) + return 0; + + switch (*endptr++) { + case 'c': + dst->unit = SCALE_CM; + break; + case 'i': + dst->unit = SCALE_IN; + break; + case 'f': + dst->unit = SCALE_FS; + break; + case 'M': + dst->unit = SCALE_MM; + break; + case 'm': + dst->unit = SCALE_EM; + break; + case 'n': + dst->unit = SCALE_EN; + break; + case 'P': + dst->unit = SCALE_PC; + break; + case 'p': + dst->unit = SCALE_PT; + break; + case 'u': + dst->unit = SCALE_BU; + break; + case 'v': + dst->unit = SCALE_VS; + break; + case '\0': + endptr--; + /* FALLTHROUGH */ + default: + if (SCALE_MAX == def) + return 0; + dst->unit = def; + break; + } + + return *endptr == '\0' ? 2 : 1; +} + +/* + * Calculate the abstract widths and decimal positions of columns in a + * table. This routine allocates the columns structures then runs over + * all rows and cells in the table. The function pointers in "tbl" are + * used for the actual width calculations. 
+ */ +void +tblcalc(struct rofftbl *tbl, const struct tbl_span *sp, + size_t totalwidth) +{ + const struct tbl_opts *opts; + const struct tbl_dat *dp; + struct roffcol *col; + size_t ewidth, xwidth; + int spans; + int icol, maxcol, necol, nxcol, quirkcol; + + /* + * Allocate the master column specifiers. These will hold the + * widths and decimal positions for all cells in the column. It + * must be freed and nullified by the caller. + */ + + assert(NULL == tbl->cols); + tbl->cols = mandoc_calloc((size_t)sp->opts->cols, + sizeof(struct roffcol)); + opts = sp->opts; + + for (maxcol = -1; sp; sp = sp->next) { + if (TBL_SPAN_DATA != sp->pos) + continue; + spans = 1; + /* + * Account for the data cells in the layout, matching it + * to data cells in the data section. + */ + for (dp = sp->first; dp; dp = dp->next) { + /* Do not used spanned cells in the calculation. */ + if (0 < --spans) + continue; + spans = dp->spans; + if (1 < spans) + continue; + icol = dp->layout->col; + if (maxcol < icol) + maxcol = icol; + col = tbl->cols + icol; + col->flags |= dp->layout->flags; + if (dp->layout->flags & TBL_CELL_WIGN) + continue; + tblcalc_data(tbl, col, opts, dp); + } + } + + /* + * Count columns to equalize and columns to maximize. + * Find maximum width of the columns to equalize. + * Find total width of the columns *not* to maximize. + */ + + necol = nxcol = 0; + ewidth = xwidth = 0; + for (icol = 0; icol <= maxcol; icol++) { + col = tbl->cols + icol; + if (col->flags & TBL_CELL_EQUAL) { + necol++; + if (ewidth < col->width) + ewidth = col->width; + } + if (col->flags & TBL_CELL_WMAX) + nxcol++; + else + xwidth += col->width; + } + + /* + * Equalize columns, if requested for any of them. + * Update total width of the columns not to maximize. + */ + + if (necol) { + for (icol = 0; icol <= maxcol; icol++) { + col = tbl->cols + icol; + if ( ! 
(col->flags & TBL_CELL_EQUAL)) + continue; + if (col->width == ewidth) + continue; + if (nxcol && totalwidth) + xwidth += ewidth - col->width; + col->width = ewidth; + } + } + + /* + * If there are any columns to maximize, find the total + * available width, deducting 3n margins between columns. + * Distribute the available width evenly. + */ + + if (nxcol && totalwidth) { + xwidth = totalwidth - xwidth - 3*maxcol - + (opts->opts & (TBL_OPT_BOX | TBL_OPT_DBOX) ? + 2 : !!opts->lvert + !!opts->rvert); + + /* + * Emulate a bug in GNU tbl width calculation that + * manifests itself for large numbers of x-columns. + * Emulating it for 5 x-columns gives identical + * behaviour for up to 6 x-columns. + */ + + if (nxcol == 5) { + quirkcol = xwidth % nxcol + 2; + if (quirkcol != 3 && quirkcol != 4) + quirkcol = -1; + } else + quirkcol = -1; + + necol = 0; + ewidth = 0; + for (icol = 0; icol <= maxcol; icol++) { + col = tbl->cols + icol; + if ( ! (col->flags & TBL_CELL_WMAX)) + continue; + col->width = (double)xwidth * ++necol / nxcol + - ewidth + 0.4995; + if (necol == quirkcol) + col->width--; + ewidth += col->width; + } + } +} + +static void +tblcalc_data(struct rofftbl *tbl, struct roffcol *col, + const struct tbl_opts *opts, const struct tbl_dat *dp) +{ + size_t sz; + + /* Branch down into data sub-types. */ + + switch (dp->layout->pos) { + case TBL_CELL_HORIZ: + case TBL_CELL_DHORIZ: + sz = (*tbl->len)(1, tbl->arg); + if (col->width < sz) + col->width = sz; + break; + case TBL_CELL_LONG: + case TBL_CELL_CENTRE: + case TBL_CELL_LEFT: + case TBL_CELL_RIGHT: + tblcalc_literal(tbl, col, dp); + break; + case TBL_CELL_NUMBER: + tblcalc_number(tbl, col, opts, dp); + break; + case TBL_CELL_DOWN: + break; + default: + abort(); + } +} + +static void +tblcalc_literal(struct rofftbl *tbl, struct roffcol *col, + const struct tbl_dat *dp) +{ + size_t sz; + const char *str; + + str = dp->string ? 
/*
 * Update the width and decimal position of a column
 * for one cell laid out as a number.
 * "tbl" provides the string width callback, "opts" the decimal
 * point character, and "dp" the cell; a cell without a string
 * is treated as an empty string.
 */
static void
tblcalc_number(struct rofftbl *tbl, struct roffcol *col,
		const struct tbl_opts *opts, const struct tbl_dat *dp)
{
	int		 i;
	size_t		 sz, psz, ssz, d;
	const char	*str;
	char		*cp;
	char		 buf[2];

	/*
	 * First calculate number width and decimal place (last + 1 for
	 * non-decimal numbers).  If the stored decimal is subsequent to
	 * ours, make our size longer by that difference
	 * (right-"shifting"); similarly, if ours is subsequent the
	 * stored, then extend the stored size by the difference.
	 * Finally, re-assign the stored values.
	 */

	str = dp->string ? dp->string : "";
	sz = (*tbl->slen)(str, tbl->arg);

	/* FIXME: TBL_DATA_HORIZ et al.? */

	/* psz: the width of the decimal point character itself. */

	buf[0] = opts->decimal;
	buf[1] = '\0';

	psz = (*tbl->slen)(buf, tbl->arg);

	/*
	 * d: the width up to and including the last decimal point,
	 * measured one character at a time through the callback,
	 * or the full width plus a decimal point if there is none.
	 */

	if (NULL != (cp = strrchr(str, opts->decimal))) {
		buf[1] = '\0';
		for (ssz = 0, i = 0; cp != &str[i]; i++) {
			buf[0] = str[i];
			ssz += (*tbl->slen)(buf, tbl->arg);
		}
		d = ssz + psz;
	} else
		d = sz + psz;

	/* Adjust the settings for this column. */

	if (col->decimal > d) {
		/* The column's point is further right: shift this cell. */
		sz += col->decimal - d;
		d = col->decimal;
	} else
		/* Ours is further right (or equal): widen the column. */
		col->width += d - col->decimal;

	if (sz > col->width)
		col->width = sz;
	if (d > col->decimal)
		col->decimal = d;
}
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +enum roffscale { + SCALE_CM, /* centimeters (c) */ + SCALE_IN, /* inches (i) */ + SCALE_PC, /* pica (P) */ + SCALE_PT, /* points (p) */ + SCALE_EM, /* ems (m) */ + SCALE_MM, /* mini-ems (M) */ + SCALE_EN, /* ens (n) */ + SCALE_BU, /* default horizontal (u) */ + SCALE_VS, /* default vertical (v) */ + SCALE_FS, /* syn. for u (f) */ + SCALE_MAX +}; + +struct roffcol { + size_t width; /* width of cell */ + size_t decimal; /* decimal position in cell */ + int flags; /* layout flags, see tbl_cell */ +}; + +struct roffsu { + enum roffscale unit; + double scale; +}; + +typedef size_t (*tbl_strlen)(const char *, void *); +typedef size_t (*tbl_len)(size_t, void *); + +struct rofftbl { + tbl_strlen slen; /* calculate string length */ + tbl_len len; /* produce width of empty space */ + struct roffcol *cols; /* master column specifiers */ + void *arg; /* passed to slen and len */ +}; + +#define SCALE_VS_INIT(p, v) \ + do { (p)->unit = SCALE_VS; \ + (p)->scale = (v); } \ + while (/* CONSTCOND */ 0) + +#define SCALE_HS_INIT(p, v) \ + do { (p)->unit = SCALE_EN; \ + (p)->scale = (v); } \ + while (/* CONSTCOND */ 0) + + +struct tbl_span; + +int a2roffsu(const char *, struct roffsu *, enum roffscale); +void tblcalc(struct rofftbl *tbl, + const struct tbl_span *, size_t); diff --git a/contrib/mdocml/preconv.c b/contrib/mdocml/preconv.c new file mode 100644 index 0000000..1fc137a --- /dev/null +++ b/contrib/mdocml/preconv.c @@ -0,0 +1,176 @@ +/* $Id: preconv.c,v 1.15 
2015/10/06 18:32:19 schwarze Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <stdio.h> +#include <string.h> +#include "mandoc.h" +#include "libmandoc.h" + +int +preconv_encode(struct buf *ib, size_t *ii, struct buf *ob, size_t *oi, + int *filenc) +{ + unsigned char *cu; + int nby; + unsigned int accum; + + cu = (unsigned char *)ib->buf + *ii; + assert(*cu & 0x80); + + if ( ! (*filenc & MPARSE_UTF8)) + goto latin; + + nby = 1; + while (nby < 5 && *cu & (1 << (7 - nby))) + nby++; + + switch (nby) { + case 2: + accum = *cu & 0x1f; + if (accum < 0x02) /* Obfuscated ASCII. */ + goto latin; + break; + case 3: + accum = *cu & 0x0f; + break; + case 4: + accum = *cu & 0x07; + if (accum > 0x04) /* Beyond Unicode. */ + goto latin; + break; + default: /* Bad sequence header. */ + goto latin; + } + + cu++; + switch (nby) { + case 3: + if ((accum == 0x00 && ! (*cu & 0x20)) || /* Use 2-byte. */ + (accum == 0x0d && *cu & 0x20)) /* Surrogates. */ + goto latin; + break; + case 4: + if ((accum == 0x00 && ! (*cu & 0x30)) || /* Use 3-byte. */ + (accum == 0x04 && *cu & 0x30)) /* Beyond Unicode. 
*/ + goto latin; + break; + default: + break; + } + + while (--nby) { + if ((*cu & 0xc0) != 0x80) /* Invalid continuation. */ + goto latin; + accum <<= 6; + accum += *cu & 0x3f; + cu++; + } + + assert(accum > 0x7f); + assert(accum < 0x110000); + assert(accum < 0xd800 || accum > 0xdfff); + + *oi += snprintf(ob->buf + *oi, 11, "\\[u%.4X]", accum); + *ii = (char *)cu - ib->buf; + *filenc &= ~MPARSE_LATIN1; + return 1; + +latin: + if ( ! (*filenc & MPARSE_LATIN1)) + return 0; + + *oi += snprintf(ob->buf + *oi, 11, + "\\[u%.4X]", (unsigned char)ib->buf[(*ii)++]); + + *filenc &= ~MPARSE_UTF8; + return 1; +} + +int +preconv_cue(const struct buf *b, size_t offset) +{ + const char *ln, *eoln, *eoph; + size_t sz, phsz; + + ln = b->buf + offset; + sz = b->sz - offset; + + /* Look for the end-of-line. */ + + if (NULL == (eoln = memchr(ln, '\n', sz))) + eoln = ln + sz; + + /* Check if we have the correct header/trailer. */ + + if ((sz = (size_t)(eoln - ln)) < 10 || + memcmp(ln, ".\\\" -*-", 7) || memcmp(eoln - 3, "-*-", 3)) + return MPARSE_UTF8 | MPARSE_LATIN1; + + /* Move after the header and adjust for the trailer. */ + + ln += 7; + sz -= 10; + + while (sz > 0) { + while (sz > 0 && ' ' == *ln) { + ln++; + sz--; + } + if (0 == sz) + break; + + /* Find the end-of-phrase marker (or eoln). */ + + if (NULL == (eoph = memchr(ln, ';', sz))) + eoph = eoln - 3; + else + eoph++; + + /* Only account for the "coding" phrase. */ + + if ((phsz = eoph - ln) < 7 || + strncasecmp(ln, "coding:", 7)) { + sz -= phsz; + ln += phsz; + continue; + } + + sz -= 7; + ln += 7; + + while (sz > 0 && ' ' == *ln) { + ln++; + sz--; + } + if (0 == sz) + return 0; + + /* Check us against known encodings. 
*/ + + if (phsz > 4 && !strncasecmp(ln, "utf-8", 5)) + return MPARSE_UTF8; + if (phsz > 10 && !strncasecmp(ln, "iso-latin-1", 11)) + return MPARSE_LATIN1; + return 0; + } + return MPARSE_UTF8 | MPARSE_LATIN1; +} diff --git a/contrib/mdocml/predefs.in b/contrib/mdocml/predefs.in new file mode 100644 index 0000000..d1690e3 --- /dev/null +++ b/contrib/mdocml/predefs.in @@ -0,0 +1,65 @@ +/* $Id: predefs.in,v 1.4 2012/07/18 10:39:19 schwarze Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * The predefined-string translation tables. Each corresponds to a + * predefined strings from (e.g.) tmac/mdoc/doc-nroff. The left-hand + * side corresponds to the input sequence (\*x, \*(xx and so on). The + * right-hand side is what's produced by libroff. + * + * XXX - C-escape strings! + * XXX - update PREDEF_MAX in roff.c if adding more! 
+ */ + +PREDEF("Am", "&") +PREDEF("Ba", "\\fR|\\fP") +PREDEF("Ge", "\\(>=") +PREDEF("Gt", ">") +PREDEF("If", "infinity") +PREDEF("Le", "\\(<=") +PREDEF("Lq", "\\(lq") +PREDEF("Lt", "<") +PREDEF("Na", "NaN") +PREDEF("Ne", "\\(!=") +PREDEF("Pi", "pi") +PREDEF("Pm", "\\(+-") +PREDEF("Rq", "\\(rq") +PREDEF("left-bracket", "[") +PREDEF("left-parenthesis", "(") +PREDEF("lp", "(") +PREDEF("left-singlequote", "\\(oq") +PREDEF("q", "\\(dq") +PREDEF("quote-left", "\\(oq") +PREDEF("quote-right", "\\(cq") +PREDEF("R", "\\(rg") +PREDEF("right-bracket", "]") +PREDEF("right-parenthesis", ")") +PREDEF("rp", ")") +PREDEF("right-singlequote", "\\(cq") +PREDEF("Tm", "(Tm)") +PREDEF("Px", "POSIX") +PREDEF("Ai", "ANSI") +PREDEF("\'", "\\\'") +PREDEF("aa", "\\(aa") +PREDEF("ga", "\\(ga") +PREDEF("`", "\\`") +PREDEF("lq", "\\(lq") +PREDEF("rq", "\\(rq") +PREDEF("ua", "\\(ua") +PREDEF("va", "\\(va") +PREDEF("<=", "\\(<=") +PREDEF(">=", "\\(>=") diff --git a/contrib/mdocml/read.c b/contrib/mdocml/read.c new file mode 100644 index 0000000..0ef3cdf --- /dev/null +++ b/contrib/mdocml/read.c @@ -0,0 +1,948 @@ +/* $Id: read.c,v 1.148 2016/01/08 02:53:13 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010-2016 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> +#if HAVE_MMAP +#include <sys/mman.h> +#include <sys/stat.h> +#endif + +#include <assert.h> +#include <ctype.h> +#if HAVE_ERR +#include <err.h> +#endif +#include <errno.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <zlib.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "man.h" +#include "libmandoc.h" +#include "roff_int.h" + +#define REPARSE_LIMIT 1000 + +struct mparse { + struct roff_man *man; /* man parser */ + struct roff *roff; /* roff parser (!NULL) */ + char *sodest; /* filename pointed to by .so */ + const char *file; /* filename of current input file */ + struct buf *primary; /* buffer currently being parsed */ + struct buf *secondary; /* preprocessed copy of input */ + const char *defos; /* default operating system */ + mandocmsg mmsg; /* warning/error message handler */ + enum mandoclevel file_status; /* status of current parse */ + enum mandoclevel wlevel; /* ignore messages below this */ + int options; /* parser options */ + int gzip; /* current input file is gzipped */ + int filenc; /* encoding of the current file */ + int reparse_count; /* finite interp. 
stack */ + int line; /* line number in the file */ +}; + +static void choose_parser(struct mparse *); +static void resize_buf(struct buf *, size_t); +static void mparse_buf_r(struct mparse *, struct buf, size_t, int); +static int read_whole_file(struct mparse *, const char *, int, + struct buf *, int *); +static void mparse_end(struct mparse *); +static void mparse_parse_buffer(struct mparse *, struct buf, + const char *); + +static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { + MANDOCERR_OK, + MANDOCERR_WARNING, + MANDOCERR_WARNING, + MANDOCERR_ERROR, + MANDOCERR_UNSUPP, + MANDOCERR_MAX, + MANDOCERR_MAX +}; + +static const char * const mandocerrs[MANDOCERR_MAX] = { + "ok", + + "generic warning", + + /* related to the prologue */ + "missing manual title, using UNTITLED", + "missing manual title, using \"\"", + "lower case character in document title", + "missing manual section, using \"\"", + "unknown manual section", + "missing date, using today's date", + "cannot parse date, using it verbatim", + "missing Os macro, using \"\"", + "duplicate prologue macro", + "late prologue macro", + "skipping late title macro", + "prologue macros out of order", + + /* related to document structure */ + ".so is fragile, better use ln(1)", + "no document body", + "content before first section header", + "first section is not \"NAME\"", + "NAME section without name", + "NAME section without description", + "description not at the end of NAME", + "bad NAME section content", + "missing description line, using \"\"", + "sections out of conventional order", + "duplicate section title", + "unexpected section", + "unusual Xr order", + "unusual Xr punctuation", + "AUTHORS section without An macro", + + /* related to macros and nesting */ + "obsolete macro", + "macro neither callable nor escaped", + "skipping paragraph macro", + "moving paragraph macro out of list", + "skipping no-space macro", + "blocks badly nested", + "nested displays are not portable", + "moving content out 
of list", + "fill mode already enabled, skipping", + "fill mode already disabled, skipping", + "line scope broken", + + /* related to missing macro arguments */ + "skipping empty request", + "conditional request controls empty scope", + "skipping empty macro", + "empty block", + "empty argument, using 0n", + "missing display type, using -ragged", + "list type is not the first argument", + "missing -width in -tag list, using 8n", + "missing utility name, using \"\"", + "missing function name, using \"\"", + "empty head in list item", + "empty list item", + "missing font type, using \\fR", + "unknown font type, using \\fR", + "nothing follows prefix", + "empty reference block", + "missing -std argument, adding it", + "missing option string, using \"\"", + "missing resource identifier, using \"\"", + "missing eqn box, using \"\"", + + /* related to bad macro arguments */ + "unterminated quoted argument", + "duplicate argument", + "skipping duplicate argument", + "skipping duplicate display type", + "skipping duplicate list type", + "skipping -width argument", + "wrong number of cells", + "unknown AT&T UNIX version", + "comma in function argument", + "parenthesis in function name", + "invalid content in Rs block", + "invalid Boolean argument", + "unknown font, skipping request", + "odd number of characters in request", + + /* related to plain text */ + "blank line in fill mode, using .sp", + "tab in filled text", + "whitespace at end of input line", + "bad comment style", + "invalid escape sequence", + "undefined string, using \"\"", + + /* related to tables */ + "tbl line starts with span", + "tbl column starts with span", + "skipping vertical bar in tbl layout", + + "generic error", + + /* related to tables */ + "non-alphabetic character in tbl options", + "skipping unknown tbl option", + "missing tbl option argument", + "wrong tbl option argument size", + "empty tbl layout", + "invalid character in tbl layout", + "unmatched parenthesis in tbl layout", + "tbl without 
any data cells", + "ignoring data in spanned tbl cell", + "ignoring extra tbl data cells", + "data block open at end of tbl", + + /* related to document structure and macros */ + NULL, + "input stack limit exceeded, infinite loop?", + "skipping bad character", + "skipping unknown macro", + "skipping insecure request", + "skipping item outside list", + "skipping column outside column list", + "skipping end of block that is not open", + "fewer RS blocks open, skipping", + "inserting missing end of block", + "appending missing end of block", + + /* related to request and macro arguments */ + "escaped character not allowed in a name", + "NOT IMPLEMENTED: Bd -file", + "skipping display without arguments", + "missing list type, using -item", + "missing manual name, using \"\"", + "uname(3) system call failed, using UNKNOWN", + "unknown standard specifier", + "skipping request without numeric argument", + "NOT IMPLEMENTED: .so with absolute path or \"..\"", + ".so request failed", + "skipping all arguments", + "skipping excess arguments", + "divide by zero", + + "unsupported feature", + "input too large", + "unsupported control character", + "unsupported roff request", + "eqn delim option in tbl", + "unsupported tbl layout modifier", + "ignoring macro in table", +}; + +static const char * const mandoclevels[MANDOCLEVEL_MAX] = { + "SUCCESS", + "RESERVED", + "WARNING", + "ERROR", + "UNSUPP", + "BADARG", + "SYSERR" +}; + + +static void +resize_buf(struct buf *buf, size_t initial) +{ + + buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial; + buf->buf = mandoc_realloc(buf->buf, buf->sz); +} + +static void +choose_parser(struct mparse *curp) +{ + char *cp, *ep; + int format; + + /* + * If neither command line arguments -mdoc or -man select + * a parser nor the roff parser found a .Dd or .TH macro + * yet, look ahead in the main input buffer. 
+ */ + + if ((format = roff_getformat(curp->roff)) == 0) { + cp = curp->primary->buf; + ep = cp + curp->primary->sz; + while (cp < ep) { + if (*cp == '.' || *cp == '\'') { + cp++; + if (cp[0] == 'D' && cp[1] == 'd') { + format = MPARSE_MDOC; + break; + } + if (cp[0] == 'T' && cp[1] == 'H') { + format = MPARSE_MAN; + break; + } + } + cp = memchr(cp, '\n', ep - cp); + if (cp == NULL) + break; + cp++; + } + } + + if (curp->man == NULL) { + curp->man = roff_man_alloc(curp->roff, curp, curp->defos, + curp->options & MPARSE_QUICK ? 1 : 0); + curp->man->macroset = MACROSET_MAN; + curp->man->first->tok = TOKEN_NONE; + } + + if (format == MPARSE_MDOC) { + mdoc_hash_init(); + curp->man->macroset = MACROSET_MDOC; + curp->man->first->tok = TOKEN_NONE; + } else { + man_hash_init(); + curp->man->macroset = MACROSET_MAN; + curp->man->first->tok = TOKEN_NONE; + } +} + +/* + * Main parse routine for a buffer. + * It assumes encoding and line numbering are already set up. + * It can recurse directly (for invocations of user-defined + * macros, inline equations, and input line traps) + * and indirectly (for .so file inclusion). + */ +static void +mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start) +{ + const struct tbl_span *span; + struct buf ln; + const char *save_file; + char *cp; + size_t pos; /* byte number in the ln buffer */ + enum rofferr rr; + int of; + int lnn; /* line number in the real file */ + int fd; + unsigned char c; + + memset(&ln, 0, sizeof(ln)); + + lnn = curp->line; + pos = 0; + + while (i < blk.sz) { + if (0 == pos && '\0' == blk.buf[i]) + break; + + if (start) { + curp->line = lnn; + curp->reparse_count = 0; + + if (lnn < 3 && + curp->filenc & MPARSE_UTF8 && + curp->filenc & MPARSE_LATIN1) + curp->filenc = preconv_cue(&blk, i); + } + + while (i < blk.sz && (start || blk.buf[i] != '\0')) { + + /* + * When finding an unescaped newline character, + * leave the character loop to process the line. + * Skip a preceding carriage return, if any. 
+ */ + + if ('\r' == blk.buf[i] && i + 1 < blk.sz && + '\n' == blk.buf[i + 1]) + ++i; + if ('\n' == blk.buf[i]) { + ++i; + ++lnn; + break; + } + + /* + * Make sure we have space for the worst + * case of 11 bytes: "\\[u10ffff]\0" + */ + + if (pos + 11 > ln.sz) + resize_buf(&ln, 256); + + /* + * Encode 8-bit input. + */ + + c = blk.buf[i]; + if (c & 0x80) { + if ( ! (curp->filenc && preconv_encode( + &blk, &i, &ln, &pos, &curp->filenc))) { + mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, + curp->line, pos, "0x%x", c); + ln.buf[pos++] = '?'; + i++; + } + continue; + } + + /* + * Exclude control characters. + */ + + if (c == 0x7f || (c < 0x20 && c != 0x09)) { + mandoc_vmsg(c == 0x00 || c == 0x04 || + c > 0x0a ? MANDOCERR_CHAR_BAD : + MANDOCERR_CHAR_UNSUPP, + curp, curp->line, pos, "0x%x", c); + i++; + if (c != '\r') + ln.buf[pos++] = '?'; + continue; + } + + /* Trailing backslash = a plain char. */ + + if (blk.buf[i] != '\\' || i + 1 == blk.sz) { + ln.buf[pos++] = blk.buf[i++]; + continue; + } + + /* + * Found escape and at least one other character. + * When it's a newline character, skip it. + * When there is a carriage return in between, + * skip that one as well. + */ + + if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz && + '\n' == blk.buf[i + 2]) + ++i; + if ('\n' == blk.buf[i + 1]) { + i += 2; + ++lnn; + continue; + } + + if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) { + i += 2; + /* Comment, skip to end of line */ + for (; i < blk.sz; ++i) { + if ('\n' == blk.buf[i]) { + ++i; + ++lnn; + break; + } + } + + /* Backout trailing whitespaces */ + for (; pos > 0; --pos) { + if (ln.buf[pos - 1] != ' ') + break; + if (pos > 2 && ln.buf[pos - 2] == '\\') + break; + } + break; + } + + /* Catch escaped bogus characters. */ + + c = (unsigned char) blk.buf[i+1]; + + if ( ! 
(isascii(c) && + (isgraph(c) || isblank(c)))) { + mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, + curp->line, pos, "0x%x", c); + i += 2; + ln.buf[pos++] = '?'; + continue; + } + + /* Some other escape sequence, copy & cont. */ + + ln.buf[pos++] = blk.buf[i++]; + ln.buf[pos++] = blk.buf[i++]; + } + + if (pos >= ln.sz) + resize_buf(&ln, 256); + + ln.buf[pos] = '\0'; + + /* + * A significant amount of complexity is contained by + * the roff preprocessor. It's line-oriented but can be + * expressed on one line, so we need at times to + * readjust our starting point and re-run it. The roff + * preprocessor can also readjust the buffers with new + * data, so we pass them in wholesale. + */ + + of = 0; + + /* + * Maintain a lookaside buffer of all parsed lines. We + * only do this if mparse_keep() has been invoked (the + * buffer may be accessed with mparse_getkeep()). + */ + + if (curp->secondary) { + curp->secondary->buf = mandoc_realloc( + curp->secondary->buf, + curp->secondary->sz + pos + 2); + memcpy(curp->secondary->buf + + curp->secondary->sz, + ln.buf, pos); + curp->secondary->sz += pos; + curp->secondary->buf + [curp->secondary->sz] = '\n'; + curp->secondary->sz++; + curp->secondary->buf + [curp->secondary->sz] = '\0'; + } +rerun: + rr = roff_parseln(curp->roff, curp->line, &ln, &of); + + switch (rr) { + case ROFF_REPARSE: + if (REPARSE_LIMIT >= ++curp->reparse_count) + mparse_buf_r(curp, ln, of, 0); + else + mandoc_msg(MANDOCERR_ROFFLOOP, curp, + curp->line, pos, NULL); + pos = 0; + continue; + case ROFF_APPEND: + pos = strlen(ln.buf); + continue; + case ROFF_RERUN: + goto rerun; + case ROFF_IGN: + pos = 0; + continue; + case ROFF_SO: + if ( ! (curp->options & MPARSE_SO) && + (i >= blk.sz || blk.buf[i] == '\0')) { + curp->sodest = mandoc_strdup(ln.buf + of); + free(ln.buf); + return; + } + /* + * We remove `so' clauses from our lookaside + * buffer because we're going to descend into + * the file recursively. 
+ */ + if (curp->secondary) + curp->secondary->sz -= pos + 1; + save_file = curp->file; + if ((fd = mparse_open(curp, ln.buf + of)) != -1) { + mparse_readfd(curp, fd, ln.buf + of); + close(fd); + curp->file = save_file; + } else { + curp->file = save_file; + mandoc_vmsg(MANDOCERR_SO_FAIL, + curp, curp->line, pos, + ".so %s", ln.buf + of); + ln.sz = mandoc_asprintf(&cp, + ".sp\nSee the file %s.\n.sp", + ln.buf + of); + free(ln.buf); + ln.buf = cp; + of = 0; + mparse_buf_r(curp, ln, of, 0); + } + pos = 0; + continue; + default: + break; + } + + /* + * If input parsers have not been allocated, do so now. + * We keep these instanced between parsers, but set them + * locally per parse routine since we can use different + * parsers with each one. + */ + + if (curp->man == NULL || + curp->man->macroset == MACROSET_NONE) + choose_parser(curp); + + /* + * Lastly, push down into the parsers themselves. + * If libroff returns ROFF_TBL, then add it to the + * currently open parse. Since we only get here if + * there does exist data (see tbl_data.c), we're + * guaranteed that something's been allocated. + * Do the same for ROFF_EQN. + */ + + if (rr == ROFF_TBL) + while ((span = roff_span(curp->roff)) != NULL) + roff_addtbl(curp->man, span); + else if (rr == ROFF_EQN) + roff_addeqn(curp->man, roff_eqn(curp->roff)); + else if ((curp->man->macroset == MACROSET_MDOC ? + mdoc_parseln(curp->man, curp->line, ln.buf, of) : + man_parseln(curp->man, curp->line, ln.buf, of)) == 2) + break; + + /* Temporary buffers typically are not full. */ + + if (0 == start && '\0' == blk.buf[i]) + break; + + /* Start the next input line. 
*/ + + pos = 0; + } + + free(ln.buf); +} + +static int +read_whole_file(struct mparse *curp, const char *file, int fd, + struct buf *fb, int *with_mmap) +{ + gzFile gz; + size_t off; + ssize_t ssz; + +#if HAVE_MMAP + struct stat st; + + if (fstat(fd, &st) == -1) + err((int)MANDOCLEVEL_SYSERR, "%s", file); + + /* + * If we're a regular file, try just reading in the whole entry + * via mmap(). This is faster than reading it into blocks, and + * since each file is only a few bytes to begin with, I'm not + * concerned that this is going to tank any machines. + */ + + if (curp->gzip == 0 && S_ISREG(st.st_mode)) { + if (st.st_size > 0x7fffffff) { + mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL); + return 0; + } + *with_mmap = 1; + fb->sz = (size_t)st.st_size; + fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0); + if (fb->buf != MAP_FAILED) + return 1; + } +#endif + + if (curp->gzip) { + if ((gz = gzdopen(fd, "rb")) == NULL) + err((int)MANDOCLEVEL_SYSERR, "%s", file); + } else + gz = NULL; + + /* + * If this isn't a regular file (like, say, stdin), then we must + * go the old way and just read things in bit by bit. + */ + + *with_mmap = 0; + off = 0; + fb->sz = 0; + fb->buf = NULL; + for (;;) { + if (off == fb->sz) { + if (fb->sz == (1U << 31)) { + mandoc_msg(MANDOCERR_TOOLARGE, curp, + 0, 0, NULL); + break; + } + resize_buf(fb, 65536); + } + ssz = curp->gzip ? + gzread(gz, fb->buf + (int)off, fb->sz - off) : + read(fd, fb->buf + (int)off, fb->sz - off); + if (ssz == 0) { + fb->sz = off; + return 1; + } + if (ssz == -1) + err((int)MANDOCLEVEL_SYSERR, "%s", file); + off += (size_t)ssz; + } + + free(fb->buf); + fb->buf = NULL; + return 0; +} + +static void +mparse_end(struct mparse *curp) +{ + + if (curp->man == NULL && curp->sodest == NULL) + curp->man = roff_man_alloc(curp->roff, curp, curp->defos, + curp->options & MPARSE_QUICK ? 
1 : 0); + if (curp->man->macroset == MACROSET_NONE) + curp->man->macroset = MACROSET_MAN; + if (curp->man->macroset == MACROSET_MDOC) + mdoc_endparse(curp->man); + else + man_endparse(curp->man); + roff_endparse(curp->roff); +} + +static void +mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file) +{ + struct buf *svprimary; + const char *svfile; + size_t offset; + static int recursion_depth; + + if (64 < recursion_depth) { + mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL); + return; + } + + /* Line number is per-file. */ + svfile = curp->file; + curp->file = file; + svprimary = curp->primary; + curp->primary = &blk; + curp->line = 1; + recursion_depth++; + + /* Skip an UTF-8 byte order mark. */ + if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 && + (unsigned char)blk.buf[0] == 0xef && + (unsigned char)blk.buf[1] == 0xbb && + (unsigned char)blk.buf[2] == 0xbf) { + offset = 3; + curp->filenc &= ~MPARSE_LATIN1; + } else + offset = 0; + + mparse_buf_r(curp, blk, offset, 1); + + if (--recursion_depth == 0) + mparse_end(curp); + + curp->primary = svprimary; + curp->file = svfile; +} + +enum mandoclevel +mparse_readmem(struct mparse *curp, void *buf, size_t len, + const char *file) +{ + struct buf blk; + + blk.buf = buf; + blk.sz = len; + + mparse_parse_buffer(curp, blk, file); + return curp->file_status; +} + +/* + * Read the whole file into memory and call the parsers. + * Called recursively when an .so request is encountered. 
+ */ +enum mandoclevel +mparse_readfd(struct mparse *curp, int fd, const char *file) +{ + struct buf blk; + int with_mmap; + int save_filenc; + + if (read_whole_file(curp, file, fd, &blk, &with_mmap)) { + save_filenc = curp->filenc; + curp->filenc = curp->options & + (MPARSE_UTF8 | MPARSE_LATIN1); + mparse_parse_buffer(curp, blk, file); + curp->filenc = save_filenc; +#if HAVE_MMAP + if (with_mmap) + munmap(blk.buf, blk.sz); + else +#endif + free(blk.buf); + } + return curp->file_status; +} + +int +mparse_open(struct mparse *curp, const char *file) +{ + char *cp; + int fd; + + curp->file = file; + cp = strrchr(file, '.'); + curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz")); + + /* First try to use the filename as it is. */ + + if ((fd = open(file, O_RDONLY)) != -1) + return fd; + + /* + * If that doesn't work and the filename doesn't + * already end in .gz, try appending .gz. + */ + + if ( ! curp->gzip) { + mandoc_asprintf(&cp, "%s.gz", file); + fd = open(cp, O_RDONLY); /* open the ".gz" name just built, not the plain name again */ + free(cp); + if (fd != -1) { + curp->gzip = 1; + return fd; + } + } + + /* Neither worked, give up. */ + + mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno)); + return -1; +} + +struct mparse * +mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg, + const char *defos) +{ + struct mparse *curp; + + curp = mandoc_calloc(1, sizeof(struct mparse)); + + curp->options = options; + curp->wlevel = wlevel; + curp->mmsg = mmsg; + curp->defos = defos; + + curp->roff = roff_alloc(curp, options); + curp->man = roff_man_alloc( curp->roff, curp, curp->defos, + curp->options & MPARSE_QUICK ? 
1 : 0); + if (curp->options & MPARSE_MDOC) { + mdoc_hash_init(); + curp->man->macroset = MACROSET_MDOC; + } else if (curp->options & MPARSE_MAN) { + man_hash_init(); + curp->man->macroset = MACROSET_MAN; + } + curp->man->first->tok = TOKEN_NONE; + return curp; +} + +void +mparse_reset(struct mparse *curp) +{ + + roff_reset(curp->roff); + + if (curp->man != NULL) + roff_man_reset(curp->man); + if (curp->secondary) + curp->secondary->sz = 0; + + curp->file_status = MANDOCLEVEL_OK; + + free(curp->sodest); + curp->sodest = NULL; +} + +void +mparse_free(struct mparse *curp) +{ + + roff_man_free(curp->man); + if (curp->roff) + roff_free(curp->roff); + if (curp->secondary) + free(curp->secondary->buf); + + free(curp->secondary); + free(curp->sodest); + free(curp); +} + +void +mparse_result(struct mparse *curp, struct roff_man **man, + char **sodest) +{ + + if (sodest && NULL != (*sodest = curp->sodest)) { + if (man) *man = NULL; /* man is optional here too, as in the branch below */ + return; + } + if (man) + *man = curp->man; +} + +void +mandoc_vmsg(enum mandocerr t, struct mparse *m, + int ln, int pos, const char *fmt, ...) 
+{ + char buf[256]; + va_list ap; + + va_start(ap, fmt); + (void)vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + mandoc_msg(t, m, ln, pos, buf); +} + +void +mandoc_msg(enum mandocerr er, struct mparse *m, + int ln, int col, const char *msg) +{ + enum mandoclevel level; + + level = MANDOCLEVEL_UNSUPP; + while (er < mandoclimits[level]) + level--; + + if (level < m->wlevel && er != MANDOCERR_FILE) + return; + + if (m->mmsg) + (*m->mmsg)(er, level, m->file, ln, col, msg); + + if (m->file_status < level) + m->file_status = level; +} + +const char * +mparse_strerror(enum mandocerr er) +{ + + return mandocerrs[er]; +} + +const char * +mparse_strlevel(enum mandoclevel lvl) +{ + return mandoclevels[lvl]; +} + +void +mparse_keep(struct mparse *p) +{ + + assert(NULL == p->secondary); + p->secondary = mandoc_calloc(1, sizeof(struct buf)); +} + +const char * +mparse_getkeep(const struct mparse *p) +{ + + assert(p->secondary); + return p->secondary->sz ? p->secondary->buf : NULL; +} diff --git a/contrib/mdocml/roff.7 b/contrib/mdocml/roff.7 new file mode 100644 index 0000000..114f63e --- /dev/null +++ b/contrib/mdocml/roff.7 @@ -0,0 +1,2154 @@ +.\" $Id: roff.7,v 1.75 2015/09/24 18:41:22 schwarze Exp $ +.\" +.\" Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> +.\" Copyright (c) 2010, 2011, 2013-2015 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate: September 24 2015 $ +.Dt ROFF 7 +.Os +.Sh NAME +.Nm roff +.Nd roff language reference for mandoc +.Sh DESCRIPTION +The +.Nm roff +language is a general purpose text formatting language. +Since traditional implementations of the +.Xr mdoc 7 +and +.Xr man 7 +manual formatting languages are based on it, +many real-world manuals use small numbers of +.Nm +requests and escape sequences intermixed with their +.Xr mdoc 7 +or +.Xr man 7 +code. +To properly format such manuals, the +.Xr mandoc 1 +utility supports a tiny subset of +.Nm +requests and escapes. +Only these requests and escapes supported by +.Xr mandoc 1 +are documented in the present manual, +together with the basic language syntax shared by +.Nm , +.Xr mdoc 7 , +and +.Xr man 7 . +For complete +.Nm +manuals, consult the +.Sx SEE ALSO +section. +.Pp +Input lines beginning with the control character +.Sq \&. +are parsed for requests and macros. +Such lines are called +.Dq request lines +or +.Dq macro lines , +respectively. +Requests change the processing state and manipulate the formatting; +some macros also define the document structure and produce formatted +output. +The single quote +.Pq Qq \(aq +is accepted as an alternative control character, +treated by +.Xr mandoc 1 +just like +.Ql \&. +.Pp +Lines not beginning with control characters are called +.Dq text lines . +They provide free-form text to be printed; the formatting of the text +depends on the respective processing context. +.Sh LANGUAGE SYNTAX +.Nm +documents may contain only graphable 7-bit ASCII characters, the space +character, and, in certain circumstances, the tab character. 
+The backslash character +.Sq \e +indicates the start of an escape sequence, used for example for +.Sx Comments , +.Sx Special Characters , +.Sx Predefined Strings , +and +user-defined strings defined using the +.Sx ds +request. +For a listing of escape sequences, consult the +.Sx ESCAPE SEQUENCE REFERENCE +below. +.Ss Comments +Text following an escaped double-quote +.Sq \e\(dq , +whether in a request, macro, or text line, is ignored to the end of the line. +A request line beginning with a control character and comment escape +.Sq \&.\e\(dq +is also ignored. +Furthermore, request lines with only a control character and optional +trailing whitespace are stripped from input. +.Pp +Examples: +.Bd -literal -offset indent -compact +\&.\e\(dq This is a comment line. +\&.\e\(dq The next line is ignored: +\&. +\&.Sh EXAMPLES \e\(dq This is a comment, too. +\&example text \e\(dq And so is this. +.Ed +.Ss Special Characters +Special characters are used to encode special glyphs and are rendered +differently across output media. +They may occur in request, macro, and text lines. +Sequences begin with the escape character +.Sq \e +followed by either an open-parenthesis +.Sq \&( +for two-character sequences; an open-bracket +.Sq \&[ +for n-character sequences (terminated at a close-bracket +.Sq \&] ) ; +or a single character for one-character sequences. +.Pp +Examples: +.Bl -tag -width Ds -offset indent -compact +.It Li \e(em +Two-letter em dash escape. +.It Li \ee +One-letter backslash escape. +.El +.Pp +See +.Xr mandoc_char 7 +for a complete list. +.Ss Text Decoration +Terms may be text-decorated using the +.Sq \ef +escape followed by an indicator: B (bold), I (italic), R (regular), or P +(revert to previous mode). +A numerical representation 3, 2, or 1 (bold, italic, and regular, +respectively) may be used instead. +The indicator or numerical representative may be preceded by C +(constant-width), which is ignored. 
+.Pp +The two-character indicator +.Sq BI +requests a font that is both bold and italic. +It may not be portable to old roff implementations. +.Pp +Examples: +.Bl -tag -width Ds -offset indent -compact +.It Li \efBbold\efR +Write in \fBbold\fP, then switch to regular font mode. +.It Li \efIitalic\efP +Write in \fIitalic\fP, then return to previous font mode. +.It Li \ef(BIbold italic\efP +Write in \f(BIbold italic\fP, then return to previous font mode. +.El +.Pp +Text decoration is +.Em not +recommended for +.Xr mdoc 7 , +which encourages semantic annotation. +.Ss Predefined Strings +Predefined strings, like +.Sx Special Characters , +mark special output glyphs. +Predefined strings are escaped with the backslash-asterisk, +.Sq \e* : +single-character +.Sq \e*X , +two-character +.Sq \e*(XX , +and N-character +.Sq \e*[N] . +.Pp +Examples: +.Bl -tag -width Ds -offset indent -compact +.It Li \e*(Am +Two-letter ampersand predefined string. +.It Li \e*q +One-letter double-quote predefined string. +.El +.Pp +Predefined strings are not recommended for use, +as they differ across implementations. +Those supported by +.Xr mandoc 1 +are listed in +.Xr mandoc_char 7 . +Manuals using these predefined strings are almost certainly not portable. +.Ss Whitespace +Whitespace consists of the space character. +In text lines, whitespace is preserved within a line. +In request and macro lines, whitespace delimits arguments and is discarded. +.Pp +Unescaped trailing spaces are stripped from text line input unless in a +literal context. +In general, trailing whitespace on any input line is discouraged for +reasons of portability. +In the rare case that a blank character is needed at the end of an +input line, it may be forced by +.Sq \e\ \e& . +.Pp +Literal space characters can be produced in the output +using escape sequences. +In macro lines, they can also be included in arguments using quotation; see +.Sx MACRO SYNTAX +for details. 
+.Pp +Blank text lines, which may include whitespace, are only permitted +within literal contexts. +If the first character of a text line is a space, that line is printed +with a leading newline. +.Ss Scaling Widths +Many requests and macros support scaled widths for their arguments. +The syntax for a scaled width is +.Sq Li [+-]?[0-9]*.[0-9]*[:unit:] , +where a decimal must be preceded or followed by at least one digit. +Negative numbers, while accepted, are truncated to zero. +.Pp +The following scaling units are accepted: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It c +centimetre +.It i +inch +.It P +pica (~1/6 inch) +.It p +point (~1/72 inch) +.It f +scale +.Sq u +by 65536 +.It v +default vertical span +.It m +width of rendered +.Sq m +.Pq em +character +.It n +width of rendered +.Sq n +.Pq en +character +.It u +default horizontal span for the terminal +.It M +mini-em (~1/100 em) +.El +.Pp +Using anything other than +.Sq m , +.Sq n , +or +.Sq v +is necessarily non-portable across output media. +See +.Sx COMPATIBILITY . +.Pp +If a scaling unit is not provided, the numerical value is interpreted +under the default rules of +.Sq v +for vertical spaces and +.Sq u +for horizontal ones. +.Pp +Examples: +.Bl -tag -width ".Bl -tag -width 2i" -offset indent -compact +.It Li \&.Bl -tag -width 2i +two-inch tagged list indentation in +.Xr mdoc 7 +.It Li \&.HP 2i +two-inch tagged list indentation in +.Xr man 7 +.It Li \&.sp 2v +two vertical spaces +.El +.Ss Sentence Spacing +Each sentence should terminate at the end of an input line. +By doing this, a formatter will be able to apply the proper amount of +spacing after the end of sentence (unescaped) period, exclamation mark, +or question mark followed by zero or more non-sentence closing +delimiters +.Po +.Sq \&) , +.Sq \&] , +.Sq \&' , +.Sq \&" +.Pc . +.Pp +The proper spacing is also intelligently preserved if a sentence ends at +the boundary of a macro line. 
+.Pp +Examples: +.Bd -literal -offset indent -compact +Do not end sentences mid-line like this. Instead, +end a sentence like this. +A macro would end like this: +\&.Xr mandoc 1 \&. +.Ed +.Sh REQUEST SYNTAX +A request or macro line consists of: +.Pp +.Bl -enum -compact +.It +the control character +.Sq \&. +or +.Sq \(aq +at the beginning of the line, +.It +optionally an arbitrary amount of whitespace, +.It +the name of the request or the macro, which is one word of arbitrary +length, terminated by whitespace, +.It +and zero or more arguments delimited by whitespace. +.El +.Pp +Thus, the following request lines are all equivalent: +.Bd -literal -offset indent +\&.ig end +\&.ig end +\&. ig end +.Ed +.Sh MACRO SYNTAX +Macros are provided by the +.Xr mdoc 7 +and +.Xr man 7 +languages and can be defined by the +.Sx \&de +request. +When called, they follow the same syntax as requests, except that +macro arguments may optionally be quoted by enclosing them +in double quote characters +.Pq Sq \(dq . +Quoted text, even if it contains whitespace or would cause +a macro invocation when unquoted, is always considered literal text. +Inside quoted text, pairs of double quote characters +.Pq Sq Qq +resolve to single double quote characters. +.Pp +To be recognised as the beginning of a quoted argument, the opening +quote character must be preceded by a space character. +A quoted argument extends to the next double quote character that is not +part of a pair, or to the end of the input line, whichever comes earlier. +Leaving out the terminating double quote character at the end of the line +is discouraged. +For clarity, if more arguments follow on the same input line, +it is recommended to follow the terminating double quote character +by a space character; in case the next character after the terminating +double quote character is anything else, it is regarded as the beginning +of the next, unquoted argument. 
+.Pp +Both in quoted and unquoted arguments, pairs of backslashes +.Pq Sq \e\e +resolve to single backslashes. +In unquoted arguments, space characters can alternatively be included +by preceding them with a backslash +.Pq Sq \e\~ , +but quoting is usually better for clarity. +.Pp +Examples: +.Bl -tag -width Ds -offset indent -compact +.It Li .Fn strlen \(dqconst char *s\(dq +Group arguments +.Qq const char *s +into one function argument. +If unspecified, +.Qq const , +.Qq char , +and +.Qq *s +would be considered separate arguments. +.It Li .Op \(dqFl a\(dq +Consider +.Qq \&Fl a +as literal text instead of a flag macro. +.El +.Sh REQUEST REFERENCE +The +.Xr mandoc 1 +.Nm +parser recognises the following requests. +For requests marked as "ignored" or "unsupported", any arguments are +ignored, and the number of arguments is not checked. +.Ss \&ab +Abort processing. +Currently unsupported. +.Ss \&ad +Set line adjustment mode. +It takes one argument to select normal, left, right, +or center adjustment for subsequent text. +Currently ignored. +.Ss \&af +Assign an output format to a number register. +Currently ignored. +.Ss \&aln +Create an alias for a number register. +Currently unsupported. +.Ss \&als +Create an alias for a request, string, macro, or diversion. +Currently unsupported. +.Ss \&am +Append to a macro definition. +The syntax of this request is the same as that of +.Sx \&de . +.Ss \&am1 +Append to a macro definition, switching roff compatibility mode off +during macro execution (groff extension). +The syntax of this request is the same as that of +.Sx \&de1 . +Since +.Xr mandoc 1 +does not implement +.Nm +compatibility mode at all, it handles this request as an alias for +.Sx \&am . +.Ss \&ami +Append to a macro definition, specifying the macro name indirectly +(groff extension). +The syntax of this request is the same as that of +.Sx \&dei . 
+.Ss \&ami1 +Append to a macro definition, specifying the macro name indirectly +and switching roff compatibility mode off during macro execution +(groff extension). +The syntax of this request is the same as that of +.Sx \&dei1 . +Since +.Xr mandoc 1 +does not implement +.Nm +compatibility mode at all, it handles this request as an alias for +.Sx \&ami . +.Ss \&as +Append to a user-defined string. +The syntax of this request is the same as that of +.Sx \&ds . +If a user-defined string with the specified name does not yet exist, +it is set to the empty string before appending. +.Ss \&as1 +Append to a user-defined string, switching roff compatibility mode off +during macro execution (groff extension). +The syntax of this request is the same as that of +.Sx \&ds1 . +Since +.Xr mandoc 1 +does not implement +.Nm +compatibility mode at all, it handles this request as an alias for +.Sx \&as . +.Ss \&asciify +Fully unformat a diversion. +Currently unsupported. +.Ss \&backtrace +Print a backtrace of the input stack. +This is a groff extension and currently ignored. +.Ss \&bd +Artificially embolden by repeated printing with small shifts. +Currently ignored. +.Ss \&bleedat +Set the BleedBox page parameter for PDF generation. +This is a Heirloom extension and currently ignored. +.Ss \&blm +Set a blank line trap. +Currently unsupported. +.Ss \&box +Begin a diversion without including a partially filled line. +Currently unsupported. +.Ss \&boxa +Add to a diversion without including a partially filled line. +Currently unsupported. +.Ss \&bp +Begin new page. +Currently ignored. +.Ss \&BP +Define a frame and place a picture in it. +This is a Heirloom extension and currently unsupported. +.Ss \&br +Break the output line. +See +.Xr man 7 +and +.Xr mdoc 7 . +.Ss \&break +Break out of a +.Sx \&while +loop. +Currently unsupported. +.Ss \&breakchar +Optional line break characters. +This is a Heirloom extension and currently ignored. 
+.Ss \&brnl +Break output line after next N input lines. +This is a Heirloom extension and currently ignored. +.Ss \&brp +Break and spread output line. +Currently, this is implemented as an alias for +.Sx \&br . +.Ss \&brpnl +Break and spread output line after next N input lines. +This is a Heirloom extension and currently ignored. +.Ss \&c2 +Change the no-break control character. +Currently unsupported. +.Ss \&cc +Change the control character. +Its syntax is as follows: +.Bd -literal -offset indent +.Pf . Cm \&cc Op Ar c +.Ed +.Pp +If +.Ar c +is not specified, the control character is reset to +.Sq \&. . +Trailing characters are ignored. +.Ss \&ce +Center some lines. +It takes one integer argument, specifying how many lines to center. +Currently ignored. +.Ss \&cf +Output the contents of a file. +Ignored because insecure. +.Ss \&cflags +Set character flags. +This is a groff extension and currently ignored. +.Ss \&ch +Change a trap location. +Currently ignored. +.Ss \&char +Define a new glyph. +Currently unsupported. +.Ss \&chop +Remove the last character from a macro, string, or diversion. +Currently unsupported. +.Ss \&class +Define a character class. +This is a groff extension and currently ignored. +.Ss \&close +Close an open file. +Ignored because insecure. +.Ss \&CL +Print text in color. +This is a Heirloom extension and currently unsupported. +.Ss \&color +Activate or deactivate colors. +This is a groff extension and currently ignored. +.Ss \&composite +Define a name component for composite glyph names. +This is a groff extension and currently unsupported. +.Ss \&continue +Immediately start the next iteration of a +.Sx \&while +loop. +Currently unsupported. +.Ss \&cp +Switch +.Nm +compatibility mode on or off. +Currently ignored. +.Ss \&cropat +Set the CropBox page parameter for PDF generation. +This is a Heirloom extension and currently ignored. +.Ss \&cs +Constant character spacing mode. +Currently ignored. +.Ss \&cu +Underline including whitespace. 
+Currently ignored. +.Ss \&da +Append to a diversion. +Currently unsupported. +.Ss \&dch +Change a trap location in the current diversion. +This is a Heirloom extension and currently unsupported. +.Ss \&de +Define a +.Nm +macro. +Its syntax can be either +.Bd -literal -offset indent +.Pf . Cm \&de Ar name +.Ar macro definition +\&.. +.Ed +.Pp +or +.Bd -literal -offset indent +.Pf . Cm \&de Ar name Ar end +.Ar macro definition +.Pf . Ar end +.Ed +.Pp +Both forms define or redefine the macro +.Ar name +to represent the +.Ar macro definition , +which may consist of one or more input lines, including the newline +characters terminating each line, optionally containing calls to +.Nm +requests, +.Nm +macros or high-level macros like +.Xr man 7 +or +.Xr mdoc 7 +macros, whichever applies to the document in question. +.Pp +Specifying a custom +.Ar end +macro works in the same way as for +.Sx \&ig ; +namely, the call to +.Sq Pf . Ar end +first ends the +.Ar macro definition , +and after that, it is also evaluated as a +.Nm +request or +.Nm +macro, but not as a high-level macro. +.Pp +The macro can be invoked later using the syntax +.Pp +.D1 Pf . Ar name Op Ar argument Op Ar argument ... +.Pp +Regarding argument parsing, see +.Sx MACRO SYNTAX +above. +.Pp +The line invoking the macro will be replaced +in the input stream by the +.Ar macro definition , +replacing all occurrences of +.No \e\e$ Ns Ar N , +where +.Ar N +is a digit, by the +.Ar N Ns th Ar argument . +For example, +.Bd -literal -offset indent +\&.de ZN +\efI\e^\e\e$1\e^\efP\e\e$2 +\&.. +\&.ZN XtFree . +.Ed +.Pp +produces +.Pp +.D1 \efI\e^XtFree\e^\efP. +.Pp +in the input stream, and thus in the output: \fI\^XtFree\^\fP. +Each occurrence of \e\e$* is replaced with all the arguments, +joined together with single blank characters. 
+.Pp +Since macros and user-defined strings share a common string table, +defining a macro +.Ar name +clobbers the user-defined string +.Ar name , +and the +.Ar macro definition +can also be printed using the +.Sq \e* +string interpolation syntax described below +.Sx ds , +but this is rarely useful because every macro definition contains at least +one explicit newline character. +.Pp +In order to prevent endless recursion, both groff and +.Xr mandoc 1 +limit the stack depth for expanding macros and strings +to a large, but finite number, and +.Xr mandoc 1 +also limits the length of the expanded input line. +Do not rely on the exact values of these limits. +.Ss \&de1 +Define a +.Nm +macro that will be executed with +.Nm +compatibility mode switched off during macro execution. +This is a groff extension. +Since +.Xr mandoc 1 +does not implement +.Nm +compatibility mode at all, it handles this request as an alias for +.Sx \&de . +.Ss \&defcolor +Define a color name. +This is a groff extension and currently ignored. +.Ss \&dei +Define a +.Nm +macro, specifying the macro name indirectly (groff extension). +The syntax of this request is the same as that of +.Sx \&de . +The request +.Pp +.D1 Pf . Cm \&dei Ar name Op Ar end +.Pp +has the same effect as: +.Pp +.D1 Pf . Cm \&de No \e* Ns Bo Ar name Bc Op \e* Ns Bq Ar end +.Ss \&dei1 +Define a +.Nm +macro that will be executed with +.Nm +compatibility mode switched off during macro execution, +specifying the macro name indirectly (groff extension). +Since +.Xr mandoc 1 +does not implement +.Nm +compatibility mode at all, it handles this request as an alias for +.Sx \&dei . +.Ss \&device +This request only makes sense with the groff-specific intermediate +output format and is unsupported. +.Ss \&devicem +This request only makes sense with the groff-specific intermediate +output format and is unsupported. +.Ss \&di +Begin a diversion. +Currently unsupported. 
+.Ss \&do +Execute +.Nm +request or macro line with compatibility mode disabled. +Currently unsupported. +.Ss \&ds +Define a user-defined string. +Its syntax is as follows: +.Pp +.D1 Pf . Cm \&ds Ar name Oo \(dq Oc Ns Ar string +.Pp +The +.Ar name +and +.Ar string +arguments are space-separated. +If the +.Ar string +begins with a double-quote character, that character will not be part +of the string. +All remaining characters on the input line form the +.Ar string , +including whitespace and double-quote characters, even trailing ones. +.Pp +The +.Ar string +can be interpolated into subsequent text by using +.No \e* Ns Bq Ar name +for a +.Ar name +of arbitrary length, or \e*(NN or \e*N if the length of +.Ar name +is two or one characters, respectively. +Interpolation can be prevented by escaping the leading backslash; +that is, an asterisk preceded by an even number of backslashes +does not trigger string interpolation. +.Pp +Since user-defined strings and macros share a common string table, +defining a string +.Ar name +clobbers the macro +.Ar name , +and the +.Ar name +used for defining a string can also be invoked as a macro, +in which case the following input line will be appended to the +.Ar string , +forming a new input line passed to the +.Nm +parser. +For example, +.Bd -literal -offset indent +\&.ds badidea .S +\&.badidea +H SYNOPSIS +.Ed +.Pp +invokes the +.Cm SH +macro when used in a +.Xr man 7 +document. +Such abuse is of course strongly discouraged. +.Ss \&ds1 +Define a user-defined string that will be expanded with +.Nm +compatibility mode switched off during string expansion. +This is a groff extension. +Since +.Xr mandoc 1 +does not implement +.Nm +compatibility mode at all, it handles this request as an alias for +.Sx \&ds . +.Ss \&dwh +Set a location trap in the current diversion. +This is a Heirloom extension and currently unsupported. +.Ss \&dt +Set a trap within a diversion. +Currently unsupported. +.Ss \&ec +Change the escape character. 
+Currently unsupported. +.Ss \&ecs +Save the escape character. +Currently unsupported. +.Ss \&ecr +Restore the escape character. +Currently unsupported. +.Ss \&el +The +.Qq else +half of an if/else conditional. +Pops a result off the stack of conditional evaluations pushed by +.Sx \&ie +and uses it as its conditional. +If no stack entries are present (e.g., due to no prior +.Sx \&ie +calls) +then false is assumed. +The syntax of this request is similar to +.Sx \&if +except that the conditional is missing. +.Ss \&em +Set a trap at the end of input. +Currently unsupported. +.Ss \&EN +End an equation block. +See +.Sx \&EQ . +.Ss \&eo +Disable the escape mechanism completely. +Currently unsupported. +.Ss \&EP +End a picture started by +.Sx \&BP . +This is a Heirloom extension and currently unsupported. +.Ss \&EQ +Begin an equation block. +See +.Xr eqn 7 +for a description of the equation language. +.Ss \&errprint +Print a string like an error message. +This is a Heirloom extension and currently ignored. +.Ss \&ev +Switch to another environment. +Currently unsupported. +.Ss \&evc +Copy an environment into the current environment. +Currently unsupported. +.Ss \&ex +Abort processing and exit. +Currently unsupported. +.Ss \&fallback +Select the fallback sequence for a font. +This is a Heirloom extension and currently ignored. +.Ss \&fam +Change the font family. +Takes one argument specifying the font family to be selected. +It is a groff extension and currently ignored. +.Ss \&fc +Define a delimiting and a padding character for fields. +Currently unsupported. +.Ss \&fchar +Define a fallback glyph. +Currently unsupported. +.Ss \&fcolor +Set the fill color for \eD objects. +This is a groff extension and currently ignored. +.Ss \&fdeferlig +Defer ligature building. +This is a Heirloom extension and currently ignored. +.Ss \&feature +Enable or disable an OpenType feature. +This is a Heirloom extension and currently ignored. +.Ss \&fi +Switch to fill mode. +See +.Xr man 7 . 
+Ignored in +.Xr mdoc 7 . +.Ss \&fkern +Control the use of kerning tables for a font. +This is a Heirloom extension and currently ignored. +.Ss \&fl +Flush output. +Currently ignored. +.Ss \&flig +Define ligatures. +This is a Heirloom extension and currently ignored. +.Ss \&fp +Assign font position. +Currently ignored. +.Ss \&fps +Mount a font with a special character map. +This is a Heirloom extension and currently ignored. +.Ss \&fschar +Define a font-specific fallback glyph. +This is a groff extension and currently unsupported. +.Ss \&fspacewidth +Set a font-specific width for the space character. +This is a Heirloom extension and currently ignored. +.Ss \&fspecial +Conditionally define a special font. +This is a groff extension and currently ignored. +.Ss \&ft +Change the font. +Its syntax is as follows: +.Pp +.D1 Pf . Cm \&ft Op Ar font +.Pp +The following +.Ar font +arguments are supported: +.Bl -tag -width 4n -offset indent +.It Cm B , BI , 3 , 4 +switches to +.Sy bold +font +.It Cm I , 2 +switches to +.Em underlined +font +.It Cm R , CW , 1 +switches to normal font +.It Cm P No "or no argument" +switches back to the previous font +.El +.Pp +This request takes effect only locally, may be overridden by macros +and escape sequences, and is only supported in +.Xr man 7 +for now. +.Ss \&ftr +Translate font name. +This is a groff extension and currently ignored. +.Ss \&fzoom +Zoom font size. +Currently ignored. +.Ss \&gcolor +Set glyph color. +This is a groff extension and currently ignored. +.Ss \&hc +Set the hyphenation character. +Currently ignored. +.Ss \&hcode +Set hyphenation codes of characters. +Currently ignored. +.Ss \&hidechar +Hide characters in a font. +This is a Heirloom extension and currently ignored. +.Ss \&hla +Set hyphenation language. +This is a groff extension and currently ignored. +.Ss \&hlm +Set maximum number of consecutive hyphenated lines. +Currently ignored. +.Ss \&hpf +Load hyphenation pattern file. 
+This is a groff extension and currently ignored. +.Ss \&hpfa +Load hyphenation pattern file, appending to the current patterns. +This is a groff extension and currently ignored. +.Ss \&hpfcode +Define mapping values for character codes in hyphenation patterns. +This is a groff extension and currently ignored. +.Ss \&hw +Specify hyphenation points in words. +Currently ignored. +.Ss \&hy +Set automatic hyphenation mode. +Currently ignored. +.Ss \&hylang +Set hyphenation language. +This is a Heirloom extension and currently ignored. +.Ss \&hylen +Minimum word length for hyphenation. +This is a Heirloom extension and currently ignored. +.Ss \&hym +Set hyphenation margin. +This is a groff extension and currently ignored. +.Ss \&hypp +Define hyphenation penalties. +This is a Heirloom extension and currently ignored. +.Ss \&hys +Set hyphenation space. +This is a groff extension and currently ignored. +.Ss \&ie +The +.Qq if +half of an if/else conditional. +The result of the conditional is pushed into a stack used by subsequent +invocations of +.Sx \&el , +which may be separated by any intervening input (or not exist at all). +Its syntax is equivalent to +.Sx \&if . +.Ss \&if +Begins a conditional. +This request has the following syntax: +.Bd -literal -offset indent +\&.if COND BODY +.Ed +.Bd -literal -offset indent +\&.if COND \e{BODY +BODY...\e} +.Ed +.Bd -literal -offset indent +\&.if COND \e{\e +BODY... +\&.\e} +.Ed +.Pp +COND is a conditional statement. +Currently, +.Xr mandoc 1 +supports the following subset of roff conditionals: +.Bl -bullet +.It +If +.Sq \&! +is prefixed to COND, the condition is logically inverted. +.It +If the first character of COND is +.Sq n +.Pq nroff mode +or +.Sq o +.Pq odd page , +COND evaluates to true. +.It +If the first character of COND is +.Sq c +.Pq character available , +.Sq d +.Pq string defined , +.Sq e +.Pq even page , +.Sq t +.Pq troff mode , +or +.Sq v +.Pq vroff mode , +COND evaluates to false. 
+.It +If the first character of COND is +.Sq r , +it evaluates to true if the rest of COND is the name of an existing +number register; otherwise, it evaluates to false. +.It +If COND starts with a parenthesis or with an optionally signed +integer number, it is evaluated according to the rules of +.Sx Numerical expressions +explained below. +It evaluates to true if the result is positive, +or to false if the result is zero or negative. +.It +Otherwise, the first character of COND is regarded as a delimiter +and COND evaluates to true if the string extending from its first +to its second occurrence is equal to the string extending from its +second to its third occurrence. +.It +If COND cannot be parsed, it evaluates to false. +.El +.Pp +If a conditional is false, its children are not processed, but are +syntactically interpreted to preserve the integrity of the input +document. +Thus, +.Pp +.D1 \&.if t .ig +.Pp +will discard the +.Sq \&.ig , +which may lead to interesting results, but +.Pp +.D1 \&.if t .if t \e{\e +.Pp +will continue to syntactically interpret to the block close of the final +conditional. +Sub-conditionals, in this case, obviously inherit the truth value of +the parent. +.Pp +If the BODY section is begun by an escaped brace +.Sq \e{ , +scope continues until the end of the input line containing the +matching closing-brace escape sequence +.Sq \e} . +If the BODY is not enclosed in braces, scope continues until +the end of the line. +If the COND is followed by a BODY on the same line, whether after a +brace or not, then requests and macros +.Em must +begin with a control character. +It is generally more intuitive, in this case, to write +.Bd -literal -offset indent +\&.if COND \e{\e +\&.foo +bar +\&.\e} +.Ed +.Pp +than having the request or macro follow as +.Pp +.D1 \&.if COND \e{ .foo +.Pp +The scope of a conditional is always parsed, but only executed if the +conditional evaluates to true. 
+.Pp +Note that the +.Sq \e} +is converted into a zero-width escape sequence if not passed as a +standalone macro +.Sq \&.\e} . +For example, +.Pp +.D1 \&.Fl a \e} b +.Pp +will result in +.Sq \e} +being considered an argument of the +.Sq \&Fl +macro. +.Ss \&ig +Ignore input. +Its syntax can be either +.Bd -literal -offset indent +.Pf . Cm \&ig +.Ar ignored text +\&.. +.Ed +.Pp +or +.Bd -literal -offset indent +.Pf . Cm \&ig Ar end +.Ar ignored text +.Pf . Ar end +.Ed +.Pp +In the first case, input is ignored until a +.Sq \&.. +request is encountered on its own line. +In the second case, input is ignored until the specified +.Sq Pf . Ar end +macro is encountered. +Do not use the escape character +.Sq \e +anywhere in the definition of +.Ar end ; +it would cause very strange behaviour. +.Pp +When the +.Ar end +macro is a roff request or a roff macro, like in +.Pp +.D1 \&.ig if +.Pp +the subsequent invocation of +.Sx \&if +will first terminate the +.Ar ignored text , +then be invoked as usual. +Otherwise, it only terminates the +.Ar ignored text , +and arguments following it or the +.Sq \&.. +request are discarded. +.Ss \&in +Change indentation. +See +.Xr man 7 . +Ignored in +.Xr mdoc 7 . +.Ss \&index +Find a substring in a string. +This is a Heirloom extension and currently unsupported. +.Ss \&it +Set an input line trap. +Its syntax is as follows: +.Pp +.D1 Pf . Cm it Ar expression macro +.Pp +The named +.Ar macro +will be invoked after processing the number of input text lines +specified by the numerical +.Ar expression . +While evaluating the +.Ar expression , +the unit suffixes described below +.Sx Scaling Widths +are ignored. +.Ss \&itc +Set an input line trap, not counting lines ending with \ec. +Currently unsupported. +.Ss \&IX +To support the generation of a table of contents, +.Xr pod2man 1 +emits this user-defined macro, usually without defining it. +To avoid reporting large numbers of spurious errors, +.Xr mandoc 1 +ignores it. 
+.Ss \&kern +Switch kerning on or off. +Currently ignored. +.Ss \&kernafter +Increase kerning after some characters. +This is a Heirloom extension and currently ignored. +.Ss \&kernbefore +Increase kerning before some characters. +This is a Heirloom extension and currently ignored. +.Ss \&kernpair +Add a kerning pair to the kerning table. +This is a Heirloom extension and currently ignored. +.Ss \&lc +Define a leader repetition character. +Currently unsupported. +.Ss \&lc_ctype +Set the +.Dv LC_CTYPE +locale. +This is a Heirloom extension and currently unsupported. +.Ss \&lds +Define a local string. +This is a Heirloom extension and currently unsupported. +.Ss \&length +Count the number of input characters in a user-defined string. +Currently unsupported. +.Ss \&letadj +Dynamic letter spacing and reshaping. +This is a Heirloom extension and currently ignored. +.Ss \&lf +Change the line number for error messages. +Ignored because insecure. +.Ss \&lg +Switch the ligature mechanism on or off. +Currently ignored. +.Ss \&lhang +Hang characters at left margin. +This is a Heirloom extension and currently ignored. +.Ss \&linetabs +Enable or disable line-tabs mode. +This is a groff extension and currently unsupported. +.Ss \&ll +Change the output line length. +Its syntax is as follows: +.Pp +.D1 Pf . Cm \&ll Op Oo +|- Oc Ns Ar width +.Pp +If the +.Ar width +argument is omitted, the line length is reset to its previous value. +The default setting for terminal output is 78n. +If a sign is given, the line length is added to or subtracted from; +otherwise, it is set to the provided value. +Using this request in new manuals is discouraged for several reasons, +among others because it overrides the +.Xr mandoc 1 +.Fl O Cm width +command line option. +.Ss \&lnr +Set local number register. +This is a Heirloom extension and currently unsupported. +.Ss \&lnrf +Set local floating-point register. +This is a Heirloom extension and currently unsupported. +.Ss \&lpfx +Set a line prefix. 
+This is a Heirloom extension and currently unsupported. +.Ss \&ls +Set line spacing. +It takes one integer argument specifying the vertical distance of +subsequent output text lines measured in v units. +Currently ignored. +.Ss \&lsm +Set a leading spaces trap. +This is a groff extension and currently unsupported. +.Ss \< +Set title line length. +Currently ignored. +.Ss \&mc +Print margin character in the right margin. +Currently ignored. +.Ss \&mediasize +Set the device media size. +This is a Heirloom extension and currently ignored. +.Ss \&minss +Set minimum word space. +This is a Heirloom extension and currently ignored. +.Ss \&mk +Mark vertical position. +Currently ignored. +.Ss \&mso +Load a macro file. +Ignored because insecure. +.Ss \&na +Disable adjusting without changing the adjustment mode. +Currently ignored. +.Ss \&ne +Declare the need for the specified minimum vertical space +before the next trap or the bottom of the page. +Currently ignored. +.Ss \&nf +Switch to no-fill mode. +See +.Xr man 7 . +Ignored by +.Xr mdoc 7 . +.Ss \&nh +Turn off automatic hyphenation mode. +Currently ignored. +.Ss \&nhychar +Define hyphenation-inhibiting characters. +This is a Heirloom extension and currently ignored. +.Ss \&nm +Print line numbers. +Currently unsupported. +.Ss \&nn +Temporarily turn off line numbering. +Currently unsupported. +.Ss \&nop +Execute the rest of the input line as a request or macro line. +Currently unsupported. +.Ss \&nr +Define or change a register. +A register is an arbitrary string value that defines some sort of state, +which influences parsing and/or formatting. +Its syntax is as follows: +.Pp +.D1 Pf \. Cm \&nr Ar name Oo +|- Oc Ns Ar expression +.Pp +For the syntax of +.Ar expression , +see +.Sx Numerical expressions +below. +If it is prefixed by a sign, the register will be +incremented or decremented instead of assigned to. 
+.Pp +The following register +.Ar name +is handled specially: +.Bl -tag -width Ds +.It Cm nS +If set to a positive integer value, certain +.Xr mdoc 7 +macros will behave in the same way as in the +.Em SYNOPSIS +section. +If set to 0, these macros will behave in the same way as outside the +.Em SYNOPSIS +section, even when called within the +.Em SYNOPSIS +section itself. +Note that starting a new +.Xr mdoc 7 +section with the +.Cm \&Sh +macro will reset this register. +.El +.Ss \&nrf +Define or change a floating-point register. +This is a Heirloom extension and currently unsupported. +.Ss \&nroff +Force nroff mode. +This is a groff extension and currently ignored. +.Ss \&ns +Turn on no-space mode. +Currently ignored. +.Ss \&nx +Abort processing of the current input file and process another one. +Ignored because insecure. +.Ss \&open +Open a file for writing. +Ignored because insecure. +.Ss \&opena +Open a file for appending. +Ignored because insecure. +.Ss \&os +Output saved vertical space. +Currently ignored. +.Ss \&output +Output directly to intermediate output. +Not supported. +.Ss \&padj +Globally control paragraph-at-once adjustment. +This is a Heirloom extension and currently ignored. +.Ss \&papersize +Set the paper size. +This is a Heirloom extension and currently ignored. +.Ss \&pc +Change the page number character. +Currently ignored. +.Ss \&pev +Print environments. +This is a groff extension and currently ignored. +.Ss \&pi +Pipe output to a shell command. +Ignored because insecure. +.Ss \&PI +Low-level request used by +.Sx \&BP . +This is a Heirloom extension and currently unsupported. +.Ss \&pl +Change page length. +Takes one height argument. +Currently ignored. +.Ss \&pm +Print names and sizes of macros, strings, and diversions. +Currently ignored. +.Ss \&pn +Change page number of the next page. +Currently ignored. +.Ss \&pnr +Print all number registers. +Currently ignored. +.Ss \&po +Set horizontal page offset. +Currently ignored. 
+.Ss \&ps +Change point size. +Takes one numerical argument. +Currently ignored. +.Ss \&psbb +Retrieve the bounding box of a PostScript file. +Currently unsupported. +.Ss \&pshape +Set a special shape for the current paragraph. +This is a Heirloom extension and currently unsupported. +.Ss \&pso +Include output of a shell command. +Ignored because insecure. +.Ss \&ptr +Print the names and positions of all traps. +This is a groff extension and currently ignored. +.Ss \&pvs +Change post-vertical spacing. +This is a groff extension and currently ignored. +.Ss \&rchar +Remove glyph definitions. +Currently unsupported. +.Ss \&rd +Read from standard input. +Currently ignored. +.Ss \&recursionlimit +Set the maximum stack depth for recursive macros. +This is a Heirloom extension and currently ignored. +.Ss \&return +Exit a macro and return to the caller. +Currently unsupported. +.Ss \&rfschar +Remove font-specific fallback glyph definitions. +Currently unsupported. +.Ss \&rhang +Hang characters at right margin. +This is a Heirloom extension and currently ignored. +.Ss \&rj +Justify unfilled text to the right margin. +Currently ignored. +.Ss \&rm +Remove a request, macro or string. +Its syntax is as follows: +.Pp +.D1 Pf \. Cm \&rm Ar name +.Ss \&rn +Rename a request, macro, diversion, or string. +Currently unsupported. +.Ss \&rnn +Rename a number register. +Currently unsupported. +.Ss \&rr +Remove a register. +Its syntax is as follows: +.Pp +.D1 Pf \. Cm \&rr Ar name +.Ss \&rs +End no-space mode. +Currently ignored. +.Ss \&rt +Return to marked vertical position. +Currently ignored. +.Ss \&schar +Define global fallback glyph. +This is a groff extension and currently unsupported. +.Ss \&sentchar +Define sentence-ending characters. +This is a Heirloom extension and currently ignored. +.Ss \&shc +Change the soft hyphen character. +Currently ignored. +.Ss \&shift +Shift macro arguments. +Currently unsupported. +.Ss \&sizes +Define permissible point sizes. 
+This is a groff extension and currently ignored. +.Ss \&so +Include a source file. +Its syntax is as follows: +.Pp +.D1 Pf \. Cm \&so Ar file +.Pp +The +.Ar file +will be read and its contents processed as input in place of the +.Sq \&.so +request line. +To avoid inadvertent inclusion of unrelated files, +.Xr mandoc 1 +only accepts relative paths not containing the strings +.Qq ../ +and +.Qq /.. . +.Pp +This request requires +.Xr man 1 +to change to the right directory before calling +.Xr mandoc 1 , +per convention to the root of the manual tree. +Typical usage looks like: +.Pp +.Dl \&.so man3/Xcursor.3 +.Pp +As the whole concept is rather fragile, the use of +.Sx \&so +is discouraged. +Use +.Xr ln 1 +instead. +.Ss \&spacewidth +Set the space width from the font metrics file. +This is a Heirloom extension and currently ignored. +.Ss \&special +Define a special font. +This is a groff extension and currently ignored. +.Ss \&spreadwarn +Warn about wide spacing between words. +Currently ignored. +.Ss \&ss +Set space character size. +Currently ignored. +.Ss \&sty +Associate style with a font position. +This is a groff extension and currently ignored. +.Ss \&substring +Replace a user-defined string with a substring. +Currently unsupported. +.Ss \&sv +Save vertical space. +Currently ignored. +.Ss \&sy +Execute shell command. +Ignored because insecure. +.Ss \&T& +Re-start a table layout, retaining the options of the prior table +invocation. +See +.Sx \&TS . +.Ss \&ta +Set tab stops. +Takes an arbitrary number of arguments. +Currently unsupported. +.Ss \&tc +Change tab repetition character. +Currently unsupported. +.Ss \&TE +End a table context. +See +.Sx \&TS . +.Ss \&ti +Temporary indent. +Currently unsupported. +.Ss \&tkf +Enable track kerning for a font. +Currently ignored. +.Ss \&tl +Print a title line. +Currently unsupported. +.Ss \&tm +Print to standard error output. +Currently ignored. +.Ss \&tm1 +Print to standard error output, allowing leading blanks. 
+This is a groff extension and currently ignored. +.Ss \&tmc +Print to standard error output without a trailing newline. +This is a groff extension and currently ignored. +.Ss \&tr +Output character translation. +Its syntax is as follows: +.Pp +.D1 Pf \. Cm \&tr Ar [ab]+ +.Pp +Pairs of +.Ar ab +characters are replaced +.Ar ( a +for +.Ar b ) . +Replacement (or origin) characters may also be character escapes; thus, +.Pp +.Dl tr \e(xx\e(yy +.Pp +replaces all invocations of \e(xx with \e(yy. +.Ss \&track +Static letter space tracking. +This is a Heirloom extension and currently ignored. +.Ss \&transchar +Define transparent characters for sentence-ending. +This is a Heirloom extension and currently ignored. +.Ss \&trf +Output the contents of a file, disallowing invalid characters. +This is a groff extension and ignored because insecure. +.Ss \&trimat +Set the TrimBox page parameter for PDF generation. +This is a Heirloom extension and currently ignored. +.Ss \&trin +Output character translation, ignored by +.Cm \&asciify . +Currently unsupported. +.Ss \&trnt +Output character translation, ignored by \e!. +Currently unsupported. +.Ss \&troff +Force troff mode. +This is a groff extension and currently ignored. +.Ss \&TS +Begin a table, which formats input in aligned rows and columns. +See +.Xr tbl 7 +for a description of the tbl language. +.Ss \&uf +Globally set the underline font. +Currently ignored. +.Ss \&ul +Underline. +Currently ignored. +.Ss \&unformat +Unformat spaces and tabs in a diversion. +Currently unsupported. +.Ss \&unwatch +Disable notification for string or macro. +This is a Heirloom extension and currently ignored. +.Ss \&unwatchn +Disable notification for register. +This is a Heirloom extension and currently ignored. +.Ss \&vpt +Enable or disable vertical position traps. +This is a groff extension and currently ignored. +.Ss \&vs +Change vertical spacing. +Currently ignored. +.Ss \&warn +Set warning level. +Currently ignored. 
+.Ss \&warnscale +Set the scaling indicator used in warnings. +This is a groff extension and currently ignored. +.Ss \&watch +Notify on change of string or macro. +This is a Heirloom extension and currently ignored. +.Ss \&watchlength +On change, report the contents of macros and strings +up to the specified length. +This is a Heirloom extension and currently ignored. +.Ss \&watchn +Notify on change of register. +This is a Heirloom extension and currently ignored. +.Ss \&wh +Set a page location trap. +Currently unsupported. +.Ss \&while +Repeated execution while a condition is true. +Currently unsupported. +.Ss \&write +Write to an open file. +Ignored because insecure. +.Ss \&writec +Write to an open file without appending a newline. +Ignored because insecure. +.Ss \&writem +Write macro or string to an open file. +Ignored because insecure. +.Ss \&xflag +Set the extension level. +This is a Heirloom extension and currently ignored. +.Ss Numerical expressions +The +.Sx \&nr , +.Sx \&if , +and +.Sx \&ie +requests accept integer numerical expressions as arguments. +These are always evaluated using the C +.Vt int +type; integer overflow works the same way as in the C language. +Numbers consist of an arbitrary number of digits +.Sq 0 +to +.Sq 9 +prefixed by an optional sign +.Sq + +or +.Sq - . +Each number may be followed by one optional scaling unit described below +.Sx Scaling Widths . +The following equations hold: +.Bd -literal -offset indent +1i = 6v = 6P = 10m = 10n = 72p = 1000M = 240u = 240 +254c = 100i = 24000u = 24000 +1f = 65536u = 65536 +.Ed +.Pp +The following binary operators are implemented. 
+Unless otherwise stated, they behave as in the C language: +.Pp +.Bl -tag -width 2n -compact +.It Ic + +addition +.It Ic - +subtraction +.It Ic * +multiplication +.It Ic / +division +.It Ic % +remainder of division +.It Ic < +less than +.It Ic > +greater than +.It Ic == +equal to +.It Ic = +equal to, same effect as +.Ic == +(this differs from C) +.It Ic <= +less than or equal to +.It Ic >= +greater than or equal to +.It Ic <> +not equal to (corresponds to C +.Ic != ; +this one is of limited portability, it is supported by Heirloom roff, +but not by groff) +.It Ic & +logical and (corresponds to C +.Ic && ) +.It Ic \&: +logical or (corresponds to C +.Ic \&|| ) +.It Ic <? +minimum (not available in C) +.It Ic >? +maximum (not available in C) +.El +.Pp +There is no concept of precedence; evaluation proceeds from left to right, +except when subexpressions are enclosed in parentheses. +Inside parentheses, whitespace is ignored. +.Sh ESCAPE SEQUENCE REFERENCE +The +.Xr mandoc 1 +.Nm +parser recognises the following escape sequences. +Note that the +.Nm +language defines more escape sequences not implemented in +.Xr mandoc 1 . +In +.Xr mdoc 7 +and +.Xr man 7 +documents, using escape sequences is discouraged except for those +described in the +.Sx LANGUAGE SYNTAX +section above. +.Pp +A backslash followed by any character not listed here +simply prints that character itself. +.Ss \e<newline> +A backslash at the end of an input line can be used to continue the +logical input line on the next physical input line, joining the text +on both lines together as if it were on a single input line. +.Ss \e<space> +The escape sequence backslash-space +.Pq Sq \e\ \& +is an unpaddable space-sized non-breaking space character; see +.Sx Whitespace . +.Ss \e\(dq +The rest of the input line is treated as +.Sx Comments . +.Ss \e% +Hyphenation allowed at this point of the word; ignored by +.Xr mandoc 1 . +.Ss \e& +Non-printing zero-width character; see +.Sx Whitespace . 
+.Ss \e\(aq +Acute accent special character; use +.Sq \e(aa +instead. +.Ss \e( Ns Ar cc +.Sx Special Characters +with two-letter names, see +.Xr mandoc_char 7 . +.Ss \e*[ Ns Ar name ] +Interpolate the string with the +.Ar name ; +see +.Sx Predefined Strings +and +.Sx ds . +For short names, there are variants +.No \e* Ns Ar c +and +.No \e*( Ns Ar cc . +.Ss \e, +Left italic correction (groff extension); ignored by +.Xr mandoc 1 . +.Ss \e- +Special character +.Dq mathematical minus sign . +.Ss \e/ +Right italic correction (groff extension); ignored by +.Xr mandoc 1 . +.Ss \e[ Ns Ar name ] +.Sx Special Characters +with names of arbitrary length, see +.Xr mandoc_char 7 . +.Ss \e^ +One-twelfth em half-narrow space character, effectively zero-width in +.Xr mandoc 1 . +.Ss \e` +Grave accent special character; use +.Sq \e(ga +instead. +.Ss \e{ +Begin conditional input; see +.Sx if . +.Ss \e\(ba +One-sixth em narrow space character, effectively zero-width in +.Xr mandoc 1 . +.Ss \e} +End conditional input; see +.Sx if . +.Ss \e~ +Paddable non-breaking space character. +.Ss \e0 +Digit width space character. +.Ss \eA\(aq Ns Ar string Ns \(aq +Anchor definition; ignored by +.Xr mandoc 1 . +.Ss \eB\(aq Ns Ar string Ns \(aq +Interpolate +.Sq 1 +if +.Ar string +conforms to the syntax of +.Sx Numerical expressions +explained above and +.Sq 0 +otherwise. +.Ss \eb\(aq Ns Ar string Ns \(aq +Bracket building function; ignored by +.Xr mandoc 1 . +.Ss \eC\(aq Ns Ar name Ns \(aq +.Sx Special Characters +with names of arbitrary length. +.Ss \ec +When encountered at the end of an input text line, +the next input text line is considered to continue that line, +even if there are request or macro lines in between. +No whitespace is inserted. +.Ss \eD\(aq Ns Ar string Ns \(aq +Draw graphics function; ignored by +.Xr mandoc 1 . +.Ss \ed +Move down by half a line; ignored by +.Xr mandoc 1 . +.Ss \ee +Backslash special character. 
+.Ss \eF[ Ns Ar name ] +Switch font family (groff extension); ignored by +.Xr mandoc 1 . +For short names, there are variants +.No \eF Ns Ar c +and +.No \eF( Ns Ar cc . +.Ss \ef[ Ns Ar name ] +Switch to the font +.Ar name , +see +.Sx Text Decoration . +For short names, there are variants +.No \ef Ns Ar c +and +.No \ef( Ns Ar cc . +.Ss \eg[ Ns Ar name ] +Interpolate the format of a number register; ignored by +.Xr mandoc 1 . +For short names, there are variants +.No \eg Ns Ar c +and +.No \eg( Ns Ar cc . +.Ss \eH\(aq Ns Oo +|- Oc Ns Ar number Ns \(aq +Set the height of the current font; ignored by +.Xr mandoc 1 . +.Ss \eh\(aq Ns Ar number Ns \(aq +Horizontal motion; ignored by +.Xr mandoc 1 . +.Ss \ek[ Ns Ar name ] +Mark horizontal input place in register; ignored by +.Xr mandoc 1 . +For short names, there are variants +.No \ek Ns Ar c +and +.No \ek( Ns Ar cc . +.Ss \eL\(aq Ns Ar number Ns Oo Ar c Oc Ns \(aq +Vertical line drawing function; ignored by +.Xr mandoc 1 . +.Ss \el\(aq Ns Ar number Ns Oo Ar c Oc Ns \(aq +Horizontal line drawing function; ignored by +.Xr mandoc 1 . +.Ss \eM[ Ns Ar name ] +Set fill (background) color (groff extension); ignored by +.Xr mandoc 1 . +For short names, there are variants +.No \eM Ns Ar c +and +.No \eM( Ns Ar cc . +.Ss \em[ Ns Ar name ] +Set glyph drawing color (groff extension); ignored by +.Xr mandoc 1 . +For short names, there are variants +.No \em Ns Ar c +and +.No \em( Ns Ar cc . +.Ss \eN\(aq Ns Ar number Ns \(aq +Character +.Ar number +on the current font. +.Ss \en[ Ns Ar name ] +Interpolate the number register +.Ar name . +For short names, there are variants +.No \en Ns Ar c +and +.No \en( Ns Ar cc . +.Ss \eo\(aq Ns Ar string Ns \(aq +Overstrike, writing all the characters contained in the +.Ar string +to the same output position. +In terminal and HTML output modes, +only the last one of the characters is visible. +.Ss \eR\(aq Ns Ar name Oo +|- Oc Ns Ar number Ns \(aq +Set number register; ignored by +.Xr mandoc 1 . 
+.Ss \eS\(aq Ns Ar number Ns \(aq +Slant output; ignored by +.Xr mandoc 1 . +.Ss \es\(aq Ns Oo +|- Oc Ns Ar number Ns \(aq +Change point size; ignored by +.Xr mandoc 1 . +Alternative forms +.No \es Ns Oo +|- Oc Ns Ar n , +.No \es Ns Oo +|- Oc Ns \(aq Ns Ar number Ns \(aq , +.No \es Ns [ Oo +|- Oc Ns Ar number ] , +and +.No \es Ns Oo +|- Oc Ns [ Ar number Ns ] +are also parsed and ignored. +.Ss \et +Horizontal tab; ignored by +.Xr mandoc 1 . +.Ss \eu +Move up by half a line; ignored by +.Xr mandoc 1 . +.Ss \eV[ Ns Ar name ] +Interpolate an environment variable; ignored by +.Xr mandoc 1 . +For short names, there are variants +.No \eV Ns Ar c +and +.No \eV( Ns Ar cc . +.Ss \ev\(aq Ns Ar number Ns \(aq +Vertical motion; ignored by +.Xr mandoc 1 . +.Ss \ew\(aq Ns Ar string Ns \(aq +Interpolate the width of the +.Ar string . +The +.Xr mandoc 1 +implementation assumes that after expansion of user-defined strings, the +.Ar string +only contains normal characters, no escape sequences, and that each +character has a width of 24 basic units. +.Ss \eX\(aq Ns Ar string Ns \(aq +Output +.Ar string +as device control function; ignored in nroff mode and by +.Xr mandoc 1 . +.Ss \ex\(aq Ns Ar number Ns \(aq +Extra line space function; ignored by +.Xr mandoc 1 . +.Ss \eY[ Ns Ar name ] +Output a string as a device control function; ignored in nroff mode and by +.Xr mandoc 1 . +For short names, there are variants +.No \eY Ns Ar c +and +.No \eY( Ns Ar cc . +.Ss \eZ\(aq Ns Ar string Ns \(aq +Print +.Ar string +with zero width and height; ignored by +.Xr mandoc 1 . +.Ss \ez +Output the next character without advancing the cursor position. +.Sh COMPATIBILITY +The +.Xr mandoc 1 +implementation of the +.Nm +language is intentionally incomplete. +Unimplemented features include: +.Pp +.Bl -dash -compact +.It +For security reasons, +.Xr mandoc 1 +never reads or writes external files except via +.Sx \&so +requests with safe relative paths. 
+.It +There is no automatic hyphenation, no adjustment to the right margin, +and no centering; the output is always set flush-left. +.It +Support for setting tabulator positions +and tabulator and leader characters is missing, +and support for manually changing indentation is limited. +.It +The +.Sq u +scaling unit is the default terminal unit. +In traditional troff systems, this unit changes depending on the +output media. +.It +Width measurements are implemented in a crude way +and often yield wrong results. +Explicit movement requests and escapes are ignored. +.It +There is no concept of output pages, no support for floats, +graphics drawing, and picture inclusion; +terminal output is always continuous. +.It +Requests regarding color, font families, and glyph manipulation +are ignored. +Font support is very limited. +Kerning is not implemented, and no ligatures are produced. +.It +The +.Qq \(aq +macro control character does not suppress output line breaks. +.It +Diversions are not implemented, +and support for traps is very incomplete. +.It +While recursion is supported, +.Sx \&while +loops are not. +.El +.Pp +The special semantics of the +.Cm nS +number register is an idiosyncrasy of +.Ox +manuals and not supported by other +.Xr mdoc 7 +implementations. +.Sh SEE ALSO +.Xr mandoc 1 , +.Xr eqn 7 , +.Xr man 7 , +.Xr mandoc_char 7 , +.Xr mdoc 7 , +.Xr tbl 7 +.Rs +.%A Joseph F. Ossanna +.%A Brian W. Kernighan +.%I AT&T Bell Laboratories +.%T Troff User's Manual +.%R Computing Science Technical Report +.%N 54 +.%C Murray Hill, New Jersey +.%D 1976 and 1992 +.%U http://www.kohala.com/start/troff/cstr54.ps +.Re +.Rs +.%A Joseph F. Ossanna +.%A Brian W. 
Kernighan +.%A Gunnar Ritter +.%T Heirloom Documentation Tools Nroff/Troff User's Manual +.%D September 17, 2007 +.%U http://heirloom.sourceforge.net/doctools/troff.pdf +.Re +.Sh HISTORY +The RUNOFF typesetting system, whose input forms the basis for +.Nm , +was written in MAD and FAP for the CTSS operating system by Jerome E. +Saltzer in 1964. +Doug McIlroy rewrote it in BCPL in 1969, renaming it +.Nm . +Dennis M. Ritchie rewrote McIlroy's +.Nm +in PDP-11 assembly for +.At v1 , +Joseph F. Ossanna improved roff and renamed it nroff +for +.At v2 , +then ported nroff to C as troff, which Brian W. Kernighan released with +.At v7 . +In 1989, James Clark re-implemented troff in C++, naming it groff. +.Sh AUTHORS +.An -nosplit +This +.Nm +reference was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +and +.An Ingo Schwarze Aq Mt schwarze@openbsd.org . diff --git a/contrib/mdocml/roff.c b/contrib/mdocml/roff.c new file mode 100644 index 0000000..13b9439 --- /dev/null +++ b/contrib/mdocml/roff.c @@ -0,0 +1,3468 @@ +/* $Id: roff.c,v 1.284 2016/01/08 17:48:10 schwarze Exp $ */ +/* + * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "mandoc_aux.h" +#include "roff.h" +#include "libmandoc.h" +#include "roff_int.h" +#include "libroff.h" + +/* Maximum number of string expansions per line, to break infinite loops. */ +#define EXPAND_LIMIT 1000 + +/* --- data types --------------------------------------------------------- */ + +enum rofft { + ROFF_ab, + ROFF_ad, + ROFF_af, + ROFF_aln, + ROFF_als, + ROFF_am, + ROFF_am1, + ROFF_ami, + ROFF_ami1, + ROFF_as, + ROFF_as1, + ROFF_asciify, + ROFF_backtrace, + ROFF_bd, + ROFF_bleedat, + ROFF_blm, + ROFF_box, + ROFF_boxa, + ROFF_bp, + ROFF_BP, + /* MAN_br, MDOC_br */ + ROFF_break, + ROFF_breakchar, + ROFF_brnl, + ROFF_brp, + ROFF_brpnl, + ROFF_c2, + ROFF_cc, + ROFF_ce, + ROFF_cf, + ROFF_cflags, + ROFF_ch, + ROFF_char, + ROFF_chop, + ROFF_class, + ROFF_close, + ROFF_CL, + ROFF_color, + ROFF_composite, + ROFF_continue, + ROFF_cp, + ROFF_cropat, + ROFF_cs, + ROFF_cu, + ROFF_da, + ROFF_dch, + ROFF_Dd, + ROFF_de, + ROFF_de1, + ROFF_defcolor, + ROFF_dei, + ROFF_dei1, + ROFF_device, + ROFF_devicem, + ROFF_di, + ROFF_do, + ROFF_ds, + ROFF_ds1, + ROFF_dwh, + ROFF_dt, + ROFF_ec, + ROFF_ecr, + ROFF_ecs, + ROFF_el, + ROFF_em, + ROFF_EN, + ROFF_eo, + ROFF_EP, + ROFF_EQ, + ROFF_errprint, + ROFF_ev, + ROFF_evc, + ROFF_ex, + ROFF_fallback, + ROFF_fam, + ROFF_fc, + ROFF_fchar, + ROFF_fcolor, + ROFF_fdeferlig, + ROFF_feature, + /* MAN_fi; ignored in mdoc(7) */ + ROFF_fkern, + ROFF_fl, + ROFF_flig, + ROFF_fp, + ROFF_fps, + ROFF_fschar, + ROFF_fspacewidth, + ROFF_fspecial, + /* MAN_ft; ignored in mdoc(7) */ + ROFF_ftr, + ROFF_fzoom, + ROFF_gcolor, + ROFF_hc, + ROFF_hcode, + ROFF_hidechar, + ROFF_hla, + ROFF_hlm, + ROFF_hpf, + ROFF_hpfa, + ROFF_hpfcode, + ROFF_hw, + ROFF_hy, + ROFF_hylang, + ROFF_hylen, + ROFF_hym, + ROFF_hypp, + ROFF_hys, + ROFF_ie, + ROFF_if, + ROFF_ig, 
+ /* MAN_in; ignored in mdoc(7) */ + ROFF_index, + ROFF_it, + ROFF_itc, + ROFF_IX, + ROFF_kern, + ROFF_kernafter, + ROFF_kernbefore, + ROFF_kernpair, + ROFF_lc, + ROFF_lc_ctype, + ROFF_lds, + ROFF_length, + ROFF_letadj, + ROFF_lf, + ROFF_lg, + ROFF_lhang, + ROFF_linetabs, + /* MAN_ll, MDOC_ll */ + ROFF_lnr, + ROFF_lnrf, + ROFF_lpfx, + ROFF_ls, + ROFF_lsm, + ROFF_lt, + ROFF_mc, + ROFF_mediasize, + ROFF_minss, + ROFF_mk, + ROFF_mso, + ROFF_na, + ROFF_ne, + /* MAN_nf; ignored in mdoc(7) */ + ROFF_nh, + ROFF_nhychar, + ROFF_nm, + ROFF_nn, + ROFF_nop, + ROFF_nr, + ROFF_nrf, + ROFF_nroff, + ROFF_ns, + ROFF_nx, + ROFF_open, + ROFF_opena, + ROFF_os, + ROFF_output, + ROFF_padj, + ROFF_papersize, + ROFF_pc, + ROFF_pev, + ROFF_pi, + ROFF_PI, + ROFF_pl, + ROFF_pm, + ROFF_pn, + ROFF_pnr, + ROFF_po, + ROFF_ps, + ROFF_psbb, + ROFF_pshape, + ROFF_pso, + ROFF_ptr, + ROFF_pvs, + ROFF_rchar, + ROFF_rd, + ROFF_recursionlimit, + ROFF_return, + ROFF_rfschar, + ROFF_rhang, + ROFF_rj, + ROFF_rm, + ROFF_rn, + ROFF_rnn, + ROFF_rr, + ROFF_rs, + ROFF_rt, + ROFF_schar, + ROFF_sentchar, + ROFF_shc, + ROFF_shift, + ROFF_sizes, + ROFF_so, + /* MAN_sp, MDOC_sp */ + ROFF_spacewidth, + ROFF_special, + ROFF_spreadwarn, + ROFF_ss, + ROFF_sty, + ROFF_substring, + ROFF_sv, + ROFF_sy, + ROFF_T_, + ROFF_ta, + ROFF_tc, + ROFF_TE, + ROFF_TH, + ROFF_ti, + ROFF_tkf, + ROFF_tl, + ROFF_tm, + ROFF_tm1, + ROFF_tmc, + ROFF_tr, + ROFF_track, + ROFF_transchar, + ROFF_trf, + ROFF_trimat, + ROFF_trin, + ROFF_trnt, + ROFF_troff, + ROFF_TS, + ROFF_uf, + ROFF_ul, + ROFF_unformat, + ROFF_unwatch, + ROFF_unwatchn, + ROFF_vpt, + ROFF_vs, + ROFF_warn, + ROFF_warnscale, + ROFF_watch, + ROFF_watchlength, + ROFF_watchn, + ROFF_wh, + ROFF_while, + ROFF_write, + ROFF_writec, + ROFF_writem, + ROFF_xflag, + ROFF_cblock, + ROFF_USERDEF, + ROFF_MAX +}; + +/* + * An incredibly-simple string buffer. 
+ */ +struct roffstr { + char *p; /* nil-terminated buffer */ + size_t sz; /* saved strlen(p) */ +}; + +/* + * A key-value roffstr pair as part of a singly-linked list. + */ +struct roffkv { + struct roffstr key; + struct roffstr val; + struct roffkv *next; /* next in list */ +}; + +/* + * A single number register as part of a singly-linked list. + */ +struct roffreg { + struct roffstr key; + int val; + struct roffreg *next; +}; + +struct roff { + struct mparse *parse; /* parse point */ + struct roffnode *last; /* leaf of stack */ + int *rstack; /* stack of inverted `ie' values */ + struct roffreg *regtab; /* number registers */ + struct roffkv *strtab; /* user-defined strings & macros */ + struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ + struct roffstr *xtab; /* single-byte trans table (`tr') */ + const char *current_string; /* value of last called user macro */ + struct tbl_node *first_tbl; /* first table parsed */ + struct tbl_node *last_tbl; /* last table parsed */ + struct tbl_node *tbl; /* current table being parsed */ + struct eqn_node *last_eqn; /* last equation parsed */ + struct eqn_node *first_eqn; /* first equation parsed */ + struct eqn_node *eqn; /* current equation being parsed */ + int eqn_inline; /* current equation is inline */ + int options; /* parse options */ + int rstacksz; /* current size limit of rstack */ + int rstackpos; /* position in rstack */ + int format; /* current file in mdoc or man format */ + int argc; /* number of args of the last macro */ + char control; /* control character */ +}; + +struct roffnode { + enum rofft tok; /* type of node */ + struct roffnode *parent; /* up one in stack */ + int line; /* parse line */ + int col; /* parse col */ + char *name; /* node name, e.g. 
macro name */ + char *end; /* end-rules: custom token */ + int endspan; /* end-rules: next-line or infty */ + int rule; /* current evaluation rule */ +}; + +#define ROFF_ARGS struct roff *r, /* parse ctx */ \ + enum rofft tok, /* tok of macro */ \ + struct buf *buf, /* input buffer */ \ + int ln, /* parse line */ \ + int ppos, /* original pos in buffer */ \ + int pos, /* current pos in buffer */ \ + int *offs /* reset offset of buffer data */ + +typedef enum rofferr (*roffproc)(ROFF_ARGS); + +struct roffmac { + const char *name; /* macro name */ + roffproc proc; /* process new macro */ + roffproc text; /* process as child text of macro */ + roffproc sub; /* process as child of macro */ + int flags; +#define ROFFMAC_STRUCT (1 << 0) /* always interpret */ + struct roffmac *next; +}; + +struct predef { + const char *name; /* predefined input name */ + const char *str; /* replacement symbol */ +}; + +#define PREDEF(__name, __str) \ + { (__name), (__str) }, + +/* --- function prototypes ------------------------------------------------ */ + +static enum rofft roffhash_find(const char *, size_t); +static void roffhash_init(void); +static void roffnode_cleanscope(struct roff *); +static void roffnode_pop(struct roff *); +static void roffnode_push(struct roff *, enum rofft, + const char *, int, int); +static enum rofferr roff_block(ROFF_ARGS); +static enum rofferr roff_block_text(ROFF_ARGS); +static enum rofferr roff_block_sub(ROFF_ARGS); +static enum rofferr roff_brp(ROFF_ARGS); +static enum rofferr roff_cblock(ROFF_ARGS); +static enum rofferr roff_cc(ROFF_ARGS); +static void roff_ccond(struct roff *, int, int); +static enum rofferr roff_cond(ROFF_ARGS); +static enum rofferr roff_cond_text(ROFF_ARGS); +static enum rofferr roff_cond_sub(ROFF_ARGS); +static enum rofferr roff_ds(ROFF_ARGS); +static enum rofferr roff_eqndelim(struct roff *, struct buf *, int); +static int roff_evalcond(struct roff *r, int, char *, int *); +static int roff_evalnum(struct roff *, int, + const 
char *, int *, int *, int); +static int roff_evalpar(struct roff *, int, + const char *, int *, int *, int); +static int roff_evalstrcond(const char *, int *); +static void roff_free1(struct roff *); +static void roff_freereg(struct roffreg *); +static void roff_freestr(struct roffkv *); +static size_t roff_getname(struct roff *, char **, int, int); +static int roff_getnum(const char *, int *, int *, int); +static int roff_getop(const char *, int *, char *); +static int roff_getregn(const struct roff *, + const char *, size_t); +static int roff_getregro(const struct roff *, + const char *name); +static const char *roff_getstrn(const struct roff *, + const char *, size_t); +static int roff_hasregn(const struct roff *, + const char *, size_t); +static enum rofferr roff_insec(ROFF_ARGS); +static enum rofferr roff_it(ROFF_ARGS); +static enum rofferr roff_line_ignore(ROFF_ARGS); +static void roff_man_alloc1(struct roff_man *); +static void roff_man_free1(struct roff_man *); +static enum rofferr roff_nr(ROFF_ARGS); +static enum rofft roff_parse(struct roff *, char *, int *, + int, int); +static enum rofferr roff_parsetext(struct buf *, int, int *); +static enum rofferr roff_res(struct roff *, struct buf *, int, int); +static enum rofferr roff_rm(ROFF_ARGS); +static enum rofferr roff_rr(ROFF_ARGS); +static void roff_setstr(struct roff *, + const char *, const char *, int); +static void roff_setstrn(struct roffkv **, const char *, + size_t, const char *, size_t, int); +static enum rofferr roff_so(ROFF_ARGS); +static enum rofferr roff_tr(ROFF_ARGS); +static enum rofferr roff_Dd(ROFF_ARGS); +static enum rofferr roff_TH(ROFF_ARGS); +static enum rofferr roff_TE(ROFF_ARGS); +static enum rofferr roff_TS(ROFF_ARGS); +static enum rofferr roff_EQ(ROFF_ARGS); +static enum rofferr roff_EN(ROFF_ARGS); +static enum rofferr roff_T_(ROFF_ARGS); +static enum rofferr roff_unsupp(ROFF_ARGS); +static enum rofferr roff_userdef(ROFF_ARGS); + +/* --- constant data 
------------------------------------------------------ */ + +/* See roffhash_find() */ + +#define ASCII_HI 126 +#define ASCII_LO 33 +#define HASHWIDTH (ASCII_HI - ASCII_LO + 1) + +#define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */ +#define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */ + +static struct roffmac *hash[HASHWIDTH]; + +static struct roffmac roffs[ROFF_MAX] = { + { "ab", roff_unsupp, NULL, NULL, 0, NULL }, + { "ad", roff_line_ignore, NULL, NULL, 0, NULL }, + { "af", roff_line_ignore, NULL, NULL, 0, NULL }, + { "aln", roff_unsupp, NULL, NULL, 0, NULL }, + { "als", roff_unsupp, NULL, NULL, 0, NULL }, + { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "ami1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "as", roff_ds, NULL, NULL, 0, NULL }, + { "as1", roff_ds, NULL, NULL, 0, NULL }, + { "asciify", roff_unsupp, NULL, NULL, 0, NULL }, + { "backtrace", roff_line_ignore, NULL, NULL, 0, NULL }, + { "bd", roff_line_ignore, NULL, NULL, 0, NULL }, + { "bleedat", roff_line_ignore, NULL, NULL, 0, NULL }, + { "blm", roff_unsupp, NULL, NULL, 0, NULL }, + { "box", roff_unsupp, NULL, NULL, 0, NULL }, + { "boxa", roff_unsupp, NULL, NULL, 0, NULL }, + { "bp", roff_line_ignore, NULL, NULL, 0, NULL }, + { "BP", roff_unsupp, NULL, NULL, 0, NULL }, + { "break", roff_unsupp, NULL, NULL, 0, NULL }, + { "breakchar", roff_line_ignore, NULL, NULL, 0, NULL }, + { "brnl", roff_line_ignore, NULL, NULL, 0, NULL }, + { "brp", roff_brp, NULL, NULL, 0, NULL }, + { "brpnl", roff_line_ignore, NULL, NULL, 0, NULL }, + { "c2", roff_unsupp, NULL, NULL, 0, NULL }, + { "cc", roff_cc, NULL, NULL, 0, NULL }, + { "ce", roff_line_ignore, NULL, NULL, 0, NULL }, + { "cf", roff_insec, NULL, NULL, 0, NULL }, + { "cflags", roff_line_ignore, NULL, NULL, 0, NULL }, + { "ch", roff_line_ignore, NULL, NULL, 
0, NULL }, + { "char", roff_unsupp, NULL, NULL, 0, NULL }, + { "chop", roff_unsupp, NULL, NULL, 0, NULL }, + { "class", roff_line_ignore, NULL, NULL, 0, NULL }, + { "close", roff_insec, NULL, NULL, 0, NULL }, + { "CL", roff_unsupp, NULL, NULL, 0, NULL }, + { "color", roff_line_ignore, NULL, NULL, 0, NULL }, + { "composite", roff_unsupp, NULL, NULL, 0, NULL }, + { "continue", roff_unsupp, NULL, NULL, 0, NULL }, + { "cp", roff_line_ignore, NULL, NULL, 0, NULL }, + { "cropat", roff_line_ignore, NULL, NULL, 0, NULL }, + { "cs", roff_line_ignore, NULL, NULL, 0, NULL }, + { "cu", roff_line_ignore, NULL, NULL, 0, NULL }, + { "da", roff_unsupp, NULL, NULL, 0, NULL }, + { "dch", roff_unsupp, NULL, NULL, 0, NULL }, + { "Dd", roff_Dd, NULL, NULL, 0, NULL }, + { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "defcolor", roff_line_ignore, NULL, NULL, 0, NULL }, + { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "dei1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "device", roff_unsupp, NULL, NULL, 0, NULL }, + { "devicem", roff_unsupp, NULL, NULL, 0, NULL }, + { "di", roff_unsupp, NULL, NULL, 0, NULL }, + { "do", roff_unsupp, NULL, NULL, 0, NULL }, + { "ds", roff_ds, NULL, NULL, 0, NULL }, + { "ds1", roff_ds, NULL, NULL, 0, NULL }, + { "dwh", roff_unsupp, NULL, NULL, 0, NULL }, + { "dt", roff_unsupp, NULL, NULL, 0, NULL }, + { "ec", roff_unsupp, NULL, NULL, 0, NULL }, + { "ecr", roff_unsupp, NULL, NULL, 0, NULL }, + { "ecs", roff_unsupp, NULL, NULL, 0, NULL }, + { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, + { "em", roff_unsupp, NULL, NULL, 0, NULL }, + { "EN", roff_EN, NULL, NULL, 0, NULL }, + { "eo", roff_unsupp, NULL, NULL, 0, NULL }, + { "EP", roff_unsupp, NULL, NULL, 0, NULL }, + { "EQ", roff_EQ, NULL, NULL, 0, NULL }, + { "errprint", roff_line_ignore, NULL, NULL, 0, NULL }, + { "ev", roff_unsupp, NULL, NULL, 0, NULL }, + { 
"evc", roff_unsupp, NULL, NULL, 0, NULL }, + { "ex", roff_unsupp, NULL, NULL, 0, NULL }, + { "fallback", roff_line_ignore, NULL, NULL, 0, NULL }, + { "fam", roff_line_ignore, NULL, NULL, 0, NULL }, + { "fc", roff_unsupp, NULL, NULL, 0, NULL }, + { "fchar", roff_unsupp, NULL, NULL, 0, NULL }, + { "fcolor", roff_line_ignore, NULL, NULL, 0, NULL }, + { "fdeferlig", roff_line_ignore, NULL, NULL, 0, NULL }, + { "feature", roff_line_ignore, NULL, NULL, 0, NULL }, + { "fkern", roff_line_ignore, NULL, NULL, 0, NULL }, + { "fl", roff_line_ignore, NULL, NULL, 0, NULL }, + { "flig", roff_line_ignore, NULL, NULL, 0, NULL }, + { "fp", roff_line_ignore, NULL, NULL, 0, NULL }, + { "fps", roff_line_ignore, NULL, NULL, 0, NULL }, + { "fschar", roff_unsupp, NULL, NULL, 0, NULL }, + { "fspacewidth", roff_line_ignore, NULL, NULL, 0, NULL }, + { "fspecial", roff_line_ignore, NULL, NULL, 0, NULL }, + { "ftr", roff_line_ignore, NULL, NULL, 0, NULL }, + { "fzoom", roff_line_ignore, NULL, NULL, 0, NULL }, + { "gcolor", roff_line_ignore, NULL, NULL, 0, NULL }, + { "hc", roff_line_ignore, NULL, NULL, 0, NULL }, + { "hcode", roff_line_ignore, NULL, NULL, 0, NULL }, + { "hidechar", roff_line_ignore, NULL, NULL, 0, NULL }, + { "hla", roff_line_ignore, NULL, NULL, 0, NULL }, + { "hlm", roff_line_ignore, NULL, NULL, 0, NULL }, + { "hpf", roff_line_ignore, NULL, NULL, 0, NULL }, + { "hpfa", roff_line_ignore, NULL, NULL, 0, NULL }, + { "hpfcode", roff_line_ignore, NULL, NULL, 0, NULL }, + { "hw", roff_line_ignore, NULL, NULL, 0, NULL }, + { "hy", roff_line_ignore, NULL, NULL, 0, NULL }, + { "hylang", roff_line_ignore, NULL, NULL, 0, NULL }, + { "hylen", roff_line_ignore, NULL, NULL, 0, NULL }, + { "hym", roff_line_ignore, NULL, NULL, 0, NULL }, + { "hypp", roff_line_ignore, NULL, NULL, 0, NULL }, + { "hys", roff_line_ignore, NULL, NULL, 0, NULL }, + { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, + { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, + 
{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "index", roff_unsupp, NULL, NULL, 0, NULL }, + { "it", roff_it, NULL, NULL, 0, NULL }, + { "itc", roff_unsupp, NULL, NULL, 0, NULL }, + { "IX", roff_line_ignore, NULL, NULL, 0, NULL }, + { "kern", roff_line_ignore, NULL, NULL, 0, NULL }, + { "kernafter", roff_line_ignore, NULL, NULL, 0, NULL }, + { "kernbefore", roff_line_ignore, NULL, NULL, 0, NULL }, + { "kernpair", roff_line_ignore, NULL, NULL, 0, NULL }, + { "lc", roff_unsupp, NULL, NULL, 0, NULL }, + { "lc_ctype", roff_unsupp, NULL, NULL, 0, NULL }, + { "lds", roff_unsupp, NULL, NULL, 0, NULL }, + { "length", roff_unsupp, NULL, NULL, 0, NULL }, + { "letadj", roff_line_ignore, NULL, NULL, 0, NULL }, + { "lf", roff_insec, NULL, NULL, 0, NULL }, + { "lg", roff_line_ignore, NULL, NULL, 0, NULL }, + { "lhang", roff_line_ignore, NULL, NULL, 0, NULL }, + { "linetabs", roff_unsupp, NULL, NULL, 0, NULL }, + { "lnr", roff_unsupp, NULL, NULL, 0, NULL }, + { "lnrf", roff_unsupp, NULL, NULL, 0, NULL }, + { "lpfx", roff_unsupp, NULL, NULL, 0, NULL }, + { "ls", roff_line_ignore, NULL, NULL, 0, NULL }, + { "lsm", roff_unsupp, NULL, NULL, 0, NULL }, + { "lt", roff_line_ignore, NULL, NULL, 0, NULL }, + { "mc", roff_line_ignore, NULL, NULL, 0, NULL }, + { "mediasize", roff_line_ignore, NULL, NULL, 0, NULL }, + { "minss", roff_line_ignore, NULL, NULL, 0, NULL }, + { "mk", roff_line_ignore, NULL, NULL, 0, NULL }, + { "mso", roff_insec, NULL, NULL, 0, NULL }, + { "na", roff_line_ignore, NULL, NULL, 0, NULL }, + { "ne", roff_line_ignore, NULL, NULL, 0, NULL }, + { "nh", roff_line_ignore, NULL, NULL, 0, NULL }, + { "nhychar", roff_line_ignore, NULL, NULL, 0, NULL }, + { "nm", roff_unsupp, NULL, NULL, 0, NULL }, + { "nn", roff_unsupp, NULL, NULL, 0, NULL }, + { "nop", roff_unsupp, NULL, NULL, 0, NULL }, + { "nr", roff_nr, NULL, NULL, 0, NULL }, + { "nrf", roff_unsupp, NULL, NULL, 0, NULL }, + { "nroff", roff_line_ignore, NULL, NULL, 0, NULL }, + { "ns", 
roff_line_ignore, NULL, NULL, 0, NULL }, + { "nx", roff_insec, NULL, NULL, 0, NULL }, + { "open", roff_insec, NULL, NULL, 0, NULL }, + { "opena", roff_insec, NULL, NULL, 0, NULL }, + { "os", roff_line_ignore, NULL, NULL, 0, NULL }, + { "output", roff_unsupp, NULL, NULL, 0, NULL }, + { "padj", roff_line_ignore, NULL, NULL, 0, NULL }, + { "papersize", roff_line_ignore, NULL, NULL, 0, NULL }, + { "pc", roff_line_ignore, NULL, NULL, 0, NULL }, + { "pev", roff_line_ignore, NULL, NULL, 0, NULL }, + { "pi", roff_insec, NULL, NULL, 0, NULL }, + { "PI", roff_unsupp, NULL, NULL, 0, NULL }, + { "pl", roff_line_ignore, NULL, NULL, 0, NULL }, + { "pm", roff_line_ignore, NULL, NULL, 0, NULL }, + { "pn", roff_line_ignore, NULL, NULL, 0, NULL }, + { "pnr", roff_line_ignore, NULL, NULL, 0, NULL }, + { "po", roff_line_ignore, NULL, NULL, 0, NULL }, + { "ps", roff_line_ignore, NULL, NULL, 0, NULL }, + { "psbb", roff_unsupp, NULL, NULL, 0, NULL }, + { "pshape", roff_unsupp, NULL, NULL, 0, NULL }, + { "pso", roff_insec, NULL, NULL, 0, NULL }, + { "ptr", roff_line_ignore, NULL, NULL, 0, NULL }, + { "pvs", roff_line_ignore, NULL, NULL, 0, NULL }, + { "rchar", roff_unsupp, NULL, NULL, 0, NULL }, + { "rd", roff_line_ignore, NULL, NULL, 0, NULL }, + { "recursionlimit", roff_line_ignore, NULL, NULL, 0, NULL }, + { "return", roff_unsupp, NULL, NULL, 0, NULL }, + { "rfschar", roff_unsupp, NULL, NULL, 0, NULL }, + { "rhang", roff_line_ignore, NULL, NULL, 0, NULL }, + { "rj", roff_line_ignore, NULL, NULL, 0, NULL }, + { "rm", roff_rm, NULL, NULL, 0, NULL }, + { "rn", roff_unsupp, NULL, NULL, 0, NULL }, + { "rnn", roff_unsupp, NULL, NULL, 0, NULL }, + { "rr", roff_rr, NULL, NULL, 0, NULL }, + { "rs", roff_line_ignore, NULL, NULL, 0, NULL }, + { "rt", roff_line_ignore, NULL, NULL, 0, NULL }, + { "schar", roff_unsupp, NULL, NULL, 0, NULL }, + { "sentchar", roff_line_ignore, NULL, NULL, 0, NULL }, + { "shc", roff_line_ignore, NULL, NULL, 0, NULL }, + { "shift", roff_unsupp, NULL, NULL, 0, NULL }, + 
{ "sizes", roff_line_ignore, NULL, NULL, 0, NULL }, + { "so", roff_so, NULL, NULL, 0, NULL }, + { "spacewidth", roff_line_ignore, NULL, NULL, 0, NULL }, + { "special", roff_line_ignore, NULL, NULL, 0, NULL }, + { "spreadwarn", roff_line_ignore, NULL, NULL, 0, NULL }, + { "ss", roff_line_ignore, NULL, NULL, 0, NULL }, + { "sty", roff_line_ignore, NULL, NULL, 0, NULL }, + { "substring", roff_unsupp, NULL, NULL, 0, NULL }, + { "sv", roff_line_ignore, NULL, NULL, 0, NULL }, + { "sy", roff_insec, NULL, NULL, 0, NULL }, + { "T&", roff_T_, NULL, NULL, 0, NULL }, + { "ta", roff_unsupp, NULL, NULL, 0, NULL }, + { "tc", roff_unsupp, NULL, NULL, 0, NULL }, + { "TE", roff_TE, NULL, NULL, 0, NULL }, + { "TH", roff_TH, NULL, NULL, 0, NULL }, + { "ti", roff_unsupp, NULL, NULL, 0, NULL }, + { "tkf", roff_line_ignore, NULL, NULL, 0, NULL }, + { "tl", roff_unsupp, NULL, NULL, 0, NULL }, + { "tm", roff_line_ignore, NULL, NULL, 0, NULL }, + { "tm1", roff_line_ignore, NULL, NULL, 0, NULL }, + { "tmc", roff_line_ignore, NULL, NULL, 0, NULL }, + { "tr", roff_tr, NULL, NULL, 0, NULL }, + { "track", roff_line_ignore, NULL, NULL, 0, NULL }, + { "transchar", roff_line_ignore, NULL, NULL, 0, NULL }, + { "trf", roff_insec, NULL, NULL, 0, NULL }, + { "trimat", roff_line_ignore, NULL, NULL, 0, NULL }, + { "trin", roff_unsupp, NULL, NULL, 0, NULL }, + { "trnt", roff_unsupp, NULL, NULL, 0, NULL }, + { "troff", roff_line_ignore, NULL, NULL, 0, NULL }, + { "TS", roff_TS, NULL, NULL, 0, NULL }, + { "uf", roff_line_ignore, NULL, NULL, 0, NULL }, + { "ul", roff_line_ignore, NULL, NULL, 0, NULL }, + { "unformat", roff_unsupp, NULL, NULL, 0, NULL }, + { "unwatch", roff_line_ignore, NULL, NULL, 0, NULL }, + { "unwatchn", roff_line_ignore, NULL, NULL, 0, NULL }, + { "vpt", roff_line_ignore, NULL, NULL, 0, NULL }, + { "vs", roff_line_ignore, NULL, NULL, 0, NULL }, + { "warn", roff_line_ignore, NULL, NULL, 0, NULL }, + { "warnscale", roff_line_ignore, NULL, NULL, 0, NULL }, + { "watch", roff_line_ignore, 
NULL, NULL, 0, NULL }, + { "watchlength", roff_line_ignore, NULL, NULL, 0, NULL }, + { "watchn", roff_line_ignore, NULL, NULL, 0, NULL }, + { "wh", roff_unsupp, NULL, NULL, 0, NULL }, + { "while", roff_unsupp, NULL, NULL, 0, NULL }, + { "write", roff_insec, NULL, NULL, 0, NULL }, + { "writec", roff_insec, NULL, NULL, 0, NULL }, + { "writem", roff_insec, NULL, NULL, 0, NULL }, + { "xflag", roff_line_ignore, NULL, NULL, 0, NULL }, + { ".", roff_cblock, NULL, NULL, 0, NULL }, + { NULL, roff_userdef, NULL, NULL, 0, NULL }, +}; + +/* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */ +const char *const __mdoc_reserved[] = { + "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", + "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq", + "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx", + "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq", + "Dt", "Dv", "Dx", "D1", + "Ec", "Ed", "Ef", "Ek", "El", "Em", + "En", "Eo", "Er", "Es", "Ev", "Ex", + "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx", + "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", + "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx", + "Oc", "Oo", "Op", "Os", "Ot", "Ox", + "Pa", "Pc", "Pf", "Po", "Pp", "Pq", + "Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv", + "Sc", "Sh", "Sm", "So", "Sq", + "Ss", "St", "Sx", "Sy", + "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr", + "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O", + "%P", "%Q", "%R", "%T", "%U", "%V", + NULL +}; + +/* not currently implemented: BT DE DS ME MT PT SY TQ YS */ +const char *const __man_reserved[] = { + "AT", "B", "BI", "BR", "DT", + "EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR", + "LP", "OP", "P", "PD", "PP", + "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", + "TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR", + NULL +}; + +/* Array of injected predefined strings. 
*/ +#define PREDEFS_MAX 38 +static const struct predef predefs[PREDEFS_MAX] = { +#include "predefs.in" +}; + +/* See roffhash_find() */ +#define ROFF_HASH(p) (p[0] - ASCII_LO) + +static int roffit_lines; /* number of lines to delay */ +static char *roffit_macro; /* nil-terminated macro line */ + + +/* --- request table ------------------------------------------------------ */ + +static void +roffhash_init(void) +{ + struct roffmac *n; + int buc, i; + + for (i = 0; i < (int)ROFF_USERDEF; i++) { + assert(roffs[i].name[0] >= ASCII_LO); + assert(roffs[i].name[0] <= ASCII_HI); + + buc = ROFF_HASH(roffs[i].name); + + if (NULL != (n = hash[buc])) { + for ( ; n->next; n = n->next) + /* Do nothing. */ ; + n->next = &roffs[i]; + } else + hash[buc] = &roffs[i]; + } +} + +/* + * Look up a roff token by its name. Returns ROFF_MAX if no macro by + * the nil-terminated string name could be found. + */ +static enum rofft +roffhash_find(const char *p, size_t s) +{ + int buc; + struct roffmac *n; + + /* + * libroff has an extremely simple hashtable, for the time + * being, which simply keys on the first character, which must + * be printable, then walks a chain. It works well enough until + * optimised. + */ + + if (p[0] < ASCII_LO || p[0] > ASCII_HI) + return ROFF_MAX; + + buc = ROFF_HASH(p); + + if (NULL == (n = hash[buc])) + return ROFF_MAX; + for ( ; n; n = n->next) + if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s]) + return (enum rofft)(n - roffs); + + return ROFF_MAX; +} + +/* --- stack of request blocks -------------------------------------------- */ + +/* + * Pop the current node off of the stack of roff instructions currently + * pending. + */ +static void +roffnode_pop(struct roff *r) +{ + struct roffnode *p; + + assert(r->last); + p = r->last; + + r->last = r->last->parent; + free(p->name); + free(p->end); + free(p); +} + +/* + * Push a roff node onto the instruction stack. This must later be + * removed with roffnode_pop(). 
+ */ +static void +roffnode_push(struct roff *r, enum rofft tok, const char *name, + int line, int col) +{ + struct roffnode *p; + + p = mandoc_calloc(1, sizeof(struct roffnode)); + p->tok = tok; + if (name) + p->name = mandoc_strdup(name); + p->parent = r->last; + p->line = line; + p->col = col; + p->rule = p->parent ? p->parent->rule : 0; + + r->last = p; +} + +/* --- roff parser state data management ---------------------------------- */ + +static void +roff_free1(struct roff *r) +{ + struct tbl_node *tbl; + struct eqn_node *e; + int i; + + while (NULL != (tbl = r->first_tbl)) { + r->first_tbl = tbl->next; + tbl_free(tbl); + } + r->first_tbl = r->last_tbl = r->tbl = NULL; + + while (NULL != (e = r->first_eqn)) { + r->first_eqn = e->next; + eqn_free(e); + } + r->first_eqn = r->last_eqn = r->eqn = NULL; + + while (r->last) + roffnode_pop(r); + + free (r->rstack); + r->rstack = NULL; + r->rstacksz = 0; + r->rstackpos = -1; + + roff_freereg(r->regtab); + r->regtab = NULL; + + roff_freestr(r->strtab); + roff_freestr(r->xmbtab); + r->strtab = r->xmbtab = NULL; + + if (r->xtab) + for (i = 0; i < 128; i++) + free(r->xtab[i].p); + free(r->xtab); + r->xtab = NULL; +} + +void +roff_reset(struct roff *r) +{ + + roff_free1(r); + r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); + r->control = 0; +} + +void +roff_free(struct roff *r) +{ + + roff_free1(r); + free(r); +} + +struct roff * +roff_alloc(struct mparse *parse, int options) +{ + struct roff *r; + + r = mandoc_calloc(1, sizeof(struct roff)); + r->parse = parse; + r->options = options; + r->format = options & (MPARSE_MDOC | MPARSE_MAN); + r->rstackpos = -1; + + roffhash_init(); + + return r; +} + +/* --- syntax tree state data management ---------------------------------- */ + +static void +roff_man_free1(struct roff_man *man) +{ + + if (man->first != NULL) + roff_node_delete(man, man->first); + free(man->meta.msec); + free(man->meta.vol); + free(man->meta.os); + free(man->meta.arch); + free(man->meta.title); + 
free(man->meta.name); + free(man->meta.date); +} + +static void +roff_man_alloc1(struct roff_man *man) +{ + + memset(&man->meta, 0, sizeof(man->meta)); + man->first = mandoc_calloc(1, sizeof(*man->first)); + man->first->type = ROFFT_ROOT; + man->last = man->first; + man->last_es = NULL; + man->flags = 0; + man->macroset = MACROSET_NONE; + man->lastsec = man->lastnamed = SEC_NONE; + man->next = ROFF_NEXT_CHILD; +} + +void +roff_man_reset(struct roff_man *man) +{ + + roff_man_free1(man); + roff_man_alloc1(man); +} + +void +roff_man_free(struct roff_man *man) +{ + + roff_man_free1(man); + free(man); +} + +struct roff_man * +roff_man_alloc(struct roff *roff, struct mparse *parse, + const char *defos, int quick) +{ + struct roff_man *man; + + man = mandoc_calloc(1, sizeof(*man)); + man->parse = parse; + man->roff = roff; + man->defos = defos; + man->quick = quick; + roff_man_alloc1(man); + return man; +} + +/* --- syntax tree handling ----------------------------------------------- */ + +struct roff_node * +roff_node_alloc(struct roff_man *man, int line, int pos, + enum roff_type type, int tok) +{ + struct roff_node *n; + + n = mandoc_calloc(1, sizeof(*n)); + n->line = line; + n->pos = pos; + n->tok = tok; + n->type = type; + n->sec = man->lastsec; + + if (man->flags & MDOC_SYNOPSIS) + n->flags |= MDOC_SYNPRETTY; + else + n->flags &= ~MDOC_SYNPRETTY; + if (man->flags & MDOC_NEWLINE) + n->flags |= MDOC_LINE; + man->flags &= ~MDOC_NEWLINE; + + return n; +} + +void +roff_node_append(struct roff_man *man, struct roff_node *n) +{ + + switch (man->next) { + case ROFF_NEXT_SIBLING: + if (man->last->next != NULL) { + n->next = man->last->next; + man->last->next->prev = n; + } else + man->last->parent->last = n; + man->last->next = n; + n->prev = man->last; + n->parent = man->last->parent; + break; + case ROFF_NEXT_CHILD: + man->last->child = n; + n->parent = man->last; + n->parent->last = n; + break; + default: + abort(); + } + man->last = n; + + switch (n->type) { + case 
ROFFT_HEAD: + n->parent->head = n; + break; + case ROFFT_BODY: + if (n->end != ENDBODY_NOT) + return; + n->parent->body = n; + break; + case ROFFT_TAIL: + n->parent->tail = n; + break; + default: + return; + } + + /* + * Copy over the normalised-data pointer of our parent. Not + * everybody has one, but copying a null pointer is fine. + */ + + n->norm = n->parent->norm; + assert(n->parent->type == ROFFT_BLOCK); +} + +void +roff_word_alloc(struct roff_man *man, int line, int pos, const char *word) +{ + struct roff_node *n; + + n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE); + n->string = roff_strdup(man->roff, word); + roff_node_append(man, n); + if (man->macroset == MACROSET_MDOC) + n->flags |= MDOC_VALID | MDOC_ENDED; + else + n->flags |= MAN_VALID; + man->next = ROFF_NEXT_SIBLING; +} + +void +roff_word_append(struct roff_man *man, const char *word) +{ + struct roff_node *n; + char *addstr, *newstr; + + n = man->last; + addstr = roff_strdup(man->roff, word); + mandoc_asprintf(&newstr, "%s %s", n->string, addstr); + free(addstr); + free(n->string); + n->string = newstr; + man->next = ROFF_NEXT_SIBLING; +} + +void +roff_elem_alloc(struct roff_man *man, int line, int pos, int tok) +{ + struct roff_node *n; + + n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok); + roff_node_append(man, n); + man->next = ROFF_NEXT_CHILD; +} + +struct roff_node * +roff_block_alloc(struct roff_man *man, int line, int pos, int tok) +{ + struct roff_node *n; + + n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok); + roff_node_append(man, n); + man->next = ROFF_NEXT_CHILD; + return n; +} + +struct roff_node * +roff_head_alloc(struct roff_man *man, int line, int pos, int tok) +{ + struct roff_node *n; + + n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok); + roff_node_append(man, n); + man->next = ROFF_NEXT_CHILD; + return n; +} + +struct roff_node * +roff_body_alloc(struct roff_man *man, int line, int pos, int tok) +{ + struct roff_node *n; + + n = roff_node_alloc(man, 
line, pos, ROFFT_BODY, tok); + roff_node_append(man, n); + man->next = ROFF_NEXT_CHILD; + return n; +} + +void +roff_addeqn(struct roff_man *man, const struct eqn *eqn) +{ + struct roff_node *n; + + n = roff_node_alloc(man, eqn->ln, eqn->pos, ROFFT_EQN, TOKEN_NONE); + n->eqn = eqn; + if (eqn->ln > man->last->line) + n->flags |= MDOC_LINE; + roff_node_append(man, n); + man->next = ROFF_NEXT_SIBLING; +} + +void +roff_addtbl(struct roff_man *man, const struct tbl_span *tbl) +{ + struct roff_node *n; + + if (man->macroset == MACROSET_MAN) + man_breakscope(man, TOKEN_NONE); + n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE); + n->span = tbl; + roff_node_append(man, n); + if (man->macroset == MACROSET_MDOC) + n->flags |= MDOC_VALID | MDOC_ENDED; + else + n->flags |= MAN_VALID; + man->next = ROFF_NEXT_SIBLING; +} + +void +roff_node_unlink(struct roff_man *man, struct roff_node *n) +{ + + /* Adjust siblings. */ + + if (n->prev) + n->prev->next = n->next; + if (n->next) + n->next->prev = n->prev; + + /* Adjust parent. */ + + if (n->parent != NULL) { + if (n->parent->child == n) + n->parent->child = n->next; + if (n->parent->last == n) + n->parent->last = n->prev; + } + + /* Adjust parse point. 
*/ + + if (man == NULL) + return; + if (man->last == n) { + if (n->prev == NULL) { + man->last = n->parent; + man->next = ROFF_NEXT_CHILD; + } else { + man->last = n->prev; + man->next = ROFF_NEXT_SIBLING; + } + } + if (man->first == n) + man->first = NULL; +} + +void +roff_node_free(struct roff_node *n) +{ + + if (n->args != NULL) + mdoc_argv_free(n->args); + if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM) + free(n->norm); + free(n->string); + free(n); +} + +void +roff_node_delete(struct roff_man *man, struct roff_node *n) +{ + + while (n->child != NULL) + roff_node_delete(man, n->child); + roff_node_unlink(man, n); + roff_node_free(n); +} + +void +deroff(char **dest, const struct roff_node *n) +{ + char *cp; + size_t sz; + + if (n->type != ROFFT_TEXT) { + for (n = n->child; n != NULL; n = n->next) + deroff(dest, n); + return; + } + + /* Skip leading whitespace and escape sequences. */ + + cp = n->string; + while (*cp != '\0') { + if ('\\' == *cp) { + cp++; + mandoc_escape((const char **)&cp, NULL, NULL); + } else if (isspace((unsigned char)*cp)) + cp++; + else + break; + } + + /* Skip trailing whitespace. */ + + for (sz = strlen(cp); sz; sz--) + if ( ! isspace((unsigned char)cp[sz-1])) + break; + + /* Skip empty strings. */ + + if (sz == 0) + return; + + if (*dest == NULL) { + *dest = mandoc_strndup(cp, sz); + return; + } + + mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); + free(*dest); + *dest = cp; +} + +/* --- main functions of the roff parser ---------------------------------- */ + +/* + * In the current line, expand escape sequences that tend to get + * used in numerical expressions and conditional requests. + * Also check the syntax of the remaining escape sequences. 
+ */ +static enum rofferr +roff_res(struct roff *r, struct buf *buf, int ln, int pos) +{ + char ubuf[24]; /* buffer to print the number */ + const char *start; /* start of the string to process */ + char *stesc; /* start of an escape sequence ('\\') */ + const char *stnam; /* start of the name, after "[(*" */ + const char *cp; /* end of the name, e.g. before ']' */ + const char *res; /* the string to be substituted */ + char *nbuf; /* new buffer to copy buf->buf to */ + size_t maxl; /* expected length of the escape name */ + size_t naml; /* actual length of the escape name */ + enum mandoc_esc esc; /* type of the escape sequence */ + int inaml; /* length returned from mandoc_escape() */ + int expand_count; /* to avoid infinite loops */ + int npos; /* position in numeric expression */ + int arg_complete; /* argument not interrupted by eol */ + char term; /* character terminating the escape */ + + expand_count = 0; + start = buf->buf + pos; + stesc = strchr(start, '\0') - 1; + while (stesc-- > start) { + + /* Search backwards for the next backslash. */ + + if (*stesc != '\\') + continue; + + /* If it is escaped, skip it. */ + + for (cp = stesc - 1; cp >= start; cp--) + if (*cp != '\\') + break; + + if ((stesc - cp) % 2 == 0) { + stesc = (char *)cp; + continue; + } + + /* Decide whether to expand or to check only. 
*/ + + term = '\0'; + cp = stesc + 1; + switch (*cp) { + case '*': + res = NULL; + break; + case 'B': + case 'w': + term = cp[1]; + /* FALLTHROUGH */ + case 'n': + res = ubuf; + break; + default: + esc = mandoc_escape(&cp, &stnam, &inaml); + if (esc == ESCAPE_ERROR || + (esc == ESCAPE_SPECIAL && + mchars_spec2cp(stnam, inaml) < 0)) + mandoc_vmsg(MANDOCERR_ESC_BAD, + r->parse, ln, (int)(stesc - buf->buf), + "%.*s", (int)(cp - stesc), stesc); + continue; + } + + if (EXPAND_LIMIT < ++expand_count) { + mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, + ln, (int)(stesc - buf->buf), NULL); + return ROFF_IGN; + } + + /* + * The third character decides the length + * of the name of the string or register. + * Save a pointer to the name. + */ + + if (term == '\0') { + switch (*++cp) { + case '\0': + maxl = 0; + break; + case '(': + cp++; + maxl = 2; + break; + case '[': + cp++; + term = ']'; + maxl = 0; + break; + default: + maxl = 1; + break; + } + } else { + cp += 2; + maxl = 0; + } + stnam = cp; + + /* Advance to the end of the name. */ + + naml = 0; + arg_complete = 1; + while (maxl == 0 || naml < maxl) { + if (*cp == '\0') { + mandoc_msg(MANDOCERR_ESC_BAD, r->parse, + ln, (int)(stesc - buf->buf), stesc); + arg_complete = 0; + break; + } + if (maxl == 0 && *cp == term) { + cp++; + break; + } + if (*cp++ != '\\' || stesc[1] != 'w') { + naml++; + continue; + } + switch (mandoc_escape(&cp, NULL, NULL)) { + case ESCAPE_SPECIAL: + case ESCAPE_UNICODE: + case ESCAPE_NUMBERED: + case ESCAPE_OVERSTRIKE: + naml++; + break; + default: + break; + } + } + + /* + * Retrieve the replacement string; if it is + * undefined, resume searching for escapes. + */ + + switch (stesc[1]) { + case '*': + if (arg_complete) + res = roff_getstrn(r, stnam, naml); + break; + case 'B': + npos = 0; + ubuf[0] = arg_complete && + roff_evalnum(r, ln, stnam, &npos, + NULL, ROFFNUM_SCALE) && + stnam + npos + 1 == cp ? 
'1' : '0'; + ubuf[1] = '\0'; + break; + case 'n': + if (arg_complete) + (void)snprintf(ubuf, sizeof(ubuf), "%d", + roff_getregn(r, stnam, naml)); + else + ubuf[0] = '\0'; + break; + case 'w': + /* use even incomplete args */ + (void)snprintf(ubuf, sizeof(ubuf), "%d", + 24 * (int)naml); + break; + } + + if (res == NULL) { + mandoc_vmsg(MANDOCERR_STR_UNDEF, + r->parse, ln, (int)(stesc - buf->buf), + "%.*s", (int)naml, stnam); + res = ""; + } else if (buf->sz + strlen(res) > SHRT_MAX) { + mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, + ln, (int)(stesc - buf->buf), NULL); + return ROFF_IGN; + } + + /* Replace the escape sequence by the string. */ + + *stesc = '\0'; + buf->sz = mandoc_asprintf(&nbuf, "%s%s%s", + buf->buf, res, cp) + 1; + + /* Prepare for the next replacement. */ + + start = nbuf + pos; + stesc = nbuf + (stesc - buf->buf) + strlen(res); + free(buf->buf); + buf->buf = nbuf; + } + return ROFF_CONT; +} + +/* + * Process text streams. + */ +static enum rofferr +roff_parsetext(struct buf *buf, int pos, int *offs) +{ + size_t sz; + const char *start; + char *p; + int isz; + enum mandoc_esc esc; + + /* Spring the input line trap. */ + + if (roffit_lines == 1) { + isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro); + free(buf->buf); + buf->buf = p; + buf->sz = isz + 1; + *offs = 0; + free(roffit_macro); + roffit_lines = 0; + return ROFF_REPARSE; + } else if (roffit_lines > 1) + --roffit_lines; + + /* Convert all breakable hyphens into ASCII_HYPH. */ + + start = p = buf->buf + pos; + + while (*p != '\0') { + sz = strcspn(p, "-\\"); + p += sz; + + if (*p == '\0') + break; + + if (*p == '\\') { + /* Skip over escapes. 
*/ + p++; + esc = mandoc_escape((const char **)&p, NULL, NULL); + if (esc == ESCAPE_ERROR) + break; + while (*p == '-') + p++; + continue; + } else if (p == start) { + p++; + continue; + } + + if (isalpha((unsigned char)p[-1]) && + isalpha((unsigned char)p[1])) + *p = ASCII_HYPH; + p++; + } + return ROFF_CONT; +} + +enum rofferr +roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs) +{ + enum rofft t; + enum rofferr e; + int pos; /* parse point */ + int spos; /* saved parse point for messages */ + int ppos; /* original offset in buf->buf */ + int ctl; /* macro line (boolean) */ + + ppos = pos = *offs; + + /* Handle in-line equation delimiters. */ + + if (r->tbl == NULL && + r->last_eqn != NULL && r->last_eqn->delim && + (r->eqn == NULL || r->eqn_inline)) { + e = roff_eqndelim(r, buf, pos); + if (e == ROFF_REPARSE) + return e; + assert(e == ROFF_CONT); + } + + /* Expand some escape sequences. */ + + e = roff_res(r, buf, ln, pos); + if (e == ROFF_IGN) + return e; + assert(e == ROFF_CONT); + + ctl = roff_getcontrol(r, buf->buf, &pos); + + /* + * First, if a scope is open and we're not a macro, pass the + * text through the macro's filter. + * Equations process all content themselves. + * Tables process almost all content themselves, but we want + * to warn about macros before passing it there. + */ + + if (r->last != NULL && ! ctl) { + t = r->last->tok; + assert(roffs[t].text); + e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs); + assert(e == ROFF_IGN || e == ROFF_CONT); + if (e != ROFF_CONT) + return e; + } + if (r->eqn != NULL) + return eqn_read(&r->eqn, ln, buf->buf, ppos, offs); + if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0')) + return tbl_read(r->tbl, ln, buf->buf, ppos); + if ( ! ctl) + return roff_parsetext(buf, pos, offs); + + /* Skip empty request lines. 
*/ + + if (buf->buf[pos] == '"') { + mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse, + ln, pos, NULL); + return ROFF_IGN; + } else if (buf->buf[pos] == '\0') + return ROFF_IGN; + + /* + * If a scope is open, go to the child handler for that macro, + * as it may want to preprocess before doing anything with it. + * Don't do so if an equation is open. + */ + + if (r->last) { + t = r->last->tok; + assert(roffs[t].sub); + return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs); + } + + /* No scope is open. This is a new request or macro. */ + + spos = pos; + t = roff_parse(r, buf->buf, &pos, ln, ppos); + + /* Tables ignore most macros. */ + + if (r->tbl != NULL && (t == ROFF_MAX || t == ROFF_TS)) { + mandoc_msg(MANDOCERR_TBLMACRO, r->parse, + ln, pos, buf->buf + spos); + if (t == ROFF_TS) + return ROFF_IGN; + while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ') + pos++; + while (buf->buf[pos] != '\0' && buf->buf[pos] == ' ') + pos++; + return tbl_read(r->tbl, ln, buf->buf, pos); + } + + /* + * This is neither a roff request nor a user-defined macro. + * Let the standard macro set parsers handle it. + */ + + if (t == ROFF_MAX) + return ROFF_CONT; + + /* Execute a roff request or a user defined macro. */ + + assert(roffs[t].proc); + return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); +} + +void +roff_endparse(struct roff *r) +{ + + if (r->last) + mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, + r->last->line, r->last->col, + roffs[r->last->tok].name); + + if (r->eqn) { + mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, + r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ"); + eqn_end(&r->eqn); + } + + if (r->tbl) { + mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, + r->tbl->line, r->tbl->pos, "TS"); + tbl_end(&r->tbl); + } +} + +/* + * Parse a roff node's type from the input buffer. This must be in the + * form of ".foo xxx" in the usual way. 
+ */ +static enum rofft +roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos) +{ + char *cp; + const char *mac; + size_t maclen; + enum rofft t; + + cp = buf + *pos; + + if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp) + return ROFF_MAX; + + mac = cp; + maclen = roff_getname(r, &cp, ln, ppos); + + t = (r->current_string = roff_getstrn(r, mac, maclen)) + ? ROFF_USERDEF : roffhash_find(mac, maclen); + + if (ROFF_MAX != t) + *pos = cp - buf; + + return t; +} + +/* --- handling of request blocks ----------------------------------------- */ + +static enum rofferr +roff_cblock(ROFF_ARGS) +{ + + /* + * A block-close `..' should only be invoked as a child of an + * ignore macro, otherwise raise a warning and just ignore it. + */ + + if (r->last == NULL) { + mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, + ln, ppos, ".."); + return ROFF_IGN; + } + + switch (r->last->tok) { + case ROFF_am: + /* ROFF_am1 is remapped to ROFF_am in roff_block(). */ + case ROFF_ami: + case ROFF_de: + /* ROFF_de1 is remapped to ROFF_de in roff_block(). */ + case ROFF_dei: + case ROFF_ig: + break; + default: + mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, + ln, ppos, ".."); + return ROFF_IGN; + } + + if (buf->buf[pos] != '\0') + mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, + ".. 
%s", buf->buf + pos); + + roffnode_pop(r); + roffnode_cleanscope(r); + return ROFF_IGN; + +} + +static void +roffnode_cleanscope(struct roff *r) +{ + + while (r->last) { + if (--r->last->endspan != 0) + break; + roffnode_pop(r); + } +} + +static void +roff_ccond(struct roff *r, int ln, int ppos) +{ + + if (NULL == r->last) { + mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, + ln, ppos, "\\}"); + return; + } + + switch (r->last->tok) { + case ROFF_el: + case ROFF_ie: + case ROFF_if: + break; + default: + mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, + ln, ppos, "\\}"); + return; + } + + if (r->last->endspan > -1) { + mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, + ln, ppos, "\\}"); + return; + } + + roffnode_pop(r); + roffnode_cleanscope(r); + return; +} + +static enum rofferr +roff_block(ROFF_ARGS) +{ + const char *name; + char *iname, *cp; + size_t namesz; + + /* Ignore groff compatibility mode for now. */ + + if (tok == ROFF_de1) + tok = ROFF_de; + else if (tok == ROFF_dei1) + tok = ROFF_dei; + else if (tok == ROFF_am1) + tok = ROFF_am; + else if (tok == ROFF_ami1) + tok = ROFF_ami; + + /* Parse the macro name argument. */ + + cp = buf->buf + pos; + if (tok == ROFF_ig) { + iname = NULL; + namesz = 0; + } else { + iname = cp; + namesz = roff_getname(r, &cp, ln, ppos); + iname[namesz] = '\0'; + } + + /* Resolve the macro name argument if it is indirect. */ + + if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { + if ((name = roff_getstrn(r, iname, namesz)) == NULL) { + mandoc_vmsg(MANDOCERR_STR_UNDEF, + r->parse, ln, (int)(iname - buf->buf), + "%.*s", (int)namesz, iname); + namesz = 0; + } else + namesz = strlen(name); + } else + name = iname; + + if (namesz == 0 && tok != ROFF_ig) { + mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, + ln, ppos, roffs[tok].name); + return ROFF_IGN; + } + + roffnode_push(r, tok, name, ln, ppos); + + /* + * At the beginning of a `de' macro, clear the existing string + * with the same name, if there is one. 
New content will be + * appended from roff_block_text() in multiline mode. + */ + + if (tok == ROFF_de || tok == ROFF_dei) + roff_setstrn(&r->strtab, name, namesz, "", 0, 0); + + if (*cp == '\0') + return ROFF_IGN; + + /* Get the custom end marker. */ + + iname = cp; + namesz = roff_getname(r, &cp, ln, ppos); + + /* Resolve the end marker if it is indirect. */ + + if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { + if ((name = roff_getstrn(r, iname, namesz)) == NULL) { + mandoc_vmsg(MANDOCERR_STR_UNDEF, + r->parse, ln, (int)(iname - buf->buf), + "%.*s", (int)namesz, iname); + namesz = 0; + } else + namesz = strlen(name); + } else + name = iname; + + if (namesz) + r->last->end = mandoc_strndup(name, namesz); + + if (*cp != '\0') + mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse, + ln, pos, ".%s ... %s", roffs[tok].name, cp); + + return ROFF_IGN; +} + +static enum rofferr +roff_block_sub(ROFF_ARGS) +{ + enum rofft t; + int i, j; + + /* + * First check whether a custom macro exists at this level. If + * it does, then check against it. This is some of groff's + * stranger behaviours. If we encountered a custom end-scope + * tag and that tag also happens to be a "real" macro, then we + * need to try interpreting it again as a real macro. If it's + * not, then return ignore. Else continue. + */ + + if (r->last->end) { + for (i = pos, j = 0; r->last->end[j]; j++, i++) + if (buf->buf[i] != r->last->end[j]) + break; + + if (r->last->end[j] == '\0' && + (buf->buf[i] == '\0' || + buf->buf[i] == ' ' || + buf->buf[i] == '\t')) { + roffnode_pop(r); + roffnode_cleanscope(r); + + while (buf->buf[i] == ' ' || buf->buf[i] == '\t') + i++; + + pos = i; + if (roff_parse(r, buf->buf, &pos, ln, ppos) != + ROFF_MAX) + return ROFF_RERUN; + return ROFF_IGN; + } + } + + /* + * If we have no custom end-query or lookup failed, then try + * pulling it out of the hashtable. 
+ */ + + t = roff_parse(r, buf->buf, &pos, ln, ppos); + + if (t != ROFF_cblock) { + if (tok != ROFF_ig) + roff_setstr(r, r->last->name, buf->buf + ppos, 2); + return ROFF_IGN; + } + + assert(roffs[t].proc); + return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); +} + +static enum rofferr +roff_block_text(ROFF_ARGS) +{ + + if (tok != ROFF_ig) + roff_setstr(r, r->last->name, buf->buf + pos, 2); + + return ROFF_IGN; +} + +static enum rofferr +roff_cond_sub(ROFF_ARGS) +{ + enum rofft t; + char *ep; + int rr; + + rr = r->last->rule; + roffnode_cleanscope(r); + t = roff_parse(r, buf->buf, &pos, ln, ppos); + + /* + * Fully handle known macros when they are structurally + * required or when the conditional evaluated to true. + */ + + if ((t != ROFF_MAX) && + (rr || roffs[t].flags & ROFFMAC_STRUCT)) { + assert(roffs[t].proc); + return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); + } + + /* + * If `\}' occurs on a macro line without a preceding macro, + * drop the line completely. + */ + + ep = buf->buf + pos; + if (ep[0] == '\\' && ep[1] == '}') + rr = 0; + + /* Always check for the closing delimiter `\}'. */ + + while ((ep = strchr(ep, '\\')) != NULL) { + if (*(++ep) == '}') { + *ep = '&'; + roff_ccond(r, ln, ep - buf->buf - 1); + } + if (*ep != '\0') + ++ep; + } + return rr ? ROFF_CONT : ROFF_IGN; +} + +static enum rofferr +roff_cond_text(ROFF_ARGS) +{ + char *ep; + int rr; + + rr = r->last->rule; + roffnode_cleanscope(r); + + ep = buf->buf + pos; + while ((ep = strchr(ep, '\\')) != NULL) { + if (*(++ep) == '}') { + *ep = '&'; + roff_ccond(r, ln, ep - buf->buf - 1); + } + if (*ep != '\0') + ++ep; + } + return rr ? ROFF_CONT : ROFF_IGN; +} + +/* --- handling of numeric and conditional expressions -------------------- */ + +/* + * Parse a single signed integer number. Stop at the first non-digit. + * If there is at least one digit, return success and advance the + * parse point, else return failure and leave the parse point unchanged.
+ * Ignore overflows, treat them just like the C language. + */ +static int +roff_getnum(const char *v, int *pos, int *res, int flags) +{ + int myres, scaled, n, p; + + if (NULL == res) + res = &myres; + + p = *pos; + n = v[p] == '-'; + if (n || v[p] == '+') + p++; + + if (flags & ROFFNUM_WHITE) + while (isspace((unsigned char)v[p])) + p++; + + for (*res = 0; isdigit((unsigned char)v[p]); p++) + *res = 10 * *res + v[p] - '0'; + if (p == *pos + n) + return 0; + + if (n) + *res = -*res; + + /* Each number may be followed by one optional scaling unit. */ + + switch (v[p]) { + case 'f': + scaled = *res * 65536; + break; + case 'i': + scaled = *res * 240; + break; + case 'c': + scaled = *res * 240 / 2.54; + break; + case 'v': + case 'P': + scaled = *res * 40; + break; + case 'm': + case 'n': + scaled = *res * 24; + break; + case 'p': + scaled = *res * 10 / 3; + break; + case 'u': + scaled = *res; + break; + case 'M': + scaled = *res * 6 / 25; + break; + default: + scaled = *res; + p--; + break; + } + if (flags & ROFFNUM_SCALE) + *res = scaled; + + *pos = p + 1; + return 1; +} + +/* + * Evaluate a string comparison condition. + * The first character is the delimiter. + * Succeed if the string up to its second occurrence + * matches the string up to its third occurrence. + * Advance the cursor after the third occurrence + * or lacking that, to the end of the line.
+ */ +static int +roff_evalstrcond(const char *v, int *pos) +{ + const char *s1, *s2, *s3; + int match; + + match = 0; + s1 = v + *pos; /* initial delimiter */ + s2 = s1 + 1; /* for scanning the first string */ + s3 = strchr(s2, *s1); /* for scanning the second string */ + + if (NULL == s3) /* found no middle delimiter */ + goto out; + + while ('\0' != *++s3) { + if (*s2 != *s3) { /* mismatch */ + s3 = strchr(s3, *s1); + break; + } + if (*s3 == *s1) { /* found the final delimiter */ + match = 1; + break; + } + s2++; + } + +out: + if (NULL == s3) + s3 = strchr(s2, '\0'); + else if (*s3 != '\0') + s3++; + *pos = s3 - v; + return match; +} + +/* + * Evaluate an optionally negated single character, numerical, + * or string condition. + */ +static int +roff_evalcond(struct roff *r, int ln, char *v, int *pos) +{ + char *cp, *name; + size_t sz; + int number, savepos, wanttrue; + + if ('!' == v[*pos]) { + wanttrue = 0; + (*pos)++; + } else + wanttrue = 1; + + switch (v[*pos]) { + case '\0': + return 0; + case 'n': + case 'o': + (*pos)++; + return wanttrue; + case 'c': + case 'd': + case 'e': + case 't': + case 'v': + (*pos)++; + return !wanttrue; + case 'r': + cp = name = v + ++*pos; + sz = roff_getname(r, &cp, ln, *pos); + *pos = cp - v; + return (sz && roff_hasregn(r, name, sz)) == wanttrue; + default: + break; + } + + savepos = *pos; + if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE)) + return (number > 0) == wanttrue; + else if (*pos == savepos) + return roff_evalstrcond(v, pos) == wanttrue; + else + return 0; +} + +static enum rofferr +roff_line_ignore(ROFF_ARGS) +{ + + return ROFF_IGN; +} + +static enum rofferr +roff_insec(ROFF_ARGS) +{ + + mandoc_msg(MANDOCERR_REQ_INSEC, r->parse, + ln, ppos, roffs[tok].name); + return ROFF_IGN; +} + +static enum rofferr +roff_unsupp(ROFF_ARGS) +{ + + mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse, + ln, ppos, roffs[tok].name); + return ROFF_IGN; +} + +static enum rofferr +roff_cond(ROFF_ARGS) +{ + + roffnode_push(r, tok, NULL, 
ln, ppos); + + /* + * An `.el' has no conditional body: it will consume the value + * of the current rstack entry set in prior `ie' calls or + * defaults to DENY. + * + * If we're not an `el', however, then evaluate the conditional. + */ + + r->last->rule = tok == ROFF_el ? + (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) : + roff_evalcond(r, ln, buf->buf, &pos); + + /* + * An if-else will put the NEGATION of the current evaluated + * conditional into the stack of rules. + */ + + if (tok == ROFF_ie) { + if (r->rstackpos + 1 == r->rstacksz) { + r->rstacksz += 16; + r->rstack = mandoc_reallocarray(r->rstack, + r->rstacksz, sizeof(int)); + } + r->rstack[++r->rstackpos] = !r->last->rule; + } + + /* If the parent has false as its rule, then so do we. */ + + if (r->last->parent && !r->last->parent->rule) + r->last->rule = 0; + + /* + * Determine scope. + * If there is nothing on the line after the conditional, + * not even whitespace, use next-line scope. + */ + + if (buf->buf[pos] == '\0') { + r->last->endspan = 2; + goto out; + } + + while (buf->buf[pos] == ' ') + pos++; + + /* An opening brace requests multiline scope. */ + + if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { + r->last->endspan = -1; + pos += 2; + while (buf->buf[pos] == ' ') + pos++; + goto out; + } + + /* + * Anything else following the conditional causes + * single-line scope. Warn if the scope contains + * nothing but trailing whitespace. + */ + + if (buf->buf[pos] == '\0') + mandoc_msg(MANDOCERR_COND_EMPTY, r->parse, + ln, ppos, roffs[tok].name); + + r->last->endspan = 1; + +out: + *offs = pos; + return ROFF_RERUN; +} + +static enum rofferr +roff_ds(ROFF_ARGS) +{ + char *string; + const char *name; + size_t namesz; + + /* Ignore groff compatibility mode for now. */ + + if (tok == ROFF_ds1) + tok = ROFF_ds; + else if (tok == ROFF_as1) + tok = ROFF_as; + + /* + * The first word is the name of the string. 
+ * If it is empty or terminated by an escape sequence, + * abort the `ds' request without defining anything. + */ + + name = string = buf->buf + pos; + if (*name == '\0') + return ROFF_IGN; + + namesz = roff_getname(r, &string, ln, pos); + if (name[namesz] == '\\') + return ROFF_IGN; + + /* Read past the initial double-quote, if any. */ + if (*string == '"') + string++; + + /* The rest is the value. */ + roff_setstrn(&r->strtab, name, namesz, string, strlen(string), + ROFF_as == tok); + return ROFF_IGN; +} + +/* + * Parse a single operator, one or two characters long. + * If the operator is recognized, return success and advance the + * parse point, else return failure and leave the parse point unchanged. + */ +static int +roff_getop(const char *v, int *pos, char *res) +{ + + *res = v[*pos]; + + switch (*res) { + case '+': + case '-': + case '*': + case '/': + case '%': + case '&': + case ':': + break; + case '<': + switch (v[*pos + 1]) { + case '=': + *res = 'l'; + (*pos)++; + break; + case '>': + *res = '!'; + (*pos)++; + break; + case '?': + *res = 'i'; + (*pos)++; + break; + default: + break; + } + break; + case '>': + switch (v[*pos + 1]) { + case '=': + *res = 'g'; + (*pos)++; + break; + case '?': + *res = 'a'; + (*pos)++; + break; + default: + break; + } + break; + case '=': + if ('=' == v[*pos + 1]) + (*pos)++; + break; + default: + return 0; + } + (*pos)++; + + return *res; +} + +/* + * Evaluate either a parenthesized numeric expression + * or a single signed integer number. + */ +static int +roff_evalpar(struct roff *r, int ln, + const char *v, int *pos, int *res, int flags) +{ + + if ('(' != v[*pos]) + return roff_getnum(v, pos, res, flags); + + (*pos)++; + if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE)) + return 0; + + /* + * Omission of the closing parenthesis + * is an error in validation mode, + * but ignored in evaluation mode.
+ */ + + if (')' == v[*pos]) + (*pos)++; + else if (NULL == res) + return 0; + + return 1; +} + +/* + * Evaluate a complete numeric expression. + * Proceed left to right, there is no concept of precedence. + */ +static int +roff_evalnum(struct roff *r, int ln, const char *v, + int *pos, int *res, int flags) +{ + int mypos, operand2; + char operator; + + if (NULL == pos) { + mypos = 0; + pos = &mypos; + } + + if (flags & ROFFNUM_WHITE) + while (isspace((unsigned char)v[*pos])) + (*pos)++; + + if ( ! roff_evalpar(r, ln, v, pos, res, flags)) + return 0; + + while (1) { + if (flags & ROFFNUM_WHITE) + while (isspace((unsigned char)v[*pos])) + (*pos)++; + + if ( ! roff_getop(v, pos, &operator)) + break; + + if (flags & ROFFNUM_WHITE) + while (isspace((unsigned char)v[*pos])) + (*pos)++; + + if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) + return 0; + + if (flags & ROFFNUM_WHITE) + while (isspace((unsigned char)v[*pos])) + (*pos)++; + + if (NULL == res) + continue; + + switch (operator) { + case '+': + *res += operand2; + break; + case '-': + *res -= operand2; + break; + case '*': + *res *= operand2; + break; + case '/': + if (operand2 == 0) { + mandoc_msg(MANDOCERR_DIVZERO, + r->parse, ln, *pos, v); + *res = 0; + break; + } + *res /= operand2; + break; + case '%': + if (operand2 == 0) { + mandoc_msg(MANDOCERR_DIVZERO, + r->parse, ln, *pos, v); + *res = 0; + break; + } + *res %= operand2; + break; + case '<': + *res = *res < operand2; + break; + case '>': + *res = *res > operand2; + break; + case 'l': + *res = *res <= operand2; + break; + case 'g': + *res = *res >= operand2; + break; + case '=': + *res = *res == operand2; + break; + case '!': + *res = *res != operand2; + break; + case '&': + *res = *res && operand2; + break; + case ':': + *res = *res || operand2; + break; + case 'i': + if (operand2 < *res) + *res = operand2; + break; + case 'a': + if (operand2 > *res) + *res = operand2; + break; + default: + abort(); + } + } + return 1; +} + +/* --- register 
management ------------------------------------------------ */ + +void +roff_setreg(struct roff *r, const char *name, int val, char sign) +{ + struct roffreg *reg; + + /* Search for an existing register with the same name. */ + reg = r->regtab; + + while (reg && strcmp(name, reg->key.p)) + reg = reg->next; + + if (NULL == reg) { + /* Create a new register. */ + reg = mandoc_malloc(sizeof(struct roffreg)); + reg->key.p = mandoc_strdup(name); + reg->key.sz = strlen(name); + reg->val = 0; + reg->next = r->regtab; + r->regtab = reg; + } + + if ('+' == sign) + reg->val += val; + else if ('-' == sign) + reg->val -= val; + else + reg->val = val; +} + +/* + * Handle some predefined read-only number registers. + * For now, return -1 if the requested register is not predefined; + * in case a predefined read-only register having the value -1 + * were to turn up, another special value would have to be chosen. + */ +static int +roff_getregro(const struct roff *r, const char *name) +{ + + switch (*name) { + case '$': /* Number of arguments of the last macro evaluated. */ + return r->argc; + case 'A': /* ASCII approximation mode is always off. */ + return 0; + case 'g': /* Groff compatibility mode is always on. */ + return 1; + case 'H': /* Fixed horizontal resolution. */ + return 24; + case 'j': /* Always adjust left margin only. */ + return 0; + case 'T': /* Some output device is always defined. */ + return 1; + case 'V': /* Fixed vertical resolution. */ + return 40; + default: + return -1; + } +} + +int +roff_getreg(const struct roff *r, const char *name) +{ + struct roffreg *reg; + int val; + + if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) { + val = roff_getregro(r, name + 1); + if (-1 != val) + return val; + } + + for (reg = r->regtab; reg; reg = reg->next) + if (0 == strcmp(name, reg->key.p)) + return reg->val; + + return 0; +} + +static int +roff_getregn(const struct roff *r, const char *name, size_t len) +{ + struct roffreg *reg; + int val; + + if ('.' 
== name[0] && 2 == len) { + val = roff_getregro(r, name + 1); + if (-1 != val) + return val; + } + + for (reg = r->regtab; reg; reg = reg->next) + if (len == reg->key.sz && + 0 == strncmp(name, reg->key.p, len)) + return reg->val; + + return 0; +} + +static int +roff_hasregn(const struct roff *r, const char *name, size_t len) +{ + struct roffreg *reg; + int val; + + if ('.' == name[0] && 2 == len) { + val = roff_getregro(r, name + 1); + if (-1 != val) + return 1; + } + + for (reg = r->regtab; reg; reg = reg->next) + if (len == reg->key.sz && + 0 == strncmp(name, reg->key.p, len)) + return 1; + + return 0; +} + +static void +roff_freereg(struct roffreg *reg) +{ + struct roffreg *old_reg; + + while (NULL != reg) { + free(reg->key.p); + old_reg = reg; + reg = reg->next; + free(old_reg); + } +} + +static enum rofferr +roff_nr(ROFF_ARGS) +{ + char *key, *val; + size_t keysz; + int iv; + char sign; + + key = val = buf->buf + pos; + if (*key == '\0') + return ROFF_IGN; + + keysz = roff_getname(r, &val, ln, pos); + if (key[keysz] == '\\') + return ROFF_IGN; + key[keysz] = '\0'; + + sign = *val; + if (sign == '+' || sign == '-') + val++; + + if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE)) + roff_setreg(r, key, iv, sign); + + return ROFF_IGN; +} + +static enum rofferr +roff_rr(ROFF_ARGS) +{ + struct roffreg *reg, **prev; + char *name, *cp; + size_t namesz; + + name = cp = buf->buf + pos; + if (*name == '\0') + return ROFF_IGN; + namesz = roff_getname(r, &cp, ln, pos); + name[namesz] = '\0'; + + prev = &r->regtab; + while (1) { + reg = *prev; + if (reg == NULL || !strcmp(name, reg->key.p)) + break; + prev = &reg->next; + } + if (reg != NULL) { + *prev = reg->next; + free(reg->key.p); + free(reg); + } + return ROFF_IGN; +} + +/* --- handler functions for roff requests -------------------------------- */ + +static enum rofferr +roff_rm(ROFF_ARGS) +{ + const char *name; + char *cp; + size_t namesz; + + cp = buf->buf + pos; + while (*cp != '\0') { + name = cp; + namesz =
roff_getname(r, &cp, ln, (int)(cp - buf->buf)); + roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); + if (name[namesz] == '\\') + break; + } + return ROFF_IGN; +} + +static enum rofferr +roff_it(ROFF_ARGS) +{ + int iv; + + /* Parse the number of lines. */ + + if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { + mandoc_msg(MANDOCERR_IT_NONUM, r->parse, + ln, ppos, buf->buf + 1); + return ROFF_IGN; + } + + while (isspace((unsigned char)buf->buf[pos])) + pos++; + + /* + * Arm the input line trap. + * Special-casing "an-trap" is an ugly workaround to cope + * with DocBook stupidly fiddling with man(7) internals. + */ + + roffit_lines = iv; + roffit_macro = mandoc_strdup(iv != 1 || + strcmp(buf->buf + pos, "an-trap") ? + buf->buf + pos : "br"); + return ROFF_IGN; +} + +static enum rofferr +roff_Dd(ROFF_ARGS) +{ + const char *const *cp; + + if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0) + for (cp = __mdoc_reserved; *cp; cp++) + roff_setstr(r, *cp, NULL, 0); + + if (r->format == 0) + r->format = MPARSE_MDOC; + + return ROFF_CONT; +} + +static enum rofferr +roff_TH(ROFF_ARGS) +{ + const char *const *cp; + + if ((r->options & MPARSE_QUICK) == 0) + for (cp = __man_reserved; *cp; cp++) + roff_setstr(r, *cp, NULL, 0); + + if (r->format == 0) + r->format = MPARSE_MAN; + + return ROFF_CONT; +} + +static enum rofferr +roff_TE(ROFF_ARGS) +{ + + if (NULL == r->tbl) + mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, + ln, ppos, "TE"); + else if ( ! tbl_end(&r->tbl)) { + free(buf->buf); + buf->buf = mandoc_strdup(".sp"); + buf->sz = 4; + return ROFF_REPARSE; + } + return ROFF_IGN; +} + +static enum rofferr +roff_T_(ROFF_ARGS) +{ + + if (NULL == r->tbl) + mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, + ln, ppos, "T&"); + else + tbl_restart(ppos, ln, r->tbl); + + return ROFF_IGN; +} + +/* + * Handle in-line equation delimiters. 
+ */ +static enum rofferr +roff_eqndelim(struct roff *r, struct buf *buf, int pos) +{ + char *cp1, *cp2; + const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; + + /* + * Outside equations, look for an opening delimiter. + * If we are inside an equation, we already know it is + * in-line, or this function wouldn't have been called; + * so look for a closing delimiter. + */ + + cp1 = buf->buf + pos; + cp2 = strchr(cp1, r->eqn == NULL ? + r->last_eqn->odelim : r->last_eqn->cdelim); + if (cp2 == NULL) + return ROFF_CONT; + + *cp2++ = '\0'; + bef_pr = bef_nl = aft_nl = aft_pr = ""; + + /* Handle preceding text, protecting whitespace. */ + + if (*buf->buf != '\0') { + if (r->eqn == NULL) + bef_pr = "\\&"; + bef_nl = "\n"; + } + + /* + * Prepare replacing the delimiter with an equation macro + * and drop leading white space from the equation. + */ + + if (r->eqn == NULL) { + while (*cp2 == ' ') + cp2++; + mac = ".EQ"; + } else + mac = ".EN"; + + /* Handle following text, protecting whitespace. */ + + if (*cp2 != '\0') { + aft_nl = "\n"; + if (r->eqn != NULL) + aft_pr = "\\&"; + } + + /* Do the actual replacement. */ + + buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, + bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; + free(buf->buf); + buf->buf = cp1; + + /* Toggle the in-line state of the eqn subsystem. 
*/ + + r->eqn_inline = r->eqn == NULL; + return ROFF_REPARSE; +} + +static enum rofferr +roff_EQ(ROFF_ARGS) +{ + struct eqn_node *e; + + assert(r->eqn == NULL); + e = eqn_alloc(ppos, ln, r->parse); + + if (r->last_eqn) { + r->last_eqn->next = e; + e->delim = r->last_eqn->delim; + e->odelim = r->last_eqn->odelim; + e->cdelim = r->last_eqn->cdelim; + } else + r->first_eqn = r->last_eqn = e; + + r->eqn = r->last_eqn = e; + + if (buf->buf[pos] != '\0') + mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, + ".EQ %s", buf->buf + pos); + + return ROFF_IGN; +} + +static enum rofferr +roff_EN(ROFF_ARGS) +{ + + mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN"); + return ROFF_IGN; +} + +static enum rofferr +roff_TS(ROFF_ARGS) +{ + struct tbl_node *tbl; + + if (r->tbl) { + mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse, + ln, ppos, "TS breaks TS"); + tbl_end(&r->tbl); + } + + tbl = tbl_alloc(ppos, ln, r->parse); + + if (r->last_tbl) + r->last_tbl->next = tbl; + else + r->first_tbl = r->last_tbl = tbl; + + r->tbl = r->last_tbl = tbl; + return ROFF_IGN; +} + +static enum rofferr +roff_brp(ROFF_ARGS) +{ + + buf->buf[pos - 1] = '\0'; + return ROFF_CONT; +} + +static enum rofferr +roff_cc(ROFF_ARGS) +{ + const char *p; + + p = buf->buf + pos; + + if (*p == '\0' || (r->control = *p++) == '.') + r->control = 0; + + if (*p != '\0') + mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse, + ln, p - buf->buf, "cc ... 
%s", p); + + return ROFF_IGN; +} + +static enum rofferr +roff_tr(ROFF_ARGS) +{ + const char *p, *first, *second; + size_t fsz, ssz; + enum mandoc_esc esc; + + p = buf->buf + pos; + + if (*p == '\0') { + mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr"); + return ROFF_IGN; + } + + while (*p != '\0') { + fsz = ssz = 1; + + first = p++; + if (*first == '\\') { + esc = mandoc_escape(&p, NULL, NULL); + if (esc == ESCAPE_ERROR) { + mandoc_msg(MANDOCERR_ESC_BAD, r->parse, + ln, (int)(p - buf->buf), first); + return ROFF_IGN; + } + fsz = (size_t)(p - first); + } + + second = p++; + if (*second == '\\') { + esc = mandoc_escape(&p, NULL, NULL); + if (esc == ESCAPE_ERROR) { + mandoc_msg(MANDOCERR_ESC_BAD, r->parse, + ln, (int)(p - buf->buf), second); + return ROFF_IGN; + } + ssz = (size_t)(p - second); + } else if (*second == '\0') { + mandoc_vmsg(MANDOCERR_TR_ODD, r->parse, + ln, first - buf->buf, "tr %s", first); + second = " "; + p--; + } + + if (fsz > 1) { + roff_setstrn(&r->xmbtab, first, fsz, + second, ssz, 0); + continue; + } + + if (r->xtab == NULL) + r->xtab = mandoc_calloc(128, + sizeof(struct roffstr)); + + free(r->xtab[(int)*first].p); + r->xtab[(int)*first].p = mandoc_strndup(second, ssz); + r->xtab[(int)*first].sz = ssz; + } + + return ROFF_IGN; +} + +static enum rofferr +roff_so(ROFF_ARGS) +{ + char *name, *cp; + + name = buf->buf + pos; + mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name); + + /* + * Handle `so'. Be EXTREMELY careful, as we shouldn't be + * opening anything that's not in our cwd or anything beneath + * it. Thus, explicitly disallow traversing up the file-system + * or using absolute paths. 
+ */ + + if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { + mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos, + ".so %s", name); + buf->sz = mandoc_asprintf(&cp, + ".sp\nSee the file %s.\n.sp", name) + 1; + free(buf->buf); + buf->buf = cp; + *offs = 0; + return ROFF_REPARSE; + } + + *offs = pos; + return ROFF_SO; +} + +/* --- user defined strings and macros ------------------------------------ */ + +static enum rofferr +roff_userdef(ROFF_ARGS) +{ + const char *arg[9], *ap; + char *cp, *n1, *n2; + int i, ib, ie; + size_t asz, rsz; + + /* + * Collect pointers to macro argument strings + * and NUL-terminate them. + */ + + r->argc = 0; + cp = buf->buf + pos; + for (i = 0; i < 9; i++) { + if (*cp == '\0') + arg[i] = ""; + else { + arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos); + r->argc = i + 1; + } + } + + /* + * Expand macro arguments. + */ + + buf->sz = strlen(r->current_string) + 1; + n1 = cp = mandoc_malloc(buf->sz); + memcpy(n1, r->current_string, buf->sz); + while (*cp != '\0') { + + /* Scan ahead for the next argument invocation. */ + + if (*cp++ != '\\') + continue; + if (*cp++ != '$') + continue; + if (*cp == '*') { /* \\$* inserts all arguments */ + ib = 0; + ie = r->argc - 1; + } else { /* \\$1 .. \\$9 insert one argument */ + ib = ie = *cp - '1'; + if (ib < 0 || ib > 8) + continue; + } + cp -= 2; + + /* + * Determine the size of the expanded argument, + * taking escaping of quotes into account. + */ + + asz = ie > ib ? ie - ib : 0; /* for blanks */ + for (i = ib; i <= ie; i++) { + for (ap = arg[i]; *ap != '\0'; ap++) { + asz++; + if (*ap == '"') + asz += 3; + } + } + if (asz != 3) { + + /* + * Determine the size of the rest of the + * unexpanded macro, including the NUL. + */ + + rsz = buf->sz - (cp - n1) - 3; + + /* + * When shrinking, move before + * releasing the storage. + */ + + if (asz < 3) + memmove(cp + asz, cp + 3, rsz); + + /* + * Resize the storage for the macro + * and readjust the parse pointer. 
+ */ + + buf->sz += asz - 3; + n2 = mandoc_realloc(n1, buf->sz); + cp = n2 + (cp - n1); + n1 = n2; + + /* + * When growing, make room + * for the expanded argument. + */ + + if (asz > 3) + memmove(cp + asz, cp + 3, rsz); + } + + /* Copy the expanded argument, escaping quotes. */ + + n2 = cp; + for (i = ib; i <= ie; i++) { + for (ap = arg[i]; *ap != '\0'; ap++) { + if (*ap == '"') { + memcpy(n2, "\\(dq", 4); + n2 += 4; + } else + *n2++ = *ap; + } + if (i < ie) + *n2++ = ' '; + } + } + + /* + * Replace the macro invocation + * by the expanded macro. + */ + + free(buf->buf); + buf->buf = n1; + *offs = 0; + + return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ? + ROFF_REPARSE : ROFF_APPEND; +} + +static size_t +roff_getname(struct roff *r, char **cpp, int ln, int pos) +{ + char *name, *cp; + size_t namesz; + + name = *cpp; + if ('\0' == *name) + return 0; + + /* Read until end of name and terminate it with NUL. */ + for (cp = name; 1; cp++) { + if ('\0' == *cp || ' ' == *cp) { + namesz = cp - name; + break; + } + if ('\\' != *cp) + continue; + namesz = cp - name; + if ('{' == cp[1] || '}' == cp[1]) + break; + cp++; + if ('\\' == *cp) + continue; + mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos, + "%.*s", (int)(cp - name + 1), name); + mandoc_escape((const char **)&cp, NULL, NULL); + break; + } + + /* Read past spaces. */ + while (' ' == *cp) + cp++; + + *cpp = cp; + return namesz; +} + +/* + * Store *string into the user-defined string called *name. + * To clear an existing entry, call with (*r, *name, NULL, 0). + * append == 0: replace mode + * append == 1: single-line append mode + * append == 2: multiline append mode, append '\n' after each call + */ +static void +roff_setstr(struct roff *r, const char *name, const char *string, + int append) +{ + + roff_setstrn(&r->strtab, name, strlen(name), string, + string ? 
strlen(string) : 0, append); +} + +static void +roff_setstrn(struct roffkv **r, const char *name, size_t namesz, + const char *string, size_t stringsz, int append) +{ + struct roffkv *n; + char *c; + int i; + size_t oldch, newch; + + /* Search for an existing string with the same name. */ + n = *r; + + while (n && (namesz != n->key.sz || + strncmp(n->key.p, name, namesz))) + n = n->next; + + if (NULL == n) { + /* Create a new string table entry. */ + n = mandoc_malloc(sizeof(struct roffkv)); + n->key.p = mandoc_strndup(name, namesz); + n->key.sz = namesz; + n->val.p = NULL; + n->val.sz = 0; + n->next = *r; + *r = n; + } else if (0 == append) { + free(n->val.p); + n->val.p = NULL; + n->val.sz = 0; + } + + if (NULL == string) + return; + + /* + * One additional byte for the '\n' in multiline mode, + * and one for the terminating '\0'. + */ + newch = stringsz + (1 < append ? 2u : 1u); + + if (NULL == n->val.p) { + n->val.p = mandoc_malloc(newch); + *n->val.p = '\0'; + oldch = 0; + } else { + oldch = n->val.sz; + n->val.p = mandoc_realloc(n->val.p, oldch + newch); + } + + /* Skip existing content in the destination buffer. */ + c = n->val.p + (int)oldch; + + /* Append new content to the destination buffer. */ + i = 0; + while (i < (int)stringsz) { + /* + * Rudimentary roff copy mode: + * Handle escaped backslashes. + */ + if ('\\' == string[i] && '\\' == string[i + 1]) + i++; + *c++ = string[i++]; + } + + /* Append terminating bytes. 
*/ + if (1 < append) + *c++ = '\n'; + + *c = '\0'; + n->val.sz = (int)(c - n->val.p); +} + +static const char * +roff_getstrn(const struct roff *r, const char *name, size_t len) +{ + const struct roffkv *n; + int i; + + for (n = r->strtab; n; n = n->next) + if (0 == strncmp(name, n->key.p, len) && + '\0' == n->key.p[(int)len]) + return n->val.p; + + for (i = 0; i < PREDEFS_MAX; i++) + if (0 == strncmp(name, predefs[i].name, len) && + '\0' == predefs[i].name[(int)len]) + return predefs[i].str; + + return NULL; +} + +static void +roff_freestr(struct roffkv *r) +{ + struct roffkv *n, *nn; + + for (n = r; n; n = nn) { + free(n->key.p); + free(n->val.p); + nn = n->next; + free(n); + } +} + +/* --- accessors and utility functions ------------------------------------ */ + +const struct tbl_span * +roff_span(const struct roff *r) +{ + + return r->tbl ? tbl_span(r->tbl) : NULL; +} + +const struct eqn * +roff_eqn(const struct roff *r) +{ + + return r->last_eqn ? &r->last_eqn->eqn : NULL; +} + +/* + * Duplicate an input string, making the appropriate character + * conversations (as stipulated by `tr') along the way. + * Returns a heap-allocated string with all the replacements made. + */ +char * +roff_strdup(const struct roff *r, const char *p) +{ + const struct roffkv *cp; + char *res; + const char *pp; + size_t ssz, sz; + enum mandoc_esc esc; + + if (NULL == r->xmbtab && NULL == r->xtab) + return mandoc_strdup(p); + else if ('\0' == *p) + return mandoc_strdup(""); + + /* + * Step through each character looking for term matches + * (remember that a `tr' can be invoked with an escape, which is + * a glyph but the escape is multi-character). + * We only do this if the character hash has been initialised + * and the string is >0 length. 
+ */ + + res = NULL; + ssz = 0; + + while ('\0' != *p) { + if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) { + sz = r->xtab[(int)*p].sz; + res = mandoc_realloc(res, ssz + sz + 1); + memcpy(res + ssz, r->xtab[(int)*p].p, sz); + ssz += sz; + p++; + continue; + } else if ('\\' != *p) { + res = mandoc_realloc(res, ssz + 2); + res[ssz++] = *p++; + continue; + } + + /* Search for term matches. */ + for (cp = r->xmbtab; cp; cp = cp->next) + if (0 == strncmp(p, cp->key.p, cp->key.sz)) + break; + + if (NULL != cp) { + /* + * A match has been found. + * Append the match to the array and move + * forward by its keysize. + */ + res = mandoc_realloc(res, + ssz + cp->val.sz + 1); + memcpy(res + ssz, cp->val.p, cp->val.sz); + ssz += cp->val.sz; + p += (int)cp->key.sz; + continue; + } + + /* + * Handle escapes carefully: we need to copy + * over just the escape itself, or else we might + * do replacements within the escape itself. + * Make sure to pass along the bogus string. + */ + pp = p++; + esc = mandoc_escape(&p, NULL, NULL); + if (ESCAPE_ERROR == esc) { + sz = strlen(pp); + res = mandoc_realloc(res, ssz + sz + 1); + memcpy(res + ssz, pp, sz); + break; + } + /* + * We bail out on bad escapes. + * No need to warn: we already did so when + * roff_res() was called. + */ + sz = (int)(p - pp); + res = mandoc_realloc(res, ssz + sz + 1); + memcpy(res + ssz, pp, sz); + ssz += sz; + } + + res[(int)ssz] = '\0'; + return res; +} + +int +roff_getformat(const struct roff *r) +{ + + return r->format; +} + +/* + * Find out whether a line is a macro line or not. + * If it is, adjust the current position and return one; if it isn't, + * return zero and don't change the current position. + * If the control character has been set with `.cc', then let that grain + * precedence. + * This is slighly contrary to groff, where using the non-breaking + * control character when `cc' has been invoked will cause the + * non-breaking macro contents to be printed verbatim. 
+ */ +int +roff_getcontrol(const struct roff *r, const char *cp, int *ppos) +{ + int pos; + + pos = *ppos; + + if (0 != r->control && cp[pos] == r->control) + pos++; + else if (0 != r->control) + return 0; + else if ('\\' == cp[pos] && '.' == cp[pos + 1]) + pos += 2; + else if ('.' == cp[pos] || '\'' == cp[pos]) + pos++; + else + return 0; + + while (' ' == cp[pos] || '\t' == cp[pos]) + pos++; + + *ppos = pos; + return 1; +} diff --git a/contrib/mdocml/roff.h b/contrib/mdocml/roff.h new file mode 100644 index 0000000..19ec50f --- /dev/null +++ b/contrib/mdocml/roff.h @@ -0,0 +1,164 @@ +/* $OpenBSD$ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +struct mdoc_arg; +union mdoc_data; + +enum roff_macroset { + MACROSET_NONE = 0, + MACROSET_MDOC, + MACROSET_MAN +}; + +enum roff_sec { + SEC_NONE = 0, + SEC_NAME, + SEC_LIBRARY, + SEC_SYNOPSIS, + SEC_DESCRIPTION, + SEC_CONTEXT, + SEC_IMPLEMENTATION, /* IMPLEMENTATION NOTES */ + SEC_RETURN_VALUES, + SEC_ENVIRONMENT, + SEC_FILES, + SEC_EXIT_STATUS, + SEC_EXAMPLES, + SEC_DIAGNOSTICS, + SEC_COMPATIBILITY, + SEC_ERRORS, + SEC_SEE_ALSO, + SEC_STANDARDS, + SEC_HISTORY, + SEC_AUTHORS, + SEC_CAVEATS, + SEC_BUGS, + SEC_SECURITY, + SEC_CUSTOM, + SEC__MAX +}; + +enum roff_type { + ROFFT_ROOT, + ROFFT_BLOCK, + ROFFT_HEAD, + ROFFT_BODY, + ROFFT_TAIL, + ROFFT_ELEM, + ROFFT_TEXT, + ROFFT_TBL, + ROFFT_EQN +}; + +enum roff_next { + ROFF_NEXT_SIBLING = 0, + ROFF_NEXT_CHILD +}; + +/* + * Indicates that a BODY's formatting has ended, but + * the scope is still open. Used for badly nested blocks. + */ +enum mdoc_endbody { + ENDBODY_NOT = 0, + ENDBODY_SPACE, /* Is broken: append a space. */ + ENDBODY_NOSPACE /* Is broken: don't append a space. */ +}; + +struct roff_node { + struct roff_node *parent; /* Parent AST node. */ + struct roff_node *child; /* First child AST node. */ + struct roff_node *last; /* Last child AST node. */ + struct roff_node *next; /* Sibling AST node. */ + struct roff_node *prev; /* Prior sibling AST node. */ + struct roff_node *head; /* BLOCK */ + struct roff_node *body; /* BLOCK/ENDBODY */ + struct roff_node *tail; /* BLOCK */ + struct mdoc_arg *args; /* BLOCK/ELEM */ + union mdoc_data *norm; /* Normalized arguments. */ + char *string; /* TEXT */ + const struct tbl_span *span; /* TBL */ + const struct eqn *eqn; /* EQN */ + int line; /* Input file line number. */ + int pos; /* Input file column number. */ + int tok; /* Request or macro ID. */ +#define TOKEN_NONE (-1) /* No request or macro. */ + int flags; +#define MDOC_VALID (1 << 0) /* Has been validated. */ +#define MDOC_ENDED (1 << 1) /* Gone past body end mark. 
*/ +#define MDOC_EOS (1 << 2) /* At sentence boundary. */ +#define MDOC_LINE (1 << 3) /* First macro/text on line. */ +#define MDOC_SYNPRETTY (1 << 4) /* SYNOPSIS-style formatting. */ +#define MDOC_BROKEN (1 << 5) /* Must validate parent when ending. */ +#define MDOC_DELIMO (1 << 6) +#define MDOC_DELIMC (1 << 7) +#define MAN_VALID MDOC_VALID +#define MAN_EOS MDOC_EOS +#define MAN_LINE MDOC_LINE + int prev_font; /* Before entering this node. */ + int aux; /* Decoded node data, type-dependent. */ + enum roff_type type; /* AST node type. */ + enum roff_sec sec; /* Current named section. */ + enum mdoc_endbody end; /* BODY */ +}; + +struct roff_meta { + char *msec; /* Manual section, usually a digit. */ + char *vol; /* Manual volume title. */ + char *os; /* Operating system. */ + char *arch; /* Machine architecture. */ + char *title; /* Manual title, usually CAPS. */ + char *name; /* Leading manual name. */ + char *date; /* Normalized date. */ + int hasbody; /* Document is not empty. */ +}; + +struct roff_man { + struct roff_meta meta; /* Document meta-data. */ + struct mparse *parse; /* Parse pointer. */ + struct roff *roff; /* Roff parser state data. */ + const char *defos; /* Default operating system. */ + struct roff_node *first; /* The first node parsed. */ + struct roff_node *last; /* The last node parsed. */ + struct roff_node *last_es; /* The most recent Es node. */ + int quick; /* Abort parse early. */ + int flags; /* Parse flags. */ +#define MDOC_LITERAL (1 << 1) /* In a literal scope. */ +#define MDOC_PBODY (1 << 2) /* In the document body. */ +#define MDOC_NEWLINE (1 << 3) /* First macro/text in a line. */ +#define MDOC_PHRASE (1 << 4) /* In a Bl -column phrase. */ +#define MDOC_PHRASELIT (1 << 5) /* Literal within a phrase. */ +#define MDOC_FREECOL (1 << 6) /* `It' invocation should close. */ +#define MDOC_SYNOPSIS (1 << 7) /* SYNOPSIS-style formatting. */ +#define MDOC_KEEP (1 << 8) /* In a word keep. */ +#define MDOC_SMOFF (1 << 9) /* Spacing is off. 
*/ +#define MDOC_NODELIMC (1 << 10) /* Disable closing delimiter handling. */ +#define MAN_ELINE (1 << 11) /* Next-line element scope. */ +#define MAN_BLINE (1 << 12) /* Next-line block scope. */ +#define MDOC_PHRASEQF (1 << 13) /* Quote first word encountered. */ +#define MDOC_PHRASEQL (1 << 14) /* Quote last word of this phrase. */ +#define MDOC_PHRASEQN (1 << 15) /* Quote first word of the next phrase. */ +#define MAN_LITERAL MDOC_LITERAL +#define MAN_NEWLINE MDOC_NEWLINE + enum roff_macroset macroset; /* Kind of high-level macros used. */ + enum roff_sec lastsec; /* Last section seen. */ + enum roff_sec lastnamed; /* Last standard section seen. */ + enum roff_next next; /* Where to put the next node. */ +}; + + +void deroff(char **, const struct roff_node *); diff --git a/contrib/mdocml/roff_int.h b/contrib/mdocml/roff_int.h new file mode 100644 index 0000000..5567b75 --- /dev/null +++ b/contrib/mdocml/roff_int.h @@ -0,0 +1,41 @@ +/* $Id: roff_int.h,v 1.7 2015/11/07 14:01:16 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +struct roff_node *roff_node_alloc(struct roff_man *, int, int, + enum roff_type, int); +void roff_node_append(struct roff_man *, struct roff_node *); +void roff_word_alloc(struct roff_man *, int, int, const char *); +void roff_word_append(struct roff_man *, const char *); +void roff_elem_alloc(struct roff_man *, int, int, int); +struct roff_node *roff_block_alloc(struct roff_man *, int, int, int); +struct roff_node *roff_head_alloc(struct roff_man *, int, int, int); +struct roff_node *roff_body_alloc(struct roff_man *, int, int, int); +void roff_addeqn(struct roff_man *, const struct eqn *); +void roff_addtbl(struct roff_man *, const struct tbl_span *); +void roff_node_unlink(struct roff_man *, struct roff_node *); +void roff_node_free(struct roff_node *); +void roff_node_delete(struct roff_man *, struct roff_node *); + +/* + * Functions called from roff.c need to be declared here, + * not in libmdoc.h or libman.h, even if they are specific + * to either the mdoc(7) or the man(7) parser. + */ + +void man_breakscope(struct roff_man *, int); +void mdoc_argv_free(struct mdoc_arg *); diff --git a/contrib/mdocml/soelim.1 b/contrib/mdocml/soelim.1 new file mode 100644 index 0000000..20f15ec --- /dev/null +++ b/contrib/mdocml/soelim.1 @@ -0,0 +1,86 @@ +.\" $Id: soelim.1,v 1.3 2015/05/20 22:59:12 schwarze Exp $ +.\" +.\" Copyright (c) 2014 Baptiste Daroussin <bapt@FreeBSD.org> +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd $Mdocdate: May 20 2015 $ +.Dt SOELIM 1 +.Os +.Sh NAME +.Nm soelim +.Nd interpret .so requests in manpages +.Sh SYNOPSIS +.Nm +.Op Fl Crtv +.Op Fl I Ar dir +.Op Ar files ... +.Sh DESCRIPTION +.Nm +reads +.Ar files +lines by lines. +.Pp +If a line starts by: +.Dq .so anotherfile +it replace the line by processing +.Dq anotherfile . +Otherwise the line is printed to stdout. +.Bl -tag -width "-I dir" +.It Fl C +Recognise +.Em .so +when not followed by a space character. +.It Fl r +Compatibility with GNU groff's +.Xr soelim 1 +(does nothing). +.It Fl t +Compatibility with GNU groff's +.Xr soelim 1 +(does nothing). +.It Fl v +Compatibility with GNU groff's +.Xr soelim 1 +(does nothing). +.It Fl I Ar dir +This option specify directories where +.Nm +searches for files (both those on the command line and those named in +.Dq .so +directive.) +This options may be specified multiple times. The directories will be searched +in the order specified. +.El +.Pp +The files are always searched first in the current directory. +.Pp +A file specified with an absolute path will be opened directly without +performing a search. 
+.Sh SEE ALSO +.Xr mandoc 1 +.Sh AUTHORS +This version of the +.Nm +utility was written by +.An Baptiste Daroussin Aq Mt bapt@freebsd.org . diff --git a/contrib/mdocml/soelim.c b/contrib/mdocml/soelim.c new file mode 100644 index 0000000..3ef3082 --- /dev/null +++ b/contrib/mdocml/soelim.c @@ -0,0 +1,182 @@ +/* $Id: soelim.c,v 1.5 2015/11/07 14:22:29 schwarze Exp $ */ +/* + * Copyright (c) 2014 Baptiste Daroussin <bapt@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <ctype.h> +#if HAVE_ERR +#include <err.h> +#endif +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#if HAVE_STRINGLIST +#include <stringlist.h> +#else +#include "compat_stringlist.h" +#endif +#include <unistd.h> + +#define C_OPTION 0x1 + +static StringList *includes; + +static void +usage(void) +{ + + fprintf(stderr, "usage: soelim [-Crtv] [-I dir] [files]\n"); + + exit(EXIT_FAILURE); +} + +static FILE * +soelim_fopen(const char *name) +{ + FILE *f; + char path[PATH_MAX]; + size_t i; + + if (strcmp(name, "-") == 0) + return (stdin); + + if ((f = fopen(name, "r")) != NULL) + return (f); + + if (*name == '/') { + warn("can't open '%s'", name); + return (NULL); + } + + for (i = 0; i < includes->sl_cur; i++) { + snprintf(path, sizeof(path), "%s/%s", includes->sl_str[i], + name); + if ((f = fopen(path, "r")) != NULL) + return (f); + } + + warn("can't open '%s'", name); + + return (f); +} + +static int +soelim_file(FILE *f, int flag) +{ + char *line = NULL; + char *walk, *cp; + size_t linecap = 0; + ssize_t linelen; + + if (f == NULL) + return (1); + + while ((linelen = getline(&line, &linecap, f)) > 0) { + if (strncmp(line, ".so", 3) != 0) { + printf("%s", line); + continue; + } + + walk = line + 3; + if (!isspace(*walk) && ((flag & C_OPTION) == 0)) { + printf("%s", line); + continue; + } + + while (isspace(*walk)) + walk++; + + cp = walk; + while (*cp != '\0' && !isspace(*cp)) + cp++; + *cp = 0; + if (cp < line + linelen) + cp++; + + if (*walk == '\0') { + printf("%s", line); + continue; + } + if (soelim_file(soelim_fopen(walk), flag) == 1) { + free(line); + return (1); + } + if (*cp != '\0') + printf("%s", cp); + } + + free(line); + fclose(f); + + return (0); +} + +int +main(int argc, char **argv) +{ + int ch, i; + int ret = 0; + int flags = 0; + + includes = sl_init(); + if (includes == NULL) + err(EXIT_FAILURE, "sl_init()"); + + while ((ch = getopt(argc, argv, 
"CrtvI:")) != -1) { + switch (ch) { + case 'C': + flags |= C_OPTION; + break; + case 'r': + case 'v': + case 't': + /* stub compatibility with groff's soelim */ + break; + case 'I': + sl_add(includes, optarg); + break; + default: + sl_free(includes, 0); + usage(); + } + } + + argc -= optind; + argv += optind; + + if (argc == 0) + ret = soelim_file(stdin, flags); + + for (i = 0; i < argc; i++) + ret = soelim_file(soelim_fopen(argv[i]), flags); + + sl_free(includes, 0); + + return (ret); +} diff --git a/contrib/mdocml/st.c b/contrib/mdocml/st.c new file mode 100644 index 0000000..02868f0 --- /dev/null +++ b/contrib/mdocml/st.c @@ -0,0 +1,37 @@ +/* $Id: st.c,v 1.13 2015/10/06 18:32:20 schwarze Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <string.h> + +#include "roff.h" +#include "mdoc.h" +#include "libmdoc.h" + +#define LINE(x, y) \ + if (0 == strcmp(p, x)) return(y); + +const char * +mdoc_a2st(const char *p) +{ + +#include "st.in" + + return NULL; +} diff --git a/contrib/mdocml/st.in b/contrib/mdocml/st.in new file mode 100644 index 0000000..e70680f --- /dev/null +++ b/contrib/mdocml/st.in @@ -0,0 +1,77 @@ +/* $Id: st.in,v 1.28 2015/02/17 20:37:17 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * This file defines the .St macro arguments. If you add a new + * standard, make sure that the left-and side corresponds to the .St + * argument (like .St -p1003.1) and the right-hand side corresponds to + * the formatted output string. + * + * Be sure to escape strings. + * The non-breaking blanks prevent ending an output line right before + * a number. Groff prevent line breaks at the same places. + * + * REMEMBER TO ADD NEW STANDARDS TO MDOC.7! 
+ */ + +LINE("-p1003.1-88", "IEEE Std 1003.1-1988 (\\(LqPOSIX.1\\(Rq)") +LINE("-p1003.1-90", "IEEE Std 1003.1-1990 (\\(LqPOSIX.1\\(Rq)") +LINE("-p1003.1-96", "ISO/IEC 9945-1:1996 (\\(LqPOSIX.1\\(Rq)") +LINE("-p1003.1-2001", "IEEE Std 1003.1-2001 (\\(LqPOSIX.1\\(Rq)") +LINE("-p1003.1-2004", "IEEE Std 1003.1-2004 (\\(LqPOSIX.1\\(Rq)") +LINE("-p1003.1-2008", "IEEE Std 1003.1-2008 (\\(LqPOSIX.1\\(Rq)") +LINE("-p1003.1-2013", "IEEE Std 1003.1-2008/Cor 1-2013 (\\(LqPOSIX.1\\(Rq)") +LINE("-p1003.1", "IEEE Std 1003.1 (\\(LqPOSIX.1\\(Rq)") +LINE("-p1003.1b", "IEEE Std 1003.1b (\\(LqPOSIX.1b\\(Rq)") +LINE("-p1003.1b-93", "IEEE Std 1003.1b-1993 (\\(LqPOSIX.1b\\(Rq)") +LINE("-p1003.1c-95", "IEEE Std 1003.1c-1995 (\\(LqPOSIX.1c\\(Rq)") +LINE("-p1003.1g-2000", "IEEE Std 1003.1g-2000 (\\(LqPOSIX.1g\\(Rq)") +LINE("-p1003.1i-95", "IEEE Std 1003.1i-1995 (\\(LqPOSIX.1i\\(Rq)") +LINE("-p1003.2", "IEEE Std 1003.2 (\\(LqPOSIX.2\\(Rq)") +LINE("-p1003.2-92", "IEEE Std 1003.2-1992 (\\(LqPOSIX.2\\(Rq)") +LINE("-p1003.2a-92", "IEEE Std 1003.2a-1992 (\\(LqPOSIX.2\\(Rq)") +LINE("-isoC", "ISO/IEC 9899:1990 (\\(LqISO\\~C90\\(Rq)") +LINE("-isoC-90", "ISO/IEC 9899:1990 (\\(LqISO\\~C90\\(Rq)") +LINE("-isoC-amd1", "ISO/IEC 9899/AMD1:1995 (\\(LqISO\\~C90, Amendment 1\\(Rq)") +LINE("-isoC-tcor1", "ISO/IEC 9899/TCOR1:1994 (\\(LqISO\\~C90, Technical Corrigendum 1\\(Rq)") +LINE("-isoC-tcor2", "ISO/IEC 9899/TCOR2:1995 (\\(LqISO\\~C90, Technical Corrigendum 2\\(Rq)") +LINE("-isoC-99", "ISO/IEC 9899:1999 (\\(LqISO\\~C99\\(Rq)") +LINE("-isoC-2011", "ISO/IEC 9899:2011 (\\(LqISO\\~C11\\(Rq)") +LINE("-iso9945-1-90", "ISO/IEC 9945-1:1990 (\\(LqPOSIX.1\\(Rq)") +LINE("-iso9945-1-96", "ISO/IEC 9945-1:1996 (\\(LqPOSIX.1\\(Rq)") +LINE("-iso9945-2-93", "ISO/IEC 9945-2:1993 (\\(LqPOSIX.2\\(Rq)") +LINE("-ansiC", "ANSI X3.159-1989 (\\(LqANSI\\~C89\\(Rq)") +LINE("-ansiC-89", "ANSI X3.159-1989 (\\(LqANSI\\~C89\\(Rq)") +LINE("-ieee754", "IEEE Std 754-1985") +LINE("-iso8802-3", "ISO 8802-3: 1989") +LINE("-iso8601", "ISO 
8601") +LINE("-ieee1275-94", "IEEE Std 1275-1994 (\\(LqOpen Firmware\\(Rq)") +LINE("-xpg3", "X/Open Portability Guide Issue\\~3 (\\(LqXPG3\\(Rq)") +LINE("-xpg4", "X/Open Portability Guide Issue\\~4 (\\(LqXPG4\\(Rq)") +LINE("-xpg4.2", "X/Open Portability Guide Issue\\~4, Version\\~2 (\\(LqXPG4.2\\(Rq)") +LINE("-xbd5", "X/Open Base Definitions Issue\\~5 (\\(LqXBD5\\(Rq)") +LINE("-xcu5", "X/Open Commands and Utilities Issue\\~5 (\\(LqXCU5\\(Rq)") +LINE("-xsh4.2", "X/Open System Interfaces and Headers Issue\\~4, Version\\~2 (\\(LqXSH4.2\\(Rq)") +LINE("-xsh5", "X/Open System Interfaces and Headers Issue\\~5 (\\(LqXSH5\\(Rq)") +LINE("-xns5", "X/Open Networking Services Issue\\~5 (\\(LqXNS5\\(Rq)") +LINE("-xns5.2", "X/Open Networking Services Issue\\~5.2 (\\(LqXNS5.2\\(Rq)") +LINE("-xcurses4.2", "X/Open Curses Issue\\~4, Version\\~2 (\\(LqXCURSES4.2\\(Rq)") +LINE("-susv1", "Version\\~1 of the Single UNIX Specification (\\(LqSUSv1\\(Rq)") +LINE("-susv2", "Version\\~2 of the Single UNIX Specification (\\(LqSUSv2\\(Rq)") +LINE("-susv3", "Version\\~3 of the Single UNIX Specification (\\(LqSUSv3\\(Rq)") +LINE("-susv4", "Version\\~4 of the Single UNIX Specification (\\(LqSUSv4\\(Rq)") +LINE("-svid4", "System\\~V Interface Definition, Fourth Edition (\\(LqSVID4\\(Rq)") diff --git a/contrib/mdocml/tag.c b/contrib/mdocml/tag.c new file mode 100644 index 0000000..57925ce --- /dev/null +++ b/contrib/mdocml/tag.c @@ -0,0 +1,192 @@ +/* $Id: tag.c,v 1.11 2015/11/20 21:59:54 schwarze Exp $ */ +/* + * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> + +#include <signal.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "mandoc_aux.h" +#include "mandoc_ohash.h" +#include "tag.h" + +struct tag_entry { + size_t line; + int prio; + char s[]; +}; + +static void tag_signal(int); + +static struct ohash tag_data; +static struct tag_files tag_files; + + +/* + * Prepare for using a pager. + * Not all pagers are capable of using a tag file, + * but for simplicity, create it anyway. + */ +struct tag_files * +tag_init(void) +{ + struct sigaction sa; + int ofd; + + ofd = -1; + tag_files.tfd = -1; + tag_files.tcpgid = -1; + + /* Save the original standard output for use by the pager. */ + + if ((tag_files.ofd = dup(STDOUT_FILENO)) == -1) + goto fail; + + /* Create both temporary output files. */ + + (void)strlcpy(tag_files.ofn, "/tmp/man.XXXXXXXXXX", + sizeof(tag_files.ofn)); + (void)strlcpy(tag_files.tfn, "/tmp/man.XXXXXXXXXX", + sizeof(tag_files.tfn)); + memset(&sa, 0, sizeof(sa)); + sigfillset(&sa.sa_mask); + sa.sa_handler = tag_signal; + sigaction(SIGHUP, &sa, NULL); + sigaction(SIGINT, &sa, NULL); + sigaction(SIGTERM, &sa, NULL); + if ((ofd = mkstemp(tag_files.ofn)) == -1) + goto fail; + if ((tag_files.tfd = mkstemp(tag_files.tfn)) == -1) + goto fail; + if (dup2(ofd, STDOUT_FILENO) == -1) + goto fail; + close(ofd); + + /* + * Set up the ohash table to collect output line numbers + * where various marked-up terms are documented. 
+ */ + + mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s)); + return &tag_files; + +fail: + tag_unlink(); + if (ofd != -1) + close(ofd); + if (tag_files.ofd != -1) + close(tag_files.ofd); + if (tag_files.tfd != -1) + close(tag_files.tfd); + *tag_files.ofn = '\0'; + *tag_files.tfn = '\0'; + tag_files.ofd = -1; + tag_files.tfd = -1; + return NULL; +} + +/* + * Set the line number where a term is defined, + * unless it is already defined at a higher priority. + */ +void +tag_put(const char *s, int prio, size_t line) +{ + struct tag_entry *entry; + size_t len; + unsigned int slot; + + if (tag_files.tfd <= 0 || strchr(s, ' ') != NULL) + return; + slot = ohash_qlookup(&tag_data, s); + entry = ohash_find(&tag_data, slot); + if (entry == NULL) { + len = strlen(s) + 1; + entry = mandoc_malloc(sizeof(*entry) + len); + memcpy(entry->s, s, len); + ohash_insert(&tag_data, slot, entry); + } else if (entry->prio <= prio) + return; + entry->line = line; + entry->prio = prio; +} + +/* + * Write out the tags file using the previously collected + * information and clear the ohash table while going along. 
+ */ +void +tag_write(void) +{ + FILE *stream; + struct tag_entry *entry; + unsigned int slot; + + if (tag_files.tfd <= 0) + return; + stream = fdopen(tag_files.tfd, "w"); + entry = ohash_first(&tag_data, &slot); + while (entry != NULL) { + if (stream != NULL) + fprintf(stream, "%s %s %zu\n", + entry->s, tag_files.ofn, entry->line); + free(entry); + entry = ohash_next(&tag_data, &slot); + } + ohash_delete(&tag_data); + if (stream != NULL) + fclose(stream); +} + +void +tag_unlink(void) +{ + pid_t tc_pgid; + + if (tag_files.tcpgid != -1) { + tc_pgid = tcgetpgrp(STDIN_FILENO); + if (tc_pgid == tag_files.pager_pid || + tc_pgid == getpgid(0) || + getpgid(tc_pgid) == -1) + (void)tcsetpgrp(STDIN_FILENO, tag_files.tcpgid); + } + if (*tag_files.ofn != '\0') + unlink(tag_files.ofn); + if (*tag_files.tfn != '\0') + unlink(tag_files.tfn); +} + +static void +tag_signal(int signum) +{ + struct sigaction sa; + + tag_unlink(); + memset(&sa, 0, sizeof(sa)); + sigemptyset(&sa.sa_mask); + sa.sa_handler = SIG_DFL; + sigaction(signum, &sa, NULL); + kill(getpid(), signum); + /* NOTREACHED */ + _exit(1); +} diff --git a/contrib/mdocml/tag.h b/contrib/mdocml/tag.h new file mode 100644 index 0000000..ab1388d --- /dev/null +++ b/contrib/mdocml/tag.h @@ -0,0 +1,31 @@ +/* $Id: tag.h,v 1.7 2015/11/20 21:59:54 schwarze Exp $ */ +/* + * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +struct tag_files { + char ofn[20]; + char tfn[20]; + int ofd; + int tfd; + pid_t tcpgid; + pid_t pager_pid; +}; + + +struct tag_files *tag_init(void); +void tag_put(const char *, int, size_t); +void tag_write(void); +void tag_unlink(void); diff --git a/contrib/mdocml/tbl.3 b/contrib/mdocml/tbl.3 new file mode 100644 index 0000000..f3db622 --- /dev/null +++ b/contrib/mdocml/tbl.3 @@ -0,0 +1,354 @@ +.\" $Id: tbl.3,v 1.2 2015/01/30 04:11:50 schwarze Exp $ +.\" +.\" Copyright (c) 2013 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate: January 30 2015 $ +.Dt TBL 3 +.Os +.Sh NAME +.Nm tbl_alloc , +.Nm tbl_read , +.Nm tbl_restart , +.Nm tbl_span , +.Nm tbl_end , +.Nm tbl_free +.Nd roff table parser library for mandoc +.Sh SYNOPSIS +.In mandoc.h +.In libmandoc.h +.In libroff.h +.Ft struct tbl_node * +.Fo tbl_alloc +.Fa "int pos" +.Fa "int line" +.Fa "struct mparse *parse" +.Fc +.Ft enum rofferr +.Fo tbl_read +.Fa "struct tbl_node *tbl" +.Fa "int ln" +.Fa "const char *p" +.Fa "int offs" +.Fc +.Ft void +.Fo tbl_restart +.Fa "int line" +.Fa "int pos" +.Fa "struct tbl_node *tbl" +.Fc +.Ft const struct tbl_span * +.Fo tbl_span +.Fa "struct tbl_node *tbl" +.Fc +.Ft void +.Fo tbl_end +.Fa "struct tbl_node **tblp" +.Fc +.Ft void +.Fo tbl_free +.Fa "struct tbl_node *tbl" +.Fc +.Sh DESCRIPTION +This library is tightly integrated into the +.Xr mandoc 1 +utility and not designed for stand-alone use. +The present manual is intended as a reference for developers working on +.Xr mandoc 1 . +.Ss Data structures +Unless otherwise noted, all of the following data structures are defined in +.In mandoc.h +and are deleted in +.Fn tbl_free . +.Bl -tag -width Ds +.It Vt struct tbl_node +This structure describes a complete table. +It is defined in +.In libroff.h , +created in +.Fn tbl_alloc , +and stored in the members +.Fa first_tbl , +.Fa last_tbl , +and +.Fa tbl +of +.Vt struct roff Bq Pa roff.c . +.Pp +The +.Fa first_span , +.Fa current_span , +.Fa last_span , +and +.Fa next +members may be +.Dv NULL . +The +.Fa first_row +and +.Fa last_row +members may be +.Dv NULL , +but if there is a span, the function +.Fn tbl_layout +guarantees that these pointers are not +.Dv NULL . +The function +.Fn tbl_alloc +guarantees that the +.Fa parse +member is not +.Dv NULL . +.It Vt struct tbl_opts +This structure describes the options of one table. +It is used as a substructure of +.Vt struct tbl_node +and thus created and deleted together with it. +It is filled in +.Fn tbl_options . 
+.It Vt struct tbl_row +This structure describes one layout line in a table +by maintaining a list of all the cells in that line. +It is allocated and filled in +.Fn row Bq Pa tbl_layout.c +and referenced from the +.Fa layout +member of +.Vt struct tbl_node . +.Pp +The +.Fa next +member may be +.Dv NULL . +The function +.Fn tbl_layout +guarantees that the +.Fa first +and +.Fa last +members are not NULL. +.It Vt struct tbl_cell +This structure describes one layout cell in a table, +in particular its alignment, membership in spans, and +usage for lines. +It is allocated and filled in +.Fn cell_alloc Bq Pa tbl_layout.c +and referenced from the +.Fa first +and +.Fa last +members of +.Vt struct tbl_row . +.Pp +The +.Fa next +member may be +.Dv NULL . +.It Vt struct tbl_span +This structure describes one data line in a table +by maintaining a list of all data cells in that line +or by specifying that it is a horizontal line. +It is allocated and filled in +.Fn newspan Bq Pa tbl_data.c +which is called from +.Fn tbl_data +and referenced from the +.Fa first_span , +.Fa current_span , +and +.Fa last_span +members of +.Vt struct tbl_node , +and from the +.Fa span +members of +.Vt struct man_node +and +.Vt struct mdoc_node +from +.In man.h +and +.In mdoc.h . +.Pp +The +.Fa first , +.Fa last , +.Fa prev , +and +.Fa next +members may be +.Dv NULL . +The function +.Fn newspan Bq Pa tbl_data.c +guarantees that the +.Fa opts +and +.Fa layout +members are not +.Dv NULL . +.It Vt struct tbl_dat +This structure describes one data cell in a table by specifying +whether it contains a line or data, whether it spans additional +layout cells, and by storing the data. +It is allocated and filled in +.Fn tbl_data +and referenced from the +.Fa first +and +.Fa last +members of +.Vt struct tbl_span . +.Pp +The +.Fa string +and +.Fa next +members may be +.Dv NULL . +The function +.Fn getdata +guarantees that the +.Fa layout +member is not +.Dv NULL . 
+.El +.Ss Interface functions +The following functions are implemented in +.Pa tbl.c , +and all callers in +.Pa roff.c . +.Bl -tag -width Ds +.It Fn tbl_alloc +Allocates, initializes, and returns a new +.Vt struct tbl_node . +Called from +.Fn roff_TS . +.It Fn tbl_read +Dispatches to +.Fn tbl_option , +.Fn tbl_layout , +.Fn tbl_cdata , +and +.Fn tbl_data , +see below. +Called from +.Fn roff_parseln . +.It Fn tbl_restart +Resets the +.Fa part +member of +.Vt struct tbl_node +to +.Dv TBL_PART_LAYOUT . +Called from +.Fn roff_T_ . +.It Fn tbl_span +On the first call, return the first +.Vt struct tbl_span ; +for later calls, return the next one or +.Dv NULL . +Called from +.Fn roff_span . +.It Fn tbl_end +Flags the last span as +.Dv TBL_SPAN_LAST +and clears the pointer passed as an argument. +Called from +.Fn roff_TE +and +.Fn roff_endparse . +.It Fn tbl_free +Frees the specified +.Vt struct tbl_node +and all the tbl_row, tbl_cell, tbl_span, and tbl_dat structures +referenced from it. +Called from +.Fn roff_free +and +.Fn roff_reset . +.El +.Ss Private functions +.Bl -tag -width Ds +.It Ft int Fn tbl_options "struct tbl_node *tbl" "int ln" "const char *p" +Parses the options line into +.Vt struct tbl_opts . +Implemented in +.Pa tbl_opts.c , +called from +.Fn tbl_read . +.It Ft int Fn tbl_layout "struct tbl_node *tbl" "int ln" "const char *p" +Allocates and fills one +.Vt struct tbl_row +for each layout line and one +.Vt struct tbl_cell +for each layout cell. +Implemented in +.Pa tbl_layout.c , +called from +.Fn tbl_read . +.It Ft int Fn tbl_data "struct tbl_node *tbl" "int ln" "const char *p" +Allocates one +.Vt struct tbl_span +for each data line and calls +.Fn getdata +for each data cell. +Implemented in +.Pa tbl_data.c , +called from +.Fn tbl_read . 
+.It Ft int Fn tbl_cdata "struct tbl_node *tbl" "int ln" "const char *p" +Continues parsing a data line: +When finding +.Sq T} , +switches back to +.Dv TBL_PART_DATA +mode and calls +.Fn getdata +if there are more data cells on the line. +Otherwise, appends the data to the current data cell. +Implemented in +.Pa tbl_data.c , +called from +.Fn tbl_read . +.It Xo +.Ft int +.Fo getdata +.Fa "struct tbl_node *tbl" +.Fa "struct tbl_span *dp" +.Fa "int ln" +.Fa "const char *p" +.Fa "int *pos" +.Fc +.Xc +Parses one data cell into one +.Vt struct tbl_dat . +Implemented in +.Pa tbl_data.c , +called from +.Fn tbl_data +and +.Fn tbl_cdata . +.El +.Sh SEE ALSO +.Xr mandoc 1 , +.Xr mandoc 3 , +.Xr tbl 7 +.Sh AUTHORS +.An -nosplit +The +.Nm tbl +library was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv +with contributions from +.An Ingo Schwarze Aq Mt schwarze@openbsd.org . diff --git a/contrib/mdocml/tbl.7 b/contrib/mdocml/tbl.7 new file mode 100644 index 0000000..c8fa8e4 --- /dev/null +++ b/contrib/mdocml/tbl.7 @@ -0,0 +1,367 @@ +.\" $Id: tbl.7,v 1.26 2015/01/29 00:33:57 schwarze Exp $ +.\" +.\" Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> +.\" Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate: January 29 2015 $ +.Dt TBL 7 +.Os +.Sh NAME +.Nm tbl +.Nd tbl language reference for mandoc +.Sh DESCRIPTION +The +.Nm tbl +language is a table-formatting language. +It is used within +.Xr mdoc 7 +and +.Xr man 7 +.Ux +manual pages. +This manual describes the subset of the +.Nm +language accepted by the +.Xr mandoc 1 +utility. +.Pp +Tables within +.Xr mdoc 7 +or +.Xr man 7 +are enclosed by the +.Sq TS +and +.Sq TE +macro tags, whose precise syntax is documented in +.Xr roff 7 . +Tables consist of a series of options on a single line, followed by the +table layout, followed by data. +.Pp +For example, the following creates a boxed table with digits centered in +the cells. +.Bd -literal -offset indent +\&.TS +tab(:) box; +c5 c5 c5. +1:2:3 +4:5:6 +\&.TE +.Ed +.Pp +When formatted, the following output is produced: +.Bd -filled -offset indent -compact +.TS +tab(:) box; +c5 c5 c5. +1:2:3 +4:5:6 +.TE +.Ed +.Sh TABLE STRUCTURE +Tables are enclosed by the +.Sq TS +and +.Sq TE +.Xr roff 7 +macros. +A table consists of an optional single line of table +.Sx Options +terminated by a semicolon, followed by one or more lines of +.Sx Layout +specifications terminated by a period, then +.Sx Data . +All input must be 7-bit ASCII. +Example: +.Bd -literal -offset indent +\&.TS +box tab(:); +c | c +| c | c. +1:2 +3:4 +\&.TE +.Ed +.Pp +Table data is +.Em pre-processed , +that is, data rows are parsed then inserted into the underlying stream +of input data. +This allows data rows to be interspersed by arbitrary +.Xr roff 7 , +.Xr mdoc 7 , +and +.Xr man 7 +macros such as +.Bd -literal -offset indent +\&.TS +tab(:); +c c c. +1:2:3 +\&.Ao +3:2:1 +\&.Ac +\&.TE +.Ed +.Pp +in the case of +.Xr mdoc 7 +or +.Bd -literal -offset indent +\&.TS +tab(:); +c c c. +\&.ds ab 2 +1:\e*(ab:3 +\&.I +3:2:1 +\&.TE +.Ed +.Pp +in the case of +.Xr man 7 . +.Ss Options +The first line of a table may contain options separated by spaces, tabs, +or commas and terminated by a semicolon. 
+If the first line does not have a terminating semicolon, it is assumed +that no options are specified and instead a +.Sx Layout +is processed. +Some options require arguments enclosed by parentheses. +The following case-insensitive options are available: +.Bl -tag -width Ds +.It Cm allbox +Draw a single-line box around each table cell. +Currently treated as a synonym for +.Cm box . +.It Cm box +Draw a single-line box around the table. +For GNU compatibility, this may also be invoked with +.Cm frame . +.It Cm center +Center the table instead of left-adjusting it. +For GNU compatibility, this may also be invoked with +.Cm centre . +.It Cm decimalpoint +Use the single-character argument as the decimal point with the +.Cm n +layout key. +This is a GNU extension. +.It Cm delim +Use the two characters of the argument as +.Xr eqn 7 +delimiters. +Currently unsupported. +.It Cm doublebox +Draw a double-line box around the table. +For GNU compatibility, this may also be invoked with +.Cm doubleframe . +.It Cm expand +Increase the width of the table to the current line length. +Currently ignored. +.It Cm linesize +Draw lines with the point size given by the unsigned integer argument. +Currently ignored. +.It Cm nokeep +Allow page breaks within the table. +This is a GNU extension and currently ignored. +.It Cm nospaces +Ignore leading and trailing spaces in data cells. +This is a GNU extension and currently ignored. +.It Cm nowarn +Suppress warnings about tables exceeding the current line length. +This is a GNU extension and currently ignored. +.It Cm tab +Use the single-character argument as a delimiter between data cells. +By default, the tab character is used. +.El +.Ss Layout +The table layout follows +.Sx Options +or a +.Sq \&T& +macro invocation. +Layout specifies how data rows are displayed on output. +Each layout line corresponds to a line of data; the last layout line +applies to all remaining data lines. +Layout lines may also be separated by a comma. 
+Each layout cell consists of one of the following case-insensitive keys: +.Bl -tag -width 2n +.It Cm c +Center a literal string within its column. +.It Cm r +Right-justify a literal string within its column. +.It Cm l +Left-justify a literal string within its column. +.It Cm n +Justify a number around its last decimal point. +If the decimal point is not found on the number, it's assumed to trail +the number. +.It Cm s +Horizontally span columns from the last +.No non- Ns Cm s +data cell. +It is an error if spanning columns follow a +.Cm \- +or +.Cm \(ba +cell, or come first. +This option is not supported by +.Xr mandoc 1 . +.It Cm a +Left-justify a literal string and pad with one space. +.It Cm ^ +Vertically span rows from the last +.No non- Ns Cm ^ +data cell. +It is an error to invoke a vertical span on the first layout row. +Unlike a horizontal spanner, you must specify an empty cell (if it is not +empty, the data is discarded) in the corresponding data cell. +.It Cm \- +Replace the data cell (its contents will be lost) with a single +horizontal line. +This may also be invoked with +.Cm _ . +.It Cm = +Replace the data cell (its contents will be lost) with a double +horizontal line. +.It Cm \(ba +Emit a vertical bar instead of data. +.It Cm \(ba\(ba +Emit a double-vertical bar instead of data. +.El +.Pp +Keys may be followed by a set of modifiers. +A modifier is either a modifier key or a natural number for specifying +the minimum width of a column. +The following case-insensitive modifier keys are available: +.Bl -tag -width 2n +.It Cm b +Use a bold font for the contents of this column. +.It Cm d +Move cell content down to the last cell of a vertical span. +Currently ignored. +.It Cm e +Make this column wider to match the maximum width +of any other column also having the +.Cm e +modifier. +.It Cm f +The next character selects the font to use for this column. +See the +.Xr roff 7 +manual for supported one-character font names. 
+.It Cm i +Use an italic font for the contents of this column. +.It Cm m +Specify a cell start macro. +This is a GNU extension and currently unsupported. +.It Cm p +Set the point size to the following unsigned argument, +or change it by the following signed argument. +Currently ignored. +.It Cm v +Set the vertical line spacing to the following unsigned argument, +or change it by the following signed argument. +Currently ignored. +.It Cm t +Do not vertically center cell content in the vertical span, +leave it at the top. +Currently ignored. +.It Cm u +Move cell content up by half a table line. +Currently ignored. +.It Cm w +Specify minimum column width. +Currently ignored. +.It Cm x +After determining the width of all other columns, distribute the +rest of the line length among all columns having the +.Cm x +modifier. +.It Cm z +Do not use this cell for determining the width of this column. +.El +.Pp +For example, the following layout specifies a center-justified column of +minimum width 10, followed by a vertical bar, followed by a left-justified +column of minimum width 10, another vertical bar, then a column using +bold font justified about the decimal point in numbers: +.Pp +.Dl c10 | l10 | nfB +.Ss Data +The data section follows the last layout row. +By default, cells in a data section are delimited by a tab. +This behaviour may be changed with the +.Cm tab +option. +If +.Cm _ +or +.Cm = +is specified, a single or double line, respectively, is drawn across the +data field. +If +.Cm \e- +or +.Cm \e= +is specified, a line is drawn within the data field (i.e. terminating +within the cell and not drawn to the border). +If the last cell of a line is +.Cm T{ , +all subsequent lines are included as part of the cell until +.Cm T} +is specified as its own data cell. +It may then be followed by a tab +.Pq or as designated by Cm tab +or an end-of-line to terminate the row. 
+.Sh COMPATIBILITY +The +.Xr mandoc 1 +implementation of +.Nm +doesn't support +.Xr mdoc 7 +and +.Xr man 7 +macros and +.Xr eqn 7 +equations inside tables. +.Sh SEE ALSO +.Xr mandoc 1 , +.Xr man 7 , +.Xr mandoc_char 7 , +.Xr mdoc 7 , +.Xr roff 7 +.Rs +.%A M. E. Lesk +.%T Tbl\(emA Program to Format Tables +.%D June 11, 1976 +.Re +.Sh HISTORY +The tbl utility, a preprocessor for troff, was originally written by M. +E. Lesk at Bell Labs in 1975. +The GNU reimplementation of tbl, part of the groff package, was released +in 1990 by James Clark. +A standalone tbl implementation was written by Kristaps Dzonsons in +2010. +This formed the basis of the implementation that is part of the +.Xr mandoc 1 +utility. +.Sh AUTHORS +This +.Nm +reference was written by +.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv . diff --git a/contrib/mdocml/tbl.c b/contrib/mdocml/tbl.c new file mode 100644 index 0000000..ed6fbd8 --- /dev/null +++ b/contrib/mdocml/tbl.c @@ -0,0 +1,183 @@ +/* $Id: tbl.c,v 1.40 2015/10/06 18:32:20 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc.h" +#include "mandoc_aux.h" +#include "libmandoc.h" +#include "libroff.h" + + +enum rofferr +tbl_read(struct tbl_node *tbl, int ln, const char *p, int pos) +{ + const char *cp; + int active; + + /* + * In the options section, proceed to the layout section + * after a semicolon, or right away if there is no semicolon. + * Ignore semicolons in arguments. + */ + + if (tbl->part == TBL_PART_OPTS) { + tbl->part = TBL_PART_LAYOUT; + active = 1; + for (cp = p + pos; *cp != '\0'; cp++) { + switch (*cp) { + case '(': + active = 0; + continue; + case ')': + active = 1; + continue; + case ';': + if (active) + break; + continue; + default: + continue; + } + break; + } + if (*cp == ';') { + tbl_option(tbl, ln, p, &pos); + if (p[pos] == '\0') + return ROFF_IGN; + } + } + + /* Process the other section types. */ + + switch (tbl->part) { + case TBL_PART_LAYOUT: + tbl_layout(tbl, ln, p, pos); + return ROFF_IGN; + case TBL_PART_CDATA: + return tbl_cdata(tbl, ln, p, pos) ? 
ROFF_TBL : ROFF_IGN; + default: + break; + } + + tbl_data(tbl, ln, p, pos); + return ROFF_TBL; +} + +struct tbl_node * +tbl_alloc(int pos, int line, struct mparse *parse) +{ + struct tbl_node *tbl; + + tbl = mandoc_calloc(1, sizeof(*tbl)); + tbl->line = line; + tbl->pos = pos; + tbl->parse = parse; + tbl->part = TBL_PART_OPTS; + tbl->opts.tab = '\t'; + tbl->opts.decimal = '.'; + return tbl; +} + +void +tbl_free(struct tbl_node *tbl) +{ + struct tbl_row *rp; + struct tbl_cell *cp; + struct tbl_span *sp; + struct tbl_dat *dp; + + while ((rp = tbl->first_row) != NULL) { + tbl->first_row = rp->next; + while (rp->first != NULL) { + cp = rp->first; + rp->first = cp->next; + free(cp); + } + free(rp); + } + + while ((sp = tbl->first_span) != NULL) { + tbl->first_span = sp->next; + while (sp->first != NULL) { + dp = sp->first; + sp->first = dp->next; + free(dp->string); + free(dp); + } + free(sp); + } + + free(tbl); +} + +void +tbl_restart(int line, int pos, struct tbl_node *tbl) +{ + if (tbl->part == TBL_PART_CDATA) + mandoc_msg(MANDOCERR_TBLDATA_BLK, tbl->parse, + line, pos, "T&"); + + tbl->part = TBL_PART_LAYOUT; + tbl->line = line; + tbl->pos = pos; +} + +const struct tbl_span * +tbl_span(struct tbl_node *tbl) +{ + struct tbl_span *span; + + assert(tbl); + span = tbl->current_span ? 
tbl->current_span->next + : tbl->first_span; + if (span) + tbl->current_span = span; + return span; +} + +int +tbl_end(struct tbl_node **tblp) +{ + struct tbl_node *tbl; + struct tbl_span *sp; + + tbl = *tblp; + *tblp = NULL; + + if (tbl->part == TBL_PART_CDATA) + mandoc_msg(MANDOCERR_TBLDATA_BLK, tbl->parse, + tbl->line, tbl->pos, "TE"); + + sp = tbl->first_span; + while (sp != NULL && sp->first == NULL) + sp = sp->next; + if (sp == NULL) { + mandoc_msg(MANDOCERR_TBLDATA_NONE, tbl->parse, + tbl->line, tbl->pos, NULL); + return 0; + } + return 1; +} diff --git a/contrib/mdocml/tbl_data.c b/contrib/mdocml/tbl_data.c new file mode 100644 index 0000000..40b756a0 --- /dev/null +++ b/contrib/mdocml/tbl_data.c @@ -0,0 +1,241 @@ +/* $Id: tbl_data.c,v 1.41 2015/10/06 18:32:20 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc.h" +#include "mandoc_aux.h" +#include "libmandoc.h" +#include "libroff.h" + +static void getdata(struct tbl_node *, struct tbl_span *, + int, const char *, int *); +static struct tbl_span *newspan(struct tbl_node *, int, + struct tbl_row *); + + +static void +getdata(struct tbl_node *tbl, struct tbl_span *dp, + int ln, const char *p, int *pos) +{ + struct tbl_dat *dat; + struct tbl_cell *cp; + int sv; + + /* Advance to the next layout cell, skipping spanners. */ + + cp = dp->last == NULL ? dp->layout->first : dp->last->layout->next; + while (cp != NULL && cp->pos == TBL_CELL_SPAN) + cp = cp->next; + + /* + * Stop processing when we reach the end of the available layout + * cells. This means that we have extra input. + */ + + if (cp == NULL) { + mandoc_msg(MANDOCERR_TBLDATA_EXTRA, tbl->parse, + ln, *pos, p + *pos); + /* Skip to the end... */ + while (p[*pos]) + (*pos)++; + return; + } + + dat = mandoc_calloc(1, sizeof(*dat)); + dat->layout = cp; + dat->pos = TBL_DATA_NONE; + dat->spans = 0; + for (cp = cp->next; cp != NULL; cp = cp->next) + if (cp->pos == TBL_CELL_SPAN) + dat->spans++; + else + break; + + if (dp->last == NULL) + dp->first = dat; + else + dp->last->next = dat; + dp->last = dat; + + sv = *pos; + while (p[*pos] && p[*pos] != tbl->opts.tab) + (*pos)++; + + /* + * Check for a continued-data scope opening. This consists of a + * trailing `T{' at the end of the line. Subsequent lines, + * until a standalone `T}', are included in our cell. + */ + + if (*pos - sv == 2 && p[sv] == 'T' && p[sv + 1] == '{') { + tbl->part = TBL_PART_CDATA; + return; + } + + dat->string = mandoc_strndup(p + sv, *pos - sv); + + if (p[*pos]) + (*pos)++; + + if ( ! strcmp(dat->string, "_")) + dat->pos = TBL_DATA_HORIZ; + else if ( ! strcmp(dat->string, "=")) + dat->pos = TBL_DATA_DHORIZ; + else if ( ! 
strcmp(dat->string, "\\_")) + dat->pos = TBL_DATA_NHORIZ; + else if ( ! strcmp(dat->string, "\\=")) + dat->pos = TBL_DATA_NDHORIZ; + else + dat->pos = TBL_DATA_DATA; + + if ((dat->layout->pos == TBL_CELL_HORIZ || + dat->layout->pos == TBL_CELL_DHORIZ || + dat->layout->pos == TBL_CELL_DOWN) && + dat->pos == TBL_DATA_DATA && *dat->string != '\0') + mandoc_msg(MANDOCERR_TBLDATA_SPAN, + tbl->parse, ln, sv, dat->string); +} + +int +tbl_cdata(struct tbl_node *tbl, int ln, const char *p, int pos) +{ + struct tbl_dat *dat; + size_t sz; + + dat = tbl->last_span->last; + + if (p[pos] == 'T' && p[pos + 1] == '}') { + pos += 2; + if (p[pos] == tbl->opts.tab) { + tbl->part = TBL_PART_DATA; + pos++; + while (p[pos] != '\0') + getdata(tbl, tbl->last_span, ln, p, &pos); + return 1; + } else if (p[pos] == '\0') { + tbl->part = TBL_PART_DATA; + return 1; + } + + /* Fallthrough: T} is part of a word. */ + } + + dat->pos = TBL_DATA_DATA; + + if (dat->string != NULL) { + sz = strlen(p + pos) + strlen(dat->string) + 2; + dat->string = mandoc_realloc(dat->string, sz); + (void)strlcat(dat->string, " ", sz); + (void)strlcat(dat->string, p + pos, sz); + } else + dat->string = mandoc_strdup(p + pos); + + if (dat->layout->pos == TBL_CELL_DOWN) + mandoc_msg(MANDOCERR_TBLDATA_SPAN, tbl->parse, + ln, pos, dat->string); + + return 0; +} + +static struct tbl_span * +newspan(struct tbl_node *tbl, int line, struct tbl_row *rp) +{ + struct tbl_span *dp; + + dp = mandoc_calloc(1, sizeof(*dp)); + dp->line = line; + dp->opts = &tbl->opts; + dp->layout = rp; + dp->prev = tbl->last_span; + + if (dp->prev == NULL) { + tbl->first_span = dp; + tbl->current_span = NULL; + } else + dp->prev->next = dp; + tbl->last_span = dp; + + return dp; +} + +void +tbl_data(struct tbl_node *tbl, int ln, const char *p, int pos) +{ + struct tbl_span *dp; + struct tbl_row *rp; + + /* + * Choose a layout row: take the one following the last parsed + * span's. If that doesn't exist, use the last parsed span's. 
+ * If there's no last parsed span, use the first row. Lastly, + * if the last span was a horizontal line, use the same layout + * (it doesn't "consume" the layout). + */ + + if (tbl->last_span != NULL) { + if (tbl->last_span->pos == TBL_SPAN_DATA) { + for (rp = tbl->last_span->layout->next; + rp != NULL && rp->first != NULL; + rp = rp->next) { + switch (rp->first->pos) { + case TBL_CELL_HORIZ: + dp = newspan(tbl, ln, rp); + dp->pos = TBL_SPAN_HORIZ; + continue; + case TBL_CELL_DHORIZ: + dp = newspan(tbl, ln, rp); + dp->pos = TBL_SPAN_DHORIZ; + continue; + default: + break; + } + break; + } + } else + rp = tbl->last_span->layout; + + if (rp == NULL) + rp = tbl->last_span->layout; + } else + rp = tbl->first_row; + + assert(rp); + + dp = newspan(tbl, ln, rp); + + if ( ! strcmp(p, "_")) { + dp->pos = TBL_SPAN_HORIZ; + return; + } else if ( ! strcmp(p, "=")) { + dp->pos = TBL_SPAN_DHORIZ; + return; + } + + dp->pos = TBL_SPAN_DATA; + + while (p[pos] != '\0') + getdata(tbl, dp, ln, p, &pos); +} diff --git a/contrib/mdocml/tbl_html.c b/contrib/mdocml/tbl_html.c new file mode 100644 index 0000000..51c4328 --- /dev/null +++ b/contrib/mdocml/tbl_html.c @@ -0,0 +1,142 @@ +/* $Id: tbl_html.c,v 1.18 2015/10/12 00:08:16 schwarze Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "out.h" +#include "html.h" + +static void html_tblopen(struct html *, const struct tbl_span *); +static size_t html_tbl_len(size_t, void *); +static size_t html_tbl_strlen(const char *, void *); + + +static size_t +html_tbl_len(size_t sz, void *arg) +{ + + return sz; +} + +static size_t +html_tbl_strlen(const char *p, void *arg) +{ + + return strlen(p); +} + +static void +html_tblopen(struct html *h, const struct tbl_span *sp) +{ + struct htmlpair tag; + struct roffsu su; + struct roffcol *col; + int ic; + + if (h->tbl.cols == NULL) { + h->tbl.len = html_tbl_len; + h->tbl.slen = html_tbl_strlen; + tblcalc(&h->tbl, sp, 0); + } + + assert(NULL == h->tblt); + PAIR_CLASS_INIT(&tag, "tbl"); + h->tblt = print_otag(h, TAG_TABLE, 1, &tag); + + for (ic = 0; ic < sp->opts->cols; ic++) { + bufinit(h); + col = h->tbl.cols + ic; + SCALE_HS_INIT(&su, col->width); + bufcat_su(h, "width", &su); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_COL, 1, &tag); + } + + print_otag(h, TAG_TBODY, 0, NULL); +} + +void +print_tblclose(struct html *h) +{ + + assert(h->tblt); + print_tagq(h, h->tblt); + h->tblt = NULL; +} + +void +print_tbl(struct html *h, const struct tbl_span *sp) +{ + const struct tbl_dat *dp; + struct htmlpair tag; + struct tag *tt; + int ic; + + /* Inhibit printing of spaces: we do padding ourselves. 
*/ + + if (h->tblt == NULL) + html_tblopen(h, sp); + + assert(h->tblt); + + h->flags |= HTML_NONOSPACE; + h->flags |= HTML_NOSPACE; + + tt = print_otag(h, TAG_TR, 0, NULL); + + switch (sp->pos) { + case TBL_SPAN_HORIZ: + case TBL_SPAN_DHORIZ: + PAIR_INIT(&tag, ATTR_COLSPAN, "0"); + print_otag(h, TAG_TD, 1, &tag); + break; + default: + dp = sp->first; + for (ic = 0; ic < sp->opts->cols; ic++) { + print_stagq(h, tt); + print_otag(h, TAG_TD, 0, NULL); + + if (dp == NULL || dp->layout->col > ic) + continue; + if (dp->layout->pos != TBL_CELL_DOWN) + if (dp->string != NULL) + print_text(h, dp->string); + dp = dp->next; + } + break; + } + + print_tagq(h, tt); + + h->flags &= ~HTML_NONOSPACE; + + if (sp->next == NULL) { + assert(h->tbl.cols); + free(h->tbl.cols); + h->tbl.cols = NULL; + print_tblclose(h); + } + +} diff --git a/contrib/mdocml/tbl_layout.c b/contrib/mdocml/tbl_layout.c new file mode 100644 index 0000000..c0eafbd --- /dev/null +++ b/contrib/mdocml/tbl_layout.c @@ -0,0 +1,355 @@ +/* $Id: tbl_layout.c,v 1.41 2015/10/12 00:08:16 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2012, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <ctype.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc.h" +#include "mandoc_aux.h" +#include "libmandoc.h" +#include "libroff.h" + +struct tbl_phrase { + char name; + enum tbl_cellt key; +}; + +static const struct tbl_phrase keys[] = { + { 'c', TBL_CELL_CENTRE }, + { 'r', TBL_CELL_RIGHT }, + { 'l', TBL_CELL_LEFT }, + { 'n', TBL_CELL_NUMBER }, + { 's', TBL_CELL_SPAN }, + { 'a', TBL_CELL_LONG }, + { '^', TBL_CELL_DOWN }, + { '-', TBL_CELL_HORIZ }, + { '_', TBL_CELL_HORIZ }, + { '=', TBL_CELL_DHORIZ } +}; + +#define KEYS_MAX ((int)(sizeof(keys)/sizeof(keys[0]))) + +static void mods(struct tbl_node *, struct tbl_cell *, + int, const char *, int *); +static void cell(struct tbl_node *, struct tbl_row *, + int, const char *, int *); +static struct tbl_cell *cell_alloc(struct tbl_node *, struct tbl_row *, + enum tbl_cellt); + + +static void +mods(struct tbl_node *tbl, struct tbl_cell *cp, + int ln, const char *p, int *pos) +{ + char *endptr; + +mod: + while (p[*pos] == ' ' || p[*pos] == '\t') + (*pos)++; + + /* Row delimiters and cell specifiers end modifier lists. */ + + if (strchr(".,-=^_ACLNRSaclnrs", p[*pos]) != NULL) + return; + + /* Throw away parenthesised expression. */ + + if ('(' == p[*pos]) { + (*pos)++; + while (p[*pos] && ')' != p[*pos]) + (*pos)++; + if (')' == p[*pos]) { + (*pos)++; + goto mod; + } + mandoc_msg(MANDOCERR_TBLLAYOUT_PAR, tbl->parse, + ln, *pos, NULL); + return; + } + + /* Parse numerical spacing from modifier string. 
*/ + + if (isdigit((unsigned char)p[*pos])) { + cp->spacing = strtoull(p + *pos, &endptr, 10); + *pos = endptr - p; + goto mod; + } + + switch (tolower((unsigned char)p[(*pos)++])) { + case 'b': + cp->flags |= TBL_CELL_BOLD; + goto mod; + case 'd': + cp->flags |= TBL_CELL_BALIGN; + goto mod; + case 'e': + cp->flags |= TBL_CELL_EQUAL; + goto mod; + case 'f': + break; + case 'i': + cp->flags |= TBL_CELL_ITALIC; + goto mod; + case 'm': + mandoc_msg(MANDOCERR_TBLLAYOUT_MOD, tbl->parse, + ln, *pos, "m"); + goto mod; + case 'p': + case 'v': + if (p[*pos] == '-' || p[*pos] == '+') + (*pos)++; + while (isdigit((unsigned char)p[*pos])) + (*pos)++; + goto mod; + case 't': + cp->flags |= TBL_CELL_TALIGN; + goto mod; + case 'u': + cp->flags |= TBL_CELL_UP; + goto mod; + case 'w': /* XXX for now, ignore minimal column width */ + goto mod; + case 'x': + cp->flags |= TBL_CELL_WMAX; + goto mod; + case 'z': + cp->flags |= TBL_CELL_WIGN; + goto mod; + case '|': + if (cp->vert < 2) + cp->vert++; + else + mandoc_msg(MANDOCERR_TBLLAYOUT_VERT, + tbl->parse, ln, *pos - 1, NULL); + goto mod; + default: + mandoc_vmsg(MANDOCERR_TBLLAYOUT_CHAR, tbl->parse, + ln, *pos - 1, "%c", p[*pos - 1]); + goto mod; + } + + /* Ignore parenthised font names for now. */ + + if (p[*pos] == '(') + goto mod; + + /* Support only one-character font-names for now. 
*/ + + if (p[*pos] == '\0' || (p[*pos + 1] != ' ' && p[*pos + 1] != '.')) { + mandoc_vmsg(MANDOCERR_FT_BAD, tbl->parse, + ln, *pos, "TS %s", p + *pos - 1); + if (p[*pos] != '\0') + (*pos)++; + if (p[*pos] != '\0') + (*pos)++; + goto mod; + } + + switch (p[(*pos)++]) { + case '3': + case 'B': + cp->flags |= TBL_CELL_BOLD; + goto mod; + case '2': + case 'I': + cp->flags |= TBL_CELL_ITALIC; + goto mod; + case '1': + case 'R': + goto mod; + default: + mandoc_vmsg(MANDOCERR_FT_BAD, tbl->parse, + ln, *pos - 1, "TS f%c", p[*pos - 1]); + goto mod; + } +} + +static void +cell(struct tbl_node *tbl, struct tbl_row *rp, + int ln, const char *p, int *pos) +{ + int i; + enum tbl_cellt c; + + /* Handle leading vertical lines */ + + while (p[*pos] == ' ' || p[*pos] == '\t' || p[*pos] == '|') { + if (p[*pos] == '|') { + if (rp->vert < 2) + rp->vert++; + else + mandoc_msg(MANDOCERR_TBLLAYOUT_VERT, + tbl->parse, ln, *pos, NULL); + } + (*pos)++; + } + +again: + while (p[*pos] == ' ' || p[*pos] == '\t') + (*pos)++; + + if (p[*pos] == '.' || p[*pos] == '\0') + return; + + /* Parse the column position (`c', `l', `r', ...). */ + + for (i = 0; i < KEYS_MAX; i++) + if (tolower((unsigned char)p[*pos]) == keys[i].name) + break; + + if (i == KEYS_MAX) { + mandoc_vmsg(MANDOCERR_TBLLAYOUT_CHAR, tbl->parse, + ln, *pos, "%c", p[*pos]); + (*pos)++; + goto again; + } + c = keys[i].key; + + /* Special cases of spanners. */ + + if (c == TBL_CELL_SPAN) { + if (rp->last == NULL) + mandoc_msg(MANDOCERR_TBLLAYOUT_SPAN, + tbl->parse, ln, *pos, NULL); + else if (rp->last->pos == TBL_CELL_HORIZ || + rp->last->pos == TBL_CELL_DHORIZ) + c = rp->last->pos; + } else if (c == TBL_CELL_DOWN && rp == tbl->first_row) + mandoc_msg(MANDOCERR_TBLLAYOUT_DOWN, + tbl->parse, ln, *pos, NULL); + + (*pos)++; + + /* Allocate cell then parse its modifiers. 
*/ + + mods(tbl, cell_alloc(tbl, rp, c), ln, p, pos); +} + +void +tbl_layout(struct tbl_node *tbl, int ln, const char *p, int pos) +{ + struct tbl_row *rp; + + rp = NULL; + for (;;) { + /* Skip whitespace before and after each cell. */ + + while (p[pos] == ' ' || p[pos] == '\t') + pos++; + + switch (p[pos]) { + case ',': /* Next row on this input line. */ + pos++; + rp = NULL; + continue; + case '\0': /* Next row on next input line. */ + return; + case '.': /* End of layout. */ + pos++; + tbl->part = TBL_PART_DATA; + + /* + * When the layout is completely empty, + * default to one left-justified column. + */ + + if (tbl->first_row == NULL) { + tbl->first_row = tbl->last_row = + mandoc_calloc(1, sizeof(*rp)); + } + if (tbl->first_row->first == NULL) { + mandoc_msg(MANDOCERR_TBLLAYOUT_NONE, + tbl->parse, ln, pos, NULL); + cell_alloc(tbl, tbl->first_row, + TBL_CELL_LEFT); + return; + } + + /* + * Search for the widest line + * along the left and right margins. + */ + + for (rp = tbl->first_row; rp; rp = rp->next) { + if (tbl->opts.lvert < rp->vert) + tbl->opts.lvert = rp->vert; + if (rp->last != NULL && + rp->last->col + 1 == tbl->opts.cols && + tbl->opts.rvert < rp->last->vert) + tbl->opts.rvert = rp->last->vert; + + /* If the last line is empty, drop it. */ + + if (rp->next != NULL && + rp->next->first == NULL) { + free(rp->next); + rp->next = NULL; + tbl->last_row = rp; + } + } + return; + default: /* Cell. */ + break; + } + + /* + * If the last line had at least one cell, + * start a new one; otherwise, continue it. 
+ */ + + if (rp == NULL) { + if (tbl->last_row == NULL || + tbl->last_row->first != NULL) { + rp = mandoc_calloc(1, sizeof(*rp)); + if (tbl->last_row) + tbl->last_row->next = rp; + else + tbl->first_row = rp; + tbl->last_row = rp; + } else + rp = tbl->last_row; + } + cell(tbl, rp, ln, p, &pos); + } +} + +static struct tbl_cell * +cell_alloc(struct tbl_node *tbl, struct tbl_row *rp, enum tbl_cellt pos) +{ + struct tbl_cell *p, *pp; + + p = mandoc_calloc(1, sizeof(*p)); + p->pos = pos; + + if ((pp = rp->last) != NULL) { + pp->next = p; + p->col = pp->col + 1; + } else + rp->first = p; + rp->last = p; + + if (tbl->opts.cols <= p->col) + tbl->opts.cols = p->col + 1; + + return p; +} diff --git a/contrib/mdocml/tbl_opts.c b/contrib/mdocml/tbl_opts.c new file mode 100644 index 0000000..f2f5942 --- /dev/null +++ b/contrib/mdocml/tbl_opts.c @@ -0,0 +1,173 @@ +/* $Id: tbl_opts.c,v 1.21 2015/09/26 00:54:04 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "libmandoc.h" +#include "libroff.h" + +#define KEY_DPOINT 0 +#define KEY_DELIM 1 +#define KEY_LINESIZE 2 +#define KEY_TAB 3 + +struct tbl_phrase { + const char *name; + int key; +}; + +static const struct tbl_phrase keys[] = { + {"decimalpoint", 0}, + {"delim", 0}, + {"linesize", 0}, + {"tab", 0}, + {"allbox", TBL_OPT_ALLBOX | TBL_OPT_BOX}, + {"box", TBL_OPT_BOX}, + {"frame", TBL_OPT_BOX}, + {"center", TBL_OPT_CENTRE}, + {"centre", TBL_OPT_CENTRE}, + {"doublebox", TBL_OPT_DBOX}, + {"doubleframe", TBL_OPT_DBOX}, + {"expand", TBL_OPT_EXPAND}, + {"nokeep", TBL_OPT_NOKEEP}, + {"nospaces", TBL_OPT_NOSPACE}, + {"nowarn", TBL_OPT_NOWARN}, +}; + +#define KEY_MAXKEYS ((int)(sizeof(keys)/sizeof(keys[0]))) + +static void arg(struct tbl_node *, int, const char *, int *, int); + + +static void +arg(struct tbl_node *tbl, int ln, const char *p, int *pos, int key) +{ + int len, want; + + while (p[*pos] == ' ' || p[*pos] == '\t') + (*pos)++; + + /* Arguments are enclosed in parentheses. */ + + len = 0; + if (p[*pos] == '(') { + (*pos)++; + while (p[*pos + len] != ')') + len++; + } + + switch (key) { + case KEY_DELIM: + mandoc_vmsg(MANDOCERR_TBLOPT_EQN, tbl->parse, + ln, *pos, "%.*s", len, p + *pos); + want = 2; + break; + case KEY_TAB: + want = 1; + if (len == want) + tbl->opts.tab = p[*pos]; + break; + case KEY_LINESIZE: + want = 0; + break; + case KEY_DPOINT: + want = 1; + if (len == want) + tbl->opts.decimal = p[*pos]; + break; + default: + abort(); + } + + if (len == 0) + mandoc_msg(MANDOCERR_TBLOPT_NOARG, + tbl->parse, ln, *pos, keys[key].name); + else if (want && len != want) + mandoc_vmsg(MANDOCERR_TBLOPT_ARGSZ, + tbl->parse, ln, *pos, "%s want %d have %d", + keys[key].name, want, len); + + *pos += len; + if (p[*pos] == ')') + (*pos)++; +} + +/* + * Parse one line of options up to the semicolon. 
+ * Each option can be preceded by blanks and/or commas, + * and some options are followed by arguments. + */ +void +tbl_option(struct tbl_node *tbl, int ln, const char *p, int *offs) +{ + int i, pos, len; + + pos = *offs; + for (;;) { + while (p[pos] == ' ' || p[pos] == '\t' || p[pos] == ',') + pos++; + + if (p[pos] == ';') { + *offs = pos + 1; + return; + } + + /* Parse one option name. */ + + len = 0; + while (isalpha((unsigned char)p[pos + len])) + len++; + + if (len == 0) { + mandoc_vmsg(MANDOCERR_TBLOPT_ALPHA, + tbl->parse, ln, pos, "%c", p[pos]); + pos++; + continue; + } + + /* Look up the option name. */ + + i = 0; + while (i < KEY_MAXKEYS && + (strncasecmp(p + pos, keys[i].name, len) || + keys[i].name[len] != '\0')) + i++; + + if (i == KEY_MAXKEYS) { + mandoc_vmsg(MANDOCERR_TBLOPT_BAD, tbl->parse, + ln, pos, "%.*s", len, p + pos); + pos += len; + continue; + } + + /* Handle the option. */ + + pos += len; + if (keys[i].key) + tbl->opts.opts |= keys[i].key; + else + arg(tbl, ln, p, &pos, i); + } +} diff --git a/contrib/mdocml/tbl_term.c b/contrib/mdocml/tbl_term.c new file mode 100644 index 0000000..eceaa4b --- /dev/null +++ b/contrib/mdocml/tbl_term.c @@ -0,0 +1,420 @@ +/* $Id: tbl_term.c,v 1.43 2015/10/12 00:08:16 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011, 2012, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "out.h" +#include "term.h" + +static size_t term_tbl_len(size_t, void *); +static size_t term_tbl_strlen(const char *, void *); +static void tbl_char(struct termp *, char, size_t); +static void tbl_data(struct termp *, const struct tbl_opts *, + const struct tbl_dat *, + const struct roffcol *); +static void tbl_literal(struct termp *, const struct tbl_dat *, + const struct roffcol *); +static void tbl_number(struct termp *, const struct tbl_opts *, + const struct tbl_dat *, + const struct roffcol *); +static void tbl_hrule(struct termp *, const struct tbl_span *, int); +static void tbl_word(struct termp *, const struct tbl_dat *); + + +static size_t +term_tbl_strlen(const char *p, void *arg) +{ + + return term_strlen((const struct termp *)arg, p); +} + +static size_t +term_tbl_len(size_t sz, void *arg) +{ + + return term_len((const struct termp *)arg, sz); +} + +void +term_tbl(struct termp *tp, const struct tbl_span *sp) +{ + const struct tbl_cell *cp; + const struct tbl_dat *dp; + static size_t offset; + size_t rmargin, maxrmargin, tsz; + int ic, horiz, spans, vert; + + rmargin = tp->rmargin; + maxrmargin = tp->maxrmargin; + + tp->rmargin = tp->maxrmargin = TERM_MAXMARGIN; + + /* Inhibit printing of spaces: we do padding ourselves. */ + + tp->flags |= TERMP_NONOSPACE; + tp->flags |= TERMP_NOSPACE; + + /* + * The first time we're invoked for a given table block, + * calculate the table widths and decimal positions. 
+ */ + + if (tp->tbl.cols == NULL) { + tp->tbl.len = term_tbl_len; + tp->tbl.slen = term_tbl_strlen; + tp->tbl.arg = tp; + + tblcalc(&tp->tbl, sp, rmargin - tp->offset); + + /* Center the table as a whole. */ + + offset = tp->offset; + if (sp->opts->opts & TBL_OPT_CENTRE) { + tsz = sp->opts->opts & (TBL_OPT_BOX | TBL_OPT_DBOX) + ? 2 : !!sp->opts->lvert + !!sp->opts->rvert; + for (ic = 0; ic < sp->opts->cols; ic++) + tsz += tp->tbl.cols[ic].width + 3; + tsz -= 3; + if (offset + tsz > rmargin) + tsz -= 1; + tp->offset = (offset + rmargin > tsz) ? + (offset + rmargin - tsz) / 2 : 0; + } + + /* Horizontal frame at the start of boxed tables. */ + + if (sp->opts->opts & TBL_OPT_DBOX) + tbl_hrule(tp, sp, 2); + if (sp->opts->opts & (TBL_OPT_DBOX | TBL_OPT_BOX)) + tbl_hrule(tp, sp, 1); + } + + /* Vertical frame at the start of each row. */ + + horiz = sp->pos == TBL_SPAN_HORIZ || sp->pos == TBL_SPAN_DHORIZ; + + if (sp->layout->vert || + (sp->prev != NULL && sp->prev->layout->vert) || + sp->opts->opts & (TBL_OPT_BOX | TBL_OPT_DBOX)) + term_word(tp, horiz ? "+" : "|"); + else if (sp->opts->lvert) + tbl_char(tp, horiz ? '-' : ASCII_NBRSP, 1); + + /* + * Now print the actual data itself depending on the span type. + * Match data cells to column numbers. + */ + + if (sp->pos == TBL_SPAN_DATA) { + cp = sp->layout->first; + dp = sp->first; + spans = 0; + for (ic = 0; ic < sp->opts->cols; ic++) { + + /* + * Remeber whether we need a vertical bar + * after this cell. + */ + + vert = cp == NULL ? 0 : cp->vert; + + /* + * Print the data and advance to the next cell. + */ + + if (spans == 0) { + tbl_data(tp, sp->opts, dp, tp->tbl.cols + ic); + if (dp != NULL) { + spans = dp->spans; + dp = dp->next; + } + } else + spans--; + if (cp != NULL) + cp = cp->next; + + /* + * Separate columns, except in the middle + * of spans and after the last cell. 
+ */ + + if (ic + 1 == sp->opts->cols || spans) + continue; + + tbl_char(tp, ASCII_NBRSP, 1); + if (vert > 0) + tbl_char(tp, '|', vert); + if (vert < 2) + tbl_char(tp, ASCII_NBRSP, 2 - vert); + } + } else if (horiz) + tbl_hrule(tp, sp, 0); + + /* Vertical frame at the end of each row. */ + + if (sp->layout->last->vert || + (sp->prev != NULL && sp->prev->layout->last->vert) || + (sp->opts->opts & (TBL_OPT_BOX | TBL_OPT_DBOX))) + term_word(tp, horiz ? "+" : " |"); + else if (sp->opts->rvert) + tbl_char(tp, horiz ? '-' : ASCII_NBRSP, 1); + term_flushln(tp); + + /* + * If we're the last row, clean up after ourselves: clear the + * existing table configuration and set it to NULL. + */ + + if (sp->next == NULL) { + if (sp->opts->opts & (TBL_OPT_DBOX | TBL_OPT_BOX)) { + tbl_hrule(tp, sp, 1); + tp->skipvsp = 1; + } + if (sp->opts->opts & TBL_OPT_DBOX) { + tbl_hrule(tp, sp, 2); + tp->skipvsp = 2; + } + assert(tp->tbl.cols); + free(tp->tbl.cols); + tp->tbl.cols = NULL; + tp->offset = offset; + } + + tp->flags &= ~TERMP_NONOSPACE; + tp->rmargin = rmargin; + tp->maxrmargin = maxrmargin; +} + +/* + * Kinds of horizontal rulers: + * 0: inside the table (single or double line with crossings) + * 1: inner frame (single line with crossings and ends) + * 2: outer frame (single line without crossings with ends) + */ +static void +tbl_hrule(struct termp *tp, const struct tbl_span *sp, int kind) +{ + const struct tbl_cell *c1, *c2; + int vert; + char line, cross; + + line = (kind == 0 && TBL_SPAN_DHORIZ == sp->pos) ? '=' : '-'; + cross = (kind < 2) ? '+' : '-'; + + if (kind) + term_word(tp, "+"); + c1 = sp->layout->first; + c2 = sp->prev == NULL ? 
NULL : sp->prev->layout->first; + if (c2 == c1) + c2 = NULL; + for (;;) { + tbl_char(tp, line, tp->tbl.cols[c1->col].width + 1); + vert = c1->vert; + if ((c1 = c1->next) == NULL) + break; + if (c2 != NULL) { + if (vert < c2->vert) + vert = c2->vert; + c2 = c2->next; + } + if (vert) + tbl_char(tp, cross, vert); + if (vert < 2) + tbl_char(tp, line, 2 - vert); + } + if (kind) { + term_word(tp, "+"); + term_flushln(tp); + } +} + +static void +tbl_data(struct termp *tp, const struct tbl_opts *opts, + const struct tbl_dat *dp, + const struct roffcol *col) +{ + + if (dp == NULL) { + tbl_char(tp, ASCII_NBRSP, col->width); + return; + } + + switch (dp->pos) { + case TBL_DATA_NONE: + tbl_char(tp, ASCII_NBRSP, col->width); + return; + case TBL_DATA_HORIZ: + case TBL_DATA_NHORIZ: + tbl_char(tp, '-', col->width); + return; + case TBL_DATA_NDHORIZ: + case TBL_DATA_DHORIZ: + tbl_char(tp, '=', col->width); + return; + default: + break; + } + + switch (dp->layout->pos) { + case TBL_CELL_HORIZ: + tbl_char(tp, '-', col->width); + break; + case TBL_CELL_DHORIZ: + tbl_char(tp, '=', col->width); + break; + case TBL_CELL_LONG: + case TBL_CELL_CENTRE: + case TBL_CELL_LEFT: + case TBL_CELL_RIGHT: + tbl_literal(tp, dp, col); + break; + case TBL_CELL_NUMBER: + tbl_number(tp, opts, dp, col); + break; + case TBL_CELL_DOWN: + tbl_char(tp, ASCII_NBRSP, col->width); + break; + default: + abort(); + } +} + +static void +tbl_char(struct termp *tp, char c, size_t len) +{ + size_t i, sz; + char cp[2]; + + cp[0] = c; + cp[1] = '\0'; + + sz = term_strlen(tp, cp); + + for (i = 0; i < len; i += sz) + term_word(tp, cp); +} + +static void +tbl_literal(struct termp *tp, const struct tbl_dat *dp, + const struct roffcol *col) +{ + size_t len, padl, padr, width; + int ic, spans; + + assert(dp->string); + len = term_strlen(tp, dp->string); + width = col->width; + ic = dp->layout->col; + spans = dp->spans; + while (spans--) + width += tp->tbl.cols[++ic].width + 3; + + padr = width > len ? 
width - len : 0; + padl = 0; + + switch (dp->layout->pos) { + case TBL_CELL_LONG: + padl = term_len(tp, 1); + padr = padr > padl ? padr - padl : 0; + break; + case TBL_CELL_CENTRE: + if (2 > padr) + break; + padl = padr / 2; + padr -= padl; + break; + case TBL_CELL_RIGHT: + padl = padr; + padr = 0; + break; + default: + break; + } + + tbl_char(tp, ASCII_NBRSP, padl); + tbl_word(tp, dp); + tbl_char(tp, ASCII_NBRSP, padr); +} + +static void +tbl_number(struct termp *tp, const struct tbl_opts *opts, + const struct tbl_dat *dp, + const struct roffcol *col) +{ + char *cp; + char buf[2]; + size_t sz, psz, ssz, d, padl; + int i; + + /* + * See calc_data_number(). Left-pad by taking the offset of our + * and the maximum decimal; right-pad by the remaining amount. + */ + + assert(dp->string); + + sz = term_strlen(tp, dp->string); + + buf[0] = opts->decimal; + buf[1] = '\0'; + + psz = term_strlen(tp, buf); + + if ((cp = strrchr(dp->string, opts->decimal)) != NULL) { + for (ssz = 0, i = 0; cp != &dp->string[i]; i++) { + buf[0] = dp->string[i]; + ssz += term_strlen(tp, buf); + } + d = ssz + psz; + } else + d = sz + psz; + + if (col->decimal > d && col->width > sz) { + padl = col->decimal - d; + if (padl + sz > col->width) + padl = col->width - sz; + tbl_char(tp, ASCII_NBRSP, padl); + } else + padl = 0; + tbl_word(tp, dp); + if (col->width > sz + padl) + tbl_char(tp, ASCII_NBRSP, col->width - sz - padl); +} + +static void +tbl_word(struct termp *tp, const struct tbl_dat *dp) +{ + int prev_font; + + prev_font = tp->fonti; + if (dp->layout->flags & TBL_CELL_BOLD) + term_fontpush(tp, TERMFONT_BOLD); + else if (dp->layout->flags & TBL_CELL_ITALIC) + term_fontpush(tp, TERMFONT_UNDER); + + term_word(tp, dp->string); + + term_fontpopq(tp, prev_font); +} diff --git a/contrib/mdocml/term.c b/contrib/mdocml/term.c new file mode 100644 index 0000000..e917101 --- /dev/null +++ b/contrib/mdocml/term.c @@ -0,0 +1,829 @@ +/* $Id: term.c,v 1.256 2016/01/07 21:03:54 schwarze Exp $ */ +/* + * 
Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "mandoc_aux.h" +#include "out.h" +#include "term.h" +#include "main.h" + +static size_t cond_width(const struct termp *, int, int *); +static void adjbuf(struct termp *p, size_t); +static void bufferc(struct termp *, char); +static void encode(struct termp *, const char *, size_t); +static void encode1(struct termp *, int); + + +void +term_free(struct termp *p) +{ + + free(p->buf); + free(p->fontq); + free(p); +} + +void +term_begin(struct termp *p, term_margin head, + term_margin foot, const struct roff_meta *arg) +{ + + p->headf = head; + p->footf = foot; + p->argf = arg; + (*p->begin)(p); +} + +void +term_end(struct termp *p) +{ + + (*p->end)(p); +} + +/* + * Flush a chunk of text. By default, break the output line each time + * the right margin is reached, and continue output on the next line + * at the same offset as the chunk itself. By default, also break the + * output line at the end of the chunk. 
+ * The following flags may be specified: + * + * - TERMP_NOBREAK: Do not break the output line at the right margin, + * but only at the max right margin. Also, do not break the output + * line at the end of the chunk, such that the next call can pad to + * the next column. However, if less than p->trailspace blanks, + * which can be 0, 1, or 2, remain to the right margin, the line + * will be broken. + * - TERMP_BRTRSP: Consider trailing whitespace significant + * when deciding whether the chunk fits or not. + * - TERMP_BRIND: If the chunk does not fit and the output line has + * to be broken, start the next line at the right margin instead + * of at the offset. Used together with TERMP_NOBREAK for the tags + * in various kinds of tagged lists. + * - TERMP_DANGLE: Do not break the output line at the right margin, + * append the next chunk after it even if this one is too long. + * To be used together with TERMP_NOBREAK. + * - TERMP_HANG: Like TERMP_DANGLE, and also suppress padding before + * the next chunk if this column is not full. + */ +void +term_flushln(struct termp *p) +{ + size_t i; /* current input position in p->buf */ + int ntab; /* number of tabs to prepend */ + size_t vis; /* current visual position on output */ + size_t vbl; /* number of blanks to prepend to output */ + size_t vend; /* end of word visual position on output */ + size_t bp; /* visual right border position */ + size_t dv; /* temporary for visual pos calculations */ + size_t j; /* temporary loop index for p->buf */ + size_t jhy; /* last hyph before overflow w/r/t j */ + size_t maxvis; /* output position of visible boundary */ + + /* + * First, establish the maximum columns of "visible" content. + * This is usually the difference between the right-margin and + * an indentation, but can be, for tagged lists or columns, a + * small set of values. + * + * The following unsigned-signed subtractions look strange, + * but they are actually correct. 
If the int p->overstep + * is negative, it gets sign extended. Subtracting that + * very large size_t effectively adds a small number to dv. + */ + dv = p->rmargin > p->offset ? p->rmargin - p->offset : 0; + maxvis = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0; + + if (p->flags & TERMP_NOBREAK) { + dv = p->maxrmargin > p->offset ? + p->maxrmargin - p->offset : 0; + bp = (int)dv > p->overstep ? + dv - (size_t)p->overstep : 0; + } else + bp = maxvis; + + /* + * Calculate the required amount of padding. + */ + vbl = p->offset + p->overstep > p->viscol ? + p->offset + p->overstep - p->viscol : 0; + + vis = vend = 0; + i = 0; + + while (i < p->col) { + /* + * Handle literal tab characters: collapse all + * subsequent tabs into a single huge set of spaces. + */ + ntab = 0; + while (i < p->col && '\t' == p->buf[i]) { + vend = (vis / p->tabwidth + 1) * p->tabwidth; + vbl += vend - vis; + vis = vend; + ntab++; + i++; + } + + /* + * Count up visible word characters. Control sequences + * (starting with the CSI) aren't counted. A space + * generates a non-printing word, which is valid (the + * space is printed according to regular spacing rules). + */ + + for (j = i, jhy = 0; j < p->col; j++) { + if (' ' == p->buf[j] || '\t' == p->buf[j]) + break; + + /* Back over the the last printed character. */ + if (8 == p->buf[j]) { + assert(j); + vend -= (*p->width)(p, p->buf[j - 1]); + continue; + } + + /* Regular word. */ + /* Break at the hyphen point if we overrun. */ + if (vend > vis && vend < bp && + (ASCII_HYPH == p->buf[j] || + ASCII_BREAK == p->buf[j])) + jhy = j; + + /* + * Hyphenation now decided, put back a real + * hyphen such that we get the correct width. + */ + if (ASCII_HYPH == p->buf[j]) + p->buf[j] = '-'; + + vend += (*p->width)(p, p->buf[j]); + } + + /* + * Find out whether we would exceed the right margin. + * If so, break to the next line. 
+ */ + if (vend > bp && 0 == jhy && vis > 0) { + vend -= vis; + (*p->endline)(p); + p->viscol = 0; + if (TERMP_BRIND & p->flags) { + vbl = p->rmargin; + vend += p->rmargin; + vend -= p->offset; + } else + vbl = p->offset; + + /* use pending tabs on the new line */ + + if (0 < ntab) + vbl += ntab * p->tabwidth; + + /* + * Remove the p->overstep width. + * Again, if p->overstep is negative, + * sign extension does the right thing. + */ + + bp += (size_t)p->overstep; + p->overstep = 0; + } + + /* Write out the [remaining] word. */ + for ( ; i < p->col; i++) { + if (vend > bp && jhy > 0 && i > jhy) + break; + if ('\t' == p->buf[i]) + break; + if (' ' == p->buf[i]) { + j = i; + while (i < p->col && ' ' == p->buf[i]) + i++; + dv = (i - j) * (*p->width)(p, ' '); + vbl += dv; + vend += dv; + break; + } + if (ASCII_NBRSP == p->buf[i]) { + vbl += (*p->width)(p, ' '); + continue; + } + if (ASCII_BREAK == p->buf[i]) + continue; + + /* + * Now we definitely know there will be + * printable characters to output, + * so write preceding white space now. + */ + if (vbl) { + (*p->advance)(p, vbl); + p->viscol += vbl; + vbl = 0; + } + + (*p->letter)(p, p->buf[i]); + if (8 == p->buf[i]) + p->viscol -= (*p->width)(p, p->buf[i-1]); + else + p->viscol += (*p->width)(p, p->buf[i]); + } + vis = vend; + } + + /* + * If there was trailing white space, it was not printed; + * so reset the cursor position accordingly. + */ + if (vis > vbl) + vis -= vbl; + else + vis = 0; + + p->col = 0; + p->overstep = 0; + p->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE); + + if ( ! (TERMP_NOBREAK & p->flags)) { + p->viscol = 0; + (*p->endline)(p); + return; + } + + if (TERMP_HANG & p->flags) { + p->overstep += (int)(p->offset + vis - p->rmargin + + p->trailspace * (*p->width)(p, ' ')); + + /* + * If we have overstepped the margin, temporarily move + * it to the right and flag the rest of the line to be + * shorter. 
+ * If there is a request to keep the columns together, + * allow negative overstep when the column is not full. + */ + if (p->trailspace && p->overstep < 0) + p->overstep = 0; + return; + + } else if (TERMP_DANGLE & p->flags) + return; + + /* Trailing whitespace is significant in some columns. */ + if (vis && vbl && (TERMP_BRTRSP & p->flags)) + vis += vbl; + + /* If the column was overrun, break the line. */ + if (maxvis < vis + p->trailspace * (*p->width)(p, ' ')) { + (*p->endline)(p); + p->viscol = 0; + } +} + +/* + * A newline only breaks an existing line; it won't assert vertical + * space. All data in the output buffer is flushed prior to the newline + * assertion. + */ +void +term_newln(struct termp *p) +{ + + p->flags |= TERMP_NOSPACE; + if (p->col || p->viscol) + term_flushln(p); +} + +/* + * Asserts a vertical space (a full, empty line-break between lines). + * Note that if used twice, this will cause two blank spaces and so on. + * All data in the output buffer is flushed prior to the newline + * assertion. + */ +void +term_vspace(struct termp *p) +{ + + term_newln(p); + p->viscol = 0; + if (0 < p->skipvsp) + p->skipvsp--; + else + (*p->endline)(p); +} + +/* Swap current and previous font; for \fP and .ft P */ +void +term_fontlast(struct termp *p) +{ + enum termfont f; + + f = p->fontl; + p->fontl = p->fontq[p->fonti]; + p->fontq[p->fonti] = f; +} + +/* Set font, save current, discard previous; for \f, .ft, .B etc. */ +void +term_fontrepl(struct termp *p, enum termfont f) +{ + + p->fontl = p->fontq[p->fonti]; + p->fontq[p->fonti] = f; +} + +/* Set font, save previous. */ +void +term_fontpush(struct termp *p, enum termfont f) +{ + + p->fontl = p->fontq[p->fonti]; + if (++p->fonti == p->fontsz) { + p->fontsz += 8; + p->fontq = mandoc_reallocarray(p->fontq, + p->fontsz, sizeof(*p->fontq)); + } + p->fontq[p->fonti] = f; +} + +/* Flush to make the saved pointer current again. 
*/ +void +term_fontpopq(struct termp *p, int i) +{ + + assert(i >= 0); + if (p->fonti > i) + p->fonti = i; +} + +/* Pop one font off the stack. */ +void +term_fontpop(struct termp *p) +{ + + assert(p->fonti); + p->fonti--; +} + +/* + * Handle pwords, partial words, which may be either a single word or a + * phrase that cannot be broken down (such as a literal string). This + * handles word styling. + */ +void +term_word(struct termp *p, const char *word) +{ + const char nbrsp[2] = { ASCII_NBRSP, 0 }; + const char *seq, *cp; + int sz, uc; + size_t ssz; + enum mandoc_esc esc; + + if ( ! (TERMP_NOSPACE & p->flags)) { + if ( ! (TERMP_KEEP & p->flags)) { + bufferc(p, ' '); + if (TERMP_SENTENCE & p->flags) + bufferc(p, ' '); + } else + bufferc(p, ASCII_NBRSP); + } + if (TERMP_PREKEEP & p->flags) + p->flags |= TERMP_KEEP; + + if ( ! (p->flags & TERMP_NONOSPACE)) + p->flags &= ~TERMP_NOSPACE; + else + p->flags |= TERMP_NOSPACE; + + p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE); + p->skipvsp = 0; + + while ('\0' != *word) { + if ('\\' != *word) { + if (TERMP_NBRWORD & p->flags) { + if (' ' == *word) { + encode(p, nbrsp, 1); + word++; + continue; + } + ssz = strcspn(word, "\\ "); + } else + ssz = strcspn(word, "\\"); + encode(p, word, ssz); + word += (int)ssz; + continue; + } + + word++; + esc = mandoc_escape(&word, &seq, &sz); + if (ESCAPE_ERROR == esc) + continue; + + switch (esc) { + case ESCAPE_UNICODE: + uc = mchars_num2uc(seq + 1, sz - 1); + break; + case ESCAPE_NUMBERED: + uc = mchars_num2char(seq, sz); + if (uc < 0) + continue; + break; + case ESCAPE_SPECIAL: + if (p->enc == TERMENC_ASCII) { + cp = mchars_spec2str(seq, sz, &ssz); + if (cp != NULL) + encode(p, cp, ssz); + } else { + uc = mchars_spec2cp(seq, sz); + if (uc > 0) + encode1(p, uc); + } + continue; + case ESCAPE_FONTBOLD: + term_fontrepl(p, TERMFONT_BOLD); + continue; + case ESCAPE_FONTITALIC: + term_fontrepl(p, TERMFONT_UNDER); + continue; + case ESCAPE_FONTBI: + term_fontrepl(p, TERMFONT_BI); + 
continue; + case ESCAPE_FONT: + case ESCAPE_FONTROMAN: + term_fontrepl(p, TERMFONT_NONE); + continue; + case ESCAPE_FONTPREV: + term_fontlast(p); + continue; + case ESCAPE_NOSPACE: + if (p->flags & TERMP_BACKAFTER) + p->flags &= ~TERMP_BACKAFTER; + else if (*word == '\0') + p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE); + continue; + case ESCAPE_SKIPCHAR: + p->flags |= TERMP_BACKAFTER; + continue; + case ESCAPE_OVERSTRIKE: + cp = seq + sz; + while (seq < cp) { + if (*seq == '\\') { + mandoc_escape(&seq, NULL, NULL); + continue; + } + encode1(p, *seq++); + if (seq < cp) { + if (p->flags & TERMP_BACKBEFORE) + p->flags |= TERMP_BACKAFTER; + else + p->flags |= TERMP_BACKBEFORE; + } + } + /* Trim trailing backspace/blank pair. */ + if (p->col > 2 && p->buf[p->col - 1] == ' ') + p->col -= 2; + continue; + default: + continue; + } + + /* + * Common handling for Unicode and numbered + * character escape sequences. + */ + + if (p->enc == TERMENC_ASCII) { + cp = ascii_uc2str(uc); + encode(p, cp, strlen(cp)); + } else { + if ((uc < 0x20 && uc != 0x09) || + (uc > 0x7E && uc < 0xA0)) + uc = 0xFFFD; + encode1(p, uc); + } + } + p->flags &= ~TERMP_NBRWORD; +} + +static void +adjbuf(struct termp *p, size_t sz) +{ + + if (0 == p->maxcols) + p->maxcols = 1024; + while (sz >= p->maxcols) + p->maxcols <<= 2; + + p->buf = mandoc_reallocarray(p->buf, p->maxcols, sizeof(int)); +} + +static void +bufferc(struct termp *p, char c) +{ + + if (p->col + 1 >= p->maxcols) + adjbuf(p, p->col + 1); + + p->buf[p->col++] = c; +} + +/* + * See encode(). + * Do this for a single (probably unicode) value. + * Does not check for non-decorated glyphs. + */ +static void +encode1(struct termp *p, int c) +{ + enum termfont f; + + if (p->col + 7 >= p->maxcols) + adjbuf(p, p->col + 7); + + f = (c == ASCII_HYPH || c > 127 || isgraph(c)) ? 
+ p->fontq[p->fonti] : TERMFONT_NONE; + + if (p->flags & TERMP_BACKBEFORE) { + if (p->buf[p->col - 1] == ' ') + p->col--; + else + p->buf[p->col++] = 8; + p->flags &= ~TERMP_BACKBEFORE; + } + if (TERMFONT_UNDER == f || TERMFONT_BI == f) { + p->buf[p->col++] = '_'; + p->buf[p->col++] = 8; + } + if (TERMFONT_BOLD == f || TERMFONT_BI == f) { + if (ASCII_HYPH == c) + p->buf[p->col++] = '-'; + else + p->buf[p->col++] = c; + p->buf[p->col++] = 8; + } + p->buf[p->col++] = c; + if (p->flags & TERMP_BACKAFTER) { + p->flags |= TERMP_BACKBEFORE; + p->flags &= ~TERMP_BACKAFTER; + } +} + +static void +encode(struct termp *p, const char *word, size_t sz) +{ + size_t i; + + if (p->col + 2 + (sz * 5) >= p->maxcols) + adjbuf(p, p->col + 2 + (sz * 5)); + + for (i = 0; i < sz; i++) { + if (ASCII_HYPH == word[i] || + isgraph((unsigned char)word[i])) + encode1(p, word[i]); + else + p->buf[p->col++] = word[i]; + } +} + +void +term_setwidth(struct termp *p, const char *wstr) +{ + struct roffsu su; + int iop, width; + + iop = 0; + width = 0; + if (NULL != wstr) { + switch (*wstr) { + case '+': + iop = 1; + wstr++; + break; + case '-': + iop = -1; + wstr++; + break; + default: + break; + } + if (a2roffsu(wstr, &su, SCALE_MAX)) + width = term_hspan(p, &su); + else + iop = 0; + } + (*p->setwidth)(p, iop, width); +} + +size_t +term_len(const struct termp *p, size_t sz) +{ + + return (*p->width)(p, ' ') * sz; +} + +static size_t +cond_width(const struct termp *p, int c, int *skip) +{ + + if (*skip) { + (*skip) = 0; + return 0; + } else + return (*p->width)(p, c); +} + +size_t +term_strlen(const struct termp *p, const char *cp) +{ + size_t sz, rsz, i; + int ssz, skip, uc; + const char *seq, *rhs; + enum mandoc_esc esc; + static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH, + ASCII_BREAK, '\0' }; + + /* + * Account for escaped sequences within string length + * calculations. This follows the logic in term_word() as we + * must calculate the width of produced strings. 
+ */ + + sz = 0; + skip = 0; + while ('\0' != *cp) { + rsz = strcspn(cp, rej); + for (i = 0; i < rsz; i++) + sz += cond_width(p, *cp++, &skip); + + switch (*cp) { + case '\\': + cp++; + esc = mandoc_escape(&cp, &seq, &ssz); + if (ESCAPE_ERROR == esc) + continue; + + rhs = NULL; + + switch (esc) { + case ESCAPE_UNICODE: + uc = mchars_num2uc(seq + 1, ssz - 1); + break; + case ESCAPE_NUMBERED: + uc = mchars_num2char(seq, ssz); + if (uc < 0) + continue; + break; + case ESCAPE_SPECIAL: + if (p->enc == TERMENC_ASCII) { + rhs = mchars_spec2str(seq, ssz, &rsz); + if (rhs != NULL) + break; + } else { + uc = mchars_spec2cp(seq, ssz); + if (uc > 0) + sz += cond_width(p, uc, &skip); + } + continue; + case ESCAPE_SKIPCHAR: + skip = 1; + continue; + case ESCAPE_OVERSTRIKE: + rsz = 0; + rhs = seq + ssz; + while (seq < rhs) { + if (*seq == '\\') { + mandoc_escape(&seq, NULL, NULL); + continue; + } + i = (*p->width)(p, *seq++); + if (rsz < i) + rsz = i; + } + sz += rsz; + continue; + default: + continue; + } + + /* + * Common handling for Unicode and numbered + * character escape sequences. + */ + + if (rhs == NULL) { + if (p->enc == TERMENC_ASCII) { + rhs = ascii_uc2str(uc); + rsz = strlen(rhs); + } else { + if ((uc < 0x20 && uc != 0x09) || + (uc > 0x7E && uc < 0xA0)) + uc = 0xFFFD; + sz += cond_width(p, uc, &skip); + continue; + } + } + + if (skip) { + skip = 0; + break; + } + + /* + * Common handling for all escape sequences + * printing more than one character. 
+ */ + + for (i = 0; i < rsz; i++) + sz += (*p->width)(p, *rhs++); + break; + case ASCII_NBRSP: + sz += cond_width(p, ' ', &skip); + cp++; + break; + case ASCII_HYPH: + sz += cond_width(p, '-', &skip); + cp++; + break; + default: + break; + } + } + + return sz; +} + +int +term_vspan(const struct termp *p, const struct roffsu *su) +{ + double r; + int ri; + + switch (su->unit) { + case SCALE_BU: + r = su->scale / 40.0; + break; + case SCALE_CM: + r = su->scale * 6.0 / 2.54; + break; + case SCALE_FS: + r = su->scale * 65536.0 / 40.0; + break; + case SCALE_IN: + r = su->scale * 6.0; + break; + case SCALE_MM: + r = su->scale * 0.006; + break; + case SCALE_PC: + r = su->scale; + break; + case SCALE_PT: + r = su->scale / 12.0; + break; + case SCALE_EN: + case SCALE_EM: + r = su->scale * 0.6; + break; + case SCALE_VS: + r = su->scale; + break; + default: + abort(); + } + ri = r > 0.0 ? r + 0.4995 : r - 0.4995; + return ri < 66 ? ri : 1; +} + +/* + * Convert a scaling width to basic units, rounding down. + */ +int +term_hspan(const struct termp *p, const struct roffsu *su) +{ + + return (*p->hspan)(p, su); +} diff --git a/contrib/mdocml/term.h b/contrib/mdocml/term.h new file mode 100644 index 0000000..fabc117 --- /dev/null +++ b/contrib/mdocml/term.h @@ -0,0 +1,135 @@ +/* $Id: term.h,v 1.118 2015/11/07 14:01:16 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011-2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +enum termenc { + TERMENC_ASCII, + TERMENC_LOCALE, + TERMENC_UTF8 +}; + +enum termtype { + TERMTYPE_CHAR, + TERMTYPE_PS, + TERMTYPE_PDF +}; + +enum termfont { + TERMFONT_NONE = 0, + TERMFONT_BOLD, + TERMFONT_UNDER, + TERMFONT_BI, + TERMFONT__MAX +}; + +#define TERM_MAXMARGIN 100000 /* FIXME */ + +struct roff_meta; +struct termp; + +typedef void (*term_margin)(struct termp *, const struct roff_meta *); + +struct termp_tbl { + int width; /* width in fixed chars */ + int decimal; /* decimal point position */ +}; + +struct termp { + enum termtype type; + struct rofftbl tbl; /* table configuration */ + int synopsisonly; /* print the synopsis only */ + int mdocstyle; /* imitate mdoc(7) output */ + size_t line; /* Current output line number. */ + size_t defindent; /* Default indent for text. */ + size_t defrmargin; /* Right margin of the device. */ + size_t lastrmargin; /* Right margin before the last ll. */ + size_t rmargin; /* Current right margin. */ + size_t maxrmargin; /* Max right margin. */ + size_t maxcols; /* Max size of buf. */ + size_t offset; /* Margin offset. */ + size_t tabwidth; /* Distance of tab positions. */ + size_t col; /* Entries used in buf. */ + size_t viscol; /* Chars on current line. */ + size_t trailspace; /* See term_flushln(). */ + int overstep; /* See term_flushln(). */ + int skipvsp; /* Vertical space to skip. */ + int flags; +#define TERMP_SENTENCE (1 << 0) /* Space before a sentence. */ +#define TERMP_NOSPACE (1 << 1) /* No space before words. */ +#define TERMP_NONOSPACE (1 << 2) /* No space (no autounset). */ +#define TERMP_NBRWORD (1 << 3) /* Make next word nonbreaking. 
*/ +#define TERMP_KEEP (1 << 4) /* Keep words together. */ +#define TERMP_PREKEEP (1 << 5) /* ...starting with the next one. */ +#define TERMP_BACKAFTER (1 << 6) /* Back up after next character. */ +#define TERMP_BACKBEFORE (1 << 7) /* Back up before next character. */ +#define TERMP_NOBREAK (1 << 8) /* See term_flushln(). */ +#define TERMP_BRTRSP (1 << 9) /* See term_flushln(). */ +#define TERMP_BRIND (1 << 10) /* See term_flushln(). */ +#define TERMP_DANGLE (1 << 11) /* See term_flushln(). */ +#define TERMP_HANG (1 << 12) /* See term_flushln(). */ +#define TERMP_NOSPLIT (1 << 13) /* Do not break line before .An. */ +#define TERMP_SPLIT (1 << 14) /* Break line before .An. */ +#define TERMP_NONEWLINE (1 << 15) /* No line break in nofill mode. */ + int *buf; /* Output buffer. */ + enum termenc enc; /* Type of encoding. */ + enum termfont fontl; /* Last font set. */ + enum termfont *fontq; /* Symmetric fonts. */ + int fontsz; /* Allocated size of font stack */ + int fonti; /* Index of font stack. 
*/ + term_margin headf; /* invoked to print head */ + term_margin footf; /* invoked to print foot */ + void (*letter)(struct termp *, int); + void (*begin)(struct termp *); + void (*end)(struct termp *); + void (*endline)(struct termp *); + void (*advance)(struct termp *, size_t); + void (*setwidth)(struct termp *, int, int); + size_t (*width)(const struct termp *, int); + int (*hspan)(const struct termp *, + const struct roffsu *); + const void *argf; /* arg for headf/footf */ + struct termp_ps *ps; +}; + + +struct tbl_span; +struct eqn; + +const char *ascii_uc2str(int); + +void term_eqn(struct termp *, const struct eqn *); +void term_tbl(struct termp *, const struct tbl_span *); +void term_free(struct termp *); +void term_newln(struct termp *); +void term_vspace(struct termp *); +void term_word(struct termp *, const char *); +void term_flushln(struct termp *); +void term_begin(struct termp *, term_margin, + term_margin, const struct roff_meta *); +void term_end(struct termp *); + +void term_setwidth(struct termp *, const char *); +int term_hspan(const struct termp *, const struct roffsu *); +int term_vspan(const struct termp *, const struct roffsu *); +size_t term_strlen(const struct termp *, const char *); +size_t term_len(const struct termp *, size_t); + +void term_fontpush(struct termp *, enum termfont); +void term_fontpop(struct termp *); +void term_fontpopq(struct termp *, int); +void term_fontrepl(struct termp *, enum termfont); +void term_fontlast(struct termp *); diff --git a/contrib/mdocml/term_ascii.c b/contrib/mdocml/term_ascii.c new file mode 100644 index 0000000..7215a59 --- /dev/null +++ b/contrib/mdocml/term_ascii.c @@ -0,0 +1,383 @@ +/* $Id: term_ascii.c,v 1.52 2015/11/12 21:50:03 schwarze Exp $ */ +/* + * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is 
hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#if HAVE_WCHAR +#include <locale.h> +#endif +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#if HAVE_WCHAR +#include <wchar.h> +#endif + +#include "mandoc.h" +#include "mandoc_aux.h" +#include "out.h" +#include "term.h" +#include "manconf.h" +#include "main.h" + +static struct termp *ascii_init(enum termenc, const struct manoutput *); +static int ascii_hspan(const struct termp *, + const struct roffsu *); +static size_t ascii_width(const struct termp *, int); +static void ascii_advance(struct termp *, size_t); +static void ascii_begin(struct termp *); +static void ascii_end(struct termp *); +static void ascii_endline(struct termp *); +static void ascii_letter(struct termp *, int); +static void ascii_setwidth(struct termp *, int, int); + +#if HAVE_WCHAR +static void locale_advance(struct termp *, size_t); +static void locale_endline(struct termp *); +static void locale_letter(struct termp *, int); +static size_t locale_width(const struct termp *, int); +#endif + + +static struct termp * +ascii_init(enum termenc enc, const struct manoutput *outopts) +{ +#if HAVE_WCHAR + char *v; +#endif + struct termp *p; + + p = mandoc_calloc(1, sizeof(struct termp)); + + p->line = 1; + p->tabwidth = 5; + p->defrmargin = p->lastrmargin = 78; + p->fontq = 
mandoc_reallocarray(NULL, + (p->fontsz = 8), sizeof(enum termfont)); + p->fontq[0] = p->fontl = TERMFONT_NONE; + + p->begin = ascii_begin; + p->end = ascii_end; + p->hspan = ascii_hspan; + p->type = TERMTYPE_CHAR; + + p->enc = TERMENC_ASCII; + p->advance = ascii_advance; + p->endline = ascii_endline; + p->letter = ascii_letter; + p->setwidth = ascii_setwidth; + p->width = ascii_width; + +#if HAVE_WCHAR + if (TERMENC_ASCII != enc) { + + /* + * Do not change any of this to LC_ALL. It might break + * the formatting by subtly changing the behaviour of + * various functions, for example strftime(3). As a + * worst case, it might even cause buffer overflows. + */ + + v = TERMENC_LOCALE == enc ? + setlocale(LC_CTYPE, "") : + setlocale(LC_CTYPE, "en_US.UTF-8"); + if (NULL != v && MB_CUR_MAX > 1) { + p->enc = enc; + p->advance = locale_advance; + p->endline = locale_endline; + p->letter = locale_letter; + p->width = locale_width; + } + } +#endif + + if (outopts->mdoc) { + p->mdocstyle = 1; + p->defindent = 5; + } + if (outopts->indent) + p->defindent = outopts->indent; + if (outopts->width) + p->defrmargin = outopts->width; + if (outopts->synopsisonly) + p->synopsisonly = 1; + + return p; +} + +void * +ascii_alloc(const struct manoutput *outopts) +{ + + return ascii_init(TERMENC_ASCII, outopts); +} + +void * +utf8_alloc(const struct manoutput *outopts) +{ + + return ascii_init(TERMENC_UTF8, outopts); +} + +void * +locale_alloc(const struct manoutput *outopts) +{ + + return ascii_init(TERMENC_LOCALE, outopts); +} + +static void +ascii_setwidth(struct termp *p, int iop, int width) +{ + + width /= 24; + p->rmargin = p->defrmargin; + if (iop > 0) + p->defrmargin += width; + else if (iop == 0) + p->defrmargin = width ? 
(size_t)width : p->lastrmargin; + else if (p->defrmargin > (size_t)width) + p->defrmargin -= width; + else + p->defrmargin = 0; + p->lastrmargin = p->rmargin; + p->rmargin = p->maxrmargin = p->defrmargin; +} + +void +ascii_sepline(void *arg) +{ + struct termp *p; + size_t i; + + p = (struct termp *)arg; + p->line += 3; + putchar('\n'); + for (i = 0; i < p->defrmargin; i++) + putchar('-'); + putchar('\n'); + putchar('\n'); +} + +static size_t +ascii_width(const struct termp *p, int c) +{ + + return 1; +} + +void +ascii_free(void *arg) +{ + + term_free((struct termp *)arg); +} + +static void +ascii_letter(struct termp *p, int c) +{ + + putchar(c); +} + +static void +ascii_begin(struct termp *p) +{ + + (*p->headf)(p, p->argf); +} + +static void +ascii_end(struct termp *p) +{ + + (*p->footf)(p, p->argf); +} + +static void +ascii_endline(struct termp *p) +{ + + p->line++; + putchar('\n'); +} + +static void +ascii_advance(struct termp *p, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) + putchar(' '); +} + +static int +ascii_hspan(const struct termp *p, const struct roffsu *su) +{ + double r; + + switch (su->unit) { + case SCALE_BU: + r = su->scale; + break; + case SCALE_CM: + r = su->scale * 240.0 / 2.54; + break; + case SCALE_FS: + r = su->scale * 65536.0; + break; + case SCALE_IN: + r = su->scale * 240.0; + break; + case SCALE_MM: + r = su->scale * 0.24; + break; + case SCALE_VS: + case SCALE_PC: + r = su->scale * 40.0; + break; + case SCALE_PT: + r = su->scale * 10.0 / 3.0; + break; + case SCALE_EN: + case SCALE_EM: + r = su->scale * 24.0; + break; + default: + abort(); + } + return r > 0.0 ? 
r + 0.01 : r - 0.01; +} + +/* + * Return an ASCII rendering of the code point uc. + * Code points that index into tab[] are looked up directly; + * anything beyond the table is delegated to mchars_uc2str(). + * The caller must pass a non-negative uc (asserted). + */ +const char * +ascii_uc2str(int uc) +{ + static const char nbrsp[2] = { ASCII_NBRSP, '\0' }; + static const char *tab[] = { + "<NUL>","<SOH>","<STX>","<ETX>","<EOT>","<ENQ>","<ACK>","<BEL>", + "<BS>", "\t", "<LF>", "<VT>", "<FF>", "<CR>", "<SO>", "<SI>", + "<DLE>","<DC1>","<DC2>","<DC3>","<DC4>","<NAK>","<SYN>","<ETB>", + "<CAN>","<EM>", "<SUB>","<ESC>","<FS>", "<GS>", "<RS>", "<US>", + " ", "!", "\"", "#", "$", "%", "&", "'", + "(", ")", "*", "+", ",", "-", ".", "/", + "0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", ":", ";", "<", "=", ">", "?", + "@", "A", "B", "C", "D", "E", "F", "G", + "H", "I", "J", "K", "L", "M", "N", "O", + "P", "Q", "R", "S", "T", "U", "V", "W", + "X", "Y", "Z", "[", "\\", "]", "^", "_", + "`", "a", "b", "c", "d", "e", "f", "g", + "h", "i", "j", "k", "l", "m", "n", "o", + "p", "q", "r", "s", "t", "u", "v", "w", + "x", "y", "z", "{", "|", "}", "~", "<DEL>", + "<80>", "<81>", "<82>", "<83>", "<84>", "<85>", "<86>", "<87>", + "<88>", "<89>", "<8A>", "<8B>", "<8C>", "<8D>", "<8E>", "<8F>", + "<90>", "<91>", "<92>", "<93>", "<94>", "<95>", "<96>", "<97>", + "<98>", "<99>", "<9A>", "<9B>", "<9C>", "<9D>", "<9E>", "<9F>", + nbrsp, "!", "/\bc", "GBP", "o\bx", "=\bY", "|", "<sec>", + "\"", "(C)", "_\ba", "<<", "~", "", "(R)", "-", + "<deg>","+-", "2", "3", "'", ",\bu", "<par>",".", + ",", "1", "_\bo", ">>", "1/4", "1/2", "3/4", "?", + "`\bA", "'\bA", "^\bA", "~\bA", "\"\bA","o\bA", "AE", ",\bC", + "`\bE", "'\bE", "^\bE", "\"\bE","`\bI", "'\bI", "^\bI", "\"\bI", + "-\bD", "~\bN", "`\bO", "'\bO", "^\bO", "~\bO", "\"\bO","x", + "/\bO", "`\bU", "'\bU", "^\bU", "\"\bU","'\bY", "Th", "ss", + "`\ba", "'\ba", "^\ba", "~\ba", "\"\ba","o\ba", "ae", ",\bc", + "`\be", "'\be", "^\be", "\"\be","`\bi", "'\bi", "^\bi", "\"\bi", + "d", "~\bn", "`\bo", "'\bo", "^\bo", "~\bo", "\"\bo","-:-", + "/\bo", "`\bu", "'\bu", "^\bu", "\"\bu","'\by", "th", "\"\by", + "A", "a", "A", "a", "A", "a", "'\bC", "'\bc", + "^\bC", "^\bc", "C", "c", "C", 
"c", "D", "d", + "/\bD", "/\bd", "E", "e", "E", "e", "E", "e", + "E", "e", "E", "e", "^\bG", "^\bg", "G", "g", + "G", "g", ",\bG", ",\bg", "^\bH", "^\bh", "/\bH", "/\bh", + "~\bI", "~\bi", "I", "i", "I", "i", "I", "i", + "I", "i", "IJ", "ij", "^\bJ", "^\bj", ",\bK", ",\bk", + "q", "'\bL", "'\bl", ",\bL", ",\bl", "L", "l", "L", + "l", "/\bL", "/\bl", "'\bN", "'\bn", ",\bN", ",\bn", "N", + "n", "'n", "Ng", "ng", "O", "o", "O", "o", + "O", "o", "OE", "oe", "'\bR", "'\br", ",\bR", ",\br", + "R", "r", "'\bS", "'\bs", "^\bS", "^\bs", ",\bS", ",\bs", + "S", "s", ",\bT", ",\bt", "T", "t", "/\bT", "/\bt", + "~\bU", "~\bu", "U", "u", "U", "u", "U", "u", + "U", "u", "U", "u", "^\bW", "^\bw", "^\bY", "^\by", + "\"\bY","'\bZ", "'\bz", "Z", "z", "Z", "z", "s", + "b", "B", "B", "b", "6", "6", "O", "C", + "c", "D", "D", "D", "d", "d", "3", "@", + "E", "F", ",\bf", "G", "G", "hv", "I", "/\bI", + "K", "k", "/\bl", "l", "W", "N", "n", "~\bO", + "O", "o", "OI", "oi", "P", "p", "YR", "2", + "2", "SH", "sh", "t", "T", "t", "T", "U", + "u", "Y", "V", "Y", "y", "/\bZ", "/\bz", "ZH", + "ZH", "zh", "zh", "/\b2", "5", "5", "ts", "w", + "|", "||", "|=", "!", "DZ", "Dz", "dz", "LJ", + "Lj", "lj", "NJ", "Nj", "nj", "A", "a", "I", + "i", "O", "o", "U", "u", "U", "u", "U", + "u", "U", "u", "U", "u", "@", "A", "a", + "A", "a", "AE", "ae", "/\bG", "/\bg", "G", "g", + "K", "k", "O", "o", "O", "o", "ZH", "zh", + "j", "DZ", "Dz", "dz", "'\bG", "'\bg", "HV", "W", + "`\bN", "`\bn", "A", "a", "'\bAE","'\bae","O", "o"}; + + assert(uc >= 0); + if ((size_t)uc < sizeof(tab)/sizeof(tab[0])) + return tab[uc]; + return mchars_uc2str(uc); +} + +#if HAVE_WCHAR +static size_t +locale_width(const struct termp *p, int c) +{ + int rc; + + if (c == ASCII_NBRSP) + c = ' '; + rc = wcwidth(c); + if (rc < 0) + rc = 0; + return rc; +} + +static void +locale_advance(struct termp *p, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) + putwchar(L' '); +} + +static void +locale_endline(struct termp *p) +{ + + p->line++; 
+ putwchar(L'\n'); +} + +static void +locale_letter(struct termp *p, int c) +{ + + putwchar(c); +} +#endif diff --git a/contrib/mdocml/term_ps.c b/contrib/mdocml/term_ps.c new file mode 100644 index 0000000..6105d55 --- /dev/null +++ b/contrib/mdocml/term_ps.c @@ -0,0 +1,1339 @@ +/* $Id: term_ps.c,v 1.80 2015/12/23 20:50:13 schwarze Exp $ */ +/* + * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#if HAVE_ERR +#include <err.h> +#endif +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "mandoc_aux.h" +#include "out.h" +#include "term.h" +#include "manconf.h" +#include "main.h" + +/* These work the buffer used by the header and footer. */ +#define PS_BUFSLOP 128 + +/* Convert PostScript point "x" to an AFM unit. 
*/ +#define PNT2AFM(p, x) \ + (size_t)((double)(x) * (1000.0 / (double)(p)->ps->scale)) + +/* Convert an AFM unit "x" to PostScript points. */ +#define AFM2PNT(p, x) \ + ((double)(x) / (1000.0 / (double)(p)->ps->scale)) + +struct glyph { + unsigned short wx; /* WX in AFM */ +}; + +struct font { + const char *name; /* FontName in AFM */ +#define MAXCHAR 95 /* total characters we can handle */ + struct glyph gly[MAXCHAR]; /* glyph metrics */ +}; + +struct termp_ps { + int flags; +#define PS_INLINE (1 << 0) /* we're in a word */ +#define PS_MARGINS (1 << 1) /* we're in the margins */ +#define PS_NEWPAGE (1 << 2) /* new page, no words yet */ +#define PS_BACKSP (1 << 3) /* last character was backspace */ + size_t pscol; /* visible column (AFM units) */ + size_t pscolnext; /* used for overstrike */ + size_t psrow; /* visible row (AFM units) */ + char *psmarg; /* margin buf */ + size_t psmargsz; /* margin buf size */ + size_t psmargcur; /* cur index in margin buf */ + char last; /* last non-backspace seen */ + enum termfont lastf; /* last set font */ + enum termfont nextf; /* building next font here */ + size_t scale; /* font scaling factor */ + size_t pages; /* number of pages shown */ + size_t lineheight; /* line height (AFM units) */ + size_t top; /* body top (AFM units) */ + size_t bottom; /* body bottom (AFM units) */ + size_t height; /* page height (AFM units) */ + size_t width; /* page width (AFM units) */ + size_t lastwidth; /* page width before last ll */ + size_t left; /* body left (AFM units) */ + size_t header; /* header pos (AFM units) */ + size_t footer; /* footer pos (AFM units) */ + size_t pdfbytes; /* current output byte */ + size_t pdflastpg; /* byte of last page mark */ + size_t pdfbody; /* start of body object */ + size_t *pdfobjs; /* table of object offsets */ + size_t pdfobjsz; /* size of pdfobjs */ +}; + +static int ps_hspan(const struct termp *, + const struct roffsu *); +static size_t ps_width(const struct termp *, int); +static void 
ps_advance(struct termp *, size_t); +static void ps_begin(struct termp *); +static void ps_closepage(struct termp *); +static void ps_end(struct termp *); +static void ps_endline(struct termp *); +static void ps_fclose(struct termp *); +static void ps_growbuf(struct termp *, size_t); +static void ps_letter(struct termp *, int); +static void ps_pclose(struct termp *); +static void ps_pletter(struct termp *, int); +#if __GNUC__ - 0 >= 4 +__attribute__((__format__ (__printf__, 2, 3))) +#endif +static void ps_printf(struct termp *, const char *, ...); +static void ps_putchar(struct termp *, char); +static void ps_setfont(struct termp *, enum termfont); +static void ps_setwidth(struct termp *, int, int); +static struct termp *pspdf_alloc(const struct manoutput *); +static void pdf_obj(struct termp *, size_t); + +/* + * We define, for the time being, three fonts: bold, oblique/italic, and + * normal (roman). The following table hard-codes the font metrics for + * ASCII, i.e., 32--127. + */ + +static const struct font fonts[TERMFONT__MAX] = { + { "Times-Roman", { + { 250 }, + { 333 }, + { 408 }, + { 500 }, + { 500 }, + { 833 }, + { 778 }, + { 333 }, + { 333 }, + { 333 }, + { 500 }, + { 564 }, + { 250 }, + { 333 }, + { 250 }, + { 278 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 278 }, + { 278 }, + { 564 }, + { 564 }, + { 564 }, + { 444 }, + { 921 }, + { 722 }, + { 667 }, + { 667 }, + { 722 }, + { 611 }, + { 556 }, + { 722 }, + { 722 }, + { 333 }, + { 389 }, + { 722 }, + { 611 }, + { 889 }, + { 722 }, + { 722 }, + { 556 }, + { 722 }, + { 667 }, + { 556 }, + { 611 }, + { 722 }, + { 722 }, + { 944 }, + { 722 }, + { 722 }, + { 611 }, + { 333 }, + { 278 }, + { 333 }, + { 469 }, + { 500 }, + { 333 }, + { 444 }, + { 500 }, + { 444 }, + { 500}, + { 444}, + { 333}, + { 500}, + { 500}, + { 278}, + { 278}, + { 500}, + { 278}, + { 778}, + { 500}, + { 500}, + { 500}, + { 500}, + { 333}, + { 389}, + { 278}, + { 
500}, + { 500}, + { 722}, + { 500}, + { 500}, + { 444}, + { 480}, + { 200}, + { 480}, + { 541}, + } }, + { "Times-Bold", { + { 250 }, + { 333 }, + { 555 }, + { 500 }, + { 500 }, + { 1000 }, + { 833 }, + { 333 }, + { 333 }, + { 333 }, + { 500 }, + { 570 }, + { 250 }, + { 333 }, + { 250 }, + { 278 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 333 }, + { 333 }, + { 570 }, + { 570 }, + { 570 }, + { 500 }, + { 930 }, + { 722 }, + { 667 }, + { 722 }, + { 722 }, + { 667 }, + { 611 }, + { 778 }, + { 778 }, + { 389 }, + { 500 }, + { 778 }, + { 667 }, + { 944 }, + { 722 }, + { 778 }, + { 611 }, + { 778 }, + { 722 }, + { 556 }, + { 667 }, + { 722 }, + { 722 }, + { 1000 }, + { 722 }, + { 722 }, + { 667 }, + { 333 }, + { 278 }, + { 333 }, + { 581 }, + { 500 }, + { 333 }, + { 500 }, + { 556 }, + { 444 }, + { 556 }, + { 444 }, + { 333 }, + { 500 }, + { 556 }, + { 278 }, + { 333 }, + { 556 }, + { 278 }, + { 833 }, + { 556 }, + { 500 }, + { 556 }, + { 556 }, + { 444 }, + { 389 }, + { 333 }, + { 556 }, + { 500 }, + { 722 }, + { 500 }, + { 500 }, + { 444 }, + { 394 }, + { 220 }, + { 394 }, + { 520 }, + } }, + { "Times-Italic", { + { 250 }, + { 333 }, + { 420 }, + { 500 }, + { 500 }, + { 833 }, + { 778 }, + { 333 }, + { 333 }, + { 333 }, + { 500 }, + { 675 }, + { 250 }, + { 333 }, + { 250 }, + { 278 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 333 }, + { 333 }, + { 675 }, + { 675 }, + { 675 }, + { 500 }, + { 920 }, + { 611 }, + { 611 }, + { 667 }, + { 722 }, + { 611 }, + { 611 }, + { 722 }, + { 722 }, + { 333 }, + { 444 }, + { 667 }, + { 556 }, + { 833 }, + { 667 }, + { 722 }, + { 611 }, + { 722 }, + { 611 }, + { 500 }, + { 556 }, + { 722 }, + { 611 }, + { 833 }, + { 611 }, + { 556 }, + { 556 }, + { 389 }, + { 278 }, + { 389 }, + { 422 }, + { 500 }, + { 333 }, + { 500 }, + { 500 }, + { 444 }, + { 500 }, + { 444 }, + { 278 }, + { 500 }, + { 500 
}, + { 278 }, + { 278 }, + { 444 }, + { 278 }, + { 722 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 389 }, + { 389 }, + { 278 }, + { 500 }, + { 444 }, + { 667 }, + { 444 }, + { 444 }, + { 389 }, + { 400 }, + { 275 }, + { 400 }, + { 541 }, + } }, + { "Times-BoldItalic", { + { 250 }, + { 389 }, + { 555 }, + { 500 }, + { 500 }, + { 833 }, + { 778 }, + { 333 }, + { 333 }, + { 333 }, + { 500 }, + { 570 }, + { 250 }, + { 333 }, + { 250 }, + { 278 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 333 }, + { 333 }, + { 570 }, + { 570 }, + { 570 }, + { 500 }, + { 832 }, + { 667 }, + { 667 }, + { 667 }, + { 722 }, + { 667 }, + { 667 }, + { 722 }, + { 778 }, + { 389 }, + { 500 }, + { 667 }, + { 611 }, + { 889 }, + { 722 }, + { 722 }, + { 611 }, + { 722 }, + { 667 }, + { 556 }, + { 611 }, + { 722 }, + { 667 }, + { 889 }, + { 667 }, + { 611 }, + { 611 }, + { 333 }, + { 278 }, + { 333 }, + { 570 }, + { 500 }, + { 333 }, + { 500 }, + { 500 }, + { 444 }, + { 500 }, + { 444 }, + { 333 }, + { 500 }, + { 556 }, + { 278 }, + { 278 }, + { 500 }, + { 278 }, + { 778 }, + { 556 }, + { 500 }, + { 500 }, + { 500 }, + { 389 }, + { 389 }, + { 278 }, + { 556 }, + { 444 }, + { 667 }, + { 500 }, + { 444 }, + { 389 }, + { 348 }, + { 220 }, + { 348 }, + { 570 }, + } }, +}; + +void * +pdf_alloc(const struct manoutput *outopts) +{ + struct termp *p; + + if (NULL != (p = pspdf_alloc(outopts))) + p->type = TERMTYPE_PDF; + + return p; +} + +void * +ps_alloc(const struct manoutput *outopts) +{ + struct termp *p; + + if (NULL != (p = pspdf_alloc(outopts))) + p->type = TERMTYPE_PS; + + return p; +} + +static struct termp * +pspdf_alloc(const struct manoutput *outopts) +{ + struct termp *p; + unsigned int pagex, pagey; + size_t marginx, marginy, lineheight; + const char *pp; + + p = mandoc_calloc(1, sizeof(struct termp)); + p->enc = TERMENC_ASCII; + p->fontq = mandoc_reallocarray(NULL, + (p->fontsz = 8), sizeof(enum termfont)); + 
p->fontq[0] = p->fontl = TERMFONT_NONE; + p->ps = mandoc_calloc(1, sizeof(struct termp_ps)); + + p->advance = ps_advance; + p->begin = ps_begin; + p->end = ps_end; + p->endline = ps_endline; + p->hspan = ps_hspan; + p->letter = ps_letter; + p->setwidth = ps_setwidth; + p->width = ps_width; + + /* Default to US letter (millimetres). */ + + pagex = 216; + pagey = 279; + + /* + * The ISO 216 paper sizes can be calculated automatically, but + * it would require bringing in -lm for pow() and I'd rather not + * do that. So just do it the easy way for now. Since this + * only happens once, I'm not terribly concerned. + */ + + pp = outopts->paper; + if (pp && strcasecmp(pp, "letter")) { + if (0 == strcasecmp(pp, "a3")) { + pagex = 297; + pagey = 420; + } else if (0 == strcasecmp(pp, "a4")) { + pagex = 210; + pagey = 297; + } else if (0 == strcasecmp(pp, "a5")) { + pagex = 148; + pagey = 210; + } else if (0 == strcasecmp(pp, "legal")) { + pagex = 216; + pagey = 356; + } else if (2 != sscanf(pp, "%ux%u", &pagex, &pagey)) + warnx("%s: Unknown paper", pp); + } + + /* + * This MUST be defined before any PNT2AFM or AFM2PNT + * calculations occur. + */ + + p->ps->scale = 11; + + /* Remember millimetres -> AFM units. */ + + pagex = PNT2AFM(p, ((double)pagex * 2.834)); + pagey = PNT2AFM(p, ((double)pagey * 2.834)); + + /* Margins are 1/9 the page x and y. */ + + marginx = (size_t)((double)pagex / 9.0); + marginy = (size_t)((double)pagey / 9.0); + + /* Line-height is 1.4em. 
*/ + + lineheight = PNT2AFM(p, ((double)p->ps->scale * 1.4)); + + p->ps->width = p->ps->lastwidth = (size_t)pagex; + p->ps->height = (size_t)pagey; + p->ps->header = pagey - (marginy / 2) - (lineheight / 2); + p->ps->top = pagey - marginy; + p->ps->footer = (marginy / 2) - (lineheight / 2); + p->ps->bottom = marginy; + p->ps->left = marginx; + p->ps->lineheight = lineheight; + + p->defrmargin = pagex - (marginx * 2); + return p; +} + +static void +ps_setwidth(struct termp *p, int iop, int width) +{ + size_t lastwidth; + + lastwidth = p->ps->width; + if (iop > 0) + p->ps->width += width; + else if (iop == 0) + p->ps->width = width ? (size_t)width : p->ps->lastwidth; + else if (p->ps->width > (size_t)width) + p->ps->width -= width; + else + p->ps->width = 0; + p->ps->lastwidth = lastwidth; +} + +void +pspdf_free(void *arg) +{ + struct termp *p; + + p = (struct termp *)arg; + + free(p->ps->psmarg); + free(p->ps->pdfobjs); + + free(p->ps); + term_free(p); +} + +static void +ps_printf(struct termp *p, const char *fmt, ...) +{ + va_list ap; + int pos, len; + + va_start(ap, fmt); + + /* + * If we're running in regular mode, then pipe directly into + * vprintf(). If we're processing margins, then push the data + * into our growable margin buffer. + */ + + if ( ! (PS_MARGINS & p->ps->flags)) { + len = vprintf(fmt, ap); + va_end(ap); + p->ps->pdfbytes += len < 0 ? 0 : (size_t)len; + return; + } + + /* + * XXX: I assume that the in-margin print won't exceed + * PS_BUFSLOP (128 bytes), which is reasonable but still an + * assumption that will cause pukeage if it's not the case. + */ + + ps_growbuf(p, PS_BUFSLOP); + + pos = (int)p->ps->psmargcur; + vsnprintf(&p->ps->psmarg[pos], PS_BUFSLOP, fmt, ap); + + va_end(ap); + + p->ps->psmargcur = strlen(p->ps->psmarg); +} + +static void +ps_putchar(struct termp *p, char c) +{ + int pos; + + /* See ps_printf(). */ + + if ( ! 
(PS_MARGINS & p->ps->flags)) { + putchar(c); + p->ps->pdfbytes++; + return; + } + + ps_growbuf(p, 2); + + pos = (int)p->ps->psmargcur++; + p->ps->psmarg[pos++] = c; + p->ps->psmarg[pos] = '\0'; +} + +static void +pdf_obj(struct termp *p, size_t obj) +{ + + assert(obj > 0); + + if ((obj - 1) >= p->ps->pdfobjsz) { + p->ps->pdfobjsz = obj + 128; + p->ps->pdfobjs = mandoc_reallocarray(p->ps->pdfobjs, + p->ps->pdfobjsz, sizeof(size_t)); + } + + p->ps->pdfobjs[(int)obj - 1] = p->ps->pdfbytes; + ps_printf(p, "%zu 0 obj\n", obj); +} + +static void +ps_closepage(struct termp *p) +{ + int i; + size_t len, base; + + /* + * Close out a page that we've already flushed to output. In + * PostScript, we simply note that the page must be showed. In + * PDF, we must now create the Length, Resource, and Page node + * for the page contents. + */ + + assert(p->ps->psmarg && p->ps->psmarg[0]); + ps_printf(p, "%s", p->ps->psmarg); + + if (TERMTYPE_PS != p->type) { + ps_printf(p, "ET\n"); + + len = p->ps->pdfbytes - p->ps->pdflastpg; + base = p->ps->pages * 4 + p->ps->pdfbody; + + ps_printf(p, "endstream\nendobj\n"); + + /* Length of content. */ + pdf_obj(p, base + 1); + ps_printf(p, "%zu\nendobj\n", len); + + /* Resource for content. */ + pdf_obj(p, base + 2); + ps_printf(p, "<<\n/ProcSet [/PDF /Text]\n"); + ps_printf(p, "/Font <<\n"); + for (i = 0; i < (int)TERMFONT__MAX; i++) + ps_printf(p, "/F%d %d 0 R\n", i, 3 + i); + ps_printf(p, ">>\n>>\n"); + + /* Page node. */ + pdf_obj(p, base + 3); + ps_printf(p, "<<\n"); + ps_printf(p, "/Type /Page\n"); + ps_printf(p, "/Parent 2 0 R\n"); + ps_printf(p, "/Resources %zu 0 R\n", base + 2); + ps_printf(p, "/Contents %zu 0 R\n", base); + ps_printf(p, ">>\nendobj\n"); + } else + ps_printf(p, "showpage\n"); + + p->ps->pages++; + p->ps->psrow = p->ps->top; + assert( ! (PS_NEWPAGE & p->ps->flags)); + p->ps->flags |= PS_NEWPAGE; +} + +static void +ps_end(struct termp *p) +{ + size_t i, xref, base; + + /* + * At the end of the file, do one last showpage. 
This is the + * same behaviour as groff(1) and works for multiple pages as + * well as just one. + */ + + if ( ! (PS_NEWPAGE & p->ps->flags)) { + assert(0 == p->ps->flags); + assert('\0' == p->ps->last); + ps_closepage(p); + } + + if (TERMTYPE_PS == p->type) { + ps_printf(p, "%%%%Trailer\n"); + ps_printf(p, "%%%%Pages: %zu\n", p->ps->pages); + ps_printf(p, "%%%%EOF\n"); + return; + } + + pdf_obj(p, 2); + ps_printf(p, "<<\n/Type /Pages\n"); + ps_printf(p, "/MediaBox [0 0 %zu %zu]\n", + (size_t)AFM2PNT(p, p->ps->width), + (size_t)AFM2PNT(p, p->ps->height)); + + ps_printf(p, "/Count %zu\n", p->ps->pages); + ps_printf(p, "/Kids ["); + + for (i = 0; i < p->ps->pages; i++) + ps_printf(p, " %zu 0 R", i * 4 + p->ps->pdfbody + 3); + + base = (p->ps->pages - 1) * 4 + p->ps->pdfbody + 4; + + ps_printf(p, "]\n>>\nendobj\n"); + pdf_obj(p, base); + ps_printf(p, "<<\n"); + ps_printf(p, "/Type /Catalog\n"); + ps_printf(p, "/Pages 2 0 R\n"); + ps_printf(p, ">>\n"); + xref = p->ps->pdfbytes; + ps_printf(p, "xref\n"); + ps_printf(p, "0 %zu\n", base + 1); + ps_printf(p, "0000000000 65535 f \n"); + + for (i = 0; i < base; i++) + ps_printf(p, "%.10zu 00000 n \n", + p->ps->pdfobjs[(int)i]); + + ps_printf(p, "trailer\n"); + ps_printf(p, "<<\n"); + ps_printf(p, "/Size %zu\n", base + 1); + ps_printf(p, "/Root %zu 0 R\n", base); + ps_printf(p, "/Info 1 0 R\n"); + ps_printf(p, ">>\n"); + ps_printf(p, "startxref\n"); + ps_printf(p, "%zu\n", xref); + ps_printf(p, "%%%%EOF\n"); +} + +static void +ps_begin(struct termp *p) +{ + int i; + + /* + * Print margins into margin buffer. Nothing gets output to the + * screen yet, so we don't need to initialise the primary state. 
+ */ + + if (p->ps->psmarg) { + assert(p->ps->psmargsz); + p->ps->psmarg[0] = '\0'; + } + + /*p->ps->pdfbytes = 0;*/ + p->ps->psmargcur = 0; + p->ps->flags = PS_MARGINS; + p->ps->pscol = p->ps->left; + p->ps->psrow = p->ps->header; + + ps_setfont(p, TERMFONT_NONE); + + (*p->headf)(p, p->argf); + (*p->endline)(p); + + p->ps->pscol = p->ps->left; + p->ps->psrow = p->ps->footer; + + (*p->footf)(p, p->argf); + (*p->endline)(p); + + p->ps->flags &= ~PS_MARGINS; + + assert(0 == p->ps->flags); + assert(p->ps->psmarg); + assert('\0' != p->ps->psmarg[0]); + + /* + * Print header and initialise page state. Following this, + * stuff gets printed to the screen, so make sure we're sane. + */ + + if (TERMTYPE_PS == p->type) { + ps_printf(p, "%%!PS-Adobe-3.0\n"); + ps_printf(p, "%%%%DocumentData: Clean7Bit\n"); + ps_printf(p, "%%%%Orientation: Portrait\n"); + ps_printf(p, "%%%%Pages: (atend)\n"); + ps_printf(p, "%%%%PageOrder: Ascend\n"); + ps_printf(p, "%%%%DocumentMedia: " + "Default %zu %zu 0 () ()\n", + (size_t)AFM2PNT(p, p->ps->width), + (size_t)AFM2PNT(p, p->ps->height)); + ps_printf(p, "%%%%DocumentNeededResources: font"); + + for (i = 0; i < (int)TERMFONT__MAX; i++) + ps_printf(p, " %s", fonts[i].name); + + ps_printf(p, "\n%%%%EndComments\n"); + } else { + ps_printf(p, "%%PDF-1.1\n"); + pdf_obj(p, 1); + ps_printf(p, "<<\n"); + ps_printf(p, ">>\n"); + ps_printf(p, "endobj\n"); + + for (i = 0; i < (int)TERMFONT__MAX; i++) { + pdf_obj(p, (size_t)i + 3); + ps_printf(p, "<<\n"); + ps_printf(p, "/Type /Font\n"); + ps_printf(p, "/Subtype /Type1\n"); + ps_printf(p, "/Name /F%d\n", i); + ps_printf(p, "/BaseFont /%s\n", fonts[i].name); + ps_printf(p, ">>\n"); + } + } + + p->ps->pdfbody = (size_t)TERMFONT__MAX + 3; + p->ps->pscol = p->ps->left; + p->ps->psrow = p->ps->top; + p->ps->flags |= PS_NEWPAGE; + ps_setfont(p, TERMFONT_NONE); +} + +static void +ps_pletter(struct termp *p, int c) +{ + int f; + + /* + * If we haven't opened a page context, then output that we're + * in a new 
page and make sure the font is correctly set. + */ + + if (PS_NEWPAGE & p->ps->flags) { + if (TERMTYPE_PS == p->type) { + ps_printf(p, "%%%%Page: %zu %zu\n", + p->ps->pages + 1, p->ps->pages + 1); + ps_printf(p, "/%s %zu selectfont\n", + fonts[(int)p->ps->lastf].name, + p->ps->scale); + } else { + pdf_obj(p, p->ps->pdfbody + + p->ps->pages * 4); + ps_printf(p, "<<\n"); + ps_printf(p, "/Length %zu 0 R\n", + p->ps->pdfbody + 1 + p->ps->pages * 4); + ps_printf(p, ">>\nstream\n"); + } + p->ps->pdflastpg = p->ps->pdfbytes; + p->ps->flags &= ~PS_NEWPAGE; + } + + /* + * If we're not in a PostScript "word" context, then open one + * now at the current cursor. + */ + + if ( ! (PS_INLINE & p->ps->flags)) { + if (TERMTYPE_PS != p->type) { + ps_printf(p, "BT\n/F%d %zu Tf\n", + (int)p->ps->lastf, p->ps->scale); + ps_printf(p, "%.3f %.3f Td\n(", + AFM2PNT(p, p->ps->pscol), + AFM2PNT(p, p->ps->psrow)); + } else + ps_printf(p, "%.3f %.3f moveto\n(", + AFM2PNT(p, p->ps->pscol), + AFM2PNT(p, p->ps->psrow)); + p->ps->flags |= PS_INLINE; + } + + assert( ! (PS_NEWPAGE & p->ps->flags)); + + /* + * We need to escape these characters as per the PostScript + * specification. We would also escape non-graphable characters + * (like tabs), but none of them would get to this point and + * it's superfluous to abort() on them. + */ + + switch (c) { + case '(': + case ')': + case '\\': + ps_putchar(p, '\\'); + break; + default: + break; + } + + /* Write the character and adjust where we are on the page. */ + + f = (int)p->ps->lastf; + + if (c <= 32 || c - 32 >= MAXCHAR) + c = 32; + + ps_putchar(p, (char)c); + c -= 32; + p->ps->pscol += (size_t)fonts[f].gly[c].wx; +} + +static void +ps_pclose(struct termp *p) +{ + + /* + * Spit out that we're exiting a word context (this is a + * "partial close" because we don't check the last-char buffer + * or anything). + */ + + if ( ! 
(PS_INLINE & p->ps->flags)) + return; + + if (TERMTYPE_PS != p->type) { + ps_printf(p, ") Tj\nET\n"); + } else + ps_printf(p, ") show\n"); + + p->ps->flags &= ~PS_INLINE; +} + +static void +ps_fclose(struct termp *p) +{ + + /* + * Strong closure: if we have a last-char, spit it out after + * checking that we're in the right font mode. This will of + * course open a new scope, if applicable. + * + * Following this, close out any scope that's open. + */ + + if (p->ps->last != '\0') { + assert( ! (p->ps->flags & PS_BACKSP)); + if (p->ps->nextf != p->ps->lastf) { + ps_pclose(p); + ps_setfont(p, p->ps->nextf); + } + p->ps->nextf = TERMFONT_NONE; + ps_pletter(p, p->ps->last); + p->ps->last = '\0'; + } + + if ( ! (PS_INLINE & p->ps->flags)) + return; + + ps_pclose(p); +} + +static void +ps_letter(struct termp *p, int arg) +{ + size_t savecol, wx; + char c; + + c = arg >= 128 || arg <= 0 ? '?' : arg; + + /* + * When receiving a backspace, merely flag it. + * We don't know yet whether it is + * a font instruction or an overstrike. + */ + + if (c == '\b') { + assert(p->ps->last != '\0'); + assert( ! (p->ps->flags & PS_BACKSP)); + p->ps->flags |= PS_BACKSP; + return; + } + + /* + * Decode font instructions. + */ + + if (p->ps->flags & PS_BACKSP) { + if (p->ps->last == '_') { + switch (p->ps->nextf) { + case TERMFONT_BI: + break; + case TERMFONT_BOLD: + p->ps->nextf = TERMFONT_BI; + break; + default: + p->ps->nextf = TERMFONT_UNDER; + } + p->ps->last = c; + p->ps->flags &= ~PS_BACKSP; + return; + } + if (p->ps->last == c) { + switch (p->ps->nextf) { + case TERMFONT_BI: + break; + case TERMFONT_UNDER: + p->ps->nextf = TERMFONT_BI; + break; + default: + p->ps->nextf = TERMFONT_BOLD; + } + p->ps->flags &= ~PS_BACKSP; + return; + } + + /* + * This is not a font instruction, but rather + * the next character. Prepare for overstrike. 
+ */ + + savecol = p->ps->pscol; + } else + savecol = SIZE_MAX; + + /* + * We found the next character, so the font instructions + * for the previous one are complete. + * Use them and print it. + */ + + if (p->ps->last != '\0') { + if (p->ps->nextf != p->ps->lastf) { + ps_pclose(p); + ps_setfont(p, p->ps->nextf); + } + p->ps->nextf = TERMFONT_NONE; + + /* + * For an overstrike, if a previous character + * was wider, advance to center the new one. + */ + + if (p->ps->pscolnext) { + wx = fonts[p->ps->lastf].gly[(int)p->ps->last-32].wx; + if (p->ps->pscol + wx < p->ps->pscolnext) + p->ps->pscol = (p->ps->pscol + + p->ps->pscolnext - wx) / 2; + } + + ps_pletter(p, p->ps->last); + + /* + * For an overstrike, if a previous character + * was wider, advance to the end of the old one. + */ + + if (p->ps->pscol < p->ps->pscolnext) { + ps_pclose(p); + p->ps->pscol = p->ps->pscolnext; + } + } + + /* + * Do not print the current character yet because font + * instructions might follow; only remember it. + * For the first character, nothing else is done. + * The final character will get printed from ps_fclose(). + */ + + p->ps->last = c; + + /* + * For an overstrike, back up to the previous position. + * If the previous character is wider than any it overstrikes, + * remember the current position, because it might also be + * wider than all that will overstrike it. + */ + + if (savecol != SIZE_MAX) { + if (p->ps->pscolnext < p->ps->pscol) + p->ps->pscolnext = p->ps->pscol; + ps_pclose(p); + p->ps->pscol = savecol; + p->ps->flags &= ~PS_BACKSP; + } else + p->ps->pscolnext = 0; +} + +static void +ps_advance(struct termp *p, size_t len) +{ + + /* + * Advance some spaces. This can probably be made smarter, + * i.e., to have multiple space-separated words in the same + * scope, but this is easier: just close out the current scope + * and readjust our column settings. 
+ */ + + ps_fclose(p); + p->ps->pscol += len; +} + +static void +ps_endline(struct termp *p) +{ + + /* Close out any scopes we have open: we're at eoln. */ + + ps_fclose(p); + + /* + * If we're in the margin, don't try to recalculate our current + * row. XXX: if the column tries to be fancy with multiple + * lines, we'll do nasty stuff. + */ + + if (PS_MARGINS & p->ps->flags) + return; + + /* Left-justify. */ + + p->ps->pscol = p->ps->left; + + /* If we haven't printed anything, return. */ + + if (PS_NEWPAGE & p->ps->flags) + return; + + /* + * Put us down a line. If we're at the page bottom, spit out a + * showpage and restart our row. + */ + + if (p->ps->psrow >= p->ps->lineheight + p->ps->bottom) { + p->ps->psrow -= p->ps->lineheight; + return; + } + + ps_closepage(p); +} + +static void +ps_setfont(struct termp *p, enum termfont f) +{ + + assert(f < TERMFONT__MAX); + p->ps->lastf = f; + + /* + * If we're still at the top of the page, let the font-setting + * be delayed until we actually have stuff to print. + */ + + if (PS_NEWPAGE & p->ps->flags) + return; + + if (TERMTYPE_PS == p->type) + ps_printf(p, "/%s %zu selectfont\n", + fonts[(int)f].name, p->ps->scale); + else + ps_printf(p, "/F%d %zu Tf\n", + (int)f, p->ps->scale); +} + +static size_t +ps_width(const struct termp *p, int c) +{ + + if (c <= 32 || c - 32 >= MAXCHAR) + c = 0; + else + c -= 32; + + return (size_t)fonts[(int)TERMFONT_NONE].gly[c].wx; +} + +static int +ps_hspan(const struct termp *p, const struct roffsu *su) +{ + double r; + + /* + * All of these measurements are derived by converting from the + * native measurement to AFM units. + */ + switch (su->unit) { + case SCALE_BU: + /* + * Traditionally, the default unit is fixed to the + * output media. So this would refer to the point. In + * mandoc(1), however, we stick to the default terminal + * scaling unit so that output is the same regardless + * the media. 
+ */ + r = PNT2AFM(p, su->scale * 72.0 / 240.0); + break; + case SCALE_CM: + r = PNT2AFM(p, su->scale * 72.0 / 2.54); + break; + case SCALE_EM: + r = su->scale * + fonts[(int)TERMFONT_NONE].gly[109 - 32].wx; + break; + case SCALE_EN: + r = su->scale * + fonts[(int)TERMFONT_NONE].gly[110 - 32].wx; + break; + case SCALE_IN: + r = PNT2AFM(p, su->scale * 72.0); + break; + case SCALE_MM: + r = su->scale * + fonts[(int)TERMFONT_NONE].gly[109 - 32].wx / 100.0; + break; + case SCALE_PC: + r = PNT2AFM(p, su->scale * 12.0); + break; + case SCALE_PT: + r = PNT2AFM(p, su->scale * 1.0); + break; + case SCALE_VS: + r = su->scale * p->ps->lineheight; + break; + default: + r = su->scale; + break; + } + + return r * 24.0; +} + +static void +ps_growbuf(struct termp *p, size_t sz) +{ + if (p->ps->psmargcur + sz <= p->ps->psmargsz) + return; + + if (sz < PS_BUFSLOP) + sz = PS_BUFSLOP; + + p->ps->psmargsz += sz; + p->ps->psmarg = mandoc_realloc(p->ps->psmarg, p->ps->psmargsz); +} diff --git a/contrib/mdocml/test-dirent-namlen.c b/contrib/mdocml/test-dirent-namlen.c new file mode 100644 index 0000000..032e75e --- /dev/null +++ b/contrib/mdocml/test-dirent-namlen.c @@ -0,0 +1,10 @@ +#include <sys/types.h> +#include <dirent.h> + +int +main(void) +{ + struct dirent entry; + + return sizeof(entry.d_namlen) == 0; +} diff --git a/contrib/mdocml/test-err.c b/contrib/mdocml/test-err.c new file mode 100644 index 0000000..d17e0f0 --- /dev/null +++ b/contrib/mdocml/test-err.c @@ -0,0 +1,28 @@ +/* $Id: test-err.c,v 1.1 2015/10/11 21:12:55 schwarze Exp $ */ +/* + * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
/*
 * Configure probe for fts(3): open a traversal of ".", read the
 * first entry, mark it FTS_SKIP and close the traversal again.
 * Exits 0 when all four calls succeed; otherwise reports the failing
 * call with perror() and exits 1.
 */
int
main(void)
{
	const char	*argv[2] = { ".", (char *)NULL };
	const char	*failed = NULL;
	FTS		*ftsp;
	FTSENT		*entry;

	ftsp = fts_open((char * const *)argv,
	    FTS_PHYSICAL | FTS_NOCHDIR, NULL);

	if (ftsp == NULL)
		failed = "fts_open";
	else if ((entry = fts_read(ftsp)) == NULL)
		failed = "fts_read";
	else if (fts_set(ftsp, entry, FTS_SKIP) != 0)
		failed = "fts_set";
	else if (fts_close(ftsp) != 0)
		failed = "fts_close";

	if (failed != NULL) {
		perror(failed);
		return 1;
	}
	return 0;
}
/*
 * Configure probe for getline(3): with stdin closed the call is
 * expected to fail, so the probe exits 0 exactly when getline()
 * returns -1.
 */
int
main(void)
{
	char	*buf;
	size_t	 bufsz;
	ssize_t	 got;

	buf = NULL;
	bufsz = 0;

	fclose(stdin);
	got = getline(&buf, &bufsz, stdin);

	return got != -1;
}
/*
 * Allocator callbacks handed to ohash_init() by the ohash probe.
 * The extra "arg" parameter is part of the callback signature and is
 * not used by any of them.
 */

/* Allocate sz zero-filled bytes. */
void *
xmalloc(size_t sz, void *arg)
{
	(void)arg;
	return calloc(1, sz);
}

/* Allocate a zero-filled array of nmemb elements of sz bytes each. */
void *
xcalloc(size_t nmemb, size_t sz, void *arg)
{
	(void)arg;
	return calloc(nmemb, sz);
}

/* Release memory obtained from xmalloc() or xcalloc(). */
void
xfree(void *p, void *arg)
{
	(void)arg;
	free(p);
}
a/contrib/mdocml/test-progname.c b/contrib/mdocml/test-progname.c new file mode 100644 index 0000000..0418b3c --- /dev/null +++ b/contrib/mdocml/test-progname.c @@ -0,0 +1,10 @@ +#include <stdlib.h> + +int +main(void) +{ + const char * progname; + + progname = getprogname(); + return progname == NULL; +} diff --git a/contrib/mdocml/test-reallocarray.c b/contrib/mdocml/test-reallocarray.c new file mode 100644 index 0000000..f99e685 --- /dev/null +++ b/contrib/mdocml/test-reallocarray.c @@ -0,0 +1,7 @@ +#include <stdlib.h> + +int +main(void) +{ + return !reallocarray(NULL, 2, 2); +} diff --git a/contrib/mdocml/test-rewb-bsd.c b/contrib/mdocml/test-rewb-bsd.c new file mode 100644 index 0000000..88d3d35 --- /dev/null +++ b/contrib/mdocml/test-rewb-bsd.c @@ -0,0 +1,26 @@ +#include <sys/types.h> +#include <regex.h> + +int +main(void) +{ + regex_t re; + + if (regcomp(&re, "[[:<:]]word[[:>:]]", REG_EXTENDED | REG_NOSUB)) + return 1; + if (regexec(&re, "the word is here", 0, NULL, 0)) + return 2; + if (regexec(&re, "same word", 0, NULL, 0)) + return 3; + if (regexec(&re, "word again", 0, NULL, 0)) + return 4; + if (regexec(&re, "word", 0, NULL, 0)) + return 5; + if (regexec(&re, "wordy", 0, NULL, 0) != REG_NOMATCH) + return 6; + if (regexec(&re, "sword", 0, NULL, 0) != REG_NOMATCH) + return 7; + if (regexec(&re, "reworded", 0, NULL, 0) != REG_NOMATCH) + return 8; + return 0; +} diff --git a/contrib/mdocml/test-rewb-sysv.c b/contrib/mdocml/test-rewb-sysv.c new file mode 100644 index 0000000..cb35c54 --- /dev/null +++ b/contrib/mdocml/test-rewb-sysv.c @@ -0,0 +1,26 @@ +#include <sys/types.h> +#include <regex.h> + +int +main(void) +{ + regex_t re; + + if (regcomp(&re, "\\<word\\>", REG_EXTENDED | REG_NOSUB)) + return 1; + if (regexec(&re, "the word is here", 0, NULL, 0)) + return 2; + if (regexec(&re, "same word", 0, NULL, 0)) + return 3; + if (regexec(&re, "word again", 0, NULL, 0)) + return 4; + if (regexec(&re, "word", 0, NULL, 0)) + return 5; + if (regexec(&re, "wordy", 0, 
NULL, 0) != REG_NOMATCH) + return 6; + if (regexec(&re, "sword", 0, NULL, 0) != REG_NOMATCH) + return 7; + if (regexec(&re, "reworded", 0, NULL, 0) != REG_NOMATCH) + return 8; + return 0; +} diff --git a/contrib/mdocml/test-sqlite3.c b/contrib/mdocml/test-sqlite3.c new file mode 100644 index 0000000..11f17ad --- /dev/null +++ b/contrib/mdocml/test-sqlite3.c @@ -0,0 +1,47 @@ +/* $Id: test-sqlite3.c,v 1.2 2015/10/06 18:32:20 schwarze Exp $ */ +/* + * Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <stdio.h> +#include <unistd.h> +#include <sqlite3.h> + +int +main(void) +{ + sqlite3 *db; + + if (sqlite3_open_v2("test.db", &db, + SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, + NULL) != SQLITE_OK) { + perror("test.db"); + fprintf(stderr, "sqlite3_open_v2: %s", sqlite3_errmsg(db)); + return 1; + } + unlink("test.db"); + + if (sqlite3_exec(db, "PRAGMA foreign_keys = ON", + NULL, NULL, NULL) != SQLITE_OK) { + fprintf(stderr, "sqlite3_exec: %s", sqlite3_errmsg(db)); + return 1; + } + + if (sqlite3_close(db) != SQLITE_OK) { + fprintf(stderr, "sqlite3_close: %s", sqlite3_errmsg(db)); + return 1; + } + return 0; +} diff --git a/contrib/mdocml/test-sqlite3_errstr.c b/contrib/mdocml/test-sqlite3_errstr.c new file mode 100644 index 0000000..4d3c7c5 --- /dev/null +++ b/contrib/mdocml/test-sqlite3_errstr.c @@ -0,0 +1,8 @@ +#include <string.h> +#include <sqlite3.h> + +int +main(void) +{ + return strcmp(sqlite3_errstr(SQLITE_OK), "not an error"); +} diff --git a/contrib/mdocml/test-strcasestr.c b/contrib/mdocml/test-strcasestr.c new file mode 100644 index 0000000..c3a87de --- /dev/null +++ b/contrib/mdocml/test-strcasestr.c @@ -0,0 +1,13 @@ +#if defined(__linux__) || defined(__MINT__) +# define _GNU_SOURCE /* strcasestr() */ +#endif + +#include <string.h> + +int +main(void) +{ + const char *big = "BigString"; + char *cp = strcasestr(big, "Gst"); + return cp != big + 2; +} diff --git a/contrib/mdocml/test-stringlist.c b/contrib/mdocml/test-stringlist.c new file mode 100644 index 0000000..7555d20 --- /dev/null +++ b/contrib/mdocml/test-stringlist.c @@ -0,0 +1,37 @@ +/* $Id: test-stringlist.c,v 1.2 2015/10/06 18:32:20 schwarze Exp $ */ +/* + * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
/*
 * Configure probe for strptime(3): parse "2014-01-04" with
 * "%Y-%m-%d".  Exits 0 when the whole input is consumed and the
 * year, month and day fields hold the expected values, 1 otherwise.
 */
int
main(void)
{
	const char	 input[] = "2014-01-04";
	struct tm	 tm;
	char		*end;

	end = strptime(input, "%Y-%m-%d", &tm);
	if (end != input + 10)
		return 1;
	if (tm.tm_year != 114 || tm.tm_mon != 0 || tm.tm_mday != 4)
		return 1;
	return 0;
}
/*
 * Configure probe for strtonum(3).  Four conversions are tried: one
 * valid, one with trailing garbage, one above the maximum and one
 * below the minimum.  For sample number i (counting from 0) the probe
 * exits with 2i+1 when the return value is wrong and with 2i+2 when
 * the error string is set or unset contrary to expectation; it exits
 * 0 when all samples behave.
 */
int
main(void)
{
	static const struct {
		const char	*text;
		long long	 minval;
		long long	 maxval;
		long long	 want;
		int		 wanterr;	/* errstr expected non-NULL */
	} samples[] = {
		{ "1",	0, 2, 1, 0 },
		{ "1x",	0, 2, 0, 1 },
		{ "2",	0, 1, 0, 1 },
		{ "0",	1, 2, 0, 1 },
	};
	const char	*errstr;
	int		 i;

	for (i = 0; i < (int)(sizeof(samples) / sizeof(samples[0])); i++) {
		if (strtonum(samples[i].text, samples[i].minval,
		    samples[i].maxval, &errstr) != samples[i].want)
			return 2 * i + 1;
		if ((errstr != NULL) != samples[i].wanterr)
			return 2 * i + 2;
	}
	return 0;
}
+{ + va_list ap; + int irc; + + va_start(ap, format); + irc = vasprintf(ret, format, ap); + va_end(ap); + + return irc; +} + +int +main(void) +{ + char *ret; + + if (testfunc(&ret, "%s.", "Text") != 5) + return 1; + if (strcmp(ret, "Text.")) + return 2; + return 0; +} diff --git a/contrib/mdocml/test-wchar.c b/contrib/mdocml/test-wchar.c new file mode 100644 index 0000000..a096705 --- /dev/null +++ b/contrib/mdocml/test-wchar.c @@ -0,0 +1,63 @@ +/* $Id: test-wchar.c,v 1.3 2015/10/06 18:32:20 schwarze Exp $ */ +/* + * Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#if defined(__linux__) || defined(__MINT__) +#define _GNU_SOURCE /* wcwidth() */ +#endif + +#include <locale.h> +#include <stdio.h> +#include <wchar.h> +#include <unistd.h> + +int +main(void) +{ + wchar_t wc; + int width; + + if (setlocale(LC_ALL, "") == NULL) { + fputs("setlocale(LC_ALL, \"\") failed\n", stderr); + return 1; + } + + if (setlocale(LC_CTYPE, "en_US.UTF-8") == NULL) { + fputs("setlocale(LC_CTYPE, \"en_US.UTF-8\") failed\n", + stderr); + return 1; + } + + if (sizeof(wchar_t) < 4) { + fprintf(stderr, "wchar_t is only %zu bytes\n", + sizeof(wchar_t)); + return 1; + } + + if ((width = wcwidth(L' ')) != 1) { + fprintf(stderr, "wcwidth(L' ') returned %d\n", width); + return 1; + } + + dup2(STDERR_FILENO, STDOUT_FILENO); + wc = L'*'; + if (putwchar(wc) != (wint_t)wc) { + fputs("bad putwchar return value\n", stderr); + return 1; + } + + return 0; +} diff --git a/contrib/mdocml/tree.c b/contrib/mdocml/tree.c new file mode 100644 index 0000000..52ca754 --- /dev/null +++ b/contrib/mdocml/tree.c @@ -0,0 +1,377 @@ +/* $Id: tree.c,v 1.69 2015/10/12 00:08:16 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "config.h" + +#include <sys/types.h> + +#include <assert.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> + +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "man.h" +#include "main.h" + +static void print_box(const struct eqn_box *, int); +static void print_man(const struct roff_node *, int); +static void print_mdoc(const struct roff_node *, int); +static void print_span(const struct tbl_span *, int); + + +void +tree_mdoc(void *arg, const struct roff_man *mdoc) +{ + + print_mdoc(mdoc->first->child, 0); +} + +void +tree_man(void *arg, const struct roff_man *man) +{ + + print_man(man->first->child, 0); +} + +static void +print_mdoc(const struct roff_node *n, int indent) +{ + const char *p, *t; + int i, j; + size_t argc; + struct mdoc_argv *argv; + + if (n == NULL) + return; + + argv = NULL; + argc = 0; + t = p = NULL; + + switch (n->type) { + case ROFFT_ROOT: + t = "root"; + break; + case ROFFT_BLOCK: + t = "block"; + break; + case ROFFT_HEAD: + t = "head"; + break; + case ROFFT_BODY: + if (n->end) + t = "body-end"; + else + t = "body"; + break; + case ROFFT_TAIL: + t = "tail"; + break; + case ROFFT_ELEM: + t = "elem"; + break; + case ROFFT_TEXT: + t = "text"; + break; + case ROFFT_TBL: + break; + case ROFFT_EQN: + t = "eqn"; + break; + default: + abort(); + } + + switch (n->type) { + case ROFFT_TEXT: + p = n->string; + break; + case ROFFT_BODY: + p = mdoc_macronames[n->tok]; + break; + case ROFFT_HEAD: + p = mdoc_macronames[n->tok]; + break; + case ROFFT_TAIL: + p = mdoc_macronames[n->tok]; + break; + case ROFFT_ELEM: + p = mdoc_macronames[n->tok]; + if (n->args) { + argv = n->args->argv; + argc = n->args->argc; + } + break; + case ROFFT_BLOCK: + p = mdoc_macronames[n->tok]; + if (n->args) { + argv = n->args->argv; + argc = n->args->argc; + } + break; + case ROFFT_TBL: + break; + case ROFFT_EQN: + p = "EQ"; + break; + case ROFFT_ROOT: + p = "root"; + break; + default: + abort(); + } + + if 
(n->span) { + assert(NULL == p && NULL == t); + print_span(n->span, indent); + } else { + for (i = 0; i < indent; i++) + putchar(' '); + + printf("%s (%s)", p, t); + + for (i = 0; i < (int)argc; i++) { + printf(" -%s", mdoc_argnames[argv[i].arg]); + if (argv[i].sz > 0) + printf(" ["); + for (j = 0; j < (int)argv[i].sz; j++) + printf(" [%s]", argv[i].value[j]); + if (argv[i].sz > 0) + printf(" ]"); + } + + putchar(' '); + if (MDOC_DELIMO & n->flags) + putchar('('); + if (MDOC_LINE & n->flags) + putchar('*'); + printf("%d:%d", n->line, n->pos + 1); + if (MDOC_DELIMC & n->flags) + putchar(')'); + if (MDOC_EOS & n->flags) + putchar('.'); + putchar('\n'); + } + + if (n->eqn) + print_box(n->eqn->root->first, indent + 4); + if (n->child) + print_mdoc(n->child, indent + + (n->type == ROFFT_BLOCK ? 2 : 4)); + if (n->next) + print_mdoc(n->next, indent); +} + +static void +print_man(const struct roff_node *n, int indent) +{ + const char *p, *t; + int i; + + if (n == NULL) + return; + + t = p = NULL; + + switch (n->type) { + case ROFFT_ROOT: + t = "root"; + break; + case ROFFT_ELEM: + t = "elem"; + break; + case ROFFT_TEXT: + t = "text"; + break; + case ROFFT_BLOCK: + t = "block"; + break; + case ROFFT_HEAD: + t = "head"; + break; + case ROFFT_BODY: + t = "body"; + break; + case ROFFT_TBL: + break; + case ROFFT_EQN: + t = "eqn"; + break; + default: + abort(); + } + + switch (n->type) { + case ROFFT_TEXT: + p = n->string; + break; + case ROFFT_ELEM: + case ROFFT_BLOCK: + case ROFFT_HEAD: + case ROFFT_BODY: + p = man_macronames[n->tok]; + break; + case ROFFT_ROOT: + p = "root"; + break; + case ROFFT_TBL: + break; + case ROFFT_EQN: + p = "EQ"; + break; + default: + abort(); + } + + if (n->span) { + assert(NULL == p && NULL == t); + print_span(n->span, indent); + } else { + for (i = 0; i < indent; i++) + putchar(' '); + printf("%s (%s) ", p, t); + if (MAN_LINE & n->flags) + putchar('*'); + printf("%d:%d", n->line, n->pos + 1); + if (MAN_EOS & n->flags) + putchar('.'); + 
putchar('\n'); + } + + if (n->eqn) + print_box(n->eqn->root->first, indent + 4); + if (n->child) + print_man(n->child, indent + + (n->type == ROFFT_BLOCK ? 2 : 4)); + if (n->next) + print_man(n->next, indent); +} + +static void +print_box(const struct eqn_box *ep, int indent) +{ + int i; + const char *t; + + static const char *posnames[] = { + NULL, "sup", "subsup", "sub", + "to", "from", "fromto", + "over", "sqrt", NULL }; + + if (NULL == ep) + return; + for (i = 0; i < indent; i++) + putchar(' '); + + t = NULL; + switch (ep->type) { + case EQN_ROOT: + t = "eqn-root"; + break; + case EQN_LISTONE: + case EQN_LIST: + t = "eqn-list"; + break; + case EQN_SUBEXPR: + t = "eqn-expr"; + break; + case EQN_TEXT: + t = "eqn-text"; + break; + case EQN_PILE: + t = "eqn-pile"; + break; + case EQN_MATRIX: + t = "eqn-matrix"; + break; + } + + fputs(t, stdout); + if (ep->pos) + printf(" pos=%s", posnames[ep->pos]); + if (ep->left) + printf(" left=\"%s\"", ep->left); + if (ep->right) + printf(" right=\"%s\"", ep->right); + if (ep->top) + printf(" top=\"%s\"", ep->top); + if (ep->bottom) + printf(" bottom=\"%s\"", ep->bottom); + if (ep->text) + printf(" text=\"%s\"", ep->text); + if (ep->font) + printf(" font=%d", ep->font); + if (ep->size != EQN_DEFSIZE) + printf(" size=%d", ep->size); + if (ep->expectargs != UINT_MAX && ep->expectargs != ep->args) + printf(" badargs=%zu(%zu)", ep->args, ep->expectargs); + else if (ep->args) + printf(" args=%zu", ep->args); + putchar('\n'); + + print_box(ep->first, indent + 4); + print_box(ep->next, indent); +} + +static void +print_span(const struct tbl_span *sp, int indent) +{ + const struct tbl_dat *dp; + int i; + + for (i = 0; i < indent; i++) + putchar(' '); + + switch (sp->pos) { + case TBL_SPAN_HORIZ: + putchar('-'); + return; + case TBL_SPAN_DHORIZ: + putchar('='); + return; + default: + break; + } + + for (dp = sp->first; dp; dp = dp->next) { + switch (dp->pos) { + case TBL_DATA_HORIZ: + case TBL_DATA_NHORIZ: + putchar('-'); + continue; + 
case TBL_DATA_DHORIZ: + case TBL_DATA_NDHORIZ: + putchar('='); + continue; + default: + break; + } + printf("[\"%s\"", dp->string ? dp->string : ""); + if (dp->spans) + printf("(%d)", dp->spans); + if (NULL == dp->layout) + putchar('*'); + putchar(']'); + putchar(' '); + } + + printf("(tbl) %d:1\n", sp->line); +} |