#!/usr/bin/perl
# $Id: sgmlfmt.pl,v 1.7 1995/09/22 18:24:32 jfieber Exp $

# Format an sgml document tagged according to the linuxdoc DTD.
# by John Fieber for the FreeBSD documentation
# project.
#
# Bugs:
#
# Text lines that start with a period (.) confuse the conversions that
# use groff.  The workaround is to make sure the SGML source doesn't
# have any periods at the beginning of a line.
#
# Although legal by the DTD, the ascii formatting gets botched if
# certain tags are omitted following another tag.
# NOTE(review): both tag names were lost from this comment; restore
# them from a pristine copy.
#
# Beginning and end tags for one particular element must occur on the
# same line.
# NOTE(review): the element name was lost from this comment as well;
# the HTML pass takes a whole "<@@head>" line as the section title, so
# it is presumably the heading element -- confirm.
#
# The whole approach of using sgmlsasp and passing a few things
# through for processing by this script is doomed.  This whole thing
# needs to be re-thought and a better DTD should be used anyway.
#

#######################################################################

# Look in a couple places for the SGML DTD and replacement files
#

require 'newgetopt.pl';

if (-d "$ENV{'HOME'}/lib/sgml/FreeBSD") {
    $sgmldir = "$ENV{'HOME'}/lib/sgml";
} elsif (-d "$ENV{'HOME'}/sgml/FreeBSD") {
    $sgmldir = "$ENV{'HOME'}/sgml";
} elsif (-d "/usr/share/sgml/FreeBSD" ) {
    $sgmldir = "/usr/share/sgml";
} else {
    die "Cannot locate sgml files!\n";
}

#
# Locate the DTD, an SGML declaration, and the replacement files
#

$dtdbase = "$sgmldir/FreeBSD";
$dtd = "$dtdbase/dtd/linuxdoc";
if (-f "$dtd.dec") {
    $decl = "$dtd.dec";
} else {
    $decl = "";
}
$replbase = "$dtdbase/rep";

# Only supply a default search path when the user has not set one.
if (! $ENV{"SGML_PATH"}) {
    $ENV{"SGML_PATH"} = "$sgmldir/%O/%C/%T";
}

#
# Print a short usage summary on standard output.
#
# The format list names every value accepted by the -f dispatch in
# main ('tex' is accepted there as an alias for 'latex').
#

sub usage {
    print "Usage:\n";
    print "sgmlfmt -f <format> [-i <name> ...] [-links] [-ssi] file\n";
    print "where <format> is one of: html, latex, ascii, nroff, ps\n";
}

#
# Look for the file specified on the command line
#
# Arguments:
#   1. The file name as given, with or without the .sgml suffix
#
# Sets the globals $file (the file to parse), $fileroot (its base
# name without directory or .sgml suffix) and $filepath (its
# directory), and appends the file's directory to SGML_PATH.
#
# Returns 1 if the file was found, 0 otherwise.
#

sub getfile {
    local($filearg) = @_;

    # Prefer "name.sgml" over a bare "name".
    if (-f "$filearg.sgml") {
        $file = "$filearg.sgml";
    } elsif (-f $filearg) {
        $file = $filearg;
    } else {
        return 0;
    }

    $fileroot = $file;
    $fileroot =~ s/.*\///;      # drop the path
    $fileroot =~ s/\.sgml$//;   # drop the .sgml

    $filepath = $file;
    $filepath =~ s/\/*[^\/]*$//;        # drop the file name, keep the path

    if ($filepath eq "") {
        $ENV{"SGML_PATH"} .= ":.";
    } else {
        $ENV{"SGML_PATH"} .= ":$filepath/%S:.";
    }
    return 1;
}

#
# A function to run sgmls and sgmlsasp on the input file.
#
# Arguments:
#   1. A file handle for the output
#   2. A replacement file (directory actually)
#
# The handle is given by name and opened for reading from the
# "sgmls | sgmlsasp" pipeline.  Each call prepends another entry to
# SGML_PATH.  Dies if the pipeline cannot be started.
#
# NOTE(review): $file, $decl and the -i values are interpolated into a
# shell command line unquoted; names containing shell metacharacters
# will be interpreted by the shell.
#

sub sgmlparse {
    local($fhandle, $replacement) = @_;

    # Turn the -i option values into "-i a -i b ..." for sgmls.
    $defines = join(" -i ", @opt_i);
    if ($defines ne "") {
        $defines = "-i $defines";
    }

    $ENV{'SGML_PATH'} = "$replbase/$replacement.%N:$ENV{'SGML_PATH'}";
    open($fhandle, "sgmls $defines $decl $file | sgmlsasp $replbase/$replacement.mapping |")
        || die "Cannot run sgmls on $file: $!\n";
}

#
# Generate nroff output
#
# Writes $fileroot.nroff.
#

sub gen_nroff {
    open(outfile, ">$fileroot.nroff")
        || die "Cannot write $fileroot.nroff: $!\n";
    &sgmlparse(infile, "nroff");

    # Automatically add a newline on print; local() restores the
    # previous value when this sub returns.
    local($\) = "\n";

    while (<infile>) {
        chop;
        # This is supposed to ensure that no text line starts
        # with a dot (.), thus confusing groff, but it doesn't
        # appear to work.
        unless (/^\.DS/.../^\.DE/) {
            s/^[ \t]{1,}(.*)/$1/g;
        }
        s/^\.[ \t].*/\\\&$&/g;
        print outfile;
    }
    close(infile);
    close(outfile);
}

#
# Generate ASCII output using groff
#
# Writes $fileroot.ascii.
#

sub gen_ascii {
    &sgmlparse(infile, "nroff");
    open(outfile, "| groff -T ascii -t -ms | col -b > $fileroot.ascii")
        || die "Cannot run groff: $!\n";
    while (<infile>) {
        print outfile;
    }
    close(infile);
    close(outfile);
}

#
# Generate Postscript output using groff (this is suboptimal
# for printed output!)
#
# Writes $fileroot.ps.
#

sub gen_ps {
    &sgmlparse(infile, "grops");
    open(outfile, "| groff -T ps -t -ms > $fileroot.ps")
        || die "Cannot run groff: $!\n";
    while (<infile>) {
        print outfile;
    }
    close(infile);
    close(outfile);
}

#
# Generate LaTeX output
#
# Writes $fileroot.tex.
#

sub gen_latex {
    open(outfile, ">$fileroot.tex")
        || die "Cannot write $fileroot.tex: $!\n";
    &sgmlparse(infile, "latex");
    while (<infile>) {
        print outfile;
    }
    close(infile);
    close(outfile);
}

#
# Generate HTML output.
#
# HTML is generated in two passes.
#
# The first pass takes the output from sgmlsasp and gathers information
# about the structure of the document that is used in the second pass
# for splitting the file into separate files.  Targets for cross
# references are also stored in this pass.
#
# Based on the information from the first pass, the second pass
# generates a numbered series of HTML files, a "toplevel" file
# containing the title, author, abstract and a brief table of
# contents.  A detailed table of contents is also generated.  The
# second pass generates links for cross references and URLs.
#

# Tunable parameters
#
$maxlevel = 3;                  # max section depth
$num_depth = 4;                 # depth of numbering
$m_depth = 2;                   # depth of menus

$sc = 0;                        # section counter
$filecount = 0;                 # file counter

# Other variables:
#
#  st_xxxx   - Section Table.  Arrays containing information about a
#              given section.  To be accessed via the section counter $sc.
#
#  st_ol     - The output level of the given section.  I.E. how many
#              levels from the table of contents does it lie in terms
#              of HTML files, which is distinct from the section tag
#              levels.
#              NOTE(review): the tag names were lost from this comment;
#              presumably the numbered section tags of the DTD.
#
#  st_sl     - The absolute depth of a section.  Contrast st_ol.
#
#  st_num    - The section number in the form X.Y.Z....
#
#  st_file   - The HTML file the section belongs to.
#
#  st_header - The text of the section title.
#
#  st_parent - The section number of the given section's parent.

#
# HTML generation and the command line driver: gen_html (pass 1),
# html2html (pass 2), navbar, extlink and main.
#
# gen_html   - Runs sgmlparse with the "html" replacement and copies
#              every line to the temporary file /tmp/sgmlf.$$.  While
#              copying it reacts to lines starting with <@@sect>,
#              <@@endsect>, <@@head> and <@@label> to fill the section
#              tables (@st_sl, @st_parent, @st_num, @st_ol, @st_file,
#              @st_pl, @st_header), to count output files in
#              $filecount, and to record in %references the file number
#              each label occurs in (warning on STDERR about duplicate
#              labels).  With -links it also calls extlink for each
#              label.  Finally it re-opens the temporary file, hands it
#              to html2html, and unlinks it.
#
# html2html  - Reads the handle named by its first argument line by
#              line and dispatches on the leading <@@...> marker inside
#              the "tagsw" block.  Writes ${fileroot}_toc.html, one
#              ${fileroot}N.html (N from @st_file) when a section whose
#              depth is <= $maxlevel ends, and $fileroot.html from
#              $header[0], $text[0] and $footer[0] at the end.  The
#              second argument ($outfile) does not appear to be used.
#
# navbar     - Arguments: file number, file count, section number.
#              Builds $navbar[$st_ol[$sc]] from the chain of
#              @st_parent titles plus "Previous:" and "Next:" entries.
#
# extlink    - Arguments: reference name, target file name.  Converts
#              `/' to `%' and ` ' to `_' in the name and makes
#              "<name>.html" a symbolic link to the target, replacing
#              an existing symlink and warning if a non-symlink is in
#              the way.  Note that it assigns the global $file.
#
# main       - Parses options with NGetOpt ('f=s', 'links', 'ssi',
#              'i:s@'), locates the input file with getfile, and
#              dispatches on $opt_f to gen_html, gen_latex, gen_nroff,
#              gen_ascii or gen_ps.  Prints usage and exits 1 on any
#              argument error.
#
# NOTE(review): the `while ()' loop in gen_html has no readline
# operand; presumably it was a read from `foo', the handle passed to
# sgmlparse just before -- confirm.
#
# NOTE(review): many string literals and patterns below are empty or
# broken across lines (for example `s//<\1pre>/g' and the `.= ""'
# assignments for labels, references and URLs).  It looks like the
# HTML markup was stripped from this copy of the file; compare against
# a pristine revision before relying on it.
#
# NOTE(review): the temporary file name is predictable and none of the
# open() calls are checked.
#
sub gen_html { local($i, $sl); $tmpfile = "/tmp/sgmlf.$$"; open(bar, ">$tmpfile"); # print STDERR "(Pass 1..."; &sgmlparse(foo, "html"); while () { print bar; # count up the number of files to be generated # and gather assorted info about the document structure if (/^<\@\@sect>/) { $sl++; # current section level $sc++; # current section number $st_sl[$sc] = $sl; # In case this section has subsections, set the parent # pointer for this level to point at this section. $parent_pointer[$sl] = $sc; # Figure out who is the parent if this section. $st_parent[$sc] = $parent_pointer[$sl - 1]; # Per level counters $counter[$sl]++; $counter[$sl + 1] = 0; # calculate the section number in the form x.y.z. if ($sl <= $num_depth) { $st_num[$sc] = $st_num[$st_parent[$sc]] . "$counter[$sl]."; } # calculate the file number and output level if ($sl <= $maxlevel) { $filecount++; $st_ol[$sc] = $sl; } else { $st_ol[$sc] = $maxlevel; } $st_file[$sc] = $filecount; # Calculate the highest level node in which this # node should appear as a menu item. $st_pl[$sc] = $sl - $m_depth; if ($st_pl[$sc] < 0) { $st_pl[$sc] = 0; } if ($st_pl[$sc] > $maxlevel) { $st_pl[$sc] = $maxlevel; } } if (/^<\@\@endsect>/) { $sl--; } # record section titles if (/^<\@\@head>/) { chop; s/^<\@\@head>//; $st_header[$sc] = $_; } # record the section number that a label occurs in if (/^<\@\@label>/) { chop; s/^<\@\@label>//; if ($references{$_} eq "") { $references{$_} = "$filecount"; if ($opt_links) { &extlink($_, "${fileroot}${filecount}.html"); } } else { print STDERR "Warning: the label `$_' is multiply-defined.\n"; } } } close(bar); open(foofile, $tmpfile); &html2html(foofile, "boo"); unlink($tmpfile); } # # HTML conversion, pass number 2 # sub html2html { local($infile, $outfile) = @_; local($i); $sc = 0; push(@scs, $sc); open(tocfile, ">${fileroot}_toc.html"); print tocfile "\n"; while (<$infile>) { # change `<' and `>' to `<' and `>' in

	if (/
/.../<\/pre>/) {
	    s//<\1pre>/g;
	    s/>/\>/g;
	    s/<([\/]*)pre\>/<\1pre>/g;
	}

	# remove extraneous empty paragraphs (it is arguable that this
 	# is really a bug with the DTD, but changing it would break
 	# almost every document written to this DTD.)
	s/

<\/p>//; tagsw: { # titles and headings if (s/^<\@\@title>//) { chop; $st_header[0] = $_; $st_parent[0] = -1; print tocfile "\n$st_header[0]\n\n"; print tocfile "

$st_header[0]

\n"; $header[$st_ol[$sc]] = "\n\n$st_header[0]\n" . "\n\n"; if ($opt_ssi) { # Server Side Include hook $header[$st_ol[$sc]] .= ""; } $header[$st_ol[$sc]] .= "\n

$st_header[0]

\n"; $footer[$st_ol[$sc]] = "\n"; if ($opt_ssi) { # Server Side Include hook $footer[$st_ol[$sc]] .= ""; } $footer[$st_ol[$sc]] .= "\n\n\n"; last tagsw; } # # HEADER begin # if (s/^<\@\@head>//) { chop; if ($part == 1) { $text[0] .= "

Part $partnum:
$_"; last tagsw; } $href = "\"${fileroot}$st_file[$sc].html#$sc\""; # set up headers and footers if ($st_sl[$sc] > 0 && $st_sl[$sc] <= $maxlevel) { $header[$st_ol[$sc]] = "\n\n$_\n\n\n"; if ($opt_ssi) { # Server Side Include hook $header[$st_ol[$sc]] .= ""; } $header[$st_ol[$sc]] .= "\n$navbar[$st_ol[$sc]]\n
\n"; $footer[$st_ol[$sc]] = "
\n$navbar[$st_ol[$sc]]\n"; if ($opt_ssi) { # Server Side Include hook $footer[$st_ol[$sc]] .= ""; } $footer[$st_ol[$sc]] .= "\n\n"; } # Add this to the master table of contents print tocfile "
$st_num[$sc] " . "$_"; # Calculate the level to use in the HTML file $hlevel = $st_sl[$sc] - $st_ol[$sc] + 2; $shlevel = $st_sl[$sc] - $st_ol[$sc] + 3; $i = $st_ol[$sc]; # Add the section header $text[$i] .= "$st_num[$sc] $_"; $i--; # And also to the parent if ($st_sl[$sc] == $st_ol[$sc] && $i >= 0) { $text[$i] .= "$st_num[$sc] " . "$_"; $i--; } # and to the grandparents for (; $i >= $st_pl[$sc]; $i--) { $text[$i] .= "
$st_num[$sc] " . "$_"; } last tagsw; } # # HEADER end # if (s/^<\@\@endhead>//) { if ($part == 1) { $text[0] .= "

\n"; $part = 0; last tagsw; } print tocfile "\n"; $i = $st_ol[$sc]; # Close the section header $text[$i] .= "\n"; $i--; # in the parent... if ($st_sl[$sc] == $st_ol[$sc] && $i >= 0) { $text[$i] .= "\n"; $i--; } # in the grandparent... for (; $i >= $st_pl[$sc]; $i--) { $text[$i] .= "\n"; } last tagsw; } # sectioning if (s/^<\@\@part>//) { $part = 1; $partnum++; last tagsw; } # # BEGINNING of a section # if (s/^<\@\@sect>//) { # Increment the section counter and save it on a stack # for future reference. $sc++; push(@scs, $sc); # Set up the navigation bar if ($st_file[$sc] > $st_file[$sc - 1]) { &navbar($st_file[$sc], $filecount, $sc); } # Prepare for menu entries in the table of contents and # parent file(s). if ($st_sl[$sc - 1] < $st_sl[$sc]) { print tocfile "
\n"; $i = $st_ol[$sc] - 1 - ($st_sl[$sc] == $st_ol[$sc]); for (; $i >= $st_pl[$sc]; $i--) { $text[$i] .= "
\n"; } } last tagsw; } # # END of a section # if (s/^<\@\@endsect>//) { # Remember the section number! Subsections may have # altered the global $sc variable. local ($lsc) = pop(@scs); # Close off subsection menus we may have created in # parent file(s). if ($st_sl[$lsc] > $st_sl[$sc + 1]) { print tocfile "
\n"; $i = $st_ol[$lsc] - 1 - ($st_sl[$lsc] == $st_ol[$lsc]); for (; $i >= $st_pl[$lsc]; $i--) { $text[$i] .= "
\n"; } } # If this section is below $maxlevel, write it now. if ($st_sl[$lsc] <= $maxlevel) { open(SECOUT, ">${fileroot}$st_file[$lsc].html"); print SECOUT "$header[$st_ol[$lsc]] $text[$st_ol[$lsc]] " . "$footer[$st_ol[$lsc]]"; $text[$st_ol[$lsc]] = ""; close(SECOUT); } last tagsw; } # cross references if (s/^<\@\@label>//) { chop; $text[$st_ol[$sc]] .= ""; last tagsw; } if (s/^<\@\@ref>//) { chop; $refname = $_; if ($references{$_} eq "") { print "Warning: Reference to $_ has no defined target\n"; } else { $text[$st_ol[$sc]] .= ""; } last tagsw; } if (s/^<\@\@endref>//) { # $text[$st_ol[$sc]] .= ""; last tagsw; } if (s/^<\@\@refnam>//) { $text[$st_ol[$sc]] .= "$refname"; last tagsw; } # URLs if (s/^<\@\@url>//) { chop; $urlname = $_; $text[$st_ol[$sc]] .= ""; last tagsw; } if (s/^<\@\@urlnam>//) { $text[$st_ol[$sc]] .= "$urlname"; last tagsw; } if (s/^<\@\@endurl>//) { # $text[$st_ol[$sc]] .= ""; last tagsw; } # If nothing else did anything with this line, just print it. $text[$st_ol[$sc]] .= "$_"; } } print tocfile ""; open(SECOUT, ">$fileroot.html"); print SECOUT "$header[0] $text[0] $footer[0]"; close(SECOUT); close tocfile; } # navbar # # Generate a navigation bar to go on the top and bottom of the page. sub navbar { local ($fnum, $fmax, $sc) = @_; local ($i, $itext, $prv, $nxt, $colon); $colon = ":"; # Generate the section hierarchy $navbar[$st_ol[$sc]] = "$st_header[0]\n"; $i = $st_parent[$sc]; while ($i > 0) { $itext = " $colon $st_header[$i]\n$itext"; $i = $st_parent[$i]; } $navbar[$st_ol[$sc]] .= "$itext $colon $st_header[$sc]
\n"; # Generate previous and next pointers # Previous pointer must be in a different file AND must be at the # same or higher section level. If the current node is the # beginning of a chapter, then previous will go to the beginning # of the previous chapter, not the end of the previous chapter. $prv = $sc; while ($prv >= 0 && $st_file[$prv] >= $st_file[$sc] - 1) { $prv--; } $prv++; $navbar[$st_ol[$sc]] .= "Previous: $st_header[$prv]
\n"; # Then next pointer must be in a higher numbered file OR the home # page of the document. $nxt = $sc; if ($st_file[$nxt] == $filecount) { $nxt = 0; } else { while ($st_file[$nxt] == $st_file[$sc]) { $nxt++; } } $navbar[$st_ol[$sc]] .= "Next: $st_header[$nxt]\n"; $navbar[$st_ol[$sc]] .= "\n"; } # extlink # # creates a symbolic link from the name in a reference to the numbered # html file. Since the file number that any given section has is # subject to change as the document goes through revisions, this allows # for a fixed target that separate documents can hook into. # # Slashes (/) in the reference are converted to percents (%) while # spaces ( ) are converted to underscores (_); sub extlink { local ($ref, $fn) = @_; $ref =~ s/\//%/g; $ref =~ s/ /_/g; $file = "$ref.html"; if (-e $file) { if (-l $file) { unlink($file); symlink($fn, $file); } else { print "Warning: $file exists and is not a symbolic link\n"; } } else { symlink($fn, $file); } } # Now, read the command line and take appropriate action sub main { # Check arguments if (!&NGetOpt('f=s', 'links', 'ssi', 'i:s@')) { &usage; exit 1; } if (@ARGV == 0) { print "An input file must be specified.\n"; &usage; exit 1; } if (&getfile($ARGV[0]) == 0) { print "Cannot locate specified file: $ARGV[0]\n"; &usage; exit 1; } # Generate output if ($opt_f eq 'html') { print "generating $fileroot.html"; if ($opt_links == 1) { print " with external links"; } print "...\n"; &gen_html(); } elsif ($opt_f eq 'tex' || $opt_f eq 'latex') { print "generating $fileroot.tex...\n"; &gen_latex(); } elsif ($opt_f eq 'nroff') { print "generating $fileroot.nroff...\n"; &gen_nroff(); } elsif ($opt_f eq 'ascii') { print "generating $fileroot.ascii...\n"; &gen_ascii(); } elsif ($opt_f eq 'ps') { print "generating $fileroot.ps...\n"; &gen_ps(); } else { if ($opt_f eq "") { print "An output format must be specified with the -f option.\n"; } else { print "\"$opt_f\" is an unknown output format.\n"; } &usage; exit 1; } } &main; exit 0;