diff options
Diffstat (limited to 'contrib/perl5/lib/Pod')
-rw-r--r-- | contrib/perl5/lib/Pod/Checker.pm | 1195 | ||||
-rw-r--r-- | contrib/perl5/lib/Pod/Find.pm | 278 | ||||
-rw-r--r-- | contrib/perl5/lib/Pod/Functions.pm | 20 | ||||
-rw-r--r-- | contrib/perl5/lib/Pod/Html.pm | 1654 | ||||
-rw-r--r-- | contrib/perl5/lib/Pod/InputObjects.pm | 933 | ||||
-rw-r--r-- | contrib/perl5/lib/Pod/Man.pm | 1217 | ||||
-rw-r--r-- | contrib/perl5/lib/Pod/ParseUtils.pm | 837 | ||||
-rw-r--r-- | contrib/perl5/lib/Pod/Parser.pm | 1771 | ||||
-rw-r--r-- | contrib/perl5/lib/Pod/Plainer.pm | 69 | ||||
-rw-r--r-- | contrib/perl5/lib/Pod/Select.pm | 745 | ||||
-rw-r--r-- | contrib/perl5/lib/Pod/Text.pm | 1154 | ||||
-rw-r--r-- | contrib/perl5/lib/Pod/Text/Color.pm | 125 | ||||
-rw-r--r-- | contrib/perl5/lib/Pod/Text/Termcap.pm | 142 | ||||
-rw-r--r-- | contrib/perl5/lib/Pod/Usage.pm | 544 |
14 files changed, 9590 insertions, 1094 deletions
diff --git a/contrib/perl5/lib/Pod/Checker.pm b/contrib/perl5/lib/Pod/Checker.pm new file mode 100644 index 0000000..ae32677 --- /dev/null +++ b/contrib/perl5/lib/Pod/Checker.pm @@ -0,0 +1,1195 @@ +############################################################################# +# Pod/Checker.pm -- check pod documents for syntax errors +# +# Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved. +# This file is part of "PodParser". PodParser is free software; +# you can redistribute it and/or modify it under the same terms +# as Perl itself. +############################################################################# + +package Pod::Checker; + +use vars qw($VERSION); +$VERSION = 1.098; ## Current version of this package +require 5.005; ## requires this Perl version or later + +use Pod::ParseUtils; ## for hyperlinks and lists + +=head1 NAME + +Pod::Checker, podchecker() - check pod documents for syntax errors + +=head1 SYNOPSIS + + use Pod::Checker; + + $syntax_okay = podchecker($filepath, $outputpath, %options); + + my $checker = new Pod::Checker %options; + $checker->parse_from_file($filepath, \*STDERR); + +=head1 OPTIONS/ARGUMENTS + +C<$filepath> is the input POD to read and C<$outputpath> is +where to write POD syntax error messages. Either argument may be a scalar +indicating a file-path, or else a reference to an open filehandle. +If unspecified, the input-file it defaults to C<\*STDIN>, and +the output-file defaults to C<\*STDERR>. + +=head2 podchecker() + +This function can take a hash of options: + +=over 4 + +=item B<-warnings> =E<gt> I<val> + +Turn warnings on/off. See L<"Warnings">. + +=back + +=head1 DESCRIPTION + +B<podchecker> will perform syntax checking of Perl5 POD format documentation. + +I<NOTE THAT THIS MODULE IS CURRENTLY IN THE BETA STAGE!> + +It is hoped that curious/ambitious user will help flesh out and add the +additional features they wish to see in B<Pod::Checker> and B<podchecker> +and verify that the checks are consistent with L<perlpod>. + +The following checks are currently preformed: + +=over 4 + +=item * + +Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences, +and unterminated interior sequences. + +=item * + +Check for proper balancing of C<=begin> and C<=end>. The contents of such +a block are generally ignored, i.e. no syntax checks are performed. + +=item * + +Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>. + +=item * + +Check for same nested interior-sequences (e.g. +C<LE<lt>...LE<lt>...E<gt>...E<gt>>). + +=item * + +Check for malformed or nonexisting entities C<EE<lt>...E<gt>>. + +=item * + +Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod> +for details. + +=item * + +Check for unresolved document-internal links. This check may also reveal +misspelled links that seem to be internal links but should be links +to something else. + +=back + +=head1 DIAGNOSTICS + +=head2 Errors + +=over 4 + +=item * empty =headn + +A heading (C<=head1> or C<=head2>) without any text? That ain't no +heading! + +=item * =over on line I<N> without closing =back + +The C<=over> command does not have a corresponding C<=back> before the +next heading (C<=head1> or C<=head2>) or the end of the file. + +=item * =item without previous =over + +=item * =back without previous =over + +An C<=item> or C<=back> command has been found outside a +C<=over>/C<=back> block. + +=item * No argument for =begin + +A C<=begin> command was found that is not followed by the formatter +specification. + +=item * =end without =begin + +A standalone C<=end> command was found. + +=item * Nested =begin's + +There were at least two consecutive C<=begin> commands without +the corresponding C<=end>. Only one C<=begin> may be active at +a time. + +=item * =for without formatter specification + +There is no specification of the formatter after the C<=for> command. + +=item * unresolved internal link I<NAME> + +The given link to I<NAME> does not have a matching node in the current +POD. This also happend when a single word node name is not enclosed in +C<"">. + +=item * Unknown command "I<CMD>" + +An invalid POD command has been found. Valid are C<=head1>, C<=head2>, +C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>, C<=for>, C<=pod>, +C<=cut> + +=item * Unknown interior-sequence "I<SEQ>" + +An invalid markup command has been encountered. Valid are: +C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>, +C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>, +C<ZE<lt>E<gt>> + +=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt> + +Two nested identical markup commands have been found. Generally this +does not make sense. + +=item * garbled entity I<STRING> + +The I<STRING> found cannot be interpreted as a character entity. + +=item * Entity number out of range + +An entity specified by number (dec, hex, oct) is out of range (1-255). + +=item * malformed link LE<lt>E<gt> + +The link found cannot be parsed because it does not conform to the +syntax described in L<perlpod>. + +=item * nonempty ZE<lt>E<gt> + +The C<ZE<lt>E<gt>> sequence is supposed to be empty. + +=item * empty XE<lt>E<gt> + +The index entry specified contains nothing but whitespace. + +=item * Spurious text after =pod / =cut + +The commands C<=pod> and C<=cut> do not take any arguments. + +=item * Spurious character(s) after =back + +The C<=back> command does not take any arguments. + +=back + +=head2 Warnings + +These may not necessarily cause trouble, but indicate mediocre style. + +=over 4 + +=item * multiple occurence of link target I<name> + +The POD file has some C<=item> and/or C<=head> commands that have +the same text. Potential hyperlinks to such a text cannot be unique then. + +=item * line containing nothing but whitespace in paragraph + +There is some whitespace on a seemingly empty line. POD is very sensitive +to such things, so this is flagged. B<vi> users switch on the B<list> +option to avoid this problem. + +=item * file does not start with =head + +The file starts with a different POD directive than head. +This is most probably something you do not want. + +=item * No numeric argument for =over + +The C<=over> command is supposed to have a numeric argument (the +indentation). + +=item * previous =item has no contents + +There is a list C<=item> right above the flagged line that has no +text contents. You probably want to delete empty items. + +=item * preceding non-item paragraph(s) + +A list introduced by C<=over> starts with a text or verbatim paragraph, +but continues with C<=item>s. Move the non-item paragraph out of the +C<=over>/C<=back> block. + +=item * =item type mismatch (I<one> vs. I<two>) + +A list started with e.g. a bulletted C<=item> and continued with a +numbered one. This is obviously inconsistent. For most translators the +type of the I<first> C<=item> determines the type of the list. + +=item * I<N> unescaped C<E<lt>E<gt>> in paragraph + +Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>> +can potentially cause errors as they could be misinterpreted as +markup commands. + +=item * Unknown entity + +A character entity was found that does not belong to the standard +ISO set or the POD specials C<verbar> and C<sol>. + +=item * No items in =over + +The list opened with C<=over> does not contain any items. + +=item * No argument for =item + +C<=item> without any parameters is deprecated. It should either be followed +by C<*> to indicate an unordered list, by a number (optionally followed +by a dot) to indicate an ordered (numbered) list or simple text for a +definition list. + +=item * empty section in previous paragraph + +The previous section (introduced by a C<=head> command) does not contain +any text. This usually indicates that something is missing. Note: A +C<=head1> followed immediately by C<=head2> does not trigger this warning. + +=item * Verbatim paragraph in NAME section + +The NAME section (C<=head1 NAME>) should consist of a single paragraph +with the script/module name, followed by a dash `-' and a very short +description of what the thing is good for. + +=item * Hyperlinks + +There are some warnings wrt. hyperlinks: +Leading/trailing whitespace, newlines in hyperlinks, +brackets C<()>. + +=back + +=head1 RETURN VALUE + +B<podchecker> returns the number of POD syntax errors found or -1 if +there were no POD commands at all found in the file. + +=head1 EXAMPLES + +I<[T.B.D.]> + +=head1 INTERFACE + +While checking, this module collects document properties, e.g. the nodes +for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>). +POD translators can use this feature to syntax-check and get the nodes in +a first pass before actually starting to convert. This is expensive in terms +of execution time, but allows for very robust conversions. + +=cut + +############################################################################# + +use strict; +#use diagnostics; +use Carp; +use Exporter; +use Pod::Parser; +require VMS::Filespec if $^O eq 'VMS'; + +use vars qw(@ISA @EXPORT); +@ISA = qw(Pod::Parser); +@EXPORT = qw(&podchecker); + +use vars qw(%VALID_COMMANDS %VALID_SEQUENCES); + +my %VALID_COMMANDS = ( + 'pod' => 1, + 'cut' => 1, + 'head1' => 1, + 'head2' => 1, + 'over' => 1, + 'back' => 1, + 'item' => 1, + 'for' => 1, + 'begin' => 1, + 'end' => 1, +); + +my %VALID_SEQUENCES = ( + 'I' => 1, + 'B' => 1, + 'S' => 1, + 'C' => 1, + 'L' => 1, + 'F' => 1, + 'X' => 1, + 'Z' => 1, + 'E' => 1, +); + +# stolen from HTML::Entities +my %ENTITIES = ( + # Some normal chars that have special meaning in SGML context + amp => '&', # ampersand +'gt' => '>', # greater than +'lt' => '<', # less than + quot => '"', # double quote + + # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML + AElig => 'Æ', # capital AE diphthong (ligature) + Aacute => 'Á', # capital A, acute accent + Acirc => 'Â', # capital A, circumflex accent + Agrave => 'À', # capital A, grave accent + Aring => 'Å', # capital A, ring + Atilde => 'Ã', # capital A, tilde + Auml => 'Ä', # capital A, dieresis or umlaut mark + Ccedil => 'Ç', # capital C, cedilla + ETH => 'Ð', # capital Eth, Icelandic + Eacute => 'É', # capital E, acute accent + Ecirc => 'Ê', # capital E, circumflex accent + Egrave => 'È', # capital E, grave accent + Euml => 'Ë', # capital E, dieresis or umlaut mark + Iacute => 'Í', # capital I, acute accent + Icirc => 'Î', # capital I, circumflex accent + Igrave => 'Ì', # capital I, grave accent + Iuml => 'Ï', # capital I, dieresis or umlaut mark + Ntilde => 'Ñ', # capital N, tilde + Oacute => 'Ó', # capital O, acute accent + Ocirc => 'Ô', # capital O, circumflex accent + Ograve => 'Ò', # capital O, grave accent + Oslash => 'Ø', # capital O, slash + Otilde => 'Õ', # capital O, tilde + Ouml => 'Ö', # capital O, dieresis or umlaut mark + THORN => 'Þ', # capital THORN, Icelandic + Uacute => 'Ú', # capital U, acute accent + Ucirc => 'Û', # capital U, circumflex accent + Ugrave => 'Ù', # capital U, grave accent + Uuml => 'Ü', # capital U, dieresis or umlaut mark + Yacute => 'Ý', # capital Y, acute accent + aacute => 'á', # small a, acute accent + acirc => 'â', # small a, circumflex accent + aelig => 'æ', # small ae diphthong (ligature) + agrave => 'à', # small a, grave accent + aring => 'å', # small a, ring + atilde => 'ã', # small a, tilde + auml => 'ä', # small a, dieresis or umlaut mark + ccedil => 'ç', # small c, cedilla + eacute => 'é', # small e, acute accent + ecirc => 'ê', # small e, circumflex accent + egrave => 'è', # small e, grave accent + eth => 'ð', # small eth, Icelandic + euml => 'ë', # small e, dieresis or umlaut mark + iacute => 'í', # small i, acute accent + icirc => 'î', # small i, circumflex accent + igrave => 'ì', # small i, grave accent + iuml => 'ï', # small i, dieresis or umlaut mark + ntilde => 'ñ', # small n, tilde + oacute => 'ó', # small o, acute accent + ocirc => 'ô', # small o, circumflex accent + ograve => 'ò', # small o, grave accent + oslash => 'ø', # small o, slash + otilde => 'õ', # small o, tilde + ouml => 'ö', # small o, dieresis or umlaut mark + szlig => 'ß', # small sharp s, German (sz ligature) + thorn => 'þ', # small thorn, Icelandic + uacute => 'ú', # small u, acute accent + ucirc => 'û', # small u, circumflex accent + ugrave => 'ù', # small u, grave accent + uuml => 'ü', # small u, dieresis or umlaut mark + yacute => 'ý', # small y, acute accent + yuml => 'ÿ', # small y, dieresis or umlaut mark + + # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96) + copy => '©', # copyright sign + reg => '®', # registered sign + nbsp => "\240", # non breaking space + + # Additional ISO-8859/1 entities listed in rfc1866 (section 14) + iexcl => '¡', + cent => '¢', + pound => '£', + curren => '¤', + yen => '¥', + brvbar => '¦', + sect => '§', + uml => '¨', + ordf => 'ª', + laquo => '«', +'not' => '¬', # not is a keyword in perl + shy => '', + macr => '¯', + deg => '°', + plusmn => '±', + sup1 => '¹', + sup2 => '²', + sup3 => '³', + acute => '´', + micro => 'µ', + para => '¶', + middot => '·', + cedil => '¸', + ordm => 'º', + raquo => '»', + frac14 => '¼', + frac12 => '½', + frac34 => '¾', + iquest => '¿', +'times' => '×', # times is a keyword in perl + divide => '÷', + +# some POD special entities + verbar => '|', + sol => '/' +); + +##--------------------------------------------------------------------------- + +##--------------------------------- +## Function definitions begin here +##--------------------------------- + +sub podchecker( $ ; $ % ) { + my ($infile, $outfile, %options) = @_; + local $_; + + ## Set defaults + $infile ||= \*STDIN; + $outfile ||= \*STDERR; + + ## Now create a pod checker + my $checker = new Pod::Checker(%options); + $checker->parseopts(-process_cut_cmd => 1, -warnings => 1); + + ## Now check the pod document for errors + $checker->parse_from_file($infile, $outfile); + + ## Return the number of errors found + return $checker->num_errors(); +} + +##--------------------------------------------------------------------------- + +##------------------------------- +## Method definitions begin here +##------------------------------- + +## sub new { +## my $this = shift; +## my $class = ref($this) || $this; +## my %params = @_; +## my $self = {%params}; +## bless $self, $class; +## $self->initialize(); +## return $self; +## } + +sub initialize { + my $self = shift; + ## Initialize number of errors, and setup an error function to + ## increment this number and then print to the designated output. + $self->{_NUM_ERRORS} = 0; + $self->errorsub('poderror'); # set the error handling subroutine + $self->{_commands} = 0; # total number of POD commands encountered + $self->{_list_stack} = []; # stack for nested lists + $self->{_have_begin} = ''; # stores =begin + $self->{_links} = []; # stack for internal hyperlinks + $self->{_nodes} = []; # stack for =head/=item nodes + $self->{_index} = []; # text in X<> + # print warnings? + $self->{-warnings} = 1 unless(defined $self->{-warnings}); + $self->{_current_head1} = ''; # the current =head1 block +} + +################################## + +=over 4 + +=item C<$checker-E<gt>poderror( @args )> + +=item C<$checker-E<gt>poderror( {%opts}, @args )> + +Internal method for printing errors and warnings. If no options are +given, simply prints "@_". The following options are recognized and used +to form the output: + + -msg + +A message to print prior to C<@args>. + + -line + +The line number the error occurred in. + + -file + +The file (name) the error occurred in. + + -severity + +The error level, should be 'WARNING' or 'ERROR'. + +=cut + +# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args ) +sub poderror { + my $self = shift; + my %opts = (ref $_[0]) ? %{shift()} : (); + $opts{-file} = VMS::Filespec::unixify($opts{-file}) if (exists($opts{-file}) && $^O eq 'VMS'); + + ## Retrieve options + chomp( my $msg = ($opts{-msg} || "")."@_" ); + my $line = (exists $opts{-line}) ? " at line $opts{-line}" : ""; + my $file = (exists $opts{-file}) ? " in file $opts{-file}" : ""; + unless (exists $opts{-severity}) { + ## See if can find severity in message prefix + $opts{-severity} = $1 if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// ); + } + my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : ""; + + ## Increment error count and print message " + ++($self->{_NUM_ERRORS}) + if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR')); + my $out_fh = $self->output_handle(); + print $out_fh ($severity, $msg, $line, $file, "\n") + if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING'); +} + +################################## + +=item C<$checker-E<gt>num_errors()> + +Set (if argument specified) and retrieve the number of errors found. + +=cut + +sub num_errors { + return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS}; +} + +################################## + +=item C<$checker-E<gt>name()> + +Set (if argument specified) and retrieve the canonical name of POD as +found in the C<=head1 NAME> section. + +=cut + +sub name { + return (@_ > 1 && $_[1]) ? + ($_[0]->{-name} = $_[1]) : $_[0]->{-name}; +} + +################################## + +=item C<$checker-E<gt>node()> + +Add (if argument specified) and retrieve the nodes (as defined by C<=headX> +and C<=item>) of the current POD. The nodes are returned in the order of +their occurence. They consist of plain text, each piece of whitespace is +collapsed to a single blank. + +=cut + +sub node { + my ($self,$text) = @_; + if(defined $text) { + $text =~ s/\s+$//s; # strip trailing whitespace + $text =~ s/\s+/ /gs; # collapse whitespace + # add node, order important! + push(@{$self->{_nodes}}, $text); + # keep also a uniqueness counter + $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s); + return $text; + } + @{$self->{_nodes}}; +} + +################################## + +=item C<$checker-E<gt>idx()> + +Add (if argument specified) and retrieve the index entries (as defined by +C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece +of whitespace is collapsed to a single blank. + +=cut + +# set/return index entries of current POD +sub idx { + my ($self,$text) = @_; + if(defined $text) { + $text =~ s/\s+$//s; # strip trailing whitespace + $text =~ s/\s+/ /gs; # collapse whitespace + # add node, order important! + push(@{$self->{_index}}, $text); + # keep also a uniqueness counter + $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s); + return $text; + } + @{$self->{_index}}; +} + +################################## + +=item C<$checker-E<gt>hyperlink()> + +Add (if argument specified) and retrieve the hyperlinks (as defined by +C<LE<lt>E<gt>>) of the current POD. They consist of an 2-item array: line +number and C<Pod::Hyperlink> object. + +=back + +=cut + +# set/return hyperlinks of the current POD +sub hyperlink { + my $self = shift; + if($_[0]) { + push(@{$self->{_links}}, $_[0]); + return $_[0]; + } + @{$self->{_links}}; +} + +## overrides for Pod::Parser + +sub end_pod { + ## Do some final checks and + ## print the number of errors found + my $self = shift; + my $infile = $self->input_file(); + $infile = VMS::Filespec::unixify($infile) if $^O eq 'VMS'; + my $out_fh = $self->output_handle(); + + if(@{$self->{_list_stack}}) { + # _TODO_ display, but don't count them for now + my $list; + while(($list = $self->_close_list('EOF',$infile)) && + $list->indent() ne 'auto') { + $self->poderror({ -line => 'EOF', -file => $infile, + -severity => 'ERROR', -msg => "=over on line " . + $list->start() . " without closing =back" }); #" + } + } + + # check validity of document internal hyperlinks + # first build the node names from the paragraph text + my %nodes; + foreach($self->node()) { + $nodes{$_} = 1; + if(/^(\S+)\s+/) { + # we have more than one word. Use the first as a node, too. + # This is used heavily in perlfunc.pod + $nodes{$1} ||= 2; # derived node + } + } + foreach($self->hyperlink()) { + my ($line,$link) = @$_; + # _TODO_ what if there is a link to the page itself by the name, + # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION"> + if($link->node() && !$link->page() && $link->type() ne 'hyperlink') { + my $node = $self->_check_ptree($self->parse_text($link->node(), + $line), $line, $infile, 'L'); + if($node && !$nodes{$node}) { + $self->poderror({ -line => $line || '', -file => $infile, + -severity => 'ERROR', + -msg => "unresolved internal link '$node'"}); + } + } + } + + # check the internal nodes for uniqueness. This pertains to + # =headX, =item and X<...> + foreach(grep($self->{_unique_nodes}->{$_} > 1, + keys %{$self->{_unique_nodes}})) { + $self->poderror({ -line => '-', -file => $infile, + -severity => 'WARNING', + -msg => "multiple occurence of link target '$_'"}); + } + + ## Print the number of errors found + my $num_errors = $self->num_errors(); + if ($num_errors > 0) { + printf $out_fh ("$infile has $num_errors pod syntax %s.\n", + ($num_errors == 1) ? "error" : "errors"); + } + elsif($self->{_commands} == 0) { + print $out_fh "$infile does not contain any pod commands.\n"; + $self->num_errors(-1); + } + else { + print $out_fh "$infile pod syntax OK.\n"; + } +} + +# check a POD command directive +sub command { + my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_; + my ($file, $line) = $pod_para->file_line; + ## Check the command syntax + my $arg; # this will hold the command argument + if (! $VALID_COMMANDS{$cmd}) { + $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR', + -msg => "Unknown command '$cmd'" }); + } + else { + # found a valid command + if(!$self->{_commands}++ && $cmd !~ /^head/) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "file does not start with =head" }); + } + ## check syntax of particular command + if($cmd eq 'over') { + # check for argument + $arg = $self->interpolate_and_check($paragraph, $line,$file); + my $indent = 4; # default + if($arg && $arg =~ /^\s*(\d+)\s*$/) { + $indent = $1; + } else { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "No numeric argument for =over"}); + } + # start a new list + $self->_open_list($indent,$line,$file); + } + elsif($cmd eq 'item') { + # are we in a list? + unless(@{$self->{_list_stack}}) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "=item without previous =over" }); + # auto-open in case we encounter many more + $self->_open_list('auto',$line,$file); + } + my $list = $self->{_list_stack}->[0]; + # check whether the previous item had some contents + if(defined $self->{_list_item_contents} && + $self->{_list_item_contents} == 0) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "previous =item has no contents" }); + } + if($list->{_has_par}) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "preceding non-item paragraph(s)" }); + delete $list->{_has_par}; + } + # check for argument + $arg = $self->interpolate_and_check($paragraph, $line, $file); + if($arg && $arg =~ /(\S+)/) { + $arg =~ s/[\s\n]+$//; + my $type; + if($arg =~ /^[*]\s*(\S*.*)/) { + $type = 'bullet'; + $self->{_list_item_contents} = $1 ? 1 : 0; + $arg = $1; + } + elsif($arg =~ /^\d+\.?\s*(\S*)/) { + $type = 'number'; + $self->{_list_item_contents} = $1 ? 1 : 0; + $arg = $1; + } + else { + $type = 'definition'; + $self->{_list_item_contents} = 1; + } + my $first = $list->type(); + if($first && $first ne $type) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "=item type mismatch ('$first' vs. '$type')"}); + } + else { # first item + $list->type($type); + } + } + else { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "No argument for =item" }); + $arg = ' '; # empty + $self->{_list_item_contents} = 0; + } + # add this item + $list->item($arg); + # remember this node + $self->node($arg); + } + elsif($cmd eq 'back') { + # check if we have an open list + unless(@{$self->{_list_stack}}) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "=back without previous =over" }); + } + else { + # check for spurious characters + $arg = $self->interpolate_and_check($paragraph, $line,$file); + if($arg && $arg =~ /\S/) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "Spurious character(s) after =back" }); + } + # close list + my $list = $self->_close_list($line,$file); + # check for empty lists + if(!$list->item() && $self->{-warnings}) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "No items in =over (at line " . + $list->start() . ") / =back list"}); #" + } + } + } + elsif($cmd =~ /^head(\d+)/) { + # check whether the previous =head section had some contents + if(defined $self->{_commands_in_head} && + $self->{_commands_in_head} == 0 && + defined $self->{_last_head} && + $self->{_last_head} >= $1) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "empty section in previous paragraph"}); + } + $self->{_commands_in_head} = -1; + $self->{_last_head} = $1; + # check if there is an open list + if(@{$self->{_list_stack}}) { + my $list; + while(($list = $self->_close_list($line,$file)) && + $list->indent() ne 'auto') { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "=over on line ". $list->start() . + " without closing =back (at $cmd)" }); + } + } + # remember this node + $arg = $self->interpolate_and_check($paragraph, $line,$file); + $arg =~ s/[\s\n]+$//s; + $self->node($arg); + unless(length($arg)) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "empty =$cmd"}); + } + if($cmd eq 'head1') { + $self->{_current_head1} = $arg; + } else { + $self->{_current_head1} = ''; + } + } + elsif($cmd eq 'begin') { + if($self->{_have_begin}) { + # already have a begin + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "Nested =begin's (first at line " . + $self->{_have_begin} . ")"}); + } + else { + # check for argument + $arg = $self->interpolate_and_check($paragraph, $line,$file); + unless($arg && $arg =~ /(\S+)/) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "No argument for =begin"}); + } + # remember the =begin + $self->{_have_begin} = "$line:$1"; + } + } + elsif($cmd eq 'end') { + if($self->{_have_begin}) { + # close the existing =begin + $self->{_have_begin} = ''; + # check for spurious characters + $arg = $self->interpolate_and_check($paragraph, $line,$file); + # the closing argument is optional + #if($arg && $arg =~ /\S/) { + # $self->poderror({ -line => $line, -file => $file, + # -severity => 'WARNING', + # -msg => "Spurious character(s) after =end" }); + #} + } + else { + # don't have a matching =begin + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "=end without =begin" }); + } + } + elsif($cmd eq 'for') { + unless($paragraph =~ /\s*(\S+)\s*/) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "=for without formatter specification" }); + } + $arg = ''; # do not expand paragraph below + } + elsif($cmd =~ /^(pod|cut)$/) { + # check for argument + $arg = $self->interpolate_and_check($paragraph, $line,$file); + if($arg && $arg =~ /(\S+)/) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "Spurious text after =$cmd"}); + } + } + $self->{_commands_in_head}++; + ## Check the interior sequences in the command-text + $self->interpolate_and_check($paragraph, $line,$file) + unless(defined $arg); + } +} + +sub _open_list +{ + my ($self,$indent,$line,$file) = @_; + my $list = Pod::List->new( + -indent => $indent, + -start => $line, + -file => $file); + unshift(@{$self->{_list_stack}}, $list); + undef $self->{_list_item_contents}; + $list; +} + +sub _close_list +{ + my ($self,$line,$file) = @_; + my $list = shift(@{$self->{_list_stack}}); + if(defined $self->{_list_item_contents} && + $self->{_list_item_contents} == 0) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "previous =item has no contents" }); + } + undef $self->{_list_item_contents}; + $list; +} + +# process a block of some text +sub interpolate_and_check { + my ($self, $paragraph, $line, $file) = @_; + ## Check the interior sequences in the command-text + # and return the text + $self->_check_ptree( + $self->parse_text($paragraph,$line), $line, $file, ''); +} + +sub _check_ptree { + my ($self,$ptree,$line,$file,$nestlist) = @_; + local($_); + my $text = ''; + # process each node in the parse tree + foreach(@$ptree) { + # regular text chunk + unless(ref) { + my $count; + # count the unescaped angle brackets + my $i = $_; + if($count = $i =~ tr/<>/<>/) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "$count unescaped <> in paragraph" }) + if($self->{-warnings}); + } + $text .= $i; + next; + } + # have an interior sequence + my $cmd = $_->cmd_name(); + my $contents = $_->parse_tree(); + ($file,$line) = $_->file_line(); + # check for valid tag + if (! $VALID_SEQUENCES{$cmd}) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => qq(Unknown interior-sequence '$cmd')}); + # expand it anyway + $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); + next; + } + if($nestlist =~ /$cmd/) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "nested commands $cmd<...$cmd<...>...>"}); + # _TODO_ should we add the contents anyway? + # expand it anyway, see below + } + if($cmd eq 'E') { + # preserve entities + if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "garbled entity " . $_->raw_text()}); + next; + } + my $ent = $$contents[0]; + my $val; + if($ent =~ /^0x[0-9a-f]+$/i) { + # hexadec entity + $val = hex($ent); + } + elsif($ent =~ /^0\d+$/) { + # octal + $val = oct($ent); + } + elsif($ent =~ /^\d+$/) { + # numeric entity + $val = $ent; + } + if(defined $val) { + if($val>0 && $val<256) { + $text .= chr($val); + } + else { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "Entity number out of range " . $_->raw_text()}); + } + } + elsif($ENTITIES{$ent}) { + # known ISO entity + $text .= $ENTITIES{$ent}; + } + else { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "Unknown entity " . $_->raw_text()}); + $text .= "E<$ent>"; + } + } + elsif($cmd eq 'L') { + # try to parse the hyperlink + my $link = Pod::Hyperlink->new($contents->raw_text()); + unless(defined $link) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "malformed link " . $_->raw_text() ." : $@"}); + next; + } + $link->line($line); # remember line + if($self->{-warnings}) { + foreach my $w ($link->warning()) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => $w }); + } + } + # check the link text + $text .= $self->_check_ptree($self->parse_text($link->text(), + $line), $line, $file, "$nestlist$cmd"); + # remember link + $self->hyperlink([$line,$link]); + } + elsif($cmd =~ /[BCFIS]/) { + # add the guts + $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); + } + elsif($cmd eq 'Z') { + if(length($contents->raw_text())) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "Nonempty Z<>"}); + } + } + elsif($cmd eq 'X') { + my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); + if($idx =~ /^\s*$/s) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "Empty X<>"}); + } + else { + # remember this node + $self->idx($idx); + } + } + else { + # not reached + die "internal error"; + } + } + $text; +} + +# process a block of verbatim text +sub verbatim { + ## Nothing particular to check + my ($self, $paragraph, $line_num, $pod_para) = @_; + + $self->_preproc_par($paragraph); + + if($self->{_current_head1} eq 'NAME') { + my ($file, $line) = $pod_para->file_line; + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => 'Verbatim paragraph in NAME section' }); + } +} + +# process a block of regular text +sub textblock { + my ($self, $paragraph, $line_num, $pod_para) = @_; + my ($file, $line) = $pod_para->file_line; + + $self->_preproc_par($paragraph); + + # skip this paragraph if in a =begin block + unless($self->{_have_begin}) { + my $block = $self->interpolate_and_check($paragraph, $line,$file); + if($self->{_current_head1} eq 'NAME') { + if($block =~ /^\s*(\S+?)\s*[,-]/) { + # this is the canonical name + $self->{-name} = $1 unless(defined $self->{-name}); + } + } + } +} + +sub _preproc_par +{ + my $self = shift; + $_[0] =~ s/[\s\n]+$//; + if($_[0]) { + $self->{_commands_in_head}++; + $self->{_list_item_contents}++ if(defined $self->{_list_item_contents}); + if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) { + $self->{_list_stack}->[0]->{_has_par} = 1; + } + } +} + +1; + +__END__ + +=head1 AUTHOR + +Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version), +Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt> + +Based on code for B<Pod::Text::pod2text()> written by +Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> + +=cut + diff --git a/contrib/perl5/lib/Pod/Find.pm b/contrib/perl5/lib/Pod/Find.pm new file mode 100644 index 0000000..8de197b --- /dev/null +++ b/contrib/perl5/lib/Pod/Find.pm @@ -0,0 +1,278 @@ +############################################################################# +# Pod/Find.pm -- finds files containing POD documentation +# +# Author: Marek Rouchal <marek@saftsack.fs.uni-bayreuth.de> +# +# Copyright (C) 1999-2000 by Marek Rouchal (and borrowing code +# from Nick Ing-Simmon's PodToHtml). All rights reserved. +# This file is part of "PodParser". Pod::Find is free software; +# you can redistribute it and/or modify it under the same terms +# as Perl itself. +############################################################################# + +package Pod::Find; + +use vars qw($VERSION); +$VERSION = 0.12; ## Current version of this package +require 5.005; ## requires this Perl version or later + +############################################################################# + +=head1 NAME + +Pod::Find - find POD documents in directory trees + +=head1 SYNOPSIS + + use Pod::Find qw(pod_find simplify_name); + my %pods = pod_find({ -verbose => 1, -inc => 1 }); + foreach(keys %pods) { + print "found library POD `$pods{$_}' in $_\n"; + } + + print "podname=",simplify_name('a/b/c/mymodule.pod'),"\n"; + +=head1 DESCRIPTION + +B<Pod::Find> provides a function B<pod_find> that searches for POD +documents in a given set of files and directories. It returns a hash +with the file names as keys and the POD name as value. The POD name +is derived from the file name and its position in the directory tree. + +E.g. when searching in F<$HOME/perl5lib>, the file +F<$HOME/perl5lib/MyModule.pm> would get the POD name I<MyModule>, +whereas F<$HOME/perl5lib/Myclass/Subclass.pm> would be +I<Myclass::Subclass>. The name information can be used for POD +translators. + +Only text files containing at least one valid POD command are found. + +A warning is printed if more than one POD file with the same POD name +is found, e.g. F<CPAN.pm> in different directories. This usually +indicates duplicate occurrences of modules in the I<@INC> search path. + +The function B<simplify_name> is equivalent to B<basename>, but also +strips Perl-like extensions (.pm, .pl, .pod) and extensions like +F<.bat>, F<.cmd> on Win32 and OS/2, respectively. + +Note that neither B<pod_find> nor B<simplify_name> are exported by +default so be sure to specify them in the B<use> statement if you need +them: + + use Pod::Find qw(pod_find simplify_name); + +=head1 OPTIONS + +The first argument for B<pod_find> may be a hash reference with options. +The rest are either directories that are searched recursively or files. +The POD names of files are the plain basenames with any Perl-like extension +(.pm, .pl, .pod) stripped. + +=over 4 + +=item B<-verbose> + +Print progress information while scanning. + +=item B<-perl> + +Apply Perl-specific heuristics to find the correct PODs. This includes +stripping Perl-like extensions, omitting subdirectories that are numeric +but do I<not> match the current Perl interpreter's version id, suppressing +F<site_perl> as a module hierarchy name etc. + +=item B<-script> + +Search for PODs in the current Perl interpreter's installation +B<scriptdir>. This is taken from the local L<Config|Config> module. + +=item B<-inc> + +Search for PODs in the current Perl interpreter's I<@INC> paths. This +automatically considers paths specified in the C<PERL5LIB> environment. + +=back + +=head1 AUTHOR + +Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>, +heavily borrowing code from Nick Ing-Simmons' PodToHtml. + +=head1 SEE ALSO + +L<Pod::Parser>, L<Pod::Checker> + +=cut + +use strict; +#use diagnostics; +use Exporter; +use File::Spec; +use File::Find; +use Cwd; + +use vars qw(@ISA @EXPORT_OK $VERSION); +@ISA = qw(Exporter); +@EXPORT_OK = qw(&pod_find &simplify_name); + +# package global variables +my $SIMPLIFY_RX; + +# return a hash of the POD files found +# first argument may be a hashref (options), +# rest is a list of directories to search recursively +sub pod_find +{ + my %opts; + if(ref $_[0]) { + %opts = %{shift()}; + } + + $opts{-verbose} ||= 0; + $opts{-perl} ||= 0; + + my (@search) = @_; + + if($opts{-script}) { + require Config; + push(@search, $Config::Config{scriptdir}); + $opts{-perl} = 1; + } + + if($opts{-inc}) { + push(@search, grep($_ ne '.',@INC)); + $opts{-perl} = 1; + } + + if($opts{-perl}) { + require Config; + # this code simplifies the POD name for Perl modules: + # * remove "site_perl" + # * remove e.g. "i586-linux" (from 'archname') + # * remove e.g. 5.00503 + # * remove pod/ if followed by *.pod (e.g. in pod/perlfunc.pod) + $SIMPLIFY_RX = + qq!^(?i:site_perl/|\Q$Config::Config{archname}\E/|\\d+\\.\\d+([_.]?\\d+)?/|pod/(?=.*?\\.pod\\z))*!; + + } + + my %dirs_visited; + my %pods; + my %names; + my $pwd = cwd(); + + foreach my $try (@search) { + unless(File::Spec->file_name_is_absolute($try)) { + # make path absolute + $try = File::Spec->catfile($pwd,$try); + } + # simplify path + $try = File::Spec->canonpath($try); + my $name; + if(-f $try) { + if($name = _check_and_extract_name($try, $opts{-verbose})) { + _check_for_duplicates($try, $name, \%names, \%pods); + } + next; + } + my $root_rx = qq!^\Q$try\E/!; + File::Find::find( sub { + my $item = $File::Find::name; + if(-d) { + if($dirs_visited{$item}) { + warn "Directory '$item' already seen, skipping.\n" + if($opts{-verbose}); + $File::Find::prune = 1; + return; + } + else { + $dirs_visited{$item} = 1; + } + if($opts{-perl} && /^(\d+\.[\d_]+)\z/s && eval "$1" != $]) { + $File::Find::prune = 1; + warn "Perl $] version mismatch on $_, skipping.\n" + if($opts{-verbose}); + } + return; + } + if($name = _check_and_extract_name($item, $opts{-verbose}, $root_rx)) { + _check_for_duplicates($item, $name, \%names, \%pods); + } + }, $try); # end of File::Find::find + } + chdir $pwd; + %pods; +} + +sub _check_for_duplicates { + my ($file, $name, $names_ref, $pods_ref) = @_; + if($$names_ref{$name}) { + warn "Duplicate POD found (shadowing?): $name ($file)\n"; + warn " Already seen in ", + join(' ', grep($$pods_ref{$_} eq $name, keys %$pods_ref)),"\n"; + } + else { + $$names_ref{$name} = 1; + } + $$pods_ref{$file} = $name; +} + +sub _check_and_extract_name { + my ($file, $verbose, $root_rx) = @_; + + # check extension or executable flag + # this involves testing the .bat extension on Win32! + unless($file =~ /\.(pod|pm|plx?)\z/i || (-f $file && -x _ && -T _)) { + return undef; + } + + # check for one line of POD + unless(open(POD,"<$file")) { + warn "Error: $file is unreadable: $!\n"; + return undef; + } + local $/ = undef; + my $pod = <POD>; + close(POD); + unless($pod =~ /\n=(head\d|pod|over|item)\b/) { + warn "No POD in $file, skipping.\n" + if($verbose); + return; + } + undef $pod; + + # strip non-significant path components + # _TODO_ what happens on e.g. Win32? + my $name = $file; + if(defined $root_rx) { + $name =~ s!$root_rx!!s; + $name =~ s!$SIMPLIFY_RX!!os if(defined $SIMPLIFY_RX); + } + else { + $name =~ s:^.*/::s; + } + _simplify($name); + $name =~ s!/+!::!g; #/ + $name; +} + +# basic simplification of the POD name: +# basename & strip extension +sub simplify_name { + my ($str) = @_; + # remove all path components + $str =~ s:^.*/::s; + _simplify($str); + $str; +} + +# internal sub only +sub _simplify { + # strip Perl's own extensions + $_[0] =~ s/\.(pod|pm|plx?)\z//i; + # strip meaningless extensions on Win32 and OS/2 + $_[0] =~ s/\.(bat|exe|cmd)\z//i if($^O =~ /win|os2/i); +} + +1; + diff --git a/contrib/perl5/lib/Pod/Functions.pm b/contrib/perl5/lib/Pod/Functions.pm index 3cc9b38..03cbf71 100644 --- a/contrib/perl5/lib/Pod/Functions.pm +++ b/contrib/perl5/lib/Pod/Functions.pm @@ -65,6 +65,8 @@ while (<DATA>) { } } +close DATA; + unless (caller) { foreach $type ( @Type_Order ) { $list = join(", ", sort @{$Kinds{$type}}); @@ -90,9 +92,9 @@ __DATA__ abs Math absolute value function accept Socket accept an incoming socket connect alarm Process schedule a SIGALRM -atan2 Math arctangent of Y/X +atan2 Math arctangent of Y/X in the range -PI to PI bind Socket binds an address to a socket -binmode I/O prepare binary files on old systems +binmode I/O prepare binary files for I/O bless Objects create an object caller Flow,Namespace get context of the current subroutine call chdir File change your current working directory @@ -104,7 +106,7 @@ chr String get character this number represents chroot File make directory new root for path lookups close I/O close file (or pipe or socket) handle closedir I/O close directory handle -connect Socket connect to a remove socket +connect Socket connect to a remote socket continue Flow optional trailing block in a while or foreach cos Math cosine function crypt String one-way passwd-style encryption @@ -123,12 +125,12 @@ endprotoent Network be done using protocols file endpwent User be done using passwd file endservent Network be done using services file eof I/O test a filehandle for its end -eval Flow,Misc catch exceptions or compile code +eval Flow,Misc catch exceptions or compile and run code exec Process abandon this program to run another exists HASH test whether a hash key is present exit Flow terminate this program exp Math raise I<e> to a power -fcntl File file control system all +fcntl File file control system call fileno I/O return file descriptor from filehandle flock I/O lock an entire file with an advisory lock fork Process create a new process just like this one @@ -145,7 +147,7 @@ getlogin User return who logged in at this tty getnetbyaddr Network get network record given its address getnetbyname Network get networks record given name getnetent Network get next networks record -getpeername Socket find the other hend of a socket connection +getpeername Socket find the other end of a socket connection getpgrp Process get process group getppid Process get parent process ID getpriority Process get current nice value @@ -180,6 +182,7 @@ link File create a hard link in the filesytem listen Socket register your socket as a server local Misc,Namespace create a temporary value for a global variable (dynamic scoping) localtime Time convert UNIX time into record or string using local time +lock Threads get a thread lock on a variable, subroutine, or method log Math retrieve the natural logarithm for a number lstat File stat a symbolic link m// Regexp match a string with a regular expression pattern @@ -210,9 +213,11 @@ qq/STRING/ String doubly quote a string quotemeta Regexp quote regular expression magic characters qw/STRING/ LIST quote a list of words qx/STRING/ Process backquote quote a string +qr/PATTERN/ Regexp Compile pattern rand Math retrieve the next pseudorandom number read I/O,Binary fixed-length buffered input from a filehandle readdir I/O get a directory from a directory handle +readline I/O fetch a record from a file readlink File determine where a symbolic link is pointing recv Socket receive a message over a Socket redo Flow start this loop iteration over again @@ -249,7 +254,7 @@ shmget SysV get SysV shared memory segment identifier shmread SysV read SysV shared memory shmwrite SysV write SysV shared memory shutdown Socket close down just half of a socket connection -sin Math return the sin of a number +sin Math return the sine of a number sleep Process block for some number of seconds socket Socket create a socket socketpair Socket create a pair of sockets @@ -266,6 +271,7 @@ substr String get or alter a portion of a stirng symlink File create a symbolic link to a file syscall I/O,Binary execute an arbitrary system call sysread I/O,Binary fixed-length unbuffered input from a filehandle +sysseek I/O,Binary position I/O pointer on handle used with sysread and syswrite system Process run a separate program syswrite I/O,Binary fixed-length unbuffered output to a filehandle tell I/O get current seekpointer on a filehandle diff --git a/contrib/perl5/lib/Pod/Html.pm b/contrib/perl5/lib/Pod/Html.pm index e71afa8..89e3d0f 100644 --- a/contrib/perl5/lib/Pod/Html.pm +++ b/contrib/perl5/lib/Pod/Html.pm @@ -1,22 +1,21 @@ package Pod::Html; - -use Pod::Functions; -use Getopt::Long; # package for handling command-line parameters +use strict; require Exporter; -use vars qw($VERSION); -$VERSION = 1.01; -@ISA = Exporter; + +use vars qw($VERSION @ISA @EXPORT); +$VERSION = 1.03; +@ISA = qw(Exporter); @EXPORT = qw(pod2html htmlify); -use Cwd; use Carp; +use Config; +use Cwd; +use File::Spec::Unix; +use Getopt::Long; +use Pod::Functions; use locale; # make \w work right in non-ASCII lands -use strict; - -use Config; - =head1 NAME Pod::Html - module to convert pod files to HTML @@ -38,12 +37,48 @@ Pod::Html takes the following arguments: =over 4 +=item backlink + + --backlink="Back to Top" + +Adds "Back to Top" links in front of every HEAD1 heading (except for +the first). By default, no backlink are being generated. + +=item css + + --css=stylesheet + +Specify the URL of a cascading style sheet. + +=item flush + + --flush + +Flushes the item and directory caches. + +=item header + + --header + --noheader + +Creates header and footer blocks containing the text of the NAME +section. By default, no headers are being generated. + =item help --help Displays the usage message. +=item htmldir + + --htmldir=name + +Sets the directory in which the resulting HTML file is placed. This +is used to generate relative links to other files. Not passing this +causes all links to be absolute, since this is the value that tells +Pod::Html the root of the documentation tree. + =item htmlroot --htmlroot=name @@ -51,6 +86,14 @@ Displays the usage message. Sets the base URL for the HTML files. When cross-references are made, the HTML root is prepended to the URL. +=item index + + --index + --noindex + +Generate an index at the top of the HTML file. This is the default +behaviour. + =item infile --infile=name @@ -58,26 +101,6 @@ the HTML root is prepended to the URL. Specify the pod file to convert. Input is taken from STDIN if no infile is specified. -=item outfile - - --outfile=name - -Specify the HTML file to create. Output goes to STDOUT if no outfile -is specified. - -=item podroot - - --podroot=name - -Specify the base directory for finding library pods. - -=item podpath - - --podpath=name:...:name - -Specify which subdirectories of the podroot contain pod files whose -HTML converted forms can be linked-to in cross-references. - =item libpods --libpods=name:...:name @@ -87,39 +110,46 @@ List of page names (eg, "perlfunc") which contain linkable C<=item>s. =item netscape --netscape + --nonetscape -Use Netscape HTML directives when applicable. - -=item nonetscape +Use Netscape HTML directives when applicable. By default, they will +B<not> be used. - --nonetscape +=item outfile -Do not use Netscape HTML directives (default). + --outfile=name -=item index +Specify the HTML file to create. Output goes to STDOUT if no outfile +is specified. - --index +=item podpath -Generate an index at the top of the HTML file (default behaviour). + --podpath=name:...:name -=item noindex +Specify which subdirectories of the podroot contain pod files whose +HTML converted forms can be linked-to in cross-references. - --noindex +=item podroot -Do not generate an index at the top of the HTML file. + --podroot=name +Specify the base directory for finding library pods. -=item recurse +=item quiet - --recurse + --quiet + --noquiet -Recurse into subdirectories specified in podpath (default behaviour). +Don't display I<mostly harmless> warning messages. These messages +will be displayed by default. But this is not the same as C<verbose> +mode. -=item norecurse +=item recurse + --recurse --norecurse -Do not recurse into subdirectories specified in podpath. +Recurse into subdirectories specified in podpath (default behaviour). =item title @@ -130,8 +160,9 @@ Specify the title of the resulting HTML file. =item verbose --verbose + --noverbose -Display progress messages. +Display progress messages. By default, they won't be displayed. =back @@ -146,13 +177,13 @@ Display progress messages. "--infile=foo.pod", "--outfile=/perl/nmanual/foo.html"); -=head1 AUTHOR +=head1 ENVIRONMENT -Tom Christiansen, E<lt>tchrist@perl.comE<gt>. +Uses $Config{pod2html} to setup default options. -=head1 BUGS +=head1 AUTHOR -Has trouble with C<> etc in = commands. +Tom Christiansen, E<lt>tchrist@perl.comE<gt>. =head1 SEE ALSO @@ -164,30 +195,35 @@ This program is distributed under the Artistic License. =cut -my $dircache = "pod2html-dircache"; -my $itemcache = "pod2html-itemcache"; +my $cache_ext = $^O eq 'VMS' ? ".tmp" : ".x~~"; +my $dircache = "pod2htmd$cache_ext"; +my $itemcache = "pod2htmi$cache_ext"; my @begin_stack = (); # begin/end stack my @libpods = (); # files to search for links from C<> directives my $htmlroot = "/"; # http-server base directory from which all # relative paths in $podpath stem. +my $htmldir = ""; # The directory to which the html pages + # will (eventually) be written. my $htmlfile = ""; # write to stdout by default +my $htmlfileurl = "" ; # The url that other files would use to + # refer to this file. This is only used + # to make relative urls that point to + # other files. my $podfile = ""; # read from stdin by default my @podpath = (); # list of directories containing library pods. my $podroot = "."; # filesystem base directory from which all # relative paths in $podpath stem. +my $css = ''; # Cascading style sheet my $recurse = 1; # recurse on subdirectories in $podpath. +my $quiet = 0; # not quiet by default my $verbose = 0; # not verbose by default my $doindex = 1; # non-zero if we should generate an index +my $backlink = ''; # text for "back to top" links my $listlevel = 0; # current list depth -my @listitem = (); # stack of HTML commands to use when a =item is - # encountered. the top of the stack is the - # current list. -my @listdata = (); # similar to @listitem, but for the text after - # an =item -my @listend = (); # similar to @listitem, but the text to use to - # end the list. +my @listend = (); # the text to use to end the list. +my $after_lpar = 0; # set to true after a par in an =item my $ignore = 1; # whether or not to format text. we don't # format text until we hit our first pod # directive. @@ -196,42 +232,45 @@ my %items_named = (); # for the multiples of the same item in perlfunc my @items_seen = (); my $netscape = 0; # whether or not to use netscape directives. my $title; # title to give the pod(s) +my $header = 0; # produce block header/footer my $top = 1; # true if we are at the top of the doc. used # to prevent the first <HR> directive. my $paragraph; # which paragraph we're processing (used # for error messages) +my $ptQuote = 0; # status of double-quote conversion my %pages = (); # associative array used to find the location # of pages referenced by L<> links. my %sections = (); # sections within this page my %items = (); # associative array used to find the location # of =item directives referenced by C<> links +my %local_items = (); # local items - avoid destruction of %items my $Is83; # is dos with short filenames (8.3) sub init_globals { -$dircache = "pod2html-dircache"; -$itemcache = "pod2html-itemcache"; +$dircache = "pod2htmd$cache_ext"; +$itemcache = "pod2htmi$cache_ext"; @begin_stack = (); # begin/end stack @libpods = (); # files to search for links from C<> directives $htmlroot = "/"; # http-server base directory from which all # relative paths in $podpath stem. +$htmldir = ""; # The directory to which the html pages + # will (eventually) be written. $htmlfile = ""; # write to stdout by default $podfile = ""; # read from stdin by default @podpath = (); # list of directories containing library pods. $podroot = "."; # filesystem base directory from which all # relative paths in $podpath stem. +$css = ''; # Cascading style sheet $recurse = 1; # recurse on subdirectories in $podpath. +$quiet = 0; # not quiet by default $verbose = 0; # not verbose by default $doindex = 1; # non-zero if we should generate an index +$backlink = ''; # text for "back to top" links $listlevel = 0; # current list depth -@listitem = (); # stack of HTML commands to use when a =item is - # encountered. the top of the stack is the - # current list. -@listdata = (); # similar to @listitem, but for the text after - # an =item -@listend = (); # similar to @listitem, but the text to use to - # end the list. +@listend = (); # the text to use to end the list. +$after_lpar = 0; # set to true after a par in an =item $ignore = 1; # whether or not to format text. we don't # format text until we hit our first pod # directive. @@ -239,6 +278,7 @@ $ignore = 1; # whether or not to format text. we don't @items_seen = (); %items_named = (); $netscape = 0; # whether or not to use netscape directives. +$header = 0; # produce block header/footer $title = ''; # title to give the pod(s) $top = 1; # true if we are at the top of the doc. used # to prevent the first <HR> directive. @@ -252,9 +292,28 @@ $paragraph = ''; # which paragraph we're processing (used # of pages referenced by L<> links. #%items = (); # associative array used to find the location # of =item directives referenced by C<> links +%local_items = (); $Is83=$^O eq 'dos'; } +# +# clean_data: global clean-up of pod data +# +sub clean_data($){ + my( $dataref ) = @_; + my $i; + for( $i = 0; $i <= $#$dataref; $i++ ){ + ${$dataref}[$i] =~ s/\s+\Z//; + + # have a look for all-space lines + if( ${$dataref}[$i] =~ /^\s+$/m ){ + my @chunks = split( /^\s+$/m, ${$dataref}[$i] ); + splice( @$dataref, $i, 1, @chunks ); + } + } +} + + sub pod2html { local(@ARGV) = @_; local($/); @@ -283,19 +342,32 @@ sub pod2html { } $htmlfile = "-" unless $htmlfile; # stdout $htmlroot = "" if $htmlroot eq "/"; # so we don't get a // + $htmldir =~ s#/\z## ; # so we don't get a // + if ( $htmlroot eq '' + && defined( $htmldir ) + && $htmldir ne '' + && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir + ) + { + # Set the 'base' url for this file, so that we can use it + # as the location from which to calculate relative links + # to other files. If this is '', then absolute links will + # be used throughout. + $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1); + } # read the pod a paragraph at a time warn "Scanning for sections in input file(s)\n" if $verbose; $/ = ""; my @poddata = <POD>; close(POD); + clean_data( \@poddata ); # scan the pod for =head[1-6] directives and build an index my $index = scan_headings(\%sections, @poddata); unless($index) { - warn "No pod in $podfile\n" if $verbose; - return; + warn "No headings in $podfile\n" if $verbose; } # open the output file @@ -316,7 +388,7 @@ sub pod2html { } } } - if (!$title and $podfile =~ /\.pod$/) { + if (!$title and $podfile =~ /\.pod\z/) { # probably a split pod so take first =head[12] as title for (my $i = 0; $i < @poddata; $i++) { last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/; @@ -327,46 +399,63 @@ sub pod2html { if ($title) { $title =~ s/\s*\(.*\)//; } else { - warn "$0: no title for $podfile"; - $podfile =~ /^(.*)(\.[^.\/]+)?$/; + warn "$0: no title for $podfile" unless $quiet; + $podfile =~ /^(.*)(\.[^.\/]+)?\z/s; $title = ($podfile eq "-" ? 'No Title' : $1); warn "using $title" if $verbose; } + my $csslink = $css ? qq(\n<LINK REL="stylesheet" HREF="$css" TYPE="text/css">) : ''; + $csslink =~ s,\\,/,g; + $csslink =~ s,(/.):,$1|,; + + my $block = $header ? <<END_OF_BLOCK : ''; +<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0 WIDTH=100%> +<TR><TD CLASS=block VALIGN=MIDDLE WIDTH=100% BGCOLOR="#cccccc"> +<FONT SIZE=+1><STRONG><P CLASS=block> $title</P></STRONG></FONT> +</TD></TR> +</TABLE> +END_OF_BLOCK + print HTML <<END_OF_HEAD; <HTML> <HEAD> -<TITLE>$title</TITLE> +<TITLE>$title</TITLE>$csslink <LINK REV="made" HREF="mailto:$Config{perladmin}"> </HEAD> <BODY> - +$block END_OF_HEAD # load/reload/validate/cache %pages and %items get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse); # scan the pod for =item directives - scan_items("", \%items, @poddata); + scan_items( \%local_items, "", @poddata); # put an index at the top of the file. note, if $doindex is 0 we # still generate an index, but surround it with an html comment. # that way some other program can extract it if desired. $index =~ s/--+/-/g; + print HTML "<A NAME=\"__index__\"></A>\n"; print HTML "<!-- INDEX BEGIN -->\n"; print HTML "<!--\n" unless $doindex; print HTML $index; print HTML "-->\n" unless $doindex; print HTML "<!-- INDEX END -->\n\n"; - print HTML "<HR>\n" if $doindex; + print HTML "<HR>\n" if $doindex and $index; # now convert this file - warn "Converting input file\n" if $verbose; - foreach my $i (0..$#poddata) { + my $after_item; # set to true after an =item + warn "Converting input file $podfile\n" if $verbose; + foreach my $i (0..$#poddata){ + $ptQuote = 0; # status of quote conversion + $_ = $poddata[$i]; $paragraph = $i+1; if (/^(=.*)/s) { # is it a pod directive? $ignore = 0; + $after_item = 0; $_ = $1; if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin process_begin($1, $2); @@ -380,14 +469,17 @@ END_OF_HEAD next if @begin_stack && $begin_stack[-1] ne 'html'; if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading - process_head($1, $2); - } elsif (/^=item\s*(.*\S)/sm) { # =item text - process_item($1); + process_head( $1, $2, $doindex && $index ); + } elsif (/^=item\s*(.*\S)?/sm) { # =item text + warn "$0: $podfile: =item without bullet, number or text" + . " in paragraph $paragraph.\n" if !defined($1) or $1 eq ''; + process_item( $1 ); + $after_item = 1; } elsif (/^=over\s*(.*)/) { # =over N process_over(); } elsif (/^=back/) { # =back process_back(); - } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for + } elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for process_for($1,$2); } else { /^=(\S*)\s*/; @@ -401,14 +493,55 @@ END_OF_HEAD next if $ignore; next if @begin_stack && $begin_stack[-1] ne 'html'; my $text = $_; - process_text(\$text, 1); - print HTML "<P>\n$text"; + if( $text =~ /\A\s+/ ){ + process_pre( \$text ); + print HTML "<PRE>\n$text</PRE>\n"; + + } else { + process_text( \$text ); + + # experimental: check for a paragraph where all lines + # have some ...\t...\t...\n pattern + if( $text =~ /\t/ ){ + my @lines = split( "\n", $text ); + if( @lines > 1 ){ + my $all = 2; + foreach my $line ( @lines ){ + if( $line =~ /\S/ && $line !~ /\t/ ){ + $all--; + last if $all == 0; + } + } + if( $all > 0 ){ + $text =~ s/\t+/<TD>/g; + $text =~ s/^/<TR><TD>/gm; + $text = '<TABLE CELLSPACING=0 CELLPADDING=0>' . + $text . '</TABLE>'; + } + } + } + ## end of experimental + + if( $after_item ){ + print HTML "$text\n"; + $after_lpar = 1; + } else { + print HTML "<P>$text</P>\n"; + } + } + $after_item = 0; } } # finish off any pending directives finish_list(); + + # link to page index + print HTML "<P><A HREF=\"#__index__\"><SMALL>$backlink</SMALL></A></P>\n" + if $doindex and $index and $backlink; + print HTML <<END_OF_TAIL; +$block </BODY> </HTML> @@ -435,40 +568,52 @@ Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name> --libpods=<name>:...:<name> --recurse --verbose --index --netscape --norecurse --noindex - --flush - flushes the item and directory caches. - --help - prints this message. - --htmlroot - http-server base directory from which all relative paths - in podpath stem (default is /). - --index - generate an index at the top of the resulting html - (default). - --infile - filename for the pod to convert (input taken from stdin - by default). - --libpods - colon-separated list of pages to search for =item pod - directives in as targets of C<> and implicit links (empty - by default). note, these are not filenames, but rather - page names like those that appear in L<> links. - --netscape - will use netscape html directives when applicable. - --nonetscape - will not use netscape directives (default). - --outfile - filename for the resulting html file (output sent to - stdout by default). - --podpath - colon-separated list of directories containing library - pods. empty by default. - --podroot - filesystem base directory from which all relative paths - in podpath stem (default is .). - --noindex - don't generate an index at the top of the resulting html. - --norecurse - don't recurse on those subdirectories listed in podpath. - --recurse - recurse on those subdirectories listed in podpath - (default behavior). - --title - title that will appear in resulting html file. - --verbose - self-explanatory + --backlink - set text for "back to top" links (default: none). + --css - stylesheet URL + --flush - flushes the item and directory caches. + --[no]header - produce block header/footer (default is no headers). + --help - prints this message. + --htmldir - directory for resulting HTML files. + --htmlroot - http-server base directory from which all relative paths + in podpath stem (default is /). + --[no]index - generate an index at the top of the resulting html + (default behaviour). + --infile - filename for the pod to convert (input taken from stdin + by default). + --libpods - colon-separated list of pages to search for =item pod + directives in as targets of C<> and implicit links (empty + by default). note, these are not filenames, but rather + page names like those that appear in L<> links. + --[no]netscape - will use netscape html directives when applicable. + (default is not to use them). + --outfile - filename for the resulting html file (output sent to + stdout by default). + --podpath - colon-separated list of directories containing library + pods (empty by default). + --podroot - filesystem base directory from which all relative paths + in podpath stem (default is .). + --[no]quiet - supress some benign warning messages (default is off). + --[no]recurse - recurse on those subdirectories listed in podpath + (default behaviour). + --title - title that will appear in resulting html file. + --[no]verbose - self-explanatory (off by default). END_OF_USAGE sub parse_command_line { - my ($opt_flush,$opt_help,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose); + my ($opt_backlink,$opt_css,$opt_flush,$opt_header,$opt_help,$opt_htmldir, + $opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape, + $opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet,$opt_recurse, + $opt_title,$opt_verbose); + + unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html}; my $result = GetOptions( + 'backlink=s' => \$opt_backlink, + 'css=s' => \$opt_css, 'flush' => \$opt_flush, + 'header!' => \$opt_header, 'help' => \$opt_help, + 'htmldir=s' => \$opt_htmldir, 'htmlroot=s' => \$opt_htmlroot, 'index!' => \$opt_index, 'infile=s' => \$opt_infile, @@ -477,34 +622,37 @@ sub parse_command_line { 'outfile=s' => \$opt_outfile, 'podpath=s' => \$opt_podpath, 'podroot=s' => \$opt_podroot, - 'norecurse' => \$opt_norecurse, + 'quiet!' => \$opt_quiet, 'recurse!' => \$opt_recurse, 'title=s' => \$opt_title, - 'verbose' => \$opt_verbose, + 'verbose!' => \$opt_verbose, ); usage("-", "invalid parameters") if not $result; usage("-") if defined $opt_help; # see if the user asked for help $opt_help = ""; # just to make -w shut-up. - $podfile = $opt_infile if defined $opt_infile; - $htmlfile = $opt_outfile if defined $opt_outfile; - @podpath = split(":", $opt_podpath) if defined $opt_podpath; @libpods = split(":", $opt_libpods) if defined $opt_libpods; + $backlink = $opt_backlink if defined $opt_backlink; + $css = $opt_css if defined $opt_css; + $header = $opt_header if defined $opt_header; + $htmldir = $opt_htmldir if defined $opt_htmldir; + $htmlroot = $opt_htmlroot if defined $opt_htmlroot; + $doindex = $opt_index if defined $opt_index; + $podfile = $opt_infile if defined $opt_infile; + $netscape = $opt_netscape if defined $opt_netscape; + $htmlfile = $opt_outfile if defined $opt_outfile; + $podroot = $opt_podroot if defined $opt_podroot; + $quiet = $opt_quiet if defined $opt_quiet; + $recurse = $opt_recurse if defined $opt_recurse; + $title = $opt_title if defined $opt_title; + $verbose = $opt_verbose if defined $opt_verbose; + warn "Flushing item and directory caches\n" if $opt_verbose && defined $opt_flush; unlink($dircache, $itemcache) if defined $opt_flush; - - $htmlroot = $opt_htmlroot if defined $opt_htmlroot; - $podroot = $opt_podroot if defined $opt_podroot; - - $doindex = $opt_index if defined $opt_index; - $recurse = $opt_recurse if defined $opt_recurse; - $title = $opt_title if defined $opt_title; - $verbose = defined $opt_verbose ? 1 : 0; - $netscape = $opt_netscape if defined $opt_netscape; } @@ -542,7 +690,7 @@ sub get_cache { sub cache_key { my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_; return join('!', $dircache, $itemcache, $recurse, - @$podpath, $podroot, stat($dircache), stat($itemcache)); + @$podpath, $podroot, stat($dircache), stat($itemcache)); } # @@ -550,7 +698,6 @@ sub cache_key { # are valid caches of %pages and %items. if they are valid then it loads # them and returns a non-zero value. # - sub load_cache { my($dircache, $itemcache, $podpath, $podroot) = @_; my($tests); @@ -648,12 +795,14 @@ sub scan_podpath { next unless defined $pages{$libpod} && $pages{$libpod}; # if there is a directory then use the .pod and .pm files within it. - if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) { + # NOTE: Only finds the first so-named directory in the tree. +# if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) { + if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) { # find all the .pod and .pm files within the directory $dirname = $1; opendir(DIR, $dirname) || die "$0: error opening directory $dirname: $!\n"; - @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR)); + @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR)); closedir(DIR); # scan each .pod and .pm file for =item directives @@ -662,15 +811,17 @@ sub scan_podpath { die "$0: error opening $dirname/$pod for input: $!\n"; @poddata = <POD>; close(POD); + clean_data( \@poddata ); - scan_items("$dirname/$pod", @poddata); + scan_items( \%items, "$dirname/$pod", @poddata); } # use the names of files as =item directives too. - foreach $pod (@files) { - $pod =~ /^(.*)(\.pod|\.pm)$/; - $items{$1} = "$dirname/$1.html" if $1; - } +### Don't think this should be done this way - confuses issues.(WL) +### foreach $pod (@files) { +### $pod =~ /^(.*)(\.pod|\.pm)$/; +### $items{$1} = "$dirname/$1.html" if $1; +### } } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ || $pages{$libpod} =~ /([^:]*\.pm):/) { # scan the .pod or .pm file for =item directives @@ -679,8 +830,9 @@ sub scan_podpath { die "$0: error opening $pod for input: $!\n"; @poddata = <POD>; close(POD); + clean_data( \@poddata ); - scan_items("$pod", @poddata); + scan_items( \%items, "$pod", @poddata); } else { warn "$0: shouldn't be here (line ".__LINE__."\n"; } @@ -736,13 +888,13 @@ sub scan_dir { $pages{$_} = "" unless defined $pages{$_}; $pages{$_} .= "$dir/$_:"; push(@subdirs, $_); - } elsif (/\.pod$/) { # .pod - s/\.pod$//; + } elsif (/\.pod\z/) { # .pod + s/\.pod\z//; $pages{$_} = "" unless defined $pages{$_}; $pages{$_} .= "$dir/$_.pod:"; push(@pods, "$dir/$_.pod"); - } elsif (/\.pm$/) { # .pm - s/\.pm$//; + } elsif (/\.pm\z/) { # .pm + s/\.pm\z//; $pages{$_} = "" unless defined $pages{$_}; $pages{$_} .= "$dir/$_.pm:"; push(@pods, "$dir/$_.pm"); @@ -764,7 +916,7 @@ sub scan_dir { # sub scan_headings { my($sections, @data) = @_; - my($tag, $which_head, $title, $listdepth, $index); + my($tag, $which_head, $otitle, $listdepth, $index); # here we need local $ignore = 0; # unfortunately, we can't have it, because $ignore is lexical @@ -777,9 +929,12 @@ sub scan_headings { # pointing to each of them. foreach my $line (@data) { if ($line =~ /^=(head)([1-6])\s+(.*)/) { - ($tag,$which_head, $title) = ($1,$2,$3); - chomp($title); - $$sections{htmlify(0,$title)} = 1; + ($tag, $which_head, $otitle) = ($1,$2,$3); + + my $title = depod( $otitle ); + my $name = htmlify( $title ); + $$sections{$name} = 1; + $title = process_text( \$otitle ); while ($which_head != $listdepth) { if ($which_head > $listdepth) { @@ -792,8 +947,8 @@ sub scan_headings { } $index .= "\n" . ("\t" x $listdepth) . "<LI>" . - "<A HREF=\"#" . htmlify(0,$title) . "\">" . - html_escape(process_text(\$title, 0)) . "</A>"; + "<A HREF=\"#" . $name . "\">" . + $title . "</A></LI>"; } } @@ -815,36 +970,30 @@ sub scan_headings { # will use this information later on in resolving C<> links. # sub scan_items { - my($pod, @poddata) = @_; + my( $itemref, $pod, @poddata ) = @_; my($i, $item); local $_; - $pod =~ s/\.pod$//; + $pod =~ s/\.pod\z//; $pod .= ".html" if $pod; foreach $i (0..$#poddata) { - $_ = $poddata[$i]; - - # remove any formatting instructions - s,[A-Z]<([^<>]*)>,$1,g; - - # figure out what kind of item it is and get the first word of - # it's name. - if (/^=item\s+(\w*)\s*.*$/s) { - if ($1 eq "*") { # bullet list - /\A=item\s+\*\s*(.*?)\s*\Z/s; - $item = $1; - } elsif ($1 =~ /^\d+/) { # numbered list - /\A=item\s+\d+\.?(.*?)\s*\Z/s; - $item = $1; - } else { -# /\A=item\s+(.*?)\s*\Z/s; - /\A=item\s+(\w*)/s; - $item = $1; - } - - $items{$item} = "$pod" if $item; + my $txt = depod( $poddata[$i] ); + + # figure out what kind of item it is. + # Build string for referencing this item. + if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet + next unless $1; + $item = $1; + } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list + $item = $1; + } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item + $item = $1; + } else { + next; } + my $fid = fragment_id( $item ); + $$itemref{$fid} = "$pod" if $fid; } } @@ -852,168 +1001,167 @@ sub scan_items { # process_head - convert a pod head[1-6] tag and convert it to HTML format. # sub process_head { - my($tag, $heading) = @_; - my $firstword; + my($tag, $heading, $hasindex) = @_; # figure out the level of the =head $tag =~ /head([1-6])/; my $level = $1; - # can't have a heading full of spaces and speechmarks and so on - $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/; - - print HTML "<P>\n" unless $listlevel; - print HTML "<HR>\n" unless $listlevel || $top; - print HTML "<H$level>"; # unless $listlevel; - #print HTML "<H$level>" unless $listlevel; - my $convert = $heading; process_text(\$convert, 0); - $convert = html_escape($convert); - print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>"; - print HTML "</H$level>"; # unless $listlevel; - print HTML "\n"; + if( $listlevel ){ + warn "$0: $podfile: unterminated list at =head in paragraph $paragraph. ignoring.\n"; + while( $listlevel ){ + process_back(); + } + } + + print HTML "<P>\n"; + if( $level == 1 && ! $top ){ + print HTML "<A HREF=\"#__index__\"><SMALL>$backlink</SMALL></A>\n" + if $hasindex and $backlink; + print HTML "<HR>\n" + } + + my $name = htmlify( depod( $heading ) ); + my $convert = process_text( \$heading ); + print HTML "<H$level><A NAME=\"$name\">$convert</A></H$level>\n"; } + # -# process_item - convert a pod item tag and convert it to HTML format. +# emit_item_tag - print an =item's text +# Note: The global $EmittedItem is used for inhibiting self-references. # -sub process_item { - my $text = $_[0]; - my($i, $quote, $name); +my $EmittedItem; + +sub emit_item_tag($$$){ + my( $otext, $text, $compact ) = @_; + my $item = fragment_id( $text ); - my $need_preamble = 0; - my $this_entry; + $EmittedItem = $item; + ### print STDERR "emit_item_tag=$item ($text)\n"; + print HTML '<STRONG>'; + if ($items_named{$item}++) { + print HTML process_text( \$otext ); + } else { + my $name = 'item_' . $item; + print HTML qq{<A NAME="$name">}, process_text( \$otext ), '</A>'; + } + print HTML "</STRONG><BR>\n"; + undef( $EmittedItem ); +} + +sub emit_li { + my( $tag ) = @_; + if( $items_seen[$listlevel]++ == 0 ){ + push( @listend, "</$tag>" ); + print HTML "<$tag>\n"; + } + print HTML $tag eq 'DL' ? '<DT>' : '<LI>'; +} + +# +# process_item - convert a pod item tag and convert it to HTML format. +# +sub process_item { + my( $otext ) = @_; # lots of documents start a list without doing an =over. this is # bad! but, the proper thing to do seems to be to just assume # they did do an =over. so warn them once and then continue. - warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n" - unless $listlevel; - process_over() unless $listlevel; + if( $listlevel == 0 ){ + warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n"; + process_over(); + } - return unless $listlevel; + # formatting: insert a paragraph if preceding item has >1 paragraph + if( $after_lpar ){ + print HTML "<P></P>\n"; + $after_lpar = 0; + } # remove formatting instructions from the text - 1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g; - pre_escape(\$text); - - $need_preamble = $items_seen[$listlevel]++ == 0; - - # check if this is the first =item after an =over - $i = $listlevel - 1; - my $need_new = $listlevel >= @listitem; - - if ($text =~ /\A\*/) { # bullet - - if ($need_preamble) { - push(@listend, "</UL>"); - print HTML "<UL>\n"; - } - - print HTML '<LI>'; - if ($text =~ /\A\*\s*(.+)\Z/s) { - print HTML '<STRONG>'; - if ($items_named{$1}++) { - print HTML html_escape($1); - } else { - my $name = 'item_' . htmlify(1,$1); - print HTML qq(<A NAME="$name">), html_escape($1), '</A>'; - } - print HTML '</STRONG>'; - } - - } elsif ($text =~ /\A[\d#]+/) { # numbered list - - if ($need_preamble) { - push(@listend, "</OL>"); - print HTML "<OL>\n"; - } - - print HTML '<LI>'; - if ($text =~ /\A\d+\.?\s*(.+)\Z/s) { - print HTML '<STRONG>'; - if ($items_named{$1}++) { - print HTML html_escape($1); - } else { - my $name = 'item_' . htmlify(0,$1); - print HTML qq(<A NAME="$name">), html_escape($1), '</A>'; - } - print HTML '</STRONG>'; + my $text = depod( $otext ); + + # all the list variants: + if( $text =~ /\A\*/ ){ # bullet + emit_li( 'UL' ); + if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text + my $tag = $1; + $otext =~ s/\A\*\s+//; + emit_item_tag( $otext, $tag, 1 ); } - } else { # all others - - if ($need_preamble) { - push(@listend, '</DL>'); - print HTML "<DL>\n"; + } elsif( $text =~ /\A\d+/ ){ # numbered list + emit_li( 'OL' ); + if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text + my $tag = $1; + $otext =~ s/\A\d+\.?\s*//; + emit_item_tag( $otext, $tag, 1 ); } - print HTML '<DT>'; - if ($text =~ /(\S+)/) { - print HTML '<STRONG>'; - if ($items_named{$1}++) { - print HTML html_escape($text); - } else { - my $name = 'item_' . htmlify(1,$text); - print HTML qq(<A NAME="$name">), html_escape($text), '</A>'; - } - print HTML '</STRONG>'; + } else { # definition list + emit_li( 'DL' ); + if ($text =~ /\A(.+)\Z/s ){ # should have text + emit_item_tag( $otext, $text, 1 ); } print HTML '<DD>'; } - print HTML "\n"; } # -# process_over - process a pod over tag and start a corresponding HTML -# list. +# process_over - process a pod over tag and start a corresponding HTML list. # sub process_over { # start a new list $listlevel++; + push( @items_seen, 0 ); + $after_lpar = 0; } # # process_back - process a pod back tag and convert it to HTML format. # sub process_back { - warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n" - unless $listlevel; - return unless $listlevel; + if( $listlevel == 0 ){ + warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n"; + return; + } # close off the list. note, I check to see if $listend[$listlevel] is # defined because an =item directive may have never appeared and thus # $listend[$listlevel] may have never been initialized. $listlevel--; - print HTML $listend[$listlevel] if defined $listend[$listlevel]; - print HTML "\n"; - - # don't need the corresponding perl code anymore - pop(@listitem); - pop(@listdata); - pop(@listend); + if( defined $listend[$listlevel] ){ + print HTML '<P></P>' if $after_lpar; + print HTML $listend[$listlevel]; + print HTML "\n"; + pop( @listend ); + } + $after_lpar = 0; - pop(@items_seen); + # clean up item count + pop( @items_seen ); } # -# process_cut - process a pod cut tag, thus stop ignoring pod directives. +# process_cut - process a pod cut tag, thus start ignoring pod directives. # sub process_cut { $ignore = 1; } # -# process_pod - process a pod pod tag, thus ignore pod directives until we see a -# corresponding cut. +# process_pod - process a pod pod tag, thus stop ignoring pod directives +# until we see a corresponding cut. # sub process_pod { # no need to set $ignore to 0 cause the main loop did it } # -# process_for - process a =for pod tag. if it's for html, split +# process_for - process a =for pod tag. if it's for html, spit # it out verbatim, if illustration, center it, otherwise ignore it. # sub process_for { @@ -1053,54 +1201,69 @@ sub process_end { if ($begin_stack[-1] ne $whom ) { die "Unmatched begin/end at chunk $paragraph\n" } - pop @begin_stack; + pop( @begin_stack ); } # -# process_text - handles plaintext that appears in the input pod file. -# there may be pod commands embedded within the text so those must be -# converted to html commands. +# process_pre - indented paragraph, made into <PRE></PRE> # -sub process_text { - my($text, $escapeQuotes) = @_; - my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf); - my($podcommand, $params, $tag, $quote); - +sub process_pre { + my( $text ) = @_; + my( $rest ); return if $ignore; - $quote = 0; # status of double-quote conversion - $result = ""; $rest = $$text; - if ($rest =~ /^\s+/) { # preformatted text, no pod directives - $rest =~ s/\n+\Z//; - $rest =~ s#.*# + # insert spaces in place of tabs + $rest =~ s#.*# my $line = $&; 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e; $line; #eg; - $rest =~ s/&/&/g; - $rest =~ s/</</g; - $rest =~ s/>/>/g; - $rest =~ s/"/"/g; - - # try and create links for all occurrences of perl.* within - # the preformatted text. - $rest =~ s{ - (\s*)(perl\w+) - }{ - if (defined $pages{$2}) { # is a link - qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>); - } elsif (defined $pages{dosify($2)}) { # is a link - qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>); - } else { - "$1$2"; - } - }xeg; - $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g; - - my $urls = '(' . join ('|', qw{ + # convert some special chars to HTML escapes + $rest =~ s/&/&/g; + $rest =~ s/</</g; + $rest =~ s/>/>/g; + $rest =~ s/"/"/g; + + # try and create links for all occurrences of perl.* within + # the preformatted text. + $rest =~ s{ + (\s*)(perl\w+) + }{ + if ( defined $pages{$2} ){ # is a link + qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>); + } elsif (defined $pages{dosify($2)}) { # is a link + qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>); + } else { + "$1$2"; + } + }xeg; + $rest =~ s{ + (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)? + }{ + my $url ; + if ( $htmlfileurl ne '' ){ + # Here, we take advantage of the knowledge + # that $htmlfileurl ne '' implies $htmlroot eq ''. + # Since $htmlroot eq '', we need to prepend $htmldir + # on the fron of the link to get the absolute path + # of the link's target. We check for a leading '/' + # to avoid corrupting links that are #, file:, etc. + my $old_url = $3 ; + $old_url = "$htmldir$old_url" if $old_url =~ m{^\/}; + $url = relativize_url( "$old_url.html", $htmlfileurl ); + } else { + $url = "$3.html" ; + } + "$1$url" ; + }xeg; + + # Look for embedded URLs and make them into links. We don't + # relativize them since they are best left as the author intended. + + my $urls = '(' . join ('|', qw{ http telnet mailto @@ -1112,15 +1275,16 @@ sub process_text { } ) . ')'; - my $ltrs = '\w'; - my $gunk = '/#~:.?+=&%@!\-'; - my $punc = '.:?\-'; - my $any = "${ltrs}${gunk}${punc}"; + my $ltrs = '\w'; + my $gunk = '/#~:.?+=&%@!\-'; + my $punc = '.:?\-'; + my $any = "${ltrs}${gunk}${punc}"; - $rest =~ s{ + $rest =~ s{ \b # start at word boundary ( # begin $1 { $urls : # need resource and a colon + (?!:) # Ignore File::, among others. [$any] +? # followed by on or more # of any valid character, but # be conservative and take only @@ -1134,168 +1298,81 @@ sub process_text { ) }{<A HREF="$1">$1</A>}igox; - $result = "<PRE>" # text should be as it is (verbatim) - . "$rest\n" - . "</PRE>\n"; - } else { # formatted text - # parse through the string, stopping each time we find a - # pod-escape. once the string has been throughly processed - # we can output it. - while (length $rest) { - # check to see if there are any possible pod directives in - # the remaining part of the text. - if ($rest =~ m/[BCEIFLSZ]</) { - warn "\$rest\t= $rest\n" unless - $rest =~ /\A - ([^<]*?) - ([BCEIFLSZ]?) - < - (.*)\Z/xs; - - $s1 = $1; # pure text - $s2 = $2; # the type of pod-escape that follows - $s3 = '<'; # '<' - $s4 = $3; # the rest of the string - } else { - $s1 = $rest; - $s2 = ""; - $s3 = ""; - $s4 = ""; - } - - if ($s3 eq '<' && $s2) { # a pod-escape - $result .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1); - $podcommand = "$s2<"; - $rest = $s4; - - # find the matching '>' - $match = 1; - $bf = 0; - while ($match && !$bf) { - $bf = 1; - if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) { - $bf = 0; - $match++; - $podcommand .= $1; - $rest = $2; - } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) { - $bf = 0; - $match--; - $podcommand .= $1; - $rest = $2; - } - } - - if ($match != 0) { - warn <<WARN; -$0: $podfile: cannot find matching > for $s2 in paragraph $paragraph. -WARN - $result .= substr $podcommand, 0, 2; - $rest = substr($podcommand, 2) . $rest; - next; - } + # text should be as it is (verbatim) + $$text = $rest; +} - # pull out the parameters to the pod-escape - $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s; - $tag = $1; - $params = $2; - - # process the text within the pod-escape so that any escapes - # which must occur do. - process_text(\$params, 0) unless $tag eq 'L'; - - $s1 = $params; - if (!$tag || $tag eq " ") { # <> : no tag - $s1 = "<$params>"; - } elsif ($tag eq "L") { # L<> : link - $s1 = process_L($params); - } elsif ($tag eq "I" || # I<> : italicize text - $tag eq "B" || # B<> : bold text - $tag eq "F") { # F<> : file specification - $s1 = process_BFI($tag, $params); - } elsif ($tag eq "C") { # C<> : literal code - $s1 = process_C($params, 1); - } elsif ($tag eq "E") { # E<> : escape - $s1 = process_E($params); - } elsif ($tag eq "Z") { # Z<> : zero-width character - $s1 = process_Z($params); - } elsif ($tag eq "S") { # S<> : non-breaking space - $s1 = process_S($params); - } elsif ($tag eq "X") { # S<> : non-breaking space - $s1 = process_X($params); - } else { - warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n"; - } - $result .= "$s1"; - } else { - # for pure text we must deal with implicit links and - # double-quotes among other things. - $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3"); - $rest = $s4; - } - } - } - $$text = $result; +# +# pure text processing +# +# pure_text/inIS_text: differ with respect to automatic C<> recognition. +# we don't want this to happen within IS +# +sub pure_text($){ + my $text = shift(); + process_puretext( $text, \$ptQuote, 1 ); } -sub html_escape { - my $rest = $_[0]; - $rest =~ s/&/&/g; - $rest =~ s/</</g; - $rest =~ s/>/>/g; - $rest =~ s/"/"/g; - return $rest; -} +sub inIS_text($){ + my $text = shift(); + process_puretext( $text, \$ptQuote, 0 ); +} # # process_puretext - process pure text (without pod-escapes) converting # double-quotes and handling implicit C<> links. # sub process_puretext { - my($text, $quote) = @_; - my(@words, $result, $rest, $lead, $trail); + my($text, $quote, $notinIS) = @_; - # convert double-quotes to single-quotes - $text =~ s/\A([^"]*)"/$1''/s if $$quote; - while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {} + ## Guessing at func() or [$@%&]*var references in plain text is destined + ## to produce some strange looking ref's. uncomment to disable: + ## $notinIS = 0; - $$quote = ($text =~ m/"/ ? 1 : 0); - $text =~ s/\A([^"]*)"/$1``/s if $$quote; + my(@words, $lead, $trail); + + # convert double-quotes to single-quotes + if( $$quote && $text =~ s/"/''/s ){ + $$quote = 0; + } + while ($text =~ s/"([^"]*)"/``$1''/sg) {}; + $$quote = 1 if $text =~ s/"/``/s; # keep track of leading and trailing white-space - $lead = ($text =~ /\A(\s*)/s ? $1 : ""); - $trail = ($text =~ /(\s*)\Z/s ? $1 : ""); + $lead = ($text =~ s/\A(\s+)//s ? $1 : ""); + $trail = ($text =~ s/(\s+)\Z//s ? $1 : ""); - # collapse all white space into a single space - $text =~ s/\s+/ /g; - @words = split(" ", $text); + # split at space/non-space boundaries + @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text ); # process each word individually foreach my $word (@words) { + # skip space runs + next if $word =~ /^\s*$/; # see if we can infer a link - if ($word =~ /^\w+\(/) { + if( $notinIS && $word =~ /^(\w+)\((.*)\)$/ ) { # has parenthesis so should have been a C<> ref - $word = process_C($word); -# $word =~ /^[^()]*]\(/; -# if (defined $items{$1} && $items{$1}) { -# $word = "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_" -# . htmlify(0,$word) -# . "\">$word</A></CODE>"; -# } elsif (defined $items{$word} && $items{$word}) { -# $word = "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_" -# . htmlify(0,$word) -# . "\">$word</A></CODE>"; -# } else { -# $word = "\n<CODE><A HREF=\"#item_" -# . htmlify(0,$word) -# . "\">$word</A></CODE>"; -# } - } elsif ($word =~ /^[\$\@%&*]+\w+$/) { - # perl variables, should be a C<> ref - $word = process_C($word, 1); + ## try for a pagename (perlXXX(1))? + my( $func, $args ) = ( $1, $2 ); + if( $args =~ /^\d+$/ ){ + my $url = page_sect( $word, '' ); + if( defined $url ){ + $word = "<A HREF=\"$url\">the $word manpage</A>"; + next; + } + } + ## try function name for a link, append tt'ed argument list + $word = emit_C( $func, '', "($args)"); + +#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing. +## } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) { +## # perl variables, should be a C<> ref +## $word = emit_C( $word ); + } elsif ($word =~ m,^\w+://\w,) { # looks like a URL + # Don't relativize it: leave it as the author intended $word = qq(<A HREF="$word">$word</A>); } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) { # looks like an e-mail address @@ -1311,42 +1388,283 @@ sub process_puretext { } } - # build a new string based upon our conversion - $result = ""; - $rest = join(" ", @words); - while (length($rest) > 75) { - if ( $rest =~ m/^(.{0,75})\s(.*?)$/o || - $rest =~ m/^(\S*)\s(.*?)$/o) { + # put everything back together + return $lead . join( '', @words ) . $trail; +} - $result .= "$1\n"; - $rest = $2; + +# +# process_text - handles plaintext that appears in the input pod file. +# there may be pod commands embedded within the text so those must be +# converted to html commands. +# + +sub process_text1($$;$$); +sub pattern ($) { $_[0] ? '[^\S\n]+'.('>' x ($_[0] + 1)) : '>' } +sub closing ($) { local($_) = shift; (defined && s/\s+$//) ? length : 0 } + +sub process_text { + return if $ignore; + my( $tref ) = @_; + my $res = process_text1( 0, $tref ); + $$tref = $res; +} + +sub process_text1($$;$$){ + my( $lev, $rstr, $func, $closing ) = @_; + my $res = ''; + + unless (defined $func) { + $func = ''; + $lev++; + } + + if( $func eq 'B' ){ + # B<text> - boldface + $res = '<STRONG>' . process_text1( $lev, $rstr ) . '</STRONG>'; + + } elsif( $func eq 'C' ){ + # C<code> - can be a ref or <CODE></CODE> + # need to extract text + my $par = go_ahead( $rstr, 'C', $closing ); + + ## clean-up of the link target + my $text = depod( $par ); + + ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ; + ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n"; + + $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) ); + + } elsif( $func eq 'E' ){ + # E<x> - convert to character + $$rstr =~ s/^(\w+)>//; + $res = "&$1;"; + + } elsif( $func eq 'F' ){ + # F<filename> - italizice + $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>'; + + } elsif( $func eq 'I' ){ + # I<text> - italizice + $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>'; + + } elsif( $func eq 'L' ){ + # L<link> - link + ## L<text|cross-ref> => produce text, use cross-ref for linking + ## L<cross-ref> => make text from cross-ref + ## need to extract text + my $par = go_ahead( $rstr, 'L', $closing ); + + # some L<>'s that shouldn't be: + # a) full-blown URL's are emitted as-is + if( $par =~ m{^\w+://}s ){ + return make_URL_href( $par ); + } + # b) C<...> is stripped and treated as C<> + if( $par =~ /^C<(.*)>$/ ){ + my $text = depod( $1 ); + return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) ); + } + + # analyze the contents + $par =~ s/\n/ /g; # undo word-wrapped tags + my $opar = $par; + my $linktext; + if( $par =~ s{^([^|]+)\|}{} ){ + $linktext = $1; + } + + # make sure sections start with a / + $par =~ s{^"}{/"}; + + my( $page, $section, $ident ); + + # check for link patterns + if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){ # name/ident + # we've got a name/ident (no quotes) + ( $page, $ident ) = ( $1, $2 ); + ### print STDERR "--> L<$par> to page $page, ident $ident\n"; + + } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section" + # even though this should be a "section", we go for ident first + ( $page, $ident ) = ( $1, $2 ); + ### print STDERR "--> L<$par> to page $page, section $section\n"; + + } elsif( $par =~ /\s/ ){ # this must be a section with missing quotes + ( $page, $section ) = ( '', $par ); + ### print STDERR "--> L<$par> to void page, section $section\n"; + + } else { + ( $page, $section ) = ( $par, '' ); + ### print STDERR "--> L<$par> to page $par, void section\n"; + } + + # now, either $section or $ident is defined. the convoluted logic + # below tries to resolve L<> according to what the user specified. + # failing this, we try to find the next best thing... + my( $url, $ltext, $fid ); + + RESOLVE: { + if( defined $ident ){ + ## try to resolve $ident as an item + ( $url, $fid ) = coderef( $page, $ident ); + if( $url ){ + if( ! defined( $linktext ) ){ + $linktext = $ident; + $linktext .= " in " if $ident && $page; + $linktext .= "the $page manpage" if $page; + } + ### print STDERR "got coderef url=$url\n"; + last RESOLVE; + } + ## no luck: go for a section (auto-quoting!) + $section = $ident; + } + ## now go for a section + my $htmlsection = htmlify( $section ); + $url = page_sect( $page, $htmlsection ); + if( $url ){ + if( ! defined( $linktext ) ){ + $linktext = $section; + $linktext .= " in " if $section && $page; + $linktext .= "the $page manpage" if $page; + } + ### print STDERR "got page/section url=$url\n"; + last RESOLVE; + } + ## no luck: go for an ident + if( $section ){ + $ident = $section; + } else { + $ident = $page; + $page = undef(); + } + ( $url, $fid ) = coderef( $page, $ident ); + if( $url ){ + if( ! defined( $linktext ) ){ + $linktext = $ident; + $linktext .= " in " if $ident && $page; + $linktext .= "the $page manpage" if $page; + } + ### print STDERR "got section=>coderef url=$url\n"; + last RESOLVE; + } + + # warning; show some text. + $linktext = $opar unless defined $linktext; + warn "$0: $podfile: cannot resolve L<$opar> in paragraph $paragraph."; + } + + # now we have an URL or just plain code + $$rstr = $linktext . '>' . $$rstr; + if( defined( $url ) ){ + $res = "<A HREF=\"$url\">" . process_text1( $lev, $rstr ) . '</A>'; + } else { + $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>'; + } + + } elsif( $func eq 'S' ){ + # S<text> - non-breaking spaces + $res = process_text1( $lev, $rstr ); + $res =~ s/ / /g; + + } elsif( $func eq 'X' ){ + # X<> - ignore + $$rstr =~ s/^[^>]*>//; + + } elsif( $func eq 'Z' ){ + # Z<> - empty + warn "$0: $podfile: invalid X<> in paragraph $paragraph." + unless $$rstr =~ s/^>//; + + } else { + my $term = pattern $closing; + while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){ + # all others: either recurse into new function or + # terminate at closing angle bracket(s) + my $pt = $1; + $pt .= $2 if !$3 && $lev == 1; + $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt ); + return $res if !$3 && $lev > 1; + if( $3 ){ + $res .= process_text1( $lev, $rstr, $3, closing $4 ); + } + } + if( $lev == 1 ){ + $res .= pure_text( $$rstr ); } else { - $result .= "$rest\n"; - $rest = ""; + warn "$0: $podfile: undelimited $func<> in paragraph $paragraph."; } } - $result .= $rest if $rest; + return $res; +} - # restore the leading and trailing white-space - $result = "$lead$result$trail"; +# +# go_ahead: extract text of an IS (can be nested) +# +sub go_ahead($$$){ + my( $rstr, $func, $closing ) = @_; + my $res = ''; + my @closing = ($closing); + while( $$rstr =~ + s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|@{[pattern $closing[0]]})//s ){ + $res .= $1; + unless( $3 ){ + shift @closing; + return $res unless @closing; + } else { + unshift @closing, closing $4; + } + $res .= $2; + } + warn "$0: $podfile: undelimited $func<> in paragraph $paragraph."; + return $res; +} - return $result; +# +# emit_C - output result of C<text> +# $text is the depod-ed text +# +sub emit_C($;$$){ + my( $text, $nocode, $args ) = @_; + $args = '' unless defined $args; + my $res; + my( $url, $fid ) = coderef( undef(), $text ); + + # need HTML-safe text + my $linktext = html_escape( "$text$args" ); + + if( defined( $url ) && + (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){ + $res = "<A HREF=\"$url\"><CODE>$linktext</CODE></A>"; + } elsif( 0 && $nocode ){ + $res = $linktext; + } else { + $res = "<CODE>$linktext</CODE>"; + } + return $res; } # -# pre_escape - convert & in text to $amp; +# html_escape: make text safe for HTML # -sub pre_escape { - my($str) = @_; +sub html_escape { + my $rest = $_[0]; + $rest =~ s/&/&/g; + $rest =~ s/</</g; + $rest =~ s/>/>/g; + $rest =~ s/"/"/g; + return $rest; +} - $$str =~ s,&,&,g; -} # # dosify - convert filenames to 8.3 # sub dosify { my($str) = @_; + return lc($str) if $^O eq 'VMS'; # VMS just needs casing if ($Is83) { $str = lc $str; $str =~ s/(\.\w+)/substr ($1,0,4)/ge; @@ -1356,183 +1674,205 @@ sub dosify { } # -# process_L - convert a pod L<> directive to a corresponding HTML link. -# most of the links made are inferred rather than known about directly -# (i.e it's not known whether the =head\d section exists in the target file, -# or whether a .pod file exists in the case of split files). however, the -# guessing usually works. -# -# Unlike the other directives, this should be called with an unprocessed -# string, else tags in the link won't be matched. +# page_sect - make an URL from the text of a L<> # -sub process_L { - my($str) = @_; - my($s1, $s2, $linktext, $page, $page83, $section, $link); # work strings - - $str =~ s/\n/ /g; # undo word-wrapped tags - $s1 = $str; - for ($s1) { - # LREF: a la HREF L<show this text|man/section> - $linktext = $1 if s:^([^|]+)\|::; - - # make sure sections start with a / - s,^",/",g; - s,^,/,g if (!m,/, && / /); - - # check if there's a section specified - if (m,^(.*?)/"?(.*?)"?$,) { # yes - ($page, $section) = ($1, $2); - } else { # no - ($page, $section) = ($str, ""); - } - - # check if we know that this is a section in this page - if (!defined $pages{$page} && defined $sections{$page}) { - $section = $page; - $page = ""; - } +sub page_sect($$) { + my( $page, $section ) = @_; + my( $linktext, $page83, $link); # work strings + + # check if we know that this is a section in this page + if (!defined $pages{$page} && defined $sections{$page}) { + $section = $page; + $page = ""; + ### print STDERR "reset page='', section=$section\n"; } $page83=dosify($page); $page=$page83 if (defined $pages{$page83}); if ($page eq "") { - $link = "#" . htmlify(0,$section); - $linktext = $section unless defined($linktext); + $link = "#" . htmlify( $section ); } elsif ( $page =~ /::/ ) { - $linktext = ($section ? "$section" : "$page"); $page =~ s,::,/,g; + # Search page cache for an entry keyed under the html page name, + # then look to see what directory that page might be in. NOTE: + # this will only find one page. A better solution might be to produce + # an intermediate page that is an index to all such pages. + my $page_name = $page ; + $page_name =~ s,^.*/,,s ; + if ( defined( $pages{ $page_name } ) && + $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/ + ) { + $page = $1 ; + } + else { + # NOTE: This branch assumes that all A::B pages are located in + # $htmlroot/A/B.html . This is often incorrect, since they are + # often in $htmlroot/lib/A/B.html or such like. Perhaps we could + # analyze the contents of %pages and figure out where any + # cousins of A::B are, then assume that. So, if A::B isn't found, + # but A::C is found in lib/A/C.pm, then A::B is assumed to be in + # lib/A/B.pm. This is also limited, but it's an improvement. + # Maybe a hints file so that the links point to the correct places + # nonetheless? + + } $link = "$htmlroot/$page.html"; - $link .= "#" . htmlify(0,$section) if ($section); + $link .= "#" . htmlify( $section ) if ($section); } elsif (!defined $pages{$page}) { - warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n"; $link = ""; - $linktext = $page unless defined($linktext); } else { - $linktext = ($section ? "$section" : "the $page manpage") unless defined($linktext); - $section = htmlify(0,$section) if $section ne ""; + $section = htmlify( $section ) if $section ne ""; + ### print STDERR "...section=$section\n"; # if there is a directory by the name of the page, then assume that an # appropriate section will exist in the subdirectory - if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) { +# if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) { + if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) { $link = "$htmlroot/$1/$section.html"; + ### print STDERR "...link=$link\n"; # since there is no directory by the name of the page, the section will # have to exist within a .html of the same name. thus, make sure there # is a .pod or .pm that might become that .html } else { - $section = "#$section"; + $section = "#$section" if $section; + ### print STDERR "...section=$section\n"; + # check if there is a .pod with the page name if ($pages{$page} =~ /([^:]*)\.pod:/) { $link = "$htmlroot/$1.html$section"; } elsif ($pages{$page} =~ /([^:]*)\.pm:/) { $link = "$htmlroot/$1.html$section"; } else { - warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ". - "no .pod or .pm found\n"; $link = ""; - $linktext = $section unless defined($linktext); } } } - process_text(\$linktext, 0); if ($link) { - $s1 = "<A HREF=\"$link\">$linktext</A>"; + # Here, we take advantage of the knowledge that $htmlfileurl ne '' + # implies $htmlroot eq ''. This means that the link in question + # needs a prefix of $htmldir if it begins with '/'. The test for + # the initial '/' is done to avoid '#'-only links, and to allow + # for other kinds of links, like file:, ftp:, etc. + my $url ; + if ( $htmlfileurl ne '' ) { + $link = "$htmldir$link" if $link =~ m{^/}s; + $url = relativize_url( $link, $htmlfileurl ); +# print( " b: [$link,$htmlfileurl,$url]\n" ); + } + else { + $url = $link ; + } + return $url; + } else { - $s1 = "<EM>$linktext</EM>"; + return undef(); } - return $s1; } # -# process_BFI - process any of the B<>, F<>, or I<> pod-escapes and -# convert them to corresponding HTML directives. +# relativize_url - convert an absolute URL to one relative to a base URL. +# Assumes both end in a filename. # -sub process_BFI { - my($tag, $str) = @_; - my($s1); # work string - my(%repltext) = ( 'B' => 'STRONG', - 'F' => 'EM', - 'I' => 'EM'); +sub relativize_url { + my ($dest,$source) = @_ ; - # extract the modified text and convert to HTML - $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>"; - return $s1; -} - -# -# process_C - process the C<> pod-escape. -# -sub process_C { - my($str, $doref) = @_; - my($s1, $s2); + my ($dest_volume,$dest_directory,$dest_file) = + File::Spec::Unix->splitpath( $dest ) ; + $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ; - $s1 = $str; - $s1 =~ s/\([^()]*\)//g; # delete parentheses - $s2 = $s1; - $s1 =~ s/\W//g; # delete bogus characters - $str = html_escape($str); + my ($source_volume,$source_directory,$source_file) = + File::Spec::Unix->splitpath( $source ) ; + $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ; - # if there was a pod file that we found earlier with an appropriate - # =item directive, then create a link to that page. - if ($doref && defined $items{$s1}) { - $s1 = ($items{$s1} ? - "<A HREF=\"$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) . "\">$str</A>" : - "<A HREF=\"#item_" . htmlify(0,$s2) . "\">$str</A>"); - $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,; - confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/; - } else { - $s1 = "<CODE>$str</CODE>"; - # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose + my $rel_path = '' ; + if ( $dest ne '' ) { + $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ; } + if ( $rel_path ne '' && + substr( $rel_path, -1 ) ne '/' && + substr( $dest_file, 0, 1 ) ne '#' + ) { + $rel_path .= "/$dest_file" ; + } + else { + $rel_path .= "$dest_file" ; + } - return $s1; + return $rel_path ; } + # -# process_E - process the E<> pod directive which seems to escape a character. +# coderef - make URL from the text of a C<> # -sub process_E { - my($str) = @_; +sub coderef($$){ + my( $page, $item ) = @_; + my( $url ); + + my $fid = fragment_id( $item ); + if( defined( $page ) ){ + # we have been given a $page... + $page =~ s{::}{/}g; + + # Do we take it? Item could be a section! + my $base = $items{$fid} || ""; + $base =~ s{[^/]*/}{}; + if( $base ne "$page.html" ){ + ### print STDERR "coderef( $page, $item ): items{$fid} = $items{$fid} = $base => discard page!\n"; + $page = undef(); + } - for ($str) { - s,([^/].*),\&$1\;,g; + } else { + # no page - local items precede cached items + if( defined( $fid ) ){ + if( exists $local_items{$fid} ){ + $page = $local_items{$fid}; + } else { + $page = $items{$fid}; + } + } } - return $str; -} + # if there was a pod file that we found earlier with an appropriate + # =item directive, then create a link to that page. + if( defined $page ){ + if( $page ){ + if( exists $pages{$page} and $pages{$page} =~ /([^:.]*)\.[^:]*:/){ + $page = $1 . '.html'; + } + my $link = "$htmlroot/$page#item_$fid"; -# -# process_Z - process the Z<> pod directive which really just amounts to -# ignoring it. this allows someone to start a paragraph with an = -# -sub process_Z { - my($str) = @_; + # Here, we take advantage of the knowledge that $htmlfileurl + # ne '' implies $htmlroot eq ''. + if ( $htmlfileurl ne '' ) { + $link = "$htmldir$link" ; + $url = relativize_url( $link, $htmlfileurl ) ; + } else { + $url = $link ; + } + } else { + $url = "#item_" . $fid; + } - # there is no equivalent in HTML for this so just ignore it. - $str = ""; - return $str; + confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/; + } + return( $url, $fid ); } -# -# process_S - process the S<> pod directive which means to convert all -# spaces in the string to non-breaking spaces (in HTML-eze). -# -sub process_S { - my($str) = @_; - # convert all spaces in the text to non-breaking spaces in HTML. - $str =~ s/ / /g; - return $str; -} # -# process_X - this is supposed to make an index entry. we'll just -# ignore it. -# -sub process_X { - return ''; +# Adapted from Nick Ing-Simmons' PodToHtml package. +sub relative_url { + my $source_file = shift ; + my $destination_file = shift; + + my $source = URI::file->new_abs($source_file); + my $uo = URI::file->new($destination_file,$source)->abs; + return $uo->rel->as_string; } @@ -1549,29 +1889,131 @@ sub finish_list { # # htmlify - converts a pod section specification to a suitable section -# specification for HTML. if first arg is 1, only takes 1st word. +# specification for HTML. Note that we keep spaces and special characters +# except ", ? (Netscape problem) and the hyphen (writer's problem...). # sub htmlify { - my($compact, $heading) = @_; + my( $heading) = @_; + $heading =~ s/(\s+)/ /g; + $heading =~ s/\s+\Z//; + $heading =~ s/\A\s+//; + # The hyphen is a disgrace to the English language. + $heading =~ s/[-"?]//g; + $heading = lc( $heading ); + return $heading; +} - if ($compact) { - $heading =~ /^(\w+)/; - $heading = $1; - } +# +# depod - convert text by eliminating all interior sequences +# Note: can be called with copy or modify semantics +# +my %E2c; +$E2c{lt} = '<'; +$E2c{gt} = '>'; +$E2c{sol} = '/'; +$E2c{verbar} = '|'; +$E2c{amp} = '&'; # in Tk's pods + +sub depod1($;$$); + +sub depod($){ + my $string; + if( ref( $_[0] ) ){ + $string = ${$_[0]}; + ${$_[0]} = depod1( \$string ); + } else { + $string = $_[0]; + depod1( \$string ); + } +} - # $heading = lc($heading); - $heading =~ s/[^\w\s]/_/g; - $heading =~ s/(\s+)/ /g; - $heading =~ s/^\s*(.*?)\s*$/$1/s; - $heading =~ s/ /_/g; - $heading =~ s/\A(.{32}).*\Z/$1/s; - $heading =~ s/\s+\Z//; - $heading =~ s/_{2,}/_/g; +sub depod1($;$$){ + my( $rstr, $func, $closing ) = @_; + my $res = ''; + return $res unless defined $$rstr; + if( ! defined( $func ) ){ + # skip to next begin of an interior sequence + while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?// ){ + # recurse into its text + $res .= $1 . depod1( $rstr, $2, closing $3); + } + $res .= $$rstr; + } elsif( $func eq 'E' ){ + # E<x> - convert to character + $$rstr =~ s/^(\w+)>//; + $res .= $E2c{$1} || ""; + } elsif( $func eq 'X' ){ + # X<> - ignore + $$rstr =~ s/^[^>]*>//; + } elsif( $func eq 'Z' ){ + # Z<> - empty + $$rstr =~ s/^>//; + } else { + # all others: either recurse into new function or + # terminate at closing angle bracket + my $term = pattern $closing; + while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)// ){ + $res .= $1; + last unless $3; + $res .= depod1( $rstr, $3, closing $4 ); + } + ## If we're here and $2 ne '>': undelimited interior sequence. + ## Ignored, as this is called without proper indication of where we are. + ## Rely on process_text to produce diagnostics. + } + return $res; +} - return $heading; +# +# fragment_id - construct a fragment identifier from: +# a) =item text +# b) contents of C<...> +# +my @hc; +sub fragment_id { + my $text = shift(); + $text =~ s/\s+\Z//s; + if( $text ){ + # a method or function? + return $1 if $text =~ /(\w+)\s*\(/; + return $1 if $text =~ /->\s*(\w+)\s*\(?/; + + # a variable name? + return $1 if $text =~ /^([$@%*]\S+)/; + + # some pattern matching operator? + return $1 if $text =~ m|^(\w+/).*/\w*$|; + + # fancy stuff... like "do { }" + return $1 if $text =~ m|^(\w+)\s*{.*}$|; + + # honour the perlfunc manpage: func [PAR[,[ ]PAR]...] + # and some funnies with ... Module ... + return $1 if $text =~ m{^([a-z\d]+)(\s+[A-Z\d,/& ]+)?$}; + return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$}; + + # text? normalize! + $text =~ s/\s+/_/sg; + $text =~ s{(\W)}{ + defined( $hc[ord($1)] ) ? $hc[ord($1)] + : ( $hc[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe; + $text = substr( $text, 0, 50 ); + } else { + return undef(); + } } -BEGIN { +# +# make_URL_href - generate HTML href from URL +# Special treatment for CGI queries. +# +sub make_URL_href($){ + my( $url ) = @_; + if( $url !~ + s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<A HREF="$1$2">$1</A>}i ){ + $url = "<A HREF=\"$url\">$url</A>"; + } + return $url; } 1; diff --git a/contrib/perl5/lib/Pod/InputObjects.pm b/contrib/perl5/lib/Pod/InputObjects.pm new file mode 100644 index 0000000..849182b --- /dev/null +++ b/contrib/perl5/lib/Pod/InputObjects.pm @@ -0,0 +1,933 @@ +############################################################################# +# Pod/InputObjects.pm -- package which defines objects for input streams +# and paragraphs and commands when parsing POD docs. +# +# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved. +# This file is part of "PodParser". PodParser is free software; +# you can redistribute it and/or modify it under the same terms +# as Perl itself. +############################################################################# + +package Pod::InputObjects; + +use vars qw($VERSION); +$VERSION = 1.12; ## Current version of this package +require 5.005; ## requires this Perl version or later + +############################################################################# + +=head1 NAME + +Pod::InputObjects - objects representing POD input paragraphs, commands, etc. + +=head1 SYNOPSIS + + use Pod::InputObjects; + +=head1 REQUIRES + +perl5.004, Carp + +=head1 EXPORTS + +Nothing. + +=head1 DESCRIPTION + +This module defines some basic input objects used by B<Pod::Parser> when +reading and parsing POD text from an input source. The following objects +are defined: + +=over 4 + +=begin __PRIVATE__ + +=item B<Pod::InputSource> + +An object corresponding to a source of POD input text. It is mostly a +wrapper around a filehandle or C<IO::Handle>-type object (or anything +that implements the C<getline()> method) which keeps track of some +additional information relevant to the parsing of PODs. + +=end __PRIVATE__ + +=item B<Pod::Paragraph> + +An object corresponding to a paragraph of POD input text. It may be a +plain paragraph, a verbatim paragraph, or a command paragraph (see +L<perlpod>). + +=item B<Pod::InteriorSequence> + +An object corresponding to an interior sequence command from the POD +input text (see L<perlpod>). + +=item B<Pod::ParseTree> + +An object corresponding to a tree of parsed POD text. Each "node" in +a parse-tree (or I<ptree>) is either a text-string or a reference to +a B<Pod::InteriorSequence> object. The nodes appear in the parse-tree +in they order in which they were parsed from left-to-right. + +=back + +Each of these input objects are described in further detail in the +sections which follow. + +=cut + +############################################################################# + +use strict; +#use diagnostics; +#use Carp; + +############################################################################# + +package Pod::InputSource; + +##--------------------------------------------------------------------------- + +=begin __PRIVATE__ + +=head1 B<Pod::InputSource> + +This object corresponds to an input source or stream of POD +documentation. When parsing PODs, it is necessary to associate and store +certain context information with each input source. All of this +information is kept together with the stream itself in one of these +C<Pod::InputSource> objects. Each such object is merely a wrapper around +an C<IO::Handle> object of some kind (or at least something that +implements the C<getline()> method). They have the following +methods/attributes: + +=end __PRIVATE__ + +=cut + +##--------------------------------------------------------------------------- + +=begin __PRIVATE__ + +=head2 B<new()> + + my $pod_input1 = Pod::InputSource->new(-handle => $filehandle); + my $pod_input2 = new Pod::InputSource(-handle => $filehandle, + -name => $name); + my $pod_input3 = new Pod::InputSource(-handle => \*STDIN); + my $pod_input4 = Pod::InputSource->new(-handle => \*STDIN, + -name => "(STDIN)"); + +This is a class method that constructs a C<Pod::InputSource> object and +returns a reference to the new input source object. It takes one or more +keyword arguments in the form of a hash. The keyword C<-handle> is +required and designates the corresponding input handle. The keyword +C<-name> is optional and specifies the name associated with the input +handle (typically a file name). + +=end __PRIVATE__ + +=cut + +sub new { + ## Determine if we were called via an object-ref or a classname + my $this = shift; + my $class = ref($this) || $this; + + ## Any remaining arguments are treated as initial values for the + ## hash that is used to represent this object. Note that we default + ## certain values by specifying them *before* the arguments passed. + ## If they are in the argument list, they will override the defaults. + my $self = { -name => '(unknown)', + -handle => undef, + -was_cutting => 0, + @_ }; + + ## Bless ourselves into the desired class and perform any initialization + bless $self, $class; + return $self; +} + +##--------------------------------------------------------------------------- + +=begin __PRIVATE__ + +=head2 B<name()> + + my $filename = $pod_input->name(); + $pod_input->name($new_filename_to_use); + +This method gets/sets the name of the input source (usually a filename). +If no argument is given, it returns a string containing the name of +the input source; otherwise it sets the name of the input source to the +contents of the given argument. + +=end __PRIVATE__ + +=cut + +sub name { + (@_ > 1) and $_[0]->{'-name'} = $_[1]; + return $_[0]->{'-name'}; +} + +## allow 'filename' as an alias for 'name' +*filename = \&name; + +##--------------------------------------------------------------------------- + +=begin __PRIVATE__ + +=head2 B<handle()> + + my $handle = $pod_input->handle(); + +Returns a reference to the handle object from which input is read (the +one used to contructed this input source object). + +=end __PRIVATE__ + +=cut + +sub handle { + return $_[0]->{'-handle'}; +} + +##--------------------------------------------------------------------------- + +=begin __PRIVATE__ + +=head2 B<was_cutting()> + + print "Yes.\n" if ($pod_input->was_cutting()); + +The value of the C<cutting> state (that the B<cutting()> method would +have returned) immediately before any input was read from this input +stream. After all input from this stream has been read, the C<cutting> +state is restored to this value. + +=end __PRIVATE__ + +=cut + +sub was_cutting { + (@_ > 1) and $_[0]->{-was_cutting} = $_[1]; + return $_[0]->{-was_cutting}; +} + +##--------------------------------------------------------------------------- + +############################################################################# + +package Pod::Paragraph; + +##--------------------------------------------------------------------------- + +=head1 B<Pod::Paragraph> + +An object representing a paragraph of POD input text. +It has the following methods/attributes: + +=cut + +##--------------------------------------------------------------------------- + +=head2 B<new()> + + my $pod_para1 = Pod::Paragraph->new(-text => $text); + my $pod_para2 = Pod::Paragraph->new(-name => $cmd, + -text => $text); + my $pod_para3 = new Pod::Paragraph(-text => $text); + my $pod_para4 = new Pod::Paragraph(-name => $cmd, + -text => $text); + my $pod_para5 = Pod::Paragraph->new(-name => $cmd, + -text => $text, + -file => $filename, + -line => $line_number); + +This is a class method that constructs a C<Pod::Paragraph> object and +returns a reference to the new paragraph object. It may be given one or +two keyword arguments. The C<-text> keyword indicates the corresponding +text of the POD paragraph. The C<-name> keyword indicates the name of +the corresponding POD command, such as C<head1> or C<item> (it should +I<not> contain the C<=> prefix); this is needed only if the POD +paragraph corresponds to a command paragraph. The C<-file> and C<-line> +keywords indicate the filename and line number corresponding to the +beginning of the paragraph + +=cut + +sub new { + ## Determine if we were called via an object-ref or a classname + my $this = shift; + my $class = ref($this) || $this; + + ## Any remaining arguments are treated as initial values for the + ## hash that is used to represent this object. Note that we default + ## certain values by specifying them *before* the arguments passed. + ## If they are in the argument list, they will override the defaults. + my $self = { + -name => undef, + -text => (@_ == 1) ? $_[0] : undef, + -file => '<unknown-file>', + -line => 0, + -prefix => '=', + -separator => ' ', + -ptree => [], + @_ + }; + + ## Bless ourselves into the desired class and perform any initialization + bless $self, $class; + return $self; +} + +##--------------------------------------------------------------------------- + +=head2 B<cmd_name()> + + my $para_cmd = $pod_para->cmd_name(); + +If this paragraph is a command paragraph, then this method will return +the name of the command (I<without> any leading C<=> prefix). + +=cut + +sub cmd_name { + (@_ > 1) and $_[0]->{'-name'} = $_[1]; + return $_[0]->{'-name'}; +} + +## let name() be an alias for cmd_name() +*name = \&cmd_name; + +##--------------------------------------------------------------------------- + +=head2 B<text()> + + my $para_text = $pod_para->text(); + +This method will return the corresponding text of the paragraph. + +=cut + +sub text { + (@_ > 1) and $_[0]->{'-text'} = $_[1]; + return $_[0]->{'-text'}; +} + +##--------------------------------------------------------------------------- + +=head2 B<raw_text()> + + my $raw_pod_para = $pod_para->raw_text(); + +This method will return the I<raw> text of the POD paragraph, exactly +as it appeared in the input. + +=cut + +sub raw_text { + return $_[0]->{'-text'} unless (defined $_[0]->{'-name'}); + return $_[0]->{'-prefix'} . $_[0]->{'-name'} . + $_[0]->{'-separator'} . $_[0]->{'-text'}; +} + +##--------------------------------------------------------------------------- + +=head2 B<cmd_prefix()> + + my $prefix = $pod_para->cmd_prefix(); + +If this paragraph is a command paragraph, then this method will return +the prefix used to denote the command (which should be the string "=" +or "=="). + +=cut + +sub cmd_prefix { + return $_[0]->{'-prefix'}; +} + +##--------------------------------------------------------------------------- + +=head2 B<cmd_separator()> + + my $separator = $pod_para->cmd_separator(); + +If this paragraph is a command paragraph, then this method will return +the text used to separate the command name from the rest of the +paragraph (if any). + +=cut + +sub cmd_separator { + return $_[0]->{'-separator'}; +} + +##--------------------------------------------------------------------------- + +=head2 B<parse_tree()> + + my $ptree = $pod_parser->parse_text( $pod_para->text() ); + $pod_para->parse_tree( $ptree ); + $ptree = $pod_para->parse_tree(); + +This method will get/set the corresponding parse-tree of the paragraph's text. + +=cut + +sub parse_tree { + (@_ > 1) and $_[0]->{'-ptree'} = $_[1]; + return $_[0]->{'-ptree'}; +} + +## let ptree() be an alias for parse_tree() +*ptree = \&parse_tree; + +##--------------------------------------------------------------------------- + +=head2 B<file_line()> + + my ($filename, $line_number) = $pod_para->file_line(); + my $position = $pod_para->file_line(); + +Returns the current filename and line number for the paragraph +object. If called in an array context, it returns a list of two +elements: first the filename, then the line number. If called in +a scalar context, it returns a string containing the filename, followed +by a colon (':'), followed by the line number. + +=cut + +sub file_line { + my @loc = ($_[0]->{'-file'} || '<unknown-file>', + $_[0]->{'-line'} || 0); + return (wantarray) ? @loc : join(':', @loc); +} + +##--------------------------------------------------------------------------- + +############################################################################# + +package Pod::InteriorSequence; + +##--------------------------------------------------------------------------- + +=head1 B<Pod::InteriorSequence> + +An object representing a POD interior sequence command. +It has the following methods/attributes: + +=cut + +##--------------------------------------------------------------------------- + +=head2 B<new()> + + my $pod_seq1 = Pod::InteriorSequence->new(-name => $cmd + -ldelim => $delimiter); + my $pod_seq2 = new Pod::InteriorSequence(-name => $cmd, + -ldelim => $delimiter); + my $pod_seq3 = new Pod::InteriorSequence(-name => $cmd, + -ldelim => $delimiter, + -file => $filename, + -line => $line_number); + + my $pod_seq4 = new Pod::InteriorSequence(-name => $cmd, $ptree); + my $pod_seq5 = new Pod::InteriorSequence($cmd, $ptree); + +This is a class method that constructs a C<Pod::InteriorSequence> object +and returns a reference to the new interior sequence object. It should +be given two keyword arguments. The C<-ldelim> keyword indicates the +corresponding left-delimiter of the interior sequence (e.g. 'E<lt>'). +The C<-name> keyword indicates the name of the corresponding interior +sequence command, such as C<I> or C<B> or C<C>. The C<-file> and +C<-line> keywords indicate the filename and line number corresponding +to the beginning of the interior sequence. If the C<$ptree> argument is +given, it must be the last argument, and it must be either string, or +else an array-ref suitable for passing to B<Pod::ParseTree::new> (or +it may be a reference to an Pod::ParseTree object). + +=cut + +sub new { + ## Determine if we were called via an object-ref or a classname + my $this = shift; + my $class = ref($this) || $this; + + ## See if first argument has no keyword + if (((@_ <= 2) or (@_ % 2)) and $_[0] !~ /^-\w/) { + ## Yup - need an implicit '-name' before first parameter + unshift @_, '-name'; + } + + ## See if odd number of args + if ((@_ % 2) != 0) { + ## Yup - need an implicit '-ptree' before the last parameter + splice @_, $#_, 0, '-ptree'; + } + + ## Any remaining arguments are treated as initial values for the + ## hash that is used to represent this object. Note that we default + ## certain values by specifying them *before* the arguments passed. + ## If they are in the argument list, they will override the defaults. + my $self = { + -name => (@_ == 1) ? $_[0] : undef, + -file => '<unknown-file>', + -line => 0, + -ldelim => '<', + -rdelim => '>', + @_ + }; + + ## Initialize contents if they havent been already + my $ptree = $self->{'-ptree'} || new Pod::ParseTree(); + if ( ref $ptree =~ /^(ARRAY)?$/ ) { + ## We have an array-ref, or a normal scalar. Pass it as an + ## an argument to the ptree-constructor + $ptree = new Pod::ParseTree($1 ? [$ptree] : $ptree); + } + $self->{'-ptree'} = $ptree; + + ## Bless ourselves into the desired class and perform any initialization + bless $self, $class; + return $self; +} + +##--------------------------------------------------------------------------- + +=head2 B<cmd_name()> + + my $seq_cmd = $pod_seq->cmd_name(); + +The name of the interior sequence command. + +=cut + +sub cmd_name { + (@_ > 1) and $_[0]->{'-name'} = $_[1]; + return $_[0]->{'-name'}; +} + +## let name() be an alias for cmd_name() +*name = \&cmd_name; + +##--------------------------------------------------------------------------- + +## Private subroutine to set the parent pointer of all the given +## children that are interior-sequences to be $self + +sub _set_child2parent_links { + my ($self, @children) = @_; + ## Make sure any sequences know who their parent is + for (@children) { + next unless (length and ref and ref ne 'SCALAR'); + if (UNIVERSAL::isa($_, 'Pod::InteriorSequence') or + UNIVERSAL::can($_, 'nested')) + { + $_->nested($self); + } + } +} + +## Private subroutine to unset child->parent links + +sub _unset_child2parent_links { + my $self = shift; + $self->{'-parent_sequence'} = undef; + my $ptree = $self->{'-ptree'}; + for (@$ptree) { + next unless (length and ref and ref ne 'SCALAR'); + $_->_unset_child2parent_links() + if UNIVERSAL::isa($_, 'Pod::InteriorSequence'); + } +} + +##--------------------------------------------------------------------------- + +=head2 B<prepend()> + + $pod_seq->prepend($text); + $pod_seq1->prepend($pod_seq2); + +Prepends the given string or parse-tree or sequence object to the parse-tree +of this interior sequence. + +=cut + +sub prepend { + my $self = shift; + $self->{'-ptree'}->prepend(@_); + _set_child2parent_links($self, @_); + return $self; +} + +##--------------------------------------------------------------------------- + +=head2 B<append()> + + $pod_seq->append($text); + $pod_seq1->append($pod_seq2); + +Appends the given string or parse-tree or sequence object to the parse-tree +of this interior sequence. + +=cut + +sub append { + my $self = shift; + $self->{'-ptree'}->append(@_); + _set_child2parent_links($self, @_); + return $self; +} + +##--------------------------------------------------------------------------- + +=head2 B<nested()> + + $outer_seq = $pod_seq->nested || print "not nested"; + +If this interior sequence is nested inside of another interior +sequence, then the outer/parent sequence that contains it is +returned. Otherwise C<undef> is returned. + +=cut + +sub nested { + my $self = shift; + (@_ == 1) and $self->{'-parent_sequence'} = shift; + return $self->{'-parent_sequence'} || undef; +} + +##--------------------------------------------------------------------------- + +=head2 B<raw_text()> + + my $seq_raw_text = $pod_seq->raw_text(); + +This method will return the I<raw> text of the POD interior sequence, +exactly as it appeared in the input. + +=cut + +sub raw_text { + my $self = shift; + my $text = $self->{'-name'} . $self->{'-ldelim'}; + for ( $self->{'-ptree'}->children ) { + $text .= (ref $_) ? $_->raw_text : $_; + } + $text .= $self->{'-rdelim'}; + return $text; +} + +##--------------------------------------------------------------------------- + +=head2 B<left_delimiter()> + + my $ldelim = $pod_seq->left_delimiter(); + +The leftmost delimiter beginning the argument text to the interior +sequence (should be "<"). + +=cut + +sub left_delimiter { + (@_ > 1) and $_[0]->{'-ldelim'} = $_[1]; + return $_[0]->{'-ldelim'}; +} + +## let ldelim() be an alias for left_delimiter() +*ldelim = \&left_delimiter; + +##--------------------------------------------------------------------------- + +=head2 B<right_delimiter()> + +The rightmost delimiter beginning the argument text to the interior +sequence (should be ">"). + +=cut + +sub right_delimiter { + (@_ > 1) and $_[0]->{'-rdelim'} = $_[1]; + return $_[0]->{'-rdelim'}; +} + +## let rdelim() be an alias for right_delimiter() +*rdelim = \&right_delimiter; + +##--------------------------------------------------------------------------- + +=head2 B<parse_tree()> + + my $ptree = $pod_parser->parse_text($paragraph_text); + $pod_seq->parse_tree( $ptree ); + $ptree = $pod_seq->parse_tree(); + +This method will get/set the corresponding parse-tree of the interior +sequence's text. + +=cut + +sub parse_tree { + (@_ > 1) and $_[0]->{'-ptree'} = $_[1]; + return $_[0]->{'-ptree'}; +} + +## let ptree() be an alias for parse_tree() +*ptree = \&parse_tree; + +##--------------------------------------------------------------------------- + +=head2 B<file_line()> + + my ($filename, $line_number) = $pod_seq->file_line(); + my $position = $pod_seq->file_line(); + +Returns the current filename and line number for the interior sequence +object. If called in an array context, it returns a list of two +elements: first the filename, then the line number. If called in +a scalar context, it returns a string containing the filename, followed +by a colon (':'), followed by the line number. + +=cut + +sub file_line { + my @loc = ($_[0]->{'-file'} || '<unknown-file>', + $_[0]->{'-line'} || 0); + return (wantarray) ? @loc : join(':', @loc); +} + +##--------------------------------------------------------------------------- + +=head2 B<DESTROY()> + +This method performs any necessary cleanup for the interior-sequence. +If you override this method then it is B<imperative> that you invoke +the parent method from within your own method, otherwise +I<interior-sequence storage will not be reclaimed upon destruction!> + +=cut + +sub DESTROY { + ## We need to get rid of all child->parent pointers throughout the + ## tree so their reference counts will go to zero and they can be + ## garbage-collected + _unset_child2parent_links(@_); +} + +##--------------------------------------------------------------------------- + +############################################################################# + +package Pod::ParseTree; + +##--------------------------------------------------------------------------- + +=head1 B<Pod::ParseTree> + +This object corresponds to a tree of parsed POD text. As POD text is +scanned from left to right, it is parsed into an ordered list of +text-strings and B<Pod::InteriorSequence> objects (in order of +appearance). A B<Pod::ParseTree> object corresponds to this list of +strings and sequences. Each interior sequence in the parse-tree may +itself contain a parse-tree (since interior sequences may be nested). + +=cut + +##--------------------------------------------------------------------------- + +=head2 B<new()> + + my $ptree1 = Pod::ParseTree->new; + my $ptree2 = new Pod::ParseTree; + my $ptree4 = Pod::ParseTree->new($array_ref); + my $ptree3 = new Pod::ParseTree($array_ref); + +This is a class method that constructs a C<Pod::Parse_tree> object and +returns a reference to the new parse-tree. If a single-argument is given, +it must be a reference to an array, and is used to initialize the root +(top) of the parse tree. + +=cut + +sub new { + ## Determine if we were called via an object-ref or a classname + my $this = shift; + my $class = ref($this) || $this; + + my $self = (@_ == 1 and ref $_[0]) ? $_[0] : []; + + ## Bless ourselves into the desired class and perform any initialization + bless $self, $class; + return $self; +} + +##--------------------------------------------------------------------------- + +=head2 B<top()> + + my $top_node = $ptree->top(); + $ptree->top( $top_node ); + $ptree->top( @children ); + +This method gets/sets the top node of the parse-tree. If no arguments are +given, it returns the topmost node in the tree (the root), which is also +a B<Pod::ParseTree>. If it is given a single argument that is a reference, +then the reference is assumed to a parse-tree and becomes the new top node. +Otherwise, if arguments are given, they are treated as the new list of +children for the top node. + +=cut + +sub top { + my $self = shift; + if (@_ > 0) { + @{ $self } = (@_ == 1 and ref $_[0]) ? ${ @_ } : @_; + } + return $self; +} + +## let parse_tree() & ptree() be aliases for the 'top' method +*parse_tree = *ptree = \⊤ + +##--------------------------------------------------------------------------- + +=head2 B<children()> + +This method gets/sets the children of the top node in the parse-tree. +If no arguments are given, it returns the list (array) of children +(each of which should be either a string or a B<Pod::InteriorSequence>. +Otherwise, if arguments are given, they are treated as the new list of +children for the top node. + +=cut + +sub children { + my $self = shift; + if (@_ > 0) { + @{ $self } = (@_ == 1 and ref $_[0]) ? ${ @_ } : @_; + } + return @{ $self }; +} + +##--------------------------------------------------------------------------- + +=head2 B<prepend()> + +This method prepends the given text or parse-tree to the current parse-tree. +If the first item on the parse-tree is text and the argument is also text, +then the text is prepended to the first item (not added as a separate string). +Otherwise the argument is added as a new string or parse-tree I<before> +the current one. + +=cut + +use vars qw(@ptree); ## an alias used for performance reasons + +sub prepend { + my $self = shift; + local *ptree = $self; + for (@_) { + next unless length; + if (@ptree and !(ref $ptree[0]) and !(ref $_)) { + $ptree[0] = $_ . $ptree[0]; + } + else { + unshift @ptree, $_; + } + } +} + +##--------------------------------------------------------------------------- + +=head2 B<append()> + +This method appends the given text or parse-tree to the current parse-tree. +If the last item on the parse-tree is text and the argument is also text, +then the text is appended to the last item (not added as a separate string). +Otherwise the argument is added as a new string or parse-tree I<after> +the current one. + +=cut + +sub append { + my $self = shift; + local *ptree = $self; + for (@_) { + next unless length; + if (@ptree and !(ref $ptree[-1]) and !(ref $_)) { + $ptree[-1] .= $_; + } + else { + push @ptree, $_; + } + } +} + +=head2 B<raw_text()> + + my $ptree_raw_text = $ptree->raw_text(); + +This method will return the I<raw> text of the POD parse-tree +exactly as it appeared in the input. + +=cut + +sub raw_text { + my $self = shift; + my $text = ""; + for ( @$self ) { + $text .= (ref $_) ? $_->raw_text : $_; + } + return $text; +} + +##--------------------------------------------------------------------------- + +## Private routines to set/unset child->parent links + +sub _unset_child2parent_links { + my $self = shift; + local *ptree = $self; + for (@ptree) { + next unless (length and ref and ref ne 'SCALAR'); + $_->_unset_child2parent_links() + if UNIVERSAL::isa($_, 'Pod::InteriorSequence'); + } +} + +sub _set_child2parent_links { + ## nothing to do, Pod::ParseTrees cant have parent pointers +} + +=head2 B<DESTROY()> + +This method performs any necessary cleanup for the parse-tree. +If you override this method then it is B<imperative> +that you invoke the parent method from within your own method, +otherwise I<parse-tree storage will not be reclaimed upon destruction!> + +=cut + +sub DESTROY { + ## We need to get rid of all child->parent pointers throughout the + ## tree so their reference counts will go to zero and they can be + ## garbage-collected + _unset_child2parent_links(@_); +} + +############################################################################# + +=head1 SEE ALSO + +See L<Pod::Parser>, L<Pod::Select> + +=head1 AUTHOR + +Brad Appleton E<lt>bradapp@enteract.comE<gt> + +=cut + +1; diff --git a/contrib/perl5/lib/Pod/Man.pm b/contrib/perl5/lib/Pod/Man.pm new file mode 100644 index 0000000..97a3828 --- /dev/null +++ b/contrib/perl5/lib/Pod/Man.pm @@ -0,0 +1,1217 @@ +# Pod::Man -- Convert POD data to formatted *roff input. +# $Id: Man.pm,v 1.2 2000/03/19 07:30:13 eagle Exp $ +# +# Copyright 1999, 2000 by Russ Allbery <rra@stanford.edu> +# +# This program is free software; you can redistribute it and/or modify it +# under the same terms as Perl itself. +# +# This module is intended to be a replacement for the pod2man script +# distributed with versions of Perl prior to 5.6, and attempts to match its +# output except for some specific circumstances where other decisions seemed +# to produce better output. It uses Pod::Parser and is designed to be easy +# to subclass. +# +# Perl core hackers, please note that this module is also separately +# maintained outside of the Perl core as part of the podlators. Please send +# me any patches at the address above in addition to sending them to the +# standard Perl mailing lists. + +############################################################################ +# Modules and declarations +############################################################################ + +package Pod::Man; + +require 5.004; + +use Carp qw(carp croak); +use Pod::Parser (); + +use strict; +use subs qw(makespace); +use vars qw(@ISA %ESCAPES $PREAMBLE $VERSION); + +@ISA = qw(Pod::Parser); + +# Don't use the CVS revision as the version, since this module is also in +# Perl core and too many things could munge CVS magic revision strings. +# This number should ideally be the same as the CVS revision in podlators, +# however. +$VERSION = 1.02; + + +############################################################################ +# Preamble and *roff output tables +############################################################################ + +# The following is the static preamble which starts all *roff output we +# generate. It's completely static except for the font to use as a +# fixed-width font, which is designed by @CFONT@. $PREAMBLE should +# therefore be run through s/\@CFONT\@/<font>/g before output. +$PREAMBLE = <<'----END OF PREAMBLE----'; +.de Sh \" Subsection heading +.br +.if t .Sp +.ne 5 +.PP +\fB\\$1\fR +.PP +.. +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Ip \" List item +.br +.ie \\n(.$>=3 .ne \\$3 +.el .ne 3 +.IP "\\$1" \\$2 +.. +.de Vb \" Begin verbatim text +.ft @CFONT@ +.nf +.ne \\$1 +.. +.de Ve \" End verbatim text +.ft R + +.fi +.. +.\" Set up some character translations and predefined strings. \*(-- will +.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left +.\" double quote, and \*(R" will give a right double quote. | will give a +.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used +.\" to do unbreakable dashes and therefore won't be available. \*(C` and +.\" \*(C' expand to `' in nroff, nothing in troff, for use with C<> +.tr \(*W-|\(bv\*(Tr +.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' +.ie n \{\ +. ds -- \(*W- +. ds PI pi +. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch +. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch +. ds L" "" +. ds R" "" +. ds C` ` +. ds C' ' +'br\} +.el\{\ +. ds -- \|\(em\| +. ds PI \(*p +. ds L" `` +. ds R" '' +'br\} +.\" +.\" If the F register is turned on, we'll generate index entries on stderr +.\" for titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and +.\" index entries marked with X<> in POD. Of course, you'll have to process +.\" the output yourself in some meaningful fashion. +.if \nF \{\ +. de IX +. tm Index:\\$1\t\\n%\t"\\$2" +. . +. nr % 0 +. rr F +.\} +.\" +.\" For nroff, turn off justification. Always turn off hyphenation; it +.\" makes way too many mistakes in technical documents. +.hy 0 +.if n .na +.\" +.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). +.\" Fear. Run. Save yourself. No user-serviceable parts. +.bd B 3 +. \" fudge factors for nroff and troff +.if n \{\ +. ds #H 0 +. ds #V .8m +. ds #F .3m +. ds #[ \f1 +. ds #] \fP +.\} +.if t \{\ +. ds #H ((1u-(\\\\n(.fu%2u))*.13m) +. ds #V .6m +. ds #F 0 +. ds #[ \& +. ds #] \& +.\} +. \" simple accents for nroff and troff +.if n \{\ +. ds ' \& +. ds ` \& +. ds ^ \& +. ds , \& +. ds ~ ~ +. ds / +.\} +.if t \{\ +. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" +. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' +. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' +. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' +. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' +. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' +.\} +. \" troff and (daisy-wheel) nroff accents +.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' +.ds 8 \h'\*(#H'\(*b\h'-\*(#H' +.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] +.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' +.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' +.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] +.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] +.ds ae a\h'-(\w'a'u*4/10)'e +.ds Ae A\h'-(\w'A'u*4/10)'E +. \" corrections for vroff +.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' +.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' +. \" for low resolution devices (crt and lpr) +.if \n(.H>23 .if \n(.V>19 \ +\{\ +. ds : e +. ds 8 ss +. ds o a +. ds d- d\h'-1'\(ga +. ds D- D\h'-1'\(hy +. ds th \o'bp' +. ds Th \o'LP' +. ds ae ae +. ds Ae AE +.\} +.rm #[ #] #H #V #F C +----END OF PREAMBLE---- + +# This table is taken nearly verbatim from Tom Christiansen's pod2man. It +# assumes that the standard preamble has already been printed, since that's +# what defines all of the accent marks. Note that some of these are quoted +# with double quotes since they contain embedded single quotes, so use \\ +# uniformly for backslash for readability. +%ESCAPES = ( + 'amp' => '&', # ampersand + 'lt' => '<', # left chevron, less-than + 'gt' => '>', # right chevron, greater-than + 'quot' => '"', # double quote + + 'Aacute' => "A\\*'", # capital A, acute accent + 'aacute' => "a\\*'", # small a, acute accent + 'Acirc' => 'A\\*^', # capital A, circumflex accent + 'acirc' => 'a\\*^', # small a, circumflex accent + 'AElig' => '\*(AE', # capital AE diphthong (ligature) + 'aelig' => '\*(ae', # small ae diphthong (ligature) + 'Agrave' => "A\\*`", # capital A, grave accent + 'agrave' => "A\\*`", # small a, grave accent + 'Aring' => 'A\\*o', # capital A, ring + 'aring' => 'a\\*o', # small a, ring + 'Atilde' => 'A\\*~', # capital A, tilde + 'atilde' => 'a\\*~', # small a, tilde + 'Auml' => 'A\\*:', # capital A, dieresis or umlaut mark + 'auml' => 'a\\*:', # small a, dieresis or umlaut mark + 'Ccedil' => 'C\\*,', # capital C, cedilla + 'ccedil' => 'c\\*,', # small c, cedilla + 'Eacute' => "E\\*'", # capital E, acute accent + 'eacute' => "e\\*'", # small e, acute accent + 'Ecirc' => 'E\\*^', # capital E, circumflex accent + 'ecirc' => 'e\\*^', # small e, circumflex accent + 'Egrave' => 'E\\*`', # capital E, grave accent + 'egrave' => 'e\\*`', # small e, grave accent + 'ETH' => '\\*(D-', # capital Eth, Icelandic + 'eth' => '\\*(d-', # small eth, Icelandic + 'Euml' => 'E\\*:', # capital E, dieresis or umlaut mark + 'euml' => 'e\\*:', # small e, dieresis or umlaut mark + 'Iacute' => "I\\*'", # capital I, acute accent + 'iacute' => "i\\*'", # small i, acute accent + 'Icirc' => 'I\\*^', # capital I, circumflex accent + 'icirc' => 'i\\*^', # small i, circumflex accent + 'Igrave' => 'I\\*`', # capital I, grave accent + 'igrave' => 'i\\*`', # small i, grave accent + 'Iuml' => 'I\\*:', # capital I, dieresis or umlaut mark + 'iuml' => 'i\\*:', # small i, dieresis or umlaut mark + 'Ntilde' => 'N\*~', # capital N, tilde + 'ntilde' => 'n\*~', # small n, tilde + 'Oacute' => "O\\*'", # capital O, acute accent + 'oacute' => "o\\*'", # small o, acute accent + 'Ocirc' => 'O\\*^', # capital O, circumflex accent + 'ocirc' => 'o\\*^', # small o, circumflex accent + 'Ograve' => 'O\\*`', # capital O, grave accent + 'ograve' => 'o\\*`', # small o, grave accent + 'Oslash' => 'O\\*/', # capital O, slash + 'oslash' => 'o\\*/', # small o, slash + 'Otilde' => 'O\\*~', # capital O, tilde + 'otilde' => 'o\\*~', # small o, tilde + 'Ouml' => 'O\\*:', # capital O, dieresis or umlaut mark + 'ouml' => 'o\\*:', # small o, dieresis or umlaut mark + 'szlig' => '\*8', # small sharp s, German (sz ligature) + 'THORN' => '\\*(Th', # capital THORN, Icelandic + 'thorn' => '\\*(th', # small thorn, Icelandic + 'Uacute' => "U\\*'", # capital U, acute accent + 'uacute' => "u\\*'", # small u, acute accent + 'Ucirc' => 'U\\*^', # capital U, circumflex accent + 'ucirc' => 'u\\*^', # small u, circumflex accent + 'Ugrave' => 'U\\*`', # capital U, grave accent + 'ugrave' => 'u\\*`', # small u, grave accent + 'Uuml' => 'U\\*:', # capital U, dieresis or umlaut mark + 'uuml' => 'u\\*:', # small u, dieresis or umlaut mark + 'Yacute' => "Y\\*'", # capital Y, acute accent + 'yacute' => "y\\*'", # small y, acute accent + 'yuml' => 'y\\*:', # small y, dieresis or umlaut mark +); + + +############################################################################ +# Static helper functions +############################################################################ + +# Protect leading quotes and periods against interpretation as commands. +# Also protect anything starting with a backslash, since it could expand +# or hide something that *roff would interpret as a command. This is +# overkill, but it's much simpler than trying to parse *roff here. +sub protect { + local $_ = shift; + s/^([.\'\\])/\\&$1/mg; + $_; +} + +# Given a command and a single argument that may or may not contain double +# quotes, handle double-quote formatting for it. If there are no double +# quotes, just return the command followed by the argument in double quotes. +# If there are double quotes, use an if statement to test for nroff, and for +# nroff output the command followed by the argument in double quotes with +# embedded double quotes doubled. For other formatters, remap paired double +# quotes to `` and ''. +sub switchquotes { + my $command = shift; + local $_ = shift; + my $extra = shift; + s/\\\*\([LR]\"/\"/g; + if (/\"/) { + s/\"/\"\"/g; + my $troff = $_; + $troff =~ s/\"\"([^\"]*)\"\"/\`\`$1\'\'/g; + s/\"/\"\"/g if $extra; + $troff =~ s/\"/\"\"/g if $extra; + $_ = qq("$_") . ($extra ? " $extra" : ''); + $troff = qq("$troff") . ($extra ? " $extra" : ''); + return ".if n $command $_\n.el $command $troff\n"; + } else { + $_ = qq("$_") . ($extra ? " $extra" : ''); + return "$command $_\n"; + } +} + +# Translate a font string into an escape. +sub toescape { (length ($_[0]) > 1 ? '\f(' : '\f') . $_[0] } + + +############################################################################ +# Initialization +############################################################################ + +# Initialize the object. Here, we also process any additional options +# passed to the constructor or set up defaults if none were given. center +# is the centered title, release is the version number, and date is the date +# for the documentation. Note that we can't know what file name we're +# processing due to the architecture of Pod::Parser, so that *has* to either +# be passed to the constructor or set separately with Pod::Man::name(). +sub initialize { + my $self = shift; + + # Figure out the fixed-width font. If user-supplied, make sure that + # they are the right length. + for (qw/fixed fixedbold fixeditalic fixedbolditalic/) { + if (defined $$self{$_}) { + if (length ($$self{$_}) < 1 || length ($$self{$_}) > 2) { + croak "roff font should be 1 or 2 chars, not `$$self{$_}'"; + } + } else { + $$self{$_} = ''; + } + } + + # Set the default fonts. We can't be sure what fixed bold-italic is + # going to be called, so default to just bold. + $$self{fixed} ||= 'CW'; + $$self{fixedbold} ||= 'CB'; + $$self{fixeditalic} ||= 'CI'; + $$self{fixedbolditalic} ||= 'CB'; + + # Set up a table of font escapes. First number is fixed-width, second + # is bold, third is italic. + $$self{FONTS} = { '000' => '\fR', '001' => '\fI', + '010' => '\fB', '011' => '\f(BI', + '100' => toescape ($$self{fixed}), + '101' => toescape ($$self{fixeditalic}), + '110' => toescape ($$self{fixedbold}), + '111' => toescape ($$self{fixedbolditalic})}; + + # Extra stuff for page titles. + $$self{center} = 'User Contributed Perl Documentation' + unless defined $$self{center}; + $$self{indent} = 4 unless defined $$self{indent}; + + # We used to try first to get the version number from a local binary, + # but we shouldn't need that any more. Get the version from the running + # Perl. Work a little magic to handle subversions correctly under both + # the pre-5.6 and the post-5.6 version numbering schemes. + if (!defined $$self{release}) { + my @version = ($] =~ /^(\d+)\.(\d{3})(\d{0,3})$/); + $version[2] ||= 0; + $version[2] *= 10 ** (3 - length $version[2]); + for (@version) { $_ += 0 } + $$self{release} = 'perl v' . join ('.', @version); + } + + # Double quotes in things that will be quoted. + for (qw/center date release/) { + $$self{$_} =~ s/\"/\"\"/g if $$self{$_}; + } + + $$self{INDENT} = 0; # Current indentation level. + $$self{INDENTS} = []; # Stack of indentations. + $$self{INDEX} = []; # Index keys waiting to be printed. + + $self->SUPER::initialize; +} + +# For each document we process, output the preamble first. Note that the +# fixed width font is a global default; once we interpolate it into the +# PREAMBLE, it ain't ever changing. Maybe fix this later. +sub begin_pod { + my $self = shift; + + # Try to figure out the name and section from the file name. + my $section = $$self{section} || 1; + my $name = $$self{name}; + if (!defined $name) { + $name = $self->input_file; + $section = 3 if (!$$self{section} && $name =~ /\.pm\z/i); + $name =~ s/\.p(od|[lm])\z//i; + if ($section =~ /^1/) { + require File::Basename; + $name = uc File::Basename::basename ($name); + } else { + # Lose everything up to the first of + # */lib/*perl* standard or site_perl module + # */*perl*/lib from -D prefix=/opt/perl + # */*perl*/ random module hierarchy + # which works. Should be fixed to use File::Spec. Also handle + # a leading lib/ since that's what ExtUtils::MakeMaker creates. + for ($name) { + s%//+%/%g; + if ( s%^.*?/lib/[^/]*perl[^/]*/%%si + or s%^.*?/[^/]*perl[^/]*/(?:lib/)?%%si) { + s%^site(_perl)?/%%s; # site and site_perl + s%^(.*-$^O|$^O-.*)/%%so; # arch + s%^\d+\.\d+%%s; # version + } + s%^lib/%%; + s%/%::%g; + } + } + } + + # Modification date header. Try to use the modification time of our + # input. + if (!defined $$self{date}) { + my $time = (stat $self->input_file)[9] || time; + my ($day, $month, $year) = (localtime $time)[3,4,5]; + $month++; + $year += 1900; + $$self{date} = sprintf ('%4d-%02d-%02d', $year, $month, $day); + } + + # Now, print out the preamble and the title. + $PREAMBLE =~ s/\@CFONT\@/$$self{fixed}/; + chomp $PREAMBLE; + print { $self->output_handle } <<"----END OF HEADER----"; +.\\" Automatically generated by Pod::Man version $VERSION +.\\" @{[ scalar localtime ]} +.\\" +.\\" Standard preamble: +.\\" ====================================================================== +$PREAMBLE +.\\" ====================================================================== +.\\" +.IX Title "$name $section" +.TH $name $section "$$self{release}" "$$self{date}" "$$self{center}" +.UC +----END OF HEADER---- +#"# for cperl-mode + + # Initialize a few per-file variables. + $$self{INDENT} = 0; + $$self{NEEDSPACE} = 0; +} + + +############################################################################ +# Core overrides +############################################################################ + +# Called for each command paragraph. Gets the command, the associated +# paragraph, the line number, and a Pod::Paragraph object. Just dispatches +# the command to a method named the same as the command. =cut is handled +# internally by Pod::Parser. +sub command { + my $self = shift; + my $command = shift; + return if $command eq 'pod'; + return if ($$self{EXCLUDE} && $command ne 'end'); + $command = 'cmd_' . $command; + $self->$command (@_); +} + +# Called for a verbatim paragraph. Gets the paragraph, the line number, and +# a Pod::Paragraph object. Rofficate backslashes, untabify, put a +# zero-width character at the beginning of each line to protect against +# commands, and wrap in .Vb/.Ve. +sub verbatim { + my $self = shift; + return if $$self{EXCLUDE}; + local $_ = shift; + return if /^\s+$/; + s/\s+$/\n/; + my $lines = tr/\n/\n/; + 1 while s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me; + s/\\/\\e/g; + s/^(\s*\S)/'\&' . $1/gme; + $self->makespace if $$self{NEEDSPACE}; + $self->output (".Vb $lines\n$_.Ve\n"); + $$self{NEEDSPACE} = 0; +} + +# Called for a regular text block. Gets the paragraph, the line number, and +# a Pod::Paragraph object. Perform interpolation and output the results. +sub textblock { + my $self = shift; + return if $$self{EXCLUDE}; + $self->output ($_[0]), return if $$self{VERBATIM}; + + # Perform a little magic to collapse multiple L<> references. We'll + # just rewrite the whole thing into actual text at this part, bypassing + # the whole internal sequence parsing thing. + my $text = shift; + $text =~ s{ + (L< # A link of the form L</something>. + / + ( + [:\w]+ # The item has to be a simple word... + (\(\))? # ...or simple function. + ) + > + ( + ,?\s+(and\s+)? # Allow lots of them, conjuncted. + L< + / + ( [:\w]+ ( \(\) )? ) + > + )+ + ) + } { + local $_ = $1; + s{ L< / ( [^>]+ ) > } {$1}xg; + my @items = split /(?:,?\s+(?:and\s+)?)/; + my $string = 'the '; + my $i; + for ($i = 0; $i < @items; $i++) { + $string .= $items[$i]; + $string .= ', ' if @items > 2 && $i != $#items; + $string .= ' ' if @items == 2 && $i == 2; + $string .= 'and ' if ($i == $#items - 1); + } + $string .= ' entries elsewhere in this document'; + $string; + }gex; + + # Parse the tree and output it. collapse knows about references to + # scalars as well as scalars and does the right thing with them. + $text = $self->parse ($text, @_); + $text =~ s/\n\s*$/\n/; + $self->makespace if $$self{NEEDSPACE}; + $self->output (protect $self->mapfonts ($text)); + $self->outindex; + $$self{NEEDSPACE} = 1; +} + +# Called for an interior sequence. Takes a Pod::InteriorSequence object and +# returns a reference to a scalar. This scalar is the final formatted text. +# It's returned as a reference so that other interior sequences above us +# know that the text has already been processed. +sub sequence { + my ($self, $seq) = @_; + my $command = $seq->cmd_name; + + # Zero-width characters. + if ($command eq 'Z') { + # Workaround to generate a blessable reference, needed by 5.005. + my $tmp = '\&'; + return bless \ "$tmp", 'Pod::Man::String'; + } + + # C<>, L<>, X<>, and E<> don't apply guesswork to their contents. + local $_ = $self->collapse ($seq->parse_tree, $command =~ /^[CELX]$/); + + # Handle E<> escapes. + if ($command eq 'E') { + if (/^\d+$/) { + return bless \ chr ($_), 'Pod::Man::String'; + } elsif (exists $ESCAPES{$_}) { + return bless \ "$ESCAPES{$_}", 'Pod::Man::String'; + } else { + carp "Unknown escape E<$1>"; + return bless \ "E<$_>", 'Pod::Man::String'; + } + } + + # For all the other sequences, empty content produces no output. + return '' if $_ eq ''; + + # Handle formatting sequences. + if ($command eq 'B') { + return bless \ ('\f(BS' . $_ . '\f(BE'), 'Pod::Man::String'; + } elsif ($command eq 'F') { + return bless \ ('\f(IS' . $_ . '\f(IE'), 'Pod::Man::String'; + } elsif ($command eq 'I') { + return bless \ ('\f(IS' . $_ . '\f(IE'), 'Pod::Man::String'; + } elsif ($command eq 'C') { + s/-/\\-/g; + s/__/_\\|_/g; + return bless \ ('\f(FS\*(C`' . $_ . "\\*(C'\\f(FE"), + 'Pod::Man::String'; + } + + # Handle links. + if ($command eq 'L') { + # A bug in lvalue subs in 5.6 requires the temporary variable. + my $tmp = $self->buildlink ($_); + return bless \ "$tmp", 'Pod::Man::String'; + } + + # Whitespace protection replaces whitespace with "\ ". + if ($command eq 'S') { + s/\s+/\\ /g; + return bless \ "$_", 'Pod::Man::String'; + } + + # Add an index entry to the list of ones waiting to be output. + if ($command eq 'X') { push (@{ $$self{INDEX} }, $_); return '' } + + # Anything else is unknown. + carp "Unknown sequence $command<$_>"; +} + + +############################################################################ +# Command paragraphs +############################################################################ + +# All command paragraphs take the paragraph and the line number. + +# First level heading. We can't output .IX in the NAME section due to a bug +# in some versions of catman, so don't output a .IX for that section. .SH +# already uses small caps, so remove any E<> sequences that would cause +# them. +sub cmd_head1 { + my $self = shift; + local $_ = $self->parse (@_); + s/\s+$//; + s/\\s-?\d//g; + $self->output (switchquotes ('.SH', $self->mapfonts ($_))); + $self->outindex (($_ eq 'NAME') ? () : ('Header', $_)); + $$self{NEEDSPACE} = 0; +} + +# Second level heading. +sub cmd_head2 { + my $self = shift; + local $_ = $self->parse (@_); + s/\s+$//; + $self->output (switchquotes ('.Sh', $self->mapfonts ($_))); + $self->outindex ('Subsection', $_); + $$self{NEEDSPACE} = 0; +} + +# Start a list. For indents after the first, wrap the outside indent in .RS +# so that hanging paragraph tags will be correct. +sub cmd_over { + my $self = shift; + local $_ = shift; + unless (/^[-+]?\d+\s+$/) { $_ = $$self{indent} } + if (@{ $$self{INDENTS} } > 0) { + $self->output (".RS $$self{INDENT}\n"); + } + push (@{ $$self{INDENTS} }, $$self{INDENT}); + $$self{INDENT} = ($_ + 0); +} + +# End a list. If we've closed an embedded indent, we've mangled the hanging +# paragraph indent, so temporarily replace it with .RS and set WEIRDINDENT. +# We'll close that .RS at the next =back or =item. +sub cmd_back { + my $self = shift; + $$self{INDENT} = pop @{ $$self{INDENTS} }; + unless (defined $$self{INDENT}) { + carp "Unmatched =back"; + $$self{INDENT} = 0; + } + if ($$self{WEIRDINDENT}) { + $self->output (".RE\n"); + $$self{WEIRDINDENT} = 0; + } + if (@{ $$self{INDENTS} } > 0) { + $self->output (".RE\n"); + $self->output (".RS $$self{INDENT}\n"); + $$self{WEIRDINDENT} = 1; + } + $$self{NEEDSPACE} = 1; +} + +# An individual list item. Emit an index entry for anything that's +# interesting, but don't emit index entries for things like bullets and +# numbers. rofficate bullets too while we're at it (so for nice output, use +# * for your lists rather than o or . or - or some other thing). Newlines +# in an item title are turned into spaces since *roff can't handle them +# embedded. +sub cmd_item { + my $self = shift; + local $_ = $self->parse (@_); + s/\s+$//; + s/\s*\n\s*/ /g; + my $index; + if (/\w/ && !/^\w[.\)]\s*$/) { + $index = $_; + $index =~ s/^\s*[-*+o.]?\s*//; + } + s/^\*(\s|\Z)/\\\(bu$1/; + if ($$self{WEIRDINDENT}) { + $self->output (".RE\n"); + $$self{WEIRDINDENT} = 0; + } + $_ = $self->mapfonts ($_); + $self->output (switchquotes ('.Ip', $_, $$self{INDENT})); + $self->outindex ($index ? ('Item', $index) : ()); + $$self{NEEDSPACE} = 0; +} + +# Begin a block for a particular translator. Setting VERBATIM triggers +# special handling in textblock(). +sub cmd_begin { + my $self = shift; + local $_ = shift; + my ($kind) = /^(\S+)/ or return; + if ($kind eq 'man' || $kind eq 'roff') { + $$self{VERBATIM} = 1; + } else { + $$self{EXCLUDE} = 1; + } +} + +# End a block for a particular translator. We assume that all =begin/=end +# pairs are properly closed. +sub cmd_end { + my $self = shift; + $$self{EXCLUDE} = 0; + $$self{VERBATIM} = 0; +} + +# One paragraph for a particular translator. Ignore it unless it's intended +# for man or roff, in which case we output it verbatim. +sub cmd_for { + my $self = shift; + local $_ = shift; + return unless s/^(?:man|roff)\b[ \t]*\n?//; + $self->output ($_); +} + + +############################################################################ +# Link handling +############################################################################ + +# Handle links. We can't actually make real hyperlinks, so this is all to +# figure out what text and formatting we print out. +sub buildlink { + my $self = shift; + local $_ = shift; + + # Smash whitespace in case we were split across multiple lines. + s/\s+/ /g; + + # If we were given any explicit text, just output it. + if (m{ ^ ([^|]+) \| }x) { return $1 } + + # Okay, leading and trailing whitespace isn't important. + s/^\s+//; + s/\s+$//; + + # Default to using the whole content of the link entry as a section + # name. Note that L<manpage/> forces a manpage interpretation, as does + # something looking like L<manpage(section)>. Do the same thing to + # L<manpage(section)> as we would to manpage(section) without the L<>; + # see guesswork(). If we've added italics, don't add the "manpage" + # text; markup is sufficient. + my ($manpage, $section) = ('', $_); + if (/^"\s*(.*?)\s*"$/) { + $section = '"' . $1 . '"'; + } elsif (m{ ^ [-:.\w]+ (?: \( \S+ \) )? $ }x) { + ($manpage, $section) = ($_, ''); + $manpage =~ s/^([^\(]+)\(/'\f(IS' . $1 . '\f(IE\|('/e; + } elsif (m%/%) { + ($manpage, $section) = split (/\s*\/\s*/, $_, 2); + if ($manpage =~ /^[-:.\w]+(?:\(\S+\))?$/) { + $manpage =~ s/^([^\(]+)\(/'\f(IS' . $1 . '\f(IE\|'/e; + } + $section =~ s/^\"\s*//; + $section =~ s/\s*\"$//; + } + if ($manpage && $manpage !~ /\\f\(IS/) { + $manpage = "the $manpage manpage"; + } + + # Now build the actual output text. + my $text = ''; + if (!length ($section) && !length ($manpage)) { + carp "Invalid link $_"; + } elsif (!length ($section)) { + $text = $manpage; + } elsif ($section =~ /^[:\w]+(?:\(\))?/) { + $text .= 'the ' . $section . ' entry'; + $text .= (length $manpage) ? " in $manpage" + : " elsewhere in this document"; + } else { + if ($section !~ /^".*"$/) { $section = '"' . $section . '"' } + $text .= 'the section on ' . $section; + $text .= " in $manpage" if length $manpage; + } + $text; +} + + +############################################################################ +# Escaping and fontification +############################################################################ + +# At this point, we'll have embedded font codes of the form \f(<font>[SE] +# where <font> is one of B, I, or F. Turn those into the right font start +# or end codes. B<someI<thing> else> should map to \fBsome\f(BIthing\fB +# else\fR. The old pod2man didn't get this right; the second \fB was \fR, +# so nested sequences didn't work right. We take care of this by using +# variables as a combined pointer to our current font sequence, and set each +# to the number of current nestings of start tags for that font. Use them +# as a vector to look up what font sequence to use. +sub mapfonts { + my $self = shift; + local $_ = shift; + + my ($fixed, $bold, $italic) = (0, 0, 0); + my %magic = (F => \$fixed, B => \$bold, I => \$italic); + s { \\f\((.)(.) } { + ${ $magic{$1} } += ($2 eq 'S') ? 1 : -1; + $$self{FONTS}{($fixed && 1) . ($bold && 1) . ($italic && 1)}; + }gxe; + $_; +} + + +############################################################################ +# *roff-specific parsing +############################################################################ + +# Called instead of parse_text, calls parse_text with the right flags. +sub parse { + my $self = shift; + $self->parse_text ({ -expand_seq => 'sequence', + -expand_ptree => 'collapse' }, @_); +} + +# Takes a parse tree and a flag saying whether or not to treat it as literal +# text (not call guesswork on it), and returns the concatenation of all of +# the text strings in that parse tree. If the literal flag isn't true, +# guesswork() will be called on all plain scalars in the parse tree. +# Assumes that everything in the parse tree is either a scalar or a +# reference to a scalar. +sub collapse { + my ($self, $ptree, $literal) = @_; + if ($literal) { + return join ('', map { + if (ref $_) { + $$_; + } else { + s/\\/\\e/g; + $_; + } + } $ptree->children); + } else { + return join ('', map { + ref ($_) ? $$_ : $self->guesswork ($_) + } $ptree->children); + } +} + +# Takes a text block to perform guesswork on; this is guaranteed not to +# contain any interior sequences. Returns the text block with remapping +# done. +sub guesswork { + my $self = shift; + local $_ = shift; + + # rofficate backslashes. + s/\\/\\e/g; + + # Ensure double underbars have a tiny space between them. + s/__/_\\|_/g; + + # Make all caps a little smaller. Be careful here, since we don't want + # to make @ARGV into small caps, nor do we want to fix the MIME in + # MIME-Version, since it looks weird with the full-height V. + s{ + ( ^ | [\s\(\"\'\`\[\{<>] ) + ( [A-Z] [A-Z] [/A-Z+:\d_\$&-]* ) + (?: (?= [\s>\}\]\)\'\".?!,;:] | -- ) | $ ) + } { $1 . '\s-1' . $2 . '\s0' }egx; + + # Turn PI into a pretty pi. + s{ (?: \\s-1 | \b ) PI (?: \\s0 | \b ) } {\\*\(PI}gx; + + # Italize functions in the form func(). + s{ + \b + ( + [:\w]+ (?:\\s-1)? \(\) + ) + } { '\f(IS' . $1 . '\f(IE' }egx; + + # func(n) is a reference to a manual page. Make it \fIfunc\fR\|(n). + s{ + \b + (\w[-:.\w]+ (?:\\s-1)?) + ( + \( [^\)] \) + ) + } { '\f(IS' . $1 . '\f(IE\|' . $2 }egx; + + # Convert simple Perl variable references to a fixed-width font. + s{ + ( \s+ ) + ( [\$\@%] [\w:]+ ) + (?! \( ) + } { $1 . '\f(FS' . $2 . '\f(FE'}egx; + + # Translate -- into a real em dash if it's used like one and fix up + # dashes, but keep hyphens hyphens. + s{ (\G|^|.) (-+) (\b|.) } { + my ($pre, $dash, $post) = ($1, $2, $3); + if (length ($dash) == 1) { + ($pre =~ /[a-zA-Z]/) ? "$pre-$post" : "$pre\\-$post"; + } elsif (length ($dash) == 2 + && ((!$pre && !$post) + || ($pre =~ /\w/ && !$post) + || ($pre eq ' ' && $post eq ' ') + || ($pre eq '=' && $post ne '=') + || ($pre ne '=' && $post eq '='))) { + "$pre\\*(--$post"; + } else { + $pre . ('\-' x length $dash) . $post; + } + }egxs; + + # Fix up double quotes. + s{ \" ([^\"]+) \" } { '\*(L"' . $1 . '\*(R"' }egx; + + # Make C++ into \*(C+, which is a squinched version. + s{ \b C\+\+ } {\\*\(C+}gx; + + # All done. + $_; +} + + +############################################################################ +# Output formatting +############################################################################ + +# Make vertical whitespace. +sub makespace { + my $self = shift; + $self->output ($$self{INDENT} > 0 ? ".Sp\n" : ".PP\n"); +} + +# Output any pending index entries, and optionally an index entry given as +# an argument. Support multiple index entries in X<> separated by slashes, +# and strip special escapes from index entries. +sub outindex { + my ($self, $section, $index) = @_; + my @entries = map { split m%\s*/\s*% } @{ $$self{INDEX} }; + return unless ($section || @entries); + $$self{INDEX} = []; + my $output; + if (@entries) { + my $output = '.IX Xref "' + . join (' ', map { s/\"/\"\"/; $_ } @entries) + . '"' . "\n"; + } + if ($section) { + $index =~ s/\"/\"\"/; + $index =~ s/\\-/-/g; + $index =~ s/\\(?:s-?\d|.\(..|.)//g; + $output .= ".IX $section " . '"' . $index . '"' . "\n"; + } + $self->output ($output); +} + +# Output text to the output device. +sub output { print { $_[0]->output_handle } $_[1] } + +__END__ + +.\" These are some extra bits of roff that I don't want to lose track of +.\" but that have been removed from the preamble to make it a bit shorter +.\" since they're not currently being used. They're accents and special +.\" characters we don't currently have escapes for. +.if n \{\ +. ds ? ? +. ds ! ! +. ds q +.\} +.if t \{\ +. ds ? \s-2c\h'-\w'c'u*7/10'\u\h'\*(#H'\zi\d\s+2\h'\w'c'u*8/10' +. ds ! \s-2\(or\s+2\h'-\w'\(or'u'\v'-.8m'.\v'.8m' +. ds q o\h'-\w'o'u*8/10'\s-4\v'.4m'\z\(*i\v'-.4m'\s+4\h'\w'o'u*8/10' +.\} +.ds v \\k:\h'-(\\n(.wu*9/10-\*(#H)'\v'-\*(#V'\*(#[\s-4v\s0\v'\*(#V'\h'|\\n:u'\*(#] +.ds _ \\k:\h'-(\\n(.wu*9/10-\*(#H+(\*(#F*2/3))'\v'-.4m'\z\(hy\v'.4m'\h'|\\n:u' +.ds . \\k:\h'-(\\n(.wu*8/10)'\v'\*(#V*4/10'\z.\v'-\*(#V*4/10'\h'|\\n:u' +.ds 3 \*(#[\v'.2m'\s-2\&3\s0\v'-.2m'\*(#] +.ds oe o\h'-(\w'o'u*4/10)'e +.ds Oe O\h'-(\w'O'u*4/10)'E +.if \n(.H>23 .if \n(.V>19 \ +\{\ +. ds v \h'-1'\o'\(aa\(ga' +. ds _ \h'-1'^ +. ds . \h'-1'. +. ds 3 3 +. ds oe oe +. ds Oe OE +.\} + +############################################################################ +# Documentation +############################################################################ + +=head1 NAME + +Pod::Man - Convert POD data to formatted *roff input + +=head1 SYNOPSIS + + use Pod::Man; + my $parser = Pod::Man->new (release => $VERSION, section => 8); + + # Read POD from STDIN and write to STDOUT. + $parser->parse_from_filehandle; + + # Read POD from file.pod and write to file.1. + $parser->parse_from_file ('file.pod', 'file.1'); + +=head1 DESCRIPTION + +Pod::Man is a module to convert documentation in the POD format (the +preferred language for documenting Perl) into *roff input using the man +macro set. The resulting *roff code is suitable for display on a terminal +using nroff(1), normally via man(1), or printing using troff(1). It is +conventionally invoked using the driver script B<pod2man>, but it can also +be used directly. + +As a derived class from Pod::Parser, Pod::Man supports the same methods and +interfaces. See L<Pod::Parser> for all the details; briefly, one creates a +new parser with C<Pod::Man-E<gt>new()> and then calls either +parse_from_filehandle() or parse_from_file(). + +new() can take options, in the form of key/value pairs that control the +behavior of the parser. See below for details. + +If no options are given, Pod::Man uses the name of the input file with any +trailing C<.pod>, C<.pm>, or C<.pl> stripped as the man page title, to +section 1 unless the file ended in C<.pm> in which case it defaults to +section 3, to a centered title of "User Contributed Perl Documentation", to +a centered footer of the Perl version it is run with, and to a left-hand +footer of the modification date of its input (or the current date if given +STDIN for input). + +Pod::Man assumes that your *roff formatters have a fixed-width font named +CW. If yours is called something else (like CR), use the C<fixed> option to +specify it. This generally only matters for troff output for printing. +Similarly, you can set the fonts used for bold, italic, and bold italic +fixed-width output. + +Besides the obvious pod conversions, Pod::Man also takes care of formatting +func(), func(n), and simple variable references like $foo or @bar so you +don't have to use code escapes for them; complex expressions like +C<$fred{'stuff'}> will still need to be escaped, though. It also translates +dashes that aren't used as hyphens into en dashes, makes long dashes--like +this--into proper em dashes, fixes "paired quotes," makes C++ and PI look +right, puts a little space between double underbars, makes ALLCAPS a teeny +bit smaller in troff(1), and escapes stuff that *roff treats as special so +that you don't have to. + +The recognized options to new() are as follows. All options take a single +argument. + +=over 4 + +=item center + +Sets the centered page header to use instead of "User Contributed Perl +Documentation". + +=item date + +Sets the left-hand footer. By default, the modification date of the input +file will be used, or the current date if stat() can't find that file (the +case if the input is from STDIN), and the date will be formatted as +YYYY-MM-DD. + +=item fixed + +The fixed-width font to use for vertabim text and code. Defaults to CW. +Some systems may want CR instead. Only matters for troff(1) output. + +=item fixedbold + +Bold version of the fixed-width font. Defaults to CB. Only matters for +troff(1) output. + +=item fixeditalic + +Italic version of the fixed-width font (actually, something of a misnomer, +since most fixed-width fonts only have an oblique version, not an italic +version). Defaults to CI. Only matters for troff(1) output. + +=item fixedbolditalic + +Bold italic (probably actually oblique) version of the fixed-width font. +Pod::Man doesn't assume you have this, and defaults to CB. Some systems +(such as Solaris) have this font available as CX. Only matters for troff(1) +output. + +=item release + +Set the centered footer. By default, this is the version of Perl you run +Pod::Man under. Note that some system an macro sets assume that the +centered footer will be a modification date and will prepend something like +"Last modified: "; if this is the case, you may want to set C<release> to +the last modified date and C<date> to the version number. + +=item section + +Set the section for the C<.TH> macro. The standard section numbering +convention is to use 1 for user commands, 2 for system calls, 3 for +functions, 4 for devices, 5 for file formats, 6 for games, 7 for +miscellaneous information, and 8 for administrator commands. There is a lot +of variation here, however; some systems (like Solaris) use 4 for file +formats, 5 for miscellaneous information, and 7 for devices. Still others +use 1m instead of 8, or some mix of both. About the only section numbers +that are reliably consistent are 1, 2, and 3. + +By default, section 1 will be used unless the file ends in .pm in which case +section 3 will be selected. + +=back + +The standard Pod::Parser method parse_from_filehandle() takes up to two +arguments, the first being the file handle to read POD from and the second +being the file handle to write the formatted output to. The first defaults +to STDIN if not given, and the second defaults to STDOUT. The method +parse_from_file() is almost identical, except that its two arguments are the +input and output disk files instead. See L<Pod::Parser> for the specific +details. + +=head1 DIAGNOSTICS + +=over 4 + +=item roff font should be 1 or 2 chars, not `%s' + +(F) You specified a *roff font (using C<fixed>, C<fixedbold>, etc.) that +wasn't either one or two characters. Pod::Man doesn't support *roff fonts +longer than two characters, although some *roff extensions do (the canonical +versions of nroff(1) and troff(1) don't either). + +=item Invalid link %s + +(W) The POD source contained a C<LE<lt>E<gt>> sequence that Pod::Man was +unable to parse. You should never see this error message; it probably +indicates a bug in Pod::Man. + +=item Unknown escape EE<lt>%sE<gt> + +(W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Man didn't +know about. C<EE<lt>%sE<gt>> was printed verbatim in the output. + +=item Unknown sequence %s + +(W) The POD source contained a non-standard interior sequence (something of +the form C<XE<lt>E<gt>>) that Pod::Man didn't know about. It was ignored. + +=item Unmatched =back + +(W) Pod::Man encountered a C<=back> command that didn't correspond to an +C<=over> command. + +=back + +=head1 BUGS + +The lint-like features and strict POD format checking done by B<pod2man> are +not yet implemented and should be, along with the corresponding C<lax> +option. + +The NAME section should be recognized specially and index entries emitted +for everything in that section. This would have to be deferred until the +next section, since extraneous things in NAME tends to confuse various man +page processors. + +The handling of hyphens, en dashes, and em dashes is somewhat fragile, and +one may get the wrong one under some circumstances. This should only matter +for troff(1) output. + +When and whether to use small caps is somewhat tricky, and Pod::Man doesn't +necessarily get it right. + +Pod::Man doesn't handle font names longer than two characters. Neither do +most troff(1) implementations, but GNU troff does as an extension. It would +be nice to support as an option for those who want to use it. + +The preamble added to each output file is rather verbose, and most of it is +only necessary in the presence of EE<lt>E<gt> escapes for non-ASCII +characters. It would ideally be nice if all of those definitions were only +output if needed, perhaps on the fly as the characters are used. + +Some of the automagic applied to file names assumes Unix directory +separators. + +Pod::Man is excessively slow. + +=head1 SEE ALSO + +L<Pod::Parser|Pod::Parser>, perlpod(1), pod2man(1), nroff(1), troff(1), +man(1), man(7) + +Ossanna, Joseph F., and Brian W. Kernighan. "Troff User's Manual," +Computing Science Technical Report No. 54, AT&T Bell Laboratories. This is +the best documentation of standard nroff(1) and troff(1). At the time of +this writing, it's available at http://www.cs.bell-labs.com/cm/cs/cstr.html. + +The man page documenting the man macro set may be man(5) instead of man(7) +on your system. Also, please see pod2man(1) for extensive documentation on +writing manual pages if you've not done it before and aren't familiar with +the conventions. + +=head1 AUTHOR + +Russ Allbery E<lt>rra@stanford.eduE<gt>, based I<very> heavily on the +original B<pod2man> by Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>. + +=cut diff --git a/contrib/perl5/lib/Pod/ParseUtils.pm b/contrib/perl5/lib/Pod/ParseUtils.pm new file mode 100644 index 0000000..2cb8cdc --- /dev/null +++ b/contrib/perl5/lib/Pod/ParseUtils.pm @@ -0,0 +1,837 @@ +############################################################################# +# Pod/ParseUtils.pm -- helpers for POD parsing and conversion +# +# Copyright (C) 1999-2000 by Marek Rouchal. All rights reserved. +# This file is part of "PodParser". PodParser is free software; +# you can redistribute it and/or modify it under the same terms +# as Perl itself. +############################################################################# + +package Pod::ParseUtils; + +use vars qw($VERSION); +$VERSION = 0.2; ## Current version of this package +require 5.005; ## requires this Perl version or later + +=head1 NAME + +Pod::ParseUtils - helpers for POD parsing and conversion + +=head1 SYNOPSIS + + use Pod::ParseUtils; + + my $list = new Pod::List; + my $link = Pod::Hyperlink->new('Pod::Parser'); + +=head1 DESCRIPTION + +B<Pod::ParseUtils> contains a few object-oriented helper packages for +POD parsing and processing (i.e. in POD formatters and translators). + +=cut + +#----------------------------------------------------------------------------- +# Pod::List +# +# class to hold POD list info (=over, =item, =back) +#----------------------------------------------------------------------------- + +package Pod::List; + +use Carp; + +=head2 Pod::List + +B<Pod::List> can be used to hold information about POD lists +(written as =over ... =item ... =back) for further processing. +The following methods are available: + +=over 4 + +=item new() + +Create a new list object. Properties may be specified through a hash +reference like this: + + my $list = Pod::List->new({ -start => $., -indent => 4 }); + +See the individual methods/properties for details. + +=cut + +sub new { + my $this = shift; + my $class = ref($this) || $this; + my %params = @_; + my $self = {%params}; + bless $self, $class; + $self->initialize(); + return $self; +} + +sub initialize { + my $self = shift; + $self->{-file} ||= 'unknown'; + $self->{-start} ||= 'unknown'; + $self->{-indent} ||= 4; # perlpod: "should be the default" + $self->{_items} = []; + $self->{-type} ||= ''; +} + +=item file() + +Without argument, retrieves the file name the list is in. This must +have been set before by either specifying B<-file> in the B<new()> +method or by calling the B<file()> method with a scalar argument. + +=cut + +# The POD file name the list appears in +sub file { + return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file}; +} + +=item start() + +Without argument, retrieves the line number where the list started. +This must have been set before by either specifying B<-start> in the +B<new()> method or by calling the B<start()> method with a scalar +argument. + +=cut + +# The line in the file the node appears +sub start { + return (@_ > 1) ? ($_[0]->{-start} = $_[1]) : $_[0]->{-start}; +} + +=item indent() + +Without argument, retrieves the indent level of the list as specified +in C<=over n>. This must have been set before by either specifying +B<-indent> in the B<new()> method or by calling the B<indent()> method +with a scalar argument. + +=cut + +# indent level +sub indent { + return (@_ > 1) ? ($_[0]->{-indent} = $_[1]) : $_[0]->{-indent}; +} + +=item type() + +Without argument, retrieves the list type, which can be an arbitrary value, +e.g. C<OL>, C<UL>, ... when thinking the HTML way. +This must have been set before by either specifying +B<-type> in the B<new()> method or by calling the B<type()> method +with a scalar argument. + +=cut + +# The type of the list (UL, OL, ...) +sub type { + return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type}; +} + +=item rx() + +Without argument, retrieves a regular expression for simplifying the +individual item strings once the list type has been determined. Usage: +E.g. when converting to HTML, one might strip the leading number in +an ordered list as C<E<lt>OLE<gt>> already prints numbers itself. +This must have been set before by either specifying +B<-rx> in the B<new()> method or by calling the B<rx()> method +with a scalar argument. + +=cut + +# The regular expression to simplify the items +sub rx { + return (@_ > 1) ? ($_[0]->{-rx} = $_[1]) : $_[0]->{-rx}; +} + +=item item() + +Without argument, retrieves the array of the items in this list. +The items may be represented by any scalar. +If an argument has been given, it is pushed on the list of items. + +=cut + +# The individual =items of this list +sub item { + my ($self,$item) = @_; + if(defined $item) { + push(@{$self->{_items}}, $item); + return $item; + } + else { + return @{$self->{_items}}; + } +} + +=item parent() + +Without argument, retrieves information about the parent holding this +list, which is represented as an arbitrary scalar. +This must have been set before by either specifying +B<-parent> in the B<new()> method or by calling the B<parent()> method +with a scalar argument. + +=cut + +# possibility for parsers/translators to store information about the +# lists's parent object +sub parent { + return (@_ > 1) ? ($_[0]->{-parent} = $_[1]) : $_[0]->{-parent}; +} + +=item tag() + +Without argument, retrieves information about the list tag, which can be +any scalar. +This must have been set before by either specifying +B<-tag> in the B<new()> method or by calling the B<tag()> method +with a scalar argument. + +=back + +=cut + +# possibility for parsers/translators to store information about the +# list's object +sub tag { + return (@_ > 1) ? ($_[0]->{-tag} = $_[1]) : $_[0]->{-tag}; +} + +#----------------------------------------------------------------------------- +# Pod::Hyperlink +# +# class to manipulate POD hyperlinks (L<>) +#----------------------------------------------------------------------------- + +package Pod::Hyperlink; + +=head2 Pod::Hyperlink + +B<Pod::Hyperlink> is a class for manipulation of POD hyperlinks. Usage: + + my $link = Pod::Hyperlink->new('alternative text|page/"section in page"'); + +The B<Pod::Hyperlink> class is mainly designed to parse the contents of the +C<LE<lt>...E<gt>> sequence, providing a simple interface for accessing the +different parts of a POD hyperlink for further processing. It can also be +used to construct hyperlinks. + +=over 4 + +=item new() + +The B<new()> method can either be passed a set of key/value pairs or a single +scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object +of the class C<Pod::Hyperlink> is returned. The value C<undef> indicates a +failure, the error message is stored in C<$@>. + +=cut + +use Carp; + +sub new { + my $this = shift; + my $class = ref($this) || $this; + my $self = +{}; + bless $self, $class; + $self->initialize(); + if(defined $_[0]) { + if(ref($_[0])) { + # called with a list of parameters + %$self = %{$_[0]}; + $self->_construct_text(); + } + else { + # called with L<> contents + return undef unless($self->parse($_[0])); + } + } + return $self; +} + +sub initialize { + my $self = shift; + $self->{-line} ||= 'undef'; + $self->{-file} ||= 'undef'; + $self->{-page} ||= ''; + $self->{-node} ||= ''; + $self->{-alttext} ||= ''; + $self->{-type} ||= 'undef'; + $self->{_warnings} = []; +} + +=item parse($string) + +This method can be used to (re)parse a (new) hyperlink, i.e. the contents +of a C<LE<lt>...E<gt>> sequence. The result is stored in the current object. + +=cut + +sub parse { + my $self = shift; + local($_) = $_[0]; + # syntax check the link and extract destination + my ($alttext,$page,$node,$type) = ('','','',''); + + $self->{_warnings} = []; + + # collapse newlines with whitespace + if(s/\s*\n+\s*/ /g) { + $self->warning("collapsing newlines to blanks"); + } + # strip leading/trailing whitespace + if(s/^[\s\n]+//) { + $self->warning("ignoring leading whitespace in link"); + } + if(s/[\s\n]+$//) { + $self->warning("ignoring trailing whitespace in link"); + } + unless(length($_)) { + _invalid_link("empty link"); + return undef; + } + + ## Check for different possibilities. This is tedious and error-prone + # we match all possibilities (alttext, page, section/item) + #warn "DEBUG: link=$_\n"; + + # only page + # problem: a lot of people use (), or (1) or the like to indicate + # man page sections. But this collides with L<func()> that is supposed + # to point to an internal funtion... + # I would like the following better, here and below: + #if(m!^(\w+(?:::\w+)*)$!) { + my $page_rx = '[\w.]+(?:::[\w.]+)*'; + if(m!^($page_rx)$!o) { + $page = $1; + $type = 'page'; + } + # alttext, page and "section" + elsif(m!^(.+?)\s*[|]\s*($page_rx)\s*/\s*"(.+)"$!o) { + ($alttext, $page, $node) = ($1, $2, $3); + $type = 'section'; + } + # alttext and page + elsif(m!^(.+?)\s*[|]\s*($page_rx)$!o) { + ($alttext, $page) = ($1, $2); + $type = 'page'; + } + # alttext and "section" + elsif(m!^(.+?)\s*[|]\s*(?:/\s*|)"(.+)"$!) { + ($alttext, $node) = ($1,$2); + $type = 'section'; + } + # page and "section" + elsif(m!^($page_rx)\s*/\s*"(.+)"$!o) { + ($page, $node) = ($1, $2); + $type = 'section'; + } + # page and item + elsif(m!^($page_rx)\s*/\s*(.+)$!o) { + ($page, $node) = ($1, $2); + $type = 'item'; + } + # only "section" + elsif(m!^/?"(.+)"$!) { + $node = $1; + $type = 'section'; + } + # only item + elsif(m!^\s*/(.+)$!) { + $node = $1; + $type = 'item'; + } + # non-standard: Hyperlink + elsif(m!^((?:http|ftp|mailto|news):.+)$!i) { + $node = $1; + $type = 'hyperlink'; + } + # alttext, page and item + elsif(m!^(.+?)\s*[|]\s*($page_rx)\s*/\s*(.+)$!o) { + ($alttext, $page, $node) = ($1, $2, $3); + $type = 'item'; + } + # alttext and item + elsif(m!^(.+?)\s*[|]\s*/(.+)$!) { + ($alttext, $node) = ($1,$2); + } + # nonstandard: alttext and hyperlink + elsif(m!^(.+?)\s*[|]\s*((?:http|ftp|mailto|news):.+)$!) { + ($alttext, $node) = ($1,$2); + $type = 'hyperlink'; + } + # must be an item or a "malformed" section (without "") + else { + $node = $_; + $type = 'item'; + } + # collapse whitespace in nodes + $node =~ s/\s+/ /gs; + + #if($page =~ /[(]\w*[)]$/) { + # $self->warning("section in '$page' deprecated"); + #} + if($node =~ m:[|/]:) { + $self->warning("node '$node' contains non-escaped | or /"); + } + if($alttext =~ m:[|/]:) { + $self->warning("alternative text '$node' contains non-escaped | or /"); + } + $self->{-page} = $page; + $self->{-node} = $node; + $self->{-alttext} = $alttext; + #warn "DEBUG: page=$page section=$section item=$item alttext=$alttext\n"; + $self->{-type} = $type; + $self->_construct_text(); + 1; +} + +sub _construct_text { + my $self = shift; + my $alttext = $self->alttext(); + my $type = $self->type(); + my $section = $self->node(); + my $page = $self->page(); + my $page_ext = ''; + $page =~ s/([(]\w*[)])$// && ($page_ext = $1); + if($alttext) { + $self->{_text} = $alttext; + } + elsif($type eq 'hyperlink') { + $self->{_text} = $section; + } + else { + $self->{_text} = (!$section ? '' : + $type eq 'item' ? "the $section entry" : + "the section on $section" ) . + ($page ? ($section ? ' in ':'') . "the $page$page_ext manpage" : + ' elsewhere in this document'); + } + # for being marked up later + # use the non-standard markers P<> and Q<>, so that the resulting + # text can be parsed by the translators. It's their job to put + # the correct hypertext around the linktext + if($alttext) { + $self->{_markup} = "Q<$alttext>"; + } + elsif($type eq 'hyperlink') { + $self->{_markup} = "Q<$section>"; + } + else { + $self->{_markup} = (!$section ? '' : + $type eq 'item' ? "the Q<$section> entry" : + "the section on Q<$section>" ) . + ($page ? ($section ? ' in ':'') . "the P<$page>$page_ext manpage" : + ' elsewhere in this document'); + } +} + +=item markup($string) + +Set/retrieve the textual value of the link. This string contains special +markers C<PE<lt>E<gt>> and C<QE<lt>E<gt>> that should be expanded by the +translator's interior sequence expansion engine to the +formatter-specific code to highlight/activate the hyperlink. The details +have to be implemented in the translator. + +=cut + +#' retrieve/set markuped text +sub markup { + return (@_ > 1) ? ($_[0]->{_markup} = $_[1]) : $_[0]->{_markup}; +} + +=item text() + +This method returns the textual representation of the hyperlink as above, +but without markers (read only). Depending on the link type this is one of +the following alternatives (the + and * denote the portions of the text +that are marked up): + + the +perl+ manpage + the *$|* entry in the +perlvar+ manpage + the section on *OPTIONS* in the +perldoc+ manpage + the section on *DESCRIPTION* elsewhere in this document + +=cut + +# The complete link's text +sub text { + $_[0]->{_text}; +} + +=item warning() + +After parsing, this method returns any warnings encountered during the +parsing process. + +=cut + +# Set/retrieve warnings +sub warning { + my $self = shift; + if(@_) { + push(@{$self->{_warnings}}, @_); + return @_; + } + return @{$self->{_warnings}}; +} + +=item line(), file() + +Just simple slots for storing information about the line and the file +the link was encountered in. Has to be filled in manually. + +=cut + +# The line in the file the link appears +sub line { + return (@_ > 1) ? ($_[0]->{-line} = $_[1]) : $_[0]->{-line}; +} + +# The POD file name the link appears in +sub file { + return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file}; +} + +=item page() + +This method sets or returns the POD page this link points to. + +=cut + +# The POD page the link appears on +sub page { + if (@_ > 1) { + $_[0]->{-page} = $_[1]; + $_[0]->_construct_text(); + } + $_[0]->{-page}; +} + +=item node() + +As above, but the destination node text of the link. + +=cut + +# The link destination +sub node { + if (@_ > 1) { + $_[0]->{-node} = $_[1]; + $_[0]->_construct_text(); + } + $_[0]->{-node}; +} + +=item alttext() + +Sets or returns an alternative text specified in the link. + +=cut + +# Potential alternative text +sub alttext { + if (@_ > 1) { + $_[0]->{-alttext} = $_[1]; + $_[0]->_construct_text(); + } + $_[0]->{-alttext}; +} + +=item type() + +The node type, either C<section> or C<item>. As an unofficial type, +there is also C<hyperlink>, derived from e.g. C<LE<lt>http://perl.comE<gt>> + +=cut + +# The type: item or headn +sub type { + return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type}; +} + +=item link() + +Returns the link as contents of C<LE<lt>E<gt>>. Reciprocal to B<parse()>. + +=back + +=cut + +# The link itself +sub link { + my $self = shift; + my $link = $self->page() || ''; + if($self->node()) { + my $node = $self->node(); + $text =~ s/\|/E<verbar>/g; + $text =~ s:/:E<sol>:g; + if($self->type() eq 'section') { + $link .= ($link ? '/' : '') . '"' . $node . '"'; + } + elsif($self->type() eq 'hyperlink') { + $link = $self->node(); + } + else { # item + $link .= '/' . $node; + } + } + if($self->alttext()) { + my $text = $self->alttext(); + $text =~ s/\|/E<verbar>/g; + $text =~ s:/:E<sol>:g; + $link = "$text|$link"; + } + $link; +} + +sub _invalid_link { + my ($msg) = @_; + # this sets @_ + #eval { die "$msg\n" }; + #chomp $@; + $@ = $msg; # this seems to work, too! + undef; +} + +#----------------------------------------------------------------------------- +# Pod::Cache +# +# class to hold POD page details +#----------------------------------------------------------------------------- + +package Pod::Cache; + +=head2 Pod::Cache + +B<Pod::Cache> holds information about a set of POD documents, +especially the nodes for hyperlinks. +The following methods are available: + +=over 4 + +=item new() + +Create a new cache object. This object can hold an arbitrary number of +POD documents of class Pod::Cache::Item. + +=cut + +sub new { + my $this = shift; + my $class = ref($this) || $this; + my $self = []; + bless $self, $class; + return $self; +} + +=item item() + +Add a new item to the cache. Without arguments, this method returns a +list of all cache elements. + +=cut + +sub item { + my ($self,%param) = @_; + if(%param) { + my $item = Pod::Cache::Item->new(%param); + push(@$self, $item); + return $item; + } + else { + return @{$self}; + } +} + +=item find_page($name) + +Look for a POD document named C<$name> in the cache. Returns the +reference to the corresponding Pod::Cache::Item object or undef if +not found. + +=back + +=cut + +sub find_page { + my ($self,$page) = @_; + foreach(@$self) { + if($_->page() eq $page) { + return $_; + } + } + undef; +} + +package Pod::Cache::Item; + +=head2 Pod::Cache::Item + +B<Pod::Cache::Item> holds information about individual POD documents, +that can be grouped in a Pod::Cache object. +It is intended to hold information about the hyperlink nodes of POD +documents. +The following methods are available: + +=over 4 + +=item new() + +Create a new object. + +=cut + +sub new { + my $this = shift; + my $class = ref($this) || $this; + my %params = @_; + my $self = {%params}; + bless $self, $class; + $self->initialize(); + return $self; +} + +sub initialize { + my $self = shift; + $self->{-nodes} = [] unless(defined $self->{-nodes}); +} + +=item page() + +Set/retrieve the POD document name (e.g. "Pod::Parser"). + +=cut + +# The POD page +sub page { + return (@_ > 1) ? ($_[0]->{-page} = $_[1]) : $_[0]->{-page}; +} + +=item description() + +Set/retrieve the POD short description as found in the C<=head1 NAME> +section. + +=cut + +# The POD description, taken out of NAME if present +sub description { + return (@_ > 1) ? ($_[0]->{-description} = $_[1]) : $_[0]->{-description}; +} + +=item path() + +Set/retrieve the POD file storage path. + +=cut + +# The file path +sub path { + return (@_ > 1) ? ($_[0]->{-path} = $_[1]) : $_[0]->{-path}; +} + +=item file() + +Set/retrieve the POD file name. + +=cut + +# The POD file name +sub file { + return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file}; +} + +=item nodes() + +Add a node (or a list of nodes) to the document's node list. Note that +the order is kept, i.e. start with the first node and end with the last. +If no argument is given, the current list of nodes is returned in the +same order the nodes have been added. +A node can be any scalar, but usually is a pair of node string and +unique id for the C<find_node> method to work correctly. + +=cut + +# The POD nodes +sub nodes { + my ($self,@nodes) = @_; + if(@nodes) { + push(@{$self->{-nodes}}, @nodes); + return @nodes; + } + else { + return @{$self->{-nodes}}; + } +} + +=item find_node($name) + +Look for a node or index entry named C<$name> in the object. +Returns the unique id of the node (i.e. the second element of the array +stored in the node arry) or undef if not found. + +=back + +=cut + +sub find_node { + my ($self,$node) = @_; + my @search; + push(@search, @{$self->{-nodes}}) if($self->{-nodes}); + push(@search, @{$self->{-idx}}) if($self->{-idx}); + foreach(@search) { + if($_->[0] eq $node) { + return $_->[1]; # id + } + } + undef; +} + +=item idx() + +Add an index entry (or a list of them) to the document's index list. Note that +the order is kept, i.e. start with the first node and end with the last. +If no argument is given, the current list of index entries is returned in the +same order the entries have been added. +An index entry can be any scalar, but usually is a pair of string and +unique id. + +=cut + +# The POD index entries +sub idx { + my ($self,@idx) = @_; + if(@idx) { + push(@{$self->{-idx}}, @idx); + return @idx; + } + else { + return @{$self->{-idx}}; + } +} + +=head1 AUTHOR + +Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>, borrowing +a lot of things from L<pod2man> and L<pod2roff> as well as other POD +processing tools by Tom Christiansen, Brad Appleton and Russ Allbery. + +=head1 SEE ALSO + +L<pod2man>, L<pod2roff>, L<Pod::Parser>, L<Pod::Checker>, +L<pod2html> + +=cut + +1; diff --git a/contrib/perl5/lib/Pod/Parser.pm b/contrib/perl5/lib/Pod/Parser.pm new file mode 100644 index 0000000..48fc198 --- /dev/null +++ b/contrib/perl5/lib/Pod/Parser.pm @@ -0,0 +1,1771 @@ +############################################################################# +# Pod/Parser.pm -- package which defines a base class for parsing POD docs. +# +# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved. +# This file is part of "PodParser". PodParser is free software; +# you can redistribute it and/or modify it under the same terms +# as Perl itself. +############################################################################# + +package Pod::Parser; + +use vars qw($VERSION); +$VERSION = 1.12; ## Current version of this package +require 5.005; ## requires this Perl version or later + +############################################################################# + +=head1 NAME + +Pod::Parser - base class for creating POD filters and translators + +=head1 SYNOPSIS + + use Pod::Parser; + + package MyParser; + @ISA = qw(Pod::Parser); + + sub command { + my ($parser, $command, $paragraph, $line_num) = @_; + ## Interpret the command and its text; sample actions might be: + if ($command eq 'head1') { ... } + elsif ($command eq 'head2') { ... } + ## ... other commands and their actions + my $out_fh = $parser->output_handle(); + my $expansion = $parser->interpolate($paragraph, $line_num); + print $out_fh $expansion; + } + + sub verbatim { + my ($parser, $paragraph, $line_num) = @_; + ## Format verbatim paragraph; sample actions might be: + my $out_fh = $parser->output_handle(); + print $out_fh $paragraph; + } + + sub textblock { + my ($parser, $paragraph, $line_num) = @_; + ## Translate/Format this block of text; sample actions might be: + my $out_fh = $parser->output_handle(); + my $expansion = $parser->interpolate($paragraph, $line_num); + print $out_fh $expansion; + } + + sub interior_sequence { + my ($parser, $seq_command, $seq_argument) = @_; + ## Expand an interior sequence; sample actions might be: + return "*$seq_argument*" if ($seq_command eq 'B'); + return "`$seq_argument'" if ($seq_command eq 'C'); + return "_${seq_argument}_'" if ($seq_command eq 'I'); + ## ... other sequence commands and their resulting text + } + + package main; + + ## Create a parser object and have it parse file whose name was + ## given on the command-line (use STDIN if no files were given). + $parser = new MyParser(); + $parser->parse_from_filehandle(\*STDIN) if (@ARGV == 0); + for (@ARGV) { $parser->parse_from_file($_); } + +=head1 REQUIRES + +perl5.005, Pod::InputObjects, Exporter, Symbol, Carp + +=head1 EXPORTS + +Nothing. + +=head1 DESCRIPTION + +B<Pod::Parser> is a base class for creating POD filters and translators. +It handles most of the effort involved with parsing the POD sections +from an input stream, leaving subclasses free to be concerned only with +performing the actual translation of text. + +B<Pod::Parser> parses PODs, and makes method calls to handle the various +components of the POD. Subclasses of B<Pod::Parser> override these methods +to translate the POD into whatever output format they desire. + +=head1 QUICK OVERVIEW + +To create a POD filter for translating POD documentation into some other +format, you create a subclass of B<Pod::Parser> which typically overrides +just the base class implementation for the following methods: + +=over 2 + +=item * + +B<command()> + +=item * + +B<verbatim()> + +=item * + +B<textblock()> + +=item * + +B<interior_sequence()> + +=back + +You may also want to override the B<begin_input()> and B<end_input()> +methods for your subclass (to perform any needed per-file and/or +per-document initialization or cleanup). + +If you need to perform any preprocesssing of input before it is parsed +you may want to override one or more of B<preprocess_line()> and/or +B<preprocess_paragraph()>. + +Sometimes it may be necessary to make more than one pass over the input +files. If this is the case you have several options. You can make the +first pass using B<Pod::Parser> and override your methods to store the +intermediate results in memory somewhere for the B<end_pod()> method to +process. You could use B<Pod::Parser> for several passes with an +appropriate state variable to control the operation for each pass. If +your input source can't be reset to start at the beginning, you can +store it in some other structure as a string or an array and have that +structure implement a B<getline()> method (which is all that +B<parse_from_filehandle()> uses to read input). + +Feel free to add any member data fields you need to keep track of things +like current font, indentation, horizontal or vertical position, or +whatever else you like. Be sure to read L<"PRIVATE METHODS AND DATA"> +to avoid name collisions. + +For the most part, the B<Pod::Parser> base class should be able to +do most of the input parsing for you and leave you free to worry about +how to intepret the commands and translate the result. + +Note that all we have described here in this quick overview is the +simplest most straightforward use of B<Pod::Parser> to do stream-based +parsing. It is also possible to use the B<Pod::Parser::parse_text> function +to do more sophisticated tree-based parsing. See L<"TREE-BASED PARSING">. + +=head1 PARSING OPTIONS + +A I<parse-option> is simply a named option of B<Pod::Parser> with a +value that corresponds to a certain specified behavior. These various +behaviors of B<Pod::Parser> may be enabled/disabled by setting or +or unsetting one or more I<parse-options> using the B<parseopts()> method. +The set of currently accepted parse-options is as follows: + +=over 3 + +=item B<-want_nonPODs> (default: unset) + +Normally (by default) B<Pod::Parser> will only provide access to +the POD sections of the input. Input paragraphs that are not part +of the POD-format documentation are not made available to the caller +(not even using B<preprocess_paragraph()>). Setting this option to a +non-empty, non-zero value will allow B<preprocess_paragraph()> to see +non-POD sections of the input as well as POD sections. The B<cutting()> +method can be used to determine if the corresponding paragraph is a POD +paragraph, or some other input paragraph. + +=item B<-process_cut_cmd> (default: unset) + +Normally (by default) B<Pod::Parser> handles the C<=cut> POD directive +by itself and does not pass it on to the caller for processing. Setting +this option to a non-empty, non-zero value will cause B<Pod::Parser> to +pass the C<=cut> directive to the caller just like any other POD command +(and hence it may be processed by the B<command()> method). + +B<Pod::Parser> will still interpret the C<=cut> directive to mean that +"cutting mode" has been (re)entered, but the caller will get a chance +to capture the actual C<=cut> paragraph itself for whatever purpose +it desires. + +=item B<-warnings> (default: unset) + +Normally (by default) B<Pod::Parser> recognizes a bare minimum of +pod syntax errors and warnings and issues diagnostic messages +for errors, but not for warnings. (Use B<Pod::Checker> to do more +thorough checking of POD syntax.) Setting this option to a non-empty, +non-zero value will cause B<Pod::Parser> to issue diagnostics for +the few warnings it recognizes as well as the errors. + +=back + +Please see L<"parseopts()"> for a complete description of the interface +for the setting and unsetting of parse-options. + +=cut + +############################################################################# + +use vars qw(@ISA); +use strict; +#use diagnostics; +use Pod::InputObjects; +use Carp; +use Exporter; +require VMS::Filespec if $^O eq 'VMS'; +BEGIN { + if ($] < 5.6) { + require Symbol; + import Symbol; + } +} +@ISA = qw(Exporter); + +## These "variables" are used as local "glob aliases" for performance +use vars qw(%myData %myOpts @input_stack); + +############################################################################# + +=head1 RECOMMENDED SUBROUTINE/METHOD OVERRIDES + +B<Pod::Parser> provides several methods which most subclasses will probably +want to override. These methods are as follows: + +=cut + +##--------------------------------------------------------------------------- + +=head1 B<command()> + + $parser->command($cmd,$text,$line_num,$pod_para); + +This method should be overridden by subclasses to take the appropriate +action when a POD command paragraph (denoted by a line beginning with +"=") is encountered. When such a POD directive is seen in the input, +this method is called and is passed: + +=over 3 + +=item C<$cmd> + +the name of the command for this POD paragraph + +=item C<$text> + +the paragraph text for the given POD paragraph command. + +=item C<$line_num> + +the line-number of the beginning of the paragraph + +=item C<$pod_para> + +a reference to a C<Pod::Paragraph> object which contains further +information about the paragraph command (see L<Pod::InputObjects> +for details). + +=back + +B<Note> that this method I<is> called for C<=pod> paragraphs. + +The base class implementation of this method simply treats the raw POD +command as normal block of paragraph text (invoking the B<textblock()> +method with the command paragraph). + +=cut + +sub command { + my ($self, $cmd, $text, $line_num, $pod_para) = @_; + ## Just treat this like a textblock + $self->textblock($pod_para->raw_text(), $line_num, $pod_para); +} + +##--------------------------------------------------------------------------- + +=head1 B<verbatim()> + + $parser->verbatim($text,$line_num,$pod_para); + +This method may be overridden by subclasses to take the appropriate +action when a block of verbatim text is encountered. It is passed the +following parameters: + +=over 3 + +=item C<$text> + +the block of text for the verbatim paragraph + +=item C<$line_num> + +the line-number of the beginning of the paragraph + +=item C<$pod_para> + +a reference to a C<Pod::Paragraph> object which contains further +information about the paragraph (see L<Pod::InputObjects> +for details). + +=back + +The base class implementation of this method simply prints the textblock +(unmodified) to the output filehandle. + +=cut + +sub verbatim { + my ($self, $text, $line_num, $pod_para) = @_; + my $out_fh = $self->{_OUTPUT}; + print $out_fh $text; +} + +##--------------------------------------------------------------------------- + +=head1 B<textblock()> + + $parser->textblock($text,$line_num,$pod_para); + +This method may be overridden by subclasses to take the appropriate +action when a normal block of POD text is encountered (although the base +class method will usually do what you want). It is passed the following +parameters: + +=over 3 + +=item C<$text> + +the block of text for the a POD paragraph + +=item C<$line_num> + +the line-number of the beginning of the paragraph + +=item C<$pod_para> + +a reference to a C<Pod::Paragraph> object which contains further +information about the paragraph (see L<Pod::InputObjects> +for details). + +=back + +In order to process interior sequences, subclasses implementations of +this method will probably want to invoke either B<interpolate()> or +B<parse_text()>, passing it the text block C<$text>, and the corresponding +line number in C<$line_num>, and then perform any desired processing upon +the returned result. + +The base class implementation of this method simply prints the text block +as it occurred in the input stream). + +=cut + +sub textblock { + my ($self, $text, $line_num, $pod_para) = @_; + my $out_fh = $self->{_OUTPUT}; + print $out_fh $self->interpolate($text, $line_num); +} + +##--------------------------------------------------------------------------- + +=head1 B<interior_sequence()> + + $parser->interior_sequence($seq_cmd,$seq_arg,$pod_seq); + +This method should be overridden by subclasses to take the appropriate +action when an interior sequence is encountered. An interior sequence is +an embedded command within a block of text which appears as a command +name (usually a single uppercase character) followed immediately by a +string of text which is enclosed in angle brackets. This method is +passed the sequence command C<$seq_cmd> and the corresponding text +C<$seq_arg>. It is invoked by the B<interpolate()> method for each interior +sequence that occurs in the string that it is passed. It should return +the desired text string to be used in place of the interior sequence. +The C<$pod_seq> argument is a reference to a C<Pod::InteriorSequence> +object which contains further information about the interior sequence. +Please see L<Pod::InputObjects> for details if you need to access this +additional information. + +Subclass implementations of this method may wish to invoke the +B<nested()> method of C<$pod_seq> to see if it is nested inside +some other interior-sequence (and if so, which kind). + +The base class implementation of the B<interior_sequence()> method +simply returns the raw text of the interior sequence (as it occurred +in the input) to the caller. + +=cut + +sub interior_sequence { + my ($self, $seq_cmd, $seq_arg, $pod_seq) = @_; + ## Just return the raw text of the interior sequence + return $pod_seq->raw_text(); +} + +############################################################################# + +=head1 OPTIONAL SUBROUTINE/METHOD OVERRIDES + +B<Pod::Parser> provides several methods which subclasses may want to override +to perform any special pre/post-processing. These methods do I<not> have to +be overridden, but it may be useful for subclasses to take advantage of them. + +=cut + +##--------------------------------------------------------------------------- + +=head1 B<new()> + + my $parser = Pod::Parser->new(); + +This is the constructor for B<Pod::Parser> and its subclasses. You +I<do not> need to override this method! It is capable of constructing +subclass objects as well as base class objects, provided you use +any of the following constructor invocation styles: + + my $parser1 = MyParser->new(); + my $parser2 = new MyParser(); + my $parser3 = $parser2->new(); + +where C<MyParser> is some subclass of B<Pod::Parser>. + +Using the syntax C<MyParser::new()> to invoke the constructor is I<not> +recommended, but if you insist on being able to do this, then the +subclass I<will> need to override the B<new()> constructor method. If +you do override the constructor, you I<must> be sure to invoke the +B<initialize()> method of the newly blessed object. + +Using any of the above invocations, the first argument to the +constructor is always the corresponding package name (or object +reference). No other arguments are required, but if desired, an +associative array (or hash-table) my be passed to the B<new()> +constructor, as in: + + my $parser1 = MyParser->new( MYDATA => $value1, MOREDATA => $value2 ); + my $parser2 = new MyParser( -myflag => 1 ); + +All arguments passed to the B<new()> constructor will be treated as +key/value pairs in a hash-table. The newly constructed object will be +initialized by copying the contents of the given hash-table (which may +have been empty). The B<new()> constructor for this class and all of its +subclasses returns a blessed reference to the initialized object (hash-table). + +=cut + +sub new { + ## Determine if we were called via an object-ref or a classname + my $this = shift; + my $class = ref($this) || $this; + ## Any remaining arguments are treated as initial values for the + ## hash that is used to represent this object. + my %params = @_; + my $self = { %params }; + ## Bless ourselves into the desired class and perform any initialization + bless $self, $class; + $self->initialize(); + return $self; +} + +##--------------------------------------------------------------------------- + +=head1 B<initialize()> + + $parser->initialize(); + +This method performs any necessary object initialization. It takes no +arguments (other than the object instance of course, which is typically +copied to a local variable named C<$self>). If subclasses override this +method then they I<must> be sure to invoke C<$self-E<gt>SUPER::initialize()>. + +=cut + +sub initialize { + #my $self = shift; + #return; +} + +##--------------------------------------------------------------------------- + +=head1 B<begin_pod()> + + $parser->begin_pod(); + +This method is invoked at the beginning of processing for each POD +document that is encountered in the input. Subclasses should override +this method to perform any per-document initialization. + +=cut + +sub begin_pod { + #my $self = shift; + #return; +} + +##--------------------------------------------------------------------------- + +=head1 B<begin_input()> + + $parser->begin_input(); + +This method is invoked by B<parse_from_filehandle()> immediately I<before> +processing input from a filehandle. The base class implementation does +nothing, however, subclasses may override it to perform any per-file +initializations. + +Note that if multiple files are parsed for a single POD document +(perhaps the result of some future C<=include> directive) this method +is invoked for every file that is parsed. If you wish to perform certain +initializations once per document, then you should use B<begin_pod()>. + +=cut + +sub begin_input { + #my $self = shift; + #return; +} + +##--------------------------------------------------------------------------- + +=head1 B<end_input()> + + $parser->end_input(); + +This method is invoked by B<parse_from_filehandle()> immediately I<after> +processing input from a filehandle. The base class implementation does +nothing, however, subclasses may override it to perform any per-file +cleanup actions. + +Please note that if multiple files are parsed for a single POD document +(perhaps the result of some kind of C<=include> directive) this method +is invoked for every file that is parsed. If you wish to perform certain +cleanup actions once per document, then you should use B<end_pod()>. + +=cut + +sub end_input { + #my $self = shift; + #return; +} + +##--------------------------------------------------------------------------- + +=head1 B<end_pod()> + + $parser->end_pod(); + +This method is invoked at the end of processing for each POD document +that is encountered in the input. Subclasses should override this method +to perform any per-document finalization. + +=cut + +sub end_pod { + #my $self = shift; + #return; +} + +##--------------------------------------------------------------------------- + +=head1 B<preprocess_line()> + + $textline = $parser->preprocess_line($text, $line_num); + +This method should be overridden by subclasses that wish to perform +any kind of preprocessing for each I<line> of input (I<before> it has +been determined whether or not it is part of a POD paragraph). The +parameter C<$text> is the input line; and the parameter C<$line_num> is +the line number of the corresponding text line. + +The value returned should correspond to the new text to use in its +place. If the empty string or an undefined value is returned then no +further processing will be performed for this line. + +Please note that the B<preprocess_line()> method is invoked I<before> +the B<preprocess_paragraph()> method. After all (possibly preprocessed) +lines in a paragraph have been assembled together and it has been +determined that the paragraph is part of the POD documentation from one +of the selected sections, then B<preprocess_paragraph()> is invoked. + +The base class implementation of this method returns the given text. + +=cut + +sub preprocess_line { + my ($self, $text, $line_num) = @_; + return $text; +} + +##--------------------------------------------------------------------------- + +=head1 B<preprocess_paragraph()> + + $textblock = $parser->preprocess_paragraph($text, $line_num); + +This method should be overridden by subclasses that wish to perform any +kind of preprocessing for each block (paragraph) of POD documentation +that appears in the input stream. The parameter C<$text> is the POD +paragraph from the input file; and the parameter C<$line_num> is the +line number for the beginning of the corresponding paragraph. + +The value returned should correspond to the new text to use in its +place If the empty string is returned or an undefined value is +returned, then the given C<$text> is ignored (not processed). + +This method is invoked after gathering up all the lines in a paragraph +and after determining the cutting state of the paragraph, +but before trying to further parse or interpret them. After +B<preprocess_paragraph()> returns, the current cutting state (which +is returned by C<$self-E<gt>cutting()>) is examined. If it evaluates +to true then input text (including the given C<$text>) is cut (not +processed) until the next POD directive is encountered. + +Please note that the B<preprocess_line()> method is invoked I<before> +the B<preprocess_paragraph()> method. After all (possibly preprocessed) +lines in a paragraph have been assembled together and either it has been +determined that the paragraph is part of the POD documentation from one +of the selected sections or the C<-want_nonPODs> option is true, +then B<preprocess_paragraph()> is invoked. + +The base class implementation of this method returns the given text. + +=cut + +sub preprocess_paragraph { + my ($self, $text, $line_num) = @_; + return $text; +} + +############################################################################# + +=head1 METHODS FOR PARSING AND PROCESSING + +B<Pod::Parser> provides several methods to process input text. These +methods typically won't need to be overridden (and in some cases they +can't be overridden), but subclasses may want to invoke them to exploit +their functionality. + +=cut + +##--------------------------------------------------------------------------- + +=head1 B<parse_text()> + + $ptree1 = $parser->parse_text($text, $line_num); + $ptree2 = $parser->parse_text({%opts}, $text, $line_num); + $ptree3 = $parser->parse_text(\%opts, $text, $line_num); + +This method is useful if you need to perform your own interpolation +of interior sequences and can't rely upon B<interpolate> to expand +them in simple bottom-up order order. + +The parameter C<$text> is a string or block of text to be parsed +for interior sequences; and the parameter C<$line_num> is the +line number curresponding to the beginning of C<$text>. + +B<parse_text()> will parse the given text into a parse-tree of "nodes." +and interior-sequences. Each "node" in the parse tree is either a +text-string, or a B<Pod::InteriorSequence>. The result returned is a +parse-tree of type B<Pod::ParseTree>. Please see L<Pod::InputObjects> +for more information about B<Pod::InteriorSequence> and B<Pod::ParseTree>. + +If desired, an optional hash-ref may be specified as the first argument +to customize certain aspects of the parse-tree that is created and +returned. The set of recognized option keywords are: + +=over 3 + +=item B<-expand_seq> =E<gt> I<code-ref>|I<method-name> + +Normally, the parse-tree returned by B<parse_text()> will contain an +unexpanded C<Pod::InteriorSequence> object for each interior-sequence +encountered. Specifying B<-expand_seq> tells B<parse_text()> to "expand" +every interior-sequence it sees by invoking the referenced function +(or named method of the parser object) and using the return value as the +expanded result. + +If a subroutine reference was given, it is invoked as: + + &$code_ref( $parser, $sequence ) + +and if a method-name was given, it is invoked as: + + $parser->method_name( $sequence ) + +where C<$parser> is a reference to the parser object, and C<$sequence> +is a reference to the interior-sequence object. +[I<NOTE>: If the B<interior_sequence()> method is specified, then it is +invoked according to the interface specified in L<"interior_sequence()">]. + +=item B<-expand_text> =E<gt> I<code-ref>|I<method-name> + +Normally, the parse-tree returned by B<parse_text()> will contain a +text-string for each contiguous sequence of characters outside of an +interior-sequence. Specifying B<-expand_text> tells B<parse_text()> to +"preprocess" every such text-string it sees by invoking the referenced +function (or named method of the parser object) and using the return value +as the preprocessed (or "expanded") result. [Note that if the result is +an interior-sequence, then it will I<not> be expanded as specified by the +B<-expand_seq> option; Any such recursive expansion needs to be handled by +the specified callback routine.] + +If a subroutine reference was given, it is invoked as: + + &$code_ref( $parser, $text, $ptree_node ) + +and if a method-name was given, it is invoked as: + + $parser->method_name( $text, $ptree_node ) + +where C<$parser> is a reference to the parser object, C<$text> is the +text-string encountered, and C<$ptree_node> is a reference to the current +node in the parse-tree (usually an interior-sequence object or else the +top-level node of the parse-tree). + +=item B<-expand_ptree> =E<gt> I<code-ref>|I<method-name> + +Rather than returning a C<Pod::ParseTree>, pass the parse-tree as an +argument to the referenced subroutine (or named method of the parser +object) and return the result instead of the parse-tree object. + +If a subroutine reference was given, it is invoked as: + + &$code_ref( $parser, $ptree ) + +and if a method-name was given, it is invoked as: + + $parser->method_name( $ptree ) + +where C<$parser> is a reference to the parser object, and C<$ptree> +is a reference to the parse-tree object. + +=back + +=cut + +sub parse_text { + my $self = shift; + local $_ = ''; + + ## Get options and set any defaults + my %opts = (ref $_[0]) ? %{ shift() } : (); + my $expand_seq = $opts{'-expand_seq'} || undef; + my $expand_text = $opts{'-expand_text'} || undef; + my $expand_ptree = $opts{'-expand_ptree'} || undef; + + my $text = shift; + my $line = shift; + my $file = $self->input_file(); + my $cmd = ""; + + ## Convert method calls into closures, for our convenience + my $xseq_sub = $expand_seq; + my $xtext_sub = $expand_text; + my $xptree_sub = $expand_ptree; + if (defined $expand_seq and $expand_seq eq 'interior_sequence') { + ## If 'interior_sequence' is the method to use, we have to pass + ## more than just the sequence object, we also need to pass the + ## sequence name and text. + $xseq_sub = sub { + my ($self, $iseq) = @_; + my $args = join("", $iseq->parse_tree->children); + return $self->interior_sequence($iseq->name, $args, $iseq); + }; + } + ref $xseq_sub or $xseq_sub = sub { shift()->$expand_seq(@_) }; + ref $xtext_sub or $xtext_sub = sub { shift()->$expand_text(@_) }; + ref $xptree_sub or $xptree_sub = sub { shift()->$expand_ptree(@_) }; + + ## Keep track of the "current" interior sequence, and maintain a stack + ## of "in progress" sequences. + ## + ## NOTE that we push our own "accumulator" at the very beginning of the + ## stack. It's really a parse-tree, not a sequence; but it implements + ## the methods we need so we can use it to gather-up all the sequences + ## and strings we parse. Thus, by the end of our parsing, it should be + ## the only thing left on our stack and all we have to do is return it! + ## + my $seq = Pod::ParseTree->new(); + my @seq_stack = ($seq); + my ($ldelim, $rdelim) = ('', ''); + + ## Iterate over all sequence starts text (NOTE: split with + ## capturing parens keeps the delimiters) + $_ = $text; + my @tokens = split /([A-Z]<(?:<+\s+)?)/; + while ( @tokens ) { + $_ = shift @tokens; + ## Look for the beginning of a sequence + if ( /^([A-Z])(<(?:<+\s+)?)$/ ) { + ## Push a new sequence onto the stack of those "in-progress" + ($cmd, $ldelim) = ($1, $2); + $seq = Pod::InteriorSequence->new( + -name => $cmd, + -ldelim => $ldelim, -rdelim => '', + -file => $file, -line => $line + ); + $ldelim =~ s/\s+$//, ($rdelim = $ldelim) =~ tr/</>/; + (@seq_stack > 1) and $seq->nested($seq_stack[-1]); + push @seq_stack, $seq; + } + ## Look for sequence ending + elsif ( @seq_stack > 1 ) { + ## Make sure we match the right kind of closing delimiter + my ($seq_end, $post_seq) = ("", ""); + if ( ($ldelim eq '<' and /\A(.*?)(>)/s) + or /\A(.*?)(\s+$rdelim)/s ) + { + ## Found end-of-sequence, capture the interior and the + ## closing the delimiter, and put the rest back on the + ## token-list + $post_seq = substr($_, length($1) + length($2)); + ($_, $seq_end) = ($1, $2); + (length $post_seq) and unshift @tokens, $post_seq; + } + if (length) { + ## In the middle of a sequence, append this text to it, and + ## dont forget to "expand" it if that's what the caller wanted + $seq->append($expand_text ? &$xtext_sub($self,$_,$seq) : $_); + $_ .= $seq_end; + } + if (length $seq_end) { + ## End of current sequence, record terminating delimiter + $seq->rdelim($seq_end); + ## Pop it off the stack of "in progress" sequences + pop @seq_stack; + ## Append result to its parent in current parse tree + $seq_stack[-1]->append($expand_seq ? &$xseq_sub($self,$seq) + : $seq); + ## Remember the current cmd-name and left-delimiter + $cmd = (@seq_stack > 1) ? $seq_stack[-1]->name : ''; + $ldelim = (@seq_stack > 1) ? $seq_stack[-1]->ldelim : ''; + $ldelim =~ s/\s+$//, ($rdelim = $ldelim) =~ tr/</>/; + } + } + elsif (length) { + ## In the middle of a sequence, append this text to it, and + ## dont forget to "expand" it if that's what the caller wanted + $seq->append($expand_text ? &$xtext_sub($self,$_,$seq) : $_); + } + ## Keep track of line count + $line += tr/\n//; + ## Remember the "current" sequence + $seq = $seq_stack[-1]; + } + + ## Handle unterminated sequences + my $errorsub = (@seq_stack > 1) ? $self->errorsub() : undef; + while (@seq_stack > 1) { + ($cmd, $file, $line) = ($seq->name, $seq->file_line); + $file = VMS::Filespec::unixify($file) if $^O eq 'VMS'; + $ldelim = $seq->ldelim; + ($rdelim = $ldelim) =~ tr/</>/; + $rdelim =~ s/^(\S+)(\s*)$/$2$1/; + pop @seq_stack; + my $errmsg = "*** ERROR: unterminated ${cmd}${ldelim}...${rdelim}". + " at line $line in file $file\n"; + (ref $errorsub) and &{$errorsub}($errmsg) + or (defined $errorsub) and $self->$errorsub($errmsg) + or warn($errmsg); + $seq_stack[-1]->append($expand_seq ? &$xseq_sub($self,$seq) : $seq); + $seq = $seq_stack[-1]; + } + + ## Return the resulting parse-tree + my $ptree = (pop @seq_stack)->parse_tree; + return $expand_ptree ? &$xptree_sub($self, $ptree) : $ptree; +} + +##--------------------------------------------------------------------------- + +=head1 B<interpolate()> + + $textblock = $parser->interpolate($text, $line_num); + +This method translates all text (including any embedded interior sequences) +in the given text string C<$text> and returns the interpolated result. The +parameter C<$line_num> is the line number corresponding to the beginning +of C<$text>. + +B<interpolate()> merely invokes a private method to recursively expand +nested interior sequences in bottom-up order (innermost sequences are +expanded first). If there is a need to expand nested sequences in +some alternate order, use B<parse_text> instead. + +=cut + +sub interpolate { + my($self, $text, $line_num) = @_; + my %parse_opts = ( -expand_seq => 'interior_sequence' ); + my $ptree = $self->parse_text( \%parse_opts, $text, $line_num ); + return join "", $ptree->children(); +} + +##--------------------------------------------------------------------------- + +=begin __PRIVATE__ + +=head1 B<parse_paragraph()> + + $parser->parse_paragraph($text, $line_num); + +This method takes the text of a POD paragraph to be processed, along +with its corresponding line number, and invokes the appropriate method +(one of B<command()>, B<verbatim()>, or B<textblock()>). + +For performance reasons, this method is invoked directly without any +dynamic lookup; Hence subclasses may I<not> override it! + +=end __PRIVATE__ + +=cut + +sub parse_paragraph { + my ($self, $text, $line_num) = @_; + local *myData = $self; ## alias to avoid deref-ing overhead + local *myOpts = ($myData{_PARSEOPTS} ||= {}); ## get parse-options + local $_; + + ## See if we want to preprocess nonPOD paragraphs as well as POD ones. + my $wantNonPods = $myOpts{'-want_nonPODs'}; + + ## Update cutting status + $myData{_CUTTING} = 0 if $text =~ /^={1,2}\S/; + + ## Perform any desired preprocessing if we wanted it this early + $wantNonPods and $text = $self->preprocess_paragraph($text, $line_num); + + ## Ignore up until next POD directive if we are cutting + return if $myData{_CUTTING}; + + ## Now we know this is block of text in a POD section! + + ##----------------------------------------------------------------- + ## This is a hook (hack ;-) for Pod::Select to do its thing without + ## having to override methods, but also without Pod::Parser assuming + ## $self is an instance of Pod::Select (if the _SELECTED_SECTIONS + ## field exists then we assume there is an is_selected() method for + ## us to invoke (calling $self->can('is_selected') could verify this + ## but that is more overhead than I want to incur) + ##----------------------------------------------------------------- + + ## Ignore this block if it isnt in one of the selected sections + if (exists $myData{_SELECTED_SECTIONS}) { + $self->is_selected($text) or return ($myData{_CUTTING} = 1); + } + + ## If we havent already, perform any desired preprocessing and + ## then re-check the "cutting" state + unless ($wantNonPods) { + $text = $self->preprocess_paragraph($text, $line_num); + return 1 unless ((defined $text) and (length $text)); + return 1 if ($myData{_CUTTING}); + } + + ## Look for one of the three types of paragraphs + my ($pfx, $cmd, $arg, $sep) = ('', '', '', ''); + my $pod_para = undef; + if ($text =~ /^(={1,2})(?=\S)/) { + ## Looks like a command paragraph. Capture the command prefix used + ## ("=" or "=="), as well as the command-name, its paragraph text, + ## and whatever sequence of characters was used to separate them + $pfx = $1; + $_ = substr($text, length $pfx); + ($cmd, $sep, $text) = split /(\s+)/, $_, 2; + ## If this is a "cut" directive then we dont need to do anything + ## except return to "cutting" mode. + if ($cmd eq 'cut') { + $myData{_CUTTING} = 1; + return unless $myOpts{'-process_cut_cmd'}; + } + } + ## Save the attributes indicating how the command was specified. + $pod_para = new Pod::Paragraph( + -name => $cmd, + -text => $text, + -prefix => $pfx, + -separator => $sep, + -file => $myData{_INFILE}, + -line => $line_num + ); + # ## Invoke appropriate callbacks + # if (exists $myData{_CALLBACKS}) { + # ## Look through the callback list, invoke callbacks, + # ## then see if we need to do the default actions + # ## (invoke_callbacks will return true if we do). + # return 1 unless $self->invoke_callbacks($cmd, $text, $line_num, $pod_para); + # } + if (length $cmd) { + ## A command paragraph + $self->command($cmd, $text, $line_num, $pod_para); + } + elsif ($text =~ /^\s+/) { + ## Indented text - must be a verbatim paragraph + $self->verbatim($text, $line_num, $pod_para); + } + else { + ## Looks like an ordinary block of text + $self->textblock($text, $line_num, $pod_para); + } + return 1; +} + +##--------------------------------------------------------------------------- + +=head1 B<parse_from_filehandle()> + + $parser->parse_from_filehandle($in_fh,$out_fh); + +This method takes an input filehandle (which is assumed to already be +opened for reading) and reads the entire input stream looking for blocks +(paragraphs) of POD documentation to be processed. If no first argument +is given the default input filehandle C<STDIN> is used. + +The C<$in_fh> parameter may be any object that provides a B<getline()> +method to retrieve a single line of input text (hence, an appropriate +wrapper object could be used to parse PODs from a single string or an +array of strings). + +Using C<$in_fh-E<gt>getline()>, input is read line-by-line and assembled +into paragraphs or "blocks" (which are separated by lines containing +nothing but whitespace). For each block of POD documentation +encountered it will invoke a method to parse the given paragraph. + +If a second argument is given then it should correspond to a filehandle where +output should be sent (otherwise the default output filehandle is +C<STDOUT> if no output filehandle is currently in use). + +B<NOTE:> For performance reasons, this method caches the input stream at +the top of the stack in a local variable. Any attempts by clients to +change the stack contents during processing when in the midst executing +of this method I<will not affect> the input stream used by the current +invocation of this method. + +This method does I<not> usually need to be overridden by subclasses. + +=cut + +sub parse_from_filehandle { + my $self = shift; + my %opts = (ref $_[0] eq 'HASH') ? %{ shift() } : (); + my ($in_fh, $out_fh) = @_; + $in_fh = \*STDIN unless ($in_fh); + local *myData = $self; ## alias to avoid deref-ing overhead + local *myOpts = ($myData{_PARSEOPTS} ||= {}); ## get parse-options + local $_; + + ## Put this stream at the top of the stack and do beginning-of-input + ## processing. NOTE that $in_fh might be reset during this process. + my $topstream = $self->_push_input_stream($in_fh, $out_fh); + (exists $opts{-cutting}) and $self->cutting( $opts{-cutting} ); + + ## Initialize line/paragraph + my ($textline, $paragraph) = ('', ''); + my ($nlines, $plines) = (0, 0); + + ## Use <$fh> instead of $fh->getline where possible (for speed) + $_ = ref $in_fh; + my $tied_fh = (/^(?:GLOB|FileHandle|IO::\w+)$/ or tied $in_fh); + + ## Read paragraphs line-by-line + while (defined ($textline = $tied_fh ? <$in_fh> : $in_fh->getline)) { + $textline = $self->preprocess_line($textline, ++$nlines); + next unless ((defined $textline) && (length $textline)); + $_ = $paragraph; ## save previous contents + + if ((! length $paragraph) && ($textline =~ /^==/)) { + ## '==' denotes a one-line command paragraph + $paragraph = $textline; + $plines = 1; + $textline = ''; + } else { + ## Append this line to the current paragraph + $paragraph .= $textline; + ++$plines; + } + + ## See if this line is blank and ends the current paragraph. + ## If it isnt, then keep iterating until it is. + next unless (($textline =~ /^([^\S\r\n]*)[\r\n]*$/) + && (length $paragraph)); + + ## Issue a warning about any non-empty blank lines + if (length($1) > 1 and $myOpts{'-warnings'} and ! $myData{_CUTTING}) { + my $errorsub = $self->errorsub(); + my $file = $self->input_file(); + $file = VMS::Filespec::unixify($file) if $^O eq 'VMS'; + my $errmsg = "*** WARNING: line containing nothing but whitespace". + " in paragraph at line $nlines in file $file\n"; + (ref $errorsub) and &{$errorsub}($errmsg) + or (defined $errorsub) and $self->$errorsub($errmsg) + or warn($errmsg); + } + + ## Now process the paragraph + parse_paragraph($self, $paragraph, ($nlines - $plines) + 1); + $paragraph = ''; + $plines = 0; + } + ## Dont forget about the last paragraph in the file + if (length $paragraph) { + parse_paragraph($self, $paragraph, ($nlines - $plines) + 1) + } + + ## Now pop the input stream off the top of the input stack. + $self->_pop_input_stream(); +} + +##--------------------------------------------------------------------------- + +=head1 B<parse_from_file()> + + $parser->parse_from_file($filename,$outfile); + +This method takes a filename and does the following: + +=over 2 + +=item * + +opens the input and output files for reading +(creating the appropriate filehandles) + +=item * + +invokes the B<parse_from_filehandle()> method passing it the +corresponding input and output filehandles. + +=item * + +closes the input and output files. + +=back + +If the special input filename "-" or "<&STDIN" is given then the STDIN +filehandle is used for input (and no open or close is performed). If no +input filename is specified then "-" is implied. + +If a second argument is given then it should be the name of the desired +output file. If the special output filename "-" or ">&STDOUT" is given +then the STDOUT filehandle is used for output (and no open or close is +performed). If the special output filename ">&STDERR" is given then the +STDERR filehandle is used for output (and no open or close is +performed). If no output filehandle is currently in use and no output +filename is specified, then "-" is implied. + +This method does I<not> usually need to be overridden by subclasses. + +=cut + +sub parse_from_file { + my $self = shift; + my %opts = (ref $_[0] eq 'HASH') ? %{ shift() } : (); + my ($infile, $outfile) = @_; + my ($in_fh, $out_fh) = (gensym, gensym) if ($] < 5.6); + my ($close_input, $close_output) = (0, 0); + local *myData = $self; + local $_; + + ## Is $infile a filename or a (possibly implied) filehandle + $infile = '-' unless ((defined $infile) && (length $infile)); + if (($infile eq '-') || ($infile =~ /^<&(STDIN|0)$/i)) { + ## Not a filename, just a string implying STDIN + $myData{_INFILE} = "<standard input>"; + $in_fh = \*STDIN; + } + elsif (ref $infile) { + ## Must be a filehandle-ref (or else assume its a ref to an object + ## that supports the common IO read operations). + $myData{_INFILE} = ${$infile}; + $in_fh = $infile; + } + else { + ## We have a filename, open it for reading + $myData{_INFILE} = $infile; + open($in_fh, "< $infile") or + croak "Can't open $infile for reading: $!\n"; + $close_input = 1; + } + + ## NOTE: we need to be *very* careful when "defaulting" the output + ## file. We only want to use a default if this is the beginning of + ## the entire document (but *not* if this is an included file). We + ## determine this by seeing if the input stream stack has been set-up + ## already + ## + unless ((defined $outfile) && (length $outfile)) { + (defined $myData{_TOP_STREAM}) && ($out_fh = $myData{_OUTPUT}) + || ($outfile = '-'); + } + ## Is $outfile a filename or a (possibly implied) filehandle + if ((defined $outfile) && (length $outfile)) { + if (($outfile eq '-') || ($outfile =~ /^>&?(?:STDOUT|1)$/i)) { + ## Not a filename, just a string implying STDOUT + $myData{_OUTFILE} = "<standard output>"; + $out_fh = \*STDOUT; + } + elsif ($outfile =~ /^>&(STDERR|2)$/i) { + ## Not a filename, just a string implying STDERR + $myData{_OUTFILE} = "<standard error>"; + $out_fh = \*STDERR; + } + elsif (ref $outfile) { + ## Must be a filehandle-ref (or else assume its a ref to an + ## object that supports the common IO write operations). + $myData{_OUTFILE} = ${$outfile}; + $out_fh = $outfile; + } + else { + ## We have a filename, open it for writing + $myData{_OUTFILE} = $outfile; + (-d $outfile) and croak "$outfile is a directory, not POD input!\n"; + open($out_fh, "> $outfile") or + croak "Can't open $outfile for writing: $!\n"; + $close_output = 1; + } + } + + ## Whew! That was a lot of work to set up reasonably/robust behavior + ## in the case of a non-filename for reading and writing. Now we just + ## have to parse the input and close the handles when we're finished. + $self->parse_from_filehandle(\%opts, $in_fh, $out_fh); + + $close_input and + close($in_fh) || croak "Can't close $infile after reading: $!\n"; + $close_output and + close($out_fh) || croak "Can't close $outfile after writing: $!\n"; +} + +############################################################################# + +=head1 ACCESSOR METHODS + +Clients of B<Pod::Parser> should use the following methods to access +instance data fields: + +=cut + +##--------------------------------------------------------------------------- + +=head1 B<errorsub()> + + $parser->errorsub("method_name"); + $parser->errorsub(\&warn_user); + $parser->errorsub(sub { print STDERR, @_ }); + +Specifies the method or subroutine to use when printing error messages +about POD syntax. The supplied method/subroutine I<must> return TRUE upon +successful printing of the message. If C<undef> is given, then the B<warn> +builtin is used to issue error messages (this is the default behavior). + + my $errorsub = $parser->errorsub() + my $errmsg = "This is an error message!\n" + (ref $errorsub) and &{$errorsub}($errmsg) + or (defined $errorsub) and $parser->$errorsub($errmsg) + or warn($errmsg); + +Returns a method name, or else a reference to the user-supplied subroutine +used to print error messages. Returns C<undef> if the B<warn> builtin +is used to issue error messages (this is the default behavior). + +=cut + +sub errorsub { + return (@_ > 1) ? ($_[0]->{_ERRORSUB} = $_[1]) : $_[0]->{_ERRORSUB}; +} + +##--------------------------------------------------------------------------- + +=head1 B<cutting()> + + $boolean = $parser->cutting(); + +Returns the current C<cutting> state: a boolean-valued scalar which +evaluates to true if text from the input file is currently being "cut" +(meaning it is I<not> considered part of the POD document). + + $parser->cutting($boolean); + +Sets the current C<cutting> state to the given value and returns the +result. + +=cut + +sub cutting { + return (@_ > 1) ? ($_[0]->{_CUTTING} = $_[1]) : $_[0]->{_CUTTING}; +} + +##--------------------------------------------------------------------------- + +##--------------------------------------------------------------------------- + +=head1 B<parseopts()> + +When invoked with no additional arguments, B<parseopts> returns a hashtable +of all the current parsing options. + + ## See if we are parsing non-POD sections as well as POD ones + my %opts = $parser->parseopts(); + $opts{'-want_nonPODs}' and print "-want_nonPODs\n"; + +When invoked using a single string, B<parseopts> treats the string as the +name of a parse-option and returns its corresponding value if it exists +(returns C<undef> if it doesn't). + + ## Did we ask to see '=cut' paragraphs? + my $want_cut = $parser->parseopts('-process_cut_cmd'); + $want_cut and print "-process_cut_cmd\n"; + +When invoked with multiple arguments, B<parseopts> treats them as +key/value pairs and the specified parse-option names are set to the +given values. Any unspecified parse-options are unaffected. + + ## Set them back to the default + $parser->parseopts(-warnings => 0); + +When passed a single hash-ref, B<parseopts> uses that hash to completely +reset the existing parse-options, all previous parse-option values +are lost. + + ## Reset all options to default + $parser->parseopts( { } ); + +See L<"PARSING OPTIONS"> for more information on the name and meaning of each +parse-option currently recognized. + +=cut + +sub parseopts { + local *myData = shift; + local *myOpts = ($myData{_PARSEOPTS} ||= {}); + return %myOpts if (@_ == 0); + if (@_ == 1) { + local $_ = shift; + return ref($_) ? $myData{_PARSEOPTS} = $_ : $myOpts{$_}; + } + my @newOpts = (%myOpts, @_); + $myData{_PARSEOPTS} = { @newOpts }; +} + +##--------------------------------------------------------------------------- + +=head1 B<output_file()> + + $fname = $parser->output_file(); + +Returns the name of the output file being written. + +=cut + +sub output_file { + return $_[0]->{_OUTFILE}; +} + +##--------------------------------------------------------------------------- + +=head1 B<output_handle()> + + $fhandle = $parser->output_handle(); + +Returns the output filehandle object. + +=cut + +sub output_handle { + return $_[0]->{_OUTPUT}; +} + +##--------------------------------------------------------------------------- + +=head1 B<input_file()> + + $fname = $parser->input_file(); + +Returns the name of the input file being read. + +=cut + +sub input_file { + return $_[0]->{_INFILE}; +} + +##--------------------------------------------------------------------------- + +=head1 B<input_handle()> + + $fhandle = $parser->input_handle(); + +Returns the current input filehandle object. + +=cut + +sub input_handle { + return $_[0]->{_INPUT}; +} + +##--------------------------------------------------------------------------- + +=begin __PRIVATE__ + +=head1 B<input_streams()> + + $listref = $parser->input_streams(); + +Returns a reference to an array which corresponds to the stack of all +the input streams that are currently in the middle of being parsed. + +While parsing an input stream, it is possible to invoke +B<parse_from_file()> or B<parse_from_filehandle()> to parse a new input +stream and then return to parsing the previous input stream. Each input +stream to be parsed is pushed onto the end of this input stack +before any of its input is read. The input stream that is currently +being parsed is always at the end (or top) of the input stack. When an +input stream has been exhausted, it is popped off the end of the +input stack. + +Each element on this input stack is a reference to C<Pod::InputSource> +object. Please see L<Pod::InputObjects> for more details. + +This method might be invoked when printing diagnostic messages, for example, +to obtain the name and line number of the all input files that are currently +being processed. + +=end __PRIVATE__ + +=cut + +sub input_streams { + return $_[0]->{_INPUT_STREAMS}; +} + +##--------------------------------------------------------------------------- + +=begin __PRIVATE__ + +=head1 B<top_stream()> + + $hashref = $parser->top_stream(); + +Returns a reference to the hash-table that represents the element +that is currently at the top (end) of the input stream stack +(see L<"input_streams()">). The return value will be the C<undef> +if the input stack is empty. + +This method might be used when printing diagnostic messages, for example, +to obtain the name and line number of the current input file. + +=end __PRIVATE__ + +=cut + +sub top_stream { + return $_[0]->{_TOP_STREAM} || undef; +} + +############################################################################# + +=head1 PRIVATE METHODS AND DATA + +B<Pod::Parser> makes use of several internal methods and data fields +which clients should not need to see or use. For the sake of avoiding +name collisions for client data and methods, these methods and fields +are briefly discussed here. Determined hackers may obtain further +information about them by reading the B<Pod::Parser> source code. + +Private data fields are stored in the hash-object whose reference is +returned by the B<new()> constructor for this class. The names of all +private methods and data-fields used by B<Pod::Parser> begin with a +prefix of "_" and match the regular expression C</^_\w+$/>. + +=cut + +##--------------------------------------------------------------------------- + +=begin _PRIVATE_ + +=head1 B<_push_input_stream()> + + $hashref = $parser->_push_input_stream($in_fh,$out_fh); + +This method will push the given input stream on the input stack and +perform any necessary beginning-of-document or beginning-of-file +processing. The argument C<$in_fh> is the input stream filehandle to +push, and C<$out_fh> is the corresponding output filehandle to use (if +it is not given or is undefined, then the current output stream is used, +which defaults to standard output if it doesnt exist yet). + +The value returned will be reference to the hash-table that represents +the new top of the input stream stack. I<Please Note> that it is +possible for this method to use default values for the input and output +file handles. If this happens, you will need to look at the C<INPUT> +and C<OUTPUT> instance data members to determine their new values. + +=end _PRIVATE_ + +=cut + +sub _push_input_stream { + my ($self, $in_fh, $out_fh) = @_; + local *myData = $self; + + ## Initialize stuff for the entire document if this is *not* + ## an included file. + ## + ## NOTE: we need to be *very* careful when "defaulting" the output + ## filehandle. We only want to use a default value if this is the + ## beginning of the entire document (but *not* if this is an included + ## file). + unless (defined $myData{_TOP_STREAM}) { + $out_fh = \*STDOUT unless (defined $out_fh); + $myData{_CUTTING} = 1; ## current "cutting" state + $myData{_INPUT_STREAMS} = []; ## stack of all input streams + } + + ## Initialize input indicators + $myData{_OUTFILE} = '(unknown)' unless (defined $myData{_OUTFILE}); + $myData{_OUTPUT} = $out_fh if (defined $out_fh); + $in_fh = \*STDIN unless (defined $in_fh); + $myData{_INFILE} = '(unknown)' unless (defined $myData{_INFILE}); + $myData{_INPUT} = $in_fh; + my $input_top = $myData{_TOP_STREAM} + = new Pod::InputSource( + -name => $myData{_INFILE}, + -handle => $in_fh, + -was_cutting => $myData{_CUTTING} + ); + local *input_stack = $myData{_INPUT_STREAMS}; + push(@input_stack, $input_top); + + ## Perform beginning-of-document and/or beginning-of-input processing + $self->begin_pod() if (@input_stack == 1); + $self->begin_input(); + + return $input_top; +} + +##--------------------------------------------------------------------------- + +=begin _PRIVATE_ + +=head1 B<_pop_input_stream()> + + $hashref = $parser->_pop_input_stream(); + +This takes no arguments. It will perform any necessary end-of-file or +end-of-document processing and then pop the current input stream from +the top of the input stack. + +The value returned will be reference to the hash-table that represents +the new top of the input stream stack. + +=end _PRIVATE_ + +=cut + +sub _pop_input_stream { + my ($self) = @_; + local *myData = $self; + local *input_stack = $myData{_INPUT_STREAMS}; + + ## Perform end-of-input and/or end-of-document processing + $self->end_input() if (@input_stack > 0); + $self->end_pod() if (@input_stack == 1); + + ## Restore cutting state to whatever it was before we started + ## parsing this file. + my $old_top = pop(@input_stack); + $myData{_CUTTING} = $old_top->was_cutting(); + + ## Dont forget to reset the input indicators + my $input_top = undef; + if (@input_stack > 0) { + $input_top = $myData{_TOP_STREAM} = $input_stack[-1]; + $myData{_INFILE} = $input_top->name(); + $myData{_INPUT} = $input_top->handle(); + } else { + delete $myData{_TOP_STREAM}; + delete $myData{_INPUT_STREAMS}; + } + + return $input_top; +} + +############################################################################# + +=head1 TREE-BASED PARSING + +If straightforward stream-based parsing wont meet your needs (as is +likely the case for tasks such as translating PODs into structured +markup languages like HTML and XML) then you may need to take the +tree-based approach. Rather than doing everything in one pass and +calling the B<interpolate()> method to expand sequences into text, it +may be desirable to instead create a parse-tree using the B<parse_text()> +method to return a tree-like structure which may contain an ordered list +list of children (each of which may be a text-string, or a similar +tree-like structure). + +Pay special attention to L<"METHODS FOR PARSING AND PROCESSING"> and +to the objects described in L<Pod::InputObjects>. The former describes +the gory details and parameters for how to customize and extend the +parsing behavior of B<Pod::Parser>. B<Pod::InputObjects> provides +several objects that may all be used interchangeably as parse-trees. The +most obvious one is the B<Pod::ParseTree> object. It defines the basic +interface and functionality that all things trying to be a POD parse-tree +should do. A B<Pod::ParseTree> is defined such that each "node" may be a +text-string, or a reference to another parse-tree. Each B<Pod::Paragraph> +object and each B<Pod::InteriorSequence> object also supports the basic +parse-tree interface. + +The B<parse_text()> method takes a given paragraph of text, and +returns a parse-tree that contains one or more children, each of which +may be a text-string, or an InteriorSequence object. There are also +callback-options that may be passed to B<parse_text()> to customize +the way it expands or transforms interior-sequences, as well as the +returned result. These callbacks can be used to create a parse-tree +with custom-made objects (which may or may not support the parse-tree +interface, depending on how you choose to do it). + +If you wish to turn an entire POD document into a parse-tree, that process +is fairly straightforward. The B<parse_text()> method is the key to doing +this successfully. Every paragraph-callback (i.e. the polymorphic methods +for B<command()>, B<verbatim()>, and B<textblock()> paragraphs) takes +a B<Pod::Paragraph> object as an argument. Each paragraph object has a +B<parse_tree()> method that can be used to get or set a corresponding +parse-tree. So for each of those paragraph-callback methods, simply call +B<parse_text()> with the options you desire, and then use the returned +parse-tree to assign to the given paragraph object. + +That gives you a parse-tree for each paragraph - so now all you need is +an ordered list of paragraphs. You can maintain that yourself as a data +element in the object/hash. The most straightforward way would be simply +to use an array-ref, with the desired set of custom "options" for each +invocation of B<parse_text>. Let's assume the desired option-set is +given by the hash C<%options>. Then we might do something like the +following: + + package MyPodParserTree; + + @ISA = qw( Pod::Parser ); + + ... + + sub begin_pod { + my $self = shift; + $self->{'-paragraphs'} = []; ## initialize paragraph list + } + + sub command { + my ($parser, $command, $paragraph, $line_num, $pod_para) = @_; + my $ptree = $parser->parse_text({%options}, $paragraph, ...); + $pod_para->parse_tree( $ptree ); + push @{ $self->{'-paragraphs'} }, $pod_para; + } + + sub verbatim { + my ($parser, $paragraph, $line_num, $pod_para) = @_; + push @{ $self->{'-paragraphs'} }, $pod_para; + } + + sub textblock { + my ($parser, $paragraph, $line_num, $pod_para) = @_; + my $ptree = $parser->parse_text({%options}, $paragraph, ...); + $pod_para->parse_tree( $ptree ); + push @{ $self->{'-paragraphs'} }, $pod_para; + } + + ... + + package main; + ... + my $parser = new MyPodParserTree(...); + $parser->parse_from_file(...); + my $paragraphs_ref = $parser->{'-paragraphs'}; + +Of course, in this module-author's humble opinion, I'd be more inclined to +use the existing B<Pod::ParseTree> object than a simple array. That way +everything in it, paragraphs and sequences, all respond to the same core +interface for all parse-tree nodes. The result would look something like: + + package MyPodParserTree2; + + ... + + sub begin_pod { + my $self = shift; + $self->{'-ptree'} = new Pod::ParseTree; ## initialize parse-tree + } + + sub parse_tree { + ## convenience method to get/set the parse-tree for the entire POD + (@_ > 1) and $_[0]->{'-ptree'} = $_[1]; + return $_[0]->{'-ptree'}; + } + + sub command { + my ($parser, $command, $paragraph, $line_num, $pod_para) = @_; + my $ptree = $parser->parse_text({<<options>>}, $paragraph, ...); + $pod_para->parse_tree( $ptree ); + $parser->parse_tree()->append( $pod_para ); + } + + sub verbatim { + my ($parser, $paragraph, $line_num, $pod_para) = @_; + $parser->parse_tree()->append( $pod_para ); + } + + sub textblock { + my ($parser, $paragraph, $line_num, $pod_para) = @_; + my $ptree = $parser->parse_text({<<options>>}, $paragraph, ...); + $pod_para->parse_tree( $ptree ); + $parser->parse_tree()->append( $pod_para ); + } + + ... + + package main; + ... + my $parser = new MyPodParserTree2(...); + $parser->parse_from_file(...); + my $ptree = $parser->parse_tree; + ... + +Now you have the entire POD document as one great big parse-tree. You +can even use the B<-expand_seq> option to B<parse_text> to insert +whole different kinds of objects. Just don't expect B<Pod::Parser> +to know what to do with them after that. That will need to be in your +code. Or, alternatively, you can insert any object you like so long as +it conforms to the B<Pod::ParseTree> interface. + +One could use this to create subclasses of B<Pod::Paragraphs> and +B<Pod::InteriorSequences> for specific commands (or to create your own +custom node-types in the parse-tree) and add some kind of B<emit()> +method to each custom node/subclass object in the tree. Then all you'd +need to do is recursively walk the tree in the desired order, processing +the children (most likely from left to right) by formatting them if +they are text-strings, or by calling their B<emit()> method if they +are objects/references. + +=head1 SEE ALSO + +L<Pod::InputObjects>, L<Pod::Select> + +B<Pod::InputObjects> defines POD input objects corresponding to +command paragraphs, parse-trees, and interior-sequences. + +B<Pod::Select> is a subclass of B<Pod::Parser> which provides the ability +to selectively include and/or exclude sections of a POD document from being +translated based upon the current heading, subheading, subsubheading, etc. + +=for __PRIVATE__ +B<Pod::Callbacks> is a subclass of B<Pod::Parser> which gives its users +the ability the employ I<callback functions> instead of, or in addition +to, overriding methods of the base class. + +=for __PRIVATE__ +B<Pod::Select> and B<Pod::Callbacks> do not override any +methods nor do they define any new methods with the same name. Because +of this, they may I<both> be used (in combination) as a base class of +the same subclass in order to combine their functionality without +causing any namespace clashes due to multiple inheritance. + +=head1 AUTHOR + +Brad Appleton E<lt>bradapp@enteract.comE<gt> + +Based on code for B<Pod::Text> written by +Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> + +=cut + +1; diff --git a/contrib/perl5/lib/Pod/Plainer.pm b/contrib/perl5/lib/Pod/Plainer.pm new file mode 100644 index 0000000..373e8d0 --- /dev/null +++ b/contrib/perl5/lib/Pod/Plainer.pm @@ -0,0 +1,69 @@ +package Pod::Plainer; +use strict; +use Pod::Parser; +our @ISA = qw(Pod::Parser); +our $VERSION = '0.01'; + +our %E = qw( < lt > gt ); + +sub escape_ltgt { + (undef, my $text) = @_; + $text =~ s/([<>])/E<$E{$1}>/g; + $text +} + +sub simple_delimiters { + (undef, my $seq) = @_; + $seq -> left_delimiter( '<' ); + $seq -> right_delimiter( '>' ); + $seq; +} + +sub textblock { + my($parser,$text,$line) = @_; + print {$parser->output_handle()} + $parser->parse_text( + { -expand_text => q(escape_ltgt), + -expand_seq => q(simple_delimiters) }, + $text, $line ) -> raw_text(); +} + +1; + +__END__ + +=head1 NAME + +Pod::Plainer - Perl extension for converting Pod to old style Pod. + +=head1 SYNOPSIS + + use Pod::Plainer; + + my $parser = Pod::Plainer -> new (); + $parser -> parse_from_filehandle(\*STDIN); + +=head1 DESCRIPTION + +Pod::Plainer uses Pod::Parser which takes Pod with the (new) +'CE<lt>E<lt> .. E<gt>E<gt>' constructs +and returns the old(er) style with just 'CE<lt>E<gt>'; +'<' and '>' are replaced by 'EE<lt>ltE<gt>' and 'EE<lt>gtE<gt>'. + +This can be used to pre-process Pod before using tools which do not +recognise the new style Pods. + +=head2 EXPORT + +None by default. + +=head1 AUTHOR + +Robin Barker, rmb1@cise.npl.co.uk + +=head1 SEE ALSO + +See L<Pod::Parser>. + +=cut + diff --git a/contrib/perl5/lib/Pod/Select.pm b/contrib/perl5/lib/Pod/Select.pm new file mode 100644 index 0000000..5dd1595 --- /dev/null +++ b/contrib/perl5/lib/Pod/Select.pm @@ -0,0 +1,745 @@ +############################################################################# +# Pod/Select.pm -- function to select portions of POD docs +# +# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved. +# This file is part of "PodParser". PodParser is free software; +# you can redistribute it and/or modify it under the same terms +# as Perl itself. +############################################################################# + +package Pod::Select; + +use vars qw($VERSION); +$VERSION = 1.12; ## Current version of this package +require 5.005; ## requires this Perl version or later + +############################################################################# + +=head1 NAME + +Pod::Select, podselect() - extract selected sections of POD from input + +=head1 SYNOPSIS + + use Pod::Select; + + ## Select all the POD sections for each file in @filelist + ## and print the result on standard output. + podselect(@filelist); + + ## Same as above, but write to tmp.out + podselect({-output => "tmp.out"}, @filelist): + + ## Select from the given filelist, only those POD sections that are + ## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS. + podselect({-sections => ["NAME|SYNOPSIS", "OPTIONS"]}, @filelist): + + ## Select the "DESCRIPTION" section of the PODs from STDIN and write + ## the result to STDERR. + podselect({-output => ">&STDERR", -sections => ["DESCRIPTION"]}, \*STDIN); + +or + + use Pod::Select; + + ## Create a parser object for selecting POD sections from the input + $parser = new Pod::Select(); + + ## Select all the POD sections for each file in @filelist + ## and print the result to tmp.out. + $parser->parse_from_file("<&STDIN", "tmp.out"); + + ## Select from the given filelist, only those POD sections that are + ## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS. + $parser->select("NAME|SYNOPSIS", "OPTIONS"); + for (@filelist) { $parser->parse_from_file($_); } + + ## Select the "DESCRIPTION" and "SEE ALSO" sections of the PODs from + ## STDIN and write the result to STDERR. + $parser->select("DESCRIPTION"); + $parser->add_selection("SEE ALSO"); + $parser->parse_from_filehandle(\*STDIN, \*STDERR); + +=head1 REQUIRES + +perl5.005, Pod::Parser, Exporter, Carp + +=head1 EXPORTS + +podselect() + +=head1 DESCRIPTION + +B<podselect()> is a function which will extract specified sections of +pod documentation from an input stream. This ability is provided by the +B<Pod::Select> module which is a subclass of B<Pod::Parser>. +B<Pod::Select> provides a method named B<select()> to specify the set of +POD sections to select for processing/printing. B<podselect()> merely +creates a B<Pod::Select> object and then invokes the B<podselect()> +followed by B<parse_from_file()>. + +=head1 SECTION SPECIFICATIONS + +B<podselect()> and B<Pod::Select::select()> may be given one or more +"section specifications" to restrict the text processed to only the +desired set of sections and their corresponding subsections. A section +specification is a string containing one or more Perl-style regular +expressions separated by forward slashes ("/"). If you need to use a +forward slash literally within a section title you can escape it with a +backslash ("\/"). + +The formal syntax of a section specification is: + +=over 4 + +=item + +I<head1-title-regex>/I<head2-title-regex>/... + +=back + +Any omitted or empty regular expressions will default to ".*". +Please note that each regular expression given is implicitly +anchored by adding "^" and "$" to the beginning and end. Also, if a +given regular expression starts with a "!" character, then the +expression is I<negated> (so C<!foo> would match anything I<except> +C<foo>). + +Some example section specifications follow. + +=over 4 + +=item +Match the C<NAME> and C<SYNOPSIS> sections and all of their subsections: + +C<NAME|SYNOPSIS> + +=item +Match only the C<Question> and C<Answer> subsections of the C<DESCRIPTION> +section: + +C<DESCRIPTION/Question|Answer> + +=item +Match the C<Comments> subsection of I<all> sections: + +C</Comments> + +=item +Match all subsections of C<DESCRIPTION> I<except> for C<Comments>: + +C<DESCRIPTION/!Comments> + +=item +Match the C<DESCRIPTION> section but do I<not> match any of its subsections: + +C<DESCRIPTION/!.+> + +=item +Match all top level sections but none of their subsections: + +C</!.+> + +=back + +=begin _NOT_IMPLEMENTED_ + +=head1 RANGE SPECIFICATIONS + +B<podselect()> and B<Pod::Select::select()> may be given one or more +"range specifications" to restrict the text processed to only the +desired ranges of paragraphs in the desired set of sections. A range +specification is a string containing a single Perl-style regular +expression (a regex), or else two Perl-style regular expressions +(regexs) separated by a ".." (Perl's "range" operator is ".."). +The regexs in a range specification are delimited by forward slashes +("/"). If you need to use a forward slash literally within a regex you +can escape it with a backslash ("\/"). + +The formal syntax of a range specification is: + +=over 4 + +=item + +/I<start-range-regex>/[../I<end-range-regex>/] + +=back + +Where each the item inside square brackets (the ".." followed by the +end-range-regex) is optional. Each "range-regex" is of the form: + + =cmd-expr text-expr + +Where I<cmd-expr> is intended to match the name of one or more POD +commands, and I<text-expr> is intended to match the paragraph text for +the command. If a range-regex is supposed to match a POD command, then +the first character of the regex (the one after the initial '/') +absolutely I<must> be an single '=' character; it may not be anything +else (not even a regex meta-character) if it is supposed to match +against the name of a POD command. + +If no I<=cmd-expr> is given then the text-expr will be matched against +plain textblocks unless it is preceded by a space, in which case it is +matched against verbatim text-blocks. If no I<text-expr> is given then +only the command-portion of the paragraph is matched against. + +Note that these two expressions are each implicitly anchored. This +means that when matching against the command-name, there will be an +implicit '^' and '$' around the given I<=cmd-expr>; and when matching +against the paragraph text there will be an implicit '\A' and '\Z' +around the given I<text-expr>. + +Unlike with section-specs, the '!' character does I<not> have any special +meaning (negation or otherwise) at the beginning of a range-spec! + +Some example range specifications follow. + +=over 4 + +=item +Match all C<=for html> paragraphs: + +C</=for html/> + +=item +Match all paragraphs between C<=begin html> and C<=end html> +(note that this will I<not> work correctly if such sections +are nested): + +C</=begin html/../=end html/> + +=item +Match all paragraphs between the given C<=item> name until the end of the +current section: + +C</=item mine/../=head\d/> + +=item +Match all paragraphs between the given C<=item> until the next item, or +until the end of the itemized list (note that this will I<not> work as +desired if the item contains an itemized list nested within it): + +C</=item mine/../=(item|back)/> + +=back + +=end _NOT_IMPLEMENTED_ + +=cut + +############################################################################# + +use strict; +#use diagnostics; +use Carp; +use Pod::Parser 1.04; +use vars qw(@ISA @EXPORT $MAX_HEADING_LEVEL); + +@ISA = qw(Pod::Parser); +@EXPORT = qw(&podselect); + +## Maximum number of heading levels supported for '=headN' directives +*MAX_HEADING_LEVEL = \3; + +############################################################################# + +=head1 OBJECT METHODS + +The following methods are provided in this module. Each one takes a +reference to the object itself as an implicit first parameter. + +=cut + +##--------------------------------------------------------------------------- + +## =begin _PRIVATE_ +## +## =head1 B<_init_headings()> +## +## Initialize the current set of active section headings. +## +## =cut +## +## =end _PRIVATE_ + +use vars qw(%myData @section_headings); + +sub _init_headings { + my $self = shift; + local *myData = $self; + + ## Initialize current section heading titles if necessary + unless (defined $myData{_SECTION_HEADINGS}) { + local *section_headings = $myData{_SECTION_HEADINGS} = []; + for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { + $section_headings[$i] = ''; + } + } +} + +##--------------------------------------------------------------------------- + +=head1 B<curr_headings()> + + ($head1, $head2, $head3, ...) = $parser->curr_headings(); + $head1 = $parser->curr_headings(1); + +This method returns a list of the currently active section headings and +subheadings in the document being parsed. The list of headings returned +corresponds to the most recently parsed paragraph of the input. + +If an argument is given, it must correspond to the desired section +heading number, in which case only the specified section heading is +returned. If there is no current section heading at the specified +level, then C<undef> is returned. + +=cut + +sub curr_headings { + my $self = shift; + $self->_init_headings() unless (defined $self->{_SECTION_HEADINGS}); + my @headings = @{ $self->{_SECTION_HEADINGS} }; + return (@_ > 0 and $_[0] =~ /^\d+$/) ? $headings[$_[0] - 1] : @headings; +} + +##--------------------------------------------------------------------------- + +=head1 B<select()> + + $parser->select($section_spec1,$section_spec2,...); + +This method is used to select the particular sections and subsections of +POD documentation that are to be printed and/or processed. The existing +set of selected sections is I<replaced> with the given set of sections. +See B<add_selection()> for adding to the current set of selected +sections. + +Each of the C<$section_spec> arguments should be a section specification +as described in L<"SECTION SPECIFICATIONS">. The section specifications +are parsed by this method and the resulting regular expressions are +stored in the invoking object. + +If no C<$section_spec> arguments are given, then the existing set of +selected sections is cleared out (which means C<all> sections will be +processed). + +This method should I<not> normally be overridden by subclasses. + +=cut + +use vars qw(@selected_sections); + +sub select { + my $self = shift; + my @sections = @_; + local *myData = $self; + local $_; + +### NEED TO DISCERN A SECTION-SPEC FROM A RANGE-SPEC (look for m{^/.+/$}?) + + ##--------------------------------------------------------------------- + ## The following is a blatant hack for backward compatibility, and for + ## implementing add_selection(). If the *first* *argument* is the + ## string "+", then the remaining section specifications are *added* + ## to the current set of selections; otherwise the given section + ## specifications will *replace* the current set of selections. + ## + ## This should probably be fixed someday, but for the present time, + ## it seems incredibly unlikely that "+" would ever correspond to + ## a legitimate section heading + ##--------------------------------------------------------------------- + my $add = ($sections[0] eq "+") ? shift(@sections) : ""; + + ## Reset the set of sections to use + unless (@sections > 0) { + delete $myData{_SELECTED_SECTIONS} unless ($add); + return; + } + $myData{_SELECTED_SECTIONS} = [] + unless ($add && exists $myData{_SELECTED_SECTIONS}); + local *selected_sections = $myData{_SELECTED_SECTIONS}; + + ## Compile each spec + my $spec; + for $spec (@sections) { + if ( defined($_ = &_compile_section_spec($spec)) ) { + ## Store them in our sections array + push(@selected_sections, $_); + } + else { + carp "Ignoring section spec \"$spec\"!\n"; + } + } +} + +##--------------------------------------------------------------------------- + +=head1 B<add_selection()> + + $parser->add_selection($section_spec1,$section_spec2,...); + +This method is used to add to the currently selected sections and +subsections of POD documentation that are to be printed and/or +processed. See <select()> for replacing the currently selected sections. + +Each of the C<$section_spec> arguments should be a section specification +as described in L<"SECTION SPECIFICATIONS">. The section specifications +are parsed by this method and the resulting regular expressions are +stored in the invoking object. + +This method should I<not> normally be overridden by subclasses. + +=cut + +sub add_selection { + my $self = shift; + $self->select("+", @_); +} + +##--------------------------------------------------------------------------- + +=head1 B<clear_selections()> + + $parser->clear_selections(); + +This method takes no arguments, it has the exact same effect as invoking +<select()> with no arguments. + +=cut + +sub clear_selections { + my $self = shift; + $self->select(); +} + +##--------------------------------------------------------------------------- + +=head1 B<match_section()> + + $boolean = $parser->match_section($heading1,$heading2,...); + +Returns a value of true if the given section and subsection heading +titles match any of the currently selected section specifications in +effect from prior calls to B<select()> and B<add_selection()> (or if +there are no explictly selected/deselected sections). + +The arguments C<$heading1>, C<$heading2>, etc. are the heading titles of +the corresponding sections, subsections, etc. to try and match. If +C<$headingN> is omitted then it defaults to the current corresponding +section heading title in the input. + +This method should I<not> normally be overridden by subclasses. + +=cut + +sub match_section { + my $self = shift; + my (@headings) = @_; + local *myData = $self; + + ## Return true if no restrictions were explicitly specified + my $selections = (exists $myData{_SELECTED_SECTIONS}) + ? $myData{_SELECTED_SECTIONS} : undef; + return 1 unless ((defined $selections) && (@{$selections} > 0)); + + ## Default any unspecified sections to the current one + my @current_headings = $self->curr_headings(); + for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { + (defined $headings[$i]) or $headings[$i] = $current_headings[$i]; + } + + ## Look for a match against the specified section expressions + my ($section_spec, $regex, $negated, $match); + for $section_spec ( @{$selections} ) { + ##------------------------------------------------------ + ## Each portion of this spec must match in order for + ## the spec to be matched. So we will start with a + ## match-value of 'true' and logically 'and' it with + ## the results of matching a given element of the spec. + ##------------------------------------------------------ + $match = 1; + for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { + $regex = $section_spec->[$i]; + $negated = ($regex =~ s/^\!//); + $match &= ($negated ? ($headings[$i] !~ /${regex}/) + : ($headings[$i] =~ /${regex}/)); + last unless ($match); + } + return 1 if ($match); + } + return 0; ## no match +} + +##--------------------------------------------------------------------------- + +=head1 B<is_selected()> + + $boolean = $parser->is_selected($paragraph); + +This method is used to determine if the block of text given in +C<$paragraph> falls within the currently selected set of POD sections +and subsections to be printed or processed. This method is also +responsible for keeping track of the current input section and +subsections. It is assumed that C<$paragraph> is the most recently read +(but not yet processed) input paragraph. + +The value returned will be true if the C<$paragraph> and the rest of the +text in the same section as C<$paragraph> should be selected (included) +for processing; otherwise a false value is returned. + +=cut + +sub is_selected { + my ($self, $paragraph) = @_; + local $_; + local *myData = $self; + + $self->_init_headings() unless (defined $myData{_SECTION_HEADINGS}); + + ## Keep track of current sections levels and headings + $_ = $paragraph; + if (/^=((?:sub)*)(?:head(?:ing)?|sec(?:tion)?)(\d*)\s+(.*)\s*$/) { + ## This is a section heading command + my ($level, $heading) = ($2, $3); + $level = 1 + (length($1) / 3) if ((! length $level) || (length $1)); + ## Reset the current section heading at this level + $myData{_SECTION_HEADINGS}->[$level - 1] = $heading; + ## Reset subsection headings of this one to empty + for (my $i = $level; $i < $MAX_HEADING_LEVEL; ++$i) { + $myData{_SECTION_HEADINGS}->[$i] = ''; + } + } + + return $self->match_section(); +} + +############################################################################# + +=head1 EXPORTED FUNCTIONS + +The following functions are exported by this module. Please note that +these are functions (not methods) and therefore C<do not> take an +implicit first argument. + +=cut + +##--------------------------------------------------------------------------- + +=head1 B<podselect()> + + podselect(\%options,@filelist); + +B<podselect> will print the raw (untranslated) POD paragraphs of all +POD sections in the given input files specified by C<@filelist> +according to the given options. + +If any argument to B<podselect> is a reference to a hash +(associative array) then the values with the following keys are +processed as follows: + +=over 4 + +=item B<-output> + +A string corresponding to the desired output file (or ">&STDOUT" +or ">&STDERR"). The default is to use standard output. + +=item B<-sections> + +A reference to an array of sections specifications (as described in +L<"SECTION SPECIFICATIONS">) which indicate the desired set of POD +sections and subsections to be selected from input. If no section +specifications are given, then all sections of the PODs are used. + +=begin _NOT_IMPLEMENTED_ + +=item B<-ranges> + +A reference to an array of range specifications (as described in +L<"RANGE SPECIFICATIONS">) which indicate the desired range of POD +paragraphs to be selected from the desired input sections. If no range +specifications are given, then all paragraphs of the desired sections +are used. + +=end _NOT_IMPLEMENTED_ + +=back + +All other arguments should correspond to the names of input files +containing POD sections. A file name of "-" or "<&STDIN" will +be interpeted to mean standard input (which is the default if no +filenames are given). + +=cut + +sub podselect { + my(@argv) = @_; + my %defaults = (); + my $pod_parser = new Pod::Select(%defaults); + my $num_inputs = 0; + my $output = ">&STDOUT"; + my %opts = (); + local $_; + for (@argv) { + if (ref($_)) { + next unless (ref($_) eq 'HASH'); + %opts = (%defaults, %{$_}); + + ##------------------------------------------------------------- + ## Need this for backward compatibility since we formerly used + ## options that were all uppercase words rather than ones that + ## looked like Unix command-line options. + ## to be uppercase keywords) + ##------------------------------------------------------------- + %opts = map { + my ($key, $val) = (lc $_, $opts{$_}); + $key =~ s/^(?=\w)/-/; + $key =~ /^-se[cl]/ and $key = '-sections'; + #! $key eq '-range' and $key .= 's'; + ($key => $val); + } (keys %opts); + + ## Process the options + (exists $opts{'-output'}) and $output = $opts{'-output'}; + + ## Select the desired sections + $pod_parser->select(@{ $opts{'-sections'} }) + if ( (defined $opts{'-sections'}) + && ((ref $opts{'-sections'}) eq 'ARRAY') ); + + #! ## Select the desired paragraph ranges + #! $pod_parser->select(@{ $opts{'-ranges'} }) + #! if ( (defined $opts{'-ranges'}) + #! && ((ref $opts{'-ranges'}) eq 'ARRAY') ); + } + else { + $pod_parser->parse_from_file($_, $output); + ++$num_inputs; + } + } + $pod_parser->parse_from_file("-") unless ($num_inputs > 0); +} + +############################################################################# + +=head1 PRIVATE METHODS AND DATA + +B<Pod::Select> makes uses a number of internal methods and data fields +which clients should not need to see or use. For the sake of avoiding +name collisions with client data and methods, these methods and fields +are briefly discussed here. Determined hackers may obtain further +information about them by reading the B<Pod::Select> source code. + +Private data fields are stored in the hash-object whose reference is +returned by the B<new()> constructor for this class. The names of all +private methods and data-fields used by B<Pod::Select> begin with a +prefix of "_" and match the regular expression C</^_\w+$/>. + +=cut + +##--------------------------------------------------------------------------- + +=begin _PRIVATE_ + +=head1 B<_compile_section_spec()> + + $listref = $parser->_compile_section_spec($section_spec); + +This function (note it is a function and I<not> a method) takes a +section specification (as described in L<"SECTION SPECIFICATIONS">) +given in C<$section_sepc>, and compiles it into a list of regular +expressions. If C<$section_spec> has no syntax errors, then a reference +to the list (array) of corresponding regular expressions is returned; +otherwise C<undef> is returned and an error message is printed (using +B<carp>) for each invalid regex. + +=end _PRIVATE_ + +=cut + +sub _compile_section_spec { + my ($section_spec) = @_; + my (@regexs, $negated); + + ## Compile the spec into a list of regexs + local $_ = $section_spec; + s|\\\\|\001|g; ## handle escaped backward slashes + s|\\/|\002|g; ## handle escaped forward slashes + + ## Parse the regexs for the heading titles + @regexs = split('/', $_, $MAX_HEADING_LEVEL); + + ## Set default regex for ommitted levels + for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { + $regexs[$i] = '.*' unless ((defined $regexs[$i]) + && (length $regexs[$i])); + } + ## Modify the regexs as needed and validate their syntax + my $bad_regexs = 0; + for (@regexs) { + $_ .= '.+' if ($_ eq '!'); + s|\001|\\\\|g; ## restore escaped backward slashes + s|\002|\\/|g; ## restore escaped forward slashes + $negated = s/^\!//; ## check for negation + eval "/$_/"; ## check regex syntax + if ($@) { + ++$bad_regexs; + carp "Bad regular expression /$_/ in \"$section_spec\": $@\n"; + } + else { + ## Add the forward and rear anchors (and put the negator back) + $_ = '^' . $_ unless (/^\^/); + $_ = $_ . '$' unless (/\$$/); + $_ = '!' . $_ if ($negated); + } + } + return (! $bad_regexs) ? [ @regexs ] : undef; +} + +##--------------------------------------------------------------------------- + +=begin _PRIVATE_ + +=head2 $self->{_SECTION_HEADINGS} + +A reference to an array of the current section heading titles for each +heading level (note that the first heading level title is at index 0). + +=end _PRIVATE_ + +=cut + +##--------------------------------------------------------------------------- + +=begin _PRIVATE_ + +=head2 $self->{_SELECTED_SECTIONS} + +A reference to an array of references to arrays. Each subarray is a list +of anchored regular expressions (preceded by a "!" if the expression is to +be negated). The index of the expression in the subarray should correspond +to the index of the heading title in C<$self-E<gt>{_SECTION_HEADINGS}> +that it is to be matched against. + +=end _PRIVATE_ + +=cut + +############################################################################# + +=head1 SEE ALSO + +L<Pod::Parser> + +=head1 AUTHOR + +Brad Appleton E<lt>bradapp@enteract.comE<gt> + +Based on code for B<pod2text> written by +Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> + +=cut + +1; + diff --git a/contrib/perl5/lib/Pod/Text.pm b/contrib/perl5/lib/Pod/Text.pm index 549bab5..d93e5a4 100644 --- a/contrib/perl5/lib/Pod/Text.pm +++ b/contrib/perl5/lib/Pod/Text.pm @@ -1,551 +1,743 @@ -package Pod::Text; - -=head1 NAME +# Pod::Text -- Convert POD data to formatted ASCII text. +# $Id: Text.pm,v 2.3 1999/10/07 09:41:57 eagle Exp $ +# +# Copyright 1999 by Russ Allbery <rra@stanford.edu> +# +# This program is free software; you can redistribute it and/or modify it +# under the same terms as Perl itself. +# +# This module is intended to be a replacement for Pod::Text, and attempts to +# match its output except for some specific circumstances where other +# decisions seemed to produce better output. It uses Pod::Parser and is +# designed to be very easy to subclass. + +############################################################################ +# Modules and declarations +############################################################################ -Pod::Text - convert POD data to formatted ASCII text - -=head1 SYNOPSIS +package Pod::Text; - use Pod::Text; +require 5.004; - pod2text("perlfunc.pod"); +use Carp qw(carp croak); +use Exporter (); +use Pod::Select (); -Also: +use strict; +use vars qw(@ISA @EXPORT %ESCAPES $VERSION); - pod2text [B<-a>] [B<->I<width>] < input.pod +# We inherit from Pod::Select instead of Pod::Parser so that we can be used +# by Pod::Usage. +@ISA = qw(Pod::Select Exporter); -=head1 DESCRIPTION +# We have to export pod2text for backward compatibility. +@EXPORT = qw(pod2text); -Pod::Text is a module that can convert documentation in the POD format (such -as can be found throughout the Perl distribution) into formatted ASCII. -Termcap is optionally supported for boldface/underline, and can enabled via -C<$Pod::Text::termcap=1>. If termcap has not been enabled, then backspaces -will be used to simulate bold and underlined text. +($VERSION = (split (' ', q$Revision: 2.3 $ ))[1]) =~ s/\.(\d)$/.0$1/; + + +############################################################################ +# Table of supported E<> escapes +############################################################################ + +# This table is taken near verbatim from Pod::PlainText in Pod::Parser, +# which got it near verbatim from the original Pod::Text. It is therefore +# credited to Tom Christiansen, and I'm glad I didn't have to write it. :) +# "iexcl" to "divide" added by Tim Jenness +%ESCAPES = ( + 'amp' => '&', # ampersand + 'lt' => '<', # left chevron, less-than + 'gt' => '>', # right chevron, greater-than + 'quot' => '"', # double quote + + "Aacute" => "\xC1", # capital A, acute accent + "aacute" => "\xE1", # small a, acute accent + "Acirc" => "\xC2", # capital A, circumflex accent + "acirc" => "\xE2", # small a, circumflex accent + "AElig" => "\xC6", # capital AE diphthong (ligature) + "aelig" => "\xE6", # small ae diphthong (ligature) + "Agrave" => "\xC0", # capital A, grave accent + "agrave" => "\xE0", # small a, grave accent + "Aring" => "\xC5", # capital A, ring + "aring" => "\xE5", # small a, ring + "Atilde" => "\xC3", # capital A, tilde + "atilde" => "\xE3", # small a, tilde + "Auml" => "\xC4", # capital A, dieresis or umlaut mark + "auml" => "\xE4", # small a, dieresis or umlaut mark + "Ccedil" => "\xC7", # capital C, cedilla + "ccedil" => "\xE7", # small c, cedilla + "Eacute" => "\xC9", # capital E, acute accent + "eacute" => "\xE9", # small e, acute accent + "Ecirc" => "\xCA", # capital E, circumflex accent + "ecirc" => "\xEA", # small e, circumflex accent + "Egrave" => "\xC8", # capital E, grave accent + "egrave" => "\xE8", # small e, grave accent + "ETH" => "\xD0", # capital Eth, Icelandic + "eth" => "\xF0", # small eth, Icelandic + "Euml" => "\xCB", # capital E, dieresis or umlaut mark + "euml" => "\xEB", # small e, dieresis or umlaut mark + "Iacute" => "\xCD", # capital I, acute accent + "iacute" => "\xED", # small i, acute accent + "Icirc" => "\xCE", # capital I, circumflex accent + "icirc" => "\xEE", # small i, circumflex accent + "Igrave" => "\xCD", # capital I, grave accent + "igrave" => "\xED", # small i, grave accent + "Iuml" => "\xCF", # capital I, dieresis or umlaut mark + "iuml" => "\xEF", # small i, dieresis or umlaut mark + "Ntilde" => "\xD1", # capital N, tilde + "ntilde" => "\xF1", # small n, tilde + "Oacute" => "\xD3", # capital O, acute accent + "oacute" => "\xF3", # small o, acute accent + "Ocirc" => "\xD4", # capital O, circumflex accent + "ocirc" => "\xF4", # small o, circumflex accent + "Ograve" => "\xD2", # capital O, grave accent + "ograve" => "\xF2", # small o, grave accent + "Oslash" => "\xD8", # capital O, slash + "oslash" => "\xF8", # small o, slash + "Otilde" => "\xD5", # capital O, tilde + "otilde" => "\xF5", # small o, tilde + "Ouml" => "\xD6", # capital O, dieresis or umlaut mark + "ouml" => "\xF6", # small o, dieresis or umlaut mark + "szlig" => "\xDF", # small sharp s, German (sz ligature) + "THORN" => "\xDE", # capital THORN, Icelandic + "thorn" => "\xFE", # small thorn, Icelandic + "Uacute" => "\xDA", # capital U, acute accent + "uacute" => "\xFA", # small u, acute accent + "Ucirc" => "\xDB", # capital U, circumflex accent + "ucirc" => "\xFB", # small u, circumflex accent + "Ugrave" => "\xD9", # capital U, grave accent + "ugrave" => "\xF9", # small u, grave accent + "Uuml" => "\xDC", # capital U, dieresis or umlaut mark + "uuml" => "\xFC", # small u, dieresis or umlaut mark + "Yacute" => "\xDD", # capital Y, acute accent + "yacute" => "\xFD", # small y, acute accent + "yuml" => "\xFF", # small y, dieresis or umlaut mark + + "lchevron" => "\xAB", # left chevron (double less than) laquo + "rchevron" => "\xBB", # right chevron (double greater than) raquo + + "iexcl" => "\xA1", # inverted exclamation mark + "cent" => "\xA2", # cent sign + "pound" => "\xA3", # (UK) pound sign + "curren" => "\xA4", # currency sign + "yen" => "\xA5", # yen sign + "brvbar" => "\xA6", # broken vertical bar + "sect" => "\xA7", # section sign + "uml" => "\xA8", # diaresis + "copy" => "\xA9", # Copyright symbol + "ordf" => "\xAA", # feminine ordinal indicator + "laquo" => "\xAB", # left pointing double angle quotation mark + "not" => "\xAC", # not sign + "shy" => "\xAD", # soft hyphen + "reg" => "\xAE", # registered trademark + "macr" => "\xAF", # macron, overline + "deg" => "\xB0", # degree sign + "plusmn" => "\xB1", # plus-minus sign + "sup2" => "\xB2", # superscript 2 + "sup3" => "\xB3", # superscript 3 + "acute" => "\xB4", # acute accent + "micro" => "\xB5", # micro sign + "para" => "\xB6", # pilcrow sign = paragraph sign + "middot" => "\xB7", # middle dot = Georgian comma + "cedil" => "\xB8", # cedilla + "sup1" => "\xB9", # superscript 1 + "ordm" => "\xBA", # masculine ordinal indicator + "raquo" => "\xBB", # right pointing double angle quotation mark + "frac14" => "\xBC", # vulgar fraction one quarter + "frac12" => "\xBD", # vulgar fraction one half + "frac34" => "\xBE", # vulgar fraction three quarters + "iquest" => "\xBF", # inverted question mark + "times" => "\xD7", # multiplication sign + "divide" => "\xF7", # division sign +); -A separate F<pod2text> program is included that is primarily a wrapper for -Pod::Text. -The single function C<pod2text()> can take the optional options B<-a> -for an alternative output format, then a B<->I<width> option with the -max terminal width, followed by one or two arguments. The first -should be the name of a file to read the pod from, or "E<lt>&STDIN" to read from -STDIN. A second argument, if provided, should be a filehandle glob where -output should be sent. +############################################################################ +# Initialization +############################################################################ -=head1 AUTHOR +# Initialize the object. Must be sure to call our parent initializer. +sub initialize { + my $self = shift; -Tom Christiansen E<lt>F<tchrist@mox.perl.com>E<gt> + $$self{alt} = 0 unless defined $$self{alt}; + $$self{indent} = 4 unless defined $$self{indent}; + $$self{loose} = 0 unless defined $$self{loose}; + $$self{sentence} = 0 unless defined $$self{sentence}; + $$self{width} = 76 unless defined $$self{width}; -=head1 TODO + $$self{INDENTS} = []; # Stack of indentations. + $$self{MARGIN} = $$self{indent}; # Current left margin in spaces. -Cleanup work. The input and output locations need to be more flexible, -termcap shouldn't be a global variable, and the terminal speed needs to -be properly calculated. + $self->SUPER::initialize; +} -=cut -use Term::Cap; -require Exporter; -@ISA = Exporter; -@EXPORT = qw(pod2text); - -use vars qw($VERSION); -$VERSION = "1.0203"; +############################################################################ +# Core overrides +############################################################################ + +# Called for each command paragraph. Gets the command, the associated +# paragraph, the line number, and a Pod::Paragraph object. Just dispatches +# the command to a method named the same as the command. =cut is handled +# internally by Pod::Parser. +sub command { + my $self = shift; + my $command = shift; + return if $command eq 'pod'; + return if ($$self{EXCLUDE} && $command ne 'end'); + $self->item ("\n") if defined $$self{ITEM}; + $command = 'cmd_' . $command; + $self->$command (@_); +} -use locale; # make \w work right in non-ASCII lands +# Called for a verbatim paragraph. Gets the paragraph, the line number, and +# a Pod::Paragraph object. Just output it verbatim, but with tabs converted +# to spaces. +sub verbatim { + my $self = shift; + return if $$self{EXCLUDE}; + $self->item if defined $$self{ITEM}; + local $_ = shift; + return if /^\s*$/; + s/^(\s*\S+)/(' ' x $$self{MARGIN}) . $1/gme; + $self->output ($_); +} -$termcap=0; +# Called for a regular text block. Gets the paragraph, the line number, and +# a Pod::Paragraph object. Perform interpolation and output the results. +sub textblock { + my $self = shift; + return if $$self{EXCLUDE}; + $self->output ($_[0]), return if $$self{VERBATIM}; + local $_ = shift; + my $line = shift; -$opt_alt_format = 0; + # Perform a little magic to collapse multiple L<> references. This is + # here mostly for backwards-compatibility. We'll just rewrite the whole + # thing into actual text at this part, bypassing the whole internal + # sequence parsing thing. + s{ + ( + L< # A link of the form L</something>. + / + ( + [:\w]+ # The item has to be a simple word... + (\(\))? # ...or simple function. + ) + > + ( + ,?\s+(and\s+)? # Allow lots of them, conjuncted. + L< + / + ( + [:\w]+ + (\(\))? + ) + > + )+ + ) + } { + local $_ = $1; + s%L</([^>]+)>%$1%g; + my @items = split /(?:,?\s+(?:and\s+)?)/; + my $string = "the "; + my $i; + for ($i = 0; $i < @items; $i++) { + $string .= $items[$i]; + $string .= ", " if @items > 2 && $i != $#items; + $string .= " and " if ($i == $#items - 1); + } + $string .= " entries elsewhere in this document"; + $string; + }gex; + + # Now actually interpolate and output the paragraph. + $_ = $self->interpolate ($_, $line); + s/\s+$/\n/; + if (defined $$self{ITEM}) { + $self->item ($_ . "\n"); + } else { + $self->output ($self->reformat ($_ . "\n")); + } +} -#$use_format=1; +# Called for an interior sequence. Gets the command, argument, and a +# Pod::InteriorSequence object and is expected to return the resulting text. +# Calls code, bold, italic, file, and link to handle those types of +# sequences, and handles S<>, E<>, X<>, and Z<> directly. +sub interior_sequence { + my $self = shift; + my $command = shift; + local $_ = shift; + return '' if ($command eq 'X' || $command eq 'Z'); -$UNDL = "\x1b[4m"; -$INV = "\x1b[7m"; -$BOLD = "\x1b[1m"; -$NORM = "\x1b[0m"; + # Expand escapes into the actual character now, carping if invalid. + if ($command eq 'E') { + if (/^\d+$/) { + return chr; + } else { + return $ESCAPES{$_} if defined $ESCAPES{$_}; + carp "Unknown escape: E<$_>"; + return "E<$_>"; + } + } -sub pod2text { -shift if $opt_alt_format = ($_[0] eq '-a'); + # For all the other sequences, empty content produces no output. + return if $_ eq ''; -if($termcap and !$setuptermcap) { - $setuptermcap=1; + # For S<>, compress all internal whitespace and then map spaces to \01. + # When we output the text, we'll map this back. + if ($command eq 'S') { + s/\s{2,}/ /g; + tr/ /\01/; + return $_; + } - my($term) = Tgetent Term::Cap { TERM => undef, OSPEED => 9600 }; - $UNDL = $term->{'_us'}; - $INV = $term->{'_mr'}; - $BOLD = $term->{'_md'}; - $NORM = $term->{'_me'}; + # Anything else needs to get dispatched to another method. + if ($command eq 'B') { return $self->seq_b ($_) } + elsif ($command eq 'C') { return $self->seq_c ($_) } + elsif ($command eq 'F') { return $self->seq_f ($_) } + elsif ($command eq 'I') { return $self->seq_i ($_) } + elsif ($command eq 'L') { return $self->seq_l ($_) } + else { carp "Unknown sequence $command<$_>" } } -$SCREEN = ($_[0] =~ /^-(\d+)/ && (shift, $1)) - || $ENV{COLUMNS} - || ($ENV{TERMCAP} =~ /co#(\d+)/)[0] - || ($^O ne 'MSWin32' && $^O ne 'dos' && (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0]) - || 72; +# Called for each paragraph that's actually part of the POD. We take +# advantage of this opportunity to untabify the input. +sub preprocess_paragraph { + my $self = shift; + local $_ = shift; + 1 while s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me; + $_; +} -@_ = ("<&STDIN") unless @_; -local($file,*OUTPUT) = @_; -*OUTPUT = *STDOUT if @_<2; -local $: = $:; -$: = " \n" if $opt_alt_format; # Do not break ``-L/lib/'' into ``- L/lib/''. +############################################################################ +# Command paragraphs +############################################################################ -$/ = ""; +# All command paragraphs take the paragraph and the line number. -$FANCY = 0; +# First level heading. +sub cmd_head1 { + my $self = shift; + local $_ = shift; + s/\s+$//; + $_ = $self->interpolate ($_, shift); + if ($$self{alt}) { + $self->output ("\n==== $_ ====\n\n"); + } else { + $_ .= "\n" if $$self{loose}; + $self->output ($_ . "\n"); + } +} -$cutting = 1; -$DEF_INDENT = 4; -$indent = $DEF_INDENT; -$needspace = 0; -$begun = ""; +# Second level heading. +sub cmd_head2 { + my $self = shift; + local $_ = shift; + s/\s+$//; + $_ = $self->interpolate ($_, shift); + if ($$self{alt}) { + $self->output ("\n== $_ ==\n\n"); + } else { + $self->output (' ' x ($$self{indent} / 2) . $_ . "\n\n"); + } +} -open(IN, $file) || die "Couldn't open $file: $!"; +# Start a list. +sub cmd_over { + my $self = shift; + local $_ = shift; + unless (/^[-+]?\d+\s+$/) { $_ = $$self{indent} } + push (@{ $$self{INDENTS} }, $$self{MARGIN}); + $$self{MARGIN} += ($_ + 0); +} -POD_DIRECTIVE: while (<IN>) { - if ($cutting) { - next unless /^=/; - $cutting = 0; - } - if ($begun) { - if (/^=end\s+$begun/) { - $begun = ""; - } - elsif ($begun eq "text") { - print OUTPUT $_; - } - next; - } - 1 while s{^(.*?)(\t+)(.*)$}{ - $1 - . (' ' x (length($2) * 8 - length($1) % 8)) - . $3 - }me; - # Translate verbatim paragraph - if (/^\s/) { - output($_); - next; +# End a list. +sub cmd_back { + my $self = shift; + $$self{MARGIN} = pop @{ $$self{INDENTS} }; + unless (defined $$self{MARGIN}) { + carp "Unmatched =back"; + $$self{MARGIN} = $$self{indent}; } +} - if (/^=for\s+(\S+)\s*(.*)/s) { - if ($1 eq "text") { - print OUTPUT $2,""; - } else { - # ignore unknown for - } - next; - } - elsif (/^=begin\s+(\S+)\s*(.*)/s) { - $begun = $1; - if ($1 eq "text") { - print OUTPUT $2.""; - } - next; - } +# An individual list item. +sub cmd_item { + my $self = shift; + if (defined $$self{ITEM}) { $self->item } + local $_ = shift; + s/\s+$//; + $$self{ITEM} = $self->interpolate ($_); +} -sub prepare_for_output { - - s/\s*$/\n/; - &init_noremap; - - # need to hide E<> first; they're processed in clear_noremap - s/(E<[^<>]+>)/noremap($1)/ge; - $maxnest = 10; - while ($maxnest-- && /[A-Z]</) { - unless ($FANCY) { - if ($opt_alt_format) { - s/[BC]<(.*?)>/``$1''/sg; - s/F<(.*?)>/"$1"/sg; - } else { - s/C<(.*?)>/`$1'/sg; - } - } else { - s/C<(.*?)>/noremap("E<lchevron>${1}E<rchevron>")/sge; - } - # s/[IF]<(.*?)>/italic($1)/ge; - s/I<(.*?)>/*$1*/sg; - # s/[CB]<(.*?)>/bold($1)/ge; - s/X<.*?>//sg; - - # LREF: a la HREF L<show this text|man/section> - s:L<([^|>]+)\|[^>]+>:$1:g; - - # LREF: a manpage(3f) - s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the $1$2 manpage:g; - # LREF: an =item on another manpage - s{ - L< - ([^/]+) - / - ( - [:\w]+ - (\(\))? - ) - > - } {the "$2" entry in the $1 manpage}gx; - - # LREF: an =item on this manpage - s{ - ((?: - L< - / - ( - [:\w]+ - (\(\))? - ) - > - (,?\s+(and\s+)?)? - )+) - } { internal_lrefs($1) }gex; - - # LREF: a =head2 (head1?), maybe on a manpage, maybe right here - # the "func" can disambiguate - s{ - L< - (?: - ([a-zA-Z]\S+?) / - )? - "?(.*?)"? - > - }{ - do { - $1 # if no $1, assume it means on this page. - ? "the section on \"$2\" in the $1 manpage" - : "the section on \"$2\"" - } - }sgex; - - s/[A-Z]<(.*?)>/$1/sg; +# Begin a block for a particular translator. Setting VERBATIM triggers +# special handling in textblock(). +sub cmd_begin { + my $self = shift; + local $_ = shift; + my ($kind) = /^(\S+)/ or return; + if ($kind eq 'text') { + $$self{VERBATIM} = 1; + } else { + $$self{EXCLUDE} = 1; } - clear_noremap(1); } - &prepare_for_output; - - if (s/^=//) { - # $needspace = 0; # Assume this. - # s/\n/ /g; - ($Cmd, $_) = split(' ', $_, 2); - # clear_noremap(1); - if ($Cmd eq 'cut') { - $cutting = 1; - } - elsif ($Cmd eq 'pod') { - $cutting = 0; - } - elsif ($Cmd eq 'head1') { - makespace(); - if ($opt_alt_format) { - print OUTPUT "\n"; - s/^(.+?)[ \t]*$/==== $1 ====/; - } - print OUTPUT; - # print OUTPUT uc($_); - $needspace = $opt_alt_format; - } - elsif ($Cmd eq 'head2') { - makespace(); - # s/(\w+)/\u\L$1/g; - #print ' ' x $DEF_INDENT, $_; - # print "\xA7"; - s/(\w)/\xA7 $1/ if $FANCY; - if ($opt_alt_format) { - s/^(.+?)[ \t]*$/== $1 ==/; - print OUTPUT "\n", $_; - } else { - print OUTPUT ' ' x ($DEF_INDENT/2), $_, "\n"; - } - $needspace = $opt_alt_format; - } - elsif ($Cmd eq 'over') { - push(@indent,$indent); - $indent += ($_ + 0) || $DEF_INDENT; - } - elsif ($Cmd eq 'back') { - $indent = pop(@indent); - warn "Unmatched =back\n" unless defined $indent; - } - elsif ($Cmd eq 'item') { - makespace(); - # s/\A(\s*)\*/$1\xb7/ if $FANCY; - # s/^(\s*\*\s+)/$1 /; - { - if (length() + 3 < $indent) { - my $paratag = $_; - $_ = <IN>; - if (/^=/) { # tricked! - local($indent) = $indent[$#indent - 1] || $DEF_INDENT; - output($paratag); - redo POD_DIRECTIVE; - } - &prepare_for_output; - IP_output($paratag, $_); - } else { - local($indent) = $indent[$#indent - 1] || $DEF_INDENT; - output($_, 0); - } - } - } - else { - warn "Unrecognized directive: $Cmd\n"; - } - } - else { - # clear_noremap(1); - makespace(); - output($_, 1); - } +# End a block for a particular translator. We assume that all =begin/=end +# pairs are properly closed. +sub cmd_end { + my $self = shift; + $$self{EXCLUDE} = 0; + $$self{VERBATIM} = 0; +} + +# One paragraph for a particular translator. Ignore it unless it's intended +# for text, in which case we treat it as a verbatim text block. +sub cmd_for { + my $self = shift; + local $_ = shift; + my $line = shift; + return unless s/^text\b[ \t]*\n?//; + $self->verbatim ($_, $line); } -close(IN); -} +############################################################################ +# Interior sequences +############################################################################ + +# The simple formatting ones. These are here mostly so that subclasses can +# override them and do more complicated things. +sub seq_b { return $_[0]{alt} ? "``$_[1]''" : $_[1] } +sub seq_c { return $_[0]{alt} ? "``$_[1]''" : "`$_[1]'" } +sub seq_f { return $_[0]{alt} ? "\"$_[1]\"" : $_[1] } +sub seq_i { return '*' . $_[1] . '*' } + +# The complicated one. Handle links. Since this is plain text, we can't +# actually make any real links, so this is all to figure out what text we +# print out. +sub seq_l { + my $self = shift; + local $_ = shift; -######################################################################### + # Smash whitespace in case we were split across multiple lines. + s/\s+/ /g; -sub makespace { - if ($needspace) { - print OUTPUT "\n"; - $needspace = 0; + # If we were given any explicit text, just output it. + if (/^([^|]+)\|/) { return $1 } + + # Okay, leading and trailing whitespace isn't important; get rid of it. + s/^\s+//; + s/\s+$//; + + # Default to using the whole content of the link entry as a section + # name. Note that L<manpage/> forces a manpage interpretation, as does + # something looking like L<manpage(section)>. The latter is an + # enhancement over the original Pod::Text. + my ($manpage, $section) = ('', $_); + if (/^"\s*(.*?)\s*"$/) { + $section = '"' . $1 . '"'; + } elsif (m/^[-:.\w]+(?:\(\S+\))?$/) { + ($manpage, $section) = ($_, ''); + } elsif (m%/%) { + ($manpage, $section) = split (/\s*\/\s*/, $_, 2); } -} -sub bold { - my $line = shift; - return $line if $use_format; - if($termcap) { - $line = "$BOLD$line$NORM"; + # Now build the actual output text. + my $text = ''; + if (!length $section) { + $text = "the $manpage manpage" if length $manpage; + } elsif ($section =~ /^[:\w]+(?:\(\))?/) { + $text .= 'the ' . $section . ' entry'; + $text .= (length $manpage) ? " in the $manpage manpage" + : " elsewhere in this document"; } else { - $line =~ s/(.)/$1\b$1/g; - } -# $line = "$BOLD$line$NORM" if $ansify; - return $line; + $section =~ s/^\"\s*//; + $section =~ s/\s*\"$//; + $text .= 'the section on "' . $section . '"'; + $text .= " in the $manpage manpage" if length $manpage; + } + $text; } -sub italic { - my $line = shift; - return $line if $use_format; - if($termcap) { - $line = "$UNDL$line$NORM"; + +############################################################################ +# List handling +############################################################################ + +# This method is called whenever an =item command is complete (in other +# words, we've seen its associated paragraph or know for certain that it +# doesn't have one). It gets the paragraph associated with the item as an +# argument. If that argument is empty, just output the item tag; if it +# contains a newline, output the item tag followed by the newline. +# Otherwise, see if there's enough room for us to output the item tag in the +# margin of the text or if we have to put it on a separate line. +sub item { + my $self = shift; + local $_ = shift; + my $tag = $$self{ITEM}; + unless (defined $tag) { + carp "item called without tag"; + return; + } + undef $$self{ITEM}; + my $indent = $$self{INDENTS}[-1]; + unless (defined $indent) { $indent = $$self{indent} } + my $space = ' ' x $indent; + $space =~ s/^ /:/ if $$self{alt}; + if (!$_ || /^\s+$/ || ($$self{MARGIN} - $indent < length ($tag) + 1)) { + my $margin = $$self{MARGIN}; + $$self{MARGIN} = $indent; + my $output = $self->reformat ($tag); + $output =~ s/\n*$/\n/; + $self->output ($output); + $$self{MARGIN} = $margin; + $self->output ($self->reformat ($_)) if /\S/; } else { - $line =~ s/(.)/$1\b_/g; + $_ = $self->reformat ($_); + s/^ /:/ if ($$self{alt} && $indent > 0); + my $tagspace = ' ' x length $tag; + s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item"; + $self->output ($_); } -# $line = "$UNDL$line$NORM" if $ansify; - return $line; } -# Fill a paragraph including underlined and overstricken chars. -# It's not perfect for words longer than the margin, and it's probably -# slow, but it works. -sub fill { + +############################################################################ +# Output formatting +############################################################################ + +# Wrap a line, indenting by the current left margin. We can't use +# Text::Wrap because it plays games with tabs. We can't use formline, even +# though we'd really like to, because it screws up non-printing characters. +# So we have to do the wrapping ourselves. +sub wrap { + my $self = shift; local $_ = shift; - my $par = ""; - my $indent_space = " " x $indent; - my $marg = $SCREEN-$indent; - my $line = $indent_space; - my $line_length; - foreach (split) { - my $word_length = length; - $word_length -= 2 while /\010/g; # Subtract backspaces - - if ($line_length + $word_length > $marg) { - $par .= $line . "\n"; - $line= $indent_space . $_; - $line_length = $word_length; - } - else { - if ($line_length) { - $line_length++; - $line .= " "; - } - $line_length += $word_length; - $line .= $_; - } + my $output = ''; + my $spaces = ' ' x $$self{MARGIN}; + my $width = $$self{width} - $$self{MARGIN}; + while (length > $width) { + if (s/^([^\n]{0,$width})\s+// || s/^([^\n]{$width})//) { + $output .= $spaces . $1 . "\n"; + } else { + last; + } } - $par .= "$line\n" if $line; - $par .= "\n"; - return $par; + $output .= $spaces . $_; + $output =~ s/\s+$/\n\n/; + $output; } -sub IP_output { - local($tag, $_) = @_; - local($tag_indent) = $indent[$#indent - 1] || $DEF_INDENT; - $tag_cols = $SCREEN - $tag_indent; - $cols = $SCREEN - $indent; - $tag =~ s/\s*$//; - s/\s+/ /g; - s/^ //; - $str = "format OUTPUT = \n" - . (($opt_alt_format && $tag_indent > 1) - ? ":" . " " x ($tag_indent - 1) - : " " x ($tag_indent)) - . '@' . ('<' x ($indent - $tag_indent - 1)) - . "^" . ("<" x ($cols - 1)) . "\n" - . '$tag, $_' - . "\n~~" - . (" " x ($indent-2)) - . "^" . ("<" x ($cols - 5)) . "\n" - . '$_' . "\n\n.\n1"; - #warn $str; warn "tag is $tag, _ is $_"; - eval $str || die; - write OUTPUT; -} +# Reformat a paragraph of text for the current margin. Takes the text to +# reformat and returns the formatted text. +sub reformat { + my $self = shift; + local $_ = shift; -sub output { - local($_, $reformat) = @_; - if ($reformat) { - $cols = $SCREEN - $indent; - s/\s+/ /g; - s/^ //; - $str = "format OUTPUT = \n~~" - . (" " x ($indent-2)) - . "^" . ("<" x ($cols - 5)) . "\n" - . '$_' . "\n\n.\n1"; - eval $str || die; - write OUTPUT; + # If we're trying to preserve two spaces after sentences, do some + # munging to support that. Otherwise, smash all repeated whitespace. + if ($$self{sentence}) { + s/ +$//mg; + s/\.\n/. \n/g; + s/\n/ /g; + s/ +/ /g; } else { - s/^/' ' x $indent/gem; - s/^\s+\n$/\n/gm; - s/^ /: /s if defined($reformat) && $opt_alt_format; - print OUTPUT; + s/\s+/ /g; } + $self->wrap ($_); } -sub noremap { - local($thing_to_hide) = shift; - $thing_to_hide =~ tr/\000-\177/\200-\377/; - return $thing_to_hide; -} +# Output text to the output device. +sub output { $_[1] =~ tr/\01/ /; print { $_[0]->output_handle } $_[1] } -sub init_noremap { - die "unmatched init" if $mapready++; - #mask off high bit characters in input stream - s/([\200-\377])/"E<".ord($1).">"/ge; -} -sub clear_noremap { - my $ready_to_print = $_[0]; - die "unmatched clear" unless $mapready--; - tr/\200-\377/\000-\177/; - # now for the E<>s, which have been hidden until now - # otherwise the interative \w<> processing would have - # been hosed by the E<gt> - s { - E< - ( - ( \d+ ) - | ( [A-Za-z]+ ) - ) - > - } { - do { - defined $2 - ? chr($2) - : - defined $HTML_Escapes{$3} - ? do { $HTML_Escapes{$3} } - : do { - warn "Unknown escape: E<$1> in $_"; - "E<$1>"; - } - } - }egx if $ready_to_print; -} +############################################################################ +# Backwards compatibility +############################################################################ -sub internal_lrefs { - local($_) = shift; - s{L</([^>]+)>}{$1}g; - my(@items) = split( /(?:,?\s+(?:and\s+)?)/ ); - my $retstr = "the "; - my $i; - for ($i = 0; $i <= $#items; $i++) { - $retstr .= "C<$items[$i]>"; - $retstr .= ", " if @items > 2 && $i != $#items; - $retstr .= " and " if $i+2 == @items; +# The old Pod::Text module did everything in a pod2text() function. This +# tries to provide the same interface for legacy applications. +sub pod2text { + my @args; + + # This is really ugly; I hate doing option parsing in the middle of a + # module. But the old Pod::Text module supported passing flags to its + # entry function, so handle -a and -<number>. + while ($_[0] =~ /^-/) { + my $flag = shift; + if ($flag eq '-a') { push (@args, alt => 1) } + elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) } + else { + unshift (@_, $flag); + last; + } } - $retstr .= " entr" . ( @items > 1 ? "ies" : "y" ) - . " elsewhere in this document "; - - return $retstr; - + # Now that we know what arguments we're using, create the parser. + my $parser = Pod::Text->new (@args); + + # If two arguments were given, the second argument is going to be a file + # handle. That means we want to call parse_from_filehandle(), which + # means we need to turn the first argument into a file handle. Magic + # open will handle the <&STDIN case automagically. + if (defined $_[1]) { + local *IN; + unless (open (IN, $_[0])) { + croak ("Can't open $_[0] for reading: $!\n"); + return; + } + $_[0] = \*IN; + return $parser->parse_from_filehandle (@_); + } else { + return $parser->parse_from_file (@_); + } } -BEGIN { - -%HTML_Escapes = ( - 'amp' => '&', # ampersand - 'lt' => '<', # left chevron, less-than - 'gt' => '>', # right chevron, greater-than - 'quot' => '"', # double quote - - "Aacute" => "\xC1", # capital A, acute accent - "aacute" => "\xE1", # small a, acute accent - "Acirc" => "\xC2", # capital A, circumflex accent - "acirc" => "\xE2", # small a, circumflex accent - "AElig" => "\xC6", # capital AE diphthong (ligature) - "aelig" => "\xE6", # small ae diphthong (ligature) - "Agrave" => "\xC0", # capital A, grave accent - "agrave" => "\xE0", # small a, grave accent - "Aring" => "\xC5", # capital A, ring - "aring" => "\xE5", # small a, ring - "Atilde" => "\xC3", # capital A, tilde - "atilde" => "\xE3", # small a, tilde - "Auml" => "\xC4", # capital A, dieresis or umlaut mark - "auml" => "\xE4", # small a, dieresis or umlaut mark - "Ccedil" => "\xC7", # capital C, cedilla - "ccedil" => "\xE7", # small c, cedilla - "Eacute" => "\xC9", # capital E, acute accent - "eacute" => "\xE9", # small e, acute accent - "Ecirc" => "\xCA", # capital E, circumflex accent - "ecirc" => "\xEA", # small e, circumflex accent - "Egrave" => "\xC8", # capital E, grave accent - "egrave" => "\xE8", # small e, grave accent - "ETH" => "\xD0", # capital Eth, Icelandic - "eth" => "\xF0", # small eth, Icelandic - "Euml" => "\xCB", # capital E, dieresis or umlaut mark - "euml" => "\xEB", # small e, dieresis or umlaut mark - "Iacute" => "\xCD", # capital I, acute accent - "iacute" => "\xED", # small i, acute accent - "Icirc" => "\xCE", # capital I, circumflex accent - "icirc" => "\xEE", # small i, circumflex accent - "Igrave" => "\xCD", # capital I, grave accent - "igrave" => "\xED", # small i, grave accent - "Iuml" => "\xCF", # capital I, dieresis or umlaut mark - "iuml" => "\xEF", # small i, dieresis or umlaut mark - "Ntilde" => "\xD1", # capital N, tilde - "ntilde" => "\xF1", # small n, tilde - "Oacute" => "\xD3", # capital O, acute accent - "oacute" => "\xF3", # small o, acute accent - "Ocirc" => "\xD4", # capital O, circumflex accent - "ocirc" => "\xF4", # small o, circumflex accent - "Ograve" => "\xD2", # capital O, grave accent - "ograve" => "\xF2", # small o, grave accent - "Oslash" => "\xD8", # capital O, slash - "oslash" => "\xF8", # small o, slash - "Otilde" => "\xD5", # capital O, tilde - "otilde" => "\xF5", # small o, tilde - "Ouml" => "\xD6", # capital O, dieresis or umlaut mark - "ouml" => "\xF6", # small o, dieresis or umlaut mark - "szlig" => "\xDF", # small sharp s, German (sz ligature) - "THORN" => "\xDE", # capital THORN, Icelandic - "thorn" => "\xFE", # small thorn, Icelandic - "Uacute" => "\xDA", # capital U, acute accent - "uacute" => "\xFA", # small u, acute accent - "Ucirc" => "\xDB", # capital U, circumflex accent - "ucirc" => "\xFB", # small u, circumflex accent - "Ugrave" => "\xD9", # capital U, grave accent - "ugrave" => "\xF9", # small u, grave accent - "Uuml" => "\xDC", # capital U, dieresis or umlaut mark - "uuml" => "\xFC", # small u, dieresis or umlaut mark - "Yacute" => "\xDD", # capital Y, acute accent - "yacute" => "\xFD", # small y, acute accent - "yuml" => "\xFF", # small y, dieresis or umlaut mark - - "lchevron" => "\xAB", # left chevron (double less than) - "rchevron" => "\xBB", # right chevron (double greater than) -); -} + +############################################################################ +# Module return value and documentation +############################################################################ 1; +__END__ + +=head1 NAME + +Pod::Text - Convert POD data to formatted ASCII text + +=head1 SYNOPSIS + + use Pod::Text; + my $parser = Pod::Text->new (sentence => 0, width => 78); + + # Read POD from STDIN and write to STDOUT. + $parser->parse_from_filehandle; + + # Read POD from file.pod and write to file.txt. + $parser->parse_from_file ('file.pod', 'file.txt'); + +=head1 DESCRIPTION + +Pod::Text is a module that can convert documentation in the POD format (the +preferred language for documenting Perl) into formatted ASCII. It uses no +special formatting controls or codes whatsoever, and its output is therefore +suitable for nearly any device. + +As a derived class from Pod::Parser, Pod::Text supports the same methods and +interfaces. See L<Pod::Parser> for all the details; briefly, one creates a +new parser with C<Pod::Text-E<gt>new()> and then calls either +parse_from_filehandle() or parse_from_file(). + +new() can take options, in the form of key/value pairs, that control the +behavior of the parser. The currently recognized options are: + +=over 4 + +=item alt + +If set to a true value, selects an alternate output format that, among other +things, uses a different heading style and marks C<=item> entries with a +colon in the left margin. Defaults to false. + +=item indent + +The number of spaces to indent regular text, and the default indentation for +C<=over> blocks. Defaults to 4. + +=item loose + +If set to a true value, a blank line is printed after a C<=head1> heading. +If set to false (the default), no blank line is printed after C<=head1>, +although one is still printed after C<=head2>. This is the default because +it's the expected formatting for manual pages; if you're formatting +arbitrary text documents, setting this to true may result in more pleasing +output. + +=item sentence + +If set to a true value, Pod::Text will assume that each sentence ends in two +spaces, and will try to preserve that spacing. If set to false, all +consecutive whitespace in non-verbatim paragraphs is compressed into a +single space. Defaults to true. + +=item width + +The column at which to wrap text on the right-hand side. Defaults to 76. + +=back + +The standard Pod::Parser method parse_from_filehandle() takes up to two +arguments, the first being the file handle to read POD from and the second +being the file handle to write the formatted output to. The first defaults +to STDIN if not given, and the second defaults to STDOUT. The method +parse_from_file() is almost identical, except that its two arguments are the +input and output disk files instead. See L<Pod::Parser> for the specific +details. + +=head1 DIAGNOSTICS + +=over 4 + +=item Bizarre space in item + +(W) Something has gone wrong in internal C<=item> processing. This message +indicates a bug in Pod::Text; you should never see it. + +=item Can't open %s for reading: %s + +(F) Pod::Text was invoked via the compatibility mode pod2text() interface +and the input file it was given could not be opened. + +=item Unknown escape: %s + +(W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Text didn't +know about. + +=item Unknown sequence: %s + +(W) The POD source contained a non-standard internal sequence (something of +the form C<XE<lt>E<gt>>) that Pod::Text didn't know about. + +=item Unmatched =back + +(W) Pod::Text encountered a C<=back> command that didn't correspond to an +C<=over> command. + +=back + +=head1 RESTRICTIONS + +Embedded Ctrl-As (octal 001) in the input will be mapped to spaces on +output, due to an internal implementation detail. + +=head1 NOTES + +This is a replacement for an earlier Pod::Text module written by Tom +Christiansen. It has a revamped interface, since it now uses Pod::Parser, +but an interface roughly compatible with the old Pod::Text::pod2text() +function is still available. Please change to the new calling convention, +though. + +The original Pod::Text contained code to do formatting via termcap +sequences, although it wasn't turned on by default and it was problematic to +get it to work at all. This rewrite doesn't even try to do that, but a +subclass of it does. Look for L<Pod::Text::Termcap|Pod::Text::Termcap>. + +=head1 SEE ALSO + +L<Pod::Parser|Pod::Parser>, L<Pod::Text::Termcap|Pod::Text::Termcap>, +pod2text(1) + +=head1 AUTHOR + +Russ Allbery E<lt>rra@stanford.eduE<gt>, based I<very> heavily on the +original Pod::Text by Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> and +its conversion to Pod::Parser by Brad Appleton +E<lt>bradapp@enteract.comE<gt>. + +=cut diff --git a/contrib/perl5/lib/Pod/Text/Color.pm b/contrib/perl5/lib/Pod/Text/Color.pm new file mode 100644 index 0000000..10e1d9f --- /dev/null +++ b/contrib/perl5/lib/Pod/Text/Color.pm @@ -0,0 +1,125 @@ +# Pod::Text::Color -- Convert POD data to formatted color ASCII text +# $Id: Color.pm,v 0.5 1999/09/20 10:15:16 eagle Exp $ +# +# Copyright 1999 by Russ Allbery <rra@stanford.edu> +# +# This program is free software; you can redistribute it and/or modify it +# under the same terms as Perl itself. +# +# This is just a basic proof of concept. It should later be modified to +# make better use of color, take options changing what colors are used for +# what text, and the like. + +############################################################################ +# Modules and declarations +############################################################################ + +package Pod::Text::Color; + +require 5.004; + +use Pod::Text (); +use Term::ANSIColor qw(colored); + +use strict; +use vars qw(@ISA $VERSION); + +@ISA = qw(Pod::Text); + +# Use the CVS revision of this file as its version number. +($VERSION = (split (' ', q$Revision: 0.5 $ ))[1]) =~ s/\.(\d)$/.0$1/; + + +############################################################################ +# Overrides +############################################################################ + +# Make level one headings bold. +sub cmd_head1 { + my $self = shift; + local $_ = shift; + s/\s+$//; + $self->SUPER::cmd_head1 (colored ($_, 'bold')); +} + +# Make level two headings bold. +sub cmd_head2 { + my $self = shift; + local $_ = shift; + s/\s+$//; + $self->SUPER::cmd_head2 (colored ($_, 'bold')); +} + +# Fix the various interior sequences. +sub seq_b { return colored ($_[1], 'bold') } +sub seq_f { return colored ($_[1], 'cyan') } +sub seq_i { return colored ($_[1], 'yellow') } + +# We unfortunately have to override the wrapping code here, since the normal +# wrapping code gets really confused by all the escape sequences. +sub wrap { + my $self = shift; + local $_ = shift; + my $output = ''; + my $spaces = ' ' x $$self{MARGIN}; + my $width = $$self{width} - $$self{MARGIN}; + while (length > $width) { + if (s/^((?:(?:\e\[[\d;]+m)?[^\n]){0,$width})\s+// + || s/^((?:(?:\e\[[\d;]+m)?[^\n]){$width})//) { + $output .= $spaces . $1 . "\n"; + } else { + last; + } + } + $output .= $spaces . $_; + $output =~ s/\s+$/\n\n/; + $output; +} + +############################################################################ +# Module return value and documentation +############################################################################ + +1; +__END__ + +=head1 NAME + +Pod::Text::Color - Convert POD data to formatted color ASCII text + +=head1 SYNOPSIS + + use Pod::Text::Color; + my $parser = Pod::Text::Color->new (sentence => 0, width => 78); + + # Read POD from STDIN and write to STDOUT. + $parser->parse_from_filehandle; + + # Read POD from file.pod and write to file.txt. + $parser->parse_from_file ('file.pod', 'file.txt'); + +=head1 DESCRIPTION + +Pod::Text::Color is a simple subclass of Pod::Text that highlights output +text using ANSI color escape sequences. Apart from the color, it in all +ways functions like Pod::Text. See L<Pod::Text> for details and available +options. + +Term::ANSIColor is used to get colors and therefore must be installed to use +this module. + +=head1 BUGS + +This is just a basic proof of concept. It should be seriously expanded to +support configurable coloration via options passed to the constructor, and +B<pod2text> should be taught about those. + +=head1 SEE ALSO + +L<Pod::Text|Pod::Text>, L<Pod::Parser|Pod::Parser> + +=head1 AUTHOR + +Russ Allbery E<lt>rra@stanford.eduE<gt>. + +=cut diff --git a/contrib/perl5/lib/Pod/Text/Termcap.pm b/contrib/perl5/lib/Pod/Text/Termcap.pm new file mode 100644 index 0000000..7e89ec6 --- /dev/null +++ b/contrib/perl5/lib/Pod/Text/Termcap.pm @@ -0,0 +1,142 @@ +# Pod::Text::Termcap -- Convert POD data to ASCII text with format escapes. +# $Id: Termcap.pm,v 0.4 1999/09/20 10:17:45 eagle Exp $ +# +# Copyright 1999 by Russ Allbery <rra@stanford.edu> +# +# This program is free software; you can redistribute it and/or modify it +# under the same terms as Perl itself. +# +# This is a simple subclass of Pod::Text that overrides a few key methods to +# output the right termcap escape sequences for formatted text on the +# current terminal type. + +############################################################################ +# Modules and declarations +############################################################################ + +package Pod::Text::Termcap; + +require 5.004; + +use Pod::Text (); +use POSIX (); +use Term::Cap; + +use strict; +use vars qw(@ISA $VERSION); + +@ISA = qw(Pod::Text); + +# Use the CVS revision of this file as its version number. +($VERSION = (split (' ', q$Revision: 0.4 $ ))[1]) =~ s/\.(\d)$/.0$1/; + + +############################################################################ +# Overrides +############################################################################ + +# In the initialization method, grab our terminal characteristics as well as +# do all the stuff we normally do. +sub initialize { + my $self = shift; + + # The default Term::Cap path won't work on Solaris. + $ENV{TERMPATH} = "$ENV{HOME}/.termcap:/etc/termcap" + . ":/usr/share/misc/termcap:/usr/share/lib/termcap"; + + my $termios = POSIX::Termios->new; + $termios->getattr; + my $ospeed = $termios->getospeed; + my $term = Tgetent Term::Cap { TERM => undef, OSPEED => $ospeed }; + $$self{BOLD} = $$term{_md} or die 'BOLD'; + $$self{UNDL} = $$term{_us} or die 'UNDL'; + $$self{NORM} = $$term{_me} or die 'NORM'; + + unless (defined $$self{width}) { + $$self{width} = $ENV{COLUMNS} || $$term{_co} || 78; + $$self{width} -= 2; + } + + $self->SUPER::initialize; +} + +# Make level one headings bold. +sub cmd_head1 { + my $self = shift; + local $_ = shift; + s/\s+$//; + $self->SUPER::cmd_head1 ("$$self{BOLD}$_$$self{NORM}"); +} + +# Make level two headings bold. +sub cmd_head2 { + my $self = shift; + local $_ = shift; + s/\s+$//; + $self->SUPER::cmd_head2 ("$$self{BOLD}$_$$self{NORM}"); +} + +# Fix up B<> and I<>. Note that we intentionally don't do F<>. +sub seq_b { my $self = shift; return "$$self{BOLD}$_[0]$$self{NORM}" } +sub seq_i { my $self = shift; return "$$self{UNDL}$_[0]$$self{NORM}" } + +# Override the wrapping code to igore the special sequences. +sub wrap { + my $self = shift; + local $_ = shift; + my $output = ''; + my $spaces = ' ' x $$self{MARGIN}; + my $width = $$self{width} - $$self{MARGIN}; + my $code = "(?:\Q$$self{BOLD}\E|\Q$$self{UNDL}\E|\Q$$self{NORM}\E)"; + while (length > $width) { + if (s/^((?:$code?[^\n]){0,$width})\s+// + || s/^((?:$code?[^\n]){$width})//) { + $output .= $spaces . $1 . "\n"; + } else { + last; + } + } + $output .= $spaces . $_; + $output =~ s/\s+$/\n\n/; + $output; +} + + +############################################################################ +# Module return value and documentation +############################################################################ + +1; +__END__ + +=head1 NAME + +Pod::Text::Color - Convert POD data to ASCII text with format escapes + +=head1 SYNOPSIS + + use Pod::Text::Termcap; + my $parser = Pod::Text::Termcap->new (sentence => 0, width => 78); + + # Read POD from STDIN and write to STDOUT. + $parser->parse_from_filehandle; + + # Read POD from file.pod and write to file.txt. + $parser->parse_from_file ('file.pod', 'file.txt'); + +=head1 DESCRIPTION + +Pod::Text::Termcap is a simple subclass of Pod::Text that highlights output +text using the correct termcap escape sequences for the current terminal. +Apart from the format codes, it in all ways functions like Pod::Text. See +L<Pod::Text> for details and available options. + +=head1 SEE ALSO + +L<Pod::Text|Pod::Text>, L<Pod::Parser|Pod::Parser> + +=head1 AUTHOR + +Russ Allbery E<lt>rra@stanford.eduE<gt>. + +=cut diff --git a/contrib/perl5/lib/Pod/Usage.pm b/contrib/perl5/lib/Pod/Usage.pm new file mode 100644 index 0000000..aa8f712 --- /dev/null +++ b/contrib/perl5/lib/Pod/Usage.pm @@ -0,0 +1,544 @@ +############################################################################# +# Pod/Usage.pm -- print usage messages for the running script. +# +# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved. +# This file is part of "PodParser". PodParser is free software; +# you can redistribute it and/or modify it under the same terms +# as Perl itself. +############################################################################# + +package Pod::Usage; + +use vars qw($VERSION); +$VERSION = 1.12; ## Current version of this package +require 5.005; ## requires this Perl version or later + +=head1 NAME + +Pod::Usage, pod2usage() - print a usage message from embedded pod documentation + +=head1 SYNOPSIS + + use Pod::Usage + + my $message_text = "This text precedes the usage message."; + my $exit_status = 2; ## The exit status to use + my $verbose_level = 0; ## The verbose level to use + my $filehandle = \*STDERR; ## The filehandle to write to + + pod2usage($message_text); + + pod2usage($exit_status); + + pod2usage( { -message => $message_text , + -exitval => $exit_status , + -verbose => $verbose_level, + -output => $filehandle } ); + + pod2usage( -msg => $message_text , + -exitval => $exit_status , + -verbose => $verbose_level, + -output => $filehandle ); + +=head1 ARGUMENTS + +B<pod2usage> should be given either a single argument, or a list of +arguments corresponding to an associative array (a "hash"). When a single +argument is given, it should correspond to exactly one of the following: + +=over + +=item * + +A string containing the text of a message to print I<before> printing +the usage message + +=item * + +A numeric value corresponding to the desired exit status + +=item * + +A reference to a hash + +=back + +If more than one argument is given then the entire argument list is +assumed to be a hash. If a hash is supplied (either as a reference or +as a list) it should contain one or more elements with the following +keys: + +=over + +=item C<-message> + +=item C<-msg> + +The text of a message to print immediately prior to printing the +program's usage message. + +=item C<-exitval> + +The desired exit status to pass to the B<exit()> function. + +=item C<-verbose> + +The desired level of "verboseness" to use when printing the usage +message. If the corresponding value is 0, then only the "SYNOPSIS" +section of the pod documentation is printed. If the corresponding value +is 1, then the "SYNOPSIS" section, along with any section entitled +"OPTIONS", "ARGUMENTS", or "OPTIONS AND ARGUMENTS" is printed. If the +corresponding value is 2 or more then the entire manpage is printed. + +=item C<-output> + +A reference to a filehandle, or the pathname of a file to which the +usage message should be written. The default is C<\*STDERR> unless the +exit value is less than 2 (in which case the default is C<\*STDOUT>). + +=item C<-input> + +A reference to a filehandle, or the pathname of a file from which the +invoking script's pod documentation should be read. It defaults to the +file indicated by C<$0> (C<$PROGRAM_NAME> for users of F<English.pm>). + +=item C<-pathlist> + +A list of directory paths. If the input file does not exist, then it +will be searched for in the given directory list (in the order the +directories appear in the list). It defaults to the list of directories +implied by C<$ENV{PATH}>. The list may be specified either by a reference +to an array, or by a string of directory paths which use the same path +separator as C<$ENV{PATH}> on your system (e.g., C<:> for Unix, C<;> for +MSWin32 and DOS). + +=back + +=head1 DESCRIPTION + +B<pod2usage> will print a usage message for the invoking script (using +its embedded pod documentation) and then exit the script with the +desired exit status. The usage message printed may have any one of three +levels of "verboseness": If the verbose level is 0, then only a synopsis +is printed. If the verbose level is 1, then the synopsis is printed +along with a description (if present) of the command line options and +arguments. If the verbose level is 2, then the entire manual page is +printed. + +Unless they are explicitly specified, the default values for the exit +status, verbose level, and output stream to use are determined as +follows: + +=over + +=item * + +If neither the exit status nor the verbose level is specified, then the +default is to use an exit status of 2 with a verbose level of 0. + +=item * + +If an exit status I<is> specified but the verbose level is I<not>, then the +verbose level will default to 1 if the exit status is less than 2 and +will default to 0 otherwise. + +=item * + +If an exit status is I<not> specified but verbose level I<is> given, then +the exit status will default to 2 if the verbose level is 0 and will +default to 1 otherwise. + +=item * + +If the exit status used is less than 2, then output is printed on +C<STDOUT>. Otherwise output is printed on C<STDERR>. + +=back + +Although the above may seem a bit confusing at first, it generally does +"the right thing" in most situations. This determination of the default +values to use is based upon the following typical Unix conventions: + +=over + +=item * + +An exit status of 0 implies "success". For example, B<diff(1)> exits +with a status of 0 if the two files have the same contents. + +=item * + +An exit status of 1 implies possibly abnormal, but non-defective, program +termination. For example, B<grep(1)> exits with a status of 1 if +it did I<not> find a matching line for the given regular expression. + +=item * + +An exit status of 2 or more implies a fatal error. For example, B<ls(1)> +exits with a status of 2 if you specify an illegal (unknown) option on +the command line. + +=item * + +Usage messages issued as a result of bad command-line syntax should go +to C<STDERR>. However, usage messages issued due to an explicit request +to print usage (like specifying B<-help> on the command line) should go +to C<STDOUT>, just in case the user wants to pipe the output to a pager +(such as B<more(1)>). + +=item * + +If program usage has been explicitly requested by the user, it is often +desireable to exit with a status of 1 (as opposed to 0) after issuing +the user-requested usage message. It is also desireable to give a +more verbose description of program usage in this case. + +=back + +B<pod2usage> doesn't force the above conventions upon you, but it will +use them by default if you don't expressly tell it to do otherwise. The +ability of B<pod2usage()> to accept a single number or a string makes it +convenient to use as an innocent looking error message handling function: + + use Pod::Usage; + use Getopt::Long; + + ## Parse options + GetOptions("help", "man", "flag1") || pod2usage(2); + pod2usage(1) if ($opt_help); + pod2usage(-verbose => 2) if ($opt_man); + + ## Check for too many filenames + pod2usage("$0: Too many files given.\n") if (@ARGV > 1); + +Some user's however may feel that the above "economy of expression" is +not particularly readable nor consistent and may instead choose to do +something more like the following: + + use Pod::Usage; + use Getopt::Long; + + ## Parse options + GetOptions("help", "man", "flag1") || pod2usage(-verbose => 0); + pod2usage(-verbose => 1) if ($opt_help); + pod2usage(-verbose => 2) if ($opt_man); + + ## Check for too many filenames + pod2usage(-verbose => 2, -message => "$0: Too many files given.\n") + if (@ARGV > 1); + +As with all things in Perl, I<there's more than one way to do it>, and +B<pod2usage()> adheres to this philosophy. If you are interested in +seeing a number of different ways to invoke B<pod2usage> (although by no +means exhaustive), please refer to L<"EXAMPLES">. + +=head1 EXAMPLES + +Each of the following invocations of C<pod2usage()> will print just the +"SYNOPSIS" section to C<STDERR> and will exit with a status of 2: + + pod2usage(); + + pod2usage(2); + + pod2usage(-verbose => 0); + + pod2usage(-exitval => 2); + + pod2usage({-exitval => 2, -output => \*STDERR}); + + pod2usage({-verbose => 0, -output => \*STDERR}); + + pod2usage(-exitval => 2, -verbose => 0); + + pod2usage(-exitval => 2, -verbose => 0, -output => \*STDERR); + +Each of the following invocations of C<pod2usage()> will print a message +of "Syntax error." (followed by a newline) to C<STDERR>, immediately +followed by just the "SYNOPSIS" section (also printed to C<STDERR>) and +will exit with a status of 2: + + pod2usage("Syntax error."); + + pod2usage(-message => "Syntax error.", -verbose => 0); + + pod2usage(-msg => "Syntax error.", -exitval => 2); + + pod2usage({-msg => "Syntax error.", -exitval => 2, -output => \*STDERR}); + + pod2usage({-msg => "Syntax error.", -verbose => 0, -output => \*STDERR}); + + pod2usage(-msg => "Syntax error.", -exitval => 2, -verbose => 0); + + pod2usage(-message => "Syntax error.", + -exitval => 2, + -verbose => 0, + -output => \*STDERR); + +Each of the following invocations of C<pod2usage()> will print the +"SYNOPSIS" section and any "OPTIONS" and/or "ARGUMENTS" sections to +C<STDOUT> and will exit with a status of 1: + + pod2usage(1); + + pod2usage(-verbose => 1); + + pod2usage(-exitval => 1); + + pod2usage({-exitval => 1, -output => \*STDOUT}); + + pod2usage({-verbose => 1, -output => \*STDOUT}); + + pod2usage(-exitval => 1, -verbose => 1); + + pod2usage(-exitval => 1, -verbose => 1, -output => \*STDOUT}); + +Each of the following invocations of C<pod2usage()> will print the +entire manual page to C<STDOUT> and will exit with a status of 1: + + pod2usage(-verbose => 2); + + pod2usage({-verbose => 2, -output => \*STDOUT}); + + pod2usage(-exitval => 1, -verbose => 2); + + pod2usage({-exitval => 1, -verbose => 2, -output => \*STDOUT}); + +=head2 Recommended Use + +Most scripts should print some type of usage message to C<STDERR> when a +command line syntax error is detected. They should also provide an +option (usually C<-H> or C<-help>) to print a (possibly more verbose) +usage message to C<STDOUT>. Some scripts may even wish to go so far as to +provide a means of printing their complete documentation to C<STDOUT> +(perhaps by allowing a C<-man> option). The following complete example +uses B<Pod::Usage> in combination with B<Getopt::Long> to do all of these +things: + + use Getopt::Long; + use Pod::Usage; + + my $man = 0; + my $help = 0; + ## Parse options and print usage if there is a syntax error, + ## or if usage was explicitly requested. + GetOptions('help|?' => \$help, man => \$man) or pod2usage(2); + pod2usage(1) if $help; + pod2usage(-verbose => 2) if $man; + + ## If no arguments were given, then allow STDIN to be used only + ## if it's not connected to a terminal (otherwise print usage) + pod2usage("$0: No files given.") if ((@ARGV == 0) && (-t STDIN)); + __END__ + + =head1 NAME + + sample - Using GetOpt::Long and Pod::Usage + + =head1 SYNOPSIS + + sample [options] [file ...] + + Options: + -help brief help message + -man full documentation + + =head1 OPTIONS + + =over 8 + + =item B<-help> + + Print a brief help message and exits. + + =item B<-man> + + Prints the manual page and exits. + + =back + + =head1 DESCRIPTION + + B<This program> will read the given input file(s) and do something + useful with the contents thereof. + + =cut + +=head1 CAVEATS + +By default, B<pod2usage()> will use C<$0> as the path to the pod input +file. Unfortunately, not all systems on which Perl runs will set C<$0> +properly (although if C<$0> isn't found, B<pod2usage()> will search +C<$ENV{PATH}> or else the list specified by the C<-pathlist> option). +If this is the case for your system, you may need to explicitly specify +the path to the pod docs for the invoking script using something +similar to the following: + + pod2usage(-exitval => 2, -input => "/path/to/your/pod/docs"); + +=head1 AUTHOR + +Brad Appleton E<lt>bradapp@enteract.comE<gt> + +Based on code for B<Pod::Text::pod2text()> written by +Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> + +=head1 ACKNOWLEDGEMENTS + +Steven McDougall E<lt>swmcd@world.std.comE<gt> for his help and patience +with re-writing this manpage. + +=cut + +############################################################################# + +use strict; +#use diagnostics; +use Carp; +use Exporter; +use File::Spec; + +use vars qw(@ISA @EXPORT); +@EXPORT = qw(&pod2usage); +BEGIN { + if ( $] >= 5.005_58 ) { + require Pod::Text; + @ISA = qw( Pod::Text ); + } + else { + require Pod::PlainText; + @ISA = qw( Pod::PlainText ); + } +} + + +##--------------------------------------------------------------------------- + +##--------------------------------- +## Function definitions begin here +##--------------------------------- + +sub pod2usage { + local($_) = shift || ""; + my %opts; + ## Collect arguments + if (@_ > 0) { + ## Too many arguments - assume that this is a hash and + ## the user forgot to pass a reference to it. + %opts = ($_, @_); + } + elsif (ref $_) { + ## User passed a ref to a hash + %opts = %{$_} if (ref($_) eq 'HASH'); + } + elsif (/^[-+]?\d+$/) { + ## User passed in the exit value to use + $opts{"-exitval"} = $_; + } + else { + ## User passed in a message to print before issuing usage. + $_ and $opts{"-message"} = $_; + } + + ## Need this for backward compatibility since we formerly used + ## options that were all uppercase words rather than ones that + ## looked like Unix command-line options. + ## to be uppercase keywords) + %opts = map { + my $val = $opts{$_}; + s/^(?=\w)/-/; + /^-msg/i and $_ = '-message'; + /^-exit/i and $_ = '-exitval'; + lc($_) => $val; + } (keys %opts); + + ## Now determine default -exitval and -verbose values to use + if ((! defined $opts{"-exitval"}) && (! defined $opts{"-verbose"})) { + $opts{"-exitval"} = 2; + $opts{"-verbose"} = 0; + } + elsif (! defined $opts{"-exitval"}) { + $opts{"-exitval"} = ($opts{"-verbose"} > 0) ? 1 : 2; + } + elsif (! defined $opts{"-verbose"}) { + $opts{"-verbose"} = ($opts{"-exitval"} < 2); + } + + ## Default the output file + $opts{"-output"} = ($opts{"-exitval"} < 2) ? \*STDOUT : \*STDERR + unless (defined $opts{"-output"}); + ## Default the input file + $opts{"-input"} = $0 unless (defined $opts{"-input"}); + + ## Look up input file in path if it doesnt exist. + unless ((ref $opts{"-input"}) || (-e $opts{"-input"})) { + my ($dirname, $basename) = ('', $opts{"-input"}); + my $pathsep = ($^O =~ /^(?:dos|os2|MSWin32)$/) ? ";" + : (($^O eq 'MacOS') ? ',' : ":"); + my $pathspec = $opts{"-pathlist"} || $ENV{PATH} || $ENV{PERL5LIB}; + + my @paths = (ref $pathspec) ? @$pathspec : split($pathsep, $pathspec); + for $dirname (@paths) { + $_ = File::Spec->catfile($dirname, $basename) if length; + last if (-e $_) && ($opts{"-input"} = $_); + } + } + + ## Now create a pod reader and constrain it to the desired sections. + my $parser = new Pod::Usage(USAGE_OPTIONS => \%opts); + if ($opts{"-verbose"} == 0) { + $parser->select("SYNOPSIS"); + } + elsif ($opts{"-verbose"} == 1) { + my $opt_re = '(?i)' . + '(?:OPTIONS|ARGUMENTS)' . + '(?:\s*(?:AND|\/)\s*(?:OPTIONS|ARGUMENTS))?'; + $parser->select( 'SYNOPSIS', $opt_re, "DESCRIPTION/$opt_re" ); + } + + ## Now translate the pod document and then exit with the desired status + $parser->parse_from_file($opts{"-input"}, $opts{"-output"}); + exit($opts{"-exitval"}); +} + +##--------------------------------------------------------------------------- + +##------------------------------- +## Method definitions begin here +##------------------------------- + +sub new { + my $this = shift; + my $class = ref($this) || $this; + my %params = @_; + my $self = {%params}; + bless $self, $class; + $self->initialize(); + return $self; +} + +sub begin_pod { + my $self = shift; + $self->SUPER::begin_pod(); ## Have to call superclass + my $msg = $self->{USAGE_OPTIONS}->{-message} or return 1; + my $out_fh = $self->output_handle(); + print $out_fh "$msg\n"; +} + +sub preprocess_paragraph { + my $self = shift; + local $_ = shift; + my $line = shift; + ## See if this is a heading and we arent printing the entire manpage. + if (($self->{USAGE_OPTIONS}->{-verbose} < 2) && /^=head/) { + ## Change the title of the SYNOPSIS section to USAGE + s/^=head1\s+SYNOPSIS\s*$/=head1 USAGE/; + ## Try to do some lowercasing instead of all-caps in headings + s{([A-Z])([A-Z]+)}{((length($2) > 2) ? $1 : lc($1)) . lc($2)}ge; + ## Use a colon to end all headings + s/\s*$/:/ unless (/:\s*$/); + $_ .= "\n"; + } + return $self->SUPER::preprocess_paragraph($_); +} + |