diff options
author | markm <markm@FreeBSD.org> | 1998-09-09 07:00:04 +0000 |
---|---|---|
committer | markm <markm@FreeBSD.org> | 1998-09-09 07:00:04 +0000 |
commit | 4fcbc3669aa997848e15198cc9fb856287a6788c (patch) | |
tree | 58b20e81687d6d5931f120b50802ed21225bf440 /contrib/perl5/lib/Pod | |
download | FreeBSD-src-4fcbc3669aa997848e15198cc9fb856287a6788c.zip FreeBSD-src-4fcbc3669aa997848e15198cc9fb856287a6788c.tar.gz |
Initial import of Perl5. The king is dead; long live the king!
Diffstat (limited to 'contrib/perl5/lib/Pod')
-rw-r--r-- | contrib/perl5/lib/Pod/Functions.pm | 296 | ||||
-rw-r--r-- | contrib/perl5/lib/Pod/Html.pm | 1571 | ||||
-rw-r--r-- | contrib/perl5/lib/Pod/Text.pm | 549 |
3 files changed, 2416 insertions, 0 deletions
diff --git a/contrib/perl5/lib/Pod/Functions.pm b/contrib/perl5/lib/Pod/Functions.pm new file mode 100644 index 0000000..3cc9b38 --- /dev/null +++ b/contrib/perl5/lib/Pod/Functions.pm @@ -0,0 +1,296 @@ +package Pod::Functions; + +#:vi:set ts=20 + +require Exporter; + +@ISA = qw(Exporter); +@EXPORT = qw(%Kinds %Type %Flavor %Type_Description @Type_Order); + +%Type_Description = ( + 'ARRAY' => 'Functions for real @ARRAYs', + 'Binary' => 'Functions for fixed length data or records', + 'File' => 'Functions for filehandles, files, or directories', + 'Flow' => 'Keywords related to control flow of your perl program', + 'HASH' => 'Functions for real %HASHes', + 'I/O' => 'Input and output functions', + 'LIST' => 'Functions for list data', + 'Math' => 'Numeric functions', + 'Misc' => 'Miscellaneous functions', + 'Modules' => 'Keywords related to perl modules', + 'Network' => 'Fetching network info', + 'Objects' => 'Keywords related to classes and object-orientedness', + 'Process' => 'Functions for processes and process groups', + 'Regexp' => 'Regular expressions and pattern matching', + 'Socket' => 'Low-level socket functions', + 'String' => 'Functions for SCALARs or strings', + 'SysV' => 'System V interprocess communication functions', + 'Time' => 'Time-related functions', + 'User' => 'Fetching user and group info', + 'Namespace' => 'Keywords altering or affecting scoping of identifiers', +); + +@Type_Order = qw{ + String + Regexp + Math + ARRAY + LIST + HASH + I/O + Binary + File + Flow + Namespace + Misc + Process + Modules + Objects + Socket + SysV + User + Network + Time +}; + +while (<DATA>) { + chomp; + s/#.*//; + next unless $_; + ($name, $type, $text) = split " ", $_, 3; + $Type{$name} = $type; + $Flavor{$name} = $text; + for $type ( split /[,\s]+/, $type ) { + push @{$Kinds{$type}}, $name; + } +} + +unless (caller) { + foreach $type ( @Type_Order ) { + $list = join(", ", sort @{$Kinds{$type}}); + $typedesc = $Type_Description{$type} . ":"; + write; + } +} + +format = + +^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + $typedesc +~~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + $typedesc + ~~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + $list +. + +1 + +__DATA__ +-X File a file test (-r, -x, etc) +abs Math absolute value function +accept Socket accept an incoming socket connect +alarm Process schedule a SIGALRM +atan2 Math arctangent of Y/X +bind Socket binds an address to a socket +binmode I/O prepare binary files on old systems +bless Objects create an object +caller Flow,Namespace get context of the current subroutine call +chdir File change your current working directory +chmod File changes the permissions on a list of files +chomp String remove a trailing record separator from a string +chop String remove the last character from a string +chown File change the owership on a list of files +chr String get character this number represents +chroot File make directory new root for path lookups +close I/O close file (or pipe or socket) handle +closedir I/O close directory handle +connect Socket connect to a remove socket +continue Flow optional trailing block in a while or foreach +cos Math cosine function +crypt String one-way passwd-style encryption +dbmclose Objects,I/O breaks binding on a tied dbm file +dbmopen Objects,I/O create binding on a tied dbm file +defined Misc test whether a value, variable, or function is defined +delete HASH deletes a value from a hash +die I/O,Flow raise an exception or bail out +do Flow,Modules turn a BLOCK into a TERM +dump Misc,Flow create an immediate core dump +each HASH retrieve the next key/value pair from a hash +endgrent User be done using group file +endhostent User be done using hosts file +endnetent User be done using networks file +endprotoent Network be done using protocols file +endpwent User be done using passwd file +endservent Network be done using services file +eof I/O test a filehandle for its end +eval Flow,Misc catch exceptions or compile code +exec Process abandon this program to run another +exists HASH test whether a hash key is present +exit Flow terminate this program +exp Math raise I<e> to a power +fcntl File file control system all +fileno I/O return file descriptor from filehandle +flock I/O lock an entire file with an advisory lock +fork Process create a new process just like this one +format I/O declare a picture format with use by the write() function +formline Misc internal function used for formats +getc I/O get the next character from the filehandle +getgrent User get next group record +getgrgid User get group record given group user ID +getgrnam User get group record given group name +gethostbyaddr Network get host record given its address +gethostbyname Network get host record given name +gethostent Network get next hosts record +getlogin User return who logged in at this tty +getnetbyaddr Network get network record given its address +getnetbyname Network get networks record given name +getnetent Network get next networks record +getpeername Socket find the other hend of a socket connection +getpgrp Process get process group +getppid Process get parent process ID +getpriority Process get current nice value +getprotobyname Network get protocol record given name +getprotobynumber Network get protocol record numeric protocol +getprotoent Network get next protocols record +getpwent User get next passwd record +getpwnam User get passwd record given user login name +getpwuid User get passwd record given user ID +getservbyname Network get services record given its name +getservbyport Network get services record given numeric port +getservent Network get next services record +getsockname Socket retrieve the sockaddr for a given socket +getsockopt Socket get socket options on a given socket +glob File expand filenames using wildcards +gmtime Time convert UNIX time into record or string using Greenwich time +goto Flow create spaghetti code +grep LIST locate elements in a list test true against a given criterion +hex Math,String convert a string to a hexadecimal number +import Modules,Namespace patch a module's namespace into your own +index String find a substring within a string +int Math get the integer portion of a number +ioctl File system-dependent device control system call +join LIST join a list into a string using a separator +keys HASH retrieve list of indices from a hash +kill Process send a signal to a process or process group +last Flow exit a block prematurely +lc String return lower-case version of a string +lcfirst String return a string with just the next letter in lower case +length String return the number of bytes in a string +link File create a hard link in the filesytem +listen Socket register your socket as a server +local Misc,Namespace create a temporary value for a global variable (dynamic scoping) +localtime Time convert UNIX time into record or string using local time +log Math retrieve the natural logarithm for a number +lstat File stat a symbolic link +m// Regexp match a string with a regular expression pattern +map LIST apply a change to a list to get back a new list with the changes +mkdir File create a directory +msgctl SysV SysV IPC message control operations +msgget SysV get SysV IPC message queue +msgrcv SysV receive a SysV IPC message from a message queue +msgsnd SysV send a SysV IPC message to a message queue +my Misc,Namespace declare and assign a local variable (lexical scoping) +next Flow iterate a block prematurely +no Modules unimport some module symbols or semantics at compile time +package Modules,Objects,Namespace declare a separate global namespace +prototype Flow,Misc get the prototype (if any) of a subroutine +oct String,Math convert a string to an octal number +open File open a file, pipe, or descriptor +opendir File open a directory +ord String find a character's numeric representation +pack Binary,String convert a list into a binary representation +pipe Process open a pair of connected filehandles +pop ARRAY remove the last element from an array and return it +pos Regexp find or set the offset for the last/next m//g search +print I/O output a list to a filehandle +printf I/O output a formatted list to a filehandle +push ARRAY append one or more elements to an array +q/STRING/ String singly quote a string +qq/STRING/ String doubly quote a string +quotemeta Regexp quote regular expression magic characters +qw/STRING/ LIST quote a list of words +qx/STRING/ Process backquote quote a string +rand Math retrieve the next pseudorandom number +read I/O,Binary fixed-length buffered input from a filehandle +readdir I/O get a directory from a directory handle +readlink File determine where a symbolic link is pointing +recv Socket receive a message over a Socket +redo Flow start this loop iteration over again +ref Objects find out the type of thing being referenced +rename File change a filename +require Modules load in external functions from a library at runtime +reset Misc clear all variables of a given name +return Flow get out of a function early +reverse String,LIST flip a string or a list +rewinddir I/O reset directory handle +rindex String right-to-left substring search +rmdir File remove a directory +s/// Regexp replace a pattern with a string +scalar Misc force a scalar context +seek I/O reposition file pointer for random-access I/O +seekdir I/O reposition directory pointer +select I/O reset default output or do I/O multiplexing +semctl SysV SysV semaphore control operations +semget SysV get set of SysV semaphores +semop SysV SysV semaphore operations +send Socket send a message over a socket +setgrent User prepare group file for use +sethostent Network prepare hosts file for use +setnetent Network prepare networks file for use +setpgrp Process set the process group of a process +setpriority Process set a process's nice value +setprotoent Network prepare protocols file for use +setpwent User prepare passwd file for use +setservent Network prepare services file for use +setsockopt Socket set some socket options +shift ARRAY remove the first element of an array, and return it +shmctl SysV SysV shared memory operations +shmget SysV get SysV shared memory segment identifier +shmread SysV read SysV shared memory +shmwrite SysV write SysV shared memory +shutdown Socket close down just half of a socket connection +sin Math return the sin of a number +sleep Process block for some number of seconds +socket Socket create a socket +socketpair Socket create a pair of sockets +sort LIST sort a list of values +splice ARRAY add or remove elements anywhere in an array +split Regexp split up a string using a regexp delimiter +sprintf String formatted print into a string +sqrt Math square root function +srand Math seed the random number generator +stat File get a file's status information +study Regexp optimize input data for repeated searches +sub Flow declare a subroutine, possibly anonymously +substr String get or alter a portion of a stirng +symlink File create a symbolic link to a file +syscall I/O,Binary execute an arbitrary system call +sysread I/O,Binary fixed-length unbuffered input from a filehandle +system Process run a separate program +syswrite I/O,Binary fixed-length unbuffered output to a filehandle +tell I/O get current seekpointer on a filehandle +telldir I/O get current seekpointer on a directory handle +tie Objects bind a variable to an object class +time Time return number of seconds since 1970 +times Process,Time return elapsed time for self and child processes +tr/// String transliterate a string +truncate I/O shorten a file +uc String return upper-case version of a string +ucfirst String return a string with just the next letter in upper case +umask File set file creation mode mask +undef Misc remove a variable or function definition +unlink File remove one link to a file +unpack Binary,LIST convert binary structure into normal perl variables +unshift ARRAY prepend more elements to the beginning of a list +untie Objects break a tie binding to a variable +use Modules,Namespace load a module and import its namespace +use Objects load in a module at compile time +utime File set a file's last access and modify times +values HASH return a list of the values in a hash +vec Binary test or set particular bits in a string +wait Process wait for any child process to die +waitpid Process wait for a particular child process to die +wantarray Misc,Flow get list vs array context of current subroutine call +warn I/O print debugging info +write I/O print a picture record +y/// String transliterate a string diff --git a/contrib/perl5/lib/Pod/Html.pm b/contrib/perl5/lib/Pod/Html.pm new file mode 100644 index 0000000..5d2e07b --- /dev/null +++ b/contrib/perl5/lib/Pod/Html.pm @@ -0,0 +1,1571 @@ +package Pod::Html; + +use Pod::Functions; +use Getopt::Long; # package for handling command-line parameters +require Exporter; +use vars qw($VERSION); +$VERSION = 1.01; +@ISA = Exporter; +@EXPORT = qw(pod2html htmlify); +use Cwd; + +use Carp; + +use strict; + +use Config; + +=head1 NAME + +Pod::Html - module to convert pod files to HTML + +=head1 SYNOPSIS + + use Pod::Html; + pod2html([options]); + +=head1 DESCRIPTION + +Converts files from pod format (see L<perlpod>) to HTML format. It +can automatically generate indexes and cross-references, and it keeps +a cache of things it knows how to cross-reference. + +=head1 ARGUMENTS + +Pod::Html takes the following arguments: + +=over 4 + +=item help + + --help + +Displays the usage message. + +=item htmlroot + + --htmlroot=name + +Sets the base URL for the HTML files. When cross-references are made, +the HTML root is prepended to the URL. + +=item infile + + --infile=name + +Specify the pod file to convert. Input is taken from STDIN if no +infile is specified. + +=item outfile + + --outfile=name + +Specify the HTML file to create. Output goes to STDOUT if no outfile +is specified. + +=item podroot + + --podroot=name + +Specify the base directory for finding library pods. + +=item podpath + + --podpath=name:...:name + +Specify which subdirectories of the podroot contain pod files whose +HTML converted forms can be linked-to in cross-references. + +=item libpods + + --libpods=name:...:name + +List of page names (eg, "perlfunc") which contain linkable C<=item>s. + +=item netscape + + --netscape + +Use Netscape HTML directives when applicable. + +=item nonetscape + + --nonetscape + +Do not use Netscape HTML directives (default). + +=item index + + --index + +Generate an index at the top of the HTML file (default behaviour). + +=item noindex + + --noindex + +Do not generate an index at the top of the HTML file. + + +=item recurse + + --recurse + +Recurse into subdirectories specified in podpath (default behaviour). + +=item norecurse + + --norecurse + +Do not recurse into subdirectories specified in podpath. + +=item title + + --title=title + +Specify the title of the resulting HTML file. + +=item verbose + + --verbose + +Display progress messages. + +=back + +=head1 EXAMPLE + + pod2html("pod2html", + "--podpath=lib:ext:pod:vms", + "--podroot=/usr/src/perl", + "--htmlroot=/perl/nmanual", + "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop", + "--recurse", + "--infile=foo.pod", + "--outfile=/perl/nmanual/foo.html"); + +=head1 AUTHOR + +Tom Christiansen, E<lt>tchrist@perl.comE<gt>. + +=head1 BUGS + +Has trouble with C<> etc in = commands. + +=head1 SEE ALSO + +L<perlpod> + +=head1 COPYRIGHT + +This program is distributed under the Artistic License. + +=cut + +my $dircache = "pod2html-dircache"; +my $itemcache = "pod2html-itemcache"; + +my @begin_stack = (); # begin/end stack + +my @libpods = (); # files to search for links from C<> directives +my $htmlroot = "/"; # http-server base directory from which all + # relative paths in $podpath stem. +my $htmlfile = ""; # write to stdout by default +my $podfile = ""; # read from stdin by default +my @podpath = (); # list of directories containing library pods. +my $podroot = "."; # filesystem base directory from which all + # relative paths in $podpath stem. +my $recurse = 1; # recurse on subdirectories in $podpath. +my $verbose = 0; # not verbose by default +my $doindex = 1; # non-zero if we should generate an index +my $listlevel = 0; # current list depth +my @listitem = (); # stack of HTML commands to use when a =item is + # encountered. the top of the stack is the + # current list. +my @listdata = (); # similar to @listitem, but for the text after + # an =item +my @listend = (); # similar to @listitem, but the text to use to + # end the list. +my $ignore = 1; # whether or not to format text. we don't + # format text until we hit our first pod + # directive. + +my %items_named = (); # for the multiples of the same item in perlfunc +my @items_seen = (); +my $netscape = 0; # whether or not to use netscape directives. +my $title; # title to give the pod(s) +my $top = 1; # true if we are at the top of the doc. used + # to prevent the first <HR> directive. +my $paragraph; # which paragraph we're processing (used + # for error messages) +my %pages = (); # associative array used to find the location + # of pages referenced by L<> links. +my %sections = (); # sections within this page +my %items = (); # associative array used to find the location + # of =item directives referenced by C<> links +my $Is83; # is dos with short filenames (8.3) + +sub init_globals { +$dircache = "pod2html-dircache"; +$itemcache = "pod2html-itemcache"; + +@begin_stack = (); # begin/end stack + +@libpods = (); # files to search for links from C<> directives +$htmlroot = "/"; # http-server base directory from which all + # relative paths in $podpath stem. +$htmlfile = ""; # write to stdout by default +$podfile = ""; # read from stdin by default +@podpath = (); # list of directories containing library pods. +$podroot = "."; # filesystem base directory from which all + # relative paths in $podpath stem. +$recurse = 1; # recurse on subdirectories in $podpath. +$verbose = 0; # not verbose by default +$doindex = 1; # non-zero if we should generate an index +$listlevel = 0; # current list depth +@listitem = (); # stack of HTML commands to use when a =item is + # encountered. the top of the stack is the + # current list. +@listdata = (); # similar to @listitem, but for the text after + # an =item +@listend = (); # similar to @listitem, but the text to use to + # end the list. +$ignore = 1; # whether or not to format text. we don't + # format text until we hit our first pod + # directive. + +@items_seen = (); +%items_named = (); +$netscape = 0; # whether or not to use netscape directives. +$title = ''; # title to give the pod(s) +$top = 1; # true if we are at the top of the doc. used + # to prevent the first <HR> directive. +$paragraph = ''; # which paragraph we're processing (used + # for error messages) +%sections = (); # sections within this page + +# These are not reinitialised here but are kept as a cache. +# See get_cache and related cache management code. +#%pages = (); # associative array used to find the location + # of pages referenced by L<> links. +#%items = (); # associative array used to find the location + # of =item directives referenced by C<> links +$Is83=$^O eq 'dos'; +} + +sub pod2html { + local(@ARGV) = @_; + local($/); + local $_; + + init_globals(); + + $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN()); + + # cache of %pages and %items from last time we ran pod2html + + #undef $opt_help if defined $opt_help; + + # parse the command-line parameters + parse_command_line(); + + # set some variables to their default values if necessary + local *POD; + unless (@ARGV && $ARGV[0]) { + $podfile = "-" unless $podfile; # stdin + open(POD, "<$podfile") + || die "$0: cannot open $podfile file for input: $!\n"; + } else { + $podfile = $ARGV[0]; # XXX: might be more filenames + *POD = *ARGV; + } + $htmlfile = "-" unless $htmlfile; # stdout + $htmlroot = "" if $htmlroot eq "/"; # so we don't get a // + + # read the pod a paragraph at a time + warn "Scanning for sections in input file(s)\n" if $verbose; + $/ = ""; + my @poddata = <POD>; + close(POD); + + # scan the pod for =head[1-6] directives and build an index + my $index = scan_headings(\%sections, @poddata); + + unless($index) { + warn "No pod in $podfile\n" if $verbose; + return; + } + + # open the output file + open(HTML, ">$htmlfile") + || die "$0: cannot open $htmlfile file for output: $!\n"; + + # put a title in the HTML file + $title = ''; + TITLE_SEARCH: { + for (my $i = 0; $i < @poddata; $i++) { + if ($poddata[$i] =~ /^=head1\s*NAME\b/m) { + for my $para ( @poddata[$i, $i+1] ) { + last TITLE_SEARCH if ($title) = $para =~ /(\S+\s+-+.*\S)/s; + } + } + + } + } + if (!$title and $podfile =~ /\.pod$/) { + # probably a split pod so take first =head[12] as title + for (my $i = 0; $i < @poddata; $i++) { + last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/; + } + warn "adopted '$title' as title for $podfile\n" + if $verbose and $title; + } + if ($title) { + $title =~ s/\s*\(.*\)//; + } else { + warn "$0: no title for $podfile"; + $podfile =~ /^(.*)(\.[^.\/]+)?$/; + $title = ($podfile eq "-" ? 'No Title' : $1); + warn "using $title" if $verbose; + } + print HTML <<END_OF_HEAD; +<HTML> +<HEAD> +<TITLE>$title</TITLE> +<LINK REV="made" HREF="mailto:$Config{perladmin}"> +</HEAD> + +<BODY> + +END_OF_HEAD + + # load/reload/validate/cache %pages and %items + get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse); + + # scan the pod for =item directives + scan_items("", \%items, @poddata); + + # put an index at the top of the file. note, if $doindex is 0 we + # still generate an index, but surround it with an html comment. + # that way some other program can extract it if desired. + $index =~ s/--+/-/g; + print HTML "<!-- INDEX BEGIN -->\n"; + print HTML "<!--\n" unless $doindex; + print HTML $index; + print HTML "-->\n" unless $doindex; + print HTML "<!-- INDEX END -->\n\n"; + print HTML "<HR>\n" if $doindex; + + # now convert this file + warn "Converting input file\n" if $verbose; + foreach my $i (0..$#poddata) { + $_ = $poddata[$i]; + $paragraph = $i+1; + if (/^(=.*)/s) { # is it a pod directive? + $ignore = 0; + $_ = $1; + if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin + process_begin($1, $2); + } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end + process_end($1, $2); + } elsif (/^=cut/) { # =cut + process_cut(); + } elsif (/^=pod/) { # =pod + process_pod(); + } else { + next if @begin_stack && $begin_stack[-1] ne 'html'; + + if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading + process_head($1, $2); + } elsif (/^=item\s*(.*\S)/sm) { # =item text + process_item($1); + } elsif (/^=over\s*(.*)/) { # =over N + process_over(); + } elsif (/^=back/) { # =back + process_back(); + } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for + process_for($1,$2); + } else { + /^=(\S*)\s*/; + warn "$0: $podfile: unknown pod directive '$1' in " + . "paragraph $paragraph. ignoring.\n"; + } + } + $top = 0; + } + else { + next if $ignore; + next if @begin_stack && $begin_stack[-1] ne 'html'; + my $text = $_; + process_text(\$text, 1); + print HTML "<P>\n$text"; + } + } + + # finish off any pending directives + finish_list(); + print HTML <<END_OF_TAIL; +</BODY> + +</HTML> +END_OF_TAIL + + # close the html file + close(HTML); + + warn "Finished\n" if $verbose; +} + +############################################################################## + +my $usage; # see below +sub usage { + my $podfile = shift; + warn "$0: $podfile: @_\n" if @_; + die $usage; +} + +$usage =<<END_OF_USAGE; +Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name> + --podpath=<name>:...:<name> --podroot=<name> + --libpods=<name>:...:<name> --recurse --verbose --index + --netscape --norecurse --noindex + + --flush - flushes the item and directory caches. + --help - prints this message. + --htmlroot - http-server base directory from which all relative paths + in podpath stem (default is /). + --index - generate an index at the top of the resulting html + (default). + --infile - filename for the pod to convert (input taken from stdin + by default). + --libpods - colon-separated list of pages to search for =item pod + directives in as targets of C<> and implicit links (empty + by default). note, these are not filenames, but rather + page names like those that appear in L<> links. + --netscape - will use netscape html directives when applicable. + --nonetscape - will not use netscape directives (default). + --outfile - filename for the resulting html file (output sent to + stdout by default). + --podpath - colon-separated list of directories containing library + pods. empty by default. + --podroot - filesystem base directory from which all relative paths + in podpath stem (default is .). + --noindex - don't generate an index at the top of the resulting html. + --norecurse - don't recurse on those subdirectories listed in podpath. + --recurse - recurse on those subdirectories listed in podpath + (default behavior). + --title - title that will appear in resulting html file. + --verbose - self-explanatory + +END_OF_USAGE + +sub parse_command_line { + my ($opt_flush,$opt_help,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose); + my $result = GetOptions( + 'flush' => \$opt_flush, + 'help' => \$opt_help, + 'htmlroot=s' => \$opt_htmlroot, + 'index!' => \$opt_index, + 'infile=s' => \$opt_infile, + 'libpods=s' => \$opt_libpods, + 'netscape!' => \$opt_netscape, + 'outfile=s' => \$opt_outfile, + 'podpath=s' => \$opt_podpath, + 'podroot=s' => \$opt_podroot, + 'norecurse' => \$opt_norecurse, + 'recurse!' => \$opt_recurse, + 'title=s' => \$opt_title, + 'verbose' => \$opt_verbose, + ); + usage("-", "invalid parameters") if not $result; + + usage("-") if defined $opt_help; # see if the user asked for help + $opt_help = ""; # just to make -w shut-up. + + $podfile = $opt_infile if defined $opt_infile; + $htmlfile = $opt_outfile if defined $opt_outfile; + + @podpath = split(":", $opt_podpath) if defined $opt_podpath; + @libpods = split(":", $opt_libpods) if defined $opt_libpods; + + warn "Flushing item and directory caches\n" + if $opt_verbose && defined $opt_flush; + unlink($dircache, $itemcache) if defined $opt_flush; + + $htmlroot = $opt_htmlroot if defined $opt_htmlroot; + $podroot = $opt_podroot if defined $opt_podroot; + + $doindex = $opt_index if defined $opt_index; + $recurse = $opt_recurse if defined $opt_recurse; + $title = $opt_title if defined $opt_title; + $verbose = defined $opt_verbose ? 1 : 0; + $netscape = $opt_netscape if defined $opt_netscape; +} + + +my $saved_cache_key; + +sub get_cache { + my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_; + my @cache_key_args = @_; + + # A first-level cache: + # Don't bother reading the cache files if they still apply + # and haven't changed since we last read them. + + my $this_cache_key = cache_key(@cache_key_args); + + return if $saved_cache_key and $this_cache_key eq $saved_cache_key; + + # load the cache of %pages and %items if possible. $tests will be + # non-zero if successful. + my $tests = 0; + if (-f $dircache && -f $itemcache) { + warn "scanning for item cache\n" if $verbose; + $tests = load_cache($dircache, $itemcache, $podpath, $podroot); + } + + # if we didn't succeed in loading the cache then we must (re)build + # %pages and %items. + if (!$tests) { + warn "scanning directories in pod-path\n" if $verbose; + scan_podpath($podroot, $recurse, 0); + } + $saved_cache_key = cache_key(@cache_key_args); +} + +sub cache_key { + my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_; + return join('!', $dircache, $itemcache, $recurse, + @$podpath, $podroot, stat($dircache), stat($itemcache)); +} + +# +# load_cache - tries to find if the caches stored in $dircache and $itemcache +# are valid caches of %pages and %items. if they are valid then it loads +# them and returns a non-zero value. +# + +sub load_cache { + my($dircache, $itemcache, $podpath, $podroot) = @_; + my($tests); + local $_; + + $tests = 0; + + open(CACHE, "<$itemcache") || + die "$0: error opening $itemcache for reading: $!\n"; + $/ = "\n"; + + # is it the same podpath? + $_ = <CACHE>; + chomp($_); + $tests++ if (join(":", @$podpath) eq $_); + + # is it the same podroot? + $_ = <CACHE>; + chomp($_); + $tests++ if ($podroot eq $_); + + # load the cache if its good + if ($tests != 2) { + close(CACHE); + return 0; + } + + warn "loading item cache\n" if $verbose; + while (<CACHE>) { + /(.*?) (.*)$/; + $items{$1} = $2; + } + close(CACHE); + + warn "scanning for directory cache\n" if $verbose; + open(CACHE, "<$dircache") || + die "$0: error opening $dircache for reading: $!\n"; + $/ = "\n"; + $tests = 0; + + # is it the same podpath? + $_ = <CACHE>; + chomp($_); + $tests++ if (join(":", @$podpath) eq $_); + + # is it the same podroot? + $_ = <CACHE>; + chomp($_); + $tests++ if ($podroot eq $_); + + # load the cache if its good + if ($tests != 2) { + close(CACHE); + return 0; + } + + warn "loading directory cache\n" if $verbose; + while (<CACHE>) { + /(.*?) (.*)$/; + $pages{$1} = $2; + } + + close(CACHE); + + return 1; +} + +# +# scan_podpath - scans the directories specified in @podpath for directories, +# .pod files, and .pm files. it also scans the pod files specified in +# @libpods for =item directives. +# +sub scan_podpath { + my($podroot, $recurse, $append) = @_; + my($pwd, $dir); + my($libpod, $dirname, $pod, @files, @poddata); + + unless($append) { + %items = (); + %pages = (); + } + + # scan each directory listed in @podpath + $pwd = getcwd(); + chdir($podroot) + || die "$0: error changing to directory $podroot: $!\n"; + foreach $dir (@podpath) { + scan_dir($dir, $recurse); + } + + # scan the pods listed in @libpods for =item directives + foreach $libpod (@libpods) { + # if the page isn't defined then we won't know where to find it + # on the system. + next unless defined $pages{$libpod} && $pages{$libpod}; + + # if there is a directory then use the .pod and .pm files within it. + if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) { + # find all the .pod and .pm files within the directory + $dirname = $1; + opendir(DIR, $dirname) || + die "$0: error opening directory $dirname: $!\n"; + @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR)); + closedir(DIR); + + # scan each .pod and .pm file for =item directives + foreach $pod (@files) { + open(POD, "<$dirname/$pod") || + die "$0: error opening $dirname/$pod for input: $!\n"; + @poddata = <POD>; + close(POD); + + scan_items("$dirname/$pod", @poddata); + } + + # use the names of files as =item directives too. + foreach $pod (@files) { + $pod =~ /^(.*)(\.pod|\.pm)$/; + $items{$1} = "$dirname/$1.html" if $1; + } + } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ || + $pages{$libpod} =~ /([^:]*\.pm):/) { + # scan the .pod or .pm file for =item directives + $pod = $1; + open(POD, "<$pod") || + die "$0: error opening $pod for input: $!\n"; + @poddata = <POD>; + close(POD); + + scan_items("$pod", @poddata); + } else { + warn "$0: shouldn't be here (line ".__LINE__."\n"; + } + } + @poddata = (); # clean-up a bit + + chdir($pwd) + || die "$0: error changing to directory $pwd: $!\n"; + + # cache the item list for later use + warn "caching items for later use\n" if $verbose; + open(CACHE, ">$itemcache") || + die "$0: error open $itemcache for writing: $!\n"; + + print CACHE join(":", @podpath) . "\n$podroot\n"; + foreach my $key (keys %items) { + print CACHE "$key $items{$key}\n"; + } + + close(CACHE); + + # cache the directory list for later use + warn "caching directories for later use\n" if $verbose; + open(CACHE, ">$dircache") || + die "$0: error open $dircache for writing: $!\n"; + + print CACHE join(":", @podpath) . "\n$podroot\n"; + foreach my $key (keys %pages) { + print CACHE "$key $pages{$key}\n"; + } + + close(CACHE); +} + +# +# scan_dir - scans the directory specified in $dir for subdirectories, .pod +# files, and .pm files. notes those that it finds. this information will +# be used later in order to figure out where the pages specified in L<> +# links are on the filesystem. +# +sub scan_dir { + my($dir, $recurse) = @_; + my($t, @subdirs, @pods, $pod, $dirname, @dirs); + local $_; + + @subdirs = (); + @pods = (); + + opendir(DIR, $dir) || + die "$0: error opening directory $dir: $!\n"; + while (defined($_ = readdir(DIR))) { + if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory + $pages{$_} = "" unless defined $pages{$_}; + $pages{$_} .= "$dir/$_:"; + push(@subdirs, $_); + } elsif (/\.pod$/) { # .pod + s/\.pod$//; + $pages{$_} = "" unless defined $pages{$_}; + $pages{$_} .= "$dir/$_.pod:"; + push(@pods, "$dir/$_.pod"); + } elsif (/\.pm$/) { # .pm + s/\.pm$//; + $pages{$_} = "" unless defined $pages{$_}; + $pages{$_} .= "$dir/$_.pm:"; + push(@pods, "$dir/$_.pm"); + } + } + closedir(DIR); + + # recurse on the subdirectories if necessary + if ($recurse) { + foreach my $subdir (@subdirs) { + scan_dir("$dir/$subdir", $recurse); + } + } +} + +# +# scan_headings - scan a pod file for head[1-6] tags, note the tags, and +# build an index. +# +sub scan_headings { + my($sections, @data) = @_; + my($tag, $which_head, $title, $listdepth, $index); + + # here we need local $ignore = 0; + # unfortunately, we can't have it, because $ignore is lexical + $ignore = 0; + + $listdepth = 0; + $index = ""; + + # scan for =head directives, note their name, and build an index + # pointing to each of them. + foreach my $line (@data) { + if ($line =~ /^=(head)([1-6])\s+(.*)/) { + ($tag,$which_head, $title) = ($1,$2,$3); + chomp($title); + $$sections{htmlify(0,$title)} = 1; + + while ($which_head != $listdepth) { + if ($which_head > $listdepth) { + $index .= "\n" . ("\t" x $listdepth) . "<UL>\n"; + $listdepth++; + } elsif ($which_head < $listdepth) { + $listdepth--; + $index .= "\n" . ("\t" x $listdepth) . "</UL>\n"; + } + } + + $index .= "\n" . ("\t" x $listdepth) . "<LI>" . + "<A HREF=\"#" . htmlify(0,$title) . "\">" . + html_escape(process_text(\$title, 0)) . "</A>"; + } + } + + # finish off the lists + while ($listdepth--) { + $index .= "\n" . ("\t" x $listdepth) . "</UL>\n"; + } + + # get rid of bogus lists + $index =~ s,\t*<UL>\s*</UL>\n,,g; + + $ignore = 1; # restore old value; + + return $index; +} + +# +# scan_items - scans the pod specified by $pod for =item directives. we +# will use this information later on in resolving C<> links. +# +sub scan_items { + my($pod, @poddata) = @_; + my($i, $item); + local $_; + + $pod =~ s/\.pod$//; + $pod .= ".html" if $pod; + + foreach $i (0..$#poddata) { + $_ = $poddata[$i]; + + # remove any formatting instructions + s,[A-Z]<([^<>]*)>,$1,g; + + # figure out what kind of item it is and get the first word of + # it's name. + if (/^=item\s+(\w*)\s*.*$/s) { + if ($1 eq "*") { # bullet list + /\A=item\s+\*\s*(.*?)\s*\Z/s; + $item = $1; + } elsif ($1 =~ /^\d+/) { # numbered list + /\A=item\s+\d+\.?(.*?)\s*\Z/s; + $item = $1; + } else { +# /\A=item\s+(.*?)\s*\Z/s; + /\A=item\s+(\w*)/s; + $item = $1; + } + + $items{$item} = "$pod" if $item; + } + } +} + +# +# process_head - convert a pod head[1-6] tag and convert it to HTML format. +# +sub process_head { + my($tag, $heading) = @_; + my $firstword; + + # figure out the level of the =head + $tag =~ /head([1-6])/; + my $level = $1; + + # can't have a heading full of spaces and speechmarks and so on + $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/; + + print HTML "<P>\n" unless $listlevel; + print HTML "<HR>\n" unless $listlevel || $top; + print HTML "<H$level>"; # unless $listlevel; + #print HTML "<H$level>" unless $listlevel; + my $convert = $heading; process_text(\$convert, 0); + $convert = html_escape($convert); + print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>"; + print HTML "</H$level>"; # unless $listlevel; + print HTML "\n"; +} + +# +# process_item - convert a pod item tag and convert it to HTML format. +# +sub process_item { + my $text = $_[0]; + my($i, $quote, $name); + + my $need_preamble = 0; + my $this_entry; + + + # lots of documents start a list without doing an =over. this is + # bad! but, the proper thing to do seems to be to just assume + # they did do an =over. so warn them once and then continue. + warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n" + unless $listlevel; + process_over() unless $listlevel; + + return unless $listlevel; + + # remove formatting instructions from the text + 1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g; + pre_escape(\$text); + + $need_preamble = $items_seen[$listlevel]++ == 0; + + # check if this is the first =item after an =over + $i = $listlevel - 1; + my $need_new = $listlevel >= @listitem; + + if ($text =~ /\A\*/) { # bullet + + if ($need_preamble) { + push(@listend, "</UL>"); + print HTML "<UL>\n"; + } + + print HTML '<LI>'; + if ($text =~ /\A\*\s*(.+)\Z/s) { + print HTML '<STRONG>'; + if ($items_named{$1}++) { + print HTML html_escape($1); + } else { + my $name = 'item_' . htmlify(1,$1); + print HTML qq(<A NAME="$name">), html_escape($1), '</A>'; + } + print HTML '</STRONG>'; + } + + } elsif ($text =~ /\A[\d#]+/) { # numbered list + + if ($need_preamble) { + push(@listend, "</OL>"); + print HTML "<OL>\n"; + } + + print HTML '<LI>'; + if ($text =~ /\A\d+\.?\s*(.+)\Z/s) { + print HTML '<STRONG>'; + if ($items_named{$1}++) { + print HTML html_escape($1); + } else { + my $name = 'item_' . htmlify(0,$1); + print HTML qq(<A NAME="$name">), html_escape($1), '</A>'; + } + print HTML '</STRONG>'; + } + + } else { # all others + + if ($need_preamble) { + push(@listend, '</DL>'); + print HTML "<DL>\n"; + } + + print HTML '<DT>'; + if ($text =~ /(\S+)/) { + print HTML '<STRONG>'; + if ($items_named{$1}++) { + print HTML html_escape($text); + } else { + my $name = 'item_' . htmlify(1,$text); + print HTML qq(<A NAME="$name">), html_escape($text), '</A>'; + } + print HTML '</STRONG>'; + } + print HTML '<DD>'; + } + + print HTML "\n"; +} + +# +# process_over - process a pod over tag and start a corresponding HTML +# list. +# +sub process_over { + # start a new list + $listlevel++; +} + +# +# process_back - process a pod back tag and convert it to HTML format. +# +sub process_back { + warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n" + unless $listlevel; + return unless $listlevel; + + # close off the list. note, I check to see if $listend[$listlevel] is + # defined because an =item directive may have never appeared and thus + # $listend[$listlevel] may have never been initialized. + $listlevel--; + print HTML $listend[$listlevel] if defined $listend[$listlevel]; + print HTML "\n"; + + # don't need the corresponding perl code anymore + pop(@listitem); + pop(@listdata); + pop(@listend); + + pop(@items_seen); +} + +# +# process_cut - process a pod cut tag, thus stop ignoring pod directives. +# +sub process_cut { + $ignore = 1; +} + +# +# process_pod - process a pod pod tag, thus ignore pod directives until we see a +# corresponding cut. +# +sub process_pod { + # no need to set $ignore to 0 cause the main loop did it +} + +# +# process_for - process a =for pod tag. if it's for html, split +# it out verbatim, if illustration, center it, otherwise ignore it. +# +sub process_for { + my($whom, $text) = @_; + if ( $whom =~ /^(pod2)?html$/i) { + print HTML $text; + } elsif ($whom =~ /^illustration$/i) { + 1 while chomp $text; + for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) { + $text .= $ext, last if -r "$text$ext"; + } + print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>}; + } +} + +# +# process_begin - process a =begin pod tag. this pushes +# whom we're beginning on the begin stack. if there's a +# begin stack, we only print if it us. +# +sub process_begin { + my($whom, $text) = @_; + $whom = lc($whom); + push (@begin_stack, $whom); + if ( $whom =~ /^(pod2)?html$/) { + print HTML $text if $text; + } +} + +# +# process_end - process a =end pod tag. pop the +# begin stack. die if we're mismatched. +# +sub process_end { + my($whom, $text) = @_; + $whom = lc($whom); + if ($begin_stack[-1] ne $whom ) { + die "Unmatched begin/end at chunk $paragraph\n" + } + pop @begin_stack; +} + +# +# process_text - handles plaintext that appears in the input pod file. +# there may be pod commands embedded within the text so those must be +# converted to html commands. +# +sub process_text { + my($text, $escapeQuotes) = @_; + my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf); + my($podcommand, $params, $tag, $quote); + + return if $ignore; + + $quote = 0; # status of double-quote conversion + $result = ""; + $rest = $$text; + + if ($rest =~ /^\s+/) { # preformatted text, no pod directives + $rest =~ s/\n+\Z//; + $rest =~ s#.*# + my $line = $&; + 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e; + $line; + #eg; + + $rest =~ s/&/&/g; + $rest =~ s/</</g; + $rest =~ s/>/>/g; + $rest =~ s/"/"/g; + + # try and create links for all occurrences of perl.* within + # the preformatted text. + $rest =~ s{ + (\s*)(perl\w+) + }{ + if (defined $pages{$2}) { # is a link + qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>); + } elsif (defined $pages{dosify($2)}) { # is a link + qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>); + } else { + "$1$2"; + } + }xeg; + $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g; + + my $urls = '(' . join ('|', qw{ + http + telnet + mailto + news + gopher + file + wais + ftp + } ) + . ')'; + + my $ltrs = '\w'; + my $gunk = '/#~:.?+=&%@!\-'; + my $punc = '.:?\-'; + my $any = "${ltrs}${gunk}${punc}"; + + $rest =~ s{ + \b # start at word boundary + ( # begin $1 { + $urls : # need resource and a colon + [$any] +? # followed by on or more + # of any valid character, but + # be conservative and take only + # what you need to.... + ) # end $1 } + (?= # look-ahead non-consumptive assertion + [$punc]* # either 0 or more puntuation + [^$any] # followed by a non-url char + | # or else + $ # then end of the string + ) + }{<A HREF="$1">$1</A>}igox; + + $result = "<PRE>" # text should be as it is (verbatim) + . "$rest\n" + . "</PRE>\n"; + } else { # formatted text + # parse through the string, stopping each time we find a + # pod-escape. once the string has been throughly processed + # we can output it. + while (length $rest) { + # check to see if there are any possible pod directives in + # the remaining part of the text. + if ($rest =~ m/[BCEIFLSZ]</) { + warn "\$rest\t= $rest\n" unless + $rest =~ /\A + ([^<]*?) + ([BCEIFLSZ]?) + < + (.*)\Z/xs; + + $s1 = $1; # pure text + $s2 = $2; # the type of pod-escape that follows + $s3 = '<'; # '<' + $s4 = $3; # the rest of the string + } else { + $s1 = $rest; + $s2 = ""; + $s3 = ""; + $s4 = ""; + } + + if ($s3 eq '<' && $s2) { # a pod-escape + $result .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1); + $podcommand = "$s2<"; + $rest = $s4; + + # find the matching '>' + $match = 1; + $bf = 0; + while ($match && !$bf) { + $bf = 1; + if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) { + $bf = 0; + $match++; + $podcommand .= $1; + $rest = $2; + } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) { + $bf = 0; + $match--; + $podcommand .= $1; + $rest = $2; + } + } + + if ($match != 0) { + warn <<WARN; +$0: $podfile: cannot find matching > for $s2 in paragraph $paragraph. +WARN + $result .= substr $podcommand, 0, 2; + $rest = substr($podcommand, 2) . $rest; + next; + } + + # pull out the parameters to the pod-escape + $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s; + $tag = $1; + $params = $2; + + # process the text within the pod-escape so that any escapes + # which must occur do. + process_text(\$params, 0) unless $tag eq 'L'; + + $s1 = $params; + if (!$tag || $tag eq " ") { # <> : no tag + $s1 = "<$params>"; + } elsif ($tag eq "L") { # L<> : link + $s1 = process_L($params); + } elsif ($tag eq "I" || # I<> : italicize text + $tag eq "B" || # B<> : bold text + $tag eq "F") { # F<> : file specification + $s1 = process_BFI($tag, $params); + } elsif ($tag eq "C") { # C<> : literal code + $s1 = process_C($params, 1); + } elsif ($tag eq "E") { # E<> : escape + $s1 = process_E($params); + } elsif ($tag eq "Z") { # Z<> : zero-width character + $s1 = process_Z($params); + } elsif ($tag eq "S") { # S<> : non-breaking space + $s1 = process_S($params); + } elsif ($tag eq "X") { # S<> : non-breaking space + $s1 = process_X($params); + } else { + warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n"; + } + + $result .= "$s1"; + } else { + # for pure text we must deal with implicit links and + # double-quotes among other things. + $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3"); + $rest = $s4; + } + } + } + $$text = $result; +} + +sub html_escape { + my $rest = $_[0]; + $rest =~ s/&/&/g; + $rest =~ s/</</g; + $rest =~ s/>/>/g; + $rest =~ s/"/"/g; + return $rest; +} + +# +# process_puretext - process pure text (without pod-escapes) converting +# double-quotes and handling implicit C<> links. +# +sub process_puretext { + my($text, $quote) = @_; + my(@words, $result, $rest, $lead, $trail); + + # convert double-quotes to single-quotes + $text =~ s/\A([^"]*)"/$1''/s if $$quote; + while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {} + + $$quote = ($text =~ m/"/ ? 1 : 0); + $text =~ s/\A([^"]*)"/$1``/s if $$quote; + + # keep track of leading and trailing white-space + $lead = ($text =~ /\A(\s*)/s ? $1 : ""); + $trail = ($text =~ /(\s*)\Z/s ? $1 : ""); + + # collapse all white space into a single space + $text =~ s/\s+/ /g; + @words = split(" ", $text); + + # process each word individually + foreach my $word (@words) { + # see if we can infer a link + if ($word =~ /^\w+\(/) { + # has parenthesis so should have been a C<> ref + $word = process_C($word); +# $word =~ /^[^()]*]\(/; +# if (defined $items{$1} && $items{$1}) { +# $word = "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_" +# . htmlify(0,$word) +# . "\">$word</A></CODE>"; +# } elsif (defined $items{$word} && $items{$word}) { +# $word = "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_" +# . htmlify(0,$word) +# . "\">$word</A></CODE>"; +# } else { +# $word = "\n<CODE><A HREF=\"#item_" +# . htmlify(0,$word) +# . "\">$word</A></CODE>"; +# } + } elsif ($word =~ /^[\$\@%&*]+\w+$/) { + # perl variables, should be a C<> ref + $word = process_C($word, 1); + } elsif ($word =~ m,^\w+://\w,) { + # looks like a URL + $word = qq(<A HREF="$word">$word</A>); + } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) { + # looks like an e-mail address + my ($w1, $w2, $w3) = ("", $word, ""); + ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/; + ($w1, $w2, $w3) = ("<", $1, ">$2") if $word =~ /^<(.*?)>(,?)/; + $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3); + } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase? + $word = html_escape($word) if $word =~ /["&<>]/; + $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape; + } else { + $word = html_escape($word) if $word =~ /["&<>]/; + } + } + + # build a new string based upon our conversion + $result = ""; + $rest = join(" ", @words); + while (length($rest) > 75) { + if ( $rest =~ m/^(.{0,75})\s(.*?)$/o || + $rest =~ m/^(\S*)\s(.*?)$/o) { + + $result .= "$1\n"; + $rest = $2; + } else { + $result .= "$rest\n"; + $rest = ""; + } + } + $result .= $rest if $rest; + + # restore the leading and trailing white-space + $result = "$lead$result$trail"; + + return $result; +} + +# +# pre_escape - convert & in text to $amp; +# +sub pre_escape { + my($str) = @_; + + $$str =~ s,&,&,g; +} + +# +# dosify - convert filenames to 8.3 +# +sub dosify { + my($str) = @_; + if ($Is83) { + $str = lc $str; + $str =~ s/(\.\w+)/substr ($1,0,4)/ge; + $str =~ s/(\w+)/substr ($1,0,8)/ge; + } + return $str; +} + +# +# process_L - convert a pod L<> directive to a corresponding HTML link. +# most of the links made are inferred rather than known about directly +# (i.e it's not known whether the =head\d section exists in the target file, +# or whether a .pod file exists in the case of split files). however, the +# guessing usually works. +# +# Unlike the other directives, this should be called with an unprocessed +# string, else tags in the link won't be matched. +# +sub process_L { + my($str) = @_; + my($s1, $s2, $linktext, $page, $page83, $section, $link); # work strings + + $str =~ s/\n/ /g; # undo word-wrapped tags + $s1 = $str; + for ($s1) { + # LREF: a la HREF L<show this text|man/section> + $linktext = $1 if s:^([^|]+)\|::; + + # a :: acts like a / + s,::,/,; + + # make sure sections start with a / + s,^",/",g; + s,^,/,g if (!m,/, && / /); + + # check if there's a section specified + if (m,^(.*?)/"?(.*?)"?$,) { # yes + ($page, $section) = ($1, $2); + } else { # no + ($page, $section) = ($str, ""); + } + + # check if we know that this is a section in this page + if (!defined $pages{$page} && defined $sections{$page}) { + $section = $page; + $page = ""; + } + } + + $page83=dosify($page); + $page=$page83 if (defined $pages{$page83}); + if ($page eq "") { + $link = "#" . htmlify(0,$section); + $linktext = $section unless defined($linktext); + } elsif (!defined $pages{$page}) { + warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n"; + $link = ""; + $linktext = $page unless defined($linktext); + } else { + $linktext = ($section ? "$section" : "the $page manpage") unless defined($linktext); + $section = htmlify(0,$section) if $section ne ""; + + # if there is a directory by the name of the page, then assume that an + # appropriate section will exist in the subdirectory + if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) { + $link = "$htmlroot/$1/$section.html"; + + # since there is no directory by the name of the page, the section will + # have to exist within a .html of the same name. thus, make sure there + # is a .pod or .pm that might become that .html + } else { + $section = "#$section"; + # check if there is a .pod with the page name + if ($pages{$page} =~ /([^:]*)\.pod:/) { + $link = "$htmlroot/$1.html$section"; + } elsif ($pages{$page} =~ /([^:]*)\.pm:/) { + $link = "$htmlroot/$1.html$section"; + } else { + warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ". + "no .pod or .pm found\n"; + $link = ""; + $linktext = $section unless defined($linktext); + } + } + } + + process_text(\$linktext, 0); + if ($link) { + $s1 = "<A HREF=\"$link\">$linktext</A>"; + } else { + $s1 = "<EM>$linktext</EM>"; + } + return $s1; +} + +# +# process_BFI - process any of the B<>, F<>, or I<> pod-escapes and +# convert them to corresponding HTML directives. +# +sub process_BFI { + my($tag, $str) = @_; + my($s1); # work string + my(%repltext) = ( 'B' => 'STRONG', + 'F' => 'EM', + 'I' => 'EM'); + + # extract the modified text and convert to HTML + $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>"; + return $s1; +} + +# +# process_C - process the C<> pod-escape. +# +sub process_C { + my($str, $doref) = @_; + my($s1, $s2); + + $s1 = $str; + $s1 =~ s/\([^()]*\)//g; # delete parentheses + $s2 = $s1; + $s1 =~ s/\W//g; # delete bogus characters + $str = html_escape($str); + + # if there was a pod file that we found earlier with an appropriate + # =item directive, then create a link to that page. + if ($doref && defined $items{$s1}) { + $s1 = ($items{$s1} ? + "<A HREF=\"$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) . "\">$str</A>" : + "<A HREF=\"#item_" . htmlify(0,$s2) . "\">$str</A>"); + $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,; + confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/; + } else { + $s1 = "<CODE>$str</CODE>"; + # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose + } + + + return $s1; +} + +# +# process_E - process the E<> pod directive which seems to escape a character. +# +sub process_E { + my($str) = @_; + + for ($str) { + s,([^/].*),\&$1\;,g; + } + + return $str; +} + +# +# process_Z - process the Z<> pod directive which really just amounts to +# ignoring it. this allows someone to start a paragraph with an = +# +sub process_Z { + my($str) = @_; + + # there is no equivalent in HTML for this so just ignore it. + $str = ""; + return $str; +} + +# +# process_S - process the S<> pod directive which means to convert all +# spaces in the string to non-breaking spaces (in HTML-eze). +# +sub process_S { + my($str) = @_; + + # convert all spaces in the text to non-breaking spaces in HTML. + $str =~ s/ / /g; + return $str; +} + +# +# process_X - this is supposed to make an index entry. we'll just +# ignore it. +# +sub process_X { + return ''; +} + + +# +# finish_list - finish off any pending HTML lists. this should be called +# after the entire pod file has been read and converted. +# +sub finish_list { + while ($listlevel > 0) { + print HTML "</DL>\n"; + $listlevel--; + } +} + +# +# htmlify - converts a pod section specification to a suitable section +# specification for HTML. if first arg is 1, only takes 1st word. +# +sub htmlify { + my($compact, $heading) = @_; + + if ($compact) { + $heading =~ /^(\w+)/; + $heading = $1; + } + + # $heading = lc($heading); + $heading =~ s/[^\w\s]/_/g; + $heading =~ s/(\s+)/ /g; + $heading =~ s/^\s*(.*?)\s*$/$1/s; + $heading =~ s/ /_/g; + $heading =~ s/\A(.{32}).*\Z/$1/s; + $heading =~ s/\s+\Z//; + $heading =~ s/_{2,}/_/g; + + return $heading; +} + +BEGIN { +} + +1; diff --git a/contrib/perl5/lib/Pod/Text.pm b/contrib/perl5/lib/Pod/Text.pm new file mode 100644 index 0000000..67993db --- /dev/null +++ b/contrib/perl5/lib/Pod/Text.pm @@ -0,0 +1,549 @@ +package Pod::Text; + +=head1 NAME + +Pod::Text - convert POD data to formatted ASCII text + +=head1 SYNOPSIS + + use Pod::Text; + + pod2text("perlfunc.pod"); + +Also: + + pod2text [B<-a>] [B<->I<width>] < input.pod + +=head1 DESCRIPTION + +Pod::Text is a module that can convert documentation in the POD format (such +as can be found throughout the Perl distribution) into formatted ASCII. +Termcap is optionally supported for boldface/underline, and can enabled via +C<$Pod::Text::termcap=1>. If termcap has not been enabled, then backspaces +will be used to simulate bold and underlined text. + +A separate F<pod2text> program is included that is primarily a wrapper for +Pod::Text. + +The single function C<pod2text()> can take the optional options B<-a> +for an alternative output format, then a B<->I<width> option with the +max terminal width, followed by one or two arguments. The first +should be the name of a file to read the pod from, or "E<lt>&STDIN" to read from +STDIN. A second argument, if provided, should be a filehandle glob where +output should be sent. + +=head1 AUTHOR + +Tom Christiansen E<lt>F<tchrist@mox.perl.com>E<gt> + +=head1 TODO + +Cleanup work. The input and output locations need to be more flexible, +termcap shouldn't be a global variable, and the terminal speed needs to +be properly calculated. + +=cut + +use Term::Cap; +require Exporter; +@ISA = Exporter; +@EXPORT = qw(pod2text); + +use vars qw($VERSION); +$VERSION = "1.0203"; + +$termcap=0; + +$opt_alt_format = 0; + +#$use_format=1; + +$UNDL = "\x1b[4m"; +$INV = "\x1b[7m"; +$BOLD = "\x1b[1m"; +$NORM = "\x1b[0m"; + +sub pod2text { +shift if $opt_alt_format = ($_[0] eq '-a'); + +if($termcap and !$setuptermcap) { + $setuptermcap=1; + + my($term) = Tgetent Term::Cap { TERM => undef, OSPEED => 9600 }; + $UNDL = $term->{'_us'}; + $INV = $term->{'_mr'}; + $BOLD = $term->{'_md'}; + $NORM = $term->{'_me'}; +} + +$SCREEN = ($_[0] =~ /^-(\d+)/ && (shift, $1)) + || $ENV{COLUMNS} + || ($ENV{TERMCAP} =~ /co#(\d+)/)[0] + || ($^O ne 'MSWin32' && $^O ne 'dos' && (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0]) + || 72; + +@_ = ("<&STDIN") unless @_; +local($file,*OUTPUT) = @_; +*OUTPUT = *STDOUT if @_<2; + +local $: = $:; +$: = " \n" if $opt_alt_format; # Do not break ``-L/lib/'' into ``- L/lib/''. + +$/ = ""; + +$FANCY = 0; + +$cutting = 1; +$DEF_INDENT = 4; +$indent = $DEF_INDENT; +$needspace = 0; +$begun = ""; + +open(IN, $file) || die "Couldn't open $file: $!"; + +POD_DIRECTIVE: while (<IN>) { + if ($cutting) { + next unless /^=/; + $cutting = 0; + } + if ($begun) { + if (/^=end\s+$begun/) { + $begun = ""; + } + elsif ($begun eq "text") { + print OUTPUT $_; + } + next; + } + 1 while s{^(.*?)(\t+)(.*)$}{ + $1 + . (' ' x (length($2) * 8 - length($1) % 8)) + . $3 + }me; + # Translate verbatim paragraph + if (/^\s/) { + output($_); + next; + } + + if (/^=for\s+(\S+)\s*(.*)/s) { + if ($1 eq "text") { + print OUTPUT $2,""; + } else { + # ignore unknown for + } + next; + } + elsif (/^=begin\s+(\S+)\s*(.*)/s) { + $begun = $1; + if ($1 eq "text") { + print OUTPUT $2.""; + } + next; + } + +sub prepare_for_output { + + s/\s*$/\n/; + &init_noremap; + + # need to hide E<> first; they're processed in clear_noremap + s/(E<[^<>]+>)/noremap($1)/ge; + $maxnest = 10; + while ($maxnest-- && /[A-Z]</) { + unless ($FANCY) { + if ($opt_alt_format) { + s/[BC]<(.*?)>/``$1''/sg; + s/F<(.*?)>/"$1"/sg; + } else { + s/C<(.*?)>/`$1'/sg; + } + } else { + s/C<(.*?)>/noremap("E<lchevron>${1}E<rchevron>")/sge; + } + # s/[IF]<(.*?)>/italic($1)/ge; + s/I<(.*?)>/*$1*/sg; + # s/[CB]<(.*?)>/bold($1)/ge; + s/X<.*?>//sg; + + # LREF: a la HREF L<show this text|man/section> + s:L<([^|>]+)\|[^>]+>:$1:g; + + # LREF: a manpage(3f) + s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the $1$2 manpage:g; + # LREF: an =item on another manpage + s{ + L< + ([^/]+) + / + ( + [:\w]+ + (\(\))? + ) + > + } {the "$2" entry in the $1 manpage}gx; + + # LREF: an =item on this manpage + s{ + ((?: + L< + / + ( + [:\w]+ + (\(\))? + ) + > + (,?\s+(and\s+)?)? + )+) + } { internal_lrefs($1) }gex; + + # LREF: a =head2 (head1?), maybe on a manpage, maybe right here + # the "func" can disambiguate + s{ + L< + (?: + ([a-zA-Z]\S+?) / + )? + "?(.*?)"? + > + }{ + do { + $1 # if no $1, assume it means on this page. + ? "the section on \"$2\" in the $1 manpage" + : "the section on \"$2\"" + } + }sgex; + + s/[A-Z]<(.*?)>/$1/sg; + } + clear_noremap(1); +} + + &prepare_for_output; + + if (s/^=//) { + # $needspace = 0; # Assume this. + # s/\n/ /g; + ($Cmd, $_) = split(' ', $_, 2); + # clear_noremap(1); + if ($Cmd eq 'cut') { + $cutting = 1; + } + elsif ($Cmd eq 'pod') { + $cutting = 0; + } + elsif ($Cmd eq 'head1') { + makespace(); + if ($opt_alt_format) { + print OUTPUT "\n"; + s/^(.+?)[ \t]*$/==== $1 ====/; + } + print OUTPUT; + # print OUTPUT uc($_); + $needspace = $opt_alt_format; + } + elsif ($Cmd eq 'head2') { + makespace(); + # s/(\w+)/\u\L$1/g; + #print ' ' x $DEF_INDENT, $_; + # print "\xA7"; + s/(\w)/\xA7 $1/ if $FANCY; + if ($opt_alt_format) { + s/^(.+?)[ \t]*$/== $1 ==/; + print OUTPUT "\n", $_; + } else { + print OUTPUT ' ' x ($DEF_INDENT/2), $_, "\n"; + } + $needspace = $opt_alt_format; + } + elsif ($Cmd eq 'over') { + push(@indent,$indent); + $indent += ($_ + 0) || $DEF_INDENT; + } + elsif ($Cmd eq 'back') { + $indent = pop(@indent); + warn "Unmatched =back\n" unless defined $indent; + } + elsif ($Cmd eq 'item') { + makespace(); + # s/\A(\s*)\*/$1\xb7/ if $FANCY; + # s/^(\s*\*\s+)/$1 /; + { + if (length() + 3 < $indent) { + my $paratag = $_; + $_ = <IN>; + if (/^=/) { # tricked! + local($indent) = $indent[$#index - 1] || $DEF_INDENT; + output($paratag); + redo POD_DIRECTIVE; + } + &prepare_for_output; + IP_output($paratag, $_); + } else { + local($indent) = $indent[$#index - 1] || $DEF_INDENT; + output($_, 0); + } + } + } + else { + warn "Unrecognized directive: $Cmd\n"; + } + } + else { + # clear_noremap(1); + makespace(); + output($_, 1); + } +} + +close(IN); + +} + +######################################################################### + +sub makespace { + if ($needspace) { + print OUTPUT "\n"; + $needspace = 0; + } +} + +sub bold { + my $line = shift; + return $line if $use_format; + if($termcap) { + $line = "$BOLD$line$NORM"; + } else { + $line =~ s/(.)/$1\b$1/g; + } +# $line = "$BOLD$line$NORM" if $ansify; + return $line; +} + +sub italic { + my $line = shift; + return $line if $use_format; + if($termcap) { + $line = "$UNDL$line$NORM"; + } else { + $line =~ s/(.)/$1\b_/g; + } +# $line = "$UNDL$line$NORM" if $ansify; + return $line; +} + +# Fill a paragraph including underlined and overstricken chars. +# It's not perfect for words longer than the margin, and it's probably +# slow, but it works. +sub fill { + local $_ = shift; + my $par = ""; + my $indent_space = " " x $indent; + my $marg = $SCREEN-$indent; + my $line = $indent_space; + my $line_length; + foreach (split) { + my $word_length = length; + $word_length -= 2 while /\010/g; # Subtract backspaces + + if ($line_length + $word_length > $marg) { + $par .= $line . "\n"; + $line= $indent_space . $_; + $line_length = $word_length; + } + else { + if ($line_length) { + $line_length++; + $line .= " "; + } + $line_length += $word_length; + $line .= $_; + } + } + $par .= "$line\n" if $line; + $par .= "\n"; + return $par; +} + +sub IP_output { + local($tag, $_) = @_; + local($tag_indent) = $indent[$#index - 1] || $DEF_INDENT; + $tag_cols = $SCREEN - $tag_indent; + $cols = $SCREEN - $indent; + $tag =~ s/\s*$//; + s/\s+/ /g; + s/^ //; + $str = "format OUTPUT = \n" + . (($opt_alt_format && $tag_indent > 1) + ? ":" . " " x ($tag_indent - 1) + : " " x ($tag_indent)) + . '@' . ('<' x ($indent - $tag_indent - 1)) + . "^" . ("<" x ($cols - 1)) . "\n" + . '$tag, $_' + . "\n~~" + . (" " x ($indent-2)) + . "^" . ("<" x ($cols - 5)) . "\n" + . '$_' . "\n\n.\n1"; + #warn $str; warn "tag is $tag, _ is $_"; + eval $str || die; + write OUTPUT; +} + +sub output { + local($_, $reformat) = @_; + if ($reformat) { + $cols = $SCREEN - $indent; + s/\s+/ /g; + s/^ //; + $str = "format OUTPUT = \n~~" + . (" " x ($indent-2)) + . "^" . ("<" x ($cols - 5)) . "\n" + . '$_' . "\n\n.\n1"; + eval $str || die; + write OUTPUT; + } else { + s/^/' ' x $indent/gem; + s/^\s+\n$/\n/gm; + s/^ /: /s if defined($reformat) && $opt_alt_format; + print OUTPUT; + } +} + +sub noremap { + local($thing_to_hide) = shift; + $thing_to_hide =~ tr/\000-\177/\200-\377/; + return $thing_to_hide; +} + +sub init_noremap { + die "unmatched init" if $mapready++; + #mask off high bit characters in input stream + s/([\200-\377])/"E<".ord($1).">"/ge; +} + +sub clear_noremap { + my $ready_to_print = $_[0]; + die "unmatched clear" unless $mapready--; + tr/\200-\377/\000-\177/; + # now for the E<>s, which have been hidden until now + # otherwise the interative \w<> processing would have + # been hosed by the E<gt> + s { + E< + ( + ( \d+ ) + | ( [A-Za-z]+ ) + ) + > + } { + do { + defined $2 + ? chr($2) + : + defined $HTML_Escapes{$3} + ? do { $HTML_Escapes{$3} } + : do { + warn "Unknown escape: E<$1> in $_"; + "E<$1>"; + } + } + }egx if $ready_to_print; +} + +sub internal_lrefs { + local($_) = shift; + s{L</([^>]+)>}{$1}g; + my(@items) = split( /(?:,?\s+(?:and\s+)?)/ ); + my $retstr = "the "; + my $i; + for ($i = 0; $i <= $#items; $i++) { + $retstr .= "C<$items[$i]>"; + $retstr .= ", " if @items > 2 && $i != $#items; + $retstr .= " and " if $i+2 == @items; + } + + $retstr .= " entr" . ( @items > 1 ? "ies" : "y" ) + . " elsewhere in this document "; + + return $retstr; + +} + +BEGIN { + +%HTML_Escapes = ( + 'amp' => '&', # ampersand + 'lt' => '<', # left chevron, less-than + 'gt' => '>', # right chevron, greater-than + 'quot' => '"', # double quote + + "Aacute" => "\xC1", # capital A, acute accent + "aacute" => "\xE1", # small a, acute accent + "Acirc" => "\xC2", # capital A, circumflex accent + "acirc" => "\xE2", # small a, circumflex accent + "AElig" => "\xC6", # capital AE diphthong (ligature) + "aelig" => "\xE6", # small ae diphthong (ligature) + "Agrave" => "\xC0", # capital A, grave accent + "agrave" => "\xE0", # small a, grave accent + "Aring" => "\xC5", # capital A, ring + "aring" => "\xE5", # small a, ring + "Atilde" => "\xC3", # capital A, tilde + "atilde" => "\xE3", # small a, tilde + "Auml" => "\xC4", # capital A, dieresis or umlaut mark + "auml" => "\xE4", # small a, dieresis or umlaut mark + "Ccedil" => "\xC7", # capital C, cedilla + "ccedil" => "\xE7", # small c, cedilla + "Eacute" => "\xC9", # capital E, acute accent + "eacute" => "\xE9", # small e, acute accent + "Ecirc" => "\xCA", # capital E, circumflex accent + "ecirc" => "\xEA", # small e, circumflex accent + "Egrave" => "\xC8", # capital E, grave accent + "egrave" => "\xE8", # small e, grave accent + "ETH" => "\xD0", # capital Eth, Icelandic + "eth" => "\xF0", # small eth, Icelandic + "Euml" => "\xCB", # capital E, dieresis or umlaut mark + "euml" => "\xEB", # small e, dieresis or umlaut mark + "Iacute" => "\xCD", # capital I, acute accent + "iacute" => "\xED", # small i, acute accent + "Icirc" => "\xCE", # capital I, circumflex accent + "icirc" => "\xEE", # small i, circumflex accent + "Igrave" => "\xCD", # capital I, grave accent + "igrave" => "\xED", # small i, grave accent + "Iuml" => "\xCF", # capital I, dieresis or umlaut mark + "iuml" => "\xEF", # small i, dieresis or umlaut mark + "Ntilde" => "\xD1", # capital N, tilde + "ntilde" => "\xF1", # small n, tilde + "Oacute" => "\xD3", # capital O, acute accent + "oacute" => "\xF3", # small o, acute accent + "Ocirc" => "\xD4", # capital O, circumflex accent + "ocirc" => "\xF4", # small o, circumflex accent + "Ograve" => "\xD2", # capital O, grave accent + "ograve" => "\xF2", # small o, grave accent + "Oslash" => "\xD8", # capital O, slash + "oslash" => "\xF8", # small o, slash + "Otilde" => "\xD5", # capital O, tilde + "otilde" => "\xF5", # small o, tilde + "Ouml" => "\xD6", # capital O, dieresis or umlaut mark + "ouml" => "\xF6", # small o, dieresis or umlaut mark + "szlig" => "\xDF", # small sharp s, German (sz ligature) + "THORN" => "\xDE", # capital THORN, Icelandic + "thorn" => "\xFE", # small thorn, Icelandic + "Uacute" => "\xDA", # capital U, acute accent + "uacute" => "\xFA", # small u, acute accent + "Ucirc" => "\xDB", # capital U, circumflex accent + "ucirc" => "\xFB", # small u, circumflex accent + "Ugrave" => "\xD9", # capital U, grave accent + "ugrave" => "\xF9", # small u, grave accent + "Uuml" => "\xDC", # capital U, dieresis or umlaut mark + "uuml" => "\xFC", # small u, dieresis or umlaut mark + "Yacute" => "\xDD", # capital Y, acute accent + "yacute" => "\xFD", # small y, acute accent + "yuml" => "\xFF", # small y, dieresis or umlaut mark + + "lchevron" => "\xAB", # left chevron (double less than) + "rchevron" => "\xBB", # right chevron (double greater than) +); +} + +1; |