diff options
Diffstat (limited to 'contrib/perl5/lib/Text')
-rw-r--r-- | contrib/perl5/lib/Text/Abbrev.pm | 81 | ||||
-rw-r--r-- | contrib/perl5/lib/Text/ParseWords.pm | 262 | ||||
-rw-r--r-- | contrib/perl5/lib/Text/Soundex.pm | 150 | ||||
-rw-r--r-- | contrib/perl5/lib/Text/Tabs.pm | 97 | ||||
-rw-r--r-- | contrib/perl5/lib/Text/Wrap.pm | 175 |
5 files changed, 0 insertions, 765 deletions
diff --git a/contrib/perl5/lib/Text/Abbrev.pm b/contrib/perl5/lib/Text/Abbrev.pm deleted file mode 100644 index d4f12d0..0000000 --- a/contrib/perl5/lib/Text/Abbrev.pm +++ /dev/null @@ -1,81 +0,0 @@ -package Text::Abbrev; -require 5.005; # Probably works on earlier versions too. -require Exporter; - -=head1 NAME - -abbrev - create an abbreviation table from a list - -=head1 SYNOPSIS - - use Text::Abbrev; - abbrev $hashref, LIST - - -=head1 DESCRIPTION - -Stores all unambiguous truncations of each element of LIST -as keys in the associative array referenced by C<$hashref>. -The values are the original list elements. - -=head1 EXAMPLE - - $hashref = abbrev qw(list edit send abort gripe); - - %hash = abbrev qw(list edit send abort gripe); - - abbrev $hashref, qw(list edit send abort gripe); - - abbrev(*hash, qw(list edit send abort gripe)); - -=cut - -@ISA = qw(Exporter); -@EXPORT = qw(abbrev); - -# Usage: -# abbrev \%foo, LIST; -# ... -# $long = $foo{$short}; - -sub abbrev { - my ($word, $hashref, $glob, %table, $returnvoid); - - if (ref($_[0])) { # hash reference preferably - $hashref = shift; - $returnvoid = 1; - } elsif (ref \$_[0] eq 'GLOB') { # is actually a glob (deprecated) - $hashref = \%{shift()}; - $returnvoid = 1; - } - %{$hashref} = (); - - WORD: foreach $word (@_) { - for (my $len = (length $word) - 1; $len > 0; --$len) { - my $abbrev = substr($word,0,$len); - my $seen = ++$table{$abbrev}; - if ($seen == 1) { # We're the first word so far to have - # this abbreviation. - $hashref->{$abbrev} = $word; - } elsif ($seen == 2) { # We're the second word to have this - # abbreviation, so we can't use it. - delete $hashref->{$abbrev}; - } else { # We're the third word to have this - # abbreviation, so skip to the next word. - next WORD; - } - } - } - # Non-abbreviations always get entered, even if they aren't unique - foreach $word (@_) { - $hashref->{$word} = $word; - } - return if $returnvoid; - if (wantarray) { - %{$hashref}; - } else { - $hashref; - } -} - -1; diff --git a/contrib/perl5/lib/Text/ParseWords.pm b/contrib/perl5/lib/Text/ParseWords.pm deleted file mode 100644 index 23eace9..0000000 --- a/contrib/perl5/lib/Text/ParseWords.pm +++ /dev/null @@ -1,262 +0,0 @@ -package Text::ParseWords; - -use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE); -$VERSION = "3.2"; - -require 5.000; - -use Exporter; -@ISA = qw(Exporter); -@EXPORT = qw(shellwords quotewords nested_quotewords parse_line); -@EXPORT_OK = qw(old_shellwords); - - -sub shellwords { - local(@lines) = @_; - $lines[$#lines] =~ s/\s+$//; - return(quotewords('\s+', 0, @lines)); -} - - - -sub quotewords { - my($delim, $keep, @lines) = @_; - my($line, @words, @allwords); - - - foreach $line (@lines) { - @words = parse_line($delim, $keep, $line); - return() unless (@words || !length($line)); - push(@allwords, @words); - } - return(@allwords); -} - - - -sub nested_quotewords { - my($delim, $keep, @lines) = @_; - my($i, @allwords); - - for ($i = 0; $i < @lines; $i++) { - @{$allwords[$i]} = parse_line($delim, $keep, $lines[$i]); - return() unless (@{$allwords[$i]} || !length($lines[$i])); - } - return(@allwords); -} - - - -sub parse_line { - # We will be testing undef strings - no warnings; - - my($delimiter, $keep, $line) = @_; - my($quote, $quoted, $unquoted, $delim, $word, @pieces); - - while (length($line)) { - - ($quote, $quoted, undef, $unquoted, $delim, undef) = - $line =~ m/^(["']) # a $quote - ((?:\\.|(?!\1)[^\\])*) # and $quoted text - \1 # followed by the same quote - ([\000-\377]*) # and the rest - | # --OR-- - ^((?:\\.|[^\\"'])*?) # an $unquoted text - (\Z(?!\n)|(?-x:$delimiter)|(?!^)(?=["'])) - # plus EOL, delimiter, or quote - ([\000-\377]*) # the rest - /x; # extended layout - return() unless( $quote || length($unquoted) || length($delim)); - - $line = $+; - - if ($keep) { - $quoted = "$quote$quoted$quote"; - } - else { - $unquoted =~ s/\\(.)/$1/g; - if (defined $quote) { - $quoted =~ s/\\(.)/$1/g if ($quote eq '"'); - $quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'"); - } - } - $word .= defined $quote ? $quoted : $unquoted; - - if (length($delim)) { - push(@pieces, $word); - push(@pieces, $delim) if ($keep eq 'delimiters'); - undef $word; - } - if (!length($line)) { - push(@pieces, $word); - } - } - return(@pieces); -} - - - -sub old_shellwords { - - # Usage: - # use ParseWords; - # @words = old_shellwords($line); - # or - # @words = old_shellwords(@lines); - - local($_) = join('', @_); - my(@words,$snippet,$field); - - s/^\s+//; - while ($_ ne '') { - $field = ''; - for (;;) { - if (s/^"(([^"\\]|\\.)*)"//) { - ($snippet = $1) =~ s#\\(.)#$1#g; - } - elsif (/^"/) { - return(); - } - elsif (s/^'(([^'\\]|\\.)*)'//) { - ($snippet = $1) =~ s#\\(.)#$1#g; - } - elsif (/^'/) { - return(); - } - elsif (s/^\\(.)//) { - $snippet = $1; - } - elsif (s/^([^\s\\'"]+)//) { - $snippet = $1; - } - else { - s/^\s+//; - last; - } - $field .= $snippet; - } - push(@words, $field); - } - @words; -} - -1; - -__END__ - -=head1 NAME - -Text::ParseWords - parse text into an array of tokens or array of arrays - -=head1 SYNOPSIS - - use Text::ParseWords; - @lists = &nested_quotewords($delim, $keep, @lines); - @words = "ewords($delim, $keep, @lines); - @words = &shellwords(@lines); - @words = &parse_line($delim, $keep, $line); - @words = &old_shellwords(@lines); # DEPRECATED! - -=head1 DESCRIPTION - -The &nested_quotewords() and "ewords() functions accept a delimiter -(which can be a regular expression) -and a list of lines and then breaks those lines up into a list of -words ignoring delimiters that appear inside quotes. "ewords() -returns all of the tokens in a single long list, while &nested_quotewords() -returns a list of token lists corresponding to the elements of @lines. -&parse_line() does tokenizing on a single string. The &*quotewords() -functions simply call &parse_lines(), so if you're only splitting -one line you can call &parse_lines() directly and save a function -call. - -The $keep argument is a boolean flag. If true, then the tokens are -split on the specified delimiter, but all other characters (quotes, -backslashes, etc.) are kept in the tokens. If $keep is false then the -&*quotewords() functions remove all quotes and backslashes that are -not themselves backslash-escaped or inside of single quotes (i.e., -"ewords() tries to interpret these characters just like the Bourne -shell). NB: these semantics are significantly different from the -original version of this module shipped with Perl 5.000 through 5.004. -As an additional feature, $keep may be the keyword "delimiters" which -causes the functions to preserve the delimiters in each string as -tokens in the token lists, in addition to preserving quote and -backslash characters. - -&shellwords() is written as a special case of "ewords(), and it -does token parsing with whitespace as a delimiter-- similar to most -Unix shells. - -=head1 EXAMPLES - -The sample program: - - use Text::ParseWords; - @words = "ewords('\s+', 0, q{this is "a test" of\ quotewords \"for you}); - $i = 0; - foreach (@words) { - print "$i: <$_>\n"; - $i++; - } - -produces: - - 0: <this> - 1: <is> - 2: <a test> - 3: <of quotewords> - 4: <"for> - 5: <you> - -demonstrating: - -=over 4 - -=item 0 - -a simple word - -=item 1 - -multiple spaces are skipped because of our $delim - -=item 2 - -use of quotes to include a space in a word - -=item 3 - -use of a backslash to include a space in a word - -=item 4 - -use of a backslash to remove the special meaning of a double-quote - -=item 5 - -another simple word (note the lack of effect of the -backslashed double-quote) - -=back - -Replacing C<"ewords('\s+', 0, q{this is...})> -with C<&shellwords(q{this is...})> -is a simpler way to accomplish the same thing. - -=head1 AUTHORS - -Maintainer is Hal Pomeranz <pomeranz@netcom.com>, 1994-1997 (Original -author unknown). Much of the code for &parse_line() (including the -primary regexp) from Joerk Behrends <jbehrends@multimediaproduzenten.de>. - -Examples section another documentation provided by John Heidemann -<johnh@ISI.EDU> - -Bug reports, patches, and nagging provided by lots of folks-- thanks -everybody! Special thanks to Michael Schwern <schwern@envirolink.org> -for assuring me that a &nested_quotewords() would be useful, and to -Jeff Friedl <jfriedl@yahoo-inc.com> for telling me not to worry about -error-checking (sort of-- you had to be there). - -=cut diff --git a/contrib/perl5/lib/Text/Soundex.pm b/contrib/perl5/lib/Text/Soundex.pm deleted file mode 100644 index d588764..0000000 --- a/contrib/perl5/lib/Text/Soundex.pm +++ /dev/null @@ -1,150 +0,0 @@ -package Text::Soundex; -require 5.000; -require Exporter; - -@ISA = qw(Exporter); -@EXPORT = qw(&soundex $soundex_nocode); - -$VERSION = '1.0'; - -# $Id: soundex.pl,v 1.2 1994/03/24 00:30:27 mike Exp $ -# -# Implementation of soundex algorithm as described by Knuth in volume -# 3 of The Art of Computer Programming, with ideas stolen from Ian -# Phillips <ian@pipex.net>. -# -# Mike Stok <Mike.Stok@meiko.concord.ma.us>, 2 March 1994. -# -# Knuth's test cases are: -# -# Euler, Ellery -> E460 -# Gauss, Ghosh -> G200 -# Hilbert, Heilbronn -> H416 -# Knuth, Kant -> K530 -# Lloyd, Ladd -> L300 -# Lukasiewicz, Lissajous -> L222 -# -# $Log: soundex.pl,v $ -# Revision 1.2 1994/03/24 00:30:27 mike -# Subtle bug (any excuse :-) spotted by Rich Pinder <rpinder@hsc.usc.edu> -# in the way I handles leasing characters which were different but had -# the same soundex code. This showed up comparing it with Oracle's -# soundex output. -# -# Revision 1.1 1994/03/02 13:01:30 mike -# Initial revision -# -# -############################################################################## - -# $soundex_nocode is used to indicate a string doesn't have a soundex -# code, I like undef other people may want to set it to 'Z000'. - -$soundex_nocode = undef; - -sub soundex -{ - local (@s, $f, $fc, $_) = @_; - - push @s, '' unless @s; # handle no args as a single empty string - - foreach (@s) - { - $_ = uc $_; - tr/A-Z//cd; - - if ($_ eq '') - { - $_ = $soundex_nocode; - } - else - { - ($f) = /^(.)/; - tr/AEHIOUWYBFPVCGJKQSXZDTLMNR/00000000111122222222334556/; - ($fc) = /^(.)/; - s/^$fc+//; - tr///cs; - tr/0//d; - $_ = $f . $_ . '000'; - s/^(.{4}).*/$1/; - } - } - - wantarray ? @s : shift @s; -} - -1; - -__END__ - -=head1 NAME - -Text::Soundex - Implementation of the Soundex Algorithm as Described by Knuth - -=head1 SYNOPSIS - - use Text::Soundex; - - $code = soundex $string; # get soundex code for a string - @codes = soundex @list; # get list of codes for list of strings - - # set value to be returned for strings without soundex code - - $soundex_nocode = 'Z000'; - -=head1 DESCRIPTION - -This module implements the soundex algorithm as described by Donald Knuth -in Volume 3 of B<The Art of Computer Programming>. The algorithm is -intended to hash words (in particular surnames) into a small space using a -simple model which approximates the sound of the word when spoken by an English -speaker. Each word is reduced to a four character string, the first -character being an upper case letter and the remaining three being digits. - -If there is no soundex code representation for a string then the value of -C<$soundex_nocode> is returned. This is initially set to C<undef>, but -many people seem to prefer an I<unlikely> value like C<Z000> -(how unlikely this is depends on the data set being dealt with.) Any value -can be assigned to C<$soundex_nocode>. - -In scalar context C<soundex> returns the soundex code of its first -argument, and in list context a list is returned in which each element is the -soundex code for the corresponding argument passed to C<soundex> e.g. - - @codes = soundex qw(Mike Stok); - -leaves C<@codes> containing C<('M200', 'S320')>. - -=head1 EXAMPLES - -Knuth's examples of various names and the soundex codes they map to -are listed below: - - Euler, Ellery -> E460 - Gauss, Ghosh -> G200 - Hilbert, Heilbronn -> H416 - Knuth, Kant -> K530 - Lloyd, Ladd -> L300 - Lukasiewicz, Lissajous -> L222 - -so: - - $code = soundex 'Knuth'; # $code contains 'K530' - @list = soundex qw(Lloyd Gauss); # @list contains 'L300', 'G200' - -=head1 LIMITATIONS - -As the soundex algorithm was originally used a B<long> time ago in the US -it considers only the English alphabet and pronunciation. - -As it is mapping a large space (arbitrary length strings) onto a small -space (single letter plus 3 digits) no inference can be made about the -similarity of two strings which end up with the same soundex code. For -example, both C<Hilbert> and C<Heilbronn> end up with a soundex code -of C<H416>. - -=head1 AUTHOR - -This code was implemented by Mike Stok (C<stok@cybercom.net>) from the -description given by Knuth. Ian Phillips (C<ian@pipex.net>) and Rich Pinder -(C<rpinder@hsc.usc.edu>) supplied ideas and spotted mistakes. diff --git a/contrib/perl5/lib/Text/Tabs.pm b/contrib/perl5/lib/Text/Tabs.pm deleted file mode 100644 index c431019..0000000 --- a/contrib/perl5/lib/Text/Tabs.pm +++ /dev/null @@ -1,97 +0,0 @@ - -package Text::Tabs; - -require Exporter; - -@ISA = (Exporter); -@EXPORT = qw(expand unexpand $tabstop); - -use vars qw($VERSION $tabstop $debug); -$VERSION = 98.112801; - -use strict; - -BEGIN { - $tabstop = 8; - $debug = 0; -} - -sub expand -{ - my (@l) = @_; - for $_ (@l) { - 1 while s/(^|\n)([^\t\n]*)(\t+)/ - $1. $2 . (" " x - ($tabstop * length($3) - - (length($2) % $tabstop))) - /sex; - } - return @l if wantarray; - return $l[0]; -} - -sub unexpand -{ - my (@l) = @_; - my @e; - my $x; - my $line; - my @lines; - my $lastbit; - for $x (@l) { - @lines = split("\n", $x, -1); - for $line (@lines) { - $line = expand($line); - @e = split(/(.{$tabstop})/,$line,-1); - $lastbit = pop(@e); - $lastbit = '' unless defined $lastbit; - $lastbit = "\t" - if $lastbit eq " "x$tabstop; - for $_ (@e) { - if ($debug) { - my $x = $_; - $x =~ s/\t/^I\t/gs; - print "sub on '$x'\n"; - } - s/ +$/\t/; - } - $line = join('',@e, $lastbit); - } - $x = join("\n", @lines); - } - return @l if wantarray; - return $l[0]; -} - -1; -__END__ - - -=head1 NAME - -Text::Tabs -- expand and unexpand tabs per the unix expand(1) and unexpand(1) - -=head1 SYNOPSIS - -use Text::Tabs; - -$tabstop = 4; -@lines_without_tabs = expand(@lines_with_tabs); -@lines_with_tabs = unexpand(@lines_without_tabs); - -=head1 DESCRIPTION - -Text::Tabs does about what the unix utilities expand(1) and unexpand(1) -do. Given a line with tabs in it, expand will replace the tabs with -the appropriate number of spaces. Given a line with or without tabs in -it, unexpand will add tabs when it can save bytes by doing so. Invisible -compression with plain ascii! - -=head1 BUGS - -expand doesn't handle newlines very quickly -- do not feed it an -entire document in one string. Instead feed it an array of lines. - -=head1 AUTHOR - -David Muir Sharnoff <muir@idiom.com> diff --git a/contrib/perl5/lib/Text/Wrap.pm b/contrib/perl5/lib/Text/Wrap.pm deleted file mode 100644 index 579e09b..0000000 --- a/contrib/perl5/lib/Text/Wrap.pm +++ /dev/null @@ -1,175 +0,0 @@ -package Text::Wrap; - -require Exporter; - -@ISA = qw(Exporter); -@EXPORT = qw(wrap fill); -@EXPORT_OK = qw($columns $break $huge); - -$VERSION = 2001.0131; - -use vars qw($VERSION $columns $debug $break $huge); -use strict; - -BEGIN { - $columns = 76; # <= screen width - $debug = 0; - $break = '\s'; - $huge = 'wrap'; # alternatively: 'die' or 'overflow' -} - -use Text::Tabs qw(expand unexpand); - -sub wrap -{ - my ($ip, $xp, @t) = @_; - - my $r = ""; - my $tail = pop(@t); - my $t = expand(join("", (map { /\s+\Z/ ? ( $_ ) : ($_, ' ') } @t), $tail)); - my $lead = $ip; - my $ll = $columns - length(expand($ip)) - 1; - my $nll = $columns - length(expand($xp)) - 1; - my $nl = ""; - my $remainder = ""; - - pos($t) = 0; - while ($t !~ /\G\s*\Z/gc) { - if ($t =~ /\G([^\n]{0,$ll})($break|\Z(?!\n))/xmgc) { - $r .= unexpand($nl . $lead . $1); - $remainder = $2; - } elsif ($huge eq 'wrap' && $t =~ /\G([^\n]{$ll})/gc) { - $r .= unexpand($nl . $lead . $1); - $remainder = "\n"; - } elsif ($huge eq 'overflow' && $t =~ /\G([^\n]*?)($break|\Z(?!\n))/xmgc) { - $r .= unexpand($nl . $lead . $1); - $remainder = $2; - } elsif ($huge eq 'die') { - die "couldn't wrap '$t'"; - } else { - die "This shouldn't happen"; - } - - $lead = $xp; - $ll = $nll; - $nl = "\n"; - } - $r .= $remainder; - - print "-----------$r---------\n" if $debug; - - print "Finish up with '$lead'\n" if $debug; - - $r .= $lead . substr($t, pos($t), length($t)-pos($t)) - if pos($t) ne length($t); - - print "-----------$r---------\n" if $debug;; - - return $r; -} - -sub fill -{ - my ($ip, $xp, @raw) = @_; - my @para; - my $pp; - - for $pp (split(/\n\s+/, join("\n",@raw))) { - $pp =~ s/\s+/ /g; - my $x = wrap($ip, $xp, $pp); - push(@para, $x); - } - - # if paragraph_indent is the same as line_indent, - # separate paragraphs with blank lines - - my $ps = ($ip eq $xp) ? "\n\n" : "\n"; - return join ($ps, @para); -} - -1; -__END__ - -=head1 NAME - -Text::Wrap - line wrapping to form simple paragraphs - -=head1 SYNOPSIS - -B<Example 1> - - use Text::Wrap - - $initial_tab = "\t"; # Tab before first line - $subsequent_tab = ""; # All other lines flush left - - print wrap($initial_tab, $subsequent_tab, @text); - print fill($initial_tab, $subsequent_tab, @text); - - @lines = wrap($initial_tab, $subsequent_tab, @text); - - @paragraphs = fill($initial_tab, $subsequent_tab, @text); - -B<Example 2> - - use Text::Wrap qw(wrap $columns $huge); - - $columns = 132; # Wrap at 132 characters - $huge = 'die'; - $huge = 'wrap'; - $huge = 'overflow'; - -B<Example 3> - - use Text::Wrap - - $Text::Wrap::columns = 72; - print wrap('', '', @text); - -=head1 DESCRIPTION - -Text::Wrap::wrap() is a very simple paragraph formatter. It formats a -single paragraph at a time by breaking lines at word boundries. -Indentation is controlled for the first line (C<$initial_tab>) and -all subsquent lines (C<$subsequent_tab>) independently. Please note: -C<$initial_tab> and C<$subsequent_tab> are the literal strings that will -be used: it is unlikley you would want to pass in a number. - -Lines are wrapped at C<$Text::Wrap::columns> columns. C<$Text::Wrap::columns> -should be set to the full width of your output device. In fact, -every resulting line will have length of no more than C<$columns - 1>. - -Beginner note: In example 2, above C<$columns> is imported into -the local namespace, and set locally. In example 3, -C<$Text::Wrap::columns> is set in its own namespace without importing it. - -When words that are longer than C<$columns> are encountered, they -are broken up. C<wrap()> adds a C<"\n"> at column C<$columns>. -This behavior can be overridden by setting C<$huge> to -'die' or to 'overflow'. When set to 'die', large words will cause -C<die()> to be called. When set to 'overflow', large words will be -left intact. - -Text::Wrap::fill() is a simple multi-paragraph formatter. It formats -each paragraph separately and then joins them together when it's done. It -will destory any whitespace in the original text. It breaks text into -paragraphs by looking for whitespace after a newline. In other respects -it acts like wrap(). - -When called in list context, C<wrap()> will return a list of lines and -C<fill()> will return a list of paragraphs. - -Historical notes: Older versions of C<wrap()> and C<fill()> always -returned strings. Also, 'die' used to be the default value of -C<$huge>. Now, 'wrap' is the default value. - -=head1 EXAMPLE - - print wrap("\t","","This is a bit of text that forms - a normal book-style paragraph"); - -=head1 AUTHOR - -David Muir Sharnoff <muir@idiom.com> with help from Tim Pierce and -many many others. - |