Initial import of Perl5. The king is dead; long live the king!

author: markm <markm@FreeBSD.org> 1998-09-09 07:00:04 +0000
committer: markm <markm@FreeBSD.org> 1998-09-09 07:00:04 +0000
commit: 4fcbc3669aa997848e15198cc9fb856287a6788c (patch)
tree: 58b20e81687d6d5931f120b50802ed21225bf440 /contrib/perl5/lib/Text
download: FreeBSD-src-4fcbc3669aa997848e15198cc9fb856287a6788c.zip
FreeBSD-src-4fcbc3669aa997848e15198cc9fb856287a6788c.tar.gz
5 files changed, 713 insertions, 0 deletions
diff --git a/contrib/perl5/lib/Text/Abbrev.pm b/contrib/perl5/lib/Text/Abbrev.pm
new file mode 100644
index 0000000..ae6797c
--- /dev/null
+++ b/contrib/perl5/lib/Text/Abbrev.pm
@@ -0,0 +1,87 @@
+package Text::Abbrev;
+require 5.000;
+require Exporter;
+
+=head1 NAME
+
+abbrev - create an abbreviation table from a list
+
+=head1 SYNOPSIS
+
+    use Text::Abbrev;
+    abbrev $hashref, LIST
+
+
+=head1 DESCRIPTION
+
+Stores all unambiguous truncations of each element of LIST
+as keys key in the associative array referenced to by C<$hashref>.
+The values are the original list elements.
+
+=head1 EXAMPLE
+
+    $hashref = abbrev qw(list edit send abort gripe);
+
+    %hash = abbrev qw(list edit send abort gripe);
+
+    abbrev $hashref, qw(list edit send abort gripe);
+
+    abbrev(*hash, qw(list edit send abort gripe));
+
+=cut
+
+@ISA = qw(Exporter);
+@EXPORT = qw(abbrev);
+
+# Usage:
+#	&abbrev(*foo,LIST);
+#	...
+#	$long = $foo{$short};
+
+sub abbrev {
+    my (%domain);
+    my ($name, $ref, $glob);
+
+    if (ref($_[0])) {           # hash reference preferably
+      $ref = shift;
+    } elsif ($_[0] =~ /^\*/) {  # looks like a glob (deprecated)
+      $glob = shift;
+    } 
+    my @cmp = @_;
+
+    foreach $name (@_) {
+	my @extra = split(//,$name);
+	my $abbrev = shift(@extra);
+	my $len = 1;
+        my $cmp;
+	WORD: foreach $cmp (@cmp) {
+	    next if $cmp eq $name;
+	    while (substr($cmp,0,$len) eq $abbrev) {
+                last WORD unless @extra;
+                $abbrev .= shift(@extra);
+		++$len;
+	    }
+	}
+	$domain{$abbrev} = $name;
+	while (@extra) {
+	    $abbrev .= shift(@extra);
+	    $domain{$abbrev} = $name;
+	}
+    }
+    if ($ref) {
+      %$ref = %domain;
+      return;
+    } elsif ($glob) {           # old style
+      local (*hash) = $glob;
+      %hash = %domain;
+      return;
+    }
+    if (wantarray) {
+      %domain;
+    } else {
+      \%domain;
+    }
+}
+
+1;
+
diff --git a/contrib/perl5/lib/Text/ParseWords.pm b/contrib/perl5/lib/Text/ParseWords.pm
new file mode 100644
index 0000000..2414f80
--- /dev/null
+++ b/contrib/perl5/lib/Text/ParseWords.pm
@@ -0,0 +1,256 @@
+package Text::ParseWords;
+
+use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE);
+$VERSION = "3.1";
+
+require 5.000;
+
+use Exporter;
+@ISA = qw(Exporter);
+@EXPORT = qw(shellwords quotewords nested_quotewords parse_line);
+@EXPORT_OK = qw(old_shellwords);
+
+
+sub shellwords {
+    local(@lines) = @_;
+    $lines[$#lines] =~ s/\s+$//;
+    return(quotewords('\s+', 0, @lines));
+}
+
+
+
+sub quotewords {
+    my($delim, $keep, @lines) = @_;
+    my($line, @words, @allwords);
+    
+
+    foreach $line (@lines) {
+	@words = parse_line($delim, $keep, $line);
+	return() unless (@words || !length($line));
+	push(@allwords, @words);
+    }
+    return(@allwords);
+}
+
+
+
+sub nested_quotewords {
+    my($delim, $keep, @lines) = @_;
+    my($i, @allwords);
+    
+    for ($i = 0; $i < @lines; $i++) {
+	@{$allwords[$i]} = parse_line($delim, $keep, $lines[$i]);
+	return() unless (@{$allwords[$i]} || !length($lines[$i]));
+    }
+    return(@allwords);
+}
+
+
+
+sub parse_line {
+	# We will be testing undef strings
+	local($^W) = 0;
+
+    my($delimiter, $keep, $line) = @_;
+    my($quote, $quoted, $unquoted, $delim, $word, @pieces);
+
+    while (length($line)) {
+
+	($quote, $quoted, undef, $unquoted, $delim, undef) =
+	    $line =~ m/^(["'])                 # a $quote
+                        ((?:\\.|(?!\1)[^\\])*)    # and $quoted text
+                        \1 		       # followed by the same quote
+                        ([\000-\377]*)	       # and the rest
+		       |                       # --OR--
+                       ^((?:\\.|[^\\"'])*?)    # an $unquoted text
+		      (\Z(?!\n)|$delimiter|(?!^)(?=["']))  
+                                               # plus EOL, delimiter, or quote
+                      ([\000-\377]*)	       # the rest
+		      /x;		       # extended layout
+	return() unless( $quote || length($unquoted) || length($delim));
+
+	$line = $+;
+
+        if ($keep) {
+	    $quoted = "$quote$quoted$quote";
+	}
+        else {
+	    $unquoted =~ s/\\(.)/$1/g;
+	    if (defined $quote) {
+		$quoted =~ s/\\(.)/$1/g if ($quote eq '"');
+		$quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
+            }
+	}
+        $word .= defined $quote ? $quoted : $unquoted;
+ 
+        if (length($delim)) {
+            push(@pieces, $word);
+            push(@pieces, $delim) if ($keep eq 'delimiters');
+            undef $word;
+        }
+        if (!length($line)) {
+            push(@pieces, $word);
+	}
+    }
+    return(@pieces);
+}
+
+
+
+sub old_shellwords {
+
+    # Usage:
+    #	use ParseWords;
+    #	@words = old_shellwords($line);
+    #	or
+    #	@words = old_shellwords(@lines);
+
+    local($_) = join('', @_);
+    my(@words,$snippet,$field);
+
+    s/^\s+//;
+    while ($_ ne '') {
+	$field = '';
+	for (;;) {
+	    if (s/^"(([^"\\]|\\.)*)"//) {
+		($snippet = $1) =~ s#\\(.)#$1#g;
+	    }
+	    elsif (/^"/) {
+		return();
+	    }
+	    elsif (s/^'(([^'\\]|\\.)*)'//) {
+		($snippet = $1) =~ s#\\(.)#$1#g;
+	    }
+	    elsif (/^'/) {
+		return();
+	    }
+	    elsif (s/^\\(.)//) {
+		$snippet = $1;
+	    }
+	    elsif (s/^([^\s\\'"]+)//) {
+		$snippet = $1;
+	    }
+	    else {
+		s/^\s+//;
+		last;
+	    }
+	    $field .= $snippet;
+	}
+	push(@words, $field);
+    }
+    @words;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Text::ParseWords - parse text into an array of tokens or array of arrays
+
+=head1 SYNOPSIS
+
+  use Text::ParseWords;
+  @lists = &nested_quotewords($delim, $keep, @lines);
+  @words = &quotewords($delim, $keep, @lines);
+  @words = &shellwords(@lines);
+  @words = &parse_line($delim, $keep, $line);
+  @words = &old_shellwords(@lines); # DEPRECATED!
+
+=head1 DESCRIPTION
+
+The &nested_quotewords() and &quotewords() functions accept a delimiter 
+(which can be a regular expression)
+and a list of lines and then breaks those lines up into a list of
+words ignoring delimiters that appear inside quotes.  &quotewords()
+returns all of the tokens in a single long list, while &nested_quotewords()
+returns a list of token lists corresponding to the elements of @lines.
+&parse_line() does tokenizing on a single string.  The &*quotewords()
+functions simply call &parse_lines(), so if you're only splitting
+one line you can call &parse_lines() directly and save a function
+call.
+
+The $keep argument is a boolean flag.  If true, then the tokens are
+split on the specified delimiter, but all other characters (quotes,
+backslashes, etc.) are kept in the tokens.  If $keep is false then the
+&*quotewords() functions remove all quotes and backslashes that are
+not themselves backslash-escaped or inside of single quotes (i.e.,
+&quotewords() tries to interpret these characters just like the Bourne
+shell).  NB: these semantics are significantly different from the
+original version of this module shipped with Perl 5.000 through 5.004.
+As an additional feature, $keep may be the keyword "delimiters" which
+causes the functions to preserve the delimiters in each string as
+tokens in the token lists, in addition to preserving quote and
+backslash characters.
+
+&shellwords() is written as a special case of &quotewords(), and it
+does token parsing with whitespace as a delimiter-- similar to most
+Unix shells.
+
+=head1 EXAMPLES
+
+The sample program:
+
+  use Text::ParseWords;
+  @words = &quotewords('\s+', 0, q{this   is "a test" of\ quotewords \"for you});
+  $i = 0;
+  foreach (@words) {
+      print "$i: <$_>\n";
+      $i++;
+  }
+
+produces:
+
+  0: <this>
+  1: <is>
+  2: <a test>
+  3: <of quotewords>
+  4: <"for>
+  5: <you>
+
+demonstrating:
+
+=over 4
+
+=item 0
+a simple word
+
+=item 1
+multiple spaces are skipped because of our $delim
+
+=item 2
+use of quotes to include a space in a word
+
+=item 3
+use of a backslash to include a space in a word
+
+=item 4
+use of a backslash to remove the special meaning of a double-quote
+
+=item 5
+another simple word (note the lack of effect of the
+backslashed double-quote)
+
+=back
+
+Replacing C<&quotewords('\s+', 0, q{this   is...})>
+with C<&shellwords(q{this   is...})>
+is a simpler way to accomplish the same thing.
+
+=head1 AUTHORS
+
+Maintainer is Hal Pomeranz <pomeranz@netcom.com>, 1994-1997 (Original
+author unknown).  Much of the code for &parse_line() (including the
+primary regexp) from Joerk Behrends <jbehrends@multimediaproduzenten.de>.
+
+Examples section another documentation provided by John Heidemann 
+<johnh@ISI.EDU>
+
+Bug reports, patches, and nagging provided by lots of folks-- thanks
+everybody!  Special thanks to Michael Schwern <schwern@envirolink.org>
+for assuring me that a &nested_quotewords() would be useful, and to 
+Jeff Friedl <jfriedl@yahoo-inc.com> for telling me not to worry about
+error-checking (sort of-- you had to be there).
+
+=cut
diff --git a/contrib/perl5/lib/Text/Soundex.pm b/contrib/perl5/lib/Text/Soundex.pm
new file mode 100644
index 0000000..ddc758c
--- /dev/null
+++ b/contrib/perl5/lib/Text/Soundex.pm
@@ -0,0 +1,148 @@
+package Text::Soundex;
+require 5.000;
+require Exporter;
+
+@ISA = qw(Exporter);
+@EXPORT = qw(&soundex $soundex_nocode);
+
+# $Id: soundex.pl,v 1.2 1994/03/24 00:30:27 mike Exp $
+#
+# Implementation of soundex algorithm as described by Knuth in volume
+# 3 of The Art of Computer Programming, with ideas stolen from Ian
+# Phillips <ian@pipex.net>.
+#
+# Mike Stok <Mike.Stok@meiko.concord.ma.us>, 2 March 1994.
+#
+# Knuth's test cases are:
+# 
+# Euler, Ellery -> E460
+# Gauss, Ghosh -> G200
+# Hilbert, Heilbronn -> H416
+# Knuth, Kant -> K530
+# Lloyd, Ladd -> L300
+# Lukasiewicz, Lissajous -> L222
+#
+# $Log: soundex.pl,v $
+# Revision 1.2  1994/03/24  00:30:27  mike
+# Subtle bug (any excuse :-) spotted by Rich Pinder <rpinder@hsc.usc.edu>
+# in the way I handles leasing characters which were different but had
+# the same soundex code.  This showed up comparing it with Oracle's
+# soundex output.
+#
+# Revision 1.1  1994/03/02  13:01:30  mike
+# Initial revision
+#
+#
+##############################################################################
+
+# $soundex_nocode is used to indicate a string doesn't have a soundex
+# code, I like undef other people may want to set it to 'Z000'.
+
+$soundex_nocode = undef;
+
+sub soundex
+{
+  local (@s, $f, $fc, $_) = @_;
+
+  push @s, '' unless @s;	# handle no args as a single empty string
+
+  foreach (@s)
+  {
+    $_ = uc $_;
+    tr/A-Z//cd;
+
+    if ($_ eq '')
+    {
+      $_ = $soundex_nocode;
+    }
+    else
+    {
+      ($f) = /^(.)/;
+      tr/AEHIOUWYBFPVCGJKQSXZDTLMNR/00000000111122222222334556/;
+      ($fc) = /^(.)/;
+      s/^$fc+//;
+      tr///cs;
+      tr/0//d;
+      $_ = $f . $_ . '000';
+      s/^(.{4}).*/$1/;
+    }
+  }
+
+  wantarray ? @s : shift @s;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Text::Soundex - Implementation of the Soundex Algorithm as Described by Knuth
+
+=head1 SYNOPSIS
+
+  use Text::Soundex;
+
+  $code = soundex $string;            # get soundex code for a string
+  @codes = soundex @list;             # get list of codes for list of strings
+
+  # set value to be returned for strings without soundex code
+
+  $soundex_nocode = 'Z000';
+
+=head1 DESCRIPTION
+
+This module implements the soundex algorithm as described by Donald Knuth
+in Volume 3 of B<The Art of Computer Programming>.  The algorithm is
+intended to hash words (in particular surnames) into a small space using a
+simple model which approximates the sound of the word when spoken by an English
+speaker.  Each word is reduced to a four character string, the first
+character being an upper case letter and the remaining three being digits.
+
+If there is no soundex code representation for a string then the value of
+C<$soundex_nocode> is returned.  This is initially set to C<undef>, but
+many people seem to prefer an I<unlikely> value like C<Z000>
+(how unlikely this is depends on the data set being dealt with.)  Any value
+can be assigned to C<$soundex_nocode>.
+
+In scalar context C<soundex> returns the soundex code of its first
+argument, and in array context a list is returned in which each element is the 
+soundex code for the corresponding argument passed to C<soundex> e.g.
+
+  @codes = soundex qw(Mike Stok);
+
+leaves C<@codes> containing C<('M200', 'S320')>.
+
+=head1 EXAMPLES
+
+Knuth's examples of various names and the soundex codes they map to
+are listed below:
+
+  Euler, Ellery -> E460
+  Gauss, Ghosh -> G200
+  Hilbert, Heilbronn -> H416
+  Knuth, Kant -> K530
+  Lloyd, Ladd -> L300
+  Lukasiewicz, Lissajous -> L222
+
+so:
+
+  $code = soundex 'Knuth';              # $code contains 'K530'
+  @list = soundex qw(Lloyd Gauss);	# @list contains 'L300', 'G200'
+
+=head1 LIMITATIONS
+
+As the soundex algorithm was originally used a B<long> time ago in the US
+it considers only the English alphabet and pronunciation.
+
+As it is mapping a large space (arbitrary length strings) onto a small
+space (single letter plus 3 digits) no inference can be made about the
+similarity of two strings which end up with the same soundex code.  For 
+example, both C<Hilbert> and C<Heilbronn> end up with a soundex code
+of C<H416>.
+
+=head1 AUTHOR
+
+This code was implemented by Mike Stok (C<stok@cybercom.net>) from the 
+description given by Knuth.  Ian Phillips (C<ian@pipex.net>) and Rich Pinder 
+(C<rpinder@hsc.usc.edu>) supplied ideas and spotted mistakes.
diff --git a/contrib/perl5/lib/Text/Tabs.pm b/contrib/perl5/lib/Text/Tabs.pm
new file mode 100644
index 0000000..acd7afb
--- /dev/null
+++ b/contrib/perl5/lib/Text/Tabs.pm
@@ -0,0 +1,97 @@
+
+package Text::Tabs;
+
+require Exporter;
+
+@ISA = (Exporter);
+@EXPORT = qw(expand unexpand $tabstop);
+
+use vars qw($VERSION $tabstop $debug);
+$VERSION = 96.121201;
+
+use strict;
+
+BEGIN	{
+	$tabstop = 8;
+	$debug = 0;
+}
+
+sub expand
+{
+	my @l = @_;
+	for $_ (@l) {
+		1 while s/(^|\n)([^\t\n]*)(\t+)/
+			$1. $2 . (" " x 
+				($tabstop * length($3)
+				- (length($2) % $tabstop)))
+			/sex;
+	}
+	return @l if wantarray;
+	return $l[0];
+}
+
+sub unexpand
+{
+	my @l = @_;
+	my @e;
+	my $x;
+	my $line;
+	my @lines;
+	my $lastbit;
+	for $x (@l) {
+		@lines = split("\n", $x, -1);
+		for $line (@lines) {
+			$line = expand($line);
+			@e = split(/(.{$tabstop})/,$line,-1);
+			$lastbit = pop(@e);
+			$lastbit = '' unless defined $lastbit;
+			$lastbit = "\t"
+				if $lastbit eq " "x$tabstop;
+			for $_ (@e) {
+				if ($debug) {
+					my $x = $_;
+					$x =~ s/\t/^I\t/gs;
+					print "sub on '$x'\n";
+				}
+				s/  +$/\t/;
+			}
+			$line = join('',@e, $lastbit);
+		}
+		$x = join("\n", @lines);
+	}
+	return @l if wantarray;
+	return $l[0];
+}
+
+1;
+__END__
+
+
+=head1 NAME
+
+Text::Tabs -- expand and unexpand tabs per the unix expand(1) and unexpand(1)
+
+=head1 SYNOPSIS
+
+use Text::Tabs;
+
+$tabstop = 4;
+@lines_without_tabs = expand(@lines_with_tabs);
+@lines_with_tabs = unexpand(@lines_without_tabs);
+
+=head1 DESCRIPTION
+
+Text::Tabs does about what the unix utilities expand(1) and unexpand(1)
+do.  Given a line with tabs in it, expand will replace the tabs with
+the appropriate number of spaces.  Given a line with or without tabs in
+it, unexpand will add tabs when it can save bytes by doing so.  Invisible
+compression with plain ascii!
+
+=head1 BUGS
+
+expand doesn't handle newlines very quickly -- do not feed it an
+entire document in one string.  Instead feed it an array of lines.
+
+=head1 AUTHOR
+
+David Muir Sharnoff <muir@idiom.com>
diff --git a/contrib/perl5/lib/Text/Wrap.pm b/contrib/perl5/lib/Text/Wrap.pm
new file mode 100644
index 0000000..0fe7fb9
--- /dev/null
+++ b/contrib/perl5/lib/Text/Wrap.pm
@@ -0,0 +1,125 @@
+package Text::Wrap;
+
+use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION $columns $debug);
+use strict;
+use Exporter;
+
+$VERSION = "97.02";
+@ISA = qw(Exporter);
+@EXPORT = qw(wrap);
+@EXPORT_OK = qw($columns $tabstop fill);
+
+use Text::Tabs qw(expand unexpand $tabstop);
+
+
+BEGIN	{
+    $columns = 76;  # <= screen width
+    $debug = 0;
+}
+
+sub wrap
+{
+    my ($ip, $xp, @t) = @_;
+
+    my @rv;
+    my $t = expand(join(" ",@t));
+
+    my $lead = $ip;
+    my $ll = $columns - length(expand($lead)) - 1;
+    my $nl = "";
+
+    $t =~ s/^\s+//;
+    while(length($t) > $ll) {
+	# remove up to a line length of things that
+	# aren't new lines and tabs.
+	if ($t =~ s/^([^\n]{0,$ll})(\s|\Z(?!\n))//) {
+	    my ($l,$r) = ($1,$2);
+	    $l =~ s/\s+$//;
+	    print "WRAP  $lead$l..($r)\n" if $debug;
+	    push @rv, unexpand($lead . $l), "\n";
+		
+	} elsif ($t =~ s/^([^\n]{$ll})//) {
+	    print "SPLIT $lead$1..\n" if $debug;
+	    push @rv, unexpand($lead . $1),"\n";
+	}
+	# recompute the leader
+	$lead = $xp;
+	$ll = $columns - length(expand($lead)) - 1;
+	$t =~ s/^\s+//;
+    } 
+    print "TAIL  $lead$t\n" if $debug;
+    push @rv, $lead.$t if $t ne "";
+    return join '', @rv;
+}
+
+
+sub fill 
+{
+	my ($ip, $xp, @raw) = @_;
+	my @para;
+	my $pp;
+
+	for $pp (split(/\n\s+/, join("\n",@raw))) {
+		$pp =~ s/\s+/ /g;
+		my $x = wrap($ip, $xp, $pp);
+		push(@para, $x);
+	}
+
+	# if paragraph_indent is the same as line_indent, 
+	# separate paragraphs with blank lines
+
+	return join ($ip eq $xp ? "\n\n" : "\n", @para);
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Text::Wrap - line wrapping to form simple paragraphs
+
+=head1 SYNOPSIS 
+
+	use Text::Wrap
+
+	print wrap($initial_tab, $subsequent_tab, @text);
+
+	use Text::Wrap qw(wrap $columns $tabstop fill);
+
+	$columns = 132;
+	$tabstop = 4;
+
+	print fill($initial_tab, $subsequent_tab, @text);
+	print fill("", "", `cat book`);
+
+=head1 DESCRIPTION
+
+Text::Wrap::wrap() is a very simple paragraph formatter.  It formats a
+single paragraph at a time by breaking lines at word boundries.
+Indentation is controlled for the first line ($initial_tab) and
+all subsquent lines ($subsequent_tab) independently.  $Text::Wrap::columns
+should be set to the full width of your output device.
+
+Text::Wrap::fill() is a simple multi-paragraph formatter.  It formats
+each paragraph separately and then joins them together when it's done.  It
+will destory any whitespace in the original text.  It breaks text into
+paragraphs by looking for whitespace after a newline.  In other respects
+it acts like wrap().
+
+=head1 EXAMPLE
+
+	print wrap("\t","","This is a bit of text that forms 
+		a normal book-style paragraph");
+
+=head1 BUGS
+
+It's not clear what the correct behavior should be when Wrap() is
+presented with a word that is longer than a line.  The previous 
+behavior was to die.  Now the word is now split at line-length.
+
+=head1 AUTHOR
+
+David Muir Sharnoff <muir@idiom.com> with help from Tim Pierce and
+others. Updated by Jacqui Caren.
+
+=cut
author	markm <markm@FreeBSD.org>	1998-09-09 07:00:04 +0000
committer	markm <markm@FreeBSD.org>	1998-09-09 07:00:04 +0000
commit	4fcbc3669aa997848e15198cc9fb856287a6788c (patch)
tree	58b20e81687d6d5931f120b50802ed21225bf440 /contrib/perl5/lib/Text
download	FreeBSD-src-4fcbc3669aa997848e15198cc9fb856287a6788c.zip FreeBSD-src-4fcbc3669aa997848e15198cc9fb856287a6788c.tar.gz