summaryrefslogtreecommitdiffstats
path: root/contrib/perl5/lib/Text
diff options
context:
space:
mode:
authormarkm <markm@FreeBSD.org>1998-09-09 07:00:04 +0000
committermarkm <markm@FreeBSD.org>1998-09-09 07:00:04 +0000
commit4fcbc3669aa997848e15198cc9fb856287a6788c (patch)
tree58b20e81687d6d5931f120b50802ed21225bf440 /contrib/perl5/lib/Text
downloadFreeBSD-src-4fcbc3669aa997848e15198cc9fb856287a6788c.zip
FreeBSD-src-4fcbc3669aa997848e15198cc9fb856287a6788c.tar.gz
Initial import of Perl5. The king is dead; long live the king!
Diffstat (limited to 'contrib/perl5/lib/Text')
-rw-r--r--contrib/perl5/lib/Text/Abbrev.pm87
-rw-r--r--contrib/perl5/lib/Text/ParseWords.pm256
-rw-r--r--contrib/perl5/lib/Text/Soundex.pm148
-rw-r--r--contrib/perl5/lib/Text/Tabs.pm97
-rw-r--r--contrib/perl5/lib/Text/Wrap.pm125
5 files changed, 713 insertions, 0 deletions
diff --git a/contrib/perl5/lib/Text/Abbrev.pm b/contrib/perl5/lib/Text/Abbrev.pm
new file mode 100644
index 0000000..ae6797c
--- /dev/null
+++ b/contrib/perl5/lib/Text/Abbrev.pm
@@ -0,0 +1,87 @@
+package Text::Abbrev;
+require 5.000;
+require Exporter;
+
+=head1 NAME
+
+abbrev - create an abbreviation table from a list
+
+=head1 SYNOPSIS
+
+ use Text::Abbrev;
+ abbrev $hashref, LIST
+
+
+=head1 DESCRIPTION
+
+Stores all unambiguous truncations of each element of LIST
+as keys key in the associative array referenced to by C<$hashref>.
+The values are the original list elements.
+
+=head1 EXAMPLE
+
+ $hashref = abbrev qw(list edit send abort gripe);
+
+ %hash = abbrev qw(list edit send abort gripe);
+
+ abbrev $hashref, qw(list edit send abort gripe);
+
+ abbrev(*hash, qw(list edit send abort gripe));
+
+=cut
+
+@ISA = qw(Exporter);
+@EXPORT = qw(abbrev);
+
+# Usage:
+# &abbrev(*foo,LIST);
+# ...
+# $long = $foo{$short};
+
+sub abbrev {
+ my (%domain);
+ my ($name, $ref, $glob);
+
+ if (ref($_[0])) { # hash reference preferably
+ $ref = shift;
+ } elsif ($_[0] =~ /^\*/) { # looks like a glob (deprecated)
+ $glob = shift;
+ }
+ my @cmp = @_;
+
+ foreach $name (@_) {
+ my @extra = split(//,$name);
+ my $abbrev = shift(@extra);
+ my $len = 1;
+ my $cmp;
+ WORD: foreach $cmp (@cmp) {
+ next if $cmp eq $name;
+ while (substr($cmp,0,$len) eq $abbrev) {
+ last WORD unless @extra;
+ $abbrev .= shift(@extra);
+ ++$len;
+ }
+ }
+ $domain{$abbrev} = $name;
+ while (@extra) {
+ $abbrev .= shift(@extra);
+ $domain{$abbrev} = $name;
+ }
+ }
+ if ($ref) {
+ %$ref = %domain;
+ return;
+ } elsif ($glob) { # old style
+ local (*hash) = $glob;
+ %hash = %domain;
+ return;
+ }
+ if (wantarray) {
+ %domain;
+ } else {
+ \%domain;
+ }
+}
+
+1;
+
diff --git a/contrib/perl5/lib/Text/ParseWords.pm b/contrib/perl5/lib/Text/ParseWords.pm
new file mode 100644
index 0000000..2414f80
--- /dev/null
+++ b/contrib/perl5/lib/Text/ParseWords.pm
@@ -0,0 +1,256 @@
+package Text::ParseWords;
+
+use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE);
+$VERSION = "3.1";
+
+require 5.000;
+
+use Exporter;
+@ISA = qw(Exporter);
+@EXPORT = qw(shellwords quotewords nested_quotewords parse_line);
+@EXPORT_OK = qw(old_shellwords);
+
+
+sub shellwords {
+ local(@lines) = @_;
+ $lines[$#lines] =~ s/\s+$//;
+ return(quotewords('\s+', 0, @lines));
+}
+
+
+
+sub quotewords {
+ my($delim, $keep, @lines) = @_;
+ my($line, @words, @allwords);
+
+
+ foreach $line (@lines) {
+ @words = parse_line($delim, $keep, $line);
+ return() unless (@words || !length($line));
+ push(@allwords, @words);
+ }
+ return(@allwords);
+}
+
+
+
+sub nested_quotewords {
+ my($delim, $keep, @lines) = @_;
+ my($i, @allwords);
+
+ for ($i = 0; $i < @lines; $i++) {
+ @{$allwords[$i]} = parse_line($delim, $keep, $lines[$i]);
+ return() unless (@{$allwords[$i]} || !length($lines[$i]));
+ }
+ return(@allwords);
+}
+
+
+
+sub parse_line {
+ # We will be testing undef strings
+ local($^W) = 0;
+
+ my($delimiter, $keep, $line) = @_;
+ my($quote, $quoted, $unquoted, $delim, $word, @pieces);
+
+ while (length($line)) {
+
+ ($quote, $quoted, undef, $unquoted, $delim, undef) =
+ $line =~ m/^(["']) # a $quote
+ ((?:\\.|(?!\1)[^\\])*) # and $quoted text
+ \1 # followed by the same quote
+ ([\000-\377]*) # and the rest
+ | # --OR--
+ ^((?:\\.|[^\\"'])*?) # an $unquoted text
+ (\Z(?!\n)|$delimiter|(?!^)(?=["']))
+ # plus EOL, delimiter, or quote
+ ([\000-\377]*) # the rest
+ /x; # extended layout
+ return() unless( $quote || length($unquoted) || length($delim));
+
+ $line = $+;
+
+ if ($keep) {
+ $quoted = "$quote$quoted$quote";
+ }
+ else {
+ $unquoted =~ s/\\(.)/$1/g;
+ if (defined $quote) {
+ $quoted =~ s/\\(.)/$1/g if ($quote eq '"');
+ $quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
+ }
+ }
+ $word .= defined $quote ? $quoted : $unquoted;
+
+ if (length($delim)) {
+ push(@pieces, $word);
+ push(@pieces, $delim) if ($keep eq 'delimiters');
+ undef $word;
+ }
+ if (!length($line)) {
+ push(@pieces, $word);
+ }
+ }
+ return(@pieces);
+}
+
+
+
+sub old_shellwords {
+
+ # Usage:
+ # use ParseWords;
+ # @words = old_shellwords($line);
+ # or
+ # @words = old_shellwords(@lines);
+
+ local($_) = join('', @_);
+ my(@words,$snippet,$field);
+
+ s/^\s+//;
+ while ($_ ne '') {
+ $field = '';
+ for (;;) {
+ if (s/^"(([^"\\]|\\.)*)"//) {
+ ($snippet = $1) =~ s#\\(.)#$1#g;
+ }
+ elsif (/^"/) {
+ return();
+ }
+ elsif (s/^'(([^'\\]|\\.)*)'//) {
+ ($snippet = $1) =~ s#\\(.)#$1#g;
+ }
+ elsif (/^'/) {
+ return();
+ }
+ elsif (s/^\\(.)//) {
+ $snippet = $1;
+ }
+ elsif (s/^([^\s\\'"]+)//) {
+ $snippet = $1;
+ }
+ else {
+ s/^\s+//;
+ last;
+ }
+ $field .= $snippet;
+ }
+ push(@words, $field);
+ }
+ @words;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Text::ParseWords - parse text into an array of tokens or array of arrays
+
+=head1 SYNOPSIS
+
+ use Text::ParseWords;
+ @lists = &nested_quotewords($delim, $keep, @lines);
+ @words = &quotewords($delim, $keep, @lines);
+ @words = &shellwords(@lines);
+ @words = &parse_line($delim, $keep, $line);
+ @words = &old_shellwords(@lines); # DEPRECATED!
+
+=head1 DESCRIPTION
+
+The &nested_quotewords() and &quotewords() functions accept a delimiter
+(which can be a regular expression)
+and a list of lines and then breaks those lines up into a list of
+words ignoring delimiters that appear inside quotes. &quotewords()
+returns all of the tokens in a single long list, while &nested_quotewords()
+returns a list of token lists corresponding to the elements of @lines.
+&parse_line() does tokenizing on a single string. The &*quotewords()
+functions simply call &parse_lines(), so if you're only splitting
+one line you can call &parse_lines() directly and save a function
+call.
+
+The $keep argument is a boolean flag. If true, then the tokens are
+split on the specified delimiter, but all other characters (quotes,
+backslashes, etc.) are kept in the tokens. If $keep is false then the
+&*quotewords() functions remove all quotes and backslashes that are
+not themselves backslash-escaped or inside of single quotes (i.e.,
+&quotewords() tries to interpret these characters just like the Bourne
+shell). NB: these semantics are significantly different from the
+original version of this module shipped with Perl 5.000 through 5.004.
+As an additional feature, $keep may be the keyword "delimiters" which
+causes the functions to preserve the delimiters in each string as
+tokens in the token lists, in addition to preserving quote and
+backslash characters.
+
+&shellwords() is written as a special case of &quotewords(), and it
+does token parsing with whitespace as a delimiter-- similar to most
+Unix shells.
+
+=head1 EXAMPLES
+
+The sample program:
+
+ use Text::ParseWords;
+ @words = &quotewords('\s+', 0, q{this is "a test" of\ quotewords \"for you});
+ $i = 0;
+ foreach (@words) {
+ print "$i: <$_>\n";
+ $i++;
+ }
+
+produces:
+
+ 0: <this>
+ 1: <is>
+ 2: <a test>
+ 3: <of quotewords>
+ 4: <"for>
+ 5: <you>
+
+demonstrating:
+
+=over 4
+
+=item 0
+a simple word
+
+=item 1
+multiple spaces are skipped because of our $delim
+
+=item 2
+use of quotes to include a space in a word
+
+=item 3
+use of a backslash to include a space in a word
+
+=item 4
+use of a backslash to remove the special meaning of a double-quote
+
+=item 5
+another simple word (note the lack of effect of the
+backslashed double-quote)
+
+=back
+
+Replacing C<&quotewords('\s+', 0, q{this is...})>
+with C<&shellwords(q{this is...})>
+is a simpler way to accomplish the same thing.
+
+=head1 AUTHORS
+
+Maintainer is Hal Pomeranz <pomeranz@netcom.com>, 1994-1997 (Original
+author unknown). Much of the code for &parse_line() (including the
+primary regexp) from Joerk Behrends <jbehrends@multimediaproduzenten.de>.
+
+Examples section another documentation provided by John Heidemann
+<johnh@ISI.EDU>
+
+Bug reports, patches, and nagging provided by lots of folks-- thanks
+everybody! Special thanks to Michael Schwern <schwern@envirolink.org>
+for assuring me that a &nested_quotewords() would be useful, and to
+Jeff Friedl <jfriedl@yahoo-inc.com> for telling me not to worry about
+error-checking (sort of-- you had to be there).
+
+=cut
diff --git a/contrib/perl5/lib/Text/Soundex.pm b/contrib/perl5/lib/Text/Soundex.pm
new file mode 100644
index 0000000..ddc758c
--- /dev/null
+++ b/contrib/perl5/lib/Text/Soundex.pm
@@ -0,0 +1,148 @@
+package Text::Soundex;
+require 5.000;
+require Exporter;
+
+@ISA = qw(Exporter);
+@EXPORT = qw(&soundex $soundex_nocode);
+
+# $Id: soundex.pl,v 1.2 1994/03/24 00:30:27 mike Exp $
+#
+# Implementation of soundex algorithm as described by Knuth in volume
+# 3 of The Art of Computer Programming, with ideas stolen from Ian
+# Phillips <ian@pipex.net>.
+#
+# Mike Stok <Mike.Stok@meiko.concord.ma.us>, 2 March 1994.
+#
+# Knuth's test cases are:
+#
+# Euler, Ellery -> E460
+# Gauss, Ghosh -> G200
+# Hilbert, Heilbronn -> H416
+# Knuth, Kant -> K530
+# Lloyd, Ladd -> L300
+# Lukasiewicz, Lissajous -> L222
+#
+# $Log: soundex.pl,v $
+# Revision 1.2 1994/03/24 00:30:27 mike
+# Subtle bug (any excuse :-) spotted by Rich Pinder <rpinder@hsc.usc.edu>
+# in the way I handles leasing characters which were different but had
+# the same soundex code. This showed up comparing it with Oracle's
+# soundex output.
+#
+# Revision 1.1 1994/03/02 13:01:30 mike
+# Initial revision
+#
+#
+##############################################################################
+
+# $soundex_nocode is used to indicate a string doesn't have a soundex
+# code, I like undef other people may want to set it to 'Z000'.
+
+$soundex_nocode = undef;
+
+sub soundex
+{
+ local (@s, $f, $fc, $_) = @_;
+
+ push @s, '' unless @s; # handle no args as a single empty string
+
+ foreach (@s)
+ {
+ $_ = uc $_;
+ tr/A-Z//cd;
+
+ if ($_ eq '')
+ {
+ $_ = $soundex_nocode;
+ }
+ else
+ {
+ ($f) = /^(.)/;
+ tr/AEHIOUWYBFPVCGJKQSXZDTLMNR/00000000111122222222334556/;
+ ($fc) = /^(.)/;
+ s/^$fc+//;
+ tr///cs;
+ tr/0//d;
+ $_ = $f . $_ . '000';
+ s/^(.{4}).*/$1/;
+ }
+ }
+
+ wantarray ? @s : shift @s;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Text::Soundex - Implementation of the Soundex Algorithm as Described by Knuth
+
+=head1 SYNOPSIS
+
+ use Text::Soundex;
+
+ $code = soundex $string; # get soundex code for a string
+ @codes = soundex @list; # get list of codes for list of strings
+
+ # set value to be returned for strings without soundex code
+
+ $soundex_nocode = 'Z000';
+
+=head1 DESCRIPTION
+
+This module implements the soundex algorithm as described by Donald Knuth
+in Volume 3 of B<The Art of Computer Programming>. The algorithm is
+intended to hash words (in particular surnames) into a small space using a
+simple model which approximates the sound of the word when spoken by an English
+speaker. Each word is reduced to a four character string, the first
+character being an upper case letter and the remaining three being digits.
+
+If there is no soundex code representation for a string then the value of
+C<$soundex_nocode> is returned. This is initially set to C<undef>, but
+many people seem to prefer an I<unlikely> value like C<Z000>
+(how unlikely this is depends on the data set being dealt with.) Any value
+can be assigned to C<$soundex_nocode>.
+
+In scalar context C<soundex> returns the soundex code of its first
+argument, and in array context a list is returned in which each element is the
+soundex code for the corresponding argument passed to C<soundex> e.g.
+
+ @codes = soundex qw(Mike Stok);
+
+leaves C<@codes> containing C<('M200', 'S320')>.
+
+=head1 EXAMPLES
+
+Knuth's examples of various names and the soundex codes they map to
+are listed below:
+
+ Euler, Ellery -> E460
+ Gauss, Ghosh -> G200
+ Hilbert, Heilbronn -> H416
+ Knuth, Kant -> K530
+ Lloyd, Ladd -> L300
+ Lukasiewicz, Lissajous -> L222
+
+so:
+
+ $code = soundex 'Knuth'; # $code contains 'K530'
+ @list = soundex qw(Lloyd Gauss); # @list contains 'L300', 'G200'
+
+=head1 LIMITATIONS
+
+As the soundex algorithm was originally used a B<long> time ago in the US
+it considers only the English alphabet and pronunciation.
+
+As it is mapping a large space (arbitrary length strings) onto a small
+space (single letter plus 3 digits) no inference can be made about the
+similarity of two strings which end up with the same soundex code. For
+example, both C<Hilbert> and C<Heilbronn> end up with a soundex code
+of C<H416>.
+
+=head1 AUTHOR
+
+This code was implemented by Mike Stok (C<stok@cybercom.net>) from the
+description given by Knuth. Ian Phillips (C<ian@pipex.net>) and Rich Pinder
+(C<rpinder@hsc.usc.edu>) supplied ideas and spotted mistakes.
diff --git a/contrib/perl5/lib/Text/Tabs.pm b/contrib/perl5/lib/Text/Tabs.pm
new file mode 100644
index 0000000..acd7afb
--- /dev/null
+++ b/contrib/perl5/lib/Text/Tabs.pm
@@ -0,0 +1,97 @@
+
+package Text::Tabs;
+
+require Exporter;
+
+@ISA = (Exporter);
+@EXPORT = qw(expand unexpand $tabstop);
+
+use vars qw($VERSION $tabstop $debug);
+$VERSION = 96.121201;
+
+use strict;
+
+BEGIN {
+ $tabstop = 8;
+ $debug = 0;
+}
+
+sub expand
+{
+ my @l = @_;
+ for $_ (@l) {
+ 1 while s/(^|\n)([^\t\n]*)(\t+)/
+ $1. $2 . (" " x
+ ($tabstop * length($3)
+ - (length($2) % $tabstop)))
+ /sex;
+ }
+ return @l if wantarray;
+ return $l[0];
+}
+
+sub unexpand
+{
+ my @l = @_;
+ my @e;
+ my $x;
+ my $line;
+ my @lines;
+ my $lastbit;
+ for $x (@l) {
+ @lines = split("\n", $x, -1);
+ for $line (@lines) {
+ $line = expand($line);
+ @e = split(/(.{$tabstop})/,$line,-1);
+ $lastbit = pop(@e);
+ $lastbit = '' unless defined $lastbit;
+ $lastbit = "\t"
+ if $lastbit eq " "x$tabstop;
+ for $_ (@e) {
+ if ($debug) {
+ my $x = $_;
+ $x =~ s/\t/^I\t/gs;
+ print "sub on '$x'\n";
+ }
+ s/ +$/\t/;
+ }
+ $line = join('',@e, $lastbit);
+ }
+ $x = join("\n", @lines);
+ }
+ return @l if wantarray;
+ return $l[0];
+}
+
+1;
+__END__
+
+
+=head1 NAME
+
+Text::Tabs -- expand and unexpand tabs per the unix expand(1) and unexpand(1)
+
+=head1 SYNOPSIS
+
+use Text::Tabs;
+
+$tabstop = 4;
+@lines_without_tabs = expand(@lines_with_tabs);
+@lines_with_tabs = unexpand(@lines_without_tabs);
+
+=head1 DESCRIPTION
+
+Text::Tabs does about what the unix utilities expand(1) and unexpand(1)
+do. Given a line with tabs in it, expand will replace the tabs with
+the appropriate number of spaces. Given a line with or without tabs in
+it, unexpand will add tabs when it can save bytes by doing so. Invisible
+compression with plain ascii!
+
+=head1 BUGS
+
+expand doesn't handle newlines very quickly -- do not feed it an
+entire document in one string. Instead feed it an array of lines.
+
+=head1 AUTHOR
+
+David Muir Sharnoff <muir@idiom.com>
diff --git a/contrib/perl5/lib/Text/Wrap.pm b/contrib/perl5/lib/Text/Wrap.pm
new file mode 100644
index 0000000..0fe7fb9
--- /dev/null
+++ b/contrib/perl5/lib/Text/Wrap.pm
@@ -0,0 +1,125 @@
+package Text::Wrap;
+
+use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION $columns $debug);
+use strict;
+use Exporter;
+
+$VERSION = "97.02";
+@ISA = qw(Exporter);
+@EXPORT = qw(wrap);
+@EXPORT_OK = qw($columns $tabstop fill);
+
+use Text::Tabs qw(expand unexpand $tabstop);
+
+
+BEGIN {
+ $columns = 76; # <= screen width
+ $debug = 0;
+}
+
+sub wrap
+{
+ my ($ip, $xp, @t) = @_;
+
+ my @rv;
+ my $t = expand(join(" ",@t));
+
+ my $lead = $ip;
+ my $ll = $columns - length(expand($lead)) - 1;
+ my $nl = "";
+
+ $t =~ s/^\s+//;
+ while(length($t) > $ll) {
+ # remove up to a line length of things that
+ # aren't new lines and tabs.
+ if ($t =~ s/^([^\n]{0,$ll})(\s|\Z(?!\n))//) {
+ my ($l,$r) = ($1,$2);
+ $l =~ s/\s+$//;
+ print "WRAP $lead$l..($r)\n" if $debug;
+ push @rv, unexpand($lead . $l), "\n";
+
+ } elsif ($t =~ s/^([^\n]{$ll})//) {
+ print "SPLIT $lead$1..\n" if $debug;
+ push @rv, unexpand($lead . $1),"\n";
+ }
+ # recompute the leader
+ $lead = $xp;
+ $ll = $columns - length(expand($lead)) - 1;
+ $t =~ s/^\s+//;
+ }
+ print "TAIL $lead$t\n" if $debug;
+ push @rv, $lead.$t if $t ne "";
+ return join '', @rv;
+}
+
+
+sub fill
+{
+ my ($ip, $xp, @raw) = @_;
+ my @para;
+ my $pp;
+
+ for $pp (split(/\n\s+/, join("\n",@raw))) {
+ $pp =~ s/\s+/ /g;
+ my $x = wrap($ip, $xp, $pp);
+ push(@para, $x);
+ }
+
+ # if paragraph_indent is the same as line_indent,
+ # separate paragraphs with blank lines
+
+ return join ($ip eq $xp ? "\n\n" : "\n", @para);
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Text::Wrap - line wrapping to form simple paragraphs
+
+=head1 SYNOPSIS
+
+ use Text::Wrap
+
+ print wrap($initial_tab, $subsequent_tab, @text);
+
+ use Text::Wrap qw(wrap $columns $tabstop fill);
+
+ $columns = 132;
+ $tabstop = 4;
+
+ print fill($initial_tab, $subsequent_tab, @text);
+ print fill("", "", `cat book`);
+
+=head1 DESCRIPTION
+
+Text::Wrap::wrap() is a very simple paragraph formatter. It formats a
+single paragraph at a time by breaking lines at word boundries.
+Indentation is controlled for the first line ($initial_tab) and
+all subsquent lines ($subsequent_tab) independently. $Text::Wrap::columns
+should be set to the full width of your output device.
+
+Text::Wrap::fill() is a simple multi-paragraph formatter. It formats
+each paragraph separately and then joins them together when it's done. It
+will destory any whitespace in the original text. It breaks text into
+paragraphs by looking for whitespace after a newline. In other respects
+it acts like wrap().
+
+=head1 EXAMPLE
+
+ print wrap("\t","","This is a bit of text that forms
+ a normal book-style paragraph");
+
+=head1 BUGS
+
+It's not clear what the correct behavior should be when Wrap() is
+presented with a word that is longer than a line. The previous
+behavior was to die. Now the word is now split at line-length.
+
+=head1 AUTHOR
+
+David Muir Sharnoff <muir@idiom.com> with help from Tim Pierce and
+others. Updated by Jacqui Caren.
+
+=cut
OpenPOWER on IntegriCloud