blob: 6134c75c3eb6a5cfb713b309b509f6fcaf13a7f1 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
|
#!/usr/bin/perl
# sq(ueeze) / unsq(ueeze) - pre-compressor for sorted word lists
# Copyright (C) 2000 Björn Jacke <bjoern.jacke@gmx.de>
#
# This program comes with ABSOLUTELY NO WARRANTY; it may be copied or modified
# under the terms of the GNU General Public License version 2 as published
# by the Free Software Foundation.
# This is a `multi-call-program'. If it's called as `unsq' or `unsq.pl'
# it decompresses -- otherwise it is in compress mode. Input and Output only
# via STDIN and STDOUT. It does almost the same job as the sq/unsq from
# Ispell -- just better ;-)
#
# PS: For best compression results use POSIX sorting order instead of any
# locale-dependent sorting order (set LC_ALL and LC_COLLATE to POSIX)
# version 1.2
# Marker alphabet: the character at index N is the one printed to say
# "N leading characters are shared with the previous word".
# Order is digits, then upper-case, then lower-case letters (62 symbols).
@size_arr = (0 .. 9, 'A' .. 'Z', 'a' .. 'z');
# Largest prefix length that a single marker character can express.
$MAX_PREFIX = scalar(@size_arr) - 1;
# trunc - compress one input line.
#
# Works on the globals $word (the line just read, normally still carrying
# its trailing "\n") and $prev (the line handled by the previous call).
# Prints to STDOUT one marker character from @size_arr that encodes the
# length of the prefix shared by $word and $prev, followed by the rest of
# $word, and then remembers $word in $prev.
sub trunc {
    # The line terminator must never count as part of the shared prefix.
    # Otherwise a repeated word is written as a bare marker without any
    # newline, the marker fuses with the following record, and the
    # stream can no longer be expanded correctly.
    my $limit = length($word);
    $limit-- if $word =~ /\n\z/;
    # A single marker character cannot express more than $MAX_PREFIX.
    $limit = $MAX_PREFIX if $limit > $MAX_PREFIX;
    # Nothing beyond the end of the previous word can be shared.
    $limit = length($prev) if $limit > length($prev);

    my $same = 0;
    while ($same < $limit
        && substr($word, $same, 1) eq substr($prev, $same, 1)) {
        $same++;
    }
    print STDOUT $size_arr[$same], substr($word, $same);
    $prev = $word;
}
# expand - decompress one input line.
#
# Works on the globals $word (the compressed line just read) and $prev
# (the word reconstructed by the previous call). The first character of
# $word is looked up in %to_num to find how many characters of $prev to
# keep; the rest of $word is appended to that prefix. The result is
# stored in $prev and printed to STDOUT. $word is left without its
# leading marker character.
sub expand {
    # how much of the previous word is reused:
    my $keep = $to_num{ substr($word, 0, 1) };
    # drop the leading (meta)-character, leaving only the new suffix:
    $word = substr($word, 1);
    # rebuild the full word and remember it for the next line:
    $prev = substr($prev, 0, $keep) . $word;
    print STDOUT $prev;
}
###### main ######
# Reduce the invocation name to plain "unsq" when it ends in "unsq" or
# "unsq.pl" (case-insensitive); any other name is left untouched.
$0 =~ s/.*?unsq(\.pl)?$/unsq/i;
# No previous word exists before the first line is processed.
$prev = "";
if ($0 ne "unsq") {
    # default mode: compress
    $do_it = \&trunc;
}
else {
    # decompress mode: build the reverse table marker character -> length
    @to_num{@size_arr} = (0 .. $#size_arr);
    $do_it = \&expand;
}
# Feed STDIN line by line through the selected routine, which reads the
# current line from the global $word.
while ($word = <STDIN>) {
    $do_it->();
}
|