textproc/ispell/files/unsq.pl


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85

#!/usr/bin/perl

# sq(eeze) / unsq(eeze) - pre-compressor for sorted word lists
# Copyright (C) 2000 Björn Jacke <bjoern.jacke@gmx.de>
#
# This program comes with ABSOLUTELY NO WARRANTY; it may be copied or modified
# under the terms of the GNU General Public License version 2 as published
# by the Free Software Foundation.

# This is a `multi-call-program'. If it's called as `unsq' or `unsq.pl'
# it decompresses -- otherwise it is in compress mode. Input and Output only
# via STDIN and STDOUT. It does almost the same job as the sq/unsq from
# Ispell -- just better ;-)
#
# PS: For best compression results, sort the input in POSIX order rather than
#     in any locale-dependent order (set LC_ALL and LC_COLLATE to POSIX).

# version 1.2


# Alphabet of prefix-length markers: the character at index n is written
# by trunc() in front of a word that shares n leading characters with the
# previous word.  Order is digits, then upper case, then lower case.
@size_arr = ('0' .. '9', 'A' .. 'Z', 'a' .. 'z');

# Largest shared-prefix length that one marker character can express
# (the highest index of @size_arr).
$MAX_PREFIX = scalar(@size_arr) - 1;


# trunc - compress one line.
#
# Reads the globals $word (current input line, normally still carrying its
# trailing newline) and $prev (previous input line).  Writes to STDOUT one
# marker character from @size_arr giving the number of leading characters
# $word shares with $prev, followed by the remainder of $word.  Afterwards
# $prev is set to $word.  The shared length is left in the global $same.
#
# The shared prefix never covers the trailing newline of $word and never
# exceeds $MAX_PREFIX.  Keeping the newline out of the prefix matters for
# duplicate lines: counting it would emit a bare marker with no line
# terminator, which then runs together with the next record and cannot be
# decoded correctly by expand().
sub trunc {

	  # upper bound for the shared prefix: everything except the newline,
	  # but no more than a single marker character can encode
	my $limit = length($word);
	$limit-- if substr($word, -1) eq "\n";
	$limit = $MAX_PREFIX if $limit > $MAX_PREFIX;

	  # count matching leading characters up to that bound
	$same = 0;
	while ($same < $limit
	       && substr($word,$same,1) eq substr($prev,$same,1)) {
		$same++;
	}

	print STDOUT $size_arr[$same], substr($word,$same);

	$prev = $word;
}


# expand - decompress one line.
#
# Reads the globals $word (one compressed record: a marker character
# followed by the tail of the word) and $prev (the previously
# reconstructed line).  The marker is looked up in %to_num to find how
# many leading characters of $prev to keep; the tail is appended to that
# prefix, the result is stored back in $prev and written to STDOUT.
# $word is left holding the record without its marker character.
sub expand {

	  # the first character is the marker; translate it to a prefix length
	my $marker = substr($word, 0, 1);
	my $keep   = $to_num{$marker};

	  # the rest of the record is the new tail of the word
	$word = substr($word, 1);

	  # retained prefix of the previous word + new tail = current word
	$prev = substr($prev, 0, $keep) . $word;
	print STDOUT $prev;
}


######  main ######

# Reduce the program name to plain "unsq" when it ends in "unsq" or
# "unsq.pl" (case-insensitive, any leading path); any other name is left
# alone and selects compress mode below.
$0 =~ s/.*?unsq(\.pl)?$/unsq/i;

# No previous word before the first input line.
$prev = "";


if ($0 ne "unsq") {
	  # compress mode
	$do_it = \&trunc;
}

else {
	  # decompress mode: build the reverse lookup table that maps each
	  # marker character to its index in @size_arr
	$i = 0;
	$to_num{$_} = $i++ for @size_arr;

	$do_it = \&expand;
}


# Feed every line of STDIN to the selected routine; both routines take
# their input from the globals $word and $prev.
while ($word = <STDIN>) {
	$do_it->();
}