summaryrefslogtreecommitdiffstats
path: root/games/fortune/tools/do_uniq.py
blob: 6fde74967e10b2671ffc5afed503c0aa107b9d1b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/local/bin/python
#
# $FreeBSD$
#
# an aggressive little script for trimming duplicate cookies

import re, sys

# Filler words that are stripped from a fortune before it is compared with
# others.  hash() removes them one at a time in list order, so the order is
# significant: the list runs from longest to shortest, which means a long
# entry such as 'hadnot' is removed before the shorter entries it contains
# ('had', 'a', 'd').
wordlist = [
    'hadnot',
    'donot', 'hadnt',
    'dont', 'have', 'more', 'will', 'your',
    'and', 'are', 'had', 'the', 'you',
    'am', 'an', 'is', 'll', 've', 'we',
    'a', 'd', 'i', 'm', 's',
]

# Matches every character that is not a letter or digit (underscore counts
# as a non-letter here).  Raw string: '\W' is not a valid str escape.
_NON_ALNUM = re.compile(r'[\W_]')

def hash(fortune):
    """Return a fuzzy comparison key for the text of one fortune.

    The text is lower-cased, everything except letters and digits is
    dropped, each entry of ``wordlist`` is deleted wherever it occurs (also
    inside longer words), and the first 30 remaining characters are
    returned.  Fortunes that differ only in case, punctuation, spacing or
    filler words ("don't" vs. "do not") therefore share a key.

    NOTE: the name shadows the built-in hash(); it is kept because edit()
    calls it under this name.
    """
    key = _NON_ALNUM.sub('', fortune.lower())
    for word in wordlist:
        # The entries are plain letters, so a literal replace is enough.
        key = key.replace(word, '')
    # Variants that were tried and left disabled in the original script:
    # deleting the vowels [aeiouy], deleting everything but those vowels,
    # and keying on the last 30 characters instead of the first 30.
    return key[:30]

def edit(datfile):
    """Interactively weed likely-duplicate fortunes out of *datfile*.

    *datfile* is read as a sequence of fortunes, each terminated by a line
    holding only ``%``.  Fortunes are grouped by hash(); every fortune whose
    group has more than one member is shown together with the other members
    of its group, and the user is asked whether to drop it (answer ``y``).
    Once a fortune has been dropped its whole group is forgotten, so the
    remaining members are kept without further questions.

    The result is written to ``datfile + '~'``; *datfile* itself is only
    read.  Returns None.
    """
    # Pass 1: split the input into fortunes.
    fortunes = []
    pending = []
    with open(datfile) as src:
        for line in src:
            # rstrip also accepts a final '%' that lacks its newline.
            if line.rstrip("\n") == "%":
                fortunes.append("".join(pending))
                pending = []
            else:
                pending.append(line)
    # Keep a last fortune that has no closing '%' line instead of silently
    # losing it; whitespace-only leftovers are not a fortune.
    tail = "".join(pending)
    if tail.strip():
        if not tail.endswith("\n"):
            tail += "\n"
        fortunes.append(tail)

    # Compute each key once and group the fortunes by it.
    keyed = [(hash(fortune), fortune) for fortune in fortunes]
    groups = {}
    for key, fortune in keyed:
        groups.setdefault(key, []).append(fortune)
    # Only groups with at least two members are duplicate candidates.
    dups = {key: group for key, group in groups.items() if len(group) > 1}

    # Pass 2: copy the fortunes to the output, asking about each candidate.
    with open(datfile + '~', "w") as out:
        for key, fortune in keyed:
            if key in dups:
                # Push earlier output off the screen.
                print('\n' * 50)
                for other in dups[key]:
                    if other != fortune:
                        print(other, '%')
                print(fortune, '%')
                if input("Remove last fortune? ") == 'y':
                    # Forget the group: its other members are kept as-is.
                    del dups[key]
                    continue
            out.write(fortune + "%\n")

# Script entry point: expects exactly one argument, the fortune file to edit.
if __name__ == "__main__":
    # Checked explicitly rather than with assert, which is stripped under
    # "python -O" and would otherwise only produce a bare AssertionError.
    if len(sys.argv) != 2:
        sys.exit("usage: %s datfile" % sys.argv[0])
    edit(sys.argv[1])
OpenPOWER on IntegriCloud