from words import clean_words
import random, sys

def cache(func):
    """
    Memoize a method per instance.

    Results are stored in the ``_cache`` dict of the instance the method is
    called on (created on first use if the instance has none), keyed by the
    method name followed by the positional arguments. Keeping the store on
    the instance prevents two objects holding different data from handing
    each other's results back.

    All positional arguments must be hashable. Keyword arguments are not
    supported by the wrapper.
    """
    def inner(self, *args):
        try:
            store = self._cache
        except AttributeError:
            # Instance was built without a cache dict: attach one lazily
            store = self._cache = {}
        key = (func.__name__,) + args
        if key not in store:
            store[key] = func(self, *args)
        return store[key]
    # Keep the wrapped method recognisable in tracebacks and help()
    inner.__name__ = func.__name__
    inner.__doc__ = func.__doc__
    return inner

class PhraseGenerator(object):
    """
    Markov-chain phrase generator.

    Counts every run of ``chainlen`` consecutive words found in a text file
    and uses those counts as weights to draw random sentences. ``chainlen``
    is read from the module-level global of that name, which must be set
    before an instance is created or used.
    """
    def __init__(self, filename):
        """
        Build the chain table from the words of ``filename``
        """
        # Per-instance memo for filter_chains(): prefix tuple -> matching chains
        self._cache = {}
        self.chains = self.count(filename)

    def count(self, filename):
        """
        Count chains of words in file

        Returns a dict mapping each tuple of ``chainlen`` consecutive words,
        as yielded by ``clean_words``, to its number of occurrences.
        """
        window = []
        chains = {}
        # Close the file deterministically instead of leaking the handle
        with open(filename) as source:
            for word in clean_words(source):
                window.append(word)
                if len(window) == chainlen:
                    # Record the window once the current word is part of it,
                    # so the final chain of the file is counted as well
                    key = tuple(window)
                    chains[key] = chains.get(key, 0) + 1
                    del window[0]
        return chains

    def totalize(self, chain):
        """
        Return the sum of all counts stored in the ``chain`` dict
        """
        return sum(chain.values())

    def starstwith(self, chain, beginning):
        """
        Check if a tuple starts with another one

        Returns False when ``beginning`` is longer than ``chain``: such a
        chain cannot start with it, even if the overlapping part matches.
        """
        if len(beginning) > len(chain):
            return False
        return all(left == right for left, right in zip(chain, beginning))

    def filter_chains(self, beginning):
        """
        Filter chains to only return those with given beginning

        Returns a dict (chain -> count). The result is memoized in
        ``self._cache`` and the same dict object is returned on later calls,
        so callers must not modify it.
        """
        key = tuple(beginning)
        try:
            return self._cache[key]
        except KeyError:
            pass
        result = {}
        for chain, prob in self.chains.items():
            if self.starstwith(chain, key):
                result[chain] = prob
        self._cache[key] = result
        return result

    def chose_word(self, beginning):
        """
        Chose a word in a chain

        Picks one of the chains starting with ``beginning``, with a
        probability proportional to its count, and returns the word that
        follows ``beginning`` in it. Returns None when no chain starts with
        ``beginning``.
        """
        filtered = self.filter_chains(beginning)
        total = self.totalize(filtered)
        if total <= 0:
            return None
        # randint() includes both bounds: drawing from 1..total gives each
        # chain exactly `prob` winning values (starting at 0 would hand one
        # extra value to the first chain)
        value = random.randint(1, total)
        for chain, prob in filtered.items():
            if value <= prob:
                return chain[len(beginning)]
            value -= prob

    def generate(self):
        """
        Generate a sentence from a chain

        Starts from the "SEP" marker and keeps drawing words until "SEP" is
        drawn again or no known chain continues the current words. Returns
        the list of words drawn, without the markers.
        """
        sentence = []
        curwords = [ "SEP" ]
        while True:
            word = self.chose_word(tuple(curwords))
            # None means a dead end: stop rather than loop forever
            if word is None or word == "SEP":
                break
            sentence.append(word)
            curwords.append(word)
            # Keep at most chainlen - 1 words of context for the next draw
            if len(curwords) >= chainlen:
                curwords.pop(0)
        return sentence


# Command-line driver: <filename> [<nb>] [<chainlen>]
if not 2 <= len(sys.argv) <= 4:
    print("Syntax: %s <filename> [<nb>] [<chainlen>]" % sys.argv[0])
    sys.exit(1)

filename = sys.argv[1]

# Number of sentences to print. When <nb> is omitted, fall back to a single
# sentence; previously nb was left unassigned in that case and the loop
# below failed with a NameError.
if len(sys.argv) <= 2:
    nb = 1
else:
    nb = int(sys.argv[2])

# Number of words per chain. This has to stay a module-level name because
# PhraseGenerator reads it as a global.
if len(sys.argv) <= 3:
    chainlen = 2
else:
    chainlen = int(sys.argv[3])

# Fixed seed: the same input file yields the same sentences on every run
random.seed(0)

gen = PhraseGenerator(filename)

for i in range(nb):
    sentence = gen.generate()
    print(" ".join(sentence))

