from words import clean_words
import random, sys

class PhraseGenerator(object):
    """
    Markov-chain phrase generator.

    Chains are stored as nested dicts: each of the first ``chainlen``
    levels is keyed by a context word, and the innermost level maps a
    following word to the number of times it was seen after that context.
    After totalize() every level also carries a "TOTAL" entry holding the
    sum of the counts below it.

    NOTE: the chain length is read from the module-level name ``chainlen``,
    which must be defined before count() or generate() is called.
    NOTE: "TOTAL" is a reserved key; an input word spelled exactly "TOTAL"
    would collide with it.
    """
    def __init__(self, filename):
        """
        Build the chains from the words of ``filename`` and totalize them.
        """
        self.chains = self.count(filename)
        self.totalize()

    def count(self, filename):
        """
        Count chains of words in file.

        Slides a window of ``chainlen`` words over the stream produced by
        clean_words() and, for each window, increments the count of the
        word that follows it.

        Returns the nested dict of counts (without "TOTAL" entries).
        """
        oldwords = []
        chains = {}
        # Use a context manager so the file handle is always closed.
        with open(filename) as source:
            for word in clean_words(source):
                if len(oldwords) == chainlen:
                    # Walk (creating as needed) one dict level per context word.
                    chain = chains
                    for oldword in oldwords:
                        if oldword not in chain:
                            chain[oldword] = {}
                        chain = chain[oldword]
                    chain[word] = chain.get(word, 0) + 1
                    oldwords.pop(0)
                oldwords.append(word)
        return chains

    def totalize(self, chain=None):
        """
        Recursively store, in every chain level, the sum of its counts
        under the "TOTAL" key.

        ``chain`` defaults to the root (self.chains). An existing "TOTAL"
        entry is ignored when summing, so calling this more than once does
        not inflate the totals.
        """
        if chain is None:
            chain = self.chains
        total = 0
        for word, value in chain.items():
            if word == "TOTAL":
                # Left over from a previous pass; it is recomputed below.
                continue
            if isinstance(value, dict):
                self.totalize(value)
                value = value["TOTAL"]
            total += value
        chain["TOTAL"] = total

    def chose_word(self, chain):
        """
        Choose a word in a chain, at random, weighted by its count.

        ``chain`` must already be totalized. Returns the chosen word, or
        None when the chain holds no words (its "TOTAL" is zero).
        """
        total = chain["TOTAL"]
        if total <= 0:
            return None
        # randint() includes both endpoints: drawing from 1..total gives
        # exactly ``total`` outcomes, so each word gets a share equal to its
        # count (drawing from 0 would favour the first word iterated).
        value = random.randint(1, total)
        for word, prob in chain.items():
            if word == "TOTAL":
                continue
            if isinstance(prob, dict):
                # Intermediate level: a word's weight is its subtree total.
                prob = prob["TOTAL"]
            if prob >= value:
                return word
            value -= prob
        return None

    def generate(self):
        """
        Generate a sentence from the chains.

        Starts from the context ["SEP"] and keeps choosing a next word from
        the last ``chainlen`` words until "SEP" is chosen or no word is
        available. Returns the sentence as a list of words, without "SEP".
        """
        sentence = []
        curwords = ["SEP"]
        while True:
            # Descend as deep as the current context allows; when a context
            # word is unknown, choose from the deepest level reached.
            chain = self.chains
            for curword in curwords:
                if curword not in chain:
                    break
                chain = chain[curword]
            word = self.chose_word(chain)
            # None means the chain is empty; stop rather than loop forever.
            if word is None or word == "SEP":
                break
            sentence.append(word)
            curwords.append(word)
            if len(curwords) > chainlen:
                curwords.pop(0)
        return sentence

# Command-line driver: print <nb> sentences generated from <filename>.
# print() is called with a single parenthesised argument so the statements
# behave the same under Python 2 and Python 3.
if not 2 <= len(sys.argv) <= 4:
    print("Syntax: %s <filename> [<nb>] [<chainlen>]" % sys.argv[0])
    sys.exit(1)

filename = sys.argv[1]

# Number of sentences to print; defaults to a single sentence when omitted
# (previously this branch set chainlen by mistake and left nb undefined).
nb = int(sys.argv[2]) if len(sys.argv) > 2 else 1

# Number of context words per chain; read as a module-level global by
# PhraseGenerator, so it must be set before the generator is built.
chainlen = int(sys.argv[3]) if len(sys.argv) > 3 else 2
if chainlen < 1:
    # PhraseGenerator cannot build or walk chains without context words.
    print("Syntax: %s <filename> [<nb>] [<chainlen>]" % sys.argv[0])
    print("<chainlen> must be at least 1")
    sys.exit(1)

# Fixed seed: the same input always produces the same sentences.
random.seed(0)

gen = PhraseGenerator(filename)

for i in range(nb):
    sentence = gen.generate()
    print(" ".join(sentence))
