import re, types
from collections import defaultdict

def tokenize(s, removePunctuation = True):
    if removePunctuation:
        p = re.compile('[\.,!?()\[\]{}:;"\'<>/ \n\r\t]')
        return [el for el in p.split(s) if el]
    else:
        t = []
        s = re.split('\s', s)
        p = re.compile('(\W)')
        for phrase in s:
            words = p.split(phrase)
            for word in words:
                t.append(word)
        return [el for el in t if el]

class Node(object):
   
    def __init__(self):
        self.freq = 0
        self.next = defaultdict(int)

class Trie(object):
   
    def __init__(self, inFile):

        self.nodes = []
        self.nodes.append(Node())
        self.n = 1
        self.numberOfWords = 0

        for line in file(inFile):
            words = tokenize(line)
            for w in words:
                currNode = 0
                for char in w:
                    if self.nodes[currNode].next[char] == 0:
                        self.nodes.append(Node())
                        self.nodes[currNode].next[char] = self.n
                        currNode = self.n
                        self.n += 1
                    else:
                        currNode = self.nodes[currNode].next[char]
                self.nodes[currNode].freq += 1
        self.numberOfWords = len([node for node in self.nodes if node.freq != 0])

    def __getitem__(self, word):
        """Return the frequency of the given word."""

        if isinstance(word, types.StringTypes):
            currNode = 0
            for char in word:
                if self.nodes[currNode].next[char] == 0:
                    raise AttributeError("No such word: %s" % word)
                else:
                    currNode = self.nodes[currNode].next[char]
            return self.nodes[currNode].freq
        else:
            raise TypeError()

    def __len__(self):
        """Return the number of nodes in the trie."""

        return self.n