def _build_vocab(filename):
  """Build a word-to-id mapping ordered by descending word frequency.

  The words returned by `_read_words(filename)` are counted, then ranked by
  count (highest first) with ties broken by ascending word order. Each
  distinct word is assigned its rank as an integer id, starting at 0.

  Args:
    filename: Passed unchanged to `_read_words`.
      NOTE(review): presumably a path to a text corpus -- confirm against
      `_read_words`, which is defined outside this block.

  Returns:
    A dict mapping each distinct word to its integer id. Empty if
    `_read_words` yields no words.
  """
  data = _read_words(filename)

  counter = collections.Counter(data)
  # Negating the count sorts most-frequent first; the word itself is the
  # tie-breaker so the id assignment is deterministic.
  count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))

  # Enumerate the ranked pairs directly instead of unzipping them into
  # separate tuples: unpacking `zip(*count_pairs)` raises ValueError when
  # there are no words at all.
  word_to_id = {word: index for index, (word, _) in enumerate(count_pairs)}

  return word_to_id

Figure 1: Flow chart of _build_vocab.py file

results matching ""

    No results matching ""