https://github.com/mlpnlp/mlpnlp-nmt
Raw File
Tip revision: 2b1fd7d818775afe3c40f48990ad84b9362c9454 authored by jnszk on 21 November 2017, 00:28:49 UTC
Merge pull request #4 from butsugiri/fix-adam-eps
Tip revision: 2b1fd7d
count_freq.py
import sys
from collections import Counter

# Minimum number of occurrences a word needs in order to be printed when no
# threshold is given on the command line.
DEFAULT_THRESHOLD = 3


def count_words(lines):
    """Count whitespace-separated words over an iterable of text lines.

    Args:
        lines: Iterable of strings (e.g. an open text stream). Trailing
            newlines need not be removed: argument-less ``str.split``
            discards all leading/trailing whitespace.

    Returns:
        A ``collections.Counter`` mapping each word to its number of
        occurrences, with keys inserted in order of first appearance.
    """
    word_count = Counter()
    for line in lines:
        word_count.update(line.split())
    return word_count


def frequent_words(word_count, threshold):
    """Yield ``(word, count)`` pairs whose count is at least ``threshold``.

    Pairs are produced from most to least frequent, in the order given by
    ``Counter.most_common()``.

    Args:
        word_count: ``collections.Counter`` of word frequencies.
        threshold: Minimum count a word must have to be yielded.
    """
    for word, num in word_count.most_common():
        if num < threshold:
            # most_common() is sorted by descending count, so every
            # remaining entry is below the threshold as well.
            break
        yield word, num


def main(args=None):
    """Read text from stdin and print ``word<TAB>count`` for frequent words.

    Args:
        args: Argument vector in ``sys.argv`` layout (program name first).
            Defaults to ``sys.argv``. If a second element is present it is
            parsed with ``int`` and used as the threshold; otherwise
            ``DEFAULT_THRESHOLD`` is used.

    Raises:
        ValueError: If the threshold argument is not a valid integer.
    """
    if args is None:
        args = sys.argv

    threshold = DEFAULT_THRESHOLD
    if len(args) >= 2:
        threshold = int(args[1])
    # Report the effective threshold on stderr so stdout carries only data.
    sys.stderr.write('threshold = {}\n'.format(threshold))

    for word, num in frequent_words(count_words(sys.stdin), threshold):
        print(word + "\t" + str(num))


if __name__ == "__main__":
    main()
back to top