https://github.com/snowballstem/pystemmer
Tip revision: cbe740d525d01ae668ad59354aa75eff2b325f4e authored by Stefano Rivera on 26 November 2023, 19:48:16 UTC
Add Python 3.12 to CI
Add Python 3.12 to CI
Tip revision: cbe740d
benchmark.py
#!/usr/bin/env python
# This script runs a simple benchmark of the Python stemmer interface (the Stemmer module).
import timeit
# Text files to benchmark against; paths are relative to the working directory.
datafiles = ('sampledata/englishvoc.txt', 'sampledata/puttydoc.txt',)

# Not used anywhere in this script; kept so the module's top-level names
# stay the same.
words_lst = [None]

for datafile in datafiles:
    # Load the words here only so the word count can be reported; the timed
    # statement works on its own copy built by the timeit setup code below.
    words = []
    with open(datafile) as infile:
        for line in infile:
            words.extend(line.split())

    # The second argument passed to Stemmer.Stemmer is varied per run and is
    # reported as cacheSize in the output.
    for cache_size in (0, 1, 10000, 30000):
        # Setup code is executed by timeit (untimed) before each measurement,
        # so every measurement starts with a freshly constructed stemmer.
        # %r (not '%s') keeps the generated source valid even if the path
        # contains quotes or backslashes.
        setup = r"""
import Stemmer
stemmer = Stemmer.Stemmer('en', %d)
words = []
with open(%r) as infile:
    for line in infile:
        words.extend(line.split())
""" % (cache_size, datafile)
        t = timeit.Timer(setup=setup,
                         stmt='stemmer.stemWords(words)')

        for iters in (1, 2, 3, 10):
            # repeat(5, iters) returns five totals, each for `iters` runs of
            # the statement; divide to get the mean time per run, then
            # report the best of the five.
            times = [elapsed / iters for elapsed in t.repeat(5, iters)]
            print("'%s':words=%d,cacheSize=%d,iters=%d,mintime=%f" %
                  (datafile, len(words), cache_size, iters, min(times)))