https://github.com/cran/dtw
Tip revision: 07f4472642283665752cb0b4a03836d0698fe255 authored by Toni Giorgino on 17 June 2008, 00:00:00 UTC
version 1.12-5
version 1.12-5
Tip revision: 07f4472
dtw.bib
% Sakoe and Chiba (1978): DP-based time normalization for spoken word
% recognition; symmetric vs. asymmetric forms and the slope constraint.
@article{Sakoe1978,
  author   = {Sakoe, H. and Chiba, S.},
  title    = {Dynamic programming algorithm optimization for spoken word recognition},
  journal  = {{IEEE} Transactions on Acoustics, Speech, and Signal Processing},
  year     = {1978},
  volume   = {26},
  number   = {1},
  pages    = {43--49},
  month    = feb,
  issn     = {0096-3518},
  abstract = {This paper reports on an optimum dynamic programming (DP) based
              time-normalization algorithm for spoken word recognition. First, a
              general principle of time-normalization is given using
              time-warping function. Then, two time-normalized distance
              definitions, called symmetric and asymmetric forms, are derived
              from the principle. These two forms are compared with each
              other through theoretical discussions and experimental studies.
              The symmetric form algorithm superiority is established. A new
              technique, called slope constraint, is successfully introduced,
              in which the warping function slope is restricted so as to
              improve discrimination between words in different categories.
              The effective slope constraint characteristic is qualitatively
              analyzed, and the optimum slope constraint condition is
              determined through experiments. The optimized algorithm is then
              extensively subjected to experimental comparison with various
              DP-algorithms, previously applied to spoken word recognition by
              different research groups. The experiment shows that the
              present algorithm gives no more than about two-thirds errors,
              even compared to the best conventional algorithm.},
}
% Itakura (1975): isolated-word recognition by minimum prediction residual,
% aligning reference LPC patterns to the input with dynamic programming.
@article{Itakura1975,
  author   = {Itakura, F.},
  title    = {Minimum prediction residual principle applied to speech recognition},
  journal  = {{IEEE} Transactions on Acoustics, Speech, and Signal Processing},
  year     = {1975},
  volume   = {23},
  number   = {1},
  pages    = {67--72},
  month    = feb,
  issn     = {0096-3518},
  abstract = {A computer system is described in which isolated words, spoken by a
              designated talker, are recognized through calculation of a
              minimum prediction residual. A reference pattern for each word
              to be recognized is stored as a time pattern of linear
              prediction coefficients (LPC). The total log prediction
              residual of an input signal is minimized by optimally
              registering the reference LPC onto the input autocorrelation
              coefficients using the dynamic programming algorithm (DP). The
              input signal is recognized as the reference word which produces
              the minimum prediction residual. A sequential decision
              procedure is used to reduce the amount of computation in DP. A
              frequency normalization with respect to the long-time spectral
              distribution is used to reduce effects of variations in the
              frequency response of telephone connections. The system has
              been implemented on a DDP-516 computer for the 200-word
              recognition experiment. The recognition rate for a designated
              male talker is 97.3 percent for telephone input, and the
              recognition time is about 22 times real time.},
}
% Velichko and Zagoruyko (1970): automatic recognition of a 200-word vocabulary.
@article{Velichko,
  author    = {Velichko, V. M. and Zagoruyko, N. G.},
  title     = {Automatic Recognition of 200 Words},
  journal   = {International Journal of Man-Machine Studies},
  year      = {1970},
  volume    = {2},
  number    = {3},
  pages     = {223--234},
  bibsource = {http://www.interaction-design.org/references/},
}
% White and Neely (1976): comparison of preprocessing (linear prediction vs.
% bandpass filtering) and time-alignment strategies for speech recognition.
@article{White1976,
  author   = {White, G. and Neely, R.},
  title    = {Speech recognition experiments with linear predication, bandpass filtering, and dynamic programming},
  journal  = {{IEEE} Transactions on Acoustics, Speech, and Signal Processing},
  year     = {1976},
  volume   = {24},
  number   = {2},
  pages    = {183--188},
  month    = apr,
  issn     = {0096-3518},
  abstract = {Automatic speech recognition experiments are described in which
              several popular preprocessing and classification strategies are
              compared. Preprocessing is done either by linear predictive
              analysis or by bandpass filtering. The two approaches are shown
              to produce similar recognition scores. The classifier uses
              either linear time stretching or dynamic programming to achieve
              time alignment. It is shown that dynamic programming is of
              major importance for recognition of polysyllabic words. The
              speech is compressed into a quasi-phoneme character string or
              preserved uncompressed. Best results are obtained with
              uncompressed data, using nonlinear time registration for
              multisyllabic words.},
}
% Myers, Rabiner and Rosenberg (1980): speed, memory and accuracy tradeoffs
% among dynamic time warping variants for isolated word recognition.
@article{Myers1980,
  author   = {Myers, C. and Rabiner, L. and Rosenberg, A.},
  title    = {Performance tradeoffs in dynamic time warping algorithms for isolated word recognition},
  journal  = {{IEEE} Transactions on Acoustics, Speech, and Signal Processing},
  year     = {1980},
  volume   = {28},
  number   = {6},
  pages    = {623--635},
  month    = dec,
  issn     = {0096-3518},
  abstract = {The technique of dynamic programming for the time registration of a
              reference and a test pattern has found widespread use in the
              area of isolated word recognition. Recently, a number of
              variations on the basic time warping algorithm have been
              proposed by Sakoe and Chiba, and Rabiner, Rosenberg, and
              Levinson. These algorithms all assume that the test input is
              the time pattern of a feature vector from an isolated word
              whose endpoints are known (at least approximately). The major
              differences in the methods are the global path constraints
              (i.e., the region of possible warping paths), the local
              continuity constraints on the path, and the distance weighting
              and normalization used to give the overall minimum distance.
              The purpose of this investigation is to study the effects of
              such variations on the performance of different dynamic time
              warping algorithms for a realistic speech database. The
              performance measures that were used include: speed of
              operation, memory requirements, and recognition accuracy. The
              results show that both axis orientation and relative length of
              the reference and the test patterns are important factors in
              recognition accuracy. Our results suggest a new approach to
              dynamic time warping for isolated words in which both the
              reference and test patterns are linearly warped to a fixed
              length, and then a simplified dynamic time warping algorithm is
              used to handle the nonlinear component of the time alignment.
              Results with this new algorithm show performance comparable to
              or better than that of all other dynamic time warping
              algorithms that were studied.},
}
% Myers (1980): master's thesis comparing several dynamic time warping
% algorithms for speech recognition. The day of month is kept in `day`,
% because `month` should hold only the three-letter month macro.
@mastersthesis{MyersMS,
  author    = {Myers, C. S.},
  title     = {A Comparative Study Of Several Dynamic Time Warping Algorithms For Speech Recognition},
  school    = {Massachusetts Institute of Technology},
  year      = {1980},
  month     = jun,
  day       = {20},
  url       = {http://dspace.mit.edu/bitstream/1721.1/27909/1/07888629.pdf},
  owner     = {toni},
  timestamp = {2008.04.17},
}