// Source: https://github.com/Microsoft/CNTK (raw file)
// Tip revision: cd446335f32fcefcc3821a15456f630cd4243bb1 (short: cd44633)
//               authored by Ratan Rai Sur on 13 July 2017, 19:35:29 UTC
// Commit message: "wip: bring in stuff from cntk_py.i"
// File: MLFUtils.h
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//

#pragma once

#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include <boost/algorithm/string.hpp>
#include <boost/noncopyable.hpp>
#include <boost/range/iterator_range_core.hpp>


namespace CNTK {

    // In-memory representation of a state list table.
    // The whole table is kept in memory: the number of states is only expected to be
    // a couple of thousand, so the footprint is small.
    // Instances cannot be copied (the class derives from boost::noncopyable).
    class StateTable : boost::noncopyable
    {
    public:
        // Reads the state list from the file at 'stateListPath'.
        // NOTE(review): defined out of line; presumably fills both the state table and the
        // sil state mask exposed by the accessors below - confirm against the implementation.
        void ReadStateList(const std::wstring& stateListPath);

        // Returns the cached sil mask: element [state index] is true if that state is a sil state.
        const std::vector<bool>& SilStateMask() const
        {
            return m_silStateMask;
        }

        // Returns the mapping from state name to state index.
        const std::unordered_map<std::string, size_t>& States() const
        {
            return m_stateTable;
        }

    private:
        // Returns true if 'stateName' is longer than three characters and starts with "sil".
        // A name that is exactly "sil" is therefore not reported as a sil state.
        bool IsSilState(const std::string& stateName) const
        {
            // std::string::compare is used instead of strncmp so that this header does not
            // depend on <cstring>, which it does not include.
            return stateName.size() > 3 && stateName.compare(0, 3, "sil") == 0;
        }

        // NOTE(review): defined out of line; judging by the signature it returns the non-empty
        // lines of the file at 'path' as character ranges, presumably pointing into 'buffer',
        // which would then have to outlive the returned ranges - confirm.
        static std::vector<boost::iterator_range<char*>> ReadNonEmptyLines(const std::wstring& path, std::vector<char>& buffer);

        std::vector<bool> m_silStateMask;                     // [state index] => true if is sil state (cached)
        std::unordered_map<std::string, size_t> m_stateTable; // state name => state index
    };

    // Shared-ownership handle to a StateTable.
    using StateTablePtr = std::shared_ptr<StateTable>;

    // Numeric identifier of a class (state).
    using ClassIdType = unsigned short;

    // Representation of an MLF range: a start frame, a frame count and the numeric
    // class (state) id associated with those frames.
    class MLFFrameRange
    {
        // NOTE(review): defined out of line; presumably the factor that converts HTK time
        // stamps into frame indices - confirm against the implementation.
        static const double s_htkTimeToFrame;

        // Members are zero-initialized so that the accessors never read indeterminate
        // values on an object for which Build() has not been called yet.
        uint32_t m_firstFrame = 0;     // start frame
        uint32_t m_numFrames = 0;      // number of frames
        ClassIdType m_classId = 0;     // numeric state id

    public:
        // Parses format with original HTK state align MLF format and state list and builds an MLFFrameRange.
        //   tokens     - the tokens of one MLF line.
        //   stateTable - mapping from state name to state index.
        //   byteOffset - NOTE(review): presumably the position of the line in the input,
        //                used for error reporting - confirm.
        void Build(const std::vector<boost::iterator_range<char*>>& tokens, const std::unordered_map<std::string, size_t>& stateTable, size_t byteOffset);

        // Numeric state id of this range.
        ClassIdType ClassId() const { return m_classId;    }
        // Index of the first frame of this range.
        uint32_t FirstFrame() const { return m_firstFrame; }
        // Number of frames in this range.
        uint32_t NumFrames()  const { return m_numFrames;  }

        // Note: preserving logic of the old speech reader.
        // Parses the time range from 'tokens' and returns it as a pair of values.
        // There are two formats:
        //  - original HTK
        //  - Dong's hacked format: ts te senonename senoneid
        // NOTE(review): whether the returned pair is (first, last) or (first, end) is not
        // visible from this header - confirm against the implementation.
        static std::pair<size_t, size_t> ParseFrameRange(const std::vector<boost::iterator_range<char*>>& tokens, size_t byteOffset);

    private:
        // NOTE(review): defined out of line; judging by the name it validates 'frameRange'
        // and stores it together with 'uid' in this object - confirm.
        void VerifyAndSaveRange(const std::pair<size_t, size_t>& frameRange, size_t uid, size_t byteOffset);
    };

    // Helper for parsing a single MLF utterance; holds a shared reference to a state table.
    class MLFUtteranceParser
    {
    public:
        // Creates a parser that keeps a shared reference to 'states'.
        MLFUtteranceParser(const StateTablePtr& states)
            : m_states(states)
        {
        }

        // Parses 'utteranceData' and emits the frame ranges into 'result'.
        // NOTE(review): defined out of line; the meaning of the boolean result and of
        // 'sequenceOffset' is not visible from this header - presumably success/failure and
        // the position of the utterance in the input, confirm against the implementation.
        bool Parse(const boost::iterator_range<char*>& utteranceData, std::vector<MLFFrameRange>& result, size_t sequenceOffset);

    private:
        // State table handed in at construction; the handle itself is never reseated.
        const StateTablePtr m_states;
    };

}
// [page footer] back to top