// // Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. // #include "stdafx.h" #define _CRT_SECURE_NO_WARNINGS #define _SCL_SECURE_NO_WARNINGS #include "MLFUtils.h" // Disabling some deprecation warnings in boost. // Classes that we use are not deprecated. #pragma warning(disable:4348 4459 4100) #include #include #pragma warning(default:4348 4459 4100) using namespace std; namespace Microsoft { namespace MSR { namespace CNTK { inline void EraseEmptyLines(vector>& lines) { auto end = std::remove_if(lines.begin(), lines.end(), [](const boost::iterator_range& r) { return r.empty(); }); lines.erase(end, lines.end()); } void StateTable::ReadStateList(const wstring& stateListPath) { vector buffer; // buffer owns the characters -- don't release until done vector> lines = ReadNonEmptyLines(stateListPath, buffer); size_t index = 0; m_silStateMask.reserve(lines.size()); for (index = 0; index < lines.size(); index++) { string line(lines[index].begin(), lines[index].end()); if (m_stateTable.find(line) != m_stateTable.end()) RuntimeError("Deduplicate two states with the same name '%s' from the state table '%ls'.", line.c_str(), stateListPath.c_str()); m_stateTable[line] = index; m_silStateMask.push_back(IsSilState(line)); } assert(index == m_stateTable.size()); fprintf(stderr, "Total (%zu) state names in state list '%ls'\n", m_stateTable.size(), stateListPath.c_str()); if (m_stateTable.empty()) RuntimeError("State list table '%ls' is not allowed to be empty.", stateListPath.c_str()); } vector> StateTable::ReadNonEmptyLines(const wstring& path, vector& buffer) { // load it into RAM in one huge chunk, not more than a couple // thousand states. auto_file_ptr f(fopenOrDie(path, L"rb")); size_t len = filesize(f); buffer.reserve(len + 1); freadOrDie(buffer, len, f); buffer.push_back(0); // this makes it a proper C string vector> lines; auto range = boost::make_iterator_range(buffer.data(), buffer.data() + buffer.size()); boost::split(lines, range, boost::is_any_of("\r\n")); EraseEmptyLines(lines); return lines; } const double MLFFrameRange::s_htkTimeToFrame = 100000.0; void MLFFrameRange::Build(const vector>& tokens, const unordered_map& stateTable, size_t byteOffset) { auto range = ParseFrameRange(tokens, byteOffset); size_t uid; if (!stateTable.empty()) // state table is given, check the state against the table. { auto stateName = string(tokens[2].begin(), tokens[2].end()); auto index = stateTable.find(stateName); if (index == stateTable.end()) RuntimeError("Offset '%zu': frame range state '%s' is not found in the statelist", byteOffset, stateName.c_str()); uid = index->second; // get state index } else { // This is too simplistic for parsing more complex MLF formats. Fix when needed, // add support so that it can handle conditions where time instead of frame number is used. if (tokens.size() != 4) RuntimeError("Offset '%zu': CNTK supports 4-column format frame range or state list table.", byteOffset); if (!boost::spirit::qi::parse(tokens[3].begin(), tokens[3].end(), boost::spirit::qi::int_, uid)) RuntimeError("Offset '%zu': cannot parse class id of the frame range", byteOffset); } VerifyAndSaveRange(range, uid, byteOffset); } void MLFFrameRange::VerifyAndSaveRange(const pair& frameRange, size_t uid, size_t byteOffset) { if (frameRange.second < frameRange.first) RuntimeError("Offset '%zu': frame range end time is earlier than start time.", byteOffset); m_firstFrame = (unsigned int)frameRange.first; m_numFrames = (unsigned int)(frameRange.second - frameRange.first); m_classId = (ClassIdType)uid; // check for numeric overflow if (m_firstFrame != frameRange.first || m_firstFrame + m_numFrames != frameRange.second) RuntimeError("Offset '%zu': not enough bits for one of the frame range values.", byteOffset); if(m_classId != uid) RuntimeError("Offset '%zu': not enough bits to represent a class id '%zu'.", byteOffset, uid); } pair MLFFrameRange::ParseFrameRange(const vector>& tokens, size_t byteOffset) { if (tokens.size() < 2) RuntimeError("Offset '%zu': do not support frame range format with less than two columns.", byteOffset); double rts = 0; if (!boost::spirit::qi::parse(tokens[0].begin(), tokens[0].end(), boost::spirit::qi::double_, rts)) RuntimeError("Offset '%zu': cannot parse start frame of range.", byteOffset); double rte = 0; if (!boost::spirit::qi::parse(tokens[1].begin(), tokens[1].end(), boost::spirit::qi::double_, rte)) RuntimeError("Offset '%zu': cannot parse end frame of range.", byteOffset); // Simulating the old reader behavior. // If the difference between two frames is more than s_htkTimeToFrame, we expect conversion to time if (rte - rts >= s_htkTimeToFrame - 1) // convert time to frame { return make_pair( (size_t)(rts / s_htkTimeToFrame + 0.5), (size_t)(rte / s_htkTimeToFrame + 0.5)); } else { return make_pair((size_t)(rts), (size_t)(rte)); } } // Parses the data into a vector of MLFFrameRanges. bool MLFUtteranceParser::Parse(const boost::iterator_range& sequenceData, vector& utterance, size_t sequenceOffset) { // Split to lines. vector> lines; lines.reserve(512); boost::split(lines, sequenceData, boost::is_any_of("\r\n")); EraseEmptyLines(lines); // Start parsing of actual entry size_t idx = 0; string sequenceKey = string(lines[idx].begin(), lines[idx].end()); idx++; // Check that mlf entry has a correct sequence key. if (sequenceKey.length() < 3 || sequenceKey[0] != '"' || sequenceKey[sequenceKey.length() - 1] != '"') { fprintf(stderr, "WARNING: skipping sequence entry '%s' due to it being too short or not quoted\n", sequenceKey.c_str()); return false; } // strip quotes sequenceKey = sequenceKey.substr(1, sequenceKey.length() - 2); if (sequenceKey.size() > 2 && sequenceKey[0] == '*' && sequenceKey[1] == '/') sequenceKey = sequenceKey.substr(2); // Remove extension if specified. sequenceKey = sequenceKey.substr(0, sequenceKey.find_last_of(".")); // determine content line range [s,e) size_t s = idx; size_t e = lines.size() - 1; if (s >= e) { fprintf(stderr, "WARNING: sequence entry (%s) is empty\n", sequenceKey.c_str()); return false; } utterance.resize(e - s); vector> tokens; unordered_map empty; for (size_t i = s; i < e; i++) { tokens.clear(); boost::split(tokens, lines[i], boost::is_any_of(" ")); auto& current = utterance[i - s]; current.Build(tokens, m_states ? m_states->States() : empty, sequenceOffset + std::distance(sequenceData.begin(), lines[i].begin())); // Check that frames are sequential. if (i > s) { const auto& previous = utterance[i - s - 1]; if (previous.FirstFrame() + previous.NumFrames() != current.FirstFrame()) { fprintf(stderr, "WARNING: Labels are not in the consecutive order MLF in label set for utterance '%s'", sequenceKey.c_str()); return false; } } } if (utterance.front().FirstFrame() != 0) { fprintf(stderr, "WARNING: Invalid first frame in utterance '%s'", sequenceKey.c_str()); return false; } return true; } }}}