https://github.com/Microsoft/CNTK
Tip revision: 4419c2b48d055af4810fab27f2441bb91b22b45f authored by Binbin Zhang on 04 June 2018, 03:50:57 UTC
add bidirectional FSMN node and make it work in NDL and add FSMN CPU forward
add bidirectional FSMN node and make it work in NDL and add FSMN CPU forward
Tip revision: 4419c2b
File.cpp
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#ifndef _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
#endif
#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
#include "Basics.h"
#define FORMAT_SPECIALIZE // to get the specialized version of the format routines
#include "File.h"
#include "Config.h"
#include <string>
#include <stdint.h>
#include <locale>
#include <unordered_map>
#ifdef _WIN32
#define NOMINMAX
#include "Windows.h"
#include <VersionHelpers.h>
#include <Shlwapi.h>
#pragma comment(lib, "Shlwapi.lib")
#endif
#ifdef __unix__
#include <unistd.h>
#include <linux/limits.h> // for PATH_MAX
#endif
#define PCLOSE_ERROR -1
#define WRITE_BUFFER_SIZE (1024 * 1024)
namespace Microsoft { namespace MSR { namespace CNTK {
// Construct a File from a wide-character path.
// filename    - the path (regular file, "-" or pipe syntax; see Init())
// fileOptions - bit combination of fileOptions* flags describing how to open the file
File::File(const std::wstring& filename, int fileOptions)
{
    const wchar_t* path = filename.c_str();
    Init(path, fileOptions);
}
// Construct a File from a narrow-character path.
// The path is first converted to a wide string, whose buffer is then handed to Init().
File::File(const std::string& filename, int fileOptions)
{
    const auto widePath = msra::strfun::utf16(filename);
    Init(widePath.c_str(), fileOptions);
}
// Construct a File from a zero-terminated wide-character path; forwards directly to Init().
File::File(const wchar_t* filename, int fileOptions)
{
    this->Init(filename, fileOptions);
}
// Tell whether a path uses the special non-file syntax instead of naming a regular file:
//  - "|command": output pipe
//  - "command|": input pipe
//  - "-":        stdin/stdout
// An empty path is never a non-file path.
template<class String>
static bool IsNonFilePath(const String& filename)
{
    if (filename.empty()) // front()/back() must not be called on an empty string
        return false;
    return
        filename.front() == '|' ||                    // "| command": output pipe
        filename.back() == '|' ||                     // "command |": input pipe
        (filename.size() == 1 && filename[0] == '-'); // "-": stdin/stdout
}
// Check whether the given path can be considered to exist.
// Pipes and "-" (see IsNonFilePath()) always count as existing; anything else is checked with fexists().
template<class String>
/*static*/ bool File::Exists(const String& filename)
{
    if (IsNonFilePath(filename))
        return true;
    return fexists(filename);
}
template /*static*/ bool File::Exists<string> (const string& filename);
template /*static*/ bool File::Exists<wstring>(const wstring& filename);
// Create the intermediate directories of a path.
// Does nothing for pipes and "-" (see IsNonFilePath()), since those do not name a file.
template<class String>
/*static*/ void File::MakeIntermediateDirs(const String& filename)
{
    if (IsNonFilePath(filename))
        return;
    msra::files::make_intermediate_dirs(filename);
}
//template /*static*/ void File::MakeIntermediateDirs<string> (const string& filename); // implement this if needed
template /*static*/ void File::MakeIntermediateDirs<wstring>(const wstring& filename);
// all constructors call this
void File::Init(const wchar_t* filename, int fileOptions)
{
m_filename = filename;
m_options = fileOptions;
if (m_filename.empty())
RuntimeError("File: filename is empty");
const auto outputPipe = (m_filename.front() == '|');
const auto inputPipe = (m_filename.back() == '|');
// translate the options string into a string for fopen()
const auto reading = !!(fileOptions & fileOptionsRead);
const auto writing = !!(fileOptions & fileOptionsWrite);
if (!reading && !writing)
RuntimeError("File: either fileOptionsRead or fileOptionsWrite must be specified");
// convert fileOptions to fopen()'s mode string
wstring options = reading ? L"r" : L"";
if (writing)
{
// if we already are reading the file, change to read/write
options.clear();
options.append(L"w");
if (!outputPipe && m_filename != L"-")
{
options.append(L"+");
msra::files::make_intermediate_dirs(m_filename.c_str()); // writing to regular file -> also create the intermediate directories as a convenience
}
}
if (fileOptions & fileOptionsBinary)
options += L"b";
else
options += L"t";
// add sequential flag to allocate big read buffer
if (fileOptions & fileOptionsSequential)
options += L"S";
// now open the file
// Special path syntax understood here:
// - "-" refers to stdin or stdout
// - "|cmd" writes to a pipe
// - "cmd|" reads from a pipe
m_pcloseNeeded = false;
m_seekable = false;
if (m_filename == L"-") // stdin/stdout
{
if (writing && reading)
RuntimeError("File: cannot specify fileOptionsRead and fileOptionsWrite at once with path '-'");
m_file = writing ? stdout : stdin;
}
else if (outputPipe || inputPipe) // pipe syntax
{
if (inputPipe && outputPipe)
RuntimeError("File: pipes cannot specify fileOptionsRead and fileOptionsWrite at once");
if (inputPipe != reading)
RuntimeError("File: pipes must use consistent fileOptionsRead/fileOptionsWrite");
const auto command = inputPipe ? m_filename.substr(0, m_filename.size() - 1) : m_filename.substr(1);
m_file = _wpopen(command.c_str(), options.c_str());
if (!m_file)
RuntimeError("File: error exexuting pipe command '%S': %s", command.c_str(), strerror(errno));
m_pcloseNeeded = true;
}
else
attempt([=]() // regular file: use a retry loop
{
m_file = fopenOrDie(filename, options.c_str());
m_seekable = true;
});
}
// determine the directory for a given pathname
// (wstring only for now; feel free to make this a template if needed)
// path    - the pathname (taken by value and modified in place)
// returns - the path with its last element removed, or L"." if there is nothing to remove
/*static*/ wstring File::DirectoryPathOf(wstring path)
{
#ifdef _WIN32
// Win32 accepts forward slashes, but it seems that PathRemoveFileSpec() does not
// TODO:
// "PathCchCanonicalize does the / to \ conversion as a part of the canonicalization, it's
// probably a good idea to do that anyway since I suspect that the '..' characters might
// confuse the other PathCch functions" [Larry Osterman]
// "Consider GetFullPathName both for canonicalization and last element finding." [Jay Krell]
path = msra::strfun::ReplaceAll<wstring>(path, L"/", L"\\");
HRESULT hr;
if (IsWindows8OrGreater()) // PathCchRemoveFileSpec() only available on Windows 8+
{
// the function is looked up at runtime rather than linked against
typedef HRESULT(*PathCchRemoveFileSpecProc)(_Inout_updates_(_Inexpressible_(cchPath)) PWSTR, _In_ size_t);
HINSTANCE hinstLib = LoadLibrary(TEXT("api-ms-win-core-path-l1-1-0.dll"));
if (hinstLib == nullptr)
RuntimeError("DirectoryPathOf: LoadLibrary() unexpectedly failed.");
PathCchRemoveFileSpecProc PathCchRemoveFileSpec = reinterpret_cast<PathCchRemoveFileSpecProc>(GetProcAddress(hinstLib, "PathCchRemoveFileSpec"));
// NOTE(review): hinstLib is not freed on this failure path -- confirm whether that matters
if (!PathCchRemoveFileSpec)
RuntimeError("DirectoryPathOf: GetProcAddress() unexpectedly failed.");
// this is the actual function call we care about
hr = PathCchRemoveFileSpec(&path[0], path.size());
FreeLibrary(hinstLib);
}
else // on Windows 7-, use older PathRemoveFileSpec() instead
hr = PathRemoveFileSpec(&path[0]) ? S_OK : S_FALSE;
// the buffer was edited in place, so shrink the string to the new zero-terminated length
if (hr == S_OK) // done
path.resize(wcslen(&path[0]));
else if (hr == S_FALSE) // nothing to remove: use .
path = L".";
else
RuntimeError("DirectoryPathOf: Path(Cch)RemoveFileSpec() unexpectedly failed with 0x%08x.", (unsigned int)hr);
#else
// cut everything from the last '/' on
// NOTE(review): for a path like "/name" (only slash at index 0) this yields an empty string -- confirm callers expect that
auto pos = path.find_last_of(L"/");
if (pos != path.npos)
path.erase(pos);
else // if no directory path at all, use current directory
return L".";
#endif
return path;
}
// determine the file name for a given pathname
// (wstring only for now; feel free to make this a template if needed)
// path    - the pathname
// returns - everything after the last path delimiter, or the whole path if it contains no delimiter
/*static*/ wstring File::FileNameOf(wstring path)
{
    // use _WIN32, the same platform test as the rest of this file
#ifdef _WIN32
    static const wstring delim = L"\\:/";
#else
    static const wstring delim = L"/";
#endif
    const auto pos = path.find_last_of(delim);
    if (pos == path.npos) // no directory path
        return path;
    return path.substr(pos + 1);
}
// get path of current executable
// returns - the path as a wide string
// Fails with LogicError() (Windows) or RuntimeError() (otherwise) if the path cannot be determined.
/*static*/ wstring File::GetExecutablePath()
{
    // use _WIN32, the same platform test as the rest of this file
#ifdef _WIN32
    wchar_t path[33000];
    if (GetModuleFileNameW(NULL, path, _countof(path)) == 0)
        LogicError("GetExecutablePath: GetModuleFileNameW() unexpectedly failed.");
    return path;
#else
    // from http://stackoverflow.com/questions/4025370/can-an-executable-discover-its-own-path-linux
    pid_t pid = getpid();
    char path[PATH_MAX + 1] = { 0 };
    snprintf(path, sizeof(path), "/proc/%d/exe", pid);
    // dest is zero-initialized and one byte larger than the read limit, so the result stays zero-terminated
    char dest[PATH_MAX + 1] = { 0 };
    if (readlink(path, dest, PATH_MAX) == -1)
        RuntimeError("GetExecutablePath: readlink() call failed.");
    else
        return msra::strfun::utf16(dest);
#endif
}
// skip to given delimiter character
// delim - the character to look for; reading stops right after it has been consumed
// Note: the scan variable starts out as 0, so a delim of 0 returns without reading anything.
// Fails with LogicError() if the stream ends (or a read fails) before the delimiter is seen.
void File::SkipToDelimiter(int delim)
{
    int ch = 0;
    while (ch != delim)
    {
        ch = fgetc(m_file);
        // also test for EOF directly: on a read error feof() stays false and the loop would never end
        if (ch == EOF || feof(m_file))
        {
            printf("Unexpected end of file\n");
            LogicError("Unexpected end of file\n");
        }
    }
}
// Tell whether this file was opened with the fileOptionsText flag.
bool File::IsTextBased()
{
    return (m_options & fileOptionsText) != 0;
}
// File Destructor
// Closes the underlying stream: pipes are closed with _pclose(), regular files with fclose();
// stdin, stdout and stderr are left open.
// A failed close raises RuntimeError() unless an exception is already in flight.
// Note: this does not check for errors when the File corresponds to pipe stream. In this case, use Flush() before closing a file you are writing.
File::~File(void)
{
    bool closeFailed = false;
    if (m_pcloseNeeded)
        closeFailed = (_pclose(m_file) == PCLOSE_ERROR);
    else if (!(m_file == stdin || m_file == stdout || m_file == stderr))
        closeFailed = (fclose(m_file) != FCLOSE_SUCCESS);

    if (closeFailed && !std::uncaught_exception())
    {
        RuntimeError("File: failed to close file at %S", m_filename.c_str());
    }
}
void File::Flush()
{
fflushOrDie(m_file);
}
// read a line
// End of line is denoted by one of these, i.e. we don't support the old Mac OS convention of CR
// - LF
// - CR+LF
// - EOF
// Read one character from f into c.
// returns - false once getc() reports EOF (c then holds EOF converted to char), true otherwise
static bool fgetc(char& c, FILE* f)
{
    const int ci = getc(f);
    c = (char) ci;
    return ci != EOF;
}
// Tell whether s starts with the three bytes EF BB BF (the UTF-8 byte order mark).
// The bytes are compared left to right and the comparison stops at the first mismatch.
static inline bool BeginsWithUnicodeBOM(const char* s)
{
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(s);
    return bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF;
}
// Read an 8-bit string up to (not including) the next line end or EOF.
// s - receives the line; any previous content is discarded
// f - the stream to read from
// A CR must be followed directly by LF, otherwise RuntimeError() is raised.
template<class STRING>
static void fgets(STRING& s, FILE* f)
{
    s.resize(0);
    for (char c; fgetc(c, f);)
    {
        const bool isCR = (c == '\r');
        if (isCR || c == '\n')
        {
            if (isCR)
            {
                const bool gotNext = fgetc(c, f);
                if (!gotNext || c != '\n')
                    RuntimeError("fgets: malformed text file, CR without LF");
            }
            break;
        }
        s.push_back(c);
        // Strip a Unicode BOM: whenever the line collected so far is exactly the 3 BOM bytes, drop them.
        // This is done for every line, not just the first one of the file,
        // which allows to UNIX-'cat' multiple UTF-8 files with BOMs.
        // Since the BOM is otherwise invalid within a file, this is well-defined and upwards compatible.
        if (s.size() == 3 && BeginsWithUnicodeBOM(s.c_str()))
            s.clear();
    }
}
// GetLine - read the next line of the file
// str - [out] receives the line without its line end
void File::GetLine(string& str)
{
    fgets(str, this->m_file);
}
// Append a line to a vector of narrow strings.
static void PushBackString(vector<string>& lines, const string& s)
{
    lines.push_back(s);
}
// Append a line to a vector of wide strings, converting it with msra::strfun::utf16() first.
// The source string is not modified, so it is taken by const reference like in the overload above.
static void PushBackString(vector<wstring>& lines, const string& s)
{
    lines.push_back(msra::strfun::utf16(s));
}
// FileGetLines - read lines from a file until its EOF flag is set
// file  - the file to read from
// lines - [out] receives one entry per GetLine() call; previous content is discarded
template <typename STRING>
static void FileGetLines(File& file, /*out*/ std::vector<STRING>& lines)
{
    lines.clear();
    for (string current; !file.IsEOF();)
    {
        file.GetLine(current);
        PushBackString(lines, current);
    }
}
void File::GetLines(std::vector<std::wstring>& lines)
{
FileGetLines(*this, lines);
};
void File::GetLines(std::vector<std::string>& lines)
{
FileGetLines(*this, lines);
}
// Write a wide string to the file via WriteString()
// val - value to write to the file
File& File::operator<<(const std::wstring& val)
{
    const wchar_t* text = val.c_str();
    WriteString(text);
    return *this;
}
// Write a narrow string to the file via WriteString()
// val - value to write to the file
File& File::operator<<(const std::string& val)
{
    const char* text = val.c_str();
    WriteString(text);
    return *this;
}
// Put a marker in the file, the marker depends on the file type
// marker - marker to place in the file
// Only two markers produce output, and only for text files:
//  - fileMarkerEndFile writes char(26) (^Z)
//  - fileMarkerEndList writes "\r\n" through WriteString()
// Section markers must go through PutMarker() with a section name and assert here.
File& File::operator<<(FileMarker marker)
{
    const bool textMode = (m_options & fileOptionsText) != 0;
    switch (marker)
    {
    case fileMarkerEndFile: // end of file marker
        // use ^Z for end of file for text files
        // TODO: What??
        if (textMode)
            *this << char(26);
        break;
    case fileMarkerEndList: // end of line/list marker
        if (textMode)
            WriteString("\r\n");
        break;
    case fileMarkerBeginSection: // beginning of section
    case fileMarkerEndSection:   // end of section
        assert(false); // sections should use a string modifier
        break;
    case fileMarkerBeginFile:     // TODO: why not write a BOM?
    case fileMarkerBeginList:     // no marker written
    case fileMarkerListSeparator: // built-in space delimiter for all types (before type);
                                  // future: make this customizable, so you can specify a separator (i.e. ',')
        break;
    }
    return *this;
}
// PutMarker for beginning of list support (lists with a count)
// marker - must be fileMarkerBeginList (only checked by assert)
// count  - [in] the number of elements in the list; this is what gets written
File& File::PutMarker(FileMarker marker, size_t count)
{
    assert(marker == fileMarkerBeginList); // only beginning of list supported for count markers
    (void) marker;                         // otherwise unused when asserts are compiled out
    File& self = *this;
    self << count;
    return self;
}
// PutMarker for section beginning and ending tags
// marker  - must be fileMarkerBeginSection or fileMarkerEndSection (only checked by assert)
// section - [in] name of section; this is what gets written
File& File::PutMarker(FileMarker marker, const std::string& section)
{
    // only the section markers take a string parameter
    assert(marker == fileMarkerBeginSection || marker == fileMarkerEndSection);
    (void) marker; // otherwise unused when asserts are compiled out
    *this << section;
    return *this;
}
// PutMarker for section beginning and ending tags (wide-string variant)
// marker  - must be fileMarkerBeginSection or fileMarkerEndSection (only checked by assert)
// section - [in] name of section; this is what gets written
File& File::PutMarker(FileMarker marker, const std::wstring& section)
{
    // only the section markers take a string parameter
    assert(marker == fileMarkerBeginSection || marker == fileMarkerEndSection);
    (void) marker; // otherwise unused when asserts are compiled out
    *this << section;
    return *this;
}
// Read a wide string from the file
// val - [out] value read from the file
// Text files are read with fgetwtoken(), all others with fgetwstring().
File& File::operator>>(std::wstring& val)
{
    if (!IsTextBased())
    {
        val = fgetwstring(m_file);
        return *this;
    }
    val = fgetwtoken(m_file);
    return *this;
}
// Read a narrow string from the file
// val - [out] value read from the file
// Text files are read with fgettoken(), all others with fgetstring().
File& File::operator>>(std::string& val)
{
    if (!IsTextBased())
    {
        val = fgetstring(m_file);
        return *this;
    }
    val = fgettoken(m_file);
    return *this;
}
// ReadChars - read a specified number of characters, and reset read pointer if requested
// val   - [in,out] return value will be returned here; resized to cnt characters
// cnt   - number of characters to read
// reset - reset the read pointer to where it was before the call (needs a seekable stream)
void File::ReadChars(std::string& val, size_t cnt, bool reset)
{
    uint64_t pos = 0; // (initialize to keep compiler happy); same type as GetPosition()/SetPosition() use
    if (reset)
        pos = GetPosition();
    val.resize(cnt);
    // read directly into the string's elements; index type matches cnt to avoid a signed/unsigned comparison
    for (size_t i = 0; i < cnt; ++i)
        *this >> val[i];
    if (reset)
        SetPosition(pos);
}
// ReadChars - read a specified number of wide characters, and reset read pointer if requested
// val   - [in,out] return value will be returned here; resized to cnt characters
// cnt   - number of characters to read
// reset - reset the read pointer to where it was before the call (needs a seekable stream)
void File::ReadChars(std::wstring& val, size_t cnt, bool reset)
{
    uint64_t pos = 0; // (initialize to keep compiler happy); same type as GetPosition()/SetPosition() use
    if (reset)
        pos = GetPosition();
    val.resize(cnt);
    // read directly into the string's elements; index type matches cnt to avoid a signed/unsigned comparison
    for (size_t i = 0; i < cnt; ++i)
        *this >> val[i];
    if (reset)
        SetPosition(pos);
}
// WriteString - outputs a narrow string into the file
// str  - the string to output
// size - number of characters to output; if not positive, str is taken as zero-terminated
// With a positive size, or for text files, the string is printed preceded by a single space;
// otherwise it is written with fputstring().
void File::WriteString(const char* str, int size)
{
    if (size > 0)
    {
        fwprintf(m_file, L" %.*hs", size, str);
        return;
    }
    if (!IsTextBased())
    {
        fputstring(m_file, str);
        return;
    }
    fwprintf(m_file, L" %hs", str);
}
// ReadString - reads a string from the file
// str  - the string buffer to read the string into
// size - size of the string buffer incl. zero terminator (we fail if input is too long)
// Text files are read with fgettoken() and a leading Unicode BOM is removed from the result;
// all other files are read with fgetstring().
void File::ReadString(char* str, int size)
{
    if (IsTextBased())
    {
        fgettoken(m_file, str, size);
        if (BeginsWithUnicodeBOM(str))
        {
            // delete the BOM from the start: shift the rest left by 3, including the zero terminator,
            // so that no stale characters remain at the end
            char* dst = str;
            const char* src = str + 3;
            while ((*dst++ = *src++) != 0)
            {
            }
        }
    }
    else
        fgetstring(m_file, str, size);
}
// WriteString - outputs a wide string into the file
// (intended: when writing to a text based file and spaces are embedded, write quotes around the string;
//  that part only exists as an unfinished sketch under EMBEDDED_SPACES below)
// BUGBUG: This should be consistent between char and wchar_t versions
// str - the string to output
// size - number of characters to output; if not positive, str is taken as zero-terminated
// With a positive size, or for text files, the string is printed preceded by a single space;
// otherwise it is written with fputstring().
void File::WriteString(const wchar_t* str, int size)
{
#ifdef EMBEDDED_SPACES
// start of implementation of embedded space support with quoting
// not complete, not sure if we need it
// NOTE(review): the results computed here are not used by the code after the #endif,
// and str is never advanced inside the loop, so it would not terminate once a match is found
bool spacefound = false;
wchar_t quote = 0;
if (IsTextBased())
{
// search for embedded spaces and quotes
wstring searchString = L" \"'~";
const wchar_t* result = NULL;
while (result = wcspbrk(str, searchString.c_str()))
{
if (IsWhiteSpace(*result))
spacefound = true;
searchString.find(*result, 0);
}
}
#endif
// explicit length given: print exactly that many characters
if (size > 0)
{
fwprintf(m_file, L" %.*ls", size, str);
}
else
{
// zero-terminated string: formatted output for text files, fputstring() otherwise
if (IsTextBased())
fwprintf(m_file, L" %ls", str);
else
fputstring(m_file, str);
}
}
// ReadString - reads a wide string from the file
// str  - the string buffer to read the string into
// size - size of the string buffer
// Text files are read with fgettoken(), all others with fgetstring().
void File::ReadString(wchar_t* str, int size)
{
    if (!IsTextBased())
    {
        fgetstring(m_file, str, size);
        return;
    }
    fgettoken(m_file, str, size);
}
// IsUnicodeBOM - is the next characters the Unicode Byte Order Mark?
// skip - skip the BOM mark if found (defaults to false)
// returns - true if on a unicode BOM
bool File::IsUnicodeBOM(bool skip)
{
File& file = *this;
uint64_t pos = GetPosition(); // Note: This is where we will fail for non-seekable streams.
// if we aren't at the beginning of the file, it can't be the byte order mark
if (pos != 0)
return false;
// only exists for UNICODE files
bool found = false;
if (m_options & fileOptionsText)
{
char val[3] = { 0 };
for (size_t i = 0; i < _countof(val) && !file.IsEOF(); i++)
val[i] = (char) getc(m_file);
found = BeginsWithUnicodeBOM(val);
}
// restore pointer if no BOM or we aren't skipping it
if (!found || !skip)
{
SetPosition(pos);
}
return found;
}
// Size - return the size of the file as reported by filesize()
// Fails with RuntimeError() on a non-seekable stream.
// WARNING: calling this will reset the EOF marker, so do so with care
size_t File::Size()
{
    const bool seekable = CanSeek();
    if (!seekable)
        RuntimeError("File: attempted to get Size() on non-seekable stream");
    return filesize(m_file);
}
// IsEOF - tell whether the stream's end-of-file indicator is set
// return - true if end of file has been found
bool File::IsEOF()
{
    return feof(m_file) != 0;
}
// IsWhiteSpace - are the next characters whitespace (space, \t, \r, \n, etc.)?
// skip    - if true, consume the whole run of whitespace; if false, only the next character is examined
// returns - true if whitespace found
// The first non-consumed character is pushed back with ungetc(); if EOF is hit, nothing is pushed back.
bool File::IsWhiteSpace(bool skip)
{
    bool sawSpace = false;
    for (;;)
    {
        const int ch = fgetc(m_file);
        if (ch == EOF) // hit the end
            return sawSpace;
        const bool isSpace = isspace(ch) != 0;
        sawSpace = sawSpace || isSpace;
        if (!(isSpace && skip))
        {
            // put back the last character
            ungetc(ch, m_file);
            return sawSpace;
        }
    }
}
// EndOfLineOrEOF - are the next characters an end of line sequence ('\r\n') possibly preceded by (space, \t)? EOF detected too
// skip    - skip the end of line if found
// returns - for text files, the result of fskipNewline(): true if end of line found, EOF if end of file found,
//           or false if nothing found, in which case any leading space will have been stripped;
//           always false for non-text files
int File::EndOfLineOrEOF(bool skip)
{
    if (!IsTextBased())
        return false;
    return fskipNewline(m_file, skip);
}
// Buffer write stream
int File::Setvbuf()
{
return setvbuf(this->m_file, NULL, _IOFBF, WRITE_BUFFER_SIZE);
}
// Get a marker from the file
// some are ignored, others are expecting characters
// must use GetMarker methods for those that require parameters
//  - fileMarkerBeginFile: skips a Unicode BOM in seekable text files
//  - fileMarkerEndFile:   fails with RuntimeError() unless the EOF flag is set
//  - fileMarkerEndList:   in text files, consumes a newline and fails with RuntimeError() if there is none
//  - section markers:     assert; sections should use a string modifier
File& File::operator>>(FileMarker marker)
{
    switch (marker)
    {
    case fileMarkerBeginFile: // beginning of file marker
        // check for Unicode BOM marker
        if (IsTextBased() && CanSeek()) // files from a pipe cannot begin with Unicode BOM, sorry
            IsUnicodeBOM(true);
        break;
    case fileMarkerEndFile: // end of file marker, should we throw if it's not the end of the file?
        if (!IsEOF())
            RuntimeError("fileMarkerEndFile not found");
        break;
    case fileMarkerEndList: // end of line/list marker
        if (IsTextBased() && EndOfLineOrEOF(true) != (int) true) // EOF can also be returned
            RuntimeError("Newline not found");
        break;
    case fileMarkerBeginSection: // beginning of section
    case fileMarkerEndSection:   // end of section
        assert(false); // sections should use a string modifier
        break;
    case fileMarkerBeginList:     // no marker written unless a list with a count header
    case fileMarkerListSeparator: // built-in space delimiter for all types (before type);
                                  // future: make this customizable, so you can specify a separator (i.e. ',')
        break;
    }
    return *this;
}
// Test whether the given marker is next in the file
// marker  - the marker to test for
// skip    - passed on to IsUnicodeBOM()/EndOfLineOrEOF(): skip the marker if found
// returns - true if the marker was recognized; always false for list-begin, list-separator and section markers
// This function will fail for non-seekable streams.
bool File::IsMarker(FileMarker marker, bool skip)
{
    switch (marker)
    {
    case fileMarkerBeginFile: // beginning of file marker
        // check for Unicode BOM marker
        return IsUnicodeBOM(skip);
    case fileMarkerEndFile: // end of file marker, should we throw if it's not the end of the file?
        return IsEOF();
    case fileMarkerEndList: // end of line/list marker
        // EndOfLineOrEOF() may also return EOF, which does not count as end of line
        return IsTextBased() && EndOfLineOrEOF(skip) == (int) true;
    case fileMarkerBeginList:
        // no marker written unless a list with a count header
        // should we try to validate BOL header (just know it's an int, not negative, etc.)
    case fileMarkerListSeparator:
        // do nothing for now, built-in space delimiter for all types (before type)
        // future: make this customizable, so you can specify a separator (i.e. ',')
    case fileMarkerBeginSection:
    case fileMarkerEndSection:
        // can't distinguish from a string currently
        break;
    }
    return false;
}
// GetMarker for beginning of list support (lists with a count)
// marker - must be fileMarkerBeginList (only checked by assert)
// count  - [out] returns the number of elements in the list
File& File::GetMarker(FileMarker marker, size_t& count)
{
    assert(marker == fileMarkerBeginList); // only beginning of list supported for count file markers
    (void) marker;                         // otherwise unused when asserts are compiled out
    if (!IsTextBased())
    {
        fget(m_file, count);
        return *this;
    }
    // use text based try, so it can fail without an exception
    ftrygetText(m_file, count);
    return *this;
}
// GetMarker for section beginning and ending tags
// marker  - must be fileMarkerBeginSection or fileMarkerEndSection (only checked by assert)
// section - [in] name of section that is expected
// Reads the next string and fails with RuntimeError() if it differs from the expected name.
File& File::GetMarker(FileMarker marker, const std::string& section)
{
    // only the section markers take a string parameter
    assert(marker == fileMarkerBeginSection || marker == fileMarkerEndSection);
    (void) marker; // otherwise unused when asserts are compiled out
    string found;
    *this >> found;
    const bool matches = (found == section);
    if (!matches)
        RuntimeError("section name mismatch %s != %s", found.c_str(), section.c_str());
    return *this;
}
// GetMarker for section beginning and ending tags (wide-string variant)
// marker  - must be fileMarkerBeginSection or fileMarkerEndSection (only checked by assert)
// section - [in] name of section that is expected
// Reads the next string and fails with RuntimeError() if it differs from the expected name.
File& File::GetMarker(FileMarker marker, const std::wstring& section)
{
    // only the section markers take a string parameter
    assert(marker == fileMarkerBeginSection || marker == fileMarkerEndSection);
    (void) marker; // otherwise unused when asserts are compiled out
    wstring found;
    *this >> found;
    const bool matches = (found == section);
    if (!matches)
        RuntimeError("section name mismatch %ls != %ls", found.c_str(), section.c_str());
    return *this;
}
// TryGetMarker for section beginning and ending tags (wide-string variant)
// marker  - must be fileMarkerBeginSection or fileMarkerEndSection (only checked by assert)
// section - [in] name of section that is expected
// returns - true if the next string equals the section name (it is then consumed);
//           otherwise false, with the read position restored to where it was before the call
bool File::TryGetMarker(FileMarker marker, const std::wstring& section)
{
    // only the section markers take a string parameter
    assert(marker == fileMarkerBeginSection || marker == fileMarkerEndSection);
    (void) marker; // otherwise unused when asserts are compiled out
    const size_t startPos = GetPosition();
    bool matched = false;
    try
    {
        std::wstring token;
        *this >> token;
        matched = (token == section);
    }
    catch (...)
    {
        // eat: a failed read simply counts as "no match"
    }
    if (!matched)
        SetPosition(startPos);
    return matched;
}
// TryGetMarker for section beginning and ending tags
// marker  - must be fileMarkerBeginSection or fileMarkerEndSection (only checked by assert)
// section - [in] name of section that is expected
// returns - true if the next string equals the section name (it is then consumed);
//           otherwise false, with the read position restored to where it was before the call
bool File::TryGetMarker(FileMarker marker, const std::string& section)
{
    // only the section markers take a string parameter
    assert(marker == fileMarkerBeginSection || marker == fileMarkerEndSection);
    marker;
    uint64_t pos = GetPosition();
    std::string str;
    try
    {
        *this >> str;
        if (str == section)
            return true;
    }
    catch (...)
    {
        // eat: fall through so that the position is restored on a failed read as well,
        // just like in the wstring overload
    }
    SetPosition(pos);
    return false;
}
// GetPosition - get the current position in the file
// Fails with RuntimeError() on a non-seekable stream.
uint64_t File::GetPosition()
{
    const bool seekable = CanSeek();
    if (!seekable)
        RuntimeError("File: attempted to GetPosition() on non-seekable stream");
    return fgetpos(m_file);
}
// SetPosition - set the position in the file
// pos - position in the file
// Fails with RuntimeError() on a non-seekable stream.
void File::SetPosition(uint64_t pos)
{
    const bool seekable = CanSeek();
    if (!seekable)
        RuntimeError("File: attempted to SetPosition() on non-seekable stream");
    fsetpos(m_file, pos);
}
// helper to load a matrix from a stream (file or string literal)
// The input is expected to contain one line per matrix row (natural printing order for humans),
// each line holding the row's elements separated by white space.
// Inputs:
//  - getLineFn: a lambda that fills a string with the next input line (=next matrix row)
//               The lambda returns an empty string to denote the end.
//  - locationForMsg: description of the input source, used in error messages only
// Outputs:
//  - numRows, numCols: matrix dimensions inferred from newlines
//  - array: matrix values in column-major order (ready for SetValue())
// Fails with RuntimeError() on a malformed number or if rows differ in their number of elements.
template<class ElemType, class F>
static void LoadMatrixFromLambda(const F& getLineFn, const wstring& locationForMsg, vector<ElemType>& array, size_t& /*out*/ numRows, size_t& /*out*/ numCols)
{
    // load matrix into vector of vectors (since we don't know the size in advance)
    vector<ElemType> vec;
    std::vector<std::vector<ElemType>> elements;
    size_t numColsInFirstRow = 0;
    std::string line;
    for (;;)
    {
        // get next input line
        getLineFn(line);
        if (line.empty())
            break;

        // tokenize and parse
        vec.clear();
        const char* p = line.c_str();
        for (;;)
        {
            while (isspace((unsigned char)*p))
                p++;
            if (!*p)
                break;
            char* ep; // will be set to point to first character that failed parsing
            double value = strtod(p, &ep);
            // a number must be followed by white space or the end of the line
            if (*ep != 0 && !isspace((unsigned char)*ep))
                RuntimeError("LoadMatrixFromTextFile: Malformed number '%.15s...' in row %d of %ls", p, (int)elements.size(), locationForMsg.c_str());
            p = ep;
            vec.push_back((ElemType)value);
        }

        // the first row defines the column dimension that all further rows must match
        size_t numElementsInRow = vec.size();
        if (elements.empty())
            numColsInFirstRow = numElementsInRow;
        else if (numElementsInRow != numColsInFirstRow)
            RuntimeError("Row %d has column dimension %d, inconsistent with previous dimension %d: %ls", (int)elements.size(), (int)numElementsInRow, (int)numColsInFirstRow, locationForMsg.c_str());
        elements.push_back(vec);
    }
    numRows = elements.size();
    numCols = numColsInFirstRow;

    // Perform transpose when copying elements from vectors to ElemType[],
    // in order to store in column-major format.
    // The indices are size_t, like the dimensions they are compared against.
    array.resize(numRows * numCols);
    for (size_t i = 0; i < numCols; i++)
        for (size_t j = 0; j < numRows; j++)
            array[i * numRows + j] = elements[j][i];
}
// Load matrix from file. The file is a simple text file consisting of one line per matrix row,
// where each line contains the elements of the row separated by white space.
// filePath         - path of the text file
// numRows, numCols - [out] dimensions of the matrix
// returns          - the matrix values as produced by LoadMatrixFromLambda()
template <class ElemType>
/*static*/ vector<ElemType> File::LoadMatrixFromTextFile(const std::wstring& filePath, size_t& /*out*/ numRows, size_t& /*out*/ numCols)
{
    File myfile(filePath, FileOptions::fileOptionsText | FileOptions::fileOptionsRead);

    // LoadMatrixFromLambda() pulls its input lines from this lambda,
    // which delivers the next non-empty line, or an empty string when the end is reached.
    auto getLineFn = [&myfile](string& line)
    {
        for (;;)
        {
            if (myfile.IsEOF())
            {
                line.clear(); // empty line indicates end of file
                return;
            }
            myfile.GetLine(line);
            if (!line.empty())
                return; // got the next line to return
            // End of file manifests as an empty line at the end.
            // Also, we allow empty lines within the file, as that may help to visually structure matrices that really are >2D tensors.
        }
    };

    vector<ElemType> array;
    LoadMatrixFromLambda(getLineFn, filePath, array, numRows, numCols);
    return array;
}
// Load matrix from a string literal. The string consists of one line per matrix row,
// where each line contains the elements of the row separated by white space.
// literal          - the text to parse
// numRows, numCols - [out] dimensions of the matrix
// returns          - the matrix values as produced by LoadMatrixFromLambda()
template <class ElemType>
/*static*/ vector<ElemType> File::LoadMatrixFromStringLiteral(const std::string& literal, size_t& /*out*/ numRows, size_t& /*out*/ numCols)
{
    // LoadMatrixFromLambda() pulls its input lines from the lambda below,
    // which delivers the next line, or an empty string when the end is reached.
    size_t cursor = 0; // position within the literal; advanced by the lambda on every call
    auto getLineFn = [&literal, &cursor](string& line)
    {
        // skip the previous line end and any leading spaces
        cursor = literal.find_first_not_of(" \r\n", cursor);
        if (cursor == string::npos)
        {
            line.clear(); // hit the end: return empty line
            return;
        }
        // find end of line; no LF required at very end, so that it looks pretty in BS source code
        const auto lineEnd = literal.find_first_of("\r\n", cursor + 1);
        const auto stop = (lineEnd == string::npos) ? literal.size() : lineEnd;
        line.assign(literal, cursor, stop - cursor);
        cursor = stop; // we position it on the LF, which is skipped in the next round
    };

    vector<ElemType> array;
    LoadMatrixFromLambda(getLineFn, L"string literal", array, numRows, numCols);
    return array;
}
// explicit instantiations of the matrix loaders for float and double
template vector<float> File::LoadMatrixFromTextFile<float> (const std::wstring& filePath, size_t& /*out*/ numRows, size_t& /*out*/ numCols);
template vector<double> File::LoadMatrixFromTextFile<double>(const std::wstring& filePath, size_t& /*out*/ numRows, size_t& /*out*/ numCols);
template vector<float> File::LoadMatrixFromStringLiteral<float> (const std::string& literal, size_t& /*out*/ numRows, size_t& /*out*/ numCols);
template vector<double> File::LoadMatrixFromStringLiteral<double>(const std::string& literal, size_t& /*out*/ numRows, size_t& /*out*/ numCols);
// The build must define CNTK_COMPONENT_VERSION; Plugin::LoadInternal() appends it to the names of CNTK plugins.
#ifndef CNTK_COMPONENT_VERSION
#error CNTK_COMPONENT_VERSION must be set
#endif
// Lookup table used by Plugin::LoadInternal() to map legacy plugin names to the new naming scheme
// (only declared here; its definition is not part of this section).
extern std::unordered_map<std::wstring, std::wstring> g_deprecatedReaderWriterNameMap;
#ifdef _WIN32
// Load a plugin DLL and look up an entry point in it (Windows version).
// plugin       - name of the plugin without the ".dll" extension
// proc         - name of the symbol to look up
// isCNTKPlugin - if true, legacy names are mapped to the new naming scheme and "-" plus CNTK_COMPONENT_VERSION is appended
// returns      - the address of the requested symbol
// Fails with RuntimeError() if the DLL cannot be loaded or does not contain the symbol.
FARPROC Plugin::LoadInternal(const std::wstring& plugin, const std::string& proc, bool isCNTKPlugin)
{
    m_dllName = plugin;
    if (isCNTKPlugin)
    {
        // map legacy names to new naming scheme
        const auto renamed = g_deprecatedReaderWriterNameMap.find(m_dllName);
        if (renamed != g_deprecatedReaderWriterNameMap.end())
            m_dllName = renamed->second;
        const std::wstring versionSuffix = L"-" + msra::strfun::utf16(std::string(CNTK_COMPONENT_VERSION));
        m_dllName += versionSuffix;
    }
    m_dllName += L".dll";

    m_hModule = LoadLibrary(m_dllName.c_str());
    if (m_hModule == NULL)
        RuntimeError("Plugin not found: '%ls'", m_dllName.c_str());

    const char* symbolName = proc.c_str();
    FARPROC entryPoint = GetProcAddress(m_hModule, symbolName);
    if (entryPoint == nullptr)
        RuntimeError("Symbol '%s' not found in plugin '%ls'", symbolName, m_dllName.c_str());
    return entryPoint;
}
#else
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)
void* Plugin::LoadInternal(const std::string& plugin, const std::string& proc, bool isCNTKPlugin)
{
string soName = plugin;
wstring soNameW = msra::strfun::utf16(plugin);
if (isCNTKPlugin)
{
// map legacy names to new naming scheme
auto entry = g_deprecatedReaderWriterNameMap.find(soNameW);
if (entry != g_deprecatedReaderWriterNameMap.end())
soName = msra::strfun::utf8(entry->second);
soName += "-" + std::string(TOSTRING(CNTK_COMPONENT_VERSION));
}
soName += ".so";
void* handle = dlopen(soName.c_str(), RTLD_LAZY);
if (handle == NULL)
RuntimeError("Plugin not found: '%s' (error: %s)", soName.c_str(), dlerror());
void* entryPoint = dlsym(handle, proc.c_str());
if (entryPoint == nullptr)
RuntimeError("Symbol '%s' not found in plugin '%s'", proc.c_str(), soName.c_str());
return entryPoint;
}
#endif
}}}