https://github.com/Microsoft/CNTK
Tip revision: de6c89f13eb90bc562351395ce7985cc532ea234 authored by Gaizka Navarro on 21 April 2016, 09:51:07 UTC
This commit changes which field to look for the "evalreader" specialized reader in the cntk config file.
This commit changes which field to look for the "evalreader" specialized reader in the cntk config file.
Tip revision: de6c89f
fileutil.cpp
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#ifndef _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
#endif
#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
#pragma warning(disable : 4996) // ^^ this does not seem to work--TODO: make it work
#define _FILE_OFFSET_BITS 64 // to force fseeko() and ftello() 64 bit in Linux
#ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES // fixed-buffer overloads for strcpy() etc.
#undef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
#endif
#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
#include "Basics.h"
#include "fileutil.h"
#include "ProgressTracing.h"
#ifdef __unix__
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <glob.h>
#endif
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdint.h>
#include <assert.h>
#ifdef _WIN32
#define NOMINMAX
#include "Windows.h" // for FILETIME
#endif
#include <algorithm> // for std::find
#include <limits.h>
#include <memory>
#include <cwctype>
#ifndef UNDER_CE // some headers don't exist under winCE - the appropriate definitions seem to be in stdlib.h
#if defined(_WIN32) || defined(__CYGWIN__)
#include <fcntl.h> // for _O_BINARY/TEXT - not needed for wince
#include <io.h> // for _setmode()
#define SET_BINARY_MODE(handle) setmode(handle, _O_BINARY)
#define SET_TEXT_MODE(handle) setmode(handle, _O_TEXT)
#else
#define SET_BINARY_MODE(handle) ((int) 0)
#define SET_TEXT_MODE(handle) ((int) 0)
#endif
#endif
#define __out_z_cap(x) // a fake SAL annotation; this may come in handy some day if we try static code analysis, so I don't want to delete it
#include <errno.h>
using namespace std;
using namespace Microsoft::MSR::CNTK;
// ----------------------------------------------------------------------------
// some mappings for non-Windows builds
// ----------------------------------------------------------------------------
template <>
const wchar_t* GetScanFormatString(char)
{
return L" %hc";
}
template <>
const wchar_t* GetScanFormatString(wchar_t)
{
return L" %lc";
}
template <>
const wchar_t* GetScanFormatString(short)
{
return L" %hi";
}
template <>
const wchar_t* GetScanFormatString(int)
{
return L" %i";
}
template <>
const wchar_t* GetScanFormatString(long)
{
return L" %li";
}
template <>
const wchar_t* GetScanFormatString(unsigned short)
{
return L" %hu";
}
template <>
const wchar_t* GetScanFormatString(unsigned int)
{
return L" %u";
}
//template <> const wchar_t* GetScanFormatString(unsigned long) {return L" %lu";}
template <>
const wchar_t* GetScanFormatString(float)
{
return L" %g";
}
template <>
const wchar_t* GetScanFormatString(double)
{
return L" %lg";
}
template <>
const wchar_t* GetScanFormatString(size_t)
{
return L" %llu";
}
template <>
const wchar_t* GetScanFormatString(long long)
{
return L" %lli";
}
template <>
const wchar_t* GetFormatString(char)
{
return L" %hc";
}
template <>
const wchar_t* GetFormatString(wchar_t)
{
return L" %lc";
}
template <>
const wchar_t* GetFormatString(short)
{
return L" %hi";
}
template <>
const wchar_t* GetFormatString(int)
{
return L" %i";
}
template <>
const wchar_t* GetFormatString(long)
{
return L" %li";
}
template <>
const wchar_t* GetFormatString(unsigned short)
{
return L" %hu";
}
template <>
const wchar_t* GetFormatString(unsigned int)
{
return L" %u";
}
//template <> const wchar_t* GetFormatString(unsigned long) {return L" %lu";}
template <>
const wchar_t* GetFormatString(float)
{
return L" %.9g";
}
template <>
const wchar_t* GetFormatString(double)
{
return L" %.17g";
}
template <>
const wchar_t* GetFormatString(size_t)
{
return L" %llu";
}
template <>
const wchar_t* GetFormatString(long long)
{
return L" %lli";
}
template <>
const wchar_t* GetFormatString(const char*)
{
return L" %hs";
}
template <>
const wchar_t* GetFormatString(const wchar_t*)
{
return L" %ls";
}
// ----------------------------------------------------------------------------
// fgetText() specializations for fwscanf differences: get a value from a text file
// ----------------------------------------------------------------------------
void fgetText(FILE* f, char& v)
{
const wchar_t* formatString = GetFormatString(v);
int rc = fwscanf(f, formatString, &v);
if (rc == 0)
RuntimeError("error reading value from file (invalid format): %ls", formatString);
else if (rc == EOF)
RuntimeError("error reading from file: %s", strerror(errno));
assert(rc == 1);
}
void fgetText(FILE* f, wchar_t& v)
{
const wchar_t* formatString = GetFormatString(v);
int rc = fwscanf(f, formatString, &v);
if (rc == 0)
RuntimeError("error reading value from file (invalid format): %ls", formatString);
else if (rc == EOF)
RuntimeError("error reading from file: %s", strerror(errno));
assert(rc == 1);
}
// ----------------------------------------------------------------------------
// fopenOrDie(): like fopen() but terminate with err msg in case of error.
// A pathname of "-" returns stdout or stdin, depending on mode, and it will
// change the binary mode if 'b' or 't' are given. If you use this, make sure
// not to fclose() such a handle.
// ----------------------------------------------------------------------------
static const wchar_t* strchr(const wchar_t* s, wchar_t v)
{
return wcschr(s, v);
}
// pathname is "-" -- open stdin or stdout. Changes bin mode if 'b' or 't' given.
template <class _T>
FILE* fopenStdHandle(const _T* mode)
{
FILE* f = strchr(mode, 'r') ? stdin : stdout;
if (strchr(mode, 'b') || strchr(mode, 't')) // change binary mode
fsetmode(f, strchr(mode, 'b') ? 'b' : 't');
return f;
}
FILE* fopenOrDie(const string& pathname, const char* mode)
{
FILE* f = (pathname[0] == '-') ? fopenStdHandle(mode) : fopen(pathname.c_str(), mode);
if (f == NULL)
{
RuntimeError("error opening file '%s': %s", pathname.c_str(), strerror(errno));
}
if (strchr(mode, 'S'))
{ // if optimized for sequential access then use large buffer
setvbuf(f, NULL, _IOFBF, 10000000); // OK if it fails
}
return f;
}
FILE* fopenOrDie(const wstring& pathname, const wchar_t* mode)
{
FILE* f = (pathname[0] == '-') ? fopenStdHandle(mode) : _wfopen(pathname.c_str(), mode);
if (f == NULL)
{
RuntimeError("error opening file '%ls': %s", pathname.c_str(), strerror(errno));
}
if (strchr(mode, 'S'))
{ // if optimized for sequential access then use large buffer
setvbuf(f, NULL, _IOFBF, 10000000); // OK if it fails
}
return f;
}
// ----------------------------------------------------------------------------
// set mode to binary or text (pass 'b' or 't')
// ----------------------------------------------------------------------------
void fsetmode(FILE* f, char type)
{
if (type != 'b' && type != 't')
{
RuntimeError("fsetmode: invalid type '%c'", type);
}
#ifdef UNDER_CE // winCE and win32 have different return types for _fileno
FILE* fd = fileno(f); // note: no error check possible
#else
int fd = fileno(f); // note: no error check possible
#endif
int rc = (type == 'b' ? SET_BINARY_MODE(fd) : SET_TEXT_MODE(fd));
if (rc == -1)
{
RuntimeError("error changing file mode: %s", strerror(errno));
}
}
// ----------------------------------------------------------------------------
// freadOrDie(): like fread() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void freadOrDie(void* ptr, size_t size, size_t count, FILE* f)
{
// \\XXX\C$ reads are limited, with some randomness (e.g. 48 MB), on Windows 7 32 bit, so we break this into chunks of some MB. Meh.
while (count > 0)
{
size_t chunkn = min(count, (size_t) 15 * 1024 * 1024); // BUGBUG: I surely meant this limit to be bytes, not units of 'size'...
size_t n = fread(ptr, size, chunkn, f);
if (n != chunkn)
RuntimeError("error reading from file: %s", strerror(errno));
count -= n;
ptr = n * size + (char*) ptr;
}
}
#ifdef _WIN32
void freadOrDie(void* ptr, size_t size, size_t count, const HANDLE f)
{
// \\XXX\C$ reads are limited, with some randomness (e.g. 48 MB), on Windows 7 32 bit, so we break this into chunks of some MB. Meh.
while (count > 0)
{
size_t chunkn = min(count * size, (size_t) 15 * 1024 * 1024);
DWORD n;
ReadFile(f, ptr, (DWORD) chunkn, &n, NULL);
if (n != chunkn)
RuntimeError("error number for reading from file: %s", GetLastError());
count -= (size_t)(n / size);
ptr = n + (char*) ptr;
}
}
#endif
// ----------------------------------------------------------------------------
// fwriteOrDie(): like fwrite() but terminate with err msg in case of error;
// Windows C std lib fwrite() has problems writing >100 MB at a time (fails
// with Invalid Argument error), so we break it into chunks (yak!!)
// ----------------------------------------------------------------------------
void fwriteOrDie(const void* ptr, size_t size, size_t count, FILE* f)
{
const char* p1 = (const char*) ptr;
size_t totalBytes = size * count;
while (totalBytes > 0)
{
size_t wantWrite = totalBytes;
#define LIMIT (16 * 1024 * 1024) // limit to 16 MB at a time
if (wantWrite > LIMIT)
{
wantWrite = LIMIT;
}
size_t n = fwrite((const void*) p1, 1, wantWrite, f);
if (n != wantWrite)
{
RuntimeError("error writing to file (ptr=0x%08lx, size=%d, count=%d, writing %d bytes after %d): %s",
(unsigned long) (size_t) ptr, (int) size, (int) count, (int) wantWrite,
(int) (size * count - totalBytes),
strerror(errno));
}
totalBytes -= wantWrite;
p1 += wantWrite;
}
}
#ifdef _WIN32
void fwriteOrDie(const void* ptr, size_t size, size_t count, const HANDLE f)
{
const char* p1 = (const char*) ptr;
DWORD totalBytes = (DWORD)(size * count);
while (totalBytes > 0)
{
DWORD wantWrite = totalBytes;
#define LIMIT (16 * 1024 * 1024) // limit to 16 MB at a time
if (wantWrite > LIMIT)
{
wantWrite = LIMIT;
}
DWORD byteWritten = 0;
if (WriteFile(f, (const void*) p1, wantWrite, &byteWritten, NULL) == false)
{
RuntimeError("error writing to file (ptr=0x%08lx, size=%d,"
" count=%d, writing %d bytes after %d): %s",
ptr, size, count, (int) wantWrite,
(int) (size * count - totalBytes),
strerror(errno));
}
totalBytes -= wantWrite;
p1 += wantWrite;
}
}
#endif
long fseekOrDie(FILE* f, long offset, int mode)
{
long curPos = ftell(f);
if (curPos == -1L)
{
RuntimeError("error seeking: %s", strerror(errno));
}
int rc = fseek(f, offset, mode);
if (rc != 0)
{
RuntimeError("error seeking: %s", strerror(errno));
}
return curPos;
}
// ----------------------------------------------------------------------------
// fprintfOrDie(): like fprintf() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
#pragma warning(push)
#pragma warning(disable : 4793) // 'vararg' : causes native code generation
void fprintfOrDie(FILE* f, const char* fmt, ...)
{
va_list arg_ptr;
va_start(arg_ptr, fmt);
int rc = vfprintf(f, fmt, arg_ptr);
if (rc < 0)
{
RuntimeError("error writing to file: %s", strerror(errno));
}
}
#pragma warning(pop)
// ----------------------------------------------------------------------------
// fsyncOrDie(): like fsync() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void fsyncOrDie(FILE* f)
{
int fd = fileno(f);
if (fd == -1)
{
RuntimeError("unable to convert file handle to file descriptor: %s", strerror(errno));
}
// Ensure that all data is synced before returning from this function
#ifdef _WIN32
if (!FlushFileBuffers((HANDLE)_get_osfhandle(fd)))
{
RuntimeError("error syncing to file: %d", (int) ::GetLastError());
}
#else
int rc = fsync(fd);
if (rc != 0)
{
RuntimeError("error syncing to file: %s", strerror(errno));
}
#endif
}
// ----------------------------------------------------------------------------
// fflushOrDie(): like fflush() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void fflushOrDie(FILE* f)
{
int rc = fflush(f);
if (rc != 0)
{
RuntimeError("error flushing to file: %s", strerror(errno));
}
}
// ----------------------------------------------------------------------------
// filesize(): determine size of the file in bytes (with open file)
// ----------------------------------------------------------------------------
size_t filesize(FILE* f)
{
#ifdef _WIN32
size_t curPos = _ftelli64(f);
if (curPos == -1L)
{
RuntimeError("error determining file position: %s", strerror(errno));
}
int rc = _fseeki64(f, 0, SEEK_END);
if (rc != 0)
RuntimeError("error seeking to end of file: %s", strerror(errno));
size_t len = _ftelli64(f);
if (len == -1L)
RuntimeError("error determining file position: %s", strerror(errno));
rc = _fseeki64(f, curPos, SEEK_SET);
if (rc != 0)
RuntimeError("error resetting file position: %s", strerror(errno));
return len;
#else // TODO: test this
struct stat stat_buf;
int rc = fstat(fileno(f), &stat_buf);
if (rc != 0)
RuntimeError("error determining length of file: %s", strerror(errno));
static_assert(sizeof(stat_buf.st_size) >= sizeof(uint64_t), "struct stat not compiled for 64-bit mode");
return stat_buf.st_size;
#endif
}
// filesize(): determine size of the file in bytes (with pathname)
size_t filesize(const wchar_t* pathname)
{
FILE* f = fopenOrDie(pathname, L"rb");
try
{
size_t len = filesize(f);
fclose(f);
return (size_t) len;
}
catch (...)
{
fclose(f);
throw;
}
}
#ifndef UNDER_CE // no 64-bit under winCE
// filesize64(): determine size of the file in bytes (with pathname)
int64_t filesize64(const wchar_t* pathname)
{
#ifdef _WIN32
struct _stat64 fileinfo;
if (_wstat64(pathname, &fileinfo) == -1)
return 0;
else
return fileinfo.st_size;
#else
return filesize(pathname);
#endif
}
#endif
// ----------------------------------------------------------------------------
// fget/setpos(): seek functions with error handling
// ----------------------------------------------------------------------------
uint64_t fgetpos(FILE* f)
{
#ifdef _MSC_VER // standard does not allow to cast between fpos_t and integer numbers, and indeed it does not work on Linux (but on Windows and GCC)
fpos_t post;
int rc = ::fgetpos(f, &post);
if (rc != 0)
RuntimeError("error getting file position: %s", strerror(errno));
#else
auto pos = ftello(f);
uint64_t post = (uint64_t) pos;
static_assert(sizeof(post) >= sizeof(pos), "64-bit file offsets not enabled");
if ((decltype(pos)) post != pos)
LogicError("64-bit file offsets not enabled");
#endif
return post;
}
void fsetpos(FILE* f, uint64_t reqpos)
{
#ifdef _MSC_VER // standard does not allow to cast between fpos_t and integer numbers, and indeed it does not work on Linux (but on Windows and GCC)
#ifdef _MSC_VER // special hack for VS CRT
// Visual Studio's ::fsetpos() flushes the read buffer. This conflicts with a situation where
// we generally read linearly but skip a few bytes or KB occasionally, as is
// the case in speech recognition tools. This requires a number of optimizations.
uint64_t curpos = fgetpos(f);
uint64_t cureob = curpos + f->_cnt; // UGH: we mess with an internal structure here
while (reqpos >= curpos && reqpos < cureob)
{
// if we made it then do not call fsetpos()
if (reqpos == fgetpos(f))
return;
// if we seek within the existing buffer, then just move to the position by dummy reads
char buf[65536];
size_t n = min((size_t) reqpos - (size_t) curpos, _countof(buf));
fread(buf, sizeof(buf[0]), n, f); // (this may fail, but really shouldn't)
curpos += n;
// since we mess with f->_cnt, if something unexpected happened to the buffer then back off
if (curpos != fgetpos(f) || curpos + f->_cnt != cureob)
break; // oops
}
#endif // end special hack for VS CRT
// actually perform the seek
fpos_t post = reqpos;
int rc = ::fsetpos(f, &post);
#else // assuming __unix__
off_t post = (off_t) reqpos;
static_assert(sizeof(off_t) >= sizeof(reqpos), "64-bit file offsets not enabled");
if ((decltype(reqpos)) post != reqpos)
LogicError("64-bit file offsets not enabled");
int rc = fseeko(f, post, SEEK_SET);
#endif
if (rc != 0)
RuntimeError("error setting file position: %s", strerror(errno));
}
// ----------------------------------------------------------------------------
// unlinkOrDie(): unlink() with error handling
// ----------------------------------------------------------------------------
void unlinkOrDie(const std::string& pathname)
{
if (unlink(pathname.c_str()) != 0 && errno != ENOENT) // if file is missing that's what we want
RuntimeError("error deleting file '%s': %s", pathname.c_str(), strerror(errno));
}
void unlinkOrDie(const std::wstring& pathname)
{
if (_wunlink(pathname.c_str()) != 0 && errno != ENOENT) // if file is missing that's what we want
RuntimeError("error deleting file '%ls': %s", pathname.c_str(), strerror(errno));
}
// ----------------------------------------------------------------------------
// renameOrDie(): rename() with error handling
// ----------------------------------------------------------------------------
void renameOrDie(const std::string& from, const std::string& to)
{
#ifdef _WIN32
// deleting destination file if exits (to match Linux semantic)
if (fexists(to.c_str()) && !DeleteFileA(to.c_str()))
RuntimeError("error deleting file: '%s': %d", to.c_str(), GetLastError());
if (!MoveFileA(from.c_str(), to.c_str()))
RuntimeError("error renaming file '%s': %d", from.c_str(), GetLastError());
#else
// Delete destination file if it exists
// WORKAROUND: "rename" should do this but this is a workaround
// to the HDFS FUSE implementation's bug of failing to do so
// workaround for FUSE rename when running on Philly
unlinkOrDie(to);
if (rename(from.c_str(), to.c_str()) != 0)
{
RuntimeError("error renaming file '%s': %s", from.c_str(), strerror(errno));
}
#endif
}
void renameOrDie(const std::wstring& from, const std::wstring& to)
{
#ifdef _WIN32
// deleting destination file if exits (to match Linux semantic)
if (fexists(to.c_str()) && !DeleteFileW(to.c_str()))
RuntimeError("error deleting file '%ls': %d", to.c_str(), GetLastError());
if (!MoveFileW(from.c_str(), to.c_str()))
RuntimeError("error renaming file '%ls': %d", from.c_str(), GetLastError());
#else
renameOrDie(wtocharpath(from.c_str()).c_str(), wtocharpath(to.c_str()).c_str());
#endif
}
// ----------------------------------------------------------------------------
// fputstring(): write a 0-terminated string
// ----------------------------------------------------------------------------
void fputstring(FILE* f, const char* str)
{
fwriteOrDie((void*) str, sizeof(*str), strnlen(str, SIZE_MAX) + 1, f); // SECURITY NOTE: string use has been reviewed
}
void fputstring(FILE* f, const std::string& str)
{
fputstring(f, str.c_str());
}
#ifdef _WIN32
#pragma warning(push)
#pragma warning(disable : 4127)
#endif
void fputstring(FILE* f, const wchar_t* str)
{
if (sizeof(*str) == 2)
{
fwriteOrDie((void*) str, sizeof(*str), wcsnlen(str, SIZE_MAX) + 1, f); // SECURITY NOTE: string use has been reviewed
}
else if (sizeof(*str) == 4)
{
size_t strLen = wcsnlen(str, SIZE_MAX);
std::unique_ptr<char16_t[]> str16(new char16_t[strLen + 1]);
for (int i = 0; i < strLen; i++)
{
str16[i] = (char16_t) str[i];
}
str16[strLen] = 0;
fwriteOrDie((void*) str16.get(), sizeof(*str) / 2, strLen + 1, f); // SECURITY NOTE: string use has been reviewed
}
else
{
RuntimeError("error: unknown encoding\n");
}
}
#ifdef _WIN32
#pragma warning(pop)
#endif
void fputstring(FILE* f, const std::wstring& str)
{
fputstring(f, str.c_str());
}
// ----------------------------------------------------------------------------
// fexists(): test if a file exists
// ----------------------------------------------------------------------------
bool fexists(const wchar_t* pathname)
{
#ifdef _MSC_VER
WIN32_FIND_DATAW findFileData;
HANDLE hFind = FindFirstFileW(pathname, &findFileData);
if (hFind != INVALID_HANDLE_VALUE)
{
FindClose(hFind);
return true;
}
else
{
return false;
}
#else
auto_file_ptr f(_wfopen(pathname, L"r"));
return f != nullptr;
#endif
}
bool fexists(const char* pathname)
{
#ifdef _MSC_VER
WIN32_FIND_DATAA findFileData;
HANDLE hFind = FindFirstFileA(pathname, &findFileData);
if (hFind != INVALID_HANDLE_VALUE)
{
FindClose(hFind);
return true;
}
else
{
return false;
}
#else
auto_file_ptr f(fopen(pathname, "r"));
return f != nullptr;
#endif
}
// ----------------------------------------------------------------------------
// funicode(): test if a file uses unicode by reading its BOM
// ----------------------------------------------------------------------------
bool funicode(FILE* f)
{
unsigned short testCode;
if (fread(&testCode, sizeof(short), 1, f) == 1 &&
(int) testCode == 0xFEFF)
return true;
fseek(f, 0, SEEK_SET);
// rewind (f);
return false;
}
// ----------------------------------------------------------------------------
// fgetline(): like fgets() but terminate with err msg in case of error;
// removes the newline character at the end (like gets());
// Returns 'buf' (always). buf guaranteed to be 0-terminated.
// ----------------------------------------------------------------------------
#ifdef __CYGWIN__ // strnlen() is somehow missing in Cygwin, which we use to quick-check GCC builds under Windows (although it is not a real target platform)
static inline size_t strnlen(const char* s, size_t n)
{
return std::find(s, s + n, '\0') - s;
}
#endif
#ifdef UNDER_CE // strlen for char * not defined in winCE
static inline size_t strnlen(const char* s, size_t n)
{
return std::find(s, s + n, '\0') - s;
}
#endif
static inline wchar_t* fgets(wchar_t* buf, int n, FILE* f)
{
return fgetws(buf, n, f);
}
static inline size_t strnlen(wchar_t* s, size_t n)
{
return wcsnlen(s, n);
}
template <class CHAR>
CHAR* fgetline(FILE* f, CHAR* buf, int size)
{
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
CHAR* p = fgets(buf, size, f);
if (p == NULL) // EOF reached: next time feof() = true
{
if (ferror(f))
RuntimeError("error reading line: %s", strerror(errno));
buf[0] = 0;
return buf;
}
size_t n = strnlen(p, size);
// check for buffer overflow
if (n >= (size_t) size - 1)
{
basic_string<CHAR> example(p, n < 100 ? n : 100);
uint64_t filepos = fgetpos(f); // (for error message only)
RuntimeError("input line too long at file offset %d (max. %d characters allowed) [%s ...]", (int) filepos, (int) size - 1, msra::strfun::utf8(example).c_str());
}
// remove newline at end
if (n > 0 && p[n - 1] == '\n') // UNIX and Windows style
{
n--;
p[n] = 0;
if (n > 0 && p[n - 1] == '\r') // Windows style
{
n--;
p[n] = 0;
}
}
else if (n > 0 && p[n - 1] == '\r') // Mac style
{
n--;
p[n] = 0;
}
return buf;
}
// STL string version
std::string fgetline(FILE* f)
{
vector<char> buf(1000000);
return fgetline(f, &buf[0], (int) buf.size());
}
// STL string version
std::wstring fgetlinew(FILE* f)
{
vector<wchar_t> buf(1000000);
return fgetline(f, &buf[0], (int) buf.size());
}
// STL string version avoiding most memory allocations
void fgetline(FILE* f, std::string& s, std::vector<char>& buf)
{
buf.resize(1000000); // enough? // KIT: increased to 1M to be safe
const char* p = fgetline(f, &buf[0], (int) buf.size());
s.assign(p);
}
void fgetline(FILE* f, std::wstring& s, std::vector<wchar_t>& buf)
{
buf.resize(1000000); // enough? // KIT: increased to 1M to be safe
const wchar_t* p = fgetline(f, &buf[0], (int) buf.size());
s.assign(p);
}
// char buffer version
void fgetline(FILE* f, std::vector<char>& buf)
{
const int BUF_SIZE = 1000000; // enough? // KIT: increased to 1M to be safe
buf.resize(BUF_SIZE);
fgetline(f, &buf[0], (int) buf.size());
buf.resize(strnlen(&buf[0], BUF_SIZE) + 1); // SECURITY NOTE: string use has been reviewed
}
void fgetline(FILE* f, std::vector<wchar_t>& buf)
{
const int BUF_SIZE = 1000000; // enough? // KIT: increased to 1M to be safe
buf.resize(BUF_SIZE);
fgetline(f, &buf[0], (int) buf.size());
buf.resize(wcsnlen(&buf[0], BUF_SIZE) + 1); // SECURITY NOTE: string use has been reviewed
}
// read a 0-terminated string
const char* fgetstring(FILE* f, __out_z_cap(size) char* buf, int size)
{
int i;
for (i = 0;; i++)
{
int c = fgetc(f);
if (c == EOF)
RuntimeError("error reading string or missing 0: %s", strerror(errno));
if (c == 0)
break;
if (i >= size - 1)
RuntimeError("input line too long (max. %d characters allowed)", size - 1);
buf[i] = (char) c;
}
assert(i < size);
buf[i] = 0;
return buf;
}
// read a 0-terminated wstring
string fgetstring(FILE* f)
{
string res;
for (;;)
{
int c = fgetc(f);
if (c == EOF)
RuntimeError("error reading string or missing 0: %s", strerror(errno));
if (c == 0)
break;
res.push_back((char) c);
}
return res;
}
// read a 0-terminated string
const wchar_t* fgetstring(FILE* f, __out_z_cap(size) wchar_t* buf, int size)
{
int i;
for (i = 0;; i++)
{
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
wint_t c = fgetwc(f);
if (c == WEOF)
RuntimeError("error reading string or missing 0: %s", strerror(errno));
if (c == 0)
break;
if (i >= size - 1)
{
RuntimeError("input line too long (max. %d wchar_tacters allowed)", size - 1);
}
buf[i] = (wchar_t) c;
}
assert(i < size);
buf[i] = 0;
return buf;
}
#if (_MSC_VER < 1800)
// read a 0-terminated wstring
wstring fgetwstring(FILE* f)
{
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
wstring res;
for (;;)
{
//
// there is a known vc++ runtime bug: Microsoft Connect 768113
// fgetwc can skip a byte in certain condition
// this is already fixed in update release to VS 2012
// for now the workaround is to use fgetc twice to simulate fgetwc
//
// wint_t c = fgetwc (f);
int c1 = fgetc(f);
int c2 = fgetc(f);
// synthetic fgetc output to simulate fgetwc
// note the order below works only for little endian
wint_t c = (wint_t)((c2 << 8) | c1);
if (c == WEOF)
RuntimeError("error reading string or missing 0: %s", strerror(errno));
if (c == 0)
break;
res.push_back((wchar_t) c);
}
return res;
}
#else
// read a 0-terminated wstring
wstring fgetwstring(FILE* f)
{
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
wstring res;
for (;;)
{
wint_t c = fgetwc(f);
if (c == WEOF)
RuntimeError("error reading string or missing 0: %s", strerror(errno));
if (c == 0)
break;
res.push_back((wchar_t) c);
}
return res;
}
#endif
bool fskipspace(FILE* f)
{
int count = 0;
for (;; count++)
{
int c = fgetc(f);
if (c == EOF) // hit the end
{
if (ferror(f))
RuntimeError("error reading from file: %s", strerror(errno));
break;
}
if (!isspace(c)) // end of space: undo getting that character
{
int rc = ungetc(c, f);
if (rc != c)
RuntimeError("error in ungetc(): %s", strerror(errno));
break;
}
}
return count > 0;
}
bool fskipwspace(FILE* f)
{
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
int count = 0;
for (;; count++)
{
wint_t c = fgetwc(f);
if (c == WEOF) // hit the end
{
if (ferror(f))
RuntimeError("error reading from file: %s", strerror(errno));
break;
}
if (!iswspace(c)) // end of space: undo getting that character
{
wint_t rc = ungetwc(c, f);
if (rc != c)
RuntimeError("error in ungetc(): %s", strerror(errno));
break;
}
}
return count > 0;
}
// fskipNewLine(): skip all white space until end of line incl. the newline
// skip - skip the end of line if true, otherwise leave the end of line (but eat any leading space)
// returns false, true, or EOF
int fskipNewline(FILE* f, bool skip)
{
int c;
bool found = false;
// skip white space
do
{
c = fgetc(f);
} while (c == ' ' || c == '\t');
if (c == '\r' || c == '\n') // Accept any type of newline
{
found = true;
if (skip)
c = fgetc(f);
}
if ((found && !skip) ||
!(c == '\r' || c == '\n'))
{
// if we found an EOF, return that unless there was a newline before the EOF
if (c == EOF)
return found ? (int) true : EOF;
int rc = ungetc(c, f);
if (rc != c)
RuntimeError("error in ungetc(): %s", strerror(errno));
return (int) found;
}
// if we get here we saw a newline
return (int) true;
}
// read a space-terminated token
// ...TODO: eat trailing space like fscanf() doessurrounding space)
const char* fgettoken(FILE* f, __out_z_cap(size) char* buf, int size)
{
fskipspace(f); // skip leading space
int c = -1;
int i;
for (i = 0;; i++)
{
c = fgetc(f);
if (c == EOF)
break;
if (isspace(c))
break;
if (i >= size - 1)
RuntimeError("input token too long (max. %d characters allowed)", size - 1);
buf[i] = (char) c;
}
// ... TODO: while (IsWhiteSpace (c)) c = fgetc (f); // skip trailing space
if (c != EOF)
{
int rc = ungetc(c, f);
if (rc != c)
RuntimeError("error in ungetc(): %s", strerror(errno));
}
assert(i < size);
buf[i] = 0;
return buf;
}
string fgettoken(FILE* f)
{
char buf[80];
return fgettoken(f, buf, sizeof(buf) / sizeof(*buf));
}
// read a space-terminated token
const wchar_t* fgettoken(FILE* f, __out_z_cap(size) wchar_t* buf, int size)
{
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
fskipwspace(f); // skip leading space
wint_t c = WEOF;
int i;
for (i = 0;; i++)
{
c = fgetwc(f);
if (c == WEOF)
break;
if (iswspace(c))
break;
if (i >= size - 1)
RuntimeError("input token too long (max. %d wchar_tacters allowed)", size - 1);
buf[i] = (wchar_t) c;
}
// ... TODO: while (IsWhiteSpace (c)) c = fgetc (f); // skip trailing space
if (c != WEOF)
{
int rc = ungetwc(c, f);
if (rc != c)
RuntimeError("error in ungetwc(): %s", strerror(errno));
}
assert(i < size);
buf[i] = 0;
return buf;
}
wstring fgetwtoken(FILE* f)
{
wchar_t buf[80];
return fgettoken(f, buf, sizeof(buf) / sizeof(*buf));
}
template <>
int ftrygetText<bool>(FILE* f, bool& v)
{
wchar_t c;
int rc = ftrygetText(f, c);
v = (c == L'T');
return rc;
}
// ----------------------------------------------------------------------------
// fputText(): write a bool out as character
// ----------------------------------------------------------------------------
template <>
void fputText<bool>(FILE* f, bool v)
{
fputText(f, v ? L'T' : L'F');
}
// ----------------------------------------------------------------------------
// fgetTag(): read a 4-byte tag & return as a string
// ----------------------------------------------------------------------------
std::string fgetTag(FILE* f)
{
char tag[5];
freadOrDie(&tag[0], sizeof(tag[0]), 4, f);
tag[4] = 0;
return std::string(tag);
}
// ----------------------------------------------------------------------------
// fcheckTag(): read a 4-byte tag & verify it; terminate if wrong tag
// ----------------------------------------------------------------------------
void fcheckTag(FILE* f, const char* expectedTag)
{
fcompareTag(fgetTag(f), expectedTag);
}
void fcheckTag_ascii(FILE* f, const string& expectedTag)
{
char buf[20]; // long enough for a tag
fskipspace(f);
fgettoken(f, buf, sizeof(buf) / sizeof(*buf));
if (expectedTag != buf)
{
RuntimeError("invalid tag '%s' found; expected '%s'", buf, expectedTag.c_str());
}
}
// ----------------------------------------------------------------------------
// fcompareTag(): compare two tags; terminate if wrong tag
// ----------------------------------------------------------------------------
void fcompareTag(const string& readTag, const string& expectedTag)
{
if (readTag != expectedTag)
{
RuntimeError("invalid tag '%s' found; expected '%s'",
readTag.c_str(), expectedTag.c_str());
}
}
// ----------------------------------------------------------------------------
// fputTag(): write a 4-byte tag
// ----------------------------------------------------------------------------
void fputTag(FILE* f, const char* tag)
{
const int TAG_LEN = 4;
assert(strnlen(tag, TAG_LEN + 1) == TAG_LEN);
fwriteOrDie((void*) tag, sizeof(*tag), strnlen(tag, TAG_LEN), f);
}
// ----------------------------------------------------------------------------
// fskipstring(): skip a 0-terminated string, such as a pad string
// ----------------------------------------------------------------------------
void fskipstring(FILE* f)
{
char c;
do
{
freadOrDie(&c, sizeof(c), 1, f);
} while (c);
}
// ----------------------------------------------------------------------------
// fpad(): write a 0-terminated string to pad file to a n-byte boundary
// (note: file must be opened in binmode to work properly on DOS/Windows!!!)
// ----------------------------------------------------------------------------
void fpad(FILE* f, int n)
{
// get current writing position
int pos = ftell(f);
if (pos == -1)
{
RuntimeError("error in ftell(): %s", strerror(errno));
}
// determine how many bytes are needed (at least 1 for the 0-terminator)
// and create a dummy string of that length incl. terminator
int len = n - (pos % n);
const char dummyString[] = "MSR-Asia: JL+FS";
size_t offset = sizeof(dummyString) / sizeof(dummyString[0]) - len;
assert(offset >= 0);
fputstring(f, dummyString + offset);
}
// ----------------------------------------------------------------------------
// fgetbyte(): read a byte value
// ----------------------------------------------------------------------------
char fgetbyte(FILE* f)
{
char v;
freadOrDie(&v, sizeof(v), 1, f);
return v;
}
// ----------------------------------------------------------------------------
// fgetshort(): read a short value
// ----------------------------------------------------------------------------
short fgetshort(FILE* f)
{
short v;
freadOrDie(&v, sizeof(v), 1, f);
return v;
}
short fgetshort_bigendian(FILE* f)
{
unsigned char b[2];
freadOrDie(&b, sizeof(b), 1, f);
return (short) ((b[0] << 8) + b[1]);
}
// ----------------------------------------------------------------------------
// fgetint24(): read a 3-byte (24-bit) int value
// ----------------------------------------------------------------------------
int fgetint24(FILE* f)
{
int v;
assert(sizeof(v) == 4);
freadOrDie(&v, sizeof(v) - 1, 1, f); // only read 3 lower-order bytes
v <<= 8; // shift up (upper 8 bits uninit'ed)
v >>= 8; // shift down 8 bits with sign-extend
return v;
}
// ----------------------------------------------------------------------------
// fgetint(): read an int value
// ----------------------------------------------------------------------------
int fgetint(FILE* f)
{
int v;
freadOrDie(&v, sizeof(v), 1, f);
return v;
}
int fgetint_bigendian(FILE* f)
{
unsigned char b[4];
freadOrDie(&b, sizeof(b), 1, f);
return (int) (((((b[0] << 8) + b[1]) << 8) + b[2]) << 8) + b[3];
}
int fgetint_ascii(FILE* f)
{
fskipspace(f);
int res = 0;
char c;
freadOrDie(&c, sizeof(c), 1, f);
while (isdigit((unsigned char) c))
{
res = (10 * res) + (c - '0');
freadOrDie(&c, sizeof(c), 1, f);
}
int rc = ungetc(c, f);
if (rc != c)
{
RuntimeError("error in ungetc(): %s", strerror(errno));
}
return res;
}
// ----------------------------------------------------------------------------
// fgetlong(): read an long value
// ----------------------------------------------------------------------------
long fgetlong(FILE* f)
{
long v;
freadOrDie(&v, sizeof(v), 1, f);
return v;
}
// ----------------------------------------------------------------------------
// fgetfloat(): read a float value
// ----------------------------------------------------------------------------
float fgetfloat(FILE* f)
{
float v;
freadOrDie(&v, sizeof(v), 1, f);
return v;
}
float fgetfloat_bigendian(FILE* f)
{
int bitpattern = fgetint_bigendian(f);
return *((float*) &bitpattern);
}
float fgetfloat_ascii(FILE* f)
{
float val;
fskipspace(f);
int rc = fscanf(f, "%f", &val); // security hint: safe overloads
if (rc == 0)
RuntimeError("error reading float value from file (invalid format): %s", strerror(errno));
else if (rc == EOF)
RuntimeError("error reading from file: %s", strerror(errno));
assert(rc == 1);
return val;
}
// ----------------------------------------------------------------------------
// fgetdouble(): read a double value
// ----------------------------------------------------------------------------
double fgetdouble(FILE* f)
{
double v;
freadOrDie(&v, sizeof(v), 1, f);
return v;
}
#ifdef _WIN32
// ----------------------------------------------------------------------------
// fgetwav(): read an entire .wav file
// ----------------------------------------------------------------------------
void WAVEHEADER::prepareRest(int sampleCount)
{
FmtLength = 16;
wFormatTag = 1;
nAvgBytesPerSec = nSamplesPerSec * nBlockAlign;
riffchar[0] = 'R';
riffchar[1] = 'I';
riffchar[2] = 'F';
riffchar[3] = 'F';
if (sampleCount != -1)
{
DataLength = sampleCount * nBlockAlign;
RiffLength = 36 + DataLength;
}
else
{
DataLength = 0xffffffff;
RiffLength = 0xffffffff;
}
wavechar[0] = 'W';
wavechar[1] = 'A';
wavechar[2] = 'V';
wavechar[3] = 'E';
wavechar[4] = 'f';
wavechar[5] = 'm';
wavechar[6] = 't';
wavechar[7] = ' ';
datachar[0] = 'd';
datachar[1] = 'a';
datachar[2] = 't';
datachar[3] = 'a';
}
void WAVEHEADER::prepare(unsigned int Fs, int Bits, int Channels, int SampleCount)
{
nChannels = (short) Channels;
nSamplesPerSec = Fs;
nBlockAlign = (short) (Channels * (Bits / 8));
nAvgBytesPerSec = Fs * nBlockAlign;
wBitsPerSample = (short) Bits;
prepareRest(SampleCount);
}
void WAVEHEADER::prepare(const WAVEFORMATEX& wfx, int sampleCount /* -1 for unknown */)
{
nChannels = wfx.nChannels;
nSamplesPerSec = wfx.nSamplesPerSec;
nBlockAlign = wfx.nBlockAlign;
wBitsPerSample = wfx.wBitsPerSample;
prepareRest(sampleCount);
}
void WAVEHEADER::write(FILE* f)
{
fputTag(f, "RIFF");
fputint(f, RiffLength);
fputTag(f, "WAVE");
fputTag(f, "fmt ");
fputint(f, FmtLength);
fputshort(f, wFormatTag);
fputshort(f, nChannels);
fputint(f, nSamplesPerSec);
fputint(f, nAvgBytesPerSec);
fputshort(f, nBlockAlign);
fputshort(f, wBitsPerSample);
assert(FmtLength == 16);
assert(wFormatTag == 1);
fputTag(f, "data");
fputint(f, DataLength);
fflushOrDie(f);
}
/*static*/ void WAVEHEADER::update(FILE* f)
{
long curPos = ftell(f);
if (curPos == -1L)
{
RuntimeError("error determining file position: %s", strerror(errno));
}
unsigned int len = (unsigned int) filesize(f);
unsigned int RiffLength = len - 8;
unsigned int DataLength = RiffLength - 36;
fseekOrDie(f, 4, SEEK_SET);
fputint(f, RiffLength);
fseekOrDie(f, 40, SEEK_SET);
fputint(f, DataLength);
fseekOrDie(f, curPos, SEEK_SET);
}
#endif
// ----------------------------------------------------------------------------
// fputbyte(): write a byte value
// ----------------------------------------------------------------------------
void fputbyte(FILE* f, char v)
{
fwriteOrDie(&v, sizeof(v), 1, f);
}
// ----------------------------------------------------------------------------
// fputshort(): write a short value
// ----------------------------------------------------------------------------
void fputshort(FILE* f, short v)
{
fwriteOrDie(&v, sizeof(v), 1, f);
}
// ----------------------------------------------------------------------------
// fputint24(): write a 3-byte (24-bit) int value
// ----------------------------------------------------------------------------
void fputint24(FILE* f, int v)
{
assert(sizeof(v) == 4);
fwriteOrDie(&v, sizeof(v) - 1, 1, f); // write low-order 3 bytes
}
// ----------------------------------------------------------------------------
// fputint(): write an int value
// ----------------------------------------------------------------------------
void fputint(FILE* f, int v)
{
fwriteOrDie(&v, sizeof(v), 1, f);
}
// ----------------------------------------------------------------------------
// fputlong(): write an long value
// ----------------------------------------------------------------------------
void fputlong(FILE* f, long v)
{
fwriteOrDie(&v, sizeof(v), 1, f);
}
// ----------------------------------------------------------------------------
// fputfloat(): write a float value
// ----------------------------------------------------------------------------
void fputfloat(FILE* f, float v)
{
fwriteOrDie(&v, sizeof(v), 1, f);
}
// ----------------------------------------------------------------------------
// fputdouble(): write a double value
// ----------------------------------------------------------------------------
void fputdouble(FILE* f, double v)
{
fwriteOrDie(&v, sizeof(v), 1, f);
}
// ----------------------------------------------------------------------------
// fputfile(): write a binary block or a string as a file
// ----------------------------------------------------------------------------
void fputfile(const wstring& pathname, const std::vector<char>& buffer)
{
FILE* f = fopenOrDie(pathname, L"wb");
try
{
if (buffer.size() > 0)
{ // ^^ otherwise buffer[0] is an illegal expression
fwriteOrDie(&buffer[0], sizeof(buffer[0]), buffer.size(), f);
}
fcloseOrDie(f);
}
catch (...)
{
fclose(f);
throw;
}
}
void fputfile(const wstring& pathname, const std::wstring& string)
{
FILE* f = fopenOrDie(pathname, L"wb");
try
{
if (string.length() > 0)
{ // ^^ otherwise buffer[0] is an illegal expression
fwriteOrDie(string.c_str(), sizeof(string[0]), string.length(), f);
}
fcloseOrDie(f);
}
catch (...)
{
fclose(f);
throw;
}
}
void fputfile(const wstring& pathname, const std::string& string)
{
FILE* f = fopenOrDie(pathname, L"wb");
try
{
if (string.length() > 0)
{ // ^^ otherwise buffer[0] is an illegal expression
fwriteOrDie(string.c_str(), sizeof(string[0]), string.length(), f);
}
fcloseOrDie(f);
}
catch (...)
{
fclose(f);
throw;
}
}
// ----------------------------------------------------------------------------
// fgetfile(): load a file as a binary block
// ----------------------------------------------------------------------------
void fgetfile(const wstring& pathname, std::vector<char>& buffer)
{
FILE* f = fopenOrDie(pathname, L"rb");
size_t len = filesize(f);
buffer.resize(len);
if (buffer.size() > 0)
{ // ^^ otherwise buffer[0] is an illegal expression
freadOrDie(&buffer[0], sizeof(buffer[0]), buffer.size(), f);
}
fclose(f);
}
void fgetfile(FILE* f, std::vector<char>& buffer)
{ // this version reads until eof
buffer.resize(0);
buffer.reserve(1000000); // avoid too many reallocations
std::vector<char> inbuf;
inbuf.resize(65536); // read in chunks of this size
while (!feof(f)) // read until eof
{
size_t n = fread(&inbuf[0], sizeof(inbuf[0]), inbuf.size(), f);
if (ferror(f))
{
RuntimeError("fgetfile: error reading from file: %s", strerror(errno));
}
buffer.insert(buffer.end(), inbuf.begin(), inbuf.begin() + n);
}
buffer.reserve(buffer.size());
}
// load it into RAM in one huge chunk
static size_t fgetfilechars(const std::wstring& path, vector<char>& buffer)
{
auto_file_ptr f(fopenOrDie(path, L"rb"));
size_t len = filesize(f);
buffer.reserve(len + 1);
freadOrDie(buffer, len, f);
buffer.push_back(0); // this makes it a proper C string
return len;
}
template <class LINES>
static void strtoklines(char* s, LINES& lines)
{
for (char* p = strtok(s, "\r\n"); p; p = strtok(NULL, "\r\n"))
lines.push_back(p);
}
void msra::files::fgetfilelines(const std::wstring& path, vector<char>& buffer, std::vector<std::string>& lines)
{
// load it into RAM in one huge chunk
const size_t len = fgetfilechars(path, buffer);
// parse into lines
lines.resize(0);
lines.reserve(len / 20);
strtoklines(&buffer[0], lines);
}
// same as above but returning const char* (avoiding the memory allocation)
vector<char*> msra::files::fgetfilelines(const wstring& path, vector<char>& buffer)
{
// load it into RAM in one huge chunk
const size_t len = fgetfilechars(path, buffer);
// parse into lines
vector<char*> lines;
lines.reserve(len / 20);
strtoklines(&buffer[0], lines);
return lines;
}
// ----------------------------------------------------------------------------
// getfiletime(): access modification time
// ----------------------------------------------------------------------------
#ifndef _FILETIME_
//typedef struct _FILETIME { DWORD dwLowDateTime; DWORD dwHighDateTime; }; // from minwindef.h
typedef time_t FILETIME;
#else
bool operator>=(const FILETIME& targettime, const FILETIME& inputtime) // for use in fuptodate()
{
return (targettime.dwHighDateTime > inputtime.dwHighDateTime) ||
(targettime.dwHighDateTime == inputtime.dwHighDateTime && targettime.dwLowDateTime >= inputtime.dwLowDateTime);
}
#endif
#ifdef _WIN32
class auto_find_handle
{
HANDLE h;
auto_find_handle operator=(const auto_find_handle&);
auto_find_handle(const auto_find_handle&);
public:
auto_find_handle(HANDLE p_h)
: h(p_h)
{
}
~auto_find_handle()
{
// TODO: Check for error code and throw if !std::uncaught_exception()
if (h != INVALID_HANDLE_VALUE)
::FindClose(h);
}
operator HANDLE() const
{
return h;
}
};
#endif
bool getfiletime(const wstring& path, FILETIME& time)
{ // return file modification time, false if cannot be determined
#ifdef _WIN32
WIN32_FIND_DATAW findFileData;
auto_find_handle hFind(FindFirstFileW(path.c_str(), &findFileData));
if (hFind != INVALID_HANDLE_VALUE)
{
time = findFileData.ftLastWriteTime;
return true;
}
else
return false;
#else // TODO: test this; e.g. does st_mtime have the desired resolution?
struct stat buf;
int result;
// Get data associated with "crt_stat.c":
result = stat(wtocharpath(path.c_str()).c_str(), &buf);
// Check if statistics are valid:
if (result != 0)
return false;
time = buf.st_mtime;
return true;
#endif
}
// ----------------------------------------------------------------------------
// expand_wildcards -- wildcard expansion of a path, including directories.
// ----------------------------------------------------------------------------
#ifdef _WIN32
// Win32-style variant of this function (in case we want to use it some day)
// Returns 0 in case of failure. May throw in case of bad_alloc.
static BOOL ExpandWildcards(wstring path, vector<wstring>& paths)
{
// convert root to DOS filename convention
for (size_t k = 0; k < path.length(); k++)
if (path[k] == '/')
path[k] = '\\';
// remove terminating backslash
size_t last = path.length() - 1;
if (last >= 0 && path[last] == '\\')
path.erase(last);
// convert root to long filename convention
// if (path.find (L"\\\\?\\") != 0)
// path = L"\\\\?\\" + root;
// split off everything after first wildcard
size_t wpos = path.find_first_of(L"*?");
if (wpos == 2 && path[0] == '\\' && path[1] == '\\')
wpos = path.find_first_of(L"*?", 4); // 4=skip "\\?\"
if (wpos == wstring::npos)
{ // no wildcard: just return it
paths.push_back(path);
return TRUE;
}
// split off everything afterwards if any
wstring rest; // remaining path after this directory
size_t spos = path.find_first_of(L"\\", wpos + 1);
if (spos != wstring::npos)
{
rest = path.substr(spos + 1);
path.erase(spos);
}
// crawl folder
WIN32_FIND_DATAW ffdata;
auto_find_handle hFind(::FindFirstFileW(path.c_str(), &ffdata));
if (hFind == INVALID_HANDLE_VALUE)
{
DWORD err = ::GetLastError();
if (rest.empty() && err == 2)
return TRUE; // no matching file: empty
return FALSE; // another error
}
size_t pos = path.find_last_of(L"\\");
if (pos == wstring::npos)
LogicError("unexpected missing \\ in path");
wstring parent = path.substr(0, pos);
do
{
// skip this and parent directory
bool isDir = ((ffdata.dwFileAttributes & (FILE_ATTRIBUTE_DIRECTORY | FILE_ATTRIBUTE_REPARSE_POINT)) != 0);
if (isDir && ffdata.cFileName[0] == '.')
continue;
wstring filename = parent + L"\\" + ffdata.cFileName;
if (rest.empty())
{
paths.push_back(filename);
}
else if (isDir) // multi-wildcards: further expand
{
BOOL rc = ExpandWildcards(filename + L"\\" + rest, paths);
rc; // error here means no match, e.g. Access Denied to one subfolder
}
} while (::FindNextFileW(hFind, &ffdata) != 0);
return TRUE;
}
#endif
void expand_wildcards(const wstring& path, vector<wstring>& paths)
{
#ifdef _WIN32
BOOL rc = ExpandWildcards(path, paths);
if (!rc)
RuntimeError("error in expanding wild cards '%ls': Win32 error %d", path.c_str(), (int) ::GetLastError());
#else
// On Linux we have just the function for the job: glob
glob_t globResult;
if (glob(wtocharpath(path.c_str()).c_str(), GLOB_TILDE, NULL, &globResult) != 0)
{
RuntimeError("error in expanding wild cards '%ls': %s", path.c_str(), strerror(errno));
}
for (unsigned int i = 0; i < globResult.gl_pathc; ++i)
{
paths.push_back(msra::strfun::utf16(globResult.gl_pathv[i]));
}
globfree(&globResult);
#endif
}
// ----------------------------------------------------------------------------
// make_intermediate_dirs() -- make all intermediate dirs on a path
// ----------------------------------------------------------------------------
static void mkdir(const wstring& path)
{
int rc = _wmkdir(path.c_str());
if (rc >= 0 || errno == EEXIST)
return; // no error or already existing --ok
#ifdef _WIN32 // bug in _wmkdir(): returns access_denied if folder exists but read-only --check existence
if (errno == EACCES)
{
DWORD att = ::GetFileAttributesW(path.c_str());
if (att != INVALID_FILE_ATTRIBUTES || (att & FILE_ATTRIBUTE_DIRECTORY) != 0)
return; // ok
}
#endif
RuntimeError("mkdir: error creating intermediate directory %ls", path.c_str());
}
// make subdir of a file including parents
void msra::files::make_intermediate_dirs(const wstring& filepath)
{
vector<wchar_t> buf;
buf.resize(filepath.length() + 1, 0);
wcscpy(&buf[0], filepath.c_str());
wstring subpath;
int skip = 0;
#ifdef _WIN32
// On windows, if share (\\) then the first two levels (machine, share name) cannot be made.
if ((buf[0] == '/' && buf[1] == '/') || (buf[0] == '\\' && buf[1] == '\\'))
{
subpath = L"/";
skip = 2; // skip two levels (machine, share)
}
#else
// On unix, if the filepath starts with '/' then it is absolute
// path and the created sub-paths should also start with '/'
if (buf[0] == '/')
{
subpath = L"/";
}
#endif
// make all constituents except the filename (to make a dir, include a trailing slash)
wchar_t* context = nullptr;
for (const wchar_t* p = wcstok_s(&buf[0], L"/\\", &context); p; p = wcstok_s(NULL, L"/\\", &context))
{
if (subpath != L"" && subpath != L"/" && subpath != L"\\" && skip == 0)
{
mkdir(subpath);
}
else if (skip > 0)
skip--; // skip this level
// rebuild the final path
if (subpath != L"")
subpath += L"/";
subpath += p;
}
}
// ----------------------------------------------------------------------------
// fuptodate() -- test whether an output file is at least as new as an input file
// ----------------------------------------------------------------------------
// test if file 'target' is not older than 'input' --used for make mode
// 'input' must exist if 'inputrequired'; otherweise if 'target' exists, it is considered up to date
// 'target' may or may not exist
bool msra::files::fuptodate(const wstring& target, const wstring& input, bool inputrequired)
{
FILETIME targettime;
if (!getfiletime(target, targettime))
return false; // target missing: need to update
FILETIME inputtime;
if (!getfiletime(input, inputtime))
return !inputrequired; // input missing: if required, pretend to be out of date as to force caller to fail
// up to date if target has higher time stamp
return targettime >= inputtime; // note: uses an overload for WIN32 FILETIME (in Linux, FILETIME=time_t=size_t)
}
// separate string by separator
template<class String>
vector<String> SplitString(const String& str, const String& sep)
{
vector<String> vstr;
String csub;
size_t ifound = 0;
size_t ifoundlast = ifound;
ifound = str.find_first_of(sep, ifound);
while (ifound != String::npos)
{
csub = str.substr(ifoundlast, ifound - ifoundlast);
if (!csub.empty())
vstr.push_back(csub);
ifoundlast = ifound + 1;
ifound = str.find_first_of(sep, ifoundlast);
}
ifound = str.length();
csub = str.substr(ifoundlast, ifound - ifoundlast);
if (!csub.empty())
vstr.push_back(csub);
return vstr;
}
template vector<string> SplitString(const string& istr, const string& sep);
template vector<wstring> SplitString(const wstring& istr, const wstring& sep);
static inline std::string wcstombs(const std::wstring& p) // output: MBCS
{
size_t len = p.length();
vector<char> buf(2 * len + 1); // max: 1 wchar => 2 mb chars
fill(buf.begin(), buf.end(), 0);
::wcstombs(&buf[0], p.c_str(), 2 * len + 1);
return std::string(&buf[0]);
}
static inline std::wstring mbstowcs(const std::string& p) // input: MBCS
{
size_t len = p.length();
vector<wchar_t> buf(len + 1); // max: >1 mb chars => 1 wchar
fill(buf.begin(), buf.end(), (wchar_t) 0);
// OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. [rogeryu 2006/03/21]");
::mbstowcs(&buf[0], p.c_str(), len + 1);
return std::wstring(&buf[0]);
}
wstring s2ws(const string& str)
{
#ifdef __unix__
return mbstowcs(str);
#else
typedef std::codecvt_utf8<wchar_t> convert_typeX;
std::wstring_convert<convert_typeX, wchar_t> converterX;
return converterX.from_bytes(str);
#endif
}
string ws2s(const wstring& wstr)
{
#ifdef __unix__
return wcstombs(wstr);
#else
typedef codecvt_utf8<wchar_t> convert_typeX;
wstring_convert<convert_typeX, wchar_t> converterX;
return converterX.to_bytes(wstr);
#endif
}