https://github.com/Microsoft/CNTK
Revision 218d2da137c9ba38d2ae272e0a557d74c5408d6a authored by jeanfad on 06 June 2016, 14:34:12 UTC, committed by jeanfad on 28 June 2016, 09:24:30 UTC
1 parent 3c01d90
Tip revision: 218d2da137c9ba38d2ae272e0a557d74c5408d6a authored by jeanfad on 06 June 2016, 14:34:12 UTC
little docstring fixes
little docstring fixes
Tip revision: 218d2da
fileutil.cpp
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#ifndef _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
#endif
#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
#pragma warning(disable : 4996) // ^^ this does not seem to work--TODO: make it work
#define _FILE_OFFSET_BITS 64 // to force fseeko() and ftello() 64 bit in Linux
#ifdef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES // fixed-buffer overloads for strcpy() etc.
#undef _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
#endif
#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
#include "Basics.h"
#include "fileutil.h"
#include "ProgressTracing.h"
#ifdef __unix__
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <glob.h>
#endif
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdint.h>
#include <assert.h>
#ifdef _WIN32
#define NOMINMAX
#include "Windows.h" // for FILETIME
#endif
#include <algorithm> // for std::find
#include <limits.h>
#include <memory>
#include <cwctype>
#ifndef UNDER_CE // some headers don't exist under winCE - the appropriate definitions seem to be in stdlib.h
#if defined(_WIN32) || defined(__CYGWIN__)
#include <fcntl.h> // for _O_BINARY/TEXT - not needed for wince
#include <io.h> // for _setmode()
#define SET_BINARY_MODE(handle) setmode(handle, _O_BINARY)
#define SET_TEXT_MODE(handle) setmode(handle, _O_TEXT)
#else
#define SET_BINARY_MODE(handle) ((int) 0)
#define SET_TEXT_MODE(handle) ((int) 0)
#endif
#endif
#define __out_z_cap(x) // a fake SAL annotation; this may come in handy some day if we try static code analysis, so I don't want to delete it
#include <errno.h>
using namespace std;
using namespace Microsoft::MSR::CNTK;
// ----------------------------------------------------------------------------
// some mappings for non-Windows builds
// ----------------------------------------------------------------------------
// Explicit specializations of GetScanFormatString(): each one returns the wide-character
// scanf-style conversion for the type of its (unnamed, unused) argument.
// Every format begins with a blank, which in the scanf family skips any leading white space.
// 8-bit character ('h' length modifier)
template <>
const wchar_t* GetScanFormatString(char)
{
return L" %hc";
}
// wide character ('l' length modifier)
template <>
const wchar_t* GetScanFormatString(wchar_t)
{
return L" %lc";
}
// signed integers; %i also accepts octal (leading 0) and hex (leading 0x) input
template <>
const wchar_t* GetScanFormatString(short)
{
return L" %hi";
}
template <>
const wchar_t* GetScanFormatString(int)
{
return L" %i";
}
template <>
const wchar_t* GetScanFormatString(long)
{
return L" %li";
}
// unsigned integers (decimal only)
template <>
const wchar_t* GetScanFormatString(unsigned short)
{
return L" %hu";
}
template <>
const wchar_t* GetScanFormatString(unsigned int)
{
return L" %u";
}
// (no specialization for unsigned long; the line below is intentionally disabled)
//template <> const wchar_t* GetScanFormatString(unsigned long) {return L" %lu";}
// floating point; 'l' selects double for the scanf family
template <>
const wchar_t* GetScanFormatString(float)
{
return L" %g";
}
template <>
const wchar_t* GetScanFormatString(double)
{
return L" %lg";
}
// NOTE(review): %llu assumes size_t has the width of unsigned long long -- TODO confirm for 32-bit targets
template <>
const wchar_t* GetScanFormatString(size_t)
{
return L" %llu";
}
template <>
const wchar_t* GetScanFormatString(long long)
{
return L" %lli";
}
// Explicit specializations of GetFormatString(): each one returns the wide-character
// printf-style conversion for the type of its (unnamed, unused) argument.
// Every format begins with a blank. Note that fgetText() in this file also passes
// the char and wchar_t variants to fwscanf().
// 8-bit character ('h' length modifier)
template <>
const wchar_t* GetFormatString(char)
{
return L" %hc";
}
// wide character ('l' length modifier)
template <>
const wchar_t* GetFormatString(wchar_t)
{
return L" %lc";
}
// signed integers
template <>
const wchar_t* GetFormatString(short)
{
return L" %hi";
}
template <>
const wchar_t* GetFormatString(int)
{
return L" %i";
}
template <>
const wchar_t* GetFormatString(long)
{
return L" %li";
}
// unsigned integers
template <>
const wchar_t* GetFormatString(unsigned short)
{
return L" %hu";
}
template <>
const wchar_t* GetFormatString(unsigned int)
{
return L" %u";
}
// (no specialization for unsigned long; the line below is intentionally disabled)
//template <> const wchar_t* GetFormatString(unsigned long) {return L" %lu";}
// floating point: 9 resp. 17 significant digits are requested
// (presumably so that float/double values survive a text round trip -- TODO confirm intent)
template <>
const wchar_t* GetFormatString(float)
{
return L" %.9g";
}
template <>
const wchar_t* GetFormatString(double)
{
return L" %.17g";
}
// NOTE(review): %llu assumes size_t has the width of unsigned long long -- TODO confirm for 32-bit targets
template <>
const wchar_t* GetFormatString(size_t)
{
return L" %llu";
}
template <>
const wchar_t* GetFormatString(long long)
{
return L" %lli";
}
// 0-terminated strings: narrow ('h') and wide ('l')
template <>
const wchar_t* GetFormatString(const char*)
{
return L" %hs";
}
template <>
const wchar_t* GetFormatString(const wchar_t*)
{
return L" %ls";
}
// ----------------------------------------------------------------------------
// fgetText() specializations for fwscanf differences: get a value from a text file
// ----------------------------------------------------------------------------
void fgetText(FILE* f, char& v)
{
const wchar_t* formatString = GetFormatString(v);
int rc = fwscanf(f, formatString, &v);
if (rc == 0)
RuntimeError("error reading value from file (invalid format): %ls", formatString);
else if (rc == EOF)
RuntimeError("error reading from file: %s", strerror(errno));
assert(rc == 1);
}
void fgetText(FILE* f, wchar_t& v)
{
const wchar_t* formatString = GetFormatString(v);
int rc = fwscanf(f, formatString, &v);
if (rc == 0)
RuntimeError("error reading value from file (invalid format): %ls", formatString);
else if (rc == EOF)
RuntimeError("error reading from file: %s", strerror(errno));
assert(rc == 1);
}
// ----------------------------------------------------------------------------
// fopenOrDie(): like fopen() but terminate with err msg in case of error.
// A pathname of "-" returns stdout or stdin, depending on mode, and it will
// change the binary mode if 'b' or 't' are given. If you use this, make sure
// not to fclose() such a handle.
// ----------------------------------------------------------------------------
// Wide-character overload of strchr() so that templated code can search char and
// wchar_t mode strings with the same spelling. Forwards to wcschr().
static const wchar_t* strchr(const wchar_t* s, wchar_t v)
{
    const wchar_t* const hit = wcschr(s, v);
    return hit;
}
// pathname is "-" -- open stdin or stdout. Changes bin mode if 'b' or 't' given.
template <class _T>
FILE* fopenStdHandle(const _T* mode)
{
FILE* f = strchr(mode, 'r') ? stdin : stdout;
if (strchr(mode, 'b') || strchr(mode, 't')) // change binary mode
fsetmode(f, strchr(mode, 'b') ? 'b' : 't');
return f;
}
FILE* fopenOrDie(const string& pathname, const char* mode)
{
FILE* f = (pathname[0] == '-') ? fopenStdHandle(mode) : fopen(pathname.c_str(), mode);
if (f == NULL)
{
RuntimeError("error opening file '%s': %s", pathname.c_str(), strerror(errno));
}
if (strchr(mode, 'S'))
{ // if optimized for sequential access then use large buffer
setvbuf(f, NULL, _IOFBF, 10000000); // OK if it fails
}
return f;
}
FILE* fopenOrDie(const wstring& pathname, const wchar_t* mode)
{
FILE* f = (pathname[0] == '-') ? fopenStdHandle(mode) : _wfopen(pathname.c_str(), mode);
if (f == NULL)
{
RuntimeError("error opening file '%ls': %s", pathname.c_str(), strerror(errno));
}
if (strchr(mode, 'S'))
{ // if optimized for sequential access then use large buffer
setvbuf(f, NULL, _IOFBF, 10000000); // OK if it fails
}
return f;
}
// ----------------------------------------------------------------------------
// set mode to binary or text (pass 'b' or 't')
// ----------------------------------------------------------------------------
// Switch stream 'f' to binary ('b') or text ('t') mode.
// Any other 'type' is rejected with a RuntimeError. On platforms where the
// SET_*_MODE macros are no-ops, this only validates 'type'.
void fsetmode(FILE* f, char type)
{
    const bool binary = (type == 'b');
    if (!binary && type != 't')
    {
        RuntimeError("fsetmode: invalid type '%c'", type);
    }
#ifdef UNDER_CE // winCE and win32 have different return types for _fileno
    FILE* fd = fileno(f); // note: no error check possible
#else
    int fd = fileno(f); // note: no error check possible
#endif
    int rc;
    if (binary)
        rc = SET_BINARY_MODE(fd);
    else
        rc = SET_TEXT_MODE(fd);
    if (rc == -1)
    {
        RuntimeError("error changing file mode: %s", strerror(errno));
    }
}
// ----------------------------------------------------------------------------
// freadOrDie(): like fread() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
void freadOrDie(void* ptr, size_t size, size_t count, FILE* f)
{
// \\XXX\C$ reads are limited, with some randomness (e.g. 48 MB), on Windows 7 32 bit, so we break this into chunks of some MB. Meh.
while (count > 0)
{
size_t chunkn = min(count, (size_t) 15 * 1024 * 1024); // BUGBUG: I surely meant this limit to be bytes, not units of 'size'...
size_t n = fread(ptr, size, chunkn, f);
if (n != chunkn)
RuntimeError("error reading from file: %s", strerror(errno));
count -= n;
ptr = n * size + (char*) ptr;
}
}
#ifdef _WIN32
// Read exactly 'count' elements of 'size' bytes each from Win32 file handle 'f' into 'ptr'.
// Throws (via RuntimeError) if ReadFile() fails or delivers fewer bytes than requested.
void freadOrDie(void* ptr, size_t size, size_t count, const HANDLE f)
{
    // \\XXX\C$ reads are limited, with some randomness (e.g. 48 MB), on Windows 7 32 bit, so we break this into chunks of some MB. Meh.
    // Progress is tracked in bytes: a chunk need not be a multiple of 'size', so counting
    // completed elements per chunk would lose the remainder and read past the requested range.
    size_t bytesLeft = size * count;
    char* dst = (char*) ptr;
    while (bytesLeft > 0)
    {
        const DWORD chunkn = (DWORD) min(bytesLeft, (size_t) 15 * 1024 * 1024);
        DWORD n = 0;
        if (!ReadFile(f, dst, chunkn, &n, NULL))
            RuntimeError("error number for reading from file: %d", (int) GetLastError());
        if (n != chunkn) // ReadFile() succeeded but hit the end of the file early
            RuntimeError("error reading from file: unexpected end of file (%d of %d bytes read)", (int) n, (int) chunkn);
        bytesLeft -= n;
        dst += n;
    }
}
#endif
// ----------------------------------------------------------------------------
// fwriteOrDie(): like fwrite() but terminate with err msg in case of error;
// Windows C std lib fwrite() has problems writing >100 MB at a time (fails
// with Invalid Argument error), so we break it into chunks (yak!!)
// ----------------------------------------------------------------------------
// Write 'count' elements of 'size' bytes each from 'ptr' to 'f', in pieces of at most LIMIT bytes.
// Throws (via RuntimeError) as soon as a piece could not be written completely.
void fwriteOrDie(const void* ptr, size_t size, size_t count, FILE* f)
{
#define LIMIT (16 * 1024 * 1024) // limit to 16 MB at a time
    const char* cursor = (const char*) ptr;
    for (size_t remaining = size * count; remaining > 0;)
    {
        // size of the next piece
        size_t wantWrite = remaining;
        if (wantWrite > LIMIT)
            wantWrite = LIMIT;

        const size_t written = fwrite((const void*) cursor, 1, wantWrite, f);
        if (written != wantWrite)
        {
            RuntimeError("error writing to file (ptr=0x%08lx, size=%d, count=%d, writing %d bytes after %d): %s",
                         (unsigned long) (size_t) ptr, (int) size, (int) count, (int) wantWrite,
                         (int) (size * count - remaining),
                         strerror(errno));
        }
        cursor += wantWrite;
        remaining -= wantWrite;
    }
}
#ifdef _WIN32
// Write 'count' elements of 'size' bytes each from 'ptr' to Win32 file handle 'f',
// in pieces of at most LIMIT bytes.
// Throws (via RuntimeError) if WriteFile() fails or writes fewer bytes than requested.
void fwriteOrDie(const void* ptr, size_t size, size_t count, const HANDLE f)
{
    const char* p1 = (const char*) ptr;
    // keep the total as size_t: a DWORD would silently truncate requests of 4 GB or more
    size_t totalBytes = size * count;
    while (totalBytes > 0)
    {
#define LIMIT (16 * 1024 * 1024) // limit to 16 MB at a time
        DWORD wantWrite = (totalBytes > LIMIT) ? (DWORD) LIMIT : (DWORD) totalBytes;
        DWORD byteWritten = 0;
        const bool ok = WriteFile(f, (const void*) p1, wantWrite, &byteWritten, NULL) != FALSE;
        if (!ok || byteWritten != wantWrite)
        {
            // WriteFile() reports failures through GetLastError(), not errno;
            // all variadic arguments are cast to match their conversion specifiers
            RuntimeError("error writing to file (ptr=0x%08lx, size=%d,"
                         " count=%d, writing %d bytes after %d): error code %d",
                         (unsigned long) (size_t) ptr, (int) size, (int) count, (int) wantWrite,
                         (int) (size * count - totalBytes),
                         (int) GetLastError());
        }
        totalBytes -= wantWrite;
        p1 += wantWrite;
    }
}
#endif
// fseekOrDie(): like fseek() but throws (via RuntimeError) on failure.
// Returns the stream position that was current BEFORE the seek.
long fseekOrDie(FILE* f, long offset, int mode)
{
    const long previousPos = ftell(f);
    if (previousPos == -1L)
        RuntimeError("error seeking: %s", strerror(errno));

    if (fseek(f, offset, mode) != 0)
        RuntimeError("error seeking: %s", strerror(errno));

    return previousPos;
}
// ----------------------------------------------------------------------------
// fprintfOrDie(): like fprintf() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
#pragma warning(push)
#pragma warning(disable : 4793) // 'vararg' : causes native code generation
// Formatted output to 'f' with fprintf() semantics; throws (via RuntimeError) if the write fails.
void fprintfOrDie(FILE* f, const char* fmt, ...)
{
    va_list arg_ptr;
    va_start(arg_ptr, fmt);
    int rc = vfprintf(f, fmt, arg_ptr);
    va_end(arg_ptr); // every va_start() must be paired with va_end() before leaving the function
    if (rc < 0)
    {
        RuntimeError("error writing to file: %s", strerror(errno));
    }
}
#pragma warning(pop)
// ----------------------------------------------------------------------------
// fsyncOrDie(): like fsync() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
// Ask the operating system to commit the data of 'f' to the storage device.
// Throws (via RuntimeError) if the descriptor cannot be obtained or the sync fails.
void fsyncOrDie(FILE* f)
{
    const int fd = fileno(f);
    if (fd == -1)
        RuntimeError("unable to convert file handle to file descriptor: %s", strerror(errno));

    // Ensure that all data is synced before returning from this function
#ifdef _WIN32
    const HANDLE osHandle = (HANDLE) _get_osfhandle(fd);
    if (!FlushFileBuffers(osHandle))
        RuntimeError("error syncing to file: %d", (int) ::GetLastError());
#else
    if (fsync(fd) != 0)
        RuntimeError("error syncing to file: %s", strerror(errno));
#endif
}
// ----------------------------------------------------------------------------
// fflushOrDie(): like fflush() but terminate with err msg in case of error
// ----------------------------------------------------------------------------
// Flush the stream buffer of 'f'; throws (via RuntimeError) if fflush() reports a failure.
void fflushOrDie(FILE* f)
{
    if (fflush(f) == 0)
        return;
    RuntimeError("error flushing to file: %s", strerror(errno));
}
// ----------------------------------------------------------------------------
// filesize(): determine size of the file in bytes (with open file)
// ----------------------------------------------------------------------------
// Return the size in bytes of the file behind the open stream 'f'.
// Windows: seeks to the end and back, so the stream position is unchanged on return.
// Elsewhere: queries fstat() on the underlying descriptor.
// Throws (via RuntimeError) if any of the underlying calls fails.
size_t filesize(FILE* f)
{
#ifdef _WIN32
    // remember the current position so that it can be restored afterwards
    size_t savedPos = _ftelli64(f);
    if (savedPos == -1L)
    {
        RuntimeError("error determining file position: %s", strerror(errno));
    }
    if (_fseeki64(f, 0, SEEK_END) != 0)
    {
        RuntimeError("error seeking to end of file: %s", strerror(errno));
    }
    // the position at the end of the file is its length
    size_t fileLen = _ftelli64(f);
    if (fileLen == -1L)
    {
        RuntimeError("error determining file position: %s", strerror(errno));
    }
    if (_fseeki64(f, savedPos, SEEK_SET) != 0)
    {
        RuntimeError("error resetting file position: %s", strerror(errno));
    }
    return fileLen;
#else // TODO: test this
    struct stat info;
    if (fstat(fileno(f), &info) != 0)
    {
        RuntimeError("error determining length of file: %s", strerror(errno));
    }
    static_assert(sizeof(info.st_size) >= sizeof(uint64_t), "struct stat not compiled for 64-bit mode");
    return info.st_size;
#endif
}
// filesize(): determine size of the file in bytes (with pathname)
// Return the size in bytes of the file named 'pathname'.
// The file is opened in binary read mode for the duration of the call and is closed
// again on both the normal and the exceptional path.
size_t filesize(const wchar_t* pathname)
{
    FILE* f = fopenOrDie(pathname, L"rb");
    size_t len;
    try
    {
        len = filesize(f);
    }
    catch (...)
    {
        fclose(f); // do not leak the handle if measuring failed
        throw;
    }
    fclose(f);
    return (size_t) len;
}
#ifndef UNDER_CE // no 64-bit under winCE
// filesize64(): size in bytes of the file named 'pathname', as a 64-bit value.
// On Windows a failing _wstat64() is reported as size 0 rather than as an error;
// on other platforms this forwards to filesize().
int64_t filesize64(const wchar_t* pathname)
{
#ifdef _WIN32
    struct _stat64 fileinfo;
    const bool statFailed = (_wstat64(pathname, &fileinfo) == -1);
    if (statFailed)
        return 0;
    return fileinfo.st_size;
#else
    const size_t len = filesize(pathname);
    return len;
#endif
}
#endif
// ----------------------------------------------------------------------------
// fget/setpos(): seek functions with error handling
// ----------------------------------------------------------------------------
uint64_t fgetpos(FILE* f)
{
#ifdef _MSC_VER // standard does not allow to cast between fpos_t and integer numbers, and indeed it does not work on Linux (but on Windows and GCC)
fpos_t post;
int rc = ::fgetpos(f, &post);
if (rc != 0)
RuntimeError("error getting file position: %s", strerror(errno));
#else
auto pos = ftello(f);
uint64_t post = (uint64_t) pos;
static_assert(sizeof(post) >= sizeof(pos), "64-bit file offsets not enabled");
if ((decltype(pos)) post != pos)
LogicError("64-bit file offsets not enabled");
#endif
return post;
}
// Move stream 'f' to the absolute byte offset 'reqpos'.
// Throws (via RuntimeError) if the final seek call reports a failure.
void fsetpos(FILE* f, uint64_t reqpos)
{
#ifdef _MSC_VER // standard does not allow to cast between fpos_t and integer numbers, and indeed it does not work on Linux (but on Windows and GCC)
#ifdef _MSC_VER // special hack for VS CRT
// Visual Studio's ::fsetpos() flushes the read buffer. This conflicts with a situation where
// we generally read linearly but skip a few bytes or KB occasionally, as is
// the case in speech recognition tools. This requires a number of optimizations.
// curpos = current position; cureob = curpos plus the CRT's f->_cnt counter
// (presumably the number of bytes still unread in the stream buffer -- CRT internal, TODO confirm)
uint64_t curpos = fgetpos(f);
uint64_t cureob = curpos + f->_cnt; // UGH: we mess with an internal structure here
// only forward seeks that land inside [curpos, cureob) are served by dummy reads
while (reqpos >= curpos && reqpos < cureob)
{
// if we made it then do not call fsetpos()
if (reqpos == fgetpos(f))
return;
// if we seek within the existing buffer, then just move to the position by dummy reads
char buf[65536];
// skip at most one scratch buffer's worth of bytes per iteration
size_t n = min((size_t) reqpos - (size_t) curpos, _countof(buf));
fread(buf, sizeof(buf[0]), n, f); // (this may fail, but really shouldn't)
curpos += n;
// since we mess with f->_cnt, if something unexpected happened to the buffer then back off
if (curpos != fgetpos(f) || curpos + f->_cnt != cureob)
break; // oops
}
#endif // end special hack for VS CRT
// actually perform the seek
// (reached when the target is outside the buffered range or the dummy-read shortcut backed off)
fpos_t post = reqpos;
int rc = ::fsetpos(f, &post);
#else // assuming __unix__
// convert to off_t and verify that the value survives the round trip
off_t post = (off_t) reqpos;
static_assert(sizeof(off_t) >= sizeof(reqpos), "64-bit file offsets not enabled");
if ((decltype(reqpos)) post != reqpos)
LogicError("64-bit file offsets not enabled");
int rc = fseeko(f, post, SEEK_SET);
#endif
// common error check for both platform branches
if (rc != 0)
RuntimeError("error setting file position: %s", strerror(errno));
}
// ----------------------------------------------------------------------------
// unlinkOrDie(): unlink() with error handling
// ----------------------------------------------------------------------------
// Delete the file 'pathname'. A file that does not exist counts as success;
// any other failure throws (via RuntimeError).
void unlinkOrDie(const std::string& pathname)
{
    if (unlink(pathname.c_str()) == 0)
        return;
    if (errno == ENOENT) // if file is missing that's what we want
        return;
    RuntimeError("error deleting file '%s': %s", pathname.c_str(), strerror(errno));
}
// Wide-character variant: delete the file 'pathname'. A file that does not exist
// counts as success; any other failure throws (via RuntimeError).
void unlinkOrDie(const std::wstring& pathname)
{
    if (_wunlink(pathname.c_str()) == 0)
        return;
    if (errno == ENOENT) // if file is missing that's what we want
        return;
    RuntimeError("error deleting file '%ls': %s", pathname.c_str(), strerror(errno));
}
// ----------------------------------------------------------------------------
// renameOrDie(): rename() with error handling
// ----------------------------------------------------------------------------
// Rename file 'from' to 'to', replacing an already existing 'to'.
// Throws (via RuntimeError) if the old destination cannot be removed or the rename fails.
void renameOrDie(const std::string& from, const std::string& to)
{
#ifdef _WIN32
    // deleting destination file if exits (to match Linux semantic)
    if (fexists(to.c_str()))
    {
        if (!DeleteFileA(to.c_str()))
            RuntimeError("error deleting file: '%s': %d", to.c_str(), GetLastError());
    }
    const bool moved = MoveFileA(from.c_str(), to.c_str()) != 0;
    if (!moved)
        RuntimeError("error renaming file '%s': %d", from.c_str(), GetLastError());
#else
    // Delete destination file if it exists
    // WORKAROUND: "rename" should do this but this is a workaround
    //             to the HDFS FUSE implementation's bug of failing to do so
    // workaround for FUSE rename when running on Philly
    unlinkOrDie(to);
    const int rc = rename(from.c_str(), to.c_str());
    if (rc != 0)
        RuntimeError("error renaming file '%s': %s", from.c_str(), strerror(errno));
#endif
}
void renameOrDie(const std::wstring& from, const std::wstring& to)
{
#ifdef _WIN32
// deleting destination file if exits (to match Linux semantic)
if (fexists(to.c_str()) && !DeleteFileW(to.c_str()))
RuntimeError("error deleting file '%ls': %d", to.c_str(), GetLastError());
if (!MoveFileW(from.c_str(), to.c_str()))
RuntimeError("error renaming file '%ls': %d", from.c_str(), GetLastError());
#else
renameOrDie(wtocharpath(from.c_str()).c_str(), wtocharpath(to.c_str()).c_str());
#endif
}
// ----------------------------------------------------------------------------
// fputstring(): write a 0-terminated string
// ----------------------------------------------------------------------------
// Write the characters of 'str' followed by its terminating 0 byte to 'f'.
void fputstring(FILE* f, const char* str)
{
    const size_t lengthWithTerminator = strnlen(str, SIZE_MAX) + 1; // SECURITY NOTE: string use has been reviewed
    fwriteOrDie((void*) str, sizeof(*str), lengthWithTerminator, f);
}
// std::string convenience overload: writes str.c_str() including the terminating 0 byte.
// (Output therefore ends at the first embedded 0 character, if any.)
void fputstring(FILE* f, const std::string& str)
{
    const char* const raw = str.c_str();
    fputstring(f, raw);
}
#ifdef _WIN32
#pragma warning(push)
#pragma warning(disable : 4127)
#endif
// Write the wide string 'str' including its terminating 0 to 'f' as 16-bit code units.
// Where wchar_t is 2 bytes the buffer is written as is; where it is 4 bytes each character
// is narrowed to char16_t first (characters outside the 16-bit range are truncated).
// Throws (via RuntimeError) for any other wchar_t size.
void fputstring(FILE* f, const wchar_t* str)
{
    if (sizeof(*str) == 2)
    {
        fwriteOrDie((void*) str, sizeof(*str), wcsnlen(str, SIZE_MAX) + 1, f); // SECURITY NOTE: string use has been reviewed
    }
    else if (sizeof(*str) == 4)
    {
        const size_t strLen = wcsnlen(str, SIZE_MAX);
        std::unique_ptr<char16_t[]> str16(new char16_t[strLen + 1]);
        // size_t index: an int index would be compared signed-vs-unsigned and overflow for huge strings
        for (size_t i = 0; i < strLen; i++)
        {
            str16[i] = (char16_t) str[i];
        }
        str16[strLen] = 0;
        fwriteOrDie((void*) str16.get(), sizeof(char16_t), strLen + 1, f); // SECURITY NOTE: string use has been reviewed
    }
    else
    {
        RuntimeError("error: unknown encoding\n");
    }
}
#ifdef _WIN32
#pragma warning(pop)
#endif
// std::wstring convenience overload: forwards str.c_str() to the wchar_t pointer overload.
void fputstring(FILE* f, const std::wstring& str)
{
    const wchar_t* const raw = str.c_str();
    fputstring(f, raw);
}
// ----------------------------------------------------------------------------
// fexists(): test if a file exists
// ----------------------------------------------------------------------------
// Test whether 'pathname' exists.
// MSVC: true if FindFirstFileW() finds a match. Otherwise: true if the file can be opened for reading.
bool fexists(const wchar_t* pathname)
{
#ifdef _MSC_VER
    WIN32_FIND_DATAW findFileData;
    HANDLE hFind = FindFirstFileW(pathname, &findFileData);
    const bool found = (hFind != INVALID_HANDLE_VALUE);
    if (found)
        FindClose(hFind); // only a valid search handle needs closing
    return found;
#else
    auto_file_ptr f(_wfopen(pathname, L"r"));
    return f != nullptr;
#endif
}
// Test whether 'pathname' exists.
// MSVC: true if FindFirstFileA() finds a match. Otherwise: true if the file can be opened for reading.
bool fexists(const char* pathname)
{
#ifdef _MSC_VER
    WIN32_FIND_DATAA findFileData;
    HANDLE hFind = FindFirstFileA(pathname, &findFileData);
    const bool found = (hFind != INVALID_HANDLE_VALUE);
    if (found)
        FindClose(hFind); // only a valid search handle needs closing
    return found;
#else
    auto_file_ptr f(fopen(pathname, "r"));
    return f != nullptr;
#endif
}
// ----------------------------------------------------------------------------
// funicode(): test if a file uses unicode by reading its BOM
// ----------------------------------------------------------------------------
// Read one 16-bit value from 'f' and compare it with the byte-order mark 0xFEFF.
// Returns true (leaving the stream just behind the mark) if it matches;
// otherwise seeks back to the start of the file and returns false.
bool funicode(FILE* f)
{
    unsigned short bom;
    const size_t itemsRead = fread(&bom, sizeof(short), 1, f);
    const bool hasBom = (itemsRead == 1) && ((int) bom == 0xFEFF);
    if (!hasBom)
    {
        fseek(f, 0, SEEK_SET);
        // rewind (f);
    }
    return hasBom;
}
// ----------------------------------------------------------------------------
// fgetline(): like fgets() but terminate with err msg in case of error;
// removes the newline character at the end (like gets());
// Returns 'buf' (always). buf guaranteed to be 0-terminated.
// ----------------------------------------------------------------------------
#ifdef __CYGWIN__ // strnlen() is somehow missing in Cygwin, which we use to quick-check GCC builds under Windows (although it is not a real target platform)
// Replacement strnlen(): number of characters before the first 0 in 's', but at most 'n'.
static inline size_t strnlen(const char* s, size_t n)
{
    size_t len = 0;
    while (len < n && s[len] != '\0')
        len++;
    return len;
}
#endif
#ifdef UNDER_CE // strnlen for char * not defined in winCE
// Replacement strnlen(): number of characters before the first 0 in 's', but at most 'n'.
static inline size_t strnlen(const char* s, size_t n)
{
    size_t len = 0;
    while (len < n && s[len] != '\0')
        len++;
    return len;
}
#endif
// Wide-character overload of fgets() so that templated code can read lines into
// char and wchar_t buffers with the same spelling. Forwards to fgetws().
static inline wchar_t* fgets(wchar_t* buf, int n, FILE* f)
{
    wchar_t* const line = fgetws(buf, n, f);
    return line;
}
// Wide-character overload of strnlen() so that templated code can measure
// char and wchar_t buffers with the same spelling. Forwards to wcsnlen().
static inline size_t strnlen(wchar_t* s, size_t n)
{
    const size_t len = wcsnlen(s, n);
    return len;
}
// Read one line from 'f' into 'buf' (capacity 'size' characters) and strip the line terminator
// ("\n", "\r\n", or a trailing "\r"). Always returns 'buf', which is 0-terminated.
// At end of file 'buf' becomes the empty string. Throws (via RuntimeError) on a read error
// or when the line fills the buffer.
template <class CHAR>
CHAR* fgetline(FILE* f, CHAR* buf, int size)
{
    // TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
    CHAR* line = fgets(buf, size, f);
    if (line == NULL) // EOF reached: next time feof() = true
    {
        if (ferror(f))
            RuntimeError("error reading line: %s", strerror(errno));
        buf[0] = 0;
        return buf;
    }

    size_t len = strnlen(line, size);

    // check for buffer overflow
    const size_t maxLen = (size_t) size - 1;
    if (len >= maxLen)
    {
        const size_t shown = len < 100 ? len : 100; // quote at most 100 characters in the message
        basic_string<CHAR> example(line, shown);
        uint64_t filepos = fgetpos(f); // (for error message only)
        RuntimeError("input line too long at file offset %d (max. %d characters allowed) [%s ...]", (int) filepos, (int) size - 1, msra::strfun::utf8(example).c_str());
    }

    // remove newline at end: first a "\n" (UNIX and Windows style), then a "\r"
    // (the rest of a Windows-style "\r\n", or a Mac-style lone "\r")
    if (len > 0 && line[len - 1] == '\n')
        line[--len] = 0;
    if (len > 0 && line[len - 1] == '\r')
        line[--len] = 0;
    return buf;
}
// STL string version
std::string fgetline(FILE* f)
{
vector<char> buf(1000000);
return fgetline(f, &buf[0], (int) buf.size());
}
// STL string version
std::wstring fgetlinew(FILE* f)
{
vector<wchar_t> buf(1000000);
return fgetline(f, &buf[0], (int) buf.size());
}
// STL string version avoiding most memory allocations: the caller supplies the
// scratch buffer 'buf' (resized to 1000000 here) and receives the line in 's'
void fgetline(FILE* f, std::string& s, std::vector<char>& buf)
{
    buf.resize(1000000); // enough? // KIT: increased to 1M to be safe
    const int capacity = (int) buf.size();
    s.assign(fgetline(f, &buf[0], capacity));
}
// Wide-character variant: the caller supplies the scratch buffer 'buf'
// (resized to 1000000 here) and receives the line in 's'
void fgetline(FILE* f, std::wstring& s, std::vector<wchar_t>& buf)
{
    buf.resize(1000000); // enough? // KIT: increased to 1M to be safe
    const int capacity = (int) buf.size();
    s.assign(fgetline(f, &buf[0], capacity));
}
// char buffer version: on return 'buf' holds exactly the line plus its terminating 0
void fgetline(FILE* f, std::vector<char>& buf)
{
    const int BUF_SIZE = 1000000; // enough? // KIT: increased to 1M to be safe
    buf.resize(BUF_SIZE);
    fgetline(f, &buf[0], (int) buf.size());
    // shrink to the actual line length (+1 for the terminator)
    const size_t lineLen = strnlen(&buf[0], BUF_SIZE); // SECURITY NOTE: string use has been reviewed
    buf.resize(lineLen + 1);
}
// wchar_t buffer version: on return 'buf' holds exactly the line plus its terminating 0
void fgetline(FILE* f, std::vector<wchar_t>& buf)
{
    const int BUF_SIZE = 1000000; // enough? // KIT: increased to 1M to be safe
    buf.resize(BUF_SIZE);
    fgetline(f, &buf[0], (int) buf.size());
    // shrink to the actual line length (+1 for the terminator)
    const size_t lineLen = wcsnlen(&buf[0], BUF_SIZE); // SECURITY NOTE: string use has been reviewed
    buf.resize(lineLen + 1);
}
// Read a 0-terminated string from 'f' into 'buf' (capacity 'size' bytes incl. terminator).
// Returns 'buf'. Throws (via RuntimeError) if the file ends before the 0 byte is found
// or if the string does not fit.
const char* fgetstring(FILE* f, __out_z_cap(size) char* buf, int size)
{
    int len = 0;
    while (true)
    {
        const int c = fgetc(f);
        if (c == EOF)
            RuntimeError("error reading string or missing 0: %s", strerror(errno));
        if (c == 0) // terminator reached
            break;
        if (len >= size - 1)
            RuntimeError("input line too long (max. %d characters allowed)", size - 1);
        buf[len] = (char) c;
        len++;
    }
    assert(len < size);
    buf[len] = 0;
    return buf;
}
// Read a 0-terminated (narrow) string of arbitrary length from 'f' and return it.
// Throws (via RuntimeError) if the file ends before the 0 byte is found.
string fgetstring(FILE* f)
{
    string result;
    while (true)
    {
        const int c = fgetc(f);
        if (c == EOF)
            RuntimeError("error reading string or missing 0: %s", strerror(errno));
        if (c == 0) // terminator reached
            return result;
        result.push_back((char) c);
    }
}
// Read a 0-terminated wide string from 'f' into 'buf' (capacity 'size' characters incl. terminator).
// Returns 'buf'. Throws (via RuntimeError) if the file ends before the 0 character is found
// or if the string does not fit.
const wchar_t* fgetstring(FILE* f, __out_z_cap(size) wchar_t* buf, int size)
{
    int i;
    for (i = 0;; i++)
    {
        // TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
        wint_t c = fgetwc(f);
        if (c == WEOF)
            RuntimeError("error reading string or missing 0: %s", strerror(errno));
        if (c == 0)
            break;
        if (i >= size - 1)
        {
            // (message used to read "wchar_tacters", a search-and-replace casualty)
            RuntimeError("input line too long (max. %d characters allowed)", size - 1);
        }
        buf[i] = (wchar_t) c;
    }
    assert(i < size);
    buf[i] = 0;
    return buf;
}
#if (_MSC_VER < 1800)
// Read a 0-terminated wide string stored as little-endian 16-bit units and return it.
// Throws (via RuntimeError) if the file ends before the 0 terminator is found.
// NOTE: an undefined _MSC_VER evaluates to 0 inside #if, so this variant is also the one
// compiled by non-Microsoft compilers.
wstring fgetwstring(FILE* f)
{
    // TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
    wstring res;
    for (;;)
    {
        //
        // there is a known vc++ runtime bug: Microsoft Connect 768113
        // fgetwc can skip a byte in certain condition
        // this is already fixed in update release to VS 2012
        // for now the workaround is to use fgetc twice to simulate fgetwc
        //
        // wint_t c = fgetwc (f);
        int c1 = fgetc(f);
        int c2 = fgetc(f);
        // Test each byte for EOF on its own: combining first would miss a file that ends
        // between the two bytes, and would left-shift the negative EOF value.
        if (c1 == EOF || c2 == EOF)
            RuntimeError("error reading string or missing 0: %s", strerror(errno));
        // synthetic fgetc output to simulate fgetwc
        // note the order below works only for little endian
        wint_t c = (wint_t)((c2 << 8) | c1);
        if (c == 0)
            break;
        res.push_back((wchar_t) c);
    }
    return res;
}
#else
// Read a 0-terminated wide string with fgetwc() and return it.
// Throws (via RuntimeError) if the file ends before the 0 terminator is found.
wstring fgetwstring(FILE* f)
{
    // TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
    wstring res;
    for (;;)
    {
        wint_t c = fgetwc(f);
        if (c == WEOF)
            RuntimeError("error reading string or missing 0: %s", strerror(errno));
        if (c == 0)
            break;
        res.push_back((wchar_t) c);
    }
    return res;
}
#endif
// Consume white space from 'f' up to (not including) the next non-space character.
// Returns true if at least one white-space character was skipped.
// Throws (via RuntimeError) on a read error or if the look-ahead character cannot be pushed back.
bool fskipspace(FILE* f)
{
    bool skippedAny = false;
    while (true)
    {
        const int c = fgetc(f);
        if (c == EOF) // hit the end
        {
            if (ferror(f))
                RuntimeError("error reading from file: %s", strerror(errno));
            return skippedAny;
        }
        if (!isspace(c)) // end of space: undo getting that character
        {
            if (ungetc(c, f) != c)
                RuntimeError("error in ungetc(): %s", strerror(errno));
            return skippedAny;
        }
        skippedAny = true;
    }
}
// Wide-character variant: consume white space from 'f' up to (not including) the next
// non-space character. Returns true if at least one white-space character was skipped.
// Throws (via RuntimeError) on a read error or if the look-ahead character cannot be pushed back.
bool fskipwspace(FILE* f)
{
    // TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
    bool skippedAny = false;
    while (true)
    {
        const wint_t c = fgetwc(f);
        if (c == WEOF) // hit the end
        {
            if (ferror(f))
                RuntimeError("error reading from file: %s", strerror(errno));
            return skippedAny;
        }
        if (!iswspace(c)) // end of space: undo getting that character
        {
            const wint_t rc = ungetwc(c, f);
            if (rc != c)
                RuntimeError("error in ungetc(): %s", strerror(errno));
            return skippedAny;
        }
        skippedAny = true;
    }
}
// fskipNewLine(): skip all white space until end of line incl. the newline
// skip - skip the end of line if true, otherwise leave the end of line (but eat any leading space)
// returns false, true, or EOF:
//  - true  if a newline character ('\r' or '\n') followed the blanks/tabs
//  - false if some other character followed (it is pushed back)
//  - EOF   if the end of file was reached without seeing a newline
int fskipNewline(FILE* f, bool skip)
{
int c;
bool found = false;
// skip white space
// (only blanks and tabs; the loop ends on the first other character or EOF)
do
{
c = fgetc(f);
} while (c == ' ' || c == '\t');
if (c == '\r' || c == '\n') // Accept any type of newline
{
found = true;
// when skipping, look at the character after the newline as well
// (a second newline character, e.g. the '\n' of "\r\n", is consumed too)
if (skip)
c = fgetc(f);
}
// 'c' now holds a character that must not be consumed: either the newline itself
// (found but not skipping) or whatever non-newline character came next
if ((found && !skip) ||
!(c == '\r' || c == '\n'))
{
// if we found an EOF, return that unless there was a newline before the EOF
if (c == EOF)
return found ? (int) true : EOF;
// push the look-ahead character back for the next reader
int rc = ungetc(c, f);
if (rc != c)
RuntimeError("error in ungetc(): %s", strerror(errno));
return (int) found;
}
// if we get here we saw a newline
// (skip == true and the character after the first newline was a newline too; both were consumed)
return (int) true;
}
// Read a space-terminated token from 'f' into 'buf' (capacity 'size' bytes incl. terminator).
// Leading white space is skipped; the character that ends the token is pushed back.
// Returns 'buf'. Throws (via RuntimeError) if the token does not fit.
// ...TODO: eat trailing space like fscanf() does
const char* fgettoken(FILE* f, __out_z_cap(size) char* buf, int size)
{
    fskipspace(f); // skip leading space
    int c = -1;
    int len = 0;
    while (true)
    {
        c = fgetc(f);
        if (c == EOF || isspace(c)) // end of token
            break;
        if (len >= size - 1)
            RuntimeError("input token too long (max. %d characters allowed)", size - 1);
        buf[len] = (char) c;
        len++;
    }
    // ... TODO: while (IsWhiteSpace (c)) c = fgetc (f); // skip trailing space
    if (c != EOF)
    {
        // put the terminating white-space character back
        if (ungetc(c, f) != c)
            RuntimeError("error in ungetc(): %s", strerror(errno));
    }
    assert(len < size);
    buf[len] = 0;
    return buf;
}
// Read a space-terminated token of at most 79 characters and return it as a string.
string fgettoken(FILE* f)
{
    char token[80];
    const char* text = fgettoken(f, token, sizeof(token) / sizeof(*token));
    return text;
}
// Read a space-terminated token from 'f' into 'buf' (capacity 'size' characters incl. terminator).
// Leading white space is skipped; the character that ends the token is pushed back.
// Returns 'buf'. Throws (via RuntimeError) if the token does not fit.
const wchar_t* fgettoken(FILE* f, __out_z_cap(size) wchar_t* buf, int size)
{
    // TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
    fskipwspace(f); // skip leading space
    wint_t c = WEOF;
    int i;
    for (i = 0;; i++)
    {
        c = fgetwc(f);
        if (c == WEOF)
            break;
        if (iswspace(c))
            break;
        if (i >= size - 1)
            // (message used to read "wchar_tacters", a search-and-replace casualty)
            RuntimeError("input token too long (max. %d characters allowed)", size - 1);
        buf[i] = (wchar_t) c;
    }
    // ... TODO: while (IsWhiteSpace (c)) c = fgetc (f); // skip trailing space
    if (c != WEOF)
    {
        // ungetwc() returns wint_t; keep that type so the comparison is not signed-vs-unsigned
        wint_t rc = ungetwc(c, f);
        if (rc != c)
            RuntimeError("error in ungetwc(): %s", strerror(errno));
    }
    assert(i < size);
    buf[i] = 0;
    return buf;
}
// Read a space-terminated token of at most 79 wide characters and return it as a wstring.
wstring fgetwtoken(FILE* f)
{
    wchar_t token[80];
    const wchar_t* text = fgettoken(f, token, sizeof(token) / sizeof(*token));
    return text;
}
// Read a bool stored as a single character: 'T' means true, anything else false.
// Returns the result code of the underlying character read.
template <>
int ftrygetText<bool>(FILE* f, bool& v)
{
    // initialized so that a failed read yields a well-defined 'false'
    // instead of comparing an indeterminate value
    wchar_t c = L'\0';
    int rc = ftrygetText(f, c);
    v = (c == L'T');
    return rc;
}
// ----------------------------------------------------------------------------
// fputText(): write a bool out as character
// ----------------------------------------------------------------------------
// Write a bool as a single wide character: L'T' for true, L'F' for false.
template <>
void fputText<bool>(FILE* f, bool v)
{
    if (v)
        fputText(f, L'T');
    else
        fputText(f, L'F');
}
// ----------------------------------------------------------------------------
// fgetTag(): read a 4-byte tag & return as a string
// ----------------------------------------------------------------------------
// Read 4 bytes from 'f' and return them as a string
// (which ends early if one of the bytes is 0).
std::string fgetTag(FILE* f)
{
    char tag[4 + 1]; // 4 tag bytes + terminator
    freadOrDie(tag, sizeof(char), 4, f);
    tag[4] = '\0';
    std::string result(tag);
    return result;
}
// ----------------------------------------------------------------------------
// fcheckTag(): read a 4-byte tag & verify it; terminate if wrong tag
// ----------------------------------------------------------------------------
// Read a 4-byte tag from 'f' and hand it to fcompareTag() together with 'expectedTag'.
void fcheckTag(FILE* f, const char* expectedTag)
{
    const std::string readTag = fgetTag(f);
    fcompareTag(readTag, expectedTag);
}
// Text-file variant: read the next white-space delimited token (at most 19 characters)
// and throw (via RuntimeError) unless it equals 'expectedTag'.
void fcheckTag_ascii(FILE* f, const string& expectedTag)
{
    char token[20]; // long enough for a tag
    fskipspace(f);
    fgettoken(f, token, sizeof(token) / sizeof(*token));
    if (expectedTag == token)
        return;
    RuntimeError("invalid tag '%s' found; expected '%s'", token, expectedTag.c_str());
}
// ----------------------------------------------------------------------------
// fcompareTag(): compare two tags; terminate if wrong tag
// ----------------------------------------------------------------------------
// Throw (via RuntimeError) unless 'readTag' equals 'expectedTag'.
void fcompareTag(const string& readTag, const string& expectedTag)
{
    if (readTag == expectedTag)
        return;
    RuntimeError("invalid tag '%s' found; expected '%s'",
                 readTag.c_str(), expectedTag.c_str());
}
// ----------------------------------------------------------------------------
// fputTag(): write a 4-byte tag
// ----------------------------------------------------------------------------
// Write the tag to 'f' without a terminator. The tag is expected to be exactly
// 4 characters long (checked by assert); at most 4 characters are written.
void fputTag(FILE* f, const char* tag)
{
    const int TAG_LEN = 4;
    assert(strnlen(tag, TAG_LEN + 1) == TAG_LEN);
    const size_t bytesToWrite = strnlen(tag, TAG_LEN);
    fwriteOrDie((void*) tag, sizeof(*tag), bytesToWrite, f);
}
// ----------------------------------------------------------------------------
// fskipstring(): skip a 0-terminated string, such as a pad string
// ----------------------------------------------------------------------------
void fskipstring(FILE* f)
{
    // consume bytes one at a time up to and including the terminating 0
    for (;;)
    {
        char ch;
        freadOrDie(&ch, sizeof(ch), 1, f);
        if (ch == 0)
            break;
    }
}
// ----------------------------------------------------------------------------
// fpad(): write a 0-terminated string to pad file to a n-byte boundary
// (note: file must be opened in binmode to work properly on DOS/Windows!!!)
// ----------------------------------------------------------------------------
// f: file to pad (current position is taken from ftell())
// n: alignment in bytes; must be positive
// Writes the tail of a fixed dummy string via fputstring() so that the padding
// (presumably including fputstring()'s 0-terminator) is n - (pos % n) bytes long.
// Raises a RuntimeError if ftell() fails, if n is not positive, or if the
// required padding is longer than the dummy string can provide.
void fpad(FILE* f, int n)
{
    if (n <= 0)
    {
        RuntimeError("fpad: invalid alignment %d", n); // would otherwise divide by zero below
    }
    // get current writing position (kept as long; ftell() does not return int)
    long pos = ftell(f);
    if (pos == -1L)
    {
        RuntimeError("error in ftell(): %s", strerror(errno));
    }
    // determine how many bytes are needed (at least 1 for the 0-terminator)
    // and create a dummy string of that length incl. terminator
    const char dummyString[] = "MSR-Asia: JL+FS";
    const int maxLen = (int) (sizeof(dummyString) / sizeof(dummyString[0])); // incl. terminator
    const int len = n - (int) (pos % n);                                     // in 1..n
    // The previous 'assert(offset >= 0)' on a size_t could never fire; a too-large
    // padding silently wrapped around and read outside of dummyString.
    if (len > maxLen)
    {
        RuntimeError("fpad: cannot pad by %d bytes (at most %d supported)", len, maxLen);
    }
    fputstring(f, dummyString + (maxLen - len));
}
// ----------------------------------------------------------------------------
// fgetbyte(): read a byte value
// ----------------------------------------------------------------------------
char fgetbyte(FILE* f)
{
    char value; // filled in by the read below
    const size_t valueSize = sizeof(value);
    freadOrDie(&value, valueSize, 1, f);
    return value;
}
// ----------------------------------------------------------------------------
// fgetshort(): read a short value
// ----------------------------------------------------------------------------
short fgetshort(FILE* f)
{
    // the bytes are copied verbatim from the file into the variable
    short value;
    const size_t valueSize = sizeof(value);
    freadOrDie(&value, valueSize, 1, f);
    return value;
}
// Reads two bytes and combines them with the first byte as the high-order byte.
short fgetshort_bigendian(FILE* f)
{
    unsigned char bytes[2];
    freadOrDie(&bytes, sizeof(bytes), 1, f);
    const int highPart = bytes[0] << 8;
    const int lowPart = bytes[1];
    return (short) (highPart + lowPart);
}
// ----------------------------------------------------------------------------
// fgetint24(): read a 3-byte (24-bit) int value
// ----------------------------------------------------------------------------
// Reads 3 bytes from 'f' into the first 3 bytes of an int and returns the
// sign-extended 24-bit value.
// NOTE(review): like the original, this relies on the first 3 bytes in memory
// being the low-order bytes of the int (little-endian host) -- confirm for other targets.
int fgetint24(FILE* f)
{
    int v = 0; // the read fills only 3 of the 4 bytes; the remaining byte must be defined
    assert(sizeof(v) == 4);
    freadOrDie(&v, sizeof(v) - 1, 1, f); // only read 3 lower-order bytes
    // Sign-extend bit 23 with plain arithmetic instead of shifting a signed value
    // left (which could shift into the sign bit).
    const unsigned int low24 = ((unsigned int) v) & 0x00FFFFFFu;
    return (int) (low24 ^ 0x00800000u) - 0x00800000;
}
// ----------------------------------------------------------------------------
// fgetint(): read an int value
// ----------------------------------------------------------------------------
int fgetint(FILE* f)
{
    // the bytes are copied verbatim from the file into the variable
    int value;
    const size_t valueSize = sizeof(value);
    freadOrDie(&value, valueSize, 1, f);
    return value;
}
// Reads four bytes and combines them with the first byte as the most significant one.
int fgetint_bigendian(FILE* f)
{
    unsigned char b[4];
    freadOrDie(&b, sizeof(b), 1, f);
    // Assemble in unsigned arithmetic: the bytes promote to int, so shifting a
    // first byte >= 0x80 up into the top byte would overflow a signed int.
    const uint32_t bits = ((uint32_t) b[0] << 24) |
                          ((uint32_t) b[1] << 16) |
                          ((uint32_t) b[2] << 8) |
                          (uint32_t) b[3];
    return (int) bits;
}
// Skips white space, then reads a run of decimal digits and returns its value
// (0 if there is no digit). The first non-digit character is pushed back.
// No sign is parsed, and the accumulation is not protected against overflow.
// Raises a RuntimeError if the push-back fails.
int fgetint_ascii(FILE* f)
{
    fskipspace(f);
    int res = 0;
    char c;
    freadOrDie(&c, sizeof(c), 1, f);
    while (isdigit((unsigned char) c))
    {
        res = (10 * res) + (c - '0');
        freadOrDie(&c, sizeof(c), 1, f);
    }
    // ungetc() returns the pushed-back character converted to unsigned char.
    // Comparing that against a plain (possibly negative) char reported a bogus
    // error for bytes >= 0x80, and a char equal to -1 was mistaken for EOF.
    const int pushed = (unsigned char) c;
    int rc = ungetc(pushed, f);
    if (rc != pushed)
    {
        RuntimeError("error in ungetc(): %s", strerror(errno));
    }
    return res;
}
// ----------------------------------------------------------------------------
// fgetlong(): read a long value
// ----------------------------------------------------------------------------
long fgetlong(FILE* f)
{
    // reads sizeof(long) bytes, so the amount read depends on the platform's 'long'
    long value;
    const size_t valueSize = sizeof(value);
    freadOrDie(&value, valueSize, 1, f);
    return value;
}
// ----------------------------------------------------------------------------
// fgetfloat(): read a float value
// ----------------------------------------------------------------------------
float fgetfloat(FILE* f)
{
    // the bytes are copied verbatim from the file into the variable
    float value;
    const size_t valueSize = sizeof(value);
    freadOrDie(&value, valueSize, 1, f);
    return value;
}
// Reads a 4-byte value via fgetint_bigendian() and reinterprets its bit pattern as a float.
float fgetfloat_bigendian(FILE* f)
{
    const int bitpattern = fgetint_bigendian(f);
    static_assert(sizeof(float) == sizeof(int), "float and int must have the same size");
    // Copy the bits instead of dereferencing a float* that points at an int,
    // which violates the strict-aliasing rule.
    float value;
    memcpy(&value, &bitpattern, sizeof(value));
    return value;
}
// Skips white space and parses one float with fscanf("%f").
// Raises a RuntimeError on end-of-file/read failure or if no float could be parsed.
float fgetfloat_ascii(FILE* f)
{
    fskipspace(f);
    float parsed;
    const int itemsRead = fscanf(f, "%f", &parsed); // security hint: safe overloads
    if (itemsRead == EOF)
    {
        RuntimeError("error reading from file: %s", strerror(errno));
    }
    else if (itemsRead == 0)
    {
        RuntimeError("error reading float value from file (invalid format): %s", strerror(errno));
    }
    assert(itemsRead == 1);
    return parsed;
}
// ----------------------------------------------------------------------------
// fgetdouble(): read a double value
// ----------------------------------------------------------------------------
double fgetdouble(FILE* f)
{
    // the bytes are copied verbatim from the file into the variable
    double value;
    const size_t valueSize = sizeof(value);
    freadOrDie(&value, valueSize, 1, f);
    return value;
}
#ifdef _WIN32
// ----------------------------------------------------------------------------
// WAVEHEADER: prepare, write, and update the header of a .wav file
// ----------------------------------------------------------------------------
// Fills in the header fields that do not depend on the caller-supplied format:
// the constant fields, the derived byte rate, the chunk identifiers, and the
// length fields. sampleCount == -1 means "unknown"; both lengths are then set to 0xffffffff.
// nSamplesPerSec and nBlockAlign must already be set.
void WAVEHEADER::prepareRest(int sampleCount)
{
    // constant and derived format fields
    FmtLength = 16;
    wFormatTag = 1;
    nAvgBytesPerSec = nSamplesPerSec * nBlockAlign;

    // length fields
    const bool lengthKnown = (sampleCount != -1);
    if (lengthKnown)
    {
        DataLength = sampleCount * nBlockAlign;
        RiffLength = 36 + DataLength;
    }
    else
    {
        DataLength = 0xffffffff;
        RiffLength = 0xffffffff;
    }

    // chunk identifiers, copied character by character (no terminator is stored)
    const char riffId[] = "RIFF";
    for (int i = 0; i < 4; i++)
        riffchar[i] = riffId[i];
    const char waveFmtId[] = "WAVEfmt ";
    for (int i = 0; i < 8; i++)
        wavechar[i] = waveFmtId[i];
    const char dataId[] = "data";
    for (int i = 0; i < 4; i++)
        datachar[i] = dataId[i];
}
// Sets up the header from a sampling rate (Fs), bits per sample, and channel count,
// then lets prepareRest() fill in the remaining fields for 'SampleCount' samples.
void WAVEHEADER::prepare(unsigned int Fs, int Bits, int Channels, int SampleCount)
{
    // bytes per sample frame: channels times whole bytes per sample
    const short frameBytes = (short) (Channels * (Bits / 8));

    nChannels = (short) Channels;
    wBitsPerSample = (short) Bits;
    nSamplesPerSec = Fs;
    nBlockAlign = frameBytes;
    nAvgBytesPerSec = Fs * nBlockAlign;

    prepareRest(SampleCount);
}
// Sets up the header by copying the format fields from a WAVEFORMATEX,
// then lets prepareRest() fill in the remaining fields.
void WAVEHEADER::prepare(const WAVEFORMATEX& wfx, int sampleCount /* -1 for unknown */)
{
    // copy the four format fields this header takes over from 'wfx'
    wBitsPerSample = wfx.wBitsPerSample;
    nBlockAlign = wfx.nBlockAlign;
    nSamplesPerSec = wfx.nSamplesPerSec;
    nChannels = wfx.nChannels;

    prepareRest(sampleCount);
}
// Writes the header fields to 'f' in a fixed order, then flushes the file.
// The chunk tags are written as literals; the numeric fields come from the members.
void WAVEHEADER::write(FILE* f)
{
// RIFF chunk: tag, length, and the "WAVE" form tag
fputTag(f, "RIFF");
fputint(f, RiffLength);
fputTag(f, "WAVE");
// "fmt " chunk: tag, length, and the format fields
fputTag(f, "fmt ");
fputint(f, FmtLength);
fputshort(f, wFormatTag);
fputshort(f, nChannels);
fputint(f, nSamplesPerSec);
fputint(f, nAvgBytesPerSec);
fputshort(f, nBlockAlign);
fputshort(f, wBitsPerSample);
// debug-only checks of the two constant fields (the values prepareRest() assigns)
assert(FmtLength == 16);
assert(wFormatTag == 1);
// "data" chunk: tag and length
fputTag(f, "data");
fputint(f, DataLength);
fflushOrDie(f);
}
// Rewrites the two length fields of a header already present in 'f', based on
// the current file size, and restores the file position afterwards.
// Raises a RuntimeError if the current position cannot be determined.
/*static*/ void WAVEHEADER::update(FILE* f)
{
    // remember where we are so the position can be restored at the end
    const long savedPos = ftell(f);
    if (savedPos == -1L)
    {
        RuntimeError("error determining file position: %s", strerror(errno));
    }

    // derive both lengths from the file size
    const unsigned int fileLength = (unsigned int) filesize(f);
    const unsigned int riffLength = fileLength - 8;
    const unsigned int dataLength = riffLength - 36;

    // patch the fields at byte offsets 4 and 40
    fseekOrDie(f, 4, SEEK_SET);
    fputint(f, riffLength);
    fseekOrDie(f, 40, SEEK_SET);
    fputint(f, dataLength);

    fseekOrDie(f, savedPos, SEEK_SET);
}
#endif
// ----------------------------------------------------------------------------
// fputbyte(): write a byte value
// ----------------------------------------------------------------------------
void fputbyte(FILE* f, char v)
{
    // the value is written as its raw in-memory bytes
    const size_t valueSize = sizeof(v);
    fwriteOrDie(&v, valueSize, 1, f);
}
// ----------------------------------------------------------------------------
// fputshort(): write a short value
// ----------------------------------------------------------------------------
void fputshort(FILE* f, short v)
{
    // the value is written as its raw in-memory bytes
    const size_t valueSize = sizeof(v);
    fwriteOrDie(&v, valueSize, 1, f);
}
// ----------------------------------------------------------------------------
// fputint24(): write a 3-byte (24-bit) int value
// ----------------------------------------------------------------------------
void fputint24(FILE* f, int v)
{
    assert(sizeof(v) == 4);
    // one byte fewer than the full int: the first 3 bytes of 'v' in memory
    const size_t bytesToWrite = sizeof(v) - 1;
    fwriteOrDie(&v, bytesToWrite, 1, f); // write low-order 3 bytes
}
// ----------------------------------------------------------------------------
// fputint(): write an int value
// ----------------------------------------------------------------------------
void fputint(FILE* f, int v)
{
    // the value is written as its raw in-memory bytes
    const size_t valueSize = sizeof(v);
    fwriteOrDie(&v, valueSize, 1, f);
}
// ----------------------------------------------------------------------------
// fputlong(): write a long value
// ----------------------------------------------------------------------------
void fputlong(FILE* f, long v)
{
    // writes sizeof(long) bytes, so the amount written depends on the platform's 'long'
    const size_t valueSize = sizeof(v);
    fwriteOrDie(&v, valueSize, 1, f);
}
// ----------------------------------------------------------------------------
// fputfloat(): write a float value
// ----------------------------------------------------------------------------
void fputfloat(FILE* f, float v)
{
    // the value is written as its raw in-memory bytes
    const size_t valueSize = sizeof(v);
    fwriteOrDie(&v, valueSize, 1, f);
}
// ----------------------------------------------------------------------------
// fputdouble(): write a double value
// ----------------------------------------------------------------------------
void fputdouble(FILE* f, double v)
{
    // the value is written as its raw in-memory bytes
    const size_t valueSize = sizeof(v);
    fwriteOrDie(&v, valueSize, 1, f);
}
// ----------------------------------------------------------------------------
// fputfile(): write a binary block or a string as a file
// ----------------------------------------------------------------------------
// Creates/overwrites the file 'pathname' (opened in "wb" mode) with the bytes of 'buffer'.
// Any exception raised while writing or closing is propagated to the caller.
void fputfile(const wstring& pathname, const std::vector<char>& buffer)
{
    FILE* f = fopenOrDie(pathname, L"wb");
    bool closeAttempted = false;
    try
    {
        if (buffer.size() > 0)
        { // ^^ otherwise buffer[0] is an illegal expression
            fwriteOrDie(&buffer[0], sizeof(buffer[0]), buffer.size(), f);
        }
        // Mark the stream as handed off before closing: presumably fcloseOrDie()
        // calls fclose() and throws on failure, and a stream must not be passed
        // to fclose() a second time.
        closeAttempted = true;
        fcloseOrDie(f);
    }
    catch (...)
    {
        if (!closeAttempted)
            fclose(f); // best-effort cleanup after a failed write
        throw;
    }
}
// Creates/overwrites the file 'pathname' (opened in "wb" mode) with the raw
// wchar_t elements of 'string' (no terminator, no character-set conversion).
// Any exception raised while writing or closing is propagated to the caller.
void fputfile(const wstring& pathname, const std::wstring& string)
{
    FILE* f = fopenOrDie(pathname, L"wb");
    bool closeAttempted = false;
    try
    {
        if (string.length() > 0)
        { // nothing to write for an empty string
            fwriteOrDie(string.c_str(), sizeof(string[0]), string.length(), f);
        }
        // Mark the stream as handed off before closing: presumably fcloseOrDie()
        // calls fclose() and throws on failure, and a stream must not be passed
        // to fclose() a second time.
        closeAttempted = true;
        fcloseOrDie(f);
    }
    catch (...)
    {
        if (!closeAttempted)
            fclose(f); // best-effort cleanup after a failed write
        throw;
    }
}
// Creates/overwrites the file 'pathname' (opened in "wb" mode) with the
// characters of 'string' (no terminator).
// Any exception raised while writing or closing is propagated to the caller.
void fputfile(const wstring& pathname, const std::string& string)
{
    FILE* f = fopenOrDie(pathname, L"wb");
    bool closeAttempted = false;
    try
    {
        if (string.length() > 0)
        { // nothing to write for an empty string
            fwriteOrDie(string.c_str(), sizeof(string[0]), string.length(), f);
        }
        // Mark the stream as handed off before closing: presumably fcloseOrDie()
        // calls fclose() and throws on failure, and a stream must not be passed
        // to fclose() a second time.
        closeAttempted = true;
        fcloseOrDie(f);
    }
    catch (...)
    {
        if (!closeAttempted)
            fclose(f); // best-effort cleanup after a failed write
        throw;
    }
}
// ----------------------------------------------------------------------------
// fgetfile(): load a file as a binary block
// ----------------------------------------------------------------------------
// Opens 'pathname' in "rb" mode, resizes 'buffer' to the file size, and reads
// the whole file into it. Exceptions from the helpers are propagated; the file
// is closed in either case.
void fgetfile(const wstring& pathname, std::vector<char>& buffer)
{
    FILE* f = fopenOrDie(pathname, L"rb");
    try
    {
        size_t len = filesize(f);
        buffer.resize(len);
        if (buffer.size() > 0)
        { // ^^ otherwise buffer[0] is an illegal expression
            freadOrDie(&buffer[0], sizeof(buffer[0]), buffer.size(), f);
        }
    }
    catch (...)
    {
        // do not leak the open file when sizing, allocating, or reading fails
        fclose(f);
        throw;
    }
    fclose(f);
}
// Replaces the content of 'buffer' with everything that can be read from 'f'
// from its current position until end-of-file.
// Raises a RuntimeError if the stream reports a read error.
void fgetfile(FILE* f, std::vector<char>& buffer)
{ // this version reads until eof
    buffer.clear();
    buffer.reserve(1000000); // avoid too many reallocations
    std::vector<char> chunk(65536); // read in chunks of this size
    for (;;)
    {
        if (feof(f)) // read until eof
            break;
        const size_t bytesRead = fread(&chunk[0], sizeof(chunk[0]), chunk.size(), f);
        if (ferror(f))
        {
            RuntimeError("fgetfile: error reading from file: %s", strerror(errno));
        }
        // append only the part of the chunk that was actually filled
        buffer.insert(buffer.end(), chunk.begin(), chunk.begin() + bytesRead);
    }
    buffer.reserve(buffer.size()); // capacity is already >= size at this point
}
// load it into RAM in one huge chunk
// Reads the file 'path' into 'buffer' and appends a 0 byte so that the buffer
// can be used as a C string. Returns the file size (not counting the appended 0).
static size_t fgetfilechars(const std::wstring& path, vector<char>& buffer)
{
    auto_file_ptr f(fopenOrDie(path, L"rb"));
    const size_t fileLength = filesize(f);
    // one extra element for the terminator appended below
    buffer.reserve(fileLength + 1);
    freadOrDie(buffer, fileLength, f);
    buffer.push_back(0); // this makes it a proper C string
    return fileLength;
}
// Splits the C string 's' in place at CR/LF characters (using strtok(), which
// overwrites delimiters with 0 and skips empty lines) and appends a pointer to
// each resulting line to 'lines'.
template <class LINES>
static void strtoklines(char* s, LINES& lines)
{
    const char* const lineDelimiters = "\r\n";
    char* line = strtok(s, lineDelimiters);
    while (line)
    {
        lines.push_back(line);
        line = strtok(NULL, lineDelimiters);
    }
}
// Loads the file 'path' into 'buffer' and stores its lines in 'lines' as strings;
// any previous content of 'lines' is discarded.
void msra::files::fgetfilelines(const std::wstring& path, vector<char>& buffer, std::vector<std::string>& lines)
{
    // load it into RAM in one huge chunk
    const size_t fileLength = fgetfilechars(path, buffer);
    // parse into lines
    lines.clear();
    lines.reserve(fileLength / 20); // capacity guess of one line per 20 bytes
    char* const text = &buffer[0];
    strtoklines(text, lines);
}
// same as above but returning const char* (avoiding the memory allocation)
// Loads the file 'path' into 'buffer' and returns pointers to the beginning of
// each line; the pointers refer into 'buffer'.
vector<char*> msra::files::fgetfilelines(const wstring& path, vector<char>& buffer)
{
    // load it into RAM in one huge chunk
    const size_t fileLength = fgetfilechars(path, buffer);
    // parse into lines
    vector<char*> linePointers;
    linePointers.reserve(fileLength / 20); // capacity guess of one line per 20 bytes
    char* const text = &buffer[0];
    strtoklines(text, linePointers);
    return linePointers;
}
// ----------------------------------------------------------------------------
// getfiletime(): access modification time
// ----------------------------------------------------------------------------
#ifndef _FILETIME_
//typedef struct _FILETIME { DWORD dwLowDateTime; DWORD dwHighDateTime; }; // from minwindef.h
typedef time_t FILETIME;
#else
bool operator>=(const FILETIME& targettime, const FILETIME& inputtime) // for use in fuptodate()
{
    // the high parts decide unless they are equal ...
    if (targettime.dwHighDateTime != inputtime.dwHighDateTime)
        return targettime.dwHighDateTime > inputtime.dwHighDateTime;
    // ... in which case the low parts are compared
    return targettime.dwLowDateTime >= inputtime.dwLowDateTime;
}
#endif
#ifdef _WIN32
// Holder for a find handle: calls ::FindClose() on destruction unless the
// handle is INVALID_HANDLE_VALUE. Converts implicitly to HANDLE.
// Copying is disabled (copy operations are private and never defined).
class auto_find_handle
{
public:
    auto_find_handle(HANDLE p_h)
        : h(p_h)
    {
    }

    ~auto_find_handle()
    {
        // TODO: Check for error code and throw if !std::uncaught_exception()
        if (h == INVALID_HANDLE_VALUE)
            return; // nothing to close
        ::FindClose(h);
    }

    operator HANDLE() const
    {
        return h;
    }

private:
    auto_find_handle operator=(const auto_find_handle&);
    auto_find_handle(const auto_find_handle&);

    HANDLE h;
};
#endif
bool getfiletime(const wstring& path, FILETIME& time)
{ // return file modification time, false if cannot be determined
#ifdef _WIN32
WIN32_FIND_DATAW findFileData;
auto_find_handle hFind(FindFirstFileW(path.c_str(), &findFileData));
if (hFind != INVALID_HANDLE_VALUE)
{
time = findFileData.ftLastWriteTime;
return true;
}
else
return false;
#else // TODO: test this; e.g. does st_mtime have the desired resolution?
struct stat buf;
int result;
// Get data associated with "crt_stat.c":
result = stat(wtocharpath(path.c_str()).c_str(), &buf);
// Check if statistics are valid:
if (result != 0)
return false;
time = buf.st_mtime;
return true;
#endif
}
// ----------------------------------------------------------------------------
// expand_wildcards -- wildcard expansion of a path, including directories.
// ----------------------------------------------------------------------------
#ifdef _WIN32
// Win32-style variant of this function (in case we want to use it some day)
// Returns 0 in case of failure. May throw in case of bad_alloc.
// path:  pattern to expand; '/' is treated like '\', a trailing '\' is dropped
// paths: receives (appended) the matching paths; a pattern without wildcard is
//        appended as is, without checking that it exists
// Directory components containing wildcards are expanded recursively.
static BOOL ExpandWildcards(wstring path, vector<wstring>& paths)
{
    // convert root to DOS filename convention
    for (size_t k = 0; k < path.length(); k++)
        if (path[k] == '/')
            path[k] = '\\';
    // remove terminating backslash
    // (checked via empty(): 'length() - 1' wraps around for an empty path, and the
    // former 'last >= 0' test on a size_t could never reject that)
    if (!path.empty() && path[path.length() - 1] == '\\')
        path.erase(path.length() - 1);
    // convert root to long filename convention
    // if (path.find (L"\\\\?\\") != 0)
    //    path = L"\\\\?\\" + root;
    // split off everything after first wildcard
    size_t wpos = path.find_first_of(L"*?");
    if (wpos == 2 && path[0] == '\\' && path[1] == '\\')
        wpos = path.find_first_of(L"*?", 4); // 4=skip "\\?\"
    if (wpos == wstring::npos)
    { // no wildcard: just return it
        paths.push_back(path);
        return TRUE;
    }
    // split off everything afterwards if any
    wstring rest; // remaining path after this directory
    size_t spos = path.find_first_of(L"\\", wpos + 1);
    if (spos != wstring::npos)
    {
        rest = path.substr(spos + 1);
        path.erase(spos);
    }
    // crawl folder
    WIN32_FIND_DATAW ffdata;
    auto_find_handle hFind(::FindFirstFileW(path.c_str(), &ffdata));
    if (hFind == INVALID_HANDLE_VALUE)
    {
        DWORD err = ::GetLastError();
        if (rest.empty() && err == 2) // 2 = ERROR_FILE_NOT_FOUND
            return TRUE;              // no matching file: empty
        return FALSE;                 // another error
    }
    size_t pos = path.find_last_of(L"\\");
    if (pos == wstring::npos)
        LogicError("unexpected missing \\ in path");
    wstring parent = path.substr(0, pos);
    do
    {
        // skip this and parent directory
        // (note: the test matches every directory whose name starts with '.')
        bool isDir = ((ffdata.dwFileAttributes & (FILE_ATTRIBUTE_DIRECTORY | FILE_ATTRIBUTE_REPARSE_POINT)) != 0);
        if (isDir && ffdata.cFileName[0] == '.')
            continue;
        wstring filename = parent + L"\\" + ffdata.cFileName;
        if (rest.empty())
        {
            paths.push_back(filename);
        }
        else if (isDir) // multi-wildcards: further expand
        {
            // error here means no match, e.g. Access Denied to one subfolder --deliberately ignored
            (void) ExpandWildcards(filename + L"\\" + rest, paths);
        }
    } while (::FindNextFileW(hFind, &ffdata) != 0);
    return TRUE;
}
#endif
void expand_wildcards(const wstring& path, vector<wstring>& paths)
{
#ifdef _WIN32
BOOL rc = ExpandWildcards(path, paths);
if (!rc)
RuntimeError("error in expanding wild cards '%ls': Win32 error %d", path.c_str(), (int) ::GetLastError());
#else
// On Linux we have just the function for the job: glob
glob_t globResult;
if (glob(wtocharpath(path.c_str()).c_str(), GLOB_TILDE, NULL, &globResult) != 0)
{
RuntimeError("error in expanding wild cards '%ls': %s", path.c_str(), strerror(errno));
}
for (unsigned int i = 0; i < globResult.gl_pathc; ++i)
{
paths.push_back(msra::strfun::utf16(globResult.gl_pathv[i]));
}
globfree(&globResult);
#endif
}
// ----------------------------------------------------------------------------
// make_intermediate_dirs() -- make all intermediate dirs on a path
// ----------------------------------------------------------------------------
// Creates the directory 'path'. An already existing directory is not an error.
// Raises a RuntimeError if the directory cannot be created.
static void mkdir(const wstring& path)
{
    int rc = _wmkdir(path.c_str());
    if (rc >= 0 || errno == EEXIST)
        return; // no error or already existing --ok
#ifdef _WIN32 // bug in _wmkdir(): returns access_denied if folder exists but read-only --check existence
    if (errno == EACCES)
    {
        DWORD att = ::GetFileAttributesW(path.c_str());
        // Both conditions must hold: the query succeeded AND the path is a directory.
        // (With '||', a failed query passed too, because INVALID_FILE_ATTRIBUTES has the directory bit set.)
        if (att != INVALID_FILE_ATTRIBUTES && (att & FILE_ATTRIBUTE_DIRECTORY) != 0)
            return; // ok
    }
#endif
    RuntimeError("mkdir: error creating intermediate directory %ls", path.c_str());
}
// make subdir of a file including parents
// Creates every directory on 'filepath' except its last component, so a path
// that ends in a file name gets all its parent directories created.
void msra::files::make_intermediate_dirs(const wstring& filepath)
{
    // writable copy of the path: tokenizing modifies the characters in place
    vector<wchar_t> pathChars(filepath.length() + 1, 0);
    wcscpy(&pathChars[0], filepath.c_str());
    wstring prefix;       // path assembled so far
    int levelsToSkip = 0; // leading levels that must not be created
#ifdef _WIN32
    // On windows, if share (\\) then the first two levels (machine, share name) cannot be made.
    const bool slashShare = (pathChars[0] == '/' && pathChars[1] == '/');
    const bool backslashShare = (pathChars[0] == '\\' && pathChars[1] == '\\');
    if (slashShare || backslashShare)
    {
        prefix = L"/";
        levelsToSkip = 2; // skip two levels (machine, share)
    }
#else
    // On unix, if the filepath starts with '/' then it is absolute
    // path and the created sub-paths should also start with '/'
    if (pathChars[0] == '/')
    {
        prefix = L"/";
    }
#endif
    // make all constituents except the filename (to make a dir, include a trailing slash)
    wchar_t* tokenizerState = nullptr;
    const wchar_t* component = wcstok_s(&pathChars[0], L"/\\", &tokenizerState);
    while (component)
    {
        // 'prefix' holds the path up to, but excluding, the current component
        if (levelsToSkip > 0)
        {
            levelsToSkip--; // skip this level
        }
        else if (prefix != L"" && prefix != L"/" && prefix != L"\\")
        {
            mkdir(prefix);
        }
        // rebuild the final path
        if (!prefix.empty())
            prefix += L"/";
        prefix += component;
        component = wcstok_s(NULL, L"/\\", &tokenizerState);
    }
}
// ----------------------------------------------------------------------------
// fuptodate() -- test whether an output file is at least as new as an input file
// ----------------------------------------------------------------------------
// test if file 'target' is not older than 'input' --used for make mode
// 'input' must exist if 'inputrequired'; otherwise if 'target' exists, it is considered up to date
// 'target' may or may not exist
// Returns true if 'target' has a modification time that is not older than that of 'input'.
// A missing target yields false; a missing input yields !inputrequired.
bool msra::files::fuptodate(const wstring& target, const wstring& input, bool inputrequired)
{
    FILETIME targettime;
    const bool haveTargetTime = getfiletime(target, targettime);
    if (!haveTargetTime)
        return false; // target missing: need to update

    FILETIME inputtime;
    const bool haveInputTime = getfiletime(input, inputtime);
    if (!haveInputTime)
        return !inputrequired; // input missing: if required, pretend to be out of date as to force caller to fail

    // up to date if target has higher time stamp
    const bool upToDate = (targettime >= inputtime); // note: uses an overload for WIN32 FILETIME
    return upToDate;
}
// separate string by separator
// Splits 'str' at every character that occurs in 'sep' (each character of
// 'sep' is a separator on its own). Empty pieces are dropped.
template<class String>
vector<String> SplitString(const String& str, const String& sep)
{
    vector<String> pieces;
    size_t pieceStart = 0;
    for (;;)
    {
        const size_t sepPos = str.find_first_of(sep, pieceStart);
        const bool isLastPiece = (sepPos == String::npos);
        // the last piece runs to the end of the string
        const size_t pieceEnd = isLastPiece ? str.length() : sepPos;
        String piece = str.substr(pieceStart, pieceEnd - pieceStart);
        if (!piece.empty())
            pieces.push_back(piece);
        if (isLastPiece)
            break;
        pieceStart = sepPos + 1; // continue right after the separator
    }
    return pieces;
}
// explicit instantiations for narrow and wide strings
template vector<string> SplitString(const string& istr, const string& sep);
template vector<wstring> SplitString(const wstring& istr, const wstring& sep);
// Converts a wide string to a multibyte string using ::wcstombs(), i.e. in the
// encoding of the current C locale. If the conversion fails, whatever
// ::wcstombs() left in the (zero-filled) buffer is returned.
static inline std::string wcstombs(const std::wstring& p) // output: MBCS
{
    const size_t len = p.length();
    // A wide character may need up to MB_CUR_MAX bytes (more than 2 in e.g. UTF-8
    // locales). With only 2 bytes per character the result could be truncated, or
    // fill the buffer completely and leave it without a terminator.
    const size_t capacity = len * (size_t) MB_CUR_MAX + 1;
    std::vector<char> buf(capacity, 0); // zero-filled, so the content is always terminated
    ::wcstombs(&buf[0], p.c_str(), capacity);
    return std::string(&buf[0]);
}
// Converts a multibyte string (in the encoding of the current C locale) to a
// wide string using ::mbstowcs().
static inline std::wstring mbstowcs(const std::string& p) // input: MBCS
{
    // every wide character stems from at least one input byte, so length()+1
    // elements always suffice, including the terminator
    const size_t capacity = p.length() + 1;
    vector<wchar_t> wideChars(capacity, (wchar_t) 0); // zero-filled
    // OACR_WARNING_SUPPRESS(UNSAFE_STRING_FUNCTION, "Reviewed OK. size checked. [rogeryu 2006/03/21]");
    ::mbstowcs(&wideChars[0], p.c_str(), capacity);
    std::wstring converted(&wideChars[0]);
    return converted;
}
// Narrow-to-wide string conversion: uses the locale-based mbstowcs() helper on
// unix and a UTF-8 codecvt converter elsewhere.
wstring s2ws(const string& str)
{
#ifdef __unix__
    return mbstowcs(str);
#else
    std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> utf8Converter;
    return utf8Converter.from_bytes(str);
#endif
}
// Wide-to-narrow string conversion: uses the locale-based wcstombs() helper on
// unix and a UTF-8 codecvt converter elsewhere.
string ws2s(const wstring& wstr)
{
#ifdef __unix__
    return wcstombs(wstr);
#else
    std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> utf8Converter;
    return utf8Converter.to_bytes(wstr);
#endif
}
Computing file changes ...