Raw File
BufferedFileReader.h
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//

#pragma once

#include <stdint.h>
#include <vector>
#include <memory>
#include "ReaderConstants.h"
#include "FileWrapper.h"

namespace CNTK {

class BufferedFileReader
{
public:
    BufferedFileReader(size_t maxSize, const FileWrapper& file);

    // File offset that correspond to the current position.
    inline size_t GetFileOffset() const { return m_fileOffset + m_index; }

    // Returns the character at the current buffer position.
    inline char Peek() const
    {
        if (m_done)
            RuntimeError("Buffer is empty.");

        return m_buffer[m_index];
    }

    // Advances the current position to the next character.
    // Returns true, unless the EOF has been reached.
    inline bool Pop() 
    {
        if (m_done)
            return false;

        if (m_buffer[m_index] == g_eol)
            m_lineNumber++;

        if (++m_index == m_buffer.size())
            Refill();

        return !m_done;
    }

    // Return the character at the current position and advances the position 
    // to the next character. Returns false when no more characters are available
    // (i.e, upon reaching the EOF).
    inline bool TryGetNext(char& c)
    {
        if (m_done)
            return false;

        c = Peek();
        Pop();  // move to the next character
        return true;
    }

    // Moves the current position to the next line (the position following an EOL delimiter).
    // Returns true, unless the EOF has been reached.
    bool TryMoveToNextLine();

    // Reads the current line (i.e., everything that's left on the current line) into the provided 
    // string reference (omitting the trailing EOL). Returns false upon reaching the EOF.
    bool TryReadLine(std::string& str);
    
    // Returns the current line number.
    inline size_t CurrentLineNumber() const { return m_lineNumber; }

    // Returns true if no more data is available (reached EOF).
    inline bool Empty() const { return m_done; }


    // File offset that correspond to the current position to read from.
    void SetFileOffset(const size_t& fileOffset)
    {
        // We reset the current buffer only if the new fileOffset is out of the buffer limits.
        // If not, we just go to the index corresponding to the offset.
        if (fileOffset >= (m_buffer.size() + m_fileOffset) || fileOffset < m_fileOffset) {
            m_file.SeekOrDie(fileOffset, SEEK_SET);
            Reset();
        }
        else
        {
            m_index = fileOffset - m_fileOffset;
            m_done = false;
        }
    }

private:
    // Read up to m_maxSize bytes from file into the buffer.
    void Refill();

    // Resets the buffer: clears the current buffer content and refills starting at the current file position.
    void Reset()
    {
        m_buffer.clear();
        m_index = 0;
        m_lineNumber = 0;
        m_done = false;
        Refill();
    }

    // Maximum allowed buffer size. 
    // Also, it defines the maximum number of bytes that we'll attempt to read at one time.
    const size_t m_maxSize{ 0 };

    // Buffer.
    std::vector<char> m_buffer;

    // Current position in the buffer.
    size_t m_index{ 0 };

    // File offset at which the buffer was fill out in the last call to Refill();
    size_t m_fileOffset{ 0 };

    // Flag indicating whether there is more data (set to true once the EOF is reached).
    bool m_done{ false };

    // Current line number;
    size_t m_lineNumber{ 0 };

    FileWrapper m_file;
};

}
back to top