Statistics
| Branch: | Revision:

root / src / common / filereader.h @ e1750c09

History | View | Annotate | Download (8.14 KB)

1
//=========================================================================
2
//  FILEREADER.H - part of
3
//                  OMNeT++/OMNEST
4
//           Discrete System Simulation in C++
5
//
6
//  Author: Levente Meszaros
7
//
8
//=========================================================================
9

    
10
/*--------------------------------------------------------------*
11
  Copyright (C) 2006-2008 OpenSim Ltd.
12

13
  This file is distributed WITHOUT ANY WARRANTY. See the file
14
  `license' for details on this and other legal matters.
15
*--------------------------------------------------------------*/
16

    
17
#ifndef __FILEREADER_H_
18
#define __FILEREADER_H_
19

    
20
#include <vector>
21
#include <string>
22
#include "platmisc.h"
23
#include "commondefs.h"
24
#include "intxtypes.h"   // for int64
25

    
26
NAMESPACE_BEGIN
27

    
28
/**
29
 * Reads a file line by line. It has to be very efficient since
30
 * it may be used up to several gigabyte-sized files (output vector files
31
 * and event logs). File reading is done in large chunks, and
32
 * the code avoids string copying and duplicating. The in-memory
33
 * buffer must be able to contain at least two lines, therefore
34
 * the maximum line length is limited to buffer size divided by 2.
35
 *
36
 * It maintains a position which is used to return subsequent lines from
37
 * the file in both directions from both ends. Automatically follows file
38
 * content when appended, but overwriting the file causes an exception to be thrown.
39
 *
40
 * All functions throw class opp_runtime_error on error.
41
 */
42
class COMMON_API FileReader
43
{
44
  private:
45
    // the file
46
    const std::string fileName;
47
    FILE *f;
48
    bool checkFileChanged;
49
    bool synchronizeWhenAppended;
50

    
51
    // the buffer
52
    const size_t bufferSize;
53
    const char *bufferBegin;
54
    const char *bufferEnd; // = buffer + bufferSize
55
    const size_t maxLineSize;
56

    
57
    // file positions and size
58
    file_offset_t bufferFileOffset;
59
    file_offset_t storedBufferFileOffset;
60
    int64 fileSize;
61

    
62
    // the currently used (filled with data) region in buffer
63
    char *dataBegin; // must point between bufferBegin and bufferEnd
64
    char *dataEnd; // must point between bufferBegin and bufferEnd
65

    
66
    // the very last line of the file as currently known
67
    file_offset_t lastLineOffset;
68
    int lastLineLength;
69
    std::string lastLine;
70

    
71
    // the position where readNextLine() or readPreviousLine() starts from
72
    char *currentDataPointer; // must point between dataBegin and dataEnd when used
73
    char *storedDataPointer;
74

    
75
    // the pointer returned by readNextLine() or readPreviousLine()
76
    file_offset_t currentLineStartOffset;
77
    file_offset_t currentLineEndOffset;
78

    
79
    // total number of lines read in so far
80
    int64 numReadLines;
81

    
82
    // total bytes read in so far
83
    int64 numReadBytes;
84

    
85
  public:
86
    enum FileChangedState {
87
        UNCHANGED,
88
        APPENDED,
89
        OVERWRITTEN,
90
    };
91

    
92
  private:
93
    /**
94
     * Reads data into the buffer till the end of the buffer in the given direction
95
     * starting from the current data pointer.
96
     * May read from 0 up to bufferSize number of bytes.
97
     */
98
    void fillBuffer(bool forward);
99

    
100
    void ensureFileOpenInternal();
101

    
102
    // assert data structure consistence
103
    void checkConsistence(bool checkDataPointer = false) const;
104
    FileChangedState checkFileChangedAndSynchronize();
105

    
106
    // store and restore state to be able to read at another position
107
    void storePosition();
108
    void restorePosition();
109

    
110
    file_offset_t pointerToFileOffset(char *pointer) const;
111
    char* fileOffsetToPointer(file_offset_t offset) const { return offset - bufferFileOffset + (char *)bufferBegin; }
112

    
113
    file_offset_t getDataBeginFileOffset() const { return pointerToFileOffset(dataBegin); }
114
    file_offset_t getDataEndFileOffset() const { return pointerToFileOffset(dataEnd); }
115
    bool hasData() const { return dataBegin != dataEnd; }
116

    
117
    void setCurrentDataPointer(char *pointer);
118

    
119
    bool isLineStart(char *s);
120
    char *findNextLineStart(char *s, bool bufferFilled = false);
121
    char *findPreviousLineStart(char *s, bool bufferFilled = false);
122

    
123
    int64 getFileSizeInternal();
124

    
125
  public:
126
    /**
127
     * Creates a tokenizer object for the given file, with the given buffer size.
128
     * The file doesn't get opened yet.
129
     */
130
    FileReader(const char *fileName, size_t bufferSize = 64 * 1024);
131

    
132
    /**
133
     * Destructor.
134
     */
135
    virtual ~FileReader();
136

    
137
    /**
138
     * Controls whether file is checked for changes each time before physically accessing it.
139
     */
140
    void setCheckFileChanged(bool value) { checkFileChanged = value; }
141

    
142
    /**
143
     * Controls what happens when it is determined that new content has been appended to the file.
144
     */
145
    void setSynchronizeWhenAppended(bool value) { synchronizeWhenAppended = value; }
146

    
147
    /**
148
     * This method is called automatically whenever the file is accessed through a public function.
149
     */
150
    void ensureFileOpen();
151

    
152
    /**
153
     * This method is called automatically from the destructor, but might be useful to release the file when it is
154
     * not needed for a long period of time.
155
     */
156
    void ensureFileClosed();
157

    
158
    /**
159
     * Checks if file has been changed on disk. A file change is considered to be an append if it did not change the
160
     * content of the last line (starting at the very same offset).
161
     */
162
    FileChangedState getFileChangedState();
163

    
164
    /**
165
     * Updates internal state to reflect file changes on disk, it does not move the current position
166
     * and thus it may become invalid for truncated files.
167
     */
168
    void synchronize();
169

    
170
    /**
171
     * Returns the first line from the file, see getNextLineBufferPointer.
172
     */
173
    char *getFirstLineBufferPointer();
174

    
175
    /**
176
     * Returns the last line from the file, see getPreviousLineBufferPointer.
177
     */
178
    char *getLastLineBufferPointer();
179

    
180
    /**
181
     * Reads the next line from the file starting from the current position, and returns a pointer to its first character.
182
     * It returns NULL after EOF. Incomplete last line gets ignored (returns NULL)
183
     * as it is possible that the file is currently being written into. When new lines are appended to the file subsequent
184
     * calls will return non NULL and continue reading lines from that on.
185
     * Moves the current position to the end of the line just returned.
186
     */
187
    char *getNextLineBufferPointer();
188

    
189
    /**
190
     * Reads the previous line from the file ending at the current position, and returns a pointer to its first character.
191
     * It returns NULL when the beginning of file reached.
192
     * Moves the current position to the beginning of the line just returned.
193
     */
194
    char *getPreviousLineBufferPointer();
195

    
196
    /**
197
     * Searches through the file from the current position for the given text and returns the first matching line.
198
     */
199
    char *findNextLineBufferPointer(const char *search, bool caseSensitive = true);
200

    
201
    /**
202
     * Searches through the file from the current position for the given text and returns the first matching line.
203
     */
204
    char *findPreviousLineBufferPointer(const char *search, bool caseSensitive = true);
205

    
206
    /**
207
     * Returns the start offset of the line last parsed with readNextLine() or readPreviousLine().
208
     */
209
    file_offset_t getCurrentLineStartOffset() const { return currentLineStartOffset; }
210

    
211
    /**
212
     * Returns the end offset of the line last parsed with readNextLine() or readPreviousLine().
213
     * This points to the start offset of the next line, so it includes the CR/LF of the previous line.
214
     */
215
    file_offset_t getCurrentLineEndOffset() const { return currentLineEndOffset; }
216

    
217
    /**
218
     * Returns the length of the last line including CR/LF.
219
     */
220
    int getCurrentLineLength() const { return currentLineEndOffset - currentLineStartOffset; }
221

    
222
    /**
223
     * Returns the size of the file when last time synchronize was called.
224
     */
225
    int64 getFileSize();
226

    
227
    /**
228
     * Moves the current position to the given offset.
229
     */
230
    void seekTo(file_offset_t offset, unsigned int ensureBufferSizeAround = 0);
231

    
232
    /**
233
     * Returns the total number of lines requested so far.
234
     */
235
    int64 getNumReadLines() const { return numReadLines; };
236

    
237
    /**
238
     * Returns the total number of bytes read in so far.
239
     */
240
    int64 getNumReadBytes() const { return numReadBytes; }
241
};
242

    
243
NAMESPACE_END
244

    
245

    
246
#endif