Project

General

Profile

Statistics
| Branch: | Revision:

root / src / common / filereader.cc @ e1750c09

History | View | Annotate | Download (18.4 KB)

1 01873262 Georg Kunz
//=========================================================================
//  FILEREADER.CC - part of
//                  OMNeT++/OMNEST
//           Discrete System Simulation in C++
//
//  Author: Levente Meszaros
//
//=========================================================================

/*--------------------------------------------------------------*
  Copyright (C) 2006-2008 OpenSim Ltd.

  This file is distributed WITHOUT ANY WARRANTY. See the file
  `license' for details on this and other legal matters.
*--------------------------------------------------------------*/
16
17
18
#include <sstream>
19
#include <stdio.h>
20
#include <string.h>
21
#include <sys/stat.h>
22
#include "platmisc.h"
23
#include "commonutil.h"
24
#include "filereader.h"
25
#include "exception.h"
26
27
USING_NAMESPACE

// printf conversion prefix for 64-bit integers; spliced between "%" and "d"
// in the format strings used in this file
#define LL  INT64_PRINTF_FORMAT

// when true, the seek/read tracing printf() calls in this file become active
// (those calls are additionally wrapped in #ifndef NDEBUG)
#define PRINT_DEBUG_MESSAGES false
32
33
/**
 * Sets up a reader for fileName with a freshly allocated buffer of
 * bufferSize bytes. The file is not opened here. All offsets start out
 * as -1 and all data pointers as NULL.
 */
FileReader::FileReader(const char *fileName, size_t bufferSize)
   : fileName(fileName),
     bufferSize(bufferSize),
     bufferBegin(new char[bufferSize]),
     // NOTE(review): requires bufferBegin to be initialized before bufferEnd,
     // i.e. declared earlier in the class -- confirm in filereader.h
     bufferEnd(bufferBegin + bufferSize),
     maxLineSize(bufferSize / 2)
{
    // no file handle until the first ensureFileOpen*()
    f = NULL;

    // change detection settings
    synchronizeWhenAppended = true;
    checkFileChanged = true;

    // read statistics
    numReadBytes = 0;
    numReadLines = 0;

    // file size not queried yet
    fileSize = -1;

    // buffer is not mapped to any file offset, and no position is stored
    storedBufferFileOffset = -1;
    bufferFileOffset = -1;

    // no cached data and no current/stored position
    dataBegin = dataEnd = NULL;
    storedDataPointer = NULL;
    currentDataPointer = NULL;

    // last line of the file is unknown
    lastLineLength = -1;
    lastLineOffset = -1;

    // no line has been read yet
    currentLineStartOffset = currentLineEndOffset = -1;
}
61
62
/**
 * Closes the file if it is still open and releases the buffer allocated
 * by the constructor.
 */
FileReader::~FileReader()
{
    ensureFileClosed();
    delete [] bufferBegin;
}
67
68
void FileReader::ensureFileOpenInternal()
69
{
70
    if (!f) {
71
        // Note: 'b' mode turns off CR/LF translation and might be faster
72
        f = fopen(fileName.c_str(), "rb");
73
74
        if (!f)
75
            throw opp_runtime_error("Cannot open file `%s'", fileName.c_str());
76
77
        if (bufferFileOffset == -1)
78
            seekTo(0);
79
    }
80
}
81
82
void FileReader::ensureFileOpen()
83
{
84
    if (!f) {
85
        ensureFileOpenInternal();
86
        synchronize();
87
    }
88
}
89
90
void FileReader::ensureFileClosed()
91
{
92
    if (f) {
93
        fclose(f);
94
        f = NULL;
95
    }
96
}
97
98
void FileReader::storePosition()
99
{
100
    Assert(storedBufferFileOffset == -1 && !storedDataPointer);
101
    storedBufferFileOffset = bufferFileOffset;
102
    storedDataPointer = currentDataPointer;
103
}
104
105
void FileReader::restorePosition()
106
{
107
    Assert(storedBufferFileOffset != -1 && storedDataPointer);
108
    bufferFileOffset = storedBufferFileOffset;
109
    setCurrentDataPointer(storedDataPointer);
110
    dataBegin = dataEnd = NULL;
111
    storedBufferFileOffset = -1;
112
    storedDataPointer = NULL;
113
}
114
115
/**
 * Converts a pointer into the buffer to the corresponding file offset:
 * its distance from bufferBegin plus the file offset of the buffer start.
 * Asserts that the result lies within [0, fileSize].
 */
file_offset_t FileReader::pointerToFileOffset(char *pointer) const
{
    file_offset_t result = pointer - bufferBegin + bufferFileOffset;
    Assert(result >= 0 && result <= fileSize);

    return result;
}
121
122
/**
 * Compares the file's present size with the remembered one and classifies
 * the difference:
 *  - same size: UNCHANGED
 *  - smaller:   OVERWRITTEN
 *  - larger:    APPENDED if the file's current last line has the same
 *               length and the same bytes as the remembered last line,
 *               OVERWRITTEN otherwise
 * The reader's position is saved before and restored after inspecting the
 * last line.
 */
FileReader::FileChangedState FileReader::getFileChangedState()
{
    int64 newFileSize = getFileSizeInternal();

    if (newFileSize == fileSize)
        return UNCHANGED;

    if (newFileSize < fileSize)
        return OVERWRITTEN;

    // the file is larger than remembered: compare the stored last line with the one in the file
    storePosition();

    // avoid recursively checking the file for changes
    bool savedCheckFileChanged = checkFileChanged;
    checkFileChanged = false;
    char *lastLineNow = getLastLineBufferPointer();
    checkFileChanged = savedCheckFileChanged;

    int lastLineNowLength = getCurrentLineLength();
    restorePosition();

    bool sameLastLine = lastLineNow
        && lastLineNowLength == lastLineLength
        && strncmp(lastLineNow, lastLine.c_str(), lastLineLength) == 0;

    return sameLastLine ? APPENDED : OVERWRITTEN;
}
149
150
void FileReader::synchronize()
151
{
152
    ensureFileOpenInternal();
153
    int64 newFileSize = getFileSizeInternal();
154
155
    if (newFileSize != fileSize || lastLineOffset == -1) {
156
        fileSize = newFileSize;
157
        dataBegin = dataEnd = NULL;
158
159
        // read in the last line from the file
160
        storePosition();
161
162
        // avoid recursively checking the file for changes
163
        bool oldCheckFileChanged = checkFileChanged;
164
        checkFileChanged = false;
165
        char *line = getLastLineBufferPointer();
166
        checkFileChanged = oldCheckFileChanged;
167
168
        if (line) {
169
            lastLineLength = getCurrentLineLength();
170
            lastLineOffset = getCurrentLineStartOffset();
171
            lastLine.assign(line, lastLineLength);
172
        }
173
        else {
174
            lastLineLength = -1;
175
            lastLineOffset = -1;
176
            lastLine.clear();
177
        }
178
179
        restorePosition();
180
    }
181
}
182
183
void FileReader::checkConsistence(bool checkDataPointer) const
184
{
185
    bool ok = (size_t)(bufferEnd - bufferBegin) == bufferSize &&
186
      ((!dataBegin && !dataEnd) ||
187
       (dataBegin <= dataEnd && bufferBegin <= dataBegin && dataEnd <= bufferEnd &&
188
    (!checkDataPointer || (dataBegin <= currentDataPointer && currentDataPointer <= dataEnd))));
189
190
    if (!ok)
191
        throw opp_runtime_error("FileReader: internal error");
192
}
193
194
/**
 * Determines how the file changed since the last synchronization and
 * reacts to it:
 *  - OVERWRITTEN: throws opp_runtime_error;
 *  - APPENDED: calls synchronize() if synchronizeWhenAppended is set,
 *    throws opp_runtime_error otherwise;
 *  - anything else: no action.
 * Returns the detected state.
 */
FileReader::FileChangedState FileReader::checkFileChangedAndSynchronize()
{
    FileReader::FileChangedState changed = getFileChangedState();

    if (changed == OVERWRITTEN)
        throw opp_runtime_error("File changed: `%s' has been overwritten", fileName.c_str());

    if (changed == APPENDED) {
        if (!synchronizeWhenAppended)
            throw opp_runtime_error("File changed: `%s' has been appended", fileName.c_str());

        synchronize();
    }

    return changed;
}
212
213
void FileReader::setCurrentDataPointer(char *pointer)
214
{
215
    currentDataPointer = pointer;
216
217
#ifndef NDEBUG
218
    checkConsistence();
219
#endif
220
}
221
222
void FileReader::fillBuffer(bool forward)
223
{
224
#ifndef NDEBUG
225
    checkConsistence();
226
#endif
227
228
    char *dataPointer;
229
    int dataLength;
230
231
    if (!hasData()) {
232
        dataPointer = (char *)bufferBegin;
233
        dataLength = bufferSize;
234
    }
235
    else if (forward) {
236
        Assert(currentDataPointer);
237
238
        if (currentDataPointer < dataBegin) {
239
            dataPointer = currentDataPointer;
240
            dataLength = dataBegin - currentDataPointer;
241
        }
242
        else {
243
            dataPointer = dataEnd;
244
            dataLength = std::min((int64)(bufferEnd - dataEnd), getFileSize() - pointerToFileOffset(dataEnd));
245
        }
246
    }
247
    else {
248
        Assert(currentDataPointer);
249
250
        if (currentDataPointer > dataEnd) {
251
            dataPointer = dataEnd;
252
            dataLength = currentDataPointer - dataEnd;
253
        }
254
        else {
255
            dataPointer = (char *)bufferBegin;
256
            dataLength = dataBegin - bufferBegin;
257
        }
258
    }
259
260
    if (dataLength > 0) {
261
        // check for changes before reading the file
262
        if (checkFileChanged) {
263
            if (checkFileChangedAndSynchronize() != UNCHANGED) {
264
                fillBuffer(forward);
265
                return;
266
            }
267
        }
268
269
        file_offset_t fileOffset = pointerToFileOffset(dataPointer);
270
        opp_fseek(f, fileOffset, SEEK_SET);
271
        if (ferror(f))
272
            throw opp_runtime_error("Cannot seek in file `%s'", fileName.c_str());
273
274
        int bytesRead = fread(dataPointer, 1, dataLength, f);
275
        if (ferror(f))
276
            throw opp_runtime_error("Read error in file `%s'", fileName.c_str());
277
278
#ifndef NDEBUG
279
        if (PRINT_DEBUG_MESSAGES) printf("Reading data at file offset: %"LL"d, length: %d\n", fileOffset, bytesRead);
280
#endif
281
282
        if (!hasData()) {
283
            dataBegin = dataPointer;
284
            dataEnd = dataPointer + bytesRead;
285
        }
286
        else if (forward) {
287
            if (currentDataPointer < dataBegin)
288
                dataBegin = currentDataPointer;
289
            else
290
                dataEnd = dataPointer + bytesRead;
291
        }
292
        else {
293
            if (currentDataPointer > dataEnd)
294
                dataEnd = currentDataPointer;
295
            else
296
                dataBegin = (char *)bufferBegin;
297
        }
298
299
        numReadBytes += bytesRead;
300
    }
301
302
#ifndef NDEBUG
303
    checkConsistence(true);
304
#endif
305
}
306
307
bool FileReader::isLineStart(char *s) {
308
    Assert(bufferBegin <= s && s <= bufferEnd);
309
310
    if (s == bufferBegin) {
311
        // first line of file
312
        if (bufferFileOffset == 0)
313
            return true;
314
        else { // slow path
315
           file_offset_t fileOffset = pointerToFileOffset(s) - 1;
316
317
           opp_fseek(f, fileOffset, SEEK_SET);
318
           if (ferror(f))
319
               throw opp_runtime_error("Cannot seek in file `%s'", fileName.c_str());
320
321
           char previousChar;
322
           fread(&previousChar, 1, 1, f);  //XXX warning: ignored return value
323
324
           if (ferror(f))
325
               throw opp_runtime_error("Read error in file `%s'", fileName.c_str());
326
327
           return previousChar == '\n';
328
        }
329
    }
330
    else if (s - 1 < dataBegin)
331
        fillBuffer(false);
332
    else if (s - 1 >= dataEnd)
333
        fillBuffer(true);
334
335
    return *(s - 1) == '\n';
336
}
337
338
char *FileReader::findNextLineStart(char *start, bool bufferFilled)
339
{
340
    char *s = start;
341
342
    // find next CR/LF (fast path)
343
    while (s < dataEnd && *s != '\r' && *s!= '\n')
344
        s++;
345
346
    if (s < dataEnd && *s == '\r')
347
        s++;
348
349
    if (s < dataEnd && *s == '\n')
350
        s++;
351
352
    Assert(s <= dataEnd);
353
    if (s == dataEnd) // did we reach the end of the data in the buffer? (slow path)
354
    {
355
        file_offset_t fileOffset = pointerToFileOffset(start);
356
357
        if (s != start && isLineStart(s)) // line just ends at the end of data buffer
358
            return s;
359
        else if (fileOffset == getFileSize()) // searching from the end of the file
360
            return NULL;
361
        else if (!bufferFilled) { // refill buffer
362
            seekTo(fileOffset, maxLineSize);
363
            fillBuffer(true);
364
            s = fileOffsetToPointer(fileOffset);
365
366
            return findNextLineStart(s, true);
367
        }
368
        else if (getDataEndFileOffset() == getFileSize()) // searching reached to the end of the file without CR/LF
369
            return NULL;
370
        else // line too long
371
            throw opp_runtime_error("Line too long, should be below %d in file `%s'", maxLineSize, fileName.c_str());
372
    }
373
374
    return s;
375
}
376
377
char *FileReader::findPreviousLineStart(char *start, bool bufferFilled)
378
{
379
    char *s = start - 1;
380
381
    if (s >= dataBegin && *s == '\n')
382
        s--;
383
384
    if (s >= dataBegin && *s == '\r')
385
        s--;
386
387
    // find previous CR/LF (fast path)
388
    while (s >= dataBegin && *s != '\r' && *s!= '\n')
389
        s--;
390
391
    s++;
392
393
    Assert(s >= dataBegin);
394
    if (s == dataBegin) // did we reach the beginning of the data in the buffer? (slow path)
395
    {
396
        file_offset_t fileOffset = pointerToFileOffset(start);
397
398
        if (s != start && isLineStart(s)) // line starts at the beginning of the data buffer
399
            return s;
400
        else if (fileOffset == 0) // searching from the beginning of the file
401
            return NULL;
402
        else if (!bufferFilled) { // refill buffer
403
            seekTo(fileOffset, maxLineSize);
404
            fillBuffer(false);
405
            s = fileOffsetToPointer(fileOffset);
406
407
            return findPreviousLineStart(s, true);
408
        }
409
        else if (getDataBeginFileOffset() == 0) // searching reached to the beginning of the file without CR/LF
410
            return dataBegin;
411
        else // line too long
412
            throw opp_runtime_error("Line too long, should be below %d in file `%s'", maxLineSize, fileName.c_str());
413
    }
414
415
    return s;
416
}
417
418
char *FileReader::getNextLineBufferPointer()
419
{
420
    numReadLines++;
421
    ensureFileOpen();
422
423
    Assert(currentDataPointer);
424
425
#ifndef NDEBUG
426
    if (PRINT_DEBUG_MESSAGES) printf("Reading in next line at file offset: %"LL"d\n", pointerToFileOffset(currentDataPointer));
427
#endif
428
429
    // read forward if needed
430
    fillBuffer(true);
431
432
    // when starting in the middle of a line
433
    if (!isLineStart(currentDataPointer)) {
434
        char *nextLineDataPointer = findNextLineStart(currentDataPointer);
435
436
        if (nextLineDataPointer)
437
            setCurrentDataPointer(nextLineDataPointer);
438
        else {
439
            currentLineStartOffset = currentLineEndOffset = -1;
440
441
            return NULL;
442
        }
443
    }
444
445
    currentLineStartOffset = pointerToFileOffset(currentDataPointer);
446
    char *nextLineDataPointer = findNextLineStart(currentDataPointer);
447
448
    if (nextLineDataPointer) {
449
        setCurrentDataPointer(nextLineDataPointer);
450
        currentLineEndOffset = pointerToFileOffset(currentDataPointer);
451
452
        return fileOffsetToPointer(currentLineStartOffset);
453
    }
454
    else {
455
        currentLineStartOffset = currentLineEndOffset = -1;
456
457
        return NULL;
458
    }
459
}
460
461
char *FileReader::getPreviousLineBufferPointer()
462
{
463
    numReadLines++;
464
    ensureFileOpen();
465
466
    Assert(currentDataPointer);
467
468
#ifndef NDEBUG
469
    if (PRINT_DEBUG_MESSAGES) printf("Reading in previous line at file offset: %"LL"d\n", pointerToFileOffset(currentDataPointer));
470
#endif
471
472
    // read backward if needed
473
    fillBuffer(false);
474
475
    // when starting in the middle of a line
476
    if (!isLineStart(currentDataPointer)) {
477
        char *previousLineDataPointer = findPreviousLineStart(currentDataPointer);
478
479
        if (previousLineDataPointer)
480
            setCurrentDataPointer(previousLineDataPointer);
481
        else {
482
            currentLineStartOffset = currentLineEndOffset = -1;
483
484
            return NULL;
485
        }
486
    }
487
488
    currentLineEndOffset = pointerToFileOffset(currentDataPointer);
489
    char *previousLineDataPointer = findPreviousLineStart(currentDataPointer);
490
491
    if (previousLineDataPointer) {
492
        setCurrentDataPointer(previousLineDataPointer);
493
        currentLineStartOffset = pointerToFileOffset(currentDataPointer);
494
495
        return fileOffsetToPointer(currentLineStartOffset);
496
    }
497
    else {
498
        currentLineStartOffset = currentLineEndOffset = -1;
499
500
        return NULL;
501
    }
502
}
503
504
char *FileReader::getFirstLineBufferPointer()
505
{
506
    seekTo(0);
507
    return getNextLineBufferPointer();
508
}
509
510
char *FileReader::getLastLineBufferPointer()
511
{
512
    seekTo(getFileSize());
513
    return getPreviousLineBufferPointer();
514
}
515
516
/**
 * Finds the first occurrence of needle within the first n characters of
 * haystack, optionally ignoring case.
 *
 * @param haystack       text to search; the search also stops at a NUL byte
 * @param needle         NUL-terminated string to look for
 * @param n              number of haystack characters to consider;
 *                       0 means strlen(haystack)
 * @param caseSensitive  compare with strncmp() if true, strncasecmp() otherwise
 * @return pointer to the first match inside haystack, or NULL if there is none
 *
 * The needle has to fit entirely into the first n characters. The last
 * candidate start position is haystack + n - strlen(needle), and it is
 * included in the search, so a match that ends exactly at n is found.
 */
const char *strnistr(const char *haystack, const char *needle, int n, bool caseSensitive)
{
    int needleLen = strlen(needle);
    if (n == 0)
        n = strlen(haystack);

    // number of further start positions at which the needle still fits;
    // 0 means the current position is the last one to try
    int remaining = n - needleLen;

    for (const char *s = haystack; remaining >= 0 && *s; s++, remaining--) {
        int diff = caseSensitive ? strncmp(s, needle, needleLen) : strncasecmp(s, needle, needleLen);
        if (diff == 0)
            return s;
    }

    return NULL;
}
529
530
char *FileReader::findNextLineBufferPointer(const char *search, bool caseSensitive)
531
{
532
    char *line;
533
    while ((line = getNextLineBufferPointer()) != NULL)
534
        if (strnistr(line, search, getCurrentLineLength(), caseSensitive))
535
            return line;
536
537
    return NULL;
538
}
539
540
char *FileReader::findPreviousLineBufferPointer(const char *search, bool caseSensitive)
541
{
542
    char *line;
543
    while ((line = getPreviousLineBufferPointer()) != NULL)
544
        if (strnistr(line, search, getCurrentLineLength(), caseSensitive))
545
            return line;
546
547
    return NULL;
548
}
549
550
/**
 * Returns the remembered file size, querying it from the file first when
 * it is not known yet (-1).
 */
int64 FileReader::getFileSize()
{
    if (fileSize != -1)
        return fileSize;

    fileSize = getFileSizeInternal();
    return fileSize;
}
557
558
/**
 * Queries the present size of the file from its open handle, opening the
 * file first if necessary. The remembered fileSize is not modified.
 *
 * Throws opp_runtime_error if the file status cannot be obtained; without
 * this check a failed call would return an uninitialized size.
 */
int64 FileReader::getFileSizeInternal()
{
    ensureFileOpen();

    struct opp_stat_t s;
    // NOTE(review): assumes opp_fstat follows the fstat() convention of
    // returning 0 on success -- confirm in platmisc.h
    if (opp_fstat(fileno(f), &s) != 0)
        throw opp_runtime_error("Cannot stat file `%s'", fileName.c_str());

    return s.st_size;
}
566
567
/**
 * Moves the current position to the given file offset.
 *
 * If the offset lies inside the region the buffer is currently mapped to,
 * keeping at least ensureBufferSizeAround bytes of distance from both
 * buffer ends, only the current data pointer is changed. Otherwise the
 * buffer is remapped so that the offset falls near its middle (clamped to
 * the beginning and end of the file), and the part of the cached data
 * that is still covered by the new mapping is moved to its new place.
 *
 * Throws opp_runtime_error if fileOffset is negative or beyond the end of
 * the file.
 *
 * Note: format strings are written as "%" LL "d" (with spaces), because
 * "%"LL is parsed as a literal with a suffix since C++11.
 */
void FileReader::seekTo(file_offset_t fileOffset, unsigned int ensureBufferSizeAround)
{
#ifndef NDEBUG
    if (PRINT_DEBUG_MESSAGES) printf("Seeking to file offset: %" LL "d\n", fileOffset);
    checkConsistence();
#endif

    if (fileOffset < 0 || fileOffset > getFileSize())
        throw opp_runtime_error("Invalid file offset: %" LL "d", fileOffset);

    ensureFileOpen();

    // check if requested offset is already in memory
    if (bufferFileOffset != -1 &&
        bufferFileOffset + ensureBufferSizeAround <= fileOffset &&
        fileOffset <= (file_offset_t)(bufferFileOffset + bufferSize - ensureBufferSizeAround))
    {
        setCurrentDataPointer(fileOffsetToPointer(fileOffset));
        Assert(currentDataPointer);
        return;
    }

    // new mapping: the smaller of "one buffer size before the end of the
    // file" and "half a buffer size before the offset", each clamped to 0
    file_offset_t newBufferFileOffset = std::min(std::max((int64)0L, getFileSize() - (int64)bufferSize), std::max((int64)0L, fileOffset - (int64)bufferSize / 2));
    setCurrentDataPointer((char *)bufferBegin + fileOffset - newBufferFileOffset);
    Assert(currentDataPointer);

#ifndef NDEBUG
    if (PRINT_DEBUG_MESSAGES) printf("Setting buffer file offset to: %" LL "d\n", newBufferFileOffset);
#endif

    // try to keep as much data as possible
    if (hasData()) {
        // these are still expressed in terms of the old buffer mapping
        file_offset_t oldDataBeginFileOffset = getDataBeginFileOffset();
        file_offset_t oldDataEndFileOffset = getDataEndFileOffset();

#ifndef NDEBUG
        if (PRINT_DEBUG_MESSAGES) printf("Data before: from file offset: %" LL "d to file offset: %" LL "d\n", oldDataBeginFileOffset, oldDataEndFileOffset);
#endif

        // clamp the old data range to the file region covered by the new mapping
        file_offset_t newBufferBeginFileOffset = newBufferFileOffset;
        file_offset_t newBufferEndFileOffset = newBufferFileOffset + bufferSize;
        file_offset_t moveSrcBeginFileOffset = std::min(newBufferEndFileOffset, std::max(newBufferBeginFileOffset, oldDataBeginFileOffset));
        file_offset_t moveSrcEndFileOffset = std::min(newBufferEndFileOffset, std::max(newBufferBeginFileOffset, oldDataEndFileOffset));

        // the source is located via the old mapping, the destination via the new one
        char *moveSrc = fileOffsetToPointer(moveSrcBeginFileOffset);
        char *moveDest = moveSrcBeginFileOffset - newBufferBeginFileOffset + (char *)bufferBegin;
        int moveSize = moveSrcEndFileOffset - moveSrcBeginFileOffset;

        if (moveSize > 0 && moveSrc != moveDest) {
#ifndef NDEBUG
            if (PRINT_DEBUG_MESSAGES) printf("Keeping data from file offset: %" LL "d with length: %d\n", pointerToFileOffset(moveSrc), moveSize);
#endif

            fflush(stdout);

            // source and destination may overlap, hence memmove
            memmove(moveDest, moveSrc, moveSize);
        }

        bufferFileOffset = newBufferFileOffset;
        dataBegin = moveDest;
        dataEnd = moveDest + moveSize;

#ifndef NDEBUG
        if (PRINT_DEBUG_MESSAGES) printf("Data after: from file offset: %" LL "d to file offset: %" LL "d\n", getDataBeginFileOffset(), getDataEndFileOffset());
#endif
    }
    else {
        // nothing cached: start with an empty data range at the new position
        bufferFileOffset = newBufferFileOffset;
        dataBegin = currentDataPointer;
        dataEnd = currentDataPointer;
    }

#ifndef NDEBUG
    checkConsistence();
#endif
}
642
643
/*
Example code:

#include <iostream>
using namespace std;

int main(int argc, char **argv)
{
    if (argc<2)
        return 1;

    try {
        FileReader freader(argv[1],200);
        char *line;
        while ((line = freader.getNextLineBufferPointer()) != NULL)
            cout << line << "\n";
    }
    catch (std::exception& e) {
        cout << "Error: " << e.what() << endl;
    }

    return 0;
}
*/