Statistics
| Branch: | Revision:

root / src / common / linetokenizer.cc @ master

History | View | Annotate | Download (5.27 KB)

1 01873262 Georg Kunz
//=========================================================================
2
//  LINETOKENIZER.CC - part of
3
//                  OMNeT++/OMNEST
4
//           Discrete System Simulation in C++
5
//
6
//  Author: Andras Varga
7
//
8
//=========================================================================
9
10
/*--------------------------------------------------------------*
11
  Copyright (C) 2006-2008 OpenSim Ltd.
12

13
  This file is distributed WITHOUT ANY WARRANTY. See the file
14
  `license' for details on this and other legal matters.
15
*--------------------------------------------------------------*/
16
17
18
#include <assert.h>
19
#include <sstream>
20
#include <string.h>
21
#include "exception.h"
22
#include "linetokenizer.h"
23
24
USING_NAMESPACE
25
26
27
// Creates a tokenizer with a line buffer of bufferSize characters and room
// for maxTokenNum token pointers. A negative maxTokenNum selects the default
// of bufferSize/4 tokens. sep1 and sep2 are the two separator characters
// recognized by tokenize().
LineTokenizer::LineTokenizer(int bufferSize, int maxTokenNum, char sep1, char sep2)
    : sep1(sep1), sep2(sep2)
{
    // token pointer array
    vecsize = (maxTokenNum < 0) ? bufferSize/4 : maxTokenNum;
    vec = new char *[vecsize];

    // working copy of the line being tokenized
    lineBufferSize = bufferSize;
    lineBuffer = new char[lineBufferSize];
}
39
40
// Releases the line buffer and the token pointer array, both of which
// were allocated with new[] in the constructor.
LineTokenizer::~LineTokenizer()
{
    delete [] lineBuffer;
    delete [] vec;
}
45
46
// Converts one hexadecimal digit character to its numeric value (0..15).
// Both upper and lower case letters are accepted; any other character
// yields -1.
inline int h2d(char c)
{
    if ('0' <= c && c <= '9')
        return c - '0';
    if ('a' <= c && c <= 'f')
        return 10 + (c - 'a');
    if ('A' <= c && c <= 'F')
        return 10 + (c - 'A');
    return -1;
}
53
54
inline int h2d(char *&s)
55
{
56
    int a = h2d(*s);
57
    if (a<0) return 0;
58
    s++;
59
    int b = h2d(*s);
60
    if (b<0) return a;
61
    s++;
62
    return a*16+b;
63
}
64
65
static void interpretBackslashes(char *buffer)
66
{
67
    // interpret backslashes in-place. This works because the output
68
    // is always shorter (or equal) than the input.
69
    char *s = buffer;
70
    char *d = buffer;
71
    for (; *s; s++, d++)
72
    {
73
        if (*s=='\\')
74
        {
75
            // allow backslash as quote character, also interpret backslash sequences
76
            // note: this must be kept consistent with opp_quotestr()/opp_parsequotedstr()
77
            s++;
78
            switch(*s)
79
            {
80
                case 'b': *d = '\b'; break;
81
                case 'f': *d = '\f'; break;
82
                case 'n': *d = '\n'; break;
83
                case 'r': *d = '\r'; break;
84
                case 't': *d = '\t'; break;
85
                case 'x': s++; *d = h2d(s); s--; break; // hex code
86
                case '"': *d = '"'; break;  // quote needs to be backslashed
87
                case '\\': *d = '\\'; break;  // backslash needs to be backslashed
88
                case '\n': d--; break; // don't store line continuation (backslash followed by newline)
89
                case '\0': d--; s--; break; // string ends in stray backslash
90
                default: *d = *s; // be tolerant with unrecogized backslash sequences
91
            }
92
        }
93
        else
94
        {
95
            *d = *s;
96
        }
97
    }
98
    *d = '\0';
99
}
100
101
int LineTokenizer::tokenize(const char *line, int length)
102
{
103
    if (length >= lineBufferSize)
104
        throw opp_runtime_error("Cannot tokenize lines longer than %d", lineBufferSize - 1);
105
106
    strncpy(lineBuffer, line, length);
107
    lineBuffer[length] = '\0'; // guard
108
109
    char *s = lineBuffer + length - 1;
110
    while (s >= lineBuffer && (*s == '\r' || *s == '\n'))
111
        *s-- = '\0';
112
113
    numtokens = 0;
114
    s = lineBuffer;
115
116
    // loop through the tokens on the line
117
    for (;;)
118
    {
119
        // skip separators before token
120
        while (*s==sep1 || *s==sep2) s++;
121
122
        char *token;
123
        if (!*s)
124
        {
125
            // end of line found -- exit loop
126
            break;
127
        }
128
        else if (*s=='"')
129
        {
130
            // parse quoted string
131
            token = s+1;
132
            s++;
133
            // try to find end of quoted string
134
            bool containsBackslash = false;
135
            while (*s && *s!='"')
136
                if (*s++=='\\')
137
                    {s++; containsBackslash = true;}
138
            // check we found the close quote
139
            if (*s!='"')
140
                throw opp_runtime_error("Unmatched quote in file");
141
            // terminate quoted string with zero, overwriting close quote
142
            *s++ = 0;
143
            // if token contained a backslash (rare!), we need post-processing
144
            // to interpret the escape sequences
145
            if (containsBackslash)
146
                interpretBackslashes(token);
147
148
        }
149
        else
150
        {
151
            // parse unquoted string
152
            token = s;
153
            // try find end of string
154
            while (*s && *s!=sep1 && *s!=sep2) s++;
155
            // terminate string with zero (if we are not already at end of the line)
156
            if (*s) *s++ = 0;
157
        }
158
159
        // add token to the array (if there's room); s points to the rest of the string
160
        if (numtokens==vecsize)
161
            throw opp_runtime_error("Too many tokens on a line, max %d allowed", vecsize-1);
162
        vec[numtokens++] = token;
163
    }
164
    return numtokens;
165
}
166
167
/*
168
Example code:
169

170
#include <string.h>
171
#include <iostream>
172
using namespace std;
173

174
void tok(const char *s)
175
{
176
    char *buf = new char[strlen(s)+1];
177
    strcpy(buf, s);
178
    cout << buf << " --> ";
179

180
    LineTokenizer t;
181
    bool ok = t.tokenize(buf);
182
    if (!ok)
183
        cout << t.errorMsg(1) << endl;
184

185
    int numtokens = t.numTokens();
186
    char **vec = t.tokens();
187
    for (int i=0; i<numtokens; i++)
188
        cout << (i==0?"":":") << vec[i];
189
    cout << "\n";
190
}
191

192
int main(int argc, char **argv)
193
{
194
    tok("E 121.1344 e434");
195
    tok("E \"121.1344 e434\" 222");
196

197
    return 0;
198
}
199
*/
200