Statistics
| Branch: | Revision:

root / src / common / patternmatcher.cc @ e1750c09

History | View | Annotate | Download (11.1 KB)

1
//==========================================================================
2
//  PATTERNMATCHER.CC - part of
3
//                     OMNeT++/OMNEST
4
//             Discrete System Simulation in C++
5
//
6
//  Author: Andras Varga
7
//
8
//==========================================================================
9

    
10
/*--------------------------------------------------------------*
11
  Copyright (C) 2006-2008 OpenSim Ltd.
12

13
  This file is distributed WITHOUT ANY WARRANTY. See the file
14
  `license' for details on this and other legal matters.
15
*--------------------------------------------------------------*/
16

    
17

    
18
#include <assert.h>
19
#include <stdio.h>
20
#include <string.h>
21
#include <stdlib.h>
22
#include "opp_ctype.h"
23
#include "platmisc.h"
24
#include "patternmatcher.h"
25
#include "stringutil.h"
26
#include "exception.h"
27

    
28
USING_NAMESPACE
29

    
30

    
31
PatternMatcher::PatternMatcher()
32
{
33
}
34

    
35
PatternMatcher::PatternMatcher(const char *pattern, bool dottedpath, bool fullstring, bool casesensitive)
36
{
37
    setPattern(pattern, dottedpath, fullstring, casesensitive);
38
}
39

    
40
PatternMatcher::~PatternMatcher()
41
{
42
}
43

    
44
void PatternMatcher::setPattern(const char *patt, bool dottedpath, bool fullstring, bool casesensitive)
45
{
46
    pattern.clear();
47
    iscasesensitive = casesensitive;
48

    
49
    // "tokenize" pattern
50
    const char *s = patt;
51
    while (*s!='\0')
52
    {
53
        Elem e;
54
        switch(*s)
55
        {
56
           case '?':  e.type = dottedpath ? COMMONCHAR : ANYCHAR; s++; break;
57
           case '[':  if (pattern.empty() || pattern.back().type!=LITERALSTRING || !parseNumRange(s, ']', e.fromnum, e.tonum))
58
                          parseLiteralString(s,e);
59
                      else
60
                          e.type = NUMRANGE;
61
                      break;
62
           case '{':  if (parseNumRange(s, '}', e.fromnum, e.tonum))
63
                          {e.type = NUMRANGE; s++;}
64
                      else
65
                          parseSet(s,e);
66
                      break;
67
           case '*':  if (*(s+1)=='*')
68
                          {e.type = ANYSEQ; s+=2;}
69
                      else
70
                          {e.type = dottedpath ? COMMONSEQ : ANYSEQ; s++;}
71
                      break;
72
           default:   parseLiteralString(s,e); break;
73
        }
74
        pattern.push_back(e);
75
    }
76

    
77
    if (!fullstring)
78
    {
79
        // for substring match, we add "**" at both ends of the pattern (unless already there)
80
        Elem e;
81
        e.type = ANYSEQ;
82
        if (pattern.empty() || pattern.back().type!=ANYSEQ)
83
            pattern.push_back(e);
84
        if (pattern.front().type!=ANYSEQ)
85
            pattern.insert(pattern.begin(),e);
86
    }
87
    Elem e;
88
    e.type = END;
89
    pattern.push_back(e);
90
}
91

    
92
void PatternMatcher::parseSet(const char *&s, Elem& e)
93
{
94
    s++; // skip "{"
95
    e.type = SET;
96
    if (*s=='^')
97
    {
98
        e.type = NEGSET;
99
        s++;
100
    }
101
    // Note: to make "}" part of the set, it must be first within the braces
102
    const char *sbeg = s;
103
    while (*s && (*s!='}' || s==sbeg))
104
    {
105
        char range[3];
106
        range[2] = 0;
107
        if (*(s+1)=='-' && *(s+2) && *(s+2)!='}')
108
        {
109
            // store "A-Z" as "AZ"
110
            range[0] = *s;
111
            range[1] = *(s+2);
112
            s+=3;
113
        }
114
        else
115
        {
116
            // store "X" as "XX"
117
            range[0] = range[1] = *s;
118
            s++;
119
        }
120
        if (!iscasesensitive)
121
        {
122
            // if one end of range is alpha and the other is not, funny things will happen
123
            range[0] = opp_toupper(range[0]);
124
            range[1] = opp_toupper(range[1]);
125
        }
126
        e.setchars += range;
127
    }
128
    if (!*s)
129
        throw opp_runtime_error("unmatched '}' in expression");
130
    s++; // skip "}"
131
}
132

    
133
void PatternMatcher::parseLiteralString(const char *&s, Elem& e)
134
{
135
    e.type = LITERALSTRING;
136
    while (*s && *s!='?' && *s!='{' && *s!='*')
137
    {
138
        long dummy;
139
        const char *s1;
140
        if (*s=='\\')
141
            e.literalstring += *(++s);
142
        else
143
            e.literalstring += *s;
144
        if (*s=='[' && parseNumRange((s1=s),']',dummy,dummy))
145
            break;
146
        s++;
147
    }
148
}
149

    
150
bool PatternMatcher::parseNumRange(const char *&str, char closingchar, long& lo, long& up)
151
{
152
    //
153
    // try to parse "[n..m]" or "{n..m}" and return true on success.
154
    // str should point at "[" or "{"; on success return it'll point to "]" or "}",
155
    // and on failure it'll be unchanged. n and m will be stored in lo and up.
156
    // They are optional -- if missing, lo or up will be set to -1.
157
    //
158
    lo = up = -1L;
159
    const char *s = str+1; // skip "[" or "{"
160
    if (opp_isdigit(*s))
161
    {
162
        lo = atol(s);
163
        while (opp_isdigit(*s)) s++;
164
    }
165
    if (*s!='.' || *(s+1)!='.')
166
        return false;
167
    s+=2;
168
    if (opp_isdigit(*s))
169
    {
170
        up = atol(s);
171
        while (opp_isdigit(*s)) s++;
172
    }
173
    if (*s!=closingchar)
174
        return false;
175
    str = s;
176
    return true;
177
}
178

    
179
std::string PatternMatcher::debugStrFrom(int from)
180
{
181
    std::string result;
182
    for (int k=from; k<(int)pattern.size(); k++)
183
    {
184
        Elem& e = pattern[k];
185
        switch (e.type)
186
        {
187
            case LITERALSTRING: result += opp_quotestr(e.literalstring.c_str()); break;
188
            case ANYCHAR: result += "?!"; break;
189
            case COMMONCHAR: result += "?"; break;
190
            case SET: result += opp_stringf("SET(%s)", e.setchars.c_str()); break;
191
            case NEGSET: result += opp_stringf("NEGSET(%s)", e.setchars.c_str()); break;
192
            case NUMRANGE: result += opp_stringf("%ld..%ld", e.fromnum, e.tonum); break;
193
            case ANYSEQ: result += "**"; break;
194
            case COMMONSEQ: result += "*"; break;
195
            case END: break;
196
            default: assert(0);
197
        }
198
        result += " ";
199
    }
200
    return result;
201
}
202

    
203
bool PatternMatcher::isInSet(char c, const char *set)
204
{
205
    assert((strlen(set)&1)==0);
206
    if (!iscasesensitive)
207
        c = opp_toupper(c); // set is already uppercase here
208
    while (*set)
209
    {
210
        if (c>=*set && c<=*(set+1))
211
            return true;
212
        set+=2;
213
    }
214
    return false;
215
}
216

    
217
bool PatternMatcher::doMatch(const char *s, int k, int suffixlen)
218
{
219
    while (true)
220
    {
221
        Elem& e = pattern[k];
222
        long num; // case NUMRANGE
223
        int len;  // case LITERALSTRING
224
        switch (e.type)
225
        {
226
            case LITERALSTRING:
227
                len = e.literalstring.length();
228
                // special case: last string literal with prefix match: allow s to be shorter
229
                if (suffixlen>0 && k==(int)pattern.size()-2)
230
                    len -= suffixlen;
231
                // compare
232
                if (iscasesensitive ?
233
                    strncmp(s, e.literalstring.c_str(), len) :
234
                    strncasecmp(s, e.literalstring.c_str(), len)
235
                   )
236
                    return false;
237
                s += len;
238
                break;
239
            case ANYCHAR:
240
                if (!*s)
241
                    return false;
242
                s++;
243
                break;
244
            case COMMONCHAR:
245
                if (!*s || *s=='.')
246
                    return false;
247
                s++;
248
                break;
249
            case SET:
250
                if (!*s)
251
                    return false;
252
                if (!isInSet(*s, e.setchars.c_str()))
253
                    return false;
254
                s++;
255
                break;
256
            case NEGSET:
257
                if (!*s)
258
                    return false;
259
                if (isInSet(*s, e.setchars.c_str()))
260
                    return false;
261
                s++;
262
                break;
263
            case NUMRANGE:
264
                if (!opp_isdigit(*s))
265
                    return false;
266
                num = atol(s);
267
                while (opp_isdigit(*s)) s++;
268
                if ((e.fromnum>=0 && num<e.fromnum) || (e.tonum>=0 && num>e.tonum))
269
                    return false;
270
                break;
271
            case ANYSEQ:
272
                // potential shortcuts: if pattern ends in ANYSEQ, rest of the input
273
                // can be anything; if pattern ends in ANYSEQ LITERAL, it's enough if
274
                // input ends in the literal string
275
                if (k==(int)pattern.size()-2)
276
                    return true;
277
                if (k==(int)pattern.size()-3 && pattern[k+1].type==LITERALSTRING)
278
                    return opp_stringendswith(s, pattern[k+1].literalstring.c_str());
279

    
280
                // general case
281
                while (true)
282
                {
283
                    if (doMatch(s,k+1,suffixlen))
284
                        return true;
285
                    if (!*s)
286
                        return false;
287
                    s++;
288
                }
289
                break; // at EOS
290
            case COMMONSEQ:
291
                while (true)
292
                {
293
                    if (doMatch(s,k+1,suffixlen))
294
                        return true;
295
                    if (!*s || *s=='.')
296
                        return false;
297
                    s++;
298
                }
299
                break;
300
            case END:
301
                return !*s;
302
            default:
303
                assert(0);
304
        }
305
        k++;
306
        assert(k<(int)pattern.size());
307
    }
308
}
309

    
310
bool PatternMatcher::matches(const char *line)
311
{
312
    assert(pattern[pattern.size()-1].type==END);
313

    
314
    // shortcut: omnetpp.ini keys often begin with "*" or "**"
315
    // but end in a string literal. So it's usually a performance win to
316
    // to first check that the last string literal of the pattern matches
317
    // the end of the string. (We do the shortcut only in the case-sensitive
318
    // case. omnetpp.ini is case sensitive.)
319

    
320
    if (pattern.size()>=2 && iscasesensitive)
321
    {
322
        Elem& e = pattern[pattern.size()-2];
323
        if (e.type==LITERALSTRING)
324
        {
325
            // return if last 2 chars don't match
326
            int pattlen = e.literalstring.size();
327
            int linelen = strlen(line);
328
            if (pattlen>=2 && linelen>=2 && (line[linelen-1]!=e.literalstring.at(pattlen-1) ||
329
                line[linelen-2]!=e.literalstring.at(pattlen-2))) //FIXME why doesn't work for pattlen==1 ?
330
                return false;
331
        }
332
    }
333

    
334
    // perform full-blown pattern matching
335
    return doMatch(line, 0, 0);
336
}
337

    
338
const char *PatternMatcher::patternPrefixMatches(const char *line, int suffixoffset)
339
{
340
    if (!iscasesensitive)
341
        throw opp_runtime_error("PatternMatcher: patternPrefixMatches() doesn't support case-insensitive match");
342

    
343
    // pattern must end in a literal string...
344
    assert(pattern[pattern.size()-1].type==END);
345
    if (pattern.size()<2)
346
        return NULL;
347
    Elem& e = pattern[pattern.size()-2];
348
    if (e.type!=LITERALSTRING)
349
        return NULL;
350

    
351
    // ...with the suffixlen characters at the end of 'line'
352
    const char *pattstring  = e.literalstring.c_str();
353
    const char *p = strstr(pattstring, line+suffixoffset);
354
    if (!p)
355
        return NULL;
356
    p += strlen(line+suffixoffset);
357
    rest = p;
358
    int pattsuffixlen = e.literalstring.size() - (p-pattstring);
359

    
360
    // pattern, if we cut off the 'rest', must exactly match 'line'
361
    return doMatch(line, 0, pattsuffixlen) ? rest.c_str() : NULL;
362
}
363

    
364
bool PatternMatcher::containsWildcards(const char *pattern)
365
{
366
    return strchr(pattern,'?') || strchr(pattern,'*') ||
367
           strchr(pattern,'\\') || strchr(pattern,'{') ||
368
           strstr(pattern,"..");
369
}