Statistics
| Branch: | Revision:

root / src / common / patternmatcher.h @ 81ad8b66

History | View | Annotate | Download (7.03 KB)

1 01873262 Georg Kunz
//==========================================================================
2
//  PATTERNMATCHER.H - part of
3
//                     OMNeT++/OMNEST
4
//             Discrete System Simulation in C++
5
//
6
//  Author: Andras Varga
7
//
8
//==========================================================================
9
10
/*--------------------------------------------------------------*
11
  Copyright (C) 2006-2008 OpenSim Ltd.
12

13
  This file is distributed WITHOUT ANY WARRANTY. See the file
14
  `license' for details on this and other legal matters.
15
*--------------------------------------------------------------*/
16
17
#ifndef __CPATTERNMATCHER_H
18
#define __CPATTERNMATCHER_H
19
20
#include <stdio.h>
21
#include <string>
22
#include <vector>
23
#include "commondefs.h"
24
25
NAMESPACE_BEGIN
26
27
/**
28
 * Glob-style pattern matching class, adapted to special OMNeT++ requirements.
29
 * One instance represents a pattern to match.
30
 *
31
 * Pattern syntax:
32
 *   - ? : matches any character except '.'
33
 *   - * : matches zero or more characters except '.'
34
 *   - ** : matches zero or more characters (any character)
35
 *   - {a-z} : matches a character in range a-z
36
 *   - {^a-z} : matches a character NOT in range a-z
37
 *   - {32..255} : any number (ie. sequence of digits) in range 32..255  (e.g. "99")
38
 *   - [32..255] : any number in square brackets in range 32..255 (e.g. "[99]")
39
 *   - backslash \ : takes away the special meaning of the subsequent character
40
 *
41
 * The "except '.'" phrases in the above rules apply only in "dottedpath" mode (see below).
42
 *
43
 * There are three option switches (see setPattern() method):
44
 *   - dottedpath: dottedpath=yes is the mode used in omnetpp.ini for matching
45
 *     module parameters, like this: "**.mac[*].retries=9". In this
46
 *     mode, '*' cannot "eat" dot, so it can only match one component (module
47
 *     name) in the path. '**' can be used to match more components.
48
 *     (This is similar to e.g. Java Ant's usage of the asterisk.)
49
 *     In dottedpath=false mode, '*' will match anything.
50
 *   - fullstring: selects between full string and substring match. The pattern
51
 *     "ate" will match "whatever" in substring mode, but not in full string
52
 *      mode.
53
 *   - case sensitive: selects between case sensitive and case insensitive mode.
54
 *
55
 * Rule details:
56
 *   - sets, negated sets: They can contain several character ranges and also
57
 *     enumeration of characters. For example: "{_a-zA-Z0-9}","{xyzc-f}". To
58
 *     include '-' in the set, put it at a position where it cannot be
59
 *     interpreted as character range, for example: "{a-z-}" or "{-a-z}".
60
 *     If you want to include '}' in the set, it must be the first
61
 *     character: "{}a-z}", or as a negated set: "{^}a-z}". A backslash
62
 *     is always taken as literal backslash (and NOT as escape character)
63
 *     within set definitions.
64
 *     When doing case-insensitive match, avoid ranges that include both
65
 *     alpha (a-zA-Z) and non-alpha characters, because they might cause
66
 *     funny results.
67
 *   - numeric ranges: only nonnegative integers can be matched.
68
 *     The start or the end of the range (or both) can be omitted:
69
 *     "{10..}", "{..99}" or "{..}" are valid numeric ranges (the last one
70
 *     matches any number). The specification must use exactly two dots.
71
 *     Caveat: "*{17..19}" will match "a17","117" and "963217" as well.
72
 */
73
class COMMON_API PatternMatcher
74
{
75
  private:
76
    // Kinds of elements a parsed pattern is built from; stored in Elem::type.
    enum ElemType {
77
      LITERALSTRING = 0, // literal text, kept in Elem::literalstring
78
      ANYCHAR,    // NOTE(review): presumably any single char, '.' included -- confirm in the .cc file
79
      COMMONCHAR, // any char except "."
80
      SET,        // character set; its ranges are kept in Elem::setchars
81
      NEGSET,     // negated character set; its ranges are kept in Elem::setchars
82
      NUMRANGE,   // numeric range; its bounds are kept in Elem::fromnum / Elem::tonum
83
      ANYSEQ,     // "**": sequence of any chars
84
      COMMONSEQ,  // "*": seq of any chars except "."
85
      END         // NOTE(review): presumably terminates the element list -- confirm in the .cc file
86
    };
87
88
    // One element of the parsed pattern. Which of the fields below is
    // meaningful depends on the value of 'type'.
    struct Elem {
89
      ElemType type;
90
      std::string literalstring; // if type==LITERALSTRING
91
      std::string setchars; // SET/NEGSET: character pairs (0,1),(2,3) etc denote char ranges
92
      long fromnum, tonum; // NUMRANGE; -1 means "unset"
93
    };
94
95
    // The pattern in its internal (element list) form; debugStr() renders it as text.
    std::vector<Elem> pattern;
96
    // NOTE(review): presumably set from the 'casesensitive' argument of setPattern() -- confirm.
    bool iscasesensitive;
97
98
    std::string rest; // used to pass return value from doMatch() to patternPrefixMatches()
99
100
  private:
101
    // Parsing helpers. They take the input position as a reference to a pointer;
    // NOTE(review): presumably the pointer is advanced past the consumed text and
    // the result is stored in 'e' -- confirm against the definitions in the .cc file.
    void parseSet(const char *&s, Elem& e);
102
    void parseNumRange(const char *&s, Elem& e);
103
    void parseLiteralString(const char *&s, Elem& e);
104
    // Overload with explicit closing character and out-parameters for the bounds.
    // NOTE(review): the meaning of the bool result is not visible in this header.
    bool parseNumRange(const char *&str, char closingchar, long& lo, long& up);
105
    // Backs debugStr(), which calls it with from=0.
    std::string debugStrFrom(int from);
106
    // NOTE(review): 'set' presumably uses the pair encoding described at Elem::setchars -- confirm.
    bool isInSet(char c, const char *set);
107
    // match line from pattern[patternpos]; with last string literal, ignore last suffixlen of pattern
    bool doMatch(const char *line, int patternpos, int suffixlen);
108
109
110
  public:
111
    /**
     * Constructor
     */
114
    PatternMatcher();
115
116
    /**
     * Constructor. Takes the same arguments as setPattern().
     */
119
    PatternMatcher(const char *pattern, bool dottedpath, bool fullstring, bool casesensitive);
120
121
    /**
     * Destructor
     */
124
    ~PatternMatcher();
125
126
    /**
     * Sets the pattern to be used by subsequent calls to matches(). See the
     * general class description for the meaning of the rest of the arguments.
     * Throws cException if the pattern is bogus.
     */
131
    void setPattern(const char *pattern, bool dottedpath, bool fullstring, bool casesensitive);
132
133
    /**
     * Returns true if the line matches the pattern with the given settings.
     * See setPattern().
     */
137
    bool matches(const char *line);
138
139
    /**
     * Similar to matches(): it returns non-NULL if and only if (1) the pattern
     * ends in a string literal (and not, say, '*' or '**') which contains the
     * line suffix (the part of line that begins at offset suffixoffset) and
     * (2) pattern matches the whole line, except that (3) in matching the
     * pattern's last string literal, it is also accepted if line is shorter
     * than the pattern. If the above conditions hold, it returns the rest of
     * the pattern. The returned pointer is valid until the next call to this
     * method.
     *
     * This method is used by cIniFile's <tt>getEntriesWithPrefix()</tt>, used
     * e.g. to find RNG mapping entries for a module. For that, we have to find
     * all ini file entries (keys) like <tt>"net.host1.gen.rng-NN"</tt>
     * where NN=0,1,2,... In cIniFile, every entry is a pattern
     * (<tt>"**.host*.gen.rng-1"</tt>, <tt>"**.*.gen.rng-0"</tt>, etc.).
     * So we'd invoke <tt>patternPrefixMatches("net.host1.gen.rng-", 13)</tt>
     * (i.e. suffix=".rng-") to find those entries (patterns) which can expand to
     * <tt>"net.host1.gen.rng-0"</tt>, <tt>"net.host1.gen.rng-1"</tt>, etc.
     *
     * See matches().
     */
159
    const char *patternPrefixMatches(const char *line, int suffixoffset);
160
161
    /**
     * Returns the internal representation of the pattern as a string.
     * May be useful for debugging purposes.
     */
165
    std::string debugStr()  {return debugStrFrom(0);}
166
167
    /**
     * Prints the internal representation of the pattern on the standard output.
     * May be useful for debugging purposes. The text is the one returned by
     * debugStr(); nothing (e.g. no newline) is appended to it here.
     */
171
    void dump()  {printf("%s", debugStr().c_str());}
172
173
    /**
     * Utility function to determine whether a given string contains wildcards.
     * If it does not, a simple strcmp() might be a faster option than using
     * PatternMatcher.
     */
178
    static bool containsWildcards(const char *pattern);
179
180
};
181
182
NAMESPACE_END
183
184
185
#endif
186