CyberLeo.Net >> Repos - FreeBSD/releng/10.2.git/blob - contrib/libc++/src/regex.cpp
- Copy stable/10@285827 to releng/10.2 in preparation for 10.2-RC1
[FreeBSD/releng/10.2.git] / contrib / libc++ / src / regex.cpp
1 //===-------------------------- regex.cpp ---------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is dual licensed under the MIT and the University of Illinois Open
6 // Source Licenses. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 #include "regex"
11 #include "algorithm"
12 #include "iterator"
13
14 _LIBCPP_BEGIN_NAMESPACE_STD
15
16 static
17 const char*
18 make_error_type_string(regex_constants::error_type ecode)
19 {
20     switch (ecode)
21     {
22     case regex_constants::error_collate:
23         return "The expression contained an invalid collating element name.";
24     case regex_constants::error_ctype:
25         return "The expression contained an invalid character class name.";
26     case regex_constants::error_escape:
27         return "The expression contained an invalid escaped character, or a "
28                "trailing escape.";
29     case regex_constants::error_backref:
30         return "The expression contained an invalid back reference.";
31     case regex_constants::error_brack:
32         return "The expression contained mismatched [ and ].";
33     case regex_constants::error_paren:
34         return "The expression contained mismatched ( and ).";
35     case regex_constants::error_brace:
36         return "The expression contained mismatched { and }.";
37     case regex_constants::error_badbrace:
38         return "The expression contained an invalid range in a {} expression.";
39     case regex_constants::error_range:
40         return "The expression contained an invalid character range, "
41                "such as [b-a] in most encodings.";
42     case regex_constants::error_space:
43         return "There was insufficient memory to convert the expression into "
44                "a finite state machine.";
45     case regex_constants::error_badrepeat:
46         return "One of *?+{ was not preceded by a valid regular expression.";
47     case regex_constants::error_complexity:
48         return "The complexity of an attempted match against a regular "
49                "expression exceeded a pre-set level.";
50     case regex_constants::error_stack:
51         return "There was insufficient memory to determine whether the regular "
52                "expression could match the specified character sequence.";
53     case regex_constants::__re_err_grammar:
54         return "An invalid regex grammar has been requested.";
55     case regex_constants::__re_err_empty:
56         return "An empty regex is not allowed in the POSIX grammar.";
57     default:
58         break;
59     }
60     return "Unknown error type";
61 }
62
63 regex_error::regex_error(regex_constants::error_type ecode)
64     : runtime_error(make_error_type_string(ecode)),
65       __code_(ecode)
66 {}
67
68 regex_error::~regex_error() throw() {}
69
70 namespace {
71
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wpadded"
#endif

// One row of the collating-element table: a symbolic name and the single
// character it denotes.
struct collationnames
{
    const char* elem_;  // NUL-terminated element name (the lookup key)
    char char_;         // character value the name stands for
};

#if defined(__clang__)
#pragma clang diagnostic pop
#endif

// Collating-element names and their character values.
//
// The rows MUST stay ordered by strcmp() on elem_: the table is searched with
// lower_bound.  Rows are grouped below by their first letter purely for
// readability; the overall order is what matters.  Several names are aliases
// for the same character (e.g. "hyphen" / "hyphen-minus").
const collationnames collatenames[] =
{
    // Upper-case letters ("NUL" sorts between "N" and "O").
    {"A", 0x41}, {"B", 0x42}, {"C", 0x43}, {"D", 0x44}, {"E", 0x45},
    {"F", 0x46}, {"G", 0x47}, {"H", 0x48}, {"I", 0x49}, {"J", 0x4a},
    {"K", 0x4b}, {"L", 0x4c}, {"M", 0x4d}, {"N", 0x4e}, {"NUL", 0x00},
    {"O", 0x4f}, {"P", 0x50}, {"Q", 0x51}, {"R", 0x52}, {"S", 0x53},
    {"T", 0x54}, {"U", 0x55}, {"V", 0x56}, {"W", 0x57}, {"X", 0x58},
    {"Y", 0x59}, {"Z", 0x5a},

    // a...
    {"a", 0x61}, {"alert", 0x07}, {"ampersand", 0x26}, {"apostrophe", 0x27},
    {"asterisk", 0x2a},

    // b...
    {"b", 0x62}, {"backslash", 0x5c}, {"backspace", 0x08},

    // c...
    {"c", 0x63}, {"carriage-return", 0x0d}, {"circumflex", 0x5e},
    {"circumflex-accent", 0x5e}, {"colon", 0x3a}, {"comma", 0x2c},
    {"commercial-at", 0x40},

    // d...
    {"d", 0x64}, {"dollar-sign", 0x24},

    // e...
    {"e", 0x65}, {"eight", 0x38}, {"equals-sign", 0x3d},
    {"exclamation-mark", 0x21},

    // f...
    {"f", 0x66}, {"five", 0x35}, {"form-feed", 0x0c}, {"four", 0x34},
    {"full-stop", 0x2e},

    // g...
    {"g", 0x67}, {"grave-accent", 0x60}, {"greater-than-sign", 0x3e},

    // h...
    {"h", 0x68}, {"hyphen", 0x2d}, {"hyphen-minus", 0x2d},

    // i, j, k
    {"i", 0x69}, {"j", 0x6a}, {"k", 0x6b},

    // l...
    {"l", 0x6c}, {"left-brace", 0x7b}, {"left-curly-bracket", 0x7b},
    {"left-parenthesis", 0x28}, {"left-square-bracket", 0x5b},
    {"less-than-sign", 0x3c}, {"low-line", 0x5f},

    // m
    {"m", 0x6d},

    // n...
    {"n", 0x6e}, {"newline", 0x0a}, {"nine", 0x39}, {"number-sign", 0x23},

    // o...
    {"o", 0x6f}, {"one", 0x31},

    // p...
    {"p", 0x70}, {"percent-sign", 0x25}, {"period", 0x2e}, {"plus-sign", 0x2b},

    // q...
    {"q", 0x71}, {"question-mark", 0x3f}, {"quotation-mark", 0x22},

    // r...
    {"r", 0x72}, {"reverse-solidus", 0x5c}, {"right-brace", 0x7d},
    {"right-curly-bracket", 0x7d}, {"right-parenthesis", 0x29},
    {"right-square-bracket", 0x5d},

    // s...
    {"s", 0x73}, {"semicolon", 0x3b}, {"seven", 0x37}, {"six", 0x36},
    {"slash", 0x2f}, {"solidus", 0x2f}, {"space", 0x20},

    // t...
    {"t", 0x74}, {"tab", 0x09}, {"three", 0x33}, {"tilde", 0x7e},
    {"two", 0x32},

    // u...
    {"u", 0x75}, {"underscore", 0x5f},

    // v...
    {"v", 0x76}, {"vertical-line", 0x7c}, {"vertical-tab", 0x0b},

    // w, x, y
    {"w", 0x77}, {"x", 0x78}, {"y", 0x79},

    // z...
    {"z", 0x7a}, {"zero", 0x30}
};
201
202 #if defined(__clang__)
203 #pragma clang diagnostic push
204 #pragma clang diagnostic ignored "-Wpadded"
205 #endif
206
207 struct classnames
208 {
209     const char* elem_;
210     regex_traits<char>::char_class_type mask_;
211 };
212
213 #if defined(__clang__)
214 #pragma clang diagnostic pop
215 #endif
216
217 const classnames ClassNames[] =
218 {
219     {"alnum",  ctype_base::alnum},
220     {"alpha",  ctype_base::alpha},
221     {"blank",  ctype_base::blank},
222     {"cntrl",  ctype_base::cntrl},
223     {"d",      ctype_base::digit},
224     {"digit",  ctype_base::digit},
225     {"graph",  ctype_base::graph},
226     {"lower",  ctype_base::lower},
227     {"print",  ctype_base::print},
228     {"punct",  ctype_base::punct},
229     {"s",      ctype_base::space},
230     {"space",  ctype_base::space},
231     {"upper",  ctype_base::upper},
232     {"w",      regex_traits<char>::__regex_word},
233     {"xdigit", ctype_base::xdigit}
234 };
235
236 struct use_strcmp
237 {
238     bool operator()(const collationnames& x, const char* y)
239         {return strcmp(x.elem_, y) < 0;}
240     bool operator()(const classnames& x, const char* y)
241         {return strcmp(x.elem_, y) < 0;}
242 };
243
244 }
245
246 string
247 __get_collation_name(const char* s)
248 {
249     const collationnames* i =
250             _VSTD::lower_bound(begin(collatenames), end(collatenames), s, use_strcmp());
251     string r;
252     if (i != end(collatenames) && strcmp(s, i->elem_) == 0)
253         r = char(i->char_);
254     return r;
255 }
256
257 regex_traits<char>::char_class_type
258 __get_classname(const char* s, bool __icase)
259 {
260     const classnames* i =
261             _VSTD::lower_bound(begin(ClassNames), end(ClassNames), s, use_strcmp());
262     regex_traits<char>::char_class_type r = 0;
263     if (i != end(ClassNames) && strcmp(s, i->elem_) == 0)
264     {
265         r = i->mask_;
266         if (r == regex_traits<char>::__regex_word)
267             r |= ctype_base::alnum | ctype_base::upper | ctype_base::lower;
268         else if (__icase)
269         {
270             if (r & (ctype_base::lower | ctype_base::upper))
271                 r |= ctype_base::alpha;
272         }
273     }
274     return r;
275 }
276
277 template <>
278 void
279 __match_any_but_newline<char>::__exec(__state& __s) const
280 {
281     if (__s.__current_ != __s.__last_)
282     {
283         switch (*__s.__current_)
284         {
285         case '\r':
286         case '\n':
287             __s.__do_ = __state::__reject;
288             __s.__node_ = nullptr;
289             break;
290         default:
291             __s.__do_ = __state::__accept_and_consume;
292             ++__s.__current_;
293             __s.__node_ = this->first();
294             break;
295         }
296     }
297     else
298     {
299         __s.__do_ = __state::__reject;
300         __s.__node_ = nullptr;
301     }
302 }
303
304 template <>
305 void
306 __match_any_but_newline<wchar_t>::__exec(__state& __s) const
307 {
308     if (__s.__current_ != __s.__last_)
309     {
310         switch (*__s.__current_)
311         {
312         case '\r':
313         case '\n':
314         case 0x2028:
315         case 0x2029:
316             __s.__do_ = __state::__reject;
317             __s.__node_ = nullptr;
318             break;
319         default:
320             __s.__do_ = __state::__accept_and_consume;
321             ++__s.__current_;
322             __s.__node_ = this->first();
323             break;
324         }
325     }
326     else
327     {
328         __s.__do_ = __state::__reject;
329         __s.__node_ = nullptr;
330     }
331 }
332
333 _LIBCPP_END_NAMESPACE_STD