| 1 |
// class template regex -*- C++ -*- |
| 2 |
|
| 3 |
// Copyright (C) 2013-2021 Free Software Foundation, Inc. |
| 4 |
// |
| 5 |
// This file is part of the GNU ISO C++ Library. This library is free |
| 6 |
// software; you can redistribute it and/or modify it under the |
| 7 |
// terms of the GNU General Public License as published by the |
| 8 |
// Free Software Foundation; either version 3, or (at your option) |
| 9 |
// any later version. |
| 10 |
|
| 11 |
// This library is distributed in the hope that it will be useful, |
| 12 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 |
// GNU General Public License for more details. |
| 15 |
|
| 16 |
// Under Section 7 of GPL version 3, you are granted additional |
| 17 |
// permissions described in the GCC Runtime Library Exception, version |
| 18 |
// 3.1, as published by the Free Software Foundation. |
| 19 |
|
| 20 |
// You should have received a copy of the GNU General Public License and |
| 21 |
// a copy of the GCC Runtime Library Exception along with this program; |
| 22 |
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| 23 |
// <http://www.gnu.org/licenses/>. |
| 24 |
|
| 25 |
/** |
| 26 |
* @file bits/regex_compiler.tcc |
| 27 |
* This is an internal header file, included by other library headers. |
| 28 |
* Do not attempt to use it directly. @headername{regex} |
| 29 |
*/ |
| 30 |
|
| 31 |
// FIXME make comments doxygen format. |
| 32 |
|
| 33 |
/* |
| 34 |
// This compiler refers to "Regular Expression Matching Can Be Simple And Fast" |
| 35 |
// (http://swtch.com/~rsc/regexp/regexp1.html), |
| 36 |
// but doesn't strictly follow it. |
| 37 |
// |
| 38 |
// When compiling, states are *chained* instead of tree- or graph-constructed. |
| 39 |
// It's more like structured programs: there's if statement and loop statement. |
| 40 |
// |
| 41 |
// For alternative structure (say "a|b"), aka "if statement", two branches |
| 42 |
// should be constructed. However, these two shall merge to an "end_tag" at |
| 43 |
// the end of this operator: |
| 44 |
// |
| 45 |
// branch1 |
| 46 |
// / \ |
| 47 |
// => begin_tag end_tag => |
| 48 |
// \ / |
| 49 |
// branch2 |
| 50 |
// |
| 51 |
// This is the difference between this implementation and that in Russ's |
| 52 |
// article. |
| 53 |
// |
| 54 |
// That's why we introduced dummy node here ------ "end_tag" is a dummy node. |
| 55 |
// All dummy nodes will be eliminated at the end of compilation. |
| 56 |
*/ |
| 57 |
|
| 58 |
namespace std _GLIBCXX_VISIBILITY(default) |
| 59 |
{ |
| 60 |
_GLIBCXX_BEGIN_NAMESPACE_VERSION |
| 61 |
|
| 62 |
namespace __detail |
| 63 |
{ |
| 64 |
template<typename _TraitsT> |
| 65 |
_Compiler<_TraitsT>:: |
| 66 |
_Compiler(_IterT __b, _IterT __e, |
| 67 |
const typename _TraitsT::locale_type& __loc, _FlagT __flags) |
| 68 |
: _M_flags((__flags |
| 69 |
& (regex_constants::ECMAScript |
| 70 |
| regex_constants::basic |
| 71 |
| regex_constants::extended |
| 72 |
| regex_constants::grep |
| 73 |
| regex_constants::egrep |
| 74 |
| regex_constants::awk)) |
| 75 |
? __flags |
| 76 |
: __flags | regex_constants::ECMAScript), |
| 77 |
_M_scanner(__b, __e, _M_flags, __loc), |
| 78 |
_M_nfa(make_shared<_RegexT>(__loc, _M_flags)), |
| 79 |
_M_traits(_M_nfa->_M_traits), |
| 80 |
_M_ctype(std::use_facet<_CtypeT>(__loc)) |
| 81 |
{ |
| 82 |
_StateSeqT __r(*_M_nfa, _M_nfa->_M_start()); |
| 83 |
__r._M_append(_M_nfa->_M_insert_subexpr_begin()); |
| 84 |
this->_M_disjunction(); |
| 85 |
if (!_M_match_token(_ScannerT::_S_token_eof)) |
| 86 |
__throw_regex_error(regex_constants::error_paren); |
| 87 |
__r._M_append(_M_pop()); |
| 88 |
__glibcxx_assert(_M_stack.empty()); |
| 89 |
__r._M_append(_M_nfa->_M_insert_subexpr_end()); |
| 90 |
__r._M_append(_M_nfa->_M_insert_accept()); |
| 91 |
_M_nfa->_M_eliminate_dummy(); |
| 92 |
} |
| 93 |
|
| 94 |
template<typename _TraitsT> |
| 95 |
void |
| 96 |
_Compiler<_TraitsT>:: |
| 97 |
_M_disjunction() |
| 98 |
{ |
| 99 |
this->_M_alternative(); |
| 100 |
while (_M_match_token(_ScannerT::_S_token_or)) |
| 101 |
{ |
| 102 |
_StateSeqT __alt1 = _M_pop(); |
| 103 |
this->_M_alternative(); |
| 104 |
_StateSeqT __alt2 = _M_pop(); |
| 105 |
auto __end = _M_nfa->_M_insert_dummy(); |
| 106 |
__alt1._M_append(__end); |
| 107 |
__alt2._M_append(__end); |
| 108 |
// __alt2 is state._M_next, __alt1 is state._M_alt. The executor |
| 109 |
// executes _M_alt before _M_next, as well as executing left |
| 110 |
// alternative before right one. |
| 111 |
_M_stack.push(_StateSeqT(*_M_nfa, |
| 112 |
_M_nfa->_M_insert_alt( |
| 113 |
__alt2._M_start, __alt1._M_start, false), |
| 114 |
__end)); |
| 115 |
} |
| 116 |
} |
| 117 |
|
| 118 |
template<typename _TraitsT> |
| 119 |
void |
| 120 |
_Compiler<_TraitsT>:: |
| 121 |
_M_alternative() |
| 122 |
{ |
| 123 |
if (this->_M_term()) |
| 124 |
{ |
| 125 |
_StateSeqT __re = _M_pop(); |
| 126 |
this->_M_alternative(); |
| 127 |
__re._M_append(_M_pop()); |
| 128 |
_M_stack.push(__re); |
| 129 |
} |
| 130 |
else |
| 131 |
_M_stack.push(_StateSeqT(*_M_nfa, _M_nfa->_M_insert_dummy())); |
| 132 |
} |
| 133 |
|
| 134 |
template<typename _TraitsT> |
| 135 |
bool |
| 136 |
_Compiler<_TraitsT>:: |
| 137 |
_M_term() |
| 138 |
{ |
| 139 |
if (this->_M_assertion()) |
| 140 |
return true; |
| 141 |
if (this->_M_atom()) |
| 142 |
{ |
| 143 |
while (this->_M_quantifier()); |
| 144 |
return true; |
| 145 |
} |
| 146 |
return false; |
| 147 |
} |
| 148 |
|
| 149 |
template<typename _TraitsT> |
| 150 |
bool |
| 151 |
_Compiler<_TraitsT>:: |
| 152 |
_M_assertion() |
| 153 |
{ |
| 154 |
if (_M_match_token(_ScannerT::_S_token_line_begin)) |
| 155 |
_M_stack.push(_StateSeqT(*_M_nfa, _M_nfa->_M_insert_line_begin())); |
| 156 |
else if (_M_match_token(_ScannerT::_S_token_line_end)) |
| 157 |
_M_stack.push(_StateSeqT(*_M_nfa, _M_nfa->_M_insert_line_end())); |
| 158 |
else if (_M_match_token(_ScannerT::_S_token_word_bound)) |
| 159 |
// _M_value[0] == 'n' means it's negative, say "not word boundary". |
| 160 |
_M_stack.push(_StateSeqT(*_M_nfa, _M_nfa-> |
| 161 |
_M_insert_word_bound(_M_value[0] == 'n'))); |
| 162 |
else if (_M_match_token(_ScannerT::_S_token_subexpr_lookahead_begin)) |
| 163 |
{ |
| 164 |
auto __neg = _M_value[0] == 'n'; |
| 165 |
this->_M_disjunction(); |
| 166 |
if (!_M_match_token(_ScannerT::_S_token_subexpr_end)) |
| 167 |
__throw_regex_error(regex_constants::error_paren, |
| 168 |
"Parenthesis is not closed."); |
| 169 |
auto __tmp = _M_pop(); |
| 170 |
__tmp._M_append(_M_nfa->_M_insert_accept()); |
| 171 |
_M_stack.push( |
| 172 |
_StateSeqT( |
| 173 |
*_M_nfa, |
| 174 |
_M_nfa->_M_insert_lookahead(__tmp._M_start, __neg))); |
| 175 |
} |
| 176 |
else |
| 177 |
return false; |
| 178 |
return true; |
| 179 |
} |
| 180 |
|
| 181 |
template<typename _TraitsT> |
| 182 |
bool |
| 183 |
_Compiler<_TraitsT>:: |
| 184 |
_M_quantifier() |
| 185 |
{ |
| 186 |
bool __neg = (_M_flags & regex_constants::ECMAScript); |
| 187 |
auto __init = [this, &__neg]() |
| 188 |
{ |
| 189 |
if (_M_stack.empty()) |
| 190 |
__throw_regex_error(regex_constants::error_badrepeat, |
| 191 |
"Nothing to repeat before a quantifier."); |
| 192 |
__neg = __neg && _M_match_token(_ScannerT::_S_token_opt); |
| 193 |
}; |
| 194 |
if (_M_match_token(_ScannerT::_S_token_closure0)) |
| 195 |
{ |
| 196 |
__init(); |
| 197 |
auto __e = _M_pop(); |
| 198 |
_StateSeqT __r(*_M_nfa, |
| 199 |
_M_nfa->_M_insert_repeat(_S_invalid_state_id, |
| 200 |
__e._M_start, __neg)); |
| 201 |
__e._M_append(__r); |
| 202 |
_M_stack.push(__r); |
| 203 |
} |
| 204 |
else if (_M_match_token(_ScannerT::_S_token_closure1)) |
| 205 |
{ |
| 206 |
__init(); |
| 207 |
auto __e = _M_pop(); |
| 208 |
__e._M_append(_M_nfa->_M_insert_repeat(_S_invalid_state_id, |
| 209 |
__e._M_start, __neg)); |
| 210 |
_M_stack.push(__e); |
| 211 |
} |
| 212 |
else if (_M_match_token(_ScannerT::_S_token_opt)) |
| 213 |
{ |
| 214 |
__init(); |
| 215 |
auto __e = _M_pop(); |
| 216 |
auto __end = _M_nfa->_M_insert_dummy(); |
| 217 |
_StateSeqT __r(*_M_nfa, |
| 218 |
_M_nfa->_M_insert_repeat(_S_invalid_state_id, |
| 219 |
__e._M_start, __neg)); |
| 220 |
__e._M_append(__end); |
| 221 |
__r._M_append(__end); |
| 222 |
_M_stack.push(__r); |
| 223 |
} |
| 224 |
else if (_M_match_token(_ScannerT::_S_token_interval_begin)) |
| 225 |
{ |
| 226 |
if (_M_stack.empty()) |
| 227 |
__throw_regex_error(regex_constants::error_badrepeat, |
| 228 |
"Nothing to repeat before a quantifier."); |
| 229 |
if (!_M_match_token(_ScannerT::_S_token_dup_count)) |
| 230 |
__throw_regex_error(regex_constants::error_badbrace, |
| 231 |
"Unexpected token in brace expression."); |
| 232 |
_StateSeqT __r(_M_pop()); |
| 233 |
_StateSeqT __e(*_M_nfa, _M_nfa->_M_insert_dummy()); |
| 234 |
long __min_rep = _M_cur_int_value(10); |
| 235 |
bool __infi = false; |
| 236 |
long __n = 0; |
| 237 |
|
| 238 |
// {3 |
| 239 |
if (_M_match_token(_ScannerT::_S_token_comma)) |
| 240 |
{ |
| 241 |
if (_M_match_token(_ScannerT::_S_token_dup_count)) // {3,7} |
| 242 |
__n = _M_cur_int_value(10) - __min_rep; |
| 243 |
else |
| 244 |
__infi = true; |
| 245 |
} |
| 246 |
if (!_M_match_token(_ScannerT::_S_token_interval_end)) |
| 247 |
__throw_regex_error(regex_constants::error_brace, |
| 248 |
"Unexpected end of brace expression."); |
| 249 |
|
| 250 |
__neg = __neg && _M_match_token(_ScannerT::_S_token_opt); |
| 251 |
|
| 252 |
for (long __i = 0; __i < __min_rep; ++__i) |
| 253 |
__e._M_append(__r._M_clone()); |
| 254 |
|
| 255 |
if (__infi) |
| 256 |
{ |
| 257 |
auto __tmp = __r._M_clone(); |
| 258 |
_StateSeqT __s(*_M_nfa, |
| 259 |
_M_nfa->_M_insert_repeat(_S_invalid_state_id, |
| 260 |
__tmp._M_start, __neg)); |
| 261 |
__tmp._M_append(__s); |
| 262 |
__e._M_append(__s); |
| 263 |
} |
| 264 |
else |
| 265 |
{ |
| 266 |
if (__n < 0) |
| 267 |
__throw_regex_error(regex_constants::error_badbrace, |
| 268 |
"Invalid range in brace expression."); |
| 269 |
auto __end = _M_nfa->_M_insert_dummy(); |
| 270 |
// _M_alt is the "match more" branch, and _M_next is the |
| 271 |
// "match less" one. Switch _M_alt and _M_next of all created |
| 272 |
// nodes. This is a hack but IMO works well. |
| 273 |
std::stack<_StateIdT> __stack; |
| 274 |
for (long __i = 0; __i < __n; ++__i) |
| 275 |
{ |
| 276 |
auto __tmp = __r._M_clone(); |
| 277 |
auto __alt = _M_nfa->_M_insert_repeat(__tmp._M_start, |
| 278 |
__end, __neg); |
| 279 |
__stack.push(__alt); |
| 280 |
__e._M_append(_StateSeqT(*_M_nfa, __alt, __tmp._M_end)); |
| 281 |
} |
| 282 |
__e._M_append(__end); |
| 283 |
while (!__stack.empty()) |
| 284 |
{ |
| 285 |
auto& __tmp = (*_M_nfa)[__stack.top()]; |
| 286 |
__stack.pop(); |
| 287 |
std::swap(__tmp._M_next, __tmp._M_alt); |
| 288 |
} |
| 289 |
} |
| 290 |
_M_stack.push(__e); |
| 291 |
} |
| 292 |
else |
| 293 |
return false; |
| 294 |
return true; |
| 295 |
} |
| 296 |
|
| 297 |
#define __INSERT_REGEX_MATCHER(__func, ...)\ |
| 298 |
do {\ |
| 299 |
if (!(_M_flags & regex_constants::icase))\ |
| 300 |
if (!(_M_flags & regex_constants::collate))\ |
| 301 |
__func<false, false>(__VA_ARGS__);\ |
| 302 |
else\ |
| 303 |
__func<false, true>(__VA_ARGS__);\ |
| 304 |
else\ |
| 305 |
if (!(_M_flags & regex_constants::collate))\ |
| 306 |
__func<true, false>(__VA_ARGS__);\ |
| 307 |
else\ |
| 308 |
__func<true, true>(__VA_ARGS__);\ |
| 309 |
} while (false) |
| 310 |
|
| 311 |
template<typename _TraitsT> |
| 312 |
bool |
| 313 |
_Compiler<_TraitsT>:: |
| 314 |
_M_atom() |
| 315 |
{ |
| 316 |
if (_M_match_token(_ScannerT::_S_token_anychar)) |
| 317 |
{ |
| 318 |
if (!(_M_flags & regex_constants::ECMAScript)) |
| 319 |
__INSERT_REGEX_MATCHER(_M_insert_any_matcher_posix); |
| 320 |
else |
| 321 |
__INSERT_REGEX_MATCHER(_M_insert_any_matcher_ecma); |
| 322 |
} |
| 323 |
else if (_M_try_char()) |
| 324 |
__INSERT_REGEX_MATCHER(_M_insert_char_matcher); |
| 325 |
else if (_M_match_token(_ScannerT::_S_token_backref)) |
| 326 |
_M_stack.push(_StateSeqT(*_M_nfa, _M_nfa-> |
| 327 |
_M_insert_backref(_M_cur_int_value(10)))); |
| 328 |
else if (_M_match_token(_ScannerT::_S_token_quoted_class)) |
| 329 |
__INSERT_REGEX_MATCHER(_M_insert_character_class_matcher); |
| 330 |
else if (_M_match_token(_ScannerT::_S_token_subexpr_no_group_begin)) |
| 331 |
{ |
| 332 |
_StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_dummy()); |
| 333 |
this->_M_disjunction(); |
| 334 |
if (!_M_match_token(_ScannerT::_S_token_subexpr_end)) |
| 335 |
__throw_regex_error(regex_constants::error_paren, |
| 336 |
"Parenthesis is not closed."); |
| 337 |
__r._M_append(_M_pop()); |
| 338 |
_M_stack.push(__r); |
| 339 |
} |
| 340 |
else if (_M_match_token(_ScannerT::_S_token_subexpr_begin)) |
| 341 |
{ |
| 342 |
_StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_subexpr_begin()); |
| 343 |
this->_M_disjunction(); |
| 344 |
if (!_M_match_token(_ScannerT::_S_token_subexpr_end)) |
| 345 |
__throw_regex_error(regex_constants::error_paren, |
| 346 |
"Parenthesis is not closed."); |
| 347 |
__r._M_append(_M_pop()); |
| 348 |
__r._M_append(_M_nfa->_M_insert_subexpr_end()); |
| 349 |
_M_stack.push(__r); |
| 350 |
} |
| 351 |
else if (!_M_bracket_expression()) |
| 352 |
return false; |
| 353 |
return true; |
| 354 |
} |
| 355 |
|
| 356 |
template<typename _TraitsT> |
| 357 |
bool |
| 358 |
_Compiler<_TraitsT>:: |
| 359 |
_M_bracket_expression() |
| 360 |
{ |
| 361 |
bool __neg = |
| 362 |
_M_match_token(_ScannerT::_S_token_bracket_neg_begin); |
| 363 |
if (!(__neg || _M_match_token(_ScannerT::_S_token_bracket_begin))) |
| 364 |
return false; |
| 365 |
__INSERT_REGEX_MATCHER(_M_insert_bracket_matcher, __neg); |
| 366 |
return true; |
| 367 |
} |
| 368 |
#undef __INSERT_REGEX_MATCHER |
| 369 |
|
| 370 |
template<typename _TraitsT> |
| 371 |
template<bool __icase, bool __collate> |
| 372 |
void |
| 373 |
_Compiler<_TraitsT>:: |
| 374 |
_M_insert_any_matcher_ecma() |
| 375 |
{ |
| 376 |
_M_stack.push(_StateSeqT(*_M_nfa, |
| 377 |
_M_nfa->_M_insert_matcher |
| 378 |
(_AnyMatcher<_TraitsT, true, __icase, __collate> |
| 379 |
(_M_traits)))); |
| 380 |
} |
| 381 |
|
| 382 |
template<typename _TraitsT> |
| 383 |
template<bool __icase, bool __collate> |
| 384 |
void |
| 385 |
_Compiler<_TraitsT>:: |
| 386 |
_M_insert_any_matcher_posix() |
| 387 |
{ |
| 388 |
_M_stack.push(_StateSeqT(*_M_nfa, |
| 389 |
_M_nfa->_M_insert_matcher |
| 390 |
(_AnyMatcher<_TraitsT, false, __icase, __collate> |
| 391 |
(_M_traits)))); |
| 392 |
} |
| 393 |
|
| 394 |
template<typename _TraitsT> |
| 395 |
template<bool __icase, bool __collate> |
| 396 |
void |
| 397 |
_Compiler<_TraitsT>:: |
| 398 |
_M_insert_char_matcher() |
| 399 |
{ |
| 400 |
_M_stack.push(_StateSeqT(*_M_nfa, |
| 401 |
_M_nfa->_M_insert_matcher |
| 402 |
(_CharMatcher<_TraitsT, __icase, __collate> |
| 403 |
(_M_value[0], _M_traits)))); |
| 404 |
} |
| 405 |
|
| 406 |
template<typename _TraitsT> |
| 407 |
template<bool __icase, bool __collate> |
| 408 |
void |
| 409 |
_Compiler<_TraitsT>:: |
| 410 |
_M_insert_character_class_matcher() |
| 411 |
{ |
| 412 |
__glibcxx_assert(_M_value.size() == 1); |
| 413 |
_BracketMatcher<_TraitsT, __icase, __collate> __matcher |
| 414 |
(_M_ctype.is(_CtypeT::upper, _M_value[0]), _M_traits); |
| 415 |
__matcher._M_add_character_class(_M_value, false); |
| 416 |
__matcher._M_ready(); |
| 417 |
_M_stack.push(_StateSeqT(*_M_nfa, |
| 418 |
_M_nfa->_M_insert_matcher(std::move(__matcher)))); |
| 419 |
} |
| 420 |
|
| 421 |
template<typename _TraitsT> |
| 422 |
template<bool __icase, bool __collate> |
| 423 |
void |
| 424 |
_Compiler<_TraitsT>:: |
| 425 |
_M_insert_bracket_matcher(bool __neg) |
| 426 |
{ |
| 427 |
_BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits); |
| 428 |
pair<bool, _CharT> __last_char; // Optional<_CharT> |
| 429 |
__last_char.first = false; |
| 430 |
if (!(_M_flags & regex_constants::ECMAScript)) |
| 431 |
{ |
| 432 |
if (_M_try_char()) |
| 433 |
{ |
| 434 |
__last_char.first = true; |
| 435 |
__last_char.second = _M_value[0]; |
| 436 |
} |
| 437 |
else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) |
| 438 |
{ |
| 439 |
__last_char.first = true; |
| 440 |
__last_char.second = '-'; |
| 441 |
} |
| 442 |
} |
| 443 |
while (_M_expression_term(__last_char, __matcher)); |
| 444 |
if (__last_char.first) |
| 445 |
__matcher._M_add_char(__last_char.second); |
| 446 |
__matcher._M_ready(); |
| 447 |
_M_stack.push(_StateSeqT( |
| 448 |
*_M_nfa, |
| 449 |
_M_nfa->_M_insert_matcher(std::move(__matcher)))); |
| 450 |
} |
| 451 |
|
| 452 |
template<typename _TraitsT> |
| 453 |
template<bool __icase, bool __collate> |
| 454 |
bool |
| 455 |
_Compiler<_TraitsT>:: |
| 456 |
_M_expression_term(pair<bool, _CharT>& __last_char, |
| 457 |
_BracketMatcher<_TraitsT, __icase, __collate>& __matcher) |
| 458 |
{ |
| 459 |
if (_M_match_token(_ScannerT::_S_token_bracket_end)) |
| 460 |
return false; |
| 461 |
|
| 462 |
const auto __push_char = [&](_CharT __ch) |
| 463 |
{ |
| 464 |
if (__last_char.first) |
| 465 |
__matcher._M_add_char(__last_char.second); |
| 466 |
else |
| 467 |
__last_char.first = true; |
| 468 |
__last_char.second = __ch; |
| 469 |
}; |
| 470 |
const auto __flush = [&] |
| 471 |
{ |
| 472 |
if (__last_char.first) |
| 473 |
{ |
| 474 |
__matcher._M_add_char(__last_char.second); |
| 475 |
__last_char.first = false; |
| 476 |
} |
| 477 |
}; |
| 478 |
|
| 479 |
if (_M_match_token(_ScannerT::_S_token_collsymbol)) |
| 480 |
{ |
| 481 |
auto __symbol = __matcher._M_add_collate_element(_M_value); |
| 482 |
if (__symbol.size() == 1) |
| 483 |
__push_char(__symbol[0]); |
| 484 |
else |
| 485 |
__flush(); |
| 486 |
} |
| 487 |
else if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) |
| 488 |
{ |
| 489 |
__flush(); |
| 490 |
__matcher._M_add_equivalence_class(_M_value); |
| 491 |
} |
| 492 |
else if (_M_match_token(_ScannerT::_S_token_char_class_name)) |
| 493 |
{ |
| 494 |
__flush(); |
| 495 |
__matcher._M_add_character_class(_M_value, false); |
| 496 |
} |
| 497 |
else if (_M_try_char()) |
| 498 |
__push_char(_M_value[0]); |
| 499 |
// POSIX doesn't allow '-' as a start-range char (say [a-z--0]), |
| 500 |
// except when the '-' is the first or last character in the bracket |
| 501 |
// expression ([--0]). ECMAScript treats all '-' after a range as a |
| 502 |
// normal character. Also see above, where _M_expression_term gets called. |
| 503 |
// |
| 504 |
// As a result, POSIX rejects [-----], but ECMAScript doesn't. |
| 505 |
// Boost (1.57.0) always uses POSIX style even in its ECMAScript syntax. |
| 506 |
// Clang (3.5) always uses ECMAScript style even in its POSIX syntax. |
| 507 |
// |
| 508 |
// It turns out that no one reads BNFs ;) |
| 509 |
else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) |
| 510 |
{ |
| 511 |
if (!__last_char.first) |
| 512 |
{ |
| 513 |
if (!(_M_flags & regex_constants::ECMAScript)) |
| 514 |
{ |
| 515 |
if (_M_match_token(_ScannerT::_S_token_bracket_end)) |
| 516 |
{ |
| 517 |
__push_char('-'); |
| 518 |
return false; |
| 519 |
} |
| 520 |
__throw_regex_error( |
| 521 |
regex_constants::error_range, |
| 522 |
"Unexpected dash in bracket expression. For POSIX syntax, " |
| 523 |
"a dash is not treated literally only when it is at " |
| 524 |
"beginning or end."); |
| 525 |
} |
| 526 |
__push_char('-'); |
| 527 |
} |
| 528 |
else |
| 529 |
{ |
| 530 |
if (_M_try_char()) |
| 531 |
{ |
| 532 |
__matcher._M_make_range(__last_char.second, _M_value[0]); |
| 533 |
__last_char.first = false; |
| 534 |
} |
| 535 |
else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) |
| 536 |
{ |
| 537 |
__matcher._M_make_range(__last_char.second, '-'); |
| 538 |
__last_char.first = false; |
| 539 |
} |
| 540 |
else |
| 541 |
{ |
| 542 |
if (_M_scanner._M_get_token() |
| 543 |
!= _ScannerT::_S_token_bracket_end) |
| 544 |
__throw_regex_error( |
| 545 |
regex_constants::error_range, |
| 546 |
"Character is expected after a dash."); |
| 547 |
__push_char('-'); |
| 548 |
} |
| 549 |
} |
| 550 |
} |
| 551 |
else if (_M_match_token(_ScannerT::_S_token_quoted_class)) |
| 552 |
{ |
| 553 |
__flush(); |
| 554 |
__matcher._M_add_character_class(_M_value, |
| 555 |
_M_ctype.is(_CtypeT::upper, |
| 556 |
_M_value[0])); |
| 557 |
} |
| 558 |
else |
| 559 |
__throw_regex_error(regex_constants::error_brack, |
| 560 |
"Unexpected character in bracket expression."); |
| 561 |
|
| 562 |
return true; |
| 563 |
} |
| 564 |
|
| 565 |
template<typename _TraitsT> |
| 566 |
bool |
| 567 |
_Compiler<_TraitsT>:: |
| 568 |
_M_try_char() |
| 569 |
{ |
| 570 |
bool __is_char = false; |
| 571 |
if (_M_match_token(_ScannerT::_S_token_oct_num)) |
| 572 |
{ |
| 573 |
__is_char = true; |
| 574 |
_M_value.assign(1, _M_cur_int_value(8)); |
| 575 |
} |
| 576 |
else if (_M_match_token(_ScannerT::_S_token_hex_num)) |
| 577 |
{ |
| 578 |
__is_char = true; |
| 579 |
_M_value.assign(1, _M_cur_int_value(16)); |
| 580 |
} |
| 581 |
else if (_M_match_token(_ScannerT::_S_token_ord_char)) |
| 582 |
__is_char = true; |
| 583 |
return __is_char; |
| 584 |
} |
| 585 |
|
| 586 |
template<typename _TraitsT> |
| 587 |
bool |
| 588 |
_Compiler<_TraitsT>:: |
| 589 |
_M_match_token(_TokenT token) |
| 590 |
{ |
| 591 |
if (token == _M_scanner._M_get_token()) |
| 592 |
{ |
| 593 |
_M_value = _M_scanner._M_get_value(); |
| 594 |
_M_scanner._M_advance(); |
| 595 |
return true; |
| 596 |
} |
| 597 |
return false; |
| 598 |
} |
| 599 |
|
| 600 |
template<typename _TraitsT> |
| 601 |
int |
| 602 |
_Compiler<_TraitsT>:: |
| 603 |
_M_cur_int_value(int __radix) |
| 604 |
{ |
| 605 |
long __v = 0; |
| 606 |
for (typename _StringT::size_type __i = 0; |
| 607 |
__i < _M_value.length(); ++__i) |
| 608 |
__v =__v * __radix + _M_traits.value(_M_value[__i], __radix); |
| 609 |
return __v; |
| 610 |
} |
| 611 |
|
| 612 |
template<typename _TraitsT, bool __icase, bool __collate> |
| 613 |
bool |
| 614 |
_BracketMatcher<_TraitsT, __icase, __collate>:: |
| 615 |
_M_apply(_CharT __ch, false_type) const |
| 616 |
{ |
| 617 |
return [this, __ch] |
| 618 |
{ |
| 619 |
if (std::binary_search(_M_char_set.begin(), _M_char_set.end(), |
| 620 |
_M_translator._M_translate(__ch))) |
| 621 |
return true; |
| 622 |
auto __s = _M_translator._M_transform(__ch); |
| 623 |
for (auto& __it : _M_range_set) |
| 624 |
if (_M_translator._M_match_range(__it.first, __it.second, __s)) |
| 625 |
return true; |
| 626 |
if (_M_traits.isctype(__ch, _M_class_set)) |
| 627 |
return true; |
| 628 |
if (std::find(_M_equiv_set.begin(), _M_equiv_set.end(), |
| 629 |
_M_traits.transform_primary(&__ch, &__ch+1)) |
| 630 |
!= _M_equiv_set.end()) |
| 631 |
return true; |
| 632 |
for (auto& __it : _M_neg_class_set) |
| 633 |
if (!_M_traits.isctype(__ch, __it)) |
| 634 |
return true; |
| 635 |
return false; |
| 636 |
}() ^ _M_is_non_matching; |
| 637 |
} |
| 638 |
} // namespace __detail |
| 639 |
|
| 640 |
_GLIBCXX_END_NAMESPACE_VERSION |
| 641 |
} // namespace |