| 1 |
// class template regex -*- C++ -*- |
| 2 |
|
| 3 |
// Copyright (C) 2013-2021 Free Software Foundation, Inc. |
| 4 |
// |
| 5 |
// This file is part of the GNU ISO C++ Library. This library is free |
| 6 |
// software; you can redistribute it and/or modify it under the |
| 7 |
// terms of the GNU General Public License as published by the |
| 8 |
// Free Software Foundation; either version 3, or (at your option) |
| 9 |
// any later version. |
| 10 |
|
| 11 |
// This library is distributed in the hope that it will be useful, |
| 12 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 |
// GNU General Public License for more details. |
| 15 |
|
| 16 |
// Under Section 7 of GPL version 3, you are granted additional |
| 17 |
// permissions described in the GCC Runtime Library Exception, version |
| 18 |
// 3.1, as published by the Free Software Foundation. |
| 19 |
|
| 20 |
// You should have received a copy of the GNU General Public License and |
| 21 |
// a copy of the GCC Runtime Library Exception along with this program; |
| 22 |
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| 23 |
// <http://www.gnu.org/licenses/>. |
| 24 |
|
| 25 |
/** |
| 26 |
* @file bits/regex_automaton.tcc |
| 27 |
* This is an internal header file, included by other library headers. |
| 28 |
* Do not attempt to use it directly. @headername{regex} |
| 29 |
*/ |
| 30 |
|
| 31 |
namespace std _GLIBCXX_VISIBILITY(default) |
| 32 |
{ |
| 33 |
_GLIBCXX_BEGIN_NAMESPACE_VERSION |
| 34 |
|
| 35 |
namespace __detail |
| 36 |
{ |
| 37 |
#ifdef _GLIBCXX_DEBUG |
| 38 |
inline std::ostream& |
| 39 |
_State_base::_M_print(std::ostream& ostr) const |
| 40 |
{ |
| 41 |
switch (_M_opcode) |
| 42 |
{ |
| 43 |
case _S_opcode_alternative: |
| 44 |
case _S_opcode_repeat: |
| 45 |
ostr << "alt next=" << _M_next << " alt=" << _M_alt; |
| 46 |
break; |
| 47 |
case _S_opcode_subexpr_begin: |
| 48 |
ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr; |
| 49 |
break; |
| 50 |
case _S_opcode_subexpr_end: |
| 51 |
ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr; |
| 52 |
break; |
| 53 |
case _S_opcode_backref: |
| 54 |
ostr << "backref next=" << _M_next << " index=" << _M_backref_index; |
| 55 |
break; |
| 56 |
case _S_opcode_match: |
| 57 |
ostr << "match next=" << _M_next; |
| 58 |
break; |
| 59 |
case _S_opcode_accept: |
| 60 |
ostr << "accept next=" << _M_next; |
| 61 |
break; |
| 62 |
default: |
| 63 |
ostr << "unknown next=" << _M_next; |
| 64 |
break; |
| 65 |
} |
| 66 |
return ostr; |
| 67 |
} |
| 68 |
|
| 69 |
// Prints graphviz dot commands for state. |
| 70 |
inline std::ostream& |
| 71 |
_State_base::_M_dot(std::ostream& __ostr, _StateIdT __id) const |
| 72 |
{ |
| 73 |
switch (_M_opcode) |
| 74 |
{ |
| 75 |
case _S_opcode_alternative: |
| 76 |
case _S_opcode_repeat: |
| 77 |
__ostr << __id << " [label=\"" << __id << "\\nALT\"];\n" |
| 78 |
<< __id << " -> " << _M_next |
| 79 |
<< " [label=\"next\", tailport=\"s\"];\n" |
| 80 |
<< __id << " -> " << _M_alt |
| 81 |
<< " [label=\"alt\", tailport=\"n\"];\n"; |
| 82 |
break; |
| 83 |
case _S_opcode_backref: |
| 84 |
__ostr << __id << " [label=\"" << __id << "\\nBACKREF " |
| 85 |
<< _M_subexpr << "\"];\n" |
| 86 |
<< __id << " -> " << _M_next << " [label=\"<match>\"];\n"; |
| 87 |
break; |
| 88 |
case _S_opcode_line_begin_assertion: |
| 89 |
__ostr << __id << " [label=\"" << __id << "\\nLINE_BEGIN \"];\n" |
| 90 |
<< __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; |
| 91 |
break; |
| 92 |
case _S_opcode_line_end_assertion: |
| 93 |
__ostr << __id << " [label=\"" << __id << "\\nLINE_END \"];\n" |
| 94 |
<< __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; |
| 95 |
break; |
| 96 |
case _S_opcode_word_boundary: |
| 97 |
__ostr << __id << " [label=\"" << __id << "\\nWORD_BOUNDRY " |
| 98 |
<< _M_neg << "\"];\n" |
| 99 |
<< __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; |
| 100 |
break; |
| 101 |
case _S_opcode_subexpr_lookahead: |
| 102 |
__ostr << __id << " [label=\"" << __id << "\\nLOOK_AHEAD\"];\n" |
| 103 |
<< __id << " -> " << _M_next |
| 104 |
<< " [label=\"epsilon\", tailport=\"s\"];\n" |
| 105 |
<< __id << " -> " << _M_alt |
| 106 |
<< " [label=\"<assert>\", tailport=\"n\"];\n"; |
| 107 |
break; |
| 108 |
case _S_opcode_subexpr_begin: |
| 109 |
__ostr << __id << " [label=\"" << __id << "\\nSBEGIN " |
| 110 |
<< _M_subexpr << "\"];\n" |
| 111 |
<< __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; |
| 112 |
break; |
| 113 |
case _S_opcode_subexpr_end: |
| 114 |
__ostr << __id << " [label=\"" << __id << "\\nSEND " |
| 115 |
<< _M_subexpr << "\"];\n" |
| 116 |
<< __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; |
| 117 |
break; |
| 118 |
case _S_opcode_dummy: |
| 119 |
break; |
| 120 |
case _S_opcode_match: |
| 121 |
__ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n" |
| 122 |
<< __id << " -> " << _M_next << " [label=\"<match>\"];\n"; |
| 123 |
break; |
| 124 |
case _S_opcode_accept: |
| 125 |
__ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ; |
| 126 |
break; |
| 127 |
default: |
| 128 |
_GLIBCXX_DEBUG_ASSERT(false); |
| 129 |
break; |
| 130 |
} |
| 131 |
return __ostr; |
| 132 |
} |
| 133 |
|
| 134 |
template<typename _TraitsT> |
| 135 |
std::ostream& |
| 136 |
_NFA<_TraitsT>::_M_dot(std::ostream& __ostr) const |
| 137 |
{ |
| 138 |
__ostr << "digraph _Nfa {\n" |
| 139 |
" rankdir=LR;\n"; |
| 140 |
for (size_t __i = 0; __i < this->size(); ++__i) |
| 141 |
(*this)[__i]._M_dot(__ostr, __i); |
| 142 |
__ostr << "}\n"; |
| 143 |
return __ostr; |
| 144 |
} |
| 145 |
#endif |
| 146 |
|
| 147 |
template<typename _TraitsT> |
| 148 |
_StateIdT |
| 149 |
_NFA<_TraitsT>::_M_insert_backref(size_t __index) |
| 150 |
{ |
| 151 |
if (this->_M_flags & regex_constants::__polynomial) |
| 152 |
__throw_regex_error(regex_constants::error_complexity, |
| 153 |
"Unexpected back-reference in polynomial mode."); |
| 154 |
// To figure out whether a backref is valid, a stack is used to store |
| 155 |
// unfinished sub-expressions. For example, when parsing |
| 156 |
// "(a(b)(c\\1(d)))" at '\\1', _M_subexpr_count is 3, indicating that 3 |
| 157 |
// sub expressions are parsed or partially parsed(in the stack), aka, |
| 158 |
// "(a..", "(b)" and "(c.."). |
| 159 |
// _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this |
| 160 |
// time, "\\2" is valid, but "\\1" and "\\3" are not. |
| 161 |
if (__index >= _M_subexpr_count) |
| 162 |
__throw_regex_error( |
| 163 |
regex_constants::error_backref, |
| 164 |
"Back-reference index exceeds current sub-expression count."); |
| 165 |
for (auto __it : this->_M_paren_stack) |
| 166 |
if (__index == __it) |
| 167 |
__throw_regex_error( |
| 168 |
regex_constants::error_backref, |
| 169 |
"Back-reference referred to an opened sub-expression."); |
| 170 |
this->_M_has_backref = true; |
| 171 |
_StateT __tmp(_S_opcode_backref); |
| 172 |
__tmp._M_backref_index = __index; |
| 173 |
return _M_insert_state(std::move(__tmp)); |
| 174 |
} |
| 175 |
|
| 176 |
template<typename _TraitsT> |
| 177 |
void |
| 178 |
_NFA<_TraitsT>::_M_eliminate_dummy() |
| 179 |
{ |
| 180 |
for (auto& __it : *this) |
| 181 |
{ |
| 182 |
while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode() |
| 183 |
== _S_opcode_dummy) |
| 184 |
__it._M_next = (*this)[__it._M_next]._M_next; |
| 185 |
if (__it._M_has_alt()) |
| 186 |
while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode() |
| 187 |
== _S_opcode_dummy) |
| 188 |
__it._M_alt = (*this)[__it._M_alt]._M_next; |
| 189 |
} |
| 190 |
} |
| 191 |
|
| 192 |
// Just apply DFS on the sequence and re-link their links. |
| 193 |
template<typename _TraitsT> |
| 194 |
_StateSeq<_TraitsT> |
| 195 |
_StateSeq<_TraitsT>::_M_clone() |
| 196 |
{ |
| 197 |
std::map<_StateIdT, _StateIdT> __m; |
| 198 |
std::stack<_StateIdT> __stack; |
| 199 |
__stack.push(_M_start); |
| 200 |
while (!__stack.empty()) |
| 201 |
{ |
| 202 |
auto __u = __stack.top(); |
| 203 |
__stack.pop(); |
| 204 |
auto __dup = _M_nfa[__u]; |
| 205 |
// _M_insert_state() never return -1 |
| 206 |
auto __id = _M_nfa._M_insert_state(std::move(__dup)); |
| 207 |
__m[__u] = __id; |
| 208 |
if (__dup._M_has_alt()) |
| 209 |
if (__dup._M_alt != _S_invalid_state_id |
| 210 |
&& __m.count(__dup._M_alt) == 0) |
| 211 |
__stack.push(__dup._M_alt); |
| 212 |
if (__u == _M_end) |
| 213 |
continue; |
| 214 |
if (__dup._M_next != _S_invalid_state_id |
| 215 |
&& __m.count(__dup._M_next) == 0) |
| 216 |
__stack.push(__dup._M_next); |
| 217 |
} |
| 218 |
for (auto __it : __m) |
| 219 |
{ |
| 220 |
auto __v = __it.second; |
| 221 |
auto& __ref = _M_nfa[__v]; |
| 222 |
if (__ref._M_next != _S_invalid_state_id) |
| 223 |
__ref._M_next = __m.find(__ref._M_next)->second; |
| 224 |
if (__ref._M_has_alt() && __ref._M_alt != _S_invalid_state_id) |
| 225 |
__ref._M_alt = __m.find(__ref._M_alt)->second; |
| 226 |
} |
| 227 |
return _StateSeq(_M_nfa, __m[_M_start], __m[_M_end]); |
| 228 |
} |
| 229 |
} // namespace __detail |
| 230 |
|
| 231 |
_GLIBCXX_END_NAMESPACE_VERSION |
| 232 |
} // namespace |