123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271 |
- // class template regex -*- C++ -*-
- // Copyright (C) 2013-2022 Free Software Foundation, Inc.
- //
- // This file is part of the GNU ISO C++ Library. This library is free
- // software; you can redistribute it and/or modify it under the
- // terms of the GNU General Public License as published by the
- // Free Software Foundation; either version 3, or (at your option)
- // any later version.
- // This library is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- // GNU General Public License for more details.
- // Under Section 7 of GPL version 3, you are granted additional
- // permissions described in the GCC Runtime Library Exception, version
- // 3.1, as published by the Free Software Foundation.
- // You should have received a copy of the GNU General Public License and
- // a copy of the GCC Runtime Library Exception along with this program;
- // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- // <http://www.gnu.org/licenses/>.
- /**
- * @file bits/regex_scanner.h
- * This is an internal header file, included by other library headers.
- * Do not attempt to use it directly. @headername{regex}
- */
- namespace std _GLIBCXX_VISIBILITY(default)
- {
- _GLIBCXX_BEGIN_NAMESPACE_VERSION
- namespace __detail
- {
- /**
- * @addtogroup regex-detail
- * @{
- */
/// Character-type-independent part of the regex scanner: the token and
/// state enumerations, the grammar flags, and the lookup tables that are
/// selected from those flags.
///
/// NOTE: _M_escape_tbl points into this object's own non-static table, so
/// an implicitly generated copy keeps pointing at the source object's array.
struct _ScannerBase
{
public:
  /// Token types returned from the scanner.
  enum _TokenT : unsigned
  {
    _S_token_anychar,                 // '.' in _M_token_tbl
    _S_token_ord_char,
    _S_token_oct_num,
    _S_token_hex_num,
    _S_token_backref,
    _S_token_subexpr_begin,
    _S_token_subexpr_no_group_begin,
    _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
    _S_token_subexpr_end,
    _S_token_bracket_begin,
    _S_token_bracket_neg_begin,
    _S_token_bracket_end,
    _S_token_interval_begin,
    _S_token_interval_end,
    _S_token_quoted_class,
    _S_token_char_class_name,
    _S_token_collsymbol,
    _S_token_equiv_class_name,
    _S_token_opt,                     // '?' in _M_token_tbl
    _S_token_or,                      // '|' (and '\n') in _M_token_tbl
    _S_token_closure0,                // '*' in _M_token_tbl
    _S_token_closure1,                // '+' in _M_token_tbl
    _S_token_line_begin,              // '^' in _M_token_tbl
    _S_token_line_end,                // '$' in _M_token_tbl
    _S_token_word_bound, // neg if _M_value[0] == 'n'
    _S_token_comma,
    _S_token_dup_count,
    _S_token_eof,
    _S_token_bracket_dash,
    _S_token_unknown = -1u
  };

protected:
  typedef std::regex_constants::syntax_option_type _FlagT;

  /// Scanner states.
  enum _StateT
  {
    _S_state_normal,
    _S_state_in_brace,
    _S_state_in_bracket,
  };

protected:
  /**
   * @brief Selects the escape table and special-character set for a grammar.
   * @param __flags  Syntax options; one of the grammar bits (ECMAScript,
   *                 basic, extended, grep, egrep, awk) must be set,
   *                 otherwise no special-character set can be chosen and
   *                 the assertion in the body fails.
   *
   * The current token starts out as _S_token_unknown so that it never
   * holds an indeterminate value before a derived class assigns it.
   */
  _ScannerBase(_FlagT __flags)
  : _M_state(_S_state_normal),
    _M_flags(__flags),
    _M_token(_S_token_unknown),
    // _M_is_ecma() only reads _M_flags, which is declared (and therefore
    // initialised) before _M_escape_tbl and _M_spec_char.
    _M_escape_tbl(_M_is_ecma()
		  ? _M_ecma_escape_tbl
		  : _M_awk_escape_tbl),
    // The grammars are tested in a fixed order; the first match wins.
    _M_spec_char(_M_is_ecma()
		 ? _M_ecma_spec_char
		 : (_M_flags & std::regex_constants::basic)
		 ? _M_basic_spec_char
		 : (_M_flags & std::regex_constants::extended)
		 ? _M_extended_spec_char
		 : (_M_flags & std::regex_constants::grep)
		 ? ".[\\*^$\n"
		 : (_M_flags & std::regex_constants::egrep)
		 ? ".[\\()*+?{|^$\n"
		 : (_M_flags & std::regex_constants::awk)
		 ? _M_extended_spec_char
		 : nullptr),
    _M_at_bracket_start(false)
  { __glibcxx_assert(_M_spec_char); }

protected:
  /**
   * @brief Looks up @p __c in the active escape table.
   * @return Pointer to the replacement character stored in the table, or
   *         nullptr when @p __c has no entry.  The table is terminated by
   *         an entry whose key is '\0', so '\0' itself is never found.
   */
  const char*
  _M_find_escape(char __c) const
  {
    for (auto __it = _M_escape_tbl; __it->first != '\0'; ++__it)
      if (__it->first == __c)
	return &__it->second;
    return nullptr;
  }

  /// True if the ECMAScript grammar bit is set.
  bool
  _M_is_ecma() const
  { return _M_flags & std::regex_constants::ECMAScript; }

  /// True if the basic or grep grammar bit is set.
  bool
  _M_is_basic() const
  {
    return _M_flags & (std::regex_constants::basic
		       | std::regex_constants::grep);
  }

  /// True if the extended, egrep or awk grammar bit is set.
  bool
  _M_is_extended() const
  {
    return _M_flags & (std::regex_constants::extended
		       | std::regex_constants::egrep
		       | std::regex_constants::awk);
  }

  /// True if the grep or egrep grammar bit is set.
  bool
  _M_is_grep() const
  {
    return _M_flags & (std::regex_constants::grep
		       | std::regex_constants::egrep);
  }

  /// True if the awk grammar bit is set.
  bool
  _M_is_awk() const
  { return _M_flags & std::regex_constants::awk; }

protected:
  // TODO: Make them static in the next abi change.

  /// Single-character operators and their tokens; the '\0' key ends the table.
  const std::pair<char, _TokenT> _M_token_tbl[9] =
    {
      {'^', _S_token_line_begin},
      {'$', _S_token_line_end},
      {'.', _S_token_anychar},
      {'*', _S_token_closure0},
      {'+', _S_token_closure1},
      {'?', _S_token_opt},
      {'|', _S_token_or},
      {'\n', _S_token_or}, // grep and egrep
      {'\0', _S_token_or},
    };

  /// Escape letter -> character, used when the ECMAScript bit is set;
  /// the '\0' key ends the table.
  const std::pair<char, char> _M_ecma_escape_tbl[8] =
    {
      {'0', '\0'},
      {'b', '\b'},
      {'f', '\f'},
      {'n', '\n'},
      {'r', '\r'},
      {'t', '\t'},
      {'v', '\v'},
      {'\0', '\0'},
    };

  /// Escape letter -> character, used for every non-ECMAScript grammar;
  /// the '\0' key ends the table.
  const std::pair<char, char> _M_awk_escape_tbl[11] =
    {
      {'"', '"'},
      {'/', '/'},
      {'\\', '\\'},
      {'a', '\a'},
      {'b', '\b'},
      {'f', '\f'},
      {'n', '\n'},
      {'r', '\r'},
      {'t', '\t'},
      {'v', '\v'},
      {'\0', '\0'},
    };

  // Special-character sets per grammar (grep/egrep use the literals in
  // the constructor, which additionally contain '\n').
  const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
  const char* _M_basic_spec_char = ".[\\*^$";
  const char* _M_extended_spec_char = ".[\\()*+?{|^$";

  _StateT                       _M_state;      // starts as _S_state_normal
  _FlagT                        _M_flags;      // grammar/syntax options
  _TokenT                       _M_token;      // starts as _S_token_unknown
  const std::pair<char, char>*  _M_escape_tbl; // one of the tables above
  const char*                   _M_spec_char;  // never null after construction
  bool                          _M_at_bracket_start;
};
- /**
- * @brief Scans an input range for regex tokens.
- *
- * The %_Scanner class interprets the regular expression pattern in
- * the input range passed to its constructor as a sequence of parse
- * tokens passed to the regular expression compiler. The sequence
- * of tokens provided depends on the flag settings passed to the
- * constructor: different regular expression grammars will interpret
- * the same input pattern in syntactically different ways.
- */
- template<typename _CharT>
- class _Scanner
- : public _ScannerBase
- {
- public:
- typedef std::basic_string<_CharT> _StringT;
- typedef regex_constants::syntax_option_type _FlagT;
- typedef const std::ctype<_CharT> _CtypeT;
- _Scanner(const _CharT* __begin, const _CharT* __end,
- _FlagT __flags, std::locale __loc);
- void
- _M_advance();
- _TokenT
- _M_get_token() const noexcept
- { return _M_token; }
- const _StringT&
- _M_get_value() const noexcept
- { return _M_value; }
- #ifdef _GLIBCXX_DEBUG
- std::ostream&
- _M_print(std::ostream&);
- #endif
- private:
- void
- _M_scan_normal();
- void
- _M_scan_in_bracket();
- void
- _M_scan_in_brace();
- void
- _M_eat_escape_ecma();
- void
- _M_eat_escape_posix();
- void
- _M_eat_escape_awk();
- void
- _M_eat_class(char);
- const _CharT* _M_current;
- const _CharT* _M_end;
- _CtypeT& _M_ctype;
- _StringT _M_value;
- void (_Scanner::* _M_eat_escape)();
- };
- ///@} regex-detail
- } // namespace __detail
- _GLIBCXX_END_NAMESPACE_VERSION
- } // namespace std
- #include <bits/regex_scanner.tcc>
|