regex_constants.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. // class template regex -*- C++ -*-
  2. // Copyright (C) 2010-2022 Free Software Foundation, Inc.
  3. //
  4. // This file is part of the GNU ISO C++ Library. This library is free
  5. // software; you can redistribute it and/or modify it under the
  6. // terms of the GNU General Public License as published by the
  7. // Free Software Foundation; either version 3, or (at your option)
  8. // any later version.
  9. // This library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. // Under Section 7 of GPL version 3, you are granted additional
  14. // permissions described in the GCC Runtime Library Exception, version
  15. // 3.1, as published by the Free Software Foundation.
  16. // You should have received a copy of the GNU General Public License and
  17. // a copy of the GCC Runtime Library Exception along with this program;
  18. // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. // <http://www.gnu.org/licenses/>.
  20. /**
  21. * @file bits/regex_constants.h
  22. * @brief Constant definitions for the std regex library.
  23. *
  24. * This is an internal header file, included by other library headers.
  25. * Do not attempt to use it directly. @headername{regex}
  26. */
  27. namespace std _GLIBCXX_VISIBILITY(default)
  28. {
  29. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  30. /**
  31. * @defgroup regex Regular Expressions
  32. *
  33. * A facility for performing regular expression pattern matching.
  34. * @{
  35. */
  36. /**
  37. * @namespace std::regex_constants
  38. * @brief ISO C++ 2011 namespace for options and flags used with std::regex
  39. */
  40. namespace regex_constants
  41. {
  42. /**
  43. * @name 5.1 Regular Expression Syntax Options
  44. */
  45. ///@{
  /**
   * @brief This is a bitmask type indicating how to interpret the regex.
   *
   * The @c syntax_option_type is implementation defined but it is valid to
   * perform bitwise operations on these values and expect the right thing to
   * happen.
   *
   * A valid value of type syntax_option_type shall have exactly one of the
   * elements @c ECMAScript, @c basic, @c extended, @c awk, @c grep, @c egrep
   * %set.
   *
   * Every enumerator below occupies its own bit of the underlying
   * @c unsigned @c int, so any combination can be OR-ed together and later
   * tested individually.
   */
  enum syntax_option_type : unsigned int
  {
    _S_icase      = 1 << 0,   // backs regex_constants::icase
    _S_nosubs     = 1 << 1,   // backs regex_constants::nosubs
    _S_optimize   = 1 << 2,   // backs regex_constants::optimize
    _S_collate    = 1 << 3,   // backs regex_constants::collate
    _S_ECMAScript = 1 << 4,   // backs regex_constants::ECMAScript
    _S_basic      = 1 << 5,   // backs regex_constants::basic
    _S_extended   = 1 << 6,   // backs regex_constants::extended
    _S_awk        = 1 << 7,   // backs regex_constants::awk
    _S_grep       = 1 << 8,   // backs regex_constants::grep
    _S_egrep      = 1 << 9,   // backs regex_constants::egrep
    _S_polynomial = 1 << 10,  // backs the __polynomial extension
    _S_multiline  = 1 << 11   // backs multiline and __multiline
  };
  72. /**
  73. * Specifies that the matching of regular expressions against a character
  74. * sequence shall be performed without regard to case.
  75. */
  76. _GLIBCXX17_INLINE constexpr syntax_option_type icase = _S_icase;
  77. /**
  78. * Specifies that when a regular expression is matched against a character
  79. * container sequence, no sub-expression matches are to be stored in the
  80. * supplied match_results structure.
  81. */
  82. _GLIBCXX17_INLINE constexpr syntax_option_type nosubs = _S_nosubs;
  83. /**
  84. * Specifies that the regular expression engine should pay more attention to
  85. * the speed with which regular expressions are matched, and less to the
  86. * speed with which regular expression objects are constructed. Otherwise
  87. * it has no detectable effect on the program output.
  88. */
  89. _GLIBCXX17_INLINE constexpr syntax_option_type optimize = _S_optimize;
  90. /**
  91. * Specifies that character ranges of the form [a-b] should be locale
  92. * sensitive.
  93. */
  94. _GLIBCXX17_INLINE constexpr syntax_option_type collate = _S_collate;
  95. /**
  96. * Specifies that the grammar recognized by the regular expression engine is
  97. * that used by ECMAScript in ECMA-262 [Ecma International, ECMAScript
  98. * Language Specification, Standard Ecma-262, third edition, 1999], as
  99. * modified in section [28.13]. This grammar is similar to that defined
  100. * in the PERL scripting language but extended with elements found in the
  101. * POSIX regular expression grammar.
  102. */
  103. _GLIBCXX17_INLINE constexpr syntax_option_type ECMAScript = _S_ECMAScript;
  104. /**
  105. * Specifies that the grammar recognized by the regular expression engine is
  106. * that used by POSIX basic regular expressions in IEEE Std 1003.1-2001,
  107. * Portable Operating System Interface (POSIX), Base Definitions and
  108. * Headers, Section 9, Regular Expressions [IEEE, Information Technology --
  109. * Portable Operating System Interface (POSIX), IEEE Standard 1003.1-2001].
  110. */
  111. _GLIBCXX17_INLINE constexpr syntax_option_type basic = _S_basic;
  112. /**
  113. * Specifies that the grammar recognized by the regular expression engine is
  114. * that used by POSIX extended regular expressions in IEEE Std 1003.1-2001,
  115. * Portable Operating System Interface (POSIX), Base Definitions and
  116. * Headers, Section 9, Regular Expressions.
  117. */
  118. _GLIBCXX17_INLINE constexpr syntax_option_type extended = _S_extended;
  119. /**
  120. * Specifies that the grammar recognized by the regular expression engine is
  121. * that used by POSIX utility awk in IEEE Std 1003.1-2001. This option is
  122. * identical to syntax_option_type extended, except that C-style escape
  123. * sequences are supported. These sequences are:
  124. * \\\\, \\a, \\b, \\f, \\n, \\r, \\t , \\v, \\&apos,, &apos,,
  125. * and \\ddd (where ddd is one, two, or three octal digits).
  126. */
  127. _GLIBCXX17_INLINE constexpr syntax_option_type awk = _S_awk;
  128. /**
  129. * Specifies that the grammar recognized by the regular expression engine is
  130. * that used by POSIX utility grep in IEEE Std 1003.1-2001. This option is
  131. * identical to syntax_option_type basic, except that newlines are treated
  132. * as whitespace.
  133. */
  134. _GLIBCXX17_INLINE constexpr syntax_option_type grep = _S_grep;
  135. /**
  136. * Specifies that the grammar recognized by the regular expression engine is
  137. * that used by POSIX utility grep when given the -E option in
  138. * IEEE Std 1003.1-2001. This option is identical to syntax_option_type
  139. * extended, except that newlines are treated as whitespace.
  140. */
  141. _GLIBCXX17_INLINE constexpr syntax_option_type egrep = _S_egrep;
  142. #if __cplusplus >= 201703L || !defined __STRICT_ANSI__
  143. // _GLIBCXX_RESOLVE_LIB_DEFECTS
  144. // 2503. multiline option should be added to syntax_option_type
  145. /**
  146. * Specifies that the `^` anchor matches at the beginning of a line,
  147. * and the `$` anchor matches at the end of a line, not only at the
  148. * beginning/end of the input.
  149. * Valid for the ECMAScript syntax, ignored otherwise.
  150. * @since C++17
  151. */
  152. _GLIBCXX17_INLINE constexpr syntax_option_type multiline = _S_multiline;
  153. #endif
  154. /// Extension: Equivalent to regex_constants::multiline for C++11 and C++14.
  155. _GLIBCXX17_INLINE constexpr syntax_option_type __multiline = _S_multiline;
  156. /**
  157. * Extension: Ensure both space complexity of compiled regex and
  158. * time complexity execution are not exponential.
  159. * If specified in a regex with back-references, the exception
  160. * regex_constants::error_complexity will be thrown.
  161. */
  162. _GLIBCXX17_INLINE constexpr syntax_option_type __polynomial = _S_polynomial;
  163. constexpr inline syntax_option_type
  164. operator&(syntax_option_type __a, syntax_option_type __b)
  165. {
  166. return (syntax_option_type)(static_cast<unsigned int>(__a)
  167. & static_cast<unsigned int>(__b));
  168. }
  169. constexpr inline syntax_option_type
  170. operator|(syntax_option_type __a, syntax_option_type __b)
  171. {
  172. return (syntax_option_type)(static_cast<unsigned int>(__a)
  173. | static_cast<unsigned int>(__b));
  174. }
  175. constexpr inline syntax_option_type
  176. operator^(syntax_option_type __a, syntax_option_type __b)
  177. {
  178. return (syntax_option_type)(static_cast<unsigned int>(__a)
  179. ^ static_cast<unsigned int>(__b));
  180. }
  181. constexpr inline syntax_option_type
  182. operator~(syntax_option_type __a)
  183. { return (syntax_option_type)(~static_cast<unsigned int>(__a)); }
  184. _GLIBCXX14_CONSTEXPR
  185. inline syntax_option_type&
  186. operator&=(syntax_option_type& __a, syntax_option_type __b)
  187. { return __a = __a & __b; }
  188. _GLIBCXX14_CONSTEXPR
  189. inline syntax_option_type&
  190. operator|=(syntax_option_type& __a, syntax_option_type __b)
  191. { return __a = __a | __b; }
  192. _GLIBCXX14_CONSTEXPR
  193. inline syntax_option_type&
  194. operator^=(syntax_option_type& __a, syntax_option_type __b)
  195. { return __a = __a ^ __b; }
  196. ///@}
  197. /**
  198. * @name 5.2 Matching Rules
  199. *
  200. * Matching a regular expression against a sequence of characters [first,
  201. * last) proceeds according to the rules of the grammar specified for the
  202. * regular expression object, modified according to the effects listed
  203. * below for any bitmask elements set.
  204. *
  205. */
  206. ///@{
  /**
   * @brief This is a bitmask type indicating regex matching rules.
   *
   * The @c match_flag_type is implementation defined but it is valid to
   * perform bitwise operations on these values and expect the right thing to
   * happen.
   *
   * @c _S_default is zero (no bit set); every other enumerator occupies its
   * own bit of the underlying @c unsigned @c int.
   */
  enum match_flag_type : unsigned int
  {
    _S_default         = 0,        // backs match_default and format_default
    _S_not_bol         = 1 << 0,   // backs match_not_bol
    _S_not_eol         = 1 << 1,   // backs match_not_eol
    _S_not_bow         = 1 << 2,   // backs match_not_bow
    _S_not_eow         = 1 << 3,   // backs match_not_eow
    _S_any             = 1 << 4,   // backs match_any
    _S_not_null        = 1 << 5,   // backs match_not_null
    _S_continuous      = 1 << 6,   // backs match_continuous
    _S_prev_avail      = 1 << 7,   // backs match_prev_avail
    _S_sed             = 1 << 8,   // backs format_sed
    _S_no_copy         = 1 << 9,   // backs format_no_copy
    _S_first_only      = 1 << 10,  // backs format_first_only
    // Not referenced elsewhere in this header; presumably marks the end of
    // the flag range -- confirm against its users.
    _S_match_flag_last = 1 << 11
  };
  230. /**
  231. * The default matching rules.
  232. */
  233. _GLIBCXX17_INLINE constexpr match_flag_type match_default = _S_default;
  234. /**
  235. * The first character in the sequence [first, last) is treated as though it
  236. * is not at the beginning of a line, so the character (^) in the regular
  237. * expression shall not match [first, first).
  238. */
  239. _GLIBCXX17_INLINE constexpr match_flag_type match_not_bol = _S_not_bol;
  240. /**
  241. * The last character in the sequence [first, last) is treated as though it
  242. * is not at the end of a line, so the character ($) in the regular
  243. * expression shall not match [last, last).
  244. */
  245. _GLIBCXX17_INLINE constexpr match_flag_type match_not_eol = _S_not_eol;
  246. /**
  247. * The expression \\b is not matched against the sub-sequence
  248. * [first,first).
  249. */
  250. _GLIBCXX17_INLINE constexpr match_flag_type match_not_bow = _S_not_bow;
  251. /**
  252. * The expression \\b should not be matched against the sub-sequence
  253. * [last,last).
  254. */
  255. _GLIBCXX17_INLINE constexpr match_flag_type match_not_eow = _S_not_eow;
  256. /**
  257. * If more than one match is possible then any match is an acceptable
  258. * result.
  259. */
  260. _GLIBCXX17_INLINE constexpr match_flag_type match_any = _S_any;
  261. /**
  262. * The expression does not match an empty sequence.
  263. */
  264. _GLIBCXX17_INLINE constexpr match_flag_type match_not_null = _S_not_null;
  265. /**
  266. * The expression only matches a sub-sequence that begins at first .
  267. */
  268. _GLIBCXX17_INLINE constexpr match_flag_type match_continuous = _S_continuous;
  269. /**
  270. * `--first` is a valid iterator position. When this flag is set then the
  271. * flags `match_not_bol` and `match_not_bow` are ignored by the algorithms
  272. * `regex_match`, `regex_search`, and `regex_replace`, and by the iterators
  273. * `regex_iterator` and `regex_token_iterator`.
  274. */
  275. _GLIBCXX17_INLINE constexpr match_flag_type match_prev_avail = _S_prev_avail;
  276. /**
  277. * When a regular expression match is to be replaced by a new string, the
  278. * new string is constructed using the rules used by the ECMAScript replace
  279. * function in ECMA- 262 [Ecma International, ECMAScript Language
  280. * Specification, Standard Ecma-262, third edition, 1999], part 15.5.4.11
  281. * String.prototype.replace. In addition, during search and replace
  282. * operations all non-overlapping occurrences of the regular expression
  283. * are located and replaced, and sections of the input that did not match
  284. * the expression are copied unchanged to the output string.
  285. *
  286. * Format strings (from ECMA-262 [15.5.4.11]):
  287. * @li $$ The dollar-sign itself ($)
  288. * @li $& The matched substring.
  289. * @li $` The portion of @a string that precedes the matched substring.
  290. * This would be match_results::prefix().
  291. * @li $' The portion of @a string that follows the matched substring.
  292. * This would be match_results::suffix().
  293. * @li $n The nth capture, where n is in [1,9] and $n is not followed by a
  294. * decimal digit. If n <= match_results::size() and the nth capture
  295. * is undefined, use the empty string instead. If n >
  296. * match_results::size(), the result is implementation-defined.
  297. * @li $nn The nnth capture, where nn is a two-digit decimal number on
  298. * [01, 99]. If nn <= match_results::size() and the nth capture is
  299. * undefined, use the empty string instead. If
  300. * nn > match_results::size(), the result is implementation-defined.
  301. */
  302. _GLIBCXX17_INLINE constexpr match_flag_type format_default = _S_default;
  303. /**
  304. * When a regular expression match is to be replaced by a new string, the
  305. * new string is constructed using the rules used by the POSIX sed utility
  306. * in IEEE Std 1003.1- 2001 [IEEE, Information Technology -- Portable
  307. * Operating System Interface (POSIX), IEEE Standard 1003.1-2001].
  308. */
  309. _GLIBCXX17_INLINE constexpr match_flag_type format_sed = _S_sed;
  310. /**
  311. * During a search and replace operation, sections of the character
  312. * container sequence being searched that do not match the regular
  313. * expression shall not be copied to the output string.
  314. */
  315. _GLIBCXX17_INLINE constexpr match_flag_type format_no_copy = _S_no_copy;
  316. /**
  317. * When specified during a search and replace operation, only the first
  318. * occurrence of the regular expression shall be replaced.
  319. */
  320. _GLIBCXX17_INLINE constexpr match_flag_type format_first_only = _S_first_only;
  321. constexpr inline match_flag_type
  322. operator&(match_flag_type __a, match_flag_type __b)
  323. {
  324. return (match_flag_type)(static_cast<unsigned int>(__a)
  325. & static_cast<unsigned int>(__b));
  326. }
  327. constexpr inline match_flag_type
  328. operator|(match_flag_type __a, match_flag_type __b)
  329. {
  330. return (match_flag_type)(static_cast<unsigned int>(__a)
  331. | static_cast<unsigned int>(__b));
  332. }
  333. constexpr inline match_flag_type
  334. operator^(match_flag_type __a, match_flag_type __b)
  335. {
  336. return (match_flag_type)(static_cast<unsigned int>(__a)
  337. ^ static_cast<unsigned int>(__b));
  338. }
  339. constexpr inline match_flag_type
  340. operator~(match_flag_type __a)
  341. { return (match_flag_type)(~static_cast<unsigned int>(__a)); }
  342. _GLIBCXX14_CONSTEXPR
  343. inline match_flag_type&
  344. operator&=(match_flag_type& __a, match_flag_type __b)
  345. { return __a = __a & __b; }
  346. _GLIBCXX14_CONSTEXPR
  347. inline match_flag_type&
  348. operator|=(match_flag_type& __a, match_flag_type __b)
  349. { return __a = __a | __b; }
  350. _GLIBCXX14_CONSTEXPR
  351. inline match_flag_type&
  352. operator^=(match_flag_type& __a, match_flag_type __b)
  353. { return __a = __a ^ __b; }
  354. ///@}
  355. } // namespace regex_constants
  356. /// @} group regex
  357. _GLIBCXX_END_NAMESPACE_VERSION
  358. } // namespace std