locale_conv.h 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634
  1. // wstring_convert implementation -*- C++ -*-
  2. // Copyright (C) 2015-2022 Free Software Foundation, Inc.
  3. //
  4. // This file is part of the GNU ISO C++ Library. This library is free
  5. // software; you can redistribute it and/or modify it under the
  6. // terms of the GNU General Public License as published by the
  7. // Free Software Foundation; either version 3, or (at your option)
  8. // any later version.
  9. // This library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. // Under Section 7 of GPL version 3, you are granted additional
  14. // permissions described in the GCC Runtime Library Exception, version
  15. // 3.1, as published by the Free Software Foundation.
  16. // You should have received a copy of the GNU General Public License and
  17. // a copy of the GCC Runtime Library Exception along with this program;
  18. // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. // <http://www.gnu.org/licenses/>.
  20. /** @file bits/locale_conv.h
  21. * This is an internal header file, included by other library headers.
  22. * Do not attempt to use it directly. @headername{locale}
  23. */
  24. #ifndef _LOCALE_CONV_H
  25. #define _LOCALE_CONV_H 1
  26. #if __cplusplus < 201103L
  27. # include <bits/c++0x_warning.h>
  28. #else
  29. #include <streambuf>
  30. #include <bits/stringfwd.h>
  31. #include <bits/allocator.h>
  32. #include <bits/codecvt.h>
  33. namespace std _GLIBCXX_VISIBILITY(default)
  34. {
  35. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  36. /**
  37. * @addtogroup locales
  38. * @{
  39. */
  /// Convert the character range [__first, __last) by repeatedly calling
  /// the codecvt member function __fn on __cvt, storing the converted
  /// characters in __outstr.
  ///
  /// On return __count is the number of input elements consumed.
  /// Returns false only when the facet reports codecvt_base::error;
  /// a conversion that stops early with codecvt_base::partial still
  /// returns true (compare __count against the input length to detect it).
  template<typename _OutStr, typename _InChar, typename _Codecvt,
           typename _State, typename _Fn>
    bool
    __do_str_codecvt(const _InChar* __first, const _InChar* __last,
                     _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
                     size_t& __count, _Fn __fn)
    {
      // An empty input range converts to an empty string.
      if (__first == __last)
        {
          __outstr.clear();
          __count = 0;
          return true;
        }

      // Output elements reserved per remaining input element.
      const auto __per_elem = __cvt.max_length() + 1;
      const _InChar* __src = __first;
      size_t __written = 0;
      codecvt_base::result __res;

      for (bool __again = true; __again; )
        {
          // Grow the output so every remaining input element has room.
          __outstr.resize(__outstr.size() + (__last - __src) * __per_elem);
          auto* const __obegin = &__outstr.front();
          auto* const __oend = &__outstr.back() + 1;
          auto* __ocur = __obegin + __written;
          __res = (__cvt.*__fn)(__state, __src, __last, __src,
                                __ocur, __oend, __ocur);
          __written = __ocur - __obegin;

          // Retry only if the facet stopped because the remaining output
          // space is smaller than one worst-case element.
          __again = __res == codecvt_base::partial
            && __src != __last
            && ptrdiff_t(__outstr.size() - __written) < __per_elem;
        }

      if (__res == codecvt_base::error)
        {
          __count = __src - __first;
          return false;
        }

      // The codecvt facet will only return noconv when the types are
      // the same, so avoid instantiating basic_string::assign otherwise
      if _GLIBCXX17_CONSTEXPR (is_same<typename _Codecvt::intern_type,
                                       typename _Codecvt::extern_type>())
        if (__res == codecvt_base::noconv)
          {
            __outstr.assign(__first, __last);
            __count = __last - __first;
            return true;
          }

      // Drop the unused tail of the over-allocated output.
      __outstr.resize(__written);
      __count = __src - __first;
      return true;
    }
  /// Convert the narrow character range [__first, __last) to a wide
  /// string via __cvt.in(), starting from conversion state __state.
  /// __count receives the number of bytes consumed.
  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
    inline bool
    __str_codecvt_in(const char* __first, const char* __last,
                     basic_string<_CharT, _Traits, _Alloc>& __outstr,
                     const codecvt<_CharT, char, _State>& __cvt,
                     _State& __state, size_t& __count)
    {
      using _Facet = codecvt<_CharT, char, _State>;
      // Exact signature of _Facet::in, spelled out so the member pointer
      // has the facet's own class type.
      using _InFn
        = codecvt_base::result
          (_Facet::*)(_State&, const char*, const char*, const char*&,
                      _CharT*, _CharT*, _CharT*&) const;

      _InFn __convert = &_Facet::in;
      return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
                              __count, __convert);
    }
  /// Convert the narrow character range [__first, __last) to a wide
  /// string via __cvt.in(), using a fresh value-initialized conversion
  /// state and discarding the count of consumed bytes.
  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
    inline bool
    __str_codecvt_in(const char* __first, const char* __last,
                     basic_string<_CharT, _Traits, _Alloc>& __outstr,
                     const codecvt<_CharT, char, _State>& __cvt)
    {
      size_t __ignored = 0;
      _State __initial = {};
      return __str_codecvt_in(__first, __last, __outstr, __cvt,
                              __initial, __ignored);
    }
  /// Convert the narrow character range [__first, __last) to a wide
  /// string via __cvt.in() with a fresh conversion state, returning true
  /// only if the conversion succeeded and consumed the entire input.
  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
    inline bool
    __str_codecvt_in_all(const char* __first, const char* __last,
                         basic_string<_CharT, _Traits, _Alloc>& __outstr,
                         const codecvt<_CharT, char, _State>& __cvt)
    {
      _State __initial = {};
      size_t __consumed = 0;
      if (!__str_codecvt_in(__first, __last, __outstr, __cvt,
                            __initial, __consumed))
        return false;
      // A partial conversion leaves some input unread.
      return __consumed == size_t(__last - __first);
    }
  /// Convert the wide character range [__first, __last) to a narrow
  /// string via __cvt.out(), starting from conversion state __state.
  /// __count receives the number of wide characters consumed.
  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
    inline bool
    __str_codecvt_out(const _CharT* __first, const _CharT* __last,
                      basic_string<char, _Traits, _Alloc>& __outstr,
                      const codecvt<_CharT, char, _State>& __cvt,
                      _State& __state, size_t& __count)
    {
      using _Facet = codecvt<_CharT, char, _State>;
      // Exact signature of _Facet::out, spelled out so the member pointer
      // has the facet's own class type.
      using _OutFn
        = codecvt_base::result
          (_Facet::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
                      char*, char*, char*&) const;

      _OutFn __convert = &_Facet::out;
      return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
                              __count, __convert);
    }
  /// Convert the wide character range [__first, __last) to a narrow
  /// string via __cvt.out(), using a fresh value-initialized conversion
  /// state and discarding the count of consumed characters.
  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
    inline bool
    __str_codecvt_out(const _CharT* __first, const _CharT* __last,
                      basic_string<char, _Traits, _Alloc>& __outstr,
                      const codecvt<_CharT, char, _State>& __cvt)
    {
      size_t __ignored = 0;
      _State __initial = {};
      return __str_codecvt_out(__first, __last, __outstr, __cvt,
                               __initial, __ignored);
    }
  /// Convert the wide character range [__first, __last) to a narrow
  /// string via __cvt.out() with a fresh conversion state, returning true
  /// only if the conversion succeeded and consumed the entire input.
  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
    inline bool
    __str_codecvt_out_all(const _CharT* __first, const _CharT* __last,
                          basic_string<char, _Traits, _Alloc>& __outstr,
                          const codecvt<_CharT, char, _State>& __cvt)
    {
      _State __initial = {};
      size_t __consumed = 0;
      if (!__str_codecvt_out(__first, __last, __outstr, __cvt,
                             __initial, __consumed))
        return false;
      // A partial conversion leaves some input unread.
      return __consumed == size_t(__last - __first);
    }
  167. #ifdef _GLIBCXX_USE_CHAR8_T
  168. // Convert wide character string to narrow.
  169. template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  170. inline bool
  171. __str_codecvt_out(const _CharT* __first, const _CharT* __last,
  172. basic_string<char8_t, _Traits, _Alloc>& __outstr,
  173. const codecvt<_CharT, char8_t, _State>& __cvt,
  174. _State& __state, size_t& __count)
  175. {
  176. using _Codecvt = codecvt<_CharT, char8_t, _State>;
  177. using _ConvFn
  178. = codecvt_base::result
  179. (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
  180. char8_t*, char8_t*, char8_t*&) const;
  181. _ConvFn __fn = &codecvt<_CharT, char8_t, _State>::out;
  182. return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
  183. __count, __fn);
  184. }
  185. template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  186. inline bool
  187. __str_codecvt_out(const _CharT* __first, const _CharT* __last,
  188. basic_string<char8_t, _Traits, _Alloc>& __outstr,
  189. const codecvt<_CharT, char8_t, _State>& __cvt)
  190. {
  191. _State __state = {};
  192. size_t __n;
  193. return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
  194. }
  195. #endif // _GLIBCXX_USE_CHAR8_T
  namespace __detail
  {
    /// Minimal non-copyable owning pointer: the held object is deleted
    /// when the _Scoped_ptr is destroyed.
    template<typename _Tp>
      struct _Scoped_ptr
      {
      private:
        _Tp* _M_ptr;

      public:
        /// Take ownership of __ptr, which the caller guarantees is non-null.
        __attribute__((__nonnull__(2)))
        explicit
        _Scoped_ptr(_Tp* __ptr) noexcept
        : _M_ptr(__ptr)
        { }

        /// Take ownership of __ptr; if it is null, report the failure by
        /// calling __throw_logic_error with __msg.
        _Scoped_ptr(_Tp* __ptr, const char* __msg)
        : _M_ptr(__ptr)
        {
          if (_M_ptr == nullptr)
            __throw_logic_error(__msg);
        }

        // Ownership is exclusive, so copying is disabled.
        _Scoped_ptr(const _Scoped_ptr&) = delete;
        _Scoped_ptr& operator=(const _Scoped_ptr&) = delete;

        ~_Scoped_ptr()
        { delete _M_ptr; }

        /// Access the owned object through the pointer.
        __attribute__((__returns_nonnull__))
        _Tp*
        operator->() const noexcept
        { return _M_ptr; }

        /// Access the owned object by reference.
        _Tp&
        operator*() const noexcept
        { return *_M_ptr; }
      };
  }
  222. _GLIBCXX_BEGIN_NAMESPACE_CXX11
  223. /// String conversions
  224. template<typename _Codecvt, typename _Elem = wchar_t,
  225. typename _Wide_alloc = allocator<_Elem>,
  226. typename _Byte_alloc = allocator<char>>
  227. class wstring_convert
  228. {
  229. public:
  230. typedef basic_string<char, char_traits<char>, _Byte_alloc> byte_string;
  231. typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string;
  232. typedef typename _Codecvt::state_type state_type;
  233. typedef typename wide_string::traits_type::int_type int_type;
  234. /// Default constructor.
  235. wstring_convert() : _M_cvt(new _Codecvt()) { }
  236. /** Constructor.
  237. *
  238. * @param __pcvt The facet to use for conversions.
  239. *
  240. * Takes ownership of @p __pcvt and will delete it in the destructor.
  241. */
  242. explicit
  243. wstring_convert(_Codecvt* __pcvt) : _M_cvt(__pcvt, "wstring_convert")
  244. { }
  245. /** Construct with an initial converstion state.
  246. *
  247. * @param __pcvt The facet to use for conversions.
  248. * @param __state Initial conversion state.
  249. *
  250. * Takes ownership of @p __pcvt and will delete it in the destructor.
  251. * The object's conversion state will persist between conversions.
  252. */
  253. wstring_convert(_Codecvt* __pcvt, state_type __state)
  254. : _M_cvt(__pcvt, "wstring_convert"),
  255. _M_state(__state), _M_with_cvtstate(true)
  256. { }
  257. /** Construct with error strings.
  258. *
  259. * @param __byte_err A string to return on failed conversions.
  260. * @param __wide_err A wide string to return on failed conversions.
  261. */
  262. explicit
  263. wstring_convert(const byte_string& __byte_err,
  264. const wide_string& __wide_err = wide_string())
  265. : _M_cvt(new _Codecvt),
  266. _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
  267. _M_with_strings(true)
  268. { }
  269. ~wstring_convert() = default;
  270. // _GLIBCXX_RESOLVE_LIB_DEFECTS
  271. // 2176. Special members for wstring_convert and wbuffer_convert
  272. wstring_convert(const wstring_convert&) = delete;
  273. wstring_convert& operator=(const wstring_convert&) = delete;
  274. /// @{ Convert from bytes.
  275. wide_string
  276. from_bytes(char __byte)
  277. {
  278. char __bytes[2] = { __byte };
  279. return from_bytes(__bytes, __bytes+1);
  280. }
  281. wide_string
  282. from_bytes(const char* __ptr)
  283. { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); }
  284. wide_string
  285. from_bytes(const byte_string& __str)
  286. {
  287. auto __ptr = __str.data();
  288. return from_bytes(__ptr, __ptr + __str.size());
  289. }
  290. wide_string
  291. from_bytes(const char* __first, const char* __last)
  292. {
  293. if (!_M_with_cvtstate)
  294. _M_state = state_type();
  295. wide_string __out{ _M_wide_err_string.get_allocator() };
  296. if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state,
  297. _M_count))
  298. return __out;
  299. if (_M_with_strings)
  300. return _M_wide_err_string;
  301. __throw_range_error("wstring_convert::from_bytes");
  302. }
  303. /// @}
  304. /// @{ Convert to bytes.
  305. byte_string
  306. to_bytes(_Elem __wchar)
  307. {
  308. _Elem __wchars[2] = { __wchar };
  309. return to_bytes(__wchars, __wchars+1);
  310. }
  311. byte_string
  312. to_bytes(const _Elem* __ptr)
  313. {
  314. return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr));
  315. }
  316. byte_string
  317. to_bytes(const wide_string& __wstr)
  318. {
  319. auto __ptr = __wstr.data();
  320. return to_bytes(__ptr, __ptr + __wstr.size());
  321. }
  322. byte_string
  323. to_bytes(const _Elem* __first, const _Elem* __last)
  324. {
  325. if (!_M_with_cvtstate)
  326. _M_state = state_type();
  327. byte_string __out{ _M_byte_err_string.get_allocator() };
  328. if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state,
  329. _M_count))
  330. return __out;
  331. if (_M_with_strings)
  332. return _M_byte_err_string;
  333. __throw_range_error("wstring_convert::to_bytes");
  334. }
  335. /// @}
  336. // _GLIBCXX_RESOLVE_LIB_DEFECTS
  337. // 2174. wstring_convert::converted() should be noexcept
  338. /// The number of elements successfully converted in the last conversion.
  339. size_t converted() const noexcept { return _M_count; }
  340. /// The final conversion state of the last conversion.
  341. state_type state() const { return _M_state; }
  342. private:
  343. __detail::_Scoped_ptr<_Codecvt> _M_cvt;
  344. byte_string _M_byte_err_string;
  345. wide_string _M_wide_err_string;
  346. state_type _M_state = state_type();
  347. size_t _M_count = 0;
  348. bool _M_with_cvtstate = false;
  349. bool _M_with_strings = false;
  350. };
  351. _GLIBCXX_END_NAMESPACE_CXX11
  352. /// Buffer conversions
  353. template<typename _Codecvt, typename _Elem = wchar_t,
  354. typename _Tr = char_traits<_Elem>>
  355. class wbuffer_convert : public basic_streambuf<_Elem, _Tr>
  356. {
  357. typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;
  358. public:
  359. typedef typename _Codecvt::state_type state_type;
  360. /// Default constructor.
  361. wbuffer_convert() : wbuffer_convert(nullptr) { }
  362. /** Constructor.
  363. *
  364. * @param __bytebuf The underlying byte stream buffer.
  365. * @param __pcvt The facet to use for conversions.
  366. * @param __state Initial conversion state.
  367. *
  368. * Takes ownership of @p __pcvt and will delete it in the destructor.
  369. */
  370. explicit
  371. wbuffer_convert(streambuf* __bytebuf, _Codecvt* __pcvt = new _Codecvt,
  372. state_type __state = state_type())
  373. : _M_buf(__bytebuf), _M_cvt(__pcvt, "wbuffer_convert"),
  374. _M_state(__state), _M_always_noconv(_M_cvt->always_noconv())
  375. {
  376. if (_M_buf)
  377. {
  378. this->setp(_M_put_area, _M_put_area + _S_buffer_length);
  379. this->setg(_M_get_area + _S_putback_length,
  380. _M_get_area + _S_putback_length,
  381. _M_get_area + _S_putback_length);
  382. }
  383. }
  384. ~wbuffer_convert() = default;
  385. // _GLIBCXX_RESOLVE_LIB_DEFECTS
  386. // 2176. Special members for wstring_convert and wbuffer_convert
  387. wbuffer_convert(const wbuffer_convert&) = delete;
  388. wbuffer_convert& operator=(const wbuffer_convert&) = delete;
  389. streambuf* rdbuf() const noexcept { return _M_buf; }
  390. streambuf*
  391. rdbuf(streambuf *__bytebuf) noexcept
  392. {
  393. auto __prev = _M_buf;
  394. _M_buf = __bytebuf;
  395. return __prev;
  396. }
  397. /// The conversion state following the last conversion.
  398. state_type state() const noexcept { return _M_state; }
  399. protected:
  400. int
  401. sync()
  402. { return _M_buf && _M_conv_put() && !_M_buf->pubsync() ? 0 : -1; }
  403. typename _Wide_streambuf::int_type
  404. overflow(typename _Wide_streambuf::int_type __out)
  405. {
  406. if (!_M_buf || !_M_conv_put())
  407. return _Tr::eof();
  408. else if (!_Tr::eq_int_type(__out, _Tr::eof()))
  409. return this->sputc(__out);
  410. return _Tr::not_eof(__out);
  411. }
  412. typename _Wide_streambuf::int_type
  413. underflow()
  414. {
  415. if (!_M_buf)
  416. return _Tr::eof();
  417. if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get()))
  418. return _Tr::to_int_type(*this->gptr());
  419. else
  420. return _Tr::eof();
  421. }
  422. streamsize
  423. xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
  424. {
  425. if (!_M_buf || __n == 0)
  426. return 0;
  427. streamsize __done = 0;
  428. do
  429. {
  430. auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
  431. __n - __done);
  432. _Tr::copy(this->pptr(), __s + __done, __nn);
  433. this->pbump(__nn);
  434. __done += __nn;
  435. } while (__done < __n && _M_conv_put());
  436. return __done;
  437. }
  438. private:
  439. // fill the get area from converted contents of the byte stream buffer
  440. bool
  441. _M_conv_get()
  442. {
  443. const streamsize __pb1 = this->gptr() - this->eback();
  444. const streamsize __pb2 = _S_putback_length;
  445. const streamsize __npb = std::min(__pb1, __pb2);
  446. _Tr::move(_M_get_area + _S_putback_length - __npb,
  447. this->gptr() - __npb, __npb);
  448. streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
  449. __nbytes = std::min(__nbytes, _M_buf->in_avail());
  450. if (__nbytes < 1)
  451. __nbytes = 1;
  452. __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
  453. if (__nbytes < 1)
  454. return false;
  455. __nbytes += _M_unconv;
  456. // convert _M_get_buf into _M_get_area
  457. _Elem* __outbuf = _M_get_area + _S_putback_length;
  458. _Elem* __outnext = __outbuf;
  459. const char* __bnext = _M_get_buf;
  460. codecvt_base::result __result;
  461. if (_M_always_noconv)
  462. __result = codecvt_base::noconv;
  463. else
  464. {
  465. _Elem* __outend = _M_get_area + _S_buffer_length;
  466. __result = _M_cvt->in(_M_state,
  467. __bnext, __bnext + __nbytes, __bnext,
  468. __outbuf, __outend, __outnext);
  469. }
  470. if (__result == codecvt_base::noconv)
  471. {
  472. // cast is safe because noconv means _Elem is same type as char
  473. auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
  474. _Tr::copy(__outbuf, __get_buf, __nbytes);
  475. _M_unconv = 0;
  476. return true;
  477. }
  478. if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
  479. char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);
  480. this->setg(__outbuf, __outbuf, __outnext);
  481. return __result != codecvt_base::error;
  482. }
  483. // unused
  484. bool
  485. _M_put(...)
  486. { return false; }
  487. bool
  488. _M_put(const char* __p, streamsize __n)
  489. {
  490. if (_M_buf->sputn(__p, __n) < __n)
  491. return false;
  492. return true;
  493. }
  494. // convert the put area and write to the byte stream buffer
  495. bool
  496. _M_conv_put()
  497. {
  498. _Elem* const __first = this->pbase();
  499. const _Elem* const __last = this->pptr();
  500. const streamsize __pending = __last - __first;
  501. if (_M_always_noconv)
  502. return _M_put(__first, __pending);
  503. char __outbuf[2 * _S_buffer_length];
  504. const _Elem* __next = __first;
  505. const _Elem* __start;
  506. do
  507. {
  508. __start = __next;
  509. char* __outnext = __outbuf;
  510. char* const __outlast = __outbuf + sizeof(__outbuf);
  511. auto __result = _M_cvt->out(_M_state, __next, __last, __next,
  512. __outnext, __outlast, __outnext);
  513. if (__result == codecvt_base::error)
  514. return false;
  515. else if (__result == codecvt_base::noconv)
  516. return _M_put(__next, __pending);
  517. if (!_M_put(__outbuf, __outnext - __outbuf))
  518. return false;
  519. }
  520. while (__next != __last && __next != __start);
  521. if (__next != __last)
  522. _Tr::move(__first, __next, __last - __next);
  523. this->pbump(__first - __next);
  524. return __next != __first;
  525. }
  526. streambuf* _M_buf;
  527. __detail::_Scoped_ptr<_Codecvt> _M_cvt;
  528. state_type _M_state;
  529. static const streamsize _S_buffer_length = 32;
  530. static const streamsize _S_putback_length = 3;
  531. _Elem _M_put_area[_S_buffer_length];
  532. _Elem _M_get_area[_S_buffer_length];
  533. streamsize _M_unconv = 0;
  534. char _M_get_buf[_S_buffer_length-_S_putback_length];
  535. bool _M_always_noconv;
  536. };
  537. /// @} group locales
  538. _GLIBCXX_END_NAMESPACE_VERSION
  539. } // namespace
  540. #endif // __cplusplus
  541. #endif /* _LOCALE_CONV_H */