1 // Locale support (codecvt) -*- C++ -*- 2 3 // Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 4 // 2009 Free Software Foundation, Inc. 5 // 6 // This file is part of the GNU ISO C++ Library. This library is free 7 // software; you can redistribute it and/or modify it under the 8 // terms of the GNU General Public License as published by the 9 // Free Software Foundation; either version 3, or (at your option) 10 // any later version. 11 12 // This library is distributed in the hope that it will be useful, 13 // but WITHOUT ANY WARRANTY; without even the implied warranty of 14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 // GNU General Public License for more details. 16 17 // Under Section 7 of GPL version 3, you are granted additional 18 // permissions described in the GCC Runtime Library Exception, version 19 // 3.1, as published by the Free Software Foundation. 20 21 // You should have received a copy of the GNU General Public License and 22 // a copy of the GCC Runtime Library Exception along with this program; 23 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24 // <http://www.gnu.org/licenses/>. 25 26 /** @file bits/codecvt.h 27 * This is an internal header file, included by other library headers. 28 * You should not attempt to use it directly. 29 */ 30 31 // 32 // ISO C++ 14882: 22.2.1.5 Template class codecvt 33 // 34 35 // Written by Benjamin Kosnik <bkoz (at) redhat.com> 36 37 #ifndef _CODECVT_H 38 #define _CODECVT_H 1 39 40 #pragma GCC system_header 41 42 _GLIBCXX_BEGIN_NAMESPACE(std) 43 44 /// Empty base class for codecvt facet [22.2.1.5]. 45 class codecvt_base 46 { 47 public: 48 enum result 49 { 50 ok, 51 partial, 52 error, 53 noconv 54 }; 55 }; 56 57 /** 58 * @brief Common base for codecvt functions. 59 * 60 * This template class provides implementations of the public functions 61 * that forward to the protected virtual functions. 62 * 63 * This template also provides abstract stubs for the protected virtual 64 * functions. 65 */ 66 template<typename _InternT, typename _ExternT, typename _StateT> 67 class __codecvt_abstract_base 68 : public locale::facet, public codecvt_base 69 { 70 public: 71 // Types: 72 typedef codecvt_base::result result; 73 typedef _InternT intern_type; 74 typedef _ExternT extern_type; 75 typedef _StateT state_type; 76 77 // 22.2.1.5.1 codecvt members 78 /** 79 * @brief Convert from internal to external character set. 80 * 81 * Converts input string of intern_type to output string of 82 * extern_type. This is analogous to wcsrtombs. It does this by 83 * calling codecvt::do_out. 84 * 85 * The source and destination character sets are determined by the 86 * facet's locale, internal and external types. 87 * 88 * The characters in [from,from_end) are converted and written to 89 * [to,to_end). from_next and to_next are set to point to the 90 * character following the last successfully converted character, 91 * respectively. If the result needed no conversion, from_next and 92 * to_next are not affected. 93 * 94 * The @a state argument should be initialized if the input is at the 95 * beginning and carried from a previous call if continuing 96 * conversion. There are no guarantees about how @a state is used. 97 * 98 * The result returned is a member of codecvt_base::result. If 99 * all the input is converted, returns codecvt_base::ok. If no 100 * conversion is necessary, returns codecvt_base::noconv. If 101 * the input ends early or there is insufficient space in the 102 * output, returns codecvt_base::partial. Otherwise the 103 * conversion failed and codecvt_base::error is returned. 104 * 105 * @param state Persistent conversion state data. 106 * @param from Start of input. 107 * @param from_end End of input. 108 * @param from_next Returns start of unconverted data. 109 * @param to Start of output buffer. 110 * @param to_end End of output buffer. 111 * @param to_next Returns start of unused output area. 112 * @return codecvt_base::result. 113 */ 114 result 115 out(state_type& __state, const intern_type* __from, 116 const intern_type* __from_end, const intern_type*& __from_next, 117 extern_type* __to, extern_type* __to_end, 118 extern_type*& __to_next) const 119 { 120 return this->do_out(__state, __from, __from_end, __from_next, 121 __to, __to_end, __to_next); 122 } 123 124 /** 125 * @brief Reset conversion state. 126 * 127 * Writes characters to output that would restore @a state to initial 128 * conditions. The idea is that if a partial conversion occurs, then 129 * the converting the characters written by this function would leave 130 * the state in initial conditions, rather than partial conversion 131 * state. It does this by calling codecvt::do_unshift(). 132 * 133 * For example, if 4 external characters always converted to 1 internal 134 * character, and input to in() had 6 external characters with state 135 * saved, this function would write two characters to the output and 136 * set the state to initialized conditions. 137 * 138 * The source and destination character sets are determined by the 139 * facet's locale, internal and external types. 140 * 141 * The result returned is a member of codecvt_base::result. If the 142 * state could be reset and data written, returns codecvt_base::ok. If 143 * no conversion is necessary, returns codecvt_base::noconv. If the 144 * output has insufficient space, returns codecvt_base::partial. 145 * Otherwise the reset failed and codecvt_base::error is returned. 146 * 147 * @param state Persistent conversion state data. 148 * @param to Start of output buffer. 149 * @param to_end End of output buffer. 150 * @param to_next Returns start of unused output area. 151 * @return codecvt_base::result. 152 */ 153 result 154 unshift(state_type& __state, extern_type* __to, extern_type* __to_end, 155 extern_type*& __to_next) const 156 { return this->do_unshift(__state, __to,__to_end,__to_next); } 157 158 /** 159 * @brief Convert from external to internal character set. 160 * 161 * Converts input string of extern_type to output string of 162 * intern_type. This is analogous to mbsrtowcs. It does this by 163 * calling codecvt::do_in. 164 * 165 * The source and destination character sets are determined by the 166 * facet's locale, internal and external types. 167 * 168 * The characters in [from,from_end) are converted and written to 169 * [to,to_end). from_next and to_next are set to point to the 170 * character following the last successfully converted character, 171 * respectively. If the result needed no conversion, from_next and 172 * to_next are not affected. 173 * 174 * The @a state argument should be initialized if the input is at the 175 * beginning and carried from a previous call if continuing 176 * conversion. There are no guarantees about how @a state is used. 177 * 178 * The result returned is a member of codecvt_base::result. If 179 * all the input is converted, returns codecvt_base::ok. If no 180 * conversion is necessary, returns codecvt_base::noconv. If 181 * the input ends early or there is insufficient space in the 182 * output, returns codecvt_base::partial. Otherwise the 183 * conversion failed and codecvt_base::error is returned. 184 * 185 * @param state Persistent conversion state data. 186 * @param from Start of input. 187 * @param from_end End of input. 188 * @param from_next Returns start of unconverted data. 189 * @param to Start of output buffer. 190 * @param to_end End of output buffer. 191 * @param to_next Returns start of unused output area. 192 * @return codecvt_base::result. 193 */ 194 result 195 in(state_type& __state, const extern_type* __from, 196 const extern_type* __from_end, const extern_type*& __from_next, 197 intern_type* __to, intern_type* __to_end, 198 intern_type*& __to_next) const 199 { 200 return this->do_in(__state, __from, __from_end, __from_next, 201 __to, __to_end, __to_next); 202 } 203 204 int 205 encoding() const throw() 206 { return this->do_encoding(); } 207 208 bool 209 always_noconv() const throw() 210 { return this->do_always_noconv(); } 211 212 int 213 length(state_type& __state, const extern_type* __from, 214 const extern_type* __end, size_t __max) const 215 { return this->do_length(__state, __from, __end, __max); } 216 217 int 218 max_length() const throw() 219 { return this->do_max_length(); } 220 221 protected: 222 explicit 223 __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { } 224 225 virtual 226 ~__codecvt_abstract_base() { } 227 228 /** 229 * @brief Convert from internal to external character set. 230 * 231 * Converts input string of intern_type to output string of 232 * extern_type. This function is a hook for derived classes to change 233 * the value returned. @see out for more information. 234 */ 235 virtual result 236 do_out(state_type& __state, const intern_type* __from, 237 const intern_type* __from_end, const intern_type*& __from_next, 238 extern_type* __to, extern_type* __to_end, 239 extern_type*& __to_next) const = 0; 240 241 virtual result 242 do_unshift(state_type& __state, extern_type* __to, 243 extern_type* __to_end, extern_type*& __to_next) const = 0; 244 245 virtual result 246 do_in(state_type& __state, const extern_type* __from, 247 const extern_type* __from_end, const extern_type*& __from_next, 248 intern_type* __to, intern_type* __to_end, 249 intern_type*& __to_next) const = 0; 250 251 virtual int 252 do_encoding() const throw() = 0; 253 254 virtual bool 255 do_always_noconv() const throw() = 0; 256 257 virtual int 258 do_length(state_type&, const extern_type* __from, 259 const extern_type* __end, size_t __max) const = 0; 260 261 virtual int 262 do_max_length() const throw() = 0; 263 }; 264 265 /// @brief class codecvt [22.2.1.5]. 266 /// NB: Generic, mostly useless implementation. 267 template<typename _InternT, typename _ExternT, typename _StateT> 268 class codecvt 269 : public __codecvt_abstract_base<_InternT, _ExternT, _StateT> 270 { 271 public: 272 // Types: 273 typedef codecvt_base::result result; 274 typedef _InternT intern_type; 275 typedef _ExternT extern_type; 276 typedef _StateT state_type; 277 278 protected: 279 __c_locale _M_c_locale_codecvt; 280 281 public: 282 static locale::id id; 283 284 explicit 285 codecvt(size_t __refs = 0) 286 : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs) { } 287 288 explicit 289 codecvt(__c_locale __cloc, size_t __refs = 0); 290 291 protected: 292 virtual 293 ~codecvt() { } 294 295 virtual result 296 do_out(state_type& __state, const intern_type* __from, 297 const intern_type* __from_end, const intern_type*& __from_next, 298 extern_type* __to, extern_type* __to_end, 299 extern_type*& __to_next) const; 300 301 virtual result 302 do_unshift(state_type& __state, extern_type* __to, 303 extern_type* __to_end, extern_type*& __to_next) const; 304 305 virtual result 306 do_in(state_type& __state, const extern_type* __from, 307 const extern_type* __from_end, const extern_type*& __from_next, 308 intern_type* __to, intern_type* __to_end, 309 intern_type*& __to_next) const; 310 311 virtual int 312 do_encoding() const throw(); 313 314 virtual bool 315 do_always_noconv() const throw(); 316 317 virtual int 318 do_length(state_type&, const extern_type* __from, 319 const extern_type* __end, size_t __max) const; 320 321 virtual int 322 do_max_length() const throw(); 323 }; 324 325 template<typename _InternT, typename _ExternT, typename _StateT> 326 locale::id codecvt<_InternT, _ExternT, _StateT>::id; 327 328 /// class codecvt<char, char, mbstate_t> specialization. 329 template<> 330 class codecvt<char, char, mbstate_t> 331 : public __codecvt_abstract_base<char, char, mbstate_t> 332 { 333 public: 334 // Types: 335 typedef char intern_type; 336 typedef char extern_type; 337 typedef mbstate_t state_type; 338 339 protected: 340 __c_locale _M_c_locale_codecvt; 341 342 public: 343 static locale::id id; 344 345 explicit 346 codecvt(size_t __refs = 0); 347 348 explicit 349 codecvt(__c_locale __cloc, size_t __refs = 0); 350 351 protected: 352 virtual 353 ~codecvt(); 354 355 virtual result 356 do_out(state_type& __state, const intern_type* __from, 357 const intern_type* __from_end, const intern_type*& __from_next, 358 extern_type* __to, extern_type* __to_end, 359 extern_type*& __to_next) const; 360 361 virtual result 362 do_unshift(state_type& __state, extern_type* __to, 363 extern_type* __to_end, extern_type*& __to_next) const; 364 365 virtual result 366 do_in(state_type& __state, const extern_type* __from, 367 const extern_type* __from_end, const extern_type*& __from_next, 368 intern_type* __to, intern_type* __to_end, 369 intern_type*& __to_next) const; 370 371 virtual int 372 do_encoding() const throw(); 373 374 virtual bool 375 do_always_noconv() const throw(); 376 377 virtual int 378 do_length(state_type&, const extern_type* __from, 379 const extern_type* __end, size_t __max) const; 380 381 virtual int 382 do_max_length() const throw(); 383 }; 384 385 #ifdef _GLIBCXX_USE_WCHAR_T 386 /// class codecvt<wchar_t, char, mbstate_t> specialization. 387 template<> 388 class codecvt<wchar_t, char, mbstate_t> 389 : public __codecvt_abstract_base<wchar_t, char, mbstate_t> 390 { 391 public: 392 // Types: 393 typedef wchar_t intern_type; 394 typedef char extern_type; 395 typedef mbstate_t state_type; 396 397 protected: 398 __c_locale _M_c_locale_codecvt; 399 400 public: 401 static locale::id id; 402 403 explicit 404 codecvt(size_t __refs = 0); 405 406 explicit 407 codecvt(__c_locale __cloc, size_t __refs = 0); 408 409 protected: 410 virtual 411 ~codecvt(); 412 413 virtual result 414 do_out(state_type& __state, const intern_type* __from, 415 const intern_type* __from_end, const intern_type*& __from_next, 416 extern_type* __to, extern_type* __to_end, 417 extern_type*& __to_next) const; 418 419 virtual result 420 do_unshift(state_type& __state, 421 extern_type* __to, extern_type* __to_end, 422 extern_type*& __to_next) const; 423 424 virtual result 425 do_in(state_type& __state, 426 const extern_type* __from, const extern_type* __from_end, 427 const extern_type*& __from_next, 428 intern_type* __to, intern_type* __to_end, 429 intern_type*& __to_next) const; 430 431 virtual 432 int do_encoding() const throw(); 433 434 virtual 435 bool do_always_noconv() const throw(); 436 437 virtual 438 int do_length(state_type&, const extern_type* __from, 439 const extern_type* __end, size_t __max) const; 440 441 virtual int 442 do_max_length() const throw(); 443 }; 444 #endif //_GLIBCXX_USE_WCHAR_T 445 446 /// class codecvt_byname [22.2.1.6]. 447 template<typename _InternT, typename _ExternT, typename _StateT> 448 class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT> 449 { 450 public: 451 explicit 452 codecvt_byname(const char* __s, size_t __refs = 0) 453 : codecvt<_InternT, _ExternT, _StateT>(__refs) 454 { 455 if (__builtin_strcmp(__s, "C") != 0 456 && __builtin_strcmp(__s, "POSIX") != 0) 457 { 458 this->_S_destroy_c_locale(this->_M_c_locale_codecvt); 459 this->_S_create_c_locale(this->_M_c_locale_codecvt, __s); 460 } 461 } 462 463 protected: 464 virtual 465 ~codecvt_byname() { } 466 }; 467 468 // Inhibit implicit instantiations for required instantiations, 469 // which are defined via explicit instantiations elsewhere. 470 // NB: This syntax is a GNU extension. 471 #if _GLIBCXX_EXTERN_TEMPLATE 472 extern template class codecvt_byname<char, char, mbstate_t>; 473 474 extern template 475 const codecvt<char, char, mbstate_t>& 476 use_facet<codecvt<char, char, mbstate_t> >(const locale&); 477 478 extern template 479 bool 480 has_facet<codecvt<char, char, mbstate_t> >(const locale&); 481 482 #ifdef _GLIBCXX_USE_WCHAR_T 483 extern template class codecvt_byname<wchar_t, char, mbstate_t>; 484 485 extern template 486 const codecvt<wchar_t, char, mbstate_t>& 487 use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&); 488 489 extern template 490 bool 491 has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&); 492 #endif 493 #endif 494 495 _GLIBCXX_END_NAMESPACE 496 497 #endif // _CODECVT_H 498