Home | History | Annotate | Download | only in bits
      1 // Locale support (codecvt) -*- C++ -*-
      2 
      3 // Copyright (C) 2000-2013 Free Software Foundation, Inc.
      4 //
      5 // This file is part of the GNU ISO C++ Library.  This library is free
      6 // software; you can redistribute it and/or modify it under the
      7 // terms of the GNU General Public License as published by the
      8 // Free Software Foundation; either version 3, or (at your option)
      9 // any later version.
     10 
     11 // This library is distributed in the hope that it will be useful,
     12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 // GNU General Public License for more details.
     15 
     16 // Under Section 7 of GPL version 3, you are granted additional
     17 // permissions described in the GCC Runtime Library Exception, version
     18 // 3.1, as published by the Free Software Foundation.
     19 
     20 // You should have received a copy of the GNU General Public License and
     21 // a copy of the GCC Runtime Library Exception along with this program;
     22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     23 // <http://www.gnu.org/licenses/>.
     24 
     25 /** @file bits/codecvt.h
     26  *  This is an internal header file, included by other library headers.
     27  *  Do not attempt to use it directly. @headername{locale}
     28  */
     29 
     30 //
     31 // ISO C++ 14882: 22.2.1.5 Template class codecvt
     32 //
     33 
     34 // Written by Benjamin Kosnik <bkoz (at) redhat.com>
     35 
     36 #ifndef _CODECVT_H
     37 #define _CODECVT_H 1
     38 
     39 #pragma GCC system_header
     40 
     41 namespace std _GLIBCXX_VISIBILITY(default)
     42 {
     43 _GLIBCXX_BEGIN_NAMESPACE_VERSION
     44 
     45   /// Empty base class for codecvt facet [22.2.1.5].
     46   class codecvt_base
     47   {
     48   public:
     49     enum result
     50     {
     51       ok,
     52       partial,
     53       error,
     54       noconv
     55     };
     56   };
     57 
     58   /**
     59    *  @brief  Common base for codecvt functions.
     60    *
     61    *  This template class provides implementations of the public functions
     62    *  that forward to the protected virtual functions.
     63    *
     64    *  This template also provides abstract stubs for the protected virtual
     65    *  functions.
     66   */
     67   template<typename _InternT, typename _ExternT, typename _StateT>
     68     class __codecvt_abstract_base
     69     : public locale::facet, public codecvt_base
     70     {
     71     public:
     72       // Types:
     73       typedef codecvt_base::result	result;
     74       typedef _InternT			intern_type;
     75       typedef _ExternT			extern_type;
     76       typedef _StateT			state_type;
     77 
     78       // 22.2.1.5.1 codecvt members
     79       /**
     80        *  @brief  Convert from internal to external character set.
     81        *
     82        *  Converts input string of intern_type to output string of
     83        *  extern_type.  This is analogous to wcsrtombs.  It does this by
     84        *  calling codecvt::do_out.
     85        *
     86        *  The source and destination character sets are determined by the
     87        *  facet's locale, internal and external types.
     88        *
     89        *  The characters in [from,from_end) are converted and written to
     90        *  [to,to_end).  from_next and to_next are set to point to the
     91        *  character following the last successfully converted character,
     92        *  respectively.  If the result needed no conversion, from_next and
     93        *  to_next are not affected.
     94        *
     95        *  The @a state argument should be initialized if the input is at the
     96        *  beginning and carried from a previous call if continuing
     97        *  conversion.  There are no guarantees about how @a state is used.
     98        *
     99        *  The result returned is a member of codecvt_base::result.  If
    100        *  all the input is converted, returns codecvt_base::ok.  If no
    101        *  conversion is necessary, returns codecvt_base::noconv.  If
    102        *  the input ends early or there is insufficient space in the
    103        *  output, returns codecvt_base::partial.  Otherwise the
    104        *  conversion failed and codecvt_base::error is returned.
    105        *
    106        *  @param  __state  Persistent conversion state data.
    107        *  @param  __from  Start of input.
    108        *  @param  __from_end  End of input.
    109        *  @param  __from_next  Returns start of unconverted data.
    110        *  @param  __to  Start of output buffer.
    111        *  @param  __to_end  End of output buffer.
    112        *  @param  __to_next  Returns start of unused output area.
    113        *  @return  codecvt_base::result.
    114       */
    115       result
    116       out(state_type& __state, const intern_type* __from,
    117 	  const intern_type* __from_end, const intern_type*& __from_next,
    118 	  extern_type* __to, extern_type* __to_end,
    119 	  extern_type*& __to_next) const
    120       {
    121 	return this->do_out(__state, __from, __from_end, __from_next,
    122 			    __to, __to_end, __to_next);
    123       }
    124 
    125       /**
    126        *  @brief  Reset conversion state.
    127        *
    128        *  Writes characters to output that would restore @a state to initial
    129        *  conditions.  The idea is that if a partial conversion occurs, then
    130        *  the converting the characters written by this function would leave
    131        *  the state in initial conditions, rather than partial conversion
    132        *  state.  It does this by calling codecvt::do_unshift().
    133        *
    134        *  For example, if 4 external characters always converted to 1 internal
    135        *  character, and input to in() had 6 external characters with state
    136        *  saved, this function would write two characters to the output and
    137        *  set the state to initialized conditions.
    138        *
    139        *  The source and destination character sets are determined by the
    140        *  facet's locale, internal and external types.
    141        *
    142        *  The result returned is a member of codecvt_base::result.  If the
    143        *  state could be reset and data written, returns codecvt_base::ok.  If
    144        *  no conversion is necessary, returns codecvt_base::noconv.  If the
    145        *  output has insufficient space, returns codecvt_base::partial.
    146        *  Otherwise the reset failed and codecvt_base::error is returned.
    147        *
    148        *  @param  __state  Persistent conversion state data.
    149        *  @param  __to  Start of output buffer.
    150        *  @param  __to_end  End of output buffer.
    151        *  @param  __to_next  Returns start of unused output area.
    152        *  @return  codecvt_base::result.
    153       */
    154       result
    155       unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
    156 	      extern_type*& __to_next) const
    157       { return this->do_unshift(__state, __to,__to_end,__to_next); }
    158 
    159       /**
    160        *  @brief  Convert from external to internal character set.
    161        *
    162        *  Converts input string of extern_type to output string of
    163        *  intern_type.  This is analogous to mbsrtowcs.  It does this by
    164        *  calling codecvt::do_in.
    165        *
    166        *  The source and destination character sets are determined by the
    167        *  facet's locale, internal and external types.
    168        *
    169        *  The characters in [from,from_end) are converted and written to
    170        *  [to,to_end).  from_next and to_next are set to point to the
    171        *  character following the last successfully converted character,
    172        *  respectively.  If the result needed no conversion, from_next and
    173        *  to_next are not affected.
    174        *
    175        *  The @a state argument should be initialized if the input is at the
    176        *  beginning and carried from a previous call if continuing
    177        *  conversion.  There are no guarantees about how @a state is used.
    178        *
    179        *  The result returned is a member of codecvt_base::result.  If
    180        *  all the input is converted, returns codecvt_base::ok.  If no
    181        *  conversion is necessary, returns codecvt_base::noconv.  If
    182        *  the input ends early or there is insufficient space in the
    183        *  output, returns codecvt_base::partial.  Otherwise the
    184        *  conversion failed and codecvt_base::error is returned.
    185        *
    186        *  @param  __state  Persistent conversion state data.
    187        *  @param  __from  Start of input.
    188        *  @param  __from_end  End of input.
    189        *  @param  __from_next  Returns start of unconverted data.
    190        *  @param  __to  Start of output buffer.
    191        *  @param  __to_end  End of output buffer.
    192        *  @param  __to_next  Returns start of unused output area.
    193        *  @return  codecvt_base::result.
    194       */
    195       result
    196       in(state_type& __state, const extern_type* __from,
    197 	 const extern_type* __from_end, const extern_type*& __from_next,
    198 	 intern_type* __to, intern_type* __to_end,
    199 	 intern_type*& __to_next) const
    200       {
    201 	return this->do_in(__state, __from, __from_end, __from_next,
    202 			   __to, __to_end, __to_next);
    203       }
    204 
    205       int
    206       encoding() const throw()
    207       { return this->do_encoding(); }
    208 
    209       bool
    210       always_noconv() const throw()
    211       { return this->do_always_noconv(); }
    212 
    213       int
    214       length(state_type& __state, const extern_type* __from,
    215 	     const extern_type* __end, size_t __max) const
    216       { return this->do_length(__state, __from, __end, __max); }
    217 
    218       int
    219       max_length() const throw()
    220       { return this->do_max_length(); }
    221 
    222     protected:
    223       explicit
    224       __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }
    225 
    226       virtual
    227       ~__codecvt_abstract_base() { }
    228 
    229       /**
    230        *  @brief  Convert from internal to external character set.
    231        *
    232        *  Converts input string of intern_type to output string of
    233        *  extern_type.  This function is a hook for derived classes to change
    234        *  the value returned.  @see out for more information.
    235       */
    236       virtual result
    237       do_out(state_type& __state, const intern_type* __from,
    238 	     const intern_type* __from_end, const intern_type*& __from_next,
    239 	     extern_type* __to, extern_type* __to_end,
    240 	     extern_type*& __to_next) const = 0;
    241 
    242       virtual result
    243       do_unshift(state_type& __state, extern_type* __to,
    244 		 extern_type* __to_end, extern_type*& __to_next) const = 0;
    245 
    246       virtual result
    247       do_in(state_type& __state, const extern_type* __from,
    248 	    const extern_type* __from_end, const extern_type*& __from_next,
    249 	    intern_type* __to, intern_type* __to_end,
    250 	    intern_type*& __to_next) const = 0;
    251 
    252       virtual int
    253       do_encoding() const throw() = 0;
    254 
    255       virtual bool
    256       do_always_noconv() const throw() = 0;
    257 
    258       virtual int
    259       do_length(state_type&, const extern_type* __from,
    260 		const extern_type* __end, size_t __max) const = 0;
    261 
    262       virtual int
    263       do_max_length() const throw() = 0;
    264     };
    265 
    266 
    267 
    268   /**
    269    *  @brief  Primary class template codecvt.
    270    *  @ingroup locales
    271    *
    272    *  NB: Generic, mostly useless implementation.
    273    *
    274   */
    275    template<typename _InternT, typename _ExternT, typename _StateT>
    276     class codecvt
    277     : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
    278     {
    279     public:
    280       // Types:
    281       typedef codecvt_base::result	result;
    282       typedef _InternT			intern_type;
    283       typedef _ExternT			extern_type;
    284       typedef _StateT			state_type;
    285 
    286     protected:
    287       __c_locale			_M_c_locale_codecvt;
    288 
    289     public:
    290       static locale::id			id;
    291 
    292       explicit
    293       codecvt(size_t __refs = 0)
    294       : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs),
    295 	_M_c_locale_codecvt(0)
    296       { }
    297 
    298       explicit
    299       codecvt(__c_locale __cloc, size_t __refs = 0);
    300 
    301     protected:
    302       virtual
    303       ~codecvt() { }
    304 
    305       virtual result
    306       do_out(state_type& __state, const intern_type* __from,
    307 	     const intern_type* __from_end, const intern_type*& __from_next,
    308 	     extern_type* __to, extern_type* __to_end,
    309 	     extern_type*& __to_next) const;
    310 
    311       virtual result
    312       do_unshift(state_type& __state, extern_type* __to,
    313 		 extern_type* __to_end, extern_type*& __to_next) const;
    314 
    315       virtual result
    316       do_in(state_type& __state, const extern_type* __from,
    317 	    const extern_type* __from_end, const extern_type*& __from_next,
    318 	    intern_type* __to, intern_type* __to_end,
    319 	    intern_type*& __to_next) const;
    320 
    321       virtual int
    322       do_encoding() const throw();
    323 
    324       virtual bool
    325       do_always_noconv() const throw();
    326 
    327       virtual int
    328       do_length(state_type&, const extern_type* __from,
    329 		const extern_type* __end, size_t __max) const;
    330 
    331       virtual int
    332       do_max_length() const throw();
    333     };
    334 
    335   template<typename _InternT, typename _ExternT, typename _StateT>
    336     locale::id codecvt<_InternT, _ExternT, _StateT>::id;
    337 
    338   /// class codecvt<char, char, mbstate_t> specialization.
    339   template<>
    340     class codecvt<char, char, mbstate_t>
    341     : public __codecvt_abstract_base<char, char, mbstate_t>
    342     {
    343     public:
    344       // Types:
    345       typedef char			intern_type;
    346       typedef char			extern_type;
    347       typedef mbstate_t			state_type;
    348 
    349     protected:
    350       __c_locale			_M_c_locale_codecvt;
    351 
    352     public:
    353       static locale::id id;
    354 
    355       explicit
    356       codecvt(size_t __refs = 0);
    357 
    358       explicit
    359       codecvt(__c_locale __cloc, size_t __refs = 0);
    360 
    361     protected:
    362       virtual
    363       ~codecvt();
    364 
    365       virtual result
    366       do_out(state_type& __state, const intern_type* __from,
    367 	     const intern_type* __from_end, const intern_type*& __from_next,
    368 	     extern_type* __to, extern_type* __to_end,
    369 	     extern_type*& __to_next) const;
    370 
    371       virtual result
    372       do_unshift(state_type& __state, extern_type* __to,
    373 		 extern_type* __to_end, extern_type*& __to_next) const;
    374 
    375       virtual result
    376       do_in(state_type& __state, const extern_type* __from,
    377 	    const extern_type* __from_end, const extern_type*& __from_next,
    378 	    intern_type* __to, intern_type* __to_end,
    379 	    intern_type*& __to_next) const;
    380 
    381       virtual int
    382       do_encoding() const throw();
    383 
    384       virtual bool
    385       do_always_noconv() const throw();
    386 
    387       virtual int
    388       do_length(state_type&, const extern_type* __from,
    389 		const extern_type* __end, size_t __max) const;
    390 
    391       virtual int
    392       do_max_length() const throw();
    393   };
    394 
    395 #ifdef _GLIBCXX_USE_WCHAR_T
    396   /// class codecvt<wchar_t, char, mbstate_t> specialization.
    397   template<>
    398     class codecvt<wchar_t, char, mbstate_t>
    399     : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
    400     {
    401     public:
    402       // Types:
    403       typedef wchar_t			intern_type;
    404       typedef char			extern_type;
    405       typedef mbstate_t			state_type;
    406 
    407     protected:
    408       __c_locale			_M_c_locale_codecvt;
    409 
    410     public:
    411       static locale::id			id;
    412 
    413       explicit
    414       codecvt(size_t __refs = 0);
    415 
    416       explicit
    417       codecvt(__c_locale __cloc, size_t __refs = 0);
    418 
    419     protected:
    420       virtual
    421       ~codecvt();
    422 
    423       virtual result
    424       do_out(state_type& __state, const intern_type* __from,
    425 	     const intern_type* __from_end, const intern_type*& __from_next,
    426 	     extern_type* __to, extern_type* __to_end,
    427 	     extern_type*& __to_next) const;
    428 
    429       virtual result
    430       do_unshift(state_type& __state,
    431 		 extern_type* __to, extern_type* __to_end,
    432 		 extern_type*& __to_next) const;
    433 
    434       virtual result
    435       do_in(state_type& __state,
    436 	     const extern_type* __from, const extern_type* __from_end,
    437 	     const extern_type*& __from_next,
    438 	     intern_type* __to, intern_type* __to_end,
    439 	     intern_type*& __to_next) const;
    440 
    441       virtual
    442       int do_encoding() const throw();
    443 
    444       virtual
    445       bool do_always_noconv() const throw();
    446 
    447       virtual
    448       int do_length(state_type&, const extern_type* __from,
    449 		    const extern_type* __end, size_t __max) const;
    450 
    451       virtual int
    452       do_max_length() const throw();
    453     };
    454 #endif //_GLIBCXX_USE_WCHAR_T
    455 
    456   /// class codecvt_byname [22.2.1.6].
    457   template<typename _InternT, typename _ExternT, typename _StateT>
    458     class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
    459     {
    460     public:
    461       explicit
    462       codecvt_byname(const char* __s, size_t __refs = 0)
    463       : codecvt<_InternT, _ExternT, _StateT>(__refs)
    464       {
    465 	if (__builtin_strcmp(__s, "C") != 0
    466 	    && __builtin_strcmp(__s, "POSIX") != 0)
    467 	  {
    468 	    this->_S_destroy_c_locale(this->_M_c_locale_codecvt);
    469 	    this->_S_create_c_locale(this->_M_c_locale_codecvt, __s);
    470 	  }
    471       }
    472 
    473     protected:
    474       virtual
    475       ~codecvt_byname() { }
    476     };
    477 
    478   // Inhibit implicit instantiations for required instantiations,
    479   // which are defined via explicit instantiations elsewhere.
    480 #if _GLIBCXX_EXTERN_TEMPLATE
    481   extern template class codecvt_byname<char, char, mbstate_t>;
    482 
    483   extern template
    484     const codecvt<char, char, mbstate_t>&
    485     use_facet<codecvt<char, char, mbstate_t> >(const locale&);
    486 
    487   extern template
    488     bool
    489     has_facet<codecvt<char, char, mbstate_t> >(const locale&);
    490 
    491 #ifdef _GLIBCXX_USE_WCHAR_T
    492   extern template class codecvt_byname<wchar_t, char, mbstate_t>;
    493 
    494   extern template
    495     const codecvt<wchar_t, char, mbstate_t>&
    496     use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
    497 
    498   extern template
    499     bool
    500     has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
    501 #endif
    502 #endif
    503 
    504 _GLIBCXX_END_NAMESPACE_VERSION
    505 } // namespace std
    506 
    507 #endif // _CODECVT_H
    508