Home | History | Annotate | Download | only in bits
// Locale support (codecvt) -*- C++ -*-

// Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
// 2009, 2010, 2011  Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/** @file bits/codecvt.h
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{locale}
 */

//
// ISO C++ 14882: 22.2.1.5 Template class codecvt
//

// Written by Benjamin Kosnik <bkoz (at) redhat.com>
     36 
     37 #ifndef _CODECVT_H
     38 #define _CODECVT_H 1
     39 
     40 #pragma GCC system_header
     41 
     42 namespace std _GLIBCXX_VISIBILITY(default)
     43 {
     44 _GLIBCXX_BEGIN_NAMESPACE_VERSION
     45 
     46   /// Empty base class for codecvt facet [22.2.1.5].
     47   class codecvt_base
     48   {
     49   public:
     50     enum result
     51     {
     52       ok,
     53       partial,
     54       error,
     55       noconv
     56     };
     57   };
     58 
     59   /**
     60    *  @brief  Common base for codecvt functions.
     61    *
     62    *  This template class provides implementations of the public functions
     63    *  that forward to the protected virtual functions.
     64    *
     65    *  This template also provides abstract stubs for the protected virtual
     66    *  functions.
     67   */
     68   template<typename _InternT, typename _ExternT, typename _StateT>
     69     class __codecvt_abstract_base
     70     : public locale::facet, public codecvt_base
     71     {
     72     public:
     73       // Types:
     74       typedef codecvt_base::result	result;
     75       typedef _InternT			intern_type;
     76       typedef _ExternT			extern_type;
     77       typedef _StateT			state_type;
     78 
     79       // 22.2.1.5.1 codecvt members
     80       /**
     81        *  @brief  Convert from internal to external character set.
     82        *
     83        *  Converts input string of intern_type to output string of
     84        *  extern_type.  This is analogous to wcsrtombs.  It does this by
     85        *  calling codecvt::do_out.
     86        *
     87        *  The source and destination character sets are determined by the
     88        *  facet's locale, internal and external types.
     89        *
     90        *  The characters in [from,from_end) are converted and written to
     91        *  [to,to_end).  from_next and to_next are set to point to the
     92        *  character following the last successfully converted character,
     93        *  respectively.  If the result needed no conversion, from_next and
     94        *  to_next are not affected.
     95        *
     96        *  The @a state argument should be initialized if the input is at the
     97        *  beginning and carried from a previous call if continuing
     98        *  conversion.  There are no guarantees about how @a state is used.
     99        *
    100        *  The result returned is a member of codecvt_base::result.  If
    101        *  all the input is converted, returns codecvt_base::ok.  If no
    102        *  conversion is necessary, returns codecvt_base::noconv.  If
    103        *  the input ends early or there is insufficient space in the
    104        *  output, returns codecvt_base::partial.  Otherwise the
    105        *  conversion failed and codecvt_base::error is returned.
    106        *
    107        *  @param  __state  Persistent conversion state data.
    108        *  @param  __from  Start of input.
    109        *  @param  __from_end  End of input.
    110        *  @param  __from_next  Returns start of unconverted data.
    111        *  @param  __to  Start of output buffer.
    112        *  @param  __to_end  End of output buffer.
    113        *  @param  __to_next  Returns start of unused output area.
    114        *  @return  codecvt_base::result.
    115       */
    116       result
    117       out(state_type& __state, const intern_type* __from,
    118 	  const intern_type* __from_end, const intern_type*& __from_next,
    119 	  extern_type* __to, extern_type* __to_end,
    120 	  extern_type*& __to_next) const
    121       {
    122 	return this->do_out(__state, __from, __from_end, __from_next,
    123 			    __to, __to_end, __to_next);
    124       }
    125 
    126       /**
    127        *  @brief  Reset conversion state.
    128        *
    129        *  Writes characters to output that would restore @a state to initial
    130        *  conditions.  The idea is that if a partial conversion occurs, then
    131        *  the converting the characters written by this function would leave
    132        *  the state in initial conditions, rather than partial conversion
    133        *  state.  It does this by calling codecvt::do_unshift().
    134        *
    135        *  For example, if 4 external characters always converted to 1 internal
    136        *  character, and input to in() had 6 external characters with state
    137        *  saved, this function would write two characters to the output and
    138        *  set the state to initialized conditions.
    139        *
    140        *  The source and destination character sets are determined by the
    141        *  facet's locale, internal and external types.
    142        *
    143        *  The result returned is a member of codecvt_base::result.  If the
    144        *  state could be reset and data written, returns codecvt_base::ok.  If
    145        *  no conversion is necessary, returns codecvt_base::noconv.  If the
    146        *  output has insufficient space, returns codecvt_base::partial.
    147        *  Otherwise the reset failed and codecvt_base::error is returned.
    148        *
    149        *  @param  __state  Persistent conversion state data.
    150        *  @param  __to  Start of output buffer.
    151        *  @param  __to_end  End of output buffer.
    152        *  @param  __to_next  Returns start of unused output area.
    153        *  @return  codecvt_base::result.
    154       */
    155       result
    156       unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
    157 	      extern_type*& __to_next) const
    158       { return this->do_unshift(__state, __to,__to_end,__to_next); }
    159 
    160       /**
    161        *  @brief  Convert from external to internal character set.
    162        *
    163        *  Converts input string of extern_type to output string of
    164        *  intern_type.  This is analogous to mbsrtowcs.  It does this by
    165        *  calling codecvt::do_in.
    166        *
    167        *  The source and destination character sets are determined by the
    168        *  facet's locale, internal and external types.
    169        *
    170        *  The characters in [from,from_end) are converted and written to
    171        *  [to,to_end).  from_next and to_next are set to point to the
    172        *  character following the last successfully converted character,
    173        *  respectively.  If the result needed no conversion, from_next and
    174        *  to_next are not affected.
    175        *
    176        *  The @a state argument should be initialized if the input is at the
    177        *  beginning and carried from a previous call if continuing
    178        *  conversion.  There are no guarantees about how @a state is used.
    179        *
    180        *  The result returned is a member of codecvt_base::result.  If
    181        *  all the input is converted, returns codecvt_base::ok.  If no
    182        *  conversion is necessary, returns codecvt_base::noconv.  If
    183        *  the input ends early or there is insufficient space in the
    184        *  output, returns codecvt_base::partial.  Otherwise the
    185        *  conversion failed and codecvt_base::error is returned.
    186        *
    187        *  @param  __state  Persistent conversion state data.
    188        *  @param  __from  Start of input.
    189        *  @param  __from_end  End of input.
    190        *  @param  __from_next  Returns start of unconverted data.
    191        *  @param  __to  Start of output buffer.
    192        *  @param  __to_end  End of output buffer.
    193        *  @param  __to_next  Returns start of unused output area.
    194        *  @return  codecvt_base::result.
    195       */
    196       result
    197       in(state_type& __state, const extern_type* __from,
    198 	 const extern_type* __from_end, const extern_type*& __from_next,
    199 	 intern_type* __to, intern_type* __to_end,
    200 	 intern_type*& __to_next) const
    201       {
    202 	return this->do_in(__state, __from, __from_end, __from_next,
    203 			   __to, __to_end, __to_next);
    204       }
    205 
    206       int
    207       encoding() const throw()
    208       { return this->do_encoding(); }
    209 
    210       bool
    211       always_noconv() const throw()
    212       { return this->do_always_noconv(); }
    213 
    214       int
    215       length(state_type& __state, const extern_type* __from,
    216 	     const extern_type* __end, size_t __max) const
    217       { return this->do_length(__state, __from, __end, __max); }
    218 
    219       int
    220       max_length() const throw()
    221       { return this->do_max_length(); }
    222 
    223     protected:
    224       explicit
    225       __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }
    226 
    227       virtual
    228       ~__codecvt_abstract_base() { }
    229 
    230       /**
    231        *  @brief  Convert from internal to external character set.
    232        *
    233        *  Converts input string of intern_type to output string of
    234        *  extern_type.  This function is a hook for derived classes to change
    235        *  the value returned.  @see out for more information.
    236       */
    237       virtual result
    238       do_out(state_type& __state, const intern_type* __from,
    239 	     const intern_type* __from_end, const intern_type*& __from_next,
    240 	     extern_type* __to, extern_type* __to_end,
    241 	     extern_type*& __to_next) const = 0;
    242 
    243       virtual result
    244       do_unshift(state_type& __state, extern_type* __to,
    245 		 extern_type* __to_end, extern_type*& __to_next) const = 0;
    246 
    247       virtual result
    248       do_in(state_type& __state, const extern_type* __from,
    249 	    const extern_type* __from_end, const extern_type*& __from_next,
    250 	    intern_type* __to, intern_type* __to_end,
    251 	    intern_type*& __to_next) const = 0;
    252 
    253       virtual int
    254       do_encoding() const throw() = 0;
    255 
    256       virtual bool
    257       do_always_noconv() const throw() = 0;
    258 
    259       virtual int
    260       do_length(state_type&, const extern_type* __from,
    261 		const extern_type* __end, size_t __max) const = 0;
    262 
    263       virtual int
    264       do_max_length() const throw() = 0;
    265     };
    266 
    267 
    268 
    269   /**
    270    *  @brief  Primary class template codecvt.
    271    *  @ingroup locales
    272    *
    273    *  NB: Generic, mostly useless implementation.
    274    *
    275   */
    276    template<typename _InternT, typename _ExternT, typename _StateT>
    277     class codecvt
    278     : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
    279     {
    280     public:
    281       // Types:
    282       typedef codecvt_base::result	result;
    283       typedef _InternT			intern_type;
    284       typedef _ExternT			extern_type;
    285       typedef _StateT			state_type;
    286 
    287     protected:
    288       __c_locale			_M_c_locale_codecvt;
    289 
    290     public:
    291       static locale::id			id;
    292 
    293       explicit
    294       codecvt(size_t __refs = 0)
    295       : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs),
    296 	_M_c_locale_codecvt(0)
    297       { }
    298 
    299       explicit
    300       codecvt(__c_locale __cloc, size_t __refs = 0);
    301 
    302     protected:
    303       virtual
    304       ~codecvt() { }
    305 
    306       virtual result
    307       do_out(state_type& __state, const intern_type* __from,
    308 	     const intern_type* __from_end, const intern_type*& __from_next,
    309 	     extern_type* __to, extern_type* __to_end,
    310 	     extern_type*& __to_next) const;
    311 
    312       virtual result
    313       do_unshift(state_type& __state, extern_type* __to,
    314 		 extern_type* __to_end, extern_type*& __to_next) const;
    315 
    316       virtual result
    317       do_in(state_type& __state, const extern_type* __from,
    318 	    const extern_type* __from_end, const extern_type*& __from_next,
    319 	    intern_type* __to, intern_type* __to_end,
    320 	    intern_type*& __to_next) const;
    321 
    322       virtual int
    323       do_encoding() const throw();
    324 
    325       virtual bool
    326       do_always_noconv() const throw();
    327 
    328       virtual int
    329       do_length(state_type&, const extern_type* __from,
    330 		const extern_type* __end, size_t __max) const;
    331 
    332       virtual int
    333       do_max_length() const throw();
    334     };
    335 
    336   template<typename _InternT, typename _ExternT, typename _StateT>
    337     locale::id codecvt<_InternT, _ExternT, _StateT>::id;
    338 
    339   /// class codecvt<char, char, mbstate_t> specialization.
    340   template<>
    341     class codecvt<char, char, mbstate_t>
    342     : public __codecvt_abstract_base<char, char, mbstate_t>
    343     {
    344     public:
    345       // Types:
    346       typedef char			intern_type;
    347       typedef char			extern_type;
    348       typedef mbstate_t			state_type;
    349 
    350     protected:
    351       __c_locale			_M_c_locale_codecvt;
    352 
    353     public:
    354       static locale::id id;
    355 
    356       explicit
    357       codecvt(size_t __refs = 0);
    358 
    359       explicit
    360       codecvt(__c_locale __cloc, size_t __refs = 0);
    361 
    362     protected:
    363       virtual
    364       ~codecvt();
    365 
    366       virtual result
    367       do_out(state_type& __state, const intern_type* __from,
    368 	     const intern_type* __from_end, const intern_type*& __from_next,
    369 	     extern_type* __to, extern_type* __to_end,
    370 	     extern_type*& __to_next) const;
    371 
    372       virtual result
    373       do_unshift(state_type& __state, extern_type* __to,
    374 		 extern_type* __to_end, extern_type*& __to_next) const;
    375 
    376       virtual result
    377       do_in(state_type& __state, const extern_type* __from,
    378 	    const extern_type* __from_end, const extern_type*& __from_next,
    379 	    intern_type* __to, intern_type* __to_end,
    380 	    intern_type*& __to_next) const;
    381 
    382       virtual int
    383       do_encoding() const throw();
    384 
    385       virtual bool
    386       do_always_noconv() const throw();
    387 
    388       virtual int
    389       do_length(state_type&, const extern_type* __from,
    390 		const extern_type* __end, size_t __max) const;
    391 
    392       virtual int
    393       do_max_length() const throw();
    394   };
    395 
    396 #ifdef _GLIBCXX_USE_WCHAR_T
    397   /// class codecvt<wchar_t, char, mbstate_t> specialization.
    398   template<>
    399     class codecvt<wchar_t, char, mbstate_t>
    400     : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
    401     {
    402     public:
    403       // Types:
    404       typedef wchar_t			intern_type;
    405       typedef char			extern_type;
    406       typedef mbstate_t			state_type;
    407 
    408     protected:
    409       __c_locale			_M_c_locale_codecvt;
    410 
    411     public:
    412       static locale::id			id;
    413 
    414       explicit
    415       codecvt(size_t __refs = 0);
    416 
    417       explicit
    418       codecvt(__c_locale __cloc, size_t __refs = 0);
    419 
    420     protected:
    421       virtual
    422       ~codecvt();
    423 
    424       virtual result
    425       do_out(state_type& __state, const intern_type* __from,
    426 	     const intern_type* __from_end, const intern_type*& __from_next,
    427 	     extern_type* __to, extern_type* __to_end,
    428 	     extern_type*& __to_next) const;
    429 
    430       virtual result
    431       do_unshift(state_type& __state,
    432 		 extern_type* __to, extern_type* __to_end,
    433 		 extern_type*& __to_next) const;
    434 
    435       virtual result
    436       do_in(state_type& __state,
    437 	     const extern_type* __from, const extern_type* __from_end,
    438 	     const extern_type*& __from_next,
    439 	     intern_type* __to, intern_type* __to_end,
    440 	     intern_type*& __to_next) const;
    441 
    442       virtual
    443       int do_encoding() const throw();
    444 
    445       virtual
    446       bool do_always_noconv() const throw();
    447 
    448       virtual
    449       int do_length(state_type&, const extern_type* __from,
    450 		    const extern_type* __end, size_t __max) const;
    451 
    452       virtual int
    453       do_max_length() const throw();
    454     };
    455 #endif //_GLIBCXX_USE_WCHAR_T
    456 
    457   /// class codecvt_byname [22.2.1.6].
    458   template<typename _InternT, typename _ExternT, typename _StateT>
    459     class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
    460     {
    461     public:
    462       explicit
    463       codecvt_byname(const char* __s, size_t __refs = 0)
    464       : codecvt<_InternT, _ExternT, _StateT>(__refs)
    465       {
    466 	if (__builtin_strcmp(__s, "C") != 0
    467 	    && __builtin_strcmp(__s, "POSIX") != 0)
    468 	  {
    469 	    this->_S_destroy_c_locale(this->_M_c_locale_codecvt);
    470 	    this->_S_create_c_locale(this->_M_c_locale_codecvt, __s);
    471 	  }
    472       }
    473 
    474     protected:
    475       virtual
    476       ~codecvt_byname() { }
    477     };
    478 
    479   // Inhibit implicit instantiations for required instantiations,
    480   // which are defined via explicit instantiations elsewhere.
    481 #if _GLIBCXX_EXTERN_TEMPLATE
    482   extern template class codecvt_byname<char, char, mbstate_t>;
    483 
    484   extern template
    485     const codecvt<char, char, mbstate_t>&
    486     use_facet<codecvt<char, char, mbstate_t> >(const locale&);
    487 
    488   extern template
    489     bool
    490     has_facet<codecvt<char, char, mbstate_t> >(const locale&);
    491 
    492 #ifdef _GLIBCXX_USE_WCHAR_T
    493   extern template class codecvt_byname<wchar_t, char, mbstate_t>;
    494 
    495   extern template
    496     const codecvt<wchar_t, char, mbstate_t>&
    497     use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
    498 
    499   extern template
    500     bool
    501     has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
    502 #endif
    503 #endif
    504 
    505 _GLIBCXX_END_NAMESPACE_VERSION
    506 } // namespace std
    507 
    508 #endif // _CODECVT_H
    509