Home | History | Annotate | Download | only in bits
      1 // Locale support (codecvt) -*- C++ -*-
      2 
      3 // Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
      4 // 2009, 2010, 2011  Free Software Foundation, Inc.
      5 //
      6 // This file is part of the GNU ISO C++ Library.  This library is free
      7 // software; you can redistribute it and/or modify it under the
      8 // terms of the GNU General Public License as published by the
      9 // Free Software Foundation; either version 3, or (at your option)
     10 // any later version.
     11 
     12 // This library is distributed in the hope that it will be useful,
     13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 // GNU General Public License for more details.
     16 
     17 // Under Section 7 of GPL version 3, you are granted additional
     18 // permissions described in the GCC Runtime Library Exception, version
     19 // 3.1, as published by the Free Software Foundation.
     20 
     21 // You should have received a copy of the GNU General Public License and
     22 // a copy of the GCC Runtime Library Exception along with this program;
     23 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24 // <http://www.gnu.org/licenses/>.
     25 
     26 /** @file bits/codecvt.h
     27  *  This is an internal header file, included by other library headers.
     28  *  Do not attempt to use it directly. @headername{locale}
     29  */
     30 
     31 //
     32 // ISO C++ 14882: 22.2.1.5 Template class codecvt
     33 //
     34 
     35 // Written by Benjamin Kosnik <bkoz (at) redhat.com>
     36 
     37 #ifndef _CODECVT_H
     38 #define _CODECVT_H 1
     39 
     40 #pragma GCC system_header
     41 
     42 namespace std _GLIBCXX_VISIBILITY(default)
     43 {
     44 _GLIBCXX_BEGIN_NAMESPACE_VERSION
     45 
     46   /// Empty base class for codecvt facet [22.2.1.5].
     47   class codecvt_base
     48   {
     49   public:
     50     enum result
     51     {
     52       ok,
     53       partial,
     54       error,
     55       noconv
     56     };
     57   };
     58 
     59   /**
     60    *  @brief  Common base for codecvt functions.
     61    *
     62    *  This template class provides implementations of the public functions
     63    *  that forward to the protected virtual functions.
     64    *
     65    *  This template also provides abstract stubs for the protected virtual
     66    *  functions.
     67   */
     68   template<typename _InternT, typename _ExternT, typename _StateT>
     69     class __codecvt_abstract_base
     70     : public locale::facet, public codecvt_base
     71     {
     72     public:
     73       // Types:
     74       typedef codecvt_base::result	result;
     75       typedef _InternT			intern_type;
     76       typedef _ExternT			extern_type;
     77       typedef _StateT			state_type;
     78 
     79       // 22.2.1.5.1 codecvt members
     80       /**
     81        *  @brief  Convert from internal to external character set.
     82        *
     83        *  Converts input string of intern_type to output string of
     84        *  extern_type.  This is analogous to wcsrtombs.  It does this by
     85        *  calling codecvt::do_out.
     86        *
     87        *  The source and destination character sets are determined by the
     88        *  facet's locale, internal and external types.
     89        *
     90        *  The characters in [from,from_end) are converted and written to
     91        *  [to,to_end).  from_next and to_next are set to point to the
     92        *  character following the last successfully converted character,
     93        *  respectively.  If the result needed no conversion, from_next and
     94        *  to_next are not affected.
     95        *
     96        *  The @a state argument should be initialized if the input is at the
     97        *  beginning and carried from a previous call if continuing
     98        *  conversion.  There are no guarantees about how @a state is used.
     99        *
    100        *  The result returned is a member of codecvt_base::result.  If
    101        *  all the input is converted, returns codecvt_base::ok.  If no
    102        *  conversion is necessary, returns codecvt_base::noconv.  If
    103        *  the input ends early or there is insufficient space in the
    104        *  output, returns codecvt_base::partial.  Otherwise the
    105        *  conversion failed and codecvt_base::error is returned.
    106        *
    107        *  @param  state  Persistent conversion state data.
    108        *  @param  from  Start of input.
    109        *  @param  from_end  End of input.
    110        *  @param  from_next  Returns start of unconverted data.
    111        *  @param  to  Start of output buffer.
    112        *  @param  to_end  End of output buffer.
    113        *  @param  to_next  Returns start of unused output area.
    114        *  @return  codecvt_base::result.
    115       */
    116       result
    117       out(state_type& __state, const intern_type* __from,
    118 	  const intern_type* __from_end, const intern_type*& __from_next,
    119 	  extern_type* __to, extern_type* __to_end,
    120 	  extern_type*& __to_next) const
    121       {
    122 	return this->do_out(__state, __from, __from_end, __from_next,
    123 			    __to, __to_end, __to_next);
    124       }
    125 
    126       /**
    127        *  @brief  Reset conversion state.
    128        *
    129        *  Writes characters to output that would restore @a state to initial
    130        *  conditions.  The idea is that if a partial conversion occurs, then
    131        *  the converting the characters written by this function would leave
    132        *  the state in initial conditions, rather than partial conversion
    133        *  state.  It does this by calling codecvt::do_unshift().
    134        *
    135        *  For example, if 4 external characters always converted to 1 internal
    136        *  character, and input to in() had 6 external characters with state
    137        *  saved, this function would write two characters to the output and
    138        *  set the state to initialized conditions.
    139        *
    140        *  The source and destination character sets are determined by the
    141        *  facet's locale, internal and external types.
    142        *
    143        *  The result returned is a member of codecvt_base::result.  If the
    144        *  state could be reset and data written, returns codecvt_base::ok.  If
    145        *  no conversion is necessary, returns codecvt_base::noconv.  If the
    146        *  output has insufficient space, returns codecvt_base::partial.
    147        *  Otherwise the reset failed and codecvt_base::error is returned.
    148        *
    149        *  @param  state  Persistent conversion state data.
    150        *  @param  to  Start of output buffer.
    151        *  @param  to_end  End of output buffer.
    152        *  @param  to_next  Returns start of unused output area.
    153        *  @return  codecvt_base::result.
    154       */
    155       result
    156       unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
    157 	      extern_type*& __to_next) const
    158       { return this->do_unshift(__state, __to,__to_end,__to_next); }
    159 
    160       /**
    161        *  @brief  Convert from external to internal character set.
    162        *
    163        *  Converts input string of extern_type to output string of
    164        *  intern_type.  This is analogous to mbsrtowcs.  It does this by
    165        *  calling codecvt::do_in.
    166        *
    167        *  The source and destination character sets are determined by the
    168        *  facet's locale, internal and external types.
    169        *
    170        *  The characters in [from,from_end) are converted and written to
    171        *  [to,to_end).  from_next and to_next are set to point to the
    172        *  character following the last successfully converted character,
    173        *  respectively.  If the result needed no conversion, from_next and
    174        *  to_next are not affected.
    175        *
    176        *  The @a state argument should be initialized if the input is at the
    177        *  beginning and carried from a previous call if continuing
    178        *  conversion.  There are no guarantees about how @a state is used.
    179        *
    180        *  The result returned is a member of codecvt_base::result.  If
    181        *  all the input is converted, returns codecvt_base::ok.  If no
    182        *  conversion is necessary, returns codecvt_base::noconv.  If
    183        *  the input ends early or there is insufficient space in the
    184        *  output, returns codecvt_base::partial.  Otherwise the
    185        *  conversion failed and codecvt_base::error is returned.
    186        *
    187        *  @param  state  Persistent conversion state data.
    188        *  @param  from  Start of input.
    189        *  @param  from_end  End of input.
    190        *  @param  from_next  Returns start of unconverted data.
    191        *  @param  to  Start of output buffer.
    192        *  @param  to_end  End of output buffer.
    193        *  @param  to_next  Returns start of unused output area.
    194        *  @return  codecvt_base::result.
    195       */
    196       result
    197       in(state_type& __state, const extern_type* __from,
    198 	 const extern_type* __from_end, const extern_type*& __from_next,
    199 	 intern_type* __to, intern_type* __to_end,
    200 	 intern_type*& __to_next) const
    201       {
    202 	return this->do_in(__state, __from, __from_end, __from_next,
    203 			   __to, __to_end, __to_next);
    204       }
    205 
    206       int
    207       encoding() const throw()
    208       { return this->do_encoding(); }
    209 
    210       bool
    211       always_noconv() const throw()
    212       { return this->do_always_noconv(); }
    213 
    214       int
    215       length(state_type& __state, const extern_type* __from,
    216 	     const extern_type* __end, size_t __max) const
    217       { return this->do_length(__state, __from, __end, __max); }
    218 
    219       int
    220       max_length() const throw()
    221       { return this->do_max_length(); }
    222 
    223     protected:
    224       explicit
    225       __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }
    226 
    227       virtual
    228       ~__codecvt_abstract_base() { }
    229 
    230       /**
    231        *  @brief  Convert from internal to external character set.
    232        *
    233        *  Converts input string of intern_type to output string of
    234        *  extern_type.  This function is a hook for derived classes to change
    235        *  the value returned.  @see out for more information.
    236       */
    237       virtual result
    238       do_out(state_type& __state, const intern_type* __from,
    239 	     const intern_type* __from_end, const intern_type*& __from_next,
    240 	     extern_type* __to, extern_type* __to_end,
    241 	     extern_type*& __to_next) const = 0;
    242 
    243       virtual result
    244       do_unshift(state_type& __state, extern_type* __to,
    245 		 extern_type* __to_end, extern_type*& __to_next) const = 0;
    246 
    247       virtual result
    248       do_in(state_type& __state, const extern_type* __from,
    249 	    const extern_type* __from_end, const extern_type*& __from_next,
    250 	    intern_type* __to, intern_type* __to_end,
    251 	    intern_type*& __to_next) const = 0;
    252 
    253       virtual int
    254       do_encoding() const throw() = 0;
    255 
    256       virtual bool
    257       do_always_noconv() const throw() = 0;
    258 
    259       virtual int
    260       do_length(state_type&, const extern_type* __from,
    261 		const extern_type* __end, size_t __max) const = 0;
    262 
    263       virtual int
    264       do_max_length() const throw() = 0;
    265     };
    266 
    267 
    268 
    269   /**
    270    *  @brief  Primary class template codecvt.
    271    *  @ingroup locales
    272    *
    273    *  NB: Generic, mostly useless implementation.
    274    *
    275   */
    276    template<typename _InternT, typename _ExternT, typename _StateT>
    277     class codecvt
    278     : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
    279     {
    280     public:
    281       // Types:
    282       typedef codecvt_base::result	result;
    283       typedef _InternT			intern_type;
    284       typedef _ExternT			extern_type;
    285       typedef _StateT			state_type;
    286 
    287     protected:
    288       __c_locale			_M_c_locale_codecvt;
    289 
    290     public:
    291       static locale::id			id;
    292 
    293       explicit
    294       codecvt(size_t __refs = 0)
    295       : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs) { }
    296 
    297       explicit
    298       codecvt(__c_locale __cloc, size_t __refs = 0);
    299 
    300     protected:
    301       virtual
    302       ~codecvt() { }
    303 
    304       virtual result
    305       do_out(state_type& __state, const intern_type* __from,
    306 	     const intern_type* __from_end, const intern_type*& __from_next,
    307 	     extern_type* __to, extern_type* __to_end,
    308 	     extern_type*& __to_next) const;
    309 
    310       virtual result
    311       do_unshift(state_type& __state, extern_type* __to,
    312 		 extern_type* __to_end, extern_type*& __to_next) const;
    313 
    314       virtual result
    315       do_in(state_type& __state, const extern_type* __from,
    316 	    const extern_type* __from_end, const extern_type*& __from_next,
    317 	    intern_type* __to, intern_type* __to_end,
    318 	    intern_type*& __to_next) const;
    319 
    320       virtual int
    321       do_encoding() const throw();
    322 
    323       virtual bool
    324       do_always_noconv() const throw();
    325 
    326       virtual int
    327       do_length(state_type&, const extern_type* __from,
    328 		const extern_type* __end, size_t __max) const;
    329 
    330       virtual int
    331       do_max_length() const throw();
    332     };
    333 
    334   template<typename _InternT, typename _ExternT, typename _StateT>
    335     locale::id codecvt<_InternT, _ExternT, _StateT>::id;
    336 
    337   /// class codecvt<char, char, mbstate_t> specialization.
    338   template<>
    339     class codecvt<char, char, mbstate_t>
    340     : public __codecvt_abstract_base<char, char, mbstate_t>
    341     {
    342     public:
    343       // Types:
    344       typedef char			intern_type;
    345       typedef char			extern_type;
    346       typedef mbstate_t			state_type;
    347 
    348     protected:
    349       __c_locale			_M_c_locale_codecvt;
    350 
    351     public:
    352       static locale::id id;
    353 
    354       explicit
    355       codecvt(size_t __refs = 0);
    356 
    357       explicit
    358       codecvt(__c_locale __cloc, size_t __refs = 0);
    359 
    360     protected:
    361       virtual
    362       ~codecvt();
    363 
    364       virtual result
    365       do_out(state_type& __state, const intern_type* __from,
    366 	     const intern_type* __from_end, const intern_type*& __from_next,
    367 	     extern_type* __to, extern_type* __to_end,
    368 	     extern_type*& __to_next) const;
    369 
    370       virtual result
    371       do_unshift(state_type& __state, extern_type* __to,
    372 		 extern_type* __to_end, extern_type*& __to_next) const;
    373 
    374       virtual result
    375       do_in(state_type& __state, const extern_type* __from,
    376 	    const extern_type* __from_end, const extern_type*& __from_next,
    377 	    intern_type* __to, intern_type* __to_end,
    378 	    intern_type*& __to_next) const;
    379 
    380       virtual int
    381       do_encoding() const throw();
    382 
    383       virtual bool
    384       do_always_noconv() const throw();
    385 
    386       virtual int
    387       do_length(state_type&, const extern_type* __from,
    388 		const extern_type* __end, size_t __max) const;
    389 
    390       virtual int
    391       do_max_length() const throw();
    392   };
    393 
    394 #ifdef _GLIBCXX_USE_WCHAR_T
    395   /// class codecvt<wchar_t, char, mbstate_t> specialization.
    396   template<>
    397     class codecvt<wchar_t, char, mbstate_t>
    398     : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
    399     {
    400     public:
    401       // Types:
    402       typedef wchar_t			intern_type;
    403       typedef char			extern_type;
    404       typedef mbstate_t			state_type;
    405 
    406     protected:
    407       __c_locale			_M_c_locale_codecvt;
    408 
    409     public:
    410       static locale::id			id;
    411 
    412       explicit
    413       codecvt(size_t __refs = 0);
    414 
    415       explicit
    416       codecvt(__c_locale __cloc, size_t __refs = 0);
    417 
    418     protected:
    419       virtual
    420       ~codecvt();
    421 
    422       virtual result
    423       do_out(state_type& __state, const intern_type* __from,
    424 	     const intern_type* __from_end, const intern_type*& __from_next,
    425 	     extern_type* __to, extern_type* __to_end,
    426 	     extern_type*& __to_next) const;
    427 
    428       virtual result
    429       do_unshift(state_type& __state,
    430 		 extern_type* __to, extern_type* __to_end,
    431 		 extern_type*& __to_next) const;
    432 
    433       virtual result
    434       do_in(state_type& __state,
    435 	     const extern_type* __from, const extern_type* __from_end,
    436 	     const extern_type*& __from_next,
    437 	     intern_type* __to, intern_type* __to_end,
    438 	     intern_type*& __to_next) const;
    439 
    440       virtual
    441       int do_encoding() const throw();
    442 
    443       virtual
    444       bool do_always_noconv() const throw();
    445 
    446       virtual
    447       int do_length(state_type&, const extern_type* __from,
    448 		    const extern_type* __end, size_t __max) const;
    449 
    450       virtual int
    451       do_max_length() const throw();
    452     };
    453 #endif //_GLIBCXX_USE_WCHAR_T
    454 
    455   /// class codecvt_byname [22.2.1.6].
    456   template<typename _InternT, typename _ExternT, typename _StateT>
    457     class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
    458     {
    459     public:
    460       explicit
    461       codecvt_byname(const char* __s, size_t __refs = 0)
    462       : codecvt<_InternT, _ExternT, _StateT>(__refs)
    463       {
    464 	if (__builtin_strcmp(__s, "C") != 0
    465 	    && __builtin_strcmp(__s, "POSIX") != 0)
    466 	  {
    467 	    this->_S_destroy_c_locale(this->_M_c_locale_codecvt);
    468 	    this->_S_create_c_locale(this->_M_c_locale_codecvt, __s);
    469 	  }
    470       }
    471 
    472     protected:
    473       virtual
    474       ~codecvt_byname() { }
    475     };
    476 
    477   // Inhibit implicit instantiations for required instantiations,
    478   // which are defined via explicit instantiations elsewhere.
    479 #if _GLIBCXX_EXTERN_TEMPLATE
    480   extern template class codecvt_byname<char, char, mbstate_t>;
    481 
    482   extern template
    483     const codecvt<char, char, mbstate_t>&
    484     use_facet<codecvt<char, char, mbstate_t> >(const locale&);
    485 
    486   extern template
    487     bool
    488     has_facet<codecvt<char, char, mbstate_t> >(const locale&);
    489 
    490 #ifdef _GLIBCXX_USE_WCHAR_T
    491   extern template class codecvt_byname<wchar_t, char, mbstate_t>;
    492 
    493   extern template
    494     const codecvt<wchar_t, char, mbstate_t>&
    495     use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
    496 
    497   extern template
    498     bool
    499     has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
    500 #endif
    501 #endif
    502 
    503 _GLIBCXX_END_NAMESPACE_VERSION
    504 } // namespace std
    505 
    506 #endif // _CODECVT_H
    507