Home | History | Annotate | Download | only in bits
      1 // Locale support (codecvt) -*- C++ -*-
      2 
      3 // Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
      4 // 2009  Free Software Foundation, Inc.
      5 //
      6 // This file is part of the GNU ISO C++ Library.  This library is free
      7 // software; you can redistribute it and/or modify it under the
      8 // terms of the GNU General Public License as published by the
      9 // Free Software Foundation; either version 3, or (at your option)
     10 // any later version.
     11 
     12 // This library is distributed in the hope that it will be useful,
     13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 // GNU General Public License for more details.
     16 
     17 // Under Section 7 of GPL version 3, you are granted additional
     18 // permissions described in the GCC Runtime Library Exception, version
     19 // 3.1, as published by the Free Software Foundation.
     20 
     21 // You should have received a copy of the GNU General Public License and
     22 // a copy of the GCC Runtime Library Exception along with this program;
     23 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24 // <http://www.gnu.org/licenses/>.
     25 
     26 /** @file bits/codecvt.h
     27  *  This is an internal header file, included by other library headers.
     28  *  You should not attempt to use it directly.
     29  */
     30 
     31 //
     32 // ISO C++ 14882: 22.2.1.5 Template class codecvt
     33 //
     34 
     35 // Written by Benjamin Kosnik <bkoz (at) redhat.com>
     36 
     37 #ifndef _CODECVT_H
     38 #define _CODECVT_H 1
     39 
     40 #pragma GCC system_header
     41 
     42 _GLIBCXX_BEGIN_NAMESPACE(std)
     43 
     44   /// Base class for codecvt facet [22.2.1.5]; it holds only the result enum.
          /// The enumerators are the status codes returned by the conversion
          /// members (out, unshift, in) of the codecvt facets.
     45   class codecvt_base
     46   {
     47   public:
     48     enum result
     49     {
     50       ok,       // all of the input was converted
     51       partial,  // input ended early or the output had insufficient space
     52       error,    // the conversion failed
     53       noconv    // no conversion was necessary
     54     };
     55   };
     56 
     57   /**
     58    *  @brief  Common base for codecvt functions.
     59    *
     60    *  This template class provides implementations of the public functions
     61    *  that forward to the protected virtual functions.
     62    *
     63    *  This template also provides abstract stubs for the protected virtual
     64    *  functions.
          *
          *  Every public member below is a thin non-virtual wrapper that calls
          *  the matching pure virtual do_* member with the same arguments, so a
          *  derived facet supplies the behaviour by overriding the do_* members.
     65   */
     66   template<typename _InternT, typename _ExternT, typename _StateT>
     67     class __codecvt_abstract_base
     68     : public locale::facet, public codecvt_base
     69     {
     70     public:
     71       // Types:
     72       typedef codecvt_base::result	result;
     73       typedef _InternT			intern_type;
     74       typedef _ExternT			extern_type;
     75       typedef _StateT			state_type;
     76 
     77       // 22.2.1.5.1 codecvt members
     78       /**
     79        *  @brief  Convert from internal to external character set.
     80        *
     81        *  Converts input string of intern_type to output string of
     82        *  extern_type.  This is analogous to wcsrtombs.  It does this by
     83        *  calling codecvt::do_out.
     84        *
     85        *  The source and destination character sets are determined by the
     86        *  facet's locale, internal and external types.
     87        *
     88        *  The characters in [from,from_end) are converted and written to
     89        *  [to,to_end).  from_next and to_next are set to point to the
     90        *  character following the last successfully converted character,
     91        *  respectively.  If the result needed no conversion, from_next and
     92        *  to_next are not affected.
     93        *
     94        *  The @a state argument should be initialized if the input is at the
     95        *  beginning and carried from a previous call if continuing
     96        *  conversion.  There are no guarantees about how @a state is used.
     97        *
     98        *  The result returned is a member of codecvt_base::result.  If
     99        *  all the input is converted, returns codecvt_base::ok.  If no
    100        *  conversion is necessary, returns codecvt_base::noconv.  If
    101        *  the input ends early or there is insufficient space in the
    102        *  output, returns codecvt_base::partial.  Otherwise the
    103        *  conversion failed and codecvt_base::error is returned.
    104        *
    105        *  @param  state  Persistent conversion state data.
    106        *  @param  from  Start of input.
    107        *  @param  from_end  End of input.
    108        *  @param  from_next  Returns start of unconverted data.
    109        *  @param  to  Start of output buffer.
    110        *  @param  to_end  End of output buffer.
    111        *  @param  to_next  Returns start of unused output area.
    112        *  @return  codecvt_base::result.
    113       */
    114       result
    115       out(state_type& __state, const intern_type* __from,
    116 	  const intern_type* __from_end, const intern_type*& __from_next,
    117 	  extern_type* __to, extern_type* __to_end,
    118 	  extern_type*& __to_next) const
    119       {
    120 	return this->do_out(__state, __from, __from_end, __from_next,
    121 			    __to, __to_end, __to_next);
    122       }
    123 
    124       /**
    125        *  @brief  Reset conversion state.
    126        *
    127        *  Writes characters to output that would restore @a state to initial
    128        *  conditions.  The idea is that if a partial conversion occurs, then
    129        *  converting the characters written by this function would leave
    130        *  the state in initial conditions, rather than partial conversion
    131        *  state.  It does this by calling codecvt::do_unshift().
    132        *
    133        *  For example, if 4 external characters always converted to 1 internal
    134        *  character, and input to in() had 6 external characters with state
    135        *  saved, this function would write two characters to the output and
    136        *  set the state to initialized conditions.
    137        *
    138        *  The source and destination character sets are determined by the
    139        *  facet's locale, internal and external types.
    140        *
    141        *  The result returned is a member of codecvt_base::result.  If the
    142        *  state could be reset and data written, returns codecvt_base::ok.  If
    143        *  no conversion is necessary, returns codecvt_base::noconv.  If the
    144        *  output has insufficient space, returns codecvt_base::partial.
    145        *  Otherwise the reset failed and codecvt_base::error is returned.
    146        *
    147        *  @param  state  Persistent conversion state data.
    148        *  @param  to  Start of output buffer.
    149        *  @param  to_end  End of output buffer.
    150        *  @param  to_next  Returns start of unused output area.
    151        *  @return  codecvt_base::result.
    152       */
    153       result
    154       unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
    155 	      extern_type*& __to_next) const
    156       { return this->do_unshift(__state, __to,__to_end,__to_next); }
    157 
    158       /**
    159        *  @brief  Convert from external to internal character set.
    160        *
    161        *  Converts input string of extern_type to output string of
    162        *  intern_type.  This is analogous to mbsrtowcs.  It does this by
    163        *  calling codecvt::do_in.
    164        *
    165        *  The source and destination character sets are determined by the
    166        *  facet's locale, internal and external types.
    167        *
    168        *  The characters in [from,from_end) are converted and written to
    169        *  [to,to_end).  from_next and to_next are set to point to the
    170        *  character following the last successfully converted character,
    171        *  respectively.  If the result needed no conversion, from_next and
    172        *  to_next are not affected.
    173        *
    174        *  The @a state argument should be initialized if the input is at the
    175        *  beginning and carried from a previous call if continuing
    176        *  conversion.  There are no guarantees about how @a state is used.
    177        *
    178        *  The result returned is a member of codecvt_base::result.  If
    179        *  all the input is converted, returns codecvt_base::ok.  If no
    180        *  conversion is necessary, returns codecvt_base::noconv.  If
    181        *  the input ends early or there is insufficient space in the
    182        *  output, returns codecvt_base::partial.  Otherwise the
    183        *  conversion failed and codecvt_base::error is returned.
    184        *
    185        *  @param  state  Persistent conversion state data.
    186        *  @param  from  Start of input.
    187        *  @param  from_end  End of input.
    188        *  @param  from_next  Returns start of unconverted data.
    189        *  @param  to  Start of output buffer.
    190        *  @param  to_end  End of output buffer.
    191        *  @param  to_next  Returns start of unused output area.
    192        *  @return  codecvt_base::result.
    193       */
    194       result
    195       in(state_type& __state, const extern_type* __from,
    196 	 const extern_type* __from_end, const extern_type*& __from_next,
    197 	 intern_type* __to, intern_type* __to_end,
    198 	 intern_type*& __to_next) const
    199       {
    200 	return this->do_in(__state, __from, __from_end, __from_next,
    201 			   __to, __to_end, __to_next);
    202       }
    203 
              /**
               *  @brief  Query the facet's encoding property.
               *
               *  Returns the value of codecvt::do_encoding() unchanged.  The
               *  meaning of the value is defined by the overriding facet; see
               *  ISO C++ 22.2.1.5.2.  Declared throw(), i.e. non-throwing.
               *
               *  @return  Result of do_encoding().
               */
    204       int
    205       encoding() const throw()
    206       { return this->do_encoding(); }
    207 
              /**
               *  @brief  Query whether the facet reports that it never converts.
               *
               *  Returns the value of codecvt::do_always_noconv() unchanged; see
               *  ISO C++ 22.2.1.5.2 for the meaning of the value.  Declared
               *  throw(), i.e. non-throwing.
               *
               *  @return  Result of do_always_noconv().
               */
    208       bool
    209       always_noconv() const throw()
    210       { return this->do_always_noconv(); }
    211 
              /**
               *  @brief  Forward a length query to codecvt::do_length().
               *
               *  All four arguments are passed through unchanged and the value
               *  of do_length() is returned; see ISO C++ 22.2.1.5.2 for the
               *  meaning of the value.
               *
               *  @param  state  Persistent conversion state data.
               *  @param  from  Start of external input.
               *  @param  end  End of external input.
               *  @param  max  Limit passed to do_length().
               *  @return  Result of do_length().
               */
    212       int
    213       length(state_type& __state, const extern_type* __from,
    214 	     const extern_type* __end, size_t __max) const
    215       { return this->do_length(__state, __from, __end, __max); }
    216 
              /**
               *  @brief  Forward to codecvt::do_max_length().
               *
               *  Returns the value of do_max_length() unchanged; see ISO C++
               *  22.2.1.5.2 for the meaning of the value.  Declared throw(),
               *  i.e. non-throwing.
               *
               *  @return  Result of do_max_length().
               */
    217       int
    218       max_length() const throw()
    219       { return this->do_max_length(); }
    220 
    221     protected:
              /// Constructs the base; @a __refs is handed on to the locale::facet
              /// constructor.
    222       explicit
    223       __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }
    224 
              /// Virtual destructor with an empty body; protected like the
              /// constructor.
    225       virtual
    226       ~__codecvt_abstract_base() { }
    227 
    228       /**
    229        *  @brief  Convert from internal to external character set.
    230        *
    231        *  Converts input string of intern_type to output string of
    232        *  extern_type.  This function is a hook for derived classes to change
    233        *  the value returned.  @see out for more information.
    234       */
    235       virtual result
    236       do_out(state_type& __state, const intern_type* __from,
    237 	     const intern_type* __from_end, const intern_type*& __from_next,
    238 	     extern_type* __to, extern_type* __to_end,
    239 	     extern_type*& __to_next) const = 0;
    240 
              /// Hook called by unshift(); pure virtual, so a concrete facet must
              /// override it.  @see unshift for the contract.
    241       virtual result
    242       do_unshift(state_type& __state, extern_type* __to,
    243 		 extern_type* __to_end, extern_type*& __to_next) const = 0;
    244 
              /// Hook called by in(); pure virtual.  @see in for the contract.
    245       virtual result
    246       do_in(state_type& __state, const extern_type* __from,
    247 	    const extern_type* __from_end, const extern_type*& __from_next,
    248 	    intern_type* __to, intern_type* __to_end,
    249 	    intern_type*& __to_next) const = 0;
    250 
              /// Hook called by encoding(); pure virtual.
    251       virtual int
    252       do_encoding() const throw() = 0;
    253 
              /// Hook called by always_noconv(); pure virtual.
    254       virtual bool
    255       do_always_noconv() const throw() = 0;
    256 
              /// Hook called by length(); pure virtual.  The state parameter is
              /// left unnamed in this declaration.
    257       virtual int
    258       do_length(state_type&, const extern_type* __from,
    259 		const extern_type* __end, size_t __max) const = 0;
    260 
              /// Hook called by max_length(); pure virtual.
    261       virtual int
    262       do_max_length() const throw() = 0;
    263     };
    264 
    265   /// @brief class codecvt [22.2.1.5].
    266   /// NB: Generic, mostly useless implementation.
          ///
          /// Primary template.  It derives from __codecvt_abstract_base and
          /// declares overrides for every pure virtual do_* hook of that base.
          /// Only the default constructor and the destructor are defined inline
          /// here; the remaining members are declarations.
    267   template<typename _InternT, typename _ExternT, typename _StateT>
    268     class codecvt
    269     : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
    270     {
    271     public:
    272       // Types:
    273       typedef codecvt_base::result	result;
    274       typedef _InternT			intern_type;
    275       typedef _ExternT			extern_type;
    276       typedef _StateT			state_type;
    277 
    278     protected:
              // Per-facet __c_locale handle.  codecvt_byname hands this member
              // to _S_destroy_c_locale / _S_create_c_locale for named locales,
              // so it must never hold an indeterminate value.
    279       __c_locale			_M_c_locale_codecvt;
    280 
    281     public:
              // Facet identifier; defined out of class after this template.
    282       static locale::id			id;
    283 
              /**
               *  @brief  Default constructor.
               *
               *  Passes @a __refs to the abstract base and sets
               *  _M_c_locale_codecvt to 0.  Without that initializer the member
               *  would be indeterminate, and codecvt_byname's constructor would
               *  pass an indeterminate handle to _S_destroy_c_locale.
               *
               *  @param  __refs  Passed to the base class constructor.
               */
    284       explicit
    285       codecvt(size_t __refs = 0)
    286       : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs),
                _M_c_locale_codecvt(0)
              { }
    287 
              /// Constructor taking an existing __c_locale; declared only, no
              /// definition appears in this header.
    288       explicit
    289       codecvt(__c_locale __cloc, size_t __refs = 0);
    290 
    291     protected:
              /// Virtual destructor with an empty body.
    292       virtual
    293       ~codecvt() { }
    294 
              /// Override of the do_out hook; called by out().
    295       virtual result
    296       do_out(state_type& __state, const intern_type* __from,
    297 	     const intern_type* __from_end, const intern_type*& __from_next,
    298 	     extern_type* __to, extern_type* __to_end,
    299 	     extern_type*& __to_next) const;
    300 
              /// Override of the do_unshift hook; called by unshift().
    301       virtual result
    302       do_unshift(state_type& __state, extern_type* __to,
    303 		 extern_type* __to_end, extern_type*& __to_next) const;
    304 
              /// Override of the do_in hook; called by in().
    305       virtual result
    306       do_in(state_type& __state, const extern_type* __from,
    307 	    const extern_type* __from_end, const extern_type*& __from_next,
    308 	    intern_type* __to, intern_type* __to_end,
    309 	    intern_type*& __to_next) const;
    310 
              /// Override of the do_encoding hook; called by encoding().
    311       virtual int
    312       do_encoding() const throw();
    313 
              /// Override of the do_always_noconv hook; called by always_noconv().
    314       virtual bool
    315       do_always_noconv() const throw();
    316 
              /// Override of the do_length hook; called by length().
    317       virtual int
    318       do_length(state_type&, const extern_type* __from,
    319 		const extern_type* __end, size_t __max) const;
    320 
              /// Override of the do_max_length hook; called by max_length().
    321       virtual int
    322       do_max_length() const throw();
    323     };
    324 
    325   template<typename _InternT, typename _ExternT, typename _StateT>
            // Out-of-class definition of the primary template's static member id.
    326     locale::id codecvt<_InternT, _ExternT, _StateT>::id;
    327 
    328   /// class codecvt<char, char, mbstate_t> specialization.
          ///
          /// Full specialization for char to char with mbstate_t state.  Every
          /// member function, including both constructors and the destructor,
          /// is only declared in this class.  The result type is not
          /// re-declared here; it comes from __codecvt_abstract_base.
    329   template<>
    330     class codecvt<char, char, mbstate_t>
    331     : public __codecvt_abstract_base<char, char, mbstate_t>
    332     {
    333     public:
    334       // Types:
    335       typedef char			intern_type;
    336       typedef char			extern_type;
    337       typedef mbstate_t			state_type;
    338 
    339     protected:
              // Per-facet __c_locale handle; codecvt_byname replaces it for named
              // locales.
    340       __c_locale			_M_c_locale_codecvt;
    341 
    342     public:
              // Facet identifier for this specialization.
    343       static locale::id id;
    344 
              /// Constructor taking only the reference-count argument.
    345       explicit
    346       codecvt(size_t __refs = 0);
    347 
              /// Constructor taking a __c_locale in addition to @a __refs.
    348       explicit
    349       codecvt(__c_locale __cloc, size_t __refs = 0);
    350 
    351     protected:
              /// Virtual destructor, declared only.
    352       virtual
    353       ~codecvt();
    354 
              /// Override of the do_out hook; called by out().
    355       virtual result
    356       do_out(state_type& __state, const intern_type* __from,
    357 	     const intern_type* __from_end, const intern_type*& __from_next,
    358 	     extern_type* __to, extern_type* __to_end,
    359 	     extern_type*& __to_next) const;
    360 
              /// Override of the do_unshift hook; called by unshift().
    361       virtual result
    362       do_unshift(state_type& __state, extern_type* __to,
    363 		 extern_type* __to_end, extern_type*& __to_next) const;
    364 
              /// Override of the do_in hook; called by in().
    365       virtual result
    366       do_in(state_type& __state, const extern_type* __from,
    367 	    const extern_type* __from_end, const extern_type*& __from_next,
    368 	    intern_type* __to, intern_type* __to_end,
    369 	    intern_type*& __to_next) const;
    370 
              /// Override of the do_encoding hook; called by encoding().
    371       virtual int
    372       do_encoding() const throw();
    373 
              /// Override of the do_always_noconv hook; called by always_noconv().
    374       virtual bool
    375       do_always_noconv() const throw();
    376 
              /// Override of the do_length hook; called by length().
    377       virtual int
    378       do_length(state_type&, const extern_type* __from,
    379 		const extern_type* __end, size_t __max) const;
    380 
              /// Override of the do_max_length hook; called by max_length().
    381       virtual int
    382       do_max_length() const throw();
    383   };
    384 
    385 #ifdef _GLIBCXX_USE_WCHAR_T
    386   /// class codecvt<wchar_t, char, mbstate_t> specialization.
          ///
          /// Full specialization with wchar_t as the internal type and char as
          /// the external type; only compiled when _GLIBCXX_USE_WCHAR_T is
          /// defined.  Every member function is only declared in this class.
    387   template<>
    388     class codecvt<wchar_t, char, mbstate_t>
    389     : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
    390     {
    391     public:
    392       // Types:
    393       typedef wchar_t			intern_type;
    394       typedef char			extern_type;
    395       typedef mbstate_t			state_type;
    396 
    397     protected:
              // Per-facet __c_locale handle; codecvt_byname replaces it for named
              // locales.
    398       __c_locale			_M_c_locale_codecvt;
    399 
    400     public:
              // Facet identifier for this specialization.
    401       static locale::id			id;
    402 
              /// Constructor taking only the reference-count argument.
    403       explicit
    404       codecvt(size_t __refs = 0);
    405 
              /// Constructor taking a __c_locale in addition to @a __refs.
    406       explicit
    407       codecvt(__c_locale __cloc, size_t __refs = 0);
    408 
    409     protected:
              /// Virtual destructor, declared only.
    410       virtual
    411       ~codecvt();
    412 
              /// Override of the do_out hook (wchar_t input, char output).
    413       virtual result
    414       do_out(state_type& __state, const intern_type* __from,
    415 	     const intern_type* __from_end, const intern_type*& __from_next,
    416 	     extern_type* __to, extern_type* __to_end,
    417 	     extern_type*& __to_next) const;
    418 
              /// Override of the do_unshift hook; called by unshift().
    419       virtual result
    420       do_unshift(state_type& __state,
    421 		 extern_type* __to, extern_type* __to_end,
    422 		 extern_type*& __to_next) const;
    423 
              /// Override of the do_in hook (char input, wchar_t output).
    424       virtual result
    425       do_in(state_type& __state,
    426 	     const extern_type* __from, const extern_type* __from_end,
    427 	     const extern_type*& __from_next,
    428 	     intern_type* __to, intern_type* __to_end,
    429 	     intern_type*& __to_next) const;
    430 
              /// Override of the do_encoding hook; called by encoding().
    431       virtual
    432       int do_encoding() const throw();
    433 
              /// Override of the do_always_noconv hook; called by always_noconv().
    434       virtual
    435       bool do_always_noconv() const throw();
    436 
              /// Override of the do_length hook; called by length().
    437       virtual
    438       int do_length(state_type&, const extern_type* __from,
    439 		    const extern_type* __end, size_t __max) const;
    440 
              /// Override of the do_max_length hook; called by max_length().
    441       virtual int
    442       do_max_length() const throw();
    443     };
    444 #endif //_GLIBCXX_USE_WCHAR_T
    445 
    446   /// class codecvt_byname [22.2.1.6].
          ///
          /// A codecvt whose __c_locale is selected by a locale name.  It adds
          /// no members beyond a constructor and a destructor.
    447   template<typename _InternT, typename _ExternT, typename _StateT>
    448     class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
    449     {
    450     public:
              /**
               *  @brief  Construct the facet for the locale named @a __s.
               *
               *  The base codecvt is built with @a __refs first.  If @a __s is
               *  neither "C" nor "POSIX", the inherited _M_c_locale_codecvt is
               *  destroyed and then re-created for @a __s.  For "C" and "POSIX"
               *  the base's handle is kept as is.
               *
               *  NOTE(review): @a __s goes straight to __builtin_strcmp, so it is
               *  assumed to be a non-null, NUL-terminated string.
               *
               *  @param  __s  Locale name.
               *  @param  __refs  Passed to the codecvt base constructor.
               */
    451       explicit
    452       codecvt_byname(const char* __s, size_t __refs = 0)
    453       : codecvt<_InternT, _ExternT, _StateT>(__refs)
    454       {
    455 	if (__builtin_strcmp(__s, "C") != 0
    456 	    && __builtin_strcmp(__s, "POSIX") != 0)
    457 	  {
        	    // Release the handle set up by the base before replacing it.
    458 	    this->_S_destroy_c_locale(this->_M_c_locale_codecvt);
    459 	    this->_S_create_c_locale(this->_M_c_locale_codecvt, __s);
    460 	  }
    461       }
    462 
    463     protected:
              /// Virtual destructor with an empty body.
    464       virtual
    465       ~codecvt_byname() { }
    466     };
    467 
    468   // Inhibit implicit instantiations for required instantiations,
    469   // which are defined via explicit instantiations elsewhere.
    470   // NB: This syntax is a GNU extension.
          // Covered here: codecvt_byname plus the use_facet / has_facet function
          // templates for the char and, when enabled, wchar_t codecvt facets.
    471 #if _GLIBCXX_EXTERN_TEMPLATE
    472   extern template class codecvt_byname<char, char, mbstate_t>;
    473 
    474   extern template
    475     const codecvt<char, char, mbstate_t>&
    476     use_facet<codecvt<char, char, mbstate_t> >(const locale&);
    477 
    478   extern template
    479     bool
    480     has_facet<codecvt<char, char, mbstate_t> >(const locale&);
    481 
    482 #ifdef _GLIBCXX_USE_WCHAR_T
    483   extern template class codecvt_byname<wchar_t, char, mbstate_t>;
    484 
    485   extern template
    486     const codecvt<wchar_t, char, mbstate_t>&
    487     use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
    488 
    489   extern template
    490     bool
    491     has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
    492 #endif // _GLIBCXX_USE_WCHAR_T
    493 #endif // _GLIBCXX_EXTERN_TEMPLATE
    494 
    495 _GLIBCXX_END_NAMESPACE
    496 
    497 #endif // _CODECVT_H
    498