Home | History | Annotate | Download | only in glib
      1 /* GLIB - Library of useful routines for C programming
      2  *
      3  * gconvert.c: Convert between character sets using iconv
      4  * Copyright Red Hat Inc., 2000
      5  * Authors: Havoc Pennington <hp (at) redhat.com>, Owen Taylor <otaylor (at) redhat.com>
      6  *
      7  * This library is free software; you can redistribute it and/or
      8  * modify it under the terms of the GNU Lesser General Public
      9  * License as published by the Free Software Foundation; either
     10  * version 2 of the License, or (at your option) any later version.
     11  *
     12  * This library is distributed in the hope that it will be useful,
     13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15  * Lesser General Public License for more details.
     16  *
     17  * You should have received a copy of the GNU Lesser General Public
     18  * License along with this library; if not, write to the
     19  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
     20  * Boston, MA 02111-1307, USA.
     21  */
     22 
     23 #include "config.h"
     24 
     25 #include "glib.h"
     26 
     27 
     28 #ifndef ANDROID_STUB
     29 #ifndef G_OS_WIN32
     30 #include <iconv.h>
     31 #endif
     32 #endif
     33 
     34 #include <errno.h>
     35 #include <stdio.h>
     36 #include <string.h>
     37 #include <stdlib.h>
     38 
     39 #include "gprintfint.h"
     40 #include "gthreadprivate.h"
     41 #include "gunicode.h"
     42 
     43 #ifdef G_OS_WIN32
     44 #include "win_iconv.c"
     45 #endif
     46 
     47 #ifdef G_PLATFORM_WIN32
     48 #define STRICT
     49 #include <windows.h>
     50 #undef STRICT
     51 #endif
     52 
     53 #include "glibintl.h"
     54 
     55 #if defined(USE_LIBICONV_GNU) && !defined (_LIBICONV_H)
     56 #error GNU libiconv in use but included iconv.h not from libiconv
     57 #endif
     58 #if !defined(USE_LIBICONV_GNU) && defined (_LIBICONV_H)
     59 #error GNU libiconv not in use but included iconv.h is from libiconv
     60 #endif
     61 
     62 #include "galias.h"
     63 
     64 typedef void iconv_t;
     65 
     66 GQuark
     67 g_convert_error_quark (void)
     68 {
     69   return g_quark_from_static_string ("g_convert_error");
     70 }
     71 
     72 static gboolean
     73 try_conversion (const char *to_codeset,
     74 		const char *from_codeset,
     75 		iconv_t    *cd)
     76 {
     77 #ifndef ANDROID_STUB
     78   *cd = iconv_open (to_codeset, from_codeset);
     79 
     80   if (*cd == (iconv_t)-1 && errno == EINVAL)
     81     return FALSE;
     82   else
     83     return TRUE;
     84 #else
     85   return FALSE;
     86 #endif
     87 }
     88 
     89 static gboolean
     90 try_to_aliases (const char **to_aliases,
     91 		const char  *from_codeset,
     92 		iconv_t     *cd)
     93 {
     94   if (to_aliases)
     95     {
     96       const char **p = to_aliases;
     97       while (*p)
     98 	{
     99 	  if (try_conversion (*p, from_codeset, cd))
    100 	    return TRUE;
    101 
    102 	  p++;
    103 	}
    104     }
    105 
    106   return FALSE;
    107 }
    108 
    109 #ifndef ANDROID_STUB
    110 G_GNUC_INTERNAL extern const char **
    111 _g_charset_get_aliases (const char *canonical_name);
    112 #endif
    113 
    114 /**
    115  * g_iconv_open:
    116  * @to_codeset: destination codeset
    117  * @from_codeset: source codeset
    118  *
    119  * Same as the standard UNIX routine iconv_open(), but
    120  * may be implemented via libiconv on UNIX flavors that lack
    121  * a native implementation.
    122  *
    123  * GLib provides g_convert() and g_locale_to_utf8() which are likely
    124  * more convenient than the raw iconv wrappers.
    125  *
    126  * Return value: a "conversion descriptor", or (GIConv)-1 if
    127  *  opening the converter failed.
    128  **/
    129 GIConv
    130 g_iconv_open (const gchar  *to_codeset,
    131 	      const gchar  *from_codeset)
    132 {
    133 #ifndef ANDROID_STUB
    134   iconv_t cd;
    135 
    136   if (!try_conversion (to_codeset, from_codeset, &cd))
    137     {
    138       const char **to_aliases = _g_charset_get_aliases (to_codeset);
    139       const char **from_aliases = _g_charset_get_aliases (from_codeset);
    140 
    141       if (from_aliases)
    142 	{
    143 	  const char **p = from_aliases;
    144 	  while (*p)
    145 	    {
    146 	      if (try_conversion (to_codeset, *p, &cd))
    147 		goto out;
    148 
    149 	      if (try_to_aliases (to_aliases, *p, &cd))
    150 		goto out;
    151 
    152 	      p++;
    153 	    }
    154 	}
    155 
    156       if (try_to_aliases (to_aliases, from_codeset, &cd))
    157 	goto out;
    158     }
    159 
    160  out:
    161   return (cd == (iconv_t)-1) ? (GIConv)-1 : (GIConv)cd;
    162 #else
    163   return (GIConv) -1;
    164 #endif
    165 }
    166 
    167 /**
    168  * g_iconv:
    169  * @converter: conversion descriptor from g_iconv_open()
    170  * @inbuf: bytes to convert
    171  * @inbytes_left: inout parameter, bytes remaining to convert in @inbuf
    172  * @outbuf: converted output bytes
    173  * @outbytes_left: inout parameter, bytes available to fill in @outbuf
    174  *
    175  * Same as the standard UNIX routine iconv(), but
    176  * may be implemented via libiconv on UNIX flavors that lack
    177  * a native implementation.
    178  *
    179  * GLib provides g_convert() and g_locale_to_utf8() which are likely
    180  * more convenient than the raw iconv wrappers.
    181  *
    182  * Return value: count of non-reversible conversions, or -1 on error
    183  **/
    184 gsize
    185 g_iconv (GIConv   converter,
    186 	 gchar  **inbuf,
    187 	 gsize   *inbytes_left,
    188 	 gchar  **outbuf,
    189 	 gsize   *outbytes_left)
    190 {
    191 #ifndef ANDROID_STUB
    192   iconv_t cd = (iconv_t)converter;
    193 
    194   return iconv (cd, inbuf, inbytes_left, outbuf, outbytes_left);
    195 #else
    196   return -1;
    197 #endif
    198 }
    199 
    200 /**
    201  * g_iconv_close:
    202  * @converter: a conversion descriptor from g_iconv_open()
    203  *
    204  * Same as the standard UNIX routine iconv_close(), but
    205  * may be implemented via libiconv on UNIX flavors that lack
    206  * a native implementation. Should be called to clean up
    207  * the conversion descriptor from g_iconv_open() when
    208  * you are done converting things.
    209  *
    210  * GLib provides g_convert() and g_locale_to_utf8() which are likely
    211  * more convenient than the raw iconv wrappers.
    212  *
    213  * Return value: -1 on error, 0 on success
    214  **/
    215 gint
    216 g_iconv_close (GIConv converter)
    217 {
    218 #ifndef ANDROID_STUB
    219   iconv_t cd = (iconv_t)converter;
    220 
    221   return iconv_close (cd);
    222 #else
    223   return -1;
    224 #endif
    225 }
    226 
    227 
    228 #ifdef NEED_ICONV_CACHE
    229 
/* Bucket-count threshold consulted when deciding whether unused cache
 * buckets should be expired.
 */
#define ICONV_CACHE_SIZE   (16)

/* One cache entry, keyed by the "from_codeset:to_codeset" string built
 * in open_converter().
 */
struct _iconv_cache_bucket {
  gchar *key;		/* cache key; owned by the bucket, freed when it expires */
  guint32 refcount;	/* bumped by open_converter(), dropped by close_converter() */
  gboolean used;	/* TRUE while the cached descriptor @cd is handed out */
  GIConv cd;		/* the cached conversion descriptor */
};

/* List of all buckets; walked when expiring unused entries. */
static GList *iconv_cache_list;
/* Maps a key string to its bucket. */
static GHashTable *iconv_cache;
/* Maps every descriptor returned by open_converter() to its bucket's key. */
static GHashTable *iconv_open_hash;
/* Number of buckets currently in the cache. */
static guint iconv_cache_size = 0;
/* Guards all of the cache state above. */
G_LOCK_DEFINE_STATIC (iconv_cache_lock);
    244 
    245 /* caller *must* hold the iconv_cache_lock */
    246 static void
    247 iconv_cache_init (void)
    248 {
    249   static gboolean initialized = FALSE;
    250 
    251   if (initialized)
    252     return;
    253 
    254   iconv_cache_list = NULL;
    255   iconv_cache = g_hash_table_new (g_str_hash, g_str_equal);
    256   iconv_open_hash = g_hash_table_new (g_direct_hash, g_direct_equal);
    257 
    258   initialized = TRUE;
    259 }
    260 
    261 
    262 /*
    263  * iconv_cache_bucket_new:
    264  * @key: cache key
    265  * @cd: iconv descriptor
    266  *
    267  * Creates a new cache bucket, inserts it into the cache and
    268  * increments the cache size.
    269  *
    270  * This assumes ownership of @key.
    271  *
    272  * Returns a pointer to the newly allocated cache bucket.
    273  **/
    274 static struct _iconv_cache_bucket *
    275 iconv_cache_bucket_new (gchar *key, GIConv cd)
    276 {
    277   struct _iconv_cache_bucket *bucket;
    278 
    279   bucket = g_new (struct _iconv_cache_bucket, 1);
    280   bucket->key = key;
    281   bucket->refcount = 1;
    282   bucket->used = TRUE;
    283   bucket->cd = cd;
    284 
    285   g_hash_table_insert (iconv_cache, bucket->key, bucket);
    286 
    287   /* FIXME: if we sorted the list so items with few refcounts were
    288      first, then we could expire them faster in iconv_cache_expire_unused () */
    289   iconv_cache_list = g_list_prepend (iconv_cache_list, bucket);
    290 
    291   iconv_cache_size++;
    292 
    293   return bucket;
    294 }
    295 
    296 
    297 /*
    298  * iconv_cache_bucket_expire:
    299  * @node: cache bucket's node
    300  * @bucket: cache bucket
    301  *
    302  * Expires a single cache bucket @bucket. This should only ever be
    303  * called on a bucket that currently has no used iconv descriptors
    304  * open.
    305  *
    306  * @node is not a required argument. If @node is not supplied, we
    307  * search for it ourselves.
    308  **/
    309 static void
    310 iconv_cache_bucket_expire (GList *node, struct _iconv_cache_bucket *bucket)
    311 {
    312   g_hash_table_remove (iconv_cache, bucket->key);
    313 
    314   if (node == NULL)
    315     node = g_list_find (iconv_cache_list, bucket);
    316 
    317   g_assert (node != NULL);
    318 
    319   if (node->prev)
    320     {
    321       node->prev->next = node->next;
    322       if (node->next)
    323         node->next->prev = node->prev;
    324     }
    325   else
    326     {
    327       iconv_cache_list = node->next;
    328       if (node->next)
    329         node->next->prev = NULL;
    330     }
    331 
    332   g_list_free_1 (node);
    333 
    334   g_free (bucket->key);
    335   g_iconv_close (bucket->cd);
    336   g_free (bucket);
    337 
    338   iconv_cache_size--;
    339 }
    340 
    341 
    342 /*
    343  * iconv_cache_expire_unused:
    344  *
    345  * Expires as many unused cache buckets as it needs to in order to get
    346  * the total number of buckets < ICONV_CACHE_SIZE.
    347  **/
    348 static void
    349 iconv_cache_expire_unused (void)
    350 {
    351   struct _iconv_cache_bucket *bucket;
    352   GList *node, *next;
    353 
    354   node = iconv_cache_list;
    355   while (node && iconv_cache_size >= ICONV_CACHE_SIZE)
    356     {
    357       next = node->next;
    358 
    359       bucket = node->data;
    360       if (bucket->refcount == 0)
    361         iconv_cache_bucket_expire (node, bucket);
    362 
    363       node = next;
    364     }
    365 }
    366 
    367 static GIConv
    368 open_converter (const gchar *to_codeset,
    369 		const gchar *from_codeset,
    370 		GError     **error)
    371 {
    372   struct _iconv_cache_bucket *bucket;
    373   gchar *key, *dyn_key, auto_key[80];
    374   GIConv cd;
    375   gsize len_from_codeset, len_to_codeset;
    376 
    377   /* create our key */
    378   len_from_codeset = strlen (from_codeset);
    379   len_to_codeset = strlen (to_codeset);
    380   if (len_from_codeset + len_to_codeset + 2 < sizeof (auto_key))
    381     {
    382       key = auto_key;
    383       dyn_key = NULL;
    384     }
    385   else
    386     key = dyn_key = g_malloc (len_from_codeset + len_to_codeset + 2);
    387   memcpy (key, from_codeset, len_from_codeset);
    388   key[len_from_codeset] = ':';
    389   strcpy (key + len_from_codeset + 1, to_codeset);
    390 
    391   G_LOCK (iconv_cache_lock);
    392 
    393   /* make sure the cache has been initialized */
    394   iconv_cache_init ();
    395 
    396   bucket = g_hash_table_lookup (iconv_cache, key);
    397   if (bucket)
    398     {
    399       g_free (dyn_key);
    400 
    401       if (bucket->used)
    402         {
    403           cd = g_iconv_open (to_codeset, from_codeset);
    404           if (cd == (GIConv) -1)
    405             goto error;
    406         }
    407       else
    408         {
    409 	  /* Apparently iconv on Solaris <= 7 segfaults if you pass in
    410 	   * NULL for anything but inbuf; work around that. (NULL outbuf
    411 	   * or NULL *outbuf is allowed by Unix98.)
    412 	   */
    413 	  gsize inbytes_left = 0;
    414 	  gchar *outbuf = NULL;
    415 	  gsize outbytes_left = 0;
    416 
    417           cd = bucket->cd;
    418           bucket->used = TRUE;
    419 
    420           /* reset the descriptor */
    421           g_iconv (cd, NULL, &inbytes_left, &outbuf, &outbytes_left);
    422         }
    423 
    424       bucket->refcount++;
    425     }
    426   else
    427     {
    428       cd = g_iconv_open (to_codeset, from_codeset);
    429       if (cd == (GIConv) -1)
    430 	{
    431 	  g_free (dyn_key);
    432 	  goto error;
    433 	}
    434 
    435       iconv_cache_expire_unused ();
    436 
    437       bucket = iconv_cache_bucket_new (dyn_key ? dyn_key : g_strdup (key), cd);
    438     }
    439 
    440   g_hash_table_insert (iconv_open_hash, cd, bucket->key);
    441 
    442   G_UNLOCK (iconv_cache_lock);
    443 
    444   return cd;
    445 
    446  error:
    447 
    448   G_UNLOCK (iconv_cache_lock);
    449 
    450   /* Something went wrong.  */
    451   if (error)
    452     {
    453       if (errno == EINVAL)
    454 	g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION,
    455 		     _("Conversion from character set '%s' to '%s' is not supported"),
    456 		     from_codeset, to_codeset);
    457       else
    458 	g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
    459 		     _("Could not open converter from '%s' to '%s'"),
    460 		     from_codeset, to_codeset);
    461     }
    462 
    463   return cd;
    464 }
    465 
    466 static int
    467 close_converter (GIConv converter)
    468 {
    469   struct _iconv_cache_bucket *bucket;
    470   const gchar *key;
    471   GIConv cd;
    472 
    473   cd = converter;
    474 
    475   if (cd == (GIConv) -1)
    476     return 0;
    477 
    478   G_LOCK (iconv_cache_lock);
    479 
    480   key = g_hash_table_lookup (iconv_open_hash, cd);
    481   if (key)
    482     {
    483       g_hash_table_remove (iconv_open_hash, cd);
    484 
    485       bucket = g_hash_table_lookup (iconv_cache, key);
    486       g_assert (bucket);
    487 
    488       bucket->refcount--;
    489 
    490       if (cd == bucket->cd)
    491         bucket->used = FALSE;
    492       else
    493         g_iconv_close (cd);
    494 
    495       if (!bucket->refcount && iconv_cache_size > ICONV_CACHE_SIZE)
    496         {
    497           /* expire this cache bucket */
    498           iconv_cache_bucket_expire (NULL, bucket);
    499         }
    500     }
    501   else
    502     {
    503       G_UNLOCK (iconv_cache_lock);
    504 
    505       g_warning ("This iconv context wasn't opened using open_converter");
    506 
    507       return g_iconv_close (converter);
    508     }
    509 
    510   G_UNLOCK (iconv_cache_lock);
    511 
    512   return 0;
    513 }
    514 
    515 #else  /* !NEED_ICONV_CACHE */
    516 
    517 static GIConv
    518 open_converter (const gchar *to_codeset,
    519 		const gchar *from_codeset,
    520 		GError     **error)
    521 {
    522   GIConv cd;
    523 
    524   cd = g_iconv_open (to_codeset, from_codeset);
    525 
    526   if (cd == (GIConv) -1)
    527     {
    528       /* Something went wrong.  */
    529       if (error)
    530 	{
    531 	  if (errno == EINVAL)
    532 	    g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION,
    533 			 _("Conversion from character set '%s' to '%s' is not supported"),
    534 			 from_codeset, to_codeset);
    535 	  else
    536 	    g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
    537 			 _("Could not open converter from '%s' to '%s'"),
    538 			 from_codeset, to_codeset);
    539 	}
    540     }
    541 
    542   return cd;
    543 }
    544 
    545 static int
    546 close_converter (GIConv cd)
    547 {
    548   if (cd == (GIConv) -1)
    549     return 0;
    550 
    551   return g_iconv_close (cd);
    552 }
    553 
    554 #endif /* NEED_ICONV_CACHE */
    555 
    556 /**
    557  * g_convert_with_iconv:
    558  * @str:           the string to convert
    559  * @len:           the length of the string, or -1 if the string is
    560  *                 nul-terminated<footnoteref linkend="nul-unsafe"/>.
    561  * @converter:     conversion descriptor from g_iconv_open()
    562  * @bytes_read:    location to store the number of bytes in the
    563  *                 input string that were successfully converted, or %NULL.
    564  *                 Even if the conversion was successful, this may be
    565  *                 less than @len if there were partial characters
    566  *                 at the end of the input. If the error
    567  *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
    568  *                 stored will the byte offset after the last valid
    569  *                 input sequence.
    570  * @bytes_written: the number of bytes stored in the output buffer (not
    571  *                 including the terminating nul).
    572  * @error:         location to store the error occuring, or %NULL to ignore
    573  *                 errors. Any of the errors in #GConvertError may occur.
    574  *
    575  * Converts a string from one character set to another.
    576  *
    577  * Note that you should use g_iconv() for streaming
    578  * conversions<footnote id="streaming-state">
    579  *  <para>
    580  * Despite the fact that @byes_read can return information about partial
    581  * characters, the <literal>g_convert_...</literal> functions
    582  * are not generally suitable for streaming. If the underlying converter
    583  * being used maintains internal state, then this won't be preserved
    584  * across successive calls to g_convert(), g_convert_with_iconv() or
    585  * g_convert_with_fallback(). (An example of this is the GNU C converter
    586  * for CP1255 which does not emit a base character until it knows that
    587  * the next character is not a mark that could combine with the base
    588  * character.)
    589  *  </para>
    590  * </footnote>.
    591  *
    592  * Return value: If the conversion was successful, a newly allocated
    593  *               nul-terminated string, which must be freed with
    594  *               g_free(). Otherwise %NULL and @error will be set.
    595  **/
    596 gchar*
    597 g_convert_with_iconv (const gchar *str,
    598 		      gssize       len,
    599 		      GIConv       converter,
    600 		      gsize       *bytes_read,
    601 		      gsize       *bytes_written,
    602 		      GError     **error)
    603 {
    604   gchar *dest;
    605   gchar *outp;
    606   const gchar *p;
    607   gsize inbytes_remaining;
    608   gsize outbytes_remaining;
    609   gsize err;
    610   gsize outbuf_size;
    611   gboolean have_error = FALSE;
    612   gboolean done = FALSE;
    613   gboolean reset = FALSE;
    614 
    615   g_return_val_if_fail (converter != (GIConv) -1, NULL);
    616 
    617   if (len < 0)
    618     len = strlen (str);
    619 
    620   p = str;
    621   inbytes_remaining = len;
    622   outbuf_size = len + 1; /* + 1 for nul in case len == 1 */
    623 
    624   outbytes_remaining = outbuf_size - 1; /* -1 for nul */
    625   outp = dest = g_malloc (outbuf_size);
    626 
    627   while (!done && !have_error)
    628     {
    629       if (reset)
    630         err = g_iconv (converter, NULL, &inbytes_remaining, &outp, &outbytes_remaining);
    631       else
    632         err = g_iconv (converter, (char **)&p, &inbytes_remaining, &outp, &outbytes_remaining);
    633 
    634       if (err == (gsize) -1)
    635 	{
    636 	  switch (errno)
    637 	    {
    638 	    case EINVAL:
    639 	      /* Incomplete text, do not report an error */
    640 	      done = TRUE;
    641 	      break;
    642 	    case E2BIG:
    643 	      {
    644 		gsize used = outp - dest;
    645 
    646 		outbuf_size *= 2;
    647 		dest = g_realloc (dest, outbuf_size);
    648 
    649 		outp = dest + used;
    650 		outbytes_remaining = outbuf_size - used - 1; /* -1 for nul */
    651 	      }
    652 	      break;
    653 	    case EILSEQ:
    654 	      if (error)
    655 		g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
    656                                      _("Invalid byte sequence in conversion input"));
    657 	      have_error = TRUE;
    658 	      break;
    659 	    default:
    660 	      if (error)
    661 		g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
    662 			     _("Error during conversion: %s"),
    663 			     g_strerror (errno));
    664 	      have_error = TRUE;
    665 	      break;
    666 	    }
    667 	}
    668       else
    669 	{
    670 	  if (!reset)
    671 	    {
    672 	      /* call g_iconv with NULL inbuf to cleanup shift state */
    673 	      reset = TRUE;
    674 	      inbytes_remaining = 0;
    675 	    }
    676 	  else
    677 	    done = TRUE;
    678 	}
    679     }
    680 
    681   *outp = '\0';
    682 
    683   if (bytes_read)
    684     *bytes_read = p - str;
    685   else
    686     {
    687       if ((p - str) != len)
    688 	{
    689           if (!have_error)
    690             {
    691 	      if (error)
    692 		g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
    693                                      _("Partial character sequence at end of input"));
    694               have_error = TRUE;
    695             }
    696 	}
    697     }
    698 
    699   if (bytes_written)
    700     *bytes_written = outp - dest;	/* Doesn't include '\0' */
    701 
    702   if (have_error)
    703     {
    704       g_free (dest);
    705       return NULL;
    706     }
    707   else
    708     return dest;
    709 }
    710 
    711 /**
    712  * g_convert:
    713  * @str:           the string to convert
    714  * @len:           the length of the string, or -1 if the string is
    715  *                 nul-terminated<footnote id="nul-unsafe">
    716                      <para>
    717                        Note that some encodings may allow nul bytes to
    718                        occur inside strings. In that case, using -1 for
    719                        the @len parameter is unsafe.
    720                      </para>
    721                    </footnote>.
    722  * @to_codeset:    name of character set into which to convert @str
    723  * @from_codeset:  character set of @str.
    724  * @bytes_read:    location to store the number of bytes in the
    725  *                 input string that were successfully converted, or %NULL.
    726  *                 Even if the conversion was successful, this may be
    727  *                 less than @len if there were partial characters
    728  *                 at the end of the input. If the error
    729  *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
    730  *                 stored will the byte offset after the last valid
    731  *                 input sequence.
    732  * @bytes_written: the number of bytes stored in the output buffer (not
    733  *                 including the terminating nul).
    734  * @error:         location to store the error occuring, or %NULL to ignore
    735  *                 errors. Any of the errors in #GConvertError may occur.
    736  *
    737  * Converts a string from one character set to another.
    738  *
    739  * Note that you should use g_iconv() for streaming
    740  * conversions<footnoteref linkend="streaming-state"/>.
    741  *
    742  * Return value: If the conversion was successful, a newly allocated
    743  *               nul-terminated string, which must be freed with
    744  *               g_free(). Otherwise %NULL and @error will be set.
    745  **/
    746 gchar*
    747 g_convert (const gchar *str,
    748            gssize       len,
    749            const gchar *to_codeset,
    750            const gchar *from_codeset,
    751            gsize       *bytes_read,
    752 	   gsize       *bytes_written,
    753 	   GError     **error)
    754 {
    755   gchar *res;
    756   GIConv cd;
    757 
    758   g_return_val_if_fail (str != NULL, NULL);
    759   g_return_val_if_fail (to_codeset != NULL, NULL);
    760   g_return_val_if_fail (from_codeset != NULL, NULL);
    761 
    762   cd = open_converter (to_codeset, from_codeset, error);
    763 
    764   if (cd == (GIConv) -1)
    765     {
    766       if (bytes_read)
    767         *bytes_read = 0;
    768 
    769       if (bytes_written)
    770         *bytes_written = 0;
    771 
    772       return NULL;
    773     }
    774 
    775   res = g_convert_with_iconv (str, len, cd,
    776 			      bytes_read, bytes_written,
    777 			      error);
    778 
    779   close_converter (cd);
    780 
    781   return res;
    782 }
    783 
    784 /**
    785  * g_convert_with_fallback:
    786  * @str:          the string to convert
    787  * @len:          the length of the string, or -1 if the string is
    788  *                nul-terminated<footnoteref linkend="nul-unsafe"/>.
    789  * @to_codeset:   name of character set into which to convert @str
    790  * @from_codeset: character set of @str.
    791  * @fallback:     UTF-8 string to use in place of character not
    792  *                present in the target encoding. (The string must be
    793  *                representable in the target encoding).
    794                   If %NULL, characters not in the target encoding will
    795                   be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
    796  * @bytes_read:   location to store the number of bytes in the
    797  *                input string that were successfully converted, or %NULL.
    798  *                Even if the conversion was successful, this may be
    799  *                less than @len if there were partial characters
    800  *                at the end of the input.
    801  * @bytes_written: the number of bytes stored in the output buffer (not
    802  *                including the terminating nul).
 * @error:        location to store the error occurring, or %NULL to ignore
 *                errors. Any of the errors in #GConvertError may occur.
    805  *
    806  * Converts a string from one character set to another, possibly
    807  * including fallback sequences for characters not representable
    808  * in the output. Note that it is not guaranteed that the specification
    809  * for the fallback sequences in @fallback will be honored. Some
    810  * systems may do an approximate conversion from @from_codeset
    811  * to @to_codeset in their iconv() functions,
    812  * in which case GLib will simply return that approximate conversion.
    813  *
    814  * Note that you should use g_iconv() for streaming
    815  * conversions<footnoteref linkend="streaming-state"/>.
    816  *
    817  * Return value: If the conversion was successful, a newly allocated
    818  *               nul-terminated string, which must be freed with
    819  *               g_free(). Otherwise %NULL and @error will be set.
    820  **/
    821 gchar*
    822 g_convert_with_fallback (const gchar *str,
    823 			 gssize       len,
    824 			 const gchar *to_codeset,
    825 			 const gchar *from_codeset,
    826 			 gchar       *fallback,
    827 			 gsize       *bytes_read,
    828 			 gsize       *bytes_written,
    829 			 GError     **error)
    830 {
    831   gchar *utf8;
    832   gchar *dest;
    833   gchar *outp;
    834   const gchar *insert_str = NULL;
    835   const gchar *p;
    836   gsize inbytes_remaining;
    837   const gchar *save_p = NULL;
    838   gsize save_inbytes = 0;
    839   gsize outbytes_remaining;
    840   gsize err;
    841   GIConv cd;
    842   gsize outbuf_size;
    843   gboolean have_error = FALSE;
    844   gboolean done = FALSE;
    845 
    846   GError *local_error = NULL;
    847 
    848   g_return_val_if_fail (str != NULL, NULL);
    849   g_return_val_if_fail (to_codeset != NULL, NULL);
    850   g_return_val_if_fail (from_codeset != NULL, NULL);
    851 
    852   if (len < 0)
    853     len = strlen (str);
    854 
    855   /* Try an exact conversion; we only proceed if this fails
    856    * due to an illegal sequence in the input string.
    857    */
    858   dest = g_convert (str, len, to_codeset, from_codeset,
    859 		    bytes_read, bytes_written, &local_error);
    860   if (!local_error)
    861     return dest;
    862 
    863   if (!g_error_matches (local_error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE))
    864     {
    865       g_propagate_error (error, local_error);
    866       return NULL;
    867     }
    868   else
    869     g_error_free (local_error);
    870 
    871   local_error = NULL;
    872 
    873   /* No go; to proceed, we need a converter from "UTF-8" to
    874    * to_codeset, and the string as UTF-8.
    875    */
    876   cd = open_converter (to_codeset, "UTF-8", error);
    877   if (cd == (GIConv) -1)
    878     {
    879       if (bytes_read)
    880         *bytes_read = 0;
    881 
    882       if (bytes_written)
    883         *bytes_written = 0;
    884 
    885       return NULL;
    886     }
    887 
    888   utf8 = g_convert (str, len, "UTF-8", from_codeset,
    889 		    bytes_read, &inbytes_remaining, error);
    890   if (!utf8)
    891     {
    892       close_converter (cd);
    893       if (bytes_written)
    894         *bytes_written = 0;
    895       return NULL;
    896     }
    897 
    898   /* Now the heart of the code. We loop through the UTF-8 string, and
    899    * whenever we hit an offending character, we form fallback, convert
    900    * the fallback to the target codeset, and then go back to
    901    * converting the original string after finishing with the fallback.
    902    *
    903    * The variables save_p and save_inbytes store the input state
    904    * for the original string while we are converting the fallback
    905    */
    906   p = utf8;
    907 
    908   outbuf_size = len + 1; /* + 1 for nul in case len == 1 */
    909   outbytes_remaining = outbuf_size - 1; /* -1 for nul */
    910   outp = dest = g_malloc (outbuf_size);
    911 
    912   while (!done && !have_error)
    913     {
    914       gsize inbytes_tmp = inbytes_remaining;
    915       err = g_iconv (cd, (char **)&p, &inbytes_tmp, &outp, &outbytes_remaining);
    916       inbytes_remaining = inbytes_tmp;
    917 
    918       if (err == (gsize) -1)
    919 	{
    920 	  switch (errno)
    921 	    {
    922 	    case EINVAL:
    923 	      g_assert_not_reached();
    924 	      break;
    925 	    case E2BIG:
    926 	      {
    927 		gsize used = outp - dest;
    928 
    929 		outbuf_size *= 2;
    930 		dest = g_realloc (dest, outbuf_size);
    931 
    932 		outp = dest + used;
    933 		outbytes_remaining = outbuf_size - used - 1; /* -1 for nul */
    934 
    935 		break;
    936 	      }
    937 	    case EILSEQ:
    938 	      if (save_p)
    939 		{
    940 		  /* Error converting fallback string - fatal
    941 		   */
    942 		  g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
    943 			       _("Cannot convert fallback '%s' to codeset '%s'"),
    944 			       insert_str, to_codeset);
    945 		  have_error = TRUE;
    946 		  break;
    947 		}
    948 	      else if (p)
    949 		{
    950 		  if (!fallback)
    951 		    {
    952 		      gunichar ch = g_utf8_get_char (p);
    953 		      insert_str = g_strdup_printf (ch < 0x10000 ? "\\u%04x" : "\\U%08x",
    954 						    ch);
    955 		    }
    956 		  else
    957 		    insert_str = fallback;
    958 
    959 		  save_p = g_utf8_next_char (p);
    960 		  save_inbytes = inbytes_remaining - (save_p - p);
    961 		  p = insert_str;
    962 		  inbytes_remaining = strlen (p);
    963 		  break;
    964 		}
    965 	      /* fall thru if p is NULL */
    966 	    default:
    967 	      g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
    968 			   _("Error during conversion: %s"),
    969 			   g_strerror (errno));
    970 	      have_error = TRUE;
    971 	      break;
    972 	    }
    973 	}
    974       else
    975 	{
    976 	  if (save_p)
    977 	    {
    978 	      if (!fallback)
    979 		g_free ((gchar *)insert_str);
    980 	      p = save_p;
    981 	      inbytes_remaining = save_inbytes;
    982 	      save_p = NULL;
    983 	    }
    984 	  else if (p)
    985 	    {
    986 	      /* call g_iconv with NULL inbuf to cleanup shift state */
    987 	      p = NULL;
    988 	      inbytes_remaining = 0;
    989 	    }
    990 	  else
    991 	    done = TRUE;
    992 	}
    993     }
    994 
    995   /* Cleanup
    996    */
    997   *outp = '\0';
    998 
    999   close_converter (cd);
   1000 
   1001   if (bytes_written)
   1002     *bytes_written = outp - dest;	/* Doesn't include '\0' */
   1003 
   1004   g_free (utf8);
   1005 
   1006   if (have_error)
   1007     {
   1008       if (save_p && !fallback)
   1009 	g_free ((gchar *)insert_str);
   1010       g_free (dest);
   1011       return NULL;
   1012     }
   1013   else
   1014     return dest;
   1015 }
   1016 
   1017 /*
   1018  * g_locale_to_utf8
   1019  *
   1020  *
   1021  */
   1022 
   1023 static gchar *
   1024 strdup_len (const gchar *string,
   1025 	    gssize       len,
   1026 	    gsize       *bytes_written,
   1027 	    gsize       *bytes_read,
   1028 	    GError      **error)
   1029 
   1030 {
   1031   gsize real_len;
   1032 
   1033   if (!g_utf8_validate (string, len, NULL))
   1034     {
   1035       if (bytes_read)
   1036 	*bytes_read = 0;
   1037       if (bytes_written)
   1038 	*bytes_written = 0;
   1039 
   1040       g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
   1041                            _("Invalid byte sequence in conversion input"));
   1042       return NULL;
   1043     }
   1044 
   1045   if (len < 0)
   1046     real_len = strlen (string);
   1047   else
   1048     {
   1049       real_len = 0;
   1050 
   1051       while (real_len < len && string[real_len])
   1052 	real_len++;
   1053     }
   1054 
   1055   if (bytes_read)
   1056     *bytes_read = real_len;
   1057   if (bytes_written)
   1058     *bytes_written = real_len;
   1059 
   1060   return g_strndup (string, real_len);
   1061 }
   1062 
   1063 /**
   1064  * g_locale_to_utf8:
   1065  * @opsysstring:   a string in the encoding of the current locale. On Windows
   1066  *                 this means the system codepage.
   1067  * @len:           the length of the string, or -1 if the string is
   1068  *                 nul-terminated<footnoteref linkend="nul-unsafe"/>.
   1069  * @bytes_read:    location to store the number of bytes in the
   1070  *                 input string that were successfully converted, or %NULL.
   1071  *                 Even if the conversion was successful, this may be
   1072  *                 less than @len if there were partial characters
   1073  *                 at the end of the input. If the error
   1074  *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
   1075  *                 stored will the byte offset after the last valid
   1076  *                 input sequence.
   1077  * @bytes_written: the number of bytes stored in the output buffer (not
   1078  *                 including the terminating nul).
   1079  * @error:         location to store the error occuring, or %NULL to ignore
   1080  *                 errors. Any of the errors in #GConvertError may occur.
   1081  *
   1082  * Converts a string which is in the encoding used for strings by
   1083  * the C runtime (usually the same as that used by the operating
   1084  * system) in the <link linkend="setlocale">current locale</link> into a
   1085  * UTF-8 string.
   1086  *
   1087  * Return value: The converted string, or %NULL on an error.
   1088  **/
   1089 gchar *
   1090 g_locale_to_utf8 (const gchar  *opsysstring,
   1091 		  gssize        len,
   1092 		  gsize        *bytes_read,
   1093 		  gsize        *bytes_written,
   1094 		  GError      **error)
   1095 {
   1096   const char *charset;
   1097 
   1098   if (g_get_charset (&charset))
   1099     return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
   1100   else
   1101     return g_convert (opsysstring, len,
   1102 		      "UTF-8", charset, bytes_read, bytes_written, error);
   1103 }
   1104 
   1105 /**
   1106  * g_locale_from_utf8:
   1107  * @utf8string:    a UTF-8 encoded string
   1108  * @len:           the length of the string, or -1 if the string is
   1109  *                 nul-terminated<footnoteref linkend="nul-unsafe"/>.
   1110  * @bytes_read:    location to store the number of bytes in the
   1111  *                 input string that were successfully converted, or %NULL.
   1112  *                 Even if the conversion was successful, this may be
   1113  *                 less than @len if there were partial characters
   1114  *                 at the end of the input. If the error
   1115  *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
   1116  *                 stored will the byte offset after the last valid
   1117  *                 input sequence.
   1118  * @bytes_written: the number of bytes stored in the output buffer (not
   1119  *                 including the terminating nul).
   1120  * @error:         location to store the error occuring, or %NULL to ignore
   1121  *                 errors. Any of the errors in #GConvertError may occur.
   1122  *
   1123  * Converts a string from UTF-8 to the encoding used for strings by
   1124  * the C runtime (usually the same as that used by the operating
   1125  * system) in the <link linkend="setlocale">current locale</link>. On
   1126  * Windows this means the system codepage.
   1127  *
   1128  * Return value: The converted string, or %NULL on an error.
   1129  **/
   1130 gchar *
   1131 g_locale_from_utf8 (const gchar *utf8string,
   1132 		    gssize       len,
   1133 		    gsize       *bytes_read,
   1134 		    gsize       *bytes_written,
   1135 		    GError     **error)
   1136 {
   1137   const gchar *charset;
   1138 
   1139   if (g_get_charset (&charset))
   1140     return strdup_len (utf8string, len, bytes_read, bytes_written, error);
   1141   else
   1142     return g_convert (utf8string, len,
   1143 		      charset, "UTF-8", bytes_read, bytes_written, error);
   1144 }
   1145 
   1146 #ifndef G_PLATFORM_WIN32
   1147 
/* Cached answer for g_get_filename_charsets(); that function stores one
 * instance through a GStaticPrivate and rebuilds it when the locale
 * charset no longer matches @charset.
 */
typedef struct _GFilenameCharsetCache GFilenameCharsetCache;

struct _GFilenameCharsetCache {
  /* Value returned by g_get_filename_charsets(): whether the filename
   * encoding is considered to be UTF-8. */
  gboolean is_utf8;
  /* Copy of the locale charset name the cached list was computed for. */
  gchar *charset;
  /* NULL-terminated, owned list of charset names; entry 0 is the
   * filename encoding. */
  gchar **filename_charsets;
};
   1155 
   1156 static void
   1157 filename_charset_cache_free (gpointer data)
   1158 {
   1159   GFilenameCharsetCache *cache = data;
   1160   g_free (cache->charset);
   1161   g_strfreev (cache->filename_charsets);
   1162   g_free (cache);
   1163 }
   1164 
   1165 /**
   1166  * g_get_filename_charsets:
   1167  * @charsets: return location for the %NULL-terminated list of encoding names
   1168  *
   1169  * Determines the preferred character sets used for filenames.
   1170  * The first character set from the @charsets is the filename encoding, the
   1171  * subsequent character sets are used when trying to generate a displayable
   1172  * representation of a filename, see g_filename_display_name().
   1173  *
   1174  * On Unix, the character sets are determined by consulting the
   1175  * environment variables <envar>G_FILENAME_ENCODING</envar> and
   1176  * <envar>G_BROKEN_FILENAMES</envar>. On Windows, the character set
   1177  * used in the GLib API is always UTF-8 and said environment variables
   1178  * have no effect.
   1179  *
   1180  * <envar>G_FILENAME_ENCODING</envar> may be set to a comma-separated list
   1181  * of character set names. The special token "&commat;locale" is taken to
   1182  * mean the character set for the <link linkend="setlocale">current
   1183  * locale</link>. If <envar>G_FILENAME_ENCODING</envar> is not set, but
   1184  * <envar>G_BROKEN_FILENAMES</envar> is, the character set of the current
   1185  * locale is taken as the filename encoding. If neither environment variable
   1186  * is set, UTF-8 is taken as the filename encoding, but the character
   1187  * set of the current locale is also put in the list of encodings.
   1188  *
   1189  * The returned @charsets belong to GLib and must not be freed.
   1190  *
   1191  * Note that on Unix, regardless of the locale character set or
   1192  * <envar>G_FILENAME_ENCODING</envar> value, the actual file names present
   1193  * on a system might be in any random encoding or just gibberish.
   1194  *
   1195  * Return value: %TRUE if the filename encoding is UTF-8.
   1196  *
   1197  * Since: 2.6
   1198  */
   1199 gboolean
   1200 g_get_filename_charsets (G_CONST_RETURN gchar ***filename_charsets)
   1201 {
   1202   static GStaticPrivate cache_private = G_STATIC_PRIVATE_INIT;
   1203   GFilenameCharsetCache *cache = g_static_private_get (&cache_private);
   1204   const gchar *charset;
   1205 
   1206   if (!cache)
   1207     {
   1208       cache = g_new0 (GFilenameCharsetCache, 1);
   1209       g_static_private_set (&cache_private, cache, filename_charset_cache_free);
   1210     }
   1211 
   1212   g_get_charset (&charset);
   1213 
   1214   if (!(cache->charset && strcmp (cache->charset, charset) == 0))
   1215     {
   1216       const gchar *new_charset;
   1217       gchar *p;
   1218       gint i;
   1219 
   1220       g_free (cache->charset);
   1221       g_strfreev (cache->filename_charsets);
   1222       cache->charset = g_strdup (charset);
   1223 
   1224       p = getenv ("G_FILENAME_ENCODING");
   1225       if (p != NULL && p[0] != '\0')
   1226 	{
   1227 	  cache->filename_charsets = g_strsplit (p, ",", 0);
   1228 	  cache->is_utf8 = (strcmp (cache->filename_charsets[0], "UTF-8") == 0);
   1229 
   1230 	  for (i = 0; cache->filename_charsets[i]; i++)
   1231 	    {
   1232 	      if (strcmp ("@locale", cache->filename_charsets[i]) == 0)
   1233 		{
   1234 		  g_get_charset (&new_charset);
   1235 		  g_free (cache->filename_charsets[i]);
   1236 		  cache->filename_charsets[i] = g_strdup (new_charset);
   1237 		}
   1238 	    }
   1239 	}
   1240       else if (getenv ("G_BROKEN_FILENAMES") != NULL)
   1241 	{
   1242 	  cache->filename_charsets = g_new0 (gchar *, 2);
   1243 	  cache->is_utf8 = g_get_charset (&new_charset);
   1244 	  cache->filename_charsets[0] = g_strdup (new_charset);
   1245 	}
   1246       else
   1247 	{
   1248 	  cache->filename_charsets = g_new0 (gchar *, 3);
   1249 	  cache->is_utf8 = TRUE;
   1250 	  cache->filename_charsets[0] = g_strdup ("UTF-8");
   1251 	  if (!g_get_charset (&new_charset))
   1252 	    cache->filename_charsets[1] = g_strdup (new_charset);
   1253 	}
   1254     }
   1255 
   1256   if (filename_charsets)
   1257     *filename_charsets = (const gchar **)cache->filename_charsets;
   1258 
   1259   return cache->is_utf8;
   1260 }
   1261 
   1262 #else /* G_PLATFORM_WIN32 */
   1263 
   1264 gboolean
   1265 g_get_filename_charsets (G_CONST_RETURN gchar ***filename_charsets)
   1266 {
   1267   static const gchar *charsets[] = {
   1268     "UTF-8",
   1269     NULL
   1270   };
   1271 
   1272 #ifdef G_OS_WIN32
   1273   /* On Windows GLib pretends that the filename charset is UTF-8 */
   1274   if (filename_charsets)
   1275     *filename_charsets = charsets;
   1276 
   1277   return TRUE;
   1278 #else
   1279   gboolean result;
   1280 
   1281   /* Cygwin works like before */
   1282   result = g_get_charset (&(charsets[0]));
   1283 
   1284   if (filename_charsets)
   1285     *filename_charsets = charsets;
   1286 
   1287   return result;
   1288 #endif
   1289 }
   1290 
   1291 #endif /* G_PLATFORM_WIN32 */
   1292 
   1293 static gboolean
   1294 get_filename_charset (const gchar **filename_charset)
   1295 {
   1296   const gchar **charsets;
   1297   gboolean is_utf8;
   1298 
   1299   is_utf8 = g_get_filename_charsets (&charsets);
   1300 
   1301   if (filename_charset)
   1302     *filename_charset = charsets[0];
   1303 
   1304   return is_utf8;
   1305 }
   1306 
   1307 /* This is called from g_thread_init(). It's used to
   1308  * initialize some static data in a threadsafe way.
   1309  */
   1310 void
   1311 _g_convert_thread_init (void)
   1312 {
   1313   const gchar **dummy;
   1314   (void) g_get_filename_charsets (&dummy);
   1315 }
   1316 
   1317 /**
   1318  * g_filename_to_utf8:
   1319  * @opsysstring:   a string in the encoding for filenames
   1320  * @len:           the length of the string, or -1 if the string is
   1321  *                 nul-terminated<footnoteref linkend="nul-unsafe"/>.
   1322  * @bytes_read:    location to store the number of bytes in the
   1323  *                 input string that were successfully converted, or %NULL.
   1324  *                 Even if the conversion was successful, this may be
   1325  *                 less than @len if there were partial characters
   1326  *                 at the end of the input. If the error
   1327  *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
   1328  *                 stored will the byte offset after the last valid
   1329  *                 input sequence.
   1330  * @bytes_written: the number of bytes stored in the output buffer (not
   1331  *                 including the terminating nul).
   1332  * @error:         location to store the error occuring, or %NULL to ignore
   1333  *                 errors. Any of the errors in #GConvertError may occur.
   1334  *
   1335  * Converts a string which is in the encoding used by GLib for
   1336  * filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
   1337  * for filenames; on other platforms, this function indirectly depends on
   1338  * the <link linkend="setlocale">current locale</link>.
   1339  *
   1340  * Return value: The converted string, or %NULL on an error.
   1341  **/
   1342 gchar*
   1343 g_filename_to_utf8 (const gchar *opsysstring,
   1344 		    gssize       len,
   1345 		    gsize       *bytes_read,
   1346 		    gsize       *bytes_written,
   1347 		    GError     **error)
   1348 {
   1349   const gchar *charset;
   1350 
   1351   if (get_filename_charset (&charset))
   1352     return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
   1353   else
   1354     return g_convert (opsysstring, len,
   1355 		      "UTF-8", charset, bytes_read, bytes_written, error);
   1356 }
   1357 
   1358 #if defined (G_OS_WIN32) && !defined (_WIN64)
   1359 
   1360 #undef g_filename_to_utf8
   1361 
   1362 /* Binary compatibility version. Not for newly compiled code. Also not needed for
   1363  * 64-bit versions as there should be no old deployed binaries that would use
   1364  * the old versions.
   1365  */
   1366 
   1367 gchar*
   1368 g_filename_to_utf8 (const gchar *opsysstring,
   1369 		    gssize       len,
   1370 		    gsize       *bytes_read,
   1371 		    gsize       *bytes_written,
   1372 		    GError     **error)
   1373 {
   1374   const gchar *charset;
   1375 
   1376   if (g_get_charset (&charset))
   1377     return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
   1378   else
   1379     return g_convert (opsysstring, len,
   1380 		      "UTF-8", charset, bytes_read, bytes_written, error);
   1381 }
   1382 
   1383 #endif
   1384 
   1385 /**
   1386  * g_filename_from_utf8:
   1387  * @utf8string:    a UTF-8 encoded string.
   1388  * @len:           the length of the string, or -1 if the string is
   1389  *                 nul-terminated.
   1390  * @bytes_read:    location to store the number of bytes in the
   1391  *                 input string that were successfully converted, or %NULL.
   1392  *                 Even if the conversion was successful, this may be
   1393  *                 less than @len if there were partial characters
   1394  *                 at the end of the input. If the error
   1395  *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
   1396  *                 stored will the byte offset after the last valid
   1397  *                 input sequence.
   1398  * @bytes_written: the number of bytes stored in the output buffer (not
   1399  *                 including the terminating nul).
   1400  * @error:         location to store the error occuring, or %NULL to ignore
   1401  *                 errors. Any of the errors in #GConvertError may occur.
   1402  *
   1403  * Converts a string from UTF-8 to the encoding GLib uses for
   1404  * filenames. Note that on Windows GLib uses UTF-8 for filenames;
   1405  * on other platforms, this function indirectly depends on the
   1406  * <link linkend="setlocale">current locale</link>.
   1407  *
   1408  * Return value: The converted string, or %NULL on an error.
   1409  **/
   1410 gchar*
   1411 g_filename_from_utf8 (const gchar *utf8string,
   1412 		      gssize       len,
   1413 		      gsize       *bytes_read,
   1414 		      gsize       *bytes_written,
   1415 		      GError     **error)
   1416 {
   1417   const gchar *charset;
   1418 
   1419   if (get_filename_charset (&charset))
   1420     return strdup_len (utf8string, len, bytes_read, bytes_written, error);
   1421   else
   1422     return g_convert (utf8string, len,
   1423 		      charset, "UTF-8", bytes_read, bytes_written, error);
   1424 }
   1425 
   1426 #if defined (G_OS_WIN32) && !defined (_WIN64)
   1427 
   1428 #undef g_filename_from_utf8
   1429 
   1430 /* Binary compatibility version. Not for newly compiled code. */
   1431 
   1432 gchar*
   1433 g_filename_from_utf8 (const gchar *utf8string,
   1434 		      gssize       len,
   1435 		      gsize       *bytes_read,
   1436 		      gsize       *bytes_written,
   1437 		      GError     **error)
   1438 {
   1439   const gchar *charset;
   1440 
   1441   if (g_get_charset (&charset))
   1442     return strdup_len (utf8string, len, bytes_read, bytes_written, error);
   1443   else
   1444     return g_convert (utf8string, len,
   1445 		      charset, "UTF-8", bytes_read, bytes_written, error);
   1446 }
   1447 
   1448 #endif
   1449 
   1450 /* Test of haystack has the needle prefix, comparing case
   1451  * insensitive. haystack may be UTF-8, but needle must
   1452  * contain only ascii. */
   1453 static gboolean
   1454 has_case_prefix (const gchar *haystack, const gchar *needle)
   1455 {
   1456   const gchar *h, *n;
   1457 
   1458   /* Eat one character at a time. */
   1459   h = haystack;
   1460   n = needle;
   1461 
   1462   while (*n && *h &&
   1463 	 g_ascii_tolower (*n) == g_ascii_tolower (*h))
   1464     {
   1465       n++;
   1466       h++;
   1467     }
   1468 
   1469   return *n == '\0';
   1470 }
   1471 
/* Escaping policies for g_escape_uri_string(). Each value is a bit that is
 * tested against the per-character entries of the acceptable[] table; a
 * character is left unescaped when its entry has the selected bit set.
 * Some table entries also carry bit 0x4, which no enumerator here selects.
 */
typedef enum {
  UNSAFE_ALL        = 0x1,  /* Escape all unsafe characters   */
  UNSAFE_ALLOW_PLUS = 0x2,  /* Allows '+'  */
  UNSAFE_PATH       = 0x8,  /* Allows '/', '&', '=', ':', '@', '+', '$' and ',' */
  UNSAFE_HOST       = 0x10, /* Allows '/' and ':' and '@' */
  UNSAFE_SLASHES    = 0x20  /* Allows all characters in the table except for ' ', '/' and '%' */
} UnsafeCharacterSet;
   1479 
/* Per-character escaping table indexed by (character - 32). Each entry is
 * a bitmask of UnsafeCharacterSet values under which the character may
 * appear unescaped; 0x00 means the character is always escaped.
 */
static const guchar acceptable[96] = {
  /* A table of the ASCII chars from space (32) to DEL (127) */
  /*      !    "    #    $    %    &    '    (    )    *    +    ,    -    .    / */
  0x00,0x3F,0x20,0x20,0x28,0x00,0x2C,0x3F,0x3F,0x3F,0x3F,0x2A,0x28,0x3F,0x3F,0x1C,
  /* 0    1    2    3    4    5    6    7    8    9    :    ;    <    =    >    ? */
  0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x38,0x20,0x20,0x2C,0x20,0x20,
  /* @    A    B    C    D    E    F    G    H    I    J    K    L    M    N    O */
  0x38,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,
  /* P    Q    R    S    T    U    V    W    X    Y    Z    [    \    ]    ^    _ */
  0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x20,0x20,0x20,0x20,0x3F,
  /* `    a    b    c    d    e    f    g    h    i    j    k    l    m    n    o */
  0x20,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,
  /* p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~  DEL */
  0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x20,0x20,0x20,0x3F,0x20
};

/* Upper-case hexadecimal digits used to emit %XX escapes. The array is
 * exactly 16 bytes, so it carries no terminating nul. */
static const gchar hex[16] = "0123456789ABCDEF";
   1497 
   1498 /* Note: This escape function works on file: URIs, but if you want to
   1499  * escape something else, please read RFC-2396 */
   1500 static gchar *
   1501 g_escape_uri_string (const gchar *string,
   1502 		     UnsafeCharacterSet mask)
   1503 {
   1504 #define ACCEPTABLE(a) ((a)>=32 && (a)<128 && (acceptable[(a)-32] & use_mask))
   1505 
   1506   const gchar *p;
   1507   gchar *q;
   1508   gchar *result;
   1509   int c;
   1510   gint unacceptable;
   1511   UnsafeCharacterSet use_mask;
   1512 
   1513   g_return_val_if_fail (mask == UNSAFE_ALL
   1514 			|| mask == UNSAFE_ALLOW_PLUS
   1515 			|| mask == UNSAFE_PATH
   1516 			|| mask == UNSAFE_HOST
   1517 			|| mask == UNSAFE_SLASHES, NULL);
   1518 
   1519   unacceptable = 0;
   1520   use_mask = mask;
   1521   for (p = string; *p != '\0'; p++)
   1522     {
   1523       c = (guchar) *p;
   1524       if (!ACCEPTABLE (c))
   1525 	unacceptable++;
   1526     }
   1527 
   1528   result = g_malloc (p - string + unacceptable * 2 + 1);
   1529 
   1530   use_mask = mask;
   1531   for (q = result, p = string; *p != '\0'; p++)
   1532     {
   1533       c = (guchar) *p;
   1534 
   1535       if (!ACCEPTABLE (c))
   1536 	{
   1537 	  *q++ = '%'; /* means hex coming */
   1538 	  *q++ = hex[c >> 4];
   1539 	  *q++ = hex[c & 15];
   1540 	}
   1541       else
   1542 	*q++ = *p;
   1543     }
   1544 
   1545   *q = '\0';
   1546 
   1547   return result;
   1548 }
   1549 
   1550 
   1551 static gchar *
   1552 g_escape_file_uri (const gchar *hostname,
   1553 		   const gchar *pathname)
   1554 {
   1555   char *escaped_hostname = NULL;
   1556   char *escaped_path;
   1557   char *res;
   1558 
   1559 #ifdef G_OS_WIN32
   1560   char *p, *backslash;
   1561 
   1562   /* Turn backslashes into forward slashes. That's what Netscape
   1563    * does, and they are actually more or less equivalent in Windows.
   1564    */
   1565 
   1566   pathname = g_strdup (pathname);
   1567   p = (char *) pathname;
   1568 
   1569   while ((backslash = strchr (p, '\\')) != NULL)
   1570     {
   1571       *backslash = '/';
   1572       p = backslash + 1;
   1573     }
   1574 #endif
   1575 
   1576   if (hostname && *hostname != '\0')
   1577     {
   1578       escaped_hostname = g_escape_uri_string (hostname, UNSAFE_HOST);
   1579     }
   1580 
   1581   escaped_path = g_escape_uri_string (pathname, UNSAFE_PATH);
   1582 
   1583   res = g_strconcat ("file://",
   1584 		     (escaped_hostname) ? escaped_hostname : "",
   1585 		     (*escaped_path != '/') ? "/" : "",
   1586 		     escaped_path,
   1587 		     NULL);
   1588 
   1589 #ifdef G_OS_WIN32
   1590   g_free ((char *) pathname);
   1591 #endif
   1592 
   1593   g_free (escaped_hostname);
   1594   g_free (escaped_path);
   1595 
   1596   return res;
   1597 }
   1598 
   1599 static int
   1600 unescape_character (const char *scanner)
   1601 {
   1602   int first_digit;
   1603   int second_digit;
   1604 
   1605   first_digit = g_ascii_xdigit_value (scanner[0]);
   1606   if (first_digit < 0)
   1607     return -1;
   1608 
   1609   second_digit = g_ascii_xdigit_value (scanner[1]);
   1610   if (second_digit < 0)
   1611     return -1;
   1612 
   1613   return (first_digit << 4) | second_digit;
   1614 }
   1615 
   1616 static gchar *
   1617 g_unescape_uri_string (const char *escaped,
   1618 		       int         len,
   1619 		       const char *illegal_escaped_characters,
   1620 		       gboolean    ascii_must_not_be_escaped)
   1621 {
   1622   const gchar *in, *in_end;
   1623   gchar *out, *result;
   1624   int c;
   1625 
   1626   if (escaped == NULL)
   1627     return NULL;
   1628 
   1629   if (len < 0)
   1630     len = strlen (escaped);
   1631 
   1632   result = g_malloc (len + 1);
   1633 
   1634   out = result;
   1635   for (in = escaped, in_end = escaped + len; in < in_end; in++)
   1636     {
   1637       c = *in;
   1638 
   1639       if (c == '%')
   1640 	{
   1641 	  /* catch partial escape sequences past the end of the substring */
   1642 	  if (in + 3 > in_end)
   1643 	    break;
   1644 
   1645 	  c = unescape_character (in + 1);
   1646 
   1647 	  /* catch bad escape sequences and NUL characters */
   1648 	  if (c <= 0)
   1649 	    break;
   1650 
   1651 	  /* catch escaped ASCII */
   1652 	  if (ascii_must_not_be_escaped && c <= 0x7F)
   1653 	    break;
   1654 
   1655 	  /* catch other illegal escaped characters */
   1656 	  if (strchr (illegal_escaped_characters, c) != NULL)
   1657 	    break;
   1658 
   1659 	  in += 2;
   1660 	}
   1661 
   1662       *out++ = c;
   1663     }
   1664 
   1665   g_assert (out - result <= len);
   1666   *out = '\0';
   1667 
   1668   if (in != in_end)
   1669     {
   1670       g_free (result);
   1671       return NULL;
   1672     }
   1673 
   1674   return result;
   1675 }
   1676 
   1677 static gboolean
   1678 is_asciialphanum (gunichar c)
   1679 {
   1680   return c <= 0x7F && g_ascii_isalnum (c);
   1681 }
   1682 
   1683 static gboolean
   1684 is_asciialpha (gunichar c)
   1685 {
   1686   return c <= 0x7F && g_ascii_isalpha (c);
   1687 }
   1688 
/* Checks that hostname is a sequence of '.'-separated labels.
 *
 * Every label must start with an ASCII letter or digit, may continue with
 * ASCII letters, digits and '-', and must not end with '-'. The final label
 * (optionally followed by one trailing '.') must start with an ASCII
 * letter. Any other character makes the hostname invalid.
 *
 * allows an empty string
 */
static gboolean
hostname_validate (const char *hostname)
{
  const char *p;
  gunichar c, first_char, last_char;

  p = hostname;
  if (*p == '\0')
    return TRUE;
  do
    {
      /* read in a label */
      c = g_utf8_get_char (p);
      p = g_utf8_next_char (p);
      if (!is_asciialphanum (c))
	return FALSE;
      first_char = c;
      /* consume the rest of the label; on exit c holds the first
       * character that is not part of it and last_char the final
       * character of the label */
      do
	{
	  last_char = c;
	  c = g_utf8_get_char (p);
	  p = g_utf8_next_char (p);
	}
      while (is_asciialphanum (c) || c == '-');
      if (last_char == '-')
	return FALSE;

      /* if that was the last label, check that it was a toplabel */
      if (c == '\0' || (c == '.' && *p == '\0'))
	return is_asciialpha (first_char);
    }
  while (c == '.');
  /* the label was followed by something other than '.' or the end */
  return FALSE;
}
   1724 
   1725 /**
   1726  * g_filename_from_uri:
   1727  * @uri: a uri describing a filename (escaped, encoded in ASCII).
   1728  * @hostname: Location to store hostname for the URI, or %NULL.
   1729  *            If there is no hostname in the URI, %NULL will be
   1730  *            stored in this location.
   1731  * @error: location to store the error occuring, or %NULL to ignore
   1732  *         errors. Any of the errors in #GConvertError may occur.
   1733  *
   1734  * Converts an escaped ASCII-encoded URI to a local filename in the
   1735  * encoding used for filenames.
   1736  *
   1737  * Return value: a newly-allocated string holding the resulting
   1738  *               filename, or %NULL on an error.
   1739  **/
   1740 gchar *
   1741 g_filename_from_uri (const gchar *uri,
   1742 		     gchar      **hostname,
   1743 		     GError     **error)
   1744 {
   1745   const char *path_part;
   1746   const char *host_part;
   1747   char *unescaped_hostname;
   1748   char *result;
   1749   char *filename;
   1750   int offs;
   1751 #ifdef G_OS_WIN32
   1752   char *p, *slash;
   1753 #endif
   1754 
   1755   if (hostname)
   1756     *hostname = NULL;
   1757 
   1758   if (!has_case_prefix (uri, "file:/"))
   1759     {
   1760       g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
   1761 		   _("The URI '%s' is not an absolute URI using the \"file\" scheme"),
   1762 		   uri);
   1763       return NULL;
   1764     }
   1765 
   1766   path_part = uri + strlen ("file:");
   1767 
   1768   if (strchr (path_part, '#') != NULL)
   1769     {
   1770       g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
   1771 		   _("The local file URI '%s' may not include a '#'"),
   1772 		   uri);
   1773       return NULL;
   1774     }
   1775 
   1776   if (has_case_prefix (path_part, "///"))
   1777     path_part += 2;
   1778   else if (has_case_prefix (path_part, "//"))
   1779     {
   1780       path_part += 2;
   1781       host_part = path_part;
   1782 
   1783       path_part = strchr (path_part, '/');
   1784 
   1785       if (path_part == NULL)
   1786 	{
   1787 	  g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
   1788 		       _("The URI '%s' is invalid"),
   1789 		       uri);
   1790 	  return NULL;
   1791 	}
   1792 
   1793       unescaped_hostname = g_unescape_uri_string (host_part, path_part - host_part, "", TRUE);
   1794 
   1795       if (unescaped_hostname == NULL ||
   1796 	  !hostname_validate (unescaped_hostname))
   1797 	{
   1798 	  g_free (unescaped_hostname);
   1799 	  g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
   1800 		       _("The hostname of the URI '%s' is invalid"),
   1801 		       uri);
   1802 	  return NULL;
   1803 	}
   1804 
   1805       if (hostname)
   1806 	*hostname = unescaped_hostname;
   1807       else
   1808 	g_free (unescaped_hostname);
   1809     }
   1810 
   1811   filename = g_unescape_uri_string (path_part, -1, "/", FALSE);
   1812 
   1813   if (filename == NULL)
   1814     {
   1815       g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
   1816 		   _("The URI '%s' contains invalidly escaped characters"),
   1817 		   uri);
   1818       return NULL;
   1819     }
   1820 
   1821   offs = 0;
   1822 #ifdef G_OS_WIN32
   1823   /* Drop localhost */
   1824   if (hostname && *hostname != NULL &&
   1825       g_ascii_strcasecmp (*hostname, "localhost") == 0)
   1826     {
   1827       g_free (*hostname);
   1828       *hostname = NULL;
   1829     }
   1830 
   1831   /* Turn slashes into backslashes, because that's the canonical spelling */
   1832   p = filename;
   1833   while ((slash = strchr (p, '/')) != NULL)
   1834     {
   1835       *slash = '\\';
   1836       p = slash + 1;
   1837     }
   1838 
   1839   /* Windows URIs with a drive letter can be like "file://host/c:/foo"
   1840    * or "file://host/c|/foo" (some Netscape versions). In those cases, start
   1841    * the filename from the drive letter.
   1842    */
   1843   if (g_ascii_isalpha (filename[1]))
   1844     {
   1845       if (filename[2] == ':')
   1846 	offs = 1;
   1847       else if (filename[2] == '|')
   1848 	{
   1849 	  filename[2] = ':';
   1850 	  offs = 1;
   1851 	}
   1852     }
   1853 #endif
   1854 
   1855   result = g_strdup (filename + offs);
   1856   g_free (filename);
   1857 
   1858   return result;
   1859 }
   1860 
   1861 #if defined (G_OS_WIN32) && !defined (_WIN64)
   1862 
   1863 #undef g_filename_from_uri
   1864 
   1865 gchar *
   1866 g_filename_from_uri (const gchar *uri,
   1867 		     gchar      **hostname,
   1868 		     GError     **error)
   1869 {
   1870   gchar *utf8_filename;
   1871   gchar *retval = NULL;
   1872 
   1873   utf8_filename = g_filename_from_uri_utf8 (uri, hostname, error);
   1874   if (utf8_filename)
   1875     {
   1876       retval = g_locale_from_utf8 (utf8_filename, -1, NULL, NULL, error);
   1877       g_free (utf8_filename);
   1878     }
   1879   return retval;
   1880 }
   1881 
   1882 #endif
   1883 
   1884 /**
   1885  * g_filename_to_uri:
   1886  * @filename: an absolute filename specified in the GLib file name encoding,
   1887  *            which is the on-disk file name bytes on Unix, and UTF-8 on
   1888  *            Windows
   1889  * @hostname: A UTF-8 encoded hostname, or %NULL for none.
   1890  * @error: location to store the error occuring, or %NULL to ignore
   1891  *         errors. Any of the errors in #GConvertError may occur.
   1892  *
   1893  * Converts an absolute filename to an escaped ASCII-encoded URI, with the path
   1894  * component following Section 3.3. of RFC 2396.
   1895  *
   1896  * Return value: a newly-allocated string holding the resulting
   1897  *               URI, or %NULL on an error.
   1898  **/
   1899 gchar *
   1900 g_filename_to_uri (const gchar *filename,
   1901 		   const gchar *hostname,
   1902 		   GError     **error)
   1903 {
   1904   char *escaped_uri;
   1905 
   1906   g_return_val_if_fail (filename != NULL, NULL);
   1907 
   1908   if (!g_path_is_absolute (filename))
   1909     {
   1910       g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH,
   1911 		   _("The pathname '%s' is not an absolute path"),
   1912 		   filename);
   1913       return NULL;
   1914     }
   1915 
   1916   if (hostname &&
   1917       !(g_utf8_validate (hostname, -1, NULL)
   1918 	&& hostname_validate (hostname)))
   1919     {
   1920       g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
   1921                            _("Invalid hostname"));
   1922       return NULL;
   1923     }
   1924 
   1925 #ifdef G_OS_WIN32
   1926   /* Don't use localhost unnecessarily */
   1927   if (hostname && g_ascii_strcasecmp (hostname, "localhost") == 0)
   1928     hostname = NULL;
   1929 #endif
   1930 
   1931   escaped_uri = g_escape_file_uri (hostname, filename);
   1932 
   1933   return escaped_uri;
   1934 }
   1935 
   1936 #if defined (G_OS_WIN32) && !defined (_WIN64)
   1937 
   1938 #undef g_filename_to_uri
   1939 
   1940 gchar *
   1941 g_filename_to_uri (const gchar *filename,
   1942 		   const gchar *hostname,
   1943 		   GError     **error)
   1944 {
   1945   gchar *utf8_filename;
   1946   gchar *retval = NULL;
   1947 
   1948   utf8_filename = g_locale_to_utf8 (filename, -1, NULL, NULL, error);
   1949 
   1950   if (utf8_filename)
   1951     {
   1952       retval = g_filename_to_uri_utf8 (utf8_filename, hostname, error);
   1953       g_free (utf8_filename);
   1954     }
   1955 
   1956   return retval;
   1957 }
   1958 
   1959 #endif
   1960 
   1961 /**
   1962  * g_uri_list_extract_uris:
   1963  * @uri_list: an URI list
   1964  *
   1965  * Splits an URI list conforming to the text/uri-list
   1966  * mime type defined in RFC 2483 into individual URIs,
   1967  * discarding any comments. The URIs are not validated.
   1968  *
   1969  * Returns: a newly allocated %NULL-terminated list of
   1970  *   strings holding the individual URIs. The array should
   1971  *   be freed with g_strfreev().
   1972  *
   1973  * Since: 2.6
   1974  */
   1975 gchar **
   1976 g_uri_list_extract_uris (const gchar *uri_list)
   1977 {
   1978   GSList *uris, *u;
   1979   const gchar *p, *q;
   1980   gchar **result;
   1981   gint n_uris = 0;
   1982 
   1983   uris = NULL;
   1984 
   1985   p = uri_list;
   1986 
   1987   /* We don't actually try to validate the URI according to RFC
   1988    * 2396, or even check for allowed characters - we just ignore
   1989    * comments and trim whitespace off the ends.  We also
   1990    * allow LF delimination as well as the specified CRLF.
   1991    *
   1992    * We do allow comments like specified in RFC 2483.
   1993    */
   1994   while (p)
   1995     {
   1996       if (*p != '#')
   1997 	{
   1998 	  while (g_ascii_isspace (*p))
   1999 	    p++;
   2000 
   2001 	  q = p;
   2002 	  while (*q && (*q != '\n') && (*q != '\r'))
   2003 	    q++;
   2004 
   2005 	  if (q > p)
   2006 	    {
   2007 	      q--;
   2008 	      while (q > p && g_ascii_isspace (*q))
   2009 		q--;
   2010 
   2011 	      if (q > p)
   2012 		{
   2013 		  uris = g_slist_prepend (uris, g_strndup (p, q - p + 1));
   2014 		  n_uris++;
   2015 		}
   2016 	    }
   2017 	}
   2018       p = strchr (p, '\n');
   2019       if (p)
   2020 	p++;
   2021     }
   2022 
   2023   result = g_new (gchar *, n_uris + 1);
   2024 
   2025   result[n_uris--] = NULL;
   2026   for (u = uris; u; u = u->next)
   2027     result[n_uris--] = u->data;
   2028 
   2029   g_slist_free (uris);
   2030 
   2031   return result;
   2032 }
   2033 
   2034 /**
   2035  * g_filename_display_basename:
   2036  * @filename: an absolute pathname in the GLib file name encoding
   2037  *
   2038  * Returns the display basename for the particular filename, guaranteed
   2039  * to be valid UTF-8. The display name might not be identical to the filename,
   2040  * for instance there might be problems converting it to UTF-8, and some files
   2041  * can be translated in the display.
   2042  *
   2043  * If GLib can not make sense of the encoding of @filename, as a last resort it
   2044  * replaces unknown characters with U+FFFD, the Unicode replacement character.
   2045  * You can search the result for the UTF-8 encoding of this character (which is
   2046  * "\357\277\275" in octal notation) to find out if @filename was in an invalid
   2047  * encoding.
   2048  *
   2049  * You must pass the whole absolute pathname to this functions so that
   2050  * translation of well known locations can be done.
   2051  *
   2052  * This function is preferred over g_filename_display_name() if you know the
   2053  * whole path, as it allows translation.
   2054  *
   2055  * Return value: a newly allocated string containing
   2056  *   a rendition of the basename of the filename in valid UTF-8
   2057  *
   2058  * Since: 2.6
   2059  **/
   2060 gchar *
   2061 g_filename_display_basename (const gchar *filename)
   2062 {
   2063   char *basename;
   2064   char *display_name;
   2065 
   2066   g_return_val_if_fail (filename != NULL, NULL);
   2067 
   2068   basename = g_path_get_basename (filename);
   2069   display_name = g_filename_display_name (basename);
   2070   g_free (basename);
   2071   return display_name;
   2072 }
   2073 
   2074 /**
   2075  * g_filename_display_name:
   2076  * @filename: a pathname hopefully in the GLib file name encoding
   2077  *
   2078  * Converts a filename into a valid UTF-8 string. The conversion is
   2079  * not necessarily reversible, so you should keep the original around
   2080  * and use the return value of this function only for display purposes.
   2081  * Unlike g_filename_to_utf8(), the result is guaranteed to be non-%NULL
   2082  * even if the filename actually isn't in the GLib file name encoding.
   2083  *
   2084  * If GLib can not make sense of the encoding of @filename, as a last resort it
   2085  * replaces unknown characters with U+FFFD, the Unicode replacement character.
   2086  * You can search the result for the UTF-8 encoding of this character (which is
   2087  * "\357\277\275" in octal notation) to find out if @filename was in an invalid
   2088  * encoding.
   2089  *
   2090  * If you know the whole pathname of the file you should use
   2091  * g_filename_display_basename(), since that allows location-based
   2092  * translation of filenames.
   2093  *
   2094  * Return value: a newly allocated string containing
   2095  *   a rendition of the filename in valid UTF-8
   2096  *
   2097  * Since: 2.6
   2098  **/
   2099 gchar *
   2100 g_filename_display_name (const gchar *filename)
   2101 {
   2102   gint i;
   2103   const gchar **charsets;
   2104   gchar *display_name = NULL;
   2105   gboolean is_utf8;
   2106 
   2107   is_utf8 = g_get_filename_charsets (&charsets);
   2108 
   2109   if (is_utf8)
   2110     {
   2111       if (g_utf8_validate (filename, -1, NULL))
   2112 	display_name = g_strdup (filename);
   2113     }
   2114 
   2115   if (!display_name)
   2116     {
   2117       /* Try to convert from the filename charsets to UTF-8.
   2118        * Skip the first charset if it is UTF-8.
   2119        */
   2120       for (i = is_utf8 ? 1 : 0; charsets[i]; i++)
   2121 	{
   2122 	  display_name = g_convert (filename, -1, "UTF-8", charsets[i],
   2123 				    NULL, NULL, NULL);
   2124 
   2125 	  if (display_name)
   2126 	    break;
   2127 	}
   2128     }
   2129 
   2130   /* if all conversions failed, we replace invalid UTF-8
   2131    * by a question mark
   2132    */
   2133   if (!display_name)
   2134     display_name = _g_utf8_make_valid (filename);
   2135 
   2136   return display_name;
   2137 }
   2138 
   2139 #define __G_CONVERT_C__
   2140 #include "galiasdef.c"
   2141