1 /* GNU SED, a batch stream editor. 2 Copyright (C) 2003, 2006, 2009 Free Software Foundation, Inc. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3, or (at your option) 7 any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software 16 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 17 18 #include "sed.h" 19 #include <stdlib.h> 20 #include <string.h> 21 22 #include "localcharset.h" 23 24 int mb_cur_max; 25 bool is_utf8; 26 27 #ifdef HAVE_MBRTOWC 28 /* Add a byte to the multibyte character represented by the state 29 CUR_STAT, and answer its length if a character is completed, 30 or -2 if it is yet to be completed. */ 31 int brlen (ch, cur_stat) 32 int ch; 33 mbstate_t *cur_stat; 34 { 35 char c = ch; 36 37 /* If we use the generic brlen, then MBRLEN == mbrlen. */ 38 int result = mbrtowc(NULL, &c, 1, cur_stat); 39 40 /* An invalid sequence is treated like a singlebyte character. */ 41 if (result == -1) 42 { 43 memset (cur_stat, 0, sizeof (mbstate_t)); 44 return 1; 45 } 46 47 return result; 48 } 49 #endif 50 51 void 52 initialize_mbcs () 53 { 54 /* For UTF-8, we know that the encoding is stateless. */ 55 const char *codeset_name; 56 57 codeset_name = locale_charset (); 58 is_utf8 = (strcmp (codeset_name, "UTF-8") == 0); 59 60 #ifdef HAVE_MBRTOWC 61 mb_cur_max = MB_CUR_MAX; 62 #else 63 mb_cur_max = 1; 64 #endif 65 } 66 67