1 /* 2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 /** 17 * @file picobase.h 18 * 19 * base functionality 20 * 21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 22 * All rights reserved. 23 * 24 * History: 25 * - 2009-04-20 -- initial version 26 * 27 */ 28 29 #ifndef PICOBASE_H_ 30 #define PICOBASE_H_ 31 32 #include "picoos.h" 33 34 #ifdef __cplusplus 35 extern "C" { 36 #endif 37 #if 0 38 } 39 #endif 40 41 /* maximum number of bytes of an UTF8 character */ 42 #define PICOBASE_UTF8_MAXLEN 4 43 44 typedef picoos_uint8 picobase_utf8char[PICOBASE_UTF8_MAXLEN+1]; /* always zero terminated */ 45 typedef picoos_uint8 picobase_utf8; 46 typedef picoos_uint16 picobase_utf16; 47 typedef picoos_uint32 picobase_utf32; 48 49 /* ***************************************************************/ 50 /* Unicode UTF8 functions */ 51 /* ***************************************************************/ 52 53 /** 54 * Determines the number of UTF8 characters contained in 55 * the UTF8 string 'utf8str' of maximum length maxlen (in bytes) 56 * @param utf8str : a string encoded in UTF8 57 * @param maxlen : max length (in bytes) accessible in utf8str 58 * @return >=0 : length of the UTF8 string in number of UTF8 characters 59 * up to the first '\0' or maxlen 60 * @return <0 : not starting with a valid UTF8 character 61 * @remarks strict implementation, not allowing invalid utf8 62 */ 63 picoos_int32 picobase_utf8_length(const picoos_uint8 *utf8str, 64 const picoos_uint16 maxlen); 65 66 67 /** 68 * Determines the number of bytes an UTF8 character used based 69 * on the first byte of the UTF8 character 70 * @param firstchar: the first (and maybe only) byte of an UTF8 character 71 * @return positive value in {1,4} : number of bytes of the UTF8 character 72 * @return 0 :if not a valid UTF8 character start 73 * @remarks strict implementation, not allowing invalid utf8 74 */ 75 /* picoos_uint8 picobase_det_utf8_length(const picoos_uint8 firstchar); */ 76 77 #define picobase_det_utf8_length(x) ( ((x)<(picoos_uint8)'\200')?1:(((x)>=(picoos_uint8)'\370')?0:(((x)>=(picoos_uint8)'\360')?4:(((x)>=(picoos_uint8)'\340')?3:(((x)>=(picoos_uint8)'\300')?2:0)))) ) 78 79 /** 80 * Converts the content of 'utf8str' to lowercase and stores it on 'lowercase' 81 * on the first byte of the UTF8 character 82 * @param utf8str : utf8 string 83 * @param lowercaseMaxLen : maximal number of bytes available in 'lowercase' 84 * @param lowercase : string converted to lowercase (output) 85 * @param done : flag to report success/failure of the operation (output) 86 * @return TRUE if successful, FALSE otherwise 87 */ 88 picoos_int32 picobase_lowercase_utf8_str (picoos_uchar utf8str[], picoos_char lowercase[], picoos_int32 lowercaseMaxLen, picoos_uint8 * done); 89 90 /** 91 * Converts the content of 'utf8str' to upperrcase and stores it on 'uppercase' 92 * @param utf8str : utf8 string 93 * @param uppercase : string converted to uppercase (output) 94 * @param uppercaseMaxLen : maximal number of bytes available in 'uppercase' 95 * @param done : flag to report success/failure of the operation (output) 96 * @return TRUE if successful, FALSE otherwise 97 */ 98 picoos_int32 picobase_uppercase_utf8_str (picoos_uchar utf8str[], picoos_char uppercase[], int uppercaseMaxLen, picoos_uint8 * done); 99 100 /** 101 * Gets next UTF8 character 'utf8char' from the UTF8 string 102 * 'utf8s' starting at position 'pos' 103 * @param utf8s : UTF8 string 104 * @param utf8slenmax : max length accessible in utf8s 105 * @param pos : position from where the UTF8 character is checked and copied 106 * (set also as output to the position directly following the UTF8 char) 107 * @param utf8char : zero terminated UTF8 character containing 1 to 4 bytes (output) 108 * @return TRUE if okay 109 * @return FALSE if there is no valid UTF8 char or no more UTF8 char available within utf8len 110 */ 111 picoos_uint8 picobase_get_next_utf8char(const picoos_uint8 *utf8s, 112 const picoos_uint32 utf8slenmax, 113 picoos_uint32 *pos, 114 picobase_utf8char utf8char); 115 116 /** 117 * Same as picobase_get_next_utf8char 118 * without copying the char to utf8char 119 */ 120 picoos_uint8 picobase_get_next_utf8charpos(const picoos_uint8 *utf8s, 121 const picoos_uint32 utf8slenmax, 122 picoos_uint32 *pos); 123 124 /** 125 * Gets previous UTF8 character 'utf8char' from the UTF8 string 126 * 'utf8s' starting the backward search at position 'pos-1' 127 * @param utf8s : UTF8 string 128 * @param utf8slenmin : min length accessible in utf8s 129 * @param pos : the search for the prev UTF8 char starts at [pos-1] 130 * (set also as output to the start position of the prev UTF8 character) 131 * @param utf8char : zero terminated UTF8 character containing 1 to 4 bytes (output) 132 * @return TRUE if okay 133 * @return FALSE if there is no valid UTF8 char preceeding pos or no more UTF8 char available within utf8len 134 */ 135 picoos_uint8 picobase_get_prev_utf8char(const picoos_uint8 *utf8s, 136 const picoos_uint32 utf8slenmin, 137 picoos_uint32 *pos, 138 picobase_utf8char utf8char); 139 140 /** 141 * Same as picobase_get_prev_utf8char 142 * without copying the char to utf8char 143 */ 144 picoos_uint8 picobase_get_prev_utf8charpos(const picoos_uint8 *utf8s, 145 const picoos_uint32 utf8slenmin, 146 picoos_uint32 *pos); 147 148 149 /** 150 * returns TRUE if the input string is UTF8 and uppercase 151 * @param str : UTF8 string 152 * @param strmaxlen : max length for the input string 153 * @return TRUE if string is UTF8 and uppercase 154 * @return FALSE otherwise 155 */ 156 extern picoos_bool picobase_is_utf8_uppercase (picoos_uchar str[], picoos_int32 strmaxlen); 157 158 /** 159 * returns TRUE if the input string is UTF8 and lowercase 160 * @param str : UTF8 string 161 * @param strmaxlen : max length for the input string 162 * @return TRUE if string is UTF8 and lowercase 163 * @return FALSE otherwise 164 */ 165 extern picoos_bool picobase_is_utf8_lowercase (picoos_uchar str[], picoos_int32 strmaxlen); 166 167 #ifdef __cplusplus 168 } 169 #endif 170 171 #endif /*PICOBASE_H_*/ 172