Home | History | Annotate | Download | only in lib
      1 /*
      2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 /**
     17  * @file picobase.h
     18  *
     19  * base functionality
     20  *
     21  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
     22  * All rights reserved.
     23  *
     24  * History:
     25  * - 2009-04-20 -- initial version
     26  *
     27  */
     28 
     29 #ifndef PICOBASE_H_
     30 #define PICOBASE_H_
     31 
     32 #include "picoos.h"
     33 
     34 #ifdef __cplusplus
     35 extern "C" {
     36 #endif
     37 #if 0
     38 }
     39 #endif
     40 
     41 /* maximum number of bytes of an UTF8 character */
     42 #define PICOBASE_UTF8_MAXLEN    4
     43 
     44 typedef picoos_uint8  picobase_utf8char[PICOBASE_UTF8_MAXLEN+1];  /* always zero terminated */
     45 typedef picoos_uint8  picobase_utf8;
     46 typedef picoos_uint16 picobase_utf16;
     47 typedef picoos_uint32 picobase_utf32;
     48 
     49 /* ***************************************************************/
     50 /* Unicode UTF8 functions */
     51 /* ***************************************************************/
     52 
     53 /**
     54  * Determines the number of UTF8 characters contained in
     55  *            the UTF8 string 'utf8str' of maximum length maxlen (in bytes)
     56  * @param    utf8str : a string encoded in UTF8
     57  * @param    maxlen  : max length (in bytes) accessible in utf8str
     58  * @return   >=0 : length of the UTF8 string in number of UTF8 characters
     59  *                     up to the first '\0' or maxlen
     60  * @return   <0 : not starting with a valid UTF8 character
     61  * @remarks  strict implementation, not allowing invalid utf8
     62 */
     63 picoos_int32 picobase_utf8_length(const picoos_uint8 *utf8str,
     64                                   const picoos_uint16 maxlen);
     65 
     66 
     67 /**
     68  * Determines the number of bytes an UTF8 character used based
     69  *            on the first byte of the UTF8 character
     70  * @param    firstchar: the first (and maybe only) byte of an UTF8 character
     71  * @return   positive value in {1,4} : number of bytes of the UTF8 character
     72  * @return   0 :if not a valid UTF8 character start
     73  * @remarks strict implementation, not allowing invalid utf8
     74 */
     75 /* picoos_uint8 picobase_det_utf8_length(const picoos_uint8 firstchar); */
     76 
     77 #define picobase_det_utf8_length(x)  (  ((x)<(picoos_uint8)'\200')?1:(((x)>=(picoos_uint8)'\370')?0:(((x)>=(picoos_uint8)'\360')?4:(((x)>=(picoos_uint8)'\340')?3:(((x)>=(picoos_uint8)'\300')?2:0)))) )
     78 
     79 /**
     80  * Converts the content of 'utf8str' to lowercase and stores it on 'lowercase'
     81  *            on the first byte of the UTF8 character
     82  * @param    utf8str : utf8 string
     83  * @param    lowercaseMaxLen : maximal number of bytes available in 'lowercase'
     84  * @param    lowercase : string converted to lowercase (output)
     85  * @param    done : flag to report success/failure of the operation (output)
     86  * @return  TRUE if successful, FALSE otherwise
     87 */
     88 picoos_int32 picobase_lowercase_utf8_str (picoos_uchar utf8str[], picoos_char lowercase[], picoos_int32 lowercaseMaxLen, picoos_uint8 * done);
     89 
     90 /**
     91  * Converts the content of 'utf8str' to upperrcase and stores it on 'uppercase'
     92  * @param    utf8str : utf8 string
     93  * @param    uppercase : string converted to uppercase (output)
     94  * @param    uppercaseMaxLen : maximal number of bytes available in 'uppercase'
     95  * @param    done : flag to report success/failure of the operation (output)
     96  * @return  TRUE if successful, FALSE otherwise
     97 */
     98 picoos_int32 picobase_uppercase_utf8_str (picoos_uchar utf8str[], picoos_char uppercase[], int uppercaseMaxLen, picoos_uint8 * done);
     99 
    100 /**
    101  * Gets next UTF8 character 'utf8char' from the UTF8 string
    102  *            'utf8s' starting at position 'pos'
    103  * @param    utf8s : UTF8 string
    104  * @param    utf8slenmax : max length accessible in utf8s
    105  * @param    pos : position from where the UTF8 character is checked and copied
    106  *            (set also as output to the position directly following the UTF8 char)
    107  * @param    utf8char : zero terminated UTF8 character containing 1 to 4 bytes (output)
    108  * @return  TRUE if okay
    109  * @return  FALSE if there is no valid UTF8 char or no more UTF8 char available within utf8len
    110 */
    111 picoos_uint8 picobase_get_next_utf8char(const picoos_uint8 *utf8s,
    112                                         const picoos_uint32 utf8slenmax,
    113                                         picoos_uint32 *pos,
    114                                         picobase_utf8char utf8char);
    115 
    116 /**
    117  * Same as picobase_get_next_utf8char
    118  *            without copying the char to utf8char
    119 */
    120 picoos_uint8 picobase_get_next_utf8charpos(const picoos_uint8 *utf8s,
    121                                            const picoos_uint32 utf8slenmax,
    122                                            picoos_uint32 *pos);
    123 
    124 /**
    125  * Gets previous UTF8 character 'utf8char' from the UTF8 string
    126  *             'utf8s' starting the backward search at position 'pos-1'
    127  * @param    utf8s : UTF8 string
    128  * @param    utf8slenmin : min length accessible in utf8s
    129  * @param    pos : the search for the prev UTF8 char starts at [pos-1]
    130  *            (set also as output to the start position of the prev UTF8 character)
    131  * @param    utf8char : zero terminated UTF8 character containing 1 to 4 bytes (output)
    132  * @return  TRUE if okay
    133  * @return  FALSE if there is no valid UTF8 char preceeding pos or no more UTF8 char available within utf8len
    134 */
    135 picoos_uint8 picobase_get_prev_utf8char(const picoos_uint8 *utf8s,
    136                                         const picoos_uint32 utf8slenmin,
    137                                         picoos_uint32 *pos,
    138                                         picobase_utf8char utf8char);
    139 
    140 /**
    141  * Same as picobase_get_prev_utf8char
    142  *            without copying the char to utf8char
    143 */
    144 picoos_uint8 picobase_get_prev_utf8charpos(const picoos_uint8 *utf8s,
    145                                            const picoos_uint32 utf8slenmin,
    146                                            picoos_uint32 *pos);
    147 
    148 
    149 /**
    150  * returns TRUE if the input string is UTF8 and uppercase
    151  * @param    str : UTF8 string
    152  * @param    strmaxlen : max length for the input string
    153  * @return  TRUE if string is UTF8 and uppercase
    154  * @return  FALSE otherwise
    155 */
    156 extern picoos_bool picobase_is_utf8_uppercase (picoos_uchar str[], picoos_int32 strmaxlen);
    157 
    158 /**
    159  * returns TRUE if the input string is UTF8 and lowercase
    160  * @param    str : UTF8 string
    161  * @param    strmaxlen : max length for the input string
    162  * @return  TRUE if string is UTF8 and lowercase
    163  * @return  FALSE otherwise
    164 */
    165 extern picoos_bool picobase_is_utf8_lowercase (picoos_uchar str[], picoos_int32 strmaxlen);
    166 
    167 #ifdef __cplusplus
    168 }
    169 #endif
    170 
    171 #endif /*PICOBASE_H_*/
    172