Home | History | Annotate | Download | only in telephony
      1 /* Copyright (C) 2007-2008 The Android Open Source Project
      2 **
      3 ** This software is licensed under the terms of the GNU General Public
      4 ** License version 2, as published by the Free Software Foundation, and
      5 ** may be copied, distributed, and modified under those terms.
      6 **
      7 ** This program is distributed in the hope that it will be useful,
      8 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
      9 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     10 ** GNU General Public License for more details.
     11 */
     12 #include "gsm.h"
     13 #include <stdlib.h>
     14 #include <string.h>
     15 
     16 /** UTILITIES
     17  **/
     18 byte_t
     19 gsm_int_to_bcdi( int  value )
     20 {
     21     return (byte_t)((value / 10) | ((value % 10) << 4));
     22 }
     23 
     24 int
     25 gsm_int_from_bcdi( byte_t  val )
     26 {
     27     int  ret = 0;
     28 
     29     if ((val & 0xf0) <= 0x90)
     30         ret = (val >> 4);
     31 
     32     if ((val & 0x0f) <= 0x90)
     33         ret |= (val % 0xf)*10;
     34 
     35     return ret;
     36 }
     37 
#if 0
/* (disabled code, kept for reference)
 *
 * Convert a sequence of 'bcdlen' BCD nibbles into ASCII characters.
 * Nibbles are consumed low nibble first, then high nibble, of each byte.
 *
 * - a 0xf nibble in the very last position ends the conversion
 * - nibble values 14 and 15 found elsewhere are skipped silently
 * - 'dst' may be NULL, in which case the characters are only counted
 *
 * Returns the number of characters produced.
 */
static int
gsm_bcdi_to_ascii( cbytes_t  bcd, int  bcdlen, bytes_t  dst )
{
    /* exactly 14 characters, stored without a terminating NUL */
    static byte_t  bcdichars[14] = "0123456789*#,N";

    int  result = 0;
    int  shift  = 0;

    while (bcdlen > 0) {
        int  c = (bcd[0] >> shift) & 0xf;

        if (c == 0xf && bcdlen == 1)
            break;

        if (c < 14) {
            if (dst) dst[result] = bcdichars[c];
            result += 1;
        }
        bcdlen --;
        shift += 4;
        if (shift == 8) {
            /* both nibbles of this byte consumed, move to the next one */
            bcd++;
            shift = 0;
        }
    }
    return result;
}
#endif
     67 
#if 0
/* (disabled code, kept for reference)
 *
 * Pack ASCII dialing characters into BCD bytes, two per byte. The first
 * character of each pair ends up in the high nibble. Parsing stops at the
 * first character that is not a digit or one of '*', '#', ',', 'N'.
 * A dangling last digit is stored in the low nibble with 0xf in the
 * high nibble. 'dst' may be NULL to only compute the size.
 *
 * Returns the number of bytes produced.
 *
 * NOTE(review): the codes used here for '*', '#', ',', 'N' (11..14) are
 * each one higher than the matching index in the "0123456789*#,N" table
 * of gsm_bcdi_to_ascii() (10..13), and the nibble order differs from what
 * that function reads back - confirm before re-enabling this code.
 */
static int
gsm_bcdi_from_ascii( cbytes_t  ascii, int  asciilen, bytes_t  dst )
{
    cbytes_t  end    = ascii + asciilen;
    int       result = 0;
    /* 'phase' accumulates nibbles below a sentinel 1 bit; once the
     * sentinel reaches bit 8 (0x100) two nibbles have been collected */
    int       phase  = 0x01;

    while (ascii < end) {
        int  c = *ascii++;

        if (c == '*')
            c = 11;
        else if (c == '#')
            c = 12;
        else if (c == ',')
            c = 13;
        else if (c == 'N')
            c = 14;
        else {
            c -= '0';
            if ((unsigned)c >= 10)
                break;
        }
        phase = (phase << 4) | c;
        if (phase & 0x100) {
            if (dst) dst[result] = (byte_t) phase;
            result += 1;
            phase   = 0x01;
        }
    }
    if (phase != 0x01) {
        /* odd number of digits: pad the high nibble with 0xf */
        if (dst) dst[result] = (byte_t)( phase | 0xf0 );
        result += 1;
    }
    return  result;
}
#endif
    106 
    107 int
    108 gsm_hexchar_to_int( char  c )
    109 {
    110     if ((unsigned)(c - '0') < 10)
    111         return c - '0';
    112     if ((unsigned)(c - 'a') < 6)
    113         return 10 + (c - 'a');
    114     if ((unsigned)(c - 'A') < 6)
    115         return 10 + (c - 'A');
    116     return -1;
    117 }
    118 
    119 int
    120 gsm_hexchar_to_int0( char  c )
    121 {
    122     int  ret = gsm_hexchar_to_int(c);
    123 
    124     return (ret < 0) ? 0 : ret;
    125 }
    126 
    127 int
    128 gsm_hex2_to_byte( const char*  hex )
    129 {
    130     int  hi = gsm_hexchar_to_int(hex[0]);
    131     int  lo = gsm_hexchar_to_int(hex[1]);
    132 
    133     if (hi < 0 || lo < 0)
    134         return -1;
    135 
    136     return ( (hi << 4) | lo );
    137 }
    138 
    139 int
    140 gsm_hex4_to_short( const char*  hex )
    141 {
    142     int  hi = gsm_hex2_to_byte(hex);
    143     int  lo = gsm_hex2_to_byte(hex+2);
    144 
    145     if (hi < 0 || lo < 0)
    146         return -1;
    147 
    148     return ((hi << 8) | lo);
    149 }
    150 
    151 int
    152 gsm_hex2_to_byte0( const char*  hex )
    153 {
    154     int  hi = gsm_hexchar_to_int0(hex[0]);
    155     int  lo = gsm_hexchar_to_int0(hex[1]);
    156 
    157     return (byte_t)( (hi << 4) | lo );
    158 }
    159 
    160 void
    161 gsm_hex_from_byte( char*  hex, int val )
    162 {
    163     static const char  hexdigits[] = "0123456789abcdef";
    164 
    165     hex[0] = hexdigits[(val >> 4) & 15];
    166     hex[1] = hexdigits[val & 15];
    167 }
    168 
    169 void
    170 gsm_hex_from_short( char*  hex, int  val )
    171 {
    172     gsm_hex_from_byte( hex,   (val >> 8) );
    173     gsm_hex_from_byte( hex+2, val );
    174 }
    175 
    176 
    177 
    178 /** HEX
    179  **/
    180 void
    181 gsm_hex_to_bytes0( cbytes_t  hex, int  hexlen, bytes_t  dst )
    182 {
    183     int  nn;
    184 
    185     for (nn = 0; nn < hexlen/2; nn++ ) {
    186         dst[nn] = (byte_t) gsm_hex2_to_byte0( (const char*)hex+2*nn );
    187     }
    188     if (hexlen & 1) {
    189         dst[nn] = gsm_hexchar_to_int0( hex[2*nn] ) << 4;
    190     }
    191 }
    192 
    193 int
    194 gsm_hex_to_bytes( cbytes_t  hex, int  hexlen, bytes_t  dst )
    195 {
    196     int  nn;
    197 
    198     if (hexlen & 1)  /* must be even */
    199         return -1;
    200 
    201     for (nn = 0; nn < hexlen/2; nn++ ) {
    202         int  c = gsm_hex2_to_byte( (const char*)hex+2*nn );
    203         if (c < 0) return -1;
    204         dst[nn] = (byte_t) c;
    205     }
    206     return hexlen/2;
    207 }
    208 
    209 void
    210 gsm_hex_from_bytes( char*  hex, cbytes_t  src, int  srclen )
    211 {
    212     int  nn;
    213 
    214     for (nn = 0; nn < srclen; nn++) {
    215         gsm_hex_from_byte( hex + 2*nn, src[nn] );
    216     }
    217 }
    218 
    219 /** ROPES
    220  **/
    221 
    222 void
    223 gsm_rope_init( GsmRope  rope )
    224 {
    225     rope->data  = NULL;
    226     rope->pos   = 0;
    227     rope->max   = 0;
    228     rope->error = 0;
    229 }
    230 
    231 void
    232 gsm_rope_init_alloc( GsmRope  rope, int  count )
    233 {
    234     rope->data  = rope->data0;
    235     rope->pos   = 0;
    236     rope->max   = sizeof(rope->data0);
    237     rope->error = 0;
    238 
    239     if (count > 0) {
    240         rope->data = calloc( count, 1 );
    241         rope->max  = count;
    242 
    243         if (rope->data == NULL) {
    244             rope->error = 1;
    245             rope->max   = 0;
    246         }
    247     }
    248 }
    249 
    250 int
    251 gsm_rope_done( GsmRope  rope )
    252 {
    253     int  result = rope->error;
    254 
    255     if (rope->data && rope->data != rope->data0)
    256         free(rope->data);
    257 
    258     rope->data  = NULL;
    259     rope->pos   = 0;
    260     rope->max   = 0;
    261     rope->error = 0;
    262 
    263     return result;
    264 }
    265 
    266 
    267 bytes_t
    268 gsm_rope_done_acquire( GsmRope  rope, int  *psize )
    269 {
    270     bytes_t  result = rope->data;
    271 
    272     *psize = rope->pos;
    273     if (result == rope->data0) {
    274         result = malloc(  rope->pos );
    275         if (result != NULL)
    276             memcpy( result, rope->data, rope->pos );
    277     }
    278     return result;
    279 }
    280 
    281 
    282 int
    283 gsm_rope_ensure( GsmRope  rope, int  new_count )
    284 {
    285     if (rope->data != NULL) {
    286         int       old_max  = rope->max;
    287         bytes_t   old_data = rope->data == rope->data0 ? NULL : rope->data;
    288         int       new_max  = old_max;
    289         bytes_t   new_data;
    290 
    291         while (new_max < new_count) {
    292             new_max += (new_max >> 1) + 4;
    293         }
    294         new_data = realloc( old_data, new_max );
    295         if (new_data == NULL) {
    296             rope->error = 1;
    297             return -1;
    298         }
    299         rope->data = new_data;
    300         rope->max  = new_max;
    301     } else {
    302         rope->max = new_count;
    303     }
    304     return 0;
    305 }
    306 
    307 static int
    308 gsm_rope_can_grow( GsmRope  rope, int  count )
    309 {
    310     if (!rope->data || rope->error)
    311         return 0;
    312 
    313     if (rope->pos + count > rope->max)
    314     {
    315         if (rope->data == NULL)
    316             rope->max = rope->pos + count;
    317 
    318         else if (rope->error ||
    319                  gsm_rope_ensure( rope, rope->pos + count ) < 0)
    320             return 0;
    321     }
    322     return 1;
    323 }
    324 
    325 void
    326 gsm_rope_add_c( GsmRope  rope,  char  c )
    327 {
    328     if (gsm_rope_can_grow(rope, 1)) {
    329         rope->data[ rope->pos ] = (byte_t) c;
    330     }
    331     rope->pos += 1;
    332 }
    333 
    334 void
    335 gsm_rope_add( GsmRope  rope, const void*  buf, int  buflen )
    336 {
    337     if (gsm_rope_can_grow(rope, buflen)) {
    338         memcpy( rope->data + rope->pos, (const char*)buf, buflen );
    339     }
    340     rope->pos += buflen;
    341 }
    342 
    343 void*
    344 gsm_rope_reserve( GsmRope  rope, int  count )
    345 {
    346     void*  result = NULL;
    347 
    348     if (gsm_rope_can_grow(rope, count))
    349     {
    350         if (rope->data != NULL)
    351             result = rope->data + rope->pos;
    352     }
    353     rope->pos += count;
    354 
    355     return result;
    356 }
    357 
    358 /* skip a given number of Unicode characters in a utf-8 byte string */
    359 cbytes_t
    360 utf8_skip( cbytes_t   utf8,
    361            cbytes_t   utf8end,
    362            int        count)
    363 {
    364     cbytes_t  p   = utf8;
    365     cbytes_t  end = utf8end;
    366 
    367     for ( ; count > 0; count-- ) {
    368         int  c;
    369 
    370         if (p >= end)
    371             break;
    372 
    373         c = *p++;
    374         if (c > 128) {
    375             while (p < end && (p[0] & 0xc0) == 0x80)
    376                 p++;
    377         }
    378     }
    379     return  p;
    380 }
    381 
    382 
    383 static __inline__ int
    384 utf8_next( cbytes_t  *pp, cbytes_t  end )
    385 {
    386     cbytes_t  p      = *pp;
    387     int       result = -1;
    388 
    389     if (p < end) {
    390         int  c= *p++;
    391         if (c >= 128) {
    392             if ((c & 0xe0) == 0xc0)
    393                 c &= 0x1f;
    394             else if ((c & 0xf0) == 0xe0)
    395                 c &= 0x0f;
    396             else
    397                 c &= 0x07;
    398 
    399             while (p < end && (p[0] & 0xc0) == 0x80) {
    400                 c = (c << 6) | (p[0] & 0x3f);
    401                 p ++;
    402             }
    403         }
    404         result = c;
    405         *pp    = p;
    406     }
    407     return result;
    408 }
    409 
    410 
    411 __inline__ int
    412 utf8_write( bytes_t  utf8, int  offset, int  v )
    413 {
    414     int  result;
    415 
    416     if (v < 128) {
    417         result = 1;
    418         if (utf8)
    419             utf8[offset] = (byte_t) v;
    420     } else if (v < 0x800) {
    421         result = 2;
    422         if (utf8) {
    423             utf8[offset+0] = (byte_t)( 0xc0 | (v >> 6) );
    424             utf8[offset+1] = (byte_t)( 0x80 | (v & 0x3f) );
    425         }
    426     } else if (v < 0x10000) {
    427         result = 3;
    428         if (utf8) {
    429             utf8[offset+0] = (byte_t)( 0xe0 |  (v >> 12) );
    430             utf8[offset+1] = (byte_t)( 0x80 | ((v >> 6) & 0x3f) );
    431             utf8[offset+2] = (byte_t)( 0x80 |  (v & 0x3f) );
    432         }
    433     } else {
    434         result = 4;
    435         if (utf8) {
    436             utf8[offset+0] = (byte_t)( 0xf0 | ((v >> 18) & 0x7) );
    437             utf8[offset+1] = (byte_t)( 0x80 | ((v >> 12) & 0x3f) );
    438             utf8[offset+2] = (byte_t)( 0x80 | ((v >> 6) & 0x3f) );
    439             utf8[offset+3] = (byte_t)( 0x80 |  (v & 0x3f) );
    440         }
    441     }
    442     return  result;
    443 }
    444 
    445 static __inline__ int
    446 ucs2_write( bytes_t  ucs2, int  offset, int  v )
    447 {
    448     if (ucs2) {
    449         ucs2[offset+0] = (byte_t) (v >> 8);
    450         ucs2[offset+1] = (byte_t) (v);
    451     }
    452     return 2;
    453 }
    454 
    455 int
    456 utf8_check( cbytes_t   p, int  utf8len )
    457 {
    458     cbytes_t  end    = p + utf8len;
    459     int       result = 0;
    460 
    461     if (p) {
    462         while (p < end) {
    463             int  c = *p++;
    464             if (c >= 128) {
    465                 int  len;
    466                 if ((c & 0xe0) == 0xc0) {
    467                     len = 1;
    468                 }
    469                 else if ((c & 0xf0) == 0xe0) {
    470                     len = 2;
    471                 }
    472                 else if ((c & 0xf8) == 0xf0) {
    473                     len = 3;
    474                 }
    475                 else
    476                     goto Exit;  /* malformed utf-8 */
    477 
    478                 if (p+len > end) /* string too short */
    479                     goto Exit;
    480 
    481                 for ( ; len > 0; len--, p++ ) {
    482                     if ((p[0] & 0xc0) != 0x80)
    483                         goto Exit;
    484                 }
    485             }
    486         }
    487         result = 1;
    488     }
    489 Exit:
    490     return result;
    491 }
    492 
    493 /** UCS2 to UTF8
    494  **/
    495 
    496 /* convert a UCS2 string into a UTF8 byte string, assumes 'buf' is correctly sized */
    497 int
    498 ucs2_to_utf8( cbytes_t  ucs2,
    499               int       ucs2len,
    500               bytes_t   buf )
    501 {
    502     int  nn;
    503     int  result = 0;
    504 
    505     for (nn = 0; nn < ucs2len; ucs2 += 2, nn++) {
    506         int  c= (ucs2[0] << 8) | ucs2[1];
    507         result += utf8_write(buf, result, c);
    508     }
    509     return result;
    510 }
    511 
    512 /* count the number of UCS2 chars contained in a utf8 byte string */
    513 int
    514 utf8_to_ucs2( cbytes_t  utf8,
    515               int       utf8len,
    516               bytes_t   ucs2 )
    517 {
    518     cbytes_t  p      = utf8;
    519     cbytes_t  end    = p + utf8len;
    520     int       result = 0;
    521 
    522     while (p < end) {
    523         int  c = utf8_next(&p, end);
    524 
    525         if (c < 0)
    526             break;
    527 
    528         result += ucs2_write(ucs2, result, c);
    529     }
    530     return result/2;
    531 }
    532 
    533 
    534 
    535 /** GSM ALPHABET
    536  **/
    537 
    538 #define  GSM_7BITS_ESCAPE   0x1b
    539 #define  GSM_7BITS_UNKNOWN  0
    540 
    541 static const unsigned short   gsm7bits_to_unicode[128] = {
    542   '@', 0xa3,  '$', 0xa5, 0xe8, 0xe9, 0xf9, 0xec, 0xf2, 0xc7, '\n', 0xd8, 0xf8, '\r', 0xc5, 0xe5,
    543 0x394,  '_',0x3a6,0x393,0x39b,0x3a9,0x3a0,0x3a8,0x3a3,0x398,0x39e,    0, 0xc6, 0xe6, 0xdf, 0xc9,
    544   ' ',  '!',  '"',  '#', 0xa4,  '%',  '&', '\'',  '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
    545   '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',  '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
    546  0xa1,  'A',  'B',  'C',  'D',  'E',  'F',  'G',  'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
    547   'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',  'X',  'Y',  'Z', 0xc4, 0xd6,0x147, 0xdc, 0xa7,
    548  0xbf,  'a',  'b',  'c',  'd',  'e',  'f',  'g',  'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
    549   'p',  'q',  'r',  's',  't',  'u',  'v',  'w',  'x',  'y',  'z', 0xe4, 0xf6, 0xf1, 0xfc, 0xe0,
    550 };
    551 
/* GSM 7-bit extension table: maps the septet that follows the escape
 * septet (0x1b) to its Unicode code point. A 0 entry means that no
 * character is defined for that septet.
 *
 * Defined entries: 0x0a form feed, 0x14 '^', 0x28 '{', 0x29 '}',
 * 0x2f '\', 0x3c '[', 0x3d '~', 0x3e ']', 0x40 '|', 0x65 U+20AC (euro).
 */
static const unsigned short  gsm7bits_extend_to_unicode[128] = {
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,'\f',   0,   0,   0,   0,   0,
    0,   0,   0,   0, '^',   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,   0,   0,   0,   0, '{', '}',   0,   0,   0,   0,   0,'\\',
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, '[', '~', ']',   0,
  '|',   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,   0,0x20ac, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
};
    562 
    563 
    564 static int
    565 unichar_to_gsm7( int  unicode )
    566 {
    567     int  nn;
    568     for (nn = 0; nn < 128; nn++) {
    569         if (gsm7bits_to_unicode[nn] == unicode) {
    570             return nn;
    571         }
    572     }
    573     return -1;
    574 }
    575 
    576 static int
    577 unichar_to_gsm7_extend( int  unichar )
    578 {
    579     int  nn;
    580     for (nn = 0; nn < 128; nn++) {
    581         if (gsm7bits_extend_to_unicode[nn] == unichar) {
    582             return nn;
    583         }
    584     }
    585     return -1;
    586 }
    587 
    588 
    589 /* return the number of septets needed to encode a unicode charcode */
    590 static int
    591 unichar_to_gsm7_count( int  unicode )
    592 {
    593     int  nn;
    594 
    595     nn = unichar_to_gsm7(unicode);
    596     if (nn >= 0)
    597         return 1;
    598 
    599     nn = unichar_to_gsm7_extend(unicode);
    600     if (nn >= 0)
    601         return 2;
    602 
    603     return 0;
    604 }
    605 
    606 
    607 cbytes_t
    608 utf8_skip_gsm7( cbytes_t  utf8, cbytes_t  utf8end, int  gsm7len )
    609 {
    610     cbytes_t  p   = utf8;
    611     cbytes_t  end = utf8end;
    612 
    613     while (gsm7len >0) {
    614         cbytes_t  q = p;
    615         int       c = utf8_next( &q, end );
    616         int       len;
    617 
    618         if (c < 0)
    619             break;
    620 
    621         len = unichar_to_gsm7_count( c );
    622         if (len == 0)  /* unknown chars are replaced by spaces */
    623             len = 1;
    624 
    625         if (len > gsm7len)
    626             break;
    627 
    628         gsm7len -= len;
    629         p        = q;
    630     }
    631     return  p;
    632 }
    633 
    634 
    635 int
    636 utf8_check_gsm7( cbytes_t  utf8,
    637                  int       utf8len )
    638 {
    639     cbytes_t  utf8end = utf8 + utf8len;
    640 
    641     while (utf8 < utf8end) {
    642         int  c = utf8_next( &utf8, utf8end );
    643         if (unichar_to_gsm7_count(c) == 0)
    644             return 0;
    645     }
    646     return 1;
    647 }
    648 
    649 
    650 int
    651 utf8_from_gsm7( cbytes_t  src,
    652                 int       septet_offset,
    653                 int       septet_count,
    654                 bytes_t   utf8 )
    655 {
    656     int  shift   = (septet_offset & 7);
    657     int  escaped = 0;
    658     int  result  = 0;
    659 
    660     src += (septet_offset >> 3);
    661     for ( ; septet_count > 0; septet_count-- )
    662     {
    663         int  c = (src[0] >> shift) & 0x7f;
    664         int  v;
    665 
    666         if (shift > 1) {
    667             c = ((src[1] << (8-shift)) | c) & 0x7f;
    668         }
    669 
    670         if (escaped) {
    671             v = gsm7bits_extend_to_unicode[c];
    672         } else if (c == GSM_7BITS_ESCAPE) {
    673             escaped = 1;
    674             goto NextSeptet;
    675         } else {
    676             v = gsm7bits_to_unicode[c];
    677         }
    678 
    679         result += utf8_write( utf8, result, v );
    680 
    681     NextSeptet:
    682         shift += 7;
    683         if (shift >= 8) {
    684             shift -= 8;
    685             src   += 1;
    686         }
    687     }
    688     return  result;
    689 }
    690 
    691 
    692 int
    693 utf8_from_gsm8( cbytes_t  src, int  count, bytes_t  utf8 )
    694 {
    695     int  result  = 0;
    696     int  escaped = 0;
    697 
    698 
    699     for ( ; count > 0; count-- )
    700     {
    701         int  c = *src++;
    702 
    703         if (c == 0xff)
    704             break;
    705 
    706         if (c == GSM_7BITS_ESCAPE) {
    707             if (escaped) { /* two escape characters => one space */
    708                 c = 0x20;
    709                 escaped = 0;
    710             } else {
    711                 escaped = 1;
    712                 continue;
    713             }
    714         }
    715         else
    716         {
    717             if (c >= 0x80) {
    718                 c       = 0x20;
    719                 escaped = 0;
    720             } else if (escaped) {
    721                 c = gsm7bits_extend_to_unicode[c];
    722             } else
    723                 c = gsm7bits_to_unicode[c];
    724         }
    725 
    726         result += utf8_write( utf8, result, c );
    727     }
    728     return  result;
    729 }
    730 
    731 /* convert a GSM 7-bit message into a unicode character array
    732  * the 'dst' array must contain at least 160 chars. the function
    733  * returns the number of characters decoded
    734  *
    735  * assumes the 'dst' array has at least septet_count items, returns the
    736  * number of unichars really written
    737  */
    738 int
    739 ucs2_from_gsm7( bytes_t   ucs2,
    740                 cbytes_t  src,
    741                 int       septet_offset,
    742                 int       septet_count )
    743 {
    744     const unsigned char*  p     = src + (septet_offset >> 3);
    745     int                   shift = (septet_offset & 7);
    746     int                   escaped = 0;
    747     int                   result  = 0;
    748 
    749     for ( ; septet_count > 0; septet_count-- )
    750     {
    751         unsigned  val  = (p[0] >> shift) & 0x7f;
    752 
    753         if (shift > 1)
    754             val = (val | (p[1] << (8-shift))) & 0x7f;
    755 
    756         if (escaped) {
    757             int  c = gsm7bits_to_unicode[val];
    758 
    759             result += ucs2_write(ucs2, result, c);
    760             escaped = 0;
    761         }
    762         else if (val == GSM_7BITS_ESCAPE) {
    763             escaped = 1;
    764         }
    765         else {
    766             val = gsm7bits_extend_to_unicode[val];
    767             if (val == 0)
    768                 val = 0x20;
    769 
    770             result += ucs2_write( ucs2, result, val );
    771         }
    772     }
    773     return result/2;
    774 }
    775 
    776 
    777 /* count the number of septets required to write a utf8 string */
    778 static int
    779 utf8_to_gsm7_count( cbytes_t  utf8, int  utf8len )
    780 {
    781     cbytes_t  utf8end = utf8 + utf8len;
    782     int       result  = 0;
    783 
    784     while ( utf8 < utf8end ) {
    785         int  len;
    786         int  c = utf8_next( &utf8, utf8end );
    787 
    788         if (c < 0)
    789             break;
    790 
    791         len = unichar_to_gsm7_count(c);
    792         if (len == 0)    /* replace non-representables with space */
    793             len = 1;
    794 
    795         result += len;
    796     }
    797     return result;
    798 }
    799 
/* Small bit writer used to pack 7-bit septets into a byte buffer. */
typedef struct {
    bytes_t   dst;     /* next byte to be written in the output buffer */
    unsigned  pad;     /* bits accumulated but not yet flushed to 'dst' */
    int       bits;    /* number of valid bits currently held in 'pad' */
    int       offset;  /* running bit offset: starts at the initial offset, +7 per septet */
} BWriterRec, *BWriter;
    806 
    807 static void
    808 bwriter_init( BWriter  writer, bytes_t  dst, int  start )
    809 {
    810     int  shift = start & 7;
    811 
    812     writer->dst    = dst + (start >> 3);
    813     writer->pad    = 0;
    814     writer->bits   = shift;
    815     writer->offset = start;
    816 
    817     if (shift > 0) {
    818         writer->pad  = writer->dst[0] & ~(0xFF << shift);
    819     }
    820 }
    821 
    822 static void
    823 bwriter_add7( BWriter  writer, unsigned  value )
    824 {
    825     writer->pad  |= (unsigned)(value << writer->bits);
    826     writer->bits += 7;
    827     if (writer->bits >= 8) {
    828         writer->dst[0] = (byte_t)writer->pad;
    829         writer->bits  -= 8;
    830         writer->pad  >>= 8;
    831         writer->dst   += 1;
    832     }
    833     writer->offset += 7;
    834 }
    835 
    836 static int
    837 bwriter_done( BWriter  writer )
    838 {
    839     if (writer->bits > 0) {
    840         writer->dst[0] = (byte_t)writer->pad;
    841         writer->pad    = 0;
    842         writer->bits   = 0;
    843         writer->dst   += 1;
    844     }
    845     return writer->offset;
    846 }
    847 
    848 /* convert a utf8 string to a gsm7 byte string - return the number of septets written */
    849 int
    850 utf8_to_gsm7( cbytes_t  utf8, int  utf8len, bytes_t  dst, int offset )
    851 {
    852     const unsigned char*  utf8end = utf8 + utf8len;
    853     BWriterRec            writer[1];
    854 
    855     if (dst == NULL)
    856         return utf8_to_gsm7_count(utf8, utf8len);
    857 
    858     bwriter_init( writer, dst, offset );
    859     while ( utf8 < utf8end ) {
    860         int  c = utf8_next( &utf8, utf8end );
    861         int  nn;
    862 
    863         if (c < 0)
    864             break;
    865 
    866         nn = unichar_to_gsm7(c);
    867         if (nn >= 0) {
    868             bwriter_add7( writer, nn );
    869             continue;
    870         }
    871 
    872         nn = unichar_to_gsm7_extend(c);
    873         if (nn >= 0) {
    874             bwriter_add7( writer, GSM_7BITS_ESCAPE );
    875             bwriter_add7( writer, nn );
    876             continue;
    877         }
    878 
    879         /* unknown => replaced by space */
    880         bwriter_add7( writer, 0x20 );
    881     }
    882     return  bwriter_done( writer );
    883 }
    884 
    885 
    886 int
    887 utf8_to_gsm8( cbytes_t  utf8, int  utf8len, bytes_t  dst )
    888 {
    889     const unsigned char*  utf8end = utf8 + utf8len;
    890     int                   result  = 0;
    891 
    892     while ( utf8 < utf8end ) {
    893         int  c = utf8_next( &utf8, utf8end );
    894         int  nn;
    895 
    896         if (c < 0)
    897             break;
    898 
    899         nn = unichar_to_gsm7(c);
    900         if (nn >= 0) {
    901             if (dst)
    902                 dst[result] = (byte_t)nn;
    903             result += 1;
    904             continue;
    905         }
    906 
    907         nn = unichar_to_gsm7_extend(c);
    908         if (nn >= 0) {
    909             if (dst) {
    910                 dst[result+0] = (byte_t) GSM_7BITS_ESCAPE;
    911                 dst[result+1] = (byte_t) nn;
    912             }
    913             result += 2;
    914             continue;
    915         }
    916 
    917         /* unknown => space */
    918         if (dst)
    919             dst[result] = 0x20;
    920         result += 1;
    921     }
    922     return  result;
    923 }
    924 
    925 
    926 int
    927 ucs2_to_gsm7( cbytes_t  ucs2, int  ucs2len, bytes_t  dst, int offset )
    928 {
    929     const unsigned char*  ucs2end = ucs2 + ucs2len*2;
    930     BWriterRec            writer[1];
    931 
    932     bwriter_init( writer, dst, offset );
    933     while ( ucs2 < ucs2end ) {
    934         int  c = *ucs2++;
    935         int  nn;
    936 
    937         for (nn = 0; nn < 128; nn++) {
    938             if ( gsm7bits_to_unicode[nn] == c ) {
    939                 bwriter_add7( writer, nn );
    940                 goto NextUnicode;
    941             }
    942         }
    943         for (nn = 0; nn < 128; nn++) {
    944             if ( gsm7bits_extend_to_unicode[nn] == c ) {
    945                 bwriter_add7( writer, GSM_7BITS_ESCAPE );
    946                 bwriter_add7( writer, nn );
    947                 goto NextUnicode;
    948             }
    949         }
    950 
    951         /* unknown */
    952         bwriter_add7( writer, 0x20 );
    953 
    954     NextUnicode:
    955         ;
    956     }
    957     return  bwriter_done( writer );
    958 }
    959 
    960 
    961 int
    962 ucs2_to_gsm8( cbytes_t  ucs2, int  ucs2len, bytes_t  dst )
    963 {
    964     const unsigned char*  ucs2end = ucs2 + ucs2len*2;
    965     bytes_t               dst0    = dst;
    966 
    967     while ( ucs2 < ucs2end ) {
    968         int  c = *ucs2++;
    969         int  nn;
    970 
    971         for (nn = 0; nn < 128; nn++) {
    972             if ( gsm7bits_to_unicode[nn] == c ) {
    973                 *dst++ = (byte_t)nn;
    974                 goto NextUnicode;
    975             }
    976         }
    977         for (nn = 0; nn < 128; nn++) {
    978             if ( gsm7bits_extend_to_unicode[nn] == c ) {
    979                 dst[0] = (byte_t) GSM_7BITS_ESCAPE;
    980                 dst[1] = (byte_t) nn;
    981                 dst   += 2;
    982                 goto NextUnicode;
    983             }
    984         }
    985 
    986         /* unknown */
    987         *dst++ = 0x20;
    988 
    989     NextUnicode:
    990         ;
    991     }
    992     return (dst - dst0);
    993 }
    994 
    995 int
    996 gsm_bcdnum_to_ascii( cbytes_t  bcd, int  count, bytes_t  dst )
    997 {
    998     int  result = 0;
    999     int  shift  = 0;
   1000 
   1001     while (count > 0) {
   1002         int  c = (bcd[0] >> shift) & 0xf;
   1003 
   1004         if (c == 15 && count == 1)  /* ignore trailing 0xf */
   1005             break;
   1006 
   1007         if (c >= 14)
   1008             c = 0;
   1009 
   1010         if (dst) dst[result] = "0123456789*#,N"[c];
   1011         result += 1;
   1012 
   1013         shift += 4;
   1014         if (shift == 8) {
   1015             shift = 0;
   1016             bcd += 1;
   1017         }
   1018     }
   1019     return  result;
   1020 }
   1021 
   1022 
   1023 int
   1024 gsm_bcdnum_from_ascii( cbytes_t  ascii, int  asciilen, bytes_t  dst )
   1025 {
   1026     cbytes_t  end = ascii + asciilen;
   1027     int  result   = 0;
   1028     int  phase = 0x01;
   1029 
   1030     while (ascii < end) {
   1031         int  c = *ascii++;
   1032 
   1033         if (c == '*')
   1034             c = 10;
   1035         else if (c == '#')
   1036             c = 11;
   1037         else if (c == ',')
   1038             c = 12;
   1039         else if (c == 'N')
   1040             c = 13;
   1041         else {
   1042             c -= '0';
   1043             if ((unsigned)c >= 10U)
   1044                 return -1;
   1045         }
   1046         phase   = (phase << 4) | c;
   1047         result += 1;
   1048         if (phase & 0x100) {
   1049             if (dst) dst[result/2] = (byte_t) phase;
   1050             phase   = 0x01;
   1051         }
   1052     }
   1053 
   1054     if (result & 1) {
   1055         if (dst) dst[result/2] = (byte_t)(phase | 0xf0);
   1056     }
   1057     return result;
   1058 }
   1059 
   1060 /** ADN: Abbreviated Dialing Number
   1061  **/
   1062 
   1063 #define  ADN_FOOTER_SIZE     14
   1064 #define  ADN_OFFSET_NUMBER_LENGTH   0
   1065 #define  ADN_OFFSET_TON_NPI         1
   1066 #define  ADN_OFFSET_NUMBER_START    2
   1067 #define  ADN_OFFSET_NUMBER_END      11
   1068 #define  ADN_OFFSET_CAPABILITY_ID   12
   1069 #define  ADN_OFFSET_EXTENSION_ID    13
   1070 
   1071 /* see 10.5.1 of 3GPP 51.011 */
   1072 static int
   1073 sim_adn_alpha_to_utf8( cbytes_t  alpha, cbytes_t  end, bytes_t  dst )
   1074 {
   1075     int  result = 0;
   1076 
   1077     /* ignore trailing 0xff */
   1078     while (alpha < end && end[-1] == 0xff)
   1079         end--;
   1080 
   1081     if (alpha >= end)
   1082         return 0;
   1083 
   1084     if (alpha[0] == 0x80) { /* UCS/2 source encoding */
   1085         alpha += 1;
   1086         result = ucs2_to_utf8( alpha, (end-alpha)/2, dst );
   1087     }
   1088     else
   1089     {
   1090         int  is_ucs2 = 0;
   1091         int  len = 0, base = 0;
   1092 
   1093         if (alpha+3 <= end && alpha[0] == 0x81) {
   1094             is_ucs2 = 1;
   1095             len     = alpha[1];
   1096             base    = alpha[2] << 7;
   1097             alpha  += 3;
   1098             if (len > end-alpha)
   1099                 len = end-alpha;
   1100         } else if (alpha+4 <= end && alpha[0] == 0x82) {
   1101             is_ucs2 = 1;
   1102             len     = alpha[1];
   1103             base    = (alpha[2] << 8) | alpha[3];
   1104             alpha  += 4;
   1105             if (len > end-alpha)
   1106                 len = end-alpha;
   1107         }
   1108 
   1109         if (is_ucs2) {
   1110             end = alpha + len;
   1111             while (alpha < end) {
   1112                 int  c = alpha[0];
   1113                 if (c >= 0x80) {
   1114                     result += utf8_write(dst, result, base + (c & 0x7f));
   1115                     alpha  += 1;
   1116                 } else {
   1117                     /* GSM character set */
   1118                     int   count;
   1119                     for (count = 0; alpha+count < end && alpha[count] < 128; count++)
   1120                         ;
   1121                     result += utf8_from_gsm8(alpha, count, (dst ? dst+result : NULL));
   1122                     alpha  += count;
   1123                 }
   1124             }
   1125         }
   1126         else {
   1127             result = utf8_from_gsm8(alpha, end-alpha, dst);
   1128         }
   1129     }
   1130     return result;
   1131 }
   1132 
   1133 #if 0
   1134 static int
   1135 sim_adn_alpha_from_utf8( cbytes_t  utf8, int  utf8len, bytes_t  dst )
   1136 {
   1137     int   result = 0;
   1138 
   1139     if (utf8_check_gsm7(utf8, utf8len)) {
   1140         /* GSM 7-bit compatible, encode directly as 8-bit string */
   1141         result = utf8_to_gsm8(utf8, utf8len, dst);
   1142     } else {
   1143         /* otherwise, simply try UCS-2 encoding, nothing more serious at the moment */
   1144         if (dst) {
   1145             dst[0] = 0x80;
   1146         }
   1147         result = 1 + utf8_to_ucs2(utf8, utf8len, dst ? (dst+1) : NULL)*2;
   1148     }
   1149     return  result;
   1150 }
   1151 #endif
   1152 
   1153 int
   1154 sim_adn_record_from_bytes( SimAdnRecord  rec, cbytes_t  data, int  len )
   1155 {
   1156     cbytes_t  end    = data + len;
   1157     cbytes_t  footer = end - ADN_FOOTER_SIZE;
   1158     int       num_len;
   1159 
   1160     rec->adn.alpha[0]  = 0;
   1161     rec->adn.number[0] = 0;
   1162     rec->ext_record    = 0xff;
   1163 
   1164     if (len < ADN_FOOTER_SIZE)
   1165         return -1;
   1166 
   1167     /* alpha is optional */
   1168     if (len > ADN_FOOTER_SIZE) {
   1169         cbytes_t  dataend = data + len - ADN_FOOTER_SIZE;
   1170         int       count   = sim_adn_alpha_to_utf8(data, dataend, NULL);
   1171 
   1172         if (count > sizeof(rec->adn.alpha)-1)  /* too long */
   1173             return -1;
   1174 
   1175         sim_adn_alpha_to_utf8(data, dataend, rec->adn.alpha);
   1176         rec->adn.alpha[count] = 0;
   1177     }
   1178 
   1179     num_len = footer[ADN_OFFSET_NUMBER_LENGTH];
   1180     if (num_len > 11)
   1181         return -1;
   1182 
   1183     /* decode TON and number to ASCII, NOTE: this is lossy !! */
   1184     {
   1185         int      ton    = footer[ADN_OFFSET_TON_NPI];
   1186         bytes_t  number = (bytes_t) rec->adn.number;
   1187         int      len    = sizeof(rec->adn.number)-1;
   1188         int      count;
   1189 
   1190         if (ton != 0x81 && ton != 0x91)
   1191             return -1;
   1192 
   1193         if (ton == 0x91) {
   1194             *number++ = '+';
   1195             len      -= 1;
   1196         }
   1197 
   1198         count = gsm_bcdnum_to_ascii( footer + ADN_OFFSET_NUMBER_START,
   1199                                      num_len*2, number );
   1200         number[count] = 0;
   1201     }
   1202     return 0;
   1203 }
   1204 
   1205 int
   1206 sim_adn_record_to_bytes( SimAdnRecord  rec, bytes_t   data, int  datalen )
   1207 {
   1208     bytes_t   end    = data + datalen;
   1209     bytes_t   footer = end - ADN_FOOTER_SIZE;
   1210     int       ton    = 0x81;
   1211     cbytes_t  number = (cbytes_t) rec->adn.number;
   1212 
   1213     if (number[0] == '+') {
   1214         ton     = 0x91;
   1215         number += 1;
   1216     }
   1217     footer[0] = (strlen((const char*)number)+1)/2 + 1;
   1218     /* XXXX: TODO */
   1219     return 0;
   1220 }
   1221