1 /* Copyright (C) 2007-2008 The Android Open Source Project 2 ** 3 ** This software is licensed under the terms of the GNU General Public 4 ** License version 2, as published by the Free Software Foundation, and 5 ** may be copied, distributed, and modified under those terms. 6 ** 7 ** This program is distributed in the hope that it will be useful, 8 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 9 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 ** GNU General Public License for more details. 11 */ 12 #include "gsm.h" 13 #include <stdlib.h> 14 #include <string.h> 15 16 /** UTILITIES 17 **/ 18 byte_t 19 gsm_int_to_bcdi( int value ) 20 { 21 return (byte_t)((value / 10) | ((value % 10) << 4)); 22 } 23 24 int 25 gsm_int_from_bcdi( byte_t val ) 26 { 27 int ret = 0; 28 29 if ((val & 0xf0) <= 0x90) 30 ret = (val >> 4); 31 32 if ((val & 0x0f) <= 0x90) 33 ret |= (val % 0xf)*10; 34 35 return ret; 36 } 37 38 #if 0 39 static int 40 gsm_bcdi_to_ascii( cbytes_t bcd, int bcdlen, bytes_t dst ) 41 { 42 static byte_t bcdichars[14] = "0123456789*#,N"; 43 44 int result = 0; 45 int shift = 0; 46 47 while (bcdlen > 0) { 48 int c = (bcd[0] >> shift) & 0xf; 49 50 if (c == 0xf && bcdlen == 1) 51 break; 52 53 if (c < 14) { 54 if (dst) dst[result] = bcdichars[c]; 55 result += 1; 56 } 57 bcdlen --; 58 shift += 4; 59 if (shift == 8) { 60 bcd++; 61 shift = 0; 62 } 63 } 64 return result; 65 } 66 #endif 67 68 #if 0 69 static int 70 gsm_bcdi_from_ascii( cbytes_t ascii, int asciilen, bytes_t dst ) 71 { 72 cbytes_t end = ascii + asciilen; 73 int result = 0; 74 int phase = 0x01; 75 76 while (ascii < end) { 77 int c = *ascii++; 78 79 if (c == '*') 80 c = 11; 81 else if (c == '#') 82 c = 12; 83 else if (c == ',') 84 c = 13; 85 else if (c == 'N') 86 c = 14; 87 else { 88 c -= '0'; 89 if ((unsigned)c >= 10) 90 break; 91 } 92 phase = (phase << 4) | c; 93 if (phase & 0x100) { 94 if (dst) dst[result] = (byte_t) phase; 95 result += 1; 96 phase = 0x01; 97 } 98 } 99 if (phase != 0x01) { 100 if (dst) dst[result] = (byte_t)( phase | 0xf0 ); 101 result += 1; 102 } 103 return result; 104 } 105 #endif 106 107 int 108 gsm_hexchar_to_int( char c ) 109 { 110 if ((unsigned)(c - '0') < 10) 111 return c - '0'; 112 if ((unsigned)(c - 'a') < 6) 113 return 10 + (c - 'a'); 114 if ((unsigned)(c - 'A') < 6) 115 return 10 + (c - 'A'); 116 return -1; 117 } 118 119 int 120 gsm_hexchar_to_int0( char c ) 121 { 122 int ret = gsm_hexchar_to_int(c); 123 124 return (ret < 0) ? 0 : ret; 125 } 126 127 int 128 gsm_hex2_to_byte( const char* hex ) 129 { 130 int hi = gsm_hexchar_to_int(hex[0]); 131 int lo = gsm_hexchar_to_int(hex[1]); 132 133 if (hi < 0 || lo < 0) 134 return -1; 135 136 return ( (hi << 4) | lo ); 137 } 138 139 int 140 gsm_hex4_to_short( const char* hex ) 141 { 142 int hi = gsm_hex2_to_byte(hex); 143 int lo = gsm_hex2_to_byte(hex+2); 144 145 if (hi < 0 || lo < 0) 146 return -1; 147 148 return ((hi << 8) | lo); 149 } 150 151 int 152 gsm_hex2_to_byte0( const char* hex ) 153 { 154 int hi = gsm_hexchar_to_int0(hex[0]); 155 int lo = gsm_hexchar_to_int0(hex[1]); 156 157 return (byte_t)( (hi << 4) | lo ); 158 } 159 160 void 161 gsm_hex_from_byte( char* hex, int val ) 162 { 163 static const char hexdigits[] = "0123456789abcdef"; 164 165 hex[0] = hexdigits[(val >> 4) & 15]; 166 hex[1] = hexdigits[val & 15]; 167 } 168 169 void 170 gsm_hex_from_short( char* hex, int val ) 171 { 172 gsm_hex_from_byte( hex, (val >> 8) ); 173 gsm_hex_from_byte( hex+2, val ); 174 } 175 176 177 178 /** HEX 179 **/ 180 void 181 gsm_hex_to_bytes0( cbytes_t hex, int hexlen, bytes_t dst ) 182 { 183 int nn; 184 185 for (nn = 0; nn < hexlen/2; nn++ ) { 186 dst[nn] = (byte_t) gsm_hex2_to_byte0( (const char*)hex+2*nn ); 187 } 188 if (hexlen & 1) { 189 dst[nn] = gsm_hexchar_to_int0( hex[2*nn] ) << 4; 190 } 191 } 192 193 int 194 gsm_hex_to_bytes( cbytes_t hex, int hexlen, bytes_t dst ) 195 { 196 int nn; 197 198 if (hexlen & 1) /* must be even */ 199 return -1; 200 201 for (nn = 0; nn < hexlen/2; nn++ ) { 202 int c = gsm_hex2_to_byte( (const char*)hex+2*nn ); 203 if (c < 0) return -1; 204 dst[nn] = (byte_t) c; 205 } 206 return hexlen/2; 207 } 208 209 void 210 gsm_hex_from_bytes( char* hex, cbytes_t src, int srclen ) 211 { 212 int nn; 213 214 for (nn = 0; nn < srclen; nn++) { 215 gsm_hex_from_byte( hex + 2*nn, src[nn] ); 216 } 217 } 218 219 /** ROPES 220 **/ 221 222 void 223 gsm_rope_init( GsmRope rope ) 224 { 225 rope->data = NULL; 226 rope->pos = 0; 227 rope->max = 0; 228 rope->error = 0; 229 } 230 231 void 232 gsm_rope_init_alloc( GsmRope rope, int count ) 233 { 234 rope->data = rope->data0; 235 rope->pos = 0; 236 rope->max = sizeof(rope->data0); 237 rope->error = 0; 238 239 if (count > 0) { 240 rope->data = calloc( count, 1 ); 241 rope->max = count; 242 243 if (rope->data == NULL) { 244 rope->error = 1; 245 rope->max = 0; 246 } 247 } 248 } 249 250 int 251 gsm_rope_done( GsmRope rope ) 252 { 253 int result = rope->error; 254 255 if (rope->data && rope->data != rope->data0) 256 free(rope->data); 257 258 rope->data = NULL; 259 rope->pos = 0; 260 rope->max = 0; 261 rope->error = 0; 262 263 return result; 264 } 265 266 267 bytes_t 268 gsm_rope_done_acquire( GsmRope rope, int *psize ) 269 { 270 bytes_t result = rope->data; 271 272 *psize = rope->pos; 273 if (result == rope->data0) { 274 result = malloc( rope->pos ); 275 if (result != NULL) 276 memcpy( result, rope->data, rope->pos ); 277 } 278 return result; 279 } 280 281 282 int 283 gsm_rope_ensure( GsmRope rope, int new_count ) 284 { 285 if (rope->data != NULL) { 286 int old_max = rope->max; 287 bytes_t old_data = rope->data == rope->data0 ? NULL : rope->data; 288 int new_max = old_max; 289 bytes_t new_data; 290 291 while (new_max < new_count) { 292 new_max += (new_max >> 1) + 4; 293 } 294 new_data = realloc( old_data, new_max ); 295 if (new_data == NULL) { 296 rope->error = 1; 297 return -1; 298 } 299 rope->data = new_data; 300 rope->max = new_max; 301 } else { 302 rope->max = new_count; 303 } 304 return 0; 305 } 306 307 static int 308 gsm_rope_can_grow( GsmRope rope, int count ) 309 { 310 if (!rope->data || rope->error) 311 return 0; 312 313 if (rope->pos + count > rope->max) 314 { 315 if (rope->data == NULL) 316 rope->max = rope->pos + count; 317 318 else if (rope->error || 319 gsm_rope_ensure( rope, rope->pos + count ) < 0) 320 return 0; 321 } 322 return 1; 323 } 324 325 void 326 gsm_rope_add_c( GsmRope rope, char c ) 327 { 328 if (gsm_rope_can_grow(rope, 1)) { 329 rope->data[ rope->pos ] = (byte_t) c; 330 } 331 rope->pos += 1; 332 } 333 334 void 335 gsm_rope_add( GsmRope rope, const void* buf, int buflen ) 336 { 337 if (gsm_rope_can_grow(rope, buflen)) { 338 memcpy( rope->data + rope->pos, (const char*)buf, buflen ); 339 } 340 rope->pos += buflen; 341 } 342 343 void* 344 gsm_rope_reserve( GsmRope rope, int count ) 345 { 346 void* result = NULL; 347 348 if (gsm_rope_can_grow(rope, count)) 349 { 350 if (rope->data != NULL) 351 result = rope->data + rope->pos; 352 } 353 rope->pos += count; 354 355 return result; 356 } 357 358 /* skip a given number of Unicode characters in a utf-8 byte string */ 359 cbytes_t 360 utf8_skip( cbytes_t utf8, 361 cbytes_t utf8end, 362 int count) 363 { 364 cbytes_t p = utf8; 365 cbytes_t end = utf8end; 366 367 for ( ; count > 0; count-- ) { 368 int c; 369 370 if (p >= end) 371 break; 372 373 c = *p++; 374 if (c > 128) { 375 while (p < end && (p[0] & 0xc0) == 0x80) 376 p++; 377 } 378 } 379 return p; 380 } 381 382 383 static __inline__ int 384 utf8_next( cbytes_t *pp, cbytes_t end ) 385 { 386 cbytes_t p = *pp; 387 int result = -1; 388 389 if (p < end) { 390 int c= *p++; 391 if (c >= 128) { 392 if ((c & 0xe0) == 0xc0) 393 c &= 0x1f; 394 else if ((c & 0xf0) == 0xe0) 395 c &= 0x0f; 396 else 397 c &= 0x07; 398 399 while (p < end && (p[0] & 0xc0) == 0x80) { 400 c = (c << 6) | (p[0] & 0x3f); 401 p ++; 402 } 403 } 404 result = c; 405 *pp = p; 406 } 407 return result; 408 } 409 410 411 __inline__ int 412 utf8_write( bytes_t utf8, int offset, int v ) 413 { 414 int result; 415 416 if (v < 128) { 417 result = 1; 418 if (utf8) 419 utf8[offset] = (byte_t) v; 420 } else if (v < 0x800) { 421 result = 2; 422 if (utf8) { 423 utf8[offset+0] = (byte_t)( 0xc0 | (v >> 6) ); 424 utf8[offset+1] = (byte_t)( 0x80 | (v & 0x3f) ); 425 } 426 } else if (v < 0x10000) { 427 result = 3; 428 if (utf8) { 429 utf8[offset+0] = (byte_t)( 0xe0 | (v >> 12) ); 430 utf8[offset+1] = (byte_t)( 0x80 | ((v >> 6) & 0x3f) ); 431 utf8[offset+2] = (byte_t)( 0x80 | (v & 0x3f) ); 432 } 433 } else { 434 result = 4; 435 if (utf8) { 436 utf8[offset+0] = (byte_t)( 0xf0 | ((v >> 18) & 0x7) ); 437 utf8[offset+1] = (byte_t)( 0x80 | ((v >> 12) & 0x3f) ); 438 utf8[offset+2] = (byte_t)( 0x80 | ((v >> 6) & 0x3f) ); 439 utf8[offset+3] = (byte_t)( 0x80 | (v & 0x3f) ); 440 } 441 } 442 return result; 443 } 444 445 static __inline__ int 446 ucs2_write( bytes_t ucs2, int offset, int v ) 447 { 448 if (ucs2) { 449 ucs2[offset+0] = (byte_t) (v >> 8); 450 ucs2[offset+1] = (byte_t) (v); 451 } 452 return 2; 453 } 454 455 int 456 utf8_check( cbytes_t p, int utf8len ) 457 { 458 cbytes_t end = p + utf8len; 459 int result = 0; 460 461 if (p) { 462 while (p < end) { 463 int c = *p++; 464 if (c >= 128) { 465 int len; 466 if ((c & 0xe0) == 0xc0) { 467 len = 1; 468 } 469 else if ((c & 0xf0) == 0xe0) { 470 len = 2; 471 } 472 else if ((c & 0xf8) == 0xf0) { 473 len = 3; 474 } 475 else 476 goto Exit; /* malformed utf-8 */ 477 478 if (p+len > end) /* string too short */ 479 goto Exit; 480 481 for ( ; len > 0; len--, p++ ) { 482 if ((p[0] & 0xc0) != 0x80) 483 goto Exit; 484 } 485 } 486 } 487 result = 1; 488 } 489 Exit: 490 return result; 491 } 492 493 /** UCS2 to UTF8 494 **/ 495 496 /* convert a UCS2 string into a UTF8 byte string, assumes 'buf' is correctly sized */ 497 int 498 ucs2_to_utf8( cbytes_t ucs2, 499 int ucs2len, 500 bytes_t buf ) 501 { 502 int nn; 503 int result = 0; 504 505 for (nn = 0; nn < ucs2len; ucs2 += 2, nn++) { 506 int c= (ucs2[0] << 8) | ucs2[1]; 507 result += utf8_write(buf, result, c); 508 } 509 return result; 510 } 511 512 /* count the number of UCS2 chars contained in a utf8 byte string */ 513 int 514 utf8_to_ucs2( cbytes_t utf8, 515 int utf8len, 516 bytes_t ucs2 ) 517 { 518 cbytes_t p = utf8; 519 cbytes_t end = p + utf8len; 520 int result = 0; 521 522 while (p < end) { 523 int c = utf8_next(&p, end); 524 525 if (c < 0) 526 break; 527 528 result += ucs2_write(ucs2, result, c); 529 } 530 return result/2; 531 } 532 533 534 535 /** GSM ALPHABET 536 **/ 537 538 #define GSM_7BITS_ESCAPE 0x1b 539 #define GSM_7BITS_UNKNOWN 0 540 541 static const unsigned short gsm7bits_to_unicode[128] = { 542 '@', 0xa3, '$', 0xa5, 0xe8, 0xe9, 0xf9, 0xec, 0xf2, 0xc7, '\n', 0xd8, 0xf8, '\r', 0xc5, 0xe5, 543 0x394, '_',0x3a6,0x393,0x39b,0x3a9,0x3a0,0x3a8,0x3a3,0x398,0x39e, 0, 0xc6, 0xe6, 0xdf, 0xc9, 544 ' ', '!', '"', '#', 0xa4, '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', 545 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', 546 0xa1, 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 547 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 0xc4, 0xd6,0x147, 0xdc, 0xa7, 548 0xbf, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 549 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0xe4, 0xf6, 0xf1, 0xfc, 0xe0, 550 }; 551 552 static const unsigned short gsm7bits_extend_to_unicode[128] = { 553 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\f', 0, 0, 0, 0, 0, 554 0, 0, 0, 0, '^', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 555 0, 0, 0, 0, 0, 0, 0, 0, '{', '}', 0, 0, 0, 0, 0,'\\', 556 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '[', '~', ']', 0, 557 '|', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 559 0, 0, 0, 0, 0,0x20ac, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 560 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 561 }; 562 563 564 static int 565 unichar_to_gsm7( int unicode ) 566 { 567 int nn; 568 for (nn = 0; nn < 128; nn++) { 569 if (gsm7bits_to_unicode[nn] == unicode) { 570 return nn; 571 } 572 } 573 return -1; 574 } 575 576 static int 577 unichar_to_gsm7_extend( int unichar ) 578 { 579 int nn; 580 for (nn = 0; nn < 128; nn++) { 581 if (gsm7bits_extend_to_unicode[nn] == unichar) { 582 return nn; 583 } 584 } 585 return -1; 586 } 587 588 589 /* return the number of septets needed to encode a unicode charcode */ 590 static int 591 unichar_to_gsm7_count( int unicode ) 592 { 593 int nn; 594 595 nn = unichar_to_gsm7(unicode); 596 if (nn >= 0) 597 return 1; 598 599 nn = unichar_to_gsm7_extend(unicode); 600 if (nn >= 0) 601 return 2; 602 603 return 0; 604 } 605 606 607 cbytes_t 608 utf8_skip_gsm7( cbytes_t utf8, cbytes_t utf8end, int gsm7len ) 609 { 610 cbytes_t p = utf8; 611 cbytes_t end = utf8end; 612 613 while (gsm7len >0) { 614 cbytes_t q = p; 615 int c = utf8_next( &q, end ); 616 int len; 617 618 if (c < 0) 619 break; 620 621 len = unichar_to_gsm7_count( c ); 622 if (len == 0) /* unknown chars are replaced by spaces */ 623 len = 1; 624 625 if (len > gsm7len) 626 break; 627 628 gsm7len -= len; 629 p = q; 630 } 631 return p; 632 } 633 634 635 int 636 utf8_check_gsm7( cbytes_t utf8, 637 int utf8len ) 638 { 639 cbytes_t utf8end = utf8 + utf8len; 640 641 while (utf8 < utf8end) { 642 int c = utf8_next( &utf8, utf8end ); 643 if (unichar_to_gsm7_count(c) == 0) 644 return 0; 645 } 646 return 1; 647 } 648 649 650 int 651 utf8_from_gsm7( cbytes_t src, 652 int septet_offset, 653 int septet_count, 654 bytes_t utf8 ) 655 { 656 int shift = (septet_offset & 7); 657 int escaped = 0; 658 int result = 0; 659 660 src += (septet_offset >> 3); 661 for ( ; septet_count > 0; septet_count-- ) 662 { 663 int c = (src[0] >> shift) & 0x7f; 664 int v; 665 666 if (shift > 1) { 667 c = ((src[1] << (8-shift)) | c) & 0x7f; 668 } 669 670 if (escaped) { 671 v = gsm7bits_extend_to_unicode[c]; 672 } else if (c == GSM_7BITS_ESCAPE) { 673 escaped = 1; 674 goto NextSeptet; 675 } else { 676 v = gsm7bits_to_unicode[c]; 677 } 678 679 result += utf8_write( utf8, result, v ); 680 681 NextSeptet: 682 shift += 7; 683 if (shift >= 8) { 684 shift -= 8; 685 src += 1; 686 } 687 } 688 return result; 689 } 690 691 692 int 693 utf8_from_gsm8( cbytes_t src, int count, bytes_t utf8 ) 694 { 695 int result = 0; 696 int escaped = 0; 697 698 699 for ( ; count > 0; count-- ) 700 { 701 int c = *src++; 702 703 if (c == 0xff) 704 break; 705 706 if (c == GSM_7BITS_ESCAPE) { 707 if (escaped) { /* two escape characters => one space */ 708 c = 0x20; 709 escaped = 0; 710 } else { 711 escaped = 1; 712 continue; 713 } 714 } 715 else 716 { 717 if (c >= 0x80) { 718 c = 0x20; 719 escaped = 0; 720 } else if (escaped) { 721 c = gsm7bits_extend_to_unicode[c]; 722 } else 723 c = gsm7bits_to_unicode[c]; 724 } 725 726 result += utf8_write( utf8, result, c ); 727 } 728 return result; 729 } 730 731 /* convert a GSM 7-bit message into a unicode character array 732 * the 'dst' array must contain at least 160 chars. the function 733 * returns the number of characters decoded 734 * 735 * assumes the 'dst' array has at least septet_count items, returns the 736 * number of unichars really written 737 */ 738 int 739 ucs2_from_gsm7( bytes_t ucs2, 740 cbytes_t src, 741 int septet_offset, 742 int septet_count ) 743 { 744 const unsigned char* p = src + (septet_offset >> 3); 745 int shift = (septet_offset & 7); 746 int escaped = 0; 747 int result = 0; 748 749 for ( ; septet_count > 0; septet_count-- ) 750 { 751 unsigned val = (p[0] >> shift) & 0x7f; 752 753 if (shift > 1) 754 val = (val | (p[1] << (8-shift))) & 0x7f; 755 756 if (escaped) { 757 int c = gsm7bits_to_unicode[val]; 758 759 result += ucs2_write(ucs2, result, c); 760 escaped = 0; 761 } 762 else if (val == GSM_7BITS_ESCAPE) { 763 escaped = 1; 764 } 765 else { 766 val = gsm7bits_extend_to_unicode[val]; 767 if (val == 0) 768 val = 0x20; 769 770 result += ucs2_write( ucs2, result, val ); 771 } 772 } 773 return result/2; 774 } 775 776 777 /* count the number of septets required to write a utf8 string */ 778 static int 779 utf8_to_gsm7_count( cbytes_t utf8, int utf8len ) 780 { 781 cbytes_t utf8end = utf8 + utf8len; 782 int result = 0; 783 784 while ( utf8 < utf8end ) { 785 int len; 786 int c = utf8_next( &utf8, utf8end ); 787 788 if (c < 0) 789 break; 790 791 len = unichar_to_gsm7_count(c); 792 if (len == 0) /* replace non-representables with space */ 793 len = 1; 794 795 result += len; 796 } 797 return result; 798 } 799 800 typedef struct { 801 bytes_t dst; 802 unsigned pad; 803 int bits; 804 int offset; 805 } BWriterRec, *BWriter; 806 807 static void 808 bwriter_init( BWriter writer, bytes_t dst, int start ) 809 { 810 int shift = start & 7; 811 812 writer->dst = dst + (start >> 3); 813 writer->pad = 0; 814 writer->bits = shift; 815 writer->offset = start; 816 817 if (shift > 0) { 818 writer->pad = writer->dst[0] & ~(0xFF << shift); 819 } 820 } 821 822 static void 823 bwriter_add7( BWriter writer, unsigned value ) 824 { 825 writer->pad |= (unsigned)(value << writer->bits); 826 writer->bits += 7; 827 if (writer->bits >= 8) { 828 writer->dst[0] = (byte_t)writer->pad; 829 writer->bits -= 8; 830 writer->pad >>= 8; 831 writer->dst += 1; 832 } 833 writer->offset += 7; 834 } 835 836 static int 837 bwriter_done( BWriter writer ) 838 { 839 if (writer->bits > 0) { 840 writer->dst[0] = (byte_t)writer->pad; 841 writer->pad = 0; 842 writer->bits = 0; 843 writer->dst += 1; 844 } 845 return writer->offset; 846 } 847 848 /* convert a utf8 string to a gsm7 byte string - return the number of septets written */ 849 int 850 utf8_to_gsm7( cbytes_t utf8, int utf8len, bytes_t dst, int offset ) 851 { 852 const unsigned char* utf8end = utf8 + utf8len; 853 BWriterRec writer[1]; 854 855 if (dst == NULL) 856 return utf8_to_gsm7_count(utf8, utf8len); 857 858 bwriter_init( writer, dst, offset ); 859 while ( utf8 < utf8end ) { 860 int c = utf8_next( &utf8, utf8end ); 861 int nn; 862 863 if (c < 0) 864 break; 865 866 nn = unichar_to_gsm7(c); 867 if (nn >= 0) { 868 bwriter_add7( writer, nn ); 869 continue; 870 } 871 872 nn = unichar_to_gsm7_extend(c); 873 if (nn >= 0) { 874 bwriter_add7( writer, GSM_7BITS_ESCAPE ); 875 bwriter_add7( writer, nn ); 876 continue; 877 } 878 879 /* unknown => replaced by space */ 880 bwriter_add7( writer, 0x20 ); 881 } 882 return bwriter_done( writer ); 883 } 884 885 886 int 887 utf8_to_gsm8( cbytes_t utf8, int utf8len, bytes_t dst ) 888 { 889 const unsigned char* utf8end = utf8 + utf8len; 890 int result = 0; 891 892 while ( utf8 < utf8end ) { 893 int c = utf8_next( &utf8, utf8end ); 894 int nn; 895 896 if (c < 0) 897 break; 898 899 nn = unichar_to_gsm7(c); 900 if (nn >= 0) { 901 if (dst) 902 dst[result] = (byte_t)nn; 903 result += 1; 904 continue; 905 } 906 907 nn = unichar_to_gsm7_extend(c); 908 if (nn >= 0) { 909 if (dst) { 910 dst[result+0] = (byte_t) GSM_7BITS_ESCAPE; 911 dst[result+1] = (byte_t) nn; 912 } 913 result += 2; 914 continue; 915 } 916 917 /* unknown => space */ 918 if (dst) 919 dst[result] = 0x20; 920 result += 1; 921 } 922 return result; 923 } 924 925 926 int 927 ucs2_to_gsm7( cbytes_t ucs2, int ucs2len, bytes_t dst, int offset ) 928 { 929 const unsigned char* ucs2end = ucs2 + ucs2len*2; 930 BWriterRec writer[1]; 931 932 bwriter_init( writer, dst, offset ); 933 while ( ucs2 < ucs2end ) { 934 int c = *ucs2++; 935 int nn; 936 937 for (nn = 0; nn < 128; nn++) { 938 if ( gsm7bits_to_unicode[nn] == c ) { 939 bwriter_add7( writer, nn ); 940 goto NextUnicode; 941 } 942 } 943 for (nn = 0; nn < 128; nn++) { 944 if ( gsm7bits_extend_to_unicode[nn] == c ) { 945 bwriter_add7( writer, GSM_7BITS_ESCAPE ); 946 bwriter_add7( writer, nn ); 947 goto NextUnicode; 948 } 949 } 950 951 /* unknown */ 952 bwriter_add7( writer, 0x20 ); 953 954 NextUnicode: 955 ; 956 } 957 return bwriter_done( writer ); 958 } 959 960 961 int 962 ucs2_to_gsm8( cbytes_t ucs2, int ucs2len, bytes_t dst ) 963 { 964 const unsigned char* ucs2end = ucs2 + ucs2len*2; 965 bytes_t dst0 = dst; 966 967 while ( ucs2 < ucs2end ) { 968 int c = *ucs2++; 969 int nn; 970 971 for (nn = 0; nn < 128; nn++) { 972 if ( gsm7bits_to_unicode[nn] == c ) { 973 *dst++ = (byte_t)nn; 974 goto NextUnicode; 975 } 976 } 977 for (nn = 0; nn < 128; nn++) { 978 if ( gsm7bits_extend_to_unicode[nn] == c ) { 979 dst[0] = (byte_t) GSM_7BITS_ESCAPE; 980 dst[1] = (byte_t) nn; 981 dst += 2; 982 goto NextUnicode; 983 } 984 } 985 986 /* unknown */ 987 *dst++ = 0x20; 988 989 NextUnicode: 990 ; 991 } 992 return (dst - dst0); 993 } 994 995 int 996 gsm_bcdnum_to_ascii( cbytes_t bcd, int count, bytes_t dst ) 997 { 998 int result = 0; 999 int shift = 0; 1000 1001 while (count > 0) { 1002 int c = (bcd[0] >> shift) & 0xf; 1003 1004 if (c == 15 && count == 1) /* ignore trailing 0xf */ 1005 break; 1006 1007 if (c >= 14) 1008 c = 0; 1009 1010 if (dst) dst[result] = "0123456789*#,N"[c]; 1011 result += 1; 1012 1013 shift += 4; 1014 if (shift == 8) { 1015 shift = 0; 1016 bcd += 1; 1017 } 1018 } 1019 return result; 1020 } 1021 1022 1023 int 1024 gsm_bcdnum_from_ascii( cbytes_t ascii, int asciilen, bytes_t dst ) 1025 { 1026 cbytes_t end = ascii + asciilen; 1027 int result = 0; 1028 int phase = 0x01; 1029 1030 while (ascii < end) { 1031 int c = *ascii++; 1032 1033 if (c == '*') 1034 c = 10; 1035 else if (c == '#') 1036 c = 11; 1037 else if (c == ',') 1038 c = 12; 1039 else if (c == 'N') 1040 c = 13; 1041 else { 1042 c -= '0'; 1043 if ((unsigned)c >= 10U) 1044 return -1; 1045 } 1046 phase = (phase << 4) | c; 1047 result += 1; 1048 if (phase & 0x100) { 1049 if (dst) dst[result/2] = (byte_t) phase; 1050 phase = 0x01; 1051 } 1052 } 1053 1054 if (result & 1) { 1055 if (dst) dst[result/2] = (byte_t)(phase | 0xf0); 1056 } 1057 return result; 1058 } 1059 1060 /** ADN: Abbreviated Dialing Number 1061 **/ 1062 1063 #define ADN_FOOTER_SIZE 14 1064 #define ADN_OFFSET_NUMBER_LENGTH 0 1065 #define ADN_OFFSET_TON_NPI 1 1066 #define ADN_OFFSET_NUMBER_START 2 1067 #define ADN_OFFSET_NUMBER_END 11 1068 #define ADN_OFFSET_CAPABILITY_ID 12 1069 #define ADN_OFFSET_EXTENSION_ID 13 1070 1071 /* see 10.5.1 of 3GPP 51.011 */ 1072 static int 1073 sim_adn_alpha_to_utf8( cbytes_t alpha, cbytes_t end, bytes_t dst ) 1074 { 1075 int result = 0; 1076 1077 /* ignore trailing 0xff */ 1078 while (alpha < end && end[-1] == 0xff) 1079 end--; 1080 1081 if (alpha >= end) 1082 return 0; 1083 1084 if (alpha[0] == 0x80) { /* UCS/2 source encoding */ 1085 alpha += 1; 1086 result = ucs2_to_utf8( alpha, (end-alpha)/2, dst ); 1087 } 1088 else 1089 { 1090 int is_ucs2 = 0; 1091 int len = 0, base = 0; 1092 1093 if (alpha+3 <= end && alpha[0] == 0x81) { 1094 is_ucs2 = 1; 1095 len = alpha[1]; 1096 base = alpha[2] << 7; 1097 alpha += 3; 1098 if (len > end-alpha) 1099 len = end-alpha; 1100 } else if (alpha+4 <= end && alpha[0] == 0x82) { 1101 is_ucs2 = 1; 1102 len = alpha[1]; 1103 base = (alpha[2] << 8) | alpha[3]; 1104 alpha += 4; 1105 if (len > end-alpha) 1106 len = end-alpha; 1107 } 1108 1109 if (is_ucs2) { 1110 end = alpha + len; 1111 while (alpha < end) { 1112 int c = alpha[0]; 1113 if (c >= 0x80) { 1114 result += utf8_write(dst, result, base + (c & 0x7f)); 1115 alpha += 1; 1116 } else { 1117 /* GSM character set */ 1118 int count; 1119 for (count = 0; alpha+count < end && alpha[count] < 128; count++) 1120 ; 1121 result += utf8_from_gsm8(alpha, count, (dst ? dst+result : NULL)); 1122 alpha += count; 1123 } 1124 } 1125 } 1126 else { 1127 result = utf8_from_gsm8(alpha, end-alpha, dst); 1128 } 1129 } 1130 return result; 1131 } 1132 1133 #if 0 1134 static int 1135 sim_adn_alpha_from_utf8( cbytes_t utf8, int utf8len, bytes_t dst ) 1136 { 1137 int result = 0; 1138 1139 if (utf8_check_gsm7(utf8, utf8len)) { 1140 /* GSM 7-bit compatible, encode directly as 8-bit string */ 1141 result = utf8_to_gsm8(utf8, utf8len, dst); 1142 } else { 1143 /* otherwise, simply try UCS-2 encoding, nothing more serious at the moment */ 1144 if (dst) { 1145 dst[0] = 0x80; 1146 } 1147 result = 1 + utf8_to_ucs2(utf8, utf8len, dst ? (dst+1) : NULL)*2; 1148 } 1149 return result; 1150 } 1151 #endif 1152 1153 int 1154 sim_adn_record_from_bytes( SimAdnRecord rec, cbytes_t data, int len ) 1155 { 1156 cbytes_t end = data + len; 1157 cbytes_t footer = end - ADN_FOOTER_SIZE; 1158 int num_len; 1159 1160 rec->adn.alpha[0] = 0; 1161 rec->adn.number[0] = 0; 1162 rec->ext_record = 0xff; 1163 1164 if (len < ADN_FOOTER_SIZE) 1165 return -1; 1166 1167 /* alpha is optional */ 1168 if (len > ADN_FOOTER_SIZE) { 1169 cbytes_t dataend = data + len - ADN_FOOTER_SIZE; 1170 int count = sim_adn_alpha_to_utf8(data, dataend, NULL); 1171 1172 if (count > sizeof(rec->adn.alpha)-1) /* too long */ 1173 return -1; 1174 1175 sim_adn_alpha_to_utf8(data, dataend, rec->adn.alpha); 1176 rec->adn.alpha[count] = 0; 1177 } 1178 1179 num_len = footer[ADN_OFFSET_NUMBER_LENGTH]; 1180 if (num_len > 11) 1181 return -1; 1182 1183 /* decode TON and number to ASCII, NOTE: this is lossy !! */ 1184 { 1185 int ton = footer[ADN_OFFSET_TON_NPI]; 1186 bytes_t number = (bytes_t) rec->adn.number; 1187 int len = sizeof(rec->adn.number)-1; 1188 int count; 1189 1190 if (ton != 0x81 && ton != 0x91) 1191 return -1; 1192 1193 if (ton == 0x91) { 1194 *number++ = '+'; 1195 len -= 1; 1196 } 1197 1198 count = gsm_bcdnum_to_ascii( footer + ADN_OFFSET_NUMBER_START, 1199 num_len*2, number ); 1200 number[count] = 0; 1201 } 1202 return 0; 1203 } 1204 1205 int 1206 sim_adn_record_to_bytes( SimAdnRecord rec, bytes_t data, int datalen ) 1207 { 1208 bytes_t end = data + datalen; 1209 bytes_t footer = end - ADN_FOOTER_SIZE; 1210 int ton = 0x81; 1211 cbytes_t number = (cbytes_t) rec->adn.number; 1212 1213 if (number[0] == '+') { 1214 ton = 0x91; 1215 number += 1; 1216 } 1217 footer[0] = (strlen((const char*)number)+1)/2 + 1; 1218 /* XXXX: TODO */ 1219 return 0; 1220 } 1221