1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1998-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * 9 * File ustdio.c 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 11/18/98 stephen Creation. 15 * 03/12/99 stephen Modified for new C API. 16 * 07/19/99 stephen Fixed read() and gets() 17 ****************************************************************************** 18 */ 19 20 #include "unicode/ustdio.h" 21 #include "unicode/putil.h" 22 #include "cmemory.h" 23 #include "cstring.h" 24 #include "ufile.h" 25 #include "ufmt_cmn.h" 26 #include "unicode/ucnv.h" 27 #include "unicode/ustring.h" 28 29 #include <string.h> 30 31 #define DELIM_LF 0x000A 32 #define DELIM_VT 0x000B 33 #define DELIM_FF 0x000C 34 #define DELIM_CR 0x000D 35 #define DELIM_NEL 0x0085 36 #define DELIM_LS 0x2028 37 #define DELIM_PS 0x2029 38 39 /* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */ 40 #ifdef U_WINDOWS 41 static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 }; 42 static const uint32_t DELIMITERS_LEN = 2; 43 /* TODO: Default newline writing should be detected based upon the converter being used. */ 44 #else 45 static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 }; 46 static const uint32_t DELIMITERS_LEN = 1; 47 #endif 48 49 #define IS_FIRST_STRING_DELIMITER(c1) \ 50 (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \ 51 || (c1) == DELIM_NEL \ 52 || (c1) == DELIM_LS \ 53 || (c1) == DELIM_PS) 54 #define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR) 55 #define IS_COMBINED_STRING_DELIMITER(c1, c2) \ 56 (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF) 57 58 59 #if !UCONFIG_NO_TRANSLITERATION 60 61 U_CAPI UTransliterator* U_EXPORT2 62 u_fsettransliterator(UFILE *file, UFileDirection direction, 63 UTransliterator *adopt, UErrorCode *status) 64 { 65 UTransliterator *old = NULL; 66 67 if(U_FAILURE(*status)) 68 { 69 return adopt; 70 } 71 72 if(!file) 73 { 74 *status = U_ILLEGAL_ARGUMENT_ERROR; 75 return adopt; 76 } 77 78 if(direction & U_READ) 79 { 80 /** TODO: implement */ 81 *status = U_UNSUPPORTED_ERROR; 82 return adopt; 83 } 84 85 if(adopt == NULL) /* they are clearing it */ 86 { 87 if(file->fTranslit != NULL) 88 { 89 /* TODO: Check side */ 90 old = file->fTranslit->translit; 91 uprv_free(file->fTranslit->buffer); 92 file->fTranslit->buffer=NULL; 93 uprv_free(file->fTranslit); 94 file->fTranslit=NULL; 95 } 96 } 97 else 98 { 99 if(file->fTranslit == NULL) 100 { 101 file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer)); 102 if(!file->fTranslit) 103 { 104 *status = U_MEMORY_ALLOCATION_ERROR; 105 return adopt; 106 } 107 file->fTranslit->capacity = 0; 108 file->fTranslit->length = 0; 109 file->fTranslit->pos = 0; 110 file->fTranslit->buffer = NULL; 111 } 112 else 113 { 114 old = file->fTranslit->translit; 115 ufile_flush_translit(file); 116 } 117 118 file->fTranslit->translit = adopt; 119 } 120 121 return old; 122 } 123 124 static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush) 125 { 126 int32_t newlen; 127 int32_t junkCount = 0; 128 int32_t textLength; 129 int32_t textLimit; 130 UTransPosition pos; 131 UErrorCode status = U_ZERO_ERROR; 132 133 if(count == NULL) 134 { 135 count = &junkCount; 136 } 137 138 if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit)) 139 { 140 /* fast path */ 141 return src; 142 } 143 144 /* First: slide over everything */ 145 if(f->fTranslit->length > f->fTranslit->pos) 146 { 147 memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos, 148 (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar)); 149 } 150 f->fTranslit->length -= f->fTranslit->pos; /* always */ 151 f->fTranslit->pos = 0; 152 153 /* Calculate new buffer size needed */ 154 newlen = (*count + f->fTranslit->length) * 4; 155 156 if(newlen > f->fTranslit->capacity) 157 { 158 if(f->fTranslit->buffer == NULL) 159 { 160 f->fTranslit->buffer = (UChar*)uprv_malloc(newlen * sizeof(UChar)); 161 } 162 else 163 { 164 f->fTranslit->buffer = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar)); 165 } 166 /* Check for malloc/realloc failure. */ 167 if (f->fTranslit->buffer == NULL) { 168 return NULL; 169 } 170 f->fTranslit->capacity = newlen; 171 } 172 173 /* Now, copy any data over */ 174 u_strncpy(f->fTranslit->buffer + f->fTranslit->length, 175 src, 176 *count); 177 f->fTranslit->length += *count; 178 179 /* Now, translit in place as much as we can */ 180 if(flush == FALSE) 181 { 182 textLength = f->fTranslit->length; 183 pos.contextStart = 0; 184 pos.contextLimit = textLength; 185 pos.start = 0; 186 pos.limit = textLength; 187 188 utrans_transIncrementalUChars(f->fTranslit->translit, 189 f->fTranslit->buffer, /* because we shifted */ 190 &textLength, 191 f->fTranslit->capacity, 192 &pos, 193 &status); 194 195 /* now: start/limit point to the transliterated text */ 196 /* Transliterated is [buffer..pos.start) */ 197 *count = pos.start; 198 f->fTranslit->pos = pos.start; 199 f->fTranslit->length = pos.limit; 200 201 return f->fTranslit->buffer; 202 } 203 else 204 { 205 textLength = f->fTranslit->length; 206 textLimit = f->fTranslit->length; 207 208 utrans_transUChars(f->fTranslit->translit, 209 f->fTranslit->buffer, 210 &textLength, 211 f->fTranslit->capacity, 212 0, 213 &textLimit, 214 &status); 215 216 /* out: converted len */ 217 *count = textLimit; 218 219 /* Set pointers to 0 */ 220 f->fTranslit->pos = 0; 221 f->fTranslit->length = 0; 222 223 return f->fTranslit->buffer; 224 } 225 } 226 227 #endif 228 229 void 230 ufile_flush_translit(UFILE *f) 231 { 232 #if !UCONFIG_NO_TRANSLITERATION 233 if((!f)||(!f->fTranslit)) 234 return; 235 #endif 236 237 u_file_write_flush(NULL, 0, f, FALSE, TRUE); 238 } 239 240 241 void 242 ufile_flush_io(UFILE *f) 243 { 244 if((!f) || (!f->fFile)) { 245 return; /* skip if no file */ 246 } 247 248 u_file_write_flush(NULL, 0, f, TRUE, FALSE); 249 } 250 251 252 void 253 ufile_close_translit(UFILE *f) 254 { 255 #if !UCONFIG_NO_TRANSLITERATION 256 if((!f)||(!f->fTranslit)) 257 return; 258 #endif 259 260 ufile_flush_translit(f); 261 262 #if !UCONFIG_NO_TRANSLITERATION 263 if(f->fTranslit->translit) 264 utrans_close(f->fTranslit->translit); 265 266 if(f->fTranslit->buffer) 267 { 268 uprv_free(f->fTranslit->buffer); 269 } 270 271 uprv_free(f->fTranslit); 272 f->fTranslit = NULL; 273 #endif 274 } 275 276 277 /* Input/output */ 278 279 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 280 u_fputs(const UChar *s, 281 UFILE *f) 282 { 283 int32_t count = u_file_write(s, u_strlen(s), f); 284 count += u_file_write(DELIMITERS, DELIMITERS_LEN, f); 285 return count; 286 } 287 288 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 289 u_fputc(UChar32 uc, 290 UFILE *f) 291 { 292 UChar buf[2]; 293 int32_t idx = 0; 294 UBool isError = FALSE; 295 296 U16_APPEND(buf, idx, sizeof(buf)/sizeof(*buf), uc, isError); 297 if (isError) { 298 return U_EOF; 299 } 300 return u_file_write(buf, idx, f) == idx ? uc : U_EOF; 301 } 302 303 304 U_CFUNC int32_t U_EXPORT2 305 u_file_write_flush(const UChar *chars, 306 int32_t count, 307 UFILE *f, 308 UBool flushIO, 309 UBool flushTranslit) 310 { 311 /* Set up conversion parameters */ 312 UErrorCode status = U_ZERO_ERROR; 313 const UChar *mySource = chars; 314 const UChar *mySourceBegin; 315 const UChar *mySourceEnd; 316 char charBuffer[UFILE_CHARBUFFER_SIZE]; 317 char *myTarget = charBuffer; 318 int32_t written = 0; 319 int32_t numConverted = 0; 320 321 if (count < 0) { 322 count = u_strlen(chars); 323 } 324 325 #if !UCONFIG_NO_TRANSLITERATION 326 if((f->fTranslit) && (f->fTranslit->translit)) 327 { 328 /* Do the transliteration */ 329 mySource = u_file_translit(f, chars, &count, flushTranslit); 330 } 331 #endif 332 333 /* Write to a string. */ 334 if (!f->fFile) { 335 int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos); 336 if (flushIO && charsLeft > count) { 337 count++; 338 } 339 written = ufmt_min(count, charsLeft); 340 u_strncpy(f->str.fPos, mySource, written); 341 f->str.fPos += written; 342 return written; 343 } 344 345 mySourceEnd = mySource + count; 346 347 /* Perform the conversion in a loop */ 348 do { 349 mySourceBegin = mySource; /* beginning location for this loop */ 350 status = U_ZERO_ERROR; 351 if(f->fConverter != NULL) { /* We have a valid converter */ 352 ucnv_fromUnicode(f->fConverter, 353 &myTarget, 354 charBuffer + UFILE_CHARBUFFER_SIZE, 355 &mySource, 356 mySourceEnd, 357 NULL, 358 flushIO, 359 &status); 360 } else { /*weiv: do the invariant conversion */ 361 int32_t convertChars = (int32_t) (mySourceEnd - mySource); 362 if (convertChars > UFILE_CHARBUFFER_SIZE) { 363 convertChars = UFILE_CHARBUFFER_SIZE; 364 status = U_BUFFER_OVERFLOW_ERROR; 365 } 366 u_UCharsToChars(mySource, myTarget, convertChars); 367 mySource += convertChars; 368 myTarget += convertChars; 369 } 370 numConverted = (int32_t)(myTarget - charBuffer); 371 372 if (numConverted > 0) { 373 /* write the converted bytes */ 374 fwrite(charBuffer, 375 sizeof(char), 376 numConverted, 377 f->fFile); 378 379 written += (int32_t) (mySource - mySourceBegin); 380 } 381 myTarget = charBuffer; 382 } 383 while(status == U_BUFFER_OVERFLOW_ERROR); 384 385 /* return # of chars written */ 386 return written; 387 } 388 389 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 390 u_file_write( const UChar *chars, 391 int32_t count, 392 UFILE *f) 393 { 394 return u_file_write_flush(chars,count,f,FALSE,FALSE); 395 } 396 397 398 /* private function used for buffering input */ 399 void 400 ufile_fill_uchar_buffer(UFILE *f) 401 { 402 UErrorCode status; 403 const char *mySource; 404 const char *mySourceEnd; 405 UChar *myTarget; 406 int32_t bufferSize; 407 int32_t maxCPBytes; 408 int32_t bytesRead; 409 int32_t availLength; 410 int32_t dataSize; 411 char charBuffer[UFILE_CHARBUFFER_SIZE]; 412 u_localized_string *str; 413 414 if (f->fFile == NULL) { 415 /* There is nothing to do. It's a string. */ 416 return; 417 } 418 419 str = &f->str; 420 dataSize = (int32_t)(str->fLimit - str->fPos); 421 if (f->fFileno == 0 && dataSize > 0) { 422 /* Don't read from stdin too many times. There is still some data. */ 423 return; 424 } 425 426 /* shift the buffer if it isn't empty */ 427 if(dataSize != 0) { 428 uprv_memmove(f->fUCBuffer, str->fPos, dataSize * sizeof(UChar)); 429 } 430 431 432 /* record how much buffer space is available */ 433 availLength = UFILE_UCHARBUFFER_SIZE - dataSize; 434 435 /* Determine the # of codepage bytes needed to fill our UChar buffer */ 436 /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/ 437 maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1); 438 439 /* Read in the data to convert */ 440 if (f->fFileno == 0) { 441 /* Special case. Read from stdin one line at a time. */ 442 char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile); 443 bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0); 444 } 445 else { 446 /* A normal file */ 447 bytesRead = (int32_t)fread(charBuffer, 448 sizeof(char), 449 ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), 450 f->fFile); 451 } 452 453 /* Set up conversion parameters */ 454 status = U_ZERO_ERROR; 455 mySource = charBuffer; 456 mySourceEnd = charBuffer + bytesRead; 457 myTarget = f->fUCBuffer + dataSize; 458 bufferSize = UFILE_UCHARBUFFER_SIZE; 459 460 if(f->fConverter != NULL) { /* We have a valid converter */ 461 /* Perform the conversion */ 462 ucnv_toUnicode(f->fConverter, 463 &myTarget, 464 f->fUCBuffer + bufferSize, 465 &mySource, 466 mySourceEnd, 467 NULL, 468 (UBool)(feof(f->fFile) != 0), 469 &status); 470 471 } else { /*weiv: do the invariant conversion */ 472 u_charsToUChars(mySource, myTarget, bytesRead); 473 myTarget += bytesRead; 474 } 475 476 /* update the pointers into our array */ 477 str->fPos = str->fBuffer; 478 str->fLimit = myTarget; 479 } 480 481 U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 482 u_fgets(UChar *s, 483 int32_t n, 484 UFILE *f) 485 { 486 int32_t dataSize; 487 int32_t count; 488 UChar *alias; 489 const UChar *limit; 490 UChar *sItr; 491 UChar currDelim = 0; 492 u_localized_string *str; 493 494 if (n <= 0) { 495 /* Caller screwed up. We need to write the null terminatior. */ 496 return NULL; 497 } 498 499 /* fill the buffer if needed */ 500 str = &f->str; 501 if (str->fPos >= str->fLimit) { 502 ufile_fill_uchar_buffer(f); 503 } 504 505 /* subtract 1 from n to compensate for the terminator */ 506 --n; 507 508 /* determine the amount of data in the buffer */ 509 dataSize = (int32_t)(str->fLimit - str->fPos); 510 511 /* if 0 characters were left, return 0 */ 512 if (dataSize == 0) 513 return NULL; 514 515 /* otherwise, iteratively fill the buffer and copy */ 516 count = 0; 517 sItr = s; 518 currDelim = 0; 519 while (dataSize > 0 && count < n) { 520 alias = str->fPos; 521 522 /* Find how much to copy */ 523 if (dataSize < (n - count)) { 524 limit = str->fLimit; 525 } 526 else { 527 limit = alias + (n - count); 528 } 529 530 if (!currDelim) { 531 /* Copy UChars until we find the first occurrence of a delimiter character */ 532 while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) { 533 count++; 534 *(sItr++) = *(alias++); 535 } 536 /* Preserve the newline */ 537 if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) { 538 if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) { 539 currDelim = *alias; 540 } 541 else { 542 currDelim = 1; /* This isn't a newline, but it's used to say 543 that we should break later. We've checked all 544 possible newline combinations even across buffer 545 boundaries. */ 546 } 547 count++; 548 *(sItr++) = *(alias++); 549 } 550 } 551 /* If we have a CRLF combination, preserve that too. */ 552 if (alias < limit) { 553 if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) { 554 count++; 555 *(sItr++) = *(alias++); 556 } 557 currDelim = 1; /* This isn't a newline, but it's used to say 558 that we should break later. We've checked all 559 possible newline combinations even across buffer 560 boundaries. */ 561 } 562 563 /* update the current buffer position */ 564 str->fPos = alias; 565 566 /* if we found a delimiter */ 567 if (currDelim == 1) { 568 /* break out */ 569 break; 570 } 571 572 /* refill the buffer */ 573 ufile_fill_uchar_buffer(f); 574 575 /* determine the amount of data in the buffer */ 576 dataSize = (int32_t)(str->fLimit - str->fPos); 577 } 578 579 /* add the terminator and return s */ 580 *sItr = 0x0000; 581 return s; 582 } 583 584 U_CFUNC UBool U_EXPORT2 585 ufile_getch(UFILE *f, UChar *ch) 586 { 587 UBool isValidChar = FALSE; 588 589 *ch = U_EOF; 590 /* if we have an available character in the buffer, return it */ 591 if(f->str.fPos < f->str.fLimit){ 592 *ch = *(f->str.fPos)++; 593 isValidChar = TRUE; 594 } 595 else { 596 /* otherwise, fill the buffer and return the next character */ 597 if(f->str.fPos >= f->str.fLimit) { 598 ufile_fill_uchar_buffer(f); 599 } 600 if(f->str.fPos < f->str.fLimit) { 601 *ch = *(f->str.fPos)++; 602 isValidChar = TRUE; 603 } 604 } 605 return isValidChar; 606 } 607 608 U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 609 u_fgetc(UFILE *f) 610 { 611 UChar ch; 612 ufile_getch(f, &ch); 613 return ch; 614 } 615 616 U_CFUNC UBool U_EXPORT2 617 ufile_getch32(UFILE *f, UChar32 *c32) 618 { 619 UBool isValidChar = FALSE; 620 u_localized_string *str; 621 622 *c32 = U_EOF; 623 624 /* Fill the buffer if it is empty */ 625 str = &f->str; 626 if (f && str->fPos + 1 >= str->fLimit) { 627 ufile_fill_uchar_buffer(f); 628 } 629 630 /* Get the next character in the buffer */ 631 if (str->fPos < str->fLimit) { 632 *c32 = *(str->fPos)++; 633 if (U_IS_LEAD(*c32)) { 634 if (str->fPos < str->fLimit) { 635 UChar c16 = *(str->fPos)++; 636 *c32 = U16_GET_SUPPLEMENTARY(*c32, c16); 637 isValidChar = TRUE; 638 } 639 else { 640 *c32 = U_EOF; 641 } 642 } 643 else { 644 isValidChar = TRUE; 645 } 646 } 647 648 return isValidChar; 649 } 650 651 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 652 u_fgetcx(UFILE *f) 653 { 654 UChar32 ch; 655 ufile_getch32(f, &ch); 656 return ch; 657 } 658 659 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 660 u_fungetc(UChar32 ch, 661 UFILE *f) 662 { 663 u_localized_string *str; 664 665 str = &f->str; 666 667 /* if we're at the beginning of the buffer, sorry! */ 668 if (str->fPos == str->fBuffer 669 || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer)) 670 { 671 ch = U_EOF; 672 } 673 else { 674 /* otherwise, put the character back */ 675 /* Remember, read them back on in the reverse order. */ 676 if (U_IS_LEAD(ch)) { 677 if (*--(str->fPos) != U16_TRAIL(ch) 678 || *--(str->fPos) != U16_LEAD(ch)) 679 { 680 ch = U_EOF; 681 } 682 } 683 else if (*--(str->fPos) != ch) { 684 ch = U_EOF; 685 } 686 } 687 return ch; 688 } 689 690 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 691 u_file_read( UChar *chars, 692 int32_t count, 693 UFILE *f) 694 { 695 int32_t dataSize; 696 int32_t read = 0; 697 u_localized_string *str = &f->str; 698 699 do { 700 701 /* determine the amount of data in the buffer */ 702 dataSize = (int32_t)(str->fLimit - str->fPos); 703 if (dataSize <= 0) { 704 /* fill the buffer */ 705 ufile_fill_uchar_buffer(f); 706 dataSize = (int32_t)(str->fLimit - str->fPos); 707 } 708 709 /* Make sure that we don't read too much */ 710 if (dataSize > (count - read)) { 711 dataSize = count - read; 712 } 713 714 /* copy the current data in the buffer */ 715 memcpy(chars + read, str->fPos, dataSize * sizeof(UChar)); 716 717 /* update number of items read */ 718 read += dataSize; 719 720 /* update the current buffer position */ 721 str->fPos += dataSize; 722 } 723 while (dataSize != 0 && read < count); 724 725 return read; 726 } 727