1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1998-2008, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * 9 * File ustdio.c 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 11/18/98 stephen Creation. 15 * 03/12/99 stephen Modified for new C API. 16 * 07/19/99 stephen Fixed read() and gets() 17 ****************************************************************************** 18 */ 19 20 #include "unicode/ustdio.h" 21 #include "unicode/putil.h" 22 #include "cmemory.h" 23 #include "cstring.h" 24 #include "ufile.h" 25 #include "ufmt_cmn.h" 26 #include "unicode/ucnv.h" 27 #include "unicode/ustring.h" 28 29 #include <string.h> 30 31 #define DELIM_LF 0x000A 32 #define DELIM_VT 0x000B 33 #define DELIM_FF 0x000C 34 #define DELIM_CR 0x000D 35 #define DELIM_NEL 0x0085 36 #define DELIM_LS 0x2028 37 #define DELIM_PS 0x2029 38 39 /* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */ 40 #ifdef U_WINDOWS 41 static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 }; 42 static const uint32_t DELIMITERS_LEN = 2; 43 /* TODO: Default newline writing should be detected based upon the converter being used. */ 44 #else 45 static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 }; 46 static const uint32_t DELIMITERS_LEN = 1; 47 #endif 48 49 #define IS_FIRST_STRING_DELIMITER(c1) \ 50 (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \ 51 || (c1) == DELIM_NEL \ 52 || (c1) == DELIM_LS \ 53 || (c1) == DELIM_PS) 54 #define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR) 55 #define IS_COMBINED_STRING_DELIMITER(c1, c2) \ 56 (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF) 57 58 59 #if !UCONFIG_NO_TRANSLITERATION 60 61 U_CAPI UTransliterator* U_EXPORT2 62 u_fsettransliterator(UFILE *file, UFileDirection direction, 63 UTransliterator *adopt, UErrorCode *status) 64 { 65 UTransliterator *old = NULL; 66 67 if(U_FAILURE(*status)) 68 { 69 return adopt; 70 } 71 72 if(!file) 73 { 74 *status = U_ILLEGAL_ARGUMENT_ERROR; 75 return adopt; 76 } 77 78 if(direction & U_READ) 79 { 80 /** TODO: implement */ 81 *status = U_UNSUPPORTED_ERROR; 82 return adopt; 83 } 84 85 if(adopt == NULL) /* they are clearing it */ 86 { 87 if(file->fTranslit != NULL) 88 { 89 /* TODO: Check side */ 90 old = file->fTranslit->translit; 91 uprv_free(file->fTranslit->buffer); 92 file->fTranslit->buffer=NULL; 93 uprv_free(file->fTranslit); 94 file->fTranslit=NULL; 95 } 96 } 97 else 98 { 99 if(file->fTranslit == NULL) 100 { 101 file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer)); 102 if(!file->fTranslit) 103 { 104 *status = U_MEMORY_ALLOCATION_ERROR; 105 return adopt; 106 } 107 file->fTranslit->capacity = 0; 108 file->fTranslit->length = 0; 109 file->fTranslit->pos = 0; 110 file->fTranslit->buffer = NULL; 111 } 112 else 113 { 114 old = file->fTranslit->translit; 115 ufile_flush_translit(file); 116 } 117 118 file->fTranslit->translit = adopt; 119 } 120 121 return old; 122 } 123 124 static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush) 125 { 126 int32_t newlen; 127 int32_t junkCount = 0; 128 int32_t textLength; 129 int32_t textLimit; 130 UTransPosition pos; 131 UErrorCode status = U_ZERO_ERROR; 132 133 if(count == NULL) 134 { 135 count = &junkCount; 136 } 137 138 if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit)) 139 { 140 /* fast path */ 141 return src; 142 } 143 144 /* First: slide over everything */ 145 if(f->fTranslit->length > f->fTranslit->pos) 146 { 147 memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos, 148 (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar)); 149 } 150 f->fTranslit->length -= f->fTranslit->pos; /* always */ 151 f->fTranslit->pos = 0; 152 153 /* Calculate new buffer size needed */ 154 newlen = (*count + f->fTranslit->length) * 4; 155 156 if(newlen > f->fTranslit->capacity) 157 { 158 if(f->fTranslit->buffer == NULL) 159 { 160 f->fTranslit->buffer = (UChar*)uprv_malloc(newlen * sizeof(UChar)); 161 } 162 else 163 { 164 f->fTranslit->buffer = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar)); 165 } 166 /* Check for malloc/realloc failure. */ 167 if (f->fTranslit->buffer == NULL) { 168 return NULL; 169 } 170 f->fTranslit->capacity = newlen; 171 } 172 173 /* Now, copy any data over */ 174 u_strncpy(f->fTranslit->buffer + f->fTranslit->length, 175 src, 176 *count); 177 f->fTranslit->length += *count; 178 179 /* Now, translit in place as much as we can */ 180 if(flush == FALSE) 181 { 182 textLength = f->fTranslit->length; 183 pos.contextStart = 0; 184 pos.contextLimit = textLength; 185 pos.start = 0; 186 pos.limit = textLength; 187 188 utrans_transIncrementalUChars(f->fTranslit->translit, 189 f->fTranslit->buffer, /* because we shifted */ 190 &textLength, 191 f->fTranslit->capacity, 192 &pos, 193 &status); 194 195 /* now: start/limit point to the transliterated text */ 196 /* Transliterated is [buffer..pos.start) */ 197 *count = pos.start; 198 f->fTranslit->pos = pos.start; 199 f->fTranslit->length = pos.limit; 200 201 return f->fTranslit->buffer; 202 } 203 else 204 { 205 textLength = f->fTranslit->length; 206 textLimit = f->fTranslit->length; 207 208 utrans_transUChars(f->fTranslit->translit, 209 f->fTranslit->buffer, 210 &textLength, 211 f->fTranslit->capacity, 212 0, 213 &textLimit, 214 &status); 215 216 /* out: converted len */ 217 *count = textLimit; 218 219 /* Set pointers to 0 */ 220 f->fTranslit->pos = 0; 221 f->fTranslit->length = 0; 222 223 return f->fTranslit->buffer; 224 } 225 } 226 227 #endif 228 229 void 230 ufile_flush_translit(UFILE *f) 231 { 232 #if !UCONFIG_NO_TRANSLITERATION 233 if((!f)||(!f->fTranslit)) 234 return; 235 #endif 236 237 u_file_write_flush(NULL, 0, f, FALSE, TRUE); 238 } 239 240 241 void 242 ufile_close_translit(UFILE *f) 243 { 244 #if !UCONFIG_NO_TRANSLITERATION 245 if((!f)||(!f->fTranslit)) 246 return; 247 #endif 248 249 ufile_flush_translit(f); 250 251 #if !UCONFIG_NO_TRANSLITERATION 252 if(f->fTranslit->translit) 253 utrans_close(f->fTranslit->translit); 254 255 if(f->fTranslit->buffer) 256 { 257 uprv_free(f->fTranslit->buffer); 258 } 259 260 uprv_free(f->fTranslit); 261 f->fTranslit = NULL; 262 #endif 263 } 264 265 266 /* Input/output */ 267 268 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 269 u_fputs(const UChar *s, 270 UFILE *f) 271 { 272 int32_t count = u_file_write(s, u_strlen(s), f); 273 count += u_file_write(DELIMITERS, DELIMITERS_LEN, f); 274 return count; 275 } 276 277 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 278 u_fputc(UChar32 uc, 279 UFILE *f) 280 { 281 UChar buf[2]; 282 int32_t idx = 0; 283 UBool isError = FALSE; 284 285 U16_APPEND(buf, idx, sizeof(buf)/sizeof(*buf), uc, isError); 286 if (isError) { 287 return U_EOF; 288 } 289 return u_file_write(buf, idx, f) == idx ? uc : U_EOF; 290 } 291 292 293 U_CFUNC int32_t U_EXPORT2 294 u_file_write_flush(const UChar *chars, 295 int32_t count, 296 UFILE *f, 297 UBool flushIO, 298 UBool flushTranslit) 299 { 300 /* Set up conversion parameters */ 301 UErrorCode status = U_ZERO_ERROR; 302 const UChar *mySource = chars; 303 const UChar *mySourceBegin; 304 const UChar *mySourceEnd; 305 char charBuffer[UFILE_CHARBUFFER_SIZE]; 306 char *myTarget = charBuffer; 307 int32_t written = 0; 308 int32_t numConverted = 0; 309 310 if (count < 0) { 311 count = u_strlen(chars); 312 } 313 314 #if !UCONFIG_NO_TRANSLITERATION 315 if((f->fTranslit) && (f->fTranslit->translit)) 316 { 317 /* Do the transliteration */ 318 mySource = u_file_translit(f, chars, &count, flushTranslit); 319 } 320 #endif 321 322 /* Write to a string. */ 323 if (!f->fFile) { 324 int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos); 325 if (flushIO && charsLeft > count) { 326 count++; 327 } 328 written = ufmt_min(count, charsLeft); 329 u_strncpy(f->str.fPos, mySource, written); 330 f->str.fPos += written; 331 return written; 332 } 333 334 mySourceEnd = mySource + count; 335 336 /* Perform the conversion in a loop */ 337 do { 338 mySourceBegin = mySource; /* beginning location for this loop */ 339 status = U_ZERO_ERROR; 340 if(f->fConverter != NULL) { /* We have a valid converter */ 341 ucnv_fromUnicode(f->fConverter, 342 &myTarget, 343 charBuffer + UFILE_CHARBUFFER_SIZE, 344 &mySource, 345 mySourceEnd, 346 NULL, 347 flushIO, 348 &status); 349 } else { /*weiv: do the invariant conversion */ 350 int32_t convertChars = (int32_t) (mySourceEnd - mySource); 351 if (convertChars > UFILE_CHARBUFFER_SIZE) { 352 convertChars = UFILE_CHARBUFFER_SIZE; 353 status = U_BUFFER_OVERFLOW_ERROR; 354 } 355 u_UCharsToChars(mySource, myTarget, convertChars); 356 mySource += convertChars; 357 myTarget += convertChars; 358 } 359 numConverted = (int32_t)(myTarget - charBuffer); 360 361 if (numConverted > 0) { 362 /* write the converted bytes */ 363 fwrite(charBuffer, 364 sizeof(char), 365 numConverted, 366 f->fFile); 367 368 written += (int32_t) (mySource - mySourceBegin); 369 } 370 myTarget = charBuffer; 371 } 372 while(status == U_BUFFER_OVERFLOW_ERROR); 373 374 /* return # of chars written */ 375 return written; 376 } 377 378 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 379 u_file_write( const UChar *chars, 380 int32_t count, 381 UFILE *f) 382 { 383 return u_file_write_flush(chars,count,f,FALSE,FALSE); 384 } 385 386 387 /* private function used for buffering input */ 388 void 389 ufile_fill_uchar_buffer(UFILE *f) 390 { 391 UErrorCode status; 392 const char *mySource; 393 const char *mySourceEnd; 394 UChar *myTarget; 395 int32_t bufferSize; 396 int32_t maxCPBytes; 397 int32_t bytesRead; 398 int32_t availLength; 399 int32_t dataSize; 400 char charBuffer[UFILE_CHARBUFFER_SIZE]; 401 u_localized_string *str; 402 403 if (f->fFile == NULL) { 404 /* There is nothing to do. It's a string. */ 405 return; 406 } 407 408 str = &f->str; 409 dataSize = (int32_t)(str->fLimit - str->fPos); 410 if (f->fFileno == 0 && dataSize > 0) { 411 /* Don't read from stdin too many times. There is still some data. */ 412 return; 413 } 414 415 /* shift the buffer if it isn't empty */ 416 if(dataSize != 0) { 417 uprv_memmove(f->fUCBuffer, str->fPos, dataSize * sizeof(UChar)); 418 } 419 420 421 /* record how much buffer space is available */ 422 availLength = UFILE_UCHARBUFFER_SIZE - dataSize; 423 424 /* Determine the # of codepage bytes needed to fill our UChar buffer */ 425 /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/ 426 maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1); 427 428 /* Read in the data to convert */ 429 if (f->fFileno == 0) { 430 /* Special case. Read from stdin one line at a time. */ 431 char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile); 432 bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0); 433 } 434 else { 435 /* A normal file */ 436 bytesRead = (int32_t)fread(charBuffer, 437 sizeof(char), 438 ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), 439 f->fFile); 440 } 441 442 /* Set up conversion parameters */ 443 status = U_ZERO_ERROR; 444 mySource = charBuffer; 445 mySourceEnd = charBuffer + bytesRead; 446 myTarget = f->fUCBuffer + dataSize; 447 bufferSize = UFILE_UCHARBUFFER_SIZE; 448 449 if(f->fConverter != NULL) { /* We have a valid converter */ 450 /* Perform the conversion */ 451 ucnv_toUnicode(f->fConverter, 452 &myTarget, 453 f->fUCBuffer + bufferSize, 454 &mySource, 455 mySourceEnd, 456 NULL, 457 (UBool)(feof(f->fFile) != 0), 458 &status); 459 460 } else { /*weiv: do the invariant conversion */ 461 u_charsToUChars(mySource, myTarget, bytesRead); 462 myTarget += bytesRead; 463 } 464 465 /* update the pointers into our array */ 466 str->fPos = str->fBuffer; 467 str->fLimit = myTarget; 468 } 469 470 U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 471 u_fgets(UChar *s, 472 int32_t n, 473 UFILE *f) 474 { 475 int32_t dataSize; 476 int32_t count; 477 UChar *alias; 478 const UChar *limit; 479 UChar *sItr; 480 UChar currDelim = 0; 481 u_localized_string *str; 482 483 if (n <= 0) { 484 /* Caller screwed up. We need to write the null terminatior. */ 485 return NULL; 486 } 487 488 /* fill the buffer if needed */ 489 str = &f->str; 490 if (str->fPos >= str->fLimit) { 491 ufile_fill_uchar_buffer(f); 492 } 493 494 /* subtract 1 from n to compensate for the terminator */ 495 --n; 496 497 /* determine the amount of data in the buffer */ 498 dataSize = (int32_t)(str->fLimit - str->fPos); 499 500 /* if 0 characters were left, return 0 */ 501 if (dataSize == 0) 502 return NULL; 503 504 /* otherwise, iteratively fill the buffer and copy */ 505 count = 0; 506 sItr = s; 507 currDelim = 0; 508 while (dataSize > 0 && count < n) { 509 alias = str->fPos; 510 511 /* Find how much to copy */ 512 if (dataSize < (n - count)) { 513 limit = str->fLimit; 514 } 515 else { 516 limit = alias + (n - count); 517 } 518 519 if (!currDelim) { 520 /* Copy UChars until we find the first occurrence of a delimiter character */ 521 while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) { 522 count++; 523 *(sItr++) = *(alias++); 524 } 525 /* Preserve the newline */ 526 if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) { 527 if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) { 528 currDelim = *alias; 529 } 530 else { 531 currDelim = 1; /* This isn't a newline, but it's used to say 532 that we should break later. We've checked all 533 possible newline combinations even across buffer 534 boundaries. */ 535 } 536 count++; 537 *(sItr++) = *(alias++); 538 } 539 } 540 /* If we have a CRLF combination, preserve that too. */ 541 if (alias < limit) { 542 if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) { 543 count++; 544 *(sItr++) = *(alias++); 545 } 546 currDelim = 1; /* This isn't a newline, but it's used to say 547 that we should break later. We've checked all 548 possible newline combinations even across buffer 549 boundaries. */ 550 } 551 552 /* update the current buffer position */ 553 str->fPos = alias; 554 555 /* if we found a delimiter */ 556 if (currDelim == 1) { 557 /* break out */ 558 break; 559 } 560 561 /* refill the buffer */ 562 ufile_fill_uchar_buffer(f); 563 564 /* determine the amount of data in the buffer */ 565 dataSize = (int32_t)(str->fLimit - str->fPos); 566 } 567 568 /* add the terminator and return s */ 569 *sItr = 0x0000; 570 return s; 571 } 572 573 U_CFUNC UBool U_EXPORT2 574 ufile_getch(UFILE *f, UChar *ch) 575 { 576 UBool isValidChar = FALSE; 577 578 *ch = U_EOF; 579 /* if we have an available character in the buffer, return it */ 580 if(f->str.fPos < f->str.fLimit){ 581 *ch = *(f->str.fPos)++; 582 isValidChar = TRUE; 583 } 584 else { 585 /* otherwise, fill the buffer and return the next character */ 586 if(f->str.fPos >= f->str.fLimit) { 587 ufile_fill_uchar_buffer(f); 588 } 589 if(f->str.fPos < f->str.fLimit) { 590 *ch = *(f->str.fPos)++; 591 isValidChar = TRUE; 592 } 593 } 594 return isValidChar; 595 } 596 597 U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 598 u_fgetc(UFILE *f) 599 { 600 UChar ch; 601 ufile_getch(f, &ch); 602 return ch; 603 } 604 605 U_CFUNC UBool U_EXPORT2 606 ufile_getch32(UFILE *f, UChar32 *c32) 607 { 608 UBool isValidChar = FALSE; 609 u_localized_string *str; 610 611 *c32 = U_EOF; 612 613 /* Fill the buffer if it is empty */ 614 str = &f->str; 615 if (f && str->fPos + 1 >= str->fLimit) { 616 ufile_fill_uchar_buffer(f); 617 } 618 619 /* Get the next character in the buffer */ 620 if (str->fPos < str->fLimit) { 621 *c32 = *(str->fPos)++; 622 if (U_IS_LEAD(*c32)) { 623 if (str->fPos < str->fLimit) { 624 UChar c16 = *(str->fPos)++; 625 *c32 = U16_GET_SUPPLEMENTARY(*c32, c16); 626 isValidChar = TRUE; 627 } 628 else { 629 *c32 = U_EOF; 630 } 631 } 632 else { 633 isValidChar = TRUE; 634 } 635 } 636 637 return isValidChar; 638 } 639 640 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 641 u_fgetcx(UFILE *f) 642 { 643 UChar32 ch; 644 ufile_getch32(f, &ch); 645 return ch; 646 } 647 648 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 649 u_fungetc(UChar32 ch, 650 UFILE *f) 651 { 652 u_localized_string *str; 653 654 str = &f->str; 655 656 /* if we're at the beginning of the buffer, sorry! */ 657 if (str->fPos == str->fBuffer 658 || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer)) 659 { 660 ch = U_EOF; 661 } 662 else { 663 /* otherwise, put the character back */ 664 /* Remember, read them back on in the reverse order. */ 665 if (U_IS_LEAD(ch)) { 666 if (*--(str->fPos) != U16_TRAIL(ch) 667 || *--(str->fPos) != U16_LEAD(ch)) 668 { 669 ch = U_EOF; 670 } 671 } 672 else if (*--(str->fPos) != ch) { 673 ch = U_EOF; 674 } 675 } 676 return ch; 677 } 678 679 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 680 u_file_read( UChar *chars, 681 int32_t count, 682 UFILE *f) 683 { 684 int32_t dataSize; 685 int32_t read = 0; 686 u_localized_string *str = &f->str; 687 688 do { 689 690 /* determine the amount of data in the buffer */ 691 dataSize = (int32_t)(str->fLimit - str->fPos); 692 if (dataSize <= 0) { 693 /* fill the buffer */ 694 ufile_fill_uchar_buffer(f); 695 dataSize = (int32_t)(str->fLimit - str->fPos); 696 } 697 698 /* Make sure that we don't read too much */ 699 if (dataSize > (count - read)) { 700 dataSize = count - read; 701 } 702 703 /* copy the current data in the buffer */ 704 memcpy(chars + read, str->fPos, dataSize * sizeof(UChar)); 705 706 /* update number of items read */ 707 read += dataSize; 708 709 /* update the current buffer position */ 710 str->fPos += dataSize; 711 } 712 while (dataSize != 0 && read < count); 713 714 return read; 715 } 716