1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ****************************************************************************** 5 * 6 * Copyright (C) 1998-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ****************************************************************************** 10 * 11 * File ustdio.c 12 * 13 * Modification History: 14 * 15 * Date Name Description 16 * 11/18/98 stephen Creation. 17 * 03/12/99 stephen Modified for new C API. 18 * 07/19/99 stephen Fixed read() and gets() 19 ****************************************************************************** 20 */ 21 22 #include "unicode/ustdio.h" 23 24 #if !UCONFIG_NO_CONVERSION 25 26 #include "unicode/putil.h" 27 #include "cmemory.h" 28 #include "cstring.h" 29 #include "ufile.h" 30 #include "ufmt_cmn.h" 31 #include "unicode/ucnv.h" 32 #include "unicode/ustring.h" 33 34 #include <string.h> 35 36 #define DELIM_LF 0x000A 37 #define DELIM_VT 0x000B 38 #define DELIM_FF 0x000C 39 #define DELIM_CR 0x000D 40 #define DELIM_NEL 0x0085 41 #define DELIM_LS 0x2028 42 #define DELIM_PS 0x2029 43 44 /* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */ 45 #if U_PLATFORM_USES_ONLY_WIN32_API 46 static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 }; 47 static const uint32_t DELIMITERS_LEN = 2; 48 /* TODO: Default newline writing should be detected based upon the converter being used. */ 49 #else 50 static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 }; 51 static const uint32_t DELIMITERS_LEN = 1; 52 #endif 53 54 #define IS_FIRST_STRING_DELIMITER(c1) \ 55 (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \ 56 || (c1) == DELIM_NEL \ 57 || (c1) == DELIM_LS \ 58 || (c1) == DELIM_PS) 59 #define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR) 60 #define IS_COMBINED_STRING_DELIMITER(c1, c2) \ 61 (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF) 62 63 64 #if !UCONFIG_NO_TRANSLITERATION 65 66 U_CAPI UTransliterator* U_EXPORT2 67 u_fsettransliterator(UFILE *file, UFileDirection direction, 68 UTransliterator *adopt, UErrorCode *status) 69 { 70 UTransliterator *old = NULL; 71 72 if(U_FAILURE(*status)) 73 { 74 return adopt; 75 } 76 77 if(!file) 78 { 79 *status = U_ILLEGAL_ARGUMENT_ERROR; 80 return adopt; 81 } 82 83 if(direction & U_READ) 84 { 85 /** TODO: implement */ 86 *status = U_UNSUPPORTED_ERROR; 87 return adopt; 88 } 89 90 if(adopt == NULL) /* they are clearing it */ 91 { 92 if(file->fTranslit != NULL) 93 { 94 /* TODO: Check side */ 95 old = file->fTranslit->translit; 96 uprv_free(file->fTranslit->buffer); 97 file->fTranslit->buffer=NULL; 98 uprv_free(file->fTranslit); 99 file->fTranslit=NULL; 100 } 101 } 102 else 103 { 104 if(file->fTranslit == NULL) 105 { 106 file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer)); 107 if(!file->fTranslit) 108 { 109 *status = U_MEMORY_ALLOCATION_ERROR; 110 return adopt; 111 } 112 file->fTranslit->capacity = 0; 113 file->fTranslit->length = 0; 114 file->fTranslit->pos = 0; 115 file->fTranslit->buffer = NULL; 116 } 117 else 118 { 119 old = file->fTranslit->translit; 120 ufile_flush_translit(file); 121 } 122 123 file->fTranslit->translit = adopt; 124 } 125 126 return old; 127 } 128 129 static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush) 130 { 131 int32_t newlen; 132 int32_t junkCount = 0; 133 int32_t textLength; 134 int32_t textLimit; 135 UTransPosition pos; 136 UErrorCode status = U_ZERO_ERROR; 137 138 if(count == NULL) 139 { 140 count = &junkCount; 141 } 142 143 if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit)) 144 { 145 /* fast path */ 146 return src; 147 } 148 149 /* First: slide over everything */ 150 if(f->fTranslit->length > f->fTranslit->pos) 151 { 152 memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos, 153 (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar)); 154 } 155 f->fTranslit->length -= f->fTranslit->pos; /* always */ 156 f->fTranslit->pos = 0; 157 158 /* Calculate new buffer size needed */ 159 newlen = (*count + f->fTranslit->length) * 4; 160 161 if(newlen > f->fTranslit->capacity) 162 { 163 if(f->fTranslit->buffer == NULL) 164 { 165 f->fTranslit->buffer = (UChar*)uprv_malloc(newlen * sizeof(UChar)); 166 } 167 else 168 { 169 f->fTranslit->buffer = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar)); 170 } 171 /* Check for malloc/realloc failure. */ 172 if (f->fTranslit->buffer == NULL) { 173 return NULL; 174 } 175 f->fTranslit->capacity = newlen; 176 } 177 178 /* Now, copy any data over */ 179 u_strncpy(f->fTranslit->buffer + f->fTranslit->length, 180 src, 181 *count); 182 f->fTranslit->length += *count; 183 184 /* Now, translit in place as much as we can */ 185 if(flush == FALSE) 186 { 187 textLength = f->fTranslit->length; 188 pos.contextStart = 0; 189 pos.contextLimit = textLength; 190 pos.start = 0; 191 pos.limit = textLength; 192 193 utrans_transIncrementalUChars(f->fTranslit->translit, 194 f->fTranslit->buffer, /* because we shifted */ 195 &textLength, 196 f->fTranslit->capacity, 197 &pos, 198 &status); 199 200 /* now: start/limit point to the transliterated text */ 201 /* Transliterated is [buffer..pos.start) */ 202 *count = pos.start; 203 f->fTranslit->pos = pos.start; 204 f->fTranslit->length = pos.limit; 205 206 return f->fTranslit->buffer; 207 } 208 else 209 { 210 textLength = f->fTranslit->length; 211 textLimit = f->fTranslit->length; 212 213 utrans_transUChars(f->fTranslit->translit, 214 f->fTranslit->buffer, 215 &textLength, 216 f->fTranslit->capacity, 217 0, 218 &textLimit, 219 &status); 220 221 /* out: converted len */ 222 *count = textLimit; 223 224 /* Set pointers to 0 */ 225 f->fTranslit->pos = 0; 226 f->fTranslit->length = 0; 227 228 return f->fTranslit->buffer; 229 } 230 } 231 232 #endif 233 234 void 235 ufile_flush_translit(UFILE *f) 236 { 237 #if !UCONFIG_NO_TRANSLITERATION 238 if((!f)||(!f->fTranslit)) 239 return; 240 #endif 241 242 u_file_write_flush(NULL, 0, f, FALSE, TRUE); 243 } 244 245 246 void 247 ufile_flush_io(UFILE *f) 248 { 249 if((!f) || (!f->fFile)) { 250 return; /* skip if no file */ 251 } 252 253 u_file_write_flush(NULL, 0, f, TRUE, FALSE); 254 } 255 256 257 void 258 ufile_close_translit(UFILE *f) 259 { 260 #if !UCONFIG_NO_TRANSLITERATION 261 if((!f)||(!f->fTranslit)) 262 return; 263 #endif 264 265 ufile_flush_translit(f); 266 267 #if !UCONFIG_NO_TRANSLITERATION 268 if(f->fTranslit->translit) 269 utrans_close(f->fTranslit->translit); 270 271 if(f->fTranslit->buffer) 272 { 273 uprv_free(f->fTranslit->buffer); 274 } 275 276 uprv_free(f->fTranslit); 277 f->fTranslit = NULL; 278 #endif 279 } 280 281 282 /* Input/output */ 283 284 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 285 u_fputs(const UChar *s, 286 UFILE *f) 287 { 288 int32_t count = u_file_write(s, u_strlen(s), f); 289 count += u_file_write(DELIMITERS, DELIMITERS_LEN, f); 290 return count; 291 } 292 293 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 294 u_fputc(UChar32 uc, 295 UFILE *f) 296 { 297 UChar buf[2]; 298 int32_t idx = 0; 299 UBool isError = FALSE; 300 301 U16_APPEND(buf, idx, UPRV_LENGTHOF(buf), uc, isError); 302 if (isError) { 303 return U_EOF; 304 } 305 return u_file_write(buf, idx, f) == idx ? uc : U_EOF; 306 } 307 308 309 U_CFUNC int32_t U_EXPORT2 310 u_file_write_flush(const UChar *chars, 311 int32_t count, 312 UFILE *f, 313 UBool flushIO, 314 UBool flushTranslit) 315 { 316 /* Set up conversion parameters */ 317 UErrorCode status = U_ZERO_ERROR; 318 const UChar *mySource = chars; 319 const UChar *mySourceBegin; 320 const UChar *mySourceEnd; 321 char charBuffer[UFILE_CHARBUFFER_SIZE]; 322 char *myTarget = charBuffer; 323 int32_t written = 0; 324 int32_t numConverted = 0; 325 326 if (count < 0) { 327 count = u_strlen(chars); 328 } 329 330 #if !UCONFIG_NO_TRANSLITERATION 331 if((f->fTranslit) && (f->fTranslit->translit)) 332 { 333 /* Do the transliteration */ 334 mySource = u_file_translit(f, chars, &count, flushTranslit); 335 } 336 #endif 337 338 /* Write to a string. */ 339 if (!f->fFile) { 340 int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos); 341 if (flushIO && charsLeft > count) { 342 count++; 343 } 344 written = ufmt_min(count, charsLeft); 345 u_strncpy(f->str.fPos, mySource, written); 346 f->str.fPos += written; 347 return written; 348 } 349 350 mySourceEnd = mySource + count; 351 352 /* Perform the conversion in a loop */ 353 do { 354 mySourceBegin = mySource; /* beginning location for this loop */ 355 status = U_ZERO_ERROR; 356 if(f->fConverter != NULL) { /* We have a valid converter */ 357 ucnv_fromUnicode(f->fConverter, 358 &myTarget, 359 charBuffer + UFILE_CHARBUFFER_SIZE, 360 &mySource, 361 mySourceEnd, 362 NULL, 363 flushIO, 364 &status); 365 } else { /*weiv: do the invariant conversion */ 366 int32_t convertChars = (int32_t) (mySourceEnd - mySource); 367 if (convertChars > UFILE_CHARBUFFER_SIZE) { 368 convertChars = UFILE_CHARBUFFER_SIZE; 369 status = U_BUFFER_OVERFLOW_ERROR; 370 } 371 u_UCharsToChars(mySource, myTarget, convertChars); 372 mySource += convertChars; 373 myTarget += convertChars; 374 } 375 numConverted = (int32_t)(myTarget - charBuffer); 376 377 if (numConverted > 0) { 378 /* write the converted bytes */ 379 fwrite(charBuffer, 380 sizeof(char), 381 numConverted, 382 f->fFile); 383 384 written += (int32_t) (mySource - mySourceBegin); 385 } 386 myTarget = charBuffer; 387 } 388 while(status == U_BUFFER_OVERFLOW_ERROR); 389 390 /* return # of chars written */ 391 return written; 392 } 393 394 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 395 u_file_write( const UChar *chars, 396 int32_t count, 397 UFILE *f) 398 { 399 return u_file_write_flush(chars,count,f,FALSE,FALSE); 400 } 401 402 403 /* private function used for buffering input */ 404 void 405 ufile_fill_uchar_buffer(UFILE *f) 406 { 407 UErrorCode status; 408 const char *mySource; 409 const char *mySourceEnd; 410 UChar *myTarget; 411 int32_t bufferSize; 412 int32_t maxCPBytes; 413 int32_t bytesRead; 414 int32_t availLength; 415 int32_t dataSize; 416 char charBuffer[UFILE_CHARBUFFER_SIZE]; 417 u_localized_string *str; 418 419 if (f->fFile == NULL) { 420 /* There is nothing to do. It's a string. */ 421 return; 422 } 423 424 str = &f->str; 425 dataSize = (int32_t)(str->fLimit - str->fPos); 426 if (f->fFileno == 0 && dataSize > 0) { 427 /* Don't read from stdin too many times. There is still some data. */ 428 return; 429 } 430 431 /* shift the buffer if it isn't empty */ 432 if(dataSize != 0) { 433 u_memmove(f->fUCBuffer, str->fPos, dataSize); /* not accessing beyond memory */ 434 } 435 436 437 /* record how much buffer space is available */ 438 availLength = UFILE_UCHARBUFFER_SIZE - dataSize; 439 440 /* Determine the # of codepage bytes needed to fill our UChar buffer */ 441 /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/ 442 maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1); 443 444 /* Read in the data to convert */ 445 if (f->fFileno == 0) { 446 /* Special case. Read from stdin one line at a time. */ 447 char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile); 448 bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0); 449 } 450 else { 451 /* A normal file */ 452 bytesRead = (int32_t)fread(charBuffer, 453 sizeof(char), 454 ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), 455 f->fFile); 456 } 457 458 /* Set up conversion parameters */ 459 status = U_ZERO_ERROR; 460 mySource = charBuffer; 461 mySourceEnd = charBuffer + bytesRead; 462 myTarget = f->fUCBuffer + dataSize; 463 bufferSize = UFILE_UCHARBUFFER_SIZE; 464 465 if(f->fConverter != NULL) { /* We have a valid converter */ 466 /* Perform the conversion */ 467 ucnv_toUnicode(f->fConverter, 468 &myTarget, 469 f->fUCBuffer + bufferSize, 470 &mySource, 471 mySourceEnd, 472 NULL, 473 (UBool)(feof(f->fFile) != 0), 474 &status); 475 476 } else { /*weiv: do the invariant conversion */ 477 u_charsToUChars(mySource, myTarget, bytesRead); 478 myTarget += bytesRead; 479 } 480 481 /* update the pointers into our array */ 482 str->fPos = str->fBuffer; 483 str->fLimit = myTarget; 484 } 485 486 U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 487 u_fgets(UChar *s, 488 int32_t n, 489 UFILE *f) 490 { 491 int32_t dataSize; 492 int32_t count; 493 UChar *alias; 494 const UChar *limit; 495 UChar *sItr; 496 UChar currDelim = 0; 497 u_localized_string *str; 498 499 if (n <= 0) { 500 /* Caller screwed up. We need to write the null terminatior. */ 501 return NULL; 502 } 503 504 /* fill the buffer if needed */ 505 str = &f->str; 506 if (str->fPos >= str->fLimit) { 507 ufile_fill_uchar_buffer(f); 508 } 509 510 /* subtract 1 from n to compensate for the terminator */ 511 --n; 512 513 /* determine the amount of data in the buffer */ 514 dataSize = (int32_t)(str->fLimit - str->fPos); 515 516 /* if 0 characters were left, return 0 */ 517 if (dataSize == 0) 518 return NULL; 519 520 /* otherwise, iteratively fill the buffer and copy */ 521 count = 0; 522 sItr = s; 523 currDelim = 0; 524 while (dataSize > 0 && count < n) { 525 alias = str->fPos; 526 527 /* Find how much to copy */ 528 if (dataSize < (n - count)) { 529 limit = str->fLimit; 530 } 531 else { 532 limit = alias + (n - count); 533 } 534 535 if (!currDelim) { 536 /* Copy UChars until we find the first occurrence of a delimiter character */ 537 while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) { 538 count++; 539 *(sItr++) = *(alias++); 540 } 541 /* Preserve the newline */ 542 if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) { 543 if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) { 544 currDelim = *alias; 545 } 546 else { 547 currDelim = 1; /* This isn't a newline, but it's used to say 548 that we should break later. We've checked all 549 possible newline combinations even across buffer 550 boundaries. */ 551 } 552 count++; 553 *(sItr++) = *(alias++); 554 } 555 } 556 /* If we have a CRLF combination, preserve that too. */ 557 if (alias < limit) { 558 if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) { 559 count++; 560 *(sItr++) = *(alias++); 561 } 562 currDelim = 1; /* This isn't a newline, but it's used to say 563 that we should break later. We've checked all 564 possible newline combinations even across buffer 565 boundaries. */ 566 } 567 568 /* update the current buffer position */ 569 str->fPos = alias; 570 571 /* if we found a delimiter */ 572 if (currDelim == 1) { 573 /* break out */ 574 break; 575 } 576 577 /* refill the buffer */ 578 ufile_fill_uchar_buffer(f); 579 580 /* determine the amount of data in the buffer */ 581 dataSize = (int32_t)(str->fLimit - str->fPos); 582 } 583 584 /* add the terminator and return s */ 585 *sItr = 0x0000; 586 return s; 587 } 588 589 U_CFUNC UBool U_EXPORT2 590 ufile_getch(UFILE *f, UChar *ch) 591 { 592 UBool isValidChar = FALSE; 593 594 *ch = U_EOF; 595 /* if we have an available character in the buffer, return it */ 596 if(f->str.fPos < f->str.fLimit){ 597 *ch = *(f->str.fPos)++; 598 isValidChar = TRUE; 599 } 600 else { 601 /* otherwise, fill the buffer and return the next character */ 602 if(f->str.fPos >= f->str.fLimit) { 603 ufile_fill_uchar_buffer(f); 604 } 605 if(f->str.fPos < f->str.fLimit) { 606 *ch = *(f->str.fPos)++; 607 isValidChar = TRUE; 608 } 609 } 610 return isValidChar; 611 } 612 613 U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 614 u_fgetc(UFILE *f) 615 { 616 UChar ch; 617 ufile_getch(f, &ch); 618 return ch; 619 } 620 621 U_CFUNC UBool U_EXPORT2 622 ufile_getch32(UFILE *f, UChar32 *c32) 623 { 624 UBool isValidChar = FALSE; 625 u_localized_string *str; 626 627 *c32 = U_EOF; 628 629 /* Fill the buffer if it is empty */ 630 str = &f->str; 631 if (f && str->fPos + 1 >= str->fLimit) { 632 ufile_fill_uchar_buffer(f); 633 } 634 635 /* Get the next character in the buffer */ 636 if (str->fPos < str->fLimit) { 637 *c32 = *(str->fPos)++; 638 if (U_IS_LEAD(*c32)) { 639 if (str->fPos < str->fLimit) { 640 UChar c16 = *(str->fPos)++; 641 *c32 = U16_GET_SUPPLEMENTARY(*c32, c16); 642 isValidChar = TRUE; 643 } 644 else { 645 *c32 = U_EOF; 646 } 647 } 648 else { 649 isValidChar = TRUE; 650 } 651 } 652 653 return isValidChar; 654 } 655 656 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 657 u_fgetcx(UFILE *f) 658 { 659 UChar32 ch; 660 ufile_getch32(f, &ch); 661 return ch; 662 } 663 664 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 665 u_fungetc(UChar32 ch, 666 UFILE *f) 667 { 668 u_localized_string *str; 669 670 str = &f->str; 671 672 /* if we're at the beginning of the buffer, sorry! */ 673 if (str->fPos == str->fBuffer 674 || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer)) 675 { 676 ch = U_EOF; 677 } 678 else { 679 /* otherwise, put the character back */ 680 /* Remember, read them back on in the reverse order. */ 681 if (U_IS_LEAD(ch)) { 682 if (*--(str->fPos) != U16_TRAIL(ch) 683 || *--(str->fPos) != U16_LEAD(ch)) 684 { 685 ch = U_EOF; 686 } 687 } 688 else if (*--(str->fPos) != ch) { 689 ch = U_EOF; 690 } 691 } 692 return ch; 693 } 694 695 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 696 u_file_read( UChar *chars, 697 int32_t count, 698 UFILE *f) 699 { 700 int32_t dataSize; 701 int32_t read = 0; 702 u_localized_string *str = &f->str; 703 704 do { 705 706 /* determine the amount of data in the buffer */ 707 dataSize = (int32_t)(str->fLimit - str->fPos); 708 if (dataSize <= 0) { 709 /* fill the buffer */ 710 ufile_fill_uchar_buffer(f); 711 dataSize = (int32_t)(str->fLimit - str->fPos); 712 } 713 714 /* Make sure that we don't read too much */ 715 if (dataSize > (count - read)) { 716 dataSize = count - read; 717 } 718 719 /* copy the current data in the buffer */ 720 memcpy(chars + read, str->fPos, dataSize * sizeof(UChar)); 721 722 /* update number of items read */ 723 read += dataSize; 724 725 /* update the current buffer position */ 726 str->fPos += dataSize; 727 } 728 while (dataSize != 0 && read < count); 729 730 return read; 731 } 732 #endif 733