1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 1998-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * 11 * File ucbuf.cpp 12 * 13 * Modification History: 14 * 15 * Date Name Description 16 * 05/10/01 Ram Creation. 17 ******************************************************************************* 18 */ 19 20 #include "unicode/utypes.h" 21 #include "unicode/putil.h" 22 #include "unicode/uchar.h" 23 #include "unicode/ucnv.h" 24 #include "unicode/ucnv_err.h" 25 #include "unicode/ustring.h" 26 #include "unicode/utf16.h" 27 #include "filestrm.h" 28 #include "cstring.h" 29 #include "cmemory.h" 30 #include "ustrfmt.h" 31 #include "ucbuf.h" 32 #include <stdio.h> 33 34 #if !UCONFIG_NO_CONVERSION 35 36 37 #define MAX_IN_BUF 1000 38 #define MAX_U_BUF 1500 39 #define CONTEXT_LEN 20 40 41 struct UCHARBUF { 42 UChar* buffer; 43 UChar* currentPos; 44 UChar* bufLimit; 45 int32_t bufCapacity; 46 int32_t remaining; 47 int32_t signatureLength; 48 FileStream* in; 49 UConverter* conv; 50 UBool showWarning; /* makes this API not produce any errors */ 51 UBool isBuffered; 52 }; 53 54 U_CAPI UBool U_EXPORT2 55 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){ 56 char start[8]; 57 int32_t numRead; 58 59 UChar target[1]={ 0 }; 60 UChar* pTarget; 61 const char* pStart; 62 63 /* read a few bytes */ 64 numRead=T_FileStream_read(in, start, sizeof(start)); 65 66 *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error); 67 68 /* unread the bytes beyond what was consumed for U+FEFF */ 69 T_FileStream_rewind(in); 70 if (*signatureLength > 0) { 71 T_FileStream_read(in, start, *signatureLength); 72 } 73 74 if(*cp==NULL){ 75 *conv =NULL; 76 return FALSE; 77 } 78 79 /* open the converter for the detected Unicode charset */ 80 *conv = ucnv_open(*cp,error); 81 82 /* convert and ignore initial U+FEFF, and the buffer overflow */ 83 pTarget = target; 84 pStart = start; 85 ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error); 86 *signatureLength = (int32_t)(pStart - start); 87 if(*error==U_BUFFER_OVERFLOW_ERROR) { 88 *error=U_ZERO_ERROR; 89 } 90 91 /* verify that we successfully read exactly U+FEFF */ 92 if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) { 93 *error=U_INTERNAL_PROGRAM_ERROR; 94 } 95 96 97 return TRUE; 98 } 99 static UBool ucbuf_isCPKnown(const char* cp){ 100 if(ucnv_compareNames("UTF-8",cp)==0){ 101 return TRUE; 102 } 103 if(ucnv_compareNames("UTF-16BE",cp)==0){ 104 return TRUE; 105 } 106 if(ucnv_compareNames("UTF-16LE",cp)==0){ 107 return TRUE; 108 } 109 if(ucnv_compareNames("UTF-16",cp)==0){ 110 return TRUE; 111 } 112 if(ucnv_compareNames("UTF-32",cp)==0){ 113 return TRUE; 114 } 115 if(ucnv_compareNames("UTF-32BE",cp)==0){ 116 return TRUE; 117 } 118 if(ucnv_compareNames("UTF-32LE",cp)==0){ 119 return TRUE; 120 } 121 if(ucnv_compareNames("SCSU",cp)==0){ 122 return TRUE; 123 } 124 if(ucnv_compareNames("BOCU-1",cp)==0){ 125 return TRUE; 126 } 127 if(ucnv_compareNames("UTF-7",cp)==0){ 128 return TRUE; 129 } 130 return FALSE; 131 } 132 133 U_CAPI FileStream * U_EXPORT2 134 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){ 135 FileStream* in=NULL; 136 if(error==NULL || U_FAILURE(*error)){ 137 return NULL; 138 } 139 if(conv==NULL || cp==NULL || fileName==NULL){ 140 *error = U_ILLEGAL_ARGUMENT_ERROR; 141 return NULL; 142 } 143 /* open the file */ 144 in= T_FileStream_open(fileName,"rb"); 145 146 if(in == NULL){ 147 *error=U_FILE_ACCESS_ERROR; 148 return NULL; 149 } 150 151 if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) { 152 return in; 153 } else { 154 ucnv_close(*conv); 155 *conv=NULL; 156 T_FileStream_close(in); 157 return NULL; 158 } 159 } 160 161 /* fill the uchar buffer */ 162 static UCHARBUF* 163 ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){ 164 UChar* pTarget=NULL; 165 UChar* target=NULL; 166 const char* source=NULL; 167 char carr[MAX_IN_BUF] = {'\0'}; 168 char* cbuf = carr; 169 int32_t inputRead=0; 170 int32_t outputWritten=0; 171 int32_t offset=0; 172 const char* sourceLimit =NULL; 173 int32_t cbufSize=0; 174 pTarget = buf->buffer; 175 /* check if we arrived here without exhausting the buffer*/ 176 if(buf->currentPos<buf->bufLimit){ 177 offset = (int32_t)(buf->bufLimit-buf->currentPos); 178 memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar)); 179 } 180 181 #if UCBUF_DEBUG 182 memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset)); 183 #endif 184 if(buf->isBuffered){ 185 cbufSize = MAX_IN_BUF; 186 /* read the file */ 187 inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset); 188 buf->remaining-=inputRead; 189 190 }else{ 191 cbufSize = T_FileStream_size(buf->in); 192 cbuf = (char*)uprv_malloc(cbufSize); 193 if (cbuf == NULL) { 194 *error = U_MEMORY_ALLOCATION_ERROR; 195 return NULL; 196 } 197 inputRead= T_FileStream_read(buf->in,cbuf,cbufSize); 198 buf->remaining-=inputRead; 199 } 200 201 /* just to be sure...*/ 202 if ( 0 == inputRead ) 203 buf->remaining = 0; 204 205 target=pTarget; 206 /* convert the bytes */ 207 if(buf->conv){ 208 /* set the callback to stop */ 209 UConverterToUCallback toUOldAction ; 210 void* toUOldContext; 211 void* toUNewContext=NULL; 212 ucnv_setToUCallBack(buf->conv, 213 UCNV_TO_U_CALLBACK_STOP, 214 toUNewContext, 215 &toUOldAction, 216 (const void**)&toUOldContext, 217 error); 218 /* since state is saved in the converter we add offset to source*/ 219 target = pTarget+offset; 220 source = cbuf; 221 sourceLimit = source + inputRead; 222 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), 223 &source,sourceLimit,NULL, 224 (UBool)(buf->remaining==0),error); 225 226 if(U_FAILURE(*error)){ 227 char context[CONTEXT_LEN+1]; 228 char preContext[CONTEXT_LEN+1]; 229 char postContext[CONTEXT_LEN+1]; 230 int8_t len = CONTEXT_LEN; 231 int32_t start=0; 232 int32_t stop =0; 233 int32_t pos =0; 234 /* use erro1 to preserve the error code */ 235 UErrorCode error1 =U_ZERO_ERROR; 236 237 if( buf->showWarning==TRUE){ 238 fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while" 239 " converting input stream to target encoding: %s\n", 240 u_errorName(*error)); 241 } 242 243 244 /* now get the context chars */ 245 ucnv_getInvalidChars(buf->conv,context,&len,&error1); 246 context[len]= 0 ; /* null terminate the buffer */ 247 248 pos = (int32_t)(source - cbuf - len); 249 250 /* for pre-context */ 251 start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1)); 252 stop = pos-len; 253 254 memcpy(preContext,cbuf+start,stop-start); 255 /* null terminate the buffer */ 256 preContext[stop-start] = 0; 257 258 /* for post-context */ 259 start = pos+len; 260 stop = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf)); 261 262 memcpy(postContext,source,stop-start); 263 /* null terminate the buffer */ 264 postContext[stop-start] = 0; 265 266 if(buf->showWarning ==TRUE){ 267 /* print out the context */ 268 fprintf(stderr,"\tPre-context: %s\n",preContext); 269 fprintf(stderr,"\tContext: %s\n",context); 270 fprintf(stderr,"\tPost-context: %s\n", postContext); 271 } 272 273 /* reset the converter */ 274 ucnv_reset(buf->conv); 275 276 /* set the call back to substitute 277 * and restart conversion 278 */ 279 ucnv_setToUCallBack(buf->conv, 280 UCNV_TO_U_CALLBACK_SUBSTITUTE, 281 toUNewContext, 282 &toUOldAction, 283 (const void**)&toUOldContext, 284 &error1); 285 286 /* reset source and target start positions */ 287 target = pTarget+offset; 288 source = cbuf; 289 290 /* re convert */ 291 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), 292 &source,sourceLimit,NULL, 293 (UBool)(buf->remaining==0),&error1); 294 295 } 296 outputWritten = (int32_t)(target - pTarget); 297 298 #if UCBUF_DEBUG 299 { 300 int i; 301 target = pTarget; 302 for(i=0;i<numRead;i++){ 303 /* printf("%c", (char)(*target++));*/ 304 } 305 } 306 #endif 307 308 }else{ 309 u_charsToUChars(cbuf,target+offset,inputRead); 310 outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset); 311 } 312 buf->currentPos = pTarget; 313 buf->bufLimit=pTarget+outputWritten; 314 *buf->bufLimit=0; /*NUL terminate*/ 315 if(cbuf!=carr){ 316 uprv_free(cbuf); 317 } 318 return buf; 319 } 320 321 322 323 /* get a UChar from the stream*/ 324 U_CAPI int32_t U_EXPORT2 325 ucbuf_getc(UCHARBUF* buf,UErrorCode* error){ 326 if(error==NULL || U_FAILURE(*error)){ 327 return FALSE; 328 } 329 if(buf->currentPos>=buf->bufLimit){ 330 if(buf->remaining==0){ 331 return U_EOF; 332 } 333 buf=ucbuf_fillucbuf(buf,error); 334 if(U_FAILURE(*error)){ 335 return U_EOF; 336 } 337 } 338 339 return *(buf->currentPos++); 340 } 341 342 /* get a UChar32 from the stream*/ 343 U_CAPI int32_t U_EXPORT2 344 ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){ 345 int32_t retVal = (int32_t)U_EOF; 346 if(error==NULL || U_FAILURE(*error)){ 347 return FALSE; 348 } 349 if(buf->currentPos+1>=buf->bufLimit){ 350 if(buf->remaining==0){ 351 return U_EOF; 352 } 353 buf=ucbuf_fillucbuf(buf,error); 354 if(U_FAILURE(*error)){ 355 return U_EOF; 356 } 357 } 358 if(U16_IS_LEAD(*(buf->currentPos))){ 359 retVal=U16_GET_SUPPLEMENTARY(buf->currentPos[0],buf->currentPos[1]); 360 buf->currentPos+=2; 361 }else{ 362 retVal = *(buf->currentPos++); 363 } 364 return retVal; 365 } 366 367 /* u_unescapeAt() callback to return a UChar*/ 368 static UChar U_CALLCONV 369 _charAt(int32_t offset, void *context) { 370 return ((UCHARBUF*) context)->currentPos[offset]; 371 } 372 373 /* getc and escape it */ 374 U_CAPI int32_t U_EXPORT2 375 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) { 376 int32_t length; 377 int32_t offset; 378 UChar32 c32,c1,c2; 379 if(error==NULL || U_FAILURE(*error)){ 380 return FALSE; 381 } 382 /* Fill the buffer if it is empty */ 383 if (buf->currentPos >=buf->bufLimit-2) { 384 ucbuf_fillucbuf(buf,error); 385 } 386 387 /* Get the next character in the buffer */ 388 if (buf->currentPos < buf->bufLimit) { 389 c1 = *(buf->currentPos)++; 390 } else { 391 c1 = U_EOF; 392 } 393 394 c2 = *(buf->currentPos); 395 396 /* If it isn't a backslash, return it */ 397 if (c1 != 0x005C) { 398 return c1; 399 } 400 401 /* Determine the amount of data in the buffer */ 402 length = (int32_t)(buf->bufLimit - buf->currentPos); 403 404 /* The longest escape sequence is \Uhhhhhhhh; make sure 405 we have at least that many characters */ 406 if (length < 10) { 407 408 /* fill the buffer */ 409 ucbuf_fillucbuf(buf,error); 410 length = (int32_t)(buf->bufLimit - buf->buffer); 411 } 412 413 /* Process the escape */ 414 offset = 0; 415 c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf); 416 417 /* check if u_unescapeAt unescaped and converted 418 * to c32 or not 419 */ 420 if(c32==(UChar32)0xFFFFFFFF){ 421 if(buf->showWarning) { 422 char context[CONTEXT_LEN+1]; 423 int32_t len = CONTEXT_LEN; 424 if(length < len) { 425 len = length; 426 } 427 context[len]= 0 ; /* null terminate the buffer */ 428 u_UCharsToChars( buf->currentPos, context, len); 429 fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context); 430 } 431 *error= U_ILLEGAL_ESCAPE_SEQUENCE; 432 return c1; 433 }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){ 434 /* Update the current buffer position */ 435 buf->currentPos += offset; 436 }else{ 437 /* unescaping failed so we just return 438 * c1 and not consume the buffer 439 * this is useful for rules with escapes 440 * in resouce bundles 441 * eg: \' \\ \" 442 */ 443 return c1; 444 } 445 446 return c32; 447 } 448 449 U_CAPI UCHARBUF* U_EXPORT2 450 ucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){ 451 452 FileStream* in = NULL; 453 int32_t fileSize=0; 454 const char* knownCp; 455 if(error==NULL || U_FAILURE(*error)){ 456 return NULL; 457 } 458 if(cp==NULL || fileName==NULL){ 459 *error = U_ILLEGAL_ARGUMENT_ERROR; 460 return FALSE; 461 } 462 if (!uprv_strcmp(fileName, "-")) { 463 in = T_FileStream_stdin(); 464 }else{ 465 in = T_FileStream_open(fileName, "rb"); 466 } 467 468 if(in!=NULL){ 469 UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF)); 470 fileSize = T_FileStream_size(in); 471 if(buf == NULL){ 472 *error = U_MEMORY_ALLOCATION_ERROR; 473 T_FileStream_close(in); 474 return NULL; 475 } 476 buf->in=in; 477 buf->conv=NULL; 478 buf->showWarning = showWarning; 479 buf->isBuffered = buffered; 480 buf->signatureLength=0; 481 if(*cp==NULL || **cp=='\0'){ 482 /* don't have code page name... try to autodetect */ 483 ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error); 484 }else if(ucbuf_isCPKnown(*cp)){ 485 /* discard BOM */ 486 ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error); 487 } 488 if(U_SUCCESS(*error) && buf->conv==NULL) { 489 buf->conv=ucnv_open(*cp,error); 490 } 491 if(U_FAILURE(*error)){ 492 ucnv_close(buf->conv); 493 uprv_free(buf); 494 T_FileStream_close(in); 495 return NULL; 496 } 497 498 if((buf->conv==NULL) && (buf->showWarning==TRUE)){ 499 fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n"); 500 } 501 buf->remaining=fileSize-buf->signatureLength; 502 if(buf->isBuffered){ 503 buf->bufCapacity=MAX_U_BUF; 504 }else{ 505 buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/; 506 } 507 buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity ); 508 if (buf->buffer == NULL) { 509 *error = U_MEMORY_ALLOCATION_ERROR; 510 ucbuf_close(buf); 511 return NULL; 512 } 513 buf->currentPos=buf->buffer; 514 buf->bufLimit=buf->buffer; 515 if(U_FAILURE(*error)){ 516 fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error)); 517 ucbuf_close(buf); 518 return NULL; 519 } 520 ucbuf_fillucbuf(buf,error); 521 if(U_FAILURE(*error)){ 522 ucbuf_close(buf); 523 return NULL; 524 } 525 return buf; 526 } 527 *error =U_FILE_ACCESS_ERROR; 528 return NULL; 529 } 530 531 532 533 /* TODO: this method will fail if at the 534 * begining of buffer and the uchar to unget 535 * is from the previous buffer. Need to implement 536 * system to take care of that situation. 537 */ 538 U_CAPI void U_EXPORT2 539 ucbuf_ungetc(int32_t c,UCHARBUF* buf){ 540 /* decrement currentPos pointer 541 * if not at the begining of buffer 542 */ 543 if(buf->currentPos!=buf->buffer){ 544 if(*(buf->currentPos-1)==c){ 545 buf->currentPos--; 546 } else { 547 /* ungetc failed - did not match. */ 548 } 549 } else { 550 /* ungetc failed - beginning of buffer. */ 551 } 552 } 553 554 /* frees the resources of UChar* buffer */ 555 static void 556 ucbuf_closebuf(UCHARBUF* buf){ 557 uprv_free(buf->buffer); 558 buf->buffer = NULL; 559 } 560 561 /* close the buf and release resources*/ 562 U_CAPI void U_EXPORT2 563 ucbuf_close(UCHARBUF* buf){ 564 if(buf!=NULL){ 565 if(buf->conv){ 566 ucnv_close(buf->conv); 567 } 568 T_FileStream_close(buf->in); 569 ucbuf_closebuf(buf); 570 uprv_free(buf); 571 } 572 } 573 574 /* rewind the buf and file stream */ 575 U_CAPI void U_EXPORT2 576 ucbuf_rewind(UCHARBUF* buf,UErrorCode* error){ 577 if(error==NULL || U_FAILURE(*error)){ 578 return; 579 } 580 if(buf){ 581 buf->currentPos=buf->buffer; 582 buf->bufLimit=buf->buffer; 583 T_FileStream_rewind(buf->in); 584 buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength; 585 586 ucnv_resetToUnicode(buf->conv); 587 if(buf->signatureLength>0) { 588 UChar target[1]={ 0 }; 589 UChar* pTarget; 590 char start[8]; 591 const char* pStart; 592 int32_t numRead; 593 594 /* read the signature bytes */ 595 numRead=T_FileStream_read(buf->in, start, buf->signatureLength); 596 597 /* convert and ignore initial U+FEFF, and the buffer overflow */ 598 pTarget = target; 599 pStart = start; 600 ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error); 601 if(*error==U_BUFFER_OVERFLOW_ERROR) { 602 *error=U_ZERO_ERROR; 603 } 604 605 /* verify that we successfully read exactly U+FEFF */ 606 if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) { 607 *error=U_INTERNAL_PROGRAM_ERROR; 608 } 609 } 610 } 611 } 612 613 614 U_CAPI int32_t U_EXPORT2 615 ucbuf_size(UCHARBUF* buf){ 616 if(buf){ 617 if(buf->isBuffered){ 618 return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv); 619 }else{ 620 return (int32_t)(buf->bufLimit - buf->buffer); 621 } 622 } 623 return 0; 624 } 625 626 U_CAPI const UChar* U_EXPORT2 627 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){ 628 if(error==NULL || U_FAILURE(*error)){ 629 return NULL; 630 } 631 if(buf==NULL || len==NULL){ 632 *error = U_ILLEGAL_ARGUMENT_ERROR; 633 return NULL; 634 } 635 *len = (int32_t)(buf->bufLimit - buf->buffer); 636 return buf->buffer; 637 } 638 639 U_CAPI const char* U_EXPORT2 640 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){ 641 int32_t requiredLen = 0; 642 int32_t dirlen = 0; 643 int32_t filelen = 0; 644 if(status==NULL || U_FAILURE(*status)){ 645 return NULL; 646 } 647 648 if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){ 649 *status = U_ILLEGAL_ARGUMENT_ERROR; 650 return NULL; 651 } 652 653 654 dirlen = (int32_t)uprv_strlen(inputDir); 655 filelen = (int32_t)uprv_strlen(fileName); 656 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) { 657 requiredLen = dirlen + filelen + 2; 658 if((*len < requiredLen) || target==NULL){ 659 *len = requiredLen; 660 *status = U_BUFFER_OVERFLOW_ERROR; 661 return NULL; 662 } 663 664 target[0] = '\0'; 665 /* 666 * append the input dir to openFileName if the first char in 667 * filename is not file seperation char and the last char input directory is not '.'. 668 * This is to support : 669 * genrb -s. /home/icu/data 670 * genrb -s. icu/data 671 * The user cannot mix notations like 672 * genrb -s. /icu/data --- the absolute path specified. -s redundant 673 * user should use 674 * genrb -s. icu/data --- start from CWD and look in icu/data dir 675 */ 676 if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){ 677 uprv_strcpy(target, inputDir); 678 target[dirlen] = U_FILE_SEP_CHAR; 679 } 680 target[dirlen + 1] = '\0'; 681 } else { 682 requiredLen = dirlen + filelen + 1; 683 if((*len < requiredLen) || target==NULL){ 684 *len = requiredLen; 685 *status = U_BUFFER_OVERFLOW_ERROR; 686 return NULL; 687 } 688 689 uprv_strcpy(target, inputDir); 690 } 691 692 uprv_strcat(target, fileName); 693 return target; 694 } 695 /* 696 * Unicode TR 13 says any of the below chars is 697 * a new line char in a readline function in addition 698 * to CR+LF combination which needs to be 699 * handled seperately 700 */ 701 static UBool ucbuf_isCharNewLine(UChar c){ 702 switch(c){ 703 case 0x000A: /* LF */ 704 case 0x000D: /* CR */ 705 case 0x000C: /* FF */ 706 case 0x0085: /* NEL */ 707 case 0x2028: /* LS */ 708 case 0x2029: /* PS */ 709 return TRUE; 710 default: 711 return FALSE; 712 } 713 } 714 715 U_CAPI const UChar* U_EXPORT2 716 ucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){ 717 UChar* temp = buf->currentPos; 718 UChar* savePos =NULL; 719 UChar c=0x0000; 720 if(buf->isBuffered){ 721 /* The input is buffered we have to do more 722 * for returning a pointer U_TRUNCATED_CHAR_FOUND 723 */ 724 for(;;){ 725 c = *temp++; 726 if(buf->remaining==0){ 727 return NULL; /* end of file is reached return NULL */ 728 } 729 if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){ 730 *err= U_TRUNCATED_CHAR_FOUND; 731 return NULL; 732 }else{ 733 ucbuf_fillucbuf(buf,err); 734 if(U_FAILURE(*err)){ 735 return NULL; 736 } 737 } 738 /* 739 * Accoding to TR 13 readLine functions must interpret 740 * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators 741 */ 742 /* Windows CR LF */ 743 if(c ==0x0d && temp <= buf->bufLimit && *temp == 0x0a ){ 744 *len = (int32_t)(temp++ - buf->currentPos); 745 savePos = buf->currentPos; 746 buf->currentPos = temp; 747 return savePos; 748 } 749 /* else */ 750 751 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){ /* Unipad inserts 2028 line separators! */ 752 *len = (int32_t)(temp - buf->currentPos); 753 savePos = buf->currentPos; 754 buf->currentPos = temp; 755 return savePos; 756 } 757 } 758 }else{ 759 /* we know that all input is read into the internal 760 * buffer so we can safely return pointers 761 */ 762 for(;;){ 763 c = *temp++; 764 765 if(buf->currentPos==buf->bufLimit){ 766 return NULL; /* end of file is reached return NULL */ 767 } 768 /* Windows CR LF */ 769 if(c ==0x0d && temp <= buf->bufLimit && *temp == 0x0a ){ 770 *len = (int32_t)(temp++ - buf->currentPos); 771 savePos = buf->currentPos; 772 buf->currentPos = temp; 773 return savePos; 774 } 775 /* else */ 776 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) { /* Unipad inserts 2028 line separators! */ 777 *len = (int32_t)(temp - buf->currentPos); 778 savePos = buf->currentPos; 779 buf->currentPos = temp; 780 return savePos; 781 } 782 } 783 } 784 /* not reached */ 785 /* A compiler warning will appear if all paths don't contain a return statement. */ 786 /* return NULL;*/ 787 } 788 #endif 789