1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1998-2014, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * 9 * File ucbuf.c 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 05/10/01 Ram Creation. 15 ******************************************************************************* 16 */ 17 18 #include "unicode/utypes.h" 19 #include "unicode/putil.h" 20 #include "unicode/uchar.h" 21 #include "unicode/ucnv.h" 22 #include "unicode/ucnv_err.h" 23 #include "unicode/ustring.h" 24 #include "unicode/utf16.h" 25 #include "filestrm.h" 26 #include "cstring.h" 27 #include "cmemory.h" 28 #include "ustrfmt.h" 29 #include "ucbuf.h" 30 #include <stdio.h> 31 32 #if !UCONFIG_NO_CONVERSION 33 34 35 #define MAX_IN_BUF 1000 36 #define MAX_U_BUF 1500 37 #define CONTEXT_LEN 20 38 39 struct UCHARBUF { 40 UChar* buffer; 41 UChar* currentPos; 42 UChar* bufLimit; 43 int32_t bufCapacity; 44 int32_t remaining; 45 int32_t signatureLength; 46 FileStream* in; 47 UConverter* conv; 48 UBool showWarning; /* makes this API not produce any errors */ 49 UBool isBuffered; 50 }; 51 52 U_CAPI UBool U_EXPORT2 53 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){ 54 char start[8]; 55 int32_t numRead; 56 57 UChar target[1]={ 0 }; 58 UChar* pTarget; 59 const char* pStart; 60 61 /* read a few bytes */ 62 numRead=T_FileStream_read(in, start, sizeof(start)); 63 64 *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error); 65 66 /* unread the bytes beyond what was consumed for U+FEFF */ 67 T_FileStream_rewind(in); 68 if (*signatureLength > 0) { 69 T_FileStream_read(in, start, *signatureLength); 70 } 71 72 if(*cp==NULL){ 73 *conv =NULL; 74 return FALSE; 75 } 76 77 /* open the converter for the detected Unicode charset */ 78 *conv = ucnv_open(*cp,error); 79 80 /* convert and ignore initial U+FEFF, and the buffer overflow */ 81 pTarget = target; 82 pStart = start; 83 ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error); 84 *signatureLength = (int32_t)(pStart - start); 85 if(*error==U_BUFFER_OVERFLOW_ERROR) { 86 *error=U_ZERO_ERROR; 87 } 88 89 /* verify that we successfully read exactly U+FEFF */ 90 if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) { 91 *error=U_INTERNAL_PROGRAM_ERROR; 92 } 93 94 95 return TRUE; 96 } 97 static UBool ucbuf_isCPKnown(const char* cp){ 98 if(ucnv_compareNames("UTF-8",cp)==0){ 99 return TRUE; 100 } 101 if(ucnv_compareNames("UTF-16BE",cp)==0){ 102 return TRUE; 103 } 104 if(ucnv_compareNames("UTF-16LE",cp)==0){ 105 return TRUE; 106 } 107 if(ucnv_compareNames("UTF-16",cp)==0){ 108 return TRUE; 109 } 110 if(ucnv_compareNames("UTF-32",cp)==0){ 111 return TRUE; 112 } 113 if(ucnv_compareNames("UTF-32BE",cp)==0){ 114 return TRUE; 115 } 116 if(ucnv_compareNames("UTF-32LE",cp)==0){ 117 return TRUE; 118 } 119 if(ucnv_compareNames("SCSU",cp)==0){ 120 return TRUE; 121 } 122 if(ucnv_compareNames("BOCU-1",cp)==0){ 123 return TRUE; 124 } 125 if(ucnv_compareNames("UTF-7",cp)==0){ 126 return TRUE; 127 } 128 return FALSE; 129 } 130 131 U_CAPI FileStream * U_EXPORT2 132 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){ 133 FileStream* in=NULL; 134 if(error==NULL || U_FAILURE(*error)){ 135 return NULL; 136 } 137 if(conv==NULL || cp==NULL || fileName==NULL){ 138 *error = U_ILLEGAL_ARGUMENT_ERROR; 139 return NULL; 140 } 141 /* open the file */ 142 in= T_FileStream_open(fileName,"rb"); 143 144 if(in == NULL){ 145 *error=U_FILE_ACCESS_ERROR; 146 return NULL; 147 } 148 149 if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) { 150 return in; 151 } else { 152 ucnv_close(*conv); 153 *conv=NULL; 154 T_FileStream_close(in); 155 return NULL; 156 } 157 } 158 159 /* fill the uchar buffer */ 160 static UCHARBUF* 161 ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){ 162 UChar* pTarget=NULL; 163 UChar* target=NULL; 164 const char* source=NULL; 165 char carr[MAX_IN_BUF] = {'\0'}; 166 char* cbuf = carr; 167 int32_t inputRead=0; 168 int32_t outputWritten=0; 169 int32_t offset=0; 170 const char* sourceLimit =NULL; 171 int32_t cbufSize=0; 172 pTarget = buf->buffer; 173 /* check if we arrived here without exhausting the buffer*/ 174 if(buf->currentPos<buf->bufLimit){ 175 offset = (int32_t)(buf->bufLimit-buf->currentPos); 176 memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar)); 177 } 178 179 #if UCBUF_DEBUG 180 memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset)); 181 #endif 182 if(buf->isBuffered){ 183 cbufSize = MAX_IN_BUF; 184 /* read the file */ 185 inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset); 186 buf->remaining-=inputRead; 187 188 }else{ 189 cbufSize = T_FileStream_size(buf->in); 190 cbuf = (char*)uprv_malloc(cbufSize); 191 if (cbuf == NULL) { 192 *error = U_MEMORY_ALLOCATION_ERROR; 193 return NULL; 194 } 195 inputRead= T_FileStream_read(buf->in,cbuf,cbufSize); 196 buf->remaining-=inputRead; 197 } 198 199 /* just to be sure...*/ 200 if ( 0 == inputRead ) 201 buf->remaining = 0; 202 203 target=pTarget; 204 /* convert the bytes */ 205 if(buf->conv){ 206 /* set the callback to stop */ 207 UConverterToUCallback toUOldAction ; 208 void* toUOldContext; 209 void* toUNewContext=NULL; 210 ucnv_setToUCallBack(buf->conv, 211 UCNV_TO_U_CALLBACK_STOP, 212 toUNewContext, 213 &toUOldAction, 214 (const void**)&toUOldContext, 215 error); 216 /* since state is saved in the converter we add offset to source*/ 217 target = pTarget+offset; 218 source = cbuf; 219 sourceLimit = source + inputRead; 220 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), 221 &source,sourceLimit,NULL, 222 (UBool)(buf->remaining==0),error); 223 224 if(U_FAILURE(*error)){ 225 char context[CONTEXT_LEN+1]; 226 char preContext[CONTEXT_LEN+1]; 227 char postContext[CONTEXT_LEN+1]; 228 int8_t len = CONTEXT_LEN; 229 int32_t start=0; 230 int32_t stop =0; 231 int32_t pos =0; 232 /* use erro1 to preserve the error code */ 233 UErrorCode error1 =U_ZERO_ERROR; 234 235 if( buf->showWarning==TRUE){ 236 fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while" 237 " converting input stream to target encoding: %s\n", 238 u_errorName(*error)); 239 } 240 241 242 /* now get the context chars */ 243 ucnv_getInvalidChars(buf->conv,context,&len,&error1); 244 context[len]= 0 ; /* null terminate the buffer */ 245 246 pos = (int32_t)(source - cbuf - len); 247 248 /* for pre-context */ 249 start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1)); 250 stop = pos-len; 251 252 memcpy(preContext,cbuf+start,stop-start); 253 /* null terminate the buffer */ 254 preContext[stop-start] = 0; 255 256 /* for post-context */ 257 start = pos+len; 258 stop = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf)); 259 260 memcpy(postContext,source,stop-start); 261 /* null terminate the buffer */ 262 postContext[stop-start] = 0; 263 264 if(buf->showWarning ==TRUE){ 265 /* print out the context */ 266 fprintf(stderr,"\tPre-context: %s\n",preContext); 267 fprintf(stderr,"\tContext: %s\n",context); 268 fprintf(stderr,"\tPost-context: %s\n", postContext); 269 } 270 271 /* reset the converter */ 272 ucnv_reset(buf->conv); 273 274 /* set the call back to substitute 275 * and restart conversion 276 */ 277 ucnv_setToUCallBack(buf->conv, 278 UCNV_TO_U_CALLBACK_SUBSTITUTE, 279 toUNewContext, 280 &toUOldAction, 281 (const void**)&toUOldContext, 282 &error1); 283 284 /* reset source and target start positions */ 285 target = pTarget+offset; 286 source = cbuf; 287 288 /* re convert */ 289 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), 290 &source,sourceLimit,NULL, 291 (UBool)(buf->remaining==0),&error1); 292 293 } 294 outputWritten = (int32_t)(target - pTarget); 295 296 #if UCBUF_DEBUG 297 { 298 int i; 299 target = pTarget; 300 for(i=0;i<numRead;i++){ 301 /* printf("%c", (char)(*target++));*/ 302 } 303 } 304 #endif 305 306 }else{ 307 u_charsToUChars(cbuf,target+offset,inputRead); 308 outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset); 309 } 310 buf->currentPos = pTarget; 311 buf->bufLimit=pTarget+outputWritten; 312 *buf->bufLimit=0; /*NUL terminate*/ 313 if(cbuf!=carr){ 314 uprv_free(cbuf); 315 } 316 return buf; 317 } 318 319 320 321 /* get a UChar from the stream*/ 322 U_CAPI int32_t U_EXPORT2 323 ucbuf_getc(UCHARBUF* buf,UErrorCode* error){ 324 if(error==NULL || U_FAILURE(*error)){ 325 return FALSE; 326 } 327 if(buf->currentPos>=buf->bufLimit){ 328 if(buf->remaining==0){ 329 return U_EOF; 330 } 331 buf=ucbuf_fillucbuf(buf,error); 332 if(U_FAILURE(*error)){ 333 return U_EOF; 334 } 335 } 336 337 return *(buf->currentPos++); 338 } 339 340 /* get a UChar32 from the stream*/ 341 U_CAPI int32_t U_EXPORT2 342 ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){ 343 int32_t retVal = (int32_t)U_EOF; 344 if(error==NULL || U_FAILURE(*error)){ 345 return FALSE; 346 } 347 if(buf->currentPos+1>=buf->bufLimit){ 348 if(buf->remaining==0){ 349 return U_EOF; 350 } 351 buf=ucbuf_fillucbuf(buf,error); 352 if(U_FAILURE(*error)){ 353 return U_EOF; 354 } 355 } 356 if(U16_IS_LEAD(*(buf->currentPos))){ 357 retVal=U16_GET_SUPPLEMENTARY(buf->currentPos[0],buf->currentPos[1]); 358 buf->currentPos+=2; 359 }else{ 360 retVal = *(buf->currentPos++); 361 } 362 return retVal; 363 } 364 365 /* u_unescapeAt() callback to return a UChar*/ 366 static UChar U_CALLCONV 367 _charAt(int32_t offset, void *context) { 368 return ((UCHARBUF*) context)->currentPos[offset]; 369 } 370 371 /* getc and escape it */ 372 U_CAPI int32_t U_EXPORT2 373 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) { 374 int32_t length; 375 int32_t offset; 376 UChar32 c32,c1,c2; 377 if(error==NULL || U_FAILURE(*error)){ 378 return FALSE; 379 } 380 /* Fill the buffer if it is empty */ 381 if (buf->currentPos >=buf->bufLimit-2) { 382 ucbuf_fillucbuf(buf,error); 383 } 384 385 /* Get the next character in the buffer */ 386 if (buf->currentPos < buf->bufLimit) { 387 c1 = *(buf->currentPos)++; 388 } else { 389 c1 = U_EOF; 390 } 391 392 c2 = *(buf->currentPos); 393 394 /* If it isn't a backslash, return it */ 395 if (c1 != 0x005C) { 396 return c1; 397 } 398 399 /* Determine the amount of data in the buffer */ 400 length = (int32_t)(buf->bufLimit - buf->currentPos); 401 402 /* The longest escape sequence is \Uhhhhhhhh; make sure 403 we have at least that many characters */ 404 if (length < 10) { 405 406 /* fill the buffer */ 407 ucbuf_fillucbuf(buf,error); 408 length = (int32_t)(buf->bufLimit - buf->buffer); 409 } 410 411 /* Process the escape */ 412 offset = 0; 413 c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf); 414 415 /* check if u_unescapeAt unescaped and converted 416 * to c32 or not 417 */ 418 if(c32==0xFFFFFFFF){ 419 if(buf->showWarning) { 420 char context[CONTEXT_LEN+1]; 421 int32_t len = CONTEXT_LEN; 422 if(length < len) { 423 len = length; 424 } 425 context[len]= 0 ; /* null terminate the buffer */ 426 u_UCharsToChars( buf->currentPos, context, len); 427 fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context); 428 } 429 *error= U_ILLEGAL_ESCAPE_SEQUENCE; 430 return c1; 431 }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){ 432 /* Update the current buffer position */ 433 buf->currentPos += offset; 434 }else{ 435 /* unescaping failed so we just return 436 * c1 and not consume the buffer 437 * this is useful for rules with escapes 438 * in resouce bundles 439 * eg: \' \\ \" 440 */ 441 return c1; 442 } 443 444 return c32; 445 } 446 447 U_CAPI UCHARBUF* U_EXPORT2 448 ucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){ 449 450 FileStream* in = NULL; 451 int32_t fileSize=0; 452 const char* knownCp; 453 if(error==NULL || U_FAILURE(*error)){ 454 return NULL; 455 } 456 if(cp==NULL || fileName==NULL){ 457 *error = U_ILLEGAL_ARGUMENT_ERROR; 458 return FALSE; 459 } 460 if (!uprv_strcmp(fileName, "-")) { 461 in = T_FileStream_stdin(); 462 }else{ 463 in = T_FileStream_open(fileName, "rb"); 464 } 465 466 if(in!=NULL){ 467 UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF)); 468 fileSize = T_FileStream_size(in); 469 if(buf == NULL){ 470 *error = U_MEMORY_ALLOCATION_ERROR; 471 T_FileStream_close(in); 472 return NULL; 473 } 474 buf->in=in; 475 buf->conv=NULL; 476 buf->showWarning = showWarning; 477 buf->isBuffered = buffered; 478 buf->signatureLength=0; 479 if(*cp==NULL || **cp=='\0'){ 480 /* don't have code page name... try to autodetect */ 481 ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error); 482 }else if(ucbuf_isCPKnown(*cp)){ 483 /* discard BOM */ 484 ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error); 485 } 486 if(U_SUCCESS(*error) && buf->conv==NULL) { 487 buf->conv=ucnv_open(*cp,error); 488 } 489 if(U_FAILURE(*error)){ 490 ucnv_close(buf->conv); 491 uprv_free(buf); 492 T_FileStream_close(in); 493 return NULL; 494 } 495 496 if((buf->conv==NULL) && (buf->showWarning==TRUE)){ 497 fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n"); 498 } 499 buf->remaining=fileSize-buf->signatureLength; 500 if(buf->isBuffered){ 501 buf->bufCapacity=MAX_U_BUF; 502 }else{ 503 buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/; 504 } 505 buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity ); 506 if (buf->buffer == NULL) { 507 *error = U_MEMORY_ALLOCATION_ERROR; 508 ucbuf_close(buf); 509 return NULL; 510 } 511 buf->currentPos=buf->buffer; 512 buf->bufLimit=buf->buffer; 513 if(U_FAILURE(*error)){ 514 fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error)); 515 ucbuf_close(buf); 516 return NULL; 517 } 518 ucbuf_fillucbuf(buf,error); 519 if(U_FAILURE(*error)){ 520 ucbuf_close(buf); 521 return NULL; 522 } 523 return buf; 524 } 525 *error =U_FILE_ACCESS_ERROR; 526 return NULL; 527 } 528 529 530 531 /* TODO: this method will fail if at the 532 * begining of buffer and the uchar to unget 533 * is from the previous buffer. Need to implement 534 * system to take care of that situation. 535 */ 536 U_CAPI void U_EXPORT2 537 ucbuf_ungetc(int32_t c,UCHARBUF* buf){ 538 /* decrement currentPos pointer 539 * if not at the begining of buffer 540 */ 541 if(buf->currentPos!=buf->buffer){ 542 if(*(buf->currentPos-1)==c){ 543 buf->currentPos--; 544 } else { 545 /* ungetc failed - did not match. */ 546 } 547 } else { 548 /* ungetc failed - beginning of buffer. */ 549 } 550 } 551 552 /* frees the resources of UChar* buffer */ 553 static void 554 ucbuf_closebuf(UCHARBUF* buf){ 555 uprv_free(buf->buffer); 556 buf->buffer = NULL; 557 } 558 559 /* close the buf and release resources*/ 560 U_CAPI void U_EXPORT2 561 ucbuf_close(UCHARBUF* buf){ 562 if(buf!=NULL){ 563 if(buf->conv){ 564 ucnv_close(buf->conv); 565 } 566 T_FileStream_close(buf->in); 567 ucbuf_closebuf(buf); 568 uprv_free(buf); 569 } 570 } 571 572 /* rewind the buf and file stream */ 573 U_CAPI void U_EXPORT2 574 ucbuf_rewind(UCHARBUF* buf,UErrorCode* error){ 575 if(error==NULL || U_FAILURE(*error)){ 576 return; 577 } 578 if(buf){ 579 buf->currentPos=buf->buffer; 580 buf->bufLimit=buf->buffer; 581 T_FileStream_rewind(buf->in); 582 buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength; 583 584 ucnv_resetToUnicode(buf->conv); 585 if(buf->signatureLength>0) { 586 UChar target[1]={ 0 }; 587 UChar* pTarget; 588 char start[8]; 589 const char* pStart; 590 int32_t numRead; 591 592 /* read the signature bytes */ 593 numRead=T_FileStream_read(buf->in, start, buf->signatureLength); 594 595 /* convert and ignore initial U+FEFF, and the buffer overflow */ 596 pTarget = target; 597 pStart = start; 598 ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error); 599 if(*error==U_BUFFER_OVERFLOW_ERROR) { 600 *error=U_ZERO_ERROR; 601 } 602 603 /* verify that we successfully read exactly U+FEFF */ 604 if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) { 605 *error=U_INTERNAL_PROGRAM_ERROR; 606 } 607 } 608 } 609 } 610 611 612 U_CAPI int32_t U_EXPORT2 613 ucbuf_size(UCHARBUF* buf){ 614 if(buf){ 615 if(buf->isBuffered){ 616 return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv); 617 }else{ 618 return (int32_t)(buf->bufLimit - buf->buffer); 619 } 620 } 621 return 0; 622 } 623 624 U_CAPI const UChar* U_EXPORT2 625 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){ 626 if(error==NULL || U_FAILURE(*error)){ 627 return NULL; 628 } 629 if(buf==NULL || len==NULL){ 630 *error = U_ILLEGAL_ARGUMENT_ERROR; 631 return NULL; 632 } 633 *len = (int32_t)(buf->bufLimit - buf->buffer); 634 return buf->buffer; 635 } 636 637 U_CAPI const char* U_EXPORT2 638 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){ 639 int32_t requiredLen = 0; 640 int32_t dirlen = 0; 641 int32_t filelen = 0; 642 if(status==NULL || U_FAILURE(*status)){ 643 return NULL; 644 } 645 646 if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){ 647 *status = U_ILLEGAL_ARGUMENT_ERROR; 648 return NULL; 649 } 650 651 652 dirlen = (int32_t)uprv_strlen(inputDir); 653 filelen = (int32_t)uprv_strlen(fileName); 654 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) { 655 requiredLen = dirlen + filelen + 2; 656 if((*len < requiredLen) || target==NULL){ 657 *len = requiredLen; 658 *status = U_BUFFER_OVERFLOW_ERROR; 659 return NULL; 660 } 661 662 target[0] = '\0'; 663 /* 664 * append the input dir to openFileName if the first char in 665 * filename is not file seperation char and the last char input directory is not '.'. 666 * This is to support : 667 * genrb -s. /home/icu/data 668 * genrb -s. icu/data 669 * The user cannot mix notations like 670 * genrb -s. /icu/data --- the absolute path specified. -s redundant 671 * user should use 672 * genrb -s. icu/data --- start from CWD and look in icu/data dir 673 */ 674 if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){ 675 uprv_strcpy(target, inputDir); 676 target[dirlen] = U_FILE_SEP_CHAR; 677 } 678 target[dirlen + 1] = '\0'; 679 } else { 680 requiredLen = dirlen + filelen + 1; 681 if((*len < requiredLen) || target==NULL){ 682 *len = requiredLen; 683 *status = U_BUFFER_OVERFLOW_ERROR; 684 return NULL; 685 } 686 687 uprv_strcpy(target, inputDir); 688 } 689 690 uprv_strcat(target, fileName); 691 return target; 692 } 693 /* 694 * Unicode TR 13 says any of the below chars is 695 * a new line char in a readline function in addition 696 * to CR+LF combination which needs to be 697 * handled seperately 698 */ 699 static UBool ucbuf_isCharNewLine(UChar c){ 700 switch(c){ 701 case 0x000A: /* LF */ 702 case 0x000D: /* CR */ 703 case 0x000C: /* FF */ 704 case 0x0085: /* NEL */ 705 case 0x2028: /* LS */ 706 case 0x2029: /* PS */ 707 return TRUE; 708 default: 709 return FALSE; 710 } 711 } 712 713 U_CAPI const UChar* U_EXPORT2 714 ucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){ 715 UChar* temp = buf->currentPos; 716 UChar* savePos =NULL; 717 UChar c=0x0000; 718 if(buf->isBuffered){ 719 /* The input is buffered we have to do more 720 * for returning a pointer U_TRUNCATED_CHAR_FOUND 721 */ 722 for(;;){ 723 c = *temp++; 724 if(buf->remaining==0){ 725 return NULL; /* end of file is reached return NULL */ 726 } 727 if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){ 728 *err= U_TRUNCATED_CHAR_FOUND; 729 return NULL; 730 }else{ 731 ucbuf_fillucbuf(buf,err); 732 if(U_FAILURE(*err)){ 733 return NULL; 734 } 735 } 736 /* 737 * Accoding to TR 13 readLine functions must interpret 738 * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators 739 */ 740 /* Windows CR LF */ 741 if(c ==0x0d && temp <= buf->bufLimit && *temp == 0x0a ){ 742 *len = (int32_t)(temp++ - buf->currentPos); 743 savePos = buf->currentPos; 744 buf->currentPos = temp; 745 return savePos; 746 } 747 /* else */ 748 749 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){ /* Unipad inserts 2028 line separators! */ 750 *len = (int32_t)(temp - buf->currentPos); 751 savePos = buf->currentPos; 752 buf->currentPos = temp; 753 return savePos; 754 } 755 } 756 }else{ 757 /* we know that all input is read into the internal 758 * buffer so we can safely return pointers 759 */ 760 for(;;){ 761 c = *temp++; 762 763 if(buf->currentPos==buf->bufLimit){ 764 return NULL; /* end of file is reached return NULL */ 765 } 766 /* Windows CR LF */ 767 if(c ==0x0d && temp <= buf->bufLimit && *temp == 0x0a ){ 768 *len = (int32_t)(temp++ - buf->currentPos); 769 savePos = buf->currentPos; 770 buf->currentPos = temp; 771 return savePos; 772 } 773 /* else */ 774 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) { /* Unipad inserts 2028 line separators! */ 775 *len = (int32_t)(temp - buf->currentPos); 776 savePos = buf->currentPos; 777 buf->currentPos = temp; 778 return savePos; 779 } 780 } 781 } 782 /* not reached */ 783 /* A compiler warning will appear if all paths don't contain a return statement. */ 784 /* return NULL;*/ 785 } 786 #endif 787