1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1998-2008, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * 9 * File ucbuf.c 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 05/10/01 Ram Creation. 15 ******************************************************************************* 16 */ 17 18 #include "unicode/utypes.h" 19 #include "unicode/putil.h" 20 #include "unicode/ucnv.h" 21 #include "unicode/ucnv_err.h" 22 #include "filestrm.h" 23 #include "cstring.h" 24 #include "cmemory.h" 25 #include "ustrfmt.h" 26 #include "unicode/ustring.h" 27 #include "unicode/uchar.h" 28 #include "ucbuf.h" 29 #include <stdio.h> 30 31 #if !UCONFIG_NO_CONVERSION 32 33 34 #define MAX_IN_BUF 1000 35 #define MAX_U_BUF 1500 36 #define CONTEXT_LEN 20 37 38 struct UCHARBUF { 39 UChar* buffer; 40 UChar* currentPos; 41 UChar* bufLimit; 42 int32_t bufCapacity; 43 int32_t remaining; 44 int32_t signatureLength; 45 FileStream* in; 46 UConverter* conv; 47 UBool showWarning; /* makes this API not produce any errors */ 48 UBool isBuffered; 49 }; 50 51 U_CAPI UBool U_EXPORT2 52 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){ 53 char start[8]; 54 int32_t numRead; 55 56 UChar target[1]={ 0 }; 57 UChar* pTarget; 58 const char* pStart; 59 60 /* read a few bytes */ 61 numRead=T_FileStream_read(in, start, sizeof(start)); 62 63 *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error); 64 65 /* unread the bytes beyond what was consumed for U+FEFF */ 66 T_FileStream_rewind(in); 67 if (*signatureLength > 0) { 68 numRead = T_FileStream_read(in, start, *signatureLength); 69 } 70 71 if(*cp==NULL){ 72 *conv =NULL; 73 return FALSE; 74 } 75 76 /* open the converter for the detected Unicode charset */ 77 *conv = ucnv_open(*cp,error); 78 79 /* convert and ignore initial U+FEFF, and the buffer overflow */ 80 pTarget = target; 81 pStart = start; 82 ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error); 83 *signatureLength = (int32_t)(pStart - start); 84 if(*error==U_BUFFER_OVERFLOW_ERROR) { 85 *error=U_ZERO_ERROR; 86 } 87 88 /* verify that we successfully read exactly U+FEFF */ 89 if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) { 90 *error=U_INTERNAL_PROGRAM_ERROR; 91 } 92 93 94 return TRUE; 95 } 96 static UBool ucbuf_isCPKnown(const char* cp){ 97 if(ucnv_compareNames("UTF-8",cp)==0){ 98 return TRUE; 99 } 100 if(ucnv_compareNames("UTF-16BE",cp)==0){ 101 return TRUE; 102 } 103 if(ucnv_compareNames("UTF-16LE",cp)==0){ 104 return TRUE; 105 } 106 if(ucnv_compareNames("UTF-16",cp)==0){ 107 return TRUE; 108 } 109 if(ucnv_compareNames("UTF-32",cp)==0){ 110 return TRUE; 111 } 112 if(ucnv_compareNames("UTF-32BE",cp)==0){ 113 return TRUE; 114 } 115 if(ucnv_compareNames("UTF-32LE",cp)==0){ 116 return TRUE; 117 } 118 if(ucnv_compareNames("SCSU",cp)==0){ 119 return TRUE; 120 } 121 if(ucnv_compareNames("BOCU-1",cp)==0){ 122 return TRUE; 123 } 124 if(ucnv_compareNames("UTF-7",cp)==0){ 125 return TRUE; 126 } 127 return FALSE; 128 } 129 130 U_CAPI FileStream * U_EXPORT2 131 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){ 132 FileStream* in=NULL; 133 if(error==NULL || U_FAILURE(*error)){ 134 return NULL; 135 } 136 if(conv==NULL || cp==NULL || fileName==NULL){ 137 *error = U_ILLEGAL_ARGUMENT_ERROR; 138 return NULL; 139 } 140 /* open the file */ 141 in= T_FileStream_open(fileName,"rb"); 142 143 if(in == NULL){ 144 *error=U_FILE_ACCESS_ERROR; 145 return NULL; 146 } 147 148 if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) { 149 return in; 150 } else { 151 ucnv_close(*conv); 152 *conv=NULL; 153 T_FileStream_close(in); 154 return NULL; 155 } 156 } 157 158 /* fill the uchar buffer */ 159 static UCHARBUF* 160 ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){ 161 UChar* pTarget=NULL; 162 UChar* target=NULL; 163 const char* source=NULL; 164 char carr[MAX_IN_BUF] = {'\0'}; 165 char* cbuf = carr; 166 int32_t inputRead=0; 167 int32_t outputWritten=0; 168 int32_t offset=0; 169 const char* sourceLimit =NULL; 170 int32_t cbufSize=0; 171 pTarget = buf->buffer; 172 /* check if we arrived here without exhausting the buffer*/ 173 if(buf->currentPos<buf->bufLimit){ 174 offset = (int32_t)(buf->bufLimit-buf->currentPos); 175 memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar)); 176 } 177 178 #if DEBUG 179 memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset)); 180 #endif 181 if(buf->isBuffered){ 182 cbufSize = MAX_IN_BUF; 183 /* read the file */ 184 inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset); 185 buf->remaining-=inputRead; 186 187 }else{ 188 cbufSize = T_FileStream_size(buf->in); 189 cbuf = (char*)uprv_malloc(cbufSize); 190 if (cbuf == NULL) { 191 *error = U_MEMORY_ALLOCATION_ERROR; 192 return NULL; 193 } 194 inputRead= T_FileStream_read(buf->in,cbuf,cbufSize); 195 buf->remaining-=inputRead; 196 } 197 198 /* just to be sure...*/ 199 if ( 0 == inputRead ) 200 buf->remaining = 0; 201 202 target=pTarget; 203 /* convert the bytes */ 204 if(buf->conv){ 205 /* set the callback to stop */ 206 UConverterToUCallback toUOldAction ; 207 void* toUOldContext; 208 void* toUNewContext=NULL; 209 ucnv_setToUCallBack(buf->conv, 210 UCNV_TO_U_CALLBACK_STOP, 211 toUNewContext, 212 &toUOldAction, 213 (const void**)&toUOldContext, 214 error); 215 /* since state is saved in the converter we add offset to source*/ 216 target = pTarget+offset; 217 source = cbuf; 218 sourceLimit = source + inputRead; 219 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), 220 &source,sourceLimit,NULL, 221 (UBool)(buf->remaining==0),error); 222 223 if(U_FAILURE(*error)){ 224 char context[CONTEXT_LEN+1]; 225 char preContext[CONTEXT_LEN+1]; 226 char postContext[CONTEXT_LEN+1]; 227 int8_t len = CONTEXT_LEN; 228 int32_t start=0; 229 int32_t stop =0; 230 int32_t pos =0; 231 /* use erro1 to preserve the error code */ 232 UErrorCode error1 =U_ZERO_ERROR; 233 234 if( buf->showWarning==TRUE){ 235 fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while" 236 " converting input stream to target encoding: %s\n", 237 u_errorName(*error)); 238 } 239 240 241 /* now get the context chars */ 242 ucnv_getInvalidChars(buf->conv,context,&len,&error1); 243 context[len]= 0 ; /* null terminate the buffer */ 244 245 pos = (int32_t)(source - cbuf - len); 246 247 /* for pre-context */ 248 start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1)); 249 stop = pos-len; 250 251 memcpy(preContext,cbuf+start,stop-start); 252 /* null terminate the buffer */ 253 preContext[stop-start] = 0; 254 255 /* for post-context */ 256 start = pos+len; 257 stop = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf)); 258 259 memcpy(postContext,source,stop-start); 260 /* null terminate the buffer */ 261 postContext[stop-start] = 0; 262 263 if(buf->showWarning ==TRUE){ 264 /* print out the context */ 265 fprintf(stderr,"\tPre-context: %s\n",preContext); 266 fprintf(stderr,"\tContext: %s\n",context); 267 fprintf(stderr,"\tPost-context: %s\n", postContext); 268 } 269 270 /* reset the converter */ 271 ucnv_reset(buf->conv); 272 273 /* set the call back to substitute 274 * and restart conversion 275 */ 276 ucnv_setToUCallBack(buf->conv, 277 UCNV_TO_U_CALLBACK_SUBSTITUTE, 278 toUNewContext, 279 &toUOldAction, 280 (const void**)&toUOldContext, 281 &error1); 282 283 /* reset source and target start positions */ 284 target = pTarget+offset; 285 source = cbuf; 286 287 /* re convert */ 288 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), 289 &source,sourceLimit,NULL, 290 (UBool)(buf->remaining==0),&error1); 291 292 } 293 outputWritten = (int32_t)(target - pTarget); 294 295 296 #if DEBUG 297 { 298 int i; 299 target = pTarget; 300 for(i=0;i<numRead;i++){ 301 /* printf("%c", (char)(*target++));*/ 302 } 303 } 304 #endif 305 306 }else{ 307 u_charsToUChars(cbuf,target+offset,inputRead); 308 outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset); 309 } 310 buf->currentPos = pTarget; 311 buf->bufLimit=pTarget+outputWritten; 312 *buf->bufLimit=0; /*NUL terminate*/ 313 if(cbuf!=carr){ 314 uprv_free(cbuf); 315 } 316 return buf; 317 } 318 319 320 321 /* get a UChar from the stream*/ 322 U_CAPI int32_t U_EXPORT2 323 ucbuf_getc(UCHARBUF* buf,UErrorCode* error){ 324 if(error==NULL || U_FAILURE(*error)){ 325 return FALSE; 326 } 327 if(buf->currentPos>=buf->bufLimit){ 328 if(buf->remaining==0){ 329 return U_EOF; 330 } 331 buf=ucbuf_fillucbuf(buf,error); 332 if(U_FAILURE(*error)){ 333 return U_EOF; 334 } 335 } 336 337 return *(buf->currentPos++); 338 } 339 340 /* get a UChar32 from the stream*/ 341 U_CAPI int32_t U_EXPORT2 342 ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){ 343 int32_t retVal = (int32_t)U_EOF; 344 if(error==NULL || U_FAILURE(*error)){ 345 return FALSE; 346 } 347 if(buf->currentPos+1>=buf->bufLimit){ 348 if(buf->remaining==0){ 349 return U_EOF; 350 } 351 buf=ucbuf_fillucbuf(buf,error); 352 if(U_FAILURE(*error)){ 353 return U_EOF; 354 } 355 } 356 if(UTF_IS_LEAD(*(buf->currentPos))){ 357 retVal=UTF16_GET_PAIR_VALUE(buf->currentPos[0],buf->currentPos[1]); 358 buf->currentPos+=2; 359 }else{ 360 retVal = *(buf->currentPos++); 361 } 362 return retVal; 363 } 364 365 /* u_unescapeAt() callback to return a UChar*/ 366 static UChar U_CALLCONV 367 _charAt(int32_t offset, void *context) { 368 return ((UCHARBUF*) context)->currentPos[offset]; 369 } 370 371 /* getc and escape it */ 372 U_CAPI int32_t U_EXPORT2 373 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) { 374 int32_t length; 375 int32_t offset; 376 UChar32 c32,c1,c2; 377 if(error==NULL || U_FAILURE(*error)){ 378 return FALSE; 379 } 380 /* Fill the buffer if it is empty */ 381 if (buf->currentPos >=buf->bufLimit-2) { 382 ucbuf_fillucbuf(buf,error); 383 } 384 385 /* Get the next character in the buffer */ 386 if (buf->currentPos < buf->bufLimit) { 387 c1 = *(buf->currentPos)++; 388 } else { 389 c1 = U_EOF; 390 } 391 392 c2 = *(buf->currentPos); 393 394 /* If it isn't a backslash, return it */ 395 if (c1 != 0x005C) { 396 return c1; 397 } 398 399 /* Determine the amount of data in the buffer */ 400 length = (int32_t)(buf->bufLimit - buf->currentPos); 401 402 /* The longest escape sequence is \Uhhhhhhhh; make sure 403 we have at least that many characters */ 404 if (length < 10) { 405 406 /* fill the buffer */ 407 ucbuf_fillucbuf(buf,error); 408 length = (int32_t)(buf->bufLimit - buf->buffer); 409 } 410 411 /* Process the escape */ 412 offset = 0; 413 c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf); 414 415 /* check if u_unescapeAt unescaped and converted 416 * to c32 or not 417 */ 418 if(c32==0xFFFFFFFF){ 419 if(buf->showWarning) { 420 char context[CONTEXT_LEN+1]; 421 int32_t len = CONTEXT_LEN; 422 if(length < len) { 423 len = length; 424 } 425 context[len]= 0 ; /* null terminate the buffer */ 426 u_UCharsToChars( buf->currentPos, context, len); 427 fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context); 428 } 429 *error= U_ILLEGAL_ESCAPE_SEQUENCE; 430 return c1; 431 }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){ 432 /* Update the current buffer position */ 433 buf->currentPos += offset; 434 }else{ 435 /* unescaping failed so we just return 436 * c1 and not consume the buffer 437 * this is useful for rules with escapes 438 * in resouce bundles 439 * eg: \' \\ \" 440 */ 441 return c1; 442 } 443 444 return c32; 445 } 446 447 U_CAPI UCHARBUF* U_EXPORT2 448 ucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){ 449 450 FileStream* in = NULL; 451 int32_t fileSize=0; 452 const char* knownCp; 453 if(error==NULL || U_FAILURE(*error)){ 454 return NULL; 455 } 456 if(cp==NULL || fileName==NULL){ 457 *error = U_ILLEGAL_ARGUMENT_ERROR; 458 return FALSE; 459 } 460 if (!uprv_strcmp(fileName, "-")) { 461 in = T_FileStream_stdin(); 462 }else{ 463 in = T_FileStream_open(fileName, "rb"); 464 } 465 466 if(in!=NULL){ 467 UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF)); 468 fileSize = T_FileStream_size(in); 469 if(buf == NULL){ 470 *error = U_MEMORY_ALLOCATION_ERROR; 471 T_FileStream_close(in); 472 return NULL; 473 } 474 buf->in=in; 475 buf->conv=NULL; 476 buf->showWarning = showWarning; 477 buf->isBuffered = buffered; 478 buf->signatureLength=0; 479 if(*cp==NULL || **cp=='\0'){ 480 /* don't have code page name... try to autodetect */ 481 ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error); 482 }else if(ucbuf_isCPKnown(*cp)){ 483 /* discard BOM */ 484 ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error); 485 } 486 if(U_SUCCESS(*error) && buf->conv==NULL) { 487 buf->conv=ucnv_open(*cp,error); 488 } 489 if(U_FAILURE(*error)){ 490 ucnv_close(buf->conv); 491 uprv_free(buf); 492 T_FileStream_close(in); 493 return NULL; 494 } 495 496 if((buf->conv==NULL) && (buf->showWarning==TRUE)){ 497 fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n"); 498 } 499 buf->remaining=fileSize-buf->signatureLength; 500 if(buf->isBuffered){ 501 buf->bufCapacity=MAX_U_BUF; 502 }else{ 503 buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/; 504 } 505 buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity ); 506 if (buf->buffer == NULL) { 507 *error = U_MEMORY_ALLOCATION_ERROR; 508 ucbuf_close(buf); 509 return NULL; 510 } 511 buf->currentPos=buf->buffer; 512 buf->bufLimit=buf->buffer; 513 if(U_FAILURE(*error)){ 514 fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error)); 515 ucbuf_close(buf); 516 return NULL; 517 } 518 ucbuf_fillucbuf(buf,error); 519 if(U_FAILURE(*error)){ 520 ucbuf_close(buf); 521 return NULL; 522 } 523 return buf; 524 } 525 *error =U_FILE_ACCESS_ERROR; 526 return NULL; 527 } 528 529 530 531 /* TODO: this method will fail if at the 532 * begining of buffer and the uchar to unget 533 * is from the previous buffer. Need to implement 534 * system to take care of that situation. 535 */ 536 U_CAPI void U_EXPORT2 537 ucbuf_ungetc(int32_t c,UCHARBUF* buf){ 538 /* decrement currentPos pointer 539 * if not at the begining of buffer 540 */ 541 if(buf->currentPos!=buf->buffer){ 542 if(*(buf->currentPos-1)==c){ 543 buf->currentPos--; 544 } else { 545 /* ungetc failed - did not match. */ 546 } 547 } else { 548 /* ungetc failed - beginning of buffer. */ 549 } 550 } 551 552 /* frees the resources of UChar* buffer */ 553 static void 554 ucbuf_closebuf(UCHARBUF* buf){ 555 uprv_free(buf->buffer); 556 buf->buffer = NULL; 557 } 558 559 /* close the buf and release resources*/ 560 U_CAPI void U_EXPORT2 561 ucbuf_close(UCHARBUF* buf){ 562 if(buf!=NULL){ 563 if(buf->conv){ 564 ucnv_close(buf->conv); 565 } 566 T_FileStream_close(buf->in); 567 ucbuf_closebuf(buf); 568 uprv_free(buf); 569 } 570 } 571 572 /* rewind the buf and file stream */ 573 U_CAPI void U_EXPORT2 574 ucbuf_rewind(UCHARBUF* buf,UErrorCode* error){ 575 if(error==NULL || U_FAILURE(*error)){ 576 return; 577 } 578 if(buf){ 579 buf->currentPos=buf->buffer; 580 buf->bufLimit=buf->buffer; 581 T_FileStream_rewind(buf->in); 582 buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength; 583 584 ucnv_resetToUnicode(buf->conv); 585 if(buf->signatureLength>0) { 586 UChar target[1]={ 0 }; 587 UChar* pTarget; 588 char start[8]; 589 const char* pStart; 590 int32_t numRead; 591 592 /* read the signature bytes */ 593 numRead=T_FileStream_read(buf->in, start, buf->signatureLength); 594 595 /* convert and ignore initial U+FEFF, and the buffer overflow */ 596 pTarget = target; 597 pStart = start; 598 ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error); 599 if(*error==U_BUFFER_OVERFLOW_ERROR) { 600 *error=U_ZERO_ERROR; 601 } 602 603 /* verify that we successfully read exactly U+FEFF */ 604 if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) { 605 *error=U_INTERNAL_PROGRAM_ERROR; 606 } 607 } 608 } 609 } 610 611 612 U_CAPI int32_t U_EXPORT2 613 ucbuf_size(UCHARBUF* buf){ 614 if(buf){ 615 if(buf->isBuffered){ 616 return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv); 617 }else{ 618 return (int32_t)(buf->bufLimit - buf->buffer); 619 } 620 } 621 return 0; 622 } 623 624 U_CAPI const UChar* U_EXPORT2 625 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){ 626 if(error==NULL || U_FAILURE(*error)){ 627 return NULL; 628 } 629 if(buf==NULL || len==NULL){ 630 *error = U_ILLEGAL_ARGUMENT_ERROR; 631 return NULL; 632 } 633 *len = (int32_t)(buf->bufLimit - buf->buffer); 634 return buf->buffer; 635 } 636 637 U_CAPI const char* U_EXPORT2 638 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){ 639 int32_t requiredLen = 0; 640 int32_t dirlen = 0; 641 int32_t filelen = 0; 642 if(status==NULL || U_FAILURE(*status)){ 643 return NULL; 644 } 645 646 if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){ 647 *status = U_ILLEGAL_ARGUMENT_ERROR; 648 return NULL; 649 } 650 651 652 dirlen = (int32_t)uprv_strlen(inputDir); 653 filelen = (int32_t)uprv_strlen(fileName); 654 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) { 655 requiredLen = dirlen + filelen + 2; 656 if((*len < requiredLen) || target==NULL){ 657 *len = requiredLen; 658 *status = U_BUFFER_OVERFLOW_ERROR; 659 return NULL; 660 } 661 662 target[0] = '\0'; 663 /* 664 * append the input dir to openFileName if the first char in 665 * filename is not file seperation char and the last char input directory is not '.'. 666 * This is to support : 667 * genrb -s. /home/icu/data 668 * genrb -s. icu/data 669 * The user cannot mix notations like 670 * genrb -s. /icu/data --- the absolute path specified. -s redundant 671 * user should use 672 * genrb -s. icu/data --- start from CWD and look in icu/data dir 673 */ 674 if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){ 675 uprv_strcpy(target, inputDir); 676 target[dirlen] = U_FILE_SEP_CHAR; 677 } 678 target[dirlen + 1] = '\0'; 679 } else { 680 requiredLen = dirlen + filelen + 1; 681 if((*len < requiredLen) || target==NULL){ 682 *len = requiredLen; 683 *status = U_BUFFER_OVERFLOW_ERROR; 684 return NULL; 685 } 686 687 uprv_strcpy(target, inputDir); 688 } 689 690 uprv_strcat(target, fileName); 691 return target; 692 } 693 /* 694 * Unicode TR 13 says any of the below chars is 695 * a new line char in a readline function in addition 696 * to CR+LF combination which needs to be 697 * handled seperately 698 */ 699 static UBool ucbuf_isCharNewLine(UChar c){ 700 switch(c){ 701 case 0x000A: /* LF */ 702 case 0x000D: /* CR */ 703 case 0x000C: /* FF */ 704 case 0x0085: /* NEL */ 705 case 0x2028: /* LS */ 706 case 0x2029: /* PS */ 707 return TRUE; 708 default: 709 return FALSE; 710 } 711 } 712 713 U_CAPI const UChar* U_EXPORT2 714 ucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){ 715 UChar* temp = buf->currentPos; 716 UChar* savePos =NULL; 717 UChar c=0x0000; 718 if(buf->isBuffered){ 719 /* The input is buffered we have to do more 720 * for returning a pointer U_TRUNCATED_CHAR_FOUND 721 */ 722 for(;;){ 723 c = *temp++; 724 if(buf->remaining==0){ 725 return NULL; /* end of file is reached return NULL */ 726 } 727 if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){ 728 *err= U_TRUNCATED_CHAR_FOUND; 729 return NULL; 730 }else{ 731 ucbuf_fillucbuf(buf,err); 732 if(U_FAILURE(*err)){ 733 return NULL; 734 } 735 } 736 /* 737 * Accoding to TR 13 readLine functions must interpret 738 * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators 739 */ 740 /* Windows CR LF */ 741 if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){ 742 *len = (int32_t)(temp++ - buf->currentPos); 743 savePos = buf->currentPos; 744 buf->currentPos = temp; 745 return savePos; 746 } 747 /* else */ 748 749 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){ /* Unipad inserts 2028 line separators! */ 750 *len = (int32_t)(temp - buf->currentPos); 751 savePos = buf->currentPos; 752 buf->currentPos = temp; 753 return savePos; 754 } 755 } 756 }else{ 757 /* we know that all input is read into the internal 758 * buffer so we can safely return pointers 759 */ 760 for(;;){ 761 c = *temp++; 762 763 if(buf->currentPos==buf->bufLimit){ 764 return NULL; /* end of file is reached return NULL */ 765 } 766 /* Windows CR LF */ 767 if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){ 768 *len = (int32_t)(temp++ - buf->currentPos); 769 savePos = buf->currentPos; 770 buf->currentPos = temp; 771 return savePos; 772 } 773 /* else */ 774 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) { /* Unipad inserts 2028 line separators! */ 775 *len = (int32_t)(temp - buf->currentPos); 776 savePos = buf->currentPos; 777 buf->currentPos = temp; 778 return savePos; 779 } 780 } 781 } 782 /* not reached */ 783 /* A compiler warning will appear if all paths don't contain a return statement. */ 784 /* return NULL;*/ 785 } 786 #endif 787