1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1998-2011, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * 9 * File ucbuf.c 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 05/10/01 Ram Creation. 15 ******************************************************************************* 16 */ 17 18 #include "unicode/utypes.h" 19 #include "unicode/putil.h" 20 #include "unicode/uchar.h" 21 #include "unicode/ucnv.h" 22 #include "unicode/ucnv_err.h" 23 #include "unicode/ustring.h" 24 #include "unicode/utf16.h" 25 #include "filestrm.h" 26 #include "cstring.h" 27 #include "cmemory.h" 28 #include "ustrfmt.h" 29 #include "ucbuf.h" 30 #include <stdio.h> 31 32 #if !UCONFIG_NO_CONVERSION 33 34 35 #define MAX_IN_BUF 1000 36 #define MAX_U_BUF 1500 37 #define CONTEXT_LEN 20 38 39 struct UCHARBUF { 40 UChar* buffer; 41 UChar* currentPos; 42 UChar* bufLimit; 43 int32_t bufCapacity; 44 int32_t remaining; 45 int32_t signatureLength; 46 FileStream* in; 47 UConverter* conv; 48 UBool showWarning; /* makes this API not produce any errors */ 49 UBool isBuffered; 50 }; 51 52 U_CAPI UBool U_EXPORT2 53 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){ 54 char start[8]; 55 int32_t numRead; 56 57 UChar target[1]={ 0 }; 58 UChar* pTarget; 59 const char* pStart; 60 61 /* read a few bytes */ 62 numRead=T_FileStream_read(in, start, sizeof(start)); 63 64 *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error); 65 66 /* unread the bytes beyond what was consumed for U+FEFF */ 67 T_FileStream_rewind(in); 68 if (*signatureLength > 0) { 69 T_FileStream_read(in, start, *signatureLength); 70 } 71 72 if(*cp==NULL){ 73 *conv =NULL; 74 return FALSE; 75 } 76 77 /* open the converter for the detected Unicode charset */ 78 *conv = ucnv_open(*cp,error); 79 80 /* convert and ignore initial U+FEFF, and the buffer overflow */ 81 pTarget = target; 82 pStart = start; 83 ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error); 84 *signatureLength = (int32_t)(pStart - start); 85 if(*error==U_BUFFER_OVERFLOW_ERROR) { 86 *error=U_ZERO_ERROR; 87 } 88 89 /* verify that we successfully read exactly U+FEFF */ 90 if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) { 91 *error=U_INTERNAL_PROGRAM_ERROR; 92 } 93 94 95 return TRUE; 96 } 97 static UBool ucbuf_isCPKnown(const char* cp){ 98 if(ucnv_compareNames("UTF-8",cp)==0){ 99 return TRUE; 100 } 101 if(ucnv_compareNames("UTF-16BE",cp)==0){ 102 return TRUE; 103 } 104 if(ucnv_compareNames("UTF-16LE",cp)==0){ 105 return TRUE; 106 } 107 if(ucnv_compareNames("UTF-16",cp)==0){ 108 return TRUE; 109 } 110 if(ucnv_compareNames("UTF-32",cp)==0){ 111 return TRUE; 112 } 113 if(ucnv_compareNames("UTF-32BE",cp)==0){ 114 return TRUE; 115 } 116 if(ucnv_compareNames("UTF-32LE",cp)==0){ 117 return TRUE; 118 } 119 if(ucnv_compareNames("SCSU",cp)==0){ 120 return TRUE; 121 } 122 if(ucnv_compareNames("BOCU-1",cp)==0){ 123 return TRUE; 124 } 125 if(ucnv_compareNames("UTF-7",cp)==0){ 126 return TRUE; 127 } 128 return FALSE; 129 } 130 131 U_CAPI FileStream * U_EXPORT2 132 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){ 133 FileStream* in=NULL; 134 if(error==NULL || U_FAILURE(*error)){ 135 return NULL; 136 } 137 if(conv==NULL || cp==NULL || fileName==NULL){ 138 *error = U_ILLEGAL_ARGUMENT_ERROR; 139 return NULL; 140 } 141 /* open the file */ 142 in= T_FileStream_open(fileName,"rb"); 143 144 if(in == NULL){ 145 *error=U_FILE_ACCESS_ERROR; 146 return NULL; 147 } 148 149 if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) { 150 return in; 151 } else { 152 ucnv_close(*conv); 153 *conv=NULL; 154 T_FileStream_close(in); 155 return NULL; 156 } 157 } 158 159 /* fill the uchar buffer */ 160 static UCHARBUF* 161 ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){ 162 UChar* pTarget=NULL; 163 UChar* target=NULL; 164 const char* source=NULL; 165 char carr[MAX_IN_BUF] = {'\0'}; 166 char* cbuf = carr; 167 int32_t inputRead=0; 168 int32_t outputWritten=0; 169 int32_t offset=0; 170 const char* sourceLimit =NULL; 171 int32_t cbufSize=0; 172 pTarget = buf->buffer; 173 /* check if we arrived here without exhausting the buffer*/ 174 if(buf->currentPos<buf->bufLimit){ 175 offset = (int32_t)(buf->bufLimit-buf->currentPos); 176 memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar)); 177 } 178 179 #if DEBUG 180 memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset)); 181 #endif 182 if(buf->isBuffered){ 183 cbufSize = MAX_IN_BUF; 184 /* read the file */ 185 inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset); 186 buf->remaining-=inputRead; 187 188 }else{ 189 cbufSize = T_FileStream_size(buf->in); 190 cbuf = (char*)uprv_malloc(cbufSize); 191 if (cbuf == NULL) { 192 *error = U_MEMORY_ALLOCATION_ERROR; 193 return NULL; 194 } 195 inputRead= T_FileStream_read(buf->in,cbuf,cbufSize); 196 buf->remaining-=inputRead; 197 } 198 199 /* just to be sure...*/ 200 if ( 0 == inputRead ) 201 buf->remaining = 0; 202 203 target=pTarget; 204 /* convert the bytes */ 205 if(buf->conv){ 206 /* set the callback to stop */ 207 UConverterToUCallback toUOldAction ; 208 void* toUOldContext; 209 void* toUNewContext=NULL; 210 ucnv_setToUCallBack(buf->conv, 211 UCNV_TO_U_CALLBACK_STOP, 212 toUNewContext, 213 &toUOldAction, 214 (const void**)&toUOldContext, 215 error); 216 /* since state is saved in the converter we add offset to source*/ 217 target = pTarget+offset; 218 source = cbuf; 219 sourceLimit = source + inputRead; 220 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), 221 &source,sourceLimit,NULL, 222 (UBool)(buf->remaining==0),error); 223 224 if(U_FAILURE(*error)){ 225 char context[CONTEXT_LEN+1]; 226 char preContext[CONTEXT_LEN+1]; 227 char postContext[CONTEXT_LEN+1]; 228 int8_t len = CONTEXT_LEN; 229 int32_t start=0; 230 int32_t stop =0; 231 int32_t pos =0; 232 /* use erro1 to preserve the error code */ 233 UErrorCode error1 =U_ZERO_ERROR; 234 235 if( buf->showWarning==TRUE){ 236 fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while" 237 " converting input stream to target encoding: %s\n", 238 u_errorName(*error)); 239 } 240 241 242 /* now get the context chars */ 243 ucnv_getInvalidChars(buf->conv,context,&len,&error1); 244 context[len]= 0 ; /* null terminate the buffer */ 245 246 pos = (int32_t)(source - cbuf - len); 247 248 /* for pre-context */ 249 start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1)); 250 stop = pos-len; 251 252 memcpy(preContext,cbuf+start,stop-start); 253 /* null terminate the buffer */ 254 preContext[stop-start] = 0; 255 256 /* for post-context */ 257 start = pos+len; 258 stop = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf)); 259 260 memcpy(postContext,source,stop-start); 261 /* null terminate the buffer */ 262 postContext[stop-start] = 0; 263 264 if(buf->showWarning ==TRUE){ 265 /* print out the context */ 266 fprintf(stderr,"\tPre-context: %s\n",preContext); 267 fprintf(stderr,"\tContext: %s\n",context); 268 fprintf(stderr,"\tPost-context: %s\n", postContext); 269 } 270 271 /* reset the converter */ 272 ucnv_reset(buf->conv); 273 274 /* set the call back to substitute 275 * and restart conversion 276 */ 277 ucnv_setToUCallBack(buf->conv, 278 UCNV_TO_U_CALLBACK_SUBSTITUTE, 279 toUNewContext, 280 &toUOldAction, 281 (const void**)&toUOldContext, 282 &error1); 283 284 /* reset source and target start positions */ 285 target = pTarget+offset; 286 source = cbuf; 287 288 /* re convert */ 289 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), 290 &source,sourceLimit,NULL, 291 (UBool)(buf->remaining==0),&error1); 292 293 } 294 outputWritten = (int32_t)(target - pTarget); 295 296 297 #if DEBUG 298 { 299 int i; 300 target = pTarget; 301 for(i=0;i<numRead;i++){ 302 /* printf("%c", (char)(*target++));*/ 303 } 304 } 305 #endif 306 307 }else{ 308 u_charsToUChars(cbuf,target+offset,inputRead); 309 outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset); 310 } 311 buf->currentPos = pTarget; 312 buf->bufLimit=pTarget+outputWritten; 313 *buf->bufLimit=0; /*NUL terminate*/ 314 if(cbuf!=carr){ 315 uprv_free(cbuf); 316 } 317 return buf; 318 } 319 320 321 322 /* get a UChar from the stream*/ 323 U_CAPI int32_t U_EXPORT2 324 ucbuf_getc(UCHARBUF* buf,UErrorCode* error){ 325 if(error==NULL || U_FAILURE(*error)){ 326 return FALSE; 327 } 328 if(buf->currentPos>=buf->bufLimit){ 329 if(buf->remaining==0){ 330 return U_EOF; 331 } 332 buf=ucbuf_fillucbuf(buf,error); 333 if(U_FAILURE(*error)){ 334 return U_EOF; 335 } 336 } 337 338 return *(buf->currentPos++); 339 } 340 341 /* get a UChar32 from the stream*/ 342 U_CAPI int32_t U_EXPORT2 343 ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){ 344 int32_t retVal = (int32_t)U_EOF; 345 if(error==NULL || U_FAILURE(*error)){ 346 return FALSE; 347 } 348 if(buf->currentPos+1>=buf->bufLimit){ 349 if(buf->remaining==0){ 350 return U_EOF; 351 } 352 buf=ucbuf_fillucbuf(buf,error); 353 if(U_FAILURE(*error)){ 354 return U_EOF; 355 } 356 } 357 if(U16_IS_LEAD(*(buf->currentPos))){ 358 retVal=U16_GET_SUPPLEMENTARY(buf->currentPos[0],buf->currentPos[1]); 359 buf->currentPos+=2; 360 }else{ 361 retVal = *(buf->currentPos++); 362 } 363 return retVal; 364 } 365 366 /* u_unescapeAt() callback to return a UChar*/ 367 static UChar U_CALLCONV 368 _charAt(int32_t offset, void *context) { 369 return ((UCHARBUF*) context)->currentPos[offset]; 370 } 371 372 /* getc and escape it */ 373 U_CAPI int32_t U_EXPORT2 374 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) { 375 int32_t length; 376 int32_t offset; 377 UChar32 c32,c1,c2; 378 if(error==NULL || U_FAILURE(*error)){ 379 return FALSE; 380 } 381 /* Fill the buffer if it is empty */ 382 if (buf->currentPos >=buf->bufLimit-2) { 383 ucbuf_fillucbuf(buf,error); 384 } 385 386 /* Get the next character in the buffer */ 387 if (buf->currentPos < buf->bufLimit) { 388 c1 = *(buf->currentPos)++; 389 } else { 390 c1 = U_EOF; 391 } 392 393 c2 = *(buf->currentPos); 394 395 /* If it isn't a backslash, return it */ 396 if (c1 != 0x005C) { 397 return c1; 398 } 399 400 /* Determine the amount of data in the buffer */ 401 length = (int32_t)(buf->bufLimit - buf->currentPos); 402 403 /* The longest escape sequence is \Uhhhhhhhh; make sure 404 we have at least that many characters */ 405 if (length < 10) { 406 407 /* fill the buffer */ 408 ucbuf_fillucbuf(buf,error); 409 length = (int32_t)(buf->bufLimit - buf->buffer); 410 } 411 412 /* Process the escape */ 413 offset = 0; 414 c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf); 415 416 /* check if u_unescapeAt unescaped and converted 417 * to c32 or not 418 */ 419 if(c32==0xFFFFFFFF){ 420 if(buf->showWarning) { 421 char context[CONTEXT_LEN+1]; 422 int32_t len = CONTEXT_LEN; 423 if(length < len) { 424 len = length; 425 } 426 context[len]= 0 ; /* null terminate the buffer */ 427 u_UCharsToChars( buf->currentPos, context, len); 428 fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context); 429 } 430 *error= U_ILLEGAL_ESCAPE_SEQUENCE; 431 return c1; 432 }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){ 433 /* Update the current buffer position */ 434 buf->currentPos += offset; 435 }else{ 436 /* unescaping failed so we just return 437 * c1 and not consume the buffer 438 * this is useful for rules with escapes 439 * in resouce bundles 440 * eg: \' \\ \" 441 */ 442 return c1; 443 } 444 445 return c32; 446 } 447 448 U_CAPI UCHARBUF* U_EXPORT2 449 ucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){ 450 451 FileStream* in = NULL; 452 int32_t fileSize=0; 453 const char* knownCp; 454 if(error==NULL || U_FAILURE(*error)){ 455 return NULL; 456 } 457 if(cp==NULL || fileName==NULL){ 458 *error = U_ILLEGAL_ARGUMENT_ERROR; 459 return FALSE; 460 } 461 if (!uprv_strcmp(fileName, "-")) { 462 in = T_FileStream_stdin(); 463 }else{ 464 in = T_FileStream_open(fileName, "rb"); 465 } 466 467 if(in!=NULL){ 468 UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF)); 469 fileSize = T_FileStream_size(in); 470 if(buf == NULL){ 471 *error = U_MEMORY_ALLOCATION_ERROR; 472 T_FileStream_close(in); 473 return NULL; 474 } 475 buf->in=in; 476 buf->conv=NULL; 477 buf->showWarning = showWarning; 478 buf->isBuffered = buffered; 479 buf->signatureLength=0; 480 if(*cp==NULL || **cp=='\0'){ 481 /* don't have code page name... try to autodetect */ 482 ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error); 483 }else if(ucbuf_isCPKnown(*cp)){ 484 /* discard BOM */ 485 ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error); 486 } 487 if(U_SUCCESS(*error) && buf->conv==NULL) { 488 buf->conv=ucnv_open(*cp,error); 489 } 490 if(U_FAILURE(*error)){ 491 ucnv_close(buf->conv); 492 uprv_free(buf); 493 T_FileStream_close(in); 494 return NULL; 495 } 496 497 if((buf->conv==NULL) && (buf->showWarning==TRUE)){ 498 fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n"); 499 } 500 buf->remaining=fileSize-buf->signatureLength; 501 if(buf->isBuffered){ 502 buf->bufCapacity=MAX_U_BUF; 503 }else{ 504 buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/; 505 } 506 buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity ); 507 if (buf->buffer == NULL) { 508 *error = U_MEMORY_ALLOCATION_ERROR; 509 ucbuf_close(buf); 510 return NULL; 511 } 512 buf->currentPos=buf->buffer; 513 buf->bufLimit=buf->buffer; 514 if(U_FAILURE(*error)){ 515 fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error)); 516 ucbuf_close(buf); 517 return NULL; 518 } 519 ucbuf_fillucbuf(buf,error); 520 if(U_FAILURE(*error)){ 521 ucbuf_close(buf); 522 return NULL; 523 } 524 return buf; 525 } 526 *error =U_FILE_ACCESS_ERROR; 527 return NULL; 528 } 529 530 531 532 /* TODO: this method will fail if at the 533 * begining of buffer and the uchar to unget 534 * is from the previous buffer. Need to implement 535 * system to take care of that situation. 536 */ 537 U_CAPI void U_EXPORT2 538 ucbuf_ungetc(int32_t c,UCHARBUF* buf){ 539 /* decrement currentPos pointer 540 * if not at the begining of buffer 541 */ 542 if(buf->currentPos!=buf->buffer){ 543 if(*(buf->currentPos-1)==c){ 544 buf->currentPos--; 545 } else { 546 /* ungetc failed - did not match. */ 547 } 548 } else { 549 /* ungetc failed - beginning of buffer. */ 550 } 551 } 552 553 /* frees the resources of UChar* buffer */ 554 static void 555 ucbuf_closebuf(UCHARBUF* buf){ 556 uprv_free(buf->buffer); 557 buf->buffer = NULL; 558 } 559 560 /* close the buf and release resources*/ 561 U_CAPI void U_EXPORT2 562 ucbuf_close(UCHARBUF* buf){ 563 if(buf!=NULL){ 564 if(buf->conv){ 565 ucnv_close(buf->conv); 566 } 567 T_FileStream_close(buf->in); 568 ucbuf_closebuf(buf); 569 uprv_free(buf); 570 } 571 } 572 573 /* rewind the buf and file stream */ 574 U_CAPI void U_EXPORT2 575 ucbuf_rewind(UCHARBUF* buf,UErrorCode* error){ 576 if(error==NULL || U_FAILURE(*error)){ 577 return; 578 } 579 if(buf){ 580 buf->currentPos=buf->buffer; 581 buf->bufLimit=buf->buffer; 582 T_FileStream_rewind(buf->in); 583 buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength; 584 585 ucnv_resetToUnicode(buf->conv); 586 if(buf->signatureLength>0) { 587 UChar target[1]={ 0 }; 588 UChar* pTarget; 589 char start[8]; 590 const char* pStart; 591 int32_t numRead; 592 593 /* read the signature bytes */ 594 numRead=T_FileStream_read(buf->in, start, buf->signatureLength); 595 596 /* convert and ignore initial U+FEFF, and the buffer overflow */ 597 pTarget = target; 598 pStart = start; 599 ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error); 600 if(*error==U_BUFFER_OVERFLOW_ERROR) { 601 *error=U_ZERO_ERROR; 602 } 603 604 /* verify that we successfully read exactly U+FEFF */ 605 if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) { 606 *error=U_INTERNAL_PROGRAM_ERROR; 607 } 608 } 609 } 610 } 611 612 613 U_CAPI int32_t U_EXPORT2 614 ucbuf_size(UCHARBUF* buf){ 615 if(buf){ 616 if(buf->isBuffered){ 617 return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv); 618 }else{ 619 return (int32_t)(buf->bufLimit - buf->buffer); 620 } 621 } 622 return 0; 623 } 624 625 U_CAPI const UChar* U_EXPORT2 626 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){ 627 if(error==NULL || U_FAILURE(*error)){ 628 return NULL; 629 } 630 if(buf==NULL || len==NULL){ 631 *error = U_ILLEGAL_ARGUMENT_ERROR; 632 return NULL; 633 } 634 *len = (int32_t)(buf->bufLimit - buf->buffer); 635 return buf->buffer; 636 } 637 638 U_CAPI const char* U_EXPORT2 639 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){ 640 int32_t requiredLen = 0; 641 int32_t dirlen = 0; 642 int32_t filelen = 0; 643 if(status==NULL || U_FAILURE(*status)){ 644 return NULL; 645 } 646 647 if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){ 648 *status = U_ILLEGAL_ARGUMENT_ERROR; 649 return NULL; 650 } 651 652 653 dirlen = (int32_t)uprv_strlen(inputDir); 654 filelen = (int32_t)uprv_strlen(fileName); 655 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) { 656 requiredLen = dirlen + filelen + 2; 657 if((*len < requiredLen) || target==NULL){ 658 *len = requiredLen; 659 *status = U_BUFFER_OVERFLOW_ERROR; 660 return NULL; 661 } 662 663 target[0] = '\0'; 664 /* 665 * append the input dir to openFileName if the first char in 666 * filename is not file seperation char and the last char input directory is not '.'. 667 * This is to support : 668 * genrb -s. /home/icu/data 669 * genrb -s. icu/data 670 * The user cannot mix notations like 671 * genrb -s. /icu/data --- the absolute path specified. -s redundant 672 * user should use 673 * genrb -s. icu/data --- start from CWD and look in icu/data dir 674 */ 675 if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){ 676 uprv_strcpy(target, inputDir); 677 target[dirlen] = U_FILE_SEP_CHAR; 678 } 679 target[dirlen + 1] = '\0'; 680 } else { 681 requiredLen = dirlen + filelen + 1; 682 if((*len < requiredLen) || target==NULL){ 683 *len = requiredLen; 684 *status = U_BUFFER_OVERFLOW_ERROR; 685 return NULL; 686 } 687 688 uprv_strcpy(target, inputDir); 689 } 690 691 uprv_strcat(target, fileName); 692 return target; 693 } 694 /* 695 * Unicode TR 13 says any of the below chars is 696 * a new line char in a readline function in addition 697 * to CR+LF combination which needs to be 698 * handled seperately 699 */ 700 static UBool ucbuf_isCharNewLine(UChar c){ 701 switch(c){ 702 case 0x000A: /* LF */ 703 case 0x000D: /* CR */ 704 case 0x000C: /* FF */ 705 case 0x0085: /* NEL */ 706 case 0x2028: /* LS */ 707 case 0x2029: /* PS */ 708 return TRUE; 709 default: 710 return FALSE; 711 } 712 } 713 714 U_CAPI const UChar* U_EXPORT2 715 ucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){ 716 UChar* temp = buf->currentPos; 717 UChar* savePos =NULL; 718 UChar c=0x0000; 719 if(buf->isBuffered){ 720 /* The input is buffered we have to do more 721 * for returning a pointer U_TRUNCATED_CHAR_FOUND 722 */ 723 for(;;){ 724 c = *temp++; 725 if(buf->remaining==0){ 726 return NULL; /* end of file is reached return NULL */ 727 } 728 if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){ 729 *err= U_TRUNCATED_CHAR_FOUND; 730 return NULL; 731 }else{ 732 ucbuf_fillucbuf(buf,err); 733 if(U_FAILURE(*err)){ 734 return NULL; 735 } 736 } 737 /* 738 * Accoding to TR 13 readLine functions must interpret 739 * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators 740 */ 741 /* Windows CR LF */ 742 if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){ 743 *len = (int32_t)(temp++ - buf->currentPos); 744 savePos = buf->currentPos; 745 buf->currentPos = temp; 746 return savePos; 747 } 748 /* else */ 749 750 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){ /* Unipad inserts 2028 line separators! */ 751 *len = (int32_t)(temp - buf->currentPos); 752 savePos = buf->currentPos; 753 buf->currentPos = temp; 754 return savePos; 755 } 756 } 757 }else{ 758 /* we know that all input is read into the internal 759 * buffer so we can safely return pointers 760 */ 761 for(;;){ 762 c = *temp++; 763 764 if(buf->currentPos==buf->bufLimit){ 765 return NULL; /* end of file is reached return NULL */ 766 } 767 /* Windows CR LF */ 768 if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){ 769 *len = (int32_t)(temp++ - buf->currentPos); 770 savePos = buf->currentPos; 771 buf->currentPos = temp; 772 return savePos; 773 } 774 /* else */ 775 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) { /* Unipad inserts 2028 line separators! */ 776 *len = (int32_t)(temp - buf->currentPos); 777 savePos = buf->currentPos; 778 buf->currentPos = temp; 779 return savePos; 780 } 781 } 782 } 783 /* not reached */ 784 /* A compiler warning will appear if all paths don't contain a return statement. */ 785 /* return NULL;*/ 786 } 787 #endif 788