Home | History | Annotate | Download | only in toolutil
// © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 1998-2016, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *
     11 * File ucbuf.cpp
     12 *
     13 * Modification History:
     14 *
     15 *   Date        Name        Description
     16 *   05/10/01    Ram         Creation.
     17 *******************************************************************************
     18 */
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/putil.h"
     22 #include "unicode/uchar.h"
     23 #include "unicode/ucnv.h"
     24 #include "unicode/ucnv_err.h"
     25 #include "unicode/ustring.h"
     26 #include "unicode/utf16.h"
     27 #include "filestrm.h"
     28 #include "cstring.h"
     29 #include "cmemory.h"
     30 #include "ustrfmt.h"
     31 #include "ucbuf.h"
     32 #include <stdio.h>
     33 
     34 #if !UCONFIG_NO_CONVERSION
     35 
     36 
/* Number of raw bytes read from the file per refill in buffered mode. */
#define MAX_IN_BUF 1000
/* Capacity, in UChars, of the converted-text buffer in buffered mode. */
#define MAX_U_BUF 1500
/* Maximum number of bytes/chars of context captured for diagnostic messages. */
#define CONTEXT_LEN 20
     40 
/*
 * State of a reader that converts the bytes of a file stream into UChars
 * and hands them out from an internal buffer.
 */
struct UCHARBUF {
    UChar* buffer;           /* start of the UChar storage */
    UChar* currentPos;       /* next UChar to be handed out */
    UChar* bufLimit;         /* one past the last valid UChar; a NUL is stored here after each fill */
    int32_t bufCapacity;     /* number of UChars allocated for buffer */
    int32_t remaining;       /* bytes of the file not yet read; 0 is treated as end of input */
    int32_t signatureLength; /* length in bytes of the Unicode signature (BOM) skipped at the start of the file */
    FileStream* in;          /* underlying input stream */
    UConverter* conv;        /* converter used to turn the file bytes into UChars */
    UBool showWarning;       /* when TRUE, diagnostics are printed to stderr */
    UBool isBuffered;        /* TRUE: refill in MAX_IN_BUF chunks; FALSE: the whole file is read in one fill */
};
     53 
     54 U_CAPI UBool U_EXPORT2
     55 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){
     56     char start[8];
     57     int32_t numRead;
     58 
     59     UChar target[1]={ 0 };
     60     UChar* pTarget;
     61     const char* pStart;
     62 
     63     /* read a few bytes */
     64     numRead=T_FileStream_read(in, start, sizeof(start));
     65 
     66     *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error);
     67 
     68     /* unread the bytes beyond what was consumed for U+FEFF */
     69     T_FileStream_rewind(in);
     70     if (*signatureLength > 0) {
     71         T_FileStream_read(in, start, *signatureLength);
     72     }
     73 
     74     if(*cp==NULL){
     75         *conv =NULL;
     76         return FALSE;
     77     }
     78 
     79     /* open the converter for the detected Unicode charset */
     80     *conv = ucnv_open(*cp,error);
     81 
     82     /* convert and ignore initial U+FEFF, and the buffer overflow */
     83     pTarget = target;
     84     pStart = start;
     85     ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error);
     86     *signatureLength = (int32_t)(pStart - start);
     87     if(*error==U_BUFFER_OVERFLOW_ERROR) {
     88         *error=U_ZERO_ERROR;
     89     }
     90 
     91     /* verify that we successfully read exactly U+FEFF */
     92     if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) {
     93         *error=U_INTERNAL_PROGRAM_ERROR;
     94     }
     95 
     96 
     97     return TRUE;
     98 }
     99 static UBool ucbuf_isCPKnown(const char* cp){
    100     if(ucnv_compareNames("UTF-8",cp)==0){
    101         return TRUE;
    102     }
    103     if(ucnv_compareNames("UTF-16BE",cp)==0){
    104         return TRUE;
    105     }
    106     if(ucnv_compareNames("UTF-16LE",cp)==0){
    107         return TRUE;
    108     }
    109     if(ucnv_compareNames("UTF-16",cp)==0){
    110         return TRUE;
    111     }
    112     if(ucnv_compareNames("UTF-32",cp)==0){
    113         return TRUE;
    114     }
    115     if(ucnv_compareNames("UTF-32BE",cp)==0){
    116         return TRUE;
    117     }
    118     if(ucnv_compareNames("UTF-32LE",cp)==0){
    119         return TRUE;
    120     }
    121     if(ucnv_compareNames("SCSU",cp)==0){
    122         return TRUE;
    123     }
    124     if(ucnv_compareNames("BOCU-1",cp)==0){
    125         return TRUE;
    126     }
    127     if(ucnv_compareNames("UTF-7",cp)==0){
    128         return TRUE;
    129     }
    130     return FALSE;
    131 }
    132 
    133 U_CAPI FileStream * U_EXPORT2
    134 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){
    135     FileStream* in=NULL;
    136     if(error==NULL || U_FAILURE(*error)){
    137         return NULL;
    138     }
    139     if(conv==NULL || cp==NULL || fileName==NULL){
    140         *error = U_ILLEGAL_ARGUMENT_ERROR;
    141         return NULL;
    142     }
    143     /* open the file */
    144     in= T_FileStream_open(fileName,"rb");
    145 
    146     if(in == NULL){
    147         *error=U_FILE_ACCESS_ERROR;
    148         return NULL;
    149     }
    150 
    151     if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) {
    152         return in;
    153     } else {
    154         ucnv_close(*conv);
    155         *conv=NULL;
    156         T_FileStream_close(in);
    157         return NULL;
    158     }
    159 }
    160 
    161 /* fill the uchar buffer */
    162 static UCHARBUF*
    163 ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){
    164     UChar* pTarget=NULL;
    165     UChar* target=NULL;
    166     const char* source=NULL;
    167     char  carr[MAX_IN_BUF] = {'\0'};
    168     char* cbuf =  carr;
    169     int32_t inputRead=0;
    170     int32_t outputWritten=0;
    171     int32_t offset=0;
    172     const char* sourceLimit =NULL;
    173     int32_t cbufSize=0;
    174     pTarget = buf->buffer;
    175     /* check if we arrived here without exhausting the buffer*/
    176     if(buf->currentPos<buf->bufLimit){
    177         offset = (int32_t)(buf->bufLimit-buf->currentPos);
    178         memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar));
    179     }
    180 
    181 #if UCBUF_DEBUG
    182     memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset));
    183 #endif
    184     if(buf->isBuffered){
    185         cbufSize = MAX_IN_BUF;
    186         /* read the file */
    187         inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset);
    188         buf->remaining-=inputRead;
    189 
    190     }else{
    191         cbufSize = T_FileStream_size(buf->in);
    192         cbuf = (char*)uprv_malloc(cbufSize);
    193         if (cbuf == NULL) {
    194         	*error = U_MEMORY_ALLOCATION_ERROR;
    195         	return NULL;
    196         }
    197         inputRead= T_FileStream_read(buf->in,cbuf,cbufSize);
    198         buf->remaining-=inputRead;
    199     }
    200 
    201     /* just to be sure...*/
    202     if ( 0 == inputRead )
    203        buf->remaining = 0;
    204 
    205     target=pTarget;
    206     /* convert the bytes */
    207     if(buf->conv){
    208         /* set the callback to stop */
    209         UConverterToUCallback toUOldAction ;
    210         void* toUOldContext;
    211         void* toUNewContext=NULL;
    212         ucnv_setToUCallBack(buf->conv,
    213            UCNV_TO_U_CALLBACK_STOP,
    214            toUNewContext,
    215            &toUOldAction,
    216            (const void**)&toUOldContext,
    217            error);
    218         /* since state is saved in the converter we add offset to source*/
    219         target = pTarget+offset;
    220         source = cbuf;
    221         sourceLimit = source + inputRead;
    222         ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
    223                         &source,sourceLimit,NULL,
    224                         (UBool)(buf->remaining==0),error);
    225 
    226         if(U_FAILURE(*error)){
    227             char context[CONTEXT_LEN+1];
    228             char preContext[CONTEXT_LEN+1];
    229             char postContext[CONTEXT_LEN+1];
    230             int8_t len = CONTEXT_LEN;
    231             int32_t start=0;
    232             int32_t stop =0;
    233             int32_t pos =0;
    234             /* use erro1 to preserve the error code */
    235             UErrorCode error1 =U_ZERO_ERROR;
    236 
    237             if( buf->showWarning==TRUE){
    238                 fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while"
    239                                " converting input stream to target encoding: %s\n",
    240                                u_errorName(*error));
    241             }
    242 
    243 
    244             /* now get the context chars */
    245             ucnv_getInvalidChars(buf->conv,context,&len,&error1);
    246             context[len]= 0 ; /* null terminate the buffer */
    247 
    248             pos = (int32_t)(source - cbuf - len);
    249 
    250             /* for pre-context */
    251             start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1));
    252             stop  = pos-len;
    253 
    254             memcpy(preContext,cbuf+start,stop-start);
    255             /* null terminate the buffer */
    256             preContext[stop-start] = 0;
    257 
    258             /* for post-context */
    259             start = pos+len;
    260             stop  = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf));
    261 
    262             memcpy(postContext,source,stop-start);
    263             /* null terminate the buffer */
    264             postContext[stop-start] = 0;
    265 
    266             if(buf->showWarning ==TRUE){
    267                 /* print out the context */
    268                 fprintf(stderr,"\tPre-context: %s\n",preContext);
    269                 fprintf(stderr,"\tContext: %s\n",context);
    270                 fprintf(stderr,"\tPost-context: %s\n", postContext);
    271             }
    272 
    273             /* reset the converter */
    274             ucnv_reset(buf->conv);
    275 
    276             /* set the call back to substitute
    277              * and restart conversion
    278              */
    279             ucnv_setToUCallBack(buf->conv,
    280                UCNV_TO_U_CALLBACK_SUBSTITUTE,
    281                toUNewContext,
    282                &toUOldAction,
    283                (const void**)&toUOldContext,
    284                &error1);
    285 
    286             /* reset source and target start positions */
    287             target = pTarget+offset;
    288             source = cbuf;
    289 
    290             /* re convert */
    291             ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
    292                             &source,sourceLimit,NULL,
    293                             (UBool)(buf->remaining==0),&error1);
    294 
    295         }
    296         outputWritten = (int32_t)(target - pTarget);
    297 
    298 #if UCBUF_DEBUG
    299         {
    300             int i;
    301             target = pTarget;
    302             for(i=0;i<numRead;i++){
    303               /*  printf("%c", (char)(*target++));*/
    304             }
    305         }
    306 #endif
    307 
    308     }else{
    309         u_charsToUChars(cbuf,target+offset,inputRead);
    310         outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset);
    311     }
    312     buf->currentPos = pTarget;
    313     buf->bufLimit=pTarget+outputWritten;
    314     *buf->bufLimit=0; /*NUL terminate*/
    315     if(cbuf!=carr){
    316         uprv_free(cbuf);
    317     }
    318     return buf;
    319 }
    320 
    321 
    322 
    323 /* get a UChar from the stream*/
    324 U_CAPI int32_t U_EXPORT2
    325 ucbuf_getc(UCHARBUF* buf,UErrorCode* error){
    326     if(error==NULL || U_FAILURE(*error)){
    327         return FALSE;
    328     }
    329     if(buf->currentPos>=buf->bufLimit){
    330         if(buf->remaining==0){
    331             return U_EOF;
    332         }
    333         buf=ucbuf_fillucbuf(buf,error);
    334         if(U_FAILURE(*error)){
    335             return U_EOF;
    336         }
    337     }
    338 
    339     return *(buf->currentPos++);
    340 }
    341 
    342 /* get a UChar32 from the stream*/
    343 U_CAPI int32_t U_EXPORT2
    344 ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){
    345     int32_t retVal = (int32_t)U_EOF;
    346     if(error==NULL || U_FAILURE(*error)){
    347         return FALSE;
    348     }
    349     if(buf->currentPos+1>=buf->bufLimit){
    350         if(buf->remaining==0){
    351             return U_EOF;
    352         }
    353         buf=ucbuf_fillucbuf(buf,error);
    354         if(U_FAILURE(*error)){
    355             return U_EOF;
    356         }
    357     }
    358     if(U16_IS_LEAD(*(buf->currentPos))){
    359         retVal=U16_GET_SUPPLEMENTARY(buf->currentPos[0],buf->currentPos[1]);
    360         buf->currentPos+=2;
    361     }else{
    362         retVal = *(buf->currentPos++);
    363     }
    364     return retVal;
    365 }
    366 
    367 /* u_unescapeAt() callback to return a UChar*/
    368 static UChar U_CALLCONV
    369 _charAt(int32_t offset, void *context) {
    370     return ((UCHARBUF*) context)->currentPos[offset];
    371 }
    372 
    373 /* getc and escape it */
    374 U_CAPI int32_t U_EXPORT2
    375 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) {
    376     int32_t length;
    377     int32_t offset;
    378     UChar32 c32,c1,c2;
    379     if(error==NULL || U_FAILURE(*error)){
    380         return FALSE;
    381     }
    382     /* Fill the buffer if it is empty */
    383     if (buf->currentPos >=buf->bufLimit-2) {
    384         ucbuf_fillucbuf(buf,error);
    385     }
    386 
    387     /* Get the next character in the buffer */
    388     if (buf->currentPos < buf->bufLimit) {
    389         c1 = *(buf->currentPos)++;
    390     } else {
    391         c1 = U_EOF;
    392     }
    393 
    394     c2 = *(buf->currentPos);
    395 
    396     /* If it isn't a backslash, return it */
    397     if (c1 != 0x005C) {
    398         return c1;
    399     }
    400 
    401     /* Determine the amount of data in the buffer */
    402     length = (int32_t)(buf->bufLimit - buf->currentPos);
    403 
    404     /* The longest escape sequence is \Uhhhhhhhh; make sure
    405        we have at least that many characters */
    406     if (length < 10) {
    407 
    408         /* fill the buffer */
    409         ucbuf_fillucbuf(buf,error);
    410         length = (int32_t)(buf->bufLimit - buf->buffer);
    411     }
    412 
    413     /* Process the escape */
    414     offset = 0;
    415     c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf);
    416 
    417     /* check if u_unescapeAt unescaped and converted
    418      * to c32 or not
    419      */
    420     if(c32==(UChar32)0xFFFFFFFF){
    421         if(buf->showWarning) {
    422             char context[CONTEXT_LEN+1];
    423             int32_t len = CONTEXT_LEN;
    424             if(length < len) {
    425                 len = length;
    426             }
    427             context[len]= 0 ; /* null terminate the buffer */
    428             u_UCharsToChars( buf->currentPos, context, len);
    429             fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context);
    430         }
    431         *error= U_ILLEGAL_ESCAPE_SEQUENCE;
    432         return c1;
    433     }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){
    434         /* Update the current buffer position */
    435         buf->currentPos += offset;
    436     }else{
    437         /* unescaping failed so we just return
    438          * c1 and not consume the buffer
    439          * this is useful for rules with escapes
    440          * in resouce bundles
    441          * eg: \' \\ \"
    442          */
    443         return c1;
    444     }
    445 
    446     return c32;
    447 }
    448 
    449 U_CAPI UCHARBUF* U_EXPORT2
    450 ucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){
    451 
    452     FileStream* in = NULL;
    453     int32_t fileSize=0;
    454     const char* knownCp;
    455     if(error==NULL || U_FAILURE(*error)){
    456         return NULL;
    457     }
    458     if(cp==NULL || fileName==NULL){
    459         *error = U_ILLEGAL_ARGUMENT_ERROR;
    460         return FALSE;
    461     }
    462     if (!uprv_strcmp(fileName, "-")) {
    463         in = T_FileStream_stdin();
    464     }else{
    465         in = T_FileStream_open(fileName, "rb");
    466     }
    467 
    468     if(in!=NULL){
    469         UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF));
    470         fileSize = T_FileStream_size(in);
    471         if(buf == NULL){
    472             *error = U_MEMORY_ALLOCATION_ERROR;
    473             T_FileStream_close(in);
    474             return NULL;
    475         }
    476         buf->in=in;
    477         buf->conv=NULL;
    478         buf->showWarning = showWarning;
    479         buf->isBuffered = buffered;
    480         buf->signatureLength=0;
    481         if(*cp==NULL || **cp=='\0'){
    482             /* don't have code page name... try to autodetect */
    483             ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error);
    484         }else if(ucbuf_isCPKnown(*cp)){
    485             /* discard BOM */
    486             ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error);
    487         }
    488         if(U_SUCCESS(*error) && buf->conv==NULL) {
    489             buf->conv=ucnv_open(*cp,error);
    490         }
    491         if(U_FAILURE(*error)){
    492             ucnv_close(buf->conv);
    493             uprv_free(buf);
    494             T_FileStream_close(in);
    495             return NULL;
    496         }
    497 
    498         if((buf->conv==NULL) && (buf->showWarning==TRUE)){
    499             fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n");
    500         }
    501         buf->remaining=fileSize-buf->signatureLength;
    502         if(buf->isBuffered){
    503             buf->bufCapacity=MAX_U_BUF;
    504         }else{
    505             buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/;
    506         }
    507         buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity );
    508         if (buf->buffer == NULL) {
    509             *error = U_MEMORY_ALLOCATION_ERROR;
    510             ucbuf_close(buf);
    511             return NULL;
    512         }
    513         buf->currentPos=buf->buffer;
    514         buf->bufLimit=buf->buffer;
    515         if(U_FAILURE(*error)){
    516             fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error));
    517             ucbuf_close(buf);
    518             return NULL;
    519         }
    520         ucbuf_fillucbuf(buf,error);
    521         if(U_FAILURE(*error)){
    522             ucbuf_close(buf);
    523             return NULL;
    524         }
    525         return buf;
    526     }
    527     *error =U_FILE_ACCESS_ERROR;
    528     return NULL;
    529 }
    530 
    531 
    532 
    533 /* TODO: this method will fail if at the
    534  * begining of buffer and the uchar to unget
    535  * is from the previous buffer. Need to implement
    536  * system to take care of that situation.
    537  */
    538 U_CAPI void U_EXPORT2
    539 ucbuf_ungetc(int32_t c,UCHARBUF* buf){
    540     /* decrement currentPos pointer
    541      * if not at the begining of buffer
    542      */
    543     if(buf->currentPos!=buf->buffer){
    544         if(*(buf->currentPos-1)==c){
    545             buf->currentPos--;
    546         } else {
    547             /* ungetc failed - did not match. */
    548         }
    549     } else {
    550        /* ungetc failed - beginning of buffer. */
    551     }
    552 }
    553 
    554 /* frees the resources of UChar* buffer */
    555 static void
    556 ucbuf_closebuf(UCHARBUF* buf){
    557     uprv_free(buf->buffer);
    558     buf->buffer = NULL;
    559 }
    560 
    561 /* close the buf and release resources*/
    562 U_CAPI void U_EXPORT2
    563 ucbuf_close(UCHARBUF* buf){
    564     if(buf!=NULL){
    565         if(buf->conv){
    566             ucnv_close(buf->conv);
    567         }
    568         T_FileStream_close(buf->in);
    569         ucbuf_closebuf(buf);
    570         uprv_free(buf);
    571     }
    572 }
    573 
    574 /* rewind the buf and file stream */
    575 U_CAPI void U_EXPORT2
    576 ucbuf_rewind(UCHARBUF* buf,UErrorCode* error){
    577     if(error==NULL || U_FAILURE(*error)){
    578         return;
    579     }
    580     if(buf){
    581         buf->currentPos=buf->buffer;
    582         buf->bufLimit=buf->buffer;
    583         T_FileStream_rewind(buf->in);
    584         buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength;
    585 
    586         ucnv_resetToUnicode(buf->conv);
    587         if(buf->signatureLength>0) {
    588             UChar target[1]={ 0 };
    589             UChar* pTarget;
    590             char start[8];
    591             const char* pStart;
    592             int32_t numRead;
    593 
    594             /* read the signature bytes */
    595             numRead=T_FileStream_read(buf->in, start, buf->signatureLength);
    596 
    597             /* convert and ignore initial U+FEFF, and the buffer overflow */
    598             pTarget = target;
    599             pStart = start;
    600             ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error);
    601             if(*error==U_BUFFER_OVERFLOW_ERROR) {
    602                 *error=U_ZERO_ERROR;
    603             }
    604 
    605             /* verify that we successfully read exactly U+FEFF */
    606             if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) {
    607                 *error=U_INTERNAL_PROGRAM_ERROR;
    608             }
    609         }
    610     }
    611 }
    612 
    613 
    614 U_CAPI int32_t U_EXPORT2
    615 ucbuf_size(UCHARBUF* buf){
    616     if(buf){
    617         if(buf->isBuffered){
    618             return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv);
    619         }else{
    620             return (int32_t)(buf->bufLimit - buf->buffer);
    621         }
    622     }
    623     return 0;
    624 }
    625 
    626 U_CAPI const UChar* U_EXPORT2
    627 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){
    628     if(error==NULL || U_FAILURE(*error)){
    629         return NULL;
    630     }
    631     if(buf==NULL || len==NULL){
    632         *error = U_ILLEGAL_ARGUMENT_ERROR;
    633         return NULL;
    634     }
    635     *len = (int32_t)(buf->bufLimit - buf->buffer);
    636     return buf->buffer;
    637 }
    638 
    639 U_CAPI const char* U_EXPORT2
    640 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){
    641     int32_t requiredLen = 0;
    642     int32_t dirlen =  0;
    643     int32_t filelen = 0;
    644     if(status==NULL || U_FAILURE(*status)){
    645         return NULL;
    646     }
    647 
    648     if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){
    649         *status = U_ILLEGAL_ARGUMENT_ERROR;
    650         return NULL;
    651     }
    652 
    653 
    654     dirlen  = (int32_t)uprv_strlen(inputDir);
    655     filelen = (int32_t)uprv_strlen(fileName);
    656     if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
    657         requiredLen = dirlen + filelen + 2;
    658         if((*len < requiredLen) || target==NULL){
    659             *len = requiredLen;
    660             *status = U_BUFFER_OVERFLOW_ERROR;
    661             return NULL;
    662         }
    663 
    664         target[0] = '\0';
    665         /*
    666          * append the input dir to openFileName if the first char in
    667          * filename is not file seperation char and the last char input directory is  not '.'.
    668          * This is to support :
    669          * genrb -s. /home/icu/data
    670          * genrb -s. icu/data
    671          * The user cannot mix notations like
    672          * genrb -s. /icu/data --- the absolute path specified. -s redundant
    673          * user should use
    674          * genrb -s. icu/data  --- start from CWD and look in icu/data dir
    675          */
    676         if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
    677             uprv_strcpy(target, inputDir);
    678             target[dirlen]     = U_FILE_SEP_CHAR;
    679         }
    680         target[dirlen + 1] = '\0';
    681     } else {
    682         requiredLen = dirlen + filelen + 1;
    683         if((*len < requiredLen) || target==NULL){
    684             *len = requiredLen;
    685             *status = U_BUFFER_OVERFLOW_ERROR;
    686             return NULL;
    687         }
    688 
    689         uprv_strcpy(target, inputDir);
    690     }
    691 
    692     uprv_strcat(target, fileName);
    693     return target;
    694 }
    695 /*
    696  * Unicode TR 13 says any of the below chars is
    697  * a new line char in a readline function in addition
    698  * to CR+LF combination which needs to be
    699  * handled seperately
    700  */
    701 static UBool ucbuf_isCharNewLine(UChar c){
    702     switch(c){
    703     case 0x000A: /* LF  */
    704     case 0x000D: /* CR  */
    705     case 0x000C: /* FF  */
    706     case 0x0085: /* NEL */
    707     case 0x2028: /* LS  */
    708     case 0x2029: /* PS  */
    709         return TRUE;
    710     default:
    711         return FALSE;
    712     }
    713 }
    714 
/*
 * Returns a pointer to the next line in the reader's buffer and advances the
 * read position past it.
 *
 * A line ends at CR+LF, at any char accepted by ucbuf_isCharNewLine(), or at
 * the end of the buffered data. For CR+LF, *len counts the text up to and
 * including the CR while the read position also skips the LF; for the other
 * terminators *len includes the terminating char.
 *
 * @param buf reader to pull from
 * @param len receives the length of the returned line in UChars
 * @param err in/out ICU error code; the buffered branch sets
 *            U_TRUNCATED_CHAR_FOUND when the scan reaches the end of the
 *            buffered data while the line starts at the beginning of the buffer
 * @return pointer into the internal buffer, or NULL at end of input or on error
 *
 * NOTE(review): err is not checked on entry, unlike the other public functions
 * in this file.
 */
U_CAPI const UChar* U_EXPORT2
ucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){
    UChar* temp = buf->currentPos;
    UChar* savePos =NULL;
    UChar c=0x0000;
    if(buf->isBuffered){
        /* The input is buffered, so we have to do more work
        * before a pointer can be returned; U_TRUNCATED_CHAR_FOUND
        * is reported when the line does not fit.
        */
        for(;;){
            c = *temp++;
            /* NOTE(review): this returns NULL as soon as the whole file has been
             * read, even if unread text is still buffered - confirm intent. */
            if(buf->remaining==0){
                return NULL; /* end of file is reached return NULL */
            }
            if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){
                *err= U_TRUNCATED_CHAR_FOUND;
                return NULL;
            }else{
                /* NOTE(review): the refill runs on every iteration and moves the
                 * buffered text while temp is not adjusted - looks suspect. */
                ucbuf_fillucbuf(buf,err);
                if(U_FAILURE(*err)){
                    return NULL;
                }
            }
            /*
             * According to TR 13 readLine functions must interpret
             * CR, CR+LF, LF, NEL, PS, LS or FF as line separators
             */
            /* Windows CR LF */
            if(c ==0x0d && temp <= buf->bufLimit && *temp == 0x0a ){
                *len = (int32_t)(temp++ - buf->currentPos);
                savePos = buf->currentPos;
                buf->currentPos = temp;
                return savePos;
            }
            /* else */

            if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){  /* Unipad inserts 2028 line separators! */
                *len = (int32_t)(temp - buf->currentPos);
                savePos = buf->currentPos;
                buf->currentPos = temp;
                return savePos;
            }
        }
    }else{
    /* we know that all input is read into the internal
    * buffer so we can safely return pointers
        */
        for(;;){
            c = *temp++;

            /* nothing left between the read position and the end of the data */
            if(buf->currentPos==buf->bufLimit){
                return NULL; /* end of file is reached return NULL */
            }
            /* Windows CR LF */
            if(c ==0x0d && temp <= buf->bufLimit && *temp == 0x0a ){
                *len = (int32_t)(temp++ - buf->currentPos);
                savePos = buf->currentPos;
                buf->currentPos = temp;
                return savePos;
            }
            /* else */
            if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) {  /* Unipad inserts 2028 line separators! */
                *len = (int32_t)(temp - buf->currentPos);
                savePos = buf->currentPos;
                buf->currentPos = temp;
                return savePos;
            }
        }
    }
    /* not reached */
    /* A compiler warning will appear if all paths don't contain a return statement. */
/*    return NULL;*/
}
    788 #endif
    789