Home | History | Annotate | Download | only in common
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 *
      6 *   Copyright (C) 2000-2016, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 ******************************************************************************
     10 *   file name:  ucnvscsu.c
     11 *   encoding:   US-ASCII
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2000nov18
     16 *   created by: Markus W. Scherer
     17 *
     18 *   This is an implementation of the Standard Compression Scheme for Unicode
     19 *   as defined in http://www.unicode.org/unicode/reports/tr6/ .
     20 *   Reserved commands and window settings are treated as illegal sequences and
     21 *   will result in callback calls.
     22 */
     23 
     24 #include "unicode/utypes.h"
     25 
     26 #if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
     27 
     28 #include "unicode/ucnv.h"
     29 #include "unicode/ucnv_cb.h"
     30 #include "unicode/utf16.h"
     31 #include "ucnv_bld.h"
     32 #include "ucnv_cnv.h"
     33 #include "cmemory.h"
     34 
     35 /* SCSU definitions --------------------------------------------------------- */
     36 
     37 /* SCSU command byte values */
     38 enum {
     39     SQ0=0x01, /* Quote from window pair 0 */
     40     SQ7=0x08, /* Quote from window pair 7 */
     41     SDX=0x0B, /* Define a window as extended */
     42     Srs=0x0C, /* reserved */
     43     SQU=0x0E, /* Quote a single Unicode character */
     44     SCU=0x0F, /* Change to Unicode mode */
     45     SC0=0x10, /* Select window 0 */
     46     SC7=0x17, /* Select window 7 */
     47     SD0=0x18, /* Define and select window 0 */
     48     SD7=0x1F, /* Define and select window 7 */
     49 
     50     UC0=0xE0, /* Select window 0 */
     51     UC7=0xE7, /* Select window 7 */
     52     UD0=0xE8, /* Define and select window 0 */
     53     UD7=0xEF, /* Define and select window 7 */
     54     UQU=0xF0, /* Quote a single Unicode character */
     55     UDX=0xF1, /* Define a Window as extended */
     56     Urs=0xF2  /* reserved */
     57 };
     58 
     59 enum {
     60     /*
     61      * Unicode code points from 3400 to E000 are not adressible by
     62      * dynamic window, since in these areas no short run alphabets are
     63      * found. Therefore add gapOffset to all values from gapThreshold.
     64      */
     65     gapThreshold=0x68,
     66     gapOffset=0xAC00,
     67 
     68     /* values between reservedStart and fixedThreshold are reserved */
     69     reservedStart=0xA8,
     70 
     71     /* use table of predefined fixed offsets for values from fixedThreshold */
     72     fixedThreshold=0xF9
     73 };
     74 
     75 /* constant offsets for the 8 static windows */
     76 static const uint32_t staticOffsets[8]={
     77     0x0000, /* ASCII for quoted tags */
     78     0x0080, /* Latin - 1 Supplement (for access to punctuation) */
     79     0x0100, /* Latin Extended-A */
     80     0x0300, /* Combining Diacritical Marks */
     81     0x2000, /* General Punctuation */
     82     0x2080, /* Currency Symbols */
     83     0x2100, /* Letterlike Symbols and Number Forms */
     84     0x3000  /* CJK Symbols and punctuation */
     85 };
     86 
     87 /* initial offsets for the 8 dynamic (sliding) windows */
     88 static const uint32_t initialDynamicOffsets[8]={
     89     0x0080, /* Latin-1 */
     90     0x00C0, /* Latin Extended A */
     91     0x0400, /* Cyrillic */
     92     0x0600, /* Arabic */
     93     0x0900, /* Devanagari */
     94     0x3040, /* Hiragana */
     95     0x30A0, /* Katakana */
     96     0xFF00  /* Fullwidth ASCII */
     97 };
     98 
     99 /* Table of fixed predefined Offsets */
    100 static const uint32_t fixedOffsets[]={
    101     /* 0xF9 */ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */
    102     /* 0xFA */ 0x0250, /* IPA extensions */
    103     /* 0xFB */ 0x0370, /* Greek */
    104     /* 0xFC */ 0x0530, /* Armenian */
    105     /* 0xFD */ 0x3040, /* Hiragana */
    106     /* 0xFE */ 0x30A0, /* Katakana */
    107     /* 0xFF */ 0xFF60  /* Halfwidth Katakana */
    108 };
    109 
    110 /* state values */
    111 enum {
    112     readCommand,
    113     quotePairOne,
    114     quotePairTwo,
    115     quoteOne,
    116     definePairOne,
    117     definePairTwo,
    118     defineOne
    119 };
    120 
    121 typedef struct SCSUData {
    122     /* dynamic window offsets, intitialize to default values from initialDynamicOffsets */
    123     uint32_t toUDynamicOffsets[8];
    124     uint32_t fromUDynamicOffsets[8];
    125 
    126     /* state machine state - toUnicode */
    127     UBool toUIsSingleByteMode;
    128     uint8_t toUState;
    129     int8_t toUQuoteWindow, toUDynamicWindow;
    130     uint8_t toUByteOne;
    131     uint8_t toUPadding[3];
    132 
    133     /* state machine state - fromUnicode */
    134     UBool fromUIsSingleByteMode;
    135     int8_t fromUDynamicWindow;
    136 
    137     /*
    138      * windowUse[] keeps track of the use of the dynamic windows:
    139      * At nextWindowUseIndex there is the least recently used window,
    140      * and the following windows (in a wrapping manner) are more and more
    141      * recently used.
    142      * At nextWindowUseIndex-1 there is the most recently used window.
    143      */
    144     uint8_t locale;
    145     int8_t nextWindowUseIndex;
    146     int8_t windowUse[8];
    147 } SCSUData;
    148 
    149 static const int8_t initialWindowUse[8]={ 7, 0, 3, 2, 4, 5, 6, 1 };
    150 static const int8_t initialWindowUse_ja[8]={ 3, 2, 4, 1, 0, 7, 5, 6 };
    151 
    152 enum {
    153     lGeneric, l_ja
    154 };
    155 
    156 /* SCSU setup functions ----------------------------------------------------- */
    157 
    158 static void
    159 _SCSUReset(UConverter *cnv, UConverterResetChoice choice) {
    160     SCSUData *scsu=(SCSUData *)cnv->extraInfo;
    161 
    162     if(choice<=UCNV_RESET_TO_UNICODE) {
    163         /* reset toUnicode */
    164         uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32);
    165 
    166         scsu->toUIsSingleByteMode=TRUE;
    167         scsu->toUState=readCommand;
    168         scsu->toUQuoteWindow=scsu->toUDynamicWindow=0;
    169         scsu->toUByteOne=0;
    170 
    171         cnv->toULength=0;
    172     }
    173     if(choice!=UCNV_RESET_TO_UNICODE) {
    174         /* reset fromUnicode */
    175         uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32);
    176 
    177         scsu->fromUIsSingleByteMode=TRUE;
    178         scsu->fromUDynamicWindow=0;
    179 
    180         scsu->nextWindowUseIndex=0;
    181         switch(scsu->locale) {
    182         case l_ja:
    183             uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8);
    184             break;
    185         default:
    186             uprv_memcpy(scsu->windowUse, initialWindowUse, 8);
    187             break;
    188         }
    189 
    190         cnv->fromUChar32=0;
    191     }
    192 }
    193 
    194 static void
    195 _SCSUOpen(UConverter *cnv,
    196           UConverterLoadArgs *pArgs,
    197           UErrorCode *pErrorCode) {
    198     const char *locale=pArgs->locale;
    199     if(pArgs->onlyTestIsLoadable) {
    200         return;
    201     }
    202     cnv->extraInfo=uprv_malloc(sizeof(SCSUData));
    203     if(cnv->extraInfo!=NULL) {
    204         if(locale!=NULL && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) {
    205             ((SCSUData *)cnv->extraInfo)->locale=l_ja;
    206         } else {
    207             ((SCSUData *)cnv->extraInfo)->locale=lGeneric;
    208         }
    209         _SCSUReset(cnv, UCNV_RESET_BOTH);
    210     } else {
    211         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    212     }
    213 
    214     /* Set the substitution character U+fffd as a Unicode string. */
    215     cnv->subUChars[0]=0xfffd;
    216     cnv->subCharLen=-1;
    217 }
    218 
    219 static void
    220 _SCSUClose(UConverter *cnv) {
    221     if(cnv->extraInfo!=NULL) {
    222         if(!cnv->isExtraLocal) {
    223             uprv_free(cnv->extraInfo);
    224         }
    225         cnv->extraInfo=NULL;
    226     }
    227 }
    228 
    229 /* SCSU-to-Unicode conversion functions ------------------------------------- */
    230 
    231 static void
    232 _SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
    233                           UErrorCode *pErrorCode) {
    234     UConverter *cnv;
    235     SCSUData *scsu;
    236     const uint8_t *source, *sourceLimit;
    237     UChar *target;
    238     const UChar *targetLimit;
    239     int32_t *offsets;
    240     UBool isSingleByteMode;
    241     uint8_t state, byteOne;
    242     int8_t quoteWindow, dynamicWindow;
    243 
    244     int32_t sourceIndex, nextSourceIndex;
    245 
    246     uint8_t b;
    247 
    248     /* set up the local pointers */
    249     cnv=pArgs->converter;
    250     scsu=(SCSUData *)cnv->extraInfo;
    251 
    252     source=(const uint8_t *)pArgs->source;
    253     sourceLimit=(const uint8_t *)pArgs->sourceLimit;
    254     target=pArgs->target;
    255     targetLimit=pArgs->targetLimit;
    256     offsets=pArgs->offsets;
    257 
    258     /* get the state machine state */
    259     isSingleByteMode=scsu->toUIsSingleByteMode;
    260     state=scsu->toUState;
    261     quoteWindow=scsu->toUQuoteWindow;
    262     dynamicWindow=scsu->toUDynamicWindow;
    263     byteOne=scsu->toUByteOne;
    264 
    265     /* sourceIndex=-1 if the current character began in the previous buffer */
    266     sourceIndex=state==readCommand ? 0 : -1;
    267     nextSourceIndex=0;
    268 
    269     /*
    270      * conversion "loop"
    271      *
    272      * For performance, this is not a normal C loop.
    273      * Instead, there are two code blocks for the two SCSU modes.
    274      * The function branches to either one, and a change of the mode is done with a goto to
    275      * the other branch.
    276      *
    277      * Each branch has two conventional loops:
    278      * - a fast-path loop for the most common codes in the mode
    279      * - a loop for all other codes in the mode
    280      * When the fast-path runs into a code that it cannot handle, its loop ends and it
    281      * runs into the following loop to handle the other codes.
    282      * The end of the input or output buffer is also handled by the slower loop.
    283      * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
    284      *
    285      * The callback handling is done by returning with an error code.
    286      * The conversion framework actually calls the callback function.
    287      */
    288     if(isSingleByteMode) {
    289         /* fast path for single-byte mode */
    290         if(state==readCommand) {
    291 fastSingle:
    292             while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
    293                 ++source;
    294                 ++nextSourceIndex;
    295                 if(b<=0x7f) {
    296                     /* write US-ASCII graphic character or DEL */
    297                     *target++=(UChar)b;
    298                     if(offsets!=NULL) {
    299                         *offsets++=sourceIndex;
    300                     }
    301                 } else {
    302                     /* write from dynamic window */
    303                     uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
    304                     if(c<=0xffff) {
    305                         *target++=(UChar)c;
    306                         if(offsets!=NULL) {
    307                             *offsets++=sourceIndex;
    308                         }
    309                     } else {
    310                         /* output surrogate pair */
    311                         *target++=(UChar)(0xd7c0+(c>>10));
    312                         if(target<targetLimit) {
    313                             *target++=(UChar)(0xdc00|(c&0x3ff));
    314                             if(offsets!=NULL) {
    315                                 *offsets++=sourceIndex;
    316                                 *offsets++=sourceIndex;
    317                             }
    318                         } else {
    319                             /* target overflow */
    320                             if(offsets!=NULL) {
    321                                 *offsets++=sourceIndex;
    322                             }
    323                             cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
    324                             cnv->UCharErrorBufferLength=1;
    325                             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    326                             goto endloop;
    327                         }
    328                     }
    329                 }
    330                 sourceIndex=nextSourceIndex;
    331             }
    332         }
    333 
    334         /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
    335 singleByteMode:
    336         while(source<sourceLimit) {
    337             if(target>=targetLimit) {
    338                 /* target is full */
    339                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    340                 break;
    341             }
    342             b=*source++;
    343             ++nextSourceIndex;
    344             switch(state) {
    345             case readCommand:
    346                 /* redundant conditions are commented out */
    347                 /* here: b<0x20 because otherwise we would be in fastSingle */
    348                 if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
    349                     /* CR/LF/TAB/NUL */
    350                     *target++=(UChar)b;
    351                     if(offsets!=NULL) {
    352                         *offsets++=sourceIndex;
    353                     }
    354                     sourceIndex=nextSourceIndex;
    355                     goto fastSingle;
    356                 } else if(SC0<=b) {
    357                     if(b<=SC7) {
    358                         dynamicWindow=(int8_t)(b-SC0);
    359                         sourceIndex=nextSourceIndex;
    360                         goto fastSingle;
    361                     } else /* if(SD0<=b && b<=SD7) */ {
    362                         dynamicWindow=(int8_t)(b-SD0);
    363                         state=defineOne;
    364                     }
    365                 } else if(/* SQ0<=b && */ b<=SQ7) {
    366                     quoteWindow=(int8_t)(b-SQ0);
    367                     state=quoteOne;
    368                 } else if(b==SDX) {
    369                     state=definePairOne;
    370                 } else if(b==SQU) {
    371                     state=quotePairOne;
    372                 } else if(b==SCU) {
    373                     sourceIndex=nextSourceIndex;
    374                     isSingleByteMode=FALSE;
    375                     goto fastUnicode;
    376                 } else /* Srs */ {
    377                     /* callback(illegal) */
    378                     *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    379                     cnv->toUBytes[0]=b;
    380                     cnv->toULength=1;
    381                     goto endloop;
    382                 }
    383 
    384                 /* store the first byte of a multibyte sequence in toUBytes[] */
    385                 cnv->toUBytes[0]=b;
    386                 cnv->toULength=1;
    387                 break;
    388             case quotePairOne:
    389                 byteOne=b;
    390                 cnv->toUBytes[1]=b;
    391                 cnv->toULength=2;
    392                 state=quotePairTwo;
    393                 break;
    394             case quotePairTwo:
    395                 *target++=(UChar)((byteOne<<8)|b);
    396                 if(offsets!=NULL) {
    397                     *offsets++=sourceIndex;
    398                 }
    399                 sourceIndex=nextSourceIndex;
    400                 state=readCommand;
    401                 goto fastSingle;
    402             case quoteOne:
    403                 if(b<0x80) {
    404                     /* all static offsets are in the BMP */
    405                     *target++=(UChar)(staticOffsets[quoteWindow]+b);
    406                     if(offsets!=NULL) {
    407                         *offsets++=sourceIndex;
    408                     }
    409                 } else {
    410                     /* write from dynamic window */
    411                     uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
    412                     if(c<=0xffff) {
    413                         *target++=(UChar)c;
    414                         if(offsets!=NULL) {
    415                             *offsets++=sourceIndex;
    416                         }
    417                     } else {
    418                         /* output surrogate pair */
    419                         *target++=(UChar)(0xd7c0+(c>>10));
    420                         if(target<targetLimit) {
    421                             *target++=(UChar)(0xdc00|(c&0x3ff));
    422                             if(offsets!=NULL) {
    423                                 *offsets++=sourceIndex;
    424                                 *offsets++=sourceIndex;
    425                             }
    426                         } else {
    427                             /* target overflow */
    428                             if(offsets!=NULL) {
    429                                 *offsets++=sourceIndex;
    430                             }
    431                             cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
    432                             cnv->UCharErrorBufferLength=1;
    433                             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    434                             goto endloop;
    435                         }
    436                     }
    437                 }
    438                 sourceIndex=nextSourceIndex;
    439                 state=readCommand;
    440                 goto fastSingle;
    441             case definePairOne:
    442                 dynamicWindow=(int8_t)((b>>5)&7);
    443                 byteOne=(uint8_t)(b&0x1f);
    444                 cnv->toUBytes[1]=b;
    445                 cnv->toULength=2;
    446                 state=definePairTwo;
    447                 break;
    448             case definePairTwo:
    449                 scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
    450                 sourceIndex=nextSourceIndex;
    451                 state=readCommand;
    452                 goto fastSingle;
    453             case defineOne:
    454                 if(b==0) {
    455                     /* callback(illegal): Reserved window offset value 0 */
    456                     cnv->toUBytes[1]=b;
    457                     cnv->toULength=2;
    458                     goto endloop;
    459                 } else if(b<gapThreshold) {
    460                     scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
    461                 } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
    462                     scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
    463                 } else if(b>=fixedThreshold) {
    464                     scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
    465                 } else {
    466                     /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
    467                     cnv->toUBytes[1]=b;
    468                     cnv->toULength=2;
    469                     goto endloop;
    470                 }
    471                 sourceIndex=nextSourceIndex;
    472                 state=readCommand;
    473                 goto fastSingle;
    474             }
    475         }
    476     } else {
    477         /* fast path for Unicode mode */
    478         if(state==readCommand) {
    479 fastUnicode:
    480             while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
    481                 *target++=(UChar)((b<<8)|source[1]);
    482                 if(offsets!=NULL) {
    483                     *offsets++=sourceIndex;
    484                 }
    485                 sourceIndex=nextSourceIndex;
    486                 nextSourceIndex+=2;
    487                 source+=2;
    488             }
    489         }
    490 
    491         /* normal state machine for Unicode mode */
    492 /* unicodeByteMode: */
    493         while(source<sourceLimit) {
    494             if(target>=targetLimit) {
    495                 /* target is full */
    496                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    497                 break;
    498             }
    499             b=*source++;
    500             ++nextSourceIndex;
    501             switch(state) {
    502             case readCommand:
    503                 if((uint8_t)(b-UC0)>(Urs-UC0)) {
    504                     byteOne=b;
    505                     cnv->toUBytes[0]=b;
    506                     cnv->toULength=1;
    507                     state=quotePairTwo;
    508                 } else if(/* UC0<=b && */ b<=UC7) {
    509                     dynamicWindow=(int8_t)(b-UC0);
    510                     sourceIndex=nextSourceIndex;
    511                     isSingleByteMode=TRUE;
    512                     goto fastSingle;
    513                 } else if(/* UD0<=b && */ b<=UD7) {
    514                     dynamicWindow=(int8_t)(b-UD0);
    515                     isSingleByteMode=TRUE;
    516                     cnv->toUBytes[0]=b;
    517                     cnv->toULength=1;
    518                     state=defineOne;
    519                     goto singleByteMode;
    520                 } else if(b==UDX) {
    521                     isSingleByteMode=TRUE;
    522                     cnv->toUBytes[0]=b;
    523                     cnv->toULength=1;
    524                     state=definePairOne;
    525                     goto singleByteMode;
    526                 } else if(b==UQU) {
    527                     cnv->toUBytes[0]=b;
    528                     cnv->toULength=1;
    529                     state=quotePairOne;
    530                 } else /* Urs */ {
    531                     /* callback(illegal) */
    532                     *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    533                     cnv->toUBytes[0]=b;
    534                     cnv->toULength=1;
    535                     goto endloop;
    536                 }
    537                 break;
    538             case quotePairOne:
    539                 byteOne=b;
    540                 cnv->toUBytes[1]=b;
    541                 cnv->toULength=2;
    542                 state=quotePairTwo;
    543                 break;
    544             case quotePairTwo:
    545                 *target++=(UChar)((byteOne<<8)|b);
    546                 if(offsets!=NULL) {
    547                     *offsets++=sourceIndex;
    548                 }
    549                 sourceIndex=nextSourceIndex;
    550                 state=readCommand;
    551                 goto fastUnicode;
    552             }
    553         }
    554     }
    555 endloop:
    556 
    557     /* set the converter state back into UConverter */
    558     if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
    559         /* reset to deal with the next character */
    560         state=readCommand;
    561     } else if(state==readCommand) {
    562         /* not in a multi-byte sequence, reset toULength */
    563         cnv->toULength=0;
    564     }
    565     scsu->toUIsSingleByteMode=isSingleByteMode;
    566     scsu->toUState=state;
    567     scsu->toUQuoteWindow=quoteWindow;
    568     scsu->toUDynamicWindow=dynamicWindow;
    569     scsu->toUByteOne=byteOne;
    570 
    571     /* write back the updated pointers */
    572     pArgs->source=(const char *)source;
    573     pArgs->target=target;
    574     pArgs->offsets=offsets;
    575     return;
    576 }
    577 
    578 /*
    579  * Identical to _SCSUToUnicodeWithOffsets but without offset handling.
    580  * If a change is made in the original function, then either
    581  * change this function the same way or
    582  * re-copy the original function and remove the variables
    583  * offsets, sourceIndex, and nextSourceIndex.
    584  */
    585 static void
    586 _SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
    587                UErrorCode *pErrorCode) {
    588     UConverter *cnv;
    589     SCSUData *scsu;
    590     const uint8_t *source, *sourceLimit;
    591     UChar *target;
    592     const UChar *targetLimit;
    593     UBool isSingleByteMode;
    594     uint8_t state, byteOne;
    595     int8_t quoteWindow, dynamicWindow;
    596 
    597     uint8_t b;
    598 
    599     /* set up the local pointers */
    600     cnv=pArgs->converter;
    601     scsu=(SCSUData *)cnv->extraInfo;
    602 
    603     source=(const uint8_t *)pArgs->source;
    604     sourceLimit=(const uint8_t *)pArgs->sourceLimit;
    605     target=pArgs->target;
    606     targetLimit=pArgs->targetLimit;
    607 
    608     /* get the state machine state */
    609     isSingleByteMode=scsu->toUIsSingleByteMode;
    610     state=scsu->toUState;
    611     quoteWindow=scsu->toUQuoteWindow;
    612     dynamicWindow=scsu->toUDynamicWindow;
    613     byteOne=scsu->toUByteOne;
    614 
    615     /*
    616      * conversion "loop"
    617      *
    618      * For performance, this is not a normal C loop.
    619      * Instead, there are two code blocks for the two SCSU modes.
    620      * The function branches to either one, and a change of the mode is done with a goto to
    621      * the other branch.
    622      *
    623      * Each branch has two conventional loops:
    624      * - a fast-path loop for the most common codes in the mode
    625      * - a loop for all other codes in the mode
    626      * When the fast-path runs into a code that it cannot handle, its loop ends and it
    627      * runs into the following loop to handle the other codes.
    628      * The end of the input or output buffer is also handled by the slower loop.
    629      * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
    630      *
    631      * The callback handling is done by returning with an error code.
    632      * The conversion framework actually calls the callback function.
    633      */
    634     if(isSingleByteMode) {
    635         /* fast path for single-byte mode */
    636         if(state==readCommand) {
    637 fastSingle:
    638             while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
    639                 ++source;
    640                 if(b<=0x7f) {
    641                     /* write US-ASCII graphic character or DEL */
    642                     *target++=(UChar)b;
    643                 } else {
    644                     /* write from dynamic window */
    645                     uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
    646                     if(c<=0xffff) {
    647                         *target++=(UChar)c;
    648                     } else {
    649                         /* output surrogate pair */
    650                         *target++=(UChar)(0xd7c0+(c>>10));
    651                         if(target<targetLimit) {
    652                             *target++=(UChar)(0xdc00|(c&0x3ff));
    653                         } else {
    654                             /* target overflow */
    655                             cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
    656                             cnv->UCharErrorBufferLength=1;
    657                             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    658                             goto endloop;
    659                         }
    660                     }
    661                 }
    662             }
    663         }
    664 
    665         /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
    666 singleByteMode:
    667         while(source<sourceLimit) {
    668             if(target>=targetLimit) {
    669                 /* target is full */
    670                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    671                 break;
    672             }
    673             b=*source++;
    674             switch(state) {
    675             case readCommand:
    676                 /* redundant conditions are commented out */
    677                 /* here: b<0x20 because otherwise we would be in fastSingle */
    678                 if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
    679                     /* CR/LF/TAB/NUL */
    680                     *target++=(UChar)b;
    681                     goto fastSingle;
    682                 } else if(SC0<=b) {
    683                     if(b<=SC7) {
    684                         dynamicWindow=(int8_t)(b-SC0);
    685                         goto fastSingle;
    686                     } else /* if(SD0<=b && b<=SD7) */ {
    687                         dynamicWindow=(int8_t)(b-SD0);
    688                         state=defineOne;
    689                     }
    690                 } else if(/* SQ0<=b && */ b<=SQ7) {
    691                     quoteWindow=(int8_t)(b-SQ0);
    692                     state=quoteOne;
    693                 } else if(b==SDX) {
    694                     state=definePairOne;
    695                 } else if(b==SQU) {
    696                     state=quotePairOne;
    697                 } else if(b==SCU) {
    698                     isSingleByteMode=FALSE;
    699                     goto fastUnicode;
    700                 } else /* Srs */ {
    701                     /* callback(illegal) */
    702                     *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    703                     cnv->toUBytes[0]=b;
    704                     cnv->toULength=1;
    705                     goto endloop;
    706                 }
    707 
    708                 /* store the first byte of a multibyte sequence in toUBytes[] */
    709                 cnv->toUBytes[0]=b;
    710                 cnv->toULength=1;
    711                 break;
    712             case quotePairOne:
    713                 byteOne=b;
    714                 cnv->toUBytes[1]=b;
    715                 cnv->toULength=2;
    716                 state=quotePairTwo;
    717                 break;
    718             case quotePairTwo:
    719                 *target++=(UChar)((byteOne<<8)|b);
    720                 state=readCommand;
    721                 goto fastSingle;
    722             case quoteOne:
    723                 if(b<0x80) {
    724                     /* all static offsets are in the BMP */
    725                     *target++=(UChar)(staticOffsets[quoteWindow]+b);
    726                 } else {
    727                     /* write from dynamic window */
    728                     uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
    729                     if(c<=0xffff) {
    730                         *target++=(UChar)c;
    731                     } else {
    732                         /* output surrogate pair */
    733                         *target++=(UChar)(0xd7c0+(c>>10));
    734                         if(target<targetLimit) {
    735                             *target++=(UChar)(0xdc00|(c&0x3ff));
    736                         } else {
    737                             /* target overflow */
    738                             cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
    739                             cnv->UCharErrorBufferLength=1;
    740                             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    741                             goto endloop;
    742                         }
    743                     }
    744                 }
    745                 state=readCommand;
    746                 goto fastSingle;
    747             case definePairOne:
    748                 dynamicWindow=(int8_t)((b>>5)&7);
    749                 byteOne=(uint8_t)(b&0x1f);
    750                 cnv->toUBytes[1]=b;
    751                 cnv->toULength=2;
    752                 state=definePairTwo;
    753                 break;
    754             case definePairTwo:
    755                 scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
    756                 state=readCommand;
    757                 goto fastSingle;
    758             case defineOne:
    759                 if(b==0) {
    760                     /* callback(illegal): Reserved window offset value 0 */
    761                     cnv->toUBytes[1]=b;
    762                     cnv->toULength=2;
    763                     goto endloop;
    764                 } else if(b<gapThreshold) {
    765                     scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
    766                 } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
    767                     scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
    768                 } else if(b>=fixedThreshold) {
    769                     scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
    770                 } else {
    771                     /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
    772                     cnv->toUBytes[1]=b;
    773                     cnv->toULength=2;
    774                     goto endloop;
    775                 }
    776                 state=readCommand;
    777                 goto fastSingle;
    778             }
    779         }
    780     } else {
    781         /* fast path for Unicode mode */
    782         if(state==readCommand) {
    783 fastUnicode:
    784             while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
    785                 *target++=(UChar)((b<<8)|source[1]);
    786                 source+=2;
    787             }
    788         }
    789 
    790         /* normal state machine for Unicode mode */
    791 /* unicodeByteMode: */
    792         while(source<sourceLimit) {
    793             if(target>=targetLimit) {
    794                 /* target is full */
    795                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    796                 break;
    797             }
    798             b=*source++;
    799             switch(state) {
    800             case readCommand:
    801                 if((uint8_t)(b-UC0)>(Urs-UC0)) {
    802                     byteOne=b;
    803                     cnv->toUBytes[0]=b;
    804                     cnv->toULength=1;
    805                     state=quotePairTwo;
    806                 } else if(/* UC0<=b && */ b<=UC7) {
    807                     dynamicWindow=(int8_t)(b-UC0);
    808                     isSingleByteMode=TRUE;
    809                     goto fastSingle;
    810                 } else if(/* UD0<=b && */ b<=UD7) {
    811                     dynamicWindow=(int8_t)(b-UD0);
    812                     isSingleByteMode=TRUE;
    813                     cnv->toUBytes[0]=b;
    814                     cnv->toULength=1;
    815                     state=defineOne;
    816                     goto singleByteMode;
    817                 } else if(b==UDX) {
    818                     isSingleByteMode=TRUE;
    819                     cnv->toUBytes[0]=b;
    820                     cnv->toULength=1;
    821                     state=definePairOne;
    822                     goto singleByteMode;
    823                 } else if(b==UQU) {
    824                     cnv->toUBytes[0]=b;
    825                     cnv->toULength=1;
    826                     state=quotePairOne;
    827                 } else /* Urs */ {
    828                     /* callback(illegal) */
    829                     *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    830                     cnv->toUBytes[0]=b;
    831                     cnv->toULength=1;
    832                     goto endloop;
    833                 }
    834                 break;
    835             case quotePairOne:
    836                 byteOne=b;
    837                 cnv->toUBytes[1]=b;
    838                 cnv->toULength=2;
    839                 state=quotePairTwo;
    840                 break;
    841             case quotePairTwo:
    842                 *target++=(UChar)((byteOne<<8)|b);
    843                 state=readCommand;
    844                 goto fastUnicode;
    845             }
    846         }
    847     }
    848 endloop:
    849 
    850     /* set the converter state back into UConverter */
    851     if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
    852         /* reset to deal with the next character */
    853         state=readCommand;
    854     } else if(state==readCommand) {
    855         /* not in a multi-byte sequence, reset toULength */
    856         cnv->toULength=0;
    857     }
    858     scsu->toUIsSingleByteMode=isSingleByteMode;
    859     scsu->toUState=state;
    860     scsu->toUQuoteWindow=quoteWindow;
    861     scsu->toUDynamicWindow=dynamicWindow;
    862     scsu->toUByteOne=byteOne;
    863 
    864     /* write back the updated pointers */
    865     pArgs->source=(const char *)source;
    866     pArgs->target=target;
    867     return;
    868 }
    869 
    870 /* SCSU-from-Unicode conversion functions ----------------------------------- */
    871 
    872 /*
    873  * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve
    874  * reasonable results. The lookahead is minimal.
    875  * Many cases are simple:
    876  * A character fits directly into the current mode, a dynamic or static window,
    877  * or is not compressible. These cases are tested first.
    878  * Real compression heuristics are applied to the rest, in code branches for
    879  * single/Unicode mode and BMP/supplementary code points.
    880  * The heuristics used here are extremely simple.
    881  */
    882 
    /*
     * Look up the window that contains a character.
     *
     * offsets[] holds the start offsets of eight windows. A window contains c
     * when the unsigned difference c-offsets[k] is at most 0x7f.
     * Returns the index (0..7) of the first such window, or -1 if there is none.
     */
    static int8_t
    getWindow(const uint32_t offsets[8], uint32_t c) {
        int8_t found=-1;
        int k=0;

        while(k<8 && found<0) {
            /* wraps around to a large value when c is below the window start */
            uint32_t delta=c-offsets[k];
            if(delta<0x80) {
                found=(int8_t)k;
            }
            ++k;
        }
        return found;
    }
    894 
    895 /* is the character in the dynamic window starting at the offset, or in the direct-encoded range? */
    896 static UBool
    897 isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) {
    898     return (UBool)(c<=offset+0x7f &&
    899           (c>=offset || (c<=0x7f &&
    900                         (c>=0x20 || (1UL<<c)&0x2601))));
    901                                 /* binary 0010 0110 0000 0001,
    902                                    check for b==0xd || b==0xa || b==9 || b==0 */
    903 }
    904 
    905 /*
    906  * getNextDynamicWindow returns the next dynamic window to be redefined
    907  */
    908 static int8_t
    909 getNextDynamicWindow(SCSUData *scsu) {
    910     int8_t window=scsu->windowUse[scsu->nextWindowUseIndex];
    911     if(++scsu->nextWindowUseIndex==8) {
    912         scsu->nextWindowUseIndex=0;
    913     }
    914     return window;
    915 }
    916 
    917 /*
    918  * useDynamicWindow() adjusts
    919  * windowUse[] and nextWindowUseIndex for the algorithm to choose
    920  * the next dynamic window to be defined;
    921  * a subclass may override it and provide its own algorithm.
    922  */
    923 static void
    924 useDynamicWindow(SCSUData *scsu, int8_t window) {
    925     /*
    926      * move the existing window, which just became the most recently used one,
    927      * up in windowUse[] to nextWindowUseIndex-1
    928      */
    929 
    930     /* first, find the index of the window - backwards to favor the more recently used windows */
    931     int i, j;
    932 
    933     i=scsu->nextWindowUseIndex;
    934     do {
    935         if(--i<0) {
    936             i=7;
    937         }
    938     } while(scsu->windowUse[i]!=window);
    939 
    940     /* now copy each windowUse[i+1] to [i] */
    941     j=i+1;
    942     if(j==8) {
    943         j=0;
    944     }
    945     while(j!=scsu->nextWindowUseIndex) {
    946         scsu->windowUse[i]=scsu->windowUse[j];
    947         i=j;
    948         if(++j==8) { j=0; }
    949     }
    950 
    951     /* finally, set the window into the most recently used index */
    952     scsu->windowUse[i]=window;
    953 }
    954 
    955 /*
    956  * calculate the offset and the code for a dynamic window that contains the character
    957  * takes fixed offsets into account
    958  * the offset of the window is stored in the offset variable,
    959  * the code is returned
    960  *
    961  * return offset code: -1 none  <=0xff code for SDn/UDn  else code for SDX/UDX, subtract 0x200 to get the true code
    962  */
    963 static int
    964 getDynamicOffset(uint32_t c, uint32_t *pOffset) {
    965     int i;
    966 
    967     for(i=0; i<7; ++i) {
    968         if((uint32_t)(c-fixedOffsets[i])<=0x7f) {
    969             *pOffset=fixedOffsets[i];
    970             return 0xf9+i;
    971         }
    972     }
    973 
    974     if(c<0x80) {
    975         /* No dynamic window for US-ASCII. */
    976         return -1;
    977     } else if(c<0x3400 ||
    978               (uint32_t)(c-0x10000)<(0x14000-0x10000) ||
    979               (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000)
    980     ) {
    981         /* This character is in a code range for a "small", i.e., reasonably windowable, script. */
    982         *pOffset=c&0x7fffff80;
    983         return (int)(c>>7);
    984     } else if(0xe000<=c && c!=0xfeff && c<0xfff0) {
    985         /* For these characters we need to take the gapOffset into account. */
    986         *pOffset=c&0x7fffff80;
    987         return (int)((c-gapOffset)>>7);
    988     } else {
    989         return -1;
    990     }
    991 }
    992 
    993 /*
    994  * Idea for compression:
    995  *  - save SCSUData and other state before really starting work
    996  *  - at endloop, see if compression could be better with just unicode mode
    997  *  - don't do this if a callback has been called
    998  *  - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning
    999  *  - different buffer handling!
   1000  *
   1001  * Drawback or need for corrective handling:
   1002  * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and
   1003  * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible
   1004  * not only for compression but also for HTML/XML documents with following charset/encoding announcers.
   1005  *
   1006  * How to achieve both?
   1007  *  - Only replace the result after an SDX or SCU?
   1008  */
   1009 
   1010 static void
   1011 _SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
   1012                             UErrorCode *pErrorCode) {
   1013     UConverter *cnv;
   1014     SCSUData *scsu;
   1015     const UChar *source, *sourceLimit;
   1016     uint8_t *target;
   1017     int32_t targetCapacity;
   1018     int32_t *offsets;
   1019 
   1020     UBool isSingleByteMode;
   1021     uint8_t dynamicWindow;
   1022     uint32_t currentOffset;
   1023 
   1024     uint32_t c, delta;
   1025 
   1026     int32_t sourceIndex, nextSourceIndex;
   1027 
   1028     int32_t length;
   1029 
   1030     /* variables for compression heuristics */
   1031     uint32_t offset;
   1032     UChar lead, trail;
   1033     int code;
   1034     int8_t window;
   1035 
   1036     /* set up the local pointers */
   1037     cnv=pArgs->converter;
   1038     scsu=(SCSUData *)cnv->extraInfo;
   1039 
   1040     /* set up the local pointers */
   1041     source=pArgs->source;
   1042     sourceLimit=pArgs->sourceLimit;
   1043     target=(uint8_t *)pArgs->target;
   1044     targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
   1045     offsets=pArgs->offsets;
   1046 
   1047     /* get the state machine state */
   1048     isSingleByteMode=scsu->fromUIsSingleByteMode;
   1049     dynamicWindow=scsu->fromUDynamicWindow;
   1050     currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1051 
   1052     c=cnv->fromUChar32;
   1053 
   1054     /* sourceIndex=-1 if the current character began in the previous buffer */
   1055     sourceIndex= c==0 ? 0 : -1;
   1056     nextSourceIndex=0;
   1057 
   1058     /* similar conversion "loop" as in toUnicode */
   1059 loop:
   1060     if(isSingleByteMode) {
   1061         if(c!=0 && targetCapacity>0) {
   1062             goto getTrailSingle;
   1063         }
   1064 
   1065         /* state machine for single-byte mode */
   1066 /* singleByteMode: */
   1067         while(source<sourceLimit) {
   1068             if(targetCapacity<=0) {
   1069                 /* target is full */
   1070                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1071                 break;
   1072             }
   1073             c=*source++;
   1074             ++nextSourceIndex;
   1075 
   1076             if((c-0x20)<=0x5f) {
   1077                 /* pass US-ASCII graphic character through */
   1078                 *target++=(uint8_t)c;
   1079                 if(offsets!=NULL) {
   1080                     *offsets++=sourceIndex;
   1081                 }
   1082                 --targetCapacity;
   1083             } else if(c<0x20) {
   1084                 if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
   1085                     /* CR/LF/TAB/NUL */
   1086                     *target++=(uint8_t)c;
   1087                     if(offsets!=NULL) {
   1088                         *offsets++=sourceIndex;
   1089                     }
   1090                     --targetCapacity;
   1091                 } else {
   1092                     /* quote C0 control character */
   1093                     c|=SQ0<<8;
   1094                     length=2;
   1095                     goto outputBytes;
   1096                 }
   1097             } else if((delta=c-currentOffset)<=0x7f) {
   1098                 /* use the current dynamic window */
   1099                 *target++=(uint8_t)(delta|0x80);
   1100                 if(offsets!=NULL) {
   1101                     *offsets++=sourceIndex;
   1102                 }
   1103                 --targetCapacity;
   1104             } else if(U16_IS_SURROGATE(c)) {
   1105                 if(U16_IS_SURROGATE_LEAD(c)) {
   1106 getTrailSingle:
   1107                     lead=(UChar)c;
   1108                     if(source<sourceLimit) {
   1109                         /* test the following code unit */
   1110                         trail=*source;
   1111                         if(U16_IS_TRAIL(trail)) {
   1112                             ++source;
   1113                             ++nextSourceIndex;
   1114                             c=U16_GET_SUPPLEMENTARY(c, trail);
   1115                             /* convert this surrogate code point */
   1116                             /* exit this condition tree */
   1117                         } else {
   1118                             /* this is an unmatched lead code unit (1st surrogate) */
   1119                             /* callback(illegal) */
   1120                             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1121                             goto endloop;
   1122                         }
   1123                     } else {
   1124                         /* no more input */
   1125                         break;
   1126                     }
   1127                 } else {
   1128                     /* this is an unmatched trail code unit (2nd surrogate) */
   1129                     /* callback(illegal) */
   1130                     *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1131                     goto endloop;
   1132                 }
   1133 
   1134                 /* compress supplementary character U+10000..U+10ffff */
   1135                 if((delta=c-currentOffset)<=0x7f) {
   1136                     /* use the current dynamic window */
   1137                     *target++=(uint8_t)(delta|0x80);
   1138                     if(offsets!=NULL) {
   1139                         *offsets++=sourceIndex;
   1140                     }
   1141                     --targetCapacity;
   1142                 } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
   1143                     /* there is a dynamic window that contains this character, change to it */
   1144                     dynamicWindow=window;
   1145                     currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1146                     useDynamicWindow(scsu, dynamicWindow);
   1147                     c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
   1148                     length=2;
   1149                     goto outputBytes;
   1150                 } else if((code=getDynamicOffset(c, &offset))>=0) {
   1151                     /* might check if there are more characters in this window to come */
   1152                     /* define an extended window with this character */
   1153                     code-=0x200;
   1154                     dynamicWindow=getNextDynamicWindow(scsu);
   1155                     currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
   1156                     useDynamicWindow(scsu, dynamicWindow);
   1157                     c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
   1158                     length=4;
   1159                     goto outputBytes;
   1160                 } else {
   1161                     /* change to Unicode mode and output this (lead, trail) pair */
   1162                     isSingleByteMode=FALSE;
   1163                     *target++=(uint8_t)SCU;
   1164                     if(offsets!=NULL) {
   1165                         *offsets++=sourceIndex;
   1166                     }
   1167                     --targetCapacity;
   1168                     c=((uint32_t)lead<<16)|trail;
   1169                     length=4;
   1170                     goto outputBytes;
   1171                 }
   1172             } else if(c<0xa0) {
   1173                 /* quote C1 control character */
   1174                 c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
   1175                 length=2;
   1176                 goto outputBytes;
   1177             } else if(c==0xfeff || c>=0xfff0) {
   1178                 /* quote signature character=byte order mark and specials */
   1179                 c|=SQU<<16;
   1180                 length=3;
   1181                 goto outputBytes;
   1182             } else {
   1183                 /* compress all other BMP characters */
   1184                 if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
   1185                     /* there is a window defined that contains this character - switch to it or quote from it? */
   1186                     if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
   1187                         /* change to dynamic window */
   1188                         dynamicWindow=window;
   1189                         currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1190                         useDynamicWindow(scsu, dynamicWindow);
   1191                         c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
   1192                         length=2;
   1193                         goto outputBytes;
   1194                     } else {
   1195                         /* quote from dynamic window */
   1196                         c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
   1197                         length=2;
   1198                         goto outputBytes;
   1199                     }
   1200                 } else if((window=getWindow(staticOffsets, c))>=0) {
   1201                     /* quote from static window */
   1202                     c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
   1203                     length=2;
   1204                     goto outputBytes;
   1205                 } else if((code=getDynamicOffset(c, &offset))>=0) {
   1206                     /* define a dynamic window with this character */
   1207                     dynamicWindow=getNextDynamicWindow(scsu);
   1208                     currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
   1209                     useDynamicWindow(scsu, dynamicWindow);
   1210                     c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
   1211                     length=3;
   1212                     goto outputBytes;
   1213                 } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
   1214                           (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
   1215                 ) {
   1216                     /*
   1217                      * this character is not compressible (a BMP ideograph or similar);
   1218                      * switch to Unicode mode if this is the last character in the block
   1219                      * or there is at least one more ideograph following immediately
   1220                      */
   1221                     isSingleByteMode=FALSE;
   1222                     c|=SCU<<16;
   1223                     length=3;
   1224                     goto outputBytes;
   1225                 } else {
   1226                     /* quote Unicode */
   1227                     c|=SQU<<16;
   1228                     length=3;
   1229                     goto outputBytes;
   1230                 }
   1231             }
   1232 
   1233             /* normal end of conversion: prepare for a new character */
   1234             c=0;
   1235             sourceIndex=nextSourceIndex;
   1236         }
   1237     } else {
   1238         if(c!=0 && targetCapacity>0) {
   1239             goto getTrailUnicode;
   1240         }
   1241 
   1242         /* state machine for Unicode mode */
   1243 /* unicodeByteMode: */
   1244         while(source<sourceLimit) {
   1245             if(targetCapacity<=0) {
   1246                 /* target is full */
   1247                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1248                 break;
   1249             }
   1250             c=*source++;
   1251             ++nextSourceIndex;
   1252 
   1253             if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
   1254                 /* not compressible, write character directly */
   1255                 if(targetCapacity>=2) {
   1256                     *target++=(uint8_t)(c>>8);
   1257                     *target++=(uint8_t)c;
   1258                     if(offsets!=NULL) {
   1259                         *offsets++=sourceIndex;
   1260                         *offsets++=sourceIndex;
   1261                     }
   1262                     targetCapacity-=2;
   1263                 } else {
   1264                     length=2;
   1265                     goto outputBytes;
   1266                 }
   1267             } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
   1268                 /* compress BMP character if the following one is not an uncompressible ideograph */
   1269                 if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
   1270                     if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
   1271                         /* ASCII digit or letter */
   1272                         isSingleByteMode=TRUE;
   1273                         c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
   1274                         length=2;
   1275                         goto outputBytes;
   1276                     } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
   1277                         /* there is a dynamic window that contains this character, change to it */
   1278                         isSingleByteMode=TRUE;
   1279                         dynamicWindow=window;
   1280                         currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1281                         useDynamicWindow(scsu, dynamicWindow);
   1282                         c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
   1283                         length=2;
   1284                         goto outputBytes;
   1285                     } else if((code=getDynamicOffset(c, &offset))>=0) {
   1286                         /* define a dynamic window with this character */
   1287                         isSingleByteMode=TRUE;
   1288                         dynamicWindow=getNextDynamicWindow(scsu);
   1289                         currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
   1290                         useDynamicWindow(scsu, dynamicWindow);
   1291                         c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
   1292                         length=3;
   1293                         goto outputBytes;
   1294                     }
   1295                 }
   1296 
   1297                 /* don't know how to compress this character, just write it directly */
   1298                 length=2;
   1299                 goto outputBytes;
   1300             } else if(c<0xe000) {
   1301                 /* c is a surrogate */
   1302                 if(U16_IS_SURROGATE_LEAD(c)) {
   1303 getTrailUnicode:
   1304                     lead=(UChar)c;
   1305                     if(source<sourceLimit) {
   1306                         /* test the following code unit */
   1307                         trail=*source;
   1308                         if(U16_IS_TRAIL(trail)) {
   1309                             ++source;
   1310                             ++nextSourceIndex;
   1311                             c=U16_GET_SUPPLEMENTARY(c, trail);
   1312                             /* convert this surrogate code point */
   1313                             /* exit this condition tree */
   1314                         } else {
   1315                             /* this is an unmatched lead code unit (1st surrogate) */
   1316                             /* callback(illegal) */
   1317                             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1318                             goto endloop;
   1319                         }
   1320                     } else {
   1321                         /* no more input */
   1322                         break;
   1323                     }
   1324                 } else {
   1325                     /* this is an unmatched trail code unit (2nd surrogate) */
   1326                     /* callback(illegal) */
   1327                     *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1328                     goto endloop;
   1329                 }
   1330 
   1331                 /* compress supplementary character */
   1332                 if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
   1333                     !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
   1334                 ) {
   1335                     /*
   1336                      * there is a dynamic window that contains this character and
   1337                      * the following character is not uncompressible,
   1338                      * change to the window
   1339                      */
   1340                     isSingleByteMode=TRUE;
   1341                     dynamicWindow=window;
   1342                     currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1343                     useDynamicWindow(scsu, dynamicWindow);
   1344                     c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
   1345                     length=2;
   1346                     goto outputBytes;
   1347                 } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
   1348                           (code=getDynamicOffset(c, &offset))>=0
   1349                 ) {
   1350                     /* two supplementary characters in (probably) the same window - define an extended one */
   1351                     isSingleByteMode=TRUE;
   1352                     code-=0x200;
   1353                     dynamicWindow=getNextDynamicWindow(scsu);
   1354                     currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
   1355                     useDynamicWindow(scsu, dynamicWindow);
   1356                     c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
   1357                     length=4;
   1358                     goto outputBytes;
   1359                 } else {
   1360                     /* don't know how to compress this character, just write it directly */
   1361                     c=((uint32_t)lead<<16)|trail;
   1362                     length=4;
   1363                     goto outputBytes;
   1364                 }
   1365             } else /* 0xe000<=c<0xf300 */ {
   1366                 /* quote to avoid SCSU tags */
   1367                 c|=UQU<<16;
   1368                 length=3;
   1369                 goto outputBytes;
   1370             }
   1371 
   1372             /* normal end of conversion: prepare for a new character */
   1373             c=0;
   1374             sourceIndex=nextSourceIndex;
   1375         }
   1376     }
   1377 endloop:
   1378 
   1379     /* set the converter state back into UConverter */
   1380     scsu->fromUIsSingleByteMode=isSingleByteMode;
   1381     scsu->fromUDynamicWindow=dynamicWindow;
   1382 
   1383     cnv->fromUChar32=c;
   1384 
   1385     /* write back the updated pointers */
   1386     pArgs->source=source;
   1387     pArgs->target=(char *)target;
   1388     pArgs->offsets=offsets;
   1389     return;
   1390 
   1391 outputBytes:
   1392     /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
   1393     /* from the first if in the loop we know that targetCapacity>0 */
   1394     if(length<=targetCapacity) {
   1395         if(offsets==NULL) {
   1396             switch(length) {
   1397                 /* each branch falls through to the next one */
   1398             case 4:
   1399                 *target++=(uint8_t)(c>>24);
   1400                 U_FALLTHROUGH;
   1401             case 3:
   1402                 *target++=(uint8_t)(c>>16);
   1403                 U_FALLTHROUGH;
   1404             case 2:
   1405                 *target++=(uint8_t)(c>>8);
   1406                 U_FALLTHROUGH;
   1407             case 1:
   1408                 *target++=(uint8_t)c;
   1409                 U_FALLTHROUGH;
   1410             default:
   1411                 /* will never occur */
   1412                 break;
   1413             }
   1414         } else {
   1415             switch(length) {
   1416                 /* each branch falls through to the next one */
   1417             case 4:
   1418                 *target++=(uint8_t)(c>>24);
   1419                 *offsets++=sourceIndex;
   1420                 U_FALLTHROUGH;
   1421             case 3:
   1422                 *target++=(uint8_t)(c>>16);
   1423                 *offsets++=sourceIndex;
   1424                 U_FALLTHROUGH;
   1425             case 2:
   1426                 *target++=(uint8_t)(c>>8);
   1427                 *offsets++=sourceIndex;
   1428                 U_FALLTHROUGH;
   1429             case 1:
   1430                 *target++=(uint8_t)c;
   1431                 *offsets++=sourceIndex;
   1432                 U_FALLTHROUGH;
   1433             default:
   1434                 /* will never occur */
   1435                 break;
   1436             }
   1437         }
   1438         targetCapacity-=length;
   1439 
   1440         /* normal end of conversion: prepare for a new character */
   1441         c=0;
   1442         sourceIndex=nextSourceIndex;
   1443         goto loop;
   1444     } else {
   1445         uint8_t *p;
   1446 
   1447         /*
   1448          * We actually do this backwards here:
   1449          * In order to save an intermediate variable, we output
   1450          * first to the overflow buffer what does not fit into the
   1451          * regular target.
   1452          */
   1453         /* we know that 0<=targetCapacity<length<=4 */
   1454         /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
   1455         length-=targetCapacity;
   1456         p=(uint8_t *)cnv->charErrorBuffer;
   1457         switch(length) {
   1458             /* each branch falls through to the next one */
   1459         case 4:
   1460             *p++=(uint8_t)(c>>24);
   1461             U_FALLTHROUGH;
   1462         case 3:
   1463             *p++=(uint8_t)(c>>16);
   1464             U_FALLTHROUGH;
   1465         case 2:
   1466             *p++=(uint8_t)(c>>8);
   1467             U_FALLTHROUGH;
   1468         case 1:
   1469             *p=(uint8_t)c;
   1470             U_FALLTHROUGH;
   1471         default:
   1472             /* will never occur */
   1473             break;
   1474         }
   1475         cnv->charErrorBufferLength=(int8_t)length;
   1476 
   1477         /* now output what fits into the regular target */
   1478         c>>=8*length; /* length was reduced by targetCapacity */
   1479         switch(targetCapacity) {
   1480             /* each branch falls through to the next one */
   1481         case 3:
   1482             *target++=(uint8_t)(c>>16);
   1483             if(offsets!=NULL) {
   1484                 *offsets++=sourceIndex;
   1485             }
   1486             U_FALLTHROUGH;
   1487         case 2:
   1488             *target++=(uint8_t)(c>>8);
   1489             if(offsets!=NULL) {
   1490                 *offsets++=sourceIndex;
   1491             }
   1492             U_FALLTHROUGH;
   1493         case 1:
   1494             *target++=(uint8_t)c;
   1495             if(offsets!=NULL) {
   1496                 *offsets++=sourceIndex;
   1497             }
   1498             U_FALLTHROUGH;
   1499         default:
   1500             break;
   1501         }
   1502 
   1503         /* target overflow */
   1504         targetCapacity=0;
   1505         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1506         c=0;
   1507         goto endloop;
   1508     }
   1509 }
   1510 
   1511 /*
   1512  * Identical to _SCSUFromUnicodeWithOffsets but without offset handling.
   1513  * If a change is made in the original function, then either
   1514  * change this function the same way or
   1515  * re-copy the original function and remove the variables
   1516  * offsets, sourceIndex, and nextSourceIndex.
   1517  */
   1518 static void
   1519 _SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
   1520                  UErrorCode *pErrorCode) {
   1521     UConverter *cnv;
   1522     SCSUData *scsu;
   1523     const UChar *source, *sourceLimit;
   1524     uint8_t *target;
   1525     int32_t targetCapacity;
   1526 
   1527     UBool isSingleByteMode;
   1528     uint8_t dynamicWindow;
   1529     uint32_t currentOffset;
   1530 
   1531     uint32_t c, delta;
   1532 
   1533     int32_t length;
   1534 
   1535     /* variables for compression heuristics */
   1536     uint32_t offset;
   1537     UChar lead, trail;
   1538     int code;
   1539     int8_t window;
   1540 
   1541     /* set up the local pointers */
   1542     cnv=pArgs->converter;
   1543     scsu=(SCSUData *)cnv->extraInfo;
   1544 
   1545     /* set up the local pointers */
   1546     source=pArgs->source;
   1547     sourceLimit=pArgs->sourceLimit;
   1548     target=(uint8_t *)pArgs->target;
   1549     targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
   1550 
   1551     /* get the state machine state */
   1552     isSingleByteMode=scsu->fromUIsSingleByteMode;
   1553     dynamicWindow=scsu->fromUDynamicWindow;
   1554     currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1555 
   1556     c=cnv->fromUChar32;
   1557 
   1558     /* similar conversion "loop" as in toUnicode */
   1559 loop:
   1560     if(isSingleByteMode) {
   1561         if(c!=0 && targetCapacity>0) {
   1562             goto getTrailSingle;
   1563         }
   1564 
   1565         /* state machine for single-byte mode */
   1566 /* singleByteMode: */
   1567         while(source<sourceLimit) {
   1568             if(targetCapacity<=0) {
   1569                 /* target is full */
   1570                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1571                 break;
   1572             }
   1573             c=*source++;
   1574 
   1575             if((c-0x20)<=0x5f) {
   1576                 /* pass US-ASCII graphic character through */
   1577                 *target++=(uint8_t)c;
   1578                 --targetCapacity;
   1579             } else if(c<0x20) {
   1580                 if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
   1581                     /* CR/LF/TAB/NUL */
   1582                     *target++=(uint8_t)c;
   1583                     --targetCapacity;
   1584                 } else {
   1585                     /* quote C0 control character */
   1586                     c|=SQ0<<8;
   1587                     length=2;
   1588                     goto outputBytes;
   1589                 }
   1590             } else if((delta=c-currentOffset)<=0x7f) {
   1591                 /* use the current dynamic window */
   1592                 *target++=(uint8_t)(delta|0x80);
   1593                 --targetCapacity;
   1594             } else if(U16_IS_SURROGATE(c)) {
   1595                 if(U16_IS_SURROGATE_LEAD(c)) {
   1596 getTrailSingle:
   1597                     lead=(UChar)c;
   1598                     if(source<sourceLimit) {
   1599                         /* test the following code unit */
   1600                         trail=*source;
   1601                         if(U16_IS_TRAIL(trail)) {
   1602                             ++source;
   1603                             c=U16_GET_SUPPLEMENTARY(c, trail);
   1604                             /* convert this surrogate code point */
   1605                             /* exit this condition tree */
   1606                         } else {
   1607                             /* this is an unmatched lead code unit (1st surrogate) */
   1608                             /* callback(illegal) */
   1609                             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1610                             goto endloop;
   1611                         }
   1612                     } else {
   1613                         /* no more input */
   1614                         break;
   1615                     }
   1616                 } else {
   1617                     /* this is an unmatched trail code unit (2nd surrogate) */
   1618                     /* callback(illegal) */
   1619                     *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1620                     goto endloop;
   1621                 }
   1622 
   1623                 /* compress supplementary character U+10000..U+10ffff */
   1624                 if((delta=c-currentOffset)<=0x7f) {
   1625                     /* use the current dynamic window */
   1626                     *target++=(uint8_t)(delta|0x80);
   1627                     --targetCapacity;
   1628                 } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
   1629                     /* there is a dynamic window that contains this character, change to it */
   1630                     dynamicWindow=window;
   1631                     currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1632                     useDynamicWindow(scsu, dynamicWindow);
   1633                     c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
   1634                     length=2;
   1635                     goto outputBytes;
   1636                 } else if((code=getDynamicOffset(c, &offset))>=0) {
   1637                     /* might check if there are more characters in this window to come */
   1638                     /* define an extended window with this character */
   1639                     code-=0x200;
   1640                     dynamicWindow=getNextDynamicWindow(scsu);
   1641                     currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
   1642                     useDynamicWindow(scsu, dynamicWindow);
   1643                     c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
   1644                     length=4;
   1645                     goto outputBytes;
   1646                 } else {
   1647                     /* change to Unicode mode and output this (lead, trail) pair */
   1648                     isSingleByteMode=FALSE;
   1649                     *target++=(uint8_t)SCU;
   1650                     --targetCapacity;
   1651                     c=((uint32_t)lead<<16)|trail;
   1652                     length=4;
   1653                     goto outputBytes;
   1654                 }
   1655             } else if(c<0xa0) {
   1656                 /* quote C1 control character */
   1657                 c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
   1658                 length=2;
   1659                 goto outputBytes;
   1660             } else if(c==0xfeff || c>=0xfff0) {
   1661                 /* quote signature character=byte order mark and specials */
   1662                 c|=SQU<<16;
   1663                 length=3;
   1664                 goto outputBytes;
   1665             } else {
   1666                 /* compress all other BMP characters */
   1667                 if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
   1668                     /* there is a window defined that contains this character - switch to it or quote from it? */
   1669                     if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
   1670                         /* change to dynamic window */
   1671                         dynamicWindow=window;
   1672                         currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1673                         useDynamicWindow(scsu, dynamicWindow);
   1674                         c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
   1675                         length=2;
   1676                         goto outputBytes;
   1677                     } else {
   1678                         /* quote from dynamic window */
   1679                         c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
   1680                         length=2;
   1681                         goto outputBytes;
   1682                     }
   1683                 } else if((window=getWindow(staticOffsets, c))>=0) {
   1684                     /* quote from static window */
   1685                     c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
   1686                     length=2;
   1687                     goto outputBytes;
   1688                 } else if((code=getDynamicOffset(c, &offset))>=0) {
   1689                     /* define a dynamic window with this character */
   1690                     dynamicWindow=getNextDynamicWindow(scsu);
   1691                     currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
   1692                     useDynamicWindow(scsu, dynamicWindow);
   1693                     c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
   1694                     length=3;
   1695                     goto outputBytes;
   1696                 } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
   1697                           (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
   1698                 ) {
   1699                     /*
   1700                      * this character is not compressible (a BMP ideograph or similar);
   1701                      * switch to Unicode mode if this is the last character in the block
   1702                      * or there is at least one more ideograph following immediately
   1703                      */
   1704                     isSingleByteMode=FALSE;
   1705                     c|=SCU<<16;
   1706                     length=3;
   1707                     goto outputBytes;
   1708                 } else {
   1709                     /* quote Unicode */
   1710                     c|=SQU<<16;
   1711                     length=3;
   1712                     goto outputBytes;
   1713                 }
   1714             }
   1715 
   1716             /* normal end of conversion: prepare for a new character */
   1717             c=0;
   1718         }
   1719     } else {
   1720         if(c!=0 && targetCapacity>0) {
   1721             goto getTrailUnicode;
   1722         }
   1723 
   1724         /* state machine for Unicode mode */
   1725 /* unicodeByteMode: */
   1726         while(source<sourceLimit) {
   1727             if(targetCapacity<=0) {
   1728                 /* target is full */
   1729                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1730                 break;
   1731             }
   1732             c=*source++;
   1733 
   1734             if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
   1735                 /* not compressible, write character directly */
   1736                 if(targetCapacity>=2) {
   1737                     *target++=(uint8_t)(c>>8);
   1738                     *target++=(uint8_t)c;
   1739                     targetCapacity-=2;
   1740                 } else {
   1741                     length=2;
   1742                     goto outputBytes;
   1743                 }
   1744             } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
   1745                 /* compress BMP character if the following one is not an uncompressible ideograph */
   1746                 if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
   1747                     if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
   1748                         /* ASCII digit or letter */
   1749                         isSingleByteMode=TRUE;
   1750                         c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
   1751                         length=2;
   1752                         goto outputBytes;
   1753                     } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
   1754                         /* there is a dynamic window that contains this character, change to it */
   1755                         isSingleByteMode=TRUE;
   1756                         dynamicWindow=window;
   1757                         currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1758                         useDynamicWindow(scsu, dynamicWindow);
   1759                         c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
   1760                         length=2;
   1761                         goto outputBytes;
   1762                     } else if((code=getDynamicOffset(c, &offset))>=0) {
   1763                         /* define a dynamic window with this character */
   1764                         isSingleByteMode=TRUE;
   1765                         dynamicWindow=getNextDynamicWindow(scsu);
   1766                         currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
   1767                         useDynamicWindow(scsu, dynamicWindow);
   1768                         c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
   1769                         length=3;
   1770                         goto outputBytes;
   1771                     }
   1772                 }
   1773 
   1774                 /* don't know how to compress this character, just write it directly */
   1775                 length=2;
   1776                 goto outputBytes;
   1777             } else if(c<0xe000) {
   1778                 /* c is a surrogate */
   1779                 if(U16_IS_SURROGATE_LEAD(c)) {
   1780 getTrailUnicode:
   1781                     lead=(UChar)c;
   1782                     if(source<sourceLimit) {
   1783                         /* test the following code unit */
   1784                         trail=*source;
   1785                         if(U16_IS_TRAIL(trail)) {
   1786                             ++source;
   1787                             c=U16_GET_SUPPLEMENTARY(c, trail);
   1788                             /* convert this surrogate code point */
   1789                             /* exit this condition tree */
   1790                         } else {
   1791                             /* this is an unmatched lead code unit (1st surrogate) */
   1792                             /* callback(illegal) */
   1793                             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1794                             goto endloop;
   1795                         }
   1796                     } else {
   1797                         /* no more input */
   1798                         break;
   1799                     }
   1800                 } else {
   1801                     /* this is an unmatched trail code unit (2nd surrogate) */
   1802                     /* callback(illegal) */
   1803                     *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1804                     goto endloop;
   1805                 }
   1806 
   1807                 /* compress supplementary character */
   1808                 if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
   1809                     !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
   1810                 ) {
   1811                     /*
   1812                      * there is a dynamic window that contains this character and
   1813                      * the following character is not uncompressible,
   1814                      * change to the window
   1815                      */
   1816                     isSingleByteMode=TRUE;
   1817                     dynamicWindow=window;
   1818                     currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1819                     useDynamicWindow(scsu, dynamicWindow);
   1820                     c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
   1821                     length=2;
   1822                     goto outputBytes;
   1823                 } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
   1824                           (code=getDynamicOffset(c, &offset))>=0
   1825                 ) {
   1826                     /* two supplementary characters in (probably) the same window - define an extended one */
   1827                     isSingleByteMode=TRUE;
   1828                     code-=0x200;
   1829                     dynamicWindow=getNextDynamicWindow(scsu);
   1830                     currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
   1831                     useDynamicWindow(scsu, dynamicWindow);
   1832                     c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
   1833                     length=4;
   1834                     goto outputBytes;
   1835                 } else {
   1836                     /* don't know how to compress this character, just write it directly */
   1837                     c=((uint32_t)lead<<16)|trail;
   1838                     length=4;
   1839                     goto outputBytes;
   1840                 }
   1841             } else /* 0xe000<=c<0xf300 */ {
   1842                 /* quote to avoid SCSU tags */
   1843                 c|=UQU<<16;
   1844                 length=3;
   1845                 goto outputBytes;
   1846             }
   1847 
   1848             /* normal end of conversion: prepare for a new character */
   1849             c=0;
   1850         }
   1851     }
   1852 endloop:
   1853 
   1854     /* set the converter state back into UConverter */
   1855     scsu->fromUIsSingleByteMode=isSingleByteMode;
   1856     scsu->fromUDynamicWindow=dynamicWindow;
   1857 
   1858     cnv->fromUChar32=c;
   1859 
   1860     /* write back the updated pointers */
   1861     pArgs->source=source;
   1862     pArgs->target=(char *)target;
   1863     return;
   1864 
   1865 outputBytes:
   1866     /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
   1867     /* from the first if in the loop we know that targetCapacity>0 */
   1868     if(length<=targetCapacity) {
   1869         switch(length) {
   1870             /* each branch falls through to the next one */
   1871         case 4:
   1872             *target++=(uint8_t)(c>>24);
   1873             U_FALLTHROUGH;
   1874         case 3:
   1875             *target++=(uint8_t)(c>>16);
   1876             U_FALLTHROUGH;
   1877         case 2:
   1878             *target++=(uint8_t)(c>>8);
   1879             U_FALLTHROUGH;
   1880         case 1:
   1881             *target++=(uint8_t)c;
   1882             U_FALLTHROUGH;
   1883         default:
   1884             /* will never occur */
   1885             break;
   1886         }
   1887         targetCapacity-=length;
   1888 
   1889         /* normal end of conversion: prepare for a new character */
   1890         c=0;
   1891         goto loop;
   1892     } else {
   1893         uint8_t *p;
   1894 
   1895         /*
   1896          * We actually do this backwards here:
   1897          * In order to save an intermediate variable, we output
   1898          * first to the overflow buffer what does not fit into the
   1899          * regular target.
   1900          */
   1901         /* we know that 0<=targetCapacity<length<=4 */
   1902         /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
   1903         length-=targetCapacity;
   1904         p=(uint8_t *)cnv->charErrorBuffer;
   1905         switch(length) {
   1906             /* each branch falls through to the next one */
   1907         case 4:
   1908             *p++=(uint8_t)(c>>24);
   1909             U_FALLTHROUGH;
   1910         case 3:
   1911             *p++=(uint8_t)(c>>16);
   1912             U_FALLTHROUGH;
   1913         case 2:
   1914             *p++=(uint8_t)(c>>8);
   1915             U_FALLTHROUGH;
   1916         case 1:
   1917             *p=(uint8_t)c;
   1918             U_FALLTHROUGH;
   1919         default:
   1920             /* will never occur */
   1921             break;
   1922         }
   1923         cnv->charErrorBufferLength=(int8_t)length;
   1924 
   1925         /* now output what fits into the regular target */
   1926         c>>=8*length; /* length was reduced by targetCapacity */
   1927         switch(targetCapacity) {
   1928             /* each branch falls through to the next one */
   1929         case 3:
   1930             *target++=(uint8_t)(c>>16);
   1931             U_FALLTHROUGH;
   1932         case 2:
   1933             *target++=(uint8_t)(c>>8);
   1934             U_FALLTHROUGH;
   1935         case 1:
   1936             *target++=(uint8_t)c;
   1937             U_FALLTHROUGH;
   1938         default:
   1939             break;
   1940         }
   1941 
   1942         /* target overflow */
   1943         targetCapacity=0;
   1944         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1945         c=0;
   1946         goto endloop;
   1947     }
   1948 }
   1949 
   1950 /* miscellaneous ------------------------------------------------------------ */
   1951 
   1952 static const char *
   1953 _SCSUGetName(const UConverter *cnv) {
   1954     SCSUData *scsu=(SCSUData *)cnv->extraInfo;
   1955 
   1956     switch(scsu->locale) {
   1957     case l_ja:
   1958         return "SCSU,locale=ja";
   1959     default:
   1960         return "SCSU";
   1961     }
   1962 }
   1963 
   1964 /* structure for SafeClone calculations */
   1965 struct cloneSCSUStruct
   1966 {
   1967     UConverter cnv;
   1968     SCSUData mydata;
   1969 };
   1970 
   1971 static UConverter *
   1972 _SCSUSafeClone(const UConverter *cnv,
   1973                void *stackBuffer,
   1974                int32_t *pBufferSize,
   1975                UErrorCode *status)
   1976 {
   1977     struct cloneSCSUStruct * localClone;
   1978     int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct);
   1979 
   1980     if (U_FAILURE(*status)){
   1981         return 0;
   1982     }
   1983 
   1984     if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
   1985         *pBufferSize = bufferSizeNeeded;
   1986         return 0;
   1987     }
   1988 
   1989     localClone = (struct cloneSCSUStruct *)stackBuffer;
   1990     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
   1991 
   1992     uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData));
   1993     localClone->cnv.extraInfo = &localClone->mydata;
   1994     localClone->cnv.isExtraLocal = TRUE;
   1995 
   1996     return &localClone->cnv;
   1997 }
   1998 
   1999 
   2000 static const UConverterImpl _SCSUImpl={
   2001     UCNV_SCSU,
   2002 
   2003     NULL,
   2004     NULL,
   2005 
   2006     _SCSUOpen,
   2007     _SCSUClose,
   2008     _SCSUReset,
   2009 
   2010     _SCSUToUnicode,
   2011     _SCSUToUnicodeWithOffsets,
   2012     _SCSUFromUnicode,
   2013     _SCSUFromUnicodeWithOffsets,
   2014     NULL,
   2015 
   2016     NULL,
   2017     _SCSUGetName,
   2018     NULL,
   2019     _SCSUSafeClone,
   2020     ucnv_getCompleteUnicodeSet
   2021 };
   2022 
   2023 static const UConverterStaticData _SCSUStaticData={
   2024     sizeof(UConverterStaticData),
   2025     "SCSU",
   2026     1212, /* CCSID for SCSU */
   2027     UCNV_IBM, UCNV_SCSU,
   2028     1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */
   2029     /*
   2030      * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode
   2031      * substitution string.
   2032      */
   2033     { 0x0e, 0xff, 0xfd, 0 }, 3,
   2034     FALSE, FALSE,
   2035     0,
   2036     0,
   2037     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   2038 };
   2039 
   2040 const UConverterSharedData _SCSUData=
   2041         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_SCSUStaticData, &_SCSUImpl);
   2042 
   2043 #endif
   2044