Cross Reference: /external/icu/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetLMBCS.java

Lines Matching full:lmbcs
28  * LMBCS
37  * LMBCS to and from Unicode.
39  * Since the LMBCS character set is only sparsely documented in existing
41  * file to serve as a guide to understanding LMBCS.
43  * LMBCS was originally designed with these four sometimes-competing design goals:
50  * All of the national character sets LMBCS was trying to encode are 'ANSI'
55  * bytes from 0x20-0x7F are represented by the same single-byte values in LMBCS.
59      * The general LMBCS code unit is from 1-3 bytes. We can describe the 3 bytes as
63      * data bytes. The maximum size of a LMBCS character is 3 bytes:
75      * Most of the values less than 0x20 are reserved in LMBCS to announce
93      * So, the beginning of understanding LMBCS is that IF the first byte of a LMBCS
106      * However, to fully understand LMBCS, you must also understand a series of
115      * character set. So, for example, the LMBCS sequence x10 x10 xAE is the
134      * that must be moved to different values because LMBCS reserves those
142      * useful doctrine that any byte less than 0x20 in a LMBCS char must be
149      * LMBCS, was to use up the spaces of the form
161      * represented in LMBCS:
169      * LMBCS from encoding any Unicode values of the form U+F6xx, but that's OK:
175      * a LMBCS char that holds a literal Unicode value.
179      * To squish the LMBCS representation down even further, and to make
181      * from a LMBCS character. This is decided on a process-by-process basis. The
190      * optimization group as part of the name of the converter (LMBCS-1, LMBCS-2,
191      * etc.). Using plain 'LMBCS' as the name of the converter will give you
192      * LMBCS-1.
197      * Because of the extensive use of other character sets, the LMBCS converter
205     private static final short ULMBCS_GRP_LAST = 0x13; /* last LMBCS group that has a converter */
234      * LMBCS to Unicode.
236      * However, to translate Unicode to LMBCS, we need some more support.
239      * code point back into LMBCS. The first thing we do is look up into a table
244      * SBCS, the table will place it in the SBCS sets, to make the LMBCS code point
249                                                            LMBCS sbcs native encoding
252                                                            LMBCS mbcs native encoding
434      * LMBCS groups. We use this table, and the associated code, to
437      *     This table maps locale ID's to LMBCS opt groups.
523      * vtable. There is also room in there for converter-specific data. LMBCS
531         short OptGroup;                         /* default Opt. grp. for this LMBCS session */
560       //get the Opt Group number for the LMBCS converter
577         /* A function to call when we are looking at the Unicode group byte in LMBCS */
610         /* Return the Unicode representation for the current LMBCS character. */
630              * 1. 'CurByte' points at the first byte of a LMBCS character
634              * 1. set 'source' to the point at the beginning of the next char (not if LMBCS char is only 1 byte)
680                         /* check for LMBCS doubled-group-byte case */
758             byte[] LMBCS = new byte[ULMBCS_CHARSIZE_MAX * 2]; /* Increase the size for proper handling in subsequent calls to MBCS functions */
778                             LMBCS[i] = toUBytesArray[i];
780                             LMBCS[i] = source.get();
783                     tmpSourceBuffer = ByteBuffer.wrap(LMBCS);
793                             toUBytesArray[i] = LMBCS[i];
852          * Unicode to LMBCS, and we suspect that a Unicode character will fit into
857         private int LMBCSConversionWorker(short group, byte[] LMBCS, char pUniChar, short[] lastConverterIndex, boolean[] groups_tried) {
886                 LMBCS[pLMBCS++] = (byte)group;
888                     LMBCS[pLMBCS++] = (byte)group;
900                 LMBCS[pLMBCS++] = (byte)(value[0] >> 24);
902                 LMBCS[pLMBCS++] = (byte)(value[0] >> 16);
904                 LMBCS[pLMBCS++] = (byte)(value[0] >> 8);
906                 LMBCS[pLMBCS++] = (byte)value[0];
916          * know we are writing LMBCS using the Unicode group.
918         private int LMBCSConvertUni(byte[] LMBCS, char uniChar) {
923             LMBCS[index++] = (byte)ULMBCS_GRP_UNICODE;
926                 LMBCS[index++] = (byte)ULMBCS_UNICOMPATZERO;
927                 LMBCS[index++] = (byte)HighCh;
929                 LMBCS[index++] = (byte)HighCh;
930                 LMBCS[index++] = (byte)LowCh;
934         /* The main Unicode to LMBCS conversion function */
940             byte[] LMBCS = new byte[ULMBCS_CHARSIZE_MAX];
947              * Basic strategy: attempt to fill in local LMBCS 1-char buffer.(LMBCS)
991                     LMBCS[pLMBCS++] = (byte)uniChar;
999                         bytes_written = LMBCSConvertUni(LMBCS, uniChar);
1003                             LMBCS[pLMBCS++] = ULMBCS_GRP_CTRL;
1004                             LMBCS[pLMBCS++] = (byte)(ULMBCS_CTRLOFFSET + uniChar);
1006                             LMBCS[pLMBCS++] = ULMBCS_GRP_CTRL;
1007                             LMBCS[pLMBCS++] = (byte)uniChar;
1012                         bytes_written = LMBCSConversionWorker(group, LMBCS, uniChar, lastConverterIndex, groups_tried);
1020                                 bytes_written = LMBCSConversionWorker (ULMBCS_GRP_L1, LMBCS, uniChar, lastConverterIndex, groups_tried);
1023                                     bytes_written = LMBCSConversionWorker (ULMBCS_GRP_EXCEPT, LMBCS, uniChar, lastConverterIndex, groups_tried);
1026                                     bytes_written = LMBCSConversionWorker (extraInfo.localeConverterIndex, LMBCS, uniChar, lastConverterIndex, groups_tried);
1029                                  bytes_written = LMBCSConversionWorker (extraInfo.localeConverterIndex, LMBCS, uniChar, lastConverterIndex, groups_tried);
1035                             bytes_written = LMBCSConversionWorker(extraInfo.localeConverterIndex, LMBCS, uniChar, lastConverterIndex, groups_tried);
1039                             bytes_written = LMBCSConversionWorker(lastConverterIndex[0], LMBCS, uniChar, lastConverterIndex, groups_tried);
1057                                     bytes_written = LMBCSConversionWorker(grp_ix, LMBCS, uniChar, lastConverterIndex, groups_tried);
1065                                 bytes_written = LMBCSConversionWorker(ULMBCS_GRP_EXCEPT, LMBCS, uniChar, lastConverterIndex, groups_tried);
1070                             bytes_written = LMBCSConvertUni(LMBCS, uniChar);
1079                     target.put(LMBCS[pLMBCS++]);
1094                         errorBuffer[i] = LMBCS[pLMBCS++];
OpenGrok