Home | History | Annotate | Download | only in charset

Lines Matching full:lmbcs

28  * LMBCS
37 * LMBCS to and from Unicode.
39 * Since the LMBCS character set is only sparsely documented in existing
41 * file to serve as a guide to understanding LMBCS.
43 * LMBCS was originally designed with these four sometimes-competing design goals:
50 * All of the national character sets LMBCS was trying to encode are 'ANSI'
55 * bytes from 0x20-0x7F are represented by the same single-byte values in LMBCS.
59 * The general LMBCS code unit is from 1-3 bytes. We can describe the 3 bytes as
63 * data bytes. The maximum size of a LMBCS character is 3 bytes:
75 * Most of the values less than 0x20 are reserved in LMBCS to announce
93 * So, the beginning of understanding LMBCS is that IF the first byte of a LMBCS
106 * However, to fully understand LMBCS, you must also understand a series of
115 * character set. So, for example, the LMBCS sequence x10 x10 xAE is the
134 * that must be moved to different values because LMBCS reserves those
142 * useful doctrine that any byte less than 0x20 in a LMBCS char must be
149 * LMBCS, was to use up the spaces of the form
161 * represented in LMBCS:
169 * LMBCS from encoding any Unicode values of the form U+F6xx, but that's OK:
175 * a LMBCS char that holds a literal Unicode value.
179 * To squish the LMBCS representation down even further, and to make
181 * from a LMBCS character. This is decided on a process-by-process basis. The
190 * optimization group as part of the name of the converter (LMBCS-1, LMBCS-2,
191 * etc.). Using plain 'LMBCS' as the name of the converter will give you
192 * LMBCS-1.
197 * Because of the extensive use of other character sets, the LMBCS converter
205 private static final short ULMBCS_GRP_LAST = 0x13; /* last LMBCS group that has a converter */
234 * LMBCS to Unicode.
236 * However, to translate Unicode to LMBCS, we need some more support.
239 * code point back into LMBCS. The first thing we do is look up into a table
244 * SBCS, the table will place it in the SBCS sets, to make the LMBCS code point
249 LMBCS sbcs native encoding
252 LMBCS mbcs native encoding
434 * LMBCS groups. We use this table, and the associated code, to
437 * This table maps locale IDs to LMBCS opt groups.
523 * vtable. There is also room in there for converter-specific data. LMBCS
531 short OptGroup; /* default Opt. grp. for this LMBCS session */
560 //get the Opt Group number for the LMBCS converter
577 /* A function to call when we are looking at the Unicode group byte in LMBCS */
610 /* Return the Unicode representation for the current LMBCS character. */
630 * 1. 'CurByte' points at the first byte of a LMBCS character
634 * 1. set 'source' to the point at the beginning of the next char (not if LMBCS char is only 1 byte)
680 /* check for LMBCS doubled-group-byte case */
758 byte[] LMBCS = new byte[ULMBCS_CHARSIZE_MAX * 2]; /* Increase the size for proper handling in subsequent calls to MBCS functions */
778 LMBCS[i] = toUBytesArray[i];
780 LMBCS[i] = source.get();
783 tmpSourceBuffer = ByteBuffer.wrap(LMBCS);
793 toUBytesArray[i] = LMBCS[i];
852 * Unicode to LMBCS, and we suspect that a Unicode character will fit into
857 private int LMBCSConversionWorker(short group, byte[] LMBCS, char pUniChar, short[] lastConverterIndex, boolean[] groups_tried) {
886 LMBCS[pLMBCS++] = (byte)group;
888 LMBCS[pLMBCS++] = (byte)group;
900 LMBCS[pLMBCS++] = (byte)(value[0] >> 24);
902 LMBCS[pLMBCS++] = (byte)(value[0] >> 16);
904 LMBCS[pLMBCS++] = (byte)(value[0] >> 8);
906 LMBCS[pLMBCS++] = (byte)value[0];
916 * know we are writing LMBCS using the Unicode group.
918 private int LMBCSConvertUni(byte[] LMBCS, char uniChar) {
923 LMBCS[index++] = (byte)ULMBCS_GRP_UNICODE;
926 LMBCS[index++] = (byte)ULMBCS_UNICOMPATZERO;
927 LMBCS[index++] = (byte)HighCh;
929 LMBCS[index++] = (byte)HighCh;
930 LMBCS[index++] = (byte)LowCh;
934 /* The main Unicode to LMBCS conversion function */
940 byte[] LMBCS = new byte[ULMBCS_CHARSIZE_MAX];
947 * Basic strategy: attempt to fill in local LMBCS 1-char buffer.(LMBCS)
991 LMBCS[pLMBCS++] = (byte)uniChar;
999 bytes_written = LMBCSConvertUni(LMBCS, uniChar);
1003 LMBCS[pLMBCS++] = ULMBCS_GRP_CTRL;
1004 LMBCS[pLMBCS++] = (byte)(ULMBCS_CTRLOFFSET + uniChar);
1006 LMBCS[pLMBCS++] = ULMBCS_GRP_CTRL;
1007 LMBCS[pLMBCS++] = (byte)uniChar;
1012 bytes_written = LMBCSConversionWorker(group, LMBCS, uniChar, lastConverterIndex, groups_tried);
1020 bytes_written = LMBCSConversionWorker (ULMBCS_GRP_L1, LMBCS, uniChar, lastConverterIndex, groups_tried);
1023 bytes_written = LMBCSConversionWorker (ULMBCS_GRP_EXCEPT, LMBCS, uniChar, lastConverterIndex, groups_tried);
1026 bytes_written = LMBCSConversionWorker (extraInfo.localeConverterIndex, LMBCS, uniChar, lastConverterIndex, groups_tried);
1029 bytes_written = LMBCSConversionWorker (extraInfo.localeConverterIndex, LMBCS, uniChar, lastConverterIndex, groups_tried);
1035 bytes_written = LMBCSConversionWorker(extraInfo.localeConverterIndex, LMBCS, uniChar, lastConverterIndex, groups_tried);
1039 bytes_written = LMBCSConversionWorker(lastConverterIndex[0], LMBCS, uniChar, lastConverterIndex, groups_tried);
1057 bytes_written = LMBCSConversionWorker(grp_ix, LMBCS, uniChar, lastConverterIndex, groups_tried);
1065 bytes_written = LMBCSConversionWorker(ULMBCS_GRP_EXCEPT, LMBCS, uniChar, lastConverterIndex, groups_tried);
1070 bytes_written = LMBCSConvertUni(LMBCS, uniChar);
1079 target.put(LMBCS[pLMBCS++]);
1094 errorBuffer[i] = LMBCS[pLMBCS++];