/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
/**
 * @author Ilya S. Okomin
 * @version $Revision$
 */
package java.awt.font;

import java.io.IOException;
import java.io.Serializable;

/**
 * The Class NumericShaper converts European (ASCII) digits in a character
 * array to the decimal digits of another script. A shaper is either
 * <em>non-contextual</em> (always shapes to one fixed range) or
 * <em>contextual</em> (the target range follows the script of the preceding
 * strong directional text).
 * For tables of the character codes used,
 * see <a href="http://www.unicode.org/Public/UNIDATA/">unicode.org</a>.
 */
public final class NumericShaper implements Serializable {

    /** The Constant serialVersionUID. */
    private static final long serialVersionUID = -8022764705923730308L;

    /**
     * The Constant EUROPEAN indicates the latin and extended range,
     * and latin decimal base.
     */
    public static final int EUROPEAN = 1;

    /** The Constant ARABIC indicates the ARABIC range and decimal base. */
    public static final int ARABIC = 2;

    /**
     * The Constant EASTERN_ARABIC indicates the ARABIC range and
     * ARABIC_EXTENDED decimal base.
     */
    public static final int EASTERN_ARABIC = 4;

    /**
     * The Constant DEVANAGARI indicates the DEVANAGARI range and
     * decimal base.
     */
    public static final int DEVANAGARI = 8;

    /** The Constant BENGALI indicates the BENGALI range and decimal base. */
    public static final int BENGALI = 16;

    /** The Constant GURMUKHI indicates the GURMUKHI range and decimal base. */
    public static final int GURMUKHI = 32;

    /** The Constant GUJARATI indicates the GUJARATI range and decimal base. */
    public static final int GUJARATI = 64;

    /** The Constant ORIYA indicates the ORIYA range and decimal base. */
    public static final int ORIYA = 128;

    /** The Constant TAMIL indicates the TAMIL range and decimal base. */
    public static final int TAMIL = 256;

    /** The Constant TELUGU indicates the TELUGU range and decimal base. */
    public static final int TELUGU = 512;

    /** The Constant KANNADA indicates the KANNADA range and decimal base. */
    public static final int KANNADA = 1024;

    /**
     * The Constant MALAYALAM indicates the MALAYALAM range and decimal base.
     */
    public static final int MALAYALAM = 2048;

    /** The Constant THAI indicates the THAI range and decimal base. */
    public static final int THAI = 4096;

    /** The Constant LAO indicates the LAO range and decimal base. */
    public static final int LAO = 8192;

    /** The Constant TIBETAN indicates the TIBETAN range and decimal base. */
    public static final int TIBETAN = 16384;

    /** The Constant MYANMAR indicates the MYANMAR range and decimal base. */
    public static final int MYANMAR = 32768;

    /**
     * The Constant ETHIOPIC indicates the ETHIOPIC range and decimal base.
     */
    public static final int ETHIOPIC = 65536;

    /** The Constant KHMER indicates the KHMER range and decimal base. */
    public static final int KHMER = 131072;

    /**
     * The Constant MONGOLIAN indicates the MONGOLIAN range and
     * decimal base.
     */
    public static final int MONGOLIAN = 262144;

    /** The Constant ALL_RANGES indicates all ranges. */
    public static final int ALL_RANGES = 524287;

    /*
     * What follows is the set of script indices. An index is the power to
     * which 2 must be raised to obtain the corresponding range constant.
     * The script ranges, context names and digit low ranges tables are all
     * indexed with these values.
     */

    /** Index of the EUROPEAN range. */
    private static final int INDEX_EUROPEAN = 0;

    /** Index of the ARABIC range. */
    private static final int INDEX_ARABIC = 1;

    /** Index of the EASTERN_ARABIC range. */
    private static final int INDEX_EASTERN_ARABIC = 2;

    /** Index of the DEVANAGARI range. */
    private static final int INDEX_DEVANAGARI = 3;

    /** Index of the BENGALI range. */
    private static final int INDEX_BENGALI = 4;

    /** Index of the GURMUKHI range. */
    private static final int INDEX_GURMUKHI = 5;

    /** Index of the GUJARATI range. */
    private static final int INDEX_GUJARATI = 6;

    /** Index of the ORIYA range. */
    private static final int INDEX_ORIYA = 7;

    /** Index of the TAMIL range. */
    private static final int INDEX_TAMIL = 8;

    /** Index of the TELUGU range. */
    private static final int INDEX_TELUGU = 9;

    /** Index of the KANNADA range. */
    private static final int INDEX_KANNADA = 10;

    /** Index of the MALAYALAM range. */
    private static final int INDEX_MALAYALAM = 11;

    /** Index of the THAI range. */
    private static final int INDEX_THAI = 12;

    /** Index of the LAO range. */
    private static final int INDEX_LAO = 13;

    /** Index of the TIBETAN range. */
    private static final int INDEX_TIBETAN = 14;

    /** Index of the MYANMAR range. */
    private static final int INDEX_MYANMAR = 15;

    /** Index of the ETHIOPIC range. */
    private static final int INDEX_ETHIOPIC = 16;

    /** Index of the KHMER range. */
    private static final int INDEX_KHMER = 17;

    /** Index of the MONGOLIAN range. */
    private static final int INDEX_MONGOLIAN = 18;

    /** Number of script indices; every valid index is strictly below it. */
    private static final int MAX_INDEX = 19;

    /** High bit of the serialized {@code mask}: set for contextual shapers. */
    private static final int CONTEXTUAL_MASK = 1 << 31;

    /** First European digit, '0'. */
    private static final char MIN_DIGIT = (char) 0x0030;

    /** Last European digit, '9'. */
    private static final char MAX_DIGIT = (char) 0x0039;

    /*
     * Script ranges as (lowest, highest) code point pairs, one pair per
     * script index. ARABIC and EASTERN_ARABIC deliberately share one block:
     * they differ only in the digits they shape to (see resolveContext).
     * Data is taken from the UnicodeData.txt file from
     * http://www.unicode.org/Public/UNIDATA/
     *
     * Static on purpose: this is constant data, so it must neither be written
     * into the serialized form nor depend on the stream to be restored.
     */
    /** The scripts ranges. */
    private static final int[] SCRIPTS_RANGES = {
            0x0000, 0x024F, // EUROPEAN (basic latin + latin-1 + extended)
            0x0600, 0x06FF, // ARABIC
            0x0600, 0x06FF, // EASTERN_ARABIC (same block as ARABIC)
            0x0900, 0x097F, // DEVANAGARI
            0x0980, 0x09FF, // BENGALI
            0x0A00, 0x0A7F, // GURMUKHI
            0x0A80, 0x0AFF, // GUJARATI
            0x0B00, 0x0B7F, // ORIYA
            0x0B80, 0x0BFF, // TAMIL
            0x0C00, 0x0C7F, // TELUGU
            0x0C80, 0x0CFF, // KANNADA
            0x0D00, 0x0D7F, // MALAYALAM
            0x0E00, 0x0E7F, // THAI
            0x0E80, 0x0EFF, // LAO
            0x0F00, 0x0FFF, // TIBETAN
            0x1000, 0x109F, // MYANMAR
            0x1200, 0x137F, // ETHIOPIC
            0x1780, 0x17FF, // KHMER
            0x1800, 0x18AF  // MONGOLIAN
    };

    /*
     * Digit low range values decreased by 0x0030. Each low range value is
     * decreased by 0x0030 to make it easy to obtain the unicode value of the
     * context dependent digit. European digits start at 0x0030, hence the
     * context dependent unicode digit value equals
     * DIGITS_LOW_RANGES[script index] + european digit char unicode value.
     * !! the only exception is the ETHIOPIC script where there is no '0' digit
     * Data is taken from the UnicodeData.txt file from
     * http://www.unicode.org/Public/UNIDATA/
     */
    /** The digits low ranges. */
    private static final int[] DIGITS_LOW_RANGES = {
            0x0000, // EUROPEAN
            0x0630, // ARABIC         (digits U+0660..U+0669)
            0x06C0, // EASTERN_ARABIC (digits U+06F0..U+06F9)
            0x0936, // DEVANAGARI
            0x09B6, // BENGALI
            0x0A36, // GURMUKHI
            0x0AB6, // GUJARATI
            0x0B36, // ORIYA
            0x0BB6, // TAMIL
            0x0C36, // TELUGU
            0x0CB6, // KANNADA
            0x0D36, // MALAYALAM
            0x0E20, // THAI
            0x0EA0, // LAO
            0x0EF0, // TIBETAN
            0x1010, // MYANMAR
            0x1338, // ETHIOPIC - (low range-1) no ETHIOPIC '0' DIGIT!
            0x17B0, // KHMER
            0x17E0  // MONGOLIAN
    };

    /** Context names used by {@link #toString()}, by script index. */
    private static final String[] CONTEXTS = {
            "EUROPEAN",
            "ARABIC",
            "EASTERN_ARABIC",
            "DEVANAGARI",
            "BENGALI",
            "GURMUKHI",
            "GUJARATI",
            "ORIYA",
            "TAMIL",
            "TELUGU",
            "KANNADA",
            "MALAYALAM",
            "THAI",
            "LAO",
            "TIBETAN",
            "MYANMAR",
            "ETHIOPIC",
            "KHMER",
            "MONGOLIAN"
    };

    /*
     * Strong characters flags array is used to determine if the
     * unicode bidirectional category of the character is strong,
     * according to the Unicode specification. If the bit whose index equals
     * the character's unicode value is 1 - the character is strong.
     * The array holds 2048 ints, i.e. one bit for each of the 65536 UTF-16
     * code units.
     * This array was generated using UnicodeData.txt file from
     * http://www.unicode.org/Public/UNIDATA/
     */

    /** The Constant STRONG_TEXT_FLAGS. */
    private static final int[] STRONG_TEXT_FLAGS = { 0, 0, 134217726, 134217726,
            0, 69207040, -8388609, -8388609, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -65533, -1, -1, -100663297, 196611, 16415, 0, 0, 0,
            67108864, -10432, -5, -32769, -4194305, -1, -1, -1, -1, -1017, -1,
            -32769, 67108863, 65535, -131072, -25165825, -2, 767, 1073741824,
            -65463, 2033663, -939513841, 134217726, 2047, -73728, -1, -1,
            541065215, -67059616, -180225, 65535, -8192, 16383, -1, 131135, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, -8, -469762049, -16703999, 537001971,
            -417812, -473563649, -1333765759, 133431235, -423960, -1016201729,
            1577058305, 1900480, -278552, -470942209, 72193, 65475, -417812,
            1676541439, -1333782143, 262083, -700594200, -1006647528, 8396230,
            524224, -139282, 66059775, 30, 65475, -139284, -470811137,
            1080036831, 65475, -139284, -1006633473, 8396225, 65475, -58720276,
            805044223, -16547713, 1835008, -2, 917503, 268402815, 0, -17816170,
            537783470, 872349791, 0, -50331649, -1050673153, -257, -2147481601,
            3872, -1073741824, 237503, 0, -1, 16914171, 16777215, 0, 0, -1,
            -65473, 536870911, -1, -1, -2080374785, -1, -1, -249, -1, 67108863,
            -1, -1, 1031749119, -1, -49665, 2134769663, -8388803, -1,
            -12713985, -1, 134217727, 536870911, 65535, -1, -1, 2097151, -2,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, 8388607, 134217726, -1, -1, 131071, 253951, 6553599, 262143,
            122879, -1, -1065353217, 401605055, 1023, 67043328, -1, -1,
            16777215, -1, 511, 0, 0, 536870911, 33226872, -64, 2047999, -1,
            -64513, 67044351, 0, -830472193, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, 0, 0,
            -1, -1, -1, -1, 268435455, -1, -1, 67108863, 1061158911, -1,
            -1426112705, 1073741823, -1, 1608515583, 265232348, 534519807,
            49152, 27648, 0, -2147352576, 2031616, 0, 0, 0, 1043332228,
            -201605808, 992, -1, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            -4194304, -1, 134217727, 2097152, 0, 0, 0, 0, 0, 0, 0, -268435456,
            -1, -1, 1023, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4096, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1,
            -32769, 2147483647, 0, -1, -1, -1, 31, -1, -65473, -1, 32831,
            8388607, 2139062143, 2139062143, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 224, 524157950, -2, -1, -528482305, -2, -1,
            -134217729, -32, -122881, -1, -1, -32769, 16777215, 0, -65536,
            536870911, -1, 15, -1879048193, -1, 131071, -61441, 2147483647, -1,
            -1, -1, -125829121, -1, -1, 1073741823, 2147483647, 1, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 2097152, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            134217728, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, 8191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2117, 159, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2147483648, 1, 0, 0, -2147483648,
            1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, -2147483648, 1, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2147483648, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -49153, -1, -63489, -1, -1, 67108863, 0,
            -1594359681, 1602223615, -37, -1, -1, 262143, -524288, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, 1073741823, -65536, -1, -196609, -1,
            255, 536805376, 0, 0, 0, -2162688, -1, -1, -1, 536870911, 0,
            134217726, 134217726, -64, -1, 2147483647, 486341884, 0

    };

    /**
     * Serialized form: script index of the default context (contextual
     * shaper) or of the single range (non-contextual shaper).
     */
    private int key;

    /**
     * Serialized form: bit set of ranges to shape; the high bit
     * ({@link #CONTEXTUAL_MASK}) is set when the shaper is contextual.
     */
    private int mask;

    /** The ranges to be shaped, as a bit set of range constants. */
    private int fRanges;

    /** The script index of the default context. */
    private int fDefaultContextIndex;

    /** True if this NumericShaper shapes contextually. */
    private boolean fContextual;

    /** The script index of the single range; non-contextual case only. */
    private int fSingleRangeIndex;

    /**
     * Creates NumericShaper with specified parameters.
     *
     * @param ranges specified ranges to be shaped
     * @param defaultContext default context range, must be a single range
     * @param isContextual specifies if the instance is contextual
     *
     * @throws IllegalArgumentException if defaultContext is not a single
     * range, or if the shaper is not contextual and ranges is not a single
     * range
     */
    private NumericShaper(int ranges, int defaultContext, boolean isContextual){
        this.fRanges = ranges;
        this.fDefaultContextIndex = getIndexFromRange(defaultContext);
        this.fContextual = isContextual;

        if (!fContextual){
            fSingleRangeIndex = getIndexFromRange(ranges);
        }
    }

    /**
     * Returns script index of the specified context range.
     *
     * @param range specified range, must be exactly one of the range constants
     *
     * @return one of the script indices according to the specified range.
     *
     * @throws IllegalArgumentException if range is not a single known range
     */
    private static int getIndexFromRange(int range){
        // Exactly one bit must be set and it must be one of the known ranges.
        if (Integer.bitCount(range) != 1 || (range & ALL_RANGES) == 0) {
            throw rangeException(range);
        }

        return Integer.numberOfTrailingZeros(range);
    }

    /**
     * Returns range corresponding to the specified script index.
     *
     * @param index specified script index
     *
     * @return one of the range constants according to the specified script index.
     *
     * @throws IllegalArgumentException if index is not a valid script index
     */
    private static int getRangeFromIndex(int index){
        if (index < 0 || index >= MAX_INDEX){
            throw rangeException(index);
        }

        return 1 << index;
    }

    /**
     * Builds (but does not throw) the exception reported for an illegal
     * range or index value, so that call sites can write
     * {@code throw rangeException(value)}.
     *
     * @param value the offending value
     *
     * @return the exception to be thrown by the caller
     */
    private static IllegalArgumentException rangeException(int value) {
        return new IllegalArgumentException("Illegal range argument value: " + value);
    }

    @Override
    public int hashCode() {
        int result = 17;
        result = 31 * result + fRanges;
        result = 31 * result + fDefaultContextIndex;
        result = 31 * result + (fContextual ? 1 : 0);
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }

        // instanceof also rejects null, no exception-driven type test needed
        if (!(obj instanceof NumericShaper)) {
            return false;
        }

        NumericShaper ns = (NumericShaper) obj;
        return fRanges == ns.fRanges
                && fDefaultContextIndex == ns.fDefaultContextIndex
                && fContextual == ns.fContextual;
    }

    @Override
    public String toString() {
        /* !! There is no description in the documentation what this method must
         * return. Thus format of toString method is based on 1.5 release
         * behavior and can be obtained using next test sample:
         *
         * // Simple shapers toString format
         * System.out.println(NumericShaper.getShaper(NumericShaper.EASTERN_ARABIC));
         *
         * // Context shapers with default context toString format
         * System.out.println(NumericShaper.getContextualShaper(
         *   NumericShaper.ARABIC | NumericShaper.TAMIL));
         *
         * // Context shapers with context
         * System.out.println(NumericShaper.getContextualShaper(
         *   NumericShaper.ARABIC | NumericShaper.TAMIL,
         *   NumericShaper.EASTERN_ARABIC));
         */
        StringBuilder sb = new StringBuilder(super.toString());

        sb.append("[contextual:");
        sb.append(fContextual);

        if (fContextual){
            sb.append(", context:");
            sb.append(CONTEXTS[fDefaultContextIndex]);
        }

        sb.append(", range(s): ");
        if (fContextual) {
            boolean isFirst = true;
            for (int index = 0; index < MAX_INDEX; index++){
                if ((fRanges & (1 << index)) != 0){
                    if (isFirst){
                        isFirst = false;
                    } else {
                        sb.append(", ");
                    }
                    sb.append(CONTEXTS[index]);
                }
            }
        } else {
            sb.append(CONTEXTS[fSingleRangeIndex]);
        }
        sb.append("]");

        return sb.toString();
    }

    /**
     * Gets the NumericShaper for the specified unicode ranges
     * and default unicode range. The defaultContext parameter
     * is used as the starting context (which indicates the
     * language/script being used). The OR logical operation
     * should be used for multiple ranges:
     * NumericShaper.DEVANAGARI | NumericShaper.BENGALI.
     * The NumericShaper returned by this method is contextual
     * in that it supports multiple character ranges, depending
     * on the context.
     *
     * @param ranges the unicode ranges.
     * @param defaultContext the default, starting context.
     *
     * @return the NumericShaper for the specified ranges.
     *
     * @throws IllegalArgumentException if defaultContext is not a single
     * valid range
     */
    public static NumericShaper getContextualShaper(int ranges,
            int defaultContext) {
        ranges &= ALL_RANGES;
        defaultContext &= ALL_RANGES;
        return new NumericShaper(ranges, defaultContext, true);
    }

    /**
     * Gets the NumericShaper for the specified unicode ranges.
     * The OR logical operation should be used for multiple ranges:
     * NumericShaper.DEVANAGARI | NumericShaper.BENGALI.
     * The NumericShaper returned by this method is contextual
     * in that it supports multiple character ranges, depending
     * on the context. The starting context is EUROPEAN.
     *
     * @param ranges the unicode ranges.
     *
     * @return the NumericShaper for the specified ranges.
     */
    public static NumericShaper getContextualShaper(int ranges) {
        ranges &= ALL_RANGES;
        return new NumericShaper(ranges, EUROPEAN, true);
    }

    /**
     * Gets the masks for all of the ranges supported by this NumericShaper,
     * packed into an int value using the logical OR logical operation
     * for multiple ranges:
     * NumericShaper.DEVANAGARI | NumericShaper.BENGALI.
     *
     * @return all ranges of this NumericShaper.
     */
    public int getRanges() {
        return fRanges;
    }

    /**
     * Gets a NumericShaper for the specified unicode range.
     * The NumericShaper supports only a single range and
     * hence is not contextual.
     *
     * @param singleRange the specified unicode single range.
     *
     * @return the NumericShaper for the specified unicode range.
     *
     * @throws IllegalArgumentException if singleRange is not a single
     * valid range
     */
    public static NumericShaper getShaper(int singleRange) {
        singleRange &= ALL_RANGES;
        return new NumericShaper(singleRange, EUROPEAN, false);
    }

    /**
     * Checks if this NumericShaper is contextual (supporting
     * multiple script ranges) or not.
     *
     * @return true, if this NumericShaper is contextual, false otherwise.
     */
    public boolean isContextual() {
        return fContextual;
    }

    /**
     * Transforms the encoding of the text, starting from the character
     * at index start and transforming count characters,
     * using the specified context. The context is only consulted by a
     * contextual shaper.
     *
     * @param text the text to be shaped.
     * @param start the start offset of the text.
     * @param count the number of characters to be shaped; nothing is done
     * when it is not positive.
     * @param context the context to be used for shaping.
     *
     * @throws IllegalArgumentException if this shaper is contextual and
     * context is not a single valid range
     * @throws ArrayIndexOutOfBoundsException if count is positive and the
     * requested span does not lie inside text; text is left untouched
     * @throws NullPointerException if count is positive and text is null
     */
    public void shape(char[] text, int start, int count, int context) {
        if (isContextual()){
            contextualShape(text, start, count, getIndexFromRange(context));
        } else {
            nonContextualShape(text, start, count);
        }
    }

    /**
     * Transforms the encoding of the text, starting from the character
     * at index start and transforming count characters.
     *
     * @param text the text to be shaped.
     * @param start the start offset of the text.
     * @param count the number of characters to be shaped; nothing is done
     * when it is not positive.
     *
     * @throws ArrayIndexOutOfBoundsException if count is positive and the
     * requested span does not lie inside text; text is left untouched
     * @throws NullPointerException if count is positive and text is null
     */
    public void shape(char[] text, int start, int count) {
        if (isContextual()){
            contextualShape(text, start, count, fDefaultContextIndex);
        } else {
            nonContextualShape(text, start, count);
        }
    }

    /**
     * Validates the span to be shaped before any character is modified, so
     * that a bad span can no longer leave the text partially shaped.
     *
     * @param text an array of chars
     * @param start index of the first character to convert
     * @param count a number of characters to convert
     *
     * @return false if there is nothing to shape (count is not positive),
     * true if the span is valid and not empty
     *
     * @throws ArrayIndexOutOfBoundsException if the span is outside text
     * @throws NullPointerException if count is positive and text is null
     */
    private static boolean hasCharsToShape(char[] text, int start, int count) {
        if (count <= 0) {
            // historically a silent no-op, whatever text and start are
            return false;
        }
        if (text == null) {
            throw new NullPointerException("text is null");
        }
        // "count > length - start" instead of "start + count > length"
        // so that the check itself cannot overflow
        if (start < 0 || start > text.length || count > text.length - start) {
            throw new ArrayIndexOutOfBoundsException(
                    "bad start (" + start + ") or count (" + count
                    + ") for text of length " + text.length);
        }
        return true;
    }

    /**
     * Converts count of digits of the given array of characters from the start
     * index using specified context. This method is applied for the contextual
     * shaping, if the shaper instance is not contextual use nonContextualShape
     * method.
     *
     * @param text an array of chars
     * @param start index of the first character to convert
     * @param count a number of characters to convert
     * @param contextIndex index of the script index to use in shaper
     */
    private void contextualShape(char[] text, int start, int count,
            int contextIndex){
        if (!hasCharsToShape(text, start, count)) {
            return;
        }

        // A starting context that is not among the shaped ranges means
        // "leave digits alone", i.e. EUROPEAN.
        int currIndex = (((1 << contextIndex) & fRanges) != 0)
                ? contextIndex
                : INDEX_EUROPEAN;

        int end = start + count;
        for (int ind = start; ind < end; ind++){
            char ch = text[ind];
            if (MIN_DIGIT <= ch && ch <= MAX_DIGIT){
                // ETHIOPIC has no zero digit, '0' stays as it is
                if (currIndex != INDEX_ETHIOPIC || ch != '0'){
                    text[ind] = (char)(DIGITS_LOW_RANGES[currIndex] + ch);
                }
            } else if (isCharStrong(ch)){
                // a strong character switches the context to its script
                int index = getCharIndex(ch);
                if (currIndex != index){
                    currIndex = resolveContext(index);
                }
            }
        }
    }

    /**
     * Maps the script of a strong character to the context index that has to
     * be used for the digits following it.
     * <p>
     * ARABIC and EASTERN_ARABIC share one script block, so an Arabic
     * character selects whichever of the two is among the shaped ranges;
     * EASTERN_ARABIC is preferred when both are. Any script that is not
     * among the shaped ranges resolves to EUROPEAN.
     *
     * @param scriptIndex script index of the strong character
     *
     * @return the context index to shape the following digits with
     */
    private int resolveContext(int scriptIndex) {
        if (scriptIndex == INDEX_ARABIC || scriptIndex == INDEX_EASTERN_ARABIC) {
            if ((fRanges & EASTERN_ARABIC) != 0) {
                return INDEX_EASTERN_ARABIC;
            }
            if ((fRanges & ARABIC) != 0) {
                return INDEX_ARABIC;
            }
            return INDEX_EUROPEAN;
        }

        return (((1 << scriptIndex) & fRanges) != 0)
                ? scriptIndex
                : INDEX_EUROPEAN;
    }

    /**
     * Converts count of digits of the given array of characters from the start
     * index. Method is applied for non-contextual shaper.
     *
     * @param text an array of chars
     * @param start index of the first character to convert
     * @param count a number of characters to convert
     */
    private void nonContextualShape(char[] text, int start, int count){
        if (!hasCharsToShape(text, start, count)) {
            return;
        }

        // ETHIOPIC has no zero digit, so '0' is excluded from its digit span
        char minDigit = (char)((fRanges == ETHIOPIC) ? 0x0031 : 0x0030);
        int offset = DIGITS_LOW_RANGES[fSingleRangeIndex];

        int end = start + count;
        for (int ind = start; ind < end; ind++){
            char ch = text[ind];
            if (minDigit <= ch && ch <= MAX_DIGIT){
                text[ind] = (char)(offset + ch);
            }
        }
    }

    /**
     * Returns the index of the script of the specified char. The first
     * matching entry of the script ranges table wins, so an Arabic character
     * is always reported as ARABIC, never as EASTERN_ARABIC.
     *
     * @param ch specified unicode character
     *
     * @return script index corresponding to the given char, or the EUROPEAN
     * index if the char belongs to none of the known script ranges
     */
    private static int getCharIndex(char ch){
        for (int i = 0; i < MAX_INDEX; i++){
            int j = i * 2;
            if (SCRIPTS_RANGES[j] <= ch && ch <= SCRIPTS_RANGES[j + 1]){
                return i;
            }
        }

        return INDEX_EUROPEAN;
    }

    /**
     * Returns true if the bidirectional category of the character
     * is strong.
     *
     * @param chr the UTF-16 code unit to test, in the range 0..0xFFFF
     *
     * @return true, if the character is strong, false otherwise
     */
    private static boolean isCharStrong(int chr) {
        // word = chr / 32, bit = chr % 32
        return (STRONG_TEXT_FLAGS[chr >> 5] & (1 << (chr & 31))) != 0;
    }

    /**
     * Restores the working fields from the serialized {@code key} and
     * {@code mask} fields. Called after the object has been deserialized.
     */
    private void updateRangesFields(){
        fContextual = (mask & CONTEXTUAL_MASK) != 0;
        fRanges = mask & ~CONTEXTUAL_MASK;
        if (fContextual){
            fDefaultContextIndex = key;
        } else {
            fSingleRangeIndex = key;
        }
    }

    /**
     * Computes the serialized {@code key} and {@code mask} fields from the
     * working fields. Called right before the object is serialized, so that
     * the stream carries the serialized form mentioned in the documentation
     * of this class.
     */
    private void updateKeyMaskFields(){
        mask = fRanges;
        if (fContextual){
            mask |= CONTEXTUAL_MASK;
            key = fDefaultContextIndex;
        } else{
            key = fSingleRangeIndex;
        }
    }

    /**
     * Write object.
     *
     * @param out the out
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    private void writeObject(java.io.ObjectOutputStream out)
                            throws IOException{
        updateKeyMaskFields();
        out.defaultWriteObject();
    }

    /**
     * Read object.
     *
     * @param in the in
     *
     * @throws IOException Signals that an I/O exception has occurred.
     * @throws ClassNotFoundException the class not found exception
     */
    private void readObject(java.io.ObjectInputStream in)
                            throws IOException, ClassNotFoundException{
        in.defaultReadObject();
        updateRangesFields();
    }

}