/* GENERATED SOURCE. DO NOT MODIFY. */
// 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 * Copyright (C) 1998-2007 International Business Machines Corporation and
 * Unicode, Inc. All Rights Reserved.<br>
 * The Unicode Consortium makes no expressed or implied warranty of any
 * kind, and assumes no liability for errors or omissions.
 * No liability is assumed for incidental and consequential damages
 * in connection with or arising out of the use of the information here.
 */
package android.icu.dev.test.normalizer;

import java.util.BitSet;

import android.icu.dev.test.UTF16Util;
import android.icu.testsharding.MainTestShard;

/**
 * Accesses the Normalization Data used for Forms C and D.<br>
 * Instances are created by NormalizerBuilder through the package-private
 * constructor; this class only reads the tables it is given.
 * @author Mark Davis
 * Updates for supplementary code points:
 * Vladimir Weinstein & Markus Scherer
 */
@MainTestShard
public class NormalizerData {
    // static final String copyright = "Copyright (C) 1998-2003 International Business Machines Corporation and Unicode, Inc.";

    /**
     * Constant for use in getPairwiseComposition
     */
    public static final int NOT_COMPOSITE = '\uFFFF';

    /**
     * Gets the combining class of a character from the
     * Unicode Character Database.
     * @param ch the source character
     * @return value from 0 to 255
     */
    public int getCanonicalClass(int ch) {
        return canonicalClass.get(ch);
    }

    /**
     * Returns the composite of the two characters. If the two
     * characters don't combine, returns NOT_COMPOSITE.
     * @param first first character (e.g. 'c')
     * @param second second character (e.g. \u0327 cedilla)
     * @return composite (e.g. \u00C7 c cedilla)
     */
    public int getPairwiseComposition(int first, int second) {
        // Pack the pair into one long key: first in the upper 32 bits, second
        // in the lower 32 bits. The mask keeps a negative second from being
        // sign-extended over the upper word; keys for non-negative values are
        // exactly what the unmasked expression produced.
        long key = ((long) first << 32) | (second & 0xFFFFFFFFL);
        return compose.get(key);
    }


    /**
     * Gets recursive decomposition of a character from the
     * Unicode Character Database.
     * @param canonical If true, selects the recursive
     *      canonical decomposition, otherwise selects
     *      the recursive compatibility and canonical decomposition.
     * @param ch the source character
     * @param buffer buffer to be filled with the decomposition
     */
    public void getRecursiveDecomposition(boolean canonical, int ch, StringBuffer buffer) {
        String decomp = decompose.get(ch);
        // A mapping is expanded unless canonical-only output was requested and
        // this character is flagged in isCompatibility.
        if (decomp != null && !(canonical && isCompatibility.get(ch))) {
            int i = 0;
            while (i < decomp.length()) {
                int cp = UTF16Util.nextCodePoint(decomp, i);
                getRecursiveDecomposition(canonical, cp, buffer);
                // Advance by the number of UTF-16 code units cp occupies.
                i += UTF16Util.codePointLength(cp);
            }
        } else { // if no decomp, append
            UTF16Util.appendCodePoint(buffer, ch);
        }
    }

    // =================================================
    //                   PRIVATES
    // =================================================

    /**
     * Only accessed by NormalizerBuilder.
     * The tables are stored by reference; no copies are made.
     */
    NormalizerData(IntHashtable canonicalClass, IntStringHashtable decompose,
            LongHashtable compose, BitSet isCompatibility, BitSet isExcluded) {
        this.canonicalClass = canonicalClass;
        this.decompose = decompose;
        this.compose = compose;
        this.isCompatibility = isCompatibility;
        this.isExcluded = isExcluded;
    }

    /**
     * Just accessible for testing.
     */
    boolean getExcluded (char ch) {
        return isExcluded.get(ch);
    }

    /**
     * Just accessible for testing.
     */
    String getRawDecompositionMapping (char ch) {
        return decompose.get(ch);
    }

    /**
     * For now, just use IntHashtable
     * Two-stage tables would be used in an optimized implementation.
     */
    private final IntHashtable canonicalClass;

    /**
     * Maps a character to its decomposition mapping, held as a String.
     * getRecursiveDecomposition treats a null lookup result as
     * "no decomposition".
     * Two-stage tables would be used in an optimized implementation.
     * An optimization could also map chars to a small index, then use that
     * index in a small array.
     */
    private final IntStringHashtable decompose;

    /**
     * Maps from pairs of characters to single.
     * The key packs the pair into a long: first in the upper 32 bits,
     * second in the lower 32 bits.
     * If there is no composition, the value is NOT_COMPOSITE.
     */
    private final LongHashtable compose;

    /**
     * Tells whether decomposition is canonical or not:
     * a set bit marks a character whose mapping is skipped when
     * canonical-only decomposition is requested.
     */
    private final BitSet isCompatibility;

    /**
     * Tells whether character is script-excluded or not.
     * Used only while building, and for testing.
     */
    private final BitSet isExcluded;
}