1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2011, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 ******************************************************************************* 9 * created on: 2011feb25 10 * created by: Markus W. Scherer 11 */ 12 13 package android.icu.impl; 14 15 /** 16 * Implements the immutable Unicode properties Pattern_Syntax and Pattern_White_Space. 17 * Hardcodes these properties, does not load data, does not depend on other ICU classes. 18 * <p> 19 * Note: Both properties include ASCII as well as non-ASCII, non-Latin-1 code points, 20 * and both properties only include BMP code points (no supplementary ones). 21 * Pattern_Syntax includes some unassigned code points. 22 * <p> 23 * [:Pattern_White_Space:] = 24 * [\u0009-\u000D\ \u0085\u200E\u200F\u2028\u2029] 25 * <p> 26 * [:Pattern_Syntax:] = 27 * [!-/\:-@\[-\^`\{-~\u00A1-\u00A7\u00A9\u00AB\u00AC\u00AE 28 * \u00B0\u00B1\u00B6\u00BB\u00BF\u00D7\u00F7 29 * \u2010-\u2027\u2030-\u203E\u2041-\u2053\u2055-\u205E 30 * \u2190-\u245F\u2500-\u2775\u2794-\u2BFF\u2E00-\u2E7F 31 * \u3001-\u3003\u3008-\u3020\u3030\uFD3E\uFD3F\uFE45\uFE46] 32 * @author mscherer 33 * @hide Only a subset of ICU is exposed in Android 34 */ 35 public final class PatternProps { 36 /** 37 * @return true if c is a Pattern_Syntax code point. 38 */ 39 public static boolean isSyntax(int c) { 40 if(c<0) { 41 return false; 42 } else if(c<=0xff) { 43 return latin1[c]==3; 44 } else if(c<0x2010) { 45 return false; 46 } else if(c<=0x3030) { 47 int bits=syntax2000[index2000[(c-0x2000)>>5]]; 48 return ((bits>>(c&0x1f))&1)!=0; 49 } else if(0xfd3e<=c && c<=0xfe46) { 50 return c<=0xfd3f || 0xfe45<=c; 51 } else { 52 return false; 53 } 54 } 55 56 /** 57 * @return true if c is a Pattern_Syntax or Pattern_White_Space code point. 58 */ 59 public static boolean isSyntaxOrWhiteSpace(int c) { 60 if(c<0) { 61 return false; 62 } else if(c<=0xff) { 63 return latin1[c]!=0; 64 } else if(c<0x200e) { 65 return false; 66 } else if(c<=0x3030) { 67 int bits=syntaxOrWhiteSpace2000[index2000[(c-0x2000)>>5]]; 68 return ((bits>>(c&0x1f))&1)!=0; 69 } else if(0xfd3e<=c && c<=0xfe46) { 70 return c<=0xfd3f || 0xfe45<=c; 71 } else { 72 return false; 73 } 74 } 75 76 /** 77 * @return true if c is a Pattern_White_Space character. 78 */ 79 public static boolean isWhiteSpace(int c) { 80 if(c<0) { 81 return false; 82 } else if(c<=0xff) { 83 return latin1[c]==5; 84 } else if(0x200e<=c && c<=0x2029) { 85 return c<=0x200f || 0x2028<=c; 86 } else { 87 return false; 88 } 89 } 90 91 /** 92 * Skips over Pattern_White_Space starting at index i of the CharSequence. 93 * @return The smallest index at or after i with a non-white space character. 94 */ 95 public static int skipWhiteSpace(CharSequence s, int i) { 96 while(i<s.length() && isWhiteSpace(s.charAt(i))) { 97 ++i; 98 } 99 return i; 100 } 101 102 /** 103 * @return s except with leading and trailing Pattern_White_Space removed. 104 */ 105 public static String trimWhiteSpace(String s) { 106 if(s.length()==0 || (!isWhiteSpace(s.charAt(0)) && !isWhiteSpace(s.charAt(s.length()-1)))) { 107 return s; 108 } 109 int start=0; 110 int limit=s.length(); 111 while(start<limit && isWhiteSpace(s.charAt(start))) { 112 ++start; 113 } 114 if(start<limit) { 115 // There is non-white space at start; we will not move limit below that, 116 // so we need not test start<limit in the loop. 117 while(isWhiteSpace(s.charAt(limit-1))) { 118 --limit; 119 } 120 } 121 return s.substring(start, limit); 122 } 123 124 /** 125 * Tests whether the CharSequence contains a "pattern identifier", that is, 126 * whether it contains only non-Pattern_White_Space, non-Pattern_Syntax characters. 127 * @return true if there are no Pattern_White_Space or Pattern_Syntax characters in s. 128 */ 129 public static boolean isIdentifier(CharSequence s) { 130 int limit=s.length(); 131 if(limit==0) { 132 return false; 133 } 134 int start=0; 135 do { 136 if(isSyntaxOrWhiteSpace(s.charAt(start++))) { 137 return false; 138 } 139 } while(start<limit); 140 return true; 141 } 142 143 /** 144 * Tests whether the CharSequence contains a "pattern identifier", that is, 145 * whether it contains only non-Pattern_White_Space, non-Pattern_Syntax characters. 146 * @return true if there are no Pattern_White_Space or Pattern_Syntax characters 147 * in s between start and (exclusive) limit. 148 */ 149 public static boolean isIdentifier(CharSequence s, int start, int limit) { 150 if(start>=limit) { 151 return false; 152 } 153 do { 154 if(isSyntaxOrWhiteSpace(s.charAt(start++))) { 155 return false; 156 } 157 } while(start<limit); 158 return true; 159 } 160 161 /** 162 * Skips over a "pattern identifier" starting at index i of the CharSequence. 163 * @return The smallest index at or after i with 164 * a Pattern_White_Space or Pattern_Syntax character. 165 */ 166 public static int skipIdentifier(CharSequence s, int i) { 167 while(i<s.length() && !isSyntaxOrWhiteSpace(s.charAt(i))) { 168 ++i; 169 } 170 return i; 171 } 172 173 /* 174 * One byte per Latin-1 character. 175 * Bit 0 is set if either Pattern property is true, 176 * bit 1 if Pattern_Syntax is true, 177 * bit 2 if Pattern_White_Space is true. 178 * That is, Pattern_Syntax is encoded as 3 and Pattern_White_Space as 5. 179 */ 180 private static final byte latin1[]=new byte[] { // 256 181 // WS: 9..D 182 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 0, 0, 183 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 184 // WS: 20 Syntax: 21..2F 185 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 186 // Syntax: 3A..40 187 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 188 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 189 // Syntax: 5B..5E 190 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 191 // Syntax: 60 192 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 193 // Syntax: 7B..7E 194 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 195 // WS: 85 196 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 197 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 198 // Syntax: A1..A7, A9, AB, AC, AE 199 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, 0, 200 // Syntax: B0, B1, B6, BB, BF 201 3, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 3, 202 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 203 // Syntax: D7 204 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 205 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 206 // Syntax: F7 207 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0 208 }; 209 210 /* 211 * One byte per 32 characters from U+2000..U+303F indexing into 212 * a small table of 32-bit data words. 213 * The first two data words are all-zeros and all-ones. 214 */ 215 private static final byte index2000[]=new byte[] { // 130 216 2, 3, 4, 0, 0, 0, 0, 0, // 20xx 217 0, 0, 0, 0, 5, 1, 1, 1, // 21xx 218 1, 1, 1, 1, 1, 1, 1, 1, // 22xx 219 1, 1, 1, 1, 1, 1, 1, 1, // 23xx 220 1, 1, 1, 0, 0, 0, 0, 0, // 24xx 221 1, 1, 1, 1, 1, 1, 1, 1, // 25xx 222 1, 1, 1, 1, 1, 1, 1, 1, // 26xx 223 1, 1, 1, 6, 7, 1, 1, 1, // 27xx 224 1, 1, 1, 1, 1, 1, 1, 1, // 28xx 225 1, 1, 1, 1, 1, 1, 1, 1, // 29xx 226 1, 1, 1, 1, 1, 1, 1, 1, // 2Axx 227 1, 1, 1, 1, 1, 1, 1, 1, // 2Bxx 228 0, 0, 0, 0, 0, 0, 0, 0, // 2Cxx 229 0, 0, 0, 0, 0, 0, 0, 0, // 2Dxx 230 1, 1, 1, 1, 0, 0, 0, 0, // 2Exx 231 0, 0, 0, 0, 0, 0, 0, 0, // 2Fxx 232 8, 9 // 3000..303F 233 }; 234 235 /* 236 * One 32-bit integer per 32 characters. Ranges of all-false and all-true 237 * are mapped to the first two values, other ranges map to appropriate bit patterns. 238 */ 239 private static final int syntax2000[]=new int[] { 240 0, 241 -1, 242 0xffff0000, // 2: 2010..201F 243 0x7fff00ff, // 3: 2020..2027, 2030..203E 244 0x7feffffe, // 4: 2041..2053, 2055..205E 245 0xffff0000, // 5: 2190..219F 246 0x003fffff, // 6: 2760..2775 247 0xfff00000, // 7: 2794..279F 248 0xffffff0e, // 8: 3001..3003, 3008..301F 249 0x00010001 // 9: 3020, 3030 250 }; 251 252 /* 253 * Same as syntax2000, but with additional bits set for the 254 * Pattern_White_Space characters 200E 200F 2028 2029. 255 */ 256 private static final int syntaxOrWhiteSpace2000[]=new int[] { 257 0, 258 -1, 259 0xffffc000, // 2: 200E..201F 260 0x7fff03ff, // 3: 2020..2029, 2030..203E 261 0x7feffffe, // 4: 2041..2053, 2055..205E 262 0xffff0000, // 5: 2190..219F 263 0x003fffff, // 6: 2760..2775 264 0xfff00000, // 7: 2794..279F 265 0xffffff0e, // 8: 3001..3003, 3008..301F 266 0x00010001 // 9: 3020, 3030 267 }; 268 } 269