1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package java.lang; 19 20 import dalvik.annotation.optimization.FastNative; 21 import java.io.Serializable; 22 import java.io.UnsupportedEncodingException; 23 import java.nio.ByteBuffer; 24 import java.nio.CharBuffer; 25 import java.nio.charset.Charset; 26 import java.util.Arrays; 27 import java.util.Comparator; 28 import libcore.util.CharsetUtils; 29 import libcore.util.EmptyArray; 30 31 /** 32 * Class used to generate strings instead of calling String.<init>. 33 * 34 * @hide 35 */ 36 public final class StringFactory { 37 38 // TODO: Remove once native methods are in place. 39 private static final char REPLACEMENT_CHAR = (char) 0xfffd; 40 41 public static String newEmptyString() { 42 return newStringFromChars(EmptyArray.CHAR, 0, 0); 43 } 44 45 public static String newStringFromBytes(byte[] data) { 46 return newStringFromBytes(data, 0, data.length); 47 } 48 49 public static String newStringFromBytes(byte[] data, int high) { 50 return newStringFromBytes(data, high, 0, data.length); 51 } 52 53 public static String newStringFromBytes(byte[] data, int offset, int byteCount) { 54 return newStringFromBytes(data, offset, byteCount, Charset.defaultCharset()); 55 } 56 57 @FastNative 58 public static native String newStringFromBytes(byte[] data, int high, int offset, int byteCount); 59 60 public static String newStringFromBytes(byte[] data, int offset, int byteCount, String charsetName) throws UnsupportedEncodingException { 61 return newStringFromBytes(data, offset, byteCount, Charset.forNameUEE(charsetName)); 62 } 63 64 public static String newStringFromBytes(byte[] data, String charsetName) throws UnsupportedEncodingException { 65 return newStringFromBytes(data, 0, data.length, Charset.forNameUEE(charsetName)); 66 } 67 68 // TODO: Implement this method natively. 69 public static String newStringFromBytes(byte[] data, int offset, int byteCount, Charset charset) { 70 if ((offset | byteCount) < 0 || byteCount > data.length - offset) { 71 throw new StringIndexOutOfBoundsException(data.length, offset, byteCount); 72 } 73 74 char[] value; 75 int length; 76 77 // We inline UTF-8, ISO-8859-1, and US-ASCII decoders for speed. 78 String canonicalCharsetName = charset.name(); 79 if (canonicalCharsetName.equals("UTF-8")) { 80 byte[] d = data; 81 char[] v = new char[byteCount]; 82 83 int idx = offset; 84 int last = offset + byteCount; 85 int s = 0; 86 outer: 87 while (idx < last) { 88 byte b0 = d[idx++]; 89 if ((b0 & 0x80) == 0) { 90 // 0xxxxxxx 91 // Range: U-00000000 - U-0000007F 92 int val = b0 & 0xff; 93 v[s++] = (char) val; 94 } else if (((b0 & 0xe0) == 0xc0) || ((b0 & 0xf0) == 0xe0) || 95 ((b0 & 0xf8) == 0xf0) || ((b0 & 0xfc) == 0xf8) || ((b0 & 0xfe) == 0xfc)) { 96 int utfCount = 1; 97 if ((b0 & 0xf0) == 0xe0) utfCount = 2; 98 else if ((b0 & 0xf8) == 0xf0) utfCount = 3; 99 else if ((b0 & 0xfc) == 0xf8) utfCount = 4; 100 else if ((b0 & 0xfe) == 0xfc) utfCount = 5; 101 102 // 110xxxxx (10xxxxxx)+ 103 // Range: U-00000080 - U-000007FF (count == 1) 104 // Range: U-00000800 - U-0000FFFF (count == 2) 105 // Range: U-00010000 - U-001FFFFF (count == 3) 106 // Range: U-00200000 - U-03FFFFFF (count == 4) 107 // Range: U-04000000 - U-7FFFFFFF (count == 5) 108 109 if (idx + utfCount > last) { 110 v[s++] = REPLACEMENT_CHAR; 111 continue; 112 } 113 114 // Extract usable bits from b0 115 int val = b0 & (0x1f >> (utfCount - 1)); 116 for (int i = 0; i < utfCount; ++i) { 117 byte b = d[idx++]; 118 if ((b & 0xc0) != 0x80) { 119 v[s++] = REPLACEMENT_CHAR; 120 idx--; // Put the input char back 121 continue outer; 122 } 123 // Push new bits in from the right side 124 val <<= 6; 125 val |= b & 0x3f; 126 } 127 128 // Note: Java allows overlong char 129 // specifications To disallow, check that val 130 // is greater than or equal to the minimum 131 // value for each count: 132 // 133 // count min value 134 // ----- ---------- 135 // 1 0x80 136 // 2 0x800 137 // 3 0x10000 138 // 4 0x200000 139 // 5 0x4000000 140 141 // Allow surrogate values (0xD800 - 0xDFFF) to 142 // be specified using 3-byte UTF values only 143 if ((utfCount != 2) && (val >= 0xD800) && (val <= 0xDFFF)) { 144 v[s++] = REPLACEMENT_CHAR; 145 continue; 146 } 147 148 // Reject chars greater than the Unicode maximum of U+10FFFF. 149 if (val > 0x10FFFF) { 150 v[s++] = REPLACEMENT_CHAR; 151 continue; 152 } 153 154 // Encode chars from U+10000 up as surrogate pairs 155 if (val < 0x10000) { 156 v[s++] = (char) val; 157 } else { 158 int x = val & 0xffff; 159 int u = (val >> 16) & 0x1f; 160 int w = (u - 1) & 0xffff; 161 int hi = 0xd800 | (w << 6) | (x >> 10); 162 int lo = 0xdc00 | (x & 0x3ff); 163 v[s++] = (char) hi; 164 v[s++] = (char) lo; 165 } 166 } else { 167 // Illegal values 0x8*, 0x9*, 0xa*, 0xb*, 0xfd-0xff 168 v[s++] = REPLACEMENT_CHAR; 169 } 170 } 171 172 if (s == byteCount) { 173 // We guessed right, so we can use our temporary array as-is. 174 value = v; 175 length = s; 176 } else { 177 // Our temporary array was too big, so reallocate and copy. 178 value = new char[s]; 179 length = s; 180 System.arraycopy(v, 0, value, 0, s); 181 } 182 } else if (canonicalCharsetName.equals("ISO-8859-1")) { 183 value = new char[byteCount]; 184 length = byteCount; 185 CharsetUtils.isoLatin1BytesToChars(data, offset, byteCount, value); 186 } else if (canonicalCharsetName.equals("US-ASCII")) { 187 value = new char[byteCount]; 188 length = byteCount; 189 CharsetUtils.asciiBytesToChars(data, offset, byteCount, value); 190 } else { 191 CharBuffer cb = charset.decode(ByteBuffer.wrap(data, offset, byteCount)); 192 length = cb.length(); 193 if (length > 0) { 194 // We could use cb.array() directly, but that would mean we'd have to trust 195 // the CharsetDecoder doesn't hang on to the CharBuffer and mutate it later, 196 // which would break String's immutability guarantee. It would also tend to 197 // mean that we'd be wasting memory because CharsetDecoder doesn't trim the 198 // array. So we copy. 199 value = new char[length]; 200 System.arraycopy(cb.array(), 0, value, 0, length); 201 } else { 202 value = EmptyArray.CHAR; 203 } 204 } 205 return newStringFromChars(value, 0, length); 206 } 207 208 public static String newStringFromBytes(byte[] data, Charset charset) { 209 return newStringFromBytes(data, 0, data.length, charset); 210 } 211 212 public static String newStringFromChars(char[] data) { 213 return newStringFromChars(data, 0, data.length); 214 } 215 216 public static String newStringFromChars(char[] data, int offset, int charCount) { 217 if ((offset | charCount) < 0 || charCount > data.length - offset) { 218 throw new StringIndexOutOfBoundsException(data.length, offset, charCount); 219 } 220 return newStringFromChars(offset, charCount, data); 221 } 222 223 // The char array passed as {@code java_data} must not be a null reference. 224 @FastNative 225 static native String newStringFromChars(int offset, int charCount, char[] data); 226 227 @FastNative 228 public static native String newStringFromString(String toCopy); 229 230 public static String newStringFromStringBuffer(StringBuffer stringBuffer) { 231 synchronized (stringBuffer) { 232 return newStringFromChars(stringBuffer.getValue(), 0, stringBuffer.length()); 233 } 234 } 235 236 // TODO: Implement this method natively. 237 public static String newStringFromCodePoints(int[] codePoints, int offset, int count) { 238 if (codePoints == null) { 239 throw new NullPointerException("codePoints == null"); 240 } 241 if ((offset | count) < 0 || count > codePoints.length - offset) { 242 throw new StringIndexOutOfBoundsException(codePoints.length, offset, count); 243 } 244 char[] value = new char[count * 2]; 245 int end = offset + count; 246 int length = 0; 247 for (int i = offset; i < end; i++) { 248 length += Character.toChars(codePoints[i], value, length); 249 } 250 return newStringFromChars(value, 0, length); 251 } 252 253 public static String newStringFromStringBuilder(StringBuilder stringBuilder) { 254 return newStringFromChars(stringBuilder.getValue(), 0, stringBuilder.length()); 255 } 256 } 257